2012-08-07 19:41:35

by Steven Rostedt

Subject: [RFC PATCH 4/4] ftrace/x86: Add support for -mfentry to x86_64

From: Steven Rostedt <[email protected]>

If the kernel is compiled with gcc 4.6.0 or later, which supports
-mfentry, then use that instead of mcount.
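
The CC_USING_FENTRY define tested below is expected to be set by the
build system rather than by this patch. A sketch of the kbuild side
(treat the exact fragment as an assumption; the real hookup is a
separate patch in this series):

ifdef CONFIG_HAVE_FENTRY
CC_USING_FENTRY := $(call cc-option, -mfentry -DCC_USING_FENTRY)
endif
KBUILD_CFLAGS += -pg $(CC_USING_FENTRY)
KBUILD_AFLAGS += $(CC_USING_FENTRY)

That is, if the architecture selects HAVE_FENTRY and the compiler
accepts -mfentry, every object is built with -mfentry and sees
CC_USING_FENTRY defined; otherwise the build falls back to plain
-pg/mcount.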

With mcount, frame pointers are forced with the -pg option and we
get something like:

<can_vma_merge_before>:
55 push %rbp
48 89 e5 mov %rsp,%rbp
53 push %rbx
41 51 push %r9
e8 fe 6a 39 00 callq ffffffff81483d00 <mcount>
31 c0 xor %eax,%eax
48 89 fb mov %rdi,%rbx
48 89 d7 mov %rdx,%rdi
48 33 73 30 xor 0x30(%rbx),%rsi
48 f7 c6 ff ff ff f7 test $0xfffffffff7ffffff,%rsi

With -mfentry, frame pointers are no longer forced and the call looks
like this:

<can_vma_merge_before>:
e8 33 af 37 00 callq ffffffff81461b40 <__fentry__>
53 push %rbx
48 89 fb mov %rdi,%rbx
31 c0 xor %eax,%eax
48 89 d7 mov %rdx,%rdi
41 51 push %r9
48 33 73 30 xor 0x30(%rbx),%rsi
48 f7 c6 ff ff ff f7 test $0xfffffffff7ffffff,%rsi

This adds the ftrace hook at the beginning of the function, before a
frame is set up, and allows the function callbacks to access
parameters. As kprobes can now use function tracing (at least on x86),
this speeds up kprobe hooks placed at the beginning of a function.
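
Since the hook now runs before anything has clobbered the argument
registers, a callback registered through the regs-saving interface
from earlier in this series can read the traced function's arguments
straight out of pt_regs. A minimal sketch (my_trace_func and my_ops
are made up; FTRACE_OPS_FL_SAVE_REGS and the four-argument callback
signature are assumed from those patches):

#include <linux/ftrace.h>
#include <linux/kernel.h>
#include <linux/ptrace.h>

/* Sketch only: with fentry the argument registers still hold the
 * traced function's parameters, so regs->di is arg1, regs->si is
 * arg2, etc., per the x86_64 calling convention. */
static void notrace my_trace_func(unsigned long ip, unsigned long parent_ip,
				  struct ftrace_ops *op, struct pt_regs *regs)
{
	if (regs)
		pr_info("%pS called from %pS, arg1=%lx\n",
			(void *)ip, (void *)parent_ip, regs->di);
}

static struct ftrace_ops my_ops = {
	.func	= my_trace_func,
	.flags	= FTRACE_OPS_FL_SAVE_REGS,
};

/* register_ftrace_function(&my_ops) would then arm the hook. */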

Cc: Masami Hiramatsu <[email protected]>
Cc: Andi Kleen <[email protected]>
Signed-off-by: Steven Rostedt <[email protected]>
---
arch/x86/Kconfig | 1 +
arch/x86/include/asm/ftrace.h | 7 ++++++-
arch/x86/kernel/entry_64.S | 18 +++++++++++++++++-
arch/x86/kernel/x8664_ksyms_64.c | 6 +++++-
4 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c70684f..bbbf5d8 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -36,6 +36,7 @@ config X86
select HAVE_KRETPROBES
select HAVE_OPTPROBES
select HAVE_FTRACE_MCOUNT_RECORD
+ select HAVE_FENTRY if X86_64
select HAVE_C_RECORDMCOUNT
select HAVE_DYNAMIC_FTRACE
select HAVE_FUNCTION_TRACER
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index a6cae0c..9a25b52 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -35,7 +35,11 @@
#endif

#ifdef CONFIG_FUNCTION_TRACER
-#define MCOUNT_ADDR ((long)(mcount))
+#ifdef CC_USING_FENTRY
+# define MCOUNT_ADDR ((long)(__fentry__))
+#else
+# define MCOUNT_ADDR ((long)(mcount))
+#endif
#define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */

#ifdef CONFIG_DYNAMIC_FTRACE
@@ -46,6 +50,7 @@
#ifndef __ASSEMBLY__
extern void mcount(void);
extern atomic_t modifying_ftrace_code;
+extern void __fentry__(void);

static inline unsigned long ftrace_call_adjust(unsigned long addr)
{
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 38308fa..2add3bb 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -69,9 +69,16 @@

#ifdef CONFIG_FUNCTION_TRACER
#ifdef CONFIG_DYNAMIC_FTRACE
+
+#ifdef CC_USING_FENTRY
+ENTRY(__fentry__)
+ retq
+END(__fentry__)
+#else
ENTRY(mcount)
retq
END(mcount)
+#endif

/* skip is set if stack has been adjusted */
.macro ftrace_caller_setup skip=0
@@ -84,7 +91,11 @@ END(mcount)
movq RIP(%rsp), %rdi
subq $MCOUNT_INSN_SIZE, %rdi
/* Load the parent_ip into the second parameter */
+#ifdef CC_USING_FENTRY
+ movq SS+16(%rsp), %rsi
+#else
movq 8(%rbp), %rsi
+#endif
.endm

ENTRY(ftrace_caller)
@@ -215,9 +226,14 @@ END(mcount)
ENTRY(ftrace_graph_caller)
MCOUNT_SAVE_FRAME

+#ifdef CC_USING_FENTRY
+ leaq SS+16(%rsp), %rdi
+ movq $0, %rdx /* No framepointers needed */
+#else
leaq 8(%rbp), %rdi
- movq RIP(%rsp), %rsi
movq (%rbp), %rdx
+#endif
+ movq RIP(%rsp), %rsi
subq $MCOUNT_INSN_SIZE, %rsi

call prepare_ftrace_return
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 9796c2f..643b236 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -13,9 +13,13 @@
#include <asm/ftrace.h>

#ifdef CONFIG_FUNCTION_TRACER
-/* mcount is defined in assembly */
+/* mcount and __fentry__ are defined in assembly */
+#ifdef CC_USING_FENTRY
+EXPORT_SYMBOL(__fentry__);
+#else
EXPORT_SYMBOL(mcount);
#endif
+#endif

EXPORT_SYMBOL(__get_user_1);
EXPORT_SYMBOL(__get_user_2);
--
1.7.10.4
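
A note on the stack math in the patch: with fentry, the call is the
very first instruction of the traced function, so on entry to
__fentry__ the parent's return address is the second word on the
stack. Assuming the MCOUNT_SAVE_FRAME layout from earlier in this
series (it reserves SS+8 bytes for a pt_regs), the offsets work out as
sketched here:

/*
 * On entry to __fentry__, before MCOUNT_SAVE_FRAME:
 *
 *   8(%rsp)  return address of the traced function   -> parent_ip
 *   0(%rsp)  return address into the traced function -> ip
 *
 * MCOUNT_SAVE_FRAME does "subq $(SS+8), %rsp", so afterwards:
 *
 *   SS+16(%rsp)  parent_ip  (hence "movq SS+16(%rsp), %rsi")
 *   SS+8(%rsp)   ip         (copied into the RIP(%rsp) slot)
 *
 * With mcount the call happens after the frame push, so parent_ip is
 * instead reached through the forced frame pointer at 8(%rbp).
 */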




Subject: Re: [RFC PATCH 4/4] ftrace/x86: Add support for -mfentry to x86_64

(2012/08/08 4:38), Steven Rostedt wrote:
> From: Steven Rostedt <[email protected]>
>
> If the kernel is compiled with gcc 4.6.0 or later, which supports
> -mfentry, then use that instead of mcount.
>
> With mcount, frame pointers are forced with the -pg option and we
> get something like:
>
> <can_vma_merge_before>:
> 55 push %rbp
> 48 89 e5 mov %rsp,%rbp
> 53 push %rbx
> 41 51 push %r9
> e8 fe 6a 39 00 callq ffffffff81483d00 <mcount>
> 31 c0 xor %eax,%eax
> 48 89 fb mov %rdi,%rbx
> 48 89 d7 mov %rdx,%rdi
> 48 33 73 30 xor 0x30(%rbx),%rsi
> 48 f7 c6 ff ff ff f7 test $0xfffffffff7ffffff,%rsi
>
> With -mfentry, frame pointers are no longer forced and the call looks
> like this:
>
> <can_vma_merge_before>:
> e8 33 af 37 00 callq ffffffff81461b40 <__fentry__>
> 53 push %rbx
> 48 89 fb mov %rdi,%rbx
> 31 c0 xor %eax,%eax
> 48 89 d7 mov %rdx,%rdi
> 41 51 push %r9
> 48 33 73 30 xor 0x30(%rbx),%rsi
> 48 f7 c6 ff ff ff f7 test $0xfffffffff7ffffff,%rsi
>
> This adds the ftrace hook at the beginning of the function, before a
> frame is set up, and allows the function callbacks to access
> parameters. As kprobes can now use function tracing (at least on x86),
> this speeds up kprobe hooks placed at the beginning of a function.

This looks good to me :)

Reviewed-by: Masami Hiramatsu <[email protected]>

Thanks!

>
> Cc: Masami Hiramatsu <[email protected]>
> Cc: Andi Kleen <[email protected]>
> Signed-off-by: Steven Rostedt <[email protected]>
> ---
> arch/x86/Kconfig | 1 +
> arch/x86/include/asm/ftrace.h | 7 ++++++-
> arch/x86/kernel/entry_64.S | 18 +++++++++++++++++-
> arch/x86/kernel/x8664_ksyms_64.c | 6 +++++-
> 4 files changed, 29 insertions(+), 3 deletions(-)
>
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index c70684f..bbbf5d8 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -36,6 +36,7 @@ config X86
> select HAVE_KRETPROBES
> select HAVE_OPTPROBES
> select HAVE_FTRACE_MCOUNT_RECORD
> + select HAVE_FENTRY if X86_64
> select HAVE_C_RECORDMCOUNT
> select HAVE_DYNAMIC_FTRACE
> select HAVE_FUNCTION_TRACER
> diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
> index a6cae0c..9a25b52 100644
> --- a/arch/x86/include/asm/ftrace.h
> +++ b/arch/x86/include/asm/ftrace.h
> @@ -35,7 +35,11 @@
> #endif
>
> #ifdef CONFIG_FUNCTION_TRACER
> -#define MCOUNT_ADDR ((long)(mcount))
> +#ifdef CC_USING_FENTRY
> +# define MCOUNT_ADDR ((long)(__fentry__))
> +#else
> +# define MCOUNT_ADDR ((long)(mcount))
> +#endif
> #define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */
>
> #ifdef CONFIG_DYNAMIC_FTRACE
> @@ -46,6 +50,7 @@
> #ifndef __ASSEMBLY__
> extern void mcount(void);
> extern atomic_t modifying_ftrace_code;
> +extern void __fentry__(void);
>
> static inline unsigned long ftrace_call_adjust(unsigned long addr)
> {
> diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
> index 38308fa..2add3bb 100644
> --- a/arch/x86/kernel/entry_64.S
> +++ b/arch/x86/kernel/entry_64.S
> @@ -69,9 +69,16 @@
>
> #ifdef CONFIG_FUNCTION_TRACER
> #ifdef CONFIG_DYNAMIC_FTRACE
> +
> +#ifdef CC_USING_FENTRY
> +ENTRY(__fentry__)
> + retq
> +END(__fentry__)
> +#else
> ENTRY(mcount)
> retq
> END(mcount)
> +#endif
>
> /* skip is set if stack has been adjusted */
> .macro ftrace_caller_setup skip=0
> @@ -84,7 +91,11 @@ END(mcount)
> movq RIP(%rsp), %rdi
> subq $MCOUNT_INSN_SIZE, %rdi
> /* Load the parent_ip into the second parameter */
> +#ifdef CC_USING_FENTRY
> + movq SS+16(%rsp), %rsi
> +#else
> movq 8(%rbp), %rsi
> +#endif
> .endm
>
> ENTRY(ftrace_caller)
> @@ -215,9 +226,14 @@ END(mcount)
> ENTRY(ftrace_graph_caller)
> MCOUNT_SAVE_FRAME
>
> +#ifdef CC_USING_FENTRY
> + leaq SS+16(%rsp), %rdi
> + movq $0, %rdx /* No framepointers needed */
> +#else
> leaq 8(%rbp), %rdi
> - movq RIP(%rsp), %rsi
> movq (%rbp), %rdx
> +#endif
> + movq RIP(%rsp), %rsi
> subq $MCOUNT_INSN_SIZE, %rsi
>
> call prepare_ftrace_return
> diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
> index 9796c2f..643b236 100644
> --- a/arch/x86/kernel/x8664_ksyms_64.c
> +++ b/arch/x86/kernel/x8664_ksyms_64.c
> @@ -13,9 +13,13 @@
> #include <asm/ftrace.h>
>
> #ifdef CONFIG_FUNCTION_TRACER
> -/* mcount is defined in assembly */
> +/* mcount and __fentry__ are defined in assembly */
> +#ifdef CC_USING_FENTRY
> +EXPORT_SYMBOL(__fentry__);
> +#else
> EXPORT_SYMBOL(mcount);
> #endif
> +#endif
>
> EXPORT_SYMBOL(__get_user_1);
> EXPORT_SYMBOL(__get_user_2);
>


--
Masami HIRAMATSU
Software Platform Research Dept. Linux Technology Center
Hitachi, Ltd., Yokohama Research Laboratory
E-mail: [email protected]

2012-08-09 13:46:15

by Steven Rostedt

Subject: Re: [RFC PATCH 4/4] ftrace/x86: Add support for -mfentry to x86_64

Peter and Masami,

During my final tests, I found that this change breaks the
!DYNAMIC_FTRACE config. That is, when we don't do the run-time updates
of mcount calls to nops, the compiler will emit calls to __fentry__
but entry_64.S still only implements mcount.
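
In other words, the compiler and entry_64.S end up disagreeing about
the symbol name. A sketch of the mismatch (the exact linker message
will vary):

/*
 * !CONFIG_DYNAMIC_FTRACE + CC_USING_FENTRY, before this fix:
 *
 *   compiler emits in every traced function:  call __fentry__
 *   entry_64.S still defines only:            ENTRY(mcount)
 *
 * => the final link fails with an undefined reference to __fentry__.
 */

The fix below defines function_hook once, outside the
CONFIG_DYNAMIC_FTRACE split, so both variants carry whichever name the
compiler actually calls.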

I fixed this in the patch below. But as you two have already acked and
reviewed it, I can't add your tags now that I have changed the code.
Can you ack/review it again?

Thanks!

-- Steve


On Tue, 2012-08-07 at 15:38 -0400, Steven Rostedt wrote:
> From: Steven Rostedt <[email protected]>
>
> If the kernel is compiled with gcc 4.6.0 or later, which supports
> -mfentry, then use that instead of mcount.
>
> With mcount, frame pointers are forced with the -pg option and we
> get something like:
>
> <can_vma_merge_before>:
> 55 push %rbp
> 48 89 e5 mov %rsp,%rbp
> 53 push %rbx
> 41 51 push %r9
> e8 fe 6a 39 00 callq ffffffff81483d00 <mcount>
> 31 c0 xor %eax,%eax
> 48 89 fb mov %rdi,%rbx
> 48 89 d7 mov %rdx,%rdi
> 48 33 73 30 xor 0x30(%rbx),%rsi
> 48 f7 c6 ff ff ff f7 test $0xfffffffff7ffffff,%rsi
>
> With -mfentry, frame pointers are no longer forced and the call looks
> like this:
>
> <can_vma_merge_before>:
> e8 33 af 37 00 callq ffffffff81461b40 <__fentry__>
> 53 push %rbx
> 48 89 fb mov %rdi,%rbx
> 31 c0 xor %eax,%eax
> 48 89 d7 mov %rdx,%rdi
> 41 51 push %r9
> 48 33 73 30 xor 0x30(%rbx),%rsi
> 48 f7 c6 ff ff ff f7 test $0xfffffffff7ffffff,%rsi
>
> This adds the ftrace hook at the beginning of the function, before a
> frame is set up, and allows the function callbacks to access
> parameters. As kprobes can now use function tracing (at least on x86),
> this speeds up kprobe hooks placed at the beginning of a function.
>
> Cc: Masami Hiramatsu <[email protected]>
> Cc: Andi Kleen <[email protected]>
> Signed-off-by: Steven Rostedt <[email protected]>

(change log kept the same)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c70684f..bbbf5d8 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -36,6 +36,7 @@ config X86
select HAVE_KRETPROBES
select HAVE_OPTPROBES
select HAVE_FTRACE_MCOUNT_RECORD
+ select HAVE_FENTRY if X86_64
select HAVE_C_RECORDMCOUNT
select HAVE_DYNAMIC_FTRACE
select HAVE_FUNCTION_TRACER
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index a6cae0c..9a25b52 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -35,7 +35,11 @@
#endif

#ifdef CONFIG_FUNCTION_TRACER
-#define MCOUNT_ADDR ((long)(mcount))
+#ifdef CC_USING_FENTRY
+# define MCOUNT_ADDR ((long)(__fentry__))
+#else
+# define MCOUNT_ADDR ((long)(mcount))
+#endif
#define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */

#ifdef CONFIG_DYNAMIC_FTRACE
@@ -46,6 +50,7 @@
#ifndef __ASSEMBLY__
extern void mcount(void);
extern atomic_t modifying_ftrace_code;
+extern void __fentry__(void);

static inline unsigned long ftrace_call_adjust(unsigned long addr)
{
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 38308fa..a698521 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -68,10 +68,18 @@
.section .entry.text, "ax"

#ifdef CONFIG_FUNCTION_TRACER
+
+#ifdef CC_USING_FENTRY
+# define function_hook __fentry__
+#else
+# define function_hook mcount
+#endif
+
#ifdef CONFIG_DYNAMIC_FTRACE
-ENTRY(mcount)
+
+ENTRY(function_hook)
retq
-END(mcount)
+END(function_hook)

/* skip is set if stack has been adjusted */
.macro ftrace_caller_setup skip=0
@@ -84,7 +92,11 @@ END(mcount)
movq RIP(%rsp), %rdi
subq $MCOUNT_INSN_SIZE, %rdi
/* Load the parent_ip into the second parameter */
+#ifdef CC_USING_FENTRY
+ movq SS+16(%rsp), %rsi
+#else
movq 8(%rbp), %rsi
+#endif
.endm

ENTRY(ftrace_caller)
@@ -177,7 +189,8 @@ END(ftrace_regs_caller)


#else /* ! CONFIG_DYNAMIC_FTRACE */
-ENTRY(mcount)
+
+ENTRY(function_hook)
cmpl $0, function_trace_stop
jne ftrace_stub

@@ -199,7 +212,11 @@ trace:
MCOUNT_SAVE_FRAME

movq RIP(%rsp), %rdi
+#ifdef CC_USING_FENTRY
+ movq SS+16(%rsp), %rsi
+#else
movq 8(%rbp), %rsi
+#endif
subq $MCOUNT_INSN_SIZE, %rdi

call *ftrace_trace_function
@@ -207,7 +224,7 @@ trace:
MCOUNT_RESTORE_FRAME

jmp ftrace_stub
-END(mcount)
+END(function_hook)
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* CONFIG_FUNCTION_TRACER */

@@ -215,9 +232,14 @@ END(mcount)
ENTRY(ftrace_graph_caller)
MCOUNT_SAVE_FRAME

+#ifdef CC_USING_FENTRY
+ leaq SS+16(%rsp), %rdi
+ movq $0, %rdx /* No framepointers needed */
+#else
leaq 8(%rbp), %rdi
- movq RIP(%rsp), %rsi
movq (%rbp), %rdx
+#endif
+ movq RIP(%rsp), %rsi
subq $MCOUNT_INSN_SIZE, %rsi

call prepare_ftrace_return
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 9796c2f..643b236 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -13,9 +13,13 @@
#include <asm/ftrace.h>

#ifdef CONFIG_FUNCTION_TRACER
-/* mcount is defined in assembly */
+/* mcount and __fentry__ are defined in assembly */
+#ifdef CC_USING_FENTRY
+EXPORT_SYMBOL(__fentry__);
+#else
EXPORT_SYMBOL(mcount);
#endif
+#endif

EXPORT_SYMBOL(__get_user_1);
EXPORT_SYMBOL(__get_user_2);

2012-08-09 13:48:46

by Steven Rostedt

Subject: Re: [RFC PATCH 4/4] ftrace/x86: Add support for -mfentry to x86_64

On Thu, 2012-08-09 at 09:46 -0400, Steven Rostedt wrote:
> Peter and Masami,
>
> During my final tests, I found that this change breaks the
> !DYNAMIC_FTRACE config. That is, when we don't do the run-time updates
> of mcount calls to nops, the compiler will emit calls to __fentry__
> but entry_64.S still only implements mcount.
>
> I fixed this in the patch below. But as you two have already acked and
> reviewed it, I can't add your tags now that I have changed the code.
> Can you ack/review it again?
>
> Thanks!
>

These are the changes that were made against the original patch:

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 2add3bb..a698521 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -68,18 +68,19 @@
.section .entry.text, "ax"

#ifdef CONFIG_FUNCTION_TRACER
-#ifdef CONFIG_DYNAMIC_FTRACE

#ifdef CC_USING_FENTRY
-ENTRY(__fentry__)
- retq
-END(__fentry__)
+# define function_hook __fentry__
#else
-ENTRY(mcount)
- retq
-END(mcount)
+# define function_hook mcount
#endif

+#ifdef CONFIG_DYNAMIC_FTRACE
+
+ENTRY(function_hook)
+ retq
+END(function_hook)
+
/* skip is set if stack has been adjusted */
.macro ftrace_caller_setup skip=0
MCOUNT_SAVE_FRAME \skip
@@ -188,7 +189,8 @@ END(ftrace_regs_caller)


#else /* ! CONFIG_DYNAMIC_FTRACE */
-ENTRY(mcount)
+
+ENTRY(function_hook)
cmpl $0, function_trace_stop
jne ftrace_stub

@@ -210,7 +212,11 @@ trace:
MCOUNT_SAVE_FRAME

movq RIP(%rsp), %rdi
+#ifdef CC_USING_FENTRY
+ movq SS+16(%rsp), %rsi
+#else
movq 8(%rbp), %rsi
+#endif
subq $MCOUNT_INSN_SIZE, %rdi

call *ftrace_trace_function
@@ -218,7 +224,7 @@ trace:
MCOUNT_RESTORE_FRAME

jmp ftrace_stub
-END(mcount)
+END(function_hook)
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* CONFIG_FUNCTION_TRACER */
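
One detail that makes this work: entry_64.S is a .S file and runs
through the C preprocessor, so the plain #define renames the symbol
everywhere it is used. A sketch of the expansion with CC_USING_FENTRY
defined:

/*
 *   ENTRY(function_hook)  ->  ENTRY(__fentry__)
 *   END(function_hook)    ->  END(__fentry__)
 *
 * and without CC_USING_FENTRY the same lines expand to mcount, so one
 * body always matches the call the compiler emits.
 */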


Subject: Re: Re: [RFC PATCH 4/4] ftrace/x86: Add support for -mfentry to x86_64

(2012/08/09 22:46), Steven Rostedt wrote:
> Peter and Masami,
>
> During my final tests, I found that this change breaks the
> !DYNAMIC_FTRACE config. That is, when we don't do the run-time updates
> of mcount calls to nops, the compiler will emit calls to __fentry__
> but entry_64.S still only implements mcount.

Ah, right, we have to take care of that.

>
> I fixed this in the patch below. But as you two have already acked and
> reviewed it, I can't add your tags now that I have changed the code.
> Can you ack/review it again?

This looks good to me.

Reviewed-by: Masami Hiramatsu <[email protected]>

>
> Thanks!
>
> -- Steve
>
>
> On Tue, 2012-08-07 at 15:38 -0400, Steven Rostedt wrote:
>> From: Steven Rostedt <[email protected]>
>>
>> If the kernel is compiled with gcc 4.6.0 or later, which supports
>> -mfentry, then use that instead of mcount.
>>
>> With mcount, frame pointers are forced with the -pg option and we
>> get something like:
>>
>> <can_vma_merge_before>:
>> 55 push %rbp
>> 48 89 e5 mov %rsp,%rbp
>> 53 push %rbx
>> 41 51 push %r9
>> e8 fe 6a 39 00 callq ffffffff81483d00 <mcount>
>> 31 c0 xor %eax,%eax
>> 48 89 fb mov %rdi,%rbx
>> 48 89 d7 mov %rdx,%rdi
>> 48 33 73 30 xor 0x30(%rbx),%rsi
>> 48 f7 c6 ff ff ff f7 test $0xfffffffff7ffffff,%rsi
>>
>> With -mfentry, frame pointers are no longer forced and the call looks
>> like this:
>>
>> <can_vma_merge_before>:
>> e8 33 af 37 00 callq ffffffff81461b40 <__fentry__>
>> 53 push %rbx
>> 48 89 fb mov %rdi,%rbx
>> 31 c0 xor %eax,%eax
>> 48 89 d7 mov %rdx,%rdi
>> 41 51 push %r9
>> 48 33 73 30 xor 0x30(%rbx),%rsi
>> 48 f7 c6 ff ff ff f7 test $0xfffffffff7ffffff,%rsi
>>
>> This adds the ftrace hook at the beginning of the function, before a
>> frame is set up, and allows the function callbacks to access
>> parameters. As kprobes can now use function tracing (at least on x86),
>> this speeds up kprobe hooks placed at the beginning of a function.
>>
>> Cc: Masami Hiramatsu <[email protected]>
>> Cc: Andi Kleen <[email protected]>
>> Signed-off-by: Steven Rostedt <[email protected]>
>
> (change log kept the same)
>
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index c70684f..bbbf5d8 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -36,6 +36,7 @@ config X86
> select HAVE_KRETPROBES
> select HAVE_OPTPROBES
> select HAVE_FTRACE_MCOUNT_RECORD
> + select HAVE_FENTRY if X86_64
> select HAVE_C_RECORDMCOUNT
> select HAVE_DYNAMIC_FTRACE
> select HAVE_FUNCTION_TRACER
> diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
> index a6cae0c..9a25b52 100644
> --- a/arch/x86/include/asm/ftrace.h
> +++ b/arch/x86/include/asm/ftrace.h
> @@ -35,7 +35,11 @@
> #endif
>
> #ifdef CONFIG_FUNCTION_TRACER
> -#define MCOUNT_ADDR ((long)(mcount))
> +#ifdef CC_USING_FENTRY
> +# define MCOUNT_ADDR ((long)(__fentry__))
> +#else
> +# define MCOUNT_ADDR ((long)(mcount))
> +#endif
> #define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */
>
> #ifdef CONFIG_DYNAMIC_FTRACE
> @@ -46,6 +50,7 @@
> #ifndef __ASSEMBLY__
> extern void mcount(void);
> extern atomic_t modifying_ftrace_code;
> +extern void __fentry__(void);
>
> static inline unsigned long ftrace_call_adjust(unsigned long addr)
> {
> diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
> index 38308fa..a698521 100644
> --- a/arch/x86/kernel/entry_64.S
> +++ b/arch/x86/kernel/entry_64.S
> @@ -68,10 +68,18 @@
> .section .entry.text, "ax"
>
> #ifdef CONFIG_FUNCTION_TRACER
> +
> +#ifdef CC_USING_FENTRY
> +# define function_hook __fentry__
> +#else
> +# define function_hook mcount
> +#endif
> +
> #ifdef CONFIG_DYNAMIC_FTRACE
> -ENTRY(mcount)
> +
> +ENTRY(function_hook)
> retq
> -END(mcount)
> +END(function_hook)
>
> /* skip is set if stack has been adjusted */
> .macro ftrace_caller_setup skip=0
> @@ -84,7 +92,11 @@ END(mcount)
> movq RIP(%rsp), %rdi
> subq $MCOUNT_INSN_SIZE, %rdi
> /* Load the parent_ip into the second parameter */
> +#ifdef CC_USING_FENTRY
> + movq SS+16(%rsp), %rsi
> +#else
> movq 8(%rbp), %rsi
> +#endif
> .endm
>
> ENTRY(ftrace_caller)
> @@ -177,7 +189,8 @@ END(ftrace_regs_caller)
>
>
> #else /* ! CONFIG_DYNAMIC_FTRACE */
> -ENTRY(mcount)
> +
> +ENTRY(function_hook)
> cmpl $0, function_trace_stop
> jne ftrace_stub
>
> @@ -199,7 +212,11 @@ trace:
> MCOUNT_SAVE_FRAME
>
> movq RIP(%rsp), %rdi
> +#ifdef CC_USING_FENTRY
> + movq SS+16(%rsp), %rsi
> +#else
> movq 8(%rbp), %rsi
> +#endif
> subq $MCOUNT_INSN_SIZE, %rdi
>
> call *ftrace_trace_function
> @@ -207,7 +224,7 @@ trace:
> MCOUNT_RESTORE_FRAME
>
> jmp ftrace_stub
> -END(mcount)
> +END(function_hook)
> #endif /* CONFIG_DYNAMIC_FTRACE */
> #endif /* CONFIG_FUNCTION_TRACER */
>
> @@ -215,9 +232,14 @@ END(mcount)
> ENTRY(ftrace_graph_caller)
> MCOUNT_SAVE_FRAME
>
> +#ifdef CC_USING_FENTRY
> + leaq SS+16(%rsp), %rdi
> + movq $0, %rdx /* No framepointers needed */
> +#else
> leaq 8(%rbp), %rdi
> - movq RIP(%rsp), %rsi
> movq (%rbp), %rdx
> +#endif
> + movq RIP(%rsp), %rsi
> subq $MCOUNT_INSN_SIZE, %rsi
>
> call prepare_ftrace_return
> diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
> index 9796c2f..643b236 100644
> --- a/arch/x86/kernel/x8664_ksyms_64.c
> +++ b/arch/x86/kernel/x8664_ksyms_64.c
> @@ -13,9 +13,13 @@
> #include <asm/ftrace.h>
>
> #ifdef CONFIG_FUNCTION_TRACER
> -/* mcount is defined in assembly */
> +/* mcount and __fentry__ are defined in assembly */
> +#ifdef CC_USING_FENTRY
> +EXPORT_SYMBOL(__fentry__);
> +#else
> EXPORT_SYMBOL(mcount);
> #endif
> +#endif
>
> EXPORT_SYMBOL(__get_user_1);
> EXPORT_SYMBOL(__get_user_2);
>
>


--
Masami HIRAMATSU
Software Platform Research Dept. Linux Technology Center
Hitachi, Ltd., Yokohama Research Laboratory
E-mail: [email protected]

2012-08-27 17:06:37

by Steven Rostedt

Subject: [tip:perf/core] ftrace/x86: Add support for -mfentry to x86_64

Commit-ID: d57c5d51a30152f3175d2344cb6395f08bf8ee0c
Gitweb: http://git.kernel.org/tip/d57c5d51a30152f3175d2344cb6395f08bf8ee0c
Author: Steven Rostedt <[email protected]>
AuthorDate: Wed, 9 Feb 2011 13:32:18 -0500
Committer: Steven Rostedt <[email protected]>
CommitDate: Thu, 23 Aug 2012 11:26:36 -0400

ftrace/x86: Add support for -mfentry to x86_64

If the kernel is compiled with gcc 4.6.0 or later, which supports
-mfentry, then use that instead of mcount.

With mcount, frame pointers are forced with the -pg option and we
get something like:

<can_vma_merge_before>:
55 push %rbp
48 89 e5 mov %rsp,%rbp
53 push %rbx
41 51 push %r9
e8 fe 6a 39 00 callq ffffffff81483d00 <mcount>
31 c0 xor %eax,%eax
48 89 fb mov %rdi,%rbx
48 89 d7 mov %rdx,%rdi
48 33 73 30 xor 0x30(%rbx),%rsi
48 f7 c6 ff ff ff f7 test $0xfffffffff7ffffff,%rsi

With -mfentry, frame pointers are no longer forced and the call looks
like this:

<can_vma_merge_before>:
e8 33 af 37 00 callq ffffffff81461b40 <__fentry__>
53 push %rbx
48 89 fb mov %rdi,%rbx
31 c0 xor %eax,%eax
48 89 d7 mov %rdx,%rdi
41 51 push %r9
48 33 73 30 xor 0x30(%rbx),%rsi
48 f7 c6 ff ff ff f7 test $0xfffffffff7ffffff,%rsi

This adds the ftrace hook at the beginning of the function, before a
frame is set up, and allows the function callbacks to access
parameters. As kprobes can now use function tracing (at least on x86),
this speeds up kprobe hooks placed at the beginning of a function.

Link: http://lkml.kernel.org/r/[email protected]

Acked-by: Ingo Molnar <[email protected]>
Reviewed-by: Masami Hiramatsu <[email protected]>
Cc: Andi Kleen <[email protected]>
Signed-off-by: Steven Rostedt <[email protected]>
---
arch/x86/Kconfig | 1 +
arch/x86/include/asm/ftrace.h | 7 ++++++-
arch/x86/kernel/entry_64.S | 32 +++++++++++++++++++++++++++-----
arch/x86/kernel/x8664_ksyms_64.c | 6 +++++-
4 files changed, 39 insertions(+), 7 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index a2d19ee..28dd891 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -36,6 +36,7 @@ config X86
select HAVE_KRETPROBES
select HAVE_OPTPROBES
select HAVE_FTRACE_MCOUNT_RECORD
+ select HAVE_FENTRY if X86_64
select HAVE_C_RECORDMCOUNT
select HAVE_DYNAMIC_FTRACE
select HAVE_FUNCTION_TRACER
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index a6cae0c..9a25b52 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -35,7 +35,11 @@
#endif

#ifdef CONFIG_FUNCTION_TRACER
-#define MCOUNT_ADDR ((long)(mcount))
+#ifdef CC_USING_FENTRY
+# define MCOUNT_ADDR ((long)(__fentry__))
+#else
+# define MCOUNT_ADDR ((long)(mcount))
+#endif
#define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */

#ifdef CONFIG_DYNAMIC_FTRACE
@@ -46,6 +50,7 @@
#ifndef __ASSEMBLY__
extern void mcount(void);
extern atomic_t modifying_ftrace_code;
+extern void __fentry__(void);

static inline unsigned long ftrace_call_adjust(unsigned long addr)
{
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index b7a81dc..ed767b7 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -68,10 +68,18 @@
.section .entry.text, "ax"

#ifdef CONFIG_FUNCTION_TRACER
+
+#ifdef CC_USING_FENTRY
+# define function_hook __fentry__
+#else
+# define function_hook mcount
+#endif
+
#ifdef CONFIG_DYNAMIC_FTRACE
-ENTRY(mcount)
+
+ENTRY(function_hook)
retq
-END(mcount)
+END(function_hook)

/* skip is set if stack has been adjusted */
.macro ftrace_caller_setup skip=0
@@ -84,7 +92,11 @@ END(mcount)
movq RIP(%rsp), %rdi
subq $MCOUNT_INSN_SIZE, %rdi
/* Load the parent_ip into the second parameter */
+#ifdef CC_USING_FENTRY
+ movq SS+16(%rsp), %rsi
+#else
movq 8(%rbp), %rsi
+#endif
.endm

ENTRY(ftrace_caller)
@@ -177,7 +189,8 @@ END(ftrace_regs_caller)


#else /* ! CONFIG_DYNAMIC_FTRACE */
-ENTRY(mcount)
+
+ENTRY(function_hook)
cmpl $0, function_trace_stop
jne ftrace_stub

@@ -199,7 +212,11 @@ trace:
MCOUNT_SAVE_FRAME

movq RIP(%rsp), %rdi
+#ifdef CC_USING_FENTRY
+ movq SS+16(%rsp), %rsi
+#else
movq 8(%rbp), %rsi
+#endif
subq $MCOUNT_INSN_SIZE, %rdi

call *ftrace_trace_function
@@ -207,7 +224,7 @@ trace:
MCOUNT_RESTORE_FRAME

jmp ftrace_stub
-END(mcount)
+END(function_hook)
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* CONFIG_FUNCTION_TRACER */

@@ -215,9 +232,14 @@ END(mcount)
ENTRY(ftrace_graph_caller)
MCOUNT_SAVE_FRAME

+#ifdef CC_USING_FENTRY
+ leaq SS+16(%rsp), %rdi
+ movq $0, %rdx /* No framepointers needed */
+#else
leaq 8(%rbp), %rdi
- movq RIP(%rsp), %rsi
movq (%rbp), %rdx
+#endif
+ movq RIP(%rsp), %rsi
subq $MCOUNT_INSN_SIZE, %rsi

call prepare_ftrace_return
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 6020f6f..1330dd1 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -13,9 +13,13 @@
#include <asm/ftrace.h>

#ifdef CONFIG_FUNCTION_TRACER
-/* mcount is defined in assembly */
+/* mcount and __fentry__ are defined in assembly */
+#ifdef CC_USING_FENTRY
+EXPORT_SYMBOL(__fentry__);
+#else
EXPORT_SYMBOL(mcount);
#endif
+#endif

EXPORT_SYMBOL(__get_user_1);
EXPORT_SYMBOL(__get_user_2);