2008-11-16 05:02:21

by Frederic Weisbecker

Subject: [PATCH 4/4] tracing/function-return-tracer: Support for dynamic ftrace on function return tracer

This patch adds support for dynamic tracing to the function return tracer.
The only difference from normal dynamic function tracing is that we don't
need to hook a particular callback: all we want is to dynamically nop or set
the calls to ftrace_caller (which is ftrace_return_caller here).

Sanity checks ensure that we are not trying to launch dynamic return tracing
while normal function tracing is already running.
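
As a usage sketch (the handler and init function names below are
hypothetical; struct ftrace_retfunc and register_ftrace_return() are the
interfaces this patch touches), a caller now has to check the return value,
since registration fails with -EBUSY when normal function tracing is
already running:

#include <linux/ftrace.h>

/* Hypothetical example handler: the fields are filled in by
 * ftrace_return_to_handler() when a traced function returns. */
static void my_return_handler(struct ftrace_retfunc *trace)
{
        /* use trace->func, trace->ret, trace->calltime, trace->rettime */
}

static int __init my_tracer_init(void)
{
        /* Returns -EBUSY if normal function tracing is running. */
        return register_ftrace_return(&my_return_handler);
}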

An example of trace with getnstimeofday set as a filter:

ktime_get_ts+0x22/0x50 -> getnstimeofday (2283 ns)
ktime_get_ts+0x22/0x50 -> getnstimeofday (1396 ns)
ktime_get_ts+0x22/0x50 -> getnstimeofday (1382 ns)
ktime_get_ts+0x22/0x50 -> getnstimeofday (1825 ns)
ktime_get_ts+0x22/0x50 -> getnstimeofday (1426 ns)
ktime_get_ts+0x22/0x50 -> getnstimeofday (1464 ns)
ktime_get_ts+0x22/0x50 -> getnstimeofday (1524 ns)
ktime_get_ts+0x22/0x50 -> getnstimeofday (1382 ns)
ktime_get_ts+0x22/0x50 -> getnstimeofday (1382 ns)
ktime_get_ts+0x22/0x50 -> getnstimeofday (1434 ns)
ktime_get_ts+0x22/0x50 -> getnstimeofday (1464 ns)
ktime_get_ts+0x22/0x50 -> getnstimeofday (1502 ns)
ktime_get_ts+0x22/0x50 -> getnstimeofday (1404 ns)
ktime_get_ts+0x22/0x50 -> getnstimeofday (1397 ns)
ktime_get_ts+0x22/0x50 -> getnstimeofday (1051 ns)
ktime_get_ts+0x22/0x50 -> getnstimeofday (1314 ns)
ktime_get_ts+0x22/0x50 -> getnstimeofday (1344 ns)
ktime_get_ts+0x22/0x50 -> getnstimeofday (1163 ns)
ktime_get_ts+0x22/0x50 -> getnstimeofday (1390 ns)
ktime_get_ts+0x22/0x50 -> getnstimeofday (1374 ns)

Signed-off-by: Frederic Weisbecker <[email protected]>
Cc: Steven Rostedt <[email protected]>
---
arch/x86/kernel/entry_32.S | 18 ++-
arch/x86/kernel/ftrace.c | 258 +++++++++++++++++----------------
include/linux/ftrace.h | 16 ++-
kernel/trace/Kconfig | 1 -
kernel/trace/ftrace.c | 58 +++++++-
kernel/trace/trace_functions_return.c | 15 +--
6 files changed, 211 insertions(+), 155 deletions(-)

diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 266b317..fa4d1b1 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1200,7 +1200,7 @@ ENTRY(mcount)
jnz trace
#ifdef CONFIG_FUNCTION_RET_TRACER
cmpl $ftrace_stub, ftrace_function_return
- jnz trace_return
+ jnz ftrace_return_caller
#endif
.globl ftrace_stub
ftrace_stub:
@@ -1221,9 +1221,15 @@ trace:
popl %ecx
popl %eax
jmp ftrace_stub
+END(mcount)
+#endif /* CONFIG_DYNAMIC_FTRACE */
+#endif /* CONFIG_FUNCTION_TRACER */

#ifdef CONFIG_FUNCTION_RET_TRACER
-trace_return:
+ENTRY(ftrace_return_caller)
+ cmpl $0, function_trace_stop
+ jne ftrace_stub
+
pushl %eax
pushl %ecx
pushl %edx
@@ -1233,7 +1239,8 @@ trace_return:
popl %edx
popl %ecx
popl %eax
- jmp ftrace_stub
+ ret
+END(ftrace_return_caller)

.globl return_to_handler
return_to_handler:
@@ -1247,10 +1254,7 @@ return_to_handler:
popl %ecx
popl %eax
ret
-#endif /* CONFIG_FUNCTION_RET_TRACER */
-END(mcount)
-#endif /* CONFIG_DYNAMIC_FTRACE */
-#endif /* CONFIG_FUNCTION_TRACER */
+#endif

.section .rodata,"a"
#include "syscall_table_32.S"
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index d98b5a8..924153e 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -24,134 +24,6 @@
#include <asm/nmi.h>


-
-#ifdef CONFIG_FUNCTION_RET_TRACER
-
-/*
- * These functions are picked from those used on
- * this page for dynamic ftrace. They have been
- * simplified to ignore all traces in NMI context.
- */
-static atomic_t in_nmi;
-
-void ftrace_nmi_enter(void)
-{
- atomic_inc(&in_nmi);
-}
-
-void ftrace_nmi_exit(void)
-{
- atomic_dec(&in_nmi);
-}
-
-/* Add a function return address to the trace stack on thread info.*/
-static int push_return_trace(unsigned long ret, unsigned long long time,
- unsigned long func)
-{
- int index;
- struct thread_info *ti = current_thread_info();
-
- /* The return trace stack is full */
- if (ti->curr_ret_stack == FTRACE_RET_STACK_SIZE - 1)
- return -EBUSY;
-
- index = ++ti->curr_ret_stack;
- barrier();
- ti->ret_stack[index].ret = ret;
- ti->ret_stack[index].func = func;
- ti->ret_stack[index].calltime = time;
-
- return 0;
-}
-
-/* Retrieve a function return address to the trace stack on thread info.*/
-static void pop_return_trace(unsigned long *ret, unsigned long long *time,
- unsigned long *func)
-{
- int index;
-
- struct thread_info *ti = current_thread_info();
- index = ti->curr_ret_stack;
- *ret = ti->ret_stack[index].ret;
- *func = ti->ret_stack[index].func;
- *time = ti->ret_stack[index].calltime;
- ti->curr_ret_stack--;
-}
-
-/*
- * Send the trace to the ring-buffer.
- * @return the original return address.
- */
-unsigned long ftrace_return_to_handler(void)
-{
- struct ftrace_retfunc trace;
- pop_return_trace(&trace.ret, &trace.calltime, &trace.func);
- trace.rettime = cpu_clock(raw_smp_processor_id());
- ftrace_function_return(&trace);
-
- return trace.ret;
-}
-
-/*
- * Hook the return address and push it in the stack of return addrs
- * in current thread info.
- */
-void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
-{
- unsigned long old;
- unsigned long long calltime;
- int faulted;
- unsigned long return_hooker = (unsigned long)
- &return_to_handler;
-
- /* Nmi's are currently unsupported */
- if (atomic_read(&in_nmi))
- return;
-
- /*
- * Protect against fault, even if it shouldn't
- * happen. This tool is too much intrusive to
- * ignore such a protection.
- */
- asm volatile(
- "1: movl (%[parent_old]), %[old]\n"
- "2: movl %[return_hooker], (%[parent_replaced])\n"
- " movl $0, %[faulted]\n"
-
- ".section .fixup, \"ax\"\n"
- "3: movl $1, %[faulted]\n"
- ".previous\n"
-
- ".section __ex_table, \"a\"\n"
- " .long 1b, 3b\n"
- " .long 2b, 3b\n"
- ".previous\n"
-
- : [parent_replaced] "=r" (parent), [old] "=r" (old),
- [faulted] "=r" (faulted)
- : [parent_old] "0" (parent), [return_hooker] "r" (return_hooker)
- : "memory"
- );
-
- if (WARN_ON(faulted)) {
- unregister_ftrace_return();
- return;
- }
-
- if (WARN_ON(!__kernel_text_address(old))) {
- unregister_ftrace_return();
- *parent = old;
- return;
- }
-
- calltime = cpu_clock(raw_smp_processor_id());
-
- if (push_return_trace(old, calltime, self_addr) == -EBUSY)
- *parent = old;
-}
-
-#endif
-
#ifdef CONFIG_DYNAMIC_FTRACE

union ftrace_code_union {
@@ -450,3 +322,133 @@ int __init ftrace_dyn_arch_init(void *data)
return 0;
}
#endif
+
+#ifdef CONFIG_FUNCTION_RET_TRACER
+
+#ifndef CONFIG_DYNAMIC_FTRACE
+
+/*
+ * These functions are picked from those used on
+ * this page for dynamic ftrace. They have been
+ * simplified to ignore all traces in NMI context.
+ */
+static atomic_t in_nmi;
+
+void ftrace_nmi_enter(void)
+{
+ atomic_inc(&in_nmi);
+}
+
+void ftrace_nmi_exit(void)
+{
+ atomic_dec(&in_nmi);
+}
+#endif /* !CONFIG_DYNAMIC_FTRACE */
+
+/* Add a function return address to the trace stack on thread info.*/
+static int push_return_trace(unsigned long ret, unsigned long long time,
+ unsigned long func)
+{
+ int index;
+ struct thread_info *ti = current_thread_info();
+
+ /* The return trace stack is full */
+ if (ti->curr_ret_stack == FTRACE_RET_STACK_SIZE - 1)
+ return -EBUSY;
+
+ index = ++ti->curr_ret_stack;
+ barrier();
+ ti->ret_stack[index].ret = ret;
+ ti->ret_stack[index].func = func;
+ ti->ret_stack[index].calltime = time;
+
+ return 0;
+}
+
+/* Retrieve a function return address to the trace stack on thread info.*/
+static void pop_return_trace(unsigned long *ret, unsigned long long *time,
+ unsigned long *func)
+{
+ int index;
+
+ struct thread_info *ti = current_thread_info();
+ index = ti->curr_ret_stack;
+ *ret = ti->ret_stack[index].ret;
+ *func = ti->ret_stack[index].func;
+ *time = ti->ret_stack[index].calltime;
+ ti->curr_ret_stack--;
+}
+
+/*
+ * Send the trace to the ring-buffer.
+ * @return the original return address.
+ */
+unsigned long ftrace_return_to_handler(void)
+{
+ struct ftrace_retfunc trace;
+ pop_return_trace(&trace.ret, &trace.calltime, &trace.func);
+ trace.rettime = cpu_clock(raw_smp_processor_id());
+ ftrace_function_return(&trace);
+
+ return trace.ret;
+}
+
+/*
+ * Hook the return address and push it in the stack of return addrs
+ * in current thread info.
+ */
+void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
+{
+ unsigned long old;
+ unsigned long long calltime;
+ int faulted;
+ unsigned long return_hooker = (unsigned long)
+ &return_to_handler;
+
+ /* Nmi's are currently unsupported */
+ if (atomic_read(&in_nmi))
+ return;
+
+ /*
+ * Protect against fault, even if it shouldn't
+ * happen. This tool is too much intrusive to
+ * ignore such a protection.
+ */
+ asm volatile(
+ "1: movl (%[parent_old]), %[old]\n"
+ "2: movl %[return_hooker], (%[parent_replaced])\n"
+ " movl $0, %[faulted]\n"
+
+ ".section .fixup, \"ax\"\n"
+ "3: movl $1, %[faulted]\n"
+ ".previous\n"
+
+ ".section __ex_table, \"a\"\n"
+ " .long 1b, 3b\n"
+ " .long 2b, 3b\n"
+ ".previous\n"
+
+ : [parent_replaced] "=r" (parent), [old] "=r" (old),
+ [faulted] "=r" (faulted)
+ : [parent_old] "0" (parent), [return_hooker] "r" (return_hooker)
+ : "memory"
+ );
+
+ if (WARN_ON(faulted)) {
+ unregister_ftrace_return();
+ return;
+ }
+
+ if (WARN_ON(!__kernel_text_address(old))) {
+ unregister_ftrace_return();
+ *parent = old;
+ return;
+ }
+
+ calltime = cpu_clock(raw_smp_processor_id());
+
+ if (push_return_trace(old, calltime, self_addr) == -EBUSY)
+ *parent = old;
+}
+
+#endif /* CONFIG_FUNCTION_RET_TRACER */
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 166a207..f1af1aa 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -25,6 +25,17 @@ struct ftrace_ops {

extern int function_trace_stop;

+/*
+ * Type of the current tracing.
+ */
+enum ftrace_tracing_type_t {
+ FTRACE_TYPE_ENTER = 0, /* Hook the call of the function */
+ FTRACE_TYPE_RETURN, /* Hook the return of the function */
+};
+
+/* Current tracing type, default is FTRACE_TYPE_ENTER */
+extern enum ftrace_tracing_type_t ftrace_tracing_type;
+
/**
* ftrace_stop - stop function tracer.
*
@@ -104,6 +115,9 @@ extern int ftrace_update_ftrace_func(ftrace_func_t func);
extern void ftrace_caller(void);
extern void ftrace_call(void);
extern void mcount_call(void);
+#ifdef CONFIG_FUNCTION_RET_TRACER
+extern void ftrace_return_caller(void);
+#endif

/**
* ftrace_make_nop - convert code into nop
@@ -310,7 +324,7 @@ struct ftrace_retfunc {
/* Type of a callback handler of tracing return function */
typedef void (*trace_function_return_t)(struct ftrace_retfunc *);

-extern void register_ftrace_return(trace_function_return_t func);
+extern int register_ftrace_return(trace_function_return_t func);
/* The current handler in use */
extern trace_function_return_t ftrace_function_return;
extern void unregister_ftrace_return(void);
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 9c89526..b8378fa 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -59,7 +59,6 @@ config FUNCTION_TRACER

config FUNCTION_RET_TRACER
bool "Kernel Function return Tracer"
- depends on !DYNAMIC_FTRACE
depends on HAVE_FUNCTION_RET_TRACER
depends on FUNCTION_TRACER
help
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index b42ec1d..2f78a45 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -50,6 +50,9 @@ static int last_ftrace_enabled;
/* Quick disabling of function tracer. */
int function_trace_stop;

+/* By default, current tracing type is normal tracing. */
+enum ftrace_tracing_type_t ftrace_tracing_type = FTRACE_TYPE_ENTER;
+
/*
* ftrace_disabled is set when an anomaly is discovered.
* ftrace_disabled is much stronger than ftrace_enabled.
@@ -385,12 +388,21 @@ static void ftrace_bug(int failed, unsigned long ip)
}
}

-#define FTRACE_ADDR ((long)(ftrace_caller))

static int
__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
{
unsigned long ip, fl;
+ unsigned long ftrace_addr;
+
+#ifdef CONFIG_FUNCTION_RET_TRACER
+ if (ftrace_tracing_type == FTRACE_TYPE_ENTER)
+ ftrace_addr = (unsigned long)ftrace_caller;
+ else
+ ftrace_addr = (unsigned long)ftrace_return_caller;
+#else
+ ftrace_addr = (unsigned long)ftrace_caller;
+#endif

ip = rec->ip;

@@ -450,9 +462,9 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
}

if (rec->flags & FTRACE_FL_ENABLED)
- return ftrace_make_call(rec, FTRACE_ADDR);
+ return ftrace_make_call(rec, ftrace_addr);
else
- return ftrace_make_nop(NULL, rec, FTRACE_ADDR);
+ return ftrace_make_nop(NULL, rec, ftrace_addr);
}

static void ftrace_replace_code(int enable)
@@ -1405,10 +1417,17 @@ int register_ftrace_function(struct ftrace_ops *ops)
return -1;

mutex_lock(&ftrace_sysctl_lock);
+
+ if (ftrace_tracing_type == FTRACE_TYPE_RETURN) {
+ ret = -EBUSY;
+ goto out;
+ }
+
ret = __register_ftrace_function(ops);
ftrace_startup();
- mutex_unlock(&ftrace_sysctl_lock);

+out:
+ mutex_unlock(&ftrace_sysctl_lock);
return ret;
}

@@ -1474,16 +1493,45 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
}

#ifdef CONFIG_FUNCTION_RET_TRACER
+
+/* The callback that hooks the return of a function */
trace_function_return_t ftrace_function_return =
(trace_function_return_t)ftrace_stub;
-void register_ftrace_return(trace_function_return_t func)
+
+int register_ftrace_return(trace_function_return_t func)
{
+ int ret = 0;
+
+ mutex_lock(&ftrace_sysctl_lock);
+
+ /*
+ * Don't launch return tracing if normal function
+ * tracing is already running.
+ */
+ if (ftrace_trace_function != ftrace_stub) {
+ ret = -EBUSY;
+ goto out;
+ }
+
+ ftrace_tracing_type = FTRACE_TYPE_RETURN;
ftrace_function_return = func;
+ ftrace_startup();
+
+out:
+ mutex_unlock(&ftrace_sysctl_lock);
+ return ret;
}

void unregister_ftrace_return(void)
{
+ mutex_lock(&ftrace_sysctl_lock);
+
ftrace_function_return = (trace_function_return_t)ftrace_stub;
+ ftrace_shutdown();
+ /* Restore normal tracing type */
+ ftrace_tracing_type = FTRACE_TYPE_ENTER;
+
+ mutex_unlock(&ftrace_sysctl_lock);
}
#endif

diff --git a/kernel/trace/trace_functions_return.c b/kernel/trace/trace_functions_return.c
index 61185f7..a68564a 100644
--- a/kernel/trace/trace_functions_return.c
+++ b/kernel/trace/trace_functions_return.c
@@ -14,29 +14,18 @@
#include "trace.h"


-static void start_return_trace(struct trace_array *tr)
-{
- register_ftrace_return(&trace_function_return);
-}
-
-static void stop_return_trace(struct trace_array *tr)
-{
- unregister_ftrace_return();
-}
-
static int return_trace_init(struct trace_array *tr)
{
int cpu;
for_each_online_cpu(cpu)
tracing_reset(tr, cpu);

- start_return_trace(tr);
- return 0;
+ return register_ftrace_return(&trace_function_return);
}

static void return_trace_reset(struct trace_array *tr)
{
- stop_return_trace(tr);
+ unregister_ftrace_return();
}


--
1.5.2.5


2008-11-24 22:08:33

by Steven Rostedt

Subject: Re: [PATCH 4/4] tracing/function-return-tracer: Support for dynamic ftrace on function return tracer


On Sun, 16 Nov 2008, Frederic Weisbecker wrote:
>
> static void ftrace_replace_code(int enable)
> @@ -1405,10 +1417,17 @@ int register_ftrace_function(struct ftrace_ops *ops)
> return -1;
>
> mutex_lock(&ftrace_sysctl_lock);
> +
> + if (ftrace_tracing_type == FTRACE_TYPE_RETURN) {
> + ret = -EBUSY;
> + goto out;
> + }
> +

Why do we need to disable all new tracing functions when we do return
tracing??

-- Steve

> ret = __register_ftrace_function(ops);
> ftrace_startup();
> - mutex_unlock(&ftrace_sysctl_lock);
>
> +out:
> + mutex_unlock(&ftrace_sysctl_lock);
> return ret;
> }
>

2008-11-24 22:21:18

by Frederic Weisbecker

Subject: Re: [PATCH 4/4] tracing/function-return-tracer: Support for dynamic ftrace on function return tracer

2008/11/24 Steven Rostedt <[email protected]>:
>
> On Sun, 16 Nov 2008, Frederic Weisbecker wrote:
>>
>> static void ftrace_replace_code(int enable)
>> @@ -1405,10 +1417,17 @@ int register_ftrace_function(struct ftrace_ops *ops)
>> return -1;
>>
>> mutex_lock(&ftrace_sysctl_lock);
>> +
>> + if (ftrace_tracing_type == FTRACE_TYPE_RETURN) {
>> + ret = -EBUSY;
>> + goto out;
>> + }
>> +
>
> Why do we need to disable all new tracing functions when we do return
> tracing??
>
> -- Steve
>
>> ret = __register_ftrace_function(ops);
>> ftrace_startup();
>> - mutex_unlock(&ftrace_sysctl_lock);
>>
>> +out:
>> + mutex_unlock(&ftrace_sysctl_lock);
>> return ret;
>> }
>>
>

You will see the same test when trying to register a return handler;
it verifies that normal ftrace is not running, and fails otherwise.
I made ftrace and ftrace-return unable to work simultaneously for the
moment. That made the integration of ftrace-return into dynamic tracing
much simpler...

But if someone needs to enable both tracing at the same time, I can change it...
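
Condensed from the patch (with the gotos replaced by direct returns for
brevity), the two symmetric guards look like this:

/* In register_ftrace_function(): refuse while return tracing runs. */
if (ftrace_tracing_type == FTRACE_TYPE_RETURN)
        return -EBUSY;

/* In register_ftrace_return(): refuse while entry tracing runs. */
if (ftrace_trace_function != ftrace_stub)
        return -EBUSY;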

2008-11-24 22:44:14

by Steven Rostedt

Subject: Re: [PATCH 4/4] tracing/function-return-tracer: Support for dynamic ftrace on function return tracer


On Mon, 24 Nov 2008, Frédéric Weisbecker wrote:

>
> You will see the same test when trying to register a return handler;
> it verifies that normal ftrace is not running, and fails otherwise.
> I made ftrace and ftrace-return unable to work simultaneously for the
> moment. That made the integration of ftrace-return into dynamic tracing
> much simpler...
>
> But if someone needs to enable both tracing at the same time, I can change it...

I'm just trying to figure out why the two can not work together?

The function hooks are used by things other than tracing functions. The
stack trace uses it too.

-- Steve

2008-11-26 18:24:33

by Tim Bird

Subject: Re: [PATCH 4/4] tracing/function-return-tracer: Support for dynamic ftrace on function return tracer

Frédéric Weisbecker wrote:
> 2008/11/24 Steven Rostedt <[email protected]>:
>> On Sun, 16 Nov 2008, Frederic Weisbecker wrote:
>>> static void ftrace_replace_code(int enable)
>>> @@ -1405,10 +1417,17 @@ int register_ftrace_function(struct ftrace_ops *ops)
>>> return -1;
>>>
>>> mutex_lock(&ftrace_sysctl_lock);
>>> +
>>> + if (ftrace_tracing_type == FTRACE_TYPE_RETURN) {
>>> + ret = -EBUSY;
>>> + goto out;
>>> + }
>>> +
>> Why do we need to disable all new tracing functions when we do return
>> tracing??
>>
>> -- Steve
>>
>>> ret = __register_ftrace_function(ops);
>>> ftrace_startup();
>>> - mutex_unlock(&ftrace_sysctl_lock);
>>>
>>> +out:
>>> + mutex_unlock(&ftrace_sysctl_lock);
>>> return ret;
>>> }
>>>
>
> You will see the same test when trying to register a return handler;
> it verifies that normal ftrace is not running, and fails otherwise.
> I made ftrace and ftrace-return unable to work simultaneously for the
> moment. That made the integration of ftrace-return into dynamic tracing
> much simpler...
>
> But if someone needs to enable both tracing at the same time, I can change it...

Very sorry I'm coming to this thread late. I didn't notice it until
today.

Not to question the whole approach, and sorry if this was
discussed before, but why wasn't -finstrument-functions used
to instrument the function exits. This worked well for KFT
(See http://elinux.org/Kernel_Function_Trace). I'm not sure if the
function prologue and epilogue modifications done by -mcount are
different than -finstrument-functions, but I thought I remember something
about Steven testing -finstrument-functions in an early version of ftrace.

By the way, I'm really excited to see this "function_cost" stuff being
worked on. It has proven to be extremely useful for analyzing early boot
latencies at Sony.

Sorry again I didn't catch this and previous related threads
earlier. I have some post-processing tools which might be useful here.
Also, I've found it very handy to have the capability to filter by minimum
function duration. Is there any work to do that with the
current system? If not, maybe I could take a look at that and see if
I can add something.

Regards,
-- Tim


=============================
Tim Bird
Architecture Group Chair, CE Linux Forum
Senior Staff Engineer, Sony Corporation of America
=============================

2008-11-26 19:16:26

by Frederic Weisbecker

Subject: Re: [PATCH 4/4] tracing/function-return-tracer: Support for dynamic ftrace on function return tracer

Hi Tim,

2008/11/26 Tim Bird <[email protected]>:
> Very sorry I'm coming to this thread late. I didn't notice it until
> today.
>
> Not to question the whole approach, and sorry if this was
> discussed before, but why wasn't -finstrument-functions used
> to instrument the function exits? This worked well for KFT
> (See http://elinux.org/Kernel_Function_Trace).

Yes, we discussed it earlier, but for several reasons (kernel size,
dynamic patching cost) we used a stack of return addresses for each
task. See the discussion about using -finstrument-functions:
http://lkml.org/lkml/2008/10/30/372

> By the way, I'm really excited to see this "function_cost" stuff being
> worked on. It has proven to be extremely useful for analyzing early boot
> latencies at Sony.
>
> Sorry again I didn't catch this and previous related threads
> earlier. I have some post-processing tools which might be useful here.

That would be welcome. Note that the output of this tracer is a bit
different from KFT. I'm currently working on a C-like output:
http://lkml.org/lkml/2008/11/25/389
So perhaps the post-processing would have to be done in a way quite
different from the one used on KFT traces. I don't know.

> Also, I've found it very handy to have the capability to filter by
> minimum function duration. Is there any work to do that with the
> current system? If not, maybe I could take a look at that and see if
> I can add something.

Yes, there would be some modifications to do for that.
I guess the best way would be to use the trace_options file, in which
any tracer can set some custom flags. See the set_flag callback in
struct tracer.
For now, this file can only receive boolean values for particular
options. For example, if you have a tracepid option, you echo either
tracepid or notracepid to set the value in trace_options. Your needs
would require this file, and this set_flag callback, to receive string
values, like kernel parameters, by using get_option(). This way you
could set duration=50, as an example.
This work is more tracing specific than function-return-tracer specific
(it was renamed the function branch tracer recently).
After that, handling this minimal duration in the function branch
tracer will be pretty easy. :-)
One other solution would be to have a dedicated debugfs file for this
tracer, but it would be a pity, IMHO, not to let other tracers have
custom options with string values too.
If you want to have a look at the current state, see the -tip tree.
I will add you in Cc for the next patches.
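
To sketch the idea (everything below is hypothetical: the option-handler
name, its string-taking signature and the duration_threshold variable don't
exist today, since set_flag currently only handles boolean bits; only
get_option() and the ftrace_retfunc fields are real), parsing duration=50
could look like:

#include <linux/errno.h>
#include <linux/ftrace.h>
#include <linux/kernel.h>
#include <linux/string.h>

static unsigned long duration_threshold;  /* in ns; 0 means no filtering */

/* Hypothetical string-valued option handler for trace_options. */
static int return_tracer_set_opt(char *opt)
{
        char *str;
        int val;

        if (strncmp(opt, "duration=", 9) == 0) {
                str = opt + 9;
                if (!get_option(&str, &val))    /* parse the integer */
                        return -EINVAL;
                duration_threshold = val;
                return 0;
        }
        return -EINVAL;
}

/* The return handler could then drop short-lived functions before
 * handing them to the existing trace_function_return() output path: */
static void return_trace_filtered(struct ftrace_retfunc *trace)
{
        if (trace->rettime - trace->calltime < duration_threshold)
                return;
        trace_function_return(trace);
}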

2008-11-26 19:22:20

by Steven Rostedt

Subject: Re: [PATCH 4/4] tracing/function-return-tracer: Support for dynamic ftrace on function return tracer


[
Sorry Tim to reply again, but alpine broke on Frederic's funny
characters in his email address, and it chopped off all the Cc's :-(
]

On Wed, 26 Nov 2008, Tim Bird wrote:
>
> Very sorry I'm coming to this thread late. I didn't notice it until
> today.
>
> Not to question the whole approach, and sorry if this was
> discussed before, but why wasn't -finstrument-functions used
> to instrument the function exits? This worked well for KFT
> (See http://elinux.org/Kernel_Function_Trace). I'm not sure if the
> function prologue and epilogue modifications done by -mcount are
> different than -finstrument-functions, but I thought I remember something
> about Steven testing -finstrument-functions in an early version of ftrace.
>
> By the way, I'm really excited to see this "function_cost" stuff being
> worked on. It has proven to be extremely useful for analyzing early boot
> latencies at Sony.
>

No prob Tim. I need to make a standard template to explain the reason not
to use -finstrument-functions. (/me goes look for a previous post...)

Here it is:

(here's the original post: http://lkml.org/lkml/2008/10/30/372 that
was posted at the beginning of Frederic's work)

========================================================
Let's take a simple C file called traceme.c:


---
static int x;

void trace_me(void)
{
x++;
}
---

Normal compiling of:

gcc -c traceme.c

produces:

00000000 <trace_me>:
0: 55 push %ebp
1: 89 e5 mov %esp,%ebp
3: a1 00 00 00 00 mov 0x0,%eax
4: R_386_32 .bss
8: 83 c0 01 add $0x1,%eax
b: a3 00 00 00 00 mov %eax,0x0
c: R_386_32 .bss
10: 5d pop %ebp
11: c3 ret


With

gcc -c -pg traceme.c

00000000 <trace_me>:
0: 55 push %ebp
1: 89 e5 mov %esp,%ebp
3: e8 fc ff ff ff call 4 <trace_me+0x4>
4: R_386_PC32 mcount
8: a1 00 00 00 00 mov 0x0,%eax
9: R_386_32 .bss
d: 83 c0 01 add $0x1,%eax
10: a3 00 00 00 00 mov %eax,0x0
11: R_386_32 .bss
15: 5d pop %ebp
16: c3 ret


The only difference between the two is an added "call mcount".
5 byte op to replace with dynamic ftrace.

But now let's look at:

gcc -c -finstrument-functions traceme.c

00000000 <trace_me>:
0: 55 push %ebp
1: 89 e5 mov %esp,%ebp
3: 83 ec 18 sub $0x18,%esp
6: 8b 45 04 mov 0x4(%ebp),%eax
9: 89 44 24 04 mov %eax,0x4(%esp)
d: c7 04 24 00 00 00 00 movl $0x0,(%esp)
10: R_386_32 trace_me
14: e8 fc ff ff ff call 15 <trace_me+0x15>
15: R_386_PC32 __cyg_profile_func_enter
19: a1 00 00 00 00 mov 0x0,%eax
1a: R_386_32 .bss
1e: 83 c0 01 add $0x1,%eax
21: a3 00 00 00 00 mov %eax,0x0
22: R_386_32 .bss
26: 8b 45 04 mov 0x4(%ebp),%eax
29: 89 44 24 04 mov %eax,0x4(%esp)
2d: c7 04 24 00 00 00 00 movl $0x0,(%esp)
30: R_386_32 trace_me
34: e8 fc ff ff ff call 35 <trace_me+0x35>
35: R_386_PC32 __cyg_profile_func_exit
39: c9 leave
3a: c3 ret

Here we see that

mov %eax, 0x4(%esp)
movl $trace_me,(%esp)
call __cyg_profile_func_enter

is added at the beginning and

mov %eax,0x4(%esp)
mov $trace_me,(%esp)
call __cyg_profile_func_exit

is added at the end.

This is not 5 extra bytes but 27 extra bytes for a total of 32 bytes
at every function.

========================================================

The overhead of -finstrument-functions is way too big. Remember, when
using mcount, we can also patch away the callers. It's 5 bytes on both
x86_64 and i386, and 4 bytes on most other archs. Patching away the
-finstrument-functions calls would be too large a change, not to mention
you could not do it with nops. You would have to use a jump, since
patching more than one opcode would lead to race conditions with a task
being preempted in the middle of those nops.
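
For reference, these are the two hooks -finstrument-functions expects;
the pair is the standard GCC interface, and the no_instrument_function
attribute is what keeps the hooks themselves from being instrumented:

/* Both hooks receive the address of the instrumented function and of
 * its call site. */
void __attribute__((no_instrument_function))
__cyg_profile_func_enter(void *this_fn, void *call_site);

void __attribute__((no_instrument_function))
__cyg_profile_func_exit(void *this_fn, void *call_site);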

I need to file this information away to give the next person that asks
this question ;-)

> Sorry again I didn't catch this and previous related threads
> earlier. I have some post-processing tools which might be useful here.
> Also, I've found it very handy to have the capability to filter by minimum
> function duration. Is there any work to do that with the
> current system? If not, maybe I could take a look at that and see if
> I can add something.
>

Actually, I probably should have Cc'd you anyway. I know you did similar
work. You may be able to add that filtering to the dynamic ftrace so that
the tracer will filter them out as well. If you want to work on this,
feel free. There's already been a lot of good ideas (and code) coming from
other developers :-)

-- Steve