Add support for tracing_thresh to the function_graph tracer. This
version of this feature isolates the checks into new entry and
return functions, to avoid adding more conditional code into the
main function_graph paths.
Also, add support for specifying tracing_thresh on the kernel
command line. When used like so: "tracing_thresh=200 ftrace=function_graph"
this can be used to analyse system startup. It is important to disable
tracing soon after boot, in order to avoid losing the trace data.
Note: the elimination of 'notrace' in the definition of '__init'
may be controversial. This can be removed, or made conditional,
if it's a bit too scary, but it worked OK for me. Tracing during
kernel startup still works, just with no visibility of routines
declared __init.
Signed-off-by: Tim Bird <[email protected]>
---
include/linux/init.h | 2 +-
kernel/trace/trace.c | 20 ++++++++++++++++++--
kernel/trace/trace.h | 3 ++-
kernel/trace/trace_functions_graph.c | 32 +++++++++++++++++++++++++++++---
4 files changed, 50 insertions(+), 7 deletions(-)
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -40,7 +40,7 @@
/* These are for everybody (although not all archs will actually
discard it in modules) */
-#define __init __section(.init.text) __cold notrace
+#define __init __section(.init.text) __cold
#define __initdata __section(.init.data)
#define __initconst __section(.init.rodata)
#define __exitdata __section(.exit.data)
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -297,6 +297,21 @@ static int __init set_buf_size(char *str
}
__setup("trace_buf_size=", set_buf_size);
+static int __init set_tracing_thresh(char *str)
+{
+ unsigned long threshhold;
+ int ret;
+
+ if (!str)
+ return 0;
+ ret = strict_strtoul(str, 0, &threshhold);
+ if (ret < 0)
+ return 0;
+ tracing_thresh = threshhold * 1000;
+ return 1;
+}
+__setup("tracing_thresh=", set_tracing_thresh);
+
unsigned long nsecs_to_usecs(unsigned long nsecs)
{
return nsecs / 1000;
@@ -502,9 +517,10 @@ static ssize_t trace_seq_to_buffer(struc
static arch_spinlock_t ftrace_max_lock =
(arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
+unsigned long __read_mostly tracing_thresh;
+
#ifdef CONFIG_TRACER_MAX_TRACE
unsigned long __read_mostly tracing_max_latency;
-unsigned long __read_mostly tracing_thresh;
/*
* Copy the new maximum trace into the separate maximum-trace
@@ -4181,10 +4197,10 @@ static __init int tracer_init_debugfs(vo
#ifdef CONFIG_TRACER_MAX_TRACE
trace_create_file("tracing_max_latency", 0644, d_tracer,
&tracing_max_latency, &tracing_max_lat_fops);
+#endif
trace_create_file("tracing_thresh", 0644, d_tracer,
&tracing_thresh, &tracing_max_lat_fops);
-#endif
trace_create_file("README", 0444, d_tracer,
NULL, &tracing_readme_fops);
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -396,9 +396,10 @@ extern int process_new_ksym_entry(char *
extern unsigned long nsecs_to_usecs(unsigned long nsecs);
+extern unsigned long tracing_thresh;
+
#ifdef CONFIG_TRACER_MAX_TRACE
extern unsigned long tracing_max_latency;
-extern unsigned long tracing_thresh;
void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu);
void update_max_tr_single(struct trace_array *tr,
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -241,6 +241,14 @@ int trace_graph_entry(struct ftrace_grap
return ret;
}
+int trace_graph_thresh_entry(struct ftrace_graph_ent *trace)
+{
+ if (tracing_thresh)
+ return 1;
+ else
+ return trace_graph_entry(trace);
+}
+
static void __trace_graph_return(struct trace_array *tr,
struct ftrace_graph_ret *trace,
unsigned long flags,
@@ -287,13 +295,27 @@ void trace_graph_return(struct ftrace_gr
local_irq_restore(flags);
}
+void trace_graph_thresh_return(struct ftrace_graph_ret *trace)
+{
+ if (tracing_thresh &&
+ (trace->rettime - trace->calltime < tracing_thresh))
+ return;
+ else
+ trace_graph_return(trace);
+}
+
static int graph_trace_init(struct trace_array *tr)
{
int ret;
graph_array = tr;
- ret = register_ftrace_graph(&trace_graph_return,
- &trace_graph_entry);
+ if (tracing_thresh)
+ ret = register_ftrace_graph(&trace_graph_thresh_return,
+ &trace_graph_thresh_entry);
+ else
+ ret = register_ftrace_graph(&trace_graph_return,
+ &trace_graph_entry);
+
if (ret)
return ret;
tracing_start_cmdline_record();
@@ -891,7 +913,11 @@ print_graph_return(struct ftrace_graph_r
return TRACE_TYPE_PARTIAL_LINE;
}
- ret = trace_seq_printf(s, "}\n");
+ if (tracing_thresh) {
+ ret = trace_seq_printf(s, "} (%ps)\n", (void *)trace->func);
+ } else {
+ ret = trace_seq_printf(s, "}\n");
+ }
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
On Thu, Feb 25, 2010 at 03:36:43PM -0800, Tim Bird wrote:
> Add support for tracing_thresh to the function_graph tracer. This
> version of this feature isolates the checks into new entry and
> return functions, to avoid adding more conditional code into the
> main function_graph paths.
>
> Also, add support for specifying tracing_thresh on the kernel
> command line. When used like so: "tracing_thresh=200 ftrace=function_graph"
> this can be used to analyse system startup. It is important to disable
> tracing soon after boot, in order to avoid losing the trace data.
>
> Note: the elimination of 'notrace' in the definition of '__init'
> may be controversial. This can be removed, or made conditional,
> if it's a bit too scary, but it worked OK for me. Tracing during
> kernel startup still works, just with no visibility of routines
> declared __init.
__init functions are notrace, otherwise ftrace would hot-patch
call sites of functions that have disappeared.
That said, tracing __init functions is indeed interesting.
Perhaps we can handle that by removing the __init functions
from ftrace records each time we release init pages.
> +static int __init set_tracing_thresh(char *str)
> +{
> + unsigned long threshhold;
> + int ret;
> +
> + if (!str)
> + return 0;
> + ret = strict_strtoul(str, 0, &threshhold);
> + if (ret < 0)
> + return 0;
> + tracing_thresh = threshhold * 1000;
> + return 1;
> +}
> +__setup("tracing_thresh=", set_tracing_thresh);
Looks like setting this, while the function graph tracer (normal
mode) is running, will have no effect. That said it's perfectly
fine as it would be pointless to change this value in the middle
of the tracing.
> +
> unsigned long nsecs_to_usecs(unsigned long nsecs)
> {
> return nsecs / 1000;
> @@ -502,9 +517,10 @@ static ssize_t trace_seq_to_buffer(struc
> static arch_spinlock_t ftrace_max_lock =
> (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
>
> +unsigned long __read_mostly tracing_thresh;
> +
> #ifdef CONFIG_TRACER_MAX_TRACE
> unsigned long __read_mostly tracing_max_latency;
> -unsigned long __read_mostly tracing_thresh;
>
> /*
> * Copy the new maximum trace into the separate maximum-trace
> @@ -4181,10 +4197,10 @@ static __init int tracer_init_debugfs(vo
> #ifdef CONFIG_TRACER_MAX_TRACE
> trace_create_file("tracing_max_latency", 0644, d_tracer,
> &tracing_max_latency, &tracing_max_lat_fops);
> +#endif
>
> trace_create_file("tracing_thresh", 0644, d_tracer,
> &tracing_thresh, &tracing_max_lat_fops);
> -#endif
>
> trace_create_file("README", 0444, d_tracer,
> NULL, &tracing_readme_fops);
> --- a/kernel/trace/trace.h
> +++ b/kernel/trace/trace.h
> @@ -396,9 +396,10 @@ extern int process_new_ksym_entry(char *
>
> extern unsigned long nsecs_to_usecs(unsigned long nsecs);
>
> +extern unsigned long tracing_thresh;
> +
> #ifdef CONFIG_TRACER_MAX_TRACE
> extern unsigned long tracing_max_latency;
> -extern unsigned long tracing_thresh;
>
> void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu);
> void update_max_tr_single(struct trace_array *tr,
> --- a/kernel/trace/trace_functions_graph.c
> +++ b/kernel/trace/trace_functions_graph.c
> @@ -241,6 +241,14 @@ int trace_graph_entry(struct ftrace_grap
> return ret;
> }
>
> +int trace_graph_thresh_entry(struct ftrace_graph_ent *trace)
> +{
> + if (tracing_thresh)
> + return 1;
> + else
> + return trace_graph_entry(trace);
> +}
> +
> static void __trace_graph_return(struct trace_array *tr,
> struct ftrace_graph_ret *trace,
> unsigned long flags,
> @@ -287,13 +295,27 @@ void trace_graph_return(struct ftrace_gr
> local_irq_restore(flags);
> }
>
> +void trace_graph_thresh_return(struct ftrace_graph_ret *trace)
> +{
> + if (tracing_thresh &&
> + (trace->rettime - trace->calltime < tracing_thresh))
> + return;
> + else
> + trace_graph_return(trace);
> +}
> +
> static int graph_trace_init(struct trace_array *tr)
> {
> int ret;
>
> graph_array = tr;
> - ret = register_ftrace_graph(&trace_graph_return,
> - &trace_graph_entry);
> + if (tracing_thresh)
> + ret = register_ftrace_graph(&trace_graph_thresh_return,
> + &trace_graph_thresh_entry);
> + else
> + ret = register_ftrace_graph(&trace_graph_return,
> + &trace_graph_entry);
> +
> if (ret)
> return ret;
> tracing_start_cmdline_record();
> @@ -891,7 +913,11 @@ print_graph_return(struct ftrace_graph_r
> return TRACE_TYPE_PARTIAL_LINE;
> }
>
> - ret = trace_seq_printf(s, "}\n");
> + if (tracing_thresh) {
> + ret = trace_seq_printf(s, "} (%ps)\n", (void *)trace->func);
> + } else {
> + ret = trace_seq_printf(s, "}\n");
> + }
Ok, looks good to me.
Thanks.
> if (!ret)
> return TRACE_TYPE_PARTIAL_LINE;
>
>
>
Tim, FYI, It's best to email my [email protected] account. Or CC both
if you want. But I check my redhat mail at most once a day, and maybe
not even that.
On Thu, 2010-02-25 at 15:36 -0800, Tim Bird wrote:
> Add support for tracing_thresh to the function_graph tracer. This
> version of this feature isolates the checks into new entry and
> return functions, to avoid adding more conditional code into the
> main function_graph paths.
>
> Also, add support for specifying tracing_thresh on the kernel
> command line. When used like so: "tracing_thresh=200 ftrace=function_graph"
> this can be used to analyse system startup. It is important to disable
> tracing soon after boot, in order to avoid losing the trace data.
>
> Note: the elimination of 'notrace' in the definition of '__init'
> may be controversial. This can be removed, or made conditional,
> if it's a bit too scary, but it worked OK for me. Tracing during
> kernel startup still works, just with no visibility of routines
> declared __init.
It basically is a nop if you have DYNAMIC_FTRACE defined. Remember that
little bug that caused the killing of e1000e NICs? One of the
conditions that made that happen was that there's no way to know when a
__init function will disappear.
But notrace isn't there to protect against that. The recordmcount.pl
code won't even look at a function declared with __init. This means that
the __init functions will always call mcount if you remove this notrace.
That's not really a big deal because mcount is defined as:
ENTRY(mcount)
retq
END(mcount)
So you just made all those init functions call this little mcount
routine. I bet you won't notice the impact.
At boot up, all mcount callers that recordmcount reported are recorded
and converted to nops. When we enable function tracing, we don't make
them call mcount again, but instead we make them call a little heavier
function "ftrace_caller".
Now, your patch will have an impact if you do not enable DYNAMIC_FTRACE,
because then all callers to mcount can be traced. But then you have a
13% overhead in the system.
>
> Signed-off-by: Tim Bird <[email protected]>
> ---
> include/linux/init.h | 2 +-
> kernel/trace/trace.c | 20 ++++++++++++++++++--
> kernel/trace/trace.h | 3 ++-
> kernel/trace/trace_functions_graph.c | 32 +++++++++++++++++++++++++++++---
> 4 files changed, 50 insertions(+), 7 deletions(-)
>
> --- a/include/linux/init.h
> +++ b/include/linux/init.h
> @@ -40,7 +40,7 @@
>
> /* These are for everybody (although not all archs will actually
> discard it in modules) */
> -#define __init __section(.init.text) __cold notrace
> +#define __init __section(.init.text) __cold
This isn't needed as I explained above.
> #define __initdata __section(.init.data)
> #define __initconst __section(.init.rodata)
> #define __exitdata __section(.exit.data)
> --- a/kernel/trace/trace.c
> +++ b/kernel/trace/trace.c
> @@ -297,6 +297,21 @@ static int __init set_buf_size(char *str
> }
> __setup("trace_buf_size=", set_buf_size);
>
> +static int __init set_tracing_thresh(char *str)
> +{
> + unsigned long threshhold;
> + int ret;
> +
> + if (!str)
> + return 0;
> + ret = strict_strtoul(str, 0, &threshhold);
> + if (ret < 0)
> + return 0;
> + tracing_thresh = threshhold * 1000;
> + return 1;
> +}
> +__setup("tracing_thresh=", set_tracing_thresh);
> +
> unsigned long nsecs_to_usecs(unsigned long nsecs)
> {
> return nsecs / 1000;
> @@ -502,9 +517,10 @@ static ssize_t trace_seq_to_buffer(struc
> static arch_spinlock_t ftrace_max_lock =
> (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
>
> +unsigned long __read_mostly tracing_thresh;
> +
> #ifdef CONFIG_TRACER_MAX_TRACE
> unsigned long __read_mostly tracing_max_latency;
> -unsigned long __read_mostly tracing_thresh;
>
> /*
> * Copy the new maximum trace into the separate maximum-trace
> @@ -4181,10 +4197,10 @@ static __init int tracer_init_debugfs(vo
> #ifdef CONFIG_TRACER_MAX_TRACE
> trace_create_file("tracing_max_latency", 0644, d_tracer,
> &tracing_max_latency, &tracing_max_lat_fops);
> +#endif
>
> trace_create_file("tracing_thresh", 0644, d_tracer,
> &tracing_thresh, &tracing_max_lat_fops);
> -#endif
>
> trace_create_file("README", 0444, d_tracer,
> NULL, &tracing_readme_fops);
> --- a/kernel/trace/trace.h
> +++ b/kernel/trace/trace.h
> @@ -396,9 +396,10 @@ extern int process_new_ksym_entry(char *
>
> extern unsigned long nsecs_to_usecs(unsigned long nsecs);
>
> +extern unsigned long tracing_thresh;
> +
> #ifdef CONFIG_TRACER_MAX_TRACE
> extern unsigned long tracing_max_latency;
> -extern unsigned long tracing_thresh;
>
> void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu);
> void update_max_tr_single(struct trace_array *tr,
> --- a/kernel/trace/trace_functions_graph.c
> +++ b/kernel/trace/trace_functions_graph.c
> @@ -241,6 +241,14 @@ int trace_graph_entry(struct ftrace_grap
> return ret;
> }
>
> +int trace_graph_thresh_entry(struct ftrace_graph_ent *trace)
> +{
> + if (tracing_thresh)
> + return 1;
> + else
> + return trace_graph_entry(trace);
> +}
> +
> static void __trace_graph_return(struct trace_array *tr,
> struct ftrace_graph_ret *trace,
> unsigned long flags,
> @@ -287,13 +295,27 @@ void trace_graph_return(struct ftrace_gr
> local_irq_restore(flags);
> }
>
> +void trace_graph_thresh_return(struct ftrace_graph_ret *trace)
> +{
> + if (tracing_thresh &&
> + (trace->rettime - trace->calltime < tracing_thresh))
> + return;
> + else
> + trace_graph_return(trace);
> +}
> +
> static int graph_trace_init(struct trace_array *tr)
> {
> int ret;
>
> graph_array = tr;
> - ret = register_ftrace_graph(&trace_graph_return,
> - &trace_graph_entry);
> + if (tracing_thresh)
> + ret = register_ftrace_graph(&trace_graph_thresh_return,
> + &trace_graph_thresh_entry);
> + else
> + ret = register_ftrace_graph(&trace_graph_return,
> + &trace_graph_entry);
> +
> if (ret)
> return ret;
> tracing_start_cmdline_record();
> @@ -891,7 +913,11 @@ print_graph_return(struct ftrace_graph_r
> return TRACE_TYPE_PARTIAL_LINE;
> }
>
> - ret = trace_seq_printf(s, "}\n");
> + if (tracing_thresh) {
> + ret = trace_seq_printf(s, "} (%ps)\n", (void *)trace->func);
Ah, this isn't what I wanted. I wanted something a bit more complex ;-)
I was thinking of keeping a small stack per cpu that keeps track of
previous entries. If an exit is called that does not have a matching
entry (can easily check the depth parameter) then we should print out
the function name. This way, even without the tracing_thresh tracer, we
can see what dangling '}'s are. This happens now with the current
tracer.
I can add this tomorrow. Then I can take this patch and cut out the
stuff not needed.
-- Steve
> + } else {
> + ret = trace_seq_printf(s, "}\n");
> + }
> if (!ret)
> return TRACE_TYPE_PARTIAL_LINE;
>
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
On 02/25/2010 06:46 PM, Frederic Weisbecker wrote:
> On Thu, Feb 25, 2010 at 03:36:43PM -0800, Tim Bird wrote:
>> Add support for tracing_thresh to the function_graph tracer. This
>> version of this feature isolates the checks into new entry and
>> return functions, to avoid adding more conditional code into the
>> main function_graph paths.
>>
>> Also, add support for specifying tracing_thresh on the kernel
>> command line. When used like so: "tracing_thresh=200 ftrace=function_graph"
>> this can be used to analyse system startup. It is important to disable
>> tracing soon after boot, in order to avoid losing the trace data.
>>
>> Note: the elimination of 'notrace' in the definition of '__init'
>> may be controversial. This can be removed, or made conditional,
>> if it's a bit too scary, but it worked OK for me. Tracing during
>> kernel startup still works, just with no visibility of routines
>> declared __init.
>
>
> __init functions are notrace otherwise ftrace would hot patch
> callsites of function that have disappeared.
>
> That said, tracing __init functions is indeed interesting.
> Perhaps we can handle that by removing the __init functions
> from ftrace records each time we release init pages.
Thanks. Given what Steven said about __init routines,
I'll have to think some more about this. I'm OK with
just removing that piece of the patch for now.
>
>> +static int __init set_tracing_thresh(char *str)
>> +{
>> + unsigned long threshhold;
>> + int ret;
>> +
>> + if (!str)
>> + return 0;
>> + ret = strict_strtoul(str, 0, &threshhold);
>> + if (ret < 0)
>> + return 0;
>> + tracing_thresh = threshhold * 1000;
>> + return 1;
>> +}
>> +__setup("tracing_thresh=", set_tracing_thresh);
>
> Looks like setting this, while the function graph tracer (normal
> mode) is running, will have no effect. That said it's perfectly
> fine as it would be pointless to change this value in the middle
> of the tracing.
>
The command line is parsed before the tracer is activated, so
tracing_thresh is set when graph_trace_init() is called, which
results in the desired behaviour (that is, if you specify the
tracing_thresh on the command line, you get the duration
filtering on bootup).
...
> Ok, looks good to me.
>
> Thanks.
Thanks for taking a look at it.
-- Tim
=============================
Tim Bird
Architecture Group Chair, CE Linux Forum
Senior Staff Engineer, Sony Corporation of America
=============================
On 02/25/2010 08:37 PM, Steven Rostedt wrote:
> Tim, FYI, It's best to email my [email protected] account. Or CC both
> if you want. But I check my redhat mail at most once a day, and maybe
> not event that.
Will do. Sorry about that.
>
> On Thu, 2010-02-25 at 15:36 -0800, Tim Bird wrote:
>> Add support for tracing_thresh to the function_graph tracer. This
>> version of this feature isolates the checks into new entry and
>> return functions, to avoid adding more conditional code into the
>> main function_graph paths.
>>
>> Also, add support for specifying tracing_thresh on the kernel
>> command line. When used like so: "tracing_thresh=200 ftrace=function_graph"
>> this can be used to analyse system startup. It is important to disable
>> tracing soon after boot, in order to avoid losing the trace data.
>>
>> Note: the elimination of 'notrace' in the definition of '__init'
>> may be controversial. This can be removed, or made conditional,
>> if it's a bit too scary, but it worked OK for me. Tracing during
>> kernel startup still works, just with no visibility of routines
>> declared __init.
>
> It basically is a nop if you have DYNAMIC_FTRACE defined. Remember that
> little bug that caused the killing of e1000e NICs? One of the
> conditions that made that happen was that there's no way to know when a
> __init function will disappear.
I originally found this while testing on ARM, which doesn't AFAIK have
DYNAMIC_FTRACE yet. Without the change, I couldn't see a lot of the
initialization routines in the trace (as the code bounced between
__init routines and 'normal' routines).
>
> But notrace isn't there to protect against that. The recordmcount.pl
> code wont even look at a function declared with __init. This means that
> the __init functions will always call mcount if you remove this notrace.
> That's not really a big deal because mcount is defined as:
>
> ENTRY(mcount)
> retq
> END(mcount)
>
>
> So you just made all those init functions call this little mcount
> routine. I bet you wont notice the impact.
>
> At boot up, all mcount callers that recordmcount reported are recorded
> and converted to nops. When we enable function tracing, we don't make
> them call mcount again, but instead we make them call a little heavier
> function "ftrace_caller".
OK - interesting.
> Now, your patch will have an impact if you do not enable DYNAMIC_FTRACE,
> because then all callers to mcount can be traced. But then you have a
> 13% overhead in the system.
Yes. I have primarily been using this for bootup time tracing,
on ARM (without DYNAMIC_FTRACE), and only during development.
I wouldn't leave it on in a production system.
I'm fine with yanking that out of this patch, since I have other
stuff I still have to patch to use it on ARM.
>>
>> Signed-off-by: Tim Bird <[email protected]>
>> ---
>> include/linux/init.h | 2 +-
>> kernel/trace/trace.c | 20 ++++++++++++++++++--
>> kernel/trace/trace.h | 3 ++-
>> kernel/trace/trace_functions_graph.c | 32 +++++++++++++++++++++++++++++---
>> 4 files changed, 50 insertions(+), 7 deletions(-)
>>
>> --- a/include/linux/init.h
>> +++ b/include/linux/init.h
>> @@ -40,7 +40,7 @@
>>
>> /* These are for everybody (although not all archs will actually
>> discard it in modules) */
>> -#define __init __section(.init.text) __cold notrace
>> +#define __init __section(.init.text) __cold
>
> This isn't needed as I explained above.
It's probably best to take it out. It's easy to patch back
in when I do my non-X86 testing.
>
>> #define __initdata __section(.init.data)
>> #define __initconst __section(.init.rodata)
>> #define __exitdata __section(.exit.data)
>> --- a/kernel/trace/trace.c
>> +++ b/kernel/trace/trace.c
>> @@ -297,6 +297,21 @@ static int __init set_buf_size(char *str
>> }
>> __setup("trace_buf_size=", set_buf_size);
>>
>> +static int __init set_tracing_thresh(char *str)
>> +{
>> + unsigned long threshhold;
>> + int ret;
>> +
>> + if (!str)
>> + return 0;
>> + ret = strict_strtoul(str, 0, &threshhold);
>> + if (ret < 0)
>> + return 0;
>> + tracing_thresh = threshhold * 1000;
>> + return 1;
>> +}
>> +__setup("tracing_thresh=", set_tracing_thresh);
>> +
>> unsigned long nsecs_to_usecs(unsigned long nsecs)
>> {
>> return nsecs / 1000;
>> @@ -502,9 +517,10 @@ static ssize_t trace_seq_to_buffer(struc
>> static arch_spinlock_t ftrace_max_lock =
>> (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
>>
>> +unsigned long __read_mostly tracing_thresh;
>> +
>> #ifdef CONFIG_TRACER_MAX_TRACE
>> unsigned long __read_mostly tracing_max_latency;
>> -unsigned long __read_mostly tracing_thresh;
>>
>> /*
>> * Copy the new maximum trace into the separate maximum-trace
>> @@ -4181,10 +4197,10 @@ static __init int tracer_init_debugfs(vo
>> #ifdef CONFIG_TRACER_MAX_TRACE
>> trace_create_file("tracing_max_latency", 0644, d_tracer,
>> &tracing_max_latency, &tracing_max_lat_fops);
>> +#endif
>>
>> trace_create_file("tracing_thresh", 0644, d_tracer,
>> &tracing_thresh, &tracing_max_lat_fops);
>> -#endif
>>
>> trace_create_file("README", 0444, d_tracer,
>> NULL, &tracing_readme_fops);
>> --- a/kernel/trace/trace.h
>> +++ b/kernel/trace/trace.h
>> @@ -396,9 +396,10 @@ extern int process_new_ksym_entry(char *
>>
>> extern unsigned long nsecs_to_usecs(unsigned long nsecs);
>>
>> +extern unsigned long tracing_thresh;
>> +
>> #ifdef CONFIG_TRACER_MAX_TRACE
>> extern unsigned long tracing_max_latency;
>> -extern unsigned long tracing_thresh;
>>
>> void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu);
>> void update_max_tr_single(struct trace_array *tr,
>> --- a/kernel/trace/trace_functions_graph.c
>> +++ b/kernel/trace/trace_functions_graph.c
>> @@ -241,6 +241,14 @@ int trace_graph_entry(struct ftrace_grap
>> return ret;
>> }
>>
>> +int trace_graph_thresh_entry(struct ftrace_graph_ent *trace)
>> +{
>> + if (tracing_thresh)
>> + return 1;
>> + else
>> + return trace_graph_entry(trace);
>> +}
>> +
>> static void __trace_graph_return(struct trace_array *tr,
>> struct ftrace_graph_ret *trace,
>> unsigned long flags,
>> @@ -287,13 +295,27 @@ void trace_graph_return(struct ftrace_gr
>> local_irq_restore(flags);
>> }
>>
>> +void trace_graph_thresh_return(struct ftrace_graph_ret *trace)
>> +{
>> + if (tracing_thresh &&
>> + (trace->rettime - trace->calltime < tracing_thresh))
>> + return;
>> + else
>> + trace_graph_return(trace);
>> +}
>> +
>> static int graph_trace_init(struct trace_array *tr)
>> {
>> int ret;
>>
>> graph_array = tr;
>> - ret = register_ftrace_graph(&trace_graph_return,
>> - &trace_graph_entry);
>> + if (tracing_thresh)
>> + ret = register_ftrace_graph(&trace_graph_thresh_return,
>> + &trace_graph_thresh_entry);
>> + else
>> + ret = register_ftrace_graph(&trace_graph_return,
>> + &trace_graph_entry);
>> +
>> if (ret)
>> return ret;
>> tracing_start_cmdline_record();
>> @@ -891,7 +913,11 @@ print_graph_return(struct ftrace_graph_r
>> return TRACE_TYPE_PARTIAL_LINE;
>> }
>>
>> - ret = trace_seq_printf(s, "}\n");
>> + if (tracing_thresh) {
>> + ret = trace_seq_printf(s, "} (%ps)\n", (void *)trace->func);
>
> Ah, this isn't what I wanted. I wanted something a bit more complex ;-)
:-)
This is simpler than the EXIT_FUNC option I had earlier, but still
not automatic as you describe below.
>
> I was thinking of keeping a small stack per cpu that keeps track of
> previous entries. If a exit is called that does not have a matching
> entry (can easily check the depth parameter) then we should print out
> the function name. This way, even without the tracing_thresh tracer, we
> can see what dangling '}'s are. This happens now with the current
> tracer.
This would be a nice addition. I was a little worried about
the checking tracing_thresh at output time (as opposed to trace time).
A user could theoretically change the tracing_thresh after capture
but before output, and get only the closing braces.
In my humble opinion, even when you've seen the opening braces,
it can be useful to show the function name on the exit line.
For example, right now you can't use grep on a big trace
(just a regular function graph trace, not the one using
tracing_thresh) to find the pattern of function durations
for specific functions.
> I can add this tomorrow. Then I can take this patch and cut out the
> stuff not needed.
Thanks.
-- Tim
=============================
Tim Bird
Architecture Group Chair, CE Linux Forum
Senior Staff Engineer, Sony Corporation of America
=============================
On Fri, 2010-02-26 at 10:52 -0800, Tim Bird wrote:
> > Now, your patch will have an impact if you do not enable DYNAMIC_FTRACE,
> > because then all callers to mcount can be traced. But then you have a
> > 13% overhead in the system.
>
> Yes. I have primarily been using this for bootup time tracing,
> on ARM (without DYNAMIC_FTRACE), and only during development.
> I wouldn't leave it on in a production system.
>
> I'm fine with yanking that out of this patch, since I have other
> stuff I still have to patch to use it on ARM.
Still this change is off topic for this patch. Anyway, what you could
add is:
+#ifdef CONFIG_DYNAMIC_FTRACE
+# define init_notrace
+#else
+# define init_notrace notrace
+#endif
-#define __init __section(.init.text) __cold notrace
+#define __init __section(.init.text) __cold init_notrace
Would also need a comment that dynamic ftrace, for safety reasons does
not trace __init sections with or without notrace. Just make it always
notrace. But static ftrace can safely trace init sections, so keep them
on when dynamic ftrace is disabled.
> >> - ret = trace_seq_printf(s, "}\n");
> >> + if (tracing_thresh) {
> >> + ret = trace_seq_printf(s, "} (%ps)\n", (void *)trace->func);
> >
> > Ah, this isn't what I wanted. I wanted something a bit more complex ;-)
>
> :-)
>
> This is simpler than the EXIT_FUNC option I had earlier, but still
> not automatic as you describe below.
> >
> > I was thinking of keeping a small stack per cpu that keeps track of
> > previous entries. If a exit is called that does not have a matching
> > entry (can easily check the depth parameter) then we should print out
> > the function name. This way, even without the tracing_thresh tracer, we
> > can see what dangling '}'s are. This happens now with the current
> > tracer.
>
> This would be a nice addition. I was a little worried about
> the checking tracing_thresh at output time (as opposed to trace time).
> A user could theoretically change the tracing_thresh after capture
> but before output, and get only the closing braces.
>
> In my humble opinion, even when you've seen the opening braces,
> it can be useful to show the function name on the exit line.
> For example, right now you can't use grep on a big trace
> (just a regular function graph trace, not the one using
> tracing_thresh) to find the pattern of function durations
> for specific functions.
Then we should add an option to always show it as well. I've added
showing it before, and it does become a distraction in scanning it with
the human eye. But a grep is a reason to have the option.
>
> > I can add this tomorrow. Then I can take this patch and cut out the
> > stuff not needed.
> Thanks.
Currently working on it.
-- Steve
On Fri, Feb 26, 2010 at 10:27:29AM -0800, Tim Bird wrote:
> On 02/25/2010 06:46 PM, Frederic Weisbecker wrote:
> >> +static int __init set_tracing_thresh(char *str)
> >> +{
> >> + unsigned long threshhold;
> >> + int ret;
> >> +
> >> + if (!str)
> >> + return 0;
> >> + ret = strict_strtoul(str, 0, &threshhold);
> >> + if (ret < 0)
> >> + return 0;
> >> + tracing_thresh = threshhold * 1000;
> >> + return 1;
> >> +}
> >> +__setup("tracing_thresh=", set_tracing_thresh);
> >
> > Looks like setting this, while the function graph tracer (normal
> > mode) is running, will have no effect. That said it's perfectly
> > fine as it would be pointless to change this value in the middle
> > of the tracing.
> >
> The command line is parsed before the tracer is activated, so
> tracing_thresh is set when trace_graph_init() is called, which
> results in the desired behaviour (that is, if you specify the
> tracing_thresh on the command line, you get the duration
> filtering on bootup).
Oh right, I made a confusion with the tracing_thresh file
write callback.