From: Arnaldo Carvalho de Melo <[email protected]>
Hi guys,
Jiri asked me to post this series, so here it is, please take a
look, I'd love to harvest your Reviewed-by/Tested-by/Acked-by,
- Arnaldo
Arnaldo Carvalho de Melo (5):
perf callchain: Fix attr.sample_max_stack setting
perf unwind: Do not look at globals
perf trace: Setup DWARF callchains for non-syscall events when --max-stack is used
perf trace: Allow overriding global --max-stack per event
perf callchains: Ask for PERF_RECORD_MMAP for data mmaps for DWARF unwinding
tools/perf/builtin-trace.c | 19 ++++++++++++++++---
tools/perf/util/evsel.c | 24 +++++++++++++++++-------
tools/perf/util/unwind-libunwind-local.c | 9 ---------
3 files changed, 33 insertions(+), 19 deletions(-)
--
2.14.3
From: Arnaldo Carvalho de Melo <[email protected]>
When setting the "dwarf" unwinder for a specific event and not
specifying the max-stack, the attr.sample_max_stack ended up using an
uninitialized callchain_param.max_stack, fix it by using designated
initializers for that callchain_param variable, zeroing all non
explicitely initialized struct members.
Here is what happened:
# perf trace -vv --no-syscalls --max-stack 4 -e probe_libc:inet_pton/call-graph=dwarf/ ping -6 -c 1 ::1
callchain: type DWARF
callchain: stack dump size 8192
perf_event_attr:
type 2
size 112
config 0x730
{ sample_period, sample_freq } 1
sample_type IP|TID|TIME|ADDR|CALLCHAIN|CPU|PERIOD|RAW|REGS_USER|STACK_USER|DATA_SRC
exclude_callchain_user 1
{ wakeup_events, wakeup_watermark } 1
sample_regs_user 0xff0fff
sample_stack_user 8192
sample_max_stack 50656
sys_perf_event_open failed, error -75
Value too large for defined data type
# perf trace -vv --no-syscalls --max-stack 4 -e probe_libc:inet_pton/call-graph=dwarf/ ping -6 -c 1 ::1
callchain: type DWARF
callchain: stack dump size 8192
perf_event_attr:
type 2
size 112
config 0x730
sample_type IP|TID|TIME|ADDR|CALLCHAIN|CPU|PERIOD|RAW|REGS_USER|STACK_USER|DATA_SRC
exclude_callchain_user 1
sample_regs_user 0xff0fff
sample_stack_user 8192
sample_max_stack 30448
sys_perf_event_open failed, error -75
Value too large for defined data type
#
Now the attr.sample_max_stack is set to zero and the above works as
expected:
# perf trace --no-syscalls --max-stack 4 -e probe_libc:inet_pton/call-graph=dwarf/ ping -6 -c 1 ::1
PING ::1(::1) 56 data bytes
64 bytes from ::1: icmp_seq=1 ttl=64 time=0.072 ms
--- ::1 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.072/0.072/0.072/0.000 ms
0.000 probe_libc:inet_pton:(7feb7a998350))
__inet_pton (inlined)
gaih_inet.constprop.7 (/usr/lib64/libc-2.26.so)
__GI_getaddrinfo (inlined)
[0xffffaa39b6108f3f] (/usr/bin/ping)
#
Cc: Adrian Hunter <[email protected]>
Cc: David Ahern <[email protected]>
Cc: Hendrick Brueckner <[email protected]>
Cc: Jiri Olsa <[email protected]>
Cc: Namhyung Kim <[email protected]>
Cc: Thomas Richter <[email protected]>
Cc: Wang Nan <[email protected]>
Link: https://lkml.kernel.org/n/[email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/util/evsel.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index efa2e629a669..8f971a2301d1 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -731,14 +731,14 @@ static void apply_config_terms(struct perf_evsel *evsel,
struct perf_evsel_config_term *term;
struct list_head *config_terms = &evsel->config_terms;
struct perf_event_attr *attr = &evsel->attr;
- struct callchain_param param;
+ /* callgraph default */
+ struct callchain_param param = {
+ .record_mode = callchain_param.record_mode,
+ };
u32 dump_size = 0;
int max_stack = 0;
const char *callgraph_buf = NULL;
- /* callgraph default */
- param.record_mode = callchain_param.record_mode;
-
list_for_each_entry(term, config_terms, list) {
switch (term->type) {
case PERF_EVSEL__CONFIG_TERM_PERIOD:
--
2.14.3
From: Arnaldo Carvalho de Melo <[email protected]>
When we use a global DWARF setting as in:
perf record --call-graph dwarf
According to 5c0cf22477ea ("perf record: Store data mmaps for dwarf unwind") we need
to set up some extra perf_event_attr bits.
But when we instead do a per event dwarf setting:
perf record -e cycles/call-graph=dwarf/
This was not being done, make them equivalent.
This didn't produce any output changes in my tests while fixing up loose
ends in the per-event settings, I found it just by comparing the
perf_event_attr fields trying to find an explanation for those problems.
Cc: Adrian Hunter <[email protected]>
Cc: David Ahern <[email protected]>
Cc: Hendrick Brueckner <[email protected]>
Cc: Jiri Olsa <[email protected]>
Cc: Namhyung Kim <[email protected]>
Cc: Noel Grandin <[email protected]>
Cc: Thomas Richter <[email protected]>
Cc: Wang Nan <[email protected]>
Link: https://lkml.kernel.org/n/[email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/util/evsel.c | 16 +++++++++++++---
1 file changed, 13 insertions(+), 3 deletions(-)
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 8f971a2301d1..85eb84dfdf91 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -726,7 +726,7 @@ perf_evsel__reset_callgraph(struct perf_evsel *evsel,
}
static void apply_config_terms(struct perf_evsel *evsel,
- struct record_opts *opts)
+ struct record_opts *opts, bool track)
{
struct perf_evsel_config_term *term;
struct list_head *config_terms = &evsel->config_terms;
@@ -797,6 +797,8 @@ static void apply_config_terms(struct perf_evsel *evsel,
/* User explicitly set per-event callgraph, clear the old setting and reset. */
if ((callgraph_buf != NULL) || (dump_size > 0) || max_stack) {
+ bool sample_address = false;
+
if (max_stack) {
param.max_stack = max_stack;
if (callgraph_buf == NULL)
@@ -816,6 +818,8 @@ static void apply_config_terms(struct perf_evsel *evsel,
evsel->name);
return;
}
+ if (param.record_mode == CALLCHAIN_DWARF)
+ sample_address = true;
}
}
if (dump_size > 0) {
@@ -828,8 +832,14 @@ static void apply_config_terms(struct perf_evsel *evsel,
perf_evsel__reset_callgraph(evsel, &callchain_param);
/* set perf-event callgraph */
- if (param.enabled)
+ if (param.enabled) {
+ if (sample_address) {
+ perf_evsel__set_sample_bit(evsel, ADDR);
+ perf_evsel__set_sample_bit(evsel, DATA_SRC);
+ evsel->attr.mmap_data = track;
+ }
perf_evsel__config_callchain(evsel, opts, ¶m);
+ }
}
}
@@ -1060,7 +1070,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts,
* Apply event specific term settings,
* it overloads any global configuration.
*/
- apply_config_terms(evsel, opts);
+ apply_config_terms(evsel, opts, track);
evsel->ignore_missing_thread = opts->ignore_missing_thread;
}
--
2.14.3
From: Arnaldo Carvalho de Melo <[email protected]>
The per-event max-stack setting wasn't overriding the global --max-stack
setting:
# perf trace --no-syscalls --max-stack 4 -e probe_libc:inet_pton/call-graph=dwarf,max-stack=2/ ping -6 -c 1 ::1
PING ::1(::1) 56 data bytes
64 bytes from ::1: icmp_seq=1 ttl=64 time=0.072 ms
--- ::1 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.072/0.072/0.072/0.000 ms
0.000 probe_libc:inet_pton:(7feb7a998350))
__inet_pton (inlined)
gaih_inet.constprop.7 (/usr/lib64/libc-2.26.so)
__GI_getaddrinfo (inlined)
[0xffffaa39b6108f3f] (/usr/bin/ping)
#
Fix it:
# perf trace --no-syscalls --max-stack 4 -e probe_libc:inet_pton/call-graph=dwarf,max-stack=2/ ping -6 -c 1 ::1
PING ::1(::1) 56 data bytes
64 bytes from ::1: icmp_seq=1 ttl=64 time=0.073 ms
--- ::1 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.073/0.073/0.073/0.000 ms
0.000 probe_libc:inet_pton:(7f1083221350))
__inet_pton (inlined)
gaih_inet.constprop.7 (/usr/lib64/libc-2.26.so)
#
Cc: Adrian Hunter <[email protected]>
Cc: David Ahern <[email protected]>
Cc: Hendrick Brueckner <[email protected]>
Cc: Jiri Olsa <[email protected]>
Cc: Namhyung Kim <[email protected]>
Cc: Thomas Richter <[email protected]>
Cc: Wang Nan <[email protected]>
Link: https://lkml.kernel.org/n/[email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/builtin-trace.c | 14 +++++++++++++-
1 file changed, 13 insertions(+), 1 deletion(-)
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index ee85c29dbf70..531d43bf57e1 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1644,7 +1644,7 @@ static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evse
struct addr_location al;
if (machine__resolve(trace->host, &al, sample) < 0 ||
- thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, trace->max_stack))
+ thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, evsel->attr.sample_max_stack))
return -1;
return 0;
@@ -2423,6 +2423,18 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
evlist->threads->nr > 1 ||
perf_evlist__first(evlist)->attr.inherit;
+
+ /*
+ * Now that we already used evsel->attr to ask the kernel to setup the
+ * events, lets reuse evsel->attr.sample_max_stack as the limit in
+ * trace__resolve_callchain(), allowing per-event max-stack settings
+ * to override an explicitely set --max-stack global setting.
+ */
+ evlist__for_each_entry(evlist, evsel) {
+ if ((evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) &&
+ evsel->attr.sample_max_stack == 0)
+ evsel->attr.sample_max_stack = trace->max_stack;
+ }
again:
before = trace->nr_events;
--
2.14.3
From: Arnaldo Carvalho de Melo <[email protected]>
If we use:
perf trace --max-stack=4
then the syscall events will use DWARF callchains, when available
(libunwind enabled in the build) and the printing will stop at 4 levels.
When we introduced support for tracepoint events this ended up not
applying for them, fix it.
Before:
# perf trace --call-graph=dwarf --no-syscalls -e probe_libc:inet_pton ping -6 -c 1 ::1
PING ::1(::1) 56 data bytes
64 bytes from ::1: icmp_seq=1 ttl=64 time=0.058 ms
--- ::1 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.058/0.058/0.058/0.000 ms
0.000 probe_libc:inet_pton:(7fc6c2a16350))
#
After:
# perf trace --call-graph=dwarf --no-syscalls -e probe_libc:inet_pton ping -6 -c 1 ::1
PING ::1(::1) 56 data bytes
64 bytes from ::1: icmp_seq=1 ttl=64 time=0.087 ms
--- ::1 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.087/0.087/0.087/0.000 ms
0.000 probe_libc:inet_pton:(7fbf9a041350))
__inet_pton (inlined)
gaih_inet.constprop.7 (/usr/lib64/libc-2.26.so)
__GI_getaddrinfo (inlined)
[0xffffaa947cb67f3f] (/usr/bin/ping)
__libc_start_main (/usr/lib64/libc-2.26.so)
[0xffffaa947cb68379] (/usr/bin/ping)
#
Reported-by: Thomas Richter <[email protected]>
Cc: Adrian Hunter <[email protected]>
Cc: David Ahern <[email protected]>
Cc: Hendrick Brueckner <[email protected]>
Cc: Jiri Olsa <[email protected]>
Cc: Namhyung Kim <[email protected]>
Cc: Wang Nan <[email protected]>
Link: https://lkml.kernel.org/n/[email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/builtin-trace.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 0362974854e9..ee85c29dbf70 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -2350,7 +2350,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
goto out_delete_evlist;
}
- perf_evlist__config(evlist, &trace->opts, NULL);
+ perf_evlist__config(evlist, &trace->opts, &callchain_param);
signal(SIGCHLD, sig_handler);
signal(SIGINT, sig_handler);
@@ -3065,8 +3065,9 @@ int cmd_trace(int argc, const char **argv)
}
#ifdef HAVE_DWARF_UNWIND_SUPPORT
- if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled && trace.trace_syscalls)
+ if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled) {
record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
+ }
#endif
if (callchain_param.enabled) {
--
2.14.3
From: Arnaldo Carvalho de Melo <[email protected]>
When setting up DWARF callchains on specific events, without using
'record' or 'trace' --call-graph, but instead doing it like:
perf trace -e cycles/call-graph=dwarf/
The unwind__prepare_access() call in thread__insert_map() when we
process PERF_RECORD_MMAP(2) metadata events were not being performed,
precluding us from using per-event DWARF callchains, handling them just
when we asked for all events to be DWARF, using "--call-graph dwarf".
We do it in the PERF_RECORD_MMAP because we have to look at one of the
executable maps to figure out the executable type (64-bit, 32-bit) of
the DSO laid out in that mmap. Also to look at the architecture where
the perf.data file was recorded.
All this probably should be deferred to when we process a sample for
some thread that has callchains, so that we do this processing only for
the threads with samples, not for all of them.
For now, fix using DWARF on specific events.
Before:
# perf trace --no-syscalls -e probe_libc:inet_pton/call-graph=dwarf/ ping -6 -c 1 ::1
PING ::1(::1) 56 data bytes
64 bytes from ::1: icmp_seq=1 ttl=64 time=0.048 ms
--- ::1 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.048/0.048/0.048/0.000 ms
0.000 probe_libc:inet_pton:(7fe9597bb350))
Problem processing probe_libc:inet_pton callchain, skipping...
#
After:
# perf trace --no-syscalls -e probe_libc:inet_pton/call-graph=dwarf/ ping -6 -c 1 ::1
PING ::1(::1) 56 data bytes
64 bytes from ::1: icmp_seq=1 ttl=64 time=0.060 ms
--- ::1 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.060/0.060/0.060/0.000 ms
0.000 probe_libc:inet_pton:(7fd4aa930350))
__inet_pton (inlined)
gaih_inet.constprop.7 (/usr/lib64/libc-2.26.so)
__GI_getaddrinfo (inlined)
[0xffffaa804e51af3f] (/usr/bin/ping)
__libc_start_main (/usr/lib64/libc-2.26.so)
[0xffffaa804e51b379] (/usr/bin/ping)
#
# perf trace --call-graph=dwarf --no-syscalls -e probe_libc:inet_pton/call-graph=dwarf/ ping -6 -c 1 ::1
PING ::1(::1) 56 data bytes
64 bytes from ::1: icmp_seq=1 ttl=64 time=0.057 ms
--- ::1 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.057/0.057/0.057/0.000 ms
0.000 probe_libc:inet_pton:(7f9363b9e350))
__inet_pton (inlined)
gaih_inet.constprop.7 (/usr/lib64/libc-2.26.so)
__GI_getaddrinfo (inlined)
[0xffffa9e8a14e0f3f] (/usr/bin/ping)
__libc_start_main (/usr/lib64/libc-2.26.so)
[0xffffa9e8a14e1379] (/usr/bin/ping)
#
# perf trace --call-graph=fp --no-syscalls -e probe_libc:inet_pton/call-graph=dwarf/ ping -6 -c 1 ::1
PING ::1(::1) 56 data bytes
64 bytes from ::1: icmp_seq=1 ttl=64 time=0.077 ms
--- ::1 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.077/0.077/0.077/0.000 ms
0.000 probe_libc:inet_pton:(7f4947e1c350))
__inet_pton (inlined)
gaih_inet.constprop.7 (/usr/lib64/libc-2.26.so)
__GI_getaddrinfo (inlined)
[0xffffaa716d88ef3f] (/usr/bin/ping)
__libc_start_main (/usr/lib64/libc-2.26.so)
[0xffffaa716d88f379] (/usr/bin/ping)
#
# perf trace --no-syscalls -e probe_libc:inet_pton/call-graph=fp/ ping -6 -c 1 ::1
PING ::1(::1) 56 data bytes
64 bytes from ::1: icmp_seq=1 ttl=64 time=0.078 ms
--- ::1 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.078/0.078/0.078/0.000 ms
0.000 probe_libc:inet_pton:(7fa157696350))
__GI___inet_pton (/usr/lib64/libc-2.26.so)
getaddrinfo (/usr/lib64/libc-2.26.so)
[0xffffa9ba39c74f40] (/usr/bin/ping)
#
Cc: Adrian Hunter <[email protected]>
Cc: David Ahern <[email protected]>
Cc: Hendrick Brueckner <[email protected]>
Cc: Jiri Olsa <[email protected]>
Cc: Namhyung Kim <[email protected]>
Cc: Thomas Richter <[email protected]>
Cc: Wang Nan <[email protected]>
Link: https://lkml.kernel.org/n/[email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/util/unwind-libunwind-local.c | 9 ---------
1 file changed, 9 deletions(-)
diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c
index 7a42f703e858..02dc5a9d8f72 100644
--- a/tools/perf/util/unwind-libunwind-local.c
+++ b/tools/perf/util/unwind-libunwind-local.c
@@ -631,9 +631,6 @@ static unw_accessors_t accessors = {
static int _unwind__prepare_access(struct thread *thread)
{
- if (callchain_param.record_mode != CALLCHAIN_DWARF)
- return 0;
-
thread->addr_space = unw_create_addr_space(&accessors, 0);
if (!thread->addr_space) {
pr_err("unwind: Can't create unwind address space.\n");
@@ -646,17 +643,11 @@ static int _unwind__prepare_access(struct thread *thread)
static void _unwind__flush_access(struct thread *thread)
{
- if (callchain_param.record_mode != CALLCHAIN_DWARF)
- return;
-
unw_flush_cache(thread->addr_space, 0, 0);
}
static void _unwind__finish_access(struct thread *thread)
{
- if (callchain_param.record_mode != CALLCHAIN_DWARF)
- return;
-
unw_destroy_addr_space(thread->addr_space);
}
--
2.14.3
On Tue, Jan 16, 2018 at 11:24:35AM -0300, Arnaldo Carvalho de Melo wrote:
SNIP
> Cc: Adrian Hunter <[email protected]>
> Cc: David Ahern <[email protected]>
> Cc: Hendrick Brueckner <[email protected]>
> Cc: Jiri Olsa <[email protected]>
> Cc: Namhyung Kim <[email protected]>
> Cc: Thomas Richter <[email protected]>
> Cc: Wang Nan <[email protected]>
> Link: https://lkml.kernel.org/n/[email protected]
> Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
> ---
> tools/perf/util/unwind-libunwind-local.c | 9 ---------
> 1 file changed, 9 deletions(-)
>
> diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c
> index 7a42f703e858..02dc5a9d8f72 100644
> --- a/tools/perf/util/unwind-libunwind-local.c
> +++ b/tools/perf/util/unwind-libunwind-local.c
> @@ -631,9 +631,6 @@ static unw_accessors_t accessors = {
>
> static int _unwind__prepare_access(struct thread *thread)
> {
> - if (callchain_param.record_mode != CALLCHAIN_DWARF)
> - return 0;
> -
this would create thread->addr_space also for data without
dwarf callchains data, so I think we need to keep it
it should get set in apply_config_terms which calls parse_callchain_record
once it detects some 'call-graph' term setup.. something's probably wrong
there?
jirka
> thread->addr_space = unw_create_addr_space(&accessors, 0);
> if (!thread->addr_space) {
> pr_err("unwind: Can't create unwind address space.\n");
> @@ -646,17 +643,11 @@ static int _unwind__prepare_access(struct thread *thread)
>
> static void _unwind__flush_access(struct thread *thread)
> {
> - if (callchain_param.record_mode != CALLCHAIN_DWARF)
> - return;
> -
> unw_flush_cache(thread->addr_space, 0, 0);
> }
>
> static void _unwind__finish_access(struct thread *thread)
> {
> - if (callchain_param.record_mode != CALLCHAIN_DWARF)
> - return;
> -
> unw_destroy_addr_space(thread->addr_space);
> }
>
> --
> 2.14.3
>
On 01/16/2018 03:24 PM, Arnaldo Carvalho de Melo wrote:
> From: Arnaldo Carvalho de Melo <[email protected]>
>
> Hi guys,
>
> Jiri asked me to post this series, so here it is, please take a
> look, I'd love to harvest your Reviewed-by/Tested-by/Acked-by,
>
> - Arnaldo
>
> Arnaldo Carvalho de Melo (5):
> perf callchain: Fix attr.sample_max_stack setting
> perf unwind: Do not look at globals
> perf trace: Setup DWARF callchains for non-syscall events when --max-stack is used
> perf trace: Allow overriding global --max-stack per event
> perf callchains: Ask for PERF_RECORD_MMAP for data mmaps for DWARF unwinding
>
> tools/perf/builtin-trace.c | 19 ++++++++++++++++---
> tools/perf/util/evsel.c | 24 +++++++++++++++++-------
> tools/perf/util/unwind-libunwind-local.c | 9 ---------
> 3 files changed, 33 insertions(+), 19 deletions(-)
>
I have tested your patches on my s390x. I applied
them on top of your perf/core branch.
Here is the output:
[root@s8360047 perf]# ./perf trace --no-syscalls
-e probe_libc:inet_pton ping -6 -c 1 ::1
PING ::1(::1) 56 data bytes
64 bytes from ::1: icmp_seq=1 ttl=64 time=0.046 ms
--- ::1 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.046/0.046/0.046/0.000 ms
0.000 probe_libc:inet_pton:(3ffa4ec2060))
[root@s8360047 perf]#
Test ok !
[root@s8360047 perf]# ./perf trace --no-syscalls
-e probe_libc:inet_pton/max-stack=5,call-graph=dwarf/ ping -6 -c 1 ::1
PING ::1(::1) 56 data bytes
64 bytes from ::1: icmp_seq=1 ttl=64 time=0.054 ms
--- ::1 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.054/0.054/0.054/0.000 ms
0.000 probe_libc:inet_pton:(3ff7e742060))
__GI___inet_pton (/usr/lib64/libc-2.26.so)
gaih_inet (inlined)
__GI_getaddrinfo (inlined)
main (/usr/bin/ping)
__libc_start_main (/usr/lib64/libc-2.26.so)
[root@s8360047 perf]#
Test ok!
[root@s8360047 perf]# ./perf trace --no-syscalls
-e probe_libc:inet_pton/call-graph=dwarf/ ping -6 -c 1 ::1
PING ::1(::1) 56 data bytes
64 bytes from ::1: icmp_seq=1 ttl=64 time=0.062 ms
--- ::1 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.062/0.062/0.062/0.000 ms
0.000 probe_libc:inet_pton:(3ff86642060))
__GI___inet_pton (/usr/lib64/libc-2.26.so)
gaih_inet (inlined)
__GI_getaddrinfo (inlined)
main (/usr/bin/ping)
__libc_start_main (/usr/lib64/libc-2.26.so)
_start (/usr/bin/ping)
[root@s8360047 perf]#
Test ok:
[root@s8360047 perf]# ./perf trace --no-syscalls --call-graph dwarf
--max-stack 4 -e probe_libc:inet_pton ping -6 -c 1 ::1
PING ::1(::1) 56 data bytes
64 bytes from ::1: icmp_seq=1 ttl=64 time=0.031 ms
--- ::1 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.031/0.031/0.031/0.000 ms
0.000 probe_libc:inet_pton:(3ffa1bc2060))
__GI___inet_pton (/usr/lib64/libc-2.26.so)
gaih_inet (inlined)
__GI_getaddrinfo (inlined)
main (/usr/bin/ping)
[root@s8360047 perf]#
Test ok, also when --max-stack 4 is omitted the complete
backchain is printed.
[root@s8360047 perf]# ./perf trace --no-syscalls --call-graph dwarf
-e probe_libc:inet_pton/max-stack=3/ ping -6 -c 1 ::1
PING ::1(::1) 56 data bytes
64 bytes from ::1: icmp_seq=1 ttl=64 time=0.047 ms
--- ::1 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.047/0.047/0.047/0.000 ms
0.000 probe_libc:inet_pton:(3ffb4bc2060))
[root@s8360047 perf]#
Test fails, I would have expected some callchain output.
[root@s8360047 perf]# ./perf trace --no-syscalls --call-graph dwarf --max-stack 4
-e probe_libc:inet_pton/max-stack=3/ ping -6 -c 1 ::1
PING ::1(::1) 56 data bytes
64 bytes from ::1: icmp_seq=1 ttl=64 time=0.055 ms
--- ::1 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.055/0.055/0.055/0.000 ms
0.000 probe_libc:inet_pton:(3ff7e6c2060))
[root@s8360047 perf]#
Test fails, I would have expected some callchain output.
It looks like /max-stack=3/ without ,call-graph=dwarf
does not trigger the stack unwinding.
--
Thomas Richter, Dept 3303, IBM LTC Boeblingen Germany
--
Vorsitzende des Aufsichtsrats: Martina Koederitz
Geschäftsführung: Dirk Wittkopp
Sitz der Gesellschaft: Böblingen / Registergericht: Amtsgericht Stuttgart, HRB 243294
Em Tue, Jan 16, 2018 at 04:19:15PM +0100, Jiri Olsa escreveu:
> On Tue, Jan 16, 2018 at 11:24:35AM -0300, Arnaldo Carvalho de Melo wrote:
>
> SNIP
>
> > Cc: Adrian Hunter <[email protected]>
> > Cc: David Ahern <[email protected]>
> > Cc: Hendrick Brueckner <[email protected]>
> > Cc: Jiri Olsa <[email protected]>
> > Cc: Namhyung Kim <[email protected]>
> > Cc: Thomas Richter <[email protected]>
> > Cc: Wang Nan <[email protected]>
> > Link: https://lkml.kernel.org/n/[email protected]
> > Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
> > ---
> > tools/perf/util/unwind-libunwind-local.c | 9 ---------
> > 1 file changed, 9 deletions(-)
> >
> > diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c
> > index 7a42f703e858..02dc5a9d8f72 100644
> > --- a/tools/perf/util/unwind-libunwind-local.c
> > +++ b/tools/perf/util/unwind-libunwind-local.c
> > @@ -631,9 +631,6 @@ static unw_accessors_t accessors = {
> >
> > static int _unwind__prepare_access(struct thread *thread)
> > {
> > - if (callchain_param.record_mode != CALLCHAIN_DWARF)
> > - return 0;
> > -
>
> this would create thread->addr_space also for data without
> dwarf callchains data, so I think we need to keep it
>
> it should get set in apply_config_terms which calls parse_callchain_record
No, it should not set the global parameter, as this is just for a
specific event, i.e. we can have something like:
perf record -e cycles/call-graph=dwarf/ \
-e instructions/call-graph=lbr/
-e cache-misses/call-graph=fp
And then get these samples for the same thread.
If you want to avoid creating thread->addr_space, and we do want that,
sure, a followup patch should address that, we need to postpone
allocating it till we get a DWARF callchain in a sample for that
specific thread, when we then should allocate thread->addr_space.
But then we need to break that prepare routine, the part that needs the
map needs to set something in the 'struct thread' to mark what kind of
unwind ops should be used if we ever get a sample with a DWARF
callchain for that thread later on the perf.data (or live session), and
when that happens, look if the thread->addr_space is allocated and
allocate it if not.
> once it detects some 'call-graph' term setup.. something's probably wrong
> there?
See above. The fix in this patch is the quickest, i.e. we make sure that
if we ever find DWARF callchains, what we need will be there. We could
introduce a new global variable that would be touched by
apply_config_terms() now, and touch that, not the global config, that
may be used by other code, that would think that hey, DWARF is globally
configured, when it is just by some of the events.
But if we do that, we will waste some space anyway since not all threads
will have DWARF callchains, e.g. in a system wide session.
- Arnaldo
> jirka
>
> > thread->addr_space = unw_create_addr_space(&accessors, 0);
> > if (!thread->addr_space) {
> > pr_err("unwind: Can't create unwind address space.\n");
> > @@ -646,17 +643,11 @@ static int _unwind__prepare_access(struct thread *thread)
> >
> > static void _unwind__flush_access(struct thread *thread)
> > {
> > - if (callchain_param.record_mode != CALLCHAIN_DWARF)
> > - return;
> > -
> > unw_flush_cache(thread->addr_space, 0, 0);
> > }
> >
> > static void _unwind__finish_access(struct thread *thread)
> > {
> > - if (callchain_param.record_mode != CALLCHAIN_DWARF)
> > - return;
> > -
> > unw_destroy_addr_space(thread->addr_space);
> > }
> >
> > --
> > 2.14.3
> >
Em Tue, Jan 16, 2018 at 04:27:52PM +0100, Thomas-Mich Richter escreveu:
> --- ::1 ping statistics ---
> 1 packets transmitted, 1 received, 0% packet loss, time 0ms
> rtt min/avg/max/mdev = 0.047/0.047/0.047/0.000 ms
> 0.000 probe_libc:inet_pton:(3ffb4bc2060))
> [root@s8360047 perf]#
>
> Test fails, I would have expected some callchain output.
>
> [root@s8360047 perf]# ./perf trace --no-syscalls --call-graph dwarf --max-stack 4
> -e probe_libc:inet_pton/max-stack=3/ ping -6 -c 1 ::1
> PING ::1(::1) 56 data bytes
> 64 bytes from ::1: icmp_seq=1 ttl=64 time=0.055 ms
>
> --- ::1 ping statistics ---
> 1 packets transmitted, 1 received, 0% packet loss, time 0ms
> rtt min/avg/max/mdev = 0.055/0.055/0.055/0.000 ms
> 0.000 probe_libc:inet_pton:(3ff7e6c2060))
> [root@s8360047 perf]#
>
> Test fails, I would have expected some callchain output.
>
> It looks like /max-stack=3/ without ,call-graph=dwarf
> does not trigger the stack unwinding.
Ok, will work on those, then redo all the tests you did, grr, tricky
stuff.
- Arnaldo
Em Tue, Jan 16, 2018 at 12:36:21PM -0300, Arnaldo Carvalho de Melo escreveu:
> Em Tue, Jan 16, 2018 at 04:19:15PM +0100, Jiri Olsa escreveu:
> > it should get set in apply_config_terms which calls parse_callchain_record
> No, it should not set the global parameter, as this is just for a
> specific event, i.e. we can have something like:
> perf record -e cycles/call-graph=dwarf/ \
> -e instructions/call-graph=lbr/
> -e cache-misses/call-graph=fp
> And then get these samples for the same thread.
<SNIP>
> > once it detects some 'call-graph' term setup.. something's probably wrong
> > there?
> See above. The fix in this patch is the quickest, i.e. we make sure that
> if we ever find DWARF callchains, what we need will be there. We could
> introduce a new global variable that would be touched by
> apply_config_terms() now, and touch that, not the global config, that
> may be used by other code, that would think that hey, DWARF is globally
> configured, when it is just by some of the events.
So, look at the patch at the end of this message, better (for now),
replacing this one (2/5).
I retested everything, same results.
> But if we do that, we will waste some space anyway since not all threads
> will have DWARF callchains, e.g. in a system wide session.
- Arnaldo
commit 8a302012fabdb387ab45a0d0b283411a8fd05b32
Author: Arnaldo Carvalho de Melo <[email protected]>
Date: Mon Jan 15 16:48:46 2018 -0300
perf unwind: Do not look just at the global callchain_param.record_mode
When setting up DWARF callchains on specific events, without using
'record' or 'trace' --call-graph, but instead doing it like:
perf trace -e cycles/call-graph=dwarf/
The unwind__prepare_access() call in thread__insert_map() when we
process PERF_RECORD_MMAP(2) metadata events were not being performed,
precluding us from using per-event DWARF callchains, handling them just
when we asked for all events to be DWARF, using "--call-graph dwarf".
We do it in the PERF_RECORD_MMAP because we have to look at one of the
executable maps to figure out the executable type (64-bit, 32-bit) of
the DSO laid out in that mmap. Also to look at the architecture where
the perf.data file was recorded.
All this probably should be deferred to when we process a sample for
some thread that has callchains, so that we do this processing only for
the threads with samples, not for all of them.
For now, fix using DWARF on specific events.
Before:
# perf trace --no-syscalls -e probe_libc:inet_pton/call-graph=dwarf/ ping -6 -c 1 ::1
PING ::1(::1) 56 data bytes
64 bytes from ::1: icmp_seq=1 ttl=64 time=0.048 ms
--- ::1 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.048/0.048/0.048/0.000 ms
0.000 probe_libc:inet_pton:(7fe9597bb350))
Problem processing probe_libc:inet_pton callchain, skipping...
#
After:
# perf trace --no-syscalls -e probe_libc:inet_pton/call-graph=dwarf/ ping -6 -c 1 ::1
PING ::1(::1) 56 data bytes
64 bytes from ::1: icmp_seq=1 ttl=64 time=0.060 ms
--- ::1 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.060/0.060/0.060/0.000 ms
0.000 probe_libc:inet_pton:(7fd4aa930350))
__inet_pton (inlined)
gaih_inet.constprop.7 (/usr/lib64/libc-2.26.so)
__GI_getaddrinfo (inlined)
[0xffffaa804e51af3f] (/usr/bin/ping)
__libc_start_main (/usr/lib64/libc-2.26.so)
[0xffffaa804e51b379] (/usr/bin/ping)
#
# perf trace --call-graph=dwarf --no-syscalls -e probe_libc:inet_pton/call-graph=dwarf/ ping -6 -c 1 ::1
PING ::1(::1) 56 data bytes
64 bytes from ::1: icmp_seq=1 ttl=64 time=0.057 ms
--- ::1 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.057/0.057/0.057/0.000 ms
0.000 probe_libc:inet_pton:(7f9363b9e350))
__inet_pton (inlined)
gaih_inet.constprop.7 (/usr/lib64/libc-2.26.so)
__GI_getaddrinfo (inlined)
[0xffffa9e8a14e0f3f] (/usr/bin/ping)
__libc_start_main (/usr/lib64/libc-2.26.so)
[0xffffa9e8a14e1379] (/usr/bin/ping)
#
# perf trace --call-graph=fp --no-syscalls -e probe_libc:inet_pton/call-graph=dwarf/ ping -6 -c 1 ::1
PING ::1(::1) 56 data bytes
64 bytes from ::1: icmp_seq=1 ttl=64 time=0.077 ms
--- ::1 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.077/0.077/0.077/0.000 ms
0.000 probe_libc:inet_pton:(7f4947e1c350))
__inet_pton (inlined)
gaih_inet.constprop.7 (/usr/lib64/libc-2.26.so)
__GI_getaddrinfo (inlined)
[0xffffaa716d88ef3f] (/usr/bin/ping)
__libc_start_main (/usr/lib64/libc-2.26.so)
[0xffffaa716d88f379] (/usr/bin/ping)
#
# perf trace --no-syscalls -e probe_libc:inet_pton/call-graph=fp/ ping -6 -c 1 ::1
PING ::1(::1) 56 data bytes
64 bytes from ::1: icmp_seq=1 ttl=64 time=0.078 ms
--- ::1 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.078/0.078/0.078/0.000 ms
0.000 probe_libc:inet_pton:(7fa157696350))
__GI___inet_pton (/usr/lib64/libc-2.26.so)
getaddrinfo (/usr/lib64/libc-2.26.so)
[0xffffa9ba39c74f40] (/usr/bin/ping)
#
Cc: Adrian Hunter <[email protected]>
Cc: David Ahern <[email protected]>
Cc: Hendrick Brueckner <[email protected]>
Cc: Jiri Olsa <[email protected]>
Cc: Namhyung Kim <[email protected]>
Cc: Thomas Richter <[email protected]>
Cc: Wang Nan <[email protected]>
Link: https://lkml.kernel.org/n/[email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index c0debc3f79b6..c0815a37fdb5 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -2390,9 +2390,10 @@ static int setup_callchain(struct perf_evlist *evlist)
enum perf_call_graph_mode mode = CALLCHAIN_NONE;
if ((sample_type & PERF_SAMPLE_REGS_USER) &&
- (sample_type & PERF_SAMPLE_STACK_USER))
+ (sample_type & PERF_SAMPLE_STACK_USER)) {
mode = CALLCHAIN_DWARF;
- else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
+ dwarf_callchain_users = true;
+ } else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
mode = CALLCHAIN_LBR;
else if (sample_type & PERF_SAMPLE_CALLCHAIN)
mode = CALLCHAIN_FP;
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index dd4df9a5cd06..6593779224d5 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -338,9 +338,10 @@ static int report__setup_sample_type(struct report *rep)
if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) {
if ((sample_type & PERF_SAMPLE_REGS_USER) &&
- (sample_type & PERF_SAMPLE_STACK_USER))
+ (sample_type & PERF_SAMPLE_STACK_USER)) {
callchain_param.record_mode = CALLCHAIN_DWARF;
- else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
+ dwarf_callchain_users = true;
+ } else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
callchain_param.record_mode = CALLCHAIN_LBR;
else
callchain_param.record_mode = CALLCHAIN_FP;
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index c1cce474c0f1..08bc818f371b 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -2919,9 +2919,10 @@ static void script__setup_sample_type(struct perf_script *script)
if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) {
if ((sample_type & PERF_SAMPLE_REGS_USER) &&
- (sample_type & PERF_SAMPLE_STACK_USER))
+ (sample_type & PERF_SAMPLE_STACK_USER)) {
callchain_param.record_mode = CALLCHAIN_DWARF;
- else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
+ dwarf_callchain_users = true;
+ } else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
callchain_param.record_mode = CALLCHAIN_LBR;
else
callchain_param.record_mode = CALLCHAIN_FP;
diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c
index ac40e05bcab4..260418969120 100644
--- a/tools/perf/tests/dwarf-unwind.c
+++ b/tools/perf/tests/dwarf-unwind.c
@@ -173,6 +173,7 @@ int test__dwarf_unwind(struct test *test __maybe_unused, int subtest __maybe_unu
}
callchain_param.record_mode = CALLCHAIN_DWARF;
+ dwarf_callchain_users = true;
if (init_live_machine(machine)) {
pr_err("Could not init machine\n");
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 082505d08d72..32ef7bdca1cf 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -37,6 +37,15 @@ struct callchain_param callchain_param = {
CALLCHAIN_PARAM_DEFAULT
};
+/*
+ * Are there any events usind DWARF callchains?
+ *
+ * I.e.
+ *
+ * -e cycles/call-graph=dwarf/
+ */
+bool dwarf_callchain_users;
+
struct callchain_param callchain_param_default = {
CALLCHAIN_PARAM_DEFAULT
};
@@ -265,6 +274,7 @@ int parse_callchain_record(const char *arg, struct callchain_param *param)
ret = 0;
param->record_mode = CALLCHAIN_DWARF;
param->dump_size = default_stack_dump_size;
+ dwarf_callchain_users = true;
tok = strtok_r(NULL, ",", &saveptr);
if (tok) {
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index b79ef2478a57..154560b1eb65 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -89,6 +89,8 @@ enum chain_value {
CCVAL_COUNT,
};
+extern bool dwarf_callchain_users;
+
struct callchain_param {
bool enabled;
enum perf_call_graph_mode record_mode;
diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c
index 7a42f703e858..af873044d33a 100644
--- a/tools/perf/util/unwind-libunwind-local.c
+++ b/tools/perf/util/unwind-libunwind-local.c
@@ -631,9 +631,8 @@ static unw_accessors_t accessors = {
static int _unwind__prepare_access(struct thread *thread)
{
- if (callchain_param.record_mode != CALLCHAIN_DWARF)
+ if (!dwarf_callchain_users)
return 0;
-
thread->addr_space = unw_create_addr_space(&accessors, 0);
if (!thread->addr_space) {
pr_err("unwind: Can't create unwind address space.\n");
@@ -646,17 +645,15 @@ static int _unwind__prepare_access(struct thread *thread)
static void _unwind__flush_access(struct thread *thread)
{
- if (callchain_param.record_mode != CALLCHAIN_DWARF)
+ if (!dwarf_callchain_users)
return;
-
unw_flush_cache(thread->addr_space, 0, 0);
}
static void _unwind__finish_access(struct thread *thread)
{
- if (callchain_param.record_mode != CALLCHAIN_DWARF)
+ if (!dwarf_callchain_users)
return;
-
unw_destroy_addr_space(thread->addr_space);
}
On Tue, Jan 16, 2018 at 12:36:21PM -0300, Arnaldo Carvalho de Melo wrote:
> Em Tue, Jan 16, 2018 at 04:19:15PM +0100, Jiri Olsa escreveu:
> > On Tue, Jan 16, 2018 at 11:24:35AM -0300, Arnaldo Carvalho de Melo wrote:
> >
> > SNIP
> >
> > > Cc: Adrian Hunter <[email protected]>
> > > Cc: David Ahern <[email protected]>
> > > Cc: Hendrick Brueckner <[email protected]>
> > > Cc: Jiri Olsa <[email protected]>
> > > Cc: Namhyung Kim <[email protected]>
> > > Cc: Thomas Richter <[email protected]>
> > > Cc: Wang Nan <[email protected]>
> > > Link: https://lkml.kernel.org/n/[email protected]
> > > Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
> > > ---
> > > tools/perf/util/unwind-libunwind-local.c | 9 ---------
> > > 1 file changed, 9 deletions(-)
> > >
> > > diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c
> > > index 7a42f703e858..02dc5a9d8f72 100644
> > > --- a/tools/perf/util/unwind-libunwind-local.c
> > > +++ b/tools/perf/util/unwind-libunwind-local.c
> > > @@ -631,9 +631,6 @@ static unw_accessors_t accessors = {
> > >
> > > static int _unwind__prepare_access(struct thread *thread)
> > > {
> > > - if (callchain_param.record_mode != CALLCHAIN_DWARF)
> > > - return 0;
> > > -
> >
> > this would create thread->addr_space also for data without
> > dwarf callchains data, so I think we need to keep it
> >
> > it should get set in apply_config_terms which calls parse_callchain_record
>
> No, it should not set the global parameter, as this is just for a
> specific event, i.e. we can have something like:
>
> perf record -e cycles/call-graph=dwarf/ \
> -e instructions/call-graph=lbr/
> -e cache-misses/call-graph=fp
>
> And then get these samples for the same thread.
>
> If you want to avoid creating thread->addr_space, and we do want that,
> sure, a followup patch should address that, we need to postpone
> allocating it till we get a DWARF callchain in a sample for that
> specific thread, when we then should allocate thread->addr_space.
yea that could be fixed.. however current code prevents
to create that for data without DWARF data, and your change
forces it
I guess that's addressed in your next email.. going to read it ;-)
jirka
Em Tue, Jan 16, 2018 at 08:30:35PM +0100, Jiri Olsa escreveu:
> On Tue, Jan 16, 2018 at 12:36:21PM -0300, Arnaldo Carvalho de Melo wrote:
> > Em Tue, Jan 16, 2018 at 04:19:15PM +0100, Jiri Olsa escreveu:
> > > On Tue, Jan 16, 2018 at 11:24:35AM -0300, Arnaldo Carvalho de Melo wrote:
> > >
> > > SNIP
> > >
> > > > Cc: Adrian Hunter <[email protected]>
> > > > Cc: David Ahern <[email protected]>
> > > > Cc: Hendrick Brueckner <[email protected]>
> > > > Cc: Jiri Olsa <[email protected]>
> > > > Cc: Namhyung Kim <[email protected]>
> > > > Cc: Thomas Richter <[email protected]>
> > > > Cc: Wang Nan <[email protected]>
> > > > Link: https://lkml.kernel.org/n/[email protected]
> > > > Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
> > > > ---
> > > > tools/perf/util/unwind-libunwind-local.c | 9 ---------
> > > > 1 file changed, 9 deletions(-)
> > > >
> > > > diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c
> > > > index 7a42f703e858..02dc5a9d8f72 100644
> > > > --- a/tools/perf/util/unwind-libunwind-local.c
> > > > +++ b/tools/perf/util/unwind-libunwind-local.c
> > > > @@ -631,9 +631,6 @@ static unw_accessors_t accessors = {
> > > >
> > > > static int _unwind__prepare_access(struct thread *thread)
> > > > {
> > > > - if (callchain_param.record_mode != CALLCHAIN_DWARF)
> > > > - return 0;
> > > > -
> > >
> > > this would create thread->addr_space also for data without
> > > dwarf callchains data, so I think we need to keep it
> > >
> > > it should get set in apply_config_terms which calls parse_callchain_record
> >
> > No, it should not set the global parameter, as this is just for a
> > specific event, i.e. we can have something like:
> >
> > perf record -e cycles/call-graph=dwarf/ \
> > -e instructions/call-graph=lbr/
> > -e cache-misses/call-graph=fp
> >
> > And then get these samples for the same thread.
> >
> > If you want to avoid creating thread->addr_space, and we do want that,
> > sure, a followup patch should address that, we need to postpone
> > allocating it till we get a DWARF callchain in a sample for that
> > specific thread, when we then should allocate thread->addr_space.
>
> yea that could be fixed.. however current code prevents
> to create that for data without DWARF data, and your change
> forces it
Humm? Which code?
[root@jouet ~]# perf record -e cycles/call-graph=dwarf/ -e instructions/call-graph=lbr/ -e cache-misses/call-graph=fp/ sleep 2
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.140 MB perf.data (43 samples) ]
[root@jouet ~]# perf evlist -v
cycles/call-graph=dwarf/: size: 112, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|ADDR|CALLCHAIN|PERIOD|REGS_USER|STACK_USER|IDENTIFIER|DATA_SRC, read_format: ID, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, enable_on_exec: 1, task: 1, mmap_data: 1, sample_id_all: 1, exclude_guest: 1, exclude_callchain_user: 1, mmap2: 1, comm_exec: 1, sample_regs_user: 0xff0fff, sample_stack_user: 8192
instructions/call-graph=lbr/: size: 112, config: 0x1, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|CALLCHAIN|PERIOD|BRANCH_STACK|IDENTIFIER, read_format: ID, disabled: 1, inherit: 1, freq: 1, enable_on_exec: 1, sample_id_all: 1, exclude_guest: 1, branch_sample_type: USER|CALL_STACK|NO_FLAGS|NO_CYCLES
cache-misses/call-graph=fp/: size: 112, config: 0x3, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|CALLCHAIN|PERIOD|IDENTIFIER, read_format: ID, disabled: 1, inherit: 1, freq: 1, enable_on_exec: 1, sample_id_all: 1, exclude_guest: 1
[root@jouet ~]# perf report --no-header | head -20
no symbols found in /usr/bin/sleep, maybe install a debug package?
# To display the perf.data header info, please use --header/--header-only options.
#
#
# Total Lost Samples: 0
#
# Samples: 15 of event 'cycles/call-graph=dwarf/'
# Event count (approx.): 2153016
#
# Children Self Command Shared Object Symbol
# ........ ........ ....... ................ .......................................
#
48.98% 0.00% sleep ld-2.26.so [.] _start
|
---_start
|
|--35.15%--_dl_start
| _dl_sysdep_start
| dl_main
| |
| |--18.44%--_dl_relocate_object
[root@jouet ~]#
- Arnaldo
On Tue, Jan 16, 2018 at 03:26:50PM -0300, Arnaldo Carvalho de Melo wrote:
SNIP
> diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
> index c0debc3f79b6..c0815a37fdb5 100644
> --- a/tools/perf/builtin-c2c.c
> +++ b/tools/perf/builtin-c2c.c
> @@ -2390,9 +2390,10 @@ static int setup_callchain(struct perf_evlist *evlist)
> enum perf_call_graph_mode mode = CALLCHAIN_NONE;
>
> if ((sample_type & PERF_SAMPLE_REGS_USER) &&
> - (sample_type & PERF_SAMPLE_STACK_USER))
> + (sample_type & PERF_SAMPLE_STACK_USER)) {
> mode = CALLCHAIN_DWARF;
> - else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
> + dwarf_callchain_users = true;
> + } else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
> mode = CALLCHAIN_LBR;
> else if (sample_type & PERF_SAMPLE_CALLCHAIN)
> mode = CALLCHAIN_FP;
> diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
> index dd4df9a5cd06..6593779224d5 100644
> --- a/tools/perf/builtin-report.c
> +++ b/tools/perf/builtin-report.c
> @@ -338,9 +338,10 @@ static int report__setup_sample_type(struct report *rep)
>
> if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) {
> if ((sample_type & PERF_SAMPLE_REGS_USER) &&
> - (sample_type & PERF_SAMPLE_STACK_USER))
> + (sample_type & PERF_SAMPLE_STACK_USER)) {
> callchain_param.record_mode = CALLCHAIN_DWARF;
> - else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
> + dwarf_callchain_users = true;
> + } else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
> callchain_param.record_mode = CALLCHAIN_LBR;
> else
> callchain_param.record_mode = CALLCHAIN_FP;
> diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
> index c1cce474c0f1..08bc818f371b 100644
> --- a/tools/perf/builtin-script.c
> +++ b/tools/perf/builtin-script.c
> @@ -2919,9 +2919,10 @@ static void script__setup_sample_type(struct perf_script *script)
>
> if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) {
> if ((sample_type & PERF_SAMPLE_REGS_USER) &&
> - (sample_type & PERF_SAMPLE_STACK_USER))
> + (sample_type & PERF_SAMPLE_STACK_USER)) {
> callchain_param.record_mode = CALLCHAIN_DWARF;
> - else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
> + dwarf_callchain_users = true;
> + } else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
> callchain_param.record_mode = CALLCHAIN_LBR;
> else
> callchain_param.record_mode = CALLCHAIN_FP;
> diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c
> index ac40e05bcab4..260418969120 100644
> --- a/tools/perf/tests/dwarf-unwind.c
> +++ b/tools/perf/tests/dwarf-unwind.c
> @@ -173,6 +173,7 @@ int test__dwarf_unwind(struct test *test __maybe_unused, int subtest __maybe_unu
> }
>
> callchain_param.record_mode = CALLCHAIN_DWARF;
> + dwarf_callchain_users = true;
>
> if (init_live_machine(machine)) {
> pr_err("Could not init machine\n");
> diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
> index 082505d08d72..32ef7bdca1cf 100644
> --- a/tools/perf/util/callchain.c
> +++ b/tools/perf/util/callchain.c
> @@ -37,6 +37,15 @@ struct callchain_param callchain_param = {
> CALLCHAIN_PARAM_DEFAULT
> };
>
> +/*
> + * Are there any events usind DWARF callchains?
> + *
> + * I.e.
> + *
> + * -e cycles/call-graph=dwarf/
> + */
> +bool dwarf_callchain_users;
hum, I don't follow.. this bool seems to mirror the usage of
'param->record_mode = CALLCHAIN_DWARF', whats the difference?
also, the patch title says 'Do not look at globals', while inside you
add new global dwarf_callchain_users and work with it.. what do I miss?
I'll check tomorrow with clean head ;-)
jirka
On Tue, Jan 16, 2018 at 04:45:20PM -0300, Arnaldo Carvalho de Melo wrote:
> Em Tue, Jan 16, 2018 at 08:30:35PM +0100, Jiri Olsa escreveu:
> > On Tue, Jan 16, 2018 at 12:36:21PM -0300, Arnaldo Carvalho de Melo wrote:
> > > Em Tue, Jan 16, 2018 at 04:19:15PM +0100, Jiri Olsa escreveu:
> > > > On Tue, Jan 16, 2018 at 11:24:35AM -0300, Arnaldo Carvalho de Melo wrote:
> > > >
> > > > SNIP
> > > >
> > > > > Cc: Adrian Hunter <[email protected]>
> > > > > Cc: David Ahern <[email protected]>
> > > > > Cc: Hendrick Brueckner <[email protected]>
> > > > > Cc: Jiri Olsa <[email protected]>
> > > > > Cc: Namhyung Kim <[email protected]>
> > > > > Cc: Thomas Richter <[email protected]>
> > > > > Cc: Wang Nan <[email protected]>
> > > > > Link: https://lkml.kernel.org/n/[email protected]
> > > > > Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
> > > > > ---
> > > > > tools/perf/util/unwind-libunwind-local.c | 9 ---------
> > > > > 1 file changed, 9 deletions(-)
> > > > >
> > > > > diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c
> > > > > index 7a42f703e858..02dc5a9d8f72 100644
> > > > > --- a/tools/perf/util/unwind-libunwind-local.c
> > > > > +++ b/tools/perf/util/unwind-libunwind-local.c
> > > > > @@ -631,9 +631,6 @@ static unw_accessors_t accessors = {
> > > > >
> > > > > static int _unwind__prepare_access(struct thread *thread)
> > > > > {
> > > > > - if (callchain_param.record_mode != CALLCHAIN_DWARF)
> > > > > - return 0;
> > > > > -
> > > >
> > > > this would create thread->addr_space also for data without
> > > > dwarf callchains data, so I think we need to keep it
> > > >
> > > > it should get set in apply_config_terms which calls parse_callchain_record
> > >
> > > No, it should not set the global parameter, as this is just for a
> > > specific event, i.e. we can have something like:
> > >
> > > perf record -e cycles/call-graph=dwarf/ \
> > > -e instructions/call-graph=lbr/
> > > -e cache-misses/call-graph=fp
> > >
> > > And then get these samples for the same thread.
> > >
> > > If you want to avoid creating thread->addr_space, and we do want that,
> > > sure, a followup patch should address that, we need to postpone
> > > allocating it till we get a DWARF callchain in a sample for that
> > > specific thread, when we then should allocate thread->addr_space.
> >
> > yea that could be fixed.. however current code prevents
> > to create that for data without DWARF data, and your change
> > forces it
>
> Humm? Which code?
these bits
jirka
---
diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c
index 7a42f703e858..02dc5a9d8f72 100644
--- a/tools/perf/util/unwind-libunwind-local.c
+++ b/tools/perf/util/unwind-libunwind-local.c
@@ -631,9 +631,6 @@ static unw_accessors_t accessors = {
static int _unwind__prepare_access(struct thread *thread)
{
- if (callchain_param.record_mode != CALLCHAIN_DWARF)
- return 0;
-
thread->addr_space = unw_create_addr_space(&accessors, 0);
if (!thread->addr_space) {
pr_err("unwind: Can't create unwind address space.\n");
@@ -646,17 +643,11 @@ static int _unwind__prepare_access(struct thread *thread)
static void _unwind__flush_access(struct thread *thread)
{
- if (callchain_param.record_mode != CALLCHAIN_DWARF)
- return;
-
unw_flush_cache(thread->addr_space, 0, 0);
}
static void _unwind__finish_access(struct thread *thread)
{
- if (callchain_param.record_mode != CALLCHAIN_DWARF)
- return;
-
unw_destroy_addr_space(thread->addr_space);
}
Em Tue, Jan 16, 2018 at 08:49:09PM +0100, Jiri Olsa escreveu:
> On Tue, Jan 16, 2018 at 03:26:50PM -0300, Arnaldo Carvalho de Melo wrote:
>
> SNIP
>
> > diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
> > index c0debc3f79b6..c0815a37fdb5 100644
> > --- a/tools/perf/builtin-c2c.c
> > +++ b/tools/perf/builtin-c2c.c
> > @@ -2390,9 +2390,10 @@ static int setup_callchain(struct perf_evlist *evlist)
> > enum perf_call_graph_mode mode = CALLCHAIN_NONE;
> >
> > if ((sample_type & PERF_SAMPLE_REGS_USER) &&
> > - (sample_type & PERF_SAMPLE_STACK_USER))
> > + (sample_type & PERF_SAMPLE_STACK_USER)) {
> > mode = CALLCHAIN_DWARF;
> > - else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
> > + dwarf_callchain_users = true;
> > + } else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
> > mode = CALLCHAIN_LBR;
> > else if (sample_type & PERF_SAMPLE_CALLCHAIN)
> > mode = CALLCHAIN_FP;
> > diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
> > index dd4df9a5cd06..6593779224d5 100644
> > --- a/tools/perf/builtin-report.c
> > +++ b/tools/perf/builtin-report.c
> > @@ -338,9 +338,10 @@ static int report__setup_sample_type(struct report *rep)
> >
> > if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) {
> > if ((sample_type & PERF_SAMPLE_REGS_USER) &&
> > - (sample_type & PERF_SAMPLE_STACK_USER))
> > + (sample_type & PERF_SAMPLE_STACK_USER)) {
> > callchain_param.record_mode = CALLCHAIN_DWARF;
> > - else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
> > + dwarf_callchain_users = true;
> > + } else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
> > callchain_param.record_mode = CALLCHAIN_LBR;
> > else
> > callchain_param.record_mode = CALLCHAIN_FP;
> > diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
> > index c1cce474c0f1..08bc818f371b 100644
> > --- a/tools/perf/builtin-script.c
> > +++ b/tools/perf/builtin-script.c
> > @@ -2919,9 +2919,10 @@ static void script__setup_sample_type(struct perf_script *script)
> >
> > if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) {
> > if ((sample_type & PERF_SAMPLE_REGS_USER) &&
> > - (sample_type & PERF_SAMPLE_STACK_USER))
> > + (sample_type & PERF_SAMPLE_STACK_USER)) {
> > callchain_param.record_mode = CALLCHAIN_DWARF;
> > - else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
> > + dwarf_callchain_users = true;
> > + } else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
> > callchain_param.record_mode = CALLCHAIN_LBR;
> > else
> > callchain_param.record_mode = CALLCHAIN_FP;
> > diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c
> > index ac40e05bcab4..260418969120 100644
> > --- a/tools/perf/tests/dwarf-unwind.c
> > +++ b/tools/perf/tests/dwarf-unwind.c
> > @@ -173,6 +173,7 @@ int test__dwarf_unwind(struct test *test __maybe_unused, int subtest __maybe_unu
> > }
> >
> > callchain_param.record_mode = CALLCHAIN_DWARF;
> > + dwarf_callchain_users = true;
> >
> > if (init_live_machine(machine)) {
> > pr_err("Could not init machine\n");
> > diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
> > index 082505d08d72..32ef7bdca1cf 100644
> > --- a/tools/perf/util/callchain.c
> > +++ b/tools/perf/util/callchain.c
> > @@ -37,6 +37,15 @@ struct callchain_param callchain_param = {
> > CALLCHAIN_PARAM_DEFAULT
> > };
> >
> > +/*
> > + * Are there any events usind DWARF callchains?
> > + *
> > + * I.e.
> > + *
> > + * -e cycles/call-graph=dwarf/
> > + */
> > +bool dwarf_callchain_users;
>
> hum, I don't follow.. this bool seems to mirror the usage of
> 'param->record_mode = CALLCHAIN_DWARF', whats the difference?
>
> also, the patch title says 'Do not look at globals', while inside you
The first version didn't look at globals, the second one doesn't look at
an _specific_ global variable, the global config for --call-graph, which
is a global variable, callchain_param, which _we_ can't touch at
apply_config_terms(), since that is about _just_ that event, not all of
them.
> add new global dwarf_callchain_users and work with it.. what do I miss?
>
> I'll check tomorrow with clean head ;-)
Look closely at apply_config_terms() it passes a _local_ variable to
perf_evsel__config_callchain(evsel, opts, ¶m);
It will not affect any globals that tools/perf/util/unwind-libunwind-local.c
could possibly use... and that is the problem. :-)
The right fix, as I said, is more involved and may allow us to remove
these two global variables, both callchain_param and
dwarf_callchain_users.
We need to have per-evsel unwind ops, per thread addr_space continues to
be used by the dwarf unwinder _for the events sampled in that thread_,
etc.
The prepare_unwind is to be made to evsel and thread (for thread we need
to look at one of its executable maps, to determine if it is 32-bit or
64-bit, etc, but not necessarily at that insert_map part, etc).
- Arnaldo
Em Tue, Jan 16, 2018 at 08:55:55PM +0100, Jiri Olsa escreveu:
> On Tue, Jan 16, 2018 at 04:45:20PM -0300, Arnaldo Carvalho de Melo wrote:
> > Em Tue, Jan 16, 2018 at 08:30:35PM +0100, Jiri Olsa escreveu:
> > > On Tue, Jan 16, 2018 at 12:36:21PM -0300, Arnaldo Carvalho de Melo wrote:
> > > > Em Tue, Jan 16, 2018 at 04:19:15PM +0100, Jiri Olsa escreveu:
> > > > > On Tue, Jan 16, 2018 at 11:24:35AM -0300, Arnaldo Carvalho de Melo wrote:
> > > > >
> > > > > SNIP
> > > > >
> > > > > > Cc: Adrian Hunter <[email protected]>
> > > > > > Cc: David Ahern <[email protected]>
> > > > > > Cc: Hendrick Brueckner <[email protected]>
> > > > > > Cc: Jiri Olsa <[email protected]>
> > > > > > Cc: Namhyung Kim <[email protected]>
> > > > > > Cc: Thomas Richter <[email protected]>
> > > > > > Cc: Wang Nan <[email protected]>
> > > > > > Link: https://lkml.kernel.org/n/[email protected]
> > > > > > Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
> > > > > > ---
> > > > > > tools/perf/util/unwind-libunwind-local.c | 9 ---------
> > > > > > 1 file changed, 9 deletions(-)
> > > > > >
> > > > > > diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c
> > > > > > index 7a42f703e858..02dc5a9d8f72 100644
> > > > > > --- a/tools/perf/util/unwind-libunwind-local.c
> > > > > > +++ b/tools/perf/util/unwind-libunwind-local.c
> > > > > > @@ -631,9 +631,6 @@ static unw_accessors_t accessors = {
> > > > > >
> > > > > > static int _unwind__prepare_access(struct thread *thread)
> > > > > > {
> > > > > > - if (callchain_param.record_mode != CALLCHAIN_DWARF)
> > > > > > - return 0;
> > > > > > -
> > > > >
> > > > > this would create thread->addr_space also for data without
> > > > > dwarf callchains data, so I think we need to keep it
> > > > >
> > > > > it should get set in apply_config_terms which calls parse_callchain_record
> > > >
> > > > No, it should not set the global parameter, as this is just for a
> > > > specific event, i.e. we can have something like:
> > > >
> > > > perf record -e cycles/call-graph=dwarf/ \
> > > > -e instructions/call-graph=lbr/
> > > > -e cache-misses/call-graph=fp
> > > >
> > > > And then get these samples for the same thread.
> > > >
> > > > If you want to avoid creating thread->addr_space, and we do want that,
> > > > sure, a followup patch should address that, we need to postpone
> > > > allocating it till we get a DWARF callchain in a sample for that
> > > > specific thread, when we then should allocate thread->addr_space.
> > >
> > > yea that could be fixed.. however current code prevents
> > > to create that for data without DWARF data, and your change
> > > forces it
> >
> > Humm? Which code?
>
> these bits
Those are replaced already, based on your comments.
> jirka
>
>
> ---
> diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c
> index 7a42f703e858..02dc5a9d8f72 100644
> --- a/tools/perf/util/unwind-libunwind-local.c
> +++ b/tools/perf/util/unwind-libunwind-local.c
> @@ -631,9 +631,6 @@ static unw_accessors_t accessors = {
>
> static int _unwind__prepare_access(struct thread *thread)
> {
> - if (callchain_param.record_mode != CALLCHAIN_DWARF)
> - return 0;
> -
> thread->addr_space = unw_create_addr_space(&accessors, 0);
> if (!thread->addr_space) {
> pr_err("unwind: Can't create unwind address space.\n");
> @@ -646,17 +643,11 @@ static int _unwind__prepare_access(struct thread *thread)
>
> static void _unwind__flush_access(struct thread *thread)
> {
> - if (callchain_param.record_mode != CALLCHAIN_DWARF)
> - return;
> -
> unw_flush_cache(thread->addr_space, 0, 0);
> }
>
> static void _unwind__finish_access(struct thread *thread)
> {
> - if (callchain_param.record_mode != CALLCHAIN_DWARF)
> - return;
> -
> unw_destroy_addr_space(thread->addr_space);
> }
Hi Arnaldo,
On Tue, Jan 16, 2018 at 05:05:20PM -0300, Arnaldo Carvalho de Melo wrote:
> Em Tue, Jan 16, 2018 at 08:49:09PM +0100, Jiri Olsa escreveu:
> > On Tue, Jan 16, 2018 at 03:26:50PM -0300, Arnaldo Carvalho de Melo wrote:
> >
> > SNIP
> >
> > > diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
> > > index c0debc3f79b6..c0815a37fdb5 100644
> > > --- a/tools/perf/builtin-c2c.c
> > > +++ b/tools/perf/builtin-c2c.c
> > > @@ -2390,9 +2390,10 @@ static int setup_callchain(struct perf_evlist *evlist)
> > > enum perf_call_graph_mode mode = CALLCHAIN_NONE;
> > >
> > > if ((sample_type & PERF_SAMPLE_REGS_USER) &&
> > > - (sample_type & PERF_SAMPLE_STACK_USER))
> > > + (sample_type & PERF_SAMPLE_STACK_USER)) {
> > > mode = CALLCHAIN_DWARF;
> > > - else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
> > > + dwarf_callchain_users = true;
> > > + } else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
> > > mode = CALLCHAIN_LBR;
> > > else if (sample_type & PERF_SAMPLE_CALLCHAIN)
> > > mode = CALLCHAIN_FP;
> > > diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
> > > index dd4df9a5cd06..6593779224d5 100644
> > > --- a/tools/perf/builtin-report.c
> > > +++ b/tools/perf/builtin-report.c
> > > @@ -338,9 +338,10 @@ static int report__setup_sample_type(struct report *rep)
> > >
> > > if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) {
> > > if ((sample_type & PERF_SAMPLE_REGS_USER) &&
> > > - (sample_type & PERF_SAMPLE_STACK_USER))
> > > + (sample_type & PERF_SAMPLE_STACK_USER)) {
> > > callchain_param.record_mode = CALLCHAIN_DWARF;
> > > - else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
> > > + dwarf_callchain_users = true;
> > > + } else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
> > > callchain_param.record_mode = CALLCHAIN_LBR;
> > > else
> > > callchain_param.record_mode = CALLCHAIN_FP;
> > > diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
> > > index c1cce474c0f1..08bc818f371b 100644
> > > --- a/tools/perf/builtin-script.c
> > > +++ b/tools/perf/builtin-script.c
> > > @@ -2919,9 +2919,10 @@ static void script__setup_sample_type(struct perf_script *script)
> > >
> > > if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) {
> > > if ((sample_type & PERF_SAMPLE_REGS_USER) &&
> > > - (sample_type & PERF_SAMPLE_STACK_USER))
> > > + (sample_type & PERF_SAMPLE_STACK_USER)) {
> > > callchain_param.record_mode = CALLCHAIN_DWARF;
> > > - else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
> > > + dwarf_callchain_users = true;
> > > + } else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
> > > callchain_param.record_mode = CALLCHAIN_LBR;
> > > else
> > > callchain_param.record_mode = CALLCHAIN_FP;
> > > diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c
> > > index ac40e05bcab4..260418969120 100644
> > > --- a/tools/perf/tests/dwarf-unwind.c
> > > +++ b/tools/perf/tests/dwarf-unwind.c
> > > @@ -173,6 +173,7 @@ int test__dwarf_unwind(struct test *test __maybe_unused, int subtest __maybe_unu
> > > }
> > >
> > > callchain_param.record_mode = CALLCHAIN_DWARF;
> > > + dwarf_callchain_users = true;
> > >
> > > if (init_live_machine(machine)) {
> > > pr_err("Could not init machine\n");
> > > diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
> > > index 082505d08d72..32ef7bdca1cf 100644
> > > --- a/tools/perf/util/callchain.c
> > > +++ b/tools/perf/util/callchain.c
> > > @@ -37,6 +37,15 @@ struct callchain_param callchain_param = {
> > > CALLCHAIN_PARAM_DEFAULT
> > > };
> > >
> > > +/*
> > > + * Are there any events usind DWARF callchains?
> > > + *
> > > + * I.e.
> > > + *
> > > + * -e cycles/call-graph=dwarf/
> > > + */
> > > +bool dwarf_callchain_users;
> >
> > hum, I don't follow.. this bool seems to mirror the usage of
> > 'param->record_mode = CALLCHAIN_DWARF', whats the difference?
> >
> > also, the patch title says 'Do not look at globals', while inside you
>
> The first version didn't look at globals, the second one doesn't look at
> an _specific_ global variable, the global config for --call-graph, which
> is a global variable, callchain_param, which _we_ can't touch at
> apply_config_terms(), since that is about _just_ that event, not all of
> them.
Right, we need to call the prepare routine when any of event requires
DWARF unwind even though the global callchain_param is FP, for example.
>
> > add new global dwarf_callchain_users and work with it.. what do I miss?
> >
> > I'll check tomorrow with clean head ;-)
>
> Look closely at apply_config_terms() it passes a _local_ variable to
>
> perf_evsel__config_callchain(evsel, opts, ¶m);
>
> It will not affect any globals that tools/perf/util/unwind-libunwind-local.c
> could possibly use... and that is the problem. :-)
>
> The right fix, as I said, is more involved and may allow us to remove
> these two global variables, both callchain_param and
> dwarf_callchain_users.
>
> We need to have per-evsel unwind ops, per thread addr_space continues to
> be used by the dwarf unwinder _for the events sampled in that thread_,
> etc.
>
> The prepare_unwind is to be made to evsel and thread (for thread we need
> to look at one of its executable maps, to determine if it is 32-bit or
> 64-bit, etc, but not necessarily at that insert_map part, etc).
Yep, but I'm ok with the proposed solution right now.
Thanks,
Namhyung
On Tue, Jan 16, 2018 at 05:05:20PM -0300, Arnaldo Carvalho de Melo wrote:
SNIP
> > >
> > > +/*
> > > + * Are there any events usind DWARF callchains?
> > > + *
> > > + * I.e.
> > > + *
> > > + * -e cycles/call-graph=dwarf/
> > > + */
> > > +bool dwarf_callchain_users;
> >
> > hum, I don't follow.. this bool seems to mirror the usage of
> > 'param->record_mode = CALLCHAIN_DWARF', whats the difference?
> >
> > also, the patch title says 'Do not look at globals', while inside you
>
> The first version didn't look at globals, the second one doesn't look at
> an _specific_ global variable, the global config for --call-graph, which
> is a global variable, callchain_param, which _we_ can't touch at
> apply_config_terms(), since that is about _just_ that event, not all of
> them.
>
> > add new global dwarf_callchain_users and work with it.. what do I miss?
> >
> > I'll check tomorrow with clean head ;-)
>
> Look closely at apply_config_terms() it passes a _local_ variable to
>
> perf_evsel__config_callchain(evsel, opts, ¶m);
>
> It will not affect any globals that tools/perf/util/unwind-libunwind-local.c
> could possibly use... and that is the problem. :-)
ah ok, that one is local.. just to set up the attrs
then adding that new variable is ok, though it could be
aded inside struct callchain_param, to keep callchain
config stuff together
thanks,
jirka
Commit-ID: eabad8c6856f185f876b54c426c2cc69fe0f0a7d
Gitweb: https://git.kernel.org/tip/eabad8c6856f185f876b54c426c2cc69fe0f0a7d
Author: Arnaldo Carvalho de Melo <[email protected]>
AuthorDate: Mon, 15 Jan 2018 16:48:46 -0300
Committer: Arnaldo Carvalho de Melo <[email protected]>
CommitDate: Wed, 17 Jan 2018 10:23:32 -0300
perf unwind: Do not look just at the global callchain_param.record_mode
When setting up DWARF callchains on specific events, without using
'record' or 'trace' --call-graph, but instead doing it like:
perf trace -e cycles/call-graph=dwarf/
The unwind__prepare_access() call in thread__insert_map() when we
process PERF_RECORD_MMAP(2) metadata events were not being performed,
precluding us from using per-event DWARF callchains, handling them just
when we asked for all events to be DWARF, using "--call-graph dwarf".
We do it in the PERF_RECORD_MMAP because we have to look at one of the
executable maps to figure out the executable type (64-bit, 32-bit) of
the DSO laid out in that mmap. Also to look at the architecture where
the perf.data file was recorded.
All this probably should be deferred to when we process a sample for
some thread that has callchains, so that we do this processing only for
the threads with samples, not for all of them.
For now, fix using DWARF on specific events.
Before:
# perf trace --no-syscalls -e probe_libc:inet_pton/call-graph=dwarf/ ping -6 -c 1 ::1
PING ::1(::1) 56 data bytes
64 bytes from ::1: icmp_seq=1 ttl=64 time=0.048 ms
--- ::1 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.048/0.048/0.048/0.000 ms
0.000 probe_libc:inet_pton:(7fe9597bb350))
Problem processing probe_libc:inet_pton callchain, skipping...
#
After:
# perf trace --no-syscalls -e probe_libc:inet_pton/call-graph=dwarf/ ping -6 -c 1 ::1
PING ::1(::1) 56 data bytes
64 bytes from ::1: icmp_seq=1 ttl=64 time=0.060 ms
--- ::1 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.060/0.060/0.060/0.000 ms
0.000 probe_libc:inet_pton:(7fd4aa930350))
__inet_pton (inlined)
gaih_inet.constprop.7 (/usr/lib64/libc-2.26.so)
__GI_getaddrinfo (inlined)
[0xffffaa804e51af3f] (/usr/bin/ping)
__libc_start_main (/usr/lib64/libc-2.26.so)
[0xffffaa804e51b379] (/usr/bin/ping)
#
# perf trace --call-graph=dwarf --no-syscalls -e probe_libc:inet_pton/call-graph=dwarf/ ping -6 -c 1 ::1
PING ::1(::1) 56 data bytes
64 bytes from ::1: icmp_seq=1 ttl=64 time=0.057 ms
--- ::1 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.057/0.057/0.057/0.000 ms
0.000 probe_libc:inet_pton:(7f9363b9e350))
__inet_pton (inlined)
gaih_inet.constprop.7 (/usr/lib64/libc-2.26.so)
__GI_getaddrinfo (inlined)
[0xffffa9e8a14e0f3f] (/usr/bin/ping)
__libc_start_main (/usr/lib64/libc-2.26.so)
[0xffffa9e8a14e1379] (/usr/bin/ping)
#
# perf trace --call-graph=fp --no-syscalls -e probe_libc:inet_pton/call-graph=dwarf/ ping -6 -c 1 ::1
PING ::1(::1) 56 data bytes
64 bytes from ::1: icmp_seq=1 ttl=64 time=0.077 ms
--- ::1 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.077/0.077/0.077/0.000 ms
0.000 probe_libc:inet_pton:(7f4947e1c350))
__inet_pton (inlined)
gaih_inet.constprop.7 (/usr/lib64/libc-2.26.so)
__GI_getaddrinfo (inlined)
[0xffffaa716d88ef3f] (/usr/bin/ping)
__libc_start_main (/usr/lib64/libc-2.26.so)
[0xffffaa716d88f379] (/usr/bin/ping)
#
# perf trace --no-syscalls -e probe_libc:inet_pton/call-graph=fp/ ping -6 -c 1 ::1
PING ::1(::1) 56 data bytes
64 bytes from ::1: icmp_seq=1 ttl=64 time=0.078 ms
--- ::1 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.078/0.078/0.078/0.000 ms
0.000 probe_libc:inet_pton:(7fa157696350))
__GI___inet_pton (/usr/lib64/libc-2.26.so)
getaddrinfo (/usr/lib64/libc-2.26.so)
[0xffffa9ba39c74f40] (/usr/bin/ping)
#
Acked-by: Namhyung Kim <[email protected]>
Cc: Adrian Hunter <[email protected]>
Cc: David Ahern <[email protected]>
Cc: Hendrick Brueckner <[email protected]>
Cc: Jiri Olsa <[email protected]>
Cc: Thomas Richter <[email protected]>
Cc: Wang Nan <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/builtin-c2c.c | 5 +++--
tools/perf/builtin-report.c | 5 +++--
tools/perf/builtin-script.c | 5 +++--
tools/perf/tests/dwarf-unwind.c | 1 +
tools/perf/util/callchain.c | 10 ++++++++++
tools/perf/util/callchain.h | 2 ++
tools/perf/util/unwind-libunwind-local.c | 9 +++------
7 files changed, 25 insertions(+), 12 deletions(-)
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index c0debc3..c0815a3 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -2390,9 +2390,10 @@ static int setup_callchain(struct perf_evlist *evlist)
enum perf_call_graph_mode mode = CALLCHAIN_NONE;
if ((sample_type & PERF_SAMPLE_REGS_USER) &&
- (sample_type & PERF_SAMPLE_STACK_USER))
+ (sample_type & PERF_SAMPLE_STACK_USER)) {
mode = CALLCHAIN_DWARF;
- else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
+ dwarf_callchain_users = true;
+ } else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
mode = CALLCHAIN_LBR;
else if (sample_type & PERF_SAMPLE_CALLCHAIN)
mode = CALLCHAIN_FP;
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index dd4df9a..6593779 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -338,9 +338,10 @@ static int report__setup_sample_type(struct report *rep)
if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) {
if ((sample_type & PERF_SAMPLE_REGS_USER) &&
- (sample_type & PERF_SAMPLE_STACK_USER))
+ (sample_type & PERF_SAMPLE_STACK_USER)) {
callchain_param.record_mode = CALLCHAIN_DWARF;
- else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
+ dwarf_callchain_users = true;
+ } else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
callchain_param.record_mode = CALLCHAIN_LBR;
else
callchain_param.record_mode = CALLCHAIN_FP;
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index c1cce47..08bc818 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -2919,9 +2919,10 @@ static void script__setup_sample_type(struct perf_script *script)
if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) {
if ((sample_type & PERF_SAMPLE_REGS_USER) &&
- (sample_type & PERF_SAMPLE_STACK_USER))
+ (sample_type & PERF_SAMPLE_STACK_USER)) {
callchain_param.record_mode = CALLCHAIN_DWARF;
- else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
+ dwarf_callchain_users = true;
+ } else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
callchain_param.record_mode = CALLCHAIN_LBR;
else
callchain_param.record_mode = CALLCHAIN_FP;
diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c
index ac40e05..26041896 100644
--- a/tools/perf/tests/dwarf-unwind.c
+++ b/tools/perf/tests/dwarf-unwind.c
@@ -173,6 +173,7 @@ int test__dwarf_unwind(struct test *test __maybe_unused, int subtest __maybe_unu
}
callchain_param.record_mode = CALLCHAIN_DWARF;
+ dwarf_callchain_users = true;
if (init_live_machine(machine)) {
pr_err("Could not init machine\n");
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 082505d0..32ef7bd 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -37,6 +37,15 @@ struct callchain_param callchain_param = {
CALLCHAIN_PARAM_DEFAULT
};
+/*
+ * Are there any events usind DWARF callchains?
+ *
+ * I.e.
+ *
+ * -e cycles/call-graph=dwarf/
+ */
+bool dwarf_callchain_users;
+
struct callchain_param callchain_param_default = {
CALLCHAIN_PARAM_DEFAULT
};
@@ -265,6 +274,7 @@ int parse_callchain_record(const char *arg, struct callchain_param *param)
ret = 0;
param->record_mode = CALLCHAIN_DWARF;
param->dump_size = default_stack_dump_size;
+ dwarf_callchain_users = true;
tok = strtok_r(NULL, ",", &saveptr);
if (tok) {
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index b79ef24..154560b 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -89,6 +89,8 @@ enum chain_value {
CCVAL_COUNT,
};
+extern bool dwarf_callchain_users;
+
struct callchain_param {
bool enabled;
enum perf_call_graph_mode record_mode;
diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c
index 7a42f70..af87304 100644
--- a/tools/perf/util/unwind-libunwind-local.c
+++ b/tools/perf/util/unwind-libunwind-local.c
@@ -631,9 +631,8 @@ static unw_accessors_t accessors = {
static int _unwind__prepare_access(struct thread *thread)
{
- if (callchain_param.record_mode != CALLCHAIN_DWARF)
+ if (!dwarf_callchain_users)
return 0;
-
thread->addr_space = unw_create_addr_space(&accessors, 0);
if (!thread->addr_space) {
pr_err("unwind: Can't create unwind address space.\n");
@@ -646,17 +645,15 @@ static int _unwind__prepare_access(struct thread *thread)
static void _unwind__flush_access(struct thread *thread)
{
- if (callchain_param.record_mode != CALLCHAIN_DWARF)
+ if (!dwarf_callchain_users)
return;
-
unw_flush_cache(thread->addr_space, 0, 0);
}
static void _unwind__finish_access(struct thread *thread)
{
- if (callchain_param.record_mode != CALLCHAIN_DWARF)
+ if (!dwarf_callchain_users)
return;
-
unw_destroy_addr_space(thread->addr_space);
}