2021-03-04 18:27:33

by Alexandre Truong

[permalink] [raw]
Subject: [PATCH RESEND WITH CCs v3 1/4] perf tools: record aarch64 registers automatically

On arm64, automatically record all the registers if the frame pointer
mode is on. They will be used to do a dwarf unwind to find the caller
of the leaf frame if the frame pointer was omitted.

Signed-off-by: Alexandre Truong <[email protected]>
Cc: John Garry <[email protected]>
Cc: Will Deacon <[email protected]>
Cc: Mathieu Poirier <[email protected]>
Cc: Leo Yan <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Arnaldo Carvalho de Melo <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Alexander Shishkin <[email protected]>
Cc: Jiri Olsa <[email protected]>
Cc: Namhyung Kim <[email protected]>
Cc: Kemeng Shi <[email protected]>
Cc: Ian Rogers <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Kan Liang <[email protected]>
Cc: Jin Yao <[email protected]>
Cc: Adrian Hunter <[email protected]>
Cc: Suzuki K Poulose <[email protected]>
Cc: Al Grant <[email protected]>
Cc: James Clark <[email protected]>
Cc: Wilco Dijkstra <[email protected]>
---
tools/perf/arch/arm64/util/machine.c | 7 +++++++
tools/perf/builtin-record.c | 7 +++++++
tools/perf/util/callchain.h | 2 ++
3 files changed, 16 insertions(+)

diff --git a/tools/perf/arch/arm64/util/machine.c b/tools/perf/arch/arm64/util/machine.c
index 40c5e0b5bda8..bf2f9c447867 100644
--- a/tools/perf/arch/arm64/util/machine.c
+++ b/tools/perf/arch/arm64/util/machine.c
@@ -5,6 +5,8 @@
#include <string.h>
#include "debug.h"
#include "symbol.h"
+#include "callchain.h"
+#include "record.h"

/* On arm64, kernel text segment start at high memory address,
* for example 0xffff 0000 8xxx xxxx. Modules start at a low memory
@@ -26,3 +28,8 @@ void arch__symbols__fixup_end(struct symbol *p, struct symbol *c)
p->end = c->start;
pr_debug4("%s sym:%s end:%#" PRIx64 "\n", __func__, p->name, p->end);
}
+
+void arch__add_leaf_frame_record_opts(struct record_opts *opts)
+{
+ opts->sample_user_regs = arch__user_reg_mask();
+}
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 8a0127d4fb52..496307ef490e 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -2244,6 +2244,10 @@ static int record__parse_mmap_pages(const struct option *opt,
return ret;
}

+void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused)
+{
+}
+
static int parse_control_option(const struct option *opt,
const char *str,
int unset __maybe_unused)
@@ -2813,6 +2817,9 @@ int cmd_record(int argc, const char **argv)
/* Enable ignoring missing threads when -u/-p option is defined. */
rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;

+ if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP)
+ arch__add_leaf_frame_record_opts(&rec->opts);
+
err = -ENOMEM;
if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
usage_with_options(record_usage, record_options);
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 5824134f983b..77fba053c677 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -280,6 +280,8 @@ static inline int arch_skip_callchain_idx(struct thread *thread __maybe_unused,
}
#endif

+void arch__add_leaf_frame_record_opts(struct record_opts *opts);
+
char *callchain_list__sym_name(struct callchain_list *cl,
char *bf, size_t bfsize, bool show_dso);
char *callchain_node__scnprintf_value(struct callchain_node *node,
--
2.23.0


2021-03-04 18:28:13

by Alexandre Truong

[permalink] [raw]
Subject: [PATCH RESEND WITH CCs v3 4/4] perf tools: determine if LR is the return address

On arm64 in frame pointer mode (e.g. perf record --call-graph fp),
use dwarf unwind info to check if the link register is the return
address in order to inject it into the frame pointer stack.

Write the following application:

int a = 10;

void f2(void)
{
for (int i = 0; i < 1000000; i++)
a *= a;
}

void f1()
{
for (int i = 0; i < 10; i++)
f2();
}

int main (void)
{
f1();
return 0;
}

with the following compilation flags:
gcc -fno-omit-frame-pointer -fno-inline -O2

The compiler omits the frame pointer for f2 on arm. This is a problem
with any leaf call, for example an application with many different
calls to malloc() would always omit the calling frame, even if it
can be determined.

./perf record --call-graph fp ./a.out
./perf report

currently gives the following stack:

0xffffea52f361
_start
__libc_start_main
main
f2

After this change, perf report correctly shows f1() calling f2(),
even though it was missing from the frame pointer unwind:

./perf report

0xffffea52f361
_start
__libc_start_main
main
f1
f2

Signed-off-by: Alexandre Truong <[email protected]>
Cc: John Garry <[email protected]>
Cc: Will Deacon <[email protected]>
Cc: Mathieu Poirier <[email protected]>
Cc: Leo Yan <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Arnaldo Carvalho de Melo <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Alexander Shishkin <[email protected]>
Cc: Jiri Olsa <[email protected]>
Cc: Namhyung Kim <[email protected]>
Cc: Kemeng Shi <[email protected]>
Cc: Ian Rogers <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Kan Liang <[email protected]>
Cc: Jin Yao <[email protected]>
Cc: Adrian Hunter <[email protected]>
Cc: Suzuki K Poulose <[email protected]>
Cc: Al Grant <[email protected]>
Cc: James Clark <[email protected]>
Cc: Wilco Dijkstra <[email protected]>
---
tools/perf/util/Build | 1 +
.../util/arm-frame-pointer-unwind-support.c | 44 +++++++++++++++++++
.../util/arm-frame-pointer-unwind-support.h | 7 +++
tools/perf/util/machine.c | 9 ++--
4 files changed, 58 insertions(+), 3 deletions(-)
create mode 100644 tools/perf/util/arm-frame-pointer-unwind-support.c
create mode 100644 tools/perf/util/arm-frame-pointer-unwind-support.h

diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 188521f34347..3b82cb992bce 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -1,3 +1,4 @@
+perf-y += arm-frame-pointer-unwind-support.o
perf-y += annotate.o
perf-y += block-info.o
perf-y += block-range.o
diff --git a/tools/perf/util/arm-frame-pointer-unwind-support.c b/tools/perf/util/arm-frame-pointer-unwind-support.c
new file mode 100644
index 000000000000..964efd08e72e
--- /dev/null
+++ b/tools/perf/util/arm-frame-pointer-unwind-support.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "../arch/arm64/include/uapi/asm/perf_regs.h"
+#include "arch/arm64/include/perf_regs.h"
+#include "event.h"
+#include "arm-frame-pointer-unwind-support.h"
+#include "callchain.h"
+#include "unwind.h"
+
+struct entries {
+ u64 stack[2];
+ size_t length;
+};
+
+static bool get_leaf_frame_caller_enabled(struct perf_sample *sample)
+{
+ return callchain_param.record_mode == CALLCHAIN_FP && sample->user_regs.regs
+ && sample->user_regs.mask == PERF_REGS_MASK;
+}
+
+static int add_entry(struct unwind_entry *entry, void *arg)
+{
+ struct entries *entries = arg;
+
+ entries->stack[entries->length++] = entry->ip;
+ return 0;
+}
+
+u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thread)
+{
+ int ret;
+
+ struct entries entries = {{0, 0}, 0};
+
+ if (!get_leaf_frame_caller_enabled(sample))
+ return 0;
+
+ ret = unwind__get_entries(add_entry, &entries, thread, sample, 2);
+
+ if (ret || entries.length != 2)
+ return ret;
+
+ return callchain_param.order == ORDER_CALLER ?
+ entries.stack[0] : entries.stack[1];
+}
diff --git a/tools/perf/util/arm-frame-pointer-unwind-support.h b/tools/perf/util/arm-frame-pointer-unwind-support.h
new file mode 100644
index 000000000000..16dc03fa9abe
--- /dev/null
+++ b/tools/perf/util/arm-frame-pointer-unwind-support.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H
+#define __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H
+
+u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thread);
+
+#endif /* __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H */
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 7f03ffa016b0..dfb72dbc0e2d 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -34,6 +34,7 @@
#include "bpf-event.h"
#include <internal/lib.h> // page_size
#include "cgroup.h"
+#include "arm-frame-pointer-unwind-support.h"

#include <linux/ctype.h>
#include <symbol/kallsyms.h>
@@ -2671,10 +2672,12 @@ static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread,
return err;
}

-static u64 get_leaf_frame_caller(struct perf_sample *sample __maybe_unused,
- struct thread *thread __maybe_unused)
+static u64 get_leaf_frame_caller(struct perf_sample *sample, struct thread *thread)
{
- return 0;
+ if (strncmp(thread->maps->machine->env->arch, "aarch64", 7) == 0)
+ return get_leaf_frame_caller_aarch64(sample, thread);
+ else
+ return 0;
}

static int thread__resolve_callchain_sample(struct thread *thread,
--
2.23.0

2021-03-05 00:52:16

by Alexandre Truong

[permalink] [raw]
Subject: [PATCH RESEND WITH CCs v3 3/4] perf tools: enable dwarf_callchain_users on aarch64

On arm64, enable dwarf_callchain_users which will be needed
to do a dwarf unwind in order to get the caller of the leaf frame.

Signed-off-by: Alexandre Truong <[email protected]>
Cc: John Garry <[email protected]>
Cc: Will Deacon <[email protected]>
Cc: Mathieu Poirier <[email protected]>
Cc: Leo Yan <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Arnaldo Carvalho de Melo <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Alexander Shishkin <[email protected]>
Cc: Jiri Olsa <[email protected]>
Cc: Namhyung Kim <[email protected]>
Cc: Kemeng Shi <[email protected]>
Cc: Ian Rogers <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Kan Liang <[email protected]>
Cc: Jin Yao <[email protected]>
Cc: Adrian Hunter <[email protected]>
Cc: Suzuki K Poulose <[email protected]>
Cc: Al Grant <[email protected]>
Cc: James Clark <[email protected]>
Cc: Wilco Dijkstra <[email protected]>
---
tools/perf/builtin-report.c | 4 ++++
1 file changed, 4 insertions(+)

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 2a845d6cac09..93661a3eaeb1 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -405,6 +405,10 @@ static int report__setup_sample_type(struct report *rep)

callchain_param_setup(sample_type);

+ if (callchain_param.record_mode == CALLCHAIN_FP &&
+ strncmp(rep->session->header.env.arch, "aarch64", 7) == 0)
+ dwarf_callchain_users = true;
+
if (rep->stitch_lbr && (callchain_param.record_mode != CALLCHAIN_LBR)) {
ui__warning("Can't find LBR callchain. Switch off --stitch-lbr.\n"
"Please apply --call-graph lbr when recording.\n");
--
2.23.0

2021-03-05 00:53:03

by Alexandre Truong

[permalink] [raw]
Subject: [PATCH RESEND WITH CCs v3 2/4] perf tools: add a mechanism to inject stack frames

Add a mechanism for platforms to inject stack frames for the leaf
frame caller if there is enough information to determine a frame
is missing from dwarf or other post processing mechanisms.

Signed-off-by: Alexandre Truong <[email protected]>
Cc: John Garry <[email protected]>
Cc: Will Deacon <[email protected]>
Cc: Mathieu Poirier <[email protected]>
Cc: Leo Yan <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Arnaldo Carvalho de Melo <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Alexander Shishkin <[email protected]>
Cc: Jiri Olsa <[email protected]>
Cc: Namhyung Kim <[email protected]>
Cc: Kemeng Shi <[email protected]>
Cc: Ian Rogers <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Kan Liang <[email protected]>
Cc: Jin Yao <[email protected]>
Cc: Adrian Hunter <[email protected]>
Cc: Suzuki K Poulose <[email protected]>
Cc: Al Grant <[email protected]>
Cc: James Clark <[email protected]>
Cc: Wilco Dijkstra <[email protected]>
---
tools/perf/util/machine.c | 23 +++++++++++++++++++++++
1 file changed, 23 insertions(+)

diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index ab8a6b3e801d..7f03ffa016b0 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -2671,6 +2671,12 @@ static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread,
return err;
}

+static u64 get_leaf_frame_caller(struct perf_sample *sample __maybe_unused,
+ struct thread *thread __maybe_unused)
+{
+ return 0;
+}
+
static int thread__resolve_callchain_sample(struct thread *thread,
struct callchain_cursor *cursor,
struct evsel *evsel,
@@ -2687,6 +2693,8 @@ static int thread__resolve_callchain_sample(struct thread *thread,
int i, j, err, nr_entries;
int skip_idx = -1;
int first_call = 0;
+ u64 leaf_frame_caller;
+ int pos;

if (chain)
chain_nr = chain->nr;
@@ -2811,6 +2819,21 @@ static int thread__resolve_callchain_sample(struct thread *thread,
continue;
}

+ pos = callchain_param.order == ORDER_CALLEE ? 2 : chain_nr - 2;
+
+ if (i == pos) {
+ leaf_frame_caller = get_leaf_frame_caller(sample, thread);
+
+ if (leaf_frame_caller && leaf_frame_caller != ip) {
+
+ err = add_callchain_ip(thread, cursor, parent,
+ root_al, &cpumode, leaf_frame_caller,
+ false, NULL, NULL, 0);
+ if (err)
+ return (err < 0) ? err : 0;
+ }
+ }
+
err = add_callchain_ip(thread, cursor, parent,
root_al, &cpumode, ip,
false, NULL, NULL, 0);
--
2.23.0

2021-03-05 11:57:09

by Leo Yan

[permalink] [raw]
Subject: Re: [PATCH RESEND WITH CCs v3 3/4] perf tools: enable dwarf_callchain_users on aarch64

Hi Alexandre,

On Thu, Mar 04, 2021 at 04:32:54PM +0000, Alexandre Truong wrote:
> On arm64, enable dwarf_callchain_users which will be needed
> to do a dwarf unwind in order to get the caller of the leaf frame.
>
> Signed-off-by: Alexandre Truong <[email protected]>
> Cc: John Garry <[email protected]>
> Cc: Will Deacon <[email protected]>
> Cc: Mathieu Poirier <[email protected]>
> Cc: Leo Yan <[email protected]>
> Cc: Peter Zijlstra <[email protected]>
> Cc: Ingo Molnar <[email protected]>
> Cc: Arnaldo Carvalho de Melo <[email protected]>
> Cc: Mark Rutland <[email protected]>
> Cc: Alexander Shishkin <[email protected]>
> Cc: Jiri Olsa <[email protected]>
> Cc: Namhyung Kim <[email protected]>
> Cc: Kemeng Shi <[email protected]>
> Cc: Ian Rogers <[email protected]>
> Cc: Andi Kleen <[email protected]>
> Cc: Kan Liang <[email protected]>
> Cc: Jin Yao <[email protected]>
> Cc: Adrian Hunter <[email protected]>
> Cc: Suzuki K Poulose <[email protected]>
> Cc: Al Grant <[email protected]>
> Cc: James Clark <[email protected]>
> Cc: Wilco Dijkstra <[email protected]>
> ---
> tools/perf/builtin-report.c | 4 ++++
> 1 file changed, 4 insertions(+)
>
> diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
> index 2a845d6cac09..93661a3eaeb1 100644
> --- a/tools/perf/builtin-report.c
> +++ b/tools/perf/builtin-report.c
> @@ -405,6 +405,10 @@ static int report__setup_sample_type(struct report *rep)
>
> callchain_param_setup(sample_type);
>
> + if (callchain_param.record_mode == CALLCHAIN_FP &&
> + strncmp(rep->session->header.env.arch, "aarch64", 7) == 0)
> + dwarf_callchain_users = true;
> +

I don't have knowledge for dwarf or FP.

This patch looks suspicious to me since it only fixes the issue for the
"perf report" command, but it cannot support "perf script".

I did a quick testing for "perf script" command with the test code from
patch 04, seems to me it cannot fix the fp omitting issue for
"perf script" command:

arm64_fp_test 11211 2282.355095: 176307 cycles:
aaaac2e40740 f2+0x10 (/root/arm64_fp_test)
aaaac2e4061c main+0xc (/root/arm64_fp_test)
ffff961fbd24 __libc_start_main+0xe4 (/usr/lib/aarch64-linux-gnu/libc-2.28.so)
aaaac2e4065c _start+0x34 (/root/arm64_fp_test)

Could you check for this? Thanks!

Leo

> if (rep->stitch_lbr && (callchain_param.record_mode != CALLCHAIN_LBR)) {
> ui__warning("Can't find LBR callchain. Switch off --stitch-lbr.\n"
> "Please apply --call-graph lbr when recording.\n");
> --
> 2.23.0
>

2021-03-05 14:12:11

by Leo Yan

[permalink] [raw]
Subject: Re: [PATCH RESEND WITH CCs v3 3/4] perf tools: enable dwarf_callchain_users on aarch64

On Fri, Mar 05, 2021 at 07:51:20PM +0800, Leo Yan wrote:

[...]

> > diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
> > index 2a845d6cac09..93661a3eaeb1 100644
> > --- a/tools/perf/builtin-report.c
> > +++ b/tools/perf/builtin-report.c
> > @@ -405,6 +405,10 @@ static int report__setup_sample_type(struct report *rep)
> >
> > callchain_param_setup(sample_type);
> >
> > + if (callchain_param.record_mode == CALLCHAIN_FP &&
> > + strncmp(rep->session->header.env.arch, "aarch64", 7) == 0)
> > + dwarf_callchain_users = true;
> > +
>
> I don't have knowledge for dwarf or FP.
>
> This patch is suspicious for me that since it only fixes the issue for
> "perf report" command, but it cannot support "perf script".
>
> I did a quick testing for "perf script" command with the test code from
> patch 04, seems to me it cannot fix the fp omitting issue for
> "perf script" command:
>
> arm64_fp_test 11211 2282.355095: 176307 cycles:
> aaaac2e40740 f2+0x10 (/root/arm64_fp_test)
> aaaac2e4061c main+0xc (/root/arm64_fp_test)
> ffff961fbd24 __libc_start_main+0xe4 (/usr/lib/aarch64-linux-gnu/libc-2.28.so)
> aaaac2e4065c _start+0x34 (/root/arm64_fp_test)
>
> Could you check for this? Thanks!

Maybe we can consolidate the setting for the global variable
"dwarf_callchain_users" with the change below; this can help us to cover
the tools for most cases. I used the change below to replace patch
03; "perf report" and "perf script" both work well with it.

Please note, if you want to move forward this way, it's better to
use a separate patch to first refactor the function
script__setup_sample_type() by using the general API
callchain_param_setup() to replace the duplicated code for
setting up the callchain parameters.

After that, you could apply the rest of the change for adding the new parameter
"arch" to the function callchain_param_setup().


diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 2a845d6cac09..ca2e8c9096ea 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -1090,7 +1090,8 @@ static int process_attr(struct perf_tool *tool __maybe_unused,
* on events sample_type.
*/
sample_type = evlist__combined_sample_type(*pevlist);
- callchain_param_setup(sample_type);
+ callchain_param_setup(sample_type,
+ perf_env__arch((*pevlist)->env));
return 0;
}

diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 5915f19cee55..c49212c135b2 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -2250,7 +2250,8 @@ static int process_attr(struct perf_tool *tool, union perf_event *event,
* on events sample_type.
*/
sample_type = evlist__combined_sample_type(evlist);
- callchain_param_setup(sample_type);
+ callchain_param_setup(sample_type,
+ perf_env__arch((*pevlist)->env));

/* Enable fields for callchain entries */
if (symbol_conf.use_callchain &&
@@ -3309,16 +3310,8 @@ static void script__setup_sample_type(struct perf_script *script)
struct perf_session *session = script->session;
u64 sample_type = evlist__combined_sample_type(session->evlist);

- if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) {
- if ((sample_type & PERF_SAMPLE_REGS_USER) &&
- (sample_type & PERF_SAMPLE_STACK_USER)) {
- callchain_param.record_mode = CALLCHAIN_DWARF;
- dwarf_callchain_users = true;
- } else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
- callchain_param.record_mode = CALLCHAIN_LBR;
- else
- callchain_param.record_mode = CALLCHAIN_FP;
- }
+ callchain_param_setup(sample_type,
+ perf_env__arch(session->machines.host.env));

if (script->stitch_lbr && (callchain_param.record_mode != CALLCHAIN_LBR)) {
pr_warning("Can't find LBR callchain. Switch off --stitch-lbr.\n"
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 1b60985690bb..d9766b54cd1a 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -1600,7 +1600,7 @@ void callchain_cursor_reset(struct callchain_cursor *cursor)
map__zput(node->ms.map);
}

-void callchain_param_setup(u64 sample_type)
+void callchain_param_setup(u64 sample_type, const char *arch)
{
if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) {
if ((sample_type & PERF_SAMPLE_REGS_USER) &&
@@ -1612,6 +1612,14 @@ void callchain_param_setup(u64 sample_type)
else
callchain_param.record_mode = CALLCHAIN_FP;
}
+
+ /*
+ * Fixup for arm64 due to the frame pointer was omitted for the
+ * caller of the leaf frame.
+ */
+ if (callchain_param.record_mode == CALLCHAIN_FP &&
+ strncmp(arch, "arm64", 6) == 0)
+ dwarf_callchain_users = true;
}

static bool chain_match(struct callchain_list *base_chain,
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 77fba053c677..d95615daed73 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -300,7 +300,7 @@ int callchain_branch_counts(struct callchain_root *root,
u64 *branch_count, u64 *predicted_count,
u64 *abort_count, u64 *cycles_count);

-void callchain_param_setup(u64 sample_type);
+void callchain_param_setup(u64 sample_type, const char *arch);

bool callchain_cnode_matched(struct callchain_node *base_cnode,
struct callchain_node *pair_cnode);

2021-03-06 13:00:48

by Arnaldo Carvalho de Melo

[permalink] [raw]
Subject: Re: [PATCH RESEND WITH CCs v3 4/4] perf tools: determine if LR is the return address

Em Fri, Mar 05, 2021 at 10:54:03AM +0200, James Clark escreveu:
> I've tested this patchset on a few different applications and have seen it significantly improve
> quality of frame pointer stacks on aarch64. For example with GDB 10 and default build options,
> 'bfd_calc_gnu_debuglink_crc32' is a leaf function, and its caller 'gdb_bfd_crc' is omitted,
> but with the patchset it is included. I've also confirmed that this is correct from looking at
> the source code.
>
> Before:
>
> # Children Self Command Shared Object Symbol
> # ........ ........ ............... .......................... ...........
> #
> 34.55% 0.00% gdb-100 gdb-100 [.] _start
> 0.78%
> _start
> __libc_start_main
> main
> gdb_main
> captured_command_loop
> gdb_do_one_event
> check_async_event_handlers
> fetch_inferior_event
> inferior_event_handler
> do_all_inferior_continuations
> attach_post_wait
> post_create_inferior
> svr4_solib_create_inferior_hook
> solib_add
> solib_read_symbols
> symbol_file_add_with_addrs
> read_symbols
> elf_symfile_read
> find_separate_debug_file_by_debuglink[abi:cxx11]
> find_separate_debug_file
> separate_debug_file_exists
> gdb_bfd_crc
> bfd_calc_gnu_debuglink_crc32
>
> After:
>
> # Children Self Command Shared Object Symbol
> # ........ ........ ............... .......................... ...........
> #
> 34.55% 0.00% gdb-100 gdb-100 [.] _start
> 0.78%
> _start
> __libc_start_main
> main
> gdb_main
> captured_command_loop
> gdb_do_one_event
> check_async_event_handlers
> fetch_inferior_event
> inferior_event_handler
> do_all_inferior_continuations
> attach_post_wait
> post_create_inferior
> svr4_solib_create_inferior_hook
> solib_add
> solib_read_symbols
> symbol_file_add_with_addrs
> read_symbols
> elf_symfile_read
> find_separate_debug_file_by_debuglink[abi:cxx11]
> find_separate_debug_file
> separate_debug_file_exists
> get_file_crc <--------------------- leaf frame caller added
> bfd_calc_gnu_debuglink_crc32
>
> There is a question about whether the overhead of recording all the registers is acceptable, for
> filesize and time. We could make it a manual step, at the cost of not showing better frame pointer
> stacks by default.

Can someone quantify this, i.e. how much space per perf.data for a
typical scenario? But anyway, I'm applying it as is now, we can change
it if needed, it's not like files with the extra registers won't be
valid if/when we decide not to collect them by default in the future.

If we decide to make this selectable, we should have it as a .perfconfig
knob as well, so that one can set it and change the default, etc.

- Arnaldo

> Tested-by: James Clark <[email protected]>
>
> On 04/03/2021 18:32, Alexandre Truong wrote:
> > On arm64 and frame pointer mode (e.g: perf record --callgraph fp),
> > use dwarf unwind info to check if the link register is the return
> > address in order to inject it to the frame pointer stack.
> >
> > Write the following application:
> >
> > int a = 10;
> >
> > void f2(void)
> > {
> > for (int i = 0; i < 1000000; i++)
> > a *= a;
> > }
> >
> > void f1()
> > {
> > for (int i = 0; i < 10; i++)
> > f2();
> > }
> >
> > int main (void)
> > {
> > f1();
> > return 0;
> > }
> >
> > with the following compilation flags:
> > gcc -fno-omit-frame-pointer -fno-inline -O2
> >
> > The compiler omits the frame pointer for f2 on arm. This is a problem
> > with any leaf call, for example an application with many different
> > calls to malloc() would always omit the calling frame, even if it
> > can be determined.
> >
> > ./perf record --call-graph fp ./a.out
> > ./perf report
> >
> > currently gives the following stack:
> >
> > 0xffffea52f361
> > _start
> > __libc_start_main
> > main
> > f2
> >
> > After this change, perf report correctly shows f1() calling f2(),
> > even though it was missing from the frame pointer unwind:
> >
> > ./perf report
> >
> > 0xffffea52f361
> > _start
> > __libc_start_main
> > main
> > f1
> > f2
> >
> > Signed-off-by: Alexandre Truong <[email protected]>
> > Cc: John Garry <[email protected]>
> > Cc: Will Deacon <[email protected]>
> > Cc: Mathieu Poirier <[email protected]>
> > Cc: Leo Yan <[email protected]>
> > Cc: Peter Zijlstra <[email protected]>
> > Cc: Ingo Molnar <[email protected]>
> > Cc: Arnaldo Carvalho de Melo <[email protected]>
> > Cc: Mark Rutland <[email protected]>
> > Cc: Alexander Shishkin <[email protected]>
> > Cc: Jiri Olsa <[email protected]>
> > Cc: Namhyung Kim <[email protected]>
> > Cc: Kemeng Shi <[email protected]>
> > Cc: Ian Rogers <[email protected]>
> > Cc: Andi Kleen <[email protected]>
> > Cc: Kan Liang <[email protected]>
> > Cc: Jin Yao <[email protected]>
> > Cc: Adrian Hunter <[email protected]>
> > Cc: Suzuki K Poulose <[email protected]>
> > Cc: Al Grant <[email protected]>
> > Cc: James Clark <[email protected]>
> > Cc: Wilco Dijkstra <[email protected]>
> > ---
> > tools/perf/util/Build | 1 +
> > .../util/arm-frame-pointer-unwind-support.c | 44 +++++++++++++++++++
> > .../util/arm-frame-pointer-unwind-support.h | 7 +++
> > tools/perf/util/machine.c | 9 ++--
> > 4 files changed, 58 insertions(+), 3 deletions(-)
> > create mode 100644 tools/perf/util/arm-frame-pointer-unwind-support.c
> > create mode 100644 tools/perf/util/arm-frame-pointer-unwind-support.h
> >
> > diff --git a/tools/perf/util/Build b/tools/perf/util/Build
> > index 188521f34347..3b82cb992bce 100644
> > --- a/tools/perf/util/Build
> > +++ b/tools/perf/util/Build
> > @@ -1,3 +1,4 @@
> > +perf-y += arm-frame-pointer-unwind-support.o
> > perf-y += annotate.o
> > perf-y += block-info.o
> > perf-y += block-range.o
> > diff --git a/tools/perf/util/arm-frame-pointer-unwind-support.c b/tools/perf/util/arm-frame-pointer-unwind-support.c
> > new file mode 100644
> > index 000000000000..964efd08e72e
> > --- /dev/null
> > +++ b/tools/perf/util/arm-frame-pointer-unwind-support.c
> > @@ -0,0 +1,44 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +#include "../arch/arm64/include/uapi/asm/perf_regs.h"
> > +#include "arch/arm64/include/perf_regs.h"
> > +#include "event.h"
> > +#include "arm-frame-pointer-unwind-support.h"
> > +#include "callchain.h"
> > +#include "unwind.h"
> > +
> > +struct entries {
> > + u64 stack[2];
> > + size_t length;
> > +};
> > +
> > +static bool get_leaf_frame_caller_enabled(struct perf_sample *sample)
> > +{
> > + return callchain_param.record_mode == CALLCHAIN_FP && sample->user_regs.regs
> > + && sample->user_regs.mask == PERF_REGS_MASK;
> > +}
> > +
> > +static int add_entry(struct unwind_entry *entry, void *arg)
> > +{
> > + struct entries *entries = arg;
> > +
> > + entries->stack[entries->length++] = entry->ip;
> > + return 0;
> > +}
> > +
> > +u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thread)
> > +{
> > + int ret;
> > +
> > + struct entries entries = {{0, 0}, 0};
> > +
> > + if (!get_leaf_frame_caller_enabled(sample))
> > + return 0;
> > +
> > + ret = unwind__get_entries(add_entry, &entries, thread, sample, 2);
> > +
> > + if (ret || entries.length != 2)
> > + return ret;
> > +
> > + return callchain_param.order == ORDER_CALLER ?
> > + entries.stack[0] : entries.stack[1];
> > +}
> > diff --git a/tools/perf/util/arm-frame-pointer-unwind-support.h b/tools/perf/util/arm-frame-pointer-unwind-support.h
> > new file mode 100644
> > index 000000000000..16dc03fa9abe
> > --- /dev/null
> > +++ b/tools/perf/util/arm-frame-pointer-unwind-support.h
> > @@ -0,0 +1,7 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +#ifndef __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H
> > +#define __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H
> > +
> > +u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thread);
> > +
> > +#endif /* __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H */
> > diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
> > index 7f03ffa016b0..dfb72dbc0e2d 100644
> > --- a/tools/perf/util/machine.c
> > +++ b/tools/perf/util/machine.c
> > @@ -34,6 +34,7 @@
> > #include "bpf-event.h"
> > #include <internal/lib.h> // page_size
> > #include "cgroup.h"
> > +#include "arm-frame-pointer-unwind-support.h"
> >
> > #include <linux/ctype.h>
> > #include <symbol/kallsyms.h>
> > @@ -2671,10 +2672,12 @@ static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread,
> > return err;
> > }
> >
> > -static u64 get_leaf_frame_caller(struct perf_sample *sample __maybe_unused,
> > - struct thread *thread __maybe_unused)
> > +static u64 get_leaf_frame_caller(struct perf_sample *sample, struct thread *thread)
> > {
> > - return 0;
> > + if (strncmp(thread->maps->machine->env->arch, "aarch64", 7) == 0)
> > + return get_leaf_frame_caller_aarch64(sample, thread);
> > + else
> > + return 0;
> > }
> >
> > static int thread__resolve_callchain_sample(struct thread *thread,
> >

--

- Arnaldo

2021-03-06 19:17:19

by Arnaldo Carvalho de Melo

[permalink] [raw]
Subject: Re: [PATCH RESEND WITH CCs v3 4/4] perf tools: determine if LR is the return address

Em Sat, Mar 06, 2021 at 09:55:32AM -0300, Arnaldo Carvalho de Melo escreveu:
> Em Fri, Mar 05, 2021 at 10:54:03AM +0200, James Clark escreveu:
> > I've tested this patchset on a few different applications and have seen it significantly improve
> > quality of frame pointer stacks on aarch64. For example with GDB 10 and default build options,
> > 'bfd_calc_gnu_debuglink_crc32' is a leaf function, and its caller 'gdb_bfd_crc' is omitted,
> > but with the patchset it is included. I've also confirmed that this is correct from looking at
> > the source code.
> >
> > Before:
> >
> > # Children Self Command Shared Object Symbol
> > # ........ ........ ............... .......................... ...........
> > #
> > 34.55% 0.00% gdb-100 gdb-100 [.] _start
> > 0.78%
> > _start
> > __libc_start_main
> > main
> > gdb_main
> > captured_command_loop
> > gdb_do_one_event
> > check_async_event_handlers
> > fetch_inferior_event
> > inferior_event_handler
> > do_all_inferior_continuations
> > attach_post_wait
> > post_create_inferior
> > svr4_solib_create_inferior_hook
> > solib_add
> > solib_read_symbols
> > symbol_file_add_with_addrs
> > read_symbols
> > elf_symfile_read
> > find_separate_debug_file_by_debuglink[abi:cxx11]
> > find_separate_debug_file
> > separate_debug_file_exists
> > gdb_bfd_crc
> > bfd_calc_gnu_debuglink_crc32
> >
> > After:
> >
> > # Children Self Command Shared Object Symbol
> > # ........ ........ ............... .......................... ...........
> > #
> > 34.55% 0.00% gdb-100 gdb-100 [.] _start
> > 0.78%
> > _start
> > __libc_start_main
> > main
> > gdb_main
> > captured_command_loop
> > gdb_do_one_event
> > check_async_event_handlers
> > fetch_inferior_event
> > inferior_event_handler
> > do_all_inferior_continuations
> > attach_post_wait
> > post_create_inferior
> > svr4_solib_create_inferior_hook
> > solib_add
> > solib_read_symbols
> > symbol_file_add_with_addrs
> > read_symbols
> > elf_symfile_read
> > find_separate_debug_file_by_debuglink[abi:cxx11]
> > find_separate_debug_file
> > separate_debug_file_exists
> > get_file_crc <--------------------- leaf frame caller added
> > bfd_calc_gnu_debuglink_crc32
> >
> > There is a question about whether the overhead of recording all the registers is acceptable, for
> > filesize and time. We could make it a manual step, at the cost of not showing better frame pointer
> > stacks by default.
>
> Can someone quantify this, i.e. how much space per perf.data for a
> typical scenario? But anyway, I'm applying it as is now, we can change
> it if needed, its not like files with the extra registers won't be
> valid if/when we decide not to collect it by default in the future.
>
> If we decide to make this selectable, we should have it as a .perfconfig
> knob as well, so that one can set it and change the default, etc.

> > Tested-by: James Clark <[email protected]>


This is unconditionally asking for asm/perf_regs.h and it is not available
everywhere, so I think this has to be abstracted away, maybe using a weak
function that arm provides a replacement for?

A:Humm

+++ b/tools/perf/util/Build
@@ -1,3 +1,4 @@
+perf-y += arm-frame-pointer-unwind-support.o

Is this for doing cross-platform analysis? I.e. record a perf.data file
on arm64 and then do a perf-report on it on a x86_64 machine? Yeah, that
is expected to work, but then:

+++ b/tools/perf/util/arm-frame-pointer-unwind-support.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "../arch/arm64/include/uapi/asm/perf_regs.h"
+#include "arch/arm64/include/perf_regs.h"


[acme@five perf]$ head -25 tools/perf/arch/arm64/include/perf_regs.h
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef ARCH_PERF_REGS_H
#define ARCH_PERF_REGS_H

#include <stdlib.h>
#include <linux/types.h>
#include <asm/perf_regs.h>

void perf_regs_load(u64 *regs);

#define PERF_REGS_MASK ((1ULL << PERF_REG_ARM64_MAX) - 1)
#define PERF_REGS_MAX PERF_REG_ARM64_MAX
#define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_64

#define PERF_REG_IP PERF_REG_ARM64_PC
#define PERF_REG_SP PERF_REG_ARM64_SP

static inline const char *__perf_reg_name(int id)
{
switch (id) {
case PERF_REG_ARM64_X0:
return "x0";
case PERF_REG_ARM64_X1:
return "x1";
case PERF_REG_ARM64_X2:
[acme@five perf]$

Won't this get the wrong file when cross-building? See below.

- Arnaldo

[perfbuilder@five ~]$ time dm
Sat Mar 6 12:02:01 PM -03 2021
# export PERF_TARBALL=http://192.168.86.5/perf/perf-5.11.0.tar.xz
# dm
1 78.76 alpine:3.4 : Ok gcc (Alpine 5.3.0) 5.3.0 , clang version 3.8.0 (tags/RELEASE_380/final)
2 79.07 alpine:3.5 : Ok gcc (Alpine 6.2.1) 6.2.1 20160822 , clang version 3.8.1 (tags/RELEASE_381/final)
3 82.68 alpine:3.6 : Ok gcc (Alpine 6.3.0) 6.3.0 , clang version 4.0.0 (tags/RELEASE_400/final)
4 88.57 alpine:3.7 : Ok gcc (Alpine 6.4.0) 6.4.0 , Alpine clang version 5.0.0 (tags/RELEASE_500/final) (based on LLVM 5.0.0)
5 89.47 alpine:3.8 : Ok gcc (Alpine 6.4.0) 6.4.0 , Alpine clang version 5.0.1 (tags/RELEASE_501/final) (based on LLVM 5.0.1)
6 93.63 alpine:3.9 : Ok gcc (Alpine 8.3.0) 8.3.0 , Alpine clang version 5.0.1 (tags/RELEASE_502/final) (based on LLVM 5.0.1)
7 125.41 alpine:3.10 : Ok gcc (Alpine 8.3.0) 8.3.0 , Alpine clang version 8.0.0 (tags/RELEASE_800/final) (based on LLVM 8.0.0)
8 142.89 alpine:3.11 : Ok gcc (Alpine 9.3.0) 9.3.0 , Alpine clang version 9.0.0 (https://git.alpinelinux.org/aports f7f0d2c2b8bcd6a5843401a9a702029556492689) (based on LLVM 9.0.0)
9 126.52 alpine:3.12 : Ok gcc (Alpine 9.3.0) 9.3.0 , Alpine clang version 10.0.0 (https://gitlab.alpinelinux.org/alpine/aports.git 7445adce501f8473efdb93b17b5eaf2f1445ed4c)
10 135.80 alpine:3.13 : Ok gcc (Alpine 10.2.1_pre1) 10.2.1 20201203 , Alpine clang version 10.0.1
11 134.19 alpine:edge : Ok gcc (Alpine 10.2.1_pre1) 10.2.1 20201203 , Alpine clang version 10.0.1
12 78.12 alt:p8 : Ok x86_64-alt-linux-gcc (GCC) 5.3.1 20151207 (ALT p8 5.3.1-alt3.M80P.1) , clang version 3.8.0 (tags/RELEASE_380/final)
13 92.57 alt:p9 : Ok x86_64-alt-linux-gcc (GCC) 8.4.1 20200305 (ALT p9 8.4.1-alt0.p9.1) , clang version 10.0.0
14 93.72 alt:sisyphus : Ok x86_64-alt-linux-gcc (GCC) 10.2.1 20201125 (ALT Sisyphus 10.2.1-alt2) , clang version 10.0.1
15 75.10 amazonlinux:1 : Ok gcc (GCC) 7.2.1 20170915 (Red Hat 7.2.1-2) , clang version 3.6.2 (tags/RELEASE_362/final)
16 116.14 amazonlinux:2 : Ok gcc (GCC) 7.3.1 20180712 (Red Hat 7.3.1-12) , clang version 7.0.1 (Amazon Linux 2 7.0.1-1.amzn2.0.2)
17 10.26 android-ndk:r12b-arm : FAIL gcc version 4.9.x 20150123 (prerelease) (GCC)
from util/arm-frame-pointer-unwind-support.c:3:
/git/linux/tools/arch/arm/include/uapi/asm/perf_regs.h:5:6: error: nested redefinition of 'enum perf_event_arm_regs'
enum perf_event_arm_regs {
^
/git/linux/tools/arch/arm/include/uapi/asm/perf_regs.h:5:6: error: redeclaration of 'enum perf_event_arm_regs'
In file included from util/arm-frame-pointer-unwind-support.c:2:0:
/git/linux/tools/include/../arch/arm64/include/uapi/asm/perf_regs.h:5:6: note: originally defined here
enum perf_event_arm_regs {
^
make[3]: *** [/git/linux/tools/build/Makefile.build:139: util] Error 2
18 10.71 android-ndk:r15c-arm : FAIL gcc version 4.9.x 20150123 (prerelease) (GCC)
from util/arm-frame-pointer-unwind-support.c:3:
/git/linux/tools/arch/arm/include/uapi/asm/perf_regs.h:5:6: error: nested redefinition of 'enum perf_event_arm_regs'
enum perf_event_arm_regs {
^
/git/linux/tools/arch/arm/include/uapi/asm/perf_regs.h:5:6: error: redeclaration of 'enum perf_event_arm_regs'
In file included from util/arm-frame-pointer-unwind-support.c:2:0:
/git/linux/tools/include/../arch/arm64/include/uapi/asm/perf_regs.h:5:6: note: originally defined here
enum perf_event_arm_regs {
^
19 28.97 centos:6 : Ok gcc (GCC) 4.4.7 20120313 (Red Hat 4.4.7-23)
20 34.56 centos:7 : Ok gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-44)
21 110.26 centos:8 : Ok gcc (GCC) 8.3.1 20191121 (Red Hat 8.3.1-5) , clang version 10.0.1 (Red Hat 10.0.1-1.module_el8.3.0+467+cb298d5b)
22 67.12 clearlinux:latest : Ok gcc (Clear Linux OS for Intel Architecture) 10.2.1 20201217 releases/gcc-10.2.0-643-g7cbb07d2fc , clang version 10.0.1
23 87.08 debian:8 : Ok gcc (Debian 4.9.2-10+deb8u2) 4.9.2 , Debian clang version 3.5.0-10 (tags/RELEASE_350/final) (based on LLVM 3.5.0)
24 92.73 debian:9 : Ok gcc (Debian 6.3.0-18+deb9u1) 6.3.0 20170516 , clang version 3.8.1-24 (tags/RELEASE_381/final)
25 86.91 debian:10 : Ok gcc (Debian 8.3.0-6) 8.3.0 , clang version 7.0.1-8+deb10u2 (tags/RELEASE_701/final)
26 86.13 debian:experimental : Ok gcc (Debian 10.2.1-6) 10.2.1 20210110 , Debian clang version 11.0.1-2
27 36.91 debian:experimental-x-arm64 : Ok aarch64-linux-gnu-gcc (Debian 10.2.1-6) 10.2.1 20210110
28 9.08 debian:experimental-x-mips : FAIL gcc version 10.2.1 20201224 (Debian 10.2.1-3)
from builtin-diff.c:12:
/git/linux/tools/perf/arch/mips/include/perf_regs.h:7:10: fatal error: asm/perf_regs.h: No such file or directory
7 | #include <asm/perf_regs.h>
| ^~~~~~~~~~~~~~~~~
compilation terminated.
29 33.76 debian:experimental-x-mips64 : Ok mips64-linux-gnuabi64-gcc (Debian 10.2.1-3) 10.2.1 20201224
30 12.74 debian:experimental-x-mipsel : FAIL gcc version 10.2.1 20201224 (Debian 10.2.1-3)
from builtin-diff.c:12:
/git/linux/tools/perf/arch/mips/include/perf_regs.h:7:10: fatal error: asm/perf_regs.h: No such file or directory
7 | #include <asm/perf_regs.h>
| ^~~~~~~~~~~~~~~~~
compilation terminated.
In file included from util/perf_regs.h:30,
from util/event.h:15,
from util/branch.h:15,
from util/callchain.h:8,
from builtin-record.c:16:
/git/linux/tools/perf/arch/mips/include/perf_regs.h:7:10: fatal error: asm/perf_regs.h: No such file or directory
7 | #include <asm/perf_regs.h>
| ^~~~~~~~~~~~~~~~~
compilation terminated.
In file included from util/perf_regs.h:30,
from util/event.h:15,
from util/session.h:6,
from builtin-buildid-list.c:17:
/git/linux/tools/perf/arch/mips/include/perf_regs.h:7:10: fatal error: asm/perf_regs.h: No such file or directory
7 | #include <asm/perf_regs.h>
| ^~~~~~~~~~~~~~~~~
compilation terminated.
In file included from util/perf_regs.h:30,
from util/event.h:15,
from util/thread.h:16,
from builtin-sched.c:11:
/git/linux/tools/perf/arch/mips/include/perf_regs.h:7:10: fatal error: asm/perf_regs.h: No such file or directory
7 | #include <asm/perf_regs.h>
| ^~~~~~~~~~~~~~~~~
compilation terminated.
In file included from util/perf_regs.h:30,
from util/event.h:15,
from builtin-top.c:31:
/git/linux/tools/perf/arch/mips/include/perf_regs.h:7:10: fatal error: asm/perf_regs.h: No such file or directory
7 | #include <asm/perf_regs.h>
| ^~~~~~~~~~~~~~~~~
compilation terminated.
In file included from util/perf_regs.h:30,
from util/event.h:15,
from util/session.h:6,
from builtin-evlist.c:16:
/git/linux/tools/perf/arch/mips/include/perf_regs.h:7:10: fatal error: asm/perf_regs.h: No such file or directory
7 | #include <asm/perf_regs.h>
| ^~~~~~~~~~~~~~~~~
compilation terminated.
In file included from util/perf_regs.h:30,
from util/event.h:15,
from util/session.h:6,
from builtin-buildid-cache.c:24:
/git/linux/tools/perf/arch/mips/include/perf_regs.h:7:10: fatal error: asm/perf_regs.h: No such file or directory
7 | #include <asm/perf_regs.h>
| ^~~~~~~~~~~~~~~~~
compilation terminated.
In file included from util/perf_regs.h:30,
from util/event.h:15,
from builtin-stat.c:49:
/git/linux/tools/perf/arch/mips/include/perf_regs.h:7:10: fatal error: asm/perf_regs.h: No such file or directory
7 | #include <asm/perf_regs.h>
| ^~~~~~~~~~~~~~~~~
compilation terminated.
In file included from util/perf_regs.h:30,
from util/event.h:15,
from util/branch.h:15,
from builtin-report.c:24:
/git/linux/tools/perf/arch/mips/include/perf_regs.h:7:10: fatal error: asm/perf_regs.h: No such file or directory
7 | #include <asm/perf_regs.h>
| ^~~~~~~~~~~~~~~~~
compilation terminated.
In file included from util/perf_regs.h:30,
from util/event.h:15,
from builtin-annotate.c:24:
/git/linux/tools/perf/arch/mips/include/perf_regs.h:7:10: fatal error: asm/perf_regs.h: No such file or directory
7 | #include <asm/perf_regs.h>
| ^~~~~~~~~~~~~~~~~
compilation terminated.
In file included from util/perf_regs.h:30,
from util/event.h:15,
from util/thread.h:16,
from builtin-timechart.c:24:
/git/linux/tools/perf/arch/mips/include/perf_regs.h:7:10: fatal error: asm/perf_regs.h: No such file or directory
7 | #include <asm/perf_regs.h>
| ^~~~~~~~~~~~~~~~~
compilation terminated.
31 32.64 fedora:20 : Ok gcc (GCC) 4.8.3 20140911 (Red Hat 4.8.3-7)
32 35.41 fedora:22 : Ok gcc (GCC) 5.3.1 20160406 (Red Hat 5.3.1-6) , clang version 3.5.0 (tags/RELEASE_350/final)
33 77.83 fedora:23 : Ok gcc (GCC) 5.3.1 20160406 (Red Hat 5.3.1-6) , clang version 3.7.0 (tags/RELEASE_370/final)
34 96.28 fedora:24 : Ok gcc (GCC) 6.3.1 20161221 (Red Hat 6.3.1-1) , clang version 3.8.1 (tags/RELEASE_381/final)
35 10.80 fedora:24-x-ARC-uClibc : FAIL gcc version 7.1.1 20170710 (ARCompact ISA Linux uClibc toolchain 2017.09-rc2)
In file included from util/arm-frame-pointer-unwind-support.c:3:0:
/git/linux/tools/perf/arch/arm64/include/perf_regs.h:7:10: fatal error: asm/perf_regs.h: No such file or directory
#include <asm/perf_regs.h>
^~~~~~~~~~~~~~~~~
compilation terminated.
/git/linux/tools/build/Makefile.build:139: recipe for target 'util' failed
make[3]: *** [util] Error 2
36 98.38 fedora:25 : Ok gcc (GCC) 6.4.1 20170727 (Red Hat 6.4.1-1) , clang version 3.9.1 (tags/RELEASE_391/final)
37 111.11 fedora:26 : Ok gcc (GCC) 7.3.1 20180130 (Red Hat 7.3.1-2) , clang version 4.0.1 (tags/RELEASE_401/final)
38 111.55 fedora:27 : Ok gcc (GCC) 7.3.1 20180712 (Red Hat 7.3.1-6) , clang version 5.0.2 (tags/RELEASE_502/final)
39 127.99 fedora:28 : Ok gcc (GCC) 8.3.1 20190223 (Red Hat 8.3.1-2) , clang version 6.0.1 (tags/RELEASE_601/final)
40 136.36 fedora:29 : Ok gcc (GCC) 8.3.1 20190223 (Red Hat 8.3.1-2) , clang version 7.0.1 (Fedora 7.0.1-6.fc29)
41 139.67 fedora:30 : Ok gcc (GCC) 9.3.1 20200408 (Red Hat 9.3.1-2) , clang version 8.0.0 (Fedora 8.0.0-3.fc30)
42 10.57 fedora:30-x-ARC-uClibc : FAIL gcc version 8.3.1 20190225 (ARCv2 ISA Linux uClibc toolchain 2019.03-rc1)
In file included from util/arm-frame-pointer-unwind-support.c:3:
/git/linux/tools/perf/arch/arm64/include/perf_regs.h:7:10: fatal error: asm/perf_regs.h: No such file or directory
#include <asm/perf_regs.h>
^~~~~~~~~~~~~~~~~
compilation terminated.
make[3]: *** [/git/linux/tools/build/Makefile.build:139: util] Error 2
43 136.09 fedora:31 : Ok gcc (GCC) 9.3.1 20200408 (Red Hat 9.3.1-2) , clang version 9.0.1 (Fedora 9.0.1-4.fc31)
44 112.14 fedora:32 : Ok gcc (GCC) 10.2.1 20201125 (Red Hat 10.2.1-9) , clang version 10.0.1 (Fedora 10.0.1-3.fc32)
45 111.28 fedora:33 : Ok gcc (GCC) 10.2.1 20201125 (Red Hat 10.2.1-9) , clang version 11.0.0 (Fedora 11.0.0-2.fc33)
46 116.62 fedora:34 : Ok gcc (GCC) 11.0.0 20210225 (Red Hat 11.0.0-0) , clang version 12.0.0 (Fedora 12.0.0-0.1.rc1.fc34)
47 117.53 fedora:rawhide : Ok gcc (GCC) 11.0.0 20210210 (Red Hat 11.0.0-0) , clang version 12.0.0 (Fedora 12.0.0-0.1.rc1.fc35)
48 38.03 gentoo-stage3-amd64:latest : Ok gcc (Gentoo 9.3.0-r1 p3) 9.3.0
49 78.35 mageia:5 : Ok gcc (GCC) 4.9.2 , clang version 3.5.2 (tags/RELEASE_352/final)
50 97.37 mageia:6 : Ok gcc (Mageia 5.5.0-1.mga6) 5.5.0 , clang version 3.9.1 (tags/RELEASE_391/final)
51 116.36 manjaro:latest : Ok gcc (GCC) 10.2.0 , clang version 10.0.1
52 246.83 openmandriva:cooker : Ok gcc (GCC) 10.2.0 20200723 (OpenMandriva) , OpenMandriva 11.0.0-1 clang version 11.0.0 (/builddir/build/BUILD/llvm-project-llvmorg-11.0.0/clang 63e22714ac938c6b537bd958f70680d3331a2030)
53 134.08 opensuse:15.0 : Ok gcc (SUSE Linux) 7.4.1 20190905 [gcc-7-branch revision 275407] , clang version 5.0.1 (tags/RELEASE_501/final 312548)
54 142.32 opensuse:15.1 : Ok gcc (SUSE Linux) 7.5.0 , clang version 7.0.1 (tags/RELEASE_701/final 349238)
55 133.68 opensuse:15.2 : Ok gcc (SUSE Linux) 7.5.0 , clang version 9.0.1
56 146.85 opensuse:15.3 : Ok gcc (SUSE Linux) 7.5.0 , clang version 7.0.1 (tags/RELEASE_701/final 349238)
57 136.96 opensuse:42.3 : Ok gcc (SUSE Linux) 4.8.5 , clang version 3.8.0 (tags/RELEASE_380/final 262553)
58 122.69 opensuse:tumbleweed : Ok gcc (SUSE Linux) 10.2.1 20200825 [revision c0746a1beb1ba073c7981eb09f55b3d993b32e5c] , clang version 10.0.1
59 28.36 oraclelinux:6 : Ok gcc (GCC) 4.4.7 20120313 (Red Hat 4.4.7-23.0.1)
60 35.22 oraclelinux:7 : Ok gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-44.0.3)
61 109.60 oraclelinux:8 : Ok gcc (GCC) 8.3.1 20191121 (Red Hat 8.3.1-5.0.1) , clang version 10.0.1 (Red Hat 10.0.1-1.0.1.module+el8.3.0+7827+89335dbf)
62 29.20 ubuntu:12.04 : Ok gcc (Ubuntu/Linaro 4.6.3-1ubuntu5) 4.6.3 , Ubuntu clang version 3.0-6ubuntu3 (tags/RELEASE_30/final) (based on LLVM 3.0)
63 33.46 ubuntu:14.04 : Ok gcc (Ubuntu 4.8.4-2ubuntu1~14.04.4) 4.8.4
64 93.53 ubuntu:16.04 : Ok gcc (Ubuntu 5.4.0-6ubuntu1~16.04.12) 5.4.0 20160609 , clang version 3.8.0-2ubuntu4 (tags/RELEASE_380/final)
65 11.70 ubuntu:16.04-x-arm : FAIL gcc version 5.4.0 20160609 (Ubuntu/Linaro 5.4.0-6ubuntu1~16.04.9)
from util/arm-frame-pointer-unwind-support.c:3:
/git/linux/tools/arch/arm/include/uapi/asm/perf_regs.h:5:6: error: nested redefinition of 'enum perf_event_arm_regs'
enum perf_event_arm_regs {
^
/git/linux/tools/arch/arm/include/uapi/asm/perf_regs.h:5:6: error: redeclaration of 'enum perf_event_arm_regs'
In file included from util/arm-frame-pointer-unwind-support.c:2:0:
/git/linux/tools/include/../arch/arm64/include/uapi/asm/perf_regs.h:5:6: note: originally defined here
enum perf_event_arm_regs {
^
/git/linux/tools/build/Makefile.build:139: recipe for target 'util' failed
make[3]: *** [util] Error 2
66 28.74 ubuntu:16.04-x-arm64 : Ok aarch64-linux-gnu-gcc (Ubuntu/Linaro 5.4.0-6ubuntu1~16.04.9) 5.4.0 20160609
67 27.61 ubuntu:16.04-x-powerpc : Ok powerpc-linux-gnu-gcc (Ubuntu 5.4.0-6ubuntu1~16.04.9) 5.4.0 20160609
68 28.82 ubuntu:16.04-x-powerpc64 : Ok powerpc64-linux-gnu-gcc (Ubuntu/IBM 5.4.0-6ubuntu1~16.04.9) 5.4.0 20160609
69 28.16 ubuntu:16.04-x-powerpc64el : Ok powerpc64le-linux-gnu-gcc (Ubuntu/IBM 5.4.0-6ubuntu1~16.04.9) 5.4.0 20160609
70 27.96 ubuntu:16.04-x-s390 : Ok s390x-linux-gnu-gcc (Ubuntu 5.4.0-6ubuntu1~16.04.9) 5.4.0 20160609
71 101.40 ubuntu:18.04 : Ok gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0 , clang version 6.0.0-1ubuntu2 (tags/RELEASE_600/final)
72 11.69 ubuntu:18.04-x-arm : FAIL gcc version 7.5.0 (Ubuntu/Linaro 7.5.0-3ubuntu1~18.04)
from util/arm-frame-pointer-unwind-support.c:3:
/git/linux/tools/arch/arm/include/uapi/asm/perf_regs.h:5:6: error: nested redefinition of 'enum perf_event_arm_regs'
enum perf_event_arm_regs {
^~~~~~~~~~~~~~~~~~~
/git/linux/tools/arch/arm/include/uapi/asm/perf_regs.h:5:6: error: redeclaration of 'enum perf_event_arm_regs'
In file included from util/arm-frame-pointer-unwind-support.c:2:0:
/git/linux/tools/include/../arch/arm64/include/uapi/asm/perf_regs.h:5:6: note: originally defined here
enum perf_event_arm_regs {
^~~~~~~~~~~~~~~~~~~
/git/linux/tools/build/Makefile.build:139: recipe for target 'util' failed
make[3]: *** [util] Error 2
73 29.62 ubuntu:18.04-x-arm64 : Ok aarch64-linux-gnu-gcc (Ubuntu/Linaro 7.5.0-3ubuntu1~18.04) 7.5.0
74 10.74 ubuntu:18.04-x-m68k : FAIL gcc version 7.5.0 (Ubuntu 7.5.0-3ubuntu1~18.04)
In file included from util/arm-frame-pointer-unwind-support.c:3:0:
/git/linux/tools/perf/arch/arm64/include/perf_regs.h:7:10: fatal error: asm/perf_regs.h: No such file or directory
#include <asm/perf_regs.h>
^~~~~~~~~~~~~~~~~
compilation terminated.
/git/linux/tools/build/Makefile.build:139: recipe for target 'util' failed
make[3]: *** [util] Error 2
75 29.37 ubuntu:18.04-x-powerpc : Ok powerpc-linux-gnu-gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0
76 32.06 ubuntu:18.04-x-powerpc64 : Ok powerpc64-linux-gnu-gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0
77 31.37 ubuntu:18.04-x-powerpc64el : Ok powerpc64le-linux-gnu-gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0
78 11.66 ubuntu:18.04-x-riscv64 : FAIL gcc version 7.5.0 (Ubuntu 7.5.0-3ubuntu1~18.04)
In file included from util/arm-frame-pointer-unwind-support.c:3:0:
/git/linux/tools/perf/arch/arm64/include/perf_regs.h:7:10: fatal error: asm/perf_regs.h: No such file or directory
#include <asm/perf_regs.h>
^~~~~~~~~~~~~~~~~
compilation terminated.
/git/linux/tools/build/Makefile.build:139: recipe for target 'util' failed
make[3]: *** [util] Error 2
79 26.84 ubuntu:18.04-x-s390 : Ok s390x-linux-gnu-gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0
80 11.50 ubuntu:18.04-x-sh4 : FAIL gcc version 7.5.0 (Ubuntu 7.5.0-3ubuntu1~18.04)
In file included from util/arm-frame-pointer-unwind-support.c:3:0:
/git/linux/tools/perf/arch/arm64/include/perf_regs.h:7:10: fatal error: asm/perf_regs.h: No such file or directory
#include <asm/perf_regs.h>
^~~~~~~~~~~~~~~~~
compilation terminated.
/git/linux/tools/build/Makefile.build:139: recipe for target 'util' failed
make[3]: *** [util] Error 2
81 10.62 ubuntu:18.04-x-sparc64 : FAIL gcc version 7.5.0 (Ubuntu 7.5.0-3ubuntu1~18.04)
In file included from util/arm-frame-pointer-unwind-support.c:3:0:
/git/linux/tools/perf/arch/arm64/include/perf_regs.h:7:10: fatal error: asm/perf_regs.h: No such file or directory
#include <asm/perf_regs.h>
^~~~~~~~~~~~~~~~~
compilation terminated.
/git/linux/tools/build/Makefile.build:139: recipe for target 'util' failed
make[3]: *** [util] Error 2
82 80.18 ubuntu:19.10 : Ok gcc (Ubuntu 9.2.1-9ubuntu2) 9.2.1 20191008 , clang version 8.0.1-3build1 (tags/RELEASE_801/final)
83 11.44 ubuntu:19.10-x-alpha : FAIL gcc version 9.2.1 20191008 (Ubuntu 9.2.1-9ubuntu1)
In file included from util/arm-frame-pointer-unwind-support.c:3:
/git/linux/tools/perf/arch/arm64/include/perf_regs.h:7:10: fatal error: asm/perf_regs.h: No such file or directory
7 | #include <asm/perf_regs.h>
| ^~~~~~~~~~~~~~~~~
compilation terminated.
make[3]: *** [/git/linux/tools/build/Makefile.build:139: util] Error 2
84 11.67 ubuntu:19.10-x-hppa : FAIL gcc version 9.2.1 20191008 (Ubuntu 9.2.1-9ubuntu1)
In file included from util/arm-frame-pointer-unwind-support.c:3:
/git/linux/tools/perf/arch/arm64/include/perf_regs.h:7:10: fatal error: asm/perf_regs.h: No such file or directory
7 | #include <asm/perf_regs.h>
| ^~~~~~~~~~~~~~~~~
compilation terminated.
make[3]: *** [/git/linux/tools/build/Makefile.build:139: util] Error 2
85 83.72 ubuntu:20.04 : Ok gcc (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0 , clang version 10.0.0-4ubuntu1
86 34.23 ubuntu:20.04-x-powerpc64el : Ok powerpc64le-linux-gnu-gcc (Ubuntu 10.2.0-5ubuntu1~20.04) 10.2.0
87 83.44 ubuntu:20.10 : Ok gcc (Ubuntu 10.2.0-13ubuntu1) 10.2.0 , Ubuntu clang version 11.0.0-2
88 79.47 ubuntu:21.04 : Ok gcc (Ubuntu 10.2.1-6ubuntu1) 10.2.1 20210110 , Ubuntu clang version 11.0.1-2
89 6489

real 109m25.234s
user 1m34.076s
sys 0m55.476s
[perfbuilder@five ~]$




2021-03-09 16:12:07

by Alexandre Truong

[permalink] [raw]
Subject: Re: [PATCH RESEND WITH CCs v3 3/4] perf tools: enable dwarf_callchain_users on aarch64

Hi Leo,

Thanks for your message, I'll apply your suggestion for the v4 of the patch.

Regards,

Alexandre

On 3/5/21 2:07 PM, Leo Yan wrote:
> On Fri, Mar 05, 2021 at 07:51:20PM +0800, Leo Yan wrote:
>
> [...]
>
>>> diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
>>> index 2a845d6cac09..93661a3eaeb1 100644
>>> --- a/tools/perf/builtin-report.c
>>> +++ b/tools/perf/builtin-report.c
>>> @@ -405,6 +405,10 @@ static int report__setup_sample_type(struct report *rep)
>>>
>>> callchain_param_setup(sample_type);
>>>
>>> + if (callchain_param.record_mode == CALLCHAIN_FP &&
>>> + strncmp(rep->session->header.env.arch, "aarch64", 7) == 0)
>>> + dwarf_callchain_users = true;
>>> +
>>
>> I don't have much knowledge of dwarf or FP.
>>
>> This patch looks suspicious to me, since it only fixes the issue for
>> the "perf report" command, but it cannot support "perf script".
>>
>> I did a quick testing for "perf script" command with the test code from
>> patch 04, seems to me it cannot fix the fp omitting issue for
>> "perf script" command:
>>
>> arm64_fp_test 11211 2282.355095: 176307 cycles:
>> aaaac2e40740 f2+0x10 (/root/arm64_fp_test)
>> aaaac2e4061c main+0xc (/root/arm64_fp_test)
>> ffff961fbd24 __libc_start_main+0xe4 (/usr/lib/aarch64-linux-gnu/libc-2.28.so)
>> aaaac2e4065c _start+0x34 (/root/arm64_fp_test)
>>
>> Could you check for this? Thanks!
>
> Maybe we can consolidate the setting of the global variable
> "dwarf_callchain_users" with the change below; this can help us cover
> the tools in most cases. I used the change below to replace patch
> 03, and both "perf report" and "perf script" work well with it.
>
> Please note, if you want to move forward this way, it's better to
> use a separate patch to first refactor the function
> script__setup_sample_type() to use the general API
> callchain_param_setup(), replacing the duplicated code for
> setting up the callchain parameters.
>
> After that, you could apply the rest of the change for adding the new
> parameter "arch" for the function script__setup_sample_type().
>
>
> diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
> index 2a845d6cac09..ca2e8c9096ea 100644
> --- a/tools/perf/builtin-report.c
> +++ b/tools/perf/builtin-report.c
> @@ -1090,7 +1090,8 @@ static int process_attr(struct perf_tool *tool __maybe_unused,
> * on events sample_type.
> */
> sample_type = evlist__combined_sample_type(*pevlist);
> - callchain_param_setup(sample_type);
> + callchain_param_setup(sample_type,
> + perf_env__arch((*pevlist)->env));
> return 0;
> }
>
> diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
> index 5915f19cee55..c49212c135b2 100644
> --- a/tools/perf/builtin-script.c
> +++ b/tools/perf/builtin-script.c
> @@ -2250,7 +2250,8 @@ static int process_attr(struct perf_tool *tool, union perf_event *event,
> * on events sample_type.
> */
> sample_type = evlist__combined_sample_type(evlist);
> - callchain_param_setup(sample_type);
> + callchain_param_setup(sample_type,
> + perf_env__arch((*pevlist)->env));
>
> /* Enable fields for callchain entries */
> if (symbol_conf.use_callchain &&
> @@ -3309,16 +3310,8 @@ static void script__setup_sample_type(struct perf_script *script)
> struct perf_session *session = script->session;
> u64 sample_type = evlist__combined_sample_type(session->evlist);
>
> - if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) {
> - if ((sample_type & PERF_SAMPLE_REGS_USER) &&
> - (sample_type & PERF_SAMPLE_STACK_USER)) {
> - callchain_param.record_mode = CALLCHAIN_DWARF;
> - dwarf_callchain_users = true;
> - } else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
> - callchain_param.record_mode = CALLCHAIN_LBR;
> - else
> - callchain_param.record_mode = CALLCHAIN_FP;
> - }
> + callchain_param_setup(sample_type,
> + perf_env__arch(session->machines.host.env));
>
> if (script->stitch_lbr && (callchain_param.record_mode != CALLCHAIN_LBR)) {
> pr_warning("Can't find LBR callchain. Switch off --stitch-lbr.\n"
> diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
> index 1b60985690bb..d9766b54cd1a 100644
> --- a/tools/perf/util/callchain.c
> +++ b/tools/perf/util/callchain.c
> @@ -1600,7 +1600,7 @@ void callchain_cursor_reset(struct callchain_cursor *cursor)
> map__zput(node->ms.map);
> }
>
> -void callchain_param_setup(u64 sample_type)
> +void callchain_param_setup(u64 sample_type, const char *arch)
> {
> if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) {
> if ((sample_type & PERF_SAMPLE_REGS_USER) &&
> @@ -1612,6 +1612,14 @@ void callchain_param_setup(u64 sample_type)
> else
> callchain_param.record_mode = CALLCHAIN_FP;
> }
> +
> + /*
> + * Fixup for arm64 due to the frame pointer was omitted for the
> + * caller of the leaf frame.
> + */
> + if (callchain_param.record_mode == CALLCHAIN_FP &&
> + strncmp(arch, "arm64", 6) == 0)
> + dwarf_callchain_users = true;
> }
>
> static bool chain_match(struct callchain_list *base_chain,
> diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
> index 77fba053c677..d95615daed73 100644
> --- a/tools/perf/util/callchain.h
> +++ b/tools/perf/util/callchain.h
> @@ -300,7 +300,7 @@ int callchain_branch_counts(struct callchain_root *root,
> u64 *branch_count, u64 *predicted_count,
> u64 *abort_count, u64 *cycles_count);
>
> -void callchain_param_setup(u64 sample_type);
> +void callchain_param_setup(u64 sample_type, const char *arch);
>
> bool callchain_cnode_matched(struct callchain_node *base_cnode,
> struct callchain_node *pair_cnode);
>
IMPORTANT NOTICE: The contents of this email and any attachments are confidential and may also be privileged. If you are not the intended recipient, please notify the sender immediately and do not disclose the contents to any other person, use it for any purpose, or store or copy the information in any medium. Thank you.