2021-02-10 13:24:47

by Alexandre Truong

[permalink] [raw]
Subject: [PATCH v2 1/4] perf tools: record aarch64 registers automatically

On arm64, automatically record all the registers if the frame pointer
mode is on. They will be used to do a dwarf unwind to find the caller
of the leaf frame if the frame pointer was omitted.

Signed-off-by: Alexandre Truong <[email protected]>
---
tools/perf/arch/arm64/util/machine.c | 7 +++++++
tools/perf/builtin-record.c | 7 +++++++
tools/perf/util/callchain.h | 2 ++
3 files changed, 16 insertions(+)

diff --git a/tools/perf/arch/arm64/util/machine.c b/tools/perf/arch/arm64/util/machine.c
index d41b27e781d3..8a8bab5f4e68 100644
--- a/tools/perf/arch/arm64/util/machine.c
+++ b/tools/perf/arch/arm64/util/machine.c
@@ -4,6 +4,8 @@
#include <string.h>
#include "debug.h"
#include "symbol.h"
+#include "callchain.h"
+#include "record.h"

/* On arm64, kernel text segment start at high memory address,
* for example 0xffff 0000 8xxx xxxx. Modules start at a low memory
@@ -25,3 +27,8 @@ void arch__symbols__fixup_end(struct symbol *p, struct symbol *c)
p->end = c->start;
pr_debug4("%s sym:%s end:%#lx\n", __func__, p->name, p->end);
}
+
+void arch__add_leaf_frame_record_opts(struct record_opts *opts)
+{
+ opts->sample_user_regs = arch__user_reg_mask();
+}
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 7bb10e9863bd..a5161f54b838 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -2243,6 +2243,10 @@ static int record__parse_mmap_pages(const struct option *opt,
return ret;
}

+void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused)
+{
+}
+
static int parse_control_option(const struct option *opt,
const char *str,
int unset __maybe_unused)
@@ -2810,6 +2814,9 @@ int cmd_record(int argc, const char **argv)
/* Enable ignoring missing threads when -u/-p option is defined. */
rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;

+ if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP)
+ arch__add_leaf_frame_record_opts(&rec->opts);
+
err = -ENOMEM;
if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
usage_with_options(record_usage, record_options);
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 5824134f983b..77fba053c677 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -280,6 +280,8 @@ static inline int arch_skip_callchain_idx(struct thread *thread __maybe_unused,
}
#endif

+void arch__add_leaf_frame_record_opts(struct record_opts *opts);
+
char *callchain_list__sym_name(struct callchain_list *cl,
char *bf, size_t bfsize, bool show_dso);
char *callchain_node__scnprintf_value(struct callchain_node *node,
--
2.23.0


2021-02-10 13:25:17

by Alexandre Truong

[permalink] [raw]
Subject: [PATCH v2 2/4] perf tools: add a mechanism to inject stack frames

Add a mechanism for platforms to inject stack frames for the leaf
frame caller if there is enough information to determine a frame
is missing from dwarf or other post processing mechanisms.

Signed-off-by: Alexandre Truong <[email protected]>
---
tools/perf/util/machine.c | 23 +++++++++++++++++++++++
1 file changed, 23 insertions(+)

diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 522ea3236bcc..40082d70eec1 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -2671,6 +2671,12 @@ static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread,
return err;
}

+static u64 get_leaf_frame_caller(struct perf_sample *sample __maybe_unused,
+ struct thread *thread __maybe_unused)
+{
+ return 0;
+}
+
static int thread__resolve_callchain_sample(struct thread *thread,
struct callchain_cursor *cursor,
struct evsel *evsel,
@@ -2687,6 +2693,8 @@ static int thread__resolve_callchain_sample(struct thread *thread,
int i, j, err, nr_entries;
int skip_idx = -1;
int first_call = 0;
+ u64 leaf_frame_caller;
+ int pos;

if (chain)
chain_nr = chain->nr;
@@ -2811,6 +2819,21 @@ static int thread__resolve_callchain_sample(struct thread *thread,
continue;
}

+ pos = callchain_param.order == ORDER_CALLEE ? 2 : chain_nr - 2;
+
+ if (i == pos) {
+ leaf_frame_caller = get_leaf_frame_caller(sample, thread);
+
+ if (leaf_frame_caller && leaf_frame_caller != ip) {
+
+ err = add_callchain_ip(thread, cursor, parent,
+ root_al, &cpumode, leaf_frame_caller,
+ false, NULL, NULL, 0);
+ if (err)
+ return (err < 0) ? err : 0;
+ }
+ }
+
err = add_callchain_ip(thread, cursor, parent,
root_al, &cpumode, ip,
false, NULL, NULL, 0);
--
2.23.0

2021-02-10 13:25:25

by Alexandre Truong

[permalink] [raw]
Subject: [PATCH v2 3/4] perf tools: enable dwarf_callchain_users on aarch64

On arm64, enable dwarf_callchain_users which will be needed
to do a dwarf unwind in order to get the caller of the leaf frame.

Signed-off-by: Alexandre Truong <[email protected]>
---
tools/perf/builtin-report.c | 4 ++++
1 file changed, 4 insertions(+)

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 2a845d6cac09..93661a3eaeb1 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -405,6 +405,10 @@ static int report__setup_sample_type(struct report *rep)

callchain_param_setup(sample_type);

+ if (callchain_param.record_mode == CALLCHAIN_FP &&
+ strncmp(rep->session->header.env.arch, "aarch64", 7) == 0)
+ dwarf_callchain_users = true;
+
if (rep->stitch_lbr && (callchain_param.record_mode != CALLCHAIN_LBR)) {
ui__warning("Can't find LBR callchain. Switch off --stitch-lbr.\n"
"Please apply --call-graph lbr when recording.\n");
--
2.23.0