2015-08-19 08:39:11

by Stephane Eranian

[permalink] [raw]
Subject: [PATCH v1 0/4] perf: improve script and record for iregs and brstack

This short series of patches improves perf record and perf script support
for interrupted machine state and branch stack.

this makes it easier to postprocess the data and narrow down the volume
and limit the overhead of capturing interrupt machine state registers.
For some analysis, only a subset of the registers is useful.

Changes:
- Make --intr-regs accept register names to limit volume of data collected
- Make perf script print interrupted machine register values with -F iregs
- Make perf script print branch stack content with -F brstack

$ perf record --intr-regs=\?
available registers: AX BX CX DX SI DI BP SP IP FLAGS CS SS R8 R9 R10 R11 R12 R13 R14 R15
...

$ perf record --intr-regs=ax,bx,cx,dx,si ....

$ perf script -F ip,iregs
40afc2 AX:0x6c5770 BX:0x1e CX:0x5f4d80a DX:0x101010101010101 SI:0x1

$ perf script -F ip,brstack ....


Stephane Eranian (4):
perf script: enable printing of interrupted machine state
perf/x86: add list of register names
perf record: add ability to name registers to record
perf script: enable printing of branch stack

tools/perf/Documentation/perf-record.txt | 6 ++-
tools/perf/Documentation/perf-script.txt | 2 +-
tools/perf/arch/x86/util/Build | 1 +
tools/perf/arch/x86/util/perf_regs.c | 31 ++++++++++++++
tools/perf/builtin-record.c | 7 +++-
tools/perf/builtin-script.c | 52 ++++++++++++++++++++++-
tools/perf/perf.h | 2 +-
tools/perf/util/Build | 1 +
tools/perf/util/evsel.c | 2 +-
tools/perf/util/parse-regs-options.c | 71 ++++++++++++++++++++++++++++++++
tools/perf/util/parse-regs-options.h | 5 +++
tools/perf/util/perf_regs.h | 7 ++++
12 files changed, 180 insertions(+), 7 deletions(-)
create mode 100644 tools/perf/arch/x86/util/perf_regs.c
create mode 100644 tools/perf/util/parse-regs-options.c
create mode 100644 tools/perf/util/parse-regs-options.h

--
1.9.1


2015-08-19 08:39:15

by Stephane Eranian

[permalink] [raw]
Subject: [PATCH v1 1/4] perf script: enable printing of interrupted machine state

This patch adds the output of the interrupted machine state (iregs)
to perf script. It presents them as NAME:VALUE so this is easy to
parse during post processing.

To capture the interrupted machine state:
$ perf record -I ....

to display iregs, use the -F option:

$ perf script -F ip,iregs
40afc2 AX:0x6c5770 BX:0x1e CX:0x5f4d80a DX:0x101010101010101 SI:0x1

Signed-off-by: Stephane Eranian <[email protected]>
---
tools/perf/Documentation/perf-script.txt | 2 +-
tools/perf/builtin-script.c | 31 ++++++++++++++++++++++++++++++-
2 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 8e9be1f..4b0acf4 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -116,7 +116,7 @@ OPTIONS
--fields::
Comma separated list of fields to print. Options are:
comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
- srcline, period, flags.
+ srcline, period, iregs, flags.
Field list can be prepended with the type, trace, sw or hw,
to indicate to which event type the field list applies.
e.g., -f sw:comm,tid,time,ip,sym and -f trace:time,cpu,trace
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 7b376d2..3fed110 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -6,6 +6,7 @@
#include "util/exec_cmd.h"
#include "util/header.h"
#include "util/parse-options.h"
+#include "util/perf_regs.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
@@ -46,6 +47,7 @@ enum perf_output_field {
PERF_OUTPUT_SYMOFFSET = 1U << 11,
PERF_OUTPUT_SRCLINE = 1U << 12,
PERF_OUTPUT_PERIOD = 1U << 13,
+ PERF_OUTPUT_IREGS = 1U << 14,
};

struct output_option {
@@ -66,6 +68,7 @@ struct output_option {
{.str = "symoff", .field = PERF_OUTPUT_SYMOFFSET},
{.str = "srcline", .field = PERF_OUTPUT_SRCLINE},
{.str = "period", .field = PERF_OUTPUT_PERIOD},
+ {.str = "iregs", .field = PERF_OUTPUT_IREGS},
};

/* default set to maintain compatibility with current format */
@@ -255,6 +258,11 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
PERF_OUTPUT_PERIOD))
return -EINVAL;

+ if (PRINT_FIELD(IREGS) &&
+ perf_evsel__check_stype(evsel, PERF_SAMPLE_REGS_INTR, "IREGS",
+ PERF_OUTPUT_IREGS))
+ return -EINVAL;
+
return 0;
}

@@ -352,6 +360,24 @@ static int perf_session__check_output_opt(struct perf_session *session)
return 0;
}

+static void print_sample_iregs(union perf_event *event __maybe_unused,
+ struct perf_sample *sample,
+ struct thread *thread __maybe_unused,
+ struct perf_event_attr *attr)
+{
+ struct regs_dump *regs = &sample->intr_regs;
+ uint64_t mask = attr->sample_regs_intr;
+ unsigned i = 0, r;
+
+ if (!regs)
+ return;
+
+ for_each_set_bit(r, (unsigned long *) &mask, sizeof(mask) * 8) {
+ u64 val = regs->regs[i++];
+ printf("%5s:0x%"PRIx64" ", perf_reg_name(r), val);
+ }
+}
+
static void print_sample_start(struct perf_sample *sample,
struct thread *thread,
struct perf_evsel *evsel)
@@ -525,6 +551,9 @@ static void process_event(union perf_event *event, struct perf_sample *sample,
PERF_MAX_STACK_DEPTH);
}

+ if (PRINT_FIELD(IREGS))
+ print_sample_iregs(event, sample, thread, attr);
+
printf("\n");
}

@@ -1627,7 +1656,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
"comma separated output fields prepend with 'type:'. "
"Valid types: hw,sw,trace,raw. "
"Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
- "addr,symoff,period,flags", parse_output_fields),
+ "addr,symoff,period,iregs,flags", parse_output_fields),
OPT_BOOLEAN('a', "all-cpus", &system_wide,
"system-wide collection from all CPUs"),
OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
--
1.9.1

2015-08-19 08:40:27

by Stephane Eranian

[permalink] [raw]
Subject: [PATCH v1 2/4] perf/x86: add list of register names

This patch adds a way to locate a register identifier (PERF_X86_REG_*)
based on its name, e.g., AX.

This will be used by a subsequent patch to improved flexibility of
perf record.

Signed-off-by: Stephane Eranian <[email protected]>
---
tools/perf/arch/x86/util/Build | 1 +
tools/perf/arch/x86/util/perf_regs.c | 31 +++++++++++++++++++++++++++++++
tools/perf/util/perf_regs.h | 7 +++++++
3 files changed, 39 insertions(+)
create mode 100644 tools/perf/arch/x86/util/perf_regs.c

diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build
index cfbccc4..6981b19 100644
--- a/tools/perf/arch/x86/util/Build
+++ b/tools/perf/arch/x86/util/Build
@@ -1,6 +1,7 @@
libperf-y += header.o
libperf-y += tsc.o
libperf-y += kvm-stat.o
+libperf-y += perf_regs.o

libperf-$(CONFIG_DWARF) += dwarf-regs.o

diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c
new file mode 100644
index 0000000..3c75faf
--- /dev/null
+++ b/tools/perf/arch/x86/util/perf_regs.c
@@ -0,0 +1,31 @@
+#include "../../perf.h"
+#include "../../util/perf_regs.h"
+
+#define REG(n, b) { .name = #n, .mask = 1ULL << (b) }
+#define REG_END { .name = NULL }
+const struct sample_reg sample_reg_masks[] = {
+ REG(AX, PERF_REG_X86_AX),
+ REG(BX, PERF_REG_X86_BX),
+ REG(CX, PERF_REG_X86_CX),
+ REG(DX, PERF_REG_X86_DX),
+ REG(SI, PERF_REG_X86_SI),
+ REG(DI, PERF_REG_X86_DI),
+ REG(BP, PERF_REG_X86_BP),
+ REG(SP, PERF_REG_X86_SP),
+ REG(IP, PERF_REG_X86_IP),
+ REG(FLAGS, PERF_REG_X86_FLAGS),
+ REG(CS, PERF_REG_X86_CS),
+ REG(SS, PERF_REG_X86_SS),
+#ifdef HAVE_ARCH_X86_64_SUPPORT
+ REG(R8, PERF_REG_X86_R8),
+ REG(R9, PERF_REG_X86_R9),
+ REG(R10, PERF_REG_X86_R10),
+ REG(R11, PERF_REG_X86_R11),
+ REG(R12, PERF_REG_X86_R12),
+ REG(R13, PERF_REG_X86_R13),
+ REG(R14, PERF_REG_X86_R14),
+ REG(R15, PERF_REG_X86_R15),
+#endif
+ REG_END
+};
+
diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h
index 980dbf7..92c1fff 100644
--- a/tools/perf/util/perf_regs.h
+++ b/tools/perf/util/perf_regs.h
@@ -5,6 +5,13 @@

struct regs_dump;

+struct sample_reg {
+ const char *name;
+ uint64_t mask;
+};
+
+extern const struct sample_reg sample_reg_masks[];
+
#ifdef HAVE_PERF_REGS_SUPPORT
#include <perf_regs.h>

--
1.9.1

2015-08-19 08:39:54

by Stephane Eranian

[permalink] [raw]
Subject: [PATCH v1 3/4] perf record: add ability to name registers to record

This patch modifies the -I/--int-regs option to enablepassing the name
of the registers to sample on interrupt. Registers can be specified
by their symbolic names. For instance on x86, --intr-regs=ax,si.

The motivation is to reduce the size of the perf.data file and the
overhead of sampling by only collecting the registers useful to
a specific analysis. For instance, for value profiling, sampling
only the registers used to passed arguements to functions.

With no parameter, the --intr-regs still records all possible
registers based on the architecture.

To name registers, it is necessary to use the long form of the
option, i.e., --intr-regs:

$ perf record --intr-regs=si,di,r8,r9 .....

To record any possible registers:
$ perf record -I .....
$ perf report --intr-regs ...

To display the register, one can use perf report -D

To list the available registers:
$ perf record --intr-regs=\?
available registers: AX BX CX DX SI DI BP SP IP FLAGS CS SS R8 R9 R10 R11 R12 R13 R14 R15

Signed-off-by: Stephane Eranian <[email protected]>
---
tools/perf/Documentation/perf-record.txt | 6 ++-
tools/perf/builtin-record.c | 7 +++-
tools/perf/perf.h | 2 +-
tools/perf/util/Build | 1 +
tools/perf/util/evsel.c | 2 +-
tools/perf/util/parse-regs-options.c | 71 ++++++++++++++++++++++++++++++++
tools/perf/util/parse-regs-options.h | 5 +++
7 files changed, 89 insertions(+), 5 deletions(-)
create mode 100644 tools/perf/util/parse-regs-options.c
create mode 100644 tools/perf/util/parse-regs-options.h

diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 347a273..2e9ce77 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -276,7 +276,11 @@ filter out the startup phase of the program, which is often very different.
--intr-regs::
Capture machine state (registers) at interrupt, i.e., on counter overflows for
each sample. List of captured registers depends on the architecture. This option
-is off by default.
+is off by default. It is possible to select the registers to sample using their
+symbolic names, e.g. on x86, ax, si. To list the available registers use
+--intr-regs=\?. To name registers, pass a comma separated list such as
+--intr-regs=ax,bx. The list of register is architecture dependent.
+

--running-time::
Record running and enabled time for read events (:S)
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 25cf6b4..5d9f9f4 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -27,8 +27,10 @@
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"
+#include "util/perf_regs.h"
#include "util/auxtrace.h"
#include "util/parse-branch-options.h"
+#include "util/parse-regs-options.h"

#include <unistd.h>
#include <sched.h>
@@ -1069,8 +1071,9 @@ struct option __record_options[] = {
"sample transaction flags (special events only)"),
OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
"use per-thread mmaps"),
- OPT_BOOLEAN('I', "intr-regs", &record.opts.sample_intr_regs,
- "Sample machine registers on interrupt"),
+ OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
+ "sample selected machine registers on interrupt,"
+ " use -I ? to list register names", parse_regs),
OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
"Record running/enabled time of read (:S) events"),
OPT_CALLBACK('k', "clockid", &record.opts,
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index cccb4cf..90129ac 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -54,7 +54,6 @@ struct record_opts {
bool sample_time_set;
bool callgraph_set;
bool period;
- bool sample_intr_regs;
bool running_time;
bool full_auxtrace;
bool auxtrace_snapshot_mode;
@@ -64,6 +63,7 @@ struct record_opts {
unsigned int auxtrace_mmap_pages;
unsigned int user_freq;
u64 branch_stack;
+ u64 sample_intr_regs;
u64 default_interval;
u64 user_interval;
size_t auxtrace_snapshot_size;
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 1ce0adc..0811d17 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -79,6 +79,7 @@ libperf-y += cloexec.o
libperf-y += thread-stack.o
libperf-$(CONFIG_AUXTRACE) += auxtrace.o
libperf-y += parse-branch-options.o
+libperf-y += parse-regs-options.o

libperf-$(CONFIG_LIBELF) += symbol-elf.o
libperf-$(CONFIG_LIBELF) += probe-file.o
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 3b23767..08d8498 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -786,7 +786,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts)
perf_evsel__config_callgraph(evsel, opts, &callchain_param);

if (opts->sample_intr_regs) {
- attr->sample_regs_intr = PERF_REGS_MASK;
+ attr->sample_regs_intr = opts->sample_intr_regs;
perf_evsel__set_sample_bit(evsel, REGS_INTR);
}

diff --git a/tools/perf/util/parse-regs-options.c b/tools/perf/util/parse-regs-options.c
new file mode 100644
index 0000000..4f2c1c2
--- /dev/null
+++ b/tools/perf/util/parse-regs-options.c
@@ -0,0 +1,71 @@
+#include "perf.h"
+#include "util/util.h"
+#include "util/debug.h"
+#include "util/parse-options.h"
+#include "util/parse-regs-options.h"
+
+int
+parse_regs(const struct option *opt, const char *str, int unset)
+{
+ uint64_t *mode = (uint64_t *)opt->value;
+ const struct sample_reg *r;
+ char *s, *os = NULL, *p;
+ int ret = -1;
+
+ if (unset)
+ return 0;
+
+ /*
+ * cannot set it twice
+ */
+ if (*mode)
+ return -1;
+
+ /* str may be NULL in case no arg is passed to -I */
+ if (str) {
+ /* because str is read-only */
+ s = os = strdup(str);
+ if (!s)
+ return -1;
+
+ for (;;) {
+ p = strchr(s, ',');
+ if (p)
+ *p = '\0';
+
+ if (!strcmp(s, "?")) {
+ fprintf(stderr, "available registers: ");
+ for (r = sample_reg_masks; r->name; r++) {
+ fprintf(stderr, "%s ", r->name);
+ }
+ fputc('\n', stderr);
+ /* just printing available regs */
+ return -1;
+ }
+ for (r = sample_reg_masks; r->name; r++) {
+ if (!strcasecmp(s, r->name))
+ break;
+ }
+ if (!r->name) {
+ ui__warning("unknown register %s,"
+ " check man page\n", s);
+ goto error;
+ }
+
+ *mode |= r->mask;
+
+ if (!p)
+ break;
+
+ s = p + 1;
+ }
+ }
+ ret = 0;
+
+ /* default to all possible regs */
+ if (*mode == 0)
+ *mode = PERF_REGS_MASK;
+error:
+ free(os);
+ return ret;
+}
diff --git a/tools/perf/util/parse-regs-options.h b/tools/perf/util/parse-regs-options.h
new file mode 100644
index 0000000..7d762b1
--- /dev/null
+++ b/tools/perf/util/parse-regs-options.h
@@ -0,0 +1,5 @@
+#ifndef _PERF_PARSE_REGS_OPTIONS_H
+#define _PERF_PARSE_REGS_OPTIONS_H 1
+struct option;
+int parse_regs(const struct option *opt, const char *str, int unset);
+#endif /* _PERF_PARSE_REGS_OPTIONS_H */
--
1.9.1

2015-08-19 08:39:26

by Stephane Eranian

[permalink] [raw]
Subject: [PATCH v1 4/4] perf script: enable printing of branch stack

This patch improves perf script by enabling printing of the
branch stack via the 'brstack' argument to the field selection
option -F. The option is off by default and operates only if the
perf.data file has branch stack content.

The branches are printed in to/from pairs. the most recent branch
is printed first. The number of branch entries vary based on the
underlying hardware are filtering used.

Signed-off-by: Stephane Eranian <[email protected]>
---
tools/perf/Documentation/perf-script.txt | 2 +-
tools/perf/builtin-script.c | 23 ++++++++++++++++++++++-
2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 4b0acf4..fd95384 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -116,7 +116,7 @@ OPTIONS
--fields::
Comma separated list of fields to print. Options are:
comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
- srcline, period, iregs, flags.
+ srcline, period, iregs, brstack, flags.
Field list can be prepended with the type, trace, sw or hw,
to indicate to which event type the field list applies.
e.g., -f sw:comm,tid,time,ip,sym and -f trace:time,cpu,trace
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 3fed110..9ac7140 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -48,6 +48,7 @@ enum perf_output_field {
PERF_OUTPUT_SRCLINE = 1U << 12,
PERF_OUTPUT_PERIOD = 1U << 13,
PERF_OUTPUT_IREGS = 1U << 14,
+ PERF_OUTPUT_BRSTACK = 1U << 15,
};

struct output_option {
@@ -69,6 +70,7 @@ struct output_option {
{.str = "srcline", .field = PERF_OUTPUT_SRCLINE},
{.str = "period", .field = PERF_OUTPUT_PERIOD},
{.str = "iregs", .field = PERF_OUTPUT_IREGS},
+ {.str = "brstack", .field = PERF_OUTPUT_BRSTACK},
};

/* default set to maintain compatibility with current format */
@@ -419,6 +421,22 @@ static void print_sample_start(struct perf_sample *sample,
}
}

+static void print_sample_brstack(union perf_event *event __maybe_unused,
+ struct perf_sample *sample,
+ struct thread *thread __maybe_unused,
+ struct perf_event_attr *attr __maybe_unused)
+{
+ struct branch_stack *br = sample->branch_stack;
+ u64 i;
+
+ if (!(br && br->nr))
+ return;
+
+ for (i = 0; i < br->nr; i++) {
+ printf(" 0x%"PRIx64"/0x%"PRIx64" ", br->entries[i].from, br->entries[i].to);
+ }
+}
+
static void print_sample_addr(union perf_event *event,
struct perf_sample *sample,
struct thread *thread,
@@ -554,6 +572,9 @@ static void process_event(union perf_event *event, struct perf_sample *sample,
if (PRINT_FIELD(IREGS))
print_sample_iregs(event, sample, thread, attr);

+ if (PRINT_FIELD(BRSTACK))
+ print_sample_brstack(event, sample, thread, attr);
+
printf("\n");
}

@@ -1656,7 +1677,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
"comma separated output fields prepend with 'type:'. "
"Valid types: hw,sw,trace,raw. "
"Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
- "addr,symoff,period,iregs,flags", parse_output_fields),
+ "addr,symoff,period,iregs,brstack,flags", parse_output_fields),
OPT_BOOLEAN('a', "all-cpus", &system_wide,
"system-wide collection from all CPUs"),
OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
--
1.9.1

2015-08-19 22:12:20

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH v1 4/4] perf script: enable printing of branch stack

On Wed, Aug 19, 2015 at 10:38:26AM +0200, Stephane Eranian wrote:
> This patch improves perf script by enabling printing of the
> branch stack via the 'brstack' argument to the field selection
> option -F. The option is off by default and operates only if the
> perf.data file has branch stack content.

Thanks that's very useful. I wanted that several times.
>
> +static void print_sample_brstack(union perf_event *event __maybe_unused,
> + struct perf_sample *sample,
> + struct thread *thread __maybe_unused,
> + struct perf_event_attr *attr __maybe_unused)
> +{
> + struct branch_stack *br = sample->branch_stack;
> + u64 i;
> +
> + if (!(br && br->nr))
> + return;
> +
> + for (i = 0; i < br->nr; i++) {
> + printf(" 0x%"PRIx64"/0x%"PRIx64" ", br->entries[i].from, br->entries[i].to);

Should print the various flags too. Also tip just added cycles information.

Also it would be good to resolve the addresses to symbol + offset
(and perhaps have support for srcline too).


-Andi
--
[email protected] -- Speaking for myself only

2015-08-19 22:20:48

by Stephane Eranian

[permalink] [raw]
Subject: Re: [PATCH v1 4/4] perf script: enable printing of branch stack

On Wed, Aug 19, 2015 at 3:11 PM, Andi Kleen <[email protected]> wrote:
> On Wed, Aug 19, 2015 at 10:38:26AM +0200, Stephane Eranian wrote:
>> This patch improves perf script by enabling printing of the
>> branch stack via the 'brstack' argument to the field selection
>> option -F. The option is off by default and operates only if the
>> perf.data file has branch stack content.
>
> Thanks that's very useful. I wanted that several times.
>>
>> +static void print_sample_brstack(union perf_event *event __maybe_unused,
>> + struct perf_sample *sample,
>> + struct thread *thread __maybe_unused,
>> + struct perf_event_attr *attr __maybe_unused)
>> +{
>> + struct branch_stack *br = sample->branch_stack;
>> + u64 i;
>> +
>> + if (!(br && br->nr))
>> + return;
>> +
>> + for (i = 0; i < br->nr; i++) {
>> + printf(" 0x%"PRIx64"/0x%"PRIx64" ", br->entries[i].from, br->entries[i].to);
>
> Should print the various flags too. Also tip just added cycles information.
>
Ok, I will add that in v2.

> Also it would be good to resolve the addresses to symbol + offset

But then, that becomes harder to post-process or even read with long
C++ signatures.

> (and perhaps have support for srcline too).
>
>
> -Andi
> --
> [email protected] -- Speaking for myself only

2015-08-19 23:32:24

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH v1 4/4] perf script: enable printing of branch stack

> > Also it would be good to resolve the addresses to symbol + offset
>
> But then, that becomes harder to post-process or even read with long
> C++ signatures.

Ok. Maybe make it optional?

For many usages resolved addresses will be much more useful than raw.
Normall call stacks also do that.

-Andi

--
[email protected] -- Speaking for myself only

2015-08-20 11:18:43

by Jiri Olsa

[permalink] [raw]
Subject: Re: [PATCH v1 3/4] perf record: add ability to name registers to record

On Wed, Aug 19, 2015 at 10:38:25AM +0200, Stephane Eranian wrote:

SNIP

> diff --git a/tools/perf/util/parse-regs-options.c b/tools/perf/util/parse-regs-options.c
> new file mode 100644
> index 0000000..4f2c1c2
> --- /dev/null
> +++ b/tools/perf/util/parse-regs-options.c
> @@ -0,0 +1,71 @@
> +#include "perf.h"
> +#include "util/util.h"
> +#include "util/debug.h"
> +#include "util/parse-options.h"
> +#include "util/parse-regs-options.h"
> +
> +int
> +parse_regs(const struct option *opt, const char *str, int unset)
> +{
> + uint64_t *mode = (uint64_t *)opt->value;
> + const struct sample_reg *r;
> + char *s, *os = NULL, *p;
> + int ret = -1;
> +
> + if (unset)
> + return 0;
> +
> + /*
> + * cannot set it twice
> + */
> + if (*mode)
> + return -1;
> +
> + /* str may be NULL in case no arg is passed to -I */
> + if (str) {
> + /* because str is read-only */
> + s = os = strdup(str);
> + if (!s)
> + return -1;
> +
> + for (;;) {
> + p = strchr(s, ',');
> + if (p)
> + *p = '\0';
> +
> + if (!strcmp(s, "?")) {
> + fprintf(stderr, "available registers: ");
> + for (r = sample_reg_masks; r->name; r++) {
> + fprintf(stderr, "%s ", r->name);
> + }
> + fputc('\n', stderr);
> + /* just printing available regs */
> + return -1;
> + }
> + for (r = sample_reg_masks; r->name; r++) {

the new 'I' is arch dependent, would this fail to link on
other archs besides x86_64 ? haven't tried though

jirka

2015-08-20 20:02:31

by Stephane Eranian

[permalink] [raw]
Subject: Re: [PATCH v1 3/4] perf record: add ability to name registers to record

On Thu, Aug 20, 2015 at 4:18 AM, Jiri Olsa <[email protected]> wrote:
> On Wed, Aug 19, 2015 at 10:38:25AM +0200, Stephane Eranian wrote:
>
> SNIP
>
>> diff --git a/tools/perf/util/parse-regs-options.c b/tools/perf/util/parse-regs-options.c
>> new file mode 100644
>> index 0000000..4f2c1c2
>> --- /dev/null
>> +++ b/tools/perf/util/parse-regs-options.c
>> @@ -0,0 +1,71 @@
>> +#include "perf.h"
>> +#include "util/util.h"
>> +#include "util/debug.h"
>> +#include "util/parse-options.h"
>> +#include "util/parse-regs-options.h"
>> +
>> +int
>> +parse_regs(const struct option *opt, const char *str, int unset)
>> +{
>> + uint64_t *mode = (uint64_t *)opt->value;
>> + const struct sample_reg *r;
>> + char *s, *os = NULL, *p;
>> + int ret = -1;
>> +
>> + if (unset)
>> + return 0;
>> +
>> + /*
>> + * cannot set it twice
>> + */
>> + if (*mode)
>> + return -1;
>> +
>> + /* str may be NULL in case no arg is passed to -I */
>> + if (str) {
>> + /* because str is read-only */
>> + s = os = strdup(str);
>> + if (!s)
>> + return -1;
>> +
>> + for (;;) {
>> + p = strchr(s, ',');
>> + if (p)
>> + *p = '\0';
>> +
>> + if (!strcmp(s, "?")) {
>> + fprintf(stderr, "available registers: ");
>> + for (r = sample_reg_masks; r->name; r++) {
>> + fprintf(stderr, "%s ", r->name);
>> + }
>> + fputc('\n', stderr);
>> + /* just printing available regs */
>> + return -1;
>> + }
>> + for (r = sample_reg_masks; r->name; r++) {
>
> the new 'I' is arch dependent, would this fail to link on
> other archs besides x86_64 ? haven't tried though
>
It should not if there is a weak symbol for the sample_regs_masks[].
I think I forgot to add that. Will do in v2.