AMD Last Branch Record Extension Version 2 (LbrExtV2) provides branch
speculation information and the perf UAPI is extended to provide this in a
generic manner. Update the tools headers and perf tool to be able to show
this additional information.
The LbrExtV2 enablement and UAPI changes can be found at:
https://lore.kernel.org/all/[email protected]/
Sandipan Das (4):
tools headers uapi: Sync perf_event.h with kernel sources
tools headers x86: Sync msr-index.h with kernel sources
perf script: Show branch speculation info
perf session: Show branch speculation info in raw dump
tools/arch/x86/include/asm/msr-index.h | 5 +++++
tools/include/uapi/linux/perf_event.h | 15 ++++++++++++++-
tools/perf/builtin-script.c | 5 +++--
tools/perf/util/branch.c | 15 +++++++++++++++
tools/perf/util/branch.h | 5 ++++-
tools/perf/util/evsel.c | 9 ++++++---
tools/perf/util/session.c | 5 +++--
7 files changed, 50 insertions(+), 9 deletions(-)
--
2.34.1
Show the branch speculation info if provided by the branch recording
hardware feature. This can be useful for optimizing code further.
E.g.
$ perf record -j any,u ./test_branch
$ perf report --dump-raw-trace
Before:
[...]
8380958377610 0x40b178 [0x1b0]: PERF_RECORD_SAMPLE(IP, 0x2): 7952/7952: 0x4f851a period: 48973 addr: 0
... branch stack: nr:16
..... 0: 00000000004b52fd -> 00000000004f82c0 0 cycles P 0
..... 1: ffffffff8220137c -> 00000000004b52f0 0 cycles M 0
..... 2: 000000000041d1c4 -> 00000000004b52f0 0 cycles P 0
..... 3: 00000000004e7ead -> 000000000041d1b0 0 cycles M 0
..... 4: 00000000004e7f91 -> 00000000004e7ead 0 cycles P 0
..... 5: 00000000004e7ea8 -> 00000000004e7f70 0 cycles P 0
..... 6: 00000000004e7e52 -> 00000000004e7e98 0 cycles M 0
..... 7: 00000000004e7e1f -> 00000000004e7e40 0 cycles M 0
..... 8: 00000000004e7f60 -> 00000000004e7df0 0 cycles P 0
..... 9: 00000000004e7f58 -> 00000000004e7f60 0 cycles M 0
..... 10: 000000000041d85d -> 00000000004e7f50 0 cycles P 0
..... 11: 000000000043306a -> 000000000041d840 0 cycles P 0
..... 12: ffffffff8220137c -> 0000000000433040 0 cycles M 0
..... 13: 000000000041e4a1 -> 0000000000433040 0 cycles P 0
..... 14: ffffffff8220137c -> 000000000041e490 0 cycles M 0
..... 15: 000000000041d89b -> 000000000041e487 0 cycles P 0
... thread: test_branch:7952
...... dso: /data/sandipan/test_branch
[...]
After:
[...]
8380958377610 0x40b178 [0x1b0]: PERF_RECORD_SAMPLE(IP, 0x2): 7952/7952: 0x4f851a period: 48973 addr: 0
... branch stack: nr:16
..... 0: 00000000004b52fd -> 00000000004f82c0 0 cycles P 0 NON_SPEC_CORRECT_PATH
..... 1: ffffffff8220137c -> 00000000004b52f0 0 cycles M 0 NON_SPEC_CORRECT_PATH
..... 2: 000000000041d1c4 -> 00000000004b52f0 0 cycles P 0 NON_SPEC_CORRECT_PATH
..... 3: 00000000004e7ead -> 000000000041d1b0 0 cycles M 0 NON_SPEC_CORRECT_PATH
..... 4: 00000000004e7f91 -> 00000000004e7ead 0 cycles P 0 NON_SPEC_CORRECT_PATH
..... 5: 00000000004e7ea8 -> 00000000004e7f70 0 cycles P 0 NON_SPEC_CORRECT_PATH
..... 6: 00000000004e7e52 -> 00000000004e7e98 0 cycles M 0 SPEC_CORRECT_PATH
..... 7: 00000000004e7e1f -> 00000000004e7e40 0 cycles M 0 NON_SPEC_CORRECT_PATH
..... 8: 00000000004e7f60 -> 00000000004e7df0 0 cycles P 0 NON_SPEC_CORRECT_PATH
..... 9: 00000000004e7f58 -> 00000000004e7f60 0 cycles M 0 NON_SPEC_CORRECT_PATH
..... 10: 000000000041d85d -> 00000000004e7f50 0 cycles P 0 NON_SPEC_CORRECT_PATH
..... 11: 000000000043306a -> 000000000041d840 0 cycles P 0 NON_SPEC_CORRECT_PATH
..... 12: ffffffff8220137c -> 0000000000433040 0 cycles M 0 NON_SPEC_CORRECT_PATH
..... 13: 000000000041e4a1 -> 0000000000433040 0 cycles P 0 NON_SPEC_CORRECT_PATH
..... 14: ffffffff8220137c -> 000000000041e490 0 cycles M 0 NON_SPEC_CORRECT_PATH
..... 15: 000000000041d89b -> 000000000041e487 0 cycles P 0 NON_SPEC_CORRECT_PATH
... thread: test_branch:7952
...... dso: /data/sandipan/test_branch
[...]
Signed-off-by: Sandipan Das <[email protected]>
---
tools/perf/util/session.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 98e16659a149..39ba4a21aa42 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1172,7 +1172,7 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack)
struct branch_entry *e = &entries[i];
if (!callstack) {
- printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x %s\n",
+ printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x %s %s\n",
i, e->from, e->to,
(unsigned short)e->flags.cycles,
e->flags.mispred ? "M" : " ",
@@ -1180,7 +1180,8 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack)
e->flags.abort ? "A" : " ",
e->flags.in_tx ? "T" : " ",
(unsigned)e->flags.reserved,
- e->flags.type ? branch_type_name(e->flags.type) : "");
+ e->flags.type ? branch_type_name(e->flags.type) : "",
+ e->flags.spec ? branch_spec_desc(e->flags.spec) : "");
} else {
if (i == 0) {
printf("..... %2"PRIu64": %016" PRIx64 "\n"
--
2.34.1
Show the branch speculation info if provided by the branch recording
hardware feature. This can be useful for optimizing code further.
The speculation info is appended to the end of the list of fields so any
existing tools that use "/" as a delimiter for access fields via an index
remain unaffected. Also show "-" instead of "N/A" when speculation info
is unavailable because "/" is used as the field separator.
E.g.
$ perf record -j any,u,save_type ./test_branch
$ perf script --fields brstacksym
Before:
[...]
check_match+0x60/strcmp+0x0/P/-/-/0/CALL
do_lookup_x+0x3c5/check_match+0x0/P/-/-/0/CALL
[...]
After:
[...]
check_match+0x60/strcmp+0x0/P/-/-/0/CALL/NON_SPEC_CORRECT_PATH
do_lookup_x+0x3c5/check_match+0x0/P/-/-/0/CALL/NON_SPEC_CORRECT_PATH
[...]
Signed-off-by: Sandipan Das <[email protected]>
---
tools/perf/builtin-script.c | 5 +++--
tools/perf/util/branch.c | 15 +++++++++++++++
tools/perf/util/branch.h | 5 ++++-
tools/perf/util/evsel.c | 9 ++++++---
4 files changed, 28 insertions(+), 6 deletions(-)
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index ac19fee62d8e..d294ac804244 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -872,12 +872,13 @@ mispred_str(struct branch_entry *br)
static int print_bstack_flags(FILE *fp, struct branch_entry *br)
{
- return fprintf(fp, "/%c/%c/%c/%d/%s ",
+ return fprintf(fp, "/%c/%c/%c/%d/%s/%s ",
mispred_str(br),
br->flags.in_tx ? 'X' : '-',
br->flags.abort ? 'A' : '-',
br->flags.cycles,
- br->flags.type ? branch_type_name(br->flags.type) : "-");
+ br->flags.type ? branch_type_name(br->flags.type) : "-",
+ br->flags.spec ? branch_spec_desc(br->flags.spec) : "-");
}
static int perf_sample__fprintf_brstack(struct perf_sample *sample,
diff --git a/tools/perf/util/branch.c b/tools/perf/util/branch.c
index a9a909db8cc7..29d89425a011 100644
--- a/tools/perf/util/branch.c
+++ b/tools/perf/util/branch.c
@@ -146,3 +146,18 @@ int branch_type_str(struct branch_type_stat *st, char *bf, int size)
return printed;
}
+
+const char *branch_spec_desc(int spec)
+{
+ const char *branch_spec_outcomes[PERF_BR_SPEC_MAX] = {
+ "N/A",
+ "SPEC_WRONG_PATH",
+ "NON_SPEC_CORRECT_PATH",
+ "SPEC_CORRECT_PATH",
+ };
+
+ if (spec >= 0 && spec < PERF_BR_SPEC_MAX)
+ return branch_spec_outcomes[spec];
+
+ return NULL;
+}
diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h
index 17b2ccc61094..8563b88b8281 100644
--- a/tools/perf/util/branch.h
+++ b/tools/perf/util/branch.h
@@ -24,7 +24,8 @@ struct branch_flags {
u64 abort:1;
u64 cycles:16;
u64 type:4;
- u64 reserved:40;
+ u64 spec:2;
+ u64 reserved:38;
};
};
};
@@ -85,4 +86,6 @@ const char *branch_type_name(int type);
void branch_type_stat_display(FILE *fp, struct branch_type_stat *st);
int branch_type_str(struct branch_type_stat *st, char *bf, int bfsize);
+const char *branch_spec_desc(int spec);
+
#endif /* _PERF_BRANCH_H */
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 4852089e1d79..e47cf16bef0a 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -2313,7 +2313,8 @@ u64 evsel__bitfield_swap_branch_flags(u64 value)
* abort:1 //transaction abort
* cycles:16 //cycle count to last branch
* type:4 //branch type
- * reserved:40
+ * spec:2 //branch speculation info
+ * reserved:38
* }
* }
*
@@ -2332,7 +2333,8 @@ u64 evsel__bitfield_swap_branch_flags(u64 value)
new_val |= bitfield_swap(value, 3, 1);
new_val |= bitfield_swap(value, 4, 16);
new_val |= bitfield_swap(value, 20, 4);
- new_val |= bitfield_swap(value, 24, 40);
+ new_val |= bitfield_swap(value, 24, 2);
+ new_val |= bitfield_swap(value, 26, 38);
} else {
new_val = bitfield_swap(value, 63, 1);
new_val |= bitfield_swap(value, 62, 1);
@@ -2340,7 +2342,8 @@ u64 evsel__bitfield_swap_branch_flags(u64 value)
new_val |= bitfield_swap(value, 60, 1);
new_val |= bitfield_swap(value, 44, 16);
new_val |= bitfield_swap(value, 40, 4);
- new_val |= bitfield_swap(value, 0, 40);
+ new_val |= bitfield_swap(value, 38, 2);
+ new_val |= bitfield_swap(value, 0, 38);
}
return new_val;
--
2.34.1
Sync perf_event.h with the kernel sources by adding the new "spec" bitfield
to branch entries.
Signed-off-by: Sandipan Das <[email protected]>
---
tools/include/uapi/linux/perf_event.h | 15 ++++++++++++++-
1 file changed, 14 insertions(+), 1 deletion(-)
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index 4653834f078f..3afda2dc84dd 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -256,6 +256,17 @@ enum {
PERF_BR_MAX,
};
+/*
+ * Common branch speculation outcome classification
+ */
+enum {
+ PERF_BR_SPEC_NA = 0, /* Not available */
+ PERF_BR_SPEC_WRONG_PATH = 1, /* Speculative but on wrong path */
+ PERF_BR_NON_SPEC_CORRECT_PATH = 2, /* Non-speculative but on correct path */
+ PERF_BR_SPEC_CORRECT_PATH = 3, /* Speculative and on correct path */
+ PERF_BR_SPEC_MAX,
+};
+
#define PERF_SAMPLE_BRANCH_PLM_ALL \
(PERF_SAMPLE_BRANCH_USER|\
PERF_SAMPLE_BRANCH_KERNEL|\
@@ -1360,6 +1371,7 @@ union perf_mem_data_src {
* abort: aborting a hardware transaction
* cycles: cycles from last branch (or 0 if not supported)
* type: branch type
+ * spec: branch speculation info (or 0 if not supported)
*/
struct perf_branch_entry {
__u64 from;
@@ -1370,7 +1382,8 @@ struct perf_branch_entry {
abort:1, /* transaction abort */
cycles:16, /* cycle count to last branch */
type:4, /* branch type */
- reserved:40;
+ spec:2, /* branch speculation info */
+ reserved:38;
};
union perf_sample_weight {
--
2.34.1