From: Kan Liang <[email protected]>
Currently, the additional information of a branch entry is stored in a
single u64. As more and more information is added, the space is running
out. For example, the number of occurrences of events will be added for
each branch.
Two places have been suggested for appending the counters:
https://lore.kernel.org/lkml/[email protected]/
One place is right after the flags of each branch entry. It changes the
existing struct perf_branch_entry, and later arch-specific
implementations would have to be very careful to consistently pick the
right struct.
The other place is right after the entire struct perf_branch_stack. The
disadvantage is that the pointer to the extra space has to be recorded,
and the common interface perf_sample_save_brstack() has to be updated.
The latter is more straightforward and easier to understand and
maintain. It is what this patch implements.
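For reference, the resulting sample layout (also documented in the uapi
header below) appends the counters right after the branch entries:

    { u64 nr;
      { u64 hw_idx; } && PERF_SAMPLE_BRANCH_HW_INDEX
      { u64 from, to, flags } lbr[nr];
      { u64 counters; } cntr[nr] && PERF_SAMPLE_BRANCH_COUNTERS
    } && PERF_SAMPLE_BRANCH_STACK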
Add a new branch sample type, PERF_SAMPLE_BRANCH_COUNTERS, to indicate
an event whose occurrences are recorded in the branch info.
The "u64 counters" may store the occurrences of several events. The
information regarding the number of events/counters and the width of
each counter should be exposed via sysfs as a reference for the perf
tool. Define the branch_counter_nr and branch_counter_width ABI here.
The support will be implemented later in the Intel-specific patch.
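As an illustration only (not part of this patch), a perf-tool-side
consumer could decode one logged u64 with the two sysfs values. The
packing (LSB-first, "width" bits per counter) is an assumption here,
matching the layout used by the later Intel-specific patch:

    #include <stdio.h>
    #include <stdint.h>

    /* Illustrative sketch: decode one "u64 counters" value using the
     * branch_counter_nr (nr) and branch_counter_width (width) caps. */
    static void decode_branch_counters(uint64_t counters, unsigned int nr,
                                       unsigned int width)
    {
            uint64_t mask = (1ULL << width) - 1;
            unsigned int i;

            for (i = 0; i < nr; i++)
                    printf("event %u: %llu occurrences\n", i,
                           (unsigned long long)((counters >> (i * width)) & mask));
    }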
Suggested-by: Peter Zijlstra (Intel) <[email protected]>
Signed-off-by: Kan Liang <[email protected]>
Cc: Sandipan Das <[email protected]>
Cc: Ravi Bangoria <[email protected]>
Cc: Athira Rajeev <[email protected]>
---
No changes since V4
.../testing/sysfs-bus-event_source-devices-caps | 6 ++++++
arch/powerpc/perf/core-book3s.c | 2 +-
arch/x86/events/amd/core.c | 2 +-
arch/x86/events/core.c | 2 +-
arch/x86/events/intel/core.c | 2 +-
arch/x86/events/intel/ds.c | 4 ++--
include/linux/perf_event.h | 17 ++++++++++++++++-
include/uapi/linux/perf_event.h | 10 ++++++++++
kernel/events/core.c | 8 ++++++++
9 files changed, 46 insertions(+), 7 deletions(-)
diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-caps b/Documentation/ABI/testing/sysfs-bus-event_source-devices-caps
index 8757dcf41c08..a5f506f7d481 100644
--- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-caps
+++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-caps
@@ -16,3 +16,9 @@ Description:
Example output in powerpc:
grep . /sys/bus/event_source/devices/cpu/caps/*
/sys/bus/event_source/devices/cpu/caps/pmu_name:POWER9
+
+ The "branch_counter_nr" in the supported platform exposes the
+ maximum number of counters which can be shown in the u64 counters
+ of PERF_SAMPLE_BRANCH_COUNTERS, while the "branch_counter_width"
+ exposes the width of each counter. Both of them can be used by
+ the perf tool to parse the logged counters in each branch.
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 8c1f7def596e..3c14596bbfaf 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -2313,7 +2313,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
struct cpu_hw_events *cpuhw;
cpuhw = this_cpu_ptr(&cpu_hw_events);
power_pmu_bhrb_read(event, cpuhw);
- perf_sample_save_brstack(&data, event, &cpuhw->bhrb_stack);
+ perf_sample_save_brstack(&data, event, &cpuhw->bhrb_stack, NULL);
}
if (event->attr.sample_type & PERF_SAMPLE_DATA_SRC &&
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index e24976593a29..4ee6390b45c9 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -940,7 +940,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
continue;
if (has_branch_stack(event))
- perf_sample_save_brstack(&data, event, &cpuc->lbr_stack);
+ perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL);
if (perf_event_overflow(event, &data, regs))
x86_pmu_stop(event, 0);
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 40ad1425ffa2..40c9af124128 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1702,7 +1702,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
perf_sample_data_init(&data, 0, event->hw.last_period);
if (has_branch_stack(event))
- perf_sample_save_brstack(&data, event, &cpuc->lbr_stack);
+ perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL);
if (perf_event_overflow(event, &data, regs))
x86_pmu_stop(event, 0);
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index a08f794a0e79..41a164764a84 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3047,7 +3047,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
perf_sample_data_init(&data, 0, event->hw.last_period);
if (has_branch_stack(event))
- perf_sample_save_brstack(&data, event, &cpuc->lbr_stack);
+ perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL);
if (perf_event_overflow(event, &data, regs))
x86_pmu_stop(event, 0);
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index dc8204afec76..1d309e97a629 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1755,7 +1755,7 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
setup_pebs_time(event, data, pebs->tsc);
if (has_branch_stack(event))
- perf_sample_save_brstack(data, event, &cpuc->lbr_stack);
+ perf_sample_save_brstack(data, event, &cpuc->lbr_stack, NULL);
}
static void adaptive_pebs_save_regs(struct pt_regs *regs,
@@ -1912,7 +1912,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
if (has_branch_stack(event)) {
intel_pmu_store_pebs_lbrs(lbr);
- perf_sample_save_brstack(data, event, &cpuc->lbr_stack);
+ perf_sample_save_brstack(data, event, &cpuc->lbr_stack, NULL);
}
}
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 0367d748fae0..7897ef066027 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1139,6 +1139,10 @@ static inline bool branch_sample_priv(const struct perf_event *event)
return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_PRIV_SAVE;
}
+static inline bool branch_sample_counters(const struct perf_event *event)
+{
+ return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS;
+}
struct perf_sample_data {
/*
@@ -1173,6 +1177,7 @@ struct perf_sample_data {
struct perf_callchain_entry *callchain;
struct perf_raw_record *raw;
struct perf_branch_stack *br_stack;
+ u64 *br_stack_cntr;
union perf_sample_weight weight;
union perf_mem_data_src data_src;
u64 txn;
@@ -1250,7 +1255,8 @@ static inline void perf_sample_save_raw_data(struct perf_sample_data *data,
static inline void perf_sample_save_brstack(struct perf_sample_data *data,
struct perf_event *event,
- struct perf_branch_stack *brs)
+ struct perf_branch_stack *brs,
+ u64 *brs_cntr)
{
int size = sizeof(u64); /* nr */
@@ -1258,7 +1264,16 @@ static inline void perf_sample_save_brstack(struct perf_sample_data *data,
size += sizeof(u64);
size += brs->nr * sizeof(struct perf_branch_entry);
+ /*
+ * The extension space for counters is appended after the
+ * struct perf_branch_stack. It is used to store the occurrences
+	 * of events on each branch.
+ */
+ if (brs_cntr)
+ size += brs->nr * sizeof(u64);
+
data->br_stack = brs;
+ data->br_stack_cntr = brs_cntr;
data->dyn_size += size;
data->sample_flags |= PERF_SAMPLE_BRANCH_STACK;
}
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 39c6a250dd1b..4461f380425b 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -204,6 +204,8 @@ enum perf_branch_sample_type_shift {
PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT = 18, /* save privilege mode */
+ PERF_SAMPLE_BRANCH_COUNTERS_SHIFT = 19, /* save occurrences of events on a branch */
+
PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */
};
@@ -235,6 +237,8 @@ enum perf_branch_sample_type {
PERF_SAMPLE_BRANCH_PRIV_SAVE = 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT,
+ PERF_SAMPLE_BRANCH_COUNTERS = 1U << PERF_SAMPLE_BRANCH_COUNTERS_SHIFT,
+
PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
};
@@ -982,6 +986,12 @@ enum perf_event_type {
* { u64 nr;
* { u64 hw_idx; } && PERF_SAMPLE_BRANCH_HW_INDEX
* { u64 from, to, flags } lbr[nr];
+ * #
+ * # The format of the counters is decided by the
+ * # "branch_counter_nr" and "branch_counter_width",
+ * # which are defined in the ABI.
+ * #
+ * { u64 counters; } cntr[nr] && PERF_SAMPLE_BRANCH_COUNTERS
* } && PERF_SAMPLE_BRANCH_STACK
*
* { u64 abi; # enum perf_sample_regs_abi
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 3eb26c2c6e65..d27ffd80ed67 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7341,6 +7341,14 @@ void perf_output_sample(struct perf_output_handle *handle,
if (branch_sample_hw_index(event))
perf_output_put(handle, data->br_stack->hw_idx);
perf_output_copy(handle, data->br_stack->entries, size);
+ /*
+ * Add the extension space which is appended
+ * right after the struct perf_branch_stack.
+ */
+ if (data->br_stack_cntr) {
+ size = data->br_stack->nr * sizeof(u64);
+ perf_output_copy(handle, data->br_stack_cntr, size);
+ }
} else {
/*
* we always store at least the value of nr
--
2.35.1
From: Kan Liang <[email protected]>
Currently, branch_sample_type != 0 is used to check whether a branch
stack setup is required. But it doesn't check the sample type, so an
unnecessary branch stack setup may be done for a counting event. E.g.,
perf record -e "{branch-instructions,branch-misses}:S" -j any
Also, an event with only the new PERF_SAMPLE_BRANCH_COUNTERS branch
sample type may not require a branch stack setup either.
Add a new flag, NEEDS_BRANCH_STACK, to indicate whether the event
requires a branch stack setup. Replace needs_branch_stack() with a
check of the new flag.
The counting-event check is implemented here. A later patch will take
the new PERF_SAMPLE_BRANCH_COUNTERS into account.
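For context, the generic helpers involved look roughly like this
(simplified from include/linux/perf_event.h as of this series):

    static inline bool needs_branch_stack(struct perf_event *event)
    {
            return event->attr.branch_sample_type != 0;
    }

    static inline bool is_sampling_event(struct perf_event *event)
    {
            return event->attr.sample_period != 0;
    }

A counting event has sample_period == 0, so the new
"needs_branch_stack(event) && is_sampling_event(event)" gate skips the
branch stack setup for it.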
Signed-off-by: Kan Liang <[email protected]>
---
No changes since V4
arch/x86/events/intel/core.c | 14 +++++++++++---
arch/x86/events/perf_event_flags.h | 1 +
2 files changed, 12 insertions(+), 3 deletions(-)
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 41a164764a84..a99449c0d77c 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2527,9 +2527,14 @@ static void intel_pmu_assign_event(struct perf_event *event, int idx)
perf_report_aux_output_id(event, idx);
}
+static __always_inline bool intel_pmu_needs_branch_stack(struct perf_event *event)
+{
+ return event->hw.flags & PERF_X86_EVENT_NEEDS_BRANCH_STACK;
+}
+
static void intel_pmu_del_event(struct perf_event *event)
{
- if (needs_branch_stack(event))
+ if (intel_pmu_needs_branch_stack(event))
intel_pmu_lbr_del(event);
if (event->attr.precise_ip)
intel_pmu_pebs_del(event);
@@ -2820,7 +2825,7 @@ static void intel_pmu_add_event(struct perf_event *event)
{
if (event->attr.precise_ip)
intel_pmu_pebs_add(event);
- if (needs_branch_stack(event))
+ if (intel_pmu_needs_branch_stack(event))
intel_pmu_lbr_add(event);
}
@@ -3897,7 +3902,10 @@ static int intel_pmu_hw_config(struct perf_event *event)
x86_pmu.pebs_aliases(event);
}
- if (needs_branch_stack(event)) {
+ if (needs_branch_stack(event) && is_sampling_event(event))
+ event->hw.flags |= PERF_X86_EVENT_NEEDS_BRANCH_STACK;
+
+ if (intel_pmu_needs_branch_stack(event)) {
ret = intel_pmu_setup_lbr_filter(event);
if (ret)
return ret;
diff --git a/arch/x86/events/perf_event_flags.h b/arch/x86/events/perf_event_flags.h
index 1dc19b9b4426..a1685981c520 100644
--- a/arch/x86/events/perf_event_flags.h
+++ b/arch/x86/events/perf_event_flags.h
@@ -20,3 +20,4 @@ PERF_ARCH(TOPDOWN, 0x04000) /* Count Topdown slots/metrics events */
PERF_ARCH(PEBS_STLAT, 0x08000) /* st+stlat data address sampling */
PERF_ARCH(AMD_BRS, 0x10000) /* AMD Branch Sampling */
PERF_ARCH(PEBS_LAT_HYBRID, 0x20000) /* ld and st lat for hybrid */
+PERF_ARCH(NEEDS_BRANCH_STACK, 0x40000) /* require branch stack setup */
--
2.35.1
From: Kan Liang <[email protected]>
Add a helper function to check the call stack sample type.
A later patch will invoke the function in several places.
Signed-off-by: Kan Liang <[email protected]>
---
No changes since V4
arch/x86/events/core.c | 2 +-
include/linux/perf_event.h | 5 +++++
2 files changed, 6 insertions(+), 1 deletion(-)
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 40c9af124128..09050641ce5d 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -601,7 +601,7 @@ int x86_pmu_hw_config(struct perf_event *event)
}
}
- if (event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK)
+ if (branch_sample_call_stack(event))
event->attach_state |= PERF_ATTACH_TASK_DATA;
/*
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 7897ef066027..ac1a59c1f252 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1144,6 +1144,11 @@ static inline bool branch_sample_counters(const struct perf_event *event)
return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS;
}
+static inline bool branch_sample_call_stack(const struct perf_event *event)
+{
+ return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK;
+}
+
struct perf_sample_data {
/*
* Fields set by perf_sample_data_init() unconditionally,
--
2.35.1
From: Kan Liang <[email protected]>
Some attrs and is_visible implementations are rather far away from one
another, which makes the whole thing hard to interpret.
There are only two attribute groups which have both .attrs and
.is_visible: group_default and group_caps_lbr. Move them together.
No functional changes.
Suggested-by: Peter Zijlstra (Intel) <[email protected]>
Signed-off-by: Kan Liang <[email protected]>
---
New patch
arch/x86/events/intel/core.c | 30 +++++++++++++++---------------
1 file changed, 15 insertions(+), 15 deletions(-)
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index a99449c0d77c..584b58df7bf6 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -5540,6 +5540,12 @@ static struct attribute *lbr_attrs[] = {
NULL
};
+static umode_t
+lbr_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ return x86_pmu.lbr_nr ? attr->mode : 0;
+}
+
static char pmu_name_str[30];
static ssize_t pmu_name_show(struct device *cdev,
@@ -5566,6 +5572,15 @@ static struct attribute *intel_pmu_attrs[] = {
NULL,
};
+static umode_t
+default_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ if (attr == &dev_attr_allow_tsx_force_abort.attr)
+ return x86_pmu.flags & PMU_FL_TFA ? attr->mode : 0;
+
+ return attr->mode;
+}
+
static umode_t
tsx_is_visible(struct kobject *kobj, struct attribute *attr, int i)
{
@@ -5587,27 +5602,12 @@ mem_is_visible(struct kobject *kobj, struct attribute *attr, int i)
return pebs_is_visible(kobj, attr, i);
}
-static umode_t
-lbr_is_visible(struct kobject *kobj, struct attribute *attr, int i)
-{
- return x86_pmu.lbr_nr ? attr->mode : 0;
-}
-
static umode_t
exra_is_visible(struct kobject *kobj, struct attribute *attr, int i)
{
return x86_pmu.version >= 2 ? attr->mode : 0;
}
-static umode_t
-default_is_visible(struct kobject *kobj, struct attribute *attr, int i)
-{
- if (attr == &dev_attr_allow_tsx_force_abort.attr)
- return x86_pmu.flags & PMU_FL_TFA ? attr->mode : 0;
-
- return attr->mode;
-}
-
static struct attribute_group group_events_td = {
.name = "events",
};
--
2.35.1
From: Kan Liang <[email protected]>
Sync the new sample type for the branch counters feature, and add
PERF_BRANCH_ENTRY_INFO_BITS_MAX (33, the sum of the used bits in the
perf_branch_entry flags word: 1+1+1+1+16+4+2+4+3).
Signed-off-by: Kan Liang <[email protected]>
---
Changes since V4
- Add PERF_BRANCH_ENTRY_INFO_BITS_MAX
tools/include/uapi/linux/perf_event.h | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index 39c6a250dd1b..3a64499b0f5d 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -204,6 +204,8 @@ enum perf_branch_sample_type_shift {
PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT = 18, /* save privilege mode */
+ PERF_SAMPLE_BRANCH_COUNTERS_SHIFT = 19, /* save occurrences of events on a branch */
+
PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */
};
@@ -235,6 +237,8 @@ enum perf_branch_sample_type {
PERF_SAMPLE_BRANCH_PRIV_SAVE = 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT,
+ PERF_SAMPLE_BRANCH_COUNTERS = 1U << PERF_SAMPLE_BRANCH_COUNTERS_SHIFT,
+
PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
};
@@ -982,6 +986,12 @@ enum perf_event_type {
* { u64 nr;
* { u64 hw_idx; } && PERF_SAMPLE_BRANCH_HW_INDEX
* { u64 from, to, flags } lbr[nr];
+ * #
+ * # The format of the counters is decided by the
+ * # "branch_counter_nr" and "branch_counter_width",
+ * # which are defined in the ABI.
+ * #
+ * { u64 counters; } cntr[nr] && PERF_SAMPLE_BRANCH_COUNTERS
* } && PERF_SAMPLE_BRANCH_STACK
*
* { u64 abi; # enum perf_sample_regs_abi
@@ -1427,6 +1437,9 @@ struct perf_branch_entry {
reserved:31;
};
+/* Size of used info bits in struct perf_branch_entry */
+#define PERF_BRANCH_ENTRY_INFO_BITS_MAX 33
+
union perf_sample_weight {
__u64 full;
#if defined(__LITTLE_ENDIAN_BITFIELD)
--
2.35.1
From: Kan Liang <[email protected]>
To support the branch counters feature, the maximum number of supported
counters and the width of each counter are exposed in the sysfs caps
folder. The perf tool can use the information to parse the logged
counters in each branch.
Store the information in the perf_env for later usage.
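On a supporting kernel and CPU, the values can be read like the existing
caps example in the ABI document. The output below is illustrative; the
Intel-specific patch later in this series exposes a width of 2 and up to
4 counters on SRF/GRR:

    grep . /sys/bus/event_source/devices/cpu/caps/branch_counter_*
    /sys/bus/event_source/devices/cpu/caps/branch_counter_nr:4
    /sys/bus/event_source/devices/cpu/caps/branch_counter_width:2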
Signed-off-by: Kan Liang <[email protected]>
---
No changes since V4
tools/perf/util/env.h | 5 +++++
tools/perf/util/header.c | 18 +++++++++++++++---
2 files changed, 20 insertions(+), 3 deletions(-)
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index 4566c51f2fd9..48d7f8759a2a 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -46,6 +46,9 @@ struct hybrid_node {
struct pmu_caps {
int nr_caps;
unsigned int max_branches;
+ unsigned int br_cntr_nr;
+ unsigned int br_cntr_width;
+
char **caps;
char *pmu_name;
};
@@ -62,6 +65,8 @@ struct perf_env {
unsigned long long total_mem;
unsigned int msr_pmu_type;
unsigned int max_branches;
+ unsigned int br_cntr_nr;
+ unsigned int br_cntr_width;
int kernel_is_64_bit;
int nr_cmdline;
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index d812e1e371a7..9664062ba835 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -3256,7 +3256,9 @@ static int process_compressed(struct feat_fd *ff,
}
static int __process_pmu_caps(struct feat_fd *ff, int *nr_caps,
- char ***caps, unsigned int *max_branches)
+ char ***caps, unsigned int *max_branches,
+ unsigned int *br_cntr_nr,
+ unsigned int *br_cntr_width)
{
char *name, *value, *ptr;
u32 nr_pmu_caps, i;
@@ -3291,6 +3293,12 @@ static int __process_pmu_caps(struct feat_fd *ff, int *nr_caps,
if (!strcmp(name, "branches"))
*max_branches = atoi(value);
+ if (!strcmp(name, "branch_counter_nr"))
+ *br_cntr_nr = atoi(value);
+
+ if (!strcmp(name, "branch_counter_width"))
+ *br_cntr_width = atoi(value);
+
free(value);
free(name);
}
@@ -3315,7 +3323,9 @@ static int process_cpu_pmu_caps(struct feat_fd *ff,
{
int ret = __process_pmu_caps(ff, &ff->ph->env.nr_cpu_pmu_caps,
&ff->ph->env.cpu_pmu_caps,
- &ff->ph->env.max_branches);
+ &ff->ph->env.max_branches,
+ &ff->ph->env.br_cntr_nr,
+ &ff->ph->env.br_cntr_width);
if (!ret && !ff->ph->env.cpu_pmu_caps)
pr_debug("cpu pmu capabilities not available\n");
@@ -3344,7 +3354,9 @@ static int process_pmu_caps(struct feat_fd *ff, void *data __maybe_unused)
for (i = 0; i < nr_pmu; i++) {
ret = __process_pmu_caps(ff, &pmu_caps[i].nr_caps,
&pmu_caps[i].caps,
- &pmu_caps[i].max_branches);
+ &pmu_caps[i].max_branches,
+ &pmu_caps[i].br_cntr_nr,
+ &pmu_caps[i].br_cntr_width);
if (ret)
goto err;
--
2.35.1
From: Kan Liang <[email protected]>
Add a new branch filter, "counter", for the branch counter option. It is
used to mark the events which should be logged in the branch. If it is
applied with the -j option, the counters of all the events should be
logged in the branch. If the legacy kernel doesn't support the new
branch sample type, switching off the branch counter filter.
The stored counter values in each branch are displayed right after the
regular branch stack information via perf report -D.
Usage examples:
perf record -e "{branch-instructions,branch-misses}:S" -j any,counter
Only the first event, branch-instructions, collects the LBR. Both
branch-instructions and branch-misses are marked as logged events.
Their occurrence counts can be found in the branch stack extension
space of each branch.
perf record -e "{cpu/branch-instructions,branch_type=any/,
cpu/branch-misses,branch_type=counter/}"
Only the first event, branch-instructions, collects the LBR. Only the
branch-misses event is marked as a logged event.
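Based on the print format added to session.c below, the dump looks
roughly as follows (all values are illustrative):

    ... branch stack counters: nr:8 (counter width: 2 max counter nr:4)
    .....  0: 0000000000000002
    .....  1: 0000000000000005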
Signed-off-by: Kan Liang <[email protected]>
---
No changes since V4
tools/perf/Documentation/perf-record.txt | 4 +++
tools/perf/util/evsel.c | 31 ++++++++++++++++++++++-
tools/perf/util/evsel.h | 1 +
tools/perf/util/parse-branch-options.c | 1 +
tools/perf/util/perf_event_attr_fprintf.c | 1 +
tools/perf/util/sample.h | 1 +
tools/perf/util/session.c | 15 +++++++++--
7 files changed, 51 insertions(+), 3 deletions(-)
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index d5217be012d7..b6afe7cc948d 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -442,6 +442,10 @@ following filters are defined:
4th-Gen Xeon+ server), the save branch type is unconditionally enabled
when the taken branch stack sampling is enabled.
- priv: save privilege state during sampling in case binary is not available later
+ - counter: save occurrences of the event since the last branch entry. Currently, the
+            feature is only supported on newer CPUs, e.g., Intel Sierra Forest and
+            later platforms. An error is expected if it's used on an unsupported
+            kernel or CPU.
+
The option requires at least one branch type among any, any_call, any_ret, ind_call, cond.
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index a8a5ff87cc1f..58a9b8c82790 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1831,6 +1831,8 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
static void evsel__disable_missing_features(struct evsel *evsel)
{
+ if (perf_missing_features.branch_counters)
+ evsel->core.attr.branch_sample_type &= ~PERF_SAMPLE_BRANCH_COUNTERS;
if (perf_missing_features.read_lost)
evsel->core.attr.read_format &= ~PERF_FORMAT_LOST;
if (perf_missing_features.weight_struct) {
@@ -1884,7 +1886,12 @@ bool evsel__detect_missing_features(struct evsel *evsel)
* Must probe features in the order they were added to the
* perf_event_attr interface.
*/
- if (!perf_missing_features.read_lost &&
+ if (!perf_missing_features.branch_counters &&
+ (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS)) {
+ perf_missing_features.branch_counters = true;
+ pr_debug2("switching off branch counters support\n");
+ return true;
+ } else if (!perf_missing_features.read_lost &&
(evsel->core.attr.read_format & PERF_FORMAT_LOST)) {
perf_missing_features.read_lost = true;
pr_debug2("switching off PERF_FORMAT_LOST support\n");
@@ -2344,6 +2351,18 @@ u64 evsel__bitfield_swap_branch_flags(u64 value)
return new_val;
}
+static inline bool evsel__has_branch_counters(const struct evsel *evsel)
+{
+ struct evsel *cur, *leader = evsel__leader(evsel);
+
+ evlist__for_each_entry(evsel->evlist, cur) {
+ if ((leader == evsel__leader(cur)) &&
+ (cur->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS))
+ return true;
+ }
+ return false;
+}
+
int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
struct perf_sample *data)
{
@@ -2577,6 +2596,16 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
OVERFLOW_CHECK(array, sz, max_size);
array = (void *)array + sz;
+
+ if (evsel__has_branch_counters(evsel)) {
+ OVERFLOW_CHECK_u64(array);
+
+ data->branch_stack_cntr = (u64 *)array;
+ sz = data->branch_stack->nr * sizeof(u64);
+
+ OVERFLOW_CHECK(array, sz, max_size);
+ array = (void *)array + sz;
+ }
}
if (type & PERF_SAMPLE_REGS_USER) {
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 848534ec74fa..85f24c986392 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -191,6 +191,7 @@ struct perf_missing_features {
bool code_page_size;
bool weight_struct;
bool read_lost;
+ bool branch_counters;
};
extern struct perf_missing_features perf_missing_features;
diff --git a/tools/perf/util/parse-branch-options.c b/tools/perf/util/parse-branch-options.c
index fd67d204d720..f7f7aff3d85a 100644
--- a/tools/perf/util/parse-branch-options.c
+++ b/tools/perf/util/parse-branch-options.c
@@ -36,6 +36,7 @@ static const struct branch_mode branch_modes[] = {
BRANCH_OPT("stack", PERF_SAMPLE_BRANCH_CALL_STACK),
BRANCH_OPT("hw_index", PERF_SAMPLE_BRANCH_HW_INDEX),
BRANCH_OPT("priv", PERF_SAMPLE_BRANCH_PRIV_SAVE),
+ BRANCH_OPT("counter", PERF_SAMPLE_BRANCH_COUNTERS),
BRANCH_END
};
diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c
index 2247991451f3..8f04d3b7f3ec 100644
--- a/tools/perf/util/perf_event_attr_fprintf.c
+++ b/tools/perf/util/perf_event_attr_fprintf.c
@@ -55,6 +55,7 @@ static void __p_branch_sample_type(char *buf, size_t size, u64 value)
bit_name(COND), bit_name(CALL_STACK), bit_name(IND_JUMP),
bit_name(CALL), bit_name(NO_FLAGS), bit_name(NO_CYCLES),
bit_name(TYPE_SAVE), bit_name(HW_INDEX), bit_name(PRIV_SAVE),
+ bit_name(COUNTERS),
{ .name = NULL, }
};
#undef bit_name
diff --git a/tools/perf/util/sample.h b/tools/perf/util/sample.h
index c92ad0f51ecd..70b2c3135555 100644
--- a/tools/perf/util/sample.h
+++ b/tools/perf/util/sample.h
@@ -113,6 +113,7 @@ struct perf_sample {
void *raw_data;
struct ip_callchain *callchain;
struct branch_stack *branch_stack;
+ u64 *branch_stack_cntr;
struct regs_dump user_regs;
struct regs_dump intr_regs;
struct stack_dump user_stack;
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 1e9aa8ed15b6..4a094ab0362b 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1150,9 +1150,13 @@ static void callchain__printf(struct evsel *evsel,
i, callchain->ips[i]);
}
-static void branch_stack__printf(struct perf_sample *sample, bool callstack)
+static void branch_stack__printf(struct perf_sample *sample,
+ struct evsel *evsel)
{
struct branch_entry *entries = perf_sample__branch_entries(sample);
+ bool callstack = evsel__has_branch_callstack(evsel);
+ u64 *branch_stack_cntr = sample->branch_stack_cntr;
+ struct perf_env *env = evsel__env(evsel);
uint64_t i;
if (!callstack) {
@@ -1194,6 +1198,13 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack)
}
}
}
+
+ if (branch_stack_cntr) {
+ printf("... branch stack counters: nr:%" PRIu64 " (counter width: %u max counter nr:%u)\n",
+ sample->branch_stack->nr, env->br_cntr_width, env->br_cntr_nr);
+ for (i = 0; i < sample->branch_stack->nr; i++)
+ printf("..... %2"PRIu64": %016" PRIx64 "\n", i, branch_stack_cntr[i]);
+ }
}
static void regs_dump__printf(u64 mask, u64 *regs, const char *arch)
@@ -1355,7 +1366,7 @@ static void dump_sample(struct evsel *evsel, union perf_event *event,
callchain__printf(evsel, sample);
if (evsel__has_br_stack(evsel))
- branch_stack__printf(sample, evsel__has_branch_callstack(evsel));
+ branch_stack__printf(sample, evsel);
if (sample_type & PERF_SAMPLE_REGS_USER)
regs_user__printf(sample, arch);
--
2.35.1
From: Kan Liang <[email protected]>
The branch counters logging (a.k.a. LBR event logging) introduces a
per-counter indication of precise event occurrences in LBRs. It can
provide a means to attribute exposed retirement latency to combinations
of events across a block of instructions. It also provides a means of
attributing Timed LBR latencies to events.
The feature is first introduced on SRF/GRR. It is an enhancement of
ARCH LBR. It adds new fields in the LBR_INFO MSRs to log the occurrences
of events on the GP counters. The information is logged in counter
order.
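For orientation, the new fields occupy bits 39:32 of each LBR_INFO value
(see the msr-index.h hunk below): 4 counters x 2 bits. A minimal,
illustrative extraction sketch:

    /* Illustrative only: pull the per-GP-counter fields out of one
     * LBR_INFO value (4 counters x 2 bits, starting at bit 32). */
    u64 cntrs = (info >> 32) & 0xffULL; /* all 8 counter bits */
    u64 cnt0  = cntrs & 0x3;            /* occurrences on GP counter 0 */
    u64 cnt3  = (cntrs >> 6) & 0x3;     /* occurrences on GP counter 3 */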
The design proposed in this patch requires that the logged events be in
the same group as the event that samples the LBR. If there is more than
one LBR group, only the counter information from the current
(overflowed) group is stored for the perf tool; otherwise, the perf
tool cannot know which other groups were scheduled, and when,
especially once multiplexing is triggered. The user can use the maximum
number of counters that support LBR info (currently 4) by making the
group large enough.
The HW logs events only in counter order, which may differ from the
enabling order that the perf tool understands. When parsing the
information of each branch entry, convert the counter order to the
enabled order, and store the enabled order in the extension space.
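A worked example of the conversion (values are illustrative): suppose
the group enables two logged events, the leader on GP counter 2 and a
sibling on GP counter 0, so the enabled order is {2, 0}. If a branch
logged 3 occurrences on counter 2 and 1 on counter 0, the raw
counter-order field is 0x31 (counter 0 in bits 1:0, counter 2 in bits
5:4), and the reordered value stored for the perf tool is 0x7 (the
leader's 3 in bits 1:0, the sibling's 1 in bits 3:2).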
Unconditionally reset LBRs for an LBR event group when it's deleted. The
logged counter information is only valid for the current LBR group. If
another LBR group is scheduled later, the information from the stale
LBRs would otherwise be wrongly interpreted.
Add a sanity check in intel_pmu_hw_config(). Disable the feature if other
counter filters (inv, cmask, edge, in_tx) are set or LBR call stack mode
is enabled. (For the LBR call stack mode, we cannot simply flush the
LBR, since it will break the call stack. Also, there is no obvious usage
with the call stack mode for now.)
Applying only PERF_SAMPLE_BRANCH_COUNTERS doesn't require any branch
stack setup.
Expose the maximum number of supported counters and the width of the
counters into the sysfs. The perf tool can use the information to parse
the logged counters in each branch.
Signed-off-by: Kan Liang <[email protected]>
---
Changes since V4:
- Only reset LBR once for a branch counter group when the group is
deleted. Force the leader event of a branch counter group to be an LBR event.
- Use the name "branch counters" to replace "event logging"
- Use the suggested code from PeterZ for branch counter reordering
- Check the max number for a branch counter group in hw_config
- Use the reserved field to store the temporary branch counters
information
arch/x86/events/intel/core.c | 103 +++++++++++++++++++++++++++--
arch/x86/events/intel/ds.c | 2 +-
arch/x86/events/intel/lbr.c | 85 +++++++++++++++++++++++-
arch/x86/events/perf_event.h | 12 ++++
arch/x86/events/perf_event_flags.h | 1 +
arch/x86/include/asm/msr-index.h | 5 ++
arch/x86/include/asm/perf_event.h | 4 ++
include/uapi/linux/perf_event.h | 3 +
8 files changed, 207 insertions(+), 8 deletions(-)
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 584b58df7bf6..3a249b026d5c 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2792,6 +2792,7 @@ static void intel_pmu_enable_fixed(struct perf_event *event)
static void intel_pmu_enable_event(struct perf_event *event)
{
+ u64 enable_mask = ARCH_PERFMON_EVENTSEL_ENABLE;
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
@@ -2800,8 +2801,10 @@ static void intel_pmu_enable_event(struct perf_event *event)
switch (idx) {
case 0 ... INTEL_PMC_IDX_FIXED - 1:
+ if (branch_sample_counters(event))
+ enable_mask |= ARCH_PERFMON_EVENTSEL_BR_CNTR;
intel_set_masks(event, idx);
- __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
+ __x86_pmu_enable_event(hwc, enable_mask);
break;
case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1:
case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
@@ -3052,7 +3055,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
perf_sample_data_init(&data, 0, event->hw.last_period);
if (has_branch_stack(event))
- perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL);
+ intel_pmu_lbr_save_brstack(&data, cpuc, event);
if (perf_event_overflow(event, &data, regs))
x86_pmu_stop(event, 0);
@@ -3617,6 +3620,13 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
if (cpuc->excl_cntrs)
return intel_get_excl_constraints(cpuc, event, idx, c2);
+ /* Not all counters support the branch counter feature. */
+ if (branch_sample_counters(event)) {
+ c2 = dyn_constraint(cpuc, c2, idx);
+ c2->idxmsk64 &= x86_pmu.lbr_counters;
+ c2->weight = hweight64(c2->idxmsk64);
+ }
+
return c2;
}
@@ -3905,6 +3915,58 @@ static int intel_pmu_hw_config(struct perf_event *event)
if (needs_branch_stack(event) && is_sampling_event(event))
event->hw.flags |= PERF_X86_EVENT_NEEDS_BRANCH_STACK;
+ if (branch_sample_counters(event)) {
+ struct perf_event *leader, *sibling;
+ int num = 0;
+
+ if (!(x86_pmu.flags & PMU_FL_BR_CNTR) ||
+ (event->attr.config & ~INTEL_ARCH_EVENT_MASK))
+ return -EINVAL;
+
+ /*
+ * The branch counter logging is not supported in the call stack
+ * mode yet, since we cannot simply flush the LBR during e.g.,
+ * multiplexing. Also, there is no obvious usage with the call
+	 * stack mode. Simply forbid it for now.
+ *
+ * If any events in the group enable the branch counter logging
+ * feature, the group is treated as a branch counter logging
+ * group, which requires the extra space to store the counters.
+ */
+ leader = event->group_leader;
+ if (branch_sample_call_stack(leader))
+ return -EINVAL;
+ if (branch_sample_counters(leader))
+ num++;
+ leader->hw.flags |= PERF_X86_EVENT_BRANCH_COUNTERS;
+
+ for_each_sibling_event(sibling, leader) {
+ if (branch_sample_call_stack(sibling))
+ return -EINVAL;
+ if (branch_sample_counters(sibling))
+ num++;
+ }
+
+ if (num > fls(x86_pmu.lbr_counters))
+ return -EINVAL;
+ /*
+	 * Applying only PERF_SAMPLE_BRANCH_COUNTERS doesn't require
+	 * any branch stack setup.
+ * Clear the bit to avoid unnecessary branch stack setup.
+ */
+ if (0 == (event->attr.branch_sample_type &
+ ~(PERF_SAMPLE_BRANCH_PLM_ALL |
+ PERF_SAMPLE_BRANCH_COUNTERS)))
+ event->hw.flags &= ~PERF_X86_EVENT_NEEDS_BRANCH_STACK;
+
+ /*
+	 * Force the leader to be an LBR event, so LBRs can be reset
+ * with the leader event. See intel_pmu_lbr_del() for details.
+ */
+ if (!intel_pmu_needs_branch_stack(leader))
+ return -EINVAL;
+ }
+
if (intel_pmu_needs_branch_stack(event)) {
ret = intel_pmu_setup_lbr_filter(event);
if (ret)
@@ -4383,8 +4445,13 @@ cmt_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
*/
if (event->attr.precise_ip == 3) {
/* Force instruction:ppp on PMC0, 1 and Fixed counter 0 */
- if (constraint_match(&fixed0_constraint, event->hw.config))
- return &fixed0_counter0_1_constraint;
+ if (constraint_match(&fixed0_constraint, event->hw.config)) {
+ /* The fixed counter 0 doesn't support LBR event logging. */
+ if (branch_sample_counters(event))
+ return &counter0_1_constraint;
+ else
+ return &fixed0_counter0_1_constraint;
+ }
switch (c->idxmsk64 & 0x3ull) {
case 0x1:
@@ -4563,7 +4630,7 @@ int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu)
goto err;
}
- if (x86_pmu.flags & (PMU_FL_EXCL_CNTRS | PMU_FL_TFA)) {
+ if (x86_pmu.flags & (PMU_FL_EXCL_CNTRS | PMU_FL_TFA | PMU_FL_BR_CNTR)) {
size_t sz = X86_PMC_IDX_MAX * sizeof(struct event_constraint);
cpuc->constraint_list = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu));
@@ -5535,15 +5602,39 @@ static ssize_t branches_show(struct device *cdev,
static DEVICE_ATTR_RO(branches);
+static ssize_t branch_counter_nr_show(struct device *cdev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%d\n", fls(x86_pmu.lbr_counters));
+}
+
+static DEVICE_ATTR_RO(branch_counter_nr);
+
+static ssize_t branch_counter_width_show(struct device *cdev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "2\n");
+}
+
+static DEVICE_ATTR_RO(branch_counter_width);
+
static struct attribute *lbr_attrs[] = {
&dev_attr_branches.attr,
+ &dev_attr_branch_counter_nr.attr,
+ &dev_attr_branch_counter_width.attr,
NULL
};
static umode_t
lbr_is_visible(struct kobject *kobj, struct attribute *attr, int i)
{
- return x86_pmu.lbr_nr ? attr->mode : 0;
+ /* branches */
+ if (i == 0)
+ return x86_pmu.lbr_nr ? attr->mode : 0;
+
+ return (x86_pmu.flags & PMU_FL_BR_CNTR) ? attr->mode : 0;
}
static char pmu_name_str[30];
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 1d309e97a629..98bda9796a28 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1912,7 +1912,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
if (has_branch_stack(event)) {
intel_pmu_store_pebs_lbrs(lbr);
- perf_sample_save_brstack(data, event, &cpuc->lbr_stack, NULL);
+ intel_pmu_lbr_save_brstack(data, cpuc, event);
}
}
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index c3b0d15a9841..78cd5084104e 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -676,6 +676,25 @@ void intel_pmu_lbr_del(struct perf_event *event)
WARN_ON_ONCE(cpuc->lbr_users < 0);
WARN_ON_ONCE(cpuc->lbr_pebs_users < 0);
perf_sched_cb_dec(event->pmu);
+
+ /*
+ * The logged occurrences information is only valid for the
+ * current LBR group. If another LBR group is scheduled in
+ * later, the information from the stale LBRs will be wrongly
+ * interpreted. Reset the LBRs here.
+ *
+ * Only clear once for a branch counter group with the leader
+ * event. Because
+	 * - Cannot simply reset the LBRs with !cpuc->lbr_users,
+	 *   because it's possible that the last LBR user is not in a
+ * branch counter group, e.g., a branch_counters group +
+ * several normal LBR events.
+ * - The LBR reset can be done with any one of the events in a
+ * branch counter group, since they are always scheduled together.
+	 * It's easy to force the leader event to be an LBR event.
+ */
+ if (is_branch_counters_group(event) && event == event->group_leader)
+ intel_pmu_lbr_reset();
}
static inline bool vlbr_exclude_host(void)
@@ -866,6 +885,8 @@ static __always_inline u16 get_lbr_cycles(u64 info)
return cycles;
}
+static_assert((64 - PERF_BRANCH_ENTRY_INFO_BITS_MAX) > LBR_INFO_BR_CNTR_NUM * LBR_INFO_BR_CNTR_BITS);
+
static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
struct lbr_entry *entries)
{
@@ -898,11 +919,67 @@ static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
e->abort = !!(info & LBR_INFO_ABORT);
e->cycles = get_lbr_cycles(info);
e->type = get_lbr_br_type(info);
+
+ /*
+ * Leverage the reserved field of cpuc->lbr_entries[i] to
+ * temporarily store the branch counters information.
+ * The later code will decide what content can be disclosed
+	 * to the perf tool. Please see intel_pmu_lbr_counters_reorder().
+ */
+ e->reserved = (info >> LBR_INFO_BR_CNTR_OFFSET) & LBR_INFO_BR_CNTR_FULL_MASK;
}
cpuc->lbr_stack.nr = i;
}
+/*
+ * The enabled order may be different from the counter order.
+ * Update the lbr_counters with the enabled order.
+ */
+static void intel_pmu_lbr_counters_reorder(struct cpu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ int i, j, pos = 0, order[X86_PMC_IDX_MAX];
+ struct perf_event *leader, *sibling;
+ u64 src, dst, cnt;
+
+ leader = event->group_leader;
+ if (branch_sample_counters(leader))
+ order[pos++] = leader->hw.idx;
+
+ for_each_sibling_event(sibling, leader) {
+ if (!branch_sample_counters(sibling))
+ continue;
+ order[pos++] = sibling->hw.idx;
+ }
+
+ WARN_ON_ONCE(!pos);
+
+ for (i = 0; i < cpuc->lbr_stack.nr; i++) {
+ src = cpuc->lbr_entries[i].reserved;
+ dst = 0;
+ for (j = 0; j < pos; j++) {
+ cnt = (src >> (order[j] * LBR_INFO_BR_CNTR_BITS)) & LBR_INFO_BR_CNTR_MASK;
+ dst |= cnt << j * LBR_INFO_BR_CNTR_BITS;
+ }
+ cpuc->lbr_counters[i] = dst;
+ cpuc->lbr_entries[i].reserved = 0;
+ }
+}
+
+void intel_pmu_lbr_save_brstack(struct perf_sample_data *data,
+ struct cpu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ if (is_branch_counters_group(event)) {
+ intel_pmu_lbr_counters_reorder(cpuc, event);
+ perf_sample_save_brstack(data, event, &cpuc->lbr_stack, cpuc->lbr_counters);
+ return;
+ }
+
+ perf_sample_save_brstack(data, event, &cpuc->lbr_stack, NULL);
+}
+
static void intel_pmu_arch_lbr_read(struct cpu_hw_events *cpuc)
{
intel_pmu_store_lbr(cpuc, NULL);
@@ -1173,8 +1250,10 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
for (i = 0; i < cpuc->lbr_stack.nr; ) {
if (!cpuc->lbr_entries[i].from) {
j = i;
- while (++j < cpuc->lbr_stack.nr)
+ while (++j < cpuc->lbr_stack.nr) {
cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j];
+ cpuc->lbr_counters[j-1] = cpuc->lbr_counters[j];
+ }
cpuc->lbr_stack.nr--;
if (!cpuc->lbr_entries[i].from)
continue;
@@ -1525,8 +1604,12 @@ void __init intel_pmu_arch_lbr_init(void)
x86_pmu.lbr_mispred = ecx.split.lbr_mispred;
x86_pmu.lbr_timed_lbr = ecx.split.lbr_timed_lbr;
x86_pmu.lbr_br_type = ecx.split.lbr_br_type;
+ x86_pmu.lbr_counters = ecx.split.lbr_counters;
x86_pmu.lbr_nr = lbr_nr;
+ if (!!x86_pmu.lbr_counters)
+ x86_pmu.flags |= PMU_FL_BR_CNTR;
+
if (x86_pmu.lbr_mispred)
static_branch_enable(&x86_lbr_mispred);
if (x86_pmu.lbr_timed_lbr)
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 53dd5d495ba6..fb56518356ec 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -110,6 +110,11 @@ static inline bool is_topdown_event(struct perf_event *event)
return is_metric_event(event) || is_slots_event(event);
}
+static inline bool is_branch_counters_group(struct perf_event *event)
+{
+ return event->group_leader->hw.flags & PERF_X86_EVENT_BRANCH_COUNTERS;
+}
+
struct amd_nb {
int nb_id; /* NorthBridge id */
int refcnt; /* reference count */
@@ -283,6 +288,7 @@ struct cpu_hw_events {
int lbr_pebs_users;
struct perf_branch_stack lbr_stack;
struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];
+ u64 lbr_counters[MAX_LBR_ENTRIES]; /* branch stack extra */
union {
struct er_account *lbr_sel;
struct er_account *lbr_ctl;
@@ -888,6 +894,7 @@ struct x86_pmu {
unsigned int lbr_mispred:1;
unsigned int lbr_timed_lbr:1;
unsigned int lbr_br_type:1;
+ unsigned int lbr_counters:4;
void (*lbr_reset)(void);
void (*lbr_read)(struct cpu_hw_events *cpuc);
@@ -1012,6 +1019,7 @@ do { \
#define PMU_FL_INSTR_LATENCY 0x80 /* Support Instruction Latency in PEBS Memory Info Record */
#define PMU_FL_MEM_LOADS_AUX 0x100 /* Require an auxiliary event for the complete memory info */
#define PMU_FL_RETIRE_LATENCY 0x200 /* Support Retire Latency in PEBS */
+#define PMU_FL_BR_CNTR 0x400 /* Support branch counter logging */
#define EVENT_VAR(_id) event_attr_##_id
#define EVENT_PTR(_id) &event_attr_##_id.attr.attr
@@ -1552,6 +1560,10 @@ void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr);
void intel_ds_init(void);
+void intel_pmu_lbr_save_brstack(struct perf_sample_data *data,
+ struct cpu_hw_events *cpuc,
+ struct perf_event *event);
+
void intel_pmu_lbr_swap_task_ctx(struct perf_event_pmu_context *prev_epc,
struct perf_event_pmu_context *next_epc);
diff --git a/arch/x86/events/perf_event_flags.h b/arch/x86/events/perf_event_flags.h
index a1685981c520..6c977c19f2cd 100644
--- a/arch/x86/events/perf_event_flags.h
+++ b/arch/x86/events/perf_event_flags.h
@@ -21,3 +21,4 @@ PERF_ARCH(PEBS_STLAT, 0x08000) /* st+stlat data address sampling */
PERF_ARCH(AMD_BRS, 0x10000) /* AMD Branch Sampling */
PERF_ARCH(PEBS_LAT_HYBRID, 0x20000) /* ld and st lat for hybrid */
PERF_ARCH(NEEDS_BRANCH_STACK, 0x40000) /* require branch stack setup */
+PERF_ARCH(BRANCH_COUNTERS, 0x80000) /* logs the counters in the extra space of each branch */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index dc159acb350a..27e6f1c89fbf 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -236,6 +236,11 @@
#define LBR_INFO_CYCLES 0xffff
#define LBR_INFO_BR_TYPE_OFFSET 56
#define LBR_INFO_BR_TYPE (0xfull << LBR_INFO_BR_TYPE_OFFSET)
+#define LBR_INFO_BR_CNTR_OFFSET 32
+#define LBR_INFO_BR_CNTR_NUM 4
+#define LBR_INFO_BR_CNTR_BITS 2
+#define LBR_INFO_BR_CNTR_MASK GENMASK_ULL(LBR_INFO_BR_CNTR_BITS - 1, 0)
+#define LBR_INFO_BR_CNTR_FULL_MASK GENMASK_ULL(LBR_INFO_BR_CNTR_NUM * LBR_INFO_BR_CNTR_BITS - 1, 0)
#define MSR_ARCH_LBR_CTL 0x000014ce
#define ARCH_LBR_CTL_LBREN BIT(0)
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 2618ec7c3d1d..3736b8a46c04 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -31,6 +31,7 @@
#define ARCH_PERFMON_EVENTSEL_ENABLE (1ULL << 22)
#define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23)
#define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL
+#define ARCH_PERFMON_EVENTSEL_BR_CNTR (1ULL << 35)
#define INTEL_FIXED_BITS_MASK 0xFULL
#define INTEL_FIXED_BITS_STRIDE 4
@@ -223,6 +224,9 @@ union cpuid28_ecx {
unsigned int lbr_timed_lbr:1;
/* Branch Type Field Supported */
unsigned int lbr_br_type:1;
+ unsigned int reserved:13;
+ /* Branch counters (Event Logging) Supported */
+ unsigned int lbr_counters:4;
} split;
unsigned int full;
};
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 4461f380425b..3a64499b0f5d 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -1437,6 +1437,9 @@ struct perf_branch_entry {
reserved:31;
};
+/* Size of used info bits in struct perf_branch_entry */
+#define PERF_BRANCH_ENTRY_INFO_BITS_MAX 33
+
union perf_sample_weight {
__u64 full;
#if defined(__LITTLE_ENDIAN_BITFIELD)
--
2.35.1
On Wed, Oct 25, 2023 at 1:16 PM <[email protected]> wrote:
>
> From: Kan Liang <[email protected]>
>
> To support the branch counters feature, the information of the maximum
> number of supported counters and the width of the counters is exposed
> in the sysfs caps folder. The perf tool can use the information to parse
> the logged counters in each branch.
>
> Store the information in the perf_env for later usage.
>
> Signed-off-by: Kan Liang <[email protected]>
Reviewed-by: Ian Rogers <[email protected]>
Thanks,
Ian
> ---
>
> No changes since V4
>
> tools/perf/util/env.h | 5 +++++
> tools/perf/util/header.c | 18 +++++++++++++++---
> 2 files changed, 20 insertions(+), 3 deletions(-)
>
> diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
> index 4566c51f2fd9..48d7f8759a2a 100644
> --- a/tools/perf/util/env.h
> +++ b/tools/perf/util/env.h
> @@ -46,6 +46,9 @@ struct hybrid_node {
> struct pmu_caps {
> int nr_caps;
> unsigned int max_branches;
> + unsigned int br_cntr_nr;
> + unsigned int br_cntr_width;
> +
> char **caps;
> char *pmu_name;
> };
> @@ -62,6 +65,8 @@ struct perf_env {
> unsigned long long total_mem;
> unsigned int msr_pmu_type;
> unsigned int max_branches;
> + unsigned int br_cntr_nr;
> + unsigned int br_cntr_width;
> int kernel_is_64_bit;
>
> int nr_cmdline;
> diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
> index d812e1e371a7..9664062ba835 100644
> --- a/tools/perf/util/header.c
> +++ b/tools/perf/util/header.c
> @@ -3256,7 +3256,9 @@ static int process_compressed(struct feat_fd *ff,
> }
>
> static int __process_pmu_caps(struct feat_fd *ff, int *nr_caps,
> - char ***caps, unsigned int *max_branches)
> + char ***caps, unsigned int *max_branches,
> + unsigned int *br_cntr_nr,
> + unsigned int *br_cntr_width)
> {
> char *name, *value, *ptr;
> u32 nr_pmu_caps, i;
> @@ -3291,6 +3293,12 @@ static int __process_pmu_caps(struct feat_fd *ff, int *nr_caps,
> if (!strcmp(name, "branches"))
> *max_branches = atoi(value);
>
> + if (!strcmp(name, "branch_counter_nr"))
> + *br_cntr_nr = atoi(value);
> +
> + if (!strcmp(name, "branch_counter_width"))
> + *br_cntr_width = atoi(value);
> +
> free(value);
> free(name);
> }
> @@ -3315,7 +3323,9 @@ static int process_cpu_pmu_caps(struct feat_fd *ff,
> {
> int ret = __process_pmu_caps(ff, &ff->ph->env.nr_cpu_pmu_caps,
> &ff->ph->env.cpu_pmu_caps,
> - &ff->ph->env.max_branches);
> + &ff->ph->env.max_branches,
> + &ff->ph->env.br_cntr_nr,
> + &ff->ph->env.br_cntr_width);
>
> if (!ret && !ff->ph->env.cpu_pmu_caps)
> pr_debug("cpu pmu capabilities not available\n");
> @@ -3344,7 +3354,9 @@ static int process_pmu_caps(struct feat_fd *ff, void *data __maybe_unused)
> for (i = 0; i < nr_pmu; i++) {
> ret = __process_pmu_caps(ff, &pmu_caps[i].nr_caps,
> &pmu_caps[i].caps,
> - &pmu_caps[i].max_branches);
> + &pmu_caps[i].max_branches,
> + &pmu_caps[i].br_cntr_nr,
> + &pmu_caps[i].br_cntr_width);
> if (ret)
> goto err;
>
> --
> 2.35.1
>
On Wed, Oct 25, 2023 at 1:16 PM <[email protected]> wrote:
>
> From: Kan Liang <[email protected]>
>
> Add a new branch filter, "counter", for the branch counter option. It is
> used to mark the events which should be logged in the branch. If it is
> applied with the -j option, the counters of all the events should be
> logged in the branch. If the legacy kernel doesn't support the new
> branch sample type, switching off the branch counter filter.
>
> The stored counter values in each branch are displayed right after the
> regular branch stack information via perf report -D.
>
> Usage examples:
>
> perf record -e "{branch-instructions,branch-misses}:S" -j any,counter
>
> Only the first event, branch-instructions, collect the LBR. Both
> branch-instructions and branch-misses are marked as logged events.
> The occurrences information of them can be found in the branch stack
> extension space of each branch.
>
> perf record -e "{cpu/branch-instructions,branch_type=any/,
> cpu/branch-misses,branch_type=counter/}"
>
> Only the first event, branch-instructions, collect the LBR. Only the
> branch-misses event is marked as a logged event.
>
> Signed-off-by: Kan Liang <[email protected]>
Reviewed-by: Ian Rogers <[email protected]>
Perhaps add a test somewhere like:
https://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git/tree/tools/perf/tests/shell/record.sh
for coverage when hardware supports the feature.
Thanks,
Ian
> ---
>
> No changes since V4
>
> tools/perf/Documentation/perf-record.txt | 4 +++
> tools/perf/util/evsel.c | 31 ++++++++++++++++++++++-
> tools/perf/util/evsel.h | 1 +
> tools/perf/util/parse-branch-options.c | 1 +
> tools/perf/util/perf_event_attr_fprintf.c | 1 +
> tools/perf/util/sample.h | 1 +
> tools/perf/util/session.c | 15 +++++++++--
> 7 files changed, 51 insertions(+), 3 deletions(-)
>
> diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
> index d5217be012d7..b6afe7cc948d 100644
> --- a/tools/perf/Documentation/perf-record.txt
> +++ b/tools/perf/Documentation/perf-record.txt
> @@ -442,6 +442,10 @@ following filters are defined:
> 4th-Gen Xeon+ server), the save branch type is unconditionally enabled
> when the taken branch stack sampling is enabled.
> - priv: save privilege state during sampling in case binary is not available later
> + - counter: save occurrences of the event since the last branch entry. Currently, the
> + feature is only supported by a newer CPU, e.g., Intel Sierra Forest and
> + later platforms. An error out is expected if it's used on the unsupported
> + kernel or CPUs.
>
> +
> The option requires at least one branch type among any, any_call, any_ret, ind_call, cond.
> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> index a8a5ff87cc1f..58a9b8c82790 100644
> --- a/tools/perf/util/evsel.c
> +++ b/tools/perf/util/evsel.c
> @@ -1831,6 +1831,8 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
>
> static void evsel__disable_missing_features(struct evsel *evsel)
> {
> + if (perf_missing_features.branch_counters)
> + evsel->core.attr.branch_sample_type &= ~PERF_SAMPLE_BRANCH_COUNTERS;
> if (perf_missing_features.read_lost)
> evsel->core.attr.read_format &= ~PERF_FORMAT_LOST;
> if (perf_missing_features.weight_struct) {
> @@ -1884,7 +1886,12 @@ bool evsel__detect_missing_features(struct evsel *evsel)
> * Must probe features in the order they were added to the
> * perf_event_attr interface.
> */
> - if (!perf_missing_features.read_lost &&
> + if (!perf_missing_features.branch_counters &&
> + (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS)) {
> + perf_missing_features.branch_counters = true;
> + pr_debug2("switching off branch counters support\n");
> + return true;
> + } else if (!perf_missing_features.read_lost &&
> (evsel->core.attr.read_format & PERF_FORMAT_LOST)) {
> perf_missing_features.read_lost = true;
> pr_debug2("switching off PERF_FORMAT_LOST support\n");
> @@ -2344,6 +2351,18 @@ u64 evsel__bitfield_swap_branch_flags(u64 value)
> return new_val;
> }
>
> +static inline bool evsel__has_branch_counters(const struct evsel *evsel)
> +{
> + struct evsel *cur, *leader = evsel__leader(evsel);
> +
> + evlist__for_each_entry(evsel->evlist, cur) {
> + if ((leader == evsel__leader(cur)) &&
> + (cur->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS))
> + return true;
> + }
> + return false;
> +}
> +
> int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
> struct perf_sample *data)
> {
> @@ -2577,6 +2596,16 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
>
> OVERFLOW_CHECK(array, sz, max_size);
> array = (void *)array + sz;
> +
> + if (evsel__has_branch_counters(evsel)) {
> + OVERFLOW_CHECK_u64(array);
> +
> + data->branch_stack_cntr = (u64 *)array;
> + sz = data->branch_stack->nr * sizeof(u64);
> +
> + OVERFLOW_CHECK(array, sz, max_size);
> + array = (void *)array + sz;
> + }
> }
>
> if (type & PERF_SAMPLE_REGS_USER) {
> diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
> index 848534ec74fa..85f24c986392 100644
> --- a/tools/perf/util/evsel.h
> +++ b/tools/perf/util/evsel.h
> @@ -191,6 +191,7 @@ struct perf_missing_features {
> bool code_page_size;
> bool weight_struct;
> bool read_lost;
> + bool branch_counters;
> };
>
> extern struct perf_missing_features perf_missing_features;
> diff --git a/tools/perf/util/parse-branch-options.c b/tools/perf/util/parse-branch-options.c
> index fd67d204d720..f7f7aff3d85a 100644
> --- a/tools/perf/util/parse-branch-options.c
> +++ b/tools/perf/util/parse-branch-options.c
> @@ -36,6 +36,7 @@ static const struct branch_mode branch_modes[] = {
> BRANCH_OPT("stack", PERF_SAMPLE_BRANCH_CALL_STACK),
> BRANCH_OPT("hw_index", PERF_SAMPLE_BRANCH_HW_INDEX),
> BRANCH_OPT("priv", PERF_SAMPLE_BRANCH_PRIV_SAVE),
> + BRANCH_OPT("counter", PERF_SAMPLE_BRANCH_COUNTERS),
> BRANCH_END
> };
>
> diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c
> index 2247991451f3..8f04d3b7f3ec 100644
> --- a/tools/perf/util/perf_event_attr_fprintf.c
> +++ b/tools/perf/util/perf_event_attr_fprintf.c
> @@ -55,6 +55,7 @@ static void __p_branch_sample_type(char *buf, size_t size, u64 value)
> bit_name(COND), bit_name(CALL_STACK), bit_name(IND_JUMP),
> bit_name(CALL), bit_name(NO_FLAGS), bit_name(NO_CYCLES),
> bit_name(TYPE_SAVE), bit_name(HW_INDEX), bit_name(PRIV_SAVE),
> + bit_name(COUNTERS),
> { .name = NULL, }
> };
> #undef bit_name
> diff --git a/tools/perf/util/sample.h b/tools/perf/util/sample.h
> index c92ad0f51ecd..70b2c3135555 100644
> --- a/tools/perf/util/sample.h
> +++ b/tools/perf/util/sample.h
> @@ -113,6 +113,7 @@ struct perf_sample {
> void *raw_data;
> struct ip_callchain *callchain;
> struct branch_stack *branch_stack;
> + u64 *branch_stack_cntr;
> struct regs_dump user_regs;
> struct regs_dump intr_regs;
> struct stack_dump user_stack;
> diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
> index 1e9aa8ed15b6..4a094ab0362b 100644
> --- a/tools/perf/util/session.c
> +++ b/tools/perf/util/session.c
> @@ -1150,9 +1150,13 @@ static void callchain__printf(struct evsel *evsel,
> i, callchain->ips[i]);
> }
>
> -static void branch_stack__printf(struct perf_sample *sample, bool callstack)
> +static void branch_stack__printf(struct perf_sample *sample,
> + struct evsel *evsel)
> {
> struct branch_entry *entries = perf_sample__branch_entries(sample);
> + bool callstack = evsel__has_branch_callstack(evsel);
> + u64 *branch_stack_cntr = sample->branch_stack_cntr;
> + struct perf_env *env = evsel__env(evsel);
> uint64_t i;
>
> if (!callstack) {
> @@ -1194,6 +1198,13 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack)
> }
> }
> }
> +
> + if (branch_stack_cntr) {
> + printf("... branch stack counters: nr:%" PRIu64 " (counter width: %u max counter nr:%u)\n",
> + sample->branch_stack->nr, env->br_cntr_width, env->br_cntr_nr);
> + for (i = 0; i < sample->branch_stack->nr; i++)
> + printf("..... %2"PRIu64": %016" PRIx64 "\n", i, branch_stack_cntr[i]);
> + }
> }
>
> static void regs_dump__printf(u64 mask, u64 *regs, const char *arch)
> @@ -1355,7 +1366,7 @@ static void dump_sample(struct evsel *evsel, union perf_event *event,
> callchain__printf(evsel, sample);
>
> if (evsel__has_br_stack(evsel))
> - branch_stack__printf(sample, evsel__has_branch_callstack(evsel));
> + branch_stack__printf(sample, evsel);
>
> if (sample_type & PERF_SAMPLE_REGS_USER)
> regs_user__printf(sample, arch);
> --
> 2.35.1
>
On 2023-10-25 10:12 p.m., Ian Rogers wrote:
> On Wed, Oct 25, 2023 at 1:16 PM <[email protected]> wrote:
>>
>> From: Kan Liang <[email protected]>
>>
>> Add a new branch filter, "counter", for the branch counter option. It is
>> used to mark the events which should be logged in the branch. If it is
>> applied with the -j option, the counters of all the events should be
>> logged in the branch. If the legacy kernel doesn't support the new
>> branch sample type, the branch counter filter is switched off.
>>
>> The stored counter values in each branch are displayed right after the
>> regular branch stack information via perf report -D.
>>
>> Usage examples:
>>
>> perf record -e "{branch-instructions,branch-misses}:S" -j any,counter
>>
>> Only the first event, branch-instructions, collects the LBR. Both
>> branch-instructions and branch-misses are marked as logged events.
>> Their occurrence information can be found in the branch stack
>> extension space of each branch.
>>
>> perf record -e "{cpu/branch-instructions,branch_type=any/,
>> cpu/branch-misses,branch_type=counter/}"
>>
>> Only the first event, branch-instructions, collects the LBR. Only the
>> branch-misses event is marked as a logged event.
>>
>> Signed-off-by: Kan Liang <[email protected]>
>
> Reviewed-by: Ian Rogers <[email protected]>
Thanks Ian for the review.
>
> Perhaps add a test somewhere like:
> https://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git/tree/tools/perf/tests/shell/record.sh
> for coverage when hardware supports the feature.
Sure, I will add a test when posting the perf tool patches.
Thanks,
Kan
>
> Thanks,
> Ian
>
>> ---
>>
>> No changes since V4
>>
>> tools/perf/Documentation/perf-record.txt | 4 +++
>> tools/perf/util/evsel.c | 31 ++++++++++++++++++++++-
>> tools/perf/util/evsel.h | 1 +
>> tools/perf/util/parse-branch-options.c | 1 +
>> tools/perf/util/perf_event_attr_fprintf.c | 1 +
>> tools/perf/util/sample.h | 1 +
>> tools/perf/util/session.c | 15 +++++++++--
>> 7 files changed, 51 insertions(+), 3 deletions(-)
>>
>> diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
>> index d5217be012d7..b6afe7cc948d 100644
>> --- a/tools/perf/Documentation/perf-record.txt
>> +++ b/tools/perf/Documentation/perf-record.txt
>> @@ -442,6 +442,10 @@ following filters are defined:
>> 4th-Gen Xeon+ server), the save branch type is unconditionally enabled
>> when the taken branch stack sampling is enabled.
>> - priv: save privilege state during sampling in case binary is not available later
>> + - counter: save occurrences of the event since the last branch entry. Currently, the
>> + feature is only supported on newer CPUs, e.g., Intel Sierra Forest and
>> + later platforms. An error is expected if it's used on an unsupported
>> + kernel or CPU.
>>
>> +
>> The option requires at least one branch type among any, any_call, any_ret, ind_call, cond.
>> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
>> index a8a5ff87cc1f..58a9b8c82790 100644
>> --- a/tools/perf/util/evsel.c
>> +++ b/tools/perf/util/evsel.c
>> @@ -1831,6 +1831,8 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
>>
>> static void evsel__disable_missing_features(struct evsel *evsel)
>> {
>> + if (perf_missing_features.branch_counters)
>> + evsel->core.attr.branch_sample_type &= ~PERF_SAMPLE_BRANCH_COUNTERS;
>> if (perf_missing_features.read_lost)
>> evsel->core.attr.read_format &= ~PERF_FORMAT_LOST;
>> if (perf_missing_features.weight_struct) {
>> @@ -1884,7 +1886,12 @@ bool evsel__detect_missing_features(struct evsel *evsel)
>> * Must probe features in the order they were added to the
>> * perf_event_attr interface.
>> */
>> - if (!perf_missing_features.read_lost &&
>> + if (!perf_missing_features.branch_counters &&
>> + (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS)) {
>> + perf_missing_features.branch_counters = true;
>> + pr_debug2("switching off branch counters support\n");
>> + return true;
>> + } else if (!perf_missing_features.read_lost &&
>> (evsel->core.attr.read_format & PERF_FORMAT_LOST)) {
>> perf_missing_features.read_lost = true;
>> pr_debug2("switching off PERF_FORMAT_LOST support\n");
>> @@ -2344,6 +2351,18 @@ u64 evsel__bitfield_swap_branch_flags(u64 value)
>> return new_val;
>> }
>>
>> +static inline bool evsel__has_branch_counters(const struct evsel *evsel)
>> +{
>> + struct evsel *cur, *leader = evsel__leader(evsel);
>> +
>> + evlist__for_each_entry(evsel->evlist, cur) {
>> + if ((leader == evsel__leader(cur)) &&
>> + (cur->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS))
>> + return true;
>> + }
>> + return false;
>> +}
>> +
>> int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
>> struct perf_sample *data)
>> {
>> @@ -2577,6 +2596,16 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
>>
>> OVERFLOW_CHECK(array, sz, max_size);
>> array = (void *)array + sz;
>> +
>> + if (evsel__has_branch_counters(evsel)) {
>> + OVERFLOW_CHECK_u64(array);
>> +
>> + data->branch_stack_cntr = (u64 *)array;
>> + sz = data->branch_stack->nr * sizeof(u64);
>> +
>> + OVERFLOW_CHECK(array, sz, max_size);
>> + array = (void *)array + sz;
>> + }
>> }
>>
>> if (type & PERF_SAMPLE_REGS_USER) {
>> diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
>> index 848534ec74fa..85f24c986392 100644
>> --- a/tools/perf/util/evsel.h
>> +++ b/tools/perf/util/evsel.h
>> @@ -191,6 +191,7 @@ struct perf_missing_features {
>> bool code_page_size;
>> bool weight_struct;
>> bool read_lost;
>> + bool branch_counters;
>> };
>>
>> extern struct perf_missing_features perf_missing_features;
>> diff --git a/tools/perf/util/parse-branch-options.c b/tools/perf/util/parse-branch-options.c
>> index fd67d204d720..f7f7aff3d85a 100644
>> --- a/tools/perf/util/parse-branch-options.c
>> +++ b/tools/perf/util/parse-branch-options.c
>> @@ -36,6 +36,7 @@ static const struct branch_mode branch_modes[] = {
>> BRANCH_OPT("stack", PERF_SAMPLE_BRANCH_CALL_STACK),
>> BRANCH_OPT("hw_index", PERF_SAMPLE_BRANCH_HW_INDEX),
>> BRANCH_OPT("priv", PERF_SAMPLE_BRANCH_PRIV_SAVE),
>> + BRANCH_OPT("counter", PERF_SAMPLE_BRANCH_COUNTERS),
>> BRANCH_END
>> };
>>
>> diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c
>> index 2247991451f3..8f04d3b7f3ec 100644
>> --- a/tools/perf/util/perf_event_attr_fprintf.c
>> +++ b/tools/perf/util/perf_event_attr_fprintf.c
>> @@ -55,6 +55,7 @@ static void __p_branch_sample_type(char *buf, size_t size, u64 value)
>> bit_name(COND), bit_name(CALL_STACK), bit_name(IND_JUMP),
>> bit_name(CALL), bit_name(NO_FLAGS), bit_name(NO_CYCLES),
>> bit_name(TYPE_SAVE), bit_name(HW_INDEX), bit_name(PRIV_SAVE),
>> + bit_name(COUNTERS),
>> { .name = NULL, }
>> };
>> #undef bit_name
>> diff --git a/tools/perf/util/sample.h b/tools/perf/util/sample.h
>> index c92ad0f51ecd..70b2c3135555 100644
>> --- a/tools/perf/util/sample.h
>> +++ b/tools/perf/util/sample.h
>> @@ -113,6 +113,7 @@ struct perf_sample {
>> void *raw_data;
>> struct ip_callchain *callchain;
>> struct branch_stack *branch_stack;
>> + u64 *branch_stack_cntr;
>> struct regs_dump user_regs;
>> struct regs_dump intr_regs;
>> struct stack_dump user_stack;
>> diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
>> index 1e9aa8ed15b6..4a094ab0362b 100644
>> --- a/tools/perf/util/session.c
>> +++ b/tools/perf/util/session.c
>> @@ -1150,9 +1150,13 @@ static void callchain__printf(struct evsel *evsel,
>> i, callchain->ips[i]);
>> }
>>
>> -static void branch_stack__printf(struct perf_sample *sample, bool callstack)
>> +static void branch_stack__printf(struct perf_sample *sample,
>> + struct evsel *evsel)
>> {
>> struct branch_entry *entries = perf_sample__branch_entries(sample);
>> + bool callstack = evsel__has_branch_callstack(evsel);
>> + u64 *branch_stack_cntr = sample->branch_stack_cntr;
>> + struct perf_env *env = evsel__env(evsel);
>> uint64_t i;
>>
>> if (!callstack) {
>> @@ -1194,6 +1198,13 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack)
>> }
>> }
>> }
>> +
>> + if (branch_stack_cntr) {
>> + printf("... branch stack counters: nr:%" PRIu64 " (counter width: %u max counter nr:%u)\n",
>> + sample->branch_stack->nr, env->br_cntr_width, env->br_cntr_nr);
>> + for (i = 0; i < sample->branch_stack->nr; i++)
>> + printf("..... %2"PRIu64": %016" PRIx64 "\n", i, branch_stack_cntr[i]);
>> + }
>> }
>>
>> static void regs_dump__printf(u64 mask, u64 *regs, const char *arch)
>> @@ -1355,7 +1366,7 @@ static void dump_sample(struct evsel *evsel, union perf_event *event,
>> callchain__printf(evsel, sample);
>>
>> if (evsel__has_br_stack(evsel))
>> - branch_stack__printf(sample, evsel__has_branch_callstack(evsel));
>> + branch_stack__printf(sample, evsel);
>>
>> if (sample_type & PERF_SAMPLE_REGS_USER)
>> regs_user__printf(sample, arch);
>> --
>> 2.35.1
>>
The following commit has been merged into the perf/core branch of tip:
Commit-ID: 1f2376cd03dd3b965d130ed46a7c92769d614ba1
Gitweb: https://git.kernel.org/tip/1f2376cd03dd3b965d130ed46a7c92769d614ba1
Author: Kan Liang <[email protected]>
AuthorDate: Wed, 25 Oct 2023 13:16:21 -07:00
Committer: Peter Zijlstra <[email protected]>
CommitterDate: Fri, 27 Oct 2023 15:05:09 +02:00
perf: Add branch_sample_call_stack
Add a helper function to check the call stack sample type.
A later patch will invoke the function in several places.
Signed-off-by: Kan Liang <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
---
arch/x86/events/core.c | 2 +-
include/linux/perf_event.h | 5 +++++
2 files changed, 6 insertions(+), 1 deletion(-)
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 40c9af1..0905064 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -601,7 +601,7 @@ int x86_pmu_hw_config(struct perf_event *event)
}
}
- if (event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK)
+ if (branch_sample_call_stack(event))
event->attach_state |= PERF_ATTACH_TASK_DATA;
/*
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 7897ef0..ac1a59c 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1144,6 +1144,11 @@ static inline bool branch_sample_counters(const struct perf_event *event)
return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS;
}
+static inline bool branch_sample_call_stack(const struct perf_event *event)
+{
+ return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK;
+}
+
struct perf_sample_data {
/*
* Fields set by perf_sample_data_init() unconditionally,
The following commit has been merged into the perf/core branch of tip:
Commit-ID: 85846b27072defc7ab3dcee7ff36563a040079dc
Gitweb: https://git.kernel.org/tip/85846b27072defc7ab3dcee7ff36563a040079dc
Author: Kan Liang <[email protected]>
AuthorDate: Wed, 25 Oct 2023 13:16:20 -07:00
Committer: Peter Zijlstra <[email protected]>
CommitterDate: Fri, 27 Oct 2023 15:05:09 +02:00
perf/x86: Add PERF_X86_EVENT_NEEDS_BRANCH_STACK flag
Currently, branch_sample_type != 0 is used to check whether a branch
stack setup is required. But it doesn't check the sample type, so an
unnecessary branch stack setup may be done for a counting event. E.g.,
perf record -e "{branch-instructions,branch-misses}:S" -j any
Also, the event only with the new PERF_SAMPLE_BRANCH_COUNTERS branch
sample type may not require a branch stack setup either.
Add a new flag NEEDS_BRANCH_STACK to indicate whether the event requires
a branch stack setup. Replace the needs_branch_stack() by checking the
new flag.
The counting event check is implemented here. A later patch will take
the new PERF_SAMPLE_BRANCH_COUNTERS into account.
Signed-off-by: Kan Liang <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
---
arch/x86/events/intel/core.c | 14 +++++++++++---
arch/x86/events/perf_event_flags.h | 1 +
2 files changed, 12 insertions(+), 3 deletions(-)
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 41a1647..a99449c 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2527,9 +2527,14 @@ static void intel_pmu_assign_event(struct perf_event *event, int idx)
perf_report_aux_output_id(event, idx);
}
+static __always_inline bool intel_pmu_needs_branch_stack(struct perf_event *event)
+{
+ return event->hw.flags & PERF_X86_EVENT_NEEDS_BRANCH_STACK;
+}
+
static void intel_pmu_del_event(struct perf_event *event)
{
- if (needs_branch_stack(event))
+ if (intel_pmu_needs_branch_stack(event))
intel_pmu_lbr_del(event);
if (event->attr.precise_ip)
intel_pmu_pebs_del(event);
@@ -2820,7 +2825,7 @@ static void intel_pmu_add_event(struct perf_event *event)
{
if (event->attr.precise_ip)
intel_pmu_pebs_add(event);
- if (needs_branch_stack(event))
+ if (intel_pmu_needs_branch_stack(event))
intel_pmu_lbr_add(event);
}
@@ -3897,7 +3902,10 @@ static int intel_pmu_hw_config(struct perf_event *event)
x86_pmu.pebs_aliases(event);
}
- if (needs_branch_stack(event)) {
+ if (needs_branch_stack(event) && is_sampling_event(event))
+ event->hw.flags |= PERF_X86_EVENT_NEEDS_BRANCH_STACK;
+
+ if (intel_pmu_needs_branch_stack(event)) {
ret = intel_pmu_setup_lbr_filter(event);
if (ret)
return ret;
diff --git a/arch/x86/events/perf_event_flags.h b/arch/x86/events/perf_event_flags.h
index 1dc19b9..a168598 100644
--- a/arch/x86/events/perf_event_flags.h
+++ b/arch/x86/events/perf_event_flags.h
@@ -20,3 +20,4 @@ PERF_ARCH(TOPDOWN, 0x04000) /* Count Topdown slots/metrics events */
PERF_ARCH(PEBS_STLAT, 0x08000) /* st+stlat data address sampling */
PERF_ARCH(AMD_BRS, 0x10000) /* AMD Branch Sampling */
PERF_ARCH(PEBS_LAT_HYBRID, 0x20000) /* ld and st lat for hybrid */
+PERF_ARCH(NEEDS_BRANCH_STACK, 0x40000) /* require branch stack setup */
The following commit has been merged into the perf/core branch of tip:
Commit-ID: 33744916196b4ed7a50f6f47af7c3ad46b730ce6
Gitweb: https://git.kernel.org/tip/33744916196b4ed7a50f6f47af7c3ad46b730ce6
Author: Kan Liang <[email protected]>
AuthorDate: Wed, 25 Oct 2023 13:16:23 -07:00
Committer: Peter Zijlstra <[email protected]>
CommitterDate: Fri, 27 Oct 2023 15:05:11 +02:00
perf/x86/intel: Support branch counters logging
The branch counters logging (a.k.a. LBR event logging) introduces a
per-counter indication of precise event occurrences in LBRs. It can
provide a means to attribute exposed retirement latency to combinations
of events across a block of instructions. It also provides a means of
attributing Timed LBR latencies to events.
The feature is first introduced on SRF/GRR. It is an enhancement of the
ARCH LBR. It adds new fields in the LBR_INFO MSRs to log the occurrences
of events on the GP counters. The information is laid out in counter
order.
The design proposed in this patch requires that the logged events be in
the same group as the event that has the LBR. If there is more than one
LBR group, only the counter information from the current (overflowed)
group is stored for the perf tool; otherwise, the perf tool cannot know
which other groups are scheduled and when, especially when multiplexing
is triggered. The user can make use of the maximum number of counters
that support LBR info (currently 4) by making the group large enough.
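E.g., a group like the following (an illustrative command line; the
choice of sibling events is an assumption, not taken from the patch
set) marks four events as logged and can use all four supported
counters:

  perf record -e "{branch-instructions,branch-misses,instructions,cache-misses}:S" -j any,counter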
The HW only logs events in counter order, which may differ from the
order in which the events were enabled, the order the perf tool
understands. When parsing the information of each branch entry, convert
the counter order to the enabled order, and store the enabled order in
the extension space.
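For instance (an illustrative case, not from the patch set): if two
logged events were enabled in the order e0, e1 but scheduled on
counters 3 and 1, the reorder step copies counter 3's 2-bit field into
bits [1:0] and counter 1's field into bits [3:2] of the stored u64, so
the perf tool can index the counts by enabled order.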
Unconditionally reset LBRs for an LBR event group when it's deleted. The
logged counter information is only valid for the current LBR group. If
another LBR group is scheduled later, the information from the stale
LBRs would otherwise be wrongly interpreted.
Add a sanity check in intel_pmu_hw_config(). Disable the feature if other
counter filters (inv, cmask, edge, in_tx) are set or LBR call stack mode
is enabled. (For the LBR call stack mode, we cannot simply flush the
LBR, since it will break the call stack. Also, there is no obvious usage
with the call stack mode for now.)
Applying only PERF_SAMPLE_BRANCH_COUNTERS doesn't require any branch
stack setup.
Expose the maximum number of supported counters and the width of the
counters via sysfs. The perf tool can use the information to parse
the logged counters in each branch.
Signed-off-by: Kan Liang <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
---
arch/x86/events/intel/core.c | 103 ++++++++++++++++++++++++++--
arch/x86/events/intel/ds.c | 2 +-
arch/x86/events/intel/lbr.c | 85 ++++++++++++++++++++++-
arch/x86/events/perf_event.h | 12 +++-
arch/x86/events/perf_event_flags.h | 1 +-
arch/x86/include/asm/msr-index.h | 5 +-
arch/x86/include/asm/perf_event.h | 4 +-
include/uapi/linux/perf_event.h | 3 +-
8 files changed, 207 insertions(+), 8 deletions(-)
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 584b58d..e068a96 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2792,6 +2792,7 @@ static void intel_pmu_enable_fixed(struct perf_event *event)
static void intel_pmu_enable_event(struct perf_event *event)
{
+ u64 enable_mask = ARCH_PERFMON_EVENTSEL_ENABLE;
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
@@ -2800,8 +2801,10 @@ static void intel_pmu_enable_event(struct perf_event *event)
switch (idx) {
case 0 ... INTEL_PMC_IDX_FIXED - 1:
+ if (branch_sample_counters(event))
+ enable_mask |= ARCH_PERFMON_EVENTSEL_BR_CNTR;
intel_set_masks(event, idx);
- __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
+ __x86_pmu_enable_event(hwc, enable_mask);
break;
case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1:
case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
@@ -3052,7 +3055,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
perf_sample_data_init(&data, 0, event->hw.last_period);
if (has_branch_stack(event))
- perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL);
+ intel_pmu_lbr_save_brstack(&data, cpuc, event);
if (perf_event_overflow(event, &data, regs))
x86_pmu_stop(event, 0);
@@ -3617,6 +3620,13 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
if (cpuc->excl_cntrs)
return intel_get_excl_constraints(cpuc, event, idx, c2);
+ /* Not all counters support the branch counter feature. */
+ if (branch_sample_counters(event)) {
+ c2 = dyn_constraint(cpuc, c2, idx);
+ c2->idxmsk64 &= x86_pmu.lbr_counters;
+ c2->weight = hweight64(c2->idxmsk64);
+ }
+
return c2;
}
@@ -3905,6 +3915,58 @@ static int intel_pmu_hw_config(struct perf_event *event)
if (needs_branch_stack(event) && is_sampling_event(event))
event->hw.flags |= PERF_X86_EVENT_NEEDS_BRANCH_STACK;
+ if (branch_sample_counters(event)) {
+ struct perf_event *leader, *sibling;
+ int num = 0;
+
+ if (!(x86_pmu.flags & PMU_FL_BR_CNTR) ||
+ (event->attr.config & ~INTEL_ARCH_EVENT_MASK))
+ return -EINVAL;
+
+ /*
+ * The branch counter logging is not supported in the call stack
+ * mode yet, since we cannot simply flush the LBR during e.g.,
+ * multiplexing. Also, there is no obvious usage with the call
+ * stack mode. Simply forbid it for now.
+ *
+ * If any events in the group enable the branch counter logging
+ * feature, the group is treated as a branch counter logging
+ * group, which requires the extra space to store the counters.
+ */
+ leader = event->group_leader;
+ if (branch_sample_call_stack(leader))
+ return -EINVAL;
+ if (branch_sample_counters(leader))
+ num++;
+ leader->hw.flags |= PERF_X86_EVENT_BRANCH_COUNTERS;
+
+ for_each_sibling_event(sibling, leader) {
+ if (branch_sample_call_stack(sibling))
+ return -EINVAL;
+ if (branch_sample_counters(sibling))
+ num++;
+ }
+
+ if (num > fls(x86_pmu.lbr_counters))
+ return -EINVAL;
+ /*
+ * Only applying the PERF_SAMPLE_BRANCH_COUNTERS doesn't
+ * require any branch stack setup.
+ * Clear the bit to avoid unnecessary branch stack setup.
+ */
+ if (0 == (event->attr.branch_sample_type &
+ ~(PERF_SAMPLE_BRANCH_PLM_ALL |
+ PERF_SAMPLE_BRANCH_COUNTERS)))
+ event->hw.flags &= ~PERF_X86_EVENT_NEEDS_BRANCH_STACK;
+
+ /*
+ * Force the leader to be a LBR event. So LBRs can be reset
+ * with the leader event. See intel_pmu_lbr_del() for details.
+ */
+ if (!intel_pmu_needs_branch_stack(leader))
+ return -EINVAL;
+ }
+
if (intel_pmu_needs_branch_stack(event)) {
ret = intel_pmu_setup_lbr_filter(event);
if (ret)
@@ -4383,8 +4445,13 @@ cmt_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
*/
if (event->attr.precise_ip == 3) {
/* Force instruction:ppp on PMC0, 1 and Fixed counter 0 */
- if (constraint_match(&fixed0_constraint, event->hw.config))
- return &fixed0_counter0_1_constraint;
+ if (constraint_match(&fixed0_constraint, event->hw.config)) {
+ /* The fixed counter 0 doesn't support LBR event logging. */
+ if (branch_sample_counters(event))
+ return &counter0_1_constraint;
+ else
+ return &fixed0_counter0_1_constraint;
+ }
switch (c->idxmsk64 & 0x3ull) {
case 0x1:
@@ -4563,7 +4630,7 @@ int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu)
goto err;
}
- if (x86_pmu.flags & (PMU_FL_EXCL_CNTRS | PMU_FL_TFA)) {
+ if (x86_pmu.flags & (PMU_FL_EXCL_CNTRS | PMU_FL_TFA | PMU_FL_BR_CNTR)) {
size_t sz = X86_PMC_IDX_MAX * sizeof(struct event_constraint);
cpuc->constraint_list = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu));
@@ -5535,15 +5602,39 @@ static ssize_t branches_show(struct device *cdev,
static DEVICE_ATTR_RO(branches);
+static ssize_t branch_counter_nr_show(struct device *cdev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%d\n", fls(x86_pmu.lbr_counters));
+}
+
+static DEVICE_ATTR_RO(branch_counter_nr);
+
+static ssize_t branch_counter_width_show(struct device *cdev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%d\n", LBR_INFO_BR_CNTR_BITS);
+}
+
+static DEVICE_ATTR_RO(branch_counter_width);
+
static struct attribute *lbr_attrs[] = {
&dev_attr_branches.attr,
+ &dev_attr_branch_counter_nr.attr,
+ &dev_attr_branch_counter_width.attr,
NULL
};
static umode_t
lbr_is_visible(struct kobject *kobj, struct attribute *attr, int i)
{
- return x86_pmu.lbr_nr ? attr->mode : 0;
+ /* branches */
+ if (i == 0)
+ return x86_pmu.lbr_nr ? attr->mode : 0;
+
+ return (x86_pmu.flags & PMU_FL_BR_CNTR) ? attr->mode : 0;
}
static char pmu_name_str[30];
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index cb3f329..d49d661 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1912,7 +1912,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
if (has_branch_stack(event)) {
intel_pmu_store_pebs_lbrs(lbr);
- perf_sample_save_brstack(data, event, &cpuc->lbr_stack, NULL);
+ intel_pmu_lbr_save_brstack(data, cpuc, event);
}
}
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index c3b0d15..78cd508 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -676,6 +676,25 @@ void intel_pmu_lbr_del(struct perf_event *event)
WARN_ON_ONCE(cpuc->lbr_users < 0);
WARN_ON_ONCE(cpuc->lbr_pebs_users < 0);
perf_sched_cb_dec(event->pmu);
+
+ /*
+ * The logged occurrences information is only valid for the
+ * current LBR group. If another LBR group is scheduled in
+ * later, the information from the stale LBRs will be wrongly
+ * interpreted. Reset the LBRs here.
+ *
+ * Only clear once for a branch counter group with the leader
+ * event. Because
+ * - Cannot simply reset the LBRs on !cpuc->lbr_users, because
+ * it's possible that the last LBR user is not in a
+ * branch counter group, e.g., a branch_counters group +
+ * several normal LBR events.
+ * - The LBR reset can be done with any one of the events in a
+ * branch counter group, since they are always scheduled together.
+ * It's easy to force the leader event to be an LBR event.
+ */
+ if (is_branch_counters_group(event) && event == event->group_leader)
+ intel_pmu_lbr_reset();
}
static inline bool vlbr_exclude_host(void)
@@ -866,6 +885,8 @@ static __always_inline u16 get_lbr_cycles(u64 info)
return cycles;
}
+static_assert((64 - PERF_BRANCH_ENTRY_INFO_BITS_MAX) > LBR_INFO_BR_CNTR_NUM * LBR_INFO_BR_CNTR_BITS);
+
static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
struct lbr_entry *entries)
{
@@ -898,11 +919,67 @@ static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
e->abort = !!(info & LBR_INFO_ABORT);
e->cycles = get_lbr_cycles(info);
e->type = get_lbr_br_type(info);
+
+ /*
+ * Leverage the reserved field of cpuc->lbr_entries[i] to
+ * temporarily store the branch counters information.
+ * The later code will decide what content can be disclosed
+ * to the perf tool. Please see intel_pmu_lbr_counters_reorder().
+ */
+ e->reserved = (info >> LBR_INFO_BR_CNTR_OFFSET) & LBR_INFO_BR_CNTR_FULL_MASK;
}
cpuc->lbr_stack.nr = i;
}
+/*
+ * The enabled order may be different from the counter order.
+ * Update the lbr_counters with the enabled order.
+ */
+static void intel_pmu_lbr_counters_reorder(struct cpu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ int i, j, pos = 0, order[X86_PMC_IDX_MAX];
+ struct perf_event *leader, *sibling;
+ u64 src, dst, cnt;
+
+ leader = event->group_leader;
+ if (branch_sample_counters(leader))
+ order[pos++] = leader->hw.idx;
+
+ for_each_sibling_event(sibling, leader) {
+ if (!branch_sample_counters(sibling))
+ continue;
+ order[pos++] = sibling->hw.idx;
+ }
+
+ WARN_ON_ONCE(!pos);
+
+ for (i = 0; i < cpuc->lbr_stack.nr; i++) {
+ src = cpuc->lbr_entries[i].reserved;
+ dst = 0;
+ for (j = 0; j < pos; j++) {
+ cnt = (src >> (order[j] * LBR_INFO_BR_CNTR_BITS)) & LBR_INFO_BR_CNTR_MASK;
+ dst |= cnt << j * LBR_INFO_BR_CNTR_BITS;
+ }
+ cpuc->lbr_counters[i] = dst;
+ cpuc->lbr_entries[i].reserved = 0;
+ }
+}
+
+void intel_pmu_lbr_save_brstack(struct perf_sample_data *data,
+ struct cpu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ if (is_branch_counters_group(event)) {
+ intel_pmu_lbr_counters_reorder(cpuc, event);
+ perf_sample_save_brstack(data, event, &cpuc->lbr_stack, cpuc->lbr_counters);
+ return;
+ }
+
+ perf_sample_save_brstack(data, event, &cpuc->lbr_stack, NULL);
+}
+
static void intel_pmu_arch_lbr_read(struct cpu_hw_events *cpuc)
{
intel_pmu_store_lbr(cpuc, NULL);
@@ -1173,8 +1250,10 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
for (i = 0; i < cpuc->lbr_stack.nr; ) {
if (!cpuc->lbr_entries[i].from) {
j = i;
- while (++j < cpuc->lbr_stack.nr)
+ while (++j < cpuc->lbr_stack.nr) {
cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j];
+ cpuc->lbr_counters[j-1] = cpuc->lbr_counters[j];
+ }
cpuc->lbr_stack.nr--;
if (!cpuc->lbr_entries[i].from)
continue;
@@ -1525,8 +1604,12 @@ void __init intel_pmu_arch_lbr_init(void)
x86_pmu.lbr_mispred = ecx.split.lbr_mispred;
x86_pmu.lbr_timed_lbr = ecx.split.lbr_timed_lbr;
x86_pmu.lbr_br_type = ecx.split.lbr_br_type;
+ x86_pmu.lbr_counters = ecx.split.lbr_counters;
x86_pmu.lbr_nr = lbr_nr;
+ if (!!x86_pmu.lbr_counters)
+ x86_pmu.flags |= PMU_FL_BR_CNTR;
+
if (x86_pmu.lbr_mispred)
static_branch_enable(&x86_lbr_mispred);
if (x86_pmu.lbr_timed_lbr)
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 53dd5d4..fb56518 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -110,6 +110,11 @@ static inline bool is_topdown_event(struct perf_event *event)
return is_metric_event(event) || is_slots_event(event);
}
+static inline bool is_branch_counters_group(struct perf_event *event)
+{
+ return event->group_leader->hw.flags & PERF_X86_EVENT_BRANCH_COUNTERS;
+}
+
struct amd_nb {
int nb_id; /* NorthBridge id */
int refcnt; /* reference count */
@@ -283,6 +288,7 @@ struct cpu_hw_events {
int lbr_pebs_users;
struct perf_branch_stack lbr_stack;
struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];
+ u64 lbr_counters[MAX_LBR_ENTRIES]; /* branch stack extra */
union {
struct er_account *lbr_sel;
struct er_account *lbr_ctl;
@@ -888,6 +894,7 @@ struct x86_pmu {
unsigned int lbr_mispred:1;
unsigned int lbr_timed_lbr:1;
unsigned int lbr_br_type:1;
+ unsigned int lbr_counters:4;
void (*lbr_reset)(void);
void (*lbr_read)(struct cpu_hw_events *cpuc);
@@ -1012,6 +1019,7 @@ do { \
#define PMU_FL_INSTR_LATENCY 0x80 /* Support Instruction Latency in PEBS Memory Info Record */
#define PMU_FL_MEM_LOADS_AUX 0x100 /* Require an auxiliary event for the complete memory info */
#define PMU_FL_RETIRE_LATENCY 0x200 /* Support Retire Latency in PEBS */
+#define PMU_FL_BR_CNTR 0x400 /* Support branch counter logging */
#define EVENT_VAR(_id) event_attr_##_id
#define EVENT_PTR(_id) &event_attr_##_id.attr.attr
@@ -1552,6 +1560,10 @@ void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr);
void intel_ds_init(void);
+void intel_pmu_lbr_save_brstack(struct perf_sample_data *data,
+ struct cpu_hw_events *cpuc,
+ struct perf_event *event);
+
void intel_pmu_lbr_swap_task_ctx(struct perf_event_pmu_context *prev_epc,
struct perf_event_pmu_context *next_epc);
diff --git a/arch/x86/events/perf_event_flags.h b/arch/x86/events/perf_event_flags.h
index a168598..6c977c1 100644
--- a/arch/x86/events/perf_event_flags.h
+++ b/arch/x86/events/perf_event_flags.h
@@ -21,3 +21,4 @@ PERF_ARCH(PEBS_STLAT, 0x08000) /* st+stlat data address sampling */
PERF_ARCH(AMD_BRS, 0x10000) /* AMD Branch Sampling */
PERF_ARCH(PEBS_LAT_HYBRID, 0x20000) /* ld and st lat for hybrid */
PERF_ARCH(NEEDS_BRANCH_STACK, 0x40000) /* require branch stack setup */
+PERF_ARCH(BRANCH_COUNTERS, 0x80000) /* logs the counters in the extra space of each branch */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index f8b5028..a5b0a19 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -236,6 +236,11 @@
#define LBR_INFO_CYCLES 0xffff
#define LBR_INFO_BR_TYPE_OFFSET 56
#define LBR_INFO_BR_TYPE (0xfull << LBR_INFO_BR_TYPE_OFFSET)
+#define LBR_INFO_BR_CNTR_OFFSET 32
+#define LBR_INFO_BR_CNTR_NUM 4
+#define LBR_INFO_BR_CNTR_BITS 2
+#define LBR_INFO_BR_CNTR_MASK GENMASK_ULL(LBR_INFO_BR_CNTR_BITS - 1, 0)
+#define LBR_INFO_BR_CNTR_FULL_MASK GENMASK_ULL(LBR_INFO_BR_CNTR_NUM * LBR_INFO_BR_CNTR_BITS - 1, 0)
#define MSR_ARCH_LBR_CTL 0x000014ce
#define ARCH_LBR_CTL_LBREN BIT(0)
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 2618ec7..3736b8a 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -31,6 +31,7 @@
#define ARCH_PERFMON_EVENTSEL_ENABLE (1ULL << 22)
#define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23)
#define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL
+#define ARCH_PERFMON_EVENTSEL_BR_CNTR (1ULL << 35)
#define INTEL_FIXED_BITS_MASK 0xFULL
#define INTEL_FIXED_BITS_STRIDE 4
@@ -223,6 +224,9 @@ union cpuid28_ecx {
unsigned int lbr_timed_lbr:1;
/* Branch Type Field Supported */
unsigned int lbr_br_type:1;
+ unsigned int reserved:13;
+ /* Branch counters (Event Logging) Supported */
+ unsigned int lbr_counters:4;
} split;
unsigned int full;
};
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 4461f38..3a64499 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -1437,6 +1437,9 @@ struct perf_branch_entry {
reserved:31;
};
+/* Size of used info bits in struct perf_branch_entry */
+#define PERF_BRANCH_ENTRY_INFO_BITS_MAX 33
+
union perf_sample_weight {
__u64 full;
#if defined(__LITTLE_ENDIAN_BITFIELD)
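As an editorial aside (not part of the merged patch): given the
"branch_counter_nr" and "branch_counter_width" caps exposed above, a
minimal user-space sketch for decoding one u64 value from the branch
stack extension space could look as follows. The function name and
output format are assumptions for illustration only.

  #include <inttypes.h>
  #include <stdint.h>
  #include <stdio.h>

  /*
   * Decode one u64 "counters" value from the branch stack extension
   * space. "nr" and "width" are the values read from
   * /sys/bus/event_source/devices/cpu/caps/branch_counter_nr and
   * .../branch_counter_width. The fields are stored in the enabled
   * order of the logged events, lowest bits first.
   */
  static void decode_branch_counters(uint64_t counters, unsigned int nr,
				     unsigned int width)
  {
	  uint64_t mask = (1ULL << width) - 1;
	  unsigned int i;

	  for (i = 0; i < nr; i++)
		  printf("event %u: %" PRIu64 "\n", i,
			 (counters >> (i * width)) & mask);
  }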
The following commit has been merged into the perf/core branch of tip:
Commit-ID: 571d91dcadfa3cef499010b4eddb9b58b0da4d24
Gitweb: https://git.kernel.org/tip/571d91dcadfa3cef499010b4eddb9b58b0da4d24
Author: Kan Liang <[email protected]>
AuthorDate: Wed, 25 Oct 2023 13:16:19 -07:00
Committer: Peter Zijlstra <[email protected]>
CommitterDate: Fri, 27 Oct 2023 15:05:08 +02:00
perf: Add branch stack counters
Currently, the additional information of a branch entry is stored in a
u64 space. With more and more information added, the space is running
out. For example, the information of occurrences of events will be added
for each branch.
Two places were suggested to append the counters.
https://lore.kernel.org/lkml/[email protected]/
One place is right after the flags of each branch entry. It changes the
existing struct perf_branch_entry. The later ARCH-specific
implementation has to be really careful to consistently pick
the right struct.
The other place is right after the entire struct perf_branch_stack.
The disadvantage is that the pointer to the extra space has to be
recorded. The common interface perf_sample_save_brstack() has to be
updated.
The latter is much more straightforward, and should be easily understood
and maintained. It is implemented in this patch.
Add a new branch sample type, PERF_SAMPLE_BRANCH_COUNTERS, to indicate
the event which is recorded in the branch info.
The "u64 counters" may store the occurrences of several events. The
information regarding the number of events/counters and the width of
each counter should be exposed via sysfs as a reference for the perf
tool. Define the branch_counter_nr and branch_counter_width ABI here.
The support will be implemented later in the Intel-specific patch.
Suggested-by: Peter Zijlstra (Intel) <[email protected]>
Signed-off-by: Kan Liang <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
---
Documentation/ABI/testing/sysfs-bus-event_source-devices-caps | 6 ++-
arch/powerpc/perf/core-book3s.c | 2 +-
arch/x86/events/amd/core.c | 2 +-
arch/x86/events/core.c | 2 +-
arch/x86/events/intel/core.c | 2 +-
arch/x86/events/intel/ds.c | 4 +-
include/linux/perf_event.h | 17 ++++++-
include/uapi/linux/perf_event.h | 10 ++++-
kernel/events/core.c | 8 +++-
9 files changed, 46 insertions(+), 7 deletions(-)
diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-caps b/Documentation/ABI/testing/sysfs-bus-event_source-devices-caps
index 8757dcf..a5f506f 100644
--- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-caps
+++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-caps
@@ -16,3 +16,9 @@ Description:
Example output in powerpc:
grep . /sys/bus/event_source/devices/cpu/caps/*
/sys/bus/event_source/devices/cpu/caps/pmu_name:POWER9
+
+ The "branch_counter_nr" in the supported platform exposes the
+ maximum number of counters which can be shown in the u64 counters
+ of PERF_SAMPLE_BRANCH_COUNTERS, while the "branch_counter_width"
+ exposes the width of each counter. Both of them can be used by
+ the perf tool to parse the logged counters in each branch.
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 8c1f7de..3c14596 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -2313,7 +2313,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
struct cpu_hw_events *cpuhw;
cpuhw = this_cpu_ptr(&cpu_hw_events);
power_pmu_bhrb_read(event, cpuhw);
- perf_sample_save_brstack(&data, event, &cpuhw->bhrb_stack);
+ perf_sample_save_brstack(&data, event, &cpuhw->bhrb_stack, NULL);
}
if (event->attr.sample_type & PERF_SAMPLE_DATA_SRC &&
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index e249765..4ee6390 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -940,7 +940,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
continue;
if (has_branch_stack(event))
- perf_sample_save_brstack(&data, event, &cpuc->lbr_stack);
+ perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL);
if (perf_event_overflow(event, &data, regs))
x86_pmu_stop(event, 0);
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 40ad142..40c9af1 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1702,7 +1702,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
perf_sample_data_init(&data, 0, event->hw.last_period);
if (has_branch_stack(event))
- perf_sample_save_brstack(&data, event, &cpuc->lbr_stack);
+ perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL);
if (perf_event_overflow(event, &data, regs))
x86_pmu_stop(event, 0);
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index a08f794..41a1647 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3047,7 +3047,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
perf_sample_data_init(&data, 0, event->hw.last_period);
if (has_branch_stack(event))
- perf_sample_save_brstack(&data, event, &cpuc->lbr_stack);
+ perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL);
if (perf_event_overflow(event, &data, regs))
x86_pmu_stop(event, 0);
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index bf97ab9..cb3f329 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1755,7 +1755,7 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
setup_pebs_time(event, data, pebs->tsc);
if (has_branch_stack(event))
- perf_sample_save_brstack(data, event, &cpuc->lbr_stack);
+ perf_sample_save_brstack(data, event, &cpuc->lbr_stack, NULL);
}
static void adaptive_pebs_save_regs(struct pt_regs *regs,
@@ -1912,7 +1912,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
if (has_branch_stack(event)) {
intel_pmu_store_pebs_lbrs(lbr);
- perf_sample_save_brstack(data, event, &cpuc->lbr_stack);
+ perf_sample_save_brstack(data, event, &cpuc->lbr_stack, NULL);
}
}
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 0367d74..7897ef0 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1139,6 +1139,10 @@ static inline bool branch_sample_priv(const struct perf_event *event)
return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_PRIV_SAVE;
}
+static inline bool branch_sample_counters(const struct perf_event *event)
+{
+ return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS;
+}
struct perf_sample_data {
/*
@@ -1173,6 +1177,7 @@ struct perf_sample_data {
struct perf_callchain_entry *callchain;
struct perf_raw_record *raw;
struct perf_branch_stack *br_stack;
+ u64 *br_stack_cntr;
union perf_sample_weight weight;
union perf_mem_data_src data_src;
u64 txn;
@@ -1250,7 +1255,8 @@ static inline void perf_sample_save_raw_data(struct perf_sample_data *data,
static inline void perf_sample_save_brstack(struct perf_sample_data *data,
struct perf_event *event,
- struct perf_branch_stack *brs)
+ struct perf_branch_stack *brs,
+ u64 *brs_cntr)
{
int size = sizeof(u64); /* nr */
@@ -1258,7 +1264,16 @@ static inline void perf_sample_save_brstack(struct perf_sample_data *data,
size += sizeof(u64);
size += brs->nr * sizeof(struct perf_branch_entry);
+ /*
+ * The extension space for counters is appended after the
+ * struct perf_branch_stack. It is used to store the occurrences
+ * of events of each branch.
+ */
+ if (brs_cntr)
+ size += brs->nr * sizeof(u64);
+
data->br_stack = brs;
+ data->br_stack_cntr = brs_cntr;
data->dyn_size += size;
data->sample_flags |= PERF_SAMPLE_BRANCH_STACK;
}
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 39c6a25..4461f38 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -204,6 +204,8 @@ enum perf_branch_sample_type_shift {
PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT = 18, /* save privilege mode */
+ PERF_SAMPLE_BRANCH_COUNTERS_SHIFT = 19, /* save occurrences of events on a branch */
+
PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */
};
@@ -235,6 +237,8 @@ enum perf_branch_sample_type {
PERF_SAMPLE_BRANCH_PRIV_SAVE = 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT,
+ PERF_SAMPLE_BRANCH_COUNTERS = 1U << PERF_SAMPLE_BRANCH_COUNTERS_SHIFT,
+
PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
};
@@ -982,6 +986,12 @@ enum perf_event_type {
* { u64 nr;
* { u64 hw_idx; } && PERF_SAMPLE_BRANCH_HW_INDEX
* { u64 from, to, flags } lbr[nr];
+ * #
+ * # The format of the counters is decided by the
+ * # "branch_counter_nr" and "branch_counter_width",
+ * # which are defined in the ABI.
+ * #
+ * { u64 counters; } cntr[nr] && PERF_SAMPLE_BRANCH_COUNTERS
* } && PERF_SAMPLE_BRANCH_STACK
*
* { u64 abi; # enum perf_sample_regs_abi
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 3eb26c2..d27ffd8 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7341,6 +7341,14 @@ void perf_output_sample(struct perf_output_handle *handle,
if (branch_sample_hw_index(event))
perf_output_put(handle, data->br_stack->hw_idx);
perf_output_copy(handle, data->br_stack->entries, size);
+ /*
+ * Add the extension space which is appended
+ * right after the struct perf_branch_stack.
+ */
+ if (data->br_stack_cntr) {
+ size = data->br_stack->nr * sizeof(u64);
+ perf_output_copy(handle, data->br_stack_cntr, size);
+ }
} else {
/*
* we always store at least the value of nr
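As an editorial aside (not part of the merged patch): a minimal sketch
of how a consumer could walk the PERF_SAMPLE_BRANCH_STACK payload with
the counters appended, following the record layout documented in the
uapi comment above. The helper name is an assumption; error handling
and bounds checks are omitted.

  #include <stdint.h>

  /*
   * "p" points at the u64 "nr" word of the branch stack payload inside
   * a PERF_RECORD_SAMPLE. Returns a pointer to the next sample field.
   */
  static const uint64_t *skip_brstack(const uint64_t *p, int has_hw_idx,
				      int has_counters)
  {
	  uint64_t nr = *p++;		/* u64 nr */

	  if (has_hw_idx)
		  p++;			/* u64 hw_idx (PERF_SAMPLE_BRANCH_HW_INDEX) */
	  p += nr * 3;			/* { u64 from, to, flags } lbr[nr] */
	  if (has_counters)
		  p += nr;		/* { u64 counters; } cntr[nr] */
	  return p;
  }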
The following commit has been merged into the perf/core branch of tip:
Commit-ID: 318c4985911245508f7e0bab5265e208a38b5f18
Gitweb: https://git.kernel.org/tip/318c4985911245508f7e0bab5265e208a38b5f18
Author: Kan Liang <[email protected]>
AuthorDate: Wed, 25 Oct 2023 13:16:22 -07:00
Committer: Peter Zijlstra <[email protected]>
CommitterDate: Fri, 27 Oct 2023 15:05:10 +02:00
perf/x86/intel: Reorganize attrs and is_visible
Some attrs and is_visible implementations are rather far away from one
another, which makes the whole thing hard to interpret.
There are only two attribute groups which have both .attrs and
.is_visible: group_default and group_caps_lbr. Move them together.
No functional changes.
Suggested-by: Peter Zijlstra (Intel) <[email protected]>
Signed-off-by: Kan Liang <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
---
arch/x86/events/intel/core.c | 30 +++++++++++++++---------------
1 file changed, 15 insertions(+), 15 deletions(-)
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index a99449c..584b58d 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -5540,6 +5540,12 @@ static struct attribute *lbr_attrs[] = {
NULL
};
+static umode_t
+lbr_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ return x86_pmu.lbr_nr ? attr->mode : 0;
+}
+
static char pmu_name_str[30];
static ssize_t pmu_name_show(struct device *cdev,
@@ -5567,6 +5573,15 @@ static struct attribute *intel_pmu_attrs[] = {
};
static umode_t
+default_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ if (attr == &dev_attr_allow_tsx_force_abort.attr)
+ return x86_pmu.flags & PMU_FL_TFA ? attr->mode : 0;
+
+ return attr->mode;
+}
+
+static umode_t
tsx_is_visible(struct kobject *kobj, struct attribute *attr, int i)
{
return boot_cpu_has(X86_FEATURE_RTM) ? attr->mode : 0;
@@ -5588,26 +5603,11 @@ mem_is_visible(struct kobject *kobj, struct attribute *attr, int i)
}
static umode_t
-lbr_is_visible(struct kobject *kobj, struct attribute *attr, int i)
-{
- return x86_pmu.lbr_nr ? attr->mode : 0;
-}
-
-static umode_t
exra_is_visible(struct kobject *kobj, struct attribute *attr, int i)
{
return x86_pmu.version >= 2 ? attr->mode : 0;
}
-static umode_t
-default_is_visible(struct kobject *kobj, struct attribute *attr, int i)
-{
- if (attr == &dev_attr_allow_tsx_force_abort.attr)
- return x86_pmu.flags & PMU_FL_TFA ? attr->mode : 0;
-
- return attr->mode;
-}
-
static struct attribute_group group_events_td = {
.name = "events",
};
On Wed, Oct 25, 2023 at 01:16:20PM -0700, [email protected] wrote:
> From: Kan Liang <[email protected]>
>
> Currently, branch_sample_type != 0 is used to check whether a branch
> stack setup is required. But it doesn't check the sample type, so an
> unnecessary branch stack setup may be done for a counting event. E.g.,
> perf record -e "{branch-instructions,branch-misses}:S" -j any
> Also, the event only with the new PERF_SAMPLE_BRANCH_COUNTERS branch
> sample type may not require a branch stack setup either.
>
> Add a new flag NEEDS_BRANCH_STACK to indicate whether the event requires
> a branch stack setup. Replace the needs_branch_stack() by checking the
> new flag.
>
> The counting event check is implemented here. A later patch will take
> the new PERF_SAMPLE_BRANCH_COUNTERS into account.
>
> Signed-off-by: Kan Liang <[email protected]>
> ---
>
> No changes since V4
So I saw this on tip/perf/urgent; I'm picking up the tools bits then.
- Arnaldo
> arch/x86/events/intel/core.c | 14 +++++++++++---
> arch/x86/events/perf_event_flags.h | 1 +
> 2 files changed, 12 insertions(+), 3 deletions(-)
>
> diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
> index 41a164764a84..a99449c0d77c 100644
> --- a/arch/x86/events/intel/core.c
> +++ b/arch/x86/events/intel/core.c
> @@ -2527,9 +2527,14 @@ static void intel_pmu_assign_event(struct perf_event *event, int idx)
> perf_report_aux_output_id(event, idx);
> }
>
> +static __always_inline bool intel_pmu_needs_branch_stack(struct perf_event *event)
> +{
> + return event->hw.flags & PERF_X86_EVENT_NEEDS_BRANCH_STACK;
> +}
> +
> static void intel_pmu_del_event(struct perf_event *event)
> {
> - if (needs_branch_stack(event))
> + if (intel_pmu_needs_branch_stack(event))
> intel_pmu_lbr_del(event);
> if (event->attr.precise_ip)
> intel_pmu_pebs_del(event);
> @@ -2820,7 +2825,7 @@ static void intel_pmu_add_event(struct perf_event *event)
> {
> if (event->attr.precise_ip)
> intel_pmu_pebs_add(event);
> - if (needs_branch_stack(event))
> + if (intel_pmu_needs_branch_stack(event))
> intel_pmu_lbr_add(event);
> }
>
> @@ -3897,7 +3902,10 @@ static int intel_pmu_hw_config(struct perf_event *event)
> x86_pmu.pebs_aliases(event);
> }
>
> - if (needs_branch_stack(event)) {
> + if (needs_branch_stack(event) && is_sampling_event(event))
> + event->hw.flags |= PERF_X86_EVENT_NEEDS_BRANCH_STACK;
> +
> + if (intel_pmu_needs_branch_stack(event)) {
> ret = intel_pmu_setup_lbr_filter(event);
> if (ret)
> return ret;
> diff --git a/arch/x86/events/perf_event_flags.h b/arch/x86/events/perf_event_flags.h
> index 1dc19b9b4426..a1685981c520 100644
> --- a/arch/x86/events/perf_event_flags.h
> +++ b/arch/x86/events/perf_event_flags.h
> @@ -20,3 +20,4 @@ PERF_ARCH(TOPDOWN, 0x04000) /* Count Topdown slots/metrics events */
> PERF_ARCH(PEBS_STLAT, 0x08000) /* st+stlat data address sampling */
> PERF_ARCH(AMD_BRS, 0x10000) /* AMD Branch Sampling */
> PERF_ARCH(PEBS_LAT_HYBRID, 0x20000) /* ld and st lat for hybrid */
> +PERF_ARCH(NEEDS_BRANCH_STACK, 0x40000) /* require branch stack setup */
> --
> 2.35.1
>
--
- Arnaldo
On 2023-11-06 4:12 p.m., Arnaldo Carvalho de Melo wrote:
On Wed, Oct 25, 2023 at 01:16:20PM -0700, [email protected] wrote:
>> From: Kan Liang <[email protected]>
>>
>> Currently, branch_sample_type != 0 is used to check whether a branch
>> stack setup is required. But it doesn't check the sample type, so an
>> unnecessary branch stack setup may be done for a counting event. E.g.,
>> perf record -e "{branch-instructions,branch-misses}:S" -j any
>> Also, the event only with the new PERF_SAMPLE_BRANCH_COUNTERS branch
>> sample type may not require a branch stack setup either.
>>
>> Add a new flag NEEDS_BRANCH_STACK to indicate whether the event requires
>> a branch stack setup. Replace the needs_branch_stack() by checking the
>> new flag.
>>
>> The counting event check is implemented here. A later patch will take
>> the new PERF_SAMPLE_BRANCH_COUNTERS into account.
>>
>> Signed-off-by: Kan Liang <[email protected]>
>> ---
>>
>> No changes since V4
>
> So I saw this on tip/perf/urgent, I'm picking the tools bits then.
Thanks Arnaldo.
Ian has already reviewed the tool parts.
But I still owe a test case for the feature. I will post a patch later.
https://lore.kernel.org/lkml/[email protected]/
Thanks,
Kan
>
> - Arnaldo
>
>> arch/x86/events/intel/core.c | 14 +++++++++++---
>> arch/x86/events/perf_event_flags.h | 1 +
>> 2 files changed, 12 insertions(+), 3 deletions(-)
>>
>> diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
>> index 41a164764a84..a99449c0d77c 100644
>> --- a/arch/x86/events/intel/core.c
>> +++ b/arch/x86/events/intel/core.c
>> @@ -2527,9 +2527,14 @@ static void intel_pmu_assign_event(struct perf_event *event, int idx)
>> perf_report_aux_output_id(event, idx);
>> }
>>
>> +static __always_inline bool intel_pmu_needs_branch_stack(struct perf_event *event)
>> +{
>> + return event->hw.flags & PERF_X86_EVENT_NEEDS_BRANCH_STACK;
>> +}
>> +
>> static void intel_pmu_del_event(struct perf_event *event)
>> {
>> - if (needs_branch_stack(event))
>> + if (intel_pmu_needs_branch_stack(event))
>> intel_pmu_lbr_del(event);
>> if (event->attr.precise_ip)
>> intel_pmu_pebs_del(event);
>> @@ -2820,7 +2825,7 @@ static void intel_pmu_add_event(struct perf_event *event)
>> {
>> if (event->attr.precise_ip)
>> intel_pmu_pebs_add(event);
>> - if (needs_branch_stack(event))
>> + if (intel_pmu_needs_branch_stack(event))
>> intel_pmu_lbr_add(event);
>> }
>>
>> @@ -3897,7 +3902,10 @@ static int intel_pmu_hw_config(struct perf_event *event)
>> x86_pmu.pebs_aliases(event);
>> }
>>
>> - if (needs_branch_stack(event)) {
>> + if (needs_branch_stack(event) && is_sampling_event(event))
>> + event->hw.flags |= PERF_X86_EVENT_NEEDS_BRANCH_STACK;
>> +
>> + if (intel_pmu_needs_branch_stack(event)) {
>> ret = intel_pmu_setup_lbr_filter(event);
>> if (ret)
>> return ret;
>> diff --git a/arch/x86/events/perf_event_flags.h b/arch/x86/events/perf_event_flags.h
>> index 1dc19b9b4426..a1685981c520 100644
>> --- a/arch/x86/events/perf_event_flags.h
>> +++ b/arch/x86/events/perf_event_flags.h
>> @@ -20,3 +20,4 @@ PERF_ARCH(TOPDOWN, 0x04000) /* Count Topdown slots/metrics events */
>> PERF_ARCH(PEBS_STLAT, 0x08000) /* st+stlat data address sampling */
>> PERF_ARCH(AMD_BRS, 0x10000) /* AMD Branch Sampling */
>> PERF_ARCH(PEBS_LAT_HYBRID, 0x20000) /* ld and st lat for hybrid */
>> +PERF_ARCH(NEEDS_BRANCH_STACK, 0x40000) /* require branch stack setup */
>> --
>> 2.35.1
>>
>