For the default events, parse from strings rather than use pre-cooked
perf_event_attr. This fixes non-x86 heterogeneous CPUs where legacy
hardware events wouldn't be opened for all PMUs. v2 consisted of just
patch 3; v3 extends it to incorporate related fixes.
When a sysfs/json event is used in preference to a legacy event, allow
evsel__match to still function using a saved hardware config
number. This fixes hard coded metrics in stat-shadow for events like
"instructions" on Intel that have a sysfs file.
Fix/improve the uniquifying of event names, fixing the test "102: perf
stat metrics (shadow stat) test:" that was broken by a formatting issue
when the sysfs instructions event was used.
Having both evsel->pmu_name and evsel->pmu->name is confusing, so get
rid of the former. Fix/improve evsel__sys_has_perf_metrics in the
process.
Ian Rogers (5):
perf evsel: Add alternate_hw_config and use in evsel__match
perf stat: Uniquify event name improvements
perf stat: Remove evlist__add_default_attrs use strings
perf evsel x86: Make evsel__has_perf_metrics work for legacy events
perf evsel: Remove pmu_name
tools/perf/arch/x86/util/evlist.c | 74 +-------
tools/perf/arch/x86/util/evsel.c | 35 +++-
tools/perf/builtin-diff.c | 6 +-
tools/perf/builtin-stat.c | 291 ++++++++++++------------------
tools/perf/tests/parse-events.c | 2 +-
tools/perf/util/evlist.c | 46 +----
tools/perf/util/evlist.h | 12 --
tools/perf/util/evsel.c | 28 ++-
tools/perf/util/evsel.h | 22 +--
tools/perf/util/metricgroup.c | 4 +-
tools/perf/util/parse-events.c | 52 ++++--
tools/perf/util/parse-events.h | 6 +
tools/perf/util/pmu.c | 6 +-
tools/perf/util/pmu.h | 2 +-
tools/perf/util/stat-display.c | 101 ++++++++---
tools/perf/util/stat-shadow.c | 14 +-
tools/perf/util/stat.c | 2 +-
17 files changed, 305 insertions(+), 398 deletions(-)
--
2.45.0.118.g7fe29c98d7-goog
There are cases where we want to match events like instructions and
cycles with legacy hardware values, in particular in stat-shadow's
hard coded metrics. An evsel's name isn't a good point of reference as
it gets altered; strstr would be too imprecise, and re-parsing the
event from its name is silly. Instead, hold the legacy hardware config
number, determined during parsing, in the evsel for this matching
case. Inline evsel__match2, which is only used in builtin-diff.
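As a rough illustration (not part of the patch), a caller in the style
of stat-shadow keeps using the evsel__match macro and now also matches
a sysfs/json event on a core PMU; the helper name and the "counter"
variable below are made up for the sketch:
```
#include "util/evsel.h"

/*
 * Illustrative sketch only: with alternate_hw_config saved at parse
 * time this returns true both for the legacy PERF_COUNT_HW_INSTRUCTIONS
 * event and for a sysfs/json "instructions" event on a core PMU.
 */
static bool counts_instructions(const struct evsel *counter)
{
	return evsel__match(counter, HARDWARE, HW_INSTRUCTIONS);
}
```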
Signed-off-by: Ian Rogers <[email protected]>
---
tools/perf/builtin-diff.c | 6 ++--
tools/perf/util/evsel.c | 21 ++++++++++++++
tools/perf/util/evsel.h | 19 ++-----------
tools/perf/util/parse-events.c | 51 +++++++++++++++++++++-------------
tools/perf/util/parse-events.h | 6 ++++
tools/perf/util/pmu.c | 6 +++-
tools/perf/util/pmu.h | 2 +-
7 files changed, 70 insertions(+), 41 deletions(-)
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 57d300d8e570..500a8f9e7c0d 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -483,13 +483,13 @@ static struct perf_diff pdiff = {
},
};
-static struct evsel *evsel_match(struct evsel *evsel,
- struct evlist *evlist)
+static struct evsel *evsel_match(struct evsel *evsel, struct evlist *evlist)
{
struct evsel *e;
evlist__for_each_entry(evlist, e) {
- if (evsel__match2(evsel, e))
+ if ((evsel->core.attr.type == e->core.attr.type) &&
+ (evsel->core.attr.config == e->core.attr.config))
return e;
}
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 3536404e9447..dfdb60c7a364 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -295,6 +295,7 @@ void evsel__init(struct evsel *evsel,
evsel->pmu_name = NULL;
evsel->group_pmu_name = NULL;
evsel->skippable = false;
+ evsel->alternate_hw_config = PERF_COUNT_HW_MAX;
}
struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx)
@@ -441,6 +442,8 @@ struct evsel *evsel__clone(struct evsel *orig)
if (evsel__copy_config_terms(evsel, orig) < 0)
goto out_err;
+ evsel->alternate_hw_config = orig->alternate_hw_config;
+
return evsel;
out_err:
@@ -1600,6 +1603,24 @@ static int evsel__read_group(struct evsel *leader, int cpu_map_idx, int thread)
return evsel__process_group_data(leader, cpu_map_idx, thread, data);
}
+bool __evsel__match(const struct evsel *evsel, u32 type, u64 config)
+{
+
+ u32 e_type = evsel->core.attr.type;
+ u64 e_config = evsel->core.attr.config;
+
+ if (e_type != type) {
+ return type == PERF_TYPE_HARDWARE && evsel->pmu && evsel->pmu->is_core &&
+ evsel->alternate_hw_config == config;
+ }
+
+ if ((type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE) &&
+ perf_pmus__supports_extended_type())
+ e_config &= PERF_HW_EVENT_MASK;
+
+ return e_config == config;
+}
+
int evsel__read_counter(struct evsel *evsel, int cpu_map_idx, int thread)
{
u64 read_format = evsel->core.attr.read_format;
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 517cff431de2..46e3589314f1 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -101,6 +101,7 @@ struct evsel {
int bpf_fd;
struct bpf_object *bpf_obj;
struct list_head config_terms;
+ u64 alternate_hw_config;
};
/*
@@ -354,26 +355,10 @@ u64 format_field__intval(struct tep_format_field *field, struct perf_sample *sam
struct tep_format_field *evsel__field(struct evsel *evsel, const char *name);
struct tep_format_field *evsel__common_field(struct evsel *evsel, const char *name);
-static inline bool __evsel__match(const struct evsel *evsel, u32 type, u64 config)
-{
- if (evsel->core.attr.type != type)
- return false;
-
- if ((type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE) &&
- perf_pmus__supports_extended_type())
- return (evsel->core.attr.config & PERF_HW_EVENT_MASK) == config;
-
- return evsel->core.attr.config == config;
-}
+bool __evsel__match(const struct evsel *evsel, u32 type, u64 config);
#define evsel__match(evsel, t, c) __evsel__match(evsel, PERF_TYPE_##t, PERF_COUNT_##c)
-static inline bool evsel__match2(struct evsel *e1, struct evsel *e2)
-{
- return (e1->core.attr.type == e2->core.attr.type) &&
- (e1->core.attr.config == e2->core.attr.config);
-}
-
int evsel__read_counter(struct evsel *evsel, int cpu_map_idx, int thread);
int __evsel__read_on_cpu(struct evsel *evsel, int cpu_map_idx, int thread, bool scale);
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 2b9ede311c31..c72e1722b1fb 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -230,7 +230,7 @@ __add_event(struct list_head *list, int *idx,
bool init_attr,
const char *name, const char *metric_id, struct perf_pmu *pmu,
struct list_head *config_terms, bool auto_merge_stats,
- const char *cpu_list)
+ const char *cpu_list, u64 alternate_hw_config)
{
struct evsel *evsel;
struct perf_cpu_map *cpus = pmu ? perf_cpu_map__get(pmu->cpus) :
@@ -266,6 +266,7 @@ __add_event(struct list_head *list, int *idx,
evsel->auto_merge_stats = auto_merge_stats;
evsel->pmu = pmu;
evsel->pmu_name = pmu ? strdup(pmu->name) : NULL;
+ evsel->alternate_hw_config = alternate_hw_config;
if (name)
evsel->name = strdup(name);
@@ -288,16 +289,19 @@ struct evsel *parse_events__add_event(int idx, struct perf_event_attr *attr,
{
return __add_event(/*list=*/NULL, &idx, attr, /*init_attr=*/false, name,
metric_id, pmu, /*config_terms=*/NULL,
- /*auto_merge_stats=*/false, /*cpu_list=*/NULL);
+ /*auto_merge_stats=*/false, /*cpu_list=*/NULL,
+ /*alternate_hw_config=*/PERF_COUNT_HW_MAX);
}
static int add_event(struct list_head *list, int *idx,
struct perf_event_attr *attr, const char *name,
- const char *metric_id, struct list_head *config_terms)
+ const char *metric_id, struct list_head *config_terms,
+ u64 alternate_hw_config)
{
return __add_event(list, idx, attr, /*init_attr*/true, name, metric_id,
/*pmu=*/NULL, config_terms,
- /*auto_merge_stats=*/false, /*cpu_list=*/NULL) ? 0 : -ENOMEM;
+ /*auto_merge_stats=*/false, /*cpu_list=*/NULL,
+ alternate_hw_config) ? 0 : -ENOMEM;
}
static int add_event_tool(struct list_head *list, int *idx,
@@ -312,7 +316,8 @@ static int add_event_tool(struct list_head *list, int *idx,
evsel = __add_event(list, idx, &attr, /*init_attr=*/true, /*name=*/NULL,
/*metric_id=*/NULL, /*pmu=*/NULL,
/*config_terms=*/NULL, /*auto_merge_stats=*/false,
- /*cpu_list=*/"0");
+ /*cpu_list=*/"0",
+ /*alternate_hw_config=*/PERF_COUNT_HW_MAX);
if (!evsel)
return -ENOMEM;
evsel->tool_event = tool_event;
@@ -446,7 +451,7 @@ bool parse_events__filter_pmu(const struct parse_events_state *parse_state,
static int parse_events_add_pmu(struct parse_events_state *parse_state,
struct list_head *list, struct perf_pmu *pmu,
const struct parse_events_terms *const_parsed_terms,
- bool auto_merge_stats);
+ bool auto_merge_stats, u64 alternate_hw_config);
int parse_events_add_cache(struct list_head *list, int *idx, const char *name,
struct parse_events_state *parse_state,
@@ -472,7 +477,8 @@ int parse_events_add_cache(struct list_head *list, int *idx, const char *name,
*/
ret = parse_events_add_pmu(parse_state, list, pmu,
parsed_terms,
- perf_pmu__auto_merge_stats(pmu));
+ perf_pmu__auto_merge_stats(pmu),
+ /*alternate_hw_config=*/PERF_COUNT_HW_MAX);
if (ret)
return ret;
continue;
@@ -503,7 +509,8 @@ int parse_events_add_cache(struct list_head *list, int *idx, const char *name,
if (__add_event(list, idx, &attr, /*init_attr*/true, config_name ?: name,
metric_id, pmu, &config_terms, /*auto_merge_stats=*/false,
- /*cpu_list=*/NULL) == NULL)
+ /*cpu_list=*/NULL,
+ /*alternate_hw_config=*/PERF_COUNT_HW_MAX) == NULL)
return -ENOMEM;
free_config_terms(&config_terms);
@@ -743,7 +750,7 @@ int parse_events_add_breakpoint(struct parse_events_state *parse_state,
name = get_config_name(head_config);
return add_event(list, &parse_state->idx, &attr, name, /*mertic_id=*/NULL,
- &config_terms);
+ &config_terms, /*alternate_hw_config=*/PERF_COUNT_HW_MAX);
}
static int check_type_val(struct parse_events_term *term,
@@ -1043,6 +1050,7 @@ static int config_term_pmu(struct perf_event_attr *attr,
if (perf_pmu__have_event(pmu, term->config)) {
term->type_term = PARSE_EVENTS__TERM_TYPE_USER;
term->no_value = true;
+ term->alternate_hw_config = true;
} else {
attr->type = PERF_TYPE_HARDWARE;
attr->config = term->val.num;
@@ -1354,8 +1362,9 @@ static int __parse_events_add_numeric(struct parse_events_state *parse_state,
name = get_config_name(head_config);
metric_id = get_config_metric_id(head_config);
ret = __add_event(list, &parse_state->idx, &attr, /*init_attr*/true, name,
- metric_id, pmu, &config_terms, /*auto_merge_stats=*/false,
- /*cpu_list=*/NULL) ? 0 : -ENOMEM;
+ metric_id, pmu, &config_terms, /*auto_merge_stats=*/false,
+ /*cpu_list=*/NULL, /*alternate_hw_config=*/PERF_COUNT_HW_MAX
+ ) == NULL ? -ENOMEM : 0;
free_config_terms(&config_terms);
return ret;
}
@@ -1413,7 +1422,7 @@ static bool config_term_percore(struct list_head *config_terms)
static int parse_events_add_pmu(struct parse_events_state *parse_state,
struct list_head *list, struct perf_pmu *pmu,
const struct parse_events_terms *const_parsed_terms,
- bool auto_merge_stats)
+ bool auto_merge_stats, u64 alternate_hw_config)
{
struct perf_event_attr attr;
struct perf_pmu_info info;
@@ -1450,7 +1459,7 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state,
/*init_attr=*/true, /*name=*/NULL,
/*metric_id=*/NULL, pmu,
/*config_terms=*/NULL, auto_merge_stats,
- /*cpu_list=*/NULL);
+ /*cpu_list=*/NULL, alternate_hw_config);
return evsel ? 0 : -ENOMEM;
}
@@ -1471,7 +1480,8 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state,
/* Look for event names in the terms and rewrite into format based terms. */
if (!parse_state->fake_pmu && perf_pmu__check_alias(pmu, &parsed_terms,
- &info, &alias_rewrote_terms, err)) {
+ &info, &alias_rewrote_terms,
+ &alternate_hw_config, err)) {
parse_events_terms__exit(&parsed_terms);
return -EINVAL;
}
@@ -1517,7 +1527,8 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state,
evsel = __add_event(list, &parse_state->idx, &attr, /*init_attr=*/true,
get_config_name(&parsed_terms),
get_config_metric_id(&parsed_terms), pmu,
- &config_terms, auto_merge_stats, /*cpu_list=*/NULL);
+ &config_terms, auto_merge_stats, /*cpu_list=*/NULL,
+ alternate_hw_config);
if (!evsel) {
parse_events_terms__exit(&parsed_terms);
return -ENOMEM;
@@ -1596,7 +1607,7 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
auto_merge_stats = perf_pmu__auto_merge_stats(pmu);
if (!parse_events_add_pmu(parse_state, list, pmu,
- &parsed_terms, auto_merge_stats)) {
+ &parsed_terms, auto_merge_stats, hw_config)) {
struct strbuf sb;
strbuf_init(&sb, /*hint=*/ 0);
@@ -1611,7 +1622,7 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
if (parse_state->fake_pmu) {
if (!parse_events_add_pmu(parse_state, list, parse_state->fake_pmu, &parsed_terms,
- /*auto_merge_stats=*/true)) {
+ /*auto_merge_stats=*/true, hw_config)) {
struct strbuf sb;
strbuf_init(&sb, /*hint=*/ 0);
@@ -1664,7 +1675,8 @@ int parse_events_multi_pmu_add_or_add_pmu(struct parse_events_state *parse_state
/* Attempt to add to list assuming event_or_pmu is a PMU name. */
pmu = parse_state->fake_pmu ?: perf_pmus__find(event_or_pmu);
if (pmu && !parse_events_add_pmu(parse_state, *listp, pmu, const_parsed_terms,
- /*auto_merge_stats=*/false))
+ /*auto_merge_stats=*/false,
+ /*alternate_hw_config=*/PERF_COUNT_HW_MAX))
return 0;
pmu = NULL;
@@ -1676,7 +1688,8 @@ int parse_events_multi_pmu_add_or_add_pmu(struct parse_events_state *parse_state
if (!parse_events_add_pmu(parse_state, *listp, pmu,
const_parsed_terms,
- auto_merge_stats)) {
+ auto_merge_stats,
+ /*alternate_hw_config=*/PERF_COUNT_HW_MAX)) {
ok++;
parse_state->wild_card_pmus = true;
}
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index e7ac1f13376d..8dd426b8aeb9 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -127,6 +127,12 @@ struct parse_events_term {
* value is assumed to be 1. An event name also has no value.
*/
bool no_value;
+ /**
+ * @alternate_hw_config: config is the event name but num is an
+ * alternate PERF_TYPE_HARDWARE config value which is often nice for the
+ * sake of quick matching.
+ */
+ bool alternate_hw_config;
};
struct parse_events_error {
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index b3b072feef02..8f02fc5365aa 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -1547,7 +1547,7 @@ static int check_info_data(struct perf_pmu *pmu,
*/
int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_terms,
struct perf_pmu_info *info, bool *rewrote_terms,
- struct parse_events_error *err)
+ u64 *alternate_hw_config, struct parse_events_error *err)
{
struct parse_events_term *term, *h;
struct perf_pmu_alias *alias;
@@ -1575,6 +1575,7 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_
NULL);
return ret;
}
+
*rewrote_terms = true;
ret = check_info_data(pmu, alias, info, err, term->err_term);
if (ret)
@@ -1583,6 +1584,9 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_
if (alias->per_pkg)
info->per_pkg = true;
+ if (term->alternate_hw_config)
+ *alternate_hw_config = term->val.num;
+
list_del_init(&term->list);
parse_events_term__delete(term);
}
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index 561716aa2b25..a4df58ce70ce 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -211,7 +211,7 @@ __u64 perf_pmu__format_bits(struct perf_pmu *pmu, const char *name);
int perf_pmu__format_type(struct perf_pmu *pmu, const char *name);
int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_terms,
struct perf_pmu_info *info, bool *rewrote_terms,
- struct parse_events_error *err);
+ u64 *alternate_hw_config, struct parse_events_error *err);
int perf_pmu__find_event(struct perf_pmu *pmu, const char *event, void *state, pmu_event_callback cb);
void perf_pmu_format__set_value(void *format, int config, unsigned long *bits);
--
2.45.0.118.g7fe29c98d7-goog
Without aggregation on Intel:
```
$ perf stat -e instructions,cycles ...
```
Will use "cycles" for the name of the legacy cycles event but as
"instructions" has a sysfs name it will and a "[cpu]" PMU suffix. This
often breaks things as the space between the event and the PMU name
look like an extra column. The existing uniquify logic was also
uniquifying in cases when all events are core and not with uncore
events, it was not correctly handling modifiers, etc.
Change the logic so that an initial pass is run that can disable
uniquification. For individual counters, disable uniquification in
more cases, such as for consistency with legacy events or for libpfm4
events. Don't use the "[pmu]" style suffix in uniquification; always
use "pmu/.../". Change how modifiers/terms are handled in the
uniquification so that they look like parse-able events.
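For illustration, assuming a core PMU named "cpu", the uniquified forms
sketched here (hypothetical event strings, not actual tool output) are:
```
instructions            ->  cpu/instructions/
instructions:u          ->  cpu/instructions/u
some_event/period=100/  ->  cpu/some_event,period=100/
```
Previously the fallback form was "instructions [cpu]", whose embedded
space broke column-based parsing of the output.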
This fixes "102: perf stat metrics (shadow stat) test:" that has been
failing due to "instructions [cpu]" breaking its column/awk logic when
values aren't aggregated. This started happening when instructions
could match a sysfs rather than a legacy event, so the fixes tag
reflects this.
Fixes: 617824a7f0f7 ("perf parse-events: Prefer sysfs/JSON hardware events over legacy")
Signed-off-by: Ian Rogers <[email protected]>
---
tools/perf/util/stat-display.c | 101 +++++++++++++++++++++++++--------
1 file changed, 78 insertions(+), 23 deletions(-)
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index bfc1d705f437..ea11e3437444 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -868,38 +868,66 @@ static void printout(struct perf_stat_config *config, struct outstate *os,
static void uniquify_event_name(struct evsel *counter)
{
- char *new_name;
- char *config;
- int ret = 0;
+ const char *name, *pmu_name;
+ char *new_name, *config;
+ int ret;
- if (counter->uniquified_name || counter->use_config_name ||
- !counter->pmu_name || !strncmp(evsel__name(counter), counter->pmu_name,
- strlen(counter->pmu_name)))
+ /* The evsel was already uniquified. */
+ if (counter->uniquified_name)
return;
- config = strchr(counter->name, '/');
+ /* Avoid checking to uniquify twice. */
+ counter->uniquified_name = true;
+
+ /* The evsel has a "name=" config term or is from libpfm. */
+ if (counter->use_config_name || counter->is_libpfm_event)
+ return;
+
+ /* Legacy no PMU event, don't uniquify. */
+ if (!counter->pmu ||
+ (counter->pmu->type < PERF_TYPE_MAX && counter->pmu->type != PERF_TYPE_RAW))
+ return;
+
+ /* A sysfs or json event replacing a legacy event, don't uniquify. */
+ if (counter->pmu->is_core && counter->alternate_hw_config != PERF_COUNT_HW_MAX)
+ return;
+
+ name = evsel__name(counter);
+ pmu_name = counter->pmu->name;
+ /* Already prefixed by the PMU name. */
+ if (!strncmp(name, pmu_name, strlen(pmu_name)))
+ return;
+
+ config = strchr(name, '/');
if (config) {
- if (asprintf(&new_name,
- "%s%s", counter->pmu_name, config) > 0) {
- free(counter->name);
- counter->name = new_name;
- }
- } else {
- if (evsel__is_hybrid(counter)) {
- ret = asprintf(&new_name, "%s/%s/",
- counter->pmu_name, counter->name);
+ int len = config - name;
+
+ if (config[1] == '/') {
+ /* case: event// */
+ ret = asprintf(&new_name, "%s/%.*s/%s", pmu_name, len, name, config + 2);
} else {
- ret = asprintf(&new_name, "%s [%s]",
- counter->name, counter->pmu_name);
+ /* case: event/.../ */
+ ret = asprintf(&new_name, "%s/%.*s,%s", pmu_name, len, name, config + 1);
}
+ } else {
+ config = strchr(name, ':');
+ if (config) {
+ /* case: event:.. */
+ int len = config - name;
- if (ret) {
- free(counter->name);
- counter->name = new_name;
+ ret = asprintf(&new_name, "%s/%.*s/%s", pmu_name, len, name, config + 1);
+ } else {
+ /* case: event */
+ ret = asprintf(&new_name, "%s/%s/", pmu_name, name);
}
}
-
- counter->uniquified_name = true;
+ if (ret > 0) {
+ free(counter->name);
+ counter->name = new_name;
+ } else {
+ /* ENOMEM from asprintf. */
+ counter->uniquified_name = false;
+ }
}
static bool hybrid_uniquify(struct evsel *evsel, struct perf_stat_config *config)
@@ -1541,6 +1569,31 @@ static void print_cgroup_counter(struct perf_stat_config *config, struct evlist
print_metric_end(config, os);
}
+static void disable_uniquify(struct evlist *evlist)
+{
+ struct evsel *counter;
+ struct perf_pmu *last_pmu = NULL;
+ bool first = true;
+
+ evlist__for_each_entry(evlist, counter) {
+ /* If PMUs vary then uniquify can be useful. */
+ if (!first && counter->pmu != last_pmu)
+ return;
+ first = false;
+ if (counter->pmu) {
+ /* Allow uniquify for uncore PMUs. */
+ if (!counter->pmu->is_core)
+ return;
+ /* Keep hybrid event names uniquified for clarity. */
+ if (perf_pmus__num_core_pmus() > 1)
+ return;
+ }
+ }
+ evlist__for_each_entry_continue(evlist, counter) {
+ counter->uniquified_name = true;
+ }
+}
+
void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *config,
struct target *_target, struct timespec *ts,
int argc, const char **argv)
@@ -1554,6 +1607,8 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf
.first = true,
};
+ disable_uniquify(evlist);
+
if (config->iostat_run)
evlist->selected = evlist__first(evlist);
--
2.45.0.118.g7fe29c98d7-goog
add_default_attributes would add evsels using pre-created
perf_event_attr; however, this needed fixing for hybrid as the
extended PMU type was necessary for each core PMU. The logic for this
was in an arch-specific x86 function and wasn't present for ARM,
meaning that default events weren't being opened on all PMUs on
ARM. Change the creation of the default events to use parse_events and
strings, as that will open the events on all PMUs.
Rather than try to detect events on PMUs before parsing, parse the
event but skip its output in stat-display.
The previous order of hardware events was: cycles,
stalled-cycles-frontend, stalled-cycles-backend, instructions. As
instructions is a more fundamental concept, the order is changed to:
instructions, cycles, stalled-cycles-frontend, stalled-cycles-backend.
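As a sketch, the default run now behaves roughly as if the user had
typed the command below (cpu-clock substitutes for task-clock when a
CPU target is given, and events a PMU doesn't support are parsed but
skipped in the output):
```
$ perf stat -e task-clock,context-switches,cpu-migrations,page-faults,\
instructions,cycles,stalled-cycles-frontend,stalled-cycles-backend,\
branches,branch-misses -- <workload>
```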
Closes: https://lore.kernel.org/lkml/CAP-5=fVABSBZnsmtRn1uF-k-G1GWM-L5SgiinhPTfHbQsKXb_g@mail.gmail.com/
Signed-off-by: Ian Rogers <[email protected]>
Reviewed-by: Kan Liang <[email protected]>
---
tools/perf/arch/x86/util/evlist.c | 74 +-------
tools/perf/builtin-stat.c | 291 ++++++++++++------------------
tools/perf/util/evlist.c | 43 -----
tools/perf/util/evlist.h | 12 --
tools/perf/util/stat-shadow.c | 4 +-
5 files changed, 117 insertions(+), 307 deletions(-)
diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c
index b1ce0c52d88d..fb8e314aa364 100644
--- a/tools/perf/arch/x86/util/evlist.c
+++ b/tools/perf/arch/x86/util/evlist.c
@@ -1,78 +1,10 @@
// SPDX-License-Identifier: GPL-2.0
-#include <stdio.h>
-#include "util/pmu.h"
-#include "util/pmus.h"
-#include "util/evlist.h"
-#include "util/parse-events.h"
-#include "util/event.h"
+#include <string.h>
+#include "../../../util/evlist.h"
+#include "../../../util/evsel.h"
#include "topdown.h"
#include "evsel.h"
-static int ___evlist__add_default_attrs(struct evlist *evlist,
- struct perf_event_attr *attrs,
- size_t nr_attrs)
-{
- LIST_HEAD(head);
- size_t i = 0;
-
- for (i = 0; i < nr_attrs; i++)
- event_attr_init(attrs + i);
-
- if (perf_pmus__num_core_pmus() == 1)
- return evlist__add_attrs(evlist, attrs, nr_attrs);
-
- for (i = 0; i < nr_attrs; i++) {
- struct perf_pmu *pmu = NULL;
-
- if (attrs[i].type == PERF_TYPE_SOFTWARE) {
- struct evsel *evsel = evsel__new(attrs + i);
-
- if (evsel == NULL)
- goto out_delete_partial_list;
- list_add_tail(&evsel->core.node, &head);
- continue;
- }
-
- while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
- struct perf_cpu_map *cpus;
- struct evsel *evsel;
-
- evsel = evsel__new(attrs + i);
- if (evsel == NULL)
- goto out_delete_partial_list;
- evsel->core.attr.config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT;
- cpus = perf_cpu_map__get(pmu->cpus);
- evsel->core.cpus = cpus;
- evsel->core.own_cpus = perf_cpu_map__get(cpus);
- evsel->pmu_name = strdup(pmu->name);
- list_add_tail(&evsel->core.node, &head);
- }
- }
-
- evlist__splice_list_tail(evlist, &head);
-
- return 0;
-
-out_delete_partial_list:
- {
- struct evsel *evsel, *n;
-
- __evlist__for_each_entry_safe(&head, n, evsel)
- evsel__delete(evsel);
- }
- return -1;
-}
-
-int arch_evlist__add_default_attrs(struct evlist *evlist,
- struct perf_event_attr *attrs,
- size_t nr_attrs)
-{
- if (!nr_attrs)
- return 0;
-
- return ___evlist__add_default_attrs(evlist, attrs, nr_attrs);
-}
-
int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs)
{
if (topdown_sys_has_perf_metrics() &&
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 35f79b48e8dc..758e22576b30 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1934,130 +1934,25 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
}
/*
- * Add default attributes, if there were no attributes specified or
+ * Add default events, if there were no attributes specified or
* if -d/--detailed, -d -d or -d -d -d is used:
*/
-static int add_default_attributes(void)
+static int add_default_events(void)
{
- struct perf_event_attr default_attrs0[] = {
-
- { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK },
- { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES },
- { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS },
- { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS },
-
- { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES },
-};
- struct perf_event_attr frontend_attrs[] = {
- { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND },
-};
- struct perf_event_attr backend_attrs[] = {
- { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND },
-};
- struct perf_event_attr default_attrs1[] = {
- { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS },
- { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
- { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES },
-
-};
-
-/*
- * Detailed stats (-d), covering the L1 and last level data caches:
- */
- struct perf_event_attr detailed_attrs[] = {
-
- { .type = PERF_TYPE_HW_CACHE,
- .config =
- PERF_COUNT_HW_CACHE_L1D << 0 |
- (PERF_COUNT_HW_CACHE_OP_READ << 8) |
- (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
-
- { .type = PERF_TYPE_HW_CACHE,
- .config =
- PERF_COUNT_HW_CACHE_L1D << 0 |
- (PERF_COUNT_HW_CACHE_OP_READ << 8) |
- (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
-
- { .type = PERF_TYPE_HW_CACHE,
- .config =
- PERF_COUNT_HW_CACHE_LL << 0 |
- (PERF_COUNT_HW_CACHE_OP_READ << 8) |
- (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
-
- { .type = PERF_TYPE_HW_CACHE,
- .config =
- PERF_COUNT_HW_CACHE_LL << 0 |
- (PERF_COUNT_HW_CACHE_OP_READ << 8) |
- (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
-};
-
-/*
- * Very detailed stats (-d -d), covering the instruction cache and the TLB caches:
- */
- struct perf_event_attr very_detailed_attrs[] = {
-
- { .type = PERF_TYPE_HW_CACHE,
- .config =
- PERF_COUNT_HW_CACHE_L1I << 0 |
- (PERF_COUNT_HW_CACHE_OP_READ << 8) |
- (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
-
- { .type = PERF_TYPE_HW_CACHE,
- .config =
- PERF_COUNT_HW_CACHE_L1I << 0 |
- (PERF_COUNT_HW_CACHE_OP_READ << 8) |
- (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
-
- { .type = PERF_TYPE_HW_CACHE,
- .config =
- PERF_COUNT_HW_CACHE_DTLB << 0 |
- (PERF_COUNT_HW_CACHE_OP_READ << 8) |
- (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
-
- { .type = PERF_TYPE_HW_CACHE,
- .config =
- PERF_COUNT_HW_CACHE_DTLB << 0 |
- (PERF_COUNT_HW_CACHE_OP_READ << 8) |
- (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
-
- { .type = PERF_TYPE_HW_CACHE,
- .config =
- PERF_COUNT_HW_CACHE_ITLB << 0 |
- (PERF_COUNT_HW_CACHE_OP_READ << 8) |
- (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
-
- { .type = PERF_TYPE_HW_CACHE,
- .config =
- PERF_COUNT_HW_CACHE_ITLB << 0 |
- (PERF_COUNT_HW_CACHE_OP_READ << 8) |
- (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
-
-};
+ const char *pmu = parse_events_option_args.pmu_filter ?: "all";
+ struct parse_events_error err;
+ struct evlist *evlist = evlist__new();
+ struct evsel *evsel;
+ int ret = 0;
-/*
- * Very, very detailed stats (-d -d -d), adding prefetch events:
- */
- struct perf_event_attr very_very_detailed_attrs[] = {
-
- { .type = PERF_TYPE_HW_CACHE,
- .config =
- PERF_COUNT_HW_CACHE_L1D << 0 |
- (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) |
- (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
-
- { .type = PERF_TYPE_HW_CACHE,
- .config =
- PERF_COUNT_HW_CACHE_L1D << 0 |
- (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) |
- (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
-};
+ if (!evlist)
+ return -ENOMEM;
- struct perf_event_attr default_null_attrs[] = {};
- const char *pmu = parse_events_option_args.pmu_filter ?: "all";
+ parse_events_error__init(&err);
/* Set attrs if no event is selected and !null_run: */
if (stat_config.null_run)
- return 0;
+ goto out;
if (transaction_run) {
/* Handle -T as -M transaction. Once platform specific metrics
@@ -2067,9 +1962,10 @@ static int add_default_attributes(void)
*/
if (!metricgroup__has_metric(pmu, "transaction")) {
pr_err("Missing transaction metrics\n");
- return -1;
+ ret = -1;
+ goto out;
}
- return metricgroup__parse_groups(evsel_list, pmu, "transaction",
+ ret = metricgroup__parse_groups(evlist, pmu, "transaction",
stat_config.metric_no_group,
stat_config.metric_no_merge,
stat_config.metric_no_threshold,
@@ -2077,6 +1973,7 @@ static int add_default_attributes(void)
stat_config.system_wide,
stat_config.hardware_aware_grouping,
&stat_config.metric_events);
+ goto out;
}
if (smi_cost) {
@@ -2084,26 +1981,29 @@ static int add_default_attributes(void)
if (sysfs__read_int(FREEZE_ON_SMI_PATH, &smi) < 0) {
pr_err("freeze_on_smi is not supported.\n");
- return -1;
+ ret = -1;
+ goto out;
}
if (!smi) {
if (sysfs__write_int(FREEZE_ON_SMI_PATH, 1) < 0) {
- fprintf(stderr, "Failed to set freeze_on_smi.\n");
- return -1;
+ pr_err("Failed to set freeze_on_smi.\n");
+ ret = -1;
+ goto out;
}
smi_reset = true;
}
if (!metricgroup__has_metric(pmu, "smi")) {
pr_err("Missing smi metrics\n");
- return -1;
+ ret = -1;
+ goto out;
}
if (!force_metric_only)
stat_config.metric_only = true;
- return metricgroup__parse_groups(evsel_list, pmu, "smi",
+ ret = metricgroup__parse_groups(evlist, pmu, "smi",
stat_config.metric_no_group,
stat_config.metric_no_merge,
stat_config.metric_no_threshold,
@@ -2111,6 +2011,7 @@ static int add_default_attributes(void)
stat_config.system_wide,
stat_config.hardware_aware_grouping,
&stat_config.metric_events);
+ goto out;
}
if (topdown_run) {
@@ -2123,21 +2024,23 @@ static int add_default_attributes(void)
if (!max_level) {
pr_err("Topdown requested but the topdown metric groups aren't present.\n"
"(See perf list the metric groups have names like TopdownL1)\n");
- return -1;
+ ret = -1;
+ goto out;
}
if (stat_config.topdown_level > max_level) {
pr_err("Invalid top-down metrics level. The max level is %u.\n", max_level);
- return -1;
- } else if (!stat_config.topdown_level)
+ ret = -1;
+ goto out;
+ } else if (!stat_config.topdown_level) {
stat_config.topdown_level = 1;
-
+ }
if (!stat_config.interval && !stat_config.metric_only) {
fprintf(stat_config.output,
"Topdown accuracy may decrease when measuring long periods.\n"
"Please print the result regularly, e.g. -I1000\n");
}
str[8] = stat_config.topdown_level + '0';
- if (metricgroup__parse_groups(evsel_list,
+ if (metricgroup__parse_groups(evlist,
pmu, str,
/*metric_no_group=*/false,
/*metric_no_merge=*/false,
@@ -2145,41 +2048,49 @@ static int add_default_attributes(void)
stat_config.user_requested_cpu_list,
stat_config.system_wide,
stat_config.hardware_aware_grouping,
- &stat_config.metric_events) < 0)
- return -1;
+ &stat_config.metric_events) < 0) {
+ ret = -1;
+ goto out;
+ }
}
if (!stat_config.topdown_level)
stat_config.topdown_level = 1;
- if (!evsel_list->core.nr_entries) {
+ if (!evlist->core.nr_entries && !evsel_list->core.nr_entries) {
/* No events so add defaults. */
if (target__has_cpu(&target))
- default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK;
+ ret = parse_events(evlist, "cpu-clock", &err);
+ else
+ ret = parse_events(evlist, "task-clock", &err);
+ if (ret)
+ goto out;
+
+ ret = parse_events(evlist,
+ "context-switches,"
+ "cpu-migrations,"
+ "page-faults,"
+ "instructions,"
+ "cycles,"
+ "stalled-cycles-frontend,"
+ "stalled-cycles-backend,"
+ "branches,"
+ "branch-misses",
+ &err);
+ if (ret)
+ goto out;
- if (evlist__add_default_attrs(evsel_list, default_attrs0) < 0)
- return -1;
- if (perf_pmus__have_event("cpu", "stalled-cycles-frontend")) {
- if (evlist__add_default_attrs(evsel_list, frontend_attrs) < 0)
- return -1;
- }
- if (perf_pmus__have_event("cpu", "stalled-cycles-backend")) {
- if (evlist__add_default_attrs(evsel_list, backend_attrs) < 0)
- return -1;
- }
- if (evlist__add_default_attrs(evsel_list, default_attrs1) < 0)
- return -1;
/*
* Add TopdownL1 metrics if they exist. To minimize
* multiplexing, don't request threshold computation.
*/
if (metricgroup__has_metric(pmu, "Default")) {
struct evlist *metric_evlist = evlist__new();
- struct evsel *metric_evsel;
-
- if (!metric_evlist)
- return -1;
+ if (!metric_evlist) {
+ ret = -ENOMEM;
+ goto out;
+ }
if (metricgroup__parse_groups(metric_evlist, pmu, "Default",
/*metric_no_group=*/false,
/*metric_no_merge=*/false,
@@ -2187,43 +2098,65 @@ static int add_default_attributes(void)
stat_config.user_requested_cpu_list,
stat_config.system_wide,
stat_config.hardware_aware_grouping,
- &stat_config.metric_events) < 0)
- return -1;
-
- evlist__for_each_entry(metric_evlist, metric_evsel) {
- metric_evsel->skippable = true;
- metric_evsel->default_metricgroup = true;
+ &stat_config.metric_events) < 0) {
+ ret = -1;
+ goto out;
}
- evlist__splice_list_tail(evsel_list, &metric_evlist->core.entries);
+
+ evlist__for_each_entry(metric_evlist, evsel)
+ evsel->default_metricgroup = true;
+
+ evlist__splice_list_tail(evlist, &metric_evlist->core.entries);
evlist__delete(metric_evlist);
}
-
- /* Platform specific attrs */
- if (evlist__add_default_attrs(evsel_list, default_null_attrs) < 0)
- return -1;
}
/* Detailed events get appended to the event list: */
- if (detailed_run < 1)
- return 0;
-
- /* Append detailed run extra attributes: */
- if (evlist__add_default_attrs(evsel_list, detailed_attrs) < 0)
- return -1;
-
- if (detailed_run < 2)
- return 0;
-
- /* Append very detailed run extra attributes: */
- if (evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0)
- return -1;
-
- if (detailed_run < 3)
- return 0;
-
- /* Append very, very detailed run extra attributes: */
- return evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
+ if (!ret && detailed_run >= 1) {
+ /*
+ * Detailed stats (-d), covering the L1 and last level data
+ * caches:
+ */
+ ret = parse_events(evlist,
+ "L1-dcache-loads,"
+ "L1-dcache-load-misses,"
+ "LLC-loads,"
+ "LLC-load-misses",
+ &err);
+ }
+ if (!ret && detailed_run >= 2) {
+ /*
+ * Very detailed stats (-d -d), covering the instruction cache
+ * and the TLB caches:
+ */
+ ret = parse_events(evlist,
+ "L1-icache-loads,"
+ "L1-icache-load-misses,"
+ "dTLB-loads,"
+ "dTLB-load-misses,"
+ "iTLB-loads,"
+ "iTLB-load-misses",
+ &err);
+ }
+ if (!ret && detailed_run >= 3) {
+ /*
+ * Very, very detailed stats (-d -d -d), adding prefetch events:
+ */
+ ret = parse_events(evlist,
+ "L1-dcache-prefetches,"
+ "L1-dcache-prefetch-misses",
+ &err);
+ }
+out:
+ if (!ret) {
+ evlist__for_each_entry(evlist, evsel)
+ evsel->skippable = true;
+ }
+ parse_events_error__exit(&err);
+ evlist__splice_list_tail(evsel_list, &evlist->core.entries);
+ evlist__delete(evlist);
+ return ret;
}
static const char * const stat_record_usage[] = {
@@ -2731,7 +2664,7 @@ int cmd_stat(int argc, const char **argv)
}
}
- if (add_default_attributes())
+ if (add_default_events())
goto out;
if (stat_config.cgroup_list) {
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 55a300a0977b..de498ba5ac1c 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -314,49 +314,6 @@ struct evsel *evlist__add_sched_switch(struct evlist *evlist, bool system_wide)
}
#endif
-int evlist__add_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs)
-{
- struct evsel *evsel, *n;
- LIST_HEAD(head);
- size_t i;
-
- for (i = 0; i < nr_attrs; i++) {
- evsel = evsel__new_idx(attrs + i, evlist->core.nr_entries + i);
- if (evsel == NULL)
- goto out_delete_partial_list;
- list_add_tail(&evsel->core.node, &head);
- }
-
- evlist__splice_list_tail(evlist, &head);
-
- return 0;
-
-out_delete_partial_list:
- __evlist__for_each_entry_safe(&head, n, evsel)
- evsel__delete(evsel);
- return -1;
-}
-
-int __evlist__add_default_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs)
-{
- size_t i;
-
- for (i = 0; i < nr_attrs; i++)
- event_attr_init(attrs + i);
-
- return evlist__add_attrs(evlist, attrs, nr_attrs);
-}
-
-__weak int arch_evlist__add_default_attrs(struct evlist *evlist,
- struct perf_event_attr *attrs,
- size_t nr_attrs)
-{
- if (!nr_attrs)
- return 0;
-
- return __evlist__add_default_attrs(evlist, attrs, nr_attrs);
-}
-
struct evsel *evlist__find_tracepoint_by_id(struct evlist *evlist, int id)
{
struct evsel *evsel;
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index cb91dc9117a2..947a78cbd7f0 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -100,18 +100,6 @@ void evlist__delete(struct evlist *evlist);
void evlist__add(struct evlist *evlist, struct evsel *entry);
void evlist__remove(struct evlist *evlist, struct evsel *evsel);
-int evlist__add_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs);
-
-int __evlist__add_default_attrs(struct evlist *evlist,
- struct perf_event_attr *attrs, size_t nr_attrs);
-
-int arch_evlist__add_default_attrs(struct evlist *evlist,
- struct perf_event_attr *attrs,
- size_t nr_attrs);
-
-#define evlist__add_default_attrs(evlist, array) \
- arch_evlist__add_default_attrs(evlist, array, ARRAY_SIZE(array))
-
int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs);
int evlist__add_dummy(struct evlist *evlist);
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 3466aa952442..ffdd53a07654 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -76,7 +76,7 @@ void perf_stat__reset_shadow_stats(void)
memset(&ru_stats, 0, sizeof(ru_stats));
}
-static enum stat_type evsel__stat_type(const struct evsel *evsel)
+static enum stat_type evsel__stat_type(struct evsel *evsel)
{
/* Fake perf_hw_cache_op_id values for use with evsel__match. */
u64 PERF_COUNT_hw_cache_l1d_miss = PERF_COUNT_HW_CACHE_L1D |
@@ -152,7 +152,7 @@ static const char *get_ratio_color(const double ratios[3], double val)
static double find_stat(const struct evsel *evsel, int aggr_idx, enum stat_type type)
{
- const struct evsel *cur;
+ struct evsel *cur;
int evsel_ctx = evsel_context(evsel);
evlist__for_each_entry(evsel->evlist, cur) {
--
2.45.0.118.g7fe29c98d7-goog
Use the PMU interface to better detect the core PMU for legacy
events. Look for the slots event on the core PMU when it is appropriate
for the event.
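For reference, a minimal sketch (not a hunk from this patch) of how a
legacy hardware config carries the extended PMU type that the new code
follows; the helper names are hypothetical:
```
#include <linux/perf_event.h>

/* The extended (core) PMU type packed into the upper bits of attr.config. */
static __u32 extended_pmu_type(__u64 config)
{
	return config >> PERF_PMU_TYPE_SHIFT;	/* 0 means no extension */
}

/* The legacy hardware event id held in the lower bits. */
static __u64 legacy_hw_id(__u64 config)
{
	return config & PERF_HW_EVENT_MASK;	/* e.g. PERF_COUNT_HW_INSTRUCTIONS */
}
```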
Signed-off-by: Ian Rogers <[email protected]>
---
tools/perf/arch/x86/util/evsel.c | 31 ++++++++++++++++++++++++++-----
1 file changed, 26 insertions(+), 5 deletions(-)
diff --git a/tools/perf/arch/x86/util/evsel.c b/tools/perf/arch/x86/util/evsel.c
index 090d0f371891..1eaae8819c5e 100644
--- a/tools/perf/arch/x86/util/evsel.c
+++ b/tools/perf/arch/x86/util/evsel.c
@@ -21,7 +21,8 @@ void arch_evsel__set_sample_weight(struct evsel *evsel)
/* Check whether the evsel's PMU supports the perf metrics */
bool evsel__sys_has_perf_metrics(const struct evsel *evsel)
{
- const char *pmu_name = evsel->pmu_name ? evsel->pmu_name : "cpu";
+ struct perf_pmu *pmu;
+ u32 type = evsel->core.attr.type;
/*
* The PERF_TYPE_RAW type is the core PMU type, e.g., "cpu" PMU
@@ -31,11 +32,31 @@ bool evsel__sys_has_perf_metrics(const struct evsel *evsel)
* Checking both the PERF_TYPE_RAW type and the slots event
* should be good enough to detect the perf metrics feature.
*/
- if ((evsel->core.attr.type == PERF_TYPE_RAW) &&
- perf_pmus__have_event(pmu_name, "slots"))
- return true;
+again:
+ switch (type) {
+ case PERF_TYPE_HARDWARE:
+ case PERF_TYPE_HW_CACHE:
+ type = evsel->core.attr.config >> PERF_PMU_TYPE_SHIFT;
+ if (type)
+ goto again;
+ break;
+ case PERF_TYPE_RAW:
+ break;
+ default:
+ return false;
+ }
+
+ pmu = evsel->pmu;
+ if (pmu == &perf_pmu__fake)
+ pmu = NULL;
- return false;
+ if (!pmu) {
+ while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
+ if (pmu->type == PERF_TYPE_RAW)
+ break;
+ }
+ }
+ return pmu && perf_pmu__have_event(pmu, "slots");
}
bool arch_evsel__must_be_in_group(const struct evsel *evsel)
--
2.45.0.118.g7fe29c98d7-goog
"evsel->pmu_name" is only ever assigned a strdup of "pmu->name", a
strdup of "evsel->pmu_name" or NULL. As such, prefer to use
"pmu->name" directly and even to directly compare PMUs than PMU
names. For safety, add some additional NULL tests.
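A minimal sketch of the replacement pattern (illustrative only; the
helper name and the "cpu" fallback are assumptions, mirroring the
arch_evsel__hw_name hunk below):
```
#include "util/evsel.h"
#include "util/pmu.h"

/* Prefer the PMU pointer over the removed evsel->pmu_name string. */
static const char *evsel_pmu_name(const struct evsel *evsel)
{
	return evsel->pmu ? evsel->pmu->name : "cpu";	/* fallback assumed */
}
```
PMUs themselves are then compared by pointer, e.g.
"evsel_a->pmu == evsel_b->pmu", rather than with strcmp on names.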
Signed-off-by: Ian Rogers <[email protected]>
---
tools/perf/arch/x86/util/evsel.c | 4 ++--
tools/perf/tests/parse-events.c | 2 +-
tools/perf/util/evlist.c | 3 ++-
tools/perf/util/evsel.c | 7 -------
tools/perf/util/evsel.h | 3 +--
tools/perf/util/metricgroup.c | 4 ++--
tools/perf/util/parse-events.c | 1 -
tools/perf/util/stat-shadow.c | 10 +++++-----
tools/perf/util/stat.c | 2 +-
9 files changed, 14 insertions(+), 22 deletions(-)
diff --git a/tools/perf/arch/x86/util/evsel.c b/tools/perf/arch/x86/util/evsel.c
index 1eaae8819c5e..fe8a817d2061 100644
--- a/tools/perf/arch/x86/util/evsel.c
+++ b/tools/perf/arch/x86/util/evsel.c
@@ -84,7 +84,7 @@ int arch_evsel__hw_name(struct evsel *evsel, char *bf, size_t size)
return scnprintf(bf, size, "%s", event_name);
return scnprintf(bf, size, "%s/%s/",
- evsel->pmu_name ? evsel->pmu_name : "cpu",
+ evsel->pmu ? evsel->pmu->name : "cpu",
event_name);
}
@@ -129,7 +129,7 @@ int arch_evsel__open_strerror(struct evsel *evsel, char *msg, size_t size)
return 0;
if (!evsel->core.attr.precise_ip &&
- !(evsel->pmu_name && !strncmp(evsel->pmu_name, "ibs", 3)))
+ !(evsel->pmu && !strncmp(evsel->pmu->name, "ibs", 3)))
return 0;
/* More verbose IBS errors. */
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index 993e482f094c..c7c0edf3d4ca 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -730,7 +730,7 @@ static int test__checkevent_pmu_events(struct evlist *evlist)
TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type ||
- strcmp(evsel->pmu_name, "cpu"));
+ strcmp(evsel->pmu->name, "cpu"));
TEST_ASSERT_VAL("wrong exclude_user",
!evsel->core.attr.exclude_user);
TEST_ASSERT_VAL("wrong exclude_kernel",
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index de498ba5ac1c..2e3f4f876792 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -2504,7 +2504,8 @@ void evlist__uniquify_name(struct evlist *evlist)
else
attributes = empty_attributes;
- if (asprintf(&new_name, "%s/%s/%s", pos->pmu_name, pos->name, attributes + 1)) {
+ if (asprintf(&new_name, "%s/%s/%s", pos->pmu ? pos->pmu->name : "",
+ pos->name, attributes + 1)) {
free(pos->name);
pos->name = new_name;
} else {
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index dfdb60c7a364..96b545f4d8b0 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -292,7 +292,6 @@ void evsel__init(struct evsel *evsel,
evsel->metric_events = NULL;
evsel->per_pkg_mask = NULL;
evsel->collect_stat = false;
- evsel->pmu_name = NULL;
evsel->group_pmu_name = NULL;
evsel->skippable = false;
evsel->alternate_hw_config = PERF_COUNT_HW_MAX;
@@ -390,11 +389,6 @@ struct evsel *evsel__clone(struct evsel *orig)
if (evsel->group_name == NULL)
goto out_err;
}
- if (orig->pmu_name) {
- evsel->pmu_name = strdup(orig->pmu_name);
- if (evsel->pmu_name == NULL)
- goto out_err;
- }
if (orig->group_pmu_name) {
evsel->group_pmu_name = strdup(orig->group_pmu_name);
if (evsel->group_pmu_name == NULL)
@@ -1481,7 +1475,6 @@ void evsel__exit(struct evsel *evsel)
zfree(&evsel->group_name);
zfree(&evsel->name);
zfree(&evsel->filter);
- zfree(&evsel->pmu_name);
zfree(&evsel->group_pmu_name);
zfree(&evsel->unit);
zfree(&evsel->metric_id);
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 46e3589314f1..2f99cc8bfa7d 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -72,7 +72,6 @@ struct evsel {
struct {
char *name;
char *group_name;
- const char *pmu_name;
const char *group_pmu_name;
#ifdef HAVE_LIBTRACEEVENT
struct tep_event *tp_format;
@@ -169,7 +168,7 @@ struct evsel {
unsigned long open_flags;
int precise_ip_original;
- /* for missing_features */
+ /* The PMU the event is from. Used for missing_features, PMU name, etc. */
struct perf_pmu *pmu;
};
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 9be406524617..b0700db6e1cc 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -297,8 +297,8 @@ static int setup_metric_events(const char *pmu, struct hashmap *ids,
struct expr_id_data *val_ptr;
/* Don't match events for the wrong hybrid PMU. */
- if (!all_pmus && ev->pmu_name && evsel__is_hybrid(ev) &&
- strcmp(ev->pmu_name, pmu))
+ if (!all_pmus && ev->pmu && evsel__is_hybrid(ev) &&
+ strcmp(ev->pmu->name, pmu))
continue;
/*
* Check for duplicate events with the same name. For
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index c72e1722b1fb..01d502d802ee 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -265,7 +265,6 @@ __add_event(struct list_head *list, int *idx,
evsel->core.is_pmu_core = pmu ? pmu->is_core : false;
evsel->auto_merge_stats = auto_merge_stats;
evsel->pmu = pmu;
- evsel->pmu_name = pmu ? strdup(pmu->name) : NULL;
evsel->alternate_hw_config = alternate_hw_config;
if (name)
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index ffdd53a07654..7bf39b069c5b 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -566,7 +566,7 @@ static void perf_stat__print_metricgroup_header(struct perf_stat_config *config,
{
bool need_full_name = perf_pmus__num_core_pmus() > 1;
static const char *last_name;
- static const char *last_pmu;
+ static const struct perf_pmu *last_pmu;
char full_name[64];
/*
@@ -577,21 +577,21 @@ static void perf_stat__print_metricgroup_header(struct perf_stat_config *config,
* different metric events.
*/
if (last_name && !strcmp(last_name, name)) {
- if (!need_full_name || !strcmp(last_pmu, evsel->pmu_name)) {
+ if (!need_full_name || last_pmu == evsel->pmu) {
out->print_metricgroup_header(config, ctxp, NULL);
return;
}
}
- if (need_full_name)
- scnprintf(full_name, sizeof(full_name), "%s (%s)", name, evsel->pmu_name);
+ if (need_full_name && evsel->pmu)
+ scnprintf(full_name, sizeof(full_name), "%s (%s)", name, evsel->pmu->name);
else
scnprintf(full_name, sizeof(full_name), "%s", name);
out->print_metricgroup_header(config, ctxp, full_name);
last_name = name;
- last_pmu = evsel->pmu_name;
+ last_pmu = evsel->pmu;
}
/**
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 0bd5467389e4..7c2ccdcc3fdb 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -553,7 +553,7 @@ static bool evsel__is_alias(struct evsel *evsel_a, struct evsel *evsel_b)
if (evsel__is_clock(evsel_a) != evsel__is_clock(evsel_b))
return false;
- return !!strcmp(evsel_a->pmu_name, evsel_b->pmu_name);
+ return evsel_a->pmu != evsel_b->pmu;
}
static void evsel__merge_aliases(struct evsel *evsel)
--
2.45.0.118.g7fe29c98d7-goog
On Thu, May 9, 2024 at 10:38 PM Ian Rogers <[email protected]> wrote:
>
> For the default events, parse from strings rather than use pre-cooked
> perf_event_attr. This fixes non-x86 heterogeneous CPUs where legacy
> hardware events wouldn't be opened for all PMUs. v2 was previously
> just patch 3 but it is extended in v3 to incorporate related fixes.
>
> When a sysfs/json event is used in preference to a legacy event, allow
> evsel__match to still function using a saved hardware config
> number. This fixes hard coded metrics in stat-shadow for events like
> "instructions" on Intel that have a sysfs file.
>
> Fix/improve uniquifying event names fixing the test "102: perf stat
> metrics (shadow stat) test:" that was broken by a formatting issue
> when the sysfs instructions event was used.
>
> Having evsel->pmu_name and evsel->pmu->name is confusing, get rid of
> the former. Fix/improve evsel__sys_has_perf_metrics in the process.
>
> Ian Rogers (5):
> perf evsel: Add alternate_hw_config and use in evsel__match
> perf stat: Uniquify event name improvements
> perf stat: Remove evlist__add_default_attrs use strings
> perf evsel x86: Make evsel__has_perf_metrics work for legacy events
> perf evsel: Remove pmu_name
Hopefully the first 3 patches here can be a priority given the fixes.
Thanks,
Ian
> tools/perf/arch/x86/util/evlist.c | 74 +-------
> tools/perf/arch/x86/util/evsel.c | 35 +++-
> tools/perf/builtin-diff.c | 6 +-
> tools/perf/builtin-stat.c | 291 ++++++++++++------------------
> tools/perf/tests/parse-events.c | 2 +-
> tools/perf/util/evlist.c | 46 +----
> tools/perf/util/evlist.h | 12 --
> tools/perf/util/evsel.c | 28 ++-
> tools/perf/util/evsel.h | 22 +--
> tools/perf/util/metricgroup.c | 4 +-
> tools/perf/util/parse-events.c | 52 ++++--
> tools/perf/util/parse-events.h | 6 +
> tools/perf/util/pmu.c | 6 +-
> tools/perf/util/pmu.h | 2 +-
> tools/perf/util/stat-display.c | 101 ++++++++---
> tools/perf/util/stat-shadow.c | 14 +-
> tools/perf/util/stat.c | 2 +-
> 17 files changed, 305 insertions(+), 398 deletions(-)
>
> --
> 2.45.0.118.g7fe29c98d7-goog
>
On Mon, May 13, 2024 at 9:48 PM Ian Rogers <[email protected]> wrote:
>
> On Thu, May 9, 2024 at 10:38 PM Ian Rogers <[email protected]> wrote:
> >
> > For the default events, parse from strings rather than use pre-cooked
> > perf_event_attr. This fixes non-x86 heterogeneous CPUs where legacy
> > hardware events wouldn't be opened for all PMUs. v2 was previously
> > just patch 3 but it is extended in v3 to incorporate related fixes.
> >
> > When a sysfs/json event is used in preference to a legacy event, allow
> > evsel__match to still function using a saved hardware config
> > number. This fixes hard coded metrics in stat-shadow for events like
> > "instructions" on Intel that have a sysfs file.
> >
> > Fix/improve uniquifying event names fixing the test "102: perf stat
> > metrics (shadow stat) test:" that was broken by a formatting issue
> > when the sysfs instructions event was used.
> >
> > Having evsel->pmu_name and evsel->pmu->name is confusing, get rid of
> > the former. Fix/improve evsel__sys_has_perf_metrics in the process.
> >
> > Ian Rogers (5):
> > perf evsel: Add alternate_hw_config and use in evsel__match
> > perf stat: Uniquify event name improvements
> > perf stat: Remove evlist__add_default_attrs use strings
> > perf evsel x86: Make evsel__has_perf_metrics work for legacy events
> > perf evsel: Remove pmu_name
>
> Hopefully the first 3 patches here can be a priority given the fixes.
Testing:
https://lore.kernel.org/lkml/[email protected]/
git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools.git
tags/perf-tools-for-v6.10-1-2024-05-21
I see this failure:
```
102: perf stat metrics (shadow stat) test:
--- start ---
test child forked, pid 785992
IPC is different: 1.31 != (2725291544 / 2085879817)
---- end(-1) ----
```
Which is addressed in the first 3 patches here.
Thanks,
Ian
> Thanks,
> Ian
>
> > tools/perf/arch/x86/util/evlist.c | 74 +-------
> > tools/perf/arch/x86/util/evsel.c | 35 +++-
> > tools/perf/builtin-diff.c | 6 +-
> > tools/perf/builtin-stat.c | 291 ++++++++++++------------------
> > tools/perf/tests/parse-events.c | 2 +-
> > tools/perf/util/evlist.c | 46 +----
> > tools/perf/util/evlist.h | 12 --
> > tools/perf/util/evsel.c | 28 ++-
> > tools/perf/util/evsel.h | 22 +--
> > tools/perf/util/metricgroup.c | 4 +-
> > tools/perf/util/parse-events.c | 52 ++++--
> > tools/perf/util/parse-events.h | 6 +
> > tools/perf/util/pmu.c | 6 +-
> > tools/perf/util/pmu.h | 2 +-
> > tools/perf/util/stat-display.c | 101 ++++++++---
> > tools/perf/util/stat-shadow.c | 14 +-
> > tools/perf/util/stat.c | 2 +-
> > 17 files changed, 305 insertions(+), 398 deletions(-)
> >
> > --
> > 2.45.0.118.g7fe29c98d7-goog
> >
On 10/05/2024 06:37, Ian Rogers wrote:
> add_default_atttributes would add evsels by having pre-created
> perf_event_attr, however, this needed fixing for hybrid as the
> extended PMU type was necessary for each core PMU. The logic for this
> was in an arch specific x86 function and wasn't present for ARM,
> meaning that default events weren't being opened on all PMUs on
> ARM. Change the creation of the default events to use parse_events and
> strings as that will open the events on all PMUs.
>
> Rather than try to detect events on PMUs before parsing, parse the
> event but skip its output in stat-display.
>
> The previous order of hardware events was: cycles,
> stalled-cycles-frontend, stalled-cycles-backend, instructions. As
> instructions is a more fundamental concept the order is changed to:
> instructions, cycles, stalled-cycles-frontend, stalled-cycles-backend.
>
> Closes: https://lore.kernel.org/lkml/CAP-5=fVABSBZnsmtRn1uF-k-G1GWM-L5SgiinhPTfHbQsKXb_g@mail.gmail.com/
Taking a look at this one now. I think some example commands and outputs
in the commit message would be helpful because there are quite a few
different things mentioned in the closes link.
But I'm assuming this is just for the command without specifying an event:
$ perf stat
I didn't realise that wasn't working properly and I'd missed that Mark
spotted it in that link.
On Wed, May 29, 2024 at 8:39 AM James Clark <[email protected]> wrote:
>
>
>
> On 10/05/2024 06:37, Ian Rogers wrote:
> > add_default_atttributes would add evsels by having pre-created
> > perf_event_attr, however, this needed fixing for hybrid as the
> > extended PMU type was necessary for each core PMU. The logic for this
> > was in an arch specific x86 function and wasn't present for ARM,
> > meaning that default events weren't being opened on all PMUs on
> > ARM. Change the creation of the default events to use parse_events and
> > strings as that will open the events on all PMUs.
> >
> > Rather than try to detect events on PMUs before parsing, parse the
> > event but skip its output in stat-display.
> >
> > The previous order of hardware events was: cycles,
> > stalled-cycles-frontend, stalled-cycles-backend, instructions. As
> > instructions is a more fundamental concept the order is changed to:
> > instructions, cycles, stalled-cycles-frontend, stalled-cycles-backend.
> >
> > Closes: https://lore.kernel.org/lkml/CAP-5=fVABSBZnsmtRn1uF-k-G1GWM-L5SgiinhPTfHbQsKXb_g@mail.gmail.com/
>
> Taking a look at this one now. I think some example commands and outputs
> in the commit message would be helpful because there are quite a few
> different things mentioned in the closes link.
>
> But I'm assuming this is just for the command without specifying an event:
>
> $ perf stat
>
> I didn't realise that wasn't working properly and I'd missed that Mark
> spotted it in that link.
Hi James,
Do you want to take on owning fixing this? My concern is that by using
event parsing:
+ "context-switches,"
+ "cpu-migrations,"
+ "page-faults,"
+ "instructions,"
+ "cycles,"
+ "stalled-cycles-frontend,"
+ "stalled-cycles-backend,"
+ "branches,"
+ "branch-misses",
any of the names could conflict with something advertised on an ARM
PMU somewhere. Clearly cycles has already proved to be controversial
and broken for perf record on Neoverse.
Thanks,
Ian
On Wed, May 29, 2024 at 10:39 AM Ian Rogers <[email protected]> wrote:
>
> On Wed, May 29, 2024 at 8:39 AM James Clark <[email protected]> wrote:
> >
> >
> >
> > On 10/05/2024 06:37, Ian Rogers wrote:
> > > add_default_atttributes would add evsels by having pre-created
> > > perf_event_attr, however, this needed fixing for hybrid as the
> > > extended PMU type was necessary for each core PMU. The logic for this
> > > was in an arch specific x86 function and wasn't present for ARM,
> > > meaning that default events weren't being opened on all PMUs on
> > > ARM. Change the creation of the default events to use parse_events and
> > > strings as that will open the events on all PMUs.
> > >
> > > Rather than try to detect events on PMUs before parsing, parse the
> > > event but skip its output in stat-display.
> > >
> > > The previous order of hardware events was: cycles,
> > > stalled-cycles-frontend, stalled-cycles-backend, instructions. As
> > > instructions is a more fundamental concept the order is changed to:
> > > instructions, cycles, stalled-cycles-frontend, stalled-cycles-backend.
> > >
> > > Closes: https://lore.kernel.org/lkml/CAP-5=fVABSBZnsmtRn1uF-k-G1GWM-L5SgiinhPTfHbQsKXb_g@mail.gmail.com/
> >
> > Taking a look at this one now. I think some example commands and outputs
> > in the commit message would be helpful because there are quite a few
> > different things mentioned in the closes link.
> >
> > But I'm assuming this is just for the command without specifying an event:
> >
> > $ perf stat
> >
> > I didn't realise that wasn't working properly and I'd missed that Mark
> > spotted it in that link.
>
> Hi James,
>
> Do you want to take on owning fixing this? My concern is that by using
> event parsing:
>
> + "context-switches,"
> + "cpu-migrations,"
> + "page-faults,"
> + "instructions,"
> + "cycles,"
> + "stalled-cycles-frontend,"
> + "stalled-cycles-backend,"
> + "branches,"
> + "branch-misses",
>
> any of the names could conflict with something advertised on an ARM
> PMU somewhere. Clearly cycles has already proved to be controversial
> and broken for perf record on Neoverse.
Oh and these events are also broken on ARM M? PMUs with the revert
that's in v6.10, so I don't know how to go about fixing this.
Thanks,
Ian
On 29/05/2024 19:18, Ian Rogers wrote:
> On Wed, May 29, 2024 at 10:39 AM Ian Rogers <[email protected]> wrote:
>>
>> On Wed, May 29, 2024 at 8:39 AM James Clark <[email protected]> wrote:
>>>
>>>
>>>
>>> On 10/05/2024 06:37, Ian Rogers wrote:
>>>> add_default_atttributes would add evsels by having pre-created
>>>> perf_event_attr, however, this needed fixing for hybrid as the
>>>> extended PMU type was necessary for each core PMU. The logic for this
>>>> was in an arch specific x86 function and wasn't present for ARM,
>>>> meaning that default events weren't being opened on all PMUs on
>>>> ARM. Change the creation of the default events to use parse_events and
>>>> strings as that will open the events on all PMUs.
>>>>
>>>> Rather than try to detect events on PMUs before parsing, parse the
>>>> event but skip its output in stat-display.
>>>>
>>>> The previous order of hardware events was: cycles,
>>>> stalled-cycles-frontend, stalled-cycles-backend, instructions. As
>>>> instructions is a more fundamental concept the order is changed to:
>>>> instructions, cycles, stalled-cycles-frontend, stalled-cycles-backend.
>>>>
>>>> Closes: https://lore.kernel.org/lkml/CAP-5=fVABSBZnsmtRn1uF-k-G1GWM-L5SgiinhPTfHbQsKXb_g@mail.gmail.com/
>>>
>>> Taking a look at this one now. I think some example commands and outputs
>>> in the commit message would be helpful because there are quite a few
>>> different things mentioned in the closes link.
>>>
>>> But I'm assuming this is just for the command without specifying an event:
>>>
>>> $ perf stat
>>>
>>> I didn't realise that wasn't working properly and I'd missed that Mark
>>> spotted it in that link.
>>
>> Hi James,
>>
>> Do you want to take on owning fixing this? My concern is that by using
>> event parsing:
>>
>> + "context-switches,"
>> + "cpu-migrations,"
>> + "page-faults,"
>> + "instructions,"
>> + "cycles,"
>> + "stalled-cycles-frontend,"
>> + "stalled-cycles-backend,"
>> + "branches,"
>> + "branch-misses",
>>
>> any of the names could conflict with something advertised on an ARM
>> PMU somewhere. Clearly cycles has already proved to be controversial
>> and broken for perf record on Neoverse.
>
> Oh and these events are also broken on ARM M? PMUs with the revert
> that's in v6.10, so I don't know how to go about fixing this.
>
> Thanks,
> Ian
I'll take it, yep. But the fix probably needs to take into account
whatever else we decide with the other issue. Conflicting names might
not actually be an issue if we work around that with the other change.
But I'm still getting up to speed with the other one.
James