2024-04-06 07:38:17

by Ian Rogers

Subject: [PATCH v1] perf stat: Remove evlist__add_default_attrs use strings

add_default_attributes would add evsels from pre-created
perf_event_attr values; however, this needed fixing for hybrid as the
extended PMU type was necessary for each core PMU. The logic for this
was in an arch-specific x86 function and wasn't present for ARM,
meaning that default events weren't being opened on all PMUs on
ARM. Change the creation of the default events to use parse_events and
strings, as that will open the events on all PMUs.

Rather than try to detect events on PMUs before parsing, parse the
event but skip its output in stat-display.

Closes: https://lore.kernel.org/lkml/CAP-5=fVABSBZnsmtRn1uF-k-G1GWM-L5SgiinhPTfHbQsKXb_g@mail.gmail.com/
Signed-off-by: Ian Rogers <[email protected]>
---
 tools/perf/arch/x86/util/evlist.c |  74 +-------
 tools/perf/builtin-stat.c         | 291 ++++++++++++------------------
 tools/perf/util/evlist.c          |  43 -----
 tools/perf/util/evlist.h          |  12 --
 tools/perf/util/stat-display.c    |   3 +
 5 files changed, 118 insertions(+), 305 deletions(-)

diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c
index b1ce0c52d88d..fb8e314aa364 100644
--- a/tools/perf/arch/x86/util/evlist.c
+++ b/tools/perf/arch/x86/util/evlist.c
@@ -1,78 +1,10 @@
// SPDX-License-Identifier: GPL-2.0
-#include <stdio.h>
-#include "util/pmu.h"
-#include "util/pmus.h"
-#include "util/evlist.h"
-#include "util/parse-events.h"
-#include "util/event.h"
+#include <string.h>
+#include "../../../util/evlist.h"
+#include "../../../util/evsel.h"
#include "topdown.h"
#include "evsel.h"

-static int ___evlist__add_default_attrs(struct evlist *evlist,
- struct perf_event_attr *attrs,
- size_t nr_attrs)
-{
- LIST_HEAD(head);
- size_t i = 0;
-
- for (i = 0; i < nr_attrs; i++)
- event_attr_init(attrs + i);
-
- if (perf_pmus__num_core_pmus() == 1)
- return evlist__add_attrs(evlist, attrs, nr_attrs);
-
- for (i = 0; i < nr_attrs; i++) {
- struct perf_pmu *pmu = NULL;
-
- if (attrs[i].type == PERF_TYPE_SOFTWARE) {
- struct evsel *evsel = evsel__new(attrs + i);
-
- if (evsel == NULL)
- goto out_delete_partial_list;
- list_add_tail(&evsel->core.node, &head);
- continue;
- }
-
- while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
- struct perf_cpu_map *cpus;
- struct evsel *evsel;
-
- evsel = evsel__new(attrs + i);
- if (evsel == NULL)
- goto out_delete_partial_list;
- evsel->core.attr.config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT;
- cpus = perf_cpu_map__get(pmu->cpus);
- evsel->core.cpus = cpus;
- evsel->core.own_cpus = perf_cpu_map__get(cpus);
- evsel->pmu_name = strdup(pmu->name);
- list_add_tail(&evsel->core.node, &head);
- }
- }
-
- evlist__splice_list_tail(evlist, &head);
-
- return 0;
-
-out_delete_partial_list:
- {
- struct evsel *evsel, *n;
-
- __evlist__for_each_entry_safe(&head, n, evsel)
- evsel__delete(evsel);
- }
- return -1;
-}
-
-int arch_evlist__add_default_attrs(struct evlist *evlist,
- struct perf_event_attr *attrs,
- size_t nr_attrs)
-{
- if (!nr_attrs)
- return 0;
-
- return ___evlist__add_default_attrs(evlist, attrs, nr_attrs);
-}
-
int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs)
{
if (topdown_sys_has_perf_metrics() &&
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 65388c57bb5d..e7de32a8c10c 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1944,130 +1944,25 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
}

/*
- * Add default attributes, if there were no attributes specified or
+ * Add default events, if there were no attributes specified or
* if -d/--detailed, -d -d or -d -d -d is used:
*/
-static int add_default_attributes(void)
+static int add_default_events(void)
{
- struct perf_event_attr default_attrs0[] = {
-
- { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK },
- { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES },
- { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS },
- { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS },
-
- { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES },
-};
- struct perf_event_attr frontend_attrs[] = {
- { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND },
-};
- struct perf_event_attr backend_attrs[] = {
- { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND },
-};
- struct perf_event_attr default_attrs1[] = {
- { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS },
- { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
- { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES },
-
-};
-
-/*
- * Detailed stats (-d), covering the L1 and last level data caches:
- */
- struct perf_event_attr detailed_attrs[] = {
-
- { .type = PERF_TYPE_HW_CACHE,
- .config =
- PERF_COUNT_HW_CACHE_L1D << 0 |
- (PERF_COUNT_HW_CACHE_OP_READ << 8) |
- (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
-
- { .type = PERF_TYPE_HW_CACHE,
- .config =
- PERF_COUNT_HW_CACHE_L1D << 0 |
- (PERF_COUNT_HW_CACHE_OP_READ << 8) |
- (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
-
- { .type = PERF_TYPE_HW_CACHE,
- .config =
- PERF_COUNT_HW_CACHE_LL << 0 |
- (PERF_COUNT_HW_CACHE_OP_READ << 8) |
- (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
-
- { .type = PERF_TYPE_HW_CACHE,
- .config =
- PERF_COUNT_HW_CACHE_LL << 0 |
- (PERF_COUNT_HW_CACHE_OP_READ << 8) |
- (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
-};
-
-/*
- * Very detailed stats (-d -d), covering the instruction cache and the TLB caches:
- */
- struct perf_event_attr very_detailed_attrs[] = {
-
- { .type = PERF_TYPE_HW_CACHE,
- .config =
- PERF_COUNT_HW_CACHE_L1I << 0 |
- (PERF_COUNT_HW_CACHE_OP_READ << 8) |
- (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
-
- { .type = PERF_TYPE_HW_CACHE,
- .config =
- PERF_COUNT_HW_CACHE_L1I << 0 |
- (PERF_COUNT_HW_CACHE_OP_READ << 8) |
- (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
-
- { .type = PERF_TYPE_HW_CACHE,
- .config =
- PERF_COUNT_HW_CACHE_DTLB << 0 |
- (PERF_COUNT_HW_CACHE_OP_READ << 8) |
- (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
-
- { .type = PERF_TYPE_HW_CACHE,
- .config =
- PERF_COUNT_HW_CACHE_DTLB << 0 |
- (PERF_COUNT_HW_CACHE_OP_READ << 8) |
- (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
-
- { .type = PERF_TYPE_HW_CACHE,
- .config =
- PERF_COUNT_HW_CACHE_ITLB << 0 |
- (PERF_COUNT_HW_CACHE_OP_READ << 8) |
- (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
-
- { .type = PERF_TYPE_HW_CACHE,
- .config =
- PERF_COUNT_HW_CACHE_ITLB << 0 |
- (PERF_COUNT_HW_CACHE_OP_READ << 8) |
- (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
-
-};
+ const char *pmu = parse_events_option_args.pmu_filter ?: "all";
+ struct parse_events_error err;
+ struct evlist *evlist = evlist__new();
+ struct evsel *evsel;
+ int ret = 0;

-/*
- * Very, very detailed stats (-d -d -d), adding prefetch events:
- */
- struct perf_event_attr very_very_detailed_attrs[] = {
-
- { .type = PERF_TYPE_HW_CACHE,
- .config =
- PERF_COUNT_HW_CACHE_L1D << 0 |
- (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) |
- (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
-
- { .type = PERF_TYPE_HW_CACHE,
- .config =
- PERF_COUNT_HW_CACHE_L1D << 0 |
- (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) |
- (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
-};
+ if (!evlist)
+ return -ENOMEM;

- struct perf_event_attr default_null_attrs[] = {};
- const char *pmu = parse_events_option_args.pmu_filter ?: "all";
+ parse_events_error__init(&err);

/* Set attrs if no event is selected and !null_run: */
if (stat_config.null_run)
- return 0;
+ goto out;

if (transaction_run) {
/* Handle -T as -M transaction. Once platform specific metrics
@@ -2077,15 +1972,17 @@ static int add_default_attributes(void)
*/
if (!metricgroup__has_metric(pmu, "transaction")) {
pr_err("Missing transaction metrics\n");
- return -1;
+ ret = -1;
+ goto out;
}
- return metricgroup__parse_groups(evsel_list, pmu, "transaction",
+ ret = metricgroup__parse_groups(evlist, pmu, "transaction",
stat_config.metric_no_group,
stat_config.metric_no_merge,
stat_config.metric_no_threshold,
stat_config.user_requested_cpu_list,
stat_config.system_wide,
&stat_config.metric_events);
+ goto out;
}

if (smi_cost) {
@@ -2093,32 +1990,36 @@ static int add_default_attributes(void)

if (sysfs__read_int(FREEZE_ON_SMI_PATH, &smi) < 0) {
pr_err("freeze_on_smi is not supported.\n");
- return -1;
+ ret = -1;
+ goto out;
}

if (!smi) {
if (sysfs__write_int(FREEZE_ON_SMI_PATH, 1) < 0) {
- fprintf(stderr, "Failed to set freeze_on_smi.\n");
- return -1;
+ pr_err("Failed to set freeze_on_smi.\n");
+ ret = -1;
+ goto out;
}
smi_reset = true;
}

if (!metricgroup__has_metric(pmu, "smi")) {
pr_err("Missing smi metrics\n");
- return -1;
+ ret = -1;
+ goto out;
}

if (!force_metric_only)
stat_config.metric_only = true;

- return metricgroup__parse_groups(evsel_list, pmu, "smi",
+ ret = metricgroup__parse_groups(evlist, pmu, "smi",
stat_config.metric_no_group,
stat_config.metric_no_merge,
stat_config.metric_no_threshold,
stat_config.user_requested_cpu_list,
stat_config.system_wide,
&stat_config.metric_events);
+ goto out;
}

if (topdown_run) {
@@ -2131,105 +2032,137 @@ static int add_default_attributes(void)
if (!max_level) {
pr_err("Topdown requested but the topdown metric groups aren't present.\n"
"(See perf list the metric groups have names like TopdownL1)\n");
- return -1;
+ ret = -1;
+ goto out;
}
if (stat_config.topdown_level > max_level) {
pr_err("Invalid top-down metrics level. The max level is %u.\n", max_level);
- return -1;
- } else if (!stat_config.topdown_level)
+ ret = -1;
+ goto out;
+ } else if (!stat_config.topdown_level) {
stat_config.topdown_level = 1;
-
+ }
if (!stat_config.interval && !stat_config.metric_only) {
fprintf(stat_config.output,
"Topdown accuracy may decrease when measuring long periods.\n"
"Please print the result regularly, e.g. -I1000\n");
}
str[8] = stat_config.topdown_level + '0';
- if (metricgroup__parse_groups(evsel_list,
+ if (metricgroup__parse_groups(evlist,
pmu, str,
/*metric_no_group=*/false,
/*metric_no_merge=*/false,
/*metric_no_threshold=*/true,
stat_config.user_requested_cpu_list,
stat_config.system_wide,
- &stat_config.metric_events) < 0)
- return -1;
+ &stat_config.metric_events) < 0) {
+ ret = -1;
+ goto out;
+ }
}

if (!stat_config.topdown_level)
stat_config.topdown_level = 1;

- if (!evsel_list->core.nr_entries) {
+ if (!evlist->core.nr_entries) {
/* No events so add defaults. */
if (target__has_cpu(&target))
- default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK;
+ ret = parse_events(evlist, "cpu-clock", &err);
+ else
+ ret = parse_events(evlist, "task-clock", &err);
+ if (ret)
+ goto out;
+
+ ret = parse_events(evlist,
+ "context-switches,"
+ "cpu-migrations,"
+ "page-faults,"
+ "instructions,"
+ "cycles,"
+ "stalled-cycles-frontend,"
+ "stalled-cycles-backend,"
+ "branches,"
+ "branch-misses",
+ &err);
+ if (ret)
+ goto out;

- if (evlist__add_default_attrs(evsel_list, default_attrs0) < 0)
- return -1;
- if (perf_pmus__have_event("cpu", "stalled-cycles-frontend")) {
- if (evlist__add_default_attrs(evsel_list, frontend_attrs) < 0)
- return -1;
- }
- if (perf_pmus__have_event("cpu", "stalled-cycles-backend")) {
- if (evlist__add_default_attrs(evsel_list, backend_attrs) < 0)
- return -1;
- }
- if (evlist__add_default_attrs(evsel_list, default_attrs1) < 0)
- return -1;
/*
* Add TopdownL1 metrics if they exist. To minimize
* multiplexing, don't request threshold computation.
*/
if (metricgroup__has_metric(pmu, "Default")) {
struct evlist *metric_evlist = evlist__new();
- struct evsel *metric_evsel;
-
- if (!metric_evlist)
- return -1;

+ if (!metric_evlist) {
+ ret = -ENOMEM;
+ goto out;
+ }
if (metricgroup__parse_groups(metric_evlist, pmu, "Default",
/*metric_no_group=*/false,
/*metric_no_merge=*/false,
/*metric_no_threshold=*/true,
stat_config.user_requested_cpu_list,
stat_config.system_wide,
- &stat_config.metric_events) < 0)
- return -1;
-
- evlist__for_each_entry(metric_evlist, metric_evsel) {
- metric_evsel->skippable = true;
- metric_evsel->default_metricgroup = true;
+ &stat_config.metric_events) < 0) {
+ ret = -1;
+ goto out;
}
- evlist__splice_list_tail(evsel_list, &metric_evlist->core.entries);
+
+ evlist__for_each_entry(metric_evlist, evsel)
+ evsel->default_metricgroup = true;
+
+ evlist__splice_list_tail(evlist, &metric_evlist->core.entries);
evlist__delete(metric_evlist);
}
-
- /* Platform specific attrs */
- if (evlist__add_default_attrs(evsel_list, default_null_attrs) < 0)
- return -1;
}

/* Detailed events get appended to the event list: */

- if (detailed_run < 1)
- return 0;
-
- /* Append detailed run extra attributes: */
- if (evlist__add_default_attrs(evsel_list, detailed_attrs) < 0)
- return -1;
-
- if (detailed_run < 2)
- return 0;
-
- /* Append very detailed run extra attributes: */
- if (evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0)
- return -1;
-
- if (detailed_run < 3)
- return 0;
-
- /* Append very, very detailed run extra attributes: */
- return evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
+ if (!ret && detailed_run >= 1) {
+ /*
+ * Detailed stats (-d), covering the L1 and last level data
+ * caches:
+ */
+ ret = parse_events(evlist,
+ "L1-dcache-loads,"
+ "L1-dcache-load-misses,"
+ "LLC-loads,"
+ "LLC-load-misses",
+ &err);
+ }
+ if (!ret && detailed_run >= 2) {
+ /*
+ * Very detailed stats (-d -d), covering the instruction cache
+ * and the TLB caches:
+ */
+ ret = parse_events(evlist,
+ "L1-icache-loads,"
+ "L1-icache-load-misses,"
+ "dTLB-loads,"
+ "dTLB-load-misses,"
+ "iTLB-loads,"
+ "iTLB-load-misses",
+ &err);
+ }
+ if (!ret && detailed_run >= 3) {
+ /*
+ * Very, very detailed stats (-d -d -d), adding prefetch events:
+ */
+ ret = parse_events(evlist,
+ "L1-dcache-prefetches,"
+ "L1-dcache-prefetch-misses",
+ &err);
+ }
+out:
+ if (!ret) {
+ evlist__for_each_entry(evlist, evsel)
+ evsel->skippable = true;
+ }
+ parse_events_error__exit(&err);
+ evlist__splice_list_tail(evsel_list, &evlist->core.entries);
+ evlist__delete(evlist);
+ return ret;
}

static const char * const stat_record_usage[] = {
@@ -2736,7 +2669,7 @@ int cmd_stat(int argc, const char **argv)
}
}

- if (add_default_attributes())
+ if (add_default_events())
goto out;

if (stat_config.cgroup_list) {
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 55a300a0977b..de498ba5ac1c 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -314,49 +314,6 @@ struct evsel *evlist__add_sched_switch(struct evlist *evlist, bool system_wide)
}
#endif

-int evlist__add_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs)
-{
- struct evsel *evsel, *n;
- LIST_HEAD(head);
- size_t i;
-
- for (i = 0; i < nr_attrs; i++) {
- evsel = evsel__new_idx(attrs + i, evlist->core.nr_entries + i);
- if (evsel == NULL)
- goto out_delete_partial_list;
- list_add_tail(&evsel->core.node, &head);
- }
-
- evlist__splice_list_tail(evlist, &head);
-
- return 0;
-
-out_delete_partial_list:
- __evlist__for_each_entry_safe(&head, n, evsel)
- evsel__delete(evsel);
- return -1;
-}
-
-int __evlist__add_default_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs)
-{
- size_t i;
-
- for (i = 0; i < nr_attrs; i++)
- event_attr_init(attrs + i);
-
- return evlist__add_attrs(evlist, attrs, nr_attrs);
-}
-
-__weak int arch_evlist__add_default_attrs(struct evlist *evlist,
- struct perf_event_attr *attrs,
- size_t nr_attrs)
-{
- if (!nr_attrs)
- return 0;
-
- return __evlist__add_default_attrs(evlist, attrs, nr_attrs);
-}
-
struct evsel *evlist__find_tracepoint_by_id(struct evlist *evlist, int id)
{
struct evsel *evsel;
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index cb91dc9117a2..947a78cbd7f0 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -100,18 +100,6 @@ void evlist__delete(struct evlist *evlist);
void evlist__add(struct evlist *evlist, struct evsel *entry);
void evlist__remove(struct evlist *evlist, struct evsel *evsel);

-int evlist__add_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs);
-
-int __evlist__add_default_attrs(struct evlist *evlist,
- struct perf_event_attr *attrs, size_t nr_attrs);
-
-int arch_evlist__add_default_attrs(struct evlist *evlist,
- struct perf_event_attr *attrs,
- size_t nr_attrs);
-
-#define evlist__add_default_attrs(evlist, array) \
- arch_evlist__add_default_attrs(evlist, array, ARRAY_SIZE(array))
-
int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs);

int evlist__add_dummy(struct evlist *evlist);
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index bfc1d705f437..6631d03ad799 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -663,6 +663,9 @@ static void print_counter_value_std(struct perf_stat_config *config,
const char *fmt;
const char *bad_count = evsel->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED;

+ if (!evsel->supported && evsel->skippable)
+ return;
+
if (config->big_num)
fmt = floor(sc) != sc ? "%'*.2f " : "%'*.0f ";
else
--
2.44.0.478.gd926399ef9-goog



2024-04-09 15:22:48

by Ian Rogers

Subject: Re: [PATCH v1] perf stat: Remove evlist__add_default_attrs use strings

On Tue, Apr 9, 2024 at 7:41 AM Liang, Kan <[email protected]> wrote:
>
>
>
> On 2024-04-06 3:38 a.m., Ian Rogers wrote:
> > add_default_attributes would add evsels from pre-created
> > perf_event_attr values; however, this needed fixing for hybrid as the
> > extended PMU type was necessary for each core PMU. The logic for this
> > was in an arch-specific x86 function and wasn't present for ARM,
> > meaning that default events weren't being opened on all PMUs on
> > ARM. Change the creation of the default events to use parse_events and
> > strings, as that will open the events on all PMUs.
> >
> > Rather than try to detect events on PMUs before parsing, parse the
> > event but skip its output in stat-display.
>
> But the current skip could leave unnecessary empty lines. It's better to
> avoid the empty lines as well.
>
> $ perf stat sleep 1
>
>  Performance counter stats for 'sleep 1':
>
>               0.91 msec task-clock          #    0.001 CPUs utilized
>                  1      context-switches    #    1.096 K/sec
>                  0      cpu-migrations      #    0.000 /sec
>                 68      page-faults         #   74.534 K/sec
>          1,121,410      instructions        #    1.04  insn per cycle
>          1,081,095      cycles              #    1.185 GHz
>
>
>            251,649      branches            #  275.829 M/sec
>              7,252      branch-misses       #    2.88% of all branches

Agreed. Will fix for v2.

>
> >
> > [...]
> > - if (!evsel_list->core.nr_entries) {
> > + if (!evlist->core.nr_entries) {
> > /* No events so add defaults. */
> > if (target__has_cpu(&target))
> > - default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK;
> > + ret = parse_events(evlist, "cpu-clock", &err);
> > + else
> > + ret = parse_events(evlist, "task-clock", &err);
> > + if (ret)
> > + goto out;
> > +
> > + ret = parse_events(evlist,
> > + "context-switches,"
> > + "cpu-migrations,"
> > + "page-faults,"
> > + "instructions,"
> > + "cycles,"
>
> "cycles",
> "instructions",
>
> It's better to keep the original order.

So the original order was:
"cycles,"
"stalled-cycles-frontend,"
"stalled-cycles-backend,"
"instructions"

but many/most/all core PMUs don't provide the stalled-* events. At the
programmer level, instructions is the most fundamental thing, so having
it last felt wrong, hence moving it to be first after the software
events. My thought was: if we're going to reorder things, then let's
not do a half measure like:
"cycles,"
"instructions,"
"stalled-cycles-frontend,"
"stalled-cycles-backend"

let's just put things into their best order. It is obviously easy to
change, but having it this way wasn't an accident. There's obviously
subjectivity about whether cycles is more fundamental than
instructions. My thought is that you are taught instructions first,
and that these take some number of cycles to execute, hence
instructions should have some priority in the output over cycles.
Some people may not even know what cycles means; it is hard enough
when you do, given the variety of different clocks :-)

Thanks,
Ian

> Thanks,
> Kan
>
> > [...]
> > + "L1-dcache-load-misses,"
> > + "LLC-loads,"
> > + "LLC-load-misses",
> > + &err);
> > + }
> > + if (!ret && detailed_run >= 2) {
> > + /*
> > + * Very detailed stats (-d -d), covering the instruction cache
> > + * and the TLB caches:
> > + */
> > + ret = parse_events(evlist,
> > + "L1-icache-loads,"
> > + "L1-icache-load-misses,"
> > + "dTLB-loads,"
> > + "dTLB-load-misses,"
> > + "iTLB-loads,"
> > + "iTLB-load-misses",
> > + &err);
> > + }
> > + if (!ret && detailed_run >= 3) {
> > + /*
> > + * Very, very detailed stats (-d -d -d), adding prefetch events:
> > + */
> > + ret = parse_events(evlist,
> > + "L1-dcache-prefetches,"
> > + "L1-dcache-prefetch-misses",
> > + &err);
> > + }
> > +out:
> > + if (!ret) {
> > + evlist__for_each_entry(evlist, evsel)
> > + evsel->skippable = true;
> > + }
> > + parse_events_error__exit(&err);
> > + evlist__splice_list_tail(evsel_list, &evlist->core.entries);
> > + evlist__delete(evlist);
> > + return ret;
> > }
> >
> > static const char * const stat_record_usage[] = {
> > @@ -2736,7 +2669,7 @@ int cmd_stat(int argc, const char **argv)
> > }
> > }
> >
> > - if (add_default_attributes())
> > + if (add_default_events())
> > goto out;
> >
> > if (stat_config.cgroup_list) {
> > diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
> > index 55a300a0977b..de498ba5ac1c 100644
> > --- a/tools/perf/util/evlist.c
> > +++ b/tools/perf/util/evlist.c
> > @@ -314,49 +314,6 @@ struct evsel *evlist__add_sched_switch(struct evlist *evlist, bool system_wide)
> > }
> > #endif
> >
> > -int evlist__add_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs)
> > -{
> > - struct evsel *evsel, *n;
> > - LIST_HEAD(head);
> > - size_t i;
> > -
> > - for (i = 0; i < nr_attrs; i++) {
> > - evsel = evsel__new_idx(attrs + i, evlist->core.nr_entries + i);
> > - if (evsel == NULL)
> > - goto out_delete_partial_list;
> > - list_add_tail(&evsel->core.node, &head);
> > - }
> > -
> > - evlist__splice_list_tail(evlist, &head);
> > -
> > - return 0;
> > -
> > -out_delete_partial_list:
> > - __evlist__for_each_entry_safe(&head, n, evsel)
> > - evsel__delete(evsel);
> > - return -1;
> > -}
> > -
> > -int __evlist__add_default_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs)
> > -{
> > - size_t i;
> > -
> > - for (i = 0; i < nr_attrs; i++)
> > - event_attr_init(attrs + i);
> > -
> > - return evlist__add_attrs(evlist, attrs, nr_attrs);
> > -}
> > -
> > -__weak int arch_evlist__add_default_attrs(struct evlist *evlist,
> > - struct perf_event_attr *attrs,
> > - size_t nr_attrs)
> > -{
> > - if (!nr_attrs)
> > - return 0;
> > -
> > - return __evlist__add_default_attrs(evlist, attrs, nr_attrs);
> > -}
> > -
> > struct evsel *evlist__find_tracepoint_by_id(struct evlist *evlist, int id)
> > {
> > struct evsel *evsel;
> > diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
> > index cb91dc9117a2..947a78cbd7f0 100644
> > --- a/tools/perf/util/evlist.h
> > +++ b/tools/perf/util/evlist.h
> > @@ -100,18 +100,6 @@ void evlist__delete(struct evlist *evlist);
> > void evlist__add(struct evlist *evlist, struct evsel *entry);
> > void evlist__remove(struct evlist *evlist, struct evsel *evsel);
> >
> > -int evlist__add_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs);
> > -
> > -int __evlist__add_default_attrs(struct evlist *evlist,
> > - struct perf_event_attr *attrs, size_t nr_attrs);
> > -
> > -int arch_evlist__add_default_attrs(struct evlist *evlist,
> > - struct perf_event_attr *attrs,
> > - size_t nr_attrs);
> > -
> > -#define evlist__add_default_attrs(evlist, array) \
> > - arch_evlist__add_default_attrs(evlist, array, ARRAY_SIZE(array))
> > -
> > int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs);
> >
> > int evlist__add_dummy(struct evlist *evlist);
> > diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
> > index bfc1d705f437..6631d03ad799 100644
> > --- a/tools/perf/util/stat-display.c
> > +++ b/tools/perf/util/stat-display.c
> > @@ -663,6 +663,9 @@ static void print_counter_value_std(struct perf_stat_config *config,
> > const char *fmt;
> > const char *bad_count = evsel->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED;
> >
> > + if (!evsel->supported && evsel->skippable)
> > + return;
> > +
> > if (config->big_num)
> > fmt = floor(sc) != sc ? "%'*.2f " : "%'*.0f ";
> > else

2024-04-09 15:49:22

by Liang, Kan

[permalink] [raw]
Subject: Re: [PATCH v1] perf stat: Remove evlist__add_default_attrs use strings



On 2024-04-06 3:38 a.m., Ian Rogers wrote:
> add_default_attributes would add evsels by having pre-created
> perf_event_attr, however, this needed fixing for hybrid as the
> extended PMU type was necessary for each core PMU. The logic for this
> was in an arch specific x86 function and wasn't present for ARM,
> meaning that default events weren't being opened on all PMUs on
> ARM. Change the creation of the default events to use parse_events and
> strings as that will open the events on all PMUs.
>
> Rather than try to detect events on PMUs before parsing, parse the
> event but skip its output in stat-display.

But the current skip could leave unnecessary empty lines. It's better to
avoid the empty lines as well.

$perf stat sleep 1

Performance counter stats for 'sleep 1':

          0.91 msec task-clock                #    0.001 CPUs utilized
             1      context-switches          #    1.096 K/sec
             0      cpu-migrations            #    0.000 /sec
            68      page-faults               #   74.534 K/sec
     1,121,410      instructions              #    1.04  insn per cycle
     1,081,095      cycles                    #    1.185 GHz


       251,649      branches                  #  275.829 M/sec
         7,252      branch-misses             #    2.88% of all branches
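
A minimal sketch of where the blank lines come from (hypothetical Python mirroring the stat-display hunk, not the actual perf code): the counter value is suppressed for unsupported skippable events, but the row's newline is still emitted:

```python
def print_counter(name, count, supported, skippable):
    """Mimic the patched print_counter_value_std: suppress the value
    for unsupported skippable events; the row terminator is still
    emitted, which is what leaves a blank line in the output."""
    if not supported and skippable:
        value = ""          # value suppressed, as in the patch
    else:
        value = f"{count:>12,} {name}"
    return value + "\n"     # newline printed unconditionally

rows = [
    ("instructions", 1121410, True, True),
    ("stalled-cycles-frontend", 0, False, True),  # unsupported -> blank line
    ("cycles", 1081095, True, True),
]
out = "".join(print_counter(*r) for r in rows)
print(out)
```

Skipping the whole row, newline included, would avoid the gap.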


>
> Closes: https://lore.kernel.org/lkml/CAP-5=fVABSBZnsmtRn1uF-k-G1GWM-L5SgiinhPTfHbQsKXb_g@mail.gmail.com/
> Signed-off-by: Ian Rogers <[email protected]>
> ---
> tools/perf/arch/x86/util/evlist.c | 74 +-------
> tools/perf/builtin-stat.c | 291 ++++++++++++------------------
> tools/perf/util/evlist.c | 43 -----
> tools/perf/util/evlist.h | 12 --
> tools/perf/util/stat-display.c | 3 +
> 5 files changed, 118 insertions(+), 305 deletions(-)
>
> diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c
> index b1ce0c52d88d..fb8e314aa364 100644
> --- a/tools/perf/arch/x86/util/evlist.c
> +++ b/tools/perf/arch/x86/util/evlist.c
> @@ -1,78 +1,10 @@
> // SPDX-License-Identifier: GPL-2.0
> -#include <stdio.h>
> -#include "util/pmu.h"
> -#include "util/pmus.h"
> -#include "util/evlist.h"
> -#include "util/parse-events.h"
> -#include "util/event.h"
> +#include <string.h>
> +#include "../../../util/evlist.h"
> +#include "../../../util/evsel.h"
> #include "topdown.h"
> #include "evsel.h"
>
> -static int ___evlist__add_default_attrs(struct evlist *evlist,
> - struct perf_event_attr *attrs,
> - size_t nr_attrs)
> -{
> - LIST_HEAD(head);
> - size_t i = 0;
> -
> - for (i = 0; i < nr_attrs; i++)
> - event_attr_init(attrs + i);
> -
> - if (perf_pmus__num_core_pmus() == 1)
> - return evlist__add_attrs(evlist, attrs, nr_attrs);
> -
> - for (i = 0; i < nr_attrs; i++) {
> - struct perf_pmu *pmu = NULL;
> -
> - if (attrs[i].type == PERF_TYPE_SOFTWARE) {
> - struct evsel *evsel = evsel__new(attrs + i);
> -
> - if (evsel == NULL)
> - goto out_delete_partial_list;
> - list_add_tail(&evsel->core.node, &head);
> - continue;
> - }
> -
> - while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
> - struct perf_cpu_map *cpus;
> - struct evsel *evsel;
> -
> - evsel = evsel__new(attrs + i);
> - if (evsel == NULL)
> - goto out_delete_partial_list;
> - evsel->core.attr.config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT;
> - cpus = perf_cpu_map__get(pmu->cpus);
> - evsel->core.cpus = cpus;
> - evsel->core.own_cpus = perf_cpu_map__get(cpus);
> - evsel->pmu_name = strdup(pmu->name);
> - list_add_tail(&evsel->core.node, &head);
> - }
> - }
> -
> - evlist__splice_list_tail(evlist, &head);
> -
> - return 0;
> -
> -out_delete_partial_list:
> - {
> - struct evsel *evsel, *n;
> -
> - __evlist__for_each_entry_safe(&head, n, evsel)
> - evsel__delete(evsel);
> - }
> - return -1;
> -}
> -
> -int arch_evlist__add_default_attrs(struct evlist *evlist,
> - struct perf_event_attr *attrs,
> - size_t nr_attrs)
> -{
> - if (!nr_attrs)
> - return 0;
> -
> - return ___evlist__add_default_attrs(evlist, attrs, nr_attrs);
> -}
> -
> int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs)
> {
> if (topdown_sys_has_perf_metrics() &&
> diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
> index 65388c57bb5d..e7de32a8c10c 100644
> --- a/tools/perf/builtin-stat.c
> +++ b/tools/perf/builtin-stat.c
> @@ -1944,130 +1944,25 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
> }
>
> /*
> - * Add default attributes, if there were no attributes specified or
> + * Add default events, if there were no attributes specified or
> * if -d/--detailed, -d -d or -d -d -d is used:
> */
> -static int add_default_attributes(void)
> +static int add_default_events(void)
> {
> - struct perf_event_attr default_attrs0[] = {
> -
> - { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK },
> - { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES },
> - { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS },
> - { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS },
> -
> - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES },
> -};
> - struct perf_event_attr frontend_attrs[] = {
> - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND },
> -};
> - struct perf_event_attr backend_attrs[] = {
> - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND },
> -};
> - struct perf_event_attr default_attrs1[] = {
> - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS },
> - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
> - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES },
> -
> -};
> -
> -/*
> - * Detailed stats (-d), covering the L1 and last level data caches:
> - */
> - struct perf_event_attr detailed_attrs[] = {
> -
> - { .type = PERF_TYPE_HW_CACHE,
> - .config =
> - PERF_COUNT_HW_CACHE_L1D << 0 |
> - (PERF_COUNT_HW_CACHE_OP_READ << 8) |
> - (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
> -
> - { .type = PERF_TYPE_HW_CACHE,
> - .config =
> - PERF_COUNT_HW_CACHE_L1D << 0 |
> - (PERF_COUNT_HW_CACHE_OP_READ << 8) |
> - (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
> -
> - { .type = PERF_TYPE_HW_CACHE,
> - .config =
> - PERF_COUNT_HW_CACHE_LL << 0 |
> - (PERF_COUNT_HW_CACHE_OP_READ << 8) |
> - (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
> -
> - { .type = PERF_TYPE_HW_CACHE,
> - .config =
> - PERF_COUNT_HW_CACHE_LL << 0 |
> - (PERF_COUNT_HW_CACHE_OP_READ << 8) |
> - (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
> -};
> -
> -/*
> - * Very detailed stats (-d -d), covering the instruction cache and the TLB caches:
> - */
> - struct perf_event_attr very_detailed_attrs[] = {
> -
> - { .type = PERF_TYPE_HW_CACHE,
> - .config =
> - PERF_COUNT_HW_CACHE_L1I << 0 |
> - (PERF_COUNT_HW_CACHE_OP_READ << 8) |
> - (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
> -
> - { .type = PERF_TYPE_HW_CACHE,
> - .config =
> - PERF_COUNT_HW_CACHE_L1I << 0 |
> - (PERF_COUNT_HW_CACHE_OP_READ << 8) |
> - (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
> -
> - { .type = PERF_TYPE_HW_CACHE,
> - .config =
> - PERF_COUNT_HW_CACHE_DTLB << 0 |
> - (PERF_COUNT_HW_CACHE_OP_READ << 8) |
> - (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
> -
> - { .type = PERF_TYPE_HW_CACHE,
> - .config =
> - PERF_COUNT_HW_CACHE_DTLB << 0 |
> - (PERF_COUNT_HW_CACHE_OP_READ << 8) |
> - (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
> -
> - { .type = PERF_TYPE_HW_CACHE,
> - .config =
> - PERF_COUNT_HW_CACHE_ITLB << 0 |
> - (PERF_COUNT_HW_CACHE_OP_READ << 8) |
> - (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
> -
> - { .type = PERF_TYPE_HW_CACHE,
> - .config =
> - PERF_COUNT_HW_CACHE_ITLB << 0 |
> - (PERF_COUNT_HW_CACHE_OP_READ << 8) |
> - (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
> -
> -};
> + const char *pmu = parse_events_option_args.pmu_filter ?: "all";
> + struct parse_events_error err;
> + struct evlist *evlist = evlist__new();
> + struct evsel *evsel;
> + int ret = 0;
>
> -/*
> - * Very, very detailed stats (-d -d -d), adding prefetch events:
> - */
> - struct perf_event_attr very_very_detailed_attrs[] = {
> -
> - { .type = PERF_TYPE_HW_CACHE,
> - .config =
> - PERF_COUNT_HW_CACHE_L1D << 0 |
> - (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) |
> - (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
> -
> - { .type = PERF_TYPE_HW_CACHE,
> - .config =
> - PERF_COUNT_HW_CACHE_L1D << 0 |
> - (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) |
> - (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
> -};
> + if (!evlist)
> + return -ENOMEM;
>
> - struct perf_event_attr default_null_attrs[] = {};
> - const char *pmu = parse_events_option_args.pmu_filter ?: "all";
> + parse_events_error__init(&err);
>
> /* Set attrs if no event is selected and !null_run: */
> if (stat_config.null_run)
> - return 0;
> + goto out;
>
> if (transaction_run) {
> /* Handle -T as -M transaction. Once platform specific metrics
> @@ -2077,15 +1972,17 @@ static int add_default_attributes(void)
> */
> if (!metricgroup__has_metric(pmu, "transaction")) {
> pr_err("Missing transaction metrics\n");
> - return -1;
> + ret = -1;
> + goto out;
> }
> - return metricgroup__parse_groups(evsel_list, pmu, "transaction",
> + ret = metricgroup__parse_groups(evlist, pmu, "transaction",
> stat_config.metric_no_group,
> stat_config.metric_no_merge,
> stat_config.metric_no_threshold,
> stat_config.user_requested_cpu_list,
> stat_config.system_wide,
> &stat_config.metric_events);
> + goto out;
> }
>
> if (smi_cost) {
> @@ -2093,32 +1990,36 @@ static int add_default_attributes(void)
>
> if (sysfs__read_int(FREEZE_ON_SMI_PATH, &smi) < 0) {
> pr_err("freeze_on_smi is not supported.\n");
> - return -1;
> + ret = -1;
> + goto out;
> }
>
> if (!smi) {
> if (sysfs__write_int(FREEZE_ON_SMI_PATH, 1) < 0) {
> - fprintf(stderr, "Failed to set freeze_on_smi.\n");
> - return -1;
> + pr_err("Failed to set freeze_on_smi.\n");
> + ret = -1;
> + goto out;
> }
> smi_reset = true;
> }
>
> if (!metricgroup__has_metric(pmu, "smi")) {
> pr_err("Missing smi metrics\n");
> - return -1;
> + ret = -1;
> + goto out;
> }
>
> if (!force_metric_only)
> stat_config.metric_only = true;
>
> - return metricgroup__parse_groups(evsel_list, pmu, "smi",
> + ret = metricgroup__parse_groups(evlist, pmu, "smi",
> stat_config.metric_no_group,
> stat_config.metric_no_merge,
> stat_config.metric_no_threshold,
> stat_config.user_requested_cpu_list,
> stat_config.system_wide,
> &stat_config.metric_events);
> + goto out;
> }
>
> if (topdown_run) {
> @@ -2131,105 +2032,137 @@ static int add_default_attributes(void)
> if (!max_level) {
> pr_err("Topdown requested but the topdown metric groups aren't present.\n"
> "(See perf list the metric groups have names like TopdownL1)\n");
> - return -1;
> + ret = -1;
> + goto out;
> }
> if (stat_config.topdown_level > max_level) {
> pr_err("Invalid top-down metrics level. The max level is %u.\n", max_level);
> - return -1;
> - } else if (!stat_config.topdown_level)
> + ret = -1;
> + goto out;
> + } else if (!stat_config.topdown_level) {
> stat_config.topdown_level = 1;
> -
> + }
> if (!stat_config.interval && !stat_config.metric_only) {
> fprintf(stat_config.output,
> "Topdown accuracy may decrease when measuring long periods.\n"
> "Please print the result regularly, e.g. -I1000\n");
> }
> str[8] = stat_config.topdown_level + '0';
> - if (metricgroup__parse_groups(evsel_list,
> + if (metricgroup__parse_groups(evlist,
> pmu, str,
> /*metric_no_group=*/false,
> /*metric_no_merge=*/false,
> /*metric_no_threshold=*/true,
> stat_config.user_requested_cpu_list,
> stat_config.system_wide,
> - &stat_config.metric_events) < 0)
> - return -1;
> + &stat_config.metric_events) < 0) {
> + ret = -1;
> + goto out;
> + }
> }
>
> if (!stat_config.topdown_level)
> stat_config.topdown_level = 1;
>
> - if (!evsel_list->core.nr_entries) {
> + if (!evlist->core.nr_entries) {
> /* No events so add defaults. */
> if (target__has_cpu(&target))
> - default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK;
> + ret = parse_events(evlist, "cpu-clock", &err);
> + else
> + ret = parse_events(evlist, "task-clock", &err);
> + if (ret)
> + goto out;
> +
> + ret = parse_events(evlist,
> + "context-switches,"
> + "cpu-migrations,"
> + "page-faults,"
> + "instructions,"
> + "cycles,"

"cycles",
"instructions",

It's better to keep the original order.

Thanks,
Kan

> + "stalled-cycles-frontend,"
> + "stalled-cycles-backend,"
> + "branches,"
> + "branch-misses",
> + &err);
> + if (ret)
> + goto out;
>
> - if (evlist__add_default_attrs(evsel_list, default_attrs0) < 0)
> - return -1;
> - if (perf_pmus__have_event("cpu", "stalled-cycles-frontend")) {
> - if (evlist__add_default_attrs(evsel_list, frontend_attrs) < 0)
> - return -1;
> - }
> - if (perf_pmus__have_event("cpu", "stalled-cycles-backend")) {
> - if (evlist__add_default_attrs(evsel_list, backend_attrs) < 0)
> - return -1;
> - }
> - if (evlist__add_default_attrs(evsel_list, default_attrs1) < 0)
> - return -1;
> /*
> * Add TopdownL1 metrics if they exist. To minimize
> * multiplexing, don't request threshold computation.
> */
> if (metricgroup__has_metric(pmu, "Default")) {
> struct evlist *metric_evlist = evlist__new();
> - struct evsel *metric_evsel;
> -
> - if (!metric_evlist)
> - return -1;
>
> + if (!metric_evlist) {
> + ret = -ENOMEM;
> + goto out;
> + }
> if (metricgroup__parse_groups(metric_evlist, pmu, "Default",
> /*metric_no_group=*/false,
> /*metric_no_merge=*/false,
> /*metric_no_threshold=*/true,
> stat_config.user_requested_cpu_list,
> stat_config.system_wide,
> - &stat_config.metric_events) < 0)
> - return -1;
> -
> - evlist__for_each_entry(metric_evlist, metric_evsel) {
> - metric_evsel->skippable = true;
> - metric_evsel->default_metricgroup = true;
> + &stat_config.metric_events) < 0) {
> + ret = -1;
> + goto out;
> }
> - evlist__splice_list_tail(evsel_list, &metric_evlist->core.entries);
> +
> + evlist__for_each_entry(metric_evlist, evsel)
> + evsel->default_metricgroup = true;
> +
> + evlist__splice_list_tail(evlist, &metric_evlist->core.entries);
> evlist__delete(metric_evlist);
> }
> -
> - /* Platform specific attrs */
> - if (evlist__add_default_attrs(evsel_list, default_null_attrs) < 0)
> - return -1;
> }
>
> /* Detailed events get appended to the event list: */
>
> - if (detailed_run < 1)
> - return 0;
> -
> - /* Append detailed run extra attributes: */
> - if (evlist__add_default_attrs(evsel_list, detailed_attrs) < 0)
> - return -1;
> -
> - if (detailed_run < 2)
> - return 0;
> -
> - /* Append very detailed run extra attributes: */
> - if (evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0)
> - return -1;
> -
> - if (detailed_run < 3)
> - return 0;
> -
> - /* Append very, very detailed run extra attributes: */
> - return evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
> + if (!ret && detailed_run >= 1) {
> + /*
> + * Detailed stats (-d), covering the L1 and last level data
> + * caches:
> + */
> + ret = parse_events(evlist,
> + "L1-dcache-loads,"
> + "L1-dcache-load-misses,"
> + "LLC-loads,"
> + "LLC-load-misses",
> + &err);
> + }
> + if (!ret && detailed_run >= 2) {
> + /*
> + * Very detailed stats (-d -d), covering the instruction cache
> + * and the TLB caches:
> + */
> + ret = parse_events(evlist,
> + "L1-icache-loads,"
> + "L1-icache-load-misses,"
> + "dTLB-loads,"
> + "dTLB-load-misses,"
> + "iTLB-loads,"
> + "iTLB-load-misses",
> + &err);
> + }
> + if (!ret && detailed_run >= 3) {
> + /*
> + * Very, very detailed stats (-d -d -d), adding prefetch events:
> + */
> + ret = parse_events(evlist,
> + "L1-dcache-prefetches,"
> + "L1-dcache-prefetch-misses",
> + &err);
> + }
> +out:
> + if (!ret) {
> + evlist__for_each_entry(evlist, evsel)
> + evsel->skippable = true;
> + }
> + parse_events_error__exit(&err);
> + evlist__splice_list_tail(evsel_list, &evlist->core.entries);
> + evlist__delete(evlist);
> + return ret;
> }
>
> static const char * const stat_record_usage[] = {
> @@ -2736,7 +2669,7 @@ int cmd_stat(int argc, const char **argv)
> }
> }
>
> - if (add_default_attributes())
> + if (add_default_events())
> goto out;
>
> if (stat_config.cgroup_list) {
> diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
> index 55a300a0977b..de498ba5ac1c 100644
> --- a/tools/perf/util/evlist.c
> +++ b/tools/perf/util/evlist.c
> @@ -314,49 +314,6 @@ struct evsel *evlist__add_sched_switch(struct evlist *evlist, bool system_wide)
> }
> #endif
>
> -int evlist__add_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs)
> -{
> - struct evsel *evsel, *n;
> - LIST_HEAD(head);
> - size_t i;
> -
> - for (i = 0; i < nr_attrs; i++) {
> - evsel = evsel__new_idx(attrs + i, evlist->core.nr_entries + i);
> - if (evsel == NULL)
> - goto out_delete_partial_list;
> - list_add_tail(&evsel->core.node, &head);
> - }
> -
> - evlist__splice_list_tail(evlist, &head);
> -
> - return 0;
> -
> -out_delete_partial_list:
> - __evlist__for_each_entry_safe(&head, n, evsel)
> - evsel__delete(evsel);
> - return -1;
> -}
> -
> -int __evlist__add_default_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs)
> -{
> - size_t i;
> -
> - for (i = 0; i < nr_attrs; i++)
> - event_attr_init(attrs + i);
> -
> - return evlist__add_attrs(evlist, attrs, nr_attrs);
> -}
> -
> -__weak int arch_evlist__add_default_attrs(struct evlist *evlist,
> - struct perf_event_attr *attrs,
> - size_t nr_attrs)
> -{
> - if (!nr_attrs)
> - return 0;
> -
> - return __evlist__add_default_attrs(evlist, attrs, nr_attrs);
> -}
> -
> struct evsel *evlist__find_tracepoint_by_id(struct evlist *evlist, int id)
> {
> struct evsel *evsel;
> diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
> index cb91dc9117a2..947a78cbd7f0 100644
> --- a/tools/perf/util/evlist.h
> +++ b/tools/perf/util/evlist.h
> @@ -100,18 +100,6 @@ void evlist__delete(struct evlist *evlist);
> void evlist__add(struct evlist *evlist, struct evsel *entry);
> void evlist__remove(struct evlist *evlist, struct evsel *evsel);
>
> -int evlist__add_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs);
> -
> -int __evlist__add_default_attrs(struct evlist *evlist,
> - struct perf_event_attr *attrs, size_t nr_attrs);
> -
> -int arch_evlist__add_default_attrs(struct evlist *evlist,
> - struct perf_event_attr *attrs,
> - size_t nr_attrs);
> -
> -#define evlist__add_default_attrs(evlist, array) \
> - arch_evlist__add_default_attrs(evlist, array, ARRAY_SIZE(array))
> -
> int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs);
>
> int evlist__add_dummy(struct evlist *evlist);
> diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
> index bfc1d705f437..6631d03ad799 100644
> --- a/tools/perf/util/stat-display.c
> +++ b/tools/perf/util/stat-display.c
> @@ -663,6 +663,9 @@ static void print_counter_value_std(struct perf_stat_config *config,
> const char *fmt;
> const char *bad_count = evsel->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED;
>
> + if (!evsel->supported && evsel->skippable)
> + return;
> +
> if (config->big_num)
> fmt = floor(sc) != sc ? "%'*.2f " : "%'*.0f ";
> else

2024-04-09 16:00:22

by Liang, Kan

[permalink] [raw]
Subject: Re: [PATCH v1] perf stat: Remove evlist__add_default_attrs use strings



On 2024-04-09 11:20 a.m., Ian Rogers wrote:
>>> + ret = parse_events(evlist,
>>> + "context-switches,"
>>> + "cpu-migrations,"
>>> + "page-faults,"
>>> + "instructions,"
>>> + "cycles,"
>> "cycles",
>> "instructions",
>>
>> It's better to keep the original order.
> So the original order was:
> "cycles,"
> "stalled-cycles-frontend,"
> "stalled-cycles-backend,"
> "instructions"
>

Right. The stalled-* events are added between default_attrs0 and
default_attrs1.


> but many/most/all core PMUs don't provide the stalled-* events. At the
> programmer level instructions is the most fundamental thing, so having
> it last felt wrong hence moving it to be the first after the software
> events. My thought was, if we're going to reorder things then let's
> not do a half measure like:
> "cycles,"
> "instructions,"
> "stalled-cycles-frontend,"
> "stalled-cycles-backend"
>
> let's just put things into their best order. It is obviously easy to
> change, but having it this way wasn't an accident. There's obviously
> subjectivity about whether cycles is more fundamental than
> instructions, my thought is that you get taught instructions first and
> that these take some number of cycles to execute, hence thinking
> instructions should have some priority in the output over cycles -
> some people may not even know what cycles means, it is hard enough
> when you do given the variety of different clocks :-)
>

My concern is that there may be someone who still relies on the std
output of perf stat default. So the output format/order matters for
them. Their scripts would probably be broken if the order is changed.

Thanks,
Kan

2024-04-09 16:04:35

by Ian Rogers

[permalink] [raw]
Subject: Re: [PATCH v1] perf stat: Remove evlist__add_default_attrs use strings

On Tue, Apr 9, 2024 at 9:00 AM Liang, Kan <[email protected]> wrote:
>
>
>
> On 2024-04-09 11:20 a.m., Ian Rogers wrote:
> >>> + ret = parse_events(evlist,
> >>> + "context-switches,"
> >>> + "cpu-migrations,"
> >>> + "page-faults,"
> >>> + "instructions,"
> >>> + "cycles,"
> >> "cycles",
> >> "instructions",
> >>
> >> It's better to keep the original order.
> > So the original order was:
> > "cycles,"
> > "stalled-cycles-frontend,"
> > "stalled-cycles-backend,"
> > "instructions"
> >
>
> Right. The stalled-* events are added between default_attrs0 and
> default_attrs1.
>
>
> > but many/most/all core PMUs don't provide the stalled-* events. At the
> > programmer level instructions is the most fundamental thing, so having
> > it last felt wrong hence moving it to be the first after the software
> > events. My thought was, if we're going to reorder things then let's
> > not do a half measure like:
> > "cycles,"
> > "instructions,"
> > "stalled-cycles-frontend,"
> > "stalled-cycles-backend"
> >
> > let's just put things into their best order. It is obviously easy to
> > change, but having it this way wasn't an accident. There's obviously
> > subjectivity about whether cycles is more fundamental than
> > instructions, my thought is that you get taught instructions first and
> > that these take some number of cycles to execute, hence thinking
> > instructions should have some priority in the output over cycles -
> > some people may not even know what cycles means, it is hard enough
> > when you do given the variety of different clocks :-)
> >
>
> My concern is that there may be someone who still relies on the std
> output of perf stat default. So the output format/order matters for
> them. Their scripts would probably be broken if the order is changed.

I think making everyone suffer for the case of a tool that may behave
in this way doesn't make sense. The tool should transition to not care
or to, say, the json output, or at least contribute a test. There is
precedent for this attitude, the default metrics for topdown removed
the event names in perf stat default output - no one screamed, and I
expect that to be the case here.
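
As an aside on order-insensitive consumption (a hypothetical sketch; the sample lines are invented, though `perf stat -j` does emit one JSON record per event): keying results by event name makes a consumer robust to any reordering of the defaults:

```python
import json

# Invented sample lines in the shape of `perf stat -j` output; the
# "counter-value" and "event" field names exist in perf's JSON output,
# but these exact values are made up for illustration.
sample = [
    '{"counter-value" : "1121410.000000", "event" : "instructions"}',
    '{"counter-value" : "1081095.000000", "event" : "cycles"}',
]

# Index results by event name, so the printed order never matters.
counts = {}
for line in sample:
    rec = json.loads(line)
    counts[rec["event"]] = float(rec["counter-value"])

print(counts["instructions"], counts["cycles"])
```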

Thanks,
Ian

> Thanks,
> Kan
>

2024-04-09 20:39:06

by Ian Rogers

[permalink] [raw]
Subject: Re: [PATCH v1] perf stat: Remove evlist__add_default_attrs use strings

On Tue, Apr 9, 2024 at 11:50 AM Liang, Kan <[email protected]> wrote:
>
>
>
> On 2024-04-09 12:04 p.m., Ian Rogers wrote:
> > On Tue, Apr 9, 2024 at 9:00 AM Liang, Kan <[email protected]> wrote:
> >>
> >>
> >>
> >> On 2024-04-09 11:20 a.m., Ian Rogers wrote:
> >>>>> + ret = parse_events(evlist,
> >>>>> + "context-switches,"
> >>>>> + "cpu-migrations,"
> >>>>> + "page-faults,"
> >>>>> + "instructions,"
> >>>>> + "cycles,"
> >>>> "cycles",
> >>>> "instructions",
> >>>>
> >>>> It's better to keep the original order.
> >>> So the original order was:
> >>> "cycles,"
> >>> "stalled-cycles-frontend,"
> >>> "stalled-cycles-backend,"
> >>> "instructions"
> >>>
> >>
> >> Right. The stalled-* events are added between default_attrs0 and
> >> default_attrs1.
> >>
> >>
> >>> but many/most/all core PMUs don't provide the stalled-* events. At the
> >>> programmer level instructions is the most fundamental thing, so having
> >>> it last felt wrong hence moving it to be the first after the software
> >>> events. My thought was, if we're going to reorder things then let's
> >>> not do a half measure like:
> >>> "cycles,"
> >>> "instructions,"
> >>> "stalled-cycles-frontend,"
> >>> "stalled-cycles-backend"
> >>>
> >>> let's just put things into their best order. It is obviously easy to
> >>> change but having this way wasn't an accident. There's obviously
> >>> subjectivity about whether cycles is more fundamental than
> >>> instructions, my thought is that you get taught instructions first and
> >>> that these take some number of cycles to execute, hence thinking
> >>> instructions should have some priority in the output over cycles -
> >>> some people may not even know what cycles means, it is hard enough
> >>> when you do given the variety of different clocks :-)
> >>>
> >>
> >> My concern is that there may be someone who still relies on the std
> >> output of perf stat default. So the output format/order matters for
> >> them. Their scripts would probably be broken if the order is changed.
> >
> > I think making everyone suffer for the case of a tool that may behave
> > in this way doesn't make sense. The tool should transition to not care
> > or to say the json output, or at least contribute a test. There is
> > precedent for this attitude, the default metrics for topdown removed
> > the event names in perf stat default output - no one screamed, and I
> > expect that to be the case here.
> >
>
> They did, but that happened after the change was merged. And there was
> no test case for the output at that time.
>
> I agree that if the order is important, there should be a test for it.
> I've emailed the tool owners I know to see if the change impacts them.
> But they are all out of office this week and should be back next week.
> I will let you know regarding their feedback. If the order is important,
> I will update the stat+std_output.sh.

Thanks Kan, just knowing if we should or shouldn't care is progress.

Ian

> Thanks,
> Kan

2024-04-09 23:41:23

by Liang, Kan

[permalink] [raw]
Subject: Re: [PATCH v1] perf stat: Remove evlist__add_default_attrs use strings



On 2024-04-09 12:04 p.m., Ian Rogers wrote:
> On Tue, Apr 9, 2024 at 9:00 AM Liang, Kan <[email protected]> wrote:
>>
>>
>>
>> On 2024-04-09 11:20 a.m., Ian Rogers wrote:
>>>>> + ret = parse_events(evlist,
>>>>> + "context-switches,"
>>>>> + "cpu-migrations,"
>>>>> + "page-faults,"
>>>>> + "instructions,"
>>>>> + "cycles,"
>>>> "cycles",
>>>> "instructions",
>>>>
>>>> It's better to keep the original order.
>>> So the original order was:
>>> "cycles,"
>>> "stalled-cycles-frontend,"
>>> "stalled-cycles-backend,"
>>> "instructions"
>>>
>>
>> Right. The stalled-* events are added between default_attrs0 and
>> default_attrs1.
>>
>>
>>> but many/most/all core PMUs don't provide the stalled-* events. At the
>>> programmer level instructions is the most fundamental thing, so having
>>> it last felt wrong hence moving it to be the first after the software
>>> events. My thought was, if we're going to reorder things then let's
>>> not do a half measure like:
>>> "cycles,"
>>> "instructions,"
>>> "stalled-cycles-frontend,"
>>> "stalled-cycles-backend"
>>>
>>> let's just put things into their best order. It is obviously easy to
>>> change, but having it this way wasn't an accident. There's obviously
>>> subjectivity about whether cycles is more fundamental than
>>> instructions; my thought is that you get taught instructions first and
>>> that these take some number of cycles to execute, hence thinking
>>> instructions should have some priority in the output over cycles -
>>> some people may not even know what cycles means, and it is hard enough
>>> even when you do, given the variety of different clocks :-)
>>>
>>
>> My concern is that there may be someone who still relies on the std
>> output of perf stat default. So the output format/order matters for
>> them. Their scripts would probably break if the order is changed.
>
> I think making everyone suffer for the case of a tool that may behave
> in this way doesn't make sense. The tool should transition to not care,
> or to, say, the JSON output, or at least contribute a test. There is
> precedent for this attitude: the default metrics for topdown removed
> the event names in perf stat default output - no one screamed, and I
> expect that to be the case here.
>

They did, but that happened after the change was merged. And there was
no test case for the output at that time.

I agree that if the order is important, there should be a test for it.
I've emailed the tool owners I know to see if the change impacts them.
But they are all out of office this week and should be back next week.
I will let you know regarding their feedback. If the order is important,
I will update the stat+std_output.sh.
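
For reference, an order check of the kind stat+std_output.sh could grow
might look like the sketch below. The event list and the canned sample
output are illustrative only - a real test would capture live
`perf stat` output rather than use a here-doc:

```shell
#!/bin/sh
# Sketch: verify that default events appear in a fixed order in
# "perf stat"-style output. Event list and sample are hypothetical.
events="context-switches cpu-migrations page-faults instructions cycles"

# Stand-in for live `perf stat` output captured by a real test.
sample_output() {
	cat <<'EOF'
            1,234      context-switches
               56      cpu-migrations
              789      page-faults
    1,234,567,890      instructions
    2,345,678,901      cycles
EOF
}

check_order() {
	prev_line=0
	for ev in $events; do
		# Line number of the first line ending with this event name.
		line=$(sample_output | grep -n "[[:space:]]$ev\$" | head -1 | cut -d: -f1)
		if [ -z "$line" ] || [ "$line" -le "$prev_line" ]; then
			echo "FAIL: $ev missing or out of order"
			return 1
		fi
		prev_line=$line
	done
	echo "PASS"
}

check_order
```

A test structured this way fails both when an event is dropped and when
two events swap positions, which is what a script scraping the default
output would care about.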

Thanks,
Kan