2022-04-22 18:05:15

by Xing Zhengjun

[permalink] [raw]
Subject: [PATCH 2/3] perf stat: Merge event counts from all hybrid PMUs

From: Zhengjun Xing <[email protected]>

For hybrid events, by default stat aggregates and reports the event counts
per pmu.

# ./perf stat -e cycles -a sleep 1

Performance counter stats for 'system wide':

14,066,877,268 cpu_core/cycles/
6,814,443,147 cpu_atom/cycles/

1.002760625 seconds time elapsed

Sometimes, it's also useful to aggregate event counts from all PMUs.
Create a new option '--hybrid-merge' to enable that behavior and report
the counts without PMUs.

# ./perf stat -e cycles -a --hybrid-merge sleep 1

Performance counter stats for 'system wide':

20,732,982,512 cycles

1.002776793 seconds time elapsed

Signed-off-by: Zhengjun Xing <[email protected]>
Reviewed-by: Kan Liang <[email protected]>
---
tools/perf/Documentation/perf-stat.txt | 10 ++++++++++
tools/perf/builtin-stat.c | 2 ++
tools/perf/util/stat-display.c | 17 +++++++++++++++--
tools/perf/util/stat.h | 1 +
4 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index c06c341e72b9..8d1cde00b8d6 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -454,6 +454,16 @@ Multiple events are created from a single event specification when:
2. Aliases, which are listed immediately after the Kernel PMU events
by perf list, are used.

+--hybrid-merge::
+Merge the hybrid event counts from all PMUs.
+
+For hybrid events, by default, the stat aggregates and reports the event
+counts per PMU. But sometimes, it's also useful to aggregate event counts
+from all PMUs. This option enables that behavior and reports the counts
+without PMUs.
+
+For non-hybrid events, it has no effect.
+
--smi-cost::
Measure SMI cost if msr/aperf/ and msr/smi/ events are supported.

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index a96f106dc93a..ea88ac5bed2d 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1235,6 +1235,8 @@ static struct option stat_options[] = {
OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode,
"disable CPU count aggregation", AGGR_NONE),
OPT_BOOLEAN(0, "no-merge", &stat_config.no_merge, "Do not merge identical named events"),
+ OPT_BOOLEAN(0, "hybrid-merge", &stat_config.hybrid_merge,
+ "Merge identical named hybrid events"),
OPT_STRING('x', "field-separator", &stat_config.csv_sep, "separator",
"print counts with custom separator"),
OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 46b3dd134656..d9629a83aa78 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -612,6 +612,19 @@ static bool hybrid_uniquify(struct evsel *evsel)
return perf_pmu__has_hybrid() && !is_uncore(evsel);
}

+static bool hybrid_merge(struct evsel *counter, struct perf_stat_config *config,
+ bool check)
+{
+ if (hybrid_uniquify(counter)) {
+ if (check)
+ return config && config->hybrid_merge;
+ else
+ return config && !config->hybrid_merge;
+ }
+
+ return false;
+}
+
static bool collect_data(struct perf_stat_config *config, struct evsel *counter,
void (*cb)(struct perf_stat_config *config, struct evsel *counter, void *data,
bool first),
@@ -620,9 +633,9 @@ static bool collect_data(struct perf_stat_config *config, struct evsel *counter,
if (counter->merged_stat)
return false;
cb(config, counter, data, true);
- if (config->no_merge || hybrid_uniquify(counter))
+ if (config->no_merge || hybrid_merge(counter, config, false))
uniquify_event_name(counter, config);
- else if (counter->auto_merge_stats)
+ else if (counter->auto_merge_stats || hybrid_merge(counter, config, true))
collect_all_aliases(config, counter, cb, data);
return true;
}
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 335d19cc3063..91d989dfeca4 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -122,6 +122,7 @@ struct perf_stat_config {
bool ru_display;
bool big_num;
bool no_merge;
+ bool hybrid_merge;
bool walltime_run_table;
bool all_kernel;
bool all_user;
--
2.25.1


2022-05-09 04:10:45

by Xing Zhengjun

[permalink] [raw]
Subject: Re: [PATCH 2/3] perf stat: Merge event counts from all hybrid PMUs



On 5/7/2022 12:03 PM, Ian Rogers wrote:
> On Thu, Apr 21, 2022 at 11:57 PM <[email protected]> wrote:
>>
>> From: Zhengjun Xing <[email protected]>
>>
>> For hybrid events, by default stat aggregates and reports the event counts
>> per pmu.
>>
>> # ./perf stat -e cycles -a sleep 1
>>
>> Performance counter stats for 'system wide':
>>
>> 14,066,877,268 cpu_core/cycles/
>> 6,814,443,147 cpu_atom/cycles/
>>
>> 1.002760625 seconds time elapsed
>>
>> Sometimes, it's also useful to aggregate event counts from all PMUs.
>> Create a new option '--hybrid-merge' to enable that behavior and report
>> the counts without PMUs.
>>
>> # ./perf stat -e cycles -a --hybrid-merge sleep 1
>>
>> Performance counter stats for 'system wide':
>>
>> 20,732,982,512 cycles
>>
>> 1.002776793 seconds time elapsed
>>
>> Signed-off-by: Zhengjun Xing <[email protected]>
>> Reviewed-by: Kan Liang <[email protected]>
>
> This feels related to aggregation, but aggregation is for a single
> evsel on a single PMU. What happens if you have both instructions and
> cycles with --hybrid-merge? Normally we aggregate all counts for each
> CPU into the two evsels and then compute a metric:
> ```
# ./perf stat -e instructions,cycles -a /bin/true

Performance counter stats for 'system wide':

2,416,092 cpu_core/instructions/
305,840 cpu_atom/instructions/
2,645,138 cpu_core/cycles/
789,631 cpu_atom/cycles/

0.002345159 seconds time elapsed

# ./perf stat -e instructions,cycles -a --hybrid-merge /bin/true

Performance counter stats for 'system wide':

2,702,612 instructions
3,607,773 cycles

0.002475749 seconds time elapsed

Currently, no metrics are shown on hybrid systems.

> $ perf stat -e instructions,cycles /bin/true
>
> Performance counter stats for '/bin/true':
>
> 1,830,554 instructions # 1.17 insn per cycle
> 1,561,415 cycles
> ```
> This kind of aggregation behavior may be needed more widely for metrics.
>
> Thanks,
> Ian
>
>> ---
>> tools/perf/Documentation/perf-stat.txt | 10 ++++++++++
>> tools/perf/builtin-stat.c | 2 ++
>> tools/perf/util/stat-display.c | 17 +++++++++++++++--
>> tools/perf/util/stat.h | 1 +
>> 4 files changed, 28 insertions(+), 2 deletions(-)
>>
>> diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
>> index c06c341e72b9..8d1cde00b8d6 100644
>> --- a/tools/perf/Documentation/perf-stat.txt
>> +++ b/tools/perf/Documentation/perf-stat.txt
>> @@ -454,6 +454,16 @@ Multiple events are created from a single event specification when:
>> 2. Aliases, which are listed immediately after the Kernel PMU events
>> by perf list, are used.
>>
>> +--hybrid-merge::
>> +Merge the hybrid event counts from all PMUs.
>> +
>> +For hybrid events, by default, the stat aggregates and reports the event
>> +counts per PMU. But sometimes, it's also useful to aggregate event counts
>> +from all PMUs. This option enables that behavior and reports the counts
>> +without PMUs.
>> +
>> +For non-hybrid events, it has no effect.
>> +
>> --smi-cost::
>> Measure SMI cost if msr/aperf/ and msr/smi/ events are supported.
>>
>> diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
>> index a96f106dc93a..ea88ac5bed2d 100644
>> --- a/tools/perf/builtin-stat.c
>> +++ b/tools/perf/builtin-stat.c
>> @@ -1235,6 +1235,8 @@ static struct option stat_options[] = {
>> OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode,
>> "disable CPU count aggregation", AGGR_NONE),
>> OPT_BOOLEAN(0, "no-merge", &stat_config.no_merge, "Do not merge identical named events"),
>> + OPT_BOOLEAN(0, "hybrid-merge", &stat_config.hybrid_merge,
>> + "Merge identical named hybrid events"),
>> OPT_STRING('x', "field-separator", &stat_config.csv_sep, "separator",
>> "print counts with custom separator"),
>> OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
>> diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
>> index 46b3dd134656..d9629a83aa78 100644
>> --- a/tools/perf/util/stat-display.c
>> +++ b/tools/perf/util/stat-display.c
>> @@ -612,6 +612,19 @@ static bool hybrid_uniquify(struct evsel *evsel)
>> return perf_pmu__has_hybrid() && !is_uncore(evsel);
>> }
>>
>> +static bool hybrid_merge(struct evsel *counter, struct perf_stat_config *config,
>> + bool check)
>> +{
>> + if (hybrid_uniquify(counter)) {
>> + if (check)
>> + return config && config->hybrid_merge;
>> + else
>> + return config && !config->hybrid_merge;
>> + }
>> +
>> + return false;
>> +}
>> +
>> static bool collect_data(struct perf_stat_config *config, struct evsel *counter,
>> void (*cb)(struct perf_stat_config *config, struct evsel *counter, void *data,
>> bool first),
>> @@ -620,9 +633,9 @@ static bool collect_data(struct perf_stat_config *config, struct evsel *counter,
>> if (counter->merged_stat)
>> return false;
>> cb(config, counter, data, true);
>> - if (config->no_merge || hybrid_uniquify(counter))
>> + if (config->no_merge || hybrid_merge(counter, config, false))
>> uniquify_event_name(counter, config);
>> - else if (counter->auto_merge_stats)
>> + else if (counter->auto_merge_stats || hybrid_merge(counter, config, true))
>> collect_all_aliases(config, counter, cb, data);
>> return true;
>> }
>> diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
>> index 335d19cc3063..91d989dfeca4 100644
>> --- a/tools/perf/util/stat.h
>> +++ b/tools/perf/util/stat.h
>> @@ -122,6 +122,7 @@ struct perf_stat_config {
>> bool ru_display;
>> bool big_num;
>> bool no_merge;
>> + bool hybrid_merge;
>> bool walltime_run_table;
>> bool all_kernel;
>> bool all_user;
>> --
>> 2.25.1
>>

--
Zhengjun Xing

2022-05-09 06:25:24

by Ian Rogers

[permalink] [raw]
Subject: Re: [PATCH 2/3] perf stat: Merge event counts from all hybrid PMUs

On Thu, Apr 21, 2022 at 11:57 PM <[email protected]> wrote:
>
> From: Zhengjun Xing <[email protected]>
>
> For hybrid events, by default stat aggregates and reports the event counts
> per pmu.
>
> # ./perf stat -e cycles -a sleep 1
>
> Performance counter stats for 'system wide':
>
> 14,066,877,268 cpu_core/cycles/
> 6,814,443,147 cpu_atom/cycles/
>
> 1.002760625 seconds time elapsed
>
> Sometimes, it's also useful to aggregate event counts from all PMUs.
> Create a new option '--hybrid-merge' to enable that behavior and report
> the counts without PMUs.
>
> # ./perf stat -e cycles -a --hybrid-merge sleep 1
>
> Performance counter stats for 'system wide':
>
> 20,732,982,512 cycles
>
> 1.002776793 seconds time elapsed
>
> Signed-off-by: Zhengjun Xing <[email protected]>
> Reviewed-by: Kan Liang <[email protected]>

This feels related to aggregation, but aggregation is for a single
evsel on a single PMU. What happens if you have both instructions and
cycles with --hybrid-merge? Normally we aggregate all counts for each
CPU into the two evsels and then compute a metric:
```
$ perf stat -e instructions,cycles /bin/true

Performance counter stats for '/bin/true':

1,830,554 instructions # 1.17 insn per cycle
1,561,415 cycles
```
This kind of aggregation behavior may be needed more widely for metrics.

Thanks,
Ian

> ---
> tools/perf/Documentation/perf-stat.txt | 10 ++++++++++
> tools/perf/builtin-stat.c | 2 ++
> tools/perf/util/stat-display.c | 17 +++++++++++++++--
> tools/perf/util/stat.h | 1 +
> 4 files changed, 28 insertions(+), 2 deletions(-)
>
> diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
> index c06c341e72b9..8d1cde00b8d6 100644
> --- a/tools/perf/Documentation/perf-stat.txt
> +++ b/tools/perf/Documentation/perf-stat.txt
> @@ -454,6 +454,16 @@ Multiple events are created from a single event specification when:
> 2. Aliases, which are listed immediately after the Kernel PMU events
> by perf list, are used.
>
> +--hybrid-merge::
> +Merge the hybrid event counts from all PMUs.
> +
> +For hybrid events, by default, the stat aggregates and reports the event
> +counts per PMU. But sometimes, it's also useful to aggregate event counts
> +from all PMUs. This option enables that behavior and reports the counts
> +without PMUs.
> +
> +For non-hybrid events, it has no effect.
> +
> --smi-cost::
> Measure SMI cost if msr/aperf/ and msr/smi/ events are supported.
>
> diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
> index a96f106dc93a..ea88ac5bed2d 100644
> --- a/tools/perf/builtin-stat.c
> +++ b/tools/perf/builtin-stat.c
> @@ -1235,6 +1235,8 @@ static struct option stat_options[] = {
> OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode,
> "disable CPU count aggregation", AGGR_NONE),
> OPT_BOOLEAN(0, "no-merge", &stat_config.no_merge, "Do not merge identical named events"),
> + OPT_BOOLEAN(0, "hybrid-merge", &stat_config.hybrid_merge,
> + "Merge identical named hybrid events"),
> OPT_STRING('x', "field-separator", &stat_config.csv_sep, "separator",
> "print counts with custom separator"),
> OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
> diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
> index 46b3dd134656..d9629a83aa78 100644
> --- a/tools/perf/util/stat-display.c
> +++ b/tools/perf/util/stat-display.c
> @@ -612,6 +612,19 @@ static bool hybrid_uniquify(struct evsel *evsel)
> return perf_pmu__has_hybrid() && !is_uncore(evsel);
> }
>
> +static bool hybrid_merge(struct evsel *counter, struct perf_stat_config *config,
> + bool check)
> +{
> + if (hybrid_uniquify(counter)) {
> + if (check)
> + return config && config->hybrid_merge;
> + else
> + return config && !config->hybrid_merge;
> + }
> +
> + return false;
> +}
> +
> static bool collect_data(struct perf_stat_config *config, struct evsel *counter,
> void (*cb)(struct perf_stat_config *config, struct evsel *counter, void *data,
> bool first),
> @@ -620,9 +633,9 @@ static bool collect_data(struct perf_stat_config *config, struct evsel *counter,
> if (counter->merged_stat)
> return false;
> cb(config, counter, data, true);
> - if (config->no_merge || hybrid_uniquify(counter))
> + if (config->no_merge || hybrid_merge(counter, config, false))
> uniquify_event_name(counter, config);
> - else if (counter->auto_merge_stats)
> + else if (counter->auto_merge_stats || hybrid_merge(counter, config, true))
> collect_all_aliases(config, counter, cb, data);
> return true;
> }
> diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
> index 335d19cc3063..91d989dfeca4 100644
> --- a/tools/perf/util/stat.h
> +++ b/tools/perf/util/stat.h
> @@ -122,6 +122,7 @@ struct perf_stat_config {
> bool ru_display;
> bool big_num;
> bool no_merge;
> + bool hybrid_merge;
> bool walltime_run_table;
> bool all_kernel;
> bool all_user;
> --
> 2.25.1
>