2021-06-16 06:31:55

by Jin Yao

[permalink] [raw]
Subject: [PATCH] perf stat: Merge uncore events by default for hybrid platform

On hybrid platforms, perf stat by default aggregates and reports the event
counts per PMU. For example,

# perf stat -e cycles -a true

Performance counter stats for 'system wide':

1,400,445 cpu_core/cycles/
680,881 cpu_atom/cycles/

0.001770773 seconds time elapsed

For uncore events, however, that is not a suitable method: uncore has nothing
to do with hybrid. So for uncore events, we aggregate the event counts from all
PMUs and report the merged count under the event name as given by the user,
instead of one line per PMU.

Before:

# perf stat -e arb/event=0x81,umask=0x1/,arb/event=0x84,umask=0x1/ -a true

Performance counter stats for 'system wide':

2,058 uncore_arb_0/event=0x81,umask=0x1/
2,028 uncore_arb_1/event=0x81,umask=0x1/
0 uncore_arb_0/event=0x84,umask=0x1/
0 uncore_arb_1/event=0x84,umask=0x1/

0.000614498 seconds time elapsed

After:

# perf stat -e arb/event=0x81,umask=0x1/,arb/event=0x84,umask=0x1/ -a true

Performance counter stats for 'system wide':

3,996 arb/event=0x81,umask=0x1/
0 arb/event=0x84,umask=0x1/

0.000630046 seconds time elapsed

Of course, the '--no-merge' option still works for uncore events.

# perf stat -e arb/event=0x81,umask=0x1/,arb/event=0x84,umask=0x1/ --no-merge true

Performance counter stats for 'system wide':

1,952 uncore_arb_0/event=0x81,umask=0x1/
1,921 uncore_arb_1/event=0x81,umask=0x1/
0 uncore_arb_0/event=0x84,umask=0x1/
0 uncore_arb_1/event=0x84,umask=0x1/

0.000575536 seconds time elapsed

Signed-off-by: Jin Yao <[email protected]>
---
tools/perf/builtin-stat.c | 3 ---
tools/perf/util/stat-display.c | 29 +++++++++++++++++++++++++----
2 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index f9f74a514315..b67a44982b61 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -2442,9 +2442,6 @@ int cmd_stat(int argc, const char **argv)

evlist__check_cpu_maps(evsel_list);

- if (perf_pmu__has_hybrid())
- stat_config.no_merge = true;
-
/*
* Initialize thread_map with comm names,
* so we could print it out on output.
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index b759dfd633b4..c6070f4684ca 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -595,6 +595,19 @@ static void collect_all_aliases(struct perf_stat_config *config, struct evsel *c
}
}

+static bool is_uncore(struct evsel *evsel)
+{
+ struct perf_pmu *pmu;
+
+ if (evsel->pmu_name) {
+ pmu = perf_pmu__find(evsel->pmu_name);
+ if (pmu)
+ return pmu->is_uncore;
+ }
+
+ return false;
+}
+
static bool collect_data(struct perf_stat_config *config, struct evsel *counter,
void (*cb)(struct perf_stat_config *config, struct evsel *counter, void *data,
bool first),
@@ -603,10 +616,18 @@ static bool collect_data(struct perf_stat_config *config, struct evsel *counter,
if (counter->merged_stat)
return false;
cb(config, counter, data, true);
- if (config->no_merge)
- uniquify_event_name(counter);
- else if (counter->auto_merge_stats)
- collect_all_aliases(config, counter, cb, data);
+ if (perf_pmu__has_hybrid()) {
+ if (config->no_merge || !is_uncore(counter))
+ uniquify_event_name(counter);
+ else if (counter->auto_merge_stats)
+ collect_all_aliases(config, counter, cb, data);
+ } else {
+ if (config->no_merge)
+ uniquify_event_name(counter);
+ else if (counter->auto_merge_stats)
+ collect_all_aliases(config, counter, cb, data);
+ }
+
return true;
}

--
2.17.1


2021-07-06 02:34:48

by Jin Yao

[permalink] [raw]
Subject: Re: [PATCH] perf stat: Merge uncore events by default for hybrid platform

Hi,

On 6/16/2021 2:30 PM, Jin Yao wrote:
> On hybrid platform, by default stat aggregates and reports the event counts
> per pmu. For example,
>
> # perf stat -e cycles -a true
>
> Performance counter stats for 'system wide':
>
> 1,400,445 cpu_core/cycles/
> 680,881 cpu_atom/cycles/
>
> 0.001770773 seconds time elapsed
>
> While for uncore events, that's not a suitable method. Uncore has nothing
> to do with hybrid. So for uncore events, we aggregate event counts from all
> PMUs and report the counts without PMUs.
>
> Before:
>
> # perf stat -e arb/event=0x81,umask=0x1/,arb/event=0x84,umask=0x1/ -a true
>
> Performance counter stats for 'system wide':
>
> 2,058 uncore_arb_0/event=0x81,umask=0x1/
> 2,028 uncore_arb_1/event=0x81,umask=0x1/
> 0 uncore_arb_0/event=0x84,umask=0x1/
> 0 uncore_arb_1/event=0x84,umask=0x1/
>
> 0.000614498 seconds time elapsed
>
> After:
>
> # perf stat -e arb/event=0x81,umask=0x1/,arb/event=0x84,umask=0x1/ -a true
>
> Performance counter stats for 'system wide':
>
> 3,996 arb/event=0x81,umask=0x1/
> 0 arb/event=0x84,umask=0x1/
>
> 0.000630046 seconds time elapsed
>
> Of course, we also keep the '--no-merge' still works for uncore events.
>
> # perf stat -e arb/event=0x81,umask=0x1/,arb/event=0x84,umask=0x1/ --no-merge true
>
> Performance counter stats for 'system wide':
>
> 1,952 uncore_arb_0/event=0x81,umask=0x1/
> 1,921 uncore_arb_1/event=0x81,umask=0x1/
> 0 uncore_arb_0/event=0x84,umask=0x1/
> 0 uncore_arb_1/event=0x84,umask=0x1/
>
> 0.000575536 seconds time elapsed
>
> Signed-off-by: Jin Yao <[email protected]>
> ---
> tools/perf/builtin-stat.c | 3 ---
> tools/perf/util/stat-display.c | 29 +++++++++++++++++++++++++----
> 2 files changed, 25 insertions(+), 7 deletions(-)
>
> diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
> index f9f74a514315..b67a44982b61 100644
> --- a/tools/perf/builtin-stat.c
> +++ b/tools/perf/builtin-stat.c
> @@ -2442,9 +2442,6 @@ int cmd_stat(int argc, const char **argv)
>
> evlist__check_cpu_maps(evsel_list);
>
> - if (perf_pmu__has_hybrid())
> - stat_config.no_merge = true;
> -
> /*
> * Initialize thread_map with comm names,
> * so we could print it out on output.
> diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
> index b759dfd633b4..c6070f4684ca 100644
> --- a/tools/perf/util/stat-display.c
> +++ b/tools/perf/util/stat-display.c
> @@ -595,6 +595,19 @@ static void collect_all_aliases(struct perf_stat_config *config, struct evsel *c
> }
> }
>
> +static bool is_uncore(struct evsel *evsel)
> +{
> + struct perf_pmu *pmu;
> +
> + if (evsel->pmu_name) {
> + pmu = perf_pmu__find(evsel->pmu_name);
> + if (pmu)
> + return pmu->is_uncore;
> + }
> +
> + return false;
> +}
> +
> static bool collect_data(struct perf_stat_config *config, struct evsel *counter,
> void (*cb)(struct perf_stat_config *config, struct evsel *counter, void *data,
> bool first),
> @@ -603,10 +616,18 @@ static bool collect_data(struct perf_stat_config *config, struct evsel *counter,
> if (counter->merged_stat)
> return false;
> cb(config, counter, data, true);
> - if (config->no_merge)
> - uniquify_event_name(counter);
> - else if (counter->auto_merge_stats)
> - collect_all_aliases(config, counter, cb, data);
> + if (perf_pmu__has_hybrid()) {
> + if (config->no_merge || !is_uncore(counter))
> + uniquify_event_name(counter);
> + else if (counter->auto_merge_stats)
> + collect_all_aliases(config, counter, cb, data);
> + } else {
> + if (config->no_merge)
> + uniquify_event_name(counter);
> + else if (counter->auto_merge_stats)
> + collect_all_aliases(config, counter, cb, data);
> + }
> +
> return true;
> }
>
>

Any comments for this patch? :)

Thanks
Jin Yao

2021-07-06 19:53:41

by Jiri Olsa

[permalink] [raw]
Subject: Re: [PATCH] perf stat: Merge uncore events by default for hybrid platform

On Tue, Jul 06, 2021 at 10:32:57AM +0800, Jin, Yao wrote:
> Hi,
>
> On 6/16/2021 2:30 PM, Jin Yao wrote:
> > On hybrid platform, by default stat aggregates and reports the event counts
> > per pmu. For example,
> >
> > # perf stat -e cycles -a true
> >
> > Performance counter stats for 'system wide':
> >
> > 1,400,445 cpu_core/cycles/
> > 680,881 cpu_atom/cycles/
> >
> > 0.001770773 seconds time elapsed
> >
> > While for uncore events, that's not a suitable method. Uncore has nothing
> > to do with hybrid. So for uncore events, we aggregate event counts from all
> > PMUs and report the counts without PMUs.
> >
> > Before:
> >
> > # perf stat -e arb/event=0x81,umask=0x1/,arb/event=0x84,umask=0x1/ -a true
> >
> > Performance counter stats for 'system wide':
> >
> > 2,058 uncore_arb_0/event=0x81,umask=0x1/
> > 2,028 uncore_arb_1/event=0x81,umask=0x1/
> > 0 uncore_arb_0/event=0x84,umask=0x1/
> > 0 uncore_arb_1/event=0x84,umask=0x1/
> >
> > 0.000614498 seconds time elapsed
> >
> > After:
> >
> > # perf stat -e arb/event=0x81,umask=0x1/,arb/event=0x84,umask=0x1/ -a true
> >
> > Performance counter stats for 'system wide':
> >
> > 3,996 arb/event=0x81,umask=0x1/
> > 0 arb/event=0x84,umask=0x1/
> >
> > 0.000630046 seconds time elapsed
> >
> > Of course, we also keep the '--no-merge' still works for uncore events.
> >
> > # perf stat -e arb/event=0x81,umask=0x1/,arb/event=0x84,umask=0x1/ --no-merge true
> >
> > Performance counter stats for 'system wide':
> >
> > 1,952 uncore_arb_0/event=0x81,umask=0x1/
> > 1,921 uncore_arb_1/event=0x81,umask=0x1/
> > 0 uncore_arb_0/event=0x84,umask=0x1/
> > 0 uncore_arb_1/event=0x84,umask=0x1/
> >
> > 0.000575536 seconds time elapsed
> >
> > Signed-off-by: Jin Yao <[email protected]>
> > ---
> > tools/perf/builtin-stat.c | 3 ---
> > tools/perf/util/stat-display.c | 29 +++++++++++++++++++++++++----
> > 2 files changed, 25 insertions(+), 7 deletions(-)
> >
> > diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
> > index f9f74a514315..b67a44982b61 100644
> > --- a/tools/perf/builtin-stat.c
> > +++ b/tools/perf/builtin-stat.c
> > @@ -2442,9 +2442,6 @@ int cmd_stat(int argc, const char **argv)
> > evlist__check_cpu_maps(evsel_list);
> > - if (perf_pmu__has_hybrid())
> > - stat_config.no_merge = true;
> > -
> > /*
> > * Initialize thread_map with comm names,
> > * so we could print it out on output.
> > diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
> > index b759dfd633b4..c6070f4684ca 100644
> > --- a/tools/perf/util/stat-display.c
> > +++ b/tools/perf/util/stat-display.c
> > @@ -595,6 +595,19 @@ static void collect_all_aliases(struct perf_stat_config *config, struct evsel *c
> > }
> > }
> > +static bool is_uncore(struct evsel *evsel)
> > +{
> > + struct perf_pmu *pmu;
> > +
> > + if (evsel->pmu_name) {
> > + pmu = perf_pmu__find(evsel->pmu_name);

evsel__find_pmu might be one line shorter? ;-)


> > + if (pmu)
> > + return pmu->is_uncore;
> > + }
> > +
> > + return false;
> > +}
> > +
> > static bool collect_data(struct perf_stat_config *config, struct evsel *counter,
> > void (*cb)(struct perf_stat_config *config, struct evsel *counter, void *data,
> > bool first),
> > @@ -603,10 +616,18 @@ static bool collect_data(struct perf_stat_config *config, struct evsel *counter,
> > if (counter->merged_stat)
> > return false;
> > cb(config, counter, data, true);
> > - if (config->no_merge)
> > - uniquify_event_name(counter);
> > - else if (counter->auto_merge_stats)
> > - collect_all_aliases(config, counter, cb, data);
> > + if (perf_pmu__has_hybrid()) {
> > + if (config->no_merge || !is_uncore(counter))

hum, this is all the same except for the !is_uncore condition, right?

could we just add 'config->no_merge || hybrid_uniquify(counter)'

that would cover both perf_pmu__has_hybrid and !is_uncore conditions?

jirka

> > + uniquify_event_name(counter);
> > + else if (counter->auto_merge_stats)
> > + collect_all_aliases(config, counter, cb, data);
> > + } else {
> > + if (config->no_merge)
> > + uniquify_event_name(counter);
> > + else if (counter->auto_merge_stats)
> > + collect_all_aliases(config, counter, cb, data);
> > + }
> > +
> > return true;
> > }
> >
>
> Any comments for this patch? :)
>
> Thanks
> Jin Yao
>

2021-07-07 05:47:40

by Jin Yao

[permalink] [raw]
Subject: Re: [PATCH] perf stat: Merge uncore events by default for hybrid platform

Hi Jiri,

On 7/7/2021 3:51 AM, Jiri Olsa wrote:
> On Tue, Jul 06, 2021 at 10:32:57AM +0800, Jin, Yao wrote:
>> Hi,
>>
>> On 6/16/2021 2:30 PM, Jin Yao wrote:
>>> On hybrid platform, by default stat aggregates and reports the event counts
>>> per pmu. For example,
>>>
>>> # perf stat -e cycles -a true
>>>
>>> Performance counter stats for 'system wide':
>>>
>>> 1,400,445 cpu_core/cycles/
>>> 680,881 cpu_atom/cycles/
>>>
>>> 0.001770773 seconds time elapsed
>>>
>>> While for uncore events, that's not a suitable method. Uncore has nothing
>>> to do with hybrid. So for uncore events, we aggregate event counts from all
>>> PMUs and report the counts without PMUs.
>>>
>>> Before:
>>>
>>> # perf stat -e arb/event=0x81,umask=0x1/,arb/event=0x84,umask=0x1/ -a true
>>>
>>> Performance counter stats for 'system wide':
>>>
>>> 2,058 uncore_arb_0/event=0x81,umask=0x1/
>>> 2,028 uncore_arb_1/event=0x81,umask=0x1/
>>> 0 uncore_arb_0/event=0x84,umask=0x1/
>>> 0 uncore_arb_1/event=0x84,umask=0x1/
>>>
>>> 0.000614498 seconds time elapsed
>>>
>>> After:
>>>
>>> # perf stat -e arb/event=0x81,umask=0x1/,arb/event=0x84,umask=0x1/ -a true
>>>
>>> Performance counter stats for 'system wide':
>>>
>>> 3,996 arb/event=0x81,umask=0x1/
>>> 0 arb/event=0x84,umask=0x1/
>>>
>>> 0.000630046 seconds time elapsed
>>>
>>> Of course, we also keep the '--no-merge' still works for uncore events.
>>>
>>> # perf stat -e arb/event=0x81,umask=0x1/,arb/event=0x84,umask=0x1/ --no-merge true
>>>
>>> Performance counter stats for 'system wide':
>>>
>>> 1,952 uncore_arb_0/event=0x81,umask=0x1/
>>> 1,921 uncore_arb_1/event=0x81,umask=0x1/
>>> 0 uncore_arb_0/event=0x84,umask=0x1/
>>> 0 uncore_arb_1/event=0x84,umask=0x1/
>>>
>>> 0.000575536 seconds time elapsed
>>>
>>> Signed-off-by: Jin Yao <[email protected]>
>>> ---
>>> tools/perf/builtin-stat.c | 3 ---
>>> tools/perf/util/stat-display.c | 29 +++++++++++++++++++++++++----
>>> 2 files changed, 25 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
>>> index f9f74a514315..b67a44982b61 100644
>>> --- a/tools/perf/builtin-stat.c
>>> +++ b/tools/perf/builtin-stat.c
>>> @@ -2442,9 +2442,6 @@ int cmd_stat(int argc, const char **argv)
>>> evlist__check_cpu_maps(evsel_list);
>>> - if (perf_pmu__has_hybrid())
>>> - stat_config.no_merge = true;
>>> -
>>> /*
>>> * Initialize thread_map with comm names,
>>> * so we could print it out on output.
>>> diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
>>> index b759dfd633b4..c6070f4684ca 100644
>>> --- a/tools/perf/util/stat-display.c
>>> +++ b/tools/perf/util/stat-display.c
>>> @@ -595,6 +595,19 @@ static void collect_all_aliases(struct perf_stat_config *config, struct evsel *c
>>> }
>>> }
>>> +static bool is_uncore(struct evsel *evsel)
>>> +{
>>> + struct perf_pmu *pmu;
>>> +
>>> + if (evsel->pmu_name) {
>>> + pmu = perf_pmu__find(evsel->pmu_name);
>
> evsel__find_pmu might be one line shorter? ;-)
>

Yes, this is a better method, thanks!

>
>>> + if (pmu)
>>> + return pmu->is_uncore;
>>> + }
>>> +
>>> + return false;
>>> +}
>>> +
>>> static bool collect_data(struct perf_stat_config *config, struct evsel *counter,
>>> void (*cb)(struct perf_stat_config *config, struct evsel *counter, void *data,
>>> bool first),
>>> @@ -603,10 +616,18 @@ static bool collect_data(struct perf_stat_config *config, struct evsel *counter,
>>> if (counter->merged_stat)
>>> return false;
>>> cb(config, counter, data, true);
>>> - if (config->no_merge)
>>> - uniquify_event_name(counter);
>>> - else if (counter->auto_merge_stats)
>>> - collect_all_aliases(config, counter, cb, data);
>>> + if (perf_pmu__has_hybrid()) {
>>> + if (config->no_merge || !is_uncore(counter))
>
> hum, this is all the same except for the !is_uncore condition, right?
>
> could we just add 'config->no_merge || hybrid_uniquify(count)'
>
> that would cover both perf_pmu__has_hybrid and !is_uncore conditions?
>

Yes, I will create a new function 'hybrid_uniquify' to check whether the event name should be uniquified on a hybrid platform.

Thanks
Jin Yao

> jirka
>
>>> + uniquify_event_name(counter);
>>> + else if (counter->auto_merge_stats)
>>> + collect_all_aliases(config, counter, cb, data);
>>> + } else {
>>> + if (config->no_merge)
>>> + uniquify_event_name(counter);
>>> + else if (counter->auto_merge_stats)
>>> + collect_all_aliases(config, counter, cb, data);
>>> + }
>>> +
>>> return true;
>>> }
>>>
>>
>> Any comments for this patch? :)
>>
>> Thanks
>> Jin Yao
>>
>