2020-05-25 07:00:12

by Jin Yao

[permalink] [raw]
Subject: [PATCH v2 1/2] perf evlist: Ensure grouped events with same cpu map

A metric may consist of core event and uncore event (or other
per-socket event)

For example, the metric "C2_Pkg_Residency" consists of
"cstate_pkg/c2-residency" and "msr/tsc". The former is per-socket
event and the latter is per-cpu event.

"C2_Pkg_Residency" hits assertion failure on cascadelakex.

# perf stat -M "C2_Pkg_Residency" -a -- sleep 1
perf: util/evsel.c:1464: get_group_fd: Assertion `!(fd == -1)' failed.
Aborted

The root cause is one issue in get_group_fd(), access violation!

For a group mixed with per-socket event and per-cpu event and the
group leader is per-socket event, access violation will happen.

perf_evsel__alloc_fd allocates one FD member for per-socket event.
Only FD(evsel, 0, 0) is valid (suppose one-socket system).

But for per-cpu event, perf_evsel__alloc_fd allocates N FD members
(N = ncpus). For example, if ncpus is 8, FD(evsel, 0, 0) to
FD(evsel, 7, 0) are valid.

get_group_fd(struct evsel *evsel, int cpu, int thread)
{
struct evsel *leader = evsel->leader;

fd = FD(leader, cpu, thread); /* access violation */
}

If leader is per-socket event, only FD(leader, 0, 0) is valid.
So when get_group_fd tries to access FD(leader, 1, 0), access
violation will happen.

This patch ensures that the grouped events with same cpu maps
before we go to get_group_fd.

If the cpu maps are not matched, we force to disable the group.

v2:
---
Process for the cases such as -e '{A,B}','{C,D,E}',F.

Fixes: 6a4bb04caacc8 ("perf tools: Enable grouping logic for parsed events")
Signed-off-by: Jin Yao <[email protected]>
---
tools/perf/builtin-stat.c | 3 +++
tools/perf/util/evlist.c | 49 +++++++++++++++++++++++++++++++++++++++
tools/perf/util/evlist.h | 5 ++++
3 files changed, 57 insertions(+)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 377e575f9645..5be1f9048a01 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -584,6 +584,9 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
if (affinity__setup(&affinity) < 0)
return -1;

+ if (!evlist__cpus_map_matched(evsel_list))
+ evlist__force_disable_group(evsel_list);
+
evlist__for_each_cpu (evsel_list, i, cpu) {
affinity__set(&affinity, cpu);

diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 2a9de6491700..1161cffc0688 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1704,3 +1704,52 @@ struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list,
}
return leader;
}
+
+static bool cpus_map_matched(struct evsel *prev, struct evsel *evsel)
+{
+ if (evsel->core.cpus->nr != prev->core.cpus->nr)
+ return false;
+
+ for (int i = 0; i < evsel->core.cpus->nr; i++) {
+ if (evsel->core.cpus->map[i] != prev->core.cpus->map[i])
+ return false;
+ }
+
+ return true;
+}
+
+bool evlist__cpus_map_matched(struct evlist *evlist)
+{
+ struct evsel *prev = evlist__first(evlist), *evsel = prev;
+ int nr_members = prev->core.nr_members;
+
+ evlist__for_each_entry_continue(evlist, evsel) {
+ if (nr_members <= 1) {
+ prev = evsel;
+ nr_members = evsel->core.nr_members;
+ continue;
+ }
+
+ nr_members--;
+
+ if (!cpus_map_matched(prev, evsel))
+ return false;
+
+ prev = evsel;
+ }
+
+ return true;
+}
+
+void evlist__force_disable_group(struct evlist *evlist)
+{
+ struct evsel *evsel;
+
+ pr_warning("WARNING: event cpu maps are not fully matched, "
+ "stop event grouping\n");
+
+ evlist__for_each_entry(evlist, evsel) {
+ evsel->leader = evsel;
+ evsel->core.nr_members = 0;
+ }
+}
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index b6f325dfb4d2..b09c3fb2cad7 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -355,4 +355,9 @@ void perf_evlist__force_leader(struct evlist *evlist);
struct evsel *perf_evlist__reset_weak_group(struct evlist *evlist,
struct evsel *evsel,
bool close);
+
+bool evlist__cpus_map_matched(struct evlist *evlist);
+
+void evlist__force_disable_group(struct evlist *evlist);
+
#endif /* __PERF_EVLIST_H */
--
2.17.1


2020-05-25 07:00:13

by Jin Yao

[permalink] [raw]
Subject: [PATCH v2 2/2] perf test: Add test case for group members

The evlist may consist of some event + group combinations.

For example, perf stat -e "A,{B,C},D".

This patch testes the event in evlist to see if it has the
correct leader and correct nr_members.

Signed-off-by: Jin Yao <[email protected]>
---
tools/perf/tests/parse-events.c | 35 ++++++++++++++++++++++++++++++++-
1 file changed, 34 insertions(+), 1 deletion(-)

diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index 895188b63f96..d584b77c878d 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -1386,6 +1386,34 @@ static int test__sym_event_dc(struct evlist *evlist)
return 0;
}

+static int test__mixed_group(struct evlist *evlist)
+{
+ struct evsel *evsel, *leader;
+
+ TEST_ASSERT_VAL("wrong number of entries", 4 == evlist->core.nr_entries);
+
+ /* cycles - leader is also itself */
+ evsel = leader = evlist__first(evlist);
+ TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+ TEST_ASSERT_VAL("wrong nr_members", 0 == evsel->core.nr_members);
+
+ /* {cache-misses,branch-misses} - leader is cache-misses */
+ evsel = leader = evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+ TEST_ASSERT_VAL("wrong nr_members", 2 == evsel->core.nr_members);
+
+ evsel = evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+ TEST_ASSERT_VAL("wrong nr_members", 0 == evsel->core.nr_members);
+
+ /* instructions - leader is also itself */
+ evsel = leader = evlist__first(evlist);
+ TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+ TEST_ASSERT_VAL("wrong nr_members", 0 == evsel->core.nr_members);
+
+ return 0;
+}
+
static int count_tracepoints(void)
{
struct dirent *events_ent;
@@ -1737,7 +1765,12 @@ static struct evlist_test test__events[] = {
.name = "cycles:k",
.check = test__sym_event_dc,
.id = 55,
- }
+ },
+ {
+ .name = "cycles,{cache-misses,branch-misses},instructions",
+ .check = test__mixed_group,
+ .id = 56,
+ },
};

static struct evlist_test test__events_pmu[] = {
--
2.17.1

2020-05-26 13:45:04

by Jiri Olsa

[permalink] [raw]
Subject: Re: [PATCH v2 1/2] perf evlist: Ensure grouped events with same cpu map

On Mon, May 25, 2020 at 02:55:58PM +0800, Jin Yao wrote:

SNIP

> diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
> index 2a9de6491700..1161cffc0688 100644
> --- a/tools/perf/util/evlist.c
> +++ b/tools/perf/util/evlist.c
> @@ -1704,3 +1704,52 @@ struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list,
> }
> return leader;
> }
> +
> +static bool cpus_map_matched(struct evsel *prev, struct evsel *evsel)
> +{
> + if (evsel->core.cpus->nr != prev->core.cpus->nr)
> + return false;
> +
> + for (int i = 0; i < evsel->core.cpus->nr; i++) {
> + if (evsel->core.cpus->map[i] != prev->core.cpus->map[i])
> + return false;
> + }
> +
> + return true;
> +}
> +
> +bool evlist__cpus_map_matched(struct evlist *evlist)
> +{
> + struct evsel *prev = evlist__first(evlist), *evsel = prev;
> + int nr_members = prev->core.nr_members;
> +
> + evlist__for_each_entry_continue(evlist, evsel) {
> + if (nr_members <= 1) {
> + prev = evsel;
> + nr_members = evsel->core.nr_members;
> + continue;
> + }
> +
> + nr_members--;
> +
> + if (!cpus_map_matched(prev, evsel))
> + return false;
> +
> + prev = evsel;
> + }
> +
> + return true;
> +}
> +
> +void evlist__force_disable_group(struct evlist *evlist)
> +{
> + struct evsel *evsel;
> +
> + pr_warning("WARNING: event cpu maps are not fully matched, "
> + "stop event grouping\n");
> +
> + evlist__for_each_entry(evlist, evsel) {
> + evsel->leader = evsel;
> + evsel->core.nr_members = 0;
> + }
> +}

I think this is too much, we need to disable only groups with not
matching cpus, not all of them, how about something like this


struct evsel *pos;

evlist__for_each_entry(evlist, evsel) {
if (evsel->leader == evsel)
continue;
if (!cpus_map_matched(evsel->leader, evsel))
continue;

pr_warn("Disabling group...

for_each_group_member(pos, evsel->leader) {
pos->leader = pos;
evsel->core.nr_members = 0;
}
}

jirka

2020-05-27 05:25:02

by Jin Yao

[permalink] [raw]
Subject: Re: [PATCH v2 1/2] perf evlist: Ensure grouped events with same cpu map

Hi Jiri,

On 5/26/2020 7:51 PM, Jiri Olsa wrote:
> On Mon, May 25, 2020 at 02:55:58PM +0800, Jin Yao wrote:
>
> SNIP
>
>> diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
>> index 2a9de6491700..1161cffc0688 100644
>> --- a/tools/perf/util/evlist.c
>> +++ b/tools/perf/util/evlist.c
>> @@ -1704,3 +1704,52 @@ struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list,
>> }
>> return leader;
>> }
>> +
>> +static bool cpus_map_matched(struct evsel *prev, struct evsel *evsel)
>> +{
>> + if (evsel->core.cpus->nr != prev->core.cpus->nr)
>> + return false;
>> +
>> + for (int i = 0; i < evsel->core.cpus->nr; i++) {
>> + if (evsel->core.cpus->map[i] != prev->core.cpus->map[i])
>> + return false;
>> + }
>> +
>> + return true;
>> +}
>> +
>> +bool evlist__cpus_map_matched(struct evlist *evlist)
>> +{
>> + struct evsel *prev = evlist__first(evlist), *evsel = prev;
>> + int nr_members = prev->core.nr_members;
>> +
>> + evlist__for_each_entry_continue(evlist, evsel) {
>> + if (nr_members <= 1) {
>> + prev = evsel;
>> + nr_members = evsel->core.nr_members;
>> + continue;
>> + }
>> +
>> + nr_members--;
>> +
>> + if (!cpus_map_matched(prev, evsel))
>> + return false;
>> +
>> + prev = evsel;
>> + }
>> +
>> + return true;
>> +}
>> +
>> +void evlist__force_disable_group(struct evlist *evlist)
>> +{
>> + struct evsel *evsel;
>> +
>> + pr_warning("WARNING: event cpu maps are not fully matched, "
>> + "stop event grouping\n");
>> +
>> + evlist__for_each_entry(evlist, evsel) {
>> + evsel->leader = evsel;
>> + evsel->core.nr_members = 0;
>> + }
>> +}
>
> I think this is too much, we need to disable only groups with not
> matching cpus, not all of them, how about something like this
>

Yes, that's too much.

>
> struct evsel *pos;
>
> evlist__for_each_entry(evlist, evsel) {
> if (evsel->leader == evsel)
> continue;
> if (!cpus_map_matched(evsel->leader, evsel))
> continue;
>
> pr_warn("Disabling group...
>
> for_each_group_member(pos, evsel->leader) {
> pos->leader = pos;
> evsel->core.nr_members = 0;
> }
> }
>
> jirka
>

Hmm, change "!cpus_map_matched()" to "cpus_map_matched()"? and use for_each_group_evsel() to replace
for_each_group_member()?

How about something like following?

void evlist__check_cpu_maps(struct evlist *evlist)
{
struct evsel *evsel, *pos;

evlist__for_each_entry(evlist, evsel) {
if (evsel->leader == evsel)
continue;

if (cpu_maps_matched(evsel->leader, evsel))
continue;

pr_warning("WARNING: event cpu maps are not fully matched, "
"disable group\n");

for_each_group_evsel(pos, evsel->leader) {
pos->leader = pos;
pos->core.nr_members = 0;
}

/*
* For core & uncore mixed event group, for example,
* '{cycles,unc_cbo_cache_lookup.any_i}',
* In evlist:
* cycles,
* unc_cbo_cache_lookup.any_i,
* unc_cbo_cache_lookup.any_i,
* unc_cbo_cache_lookup.any_i,
* unc_cbo_cache_lookup.any_i,
*
* cycles is leader and all unc_cbo_cache_lookup.any_i
* point to this leader. But for_each_group_evsel can't
* iterate all members from cycles. It only iterates
* cycles and one unc_cbo_cache_lookup.any_i. So we
* set extra evsel here.
*/
evsel->leader = evsel;
evsel->core.nr_members = 0;
}
}

Thanks
Jin Yao

2020-05-27 10:04:37

by Jin Yao

[permalink] [raw]
Subject: Re: [PATCH v2 1/2] perf evlist: Ensure grouped events with same cpu map

Hi Jiri,

On 5/27/2020 11:20 AM, Jin, Yao wrote:
> Hi Jiri,
>
> On 5/26/2020 7:51 PM, Jiri Olsa wrote:
>> On Mon, May 25, 2020 at 02:55:58PM +0800, Jin Yao wrote:
>>
>> SNIP
>>
>>> diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
>>> index 2a9de6491700..1161cffc0688 100644
>>> --- a/tools/perf/util/evlist.c
>>> +++ b/tools/perf/util/evlist.c
>>> @@ -1704,3 +1704,52 @@ struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list,
>>>       }
>>>       return leader;
>>>   }
>>> +
>>> +static bool cpus_map_matched(struct evsel *prev, struct evsel *evsel)
>>> +{
>>> +    if (evsel->core.cpus->nr != prev->core.cpus->nr)
>>> +        return false;
>>> +
>>> +    for (int i = 0; i < evsel->core.cpus->nr; i++) {
>>> +        if (evsel->core.cpus->map[i] != prev->core.cpus->map[i])
>>> +            return false;
>>> +    }
>>> +
>>> +    return true;
>>> +}
>>> +
>>> +bool evlist__cpus_map_matched(struct evlist *evlist)
>>> +{
>>> +    struct evsel *prev = evlist__first(evlist), *evsel = prev;
>>> +    int nr_members = prev->core.nr_members;
>>> +
>>> +    evlist__for_each_entry_continue(evlist, evsel) {
>>> +        if (nr_members <= 1) {
>>> +            prev = evsel;
>>> +            nr_members = evsel->core.nr_members;
>>> +            continue;
>>> +        }
>>> +
>>> +        nr_members--;
>>> +
>>> +        if (!cpus_map_matched(prev, evsel))
>>> +            return false;
>>> +
>>> +        prev = evsel;
>>> +    }
>>> +
>>> +    return true;
>>> +}
>>> +
>>> +void evlist__force_disable_group(struct evlist *evlist)
>>> +{
>>> +    struct evsel *evsel;
>>> +
>>> +    pr_warning("WARNING: event cpu maps are not fully matched, "
>>> +           "stop event grouping\n");
>>> +
>>> +    evlist__for_each_entry(evlist, evsel) {
>>> +        evsel->leader = evsel;
>>> +        evsel->core.nr_members = 0;
>>> +    }
>>> +}
>>
>> I think this is too much, we need to disable only groups with not
>> matching cpus, not all of them, how about something like this
>>
>
> Yes, that's too much.
>
>>
>>          struct evsel *pos;
>>
>>          evlist__for_each_entry(evlist, evsel) {
>>                  if (evsel->leader == evsel)
>>                          continue;
>>                  if (!cpus_map_matched(evsel->leader, evsel))
>>                          continue;
>>                  pr_warn("Disabling group...
>>
>>                  for_each_group_member(pos, evsel->leader) {
>>                          pos->leader = pos;
>>                          evsel->core.nr_members = 0;
>>                  }
>>          }
>>
>> jirka
>>
>
> Hmm, change "!cpus_map_matched()" to "cpus_map_matched()"? and use for_each_group_evsel() to replace
> for_each_group_member()?
>
> How about something like following?
>
> void evlist__check_cpu_maps(struct evlist *evlist)
> {
>     struct evsel *evsel, *pos;
>
>     evlist__for_each_entry(evlist, evsel) {
>         if (evsel->leader == evsel)
>             continue;
>
>         if (cpu_maps_matched(evsel->leader, evsel))
>             continue;
>
>         pr_warning("WARNING: event cpu maps are not fully matched, "
>                "disable group\n");
>
>         for_each_group_evsel(pos, evsel->leader) {
>             pos->leader = pos;
>             pos->core.nr_members = 0;
>         }
>
>         /*
>          * For core & uncore mixed event group, for example,
>          * '{cycles,unc_cbo_cache_lookup.any_i}',
>          * In evlist:
>          * cycles,
>          * unc_cbo_cache_lookup.any_i,
>          * unc_cbo_cache_lookup.any_i,
>          * unc_cbo_cache_lookup.any_i,
>          * unc_cbo_cache_lookup.any_i,
>          *
>          * cycles is leader and all unc_cbo_cache_lookup.any_i
>          * point to this leader. But for_each_group_evsel can't
>          * iterate all members from cycles. It only iterates
>          * cycles and one unc_cbo_cache_lookup.any_i. So we
>          * set extra evsel here.
>          */
>         evsel->leader = evsel;
>         evsel->core.nr_members = 0;
>     }
> }
>
> Thanks
> Jin Yao

Issue is found!

It looks we can't set "pos->leader = pos" in either for_each_group_member() or in
for_each_group_evsel() because it may exit the iteration immediately.

evlist__for_each_entry(evlist, evsel) {
if (evsel->leader == evsel)
continue;

if (cpu_maps_matched(evsel->leader, evsel))
continue;

pr_warning("WARNING: event cpu maps are not fully matched, "
"disable group\n");

for_each_group_member(pos, evsel->leader) {
pos->leader = pos;
pos->core.nr_members = 0;
}

Let me use the example of '{cycles,unc_cbo_cache_lookup.any_i}' again.

In evlist:
cycles,
unc_cbo_cache_lookup.any_i,
unc_cbo_cache_lookup.any_i,
unc_cbo_cache_lookup.any_i,
unc_cbo_cache_lookup.any_i,

When we reach the for_each_group_member at first time, evsel is the first unc_cbo_cache_lookup.any_i
and evsel->leader is cycles. pos is same as the evsel (the first unc_cbo_cache_lookup.any_i).

Once we execute "pos->leader = pos;", it's actually "evsel->leader = evsel". So now evsel->leader is
changed to the first unc_cbo_cache_lookup.any_i.

In next iteration, pos is the second unc_cbo_cache_lookup.any_i. pos->leader is cycles but
unfortunately evsel->leader has been changed to the first unc_cbo_cache_lookup.any_i. So iteration
stops immediately.

I'm now thinking if we can solve this issue by an easy way.

Thanks
Jin Yao

2020-05-27 10:28:45

by Jin Yao

[permalink] [raw]
Subject: Re: [PATCH v2 1/2] perf evlist: Ensure grouped events with same cpu map

Hi Jiri,

On 5/27/2020 2:31 PM, Jin, Yao wrote:
> Hi Jiri,
>
> On 5/27/2020 11:20 AM, Jin, Yao wrote:
>> Hi Jiri,
>>
>> On 5/26/2020 7:51 PM, Jiri Olsa wrote:
>>> On Mon, May 25, 2020 at 02:55:58PM +0800, Jin Yao wrote:
>>>
>>> SNIP
>>>
>>>> diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
>>>> index 2a9de6491700..1161cffc0688 100644
>>>> --- a/tools/perf/util/evlist.c
>>>> +++ b/tools/perf/util/evlist.c
>>>> @@ -1704,3 +1704,52 @@ struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list,
>>>>       }
>>>>       return leader;
>>>>   }
>>>> +
>>>> +static bool cpus_map_matched(struct evsel *prev, struct evsel *evsel)
>>>> +{
>>>> +    if (evsel->core.cpus->nr != prev->core.cpus->nr)
>>>> +        return false;
>>>> +
>>>> +    for (int i = 0; i < evsel->core.cpus->nr; i++) {
>>>> +        if (evsel->core.cpus->map[i] != prev->core.cpus->map[i])
>>>> +            return false;
>>>> +    }
>>>> +
>>>> +    return true;
>>>> +}
>>>> +
>>>> +bool evlist__cpus_map_matched(struct evlist *evlist)
>>>> +{
>>>> +    struct evsel *prev = evlist__first(evlist), *evsel = prev;
>>>> +    int nr_members = prev->core.nr_members;
>>>> +
>>>> +    evlist__for_each_entry_continue(evlist, evsel) {
>>>> +        if (nr_members <= 1) {
>>>> +            prev = evsel;
>>>> +            nr_members = evsel->core.nr_members;
>>>> +            continue;
>>>> +        }
>>>> +
>>>> +        nr_members--;
>>>> +
>>>> +        if (!cpus_map_matched(prev, evsel))
>>>> +            return false;
>>>> +
>>>> +        prev = evsel;
>>>> +    }
>>>> +
>>>> +    return true;
>>>> +}
>>>> +
>>>> +void evlist__force_disable_group(struct evlist *evlist)
>>>> +{
>>>> +    struct evsel *evsel;
>>>> +
>>>> +    pr_warning("WARNING: event cpu maps are not fully matched, "
>>>> +           "stop event grouping\n");
>>>> +
>>>> +    evlist__for_each_entry(evlist, evsel) {
>>>> +        evsel->leader = evsel;
>>>> +        evsel->core.nr_members = 0;
>>>> +    }
>>>> +}
>>>
>>> I think this is too much, we need to disable only groups with not
>>> matching cpus, not all of them, how about something like this
>>>
>>
>> Yes, that's too much.
>>
>>>
>>>          struct evsel *pos;
>>>
>>>          evlist__for_each_entry(evlist, evsel) {
>>>                  if (evsel->leader == evsel)
>>>                          continue;
>>>                  if (!cpus_map_matched(evsel->leader, evsel))
>>>                          continue;
>>>                  pr_warn("Disabling group...
>>>
>>>                  for_each_group_member(pos, evsel->leader) {
>>>                          pos->leader = pos;
>>>                          evsel->core.nr_members = 0;
>>>                  }
>>>          }
>>>
>>> jirka
>>>
>>
>> Hmm, change "!cpus_map_matched()" to "cpus_map_matched()"? and use for_each_group_evsel() to
>> replace for_each_group_member()?
>>
>> How about something like following?
>>
>> void evlist__check_cpu_maps(struct evlist *evlist)
>> {
>>      struct evsel *evsel, *pos;
>>
>>      evlist__for_each_entry(evlist, evsel) {
>>          if (evsel->leader == evsel)
>>              continue;
>>
>>          if (cpu_maps_matched(evsel->leader, evsel))
>>              continue;
>>
>>          pr_warning("WARNING: event cpu maps are not fully matched, "
>>                 "disable group\n");
>>
>>          for_each_group_evsel(pos, evsel->leader) {
>>              pos->leader = pos;
>>              pos->core.nr_members = 0;
>>          }
>>
>>          /*
>>           * For core & uncore mixed event group, for example,
>>           * '{cycles,unc_cbo_cache_lookup.any_i}',
>>           * In evlist:
>>           * cycles,
>>           * unc_cbo_cache_lookup.any_i,
>>           * unc_cbo_cache_lookup.any_i,
>>           * unc_cbo_cache_lookup.any_i,
>>           * unc_cbo_cache_lookup.any_i,
>>           *
>>           * cycles is leader and all unc_cbo_cache_lookup.any_i
>>           * point to this leader. But for_each_group_evsel can't
>>           * iterate all members from cycles. It only iterates
>>           * cycles and one unc_cbo_cache_lookup.any_i. So we
>>           * set extra evsel here.
>>           */
>>          evsel->leader = evsel;
>>          evsel->core.nr_members = 0;
>>      }
>> }
>>
>> Thanks
>> Jin Yao
>
> Issue is found!
>
> It looks we can't set "pos->leader = pos" in either for_each_group_member() or in
> for_each_group_evsel() because it may exit the iteration immediately.
>
>     evlist__for_each_entry(evlist, evsel) {
>         if (evsel->leader == evsel)
>             continue;
>
>         if (cpu_maps_matched(evsel->leader, evsel))
>             continue;
>
>         pr_warning("WARNING: event cpu maps are not fully matched, "
>                "disable group\n");
>
>         for_each_group_member(pos, evsel->leader) {
>             pos->leader = pos;
>             pos->core.nr_members = 0;
>         }
>
> Let me use the example of '{cycles,unc_cbo_cache_lookup.any_i}' again.
>
> In evlist:
> cycles,
> unc_cbo_cache_lookup.any_i,
> unc_cbo_cache_lookup.any_i,
> unc_cbo_cache_lookup.any_i,
> unc_cbo_cache_lookup.any_i,
>
> When we reach the for_each_group_member at first time, evsel is the first unc_cbo_cache_lookup.any_i
> and evsel->leader is cycles. pos is same as the evsel (the first unc_cbo_cache_lookup.any_i).
>
> Once we execute "pos->leader = pos;", it's actually "evsel->leader = evsel". So now evsel->leader is
> changed to the first unc_cbo_cache_lookup.any_i.
>
> In next iteration, pos is the second unc_cbo_cache_lookup.any_i. pos->leader is cycles but
> unfortunately evsel->leader has been changed to the first unc_cbo_cache_lookup.any_i. So iteration
> stops immediately.
>
> I'm now thinking if we can solve this issue by an easy way.
>
> Thanks
> Jin Yao

How about this fix?

void evlist__check_cpu_maps(struct evlist *evlist)
{
struct evsel *evsel, *pos, *tmp;

evlist__for_each_entry(evlist, evsel) {
if (evsel->leader == evsel)
continue;

if (cpu_maps_matched(evsel->leader, evsel))
continue;

pr_warning("WARNING: event cpu maps are not fully matched, "
"disable group\n");

for_each_group_member(pos, evsel->leader) {
if (pos != evsel) {
pos->leader = pos;
pos->core.nr_members = 0;
}
}

tmp = evsel->leader;
tmp->leader = tmp;
tmp->core.nr_members = 0;

evsel->leader = evsel;
evsel->core.nr_members = 0;
}
}

Thanks
Jin Yao

2020-05-27 15:36:50

by Jiri Olsa

[permalink] [raw]
Subject: Re: [PATCH v2 1/2] perf evlist: Ensure grouped events with same cpu map

On Wed, May 27, 2020 at 02:31:03PM +0800, Jin, Yao wrote:

SNIP

> > Thanks
> > Jin Yao
>
> Issue is found!
>
> It looks we can't set "pos->leader = pos" in either for_each_group_member()
> or in for_each_group_evsel() because it may exit the iteration immediately.
>
> evlist__for_each_entry(evlist, evsel) {
> if (evsel->leader == evsel)
> continue;
>
> if (cpu_maps_matched(evsel->leader, evsel))
> continue;
>
> pr_warning("WARNING: event cpu maps are not fully matched, "
> "disable group\n");
>
> for_each_group_member(pos, evsel->leader) {
> pos->leader = pos;
> pos->core.nr_members = 0;
> }
>
> Let me use the example of '{cycles,unc_cbo_cache_lookup.any_i}' again.
>
> In evlist:
> cycles,
> unc_cbo_cache_lookup.any_i,
> unc_cbo_cache_lookup.any_i,
> unc_cbo_cache_lookup.any_i,
> unc_cbo_cache_lookup.any_i,
>
> When we reach the for_each_group_member at first time, evsel is the first
> unc_cbo_cache_lookup.any_i and evsel->leader is cycles. pos is same as the
> evsel (the first unc_cbo_cache_lookup.any_i).
>
> Once we execute "pos->leader = pos;", it's actually "evsel->leader = evsel".
> So now evsel->leader is changed to the first unc_cbo_cache_lookup.any_i.
>
> In next iteration, pos is the second unc_cbo_cache_lookup.any_i. pos->leader
> is cycles but unfortunately evsel->leader has been changed to the first
> unc_cbo_cache_lookup.any_i. So iteration stops immediately.

hum, AFAICS the iteration will not break but continue to next evsel and
pass the 'continue' for another group member.. what do I miss?

jirka

>
> I'm now thinking if we can solve this issue by an easy way.
>
> Thanks
> Jin Yao
>

2020-05-27 15:54:55

by Jin Yao

[permalink] [raw]
Subject: Re: [PATCH v2 1/2] perf evlist: Ensure grouped events with same cpu map

Hi Jiri,

On 5/27/2020 6:28 PM, Jiri Olsa wrote:
> On Wed, May 27, 2020 at 02:31:03PM +0800, Jin, Yao wrote:
>
> SNIP
>
>>> Thanks
>>> Jin Yao
>>
>> Issue is found!
>>
>> It looks we can't set "pos->leader = pos" in either for_each_group_member()
>> or in for_each_group_evsel() because it may exit the iteration immediately.
>>
>> evlist__for_each_entry(evlist, evsel) {
>> if (evsel->leader == evsel)
>> continue;
>>
>> if (cpu_maps_matched(evsel->leader, evsel))
>> continue;
>>
>> pr_warning("WARNING: event cpu maps are not fully matched, "
>> "disable group\n");
>>
>> for_each_group_member(pos, evsel->leader) {
>> pos->leader = pos;
>> pos->core.nr_members = 0;
>> }
>>
>> Let me use the example of '{cycles,unc_cbo_cache_lookup.any_i}' again.
>>
>> In evlist:
>> cycles,
>> unc_cbo_cache_lookup.any_i,
>> unc_cbo_cache_lookup.any_i,
>> unc_cbo_cache_lookup.any_i,
>> unc_cbo_cache_lookup.any_i,
>>
>> When we reach the for_each_group_member at first time, evsel is the first
>> unc_cbo_cache_lookup.any_i and evsel->leader is cycles. pos is same as the
>> evsel (the first unc_cbo_cache_lookup.any_i).
>>
>> Once we execute "pos->leader = pos;", it's actually "evsel->leader = evsel".
>> So now evsel->leader is changed to the first unc_cbo_cache_lookup.any_i.
>>
>> In next iteration, pos is the second unc_cbo_cache_lookup.any_i. pos->leader
>> is cycles but unfortunately evsel->leader has been changed to the first
>> unc_cbo_cache_lookup.any_i. So iteration stops immediately.
>
> hum, AFAICS the iteration will not break but continue to next evsel and
> pass the 'continue' for another group member.. what do I miss?
>
> jirka
>

Let me use this example again.

cycles,
unc_cbo_cache_lookup.any_i,
unc_cbo_cache_lookup.any_i,
unc_cbo_cache_lookup.any_i,
unc_cbo_cache_lookup.any_i,

Yes, once for_each_group_member breaks (due to the issue in 'pos->leader = pos'),
evlist__for_each_entry will continue to the second unc_cbo_cache_lookup.any_i. But now evsel->leader
!= evsel (evsel->leader is "cycles"), so it will go to cpu_maps_matched.

But actually we don't need to go to cpu_maps_matched again.

for_each_group_member(pos, evsel->leader) {
pos->leader = pos;
pos->core.nr_members = 0;
}

If we solve the issue in above code, for_each_group_member doesn't break, the leaders of all members
in this group will be set to themselves.

if (evsel->leader == evsel)
continue;

So the iteration will continue to the next evsel.

Thanks
Jin Yao

2020-05-27 19:11:39

by Jiri Olsa

[permalink] [raw]
Subject: Re: [PATCH v2 1/2] perf evlist: Ensure grouped events with same cpu map

On Wed, May 27, 2020 at 09:49:11PM +0800, Jin, Yao wrote:
> Hi Jiri,
>
> On 5/27/2020 6:28 PM, Jiri Olsa wrote:
> > On Wed, May 27, 2020 at 02:31:03PM +0800, Jin, Yao wrote:
> >
> > SNIP
> >
> > > > Thanks
> > > > Jin Yao
> > >
> > > Issue is found!
> > >
> > > It looks we can't set "pos->leader = pos" in either for_each_group_member()
> > > or in for_each_group_evsel() because it may exit the iteration immediately.
> > >
> > > evlist__for_each_entry(evlist, evsel) {
> > > if (evsel->leader == evsel)
> > > continue;
> > >
> > > if (cpu_maps_matched(evsel->leader, evsel))
> > > continue;
> > >
> > > pr_warning("WARNING: event cpu maps are not fully matched, "
> > > "disable group\n");
> > >
> > > for_each_group_member(pos, evsel->leader) {
> > > pos->leader = pos;
> > > pos->core.nr_members = 0;
> > > }
> > >
> > > Let me use the example of '{cycles,unc_cbo_cache_lookup.any_i}' again.
> > >
> > > In evlist:
> > > cycles,
> > > unc_cbo_cache_lookup.any_i,
> > > unc_cbo_cache_lookup.any_i,
> > > unc_cbo_cache_lookup.any_i,
> > > unc_cbo_cache_lookup.any_i,
> > >
> > > When we reach the for_each_group_member at first time, evsel is the first
> > > unc_cbo_cache_lookup.any_i and evsel->leader is cycles. pos is same as the
> > > evsel (the first unc_cbo_cache_lookup.any_i).
> > >
> > > Once we execute "pos->leader = pos;", it's actually "evsel->leader = evsel".
> > > So now evsel->leader is changed to the first unc_cbo_cache_lookup.any_i.
> > >
> > > In next iteration, pos is the second unc_cbo_cache_lookup.any_i. pos->leader
> > > is cycles but unfortunately evsel->leader has been changed to the first
> > > unc_cbo_cache_lookup.any_i. So iteration stops immediately.
> >
> > hum, AFAICS the iteration will not break but continue to next evsel and
> > pass the 'continue' for another group member.. what do I miss?
> >
> > jirka
> >
>
> Let me use this example again.
>
> cycles,
> unc_cbo_cache_lookup.any_i,
> unc_cbo_cache_lookup.any_i,
> unc_cbo_cache_lookup.any_i,
> unc_cbo_cache_lookup.any_i,
>
> Yes, once for_each_group_member breaks (due to the issue in 'pos->leader =
> pos'), evlist__for_each_entry will continue to the second
> unc_cbo_cache_lookup.any_i. But now evsel->leader != evsel (evsel->leader is
> "cycles"), so it will go to cpu_maps_matched.
>
> But actually we don't need to go to cpu_maps_matched again.
>
> for_each_group_member(pos, evsel->leader) {
> pos->leader = pos;
> pos->core.nr_members = 0;
> }
>
> If we solve the issue in above code, for_each_group_member doesn't break,
> the leaders of all members in this group will be set to themselves.
>
> if (evsel->leader == evsel)
> continue;

I see.. the problem is in the for_each_group_member, how about
saving the leader into separate variable, like below

jirka


---
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index f789103d8306..a754cad3f5a0 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -189,6 +189,51 @@ static struct perf_stat_config stat_config = {
.big_num = true,
};

+static bool cpus_map_matched(struct evsel *a, struct evsel *b)
+{
+ if (!a->core.cpus && !b->core.cpus)
+ return true;
+
+ if (!a->core.cpus || !b->core.cpus)
+ return false;
+
+ if (a->core.cpus->nr != b->core.cpus->nr)
+ return false;
+
+ for (int i = 0; i < a->core.cpus->nr; i++) {
+ if (a->core.cpus->map[i] != b->core.cpus->map[i])
+ return false;
+ }
+
+ return true;
+}
+
+
+static void evlist__check_cpu_maps(struct evlist *evlist)
+{
+ struct evsel *evsel, *pos, *leader;
+
+ evlist__for_each_entry(evlist, evsel) {
+ char buf[1024];
+
+ leader = evsel->leader;
+ if (leader == evsel)
+ continue;
+ if (cpus_map_matched(leader, evsel))
+ continue;
+
+ evsel__group_desc(leader, buf, sizeof(buf));
+ WARN_ONCE(1, "WARNING: event cpu maps do not match, disabling group:\n");
+ pr_warning(" %s\n", buf);
+
+ for_each_group_evsel(pos, leader) {
+ pos->leader = pos;
+ pos->core.nr_members = 0;
+ }
+ evsel->leader->core.nr_members = 0;
+ }
+}
+
static inline void diff_timespec(struct timespec *r, struct timespec *a,
struct timespec *b)
{
@@ -1956,6 +2001,8 @@ int cmd_stat(int argc, const char **argv)
} else if (argc && !strncmp(argv[0], "rep", 3))
return __cmd_report(argc, argv);

+ evlist__check_cpu_maps(evsel_list);
+
interval = stat_config.interval;
timeout = stat_config.timeout;


2020-05-28 01:50:34

by Jin Yao

[permalink] [raw]
Subject: Re: [PATCH v2 1/2] perf evlist: Ensure grouped events with same cpu map

Hi Jiri,

On 5/28/2020 12:28 AM, Jiri Olsa wrote:
> On Wed, May 27, 2020 at 09:49:11PM +0800, Jin, Yao wrote:
>> Hi Jiri,
>>
>> On 5/27/2020 6:28 PM, Jiri Olsa wrote:
>>> On Wed, May 27, 2020 at 02:31:03PM +0800, Jin, Yao wrote:
>>>
>>> SNIP
>>>
>>>>> Thanks
>>>>> Jin Yao
>>>>
>>>> Issue is found!
>>>>
>>>> It looks we can't set "pos->leader = pos" in either for_each_group_member()
>>>> or in for_each_group_evsel() because it may exit the iteration immediately.
>>>>
>>>> evlist__for_each_entry(evlist, evsel) {
>>>> if (evsel->leader == evsel)
>>>> continue;
>>>>
>>>> if (cpu_maps_matched(evsel->leader, evsel))
>>>> continue;
>>>>
>>>> pr_warning("WARNING: event cpu maps are not fully matched, "
>>>> "disable group\n");
>>>>
>>>> for_each_group_member(pos, evsel->leader) {
>>>> pos->leader = pos;
>>>> pos->core.nr_members = 0;
>>>> }
>>>>
>>>> Let me use the example of '{cycles,unc_cbo_cache_lookup.any_i}' again.
>>>>
>>>> In evlist:
>>>> cycles,
>>>> unc_cbo_cache_lookup.any_i,
>>>> unc_cbo_cache_lookup.any_i,
>>>> unc_cbo_cache_lookup.any_i,
>>>> unc_cbo_cache_lookup.any_i,
>>>>
>>>> When we reach the for_each_group_member at first time, evsel is the first
>>>> unc_cbo_cache_lookup.any_i and evsel->leader is cycles. pos is same as the
>>>> evsel (the first unc_cbo_cache_lookup.any_i).
>>>>
>>>> Once we execute "pos->leader = pos;", it's actually "evsel->leader = evsel".
>>>> So now evsel->leader is changed to the first unc_cbo_cache_lookup.any_i.
>>>>
>>>> In next iteration, pos is the second unc_cbo_cache_lookup.any_i. pos->leader
>>>> is cycles but unfortunately evsel->leader has been changed to the first
>>>> unc_cbo_cache_lookup.any_i. So iteration stops immediately.
>>>
>>> hum, AFAICS the iteration will not break but continue to next evsel and
>>> pass the 'continue' for another group member.. what do I miss?
>>>
>>> jirka
>>>
>>
>> Let me use this example again.
>>
>> cycles,
>> unc_cbo_cache_lookup.any_i,
>> unc_cbo_cache_lookup.any_i,
>> unc_cbo_cache_lookup.any_i,
>> unc_cbo_cache_lookup.any_i,
>>
>> Yes, once for_each_group_member breaks (due to the issue in 'pos->leader =
>> pos'), evlist__for_each_entry will continue to the second
>> unc_cbo_cache_lookup.any_i. But now evsel->leader != evsel (evsel->leader is
>> "cycles"), so it will go to cpu_maps_matched.
>>
>> But actually we don't need to go to cpu_maps_matched again.
>>
>> for_each_group_member(pos, evsel->leader) {
>> pos->leader = pos;
>> pos->core.nr_members = 0;
>> }
>>
>> If we solve the issue in above code, for_each_group_member doesn't break,
>> the leaders of all members in this group will be set to themselves.
>>
>> if (evsel->leader == evsel)
>> continue;
>
> I see.. the problem is in the for_each_group_member, how about
> saving the leader into separate variable, like below
>
> jirka
>
>
> ---
> diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
> index f789103d8306..a754cad3f5a0 100644
> --- a/tools/perf/builtin-stat.c
> +++ b/tools/perf/builtin-stat.c
> @@ -189,6 +189,51 @@ static struct perf_stat_config stat_config = {
> .big_num = true,
> };
>
> +static bool cpus_map_matched(struct evsel *a, struct evsel *b)
> +{
> + if (!a->core.cpus && !b->core.cpus)
> + return true;
> +
> + if (!a->core.cpus || !b->core.cpus)
> + return false;
> +
> + if (a->core.cpus->nr != b->core.cpus->nr)
> + return false;
> +
> + for (int i = 0; i < a->core.cpus->nr; i++) {
> + if (a->core.cpus->map[i] != b->core.cpus->map[i])
> + return false;
> + }
> +
> + return true;
> +}
> +
> +
> +static void evlist__check_cpu_maps(struct evlist *evlist)
> +{
> + struct evsel *evsel, *pos, *leader;
> +
> + evlist__for_each_entry(evlist, evsel) {
> + char buf[1024];
> +
> + leader = evsel->leader;
> + if (leader == evsel)
> + continue;
> + if (cpus_map_matched(leader, evsel))
> + continue;
> +
> + evsel__group_desc(leader, buf, sizeof(buf));
> + WARN_ONCE(1, "WARNING: event cpu maps do not match, disabling group:\n");
> + pr_warning(" %s\n", buf);
> +
> + for_each_group_evsel(pos, leader) {
> + pos->leader = pos;
> + pos->core.nr_members = 0;
> + }
> + evsel->leader->core.nr_members = 0;
> + }
> +}
> +
> static inline void diff_timespec(struct timespec *r, struct timespec *a,
> struct timespec *b)
> {
> @@ -1956,6 +2001,8 @@ int cmd_stat(int argc, const char **argv)
> } else if (argc && !strncmp(argv[0], "rep", 3))
> return __cmd_report(argc, argv);
>
> + evlist__check_cpu_maps(evsel_list);
> +
> interval = stat_config.interval;
> timeout = stat_config.timeout;
>
>

This patch looks good. I guess you will post this patch, right? Thanks so much! :)

Thanks
Jin Yao