From: Kan Liang <[email protected]>
The annotation for hardware events is wrong on hybrid. For example,
# ./perf stat -a sleep 1
Performance counter stats for 'system wide':
32,148.85 msec cpu-clock # 32.000 CPUs utilized
374 context-switches # 11.633 /sec
33 cpu-migrations # 1.026 /sec
295 page-faults # 9.176 /sec
18,979,960 cpu_core/cycles/ # 590.378 K/sec
261,230,783 cpu_atom/cycles/ # 8.126 M/sec (54.21%)
17,019,732 cpu_core/instructions/ # 529.404 K/sec
38,020,470 cpu_atom/instructions/ # 1.183 M/sec (63.36%)
3,296,743 cpu_core/branches/ # 102.546 K/sec
6,692,338 cpu_atom/branches/ # 208.167 K/sec (63.40%)
96,421 cpu_core/branch-misses/ # 2.999 K/sec
1,016,336 cpu_atom/branch-misses/ # 31.613 K/sec (63.38%)
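On hybrid platforms, hardware events carry an extended type in their
config, but evsel__match() doesn't take it into account. The config
comparison therefore fails, and the event-specific annotation (GHz,
insn per cycle, % of all branches) is replaced by a generic rate.
Add a mask to filter out the extended type on hybrid platforms when
checking the config.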
With the patch,
# ./perf stat -a sleep 1
Performance counter stats for 'system wide':
32,139.90 msec cpu-clock # 32.003 CPUs utilized
343 context-switches # 10.672 /sec
32 cpu-migrations # 0.996 /sec
73 page-faults # 2.271 /sec
13,712,841 cpu_core/cycles/ # 0.000 GHz
258,301,691 cpu_atom/cycles/ # 0.008 GHz (54.20%)
12,428,163 cpu_core/instructions/ # 0.91 insn per cycle
37,786,557 cpu_atom/instructions/ # 2.76 insn per cycle (63.35%)
2,418,826 cpu_core/branches/ # 75.259 K/sec
6,965,962 cpu_atom/branches/ # 216.739 K/sec (63.38%)
72,150 cpu_core/branch-misses/ # 2.98% of all branches
1,032,746 cpu_atom/branch-misses/ # 42.70% of all branches (63.35%)
Signed-off-by: Kan Liang <[email protected]>
---
tools/perf/util/evsel.h | 12 ++++++-----
tools/perf/util/stat-shadow.c | 39 +++++++++++++++++++----------------
2 files changed, 28 insertions(+), 23 deletions(-)
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index b365b449c6ea..36a32e4ca168 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -350,9 +350,11 @@ u64 format_field__intval(struct tep_format_field *field, struct perf_sample *sam
struct tep_format_field *evsel__field(struct evsel *evsel, const char *name);
-#define evsel__match(evsel, t, c) \
+#define EVSEL_EVENT_MASK (~0ULL)
+
+#define evsel__match(evsel, t, c, m) \
(evsel->core.attr.type == PERF_TYPE_##t && \
- evsel->core.attr.config == PERF_COUNT_##c)
+ (evsel->core.attr.config & m) == PERF_COUNT_##c)
static inline bool evsel__match2(struct evsel *e1, struct evsel *e2)
{
@@ -438,13 +440,13 @@ bool evsel__is_function_event(struct evsel *evsel);
static inline bool evsel__is_bpf_output(struct evsel *evsel)
{
- return evsel__match(evsel, SOFTWARE, SW_BPF_OUTPUT);
+ return evsel__match(evsel, SOFTWARE, SW_BPF_OUTPUT, EVSEL_EVENT_MASK);
}
static inline bool evsel__is_clock(const struct evsel *evsel)
{
- return evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
- evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK);
+ return evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK, EVSEL_EVENT_MASK) ||
+ evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK, EVSEL_EVENT_MASK);
}
bool evsel__fallback(struct evsel *evsel, int err, char *msg, size_t msgsize);
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 1566a206ba42..074f38b57e2d 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -6,6 +6,7 @@
#include "color.h"
#include "debug.h"
#include "pmu.h"
+#include "pmus.h"
#include "rblist.h"
#include "evlist.h"
#include "expr.h"
@@ -78,6 +79,8 @@ void perf_stat__reset_shadow_stats(void)
static enum stat_type evsel__stat_type(const struct evsel *evsel)
{
+ u64 mask = perf_pmus__supports_extended_type() ? PERF_HW_EVENT_MASK : EVSEL_EVENT_MASK;
+
/* Fake perf_hw_cache_op_id values for use with evsel__match. */
u64 PERF_COUNT_hw_cache_l1d_miss = PERF_COUNT_HW_CACHE_L1D |
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
@@ -97,41 +100,41 @@ static enum stat_type evsel__stat_type(const struct evsel *evsel)
if (evsel__is_clock(evsel))
return STAT_NSECS;
- else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES))
+ else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES, mask))
return STAT_CYCLES;
- else if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS))
+ else if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS, mask))
return STAT_INSTRUCTIONS;
- else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
+ else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND, mask))
return STAT_STALLED_CYCLES_FRONT;
- else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND))
+ else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND, mask))
return STAT_STALLED_CYCLES_BACK;
- else if (evsel__match(evsel, HARDWARE, HW_BRANCH_INSTRUCTIONS))
+ else if (evsel__match(evsel, HARDWARE, HW_BRANCH_INSTRUCTIONS, mask))
return STAT_BRANCHES;
- else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES))
+ else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES, mask))
return STAT_BRANCH_MISS;
- else if (evsel__match(evsel, HARDWARE, HW_CACHE_REFERENCES))
+ else if (evsel__match(evsel, HARDWARE, HW_CACHE_REFERENCES, mask))
return STAT_CACHE_REFS;
- else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES))
+ else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES, mask))
return STAT_CACHE_MISSES;
- else if (evsel__match(evsel, HW_CACHE, HW_CACHE_L1D))
+ else if (evsel__match(evsel, HW_CACHE, HW_CACHE_L1D, mask))
return STAT_L1_DCACHE;
- else if (evsel__match(evsel, HW_CACHE, HW_CACHE_L1I))
+ else if (evsel__match(evsel, HW_CACHE, HW_CACHE_L1I, mask))
return STAT_L1_ICACHE;
- else if (evsel__match(evsel, HW_CACHE, HW_CACHE_LL))
+ else if (evsel__match(evsel, HW_CACHE, HW_CACHE_LL, mask))
return STAT_LL_CACHE;
- else if (evsel__match(evsel, HW_CACHE, HW_CACHE_DTLB))
+ else if (evsel__match(evsel, HW_CACHE, HW_CACHE_DTLB, mask))
return STAT_DTLB_CACHE;
- else if (evsel__match(evsel, HW_CACHE, HW_CACHE_ITLB))
+ else if (evsel__match(evsel, HW_CACHE, HW_CACHE_ITLB, mask))
return STAT_ITLB_CACHE;
- else if (evsel__match(evsel, HW_CACHE, hw_cache_l1d_miss))
+ else if (evsel__match(evsel, HW_CACHE, hw_cache_l1d_miss, mask))
return STAT_L1D_MISS;
- else if (evsel__match(evsel, HW_CACHE, hw_cache_l1i_miss))
+ else if (evsel__match(evsel, HW_CACHE, hw_cache_l1i_miss, mask))
return STAT_L1I_MISS;
- else if (evsel__match(evsel, HW_CACHE, hw_cache_ll_miss))
+ else if (evsel__match(evsel, HW_CACHE, hw_cache_ll_miss, mask))
return STAT_LL_MISS;
- else if (evsel__match(evsel, HW_CACHE, hw_cache_dtlb_miss))
+ else if (evsel__match(evsel, HW_CACHE, hw_cache_dtlb_miss, mask))
return STAT_DTLB_MISS;
- else if (evsel__match(evsel, HW_CACHE, hw_cache_itlb_miss))
+ else if (evsel__match(evsel, HW_CACHE, hw_cache_itlb_miss, mask))
return STAT_ITLB_MISS;
return STAT_NONE;
}
--
2.35.1
On Wed, Jun 7, 2023 at 9:27 AM <[email protected]> wrote:
>
> From: Kan Liang <[email protected]>
>
> The annotation for hardware events is wrong on hybrid. For example,
>
> # ./perf stat -a sleep 1
>
> Performance counter stats for 'system wide':
>
> 32,148.85 msec cpu-clock # 32.000 CPUs utilized
> 374 context-switches # 11.633 /sec
> 33 cpu-migrations # 1.026 /sec
> 295 page-faults # 9.176 /sec
> 18,979,960 cpu_core/cycles/ # 590.378 K/sec
> 261,230,783 cpu_atom/cycles/ # 8.126 M/sec (54.21%)
> 17,019,732 cpu_core/instructions/ # 529.404 K/sec
> 38,020,470 cpu_atom/instructions/ # 1.183 M/sec (63.36%)
> 3,296,743 cpu_core/branches/ # 102.546 K/sec
> 6,692,338 cpu_atom/branches/ # 208.167 K/sec (63.40%)
> 96,421 cpu_core/branch-misses/ # 2.999 K/sec
> 1,016,336 cpu_atom/branch-misses/ # 31.613 K/sec (63.38%)
>
> The hardware events have extended type on hybrid, but the evsel__match()
> doesn't take it into account.
>
> Add a mask to filter the extended type on hybrid when checking the config.
>
> With the patch,
>
> # ./perf stat -a sleep 1
>
> Performance counter stats for 'system wide':
>
> 32,139.90 msec cpu-clock # 32.003 CPUs utilized
> 343 context-switches # 10.672 /sec
> 32 cpu-migrations # 0.996 /sec
> 73 page-faults # 2.271 /sec
> 13,712,841 cpu_core/cycles/ # 0.000 GHz
> 258,301,691 cpu_atom/cycles/ # 0.008 GHz (54.20%)
> 12,428,163 cpu_core/instructions/ # 0.91 insn per cycle
> 37,786,557 cpu_atom/instructions/ # 2.76 insn per cycle (63.35%)
> 2,418,826 cpu_core/branches/ # 75.259 K/sec
> 6,965,962 cpu_atom/branches/ # 216.739 K/sec (63.38%)
> 72,150 cpu_core/branch-misses/ # 2.98% of all branches
> 1,032,746 cpu_atom/branch-misses/ # 42.70% of all branches (63.35%)
>
> Signed-off-by: Kan Liang <[email protected]>
> ---
> tools/perf/util/evsel.h | 12 ++++++-----
> tools/perf/util/stat-shadow.c | 39 +++++++++++++++++++----------------
> 2 files changed, 28 insertions(+), 23 deletions(-)
>
> diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
> index b365b449c6ea..36a32e4ca168 100644
> --- a/tools/perf/util/evsel.h
> +++ b/tools/perf/util/evsel.h
> @@ -350,9 +350,11 @@ u64 format_field__intval(struct tep_format_field *field, struct perf_sample *sam
>
> struct tep_format_field *evsel__field(struct evsel *evsel, const char *name);
>
> -#define evsel__match(evsel, t, c) \
> +#define EVSEL_EVENT_MASK (~0ULL)
> +
> +#define evsel__match(evsel, t, c, m) \
> (evsel->core.attr.type == PERF_TYPE_##t && \
> - evsel->core.attr.config == PERF_COUNT_##c)
> + (evsel->core.attr.config & m) == PERF_COUNT_##c)
The EVSEL_EVENT_MASK here isn't very intention-revealing; perhaps we
can remove it and do something like:
/*
 * Return true when @evsel's attr.type equals @type and its attr.config
 * selects the event @config.
 *
 * For PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE, when
 * perf_pmus__supports_extended_type() returns true, attr.config is masked
 * with PERF_HW_EVENT_MASK before the comparison, so config bits outside
 * that mask (the extended type) do not prevent a match.
 */
static inline bool __evsel__match(const struct evsel *evsel, u32 type,
				  u64 config)
{
	/*
	 * The open-coded evsel__match() macro compared attr.type as well as
	 * attr.config; without this check an event of a different type that
	 * happens to share the config value would match.
	 */
	if (evsel->core.attr.type != type)
		return false;

	if ((type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE) &&
	    perf_pmus__supports_extended_type())
		return (evsel->core.attr.config & PERF_HW_EVENT_MASK) == config;

	return evsel->core.attr.config == config;
}
/* Paste the short suffixes @t and @c onto PERF_TYPE_ and PERF_COUNT_. */
#define evsel__match(evsel, t, c) \
	__evsel__match(evsel, PERF_TYPE_##t, PERF_COUNT_##c)
Thanks,
Ian
>
> static inline bool evsel__match2(struct evsel *e1, struct evsel *e2)
> {
> @@ -438,13 +440,13 @@ bool evsel__is_function_event(struct evsel *evsel);
>
> static inline bool evsel__is_bpf_output(struct evsel *evsel)
> {
> - return evsel__match(evsel, SOFTWARE, SW_BPF_OUTPUT);
> + return evsel__match(evsel, SOFTWARE, SW_BPF_OUTPUT, EVSEL_EVENT_MASK);
> }
>
> static inline bool evsel__is_clock(const struct evsel *evsel)
> {
> - return evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
> - evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK);
> + return evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK, EVSEL_EVENT_MASK) ||
> + evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK, EVSEL_EVENT_MASK);
> }
>
> bool evsel__fallback(struct evsel *evsel, int err, char *msg, size_t msgsize);
> diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
> index 1566a206ba42..074f38b57e2d 100644
> --- a/tools/perf/util/stat-shadow.c
> +++ b/tools/perf/util/stat-shadow.c
> @@ -6,6 +6,7 @@
> #include "color.h"
> #include "debug.h"
> #include "pmu.h"
> +#include "pmus.h"
> #include "rblist.h"
> #include "evlist.h"
> #include "expr.h"
> @@ -78,6 +79,8 @@ void perf_stat__reset_shadow_stats(void)
>
> static enum stat_type evsel__stat_type(const struct evsel *evsel)
> {
> + u64 mask = perf_pmus__supports_extended_type() ? PERF_HW_EVENT_MASK : EVSEL_EVENT_MASK;
> +
> /* Fake perf_hw_cache_op_id values for use with evsel__match. */
> u64 PERF_COUNT_hw_cache_l1d_miss = PERF_COUNT_HW_CACHE_L1D |
> ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
> @@ -97,41 +100,41 @@ static enum stat_type evsel__stat_type(const struct evsel *evsel)
>
> if (evsel__is_clock(evsel))
> return STAT_NSECS;
> - else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES))
> + else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES, mask))
> return STAT_CYCLES;
> - else if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS))
> + else if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS, mask))
> return STAT_INSTRUCTIONS;
> - else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
> + else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND, mask))
> return STAT_STALLED_CYCLES_FRONT;
> - else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND))
> + else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND, mask))
> return STAT_STALLED_CYCLES_BACK;
> - else if (evsel__match(evsel, HARDWARE, HW_BRANCH_INSTRUCTIONS))
> + else if (evsel__match(evsel, HARDWARE, HW_BRANCH_INSTRUCTIONS, mask))
> return STAT_BRANCHES;
> - else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES))
> + else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES, mask))
> return STAT_BRANCH_MISS;
> - else if (evsel__match(evsel, HARDWARE, HW_CACHE_REFERENCES))
> + else if (evsel__match(evsel, HARDWARE, HW_CACHE_REFERENCES, mask))
> return STAT_CACHE_REFS;
> - else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES))
> + else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES, mask))
> return STAT_CACHE_MISSES;
> - else if (evsel__match(evsel, HW_CACHE, HW_CACHE_L1D))
> + else if (evsel__match(evsel, HW_CACHE, HW_CACHE_L1D, mask))
> return STAT_L1_DCACHE;
> - else if (evsel__match(evsel, HW_CACHE, HW_CACHE_L1I))
> + else if (evsel__match(evsel, HW_CACHE, HW_CACHE_L1I, mask))
> return STAT_L1_ICACHE;
> - else if (evsel__match(evsel, HW_CACHE, HW_CACHE_LL))
> + else if (evsel__match(evsel, HW_CACHE, HW_CACHE_LL, mask))
> return STAT_LL_CACHE;
> - else if (evsel__match(evsel, HW_CACHE, HW_CACHE_DTLB))
> + else if (evsel__match(evsel, HW_CACHE, HW_CACHE_DTLB, mask))
> return STAT_DTLB_CACHE;
> - else if (evsel__match(evsel, HW_CACHE, HW_CACHE_ITLB))
> + else if (evsel__match(evsel, HW_CACHE, HW_CACHE_ITLB, mask))
> return STAT_ITLB_CACHE;
> - else if (evsel__match(evsel, HW_CACHE, hw_cache_l1d_miss))
> + else if (evsel__match(evsel, HW_CACHE, hw_cache_l1d_miss, mask))
> return STAT_L1D_MISS;
> - else if (evsel__match(evsel, HW_CACHE, hw_cache_l1i_miss))
> + else if (evsel__match(evsel, HW_CACHE, hw_cache_l1i_miss, mask))
> return STAT_L1I_MISS;
> - else if (evsel__match(evsel, HW_CACHE, hw_cache_ll_miss))
> + else if (evsel__match(evsel, HW_CACHE, hw_cache_ll_miss, mask))
> return STAT_LL_MISS;
> - else if (evsel__match(evsel, HW_CACHE, hw_cache_dtlb_miss))
> + else if (evsel__match(evsel, HW_CACHE, hw_cache_dtlb_miss, mask))
> return STAT_DTLB_MISS;
> - else if (evsel__match(evsel, HW_CACHE, hw_cache_itlb_miss))
> + else if (evsel__match(evsel, HW_CACHE, hw_cache_itlb_miss, mask))
> return STAT_ITLB_MISS;
> return STAT_NONE;
> }
> --
> 2.35.1
>
On 2023-06-13 3:35 p.m., Ian Rogers wrote:
> On Wed, Jun 7, 2023 at 9:27 AM <[email protected]> wrote:
>>
>> From: Kan Liang <[email protected]>
>>
>> The annotation for hardware events is wrong on hybrid. For example,
>>
>> # ./perf stat -a sleep 1
>>
>> Performance counter stats for 'system wide':
>>
>> 32,148.85 msec cpu-clock # 32.000 CPUs utilized
>> 374 context-switches # 11.633 /sec
>> 33 cpu-migrations # 1.026 /sec
>> 295 page-faults # 9.176 /sec
>> 18,979,960 cpu_core/cycles/ # 590.378 K/sec
>> 261,230,783 cpu_atom/cycles/ # 8.126 M/sec (54.21%)
>> 17,019,732 cpu_core/instructions/ # 529.404 K/sec
>> 38,020,470 cpu_atom/instructions/ # 1.183 M/sec (63.36%)
>> 3,296,743 cpu_core/branches/ # 102.546 K/sec
>> 6,692,338 cpu_atom/branches/ # 208.167 K/sec (63.40%)
>> 96,421 cpu_core/branch-misses/ # 2.999 K/sec
>> 1,016,336 cpu_atom/branch-misses/ # 31.613 K/sec (63.38%)
>>
>> The hardware events have extended type on hybrid, but the evsel__match()
>> doesn't take it into account.
>>
>> Add a mask to filter the extended type on hybrid when checking the config.
>>
>> With the patch,
>>
>> # ./perf stat -a sleep 1
>>
>> Performance counter stats for 'system wide':
>>
>> 32,139.90 msec cpu-clock # 32.003 CPUs utilized
>> 343 context-switches # 10.672 /sec
>> 32 cpu-migrations # 0.996 /sec
>> 73 page-faults # 2.271 /sec
>> 13,712,841 cpu_core/cycles/ # 0.000 GHz
>> 258,301,691 cpu_atom/cycles/ # 0.008 GHz (54.20%)
>> 12,428,163 cpu_core/instructions/ # 0.91 insn per cycle
>> 37,786,557 cpu_atom/instructions/ # 2.76 insn per cycle (63.35%)
>> 2,418,826 cpu_core/branches/ # 75.259 K/sec
>> 6,965,962 cpu_atom/branches/ # 216.739 K/sec (63.38%)
>> 72,150 cpu_core/branch-misses/ # 2.98% of all branches
>> 1,032,746 cpu_atom/branch-misses/ # 42.70% of all branches (63.35%)
>>
>> Signed-off-by: Kan Liang <[email protected]>
>> ---
>> tools/perf/util/evsel.h | 12 ++++++-----
>> tools/perf/util/stat-shadow.c | 39 +++++++++++++++++++----------------
>> 2 files changed, 28 insertions(+), 23 deletions(-)
>>
>> diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
>> index b365b449c6ea..36a32e4ca168 100644
>> --- a/tools/perf/util/evsel.h
>> +++ b/tools/perf/util/evsel.h
>> @@ -350,9 +350,11 @@ u64 format_field__intval(struct tep_format_field *field, struct perf_sample *sam
>>
>> struct tep_format_field *evsel__field(struct evsel *evsel, const char *name);
>>
>> -#define evsel__match(evsel, t, c) \
>> +#define EVSEL_EVENT_MASK (~0ULL)
>> +
>> +#define evsel__match(evsel, t, c, m) \
>> (evsel->core.attr.type == PERF_TYPE_##t && \
>> - evsel->core.attr.config == PERF_COUNT_##c)
>> + (evsel->core.attr.config & m) == PERF_COUNT_##c)
>
> The EVSEL_EVENT_MASK here isn't very intention revealing, perhaps we
> can remove it and do something like:
>
> static inline bool __evsel__match(const struct evsel *evsel, u32 type,
> u64 config)
> {
> if ((type == PERF_TYPE_HARDWARE || type ==PERF_TYPE_HW_CACHE) &&
> perf_pmus__supports_extended_type())
> return (evsel->core.attr.config & PERF_HW_EVENT_MASK) == config;
>
> return evsel->core.attr.config == config;
> }
> #define evsel__match(evsel, t, c) __evsel__match(evsel, PERF_TYPE_##t,
> PERF_COUNT_##c)
Yes, the above code looks better. I will apply it in V2.
Thanks,
Kan
>
> Thanks,
> Ian
>
>>
>> static inline bool evsel__match2(struct evsel *e1, struct evsel *e2)
>> {
>> @@ -438,13 +440,13 @@ bool evsel__is_function_event(struct evsel *evsel);
>>
>> static inline bool evsel__is_bpf_output(struct evsel *evsel)
>> {
>> - return evsel__match(evsel, SOFTWARE, SW_BPF_OUTPUT);
>> + return evsel__match(evsel, SOFTWARE, SW_BPF_OUTPUT, EVSEL_EVENT_MASK);
>> }
>>
>> static inline bool evsel__is_clock(const struct evsel *evsel)
>> {
>> - return evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
>> - evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK);
>> + return evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK, EVSEL_EVENT_MASK) ||
>> + evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK, EVSEL_EVENT_MASK);
>> }
>>
>> bool evsel__fallback(struct evsel *evsel, int err, char *msg, size_t msgsize);
>> diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
>> index 1566a206ba42..074f38b57e2d 100644
>> --- a/tools/perf/util/stat-shadow.c
>> +++ b/tools/perf/util/stat-shadow.c
>> @@ -6,6 +6,7 @@
>> #include "color.h"
>> #include "debug.h"
>> #include "pmu.h"
>> +#include "pmus.h"
>> #include "rblist.h"
>> #include "evlist.h"
>> #include "expr.h"
>> @@ -78,6 +79,8 @@ void perf_stat__reset_shadow_stats(void)
>>
>> static enum stat_type evsel__stat_type(const struct evsel *evsel)
>> {
>> + u64 mask = perf_pmus__supports_extended_type() ? PERF_HW_EVENT_MASK : EVSEL_EVENT_MASK;
>> +
>> /* Fake perf_hw_cache_op_id values for use with evsel__match. */
>> u64 PERF_COUNT_hw_cache_l1d_miss = PERF_COUNT_HW_CACHE_L1D |
>> ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
>> @@ -97,41 +100,41 @@ static enum stat_type evsel__stat_type(const struct evsel *evsel)
>>
>> if (evsel__is_clock(evsel))
>> return STAT_NSECS;
>> - else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES))
>> + else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES, mask))
>> return STAT_CYCLES;
>> - else if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS))
>> + else if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS, mask))
>> return STAT_INSTRUCTIONS;
>> - else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
>> + else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND, mask))
>> return STAT_STALLED_CYCLES_FRONT;
>> - else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND))
>> + else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND, mask))
>> return STAT_STALLED_CYCLES_BACK;
>> - else if (evsel__match(evsel, HARDWARE, HW_BRANCH_INSTRUCTIONS))
>> + else if (evsel__match(evsel, HARDWARE, HW_BRANCH_INSTRUCTIONS, mask))
>> return STAT_BRANCHES;
>> - else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES))
>> + else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES, mask))
>> return STAT_BRANCH_MISS;
>> - else if (evsel__match(evsel, HARDWARE, HW_CACHE_REFERENCES))
>> + else if (evsel__match(evsel, HARDWARE, HW_CACHE_REFERENCES, mask))
>> return STAT_CACHE_REFS;
>> - else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES))
>> + else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES, mask))
>> return STAT_CACHE_MISSES;
>> - else if (evsel__match(evsel, HW_CACHE, HW_CACHE_L1D))
>> + else if (evsel__match(evsel, HW_CACHE, HW_CACHE_L1D, mask))
>> return STAT_L1_DCACHE;
>> - else if (evsel__match(evsel, HW_CACHE, HW_CACHE_L1I))
>> + else if (evsel__match(evsel, HW_CACHE, HW_CACHE_L1I, mask))
>> return STAT_L1_ICACHE;
>> - else if (evsel__match(evsel, HW_CACHE, HW_CACHE_LL))
>> + else if (evsel__match(evsel, HW_CACHE, HW_CACHE_LL, mask))
>> return STAT_LL_CACHE;
>> - else if (evsel__match(evsel, HW_CACHE, HW_CACHE_DTLB))
>> + else if (evsel__match(evsel, HW_CACHE, HW_CACHE_DTLB, mask))
>> return STAT_DTLB_CACHE;
>> - else if (evsel__match(evsel, HW_CACHE, HW_CACHE_ITLB))
>> + else if (evsel__match(evsel, HW_CACHE, HW_CACHE_ITLB, mask))
>> return STAT_ITLB_CACHE;
>> - else if (evsel__match(evsel, HW_CACHE, hw_cache_l1d_miss))
>> + else if (evsel__match(evsel, HW_CACHE, hw_cache_l1d_miss, mask))
>> return STAT_L1D_MISS;
>> - else if (evsel__match(evsel, HW_CACHE, hw_cache_l1i_miss))
>> + else if (evsel__match(evsel, HW_CACHE, hw_cache_l1i_miss, mask))
>> return STAT_L1I_MISS;
>> - else if (evsel__match(evsel, HW_CACHE, hw_cache_ll_miss))
>> + else if (evsel__match(evsel, HW_CACHE, hw_cache_ll_miss, mask))
>> return STAT_LL_MISS;
>> - else if (evsel__match(evsel, HW_CACHE, hw_cache_dtlb_miss))
>> + else if (evsel__match(evsel, HW_CACHE, hw_cache_dtlb_miss, mask))
>> return STAT_DTLB_MISS;
>> - else if (evsel__match(evsel, HW_CACHE, hw_cache_itlb_miss))
>> + else if (evsel__match(evsel, HW_CACHE, hw_cache_itlb_miss, mask))
>> return STAT_ITLB_MISS;
>> return STAT_NONE;
>> }
>> --
>> 2.35.1
>>
Em Tue, Jun 13, 2023 at 04:06:59PM -0400, Liang, Kan escreveu:
>
>
> On 2023-06-13 3:35 p.m., Ian Rogers wrote:
> > On Wed, Jun 7, 2023 at 9:27 AM <[email protected]> wrote:
> >>
> >> From: Kan Liang <[email protected]>
> >>
> >> The annotation for hardware events is wrong on hybrid. For example,
> >>
> >> # ./perf stat -a sleep 1
> >>
> >> Performance counter stats for 'system wide':
> >>
> >> 32,148.85 msec cpu-clock # 32.000 CPUs utilized
> >> 374 context-switches # 11.633 /sec
> >> 33 cpu-migrations # 1.026 /sec
> >> 295 page-faults # 9.176 /sec
> >> 18,979,960 cpu_core/cycles/ # 590.378 K/sec
> >> 261,230,783 cpu_atom/cycles/ # 8.126 M/sec (54.21%)
> >> 17,019,732 cpu_core/instructions/ # 529.404 K/sec
> >> 38,020,470 cpu_atom/instructions/ # 1.183 M/sec (63.36%)
> >> 3,296,743 cpu_core/branches/ # 102.546 K/sec
> >> 6,692,338 cpu_atom/branches/ # 208.167 K/sec (63.40%)
> >> 96,421 cpu_core/branch-misses/ # 2.999 K/sec
> >> 1,016,336 cpu_atom/branch-misses/ # 31.613 K/sec (63.38%)
> >>
> >> The hardware events have extended type on hybrid, but the evsel__match()
> >> doesn't take it into account.
> >>
> >> Add a mask to filter the extended type on hybrid when checking the config.
> >>
> >> With the patch,
> >>
> >> # ./perf stat -a sleep 1
> >>
> >> Performance counter stats for 'system wide':
> >>
> >> 32,139.90 msec cpu-clock # 32.003 CPUs utilized
> >> 343 context-switches # 10.672 /sec
> >> 32 cpu-migrations # 0.996 /sec
> >> 73 page-faults # 2.271 /sec
> >> 13,712,841 cpu_core/cycles/ # 0.000 GHz
> >> 258,301,691 cpu_atom/cycles/ # 0.008 GHz (54.20%)
> >> 12,428,163 cpu_core/instructions/ # 0.91 insn per cycle
> >> 37,786,557 cpu_atom/instructions/ # 2.76 insn per cycle (63.35%)
> >> 2,418,826 cpu_core/branches/ # 75.259 K/sec
> >> 6,965,962 cpu_atom/branches/ # 216.739 K/sec (63.38%)
> >> 72,150 cpu_core/branch-misses/ # 2.98% of all branches
> >> 1,032,746 cpu_atom/branch-misses/ # 42.70% of all branches (63.35%)
> >>
> >> Signed-off-by: Kan Liang <[email protected]>
> >> ---
> >> tools/perf/util/evsel.h | 12 ++++++-----
> >> tools/perf/util/stat-shadow.c | 39 +++++++++++++++++++----------------
> >> 2 files changed, 28 insertions(+), 23 deletions(-)
> >>
> >> diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
> >> index b365b449c6ea..36a32e4ca168 100644
> >> --- a/tools/perf/util/evsel.h
> >> +++ b/tools/perf/util/evsel.h
> >> @@ -350,9 +350,11 @@ u64 format_field__intval(struct tep_format_field *field, struct perf_sample *sam
> >>
> >> struct tep_format_field *evsel__field(struct evsel *evsel, const char *name);
> >>
> >> -#define evsel__match(evsel, t, c) \
> >> +#define EVSEL_EVENT_MASK (~0ULL)
> >> +
> >> +#define evsel__match(evsel, t, c, m) \
> >> (evsel->core.attr.type == PERF_TYPE_##t && \
> >> - evsel->core.attr.config == PERF_COUNT_##c)
> >> + (evsel->core.attr.config & m) == PERF_COUNT_##c)
> >
> > The EVSEL_EVENT_MASK here isn't very intention revealing, perhaps we
> > can remove it and do something like:
> >
> > static inline bool __evsel__match(const struct evsel *evsel, u32 type,
> > u64 config)
> > {
> > if ((type == PERF_TYPE_HARDWARE || type ==PERF_TYPE_HW_CACHE) &&
> > perf_pmus__supports_extended_type())
> > return (evsel->core.attr.config & PERF_HW_EVENT_MASK) == config;
> >
> > return evsel->core.attr.config == config;
> > }
> > #define evsel__match(evsel, t, c) __evsel__match(evsel, PERF_TYPE_##t,
> > PERF_COUNT_##c)
>
> Yes, the above code looks better. I will apply it in V2.
Please base v2 on tmp.perf-tools-next; tests are running, and that branch
will become perf-tools-next.
Some patches from your series were cherry-picked there.
- Arnaldo
On 2023-06-13 5:18 p.m., Arnaldo Carvalho de Melo wrote:
> Em Tue, Jun 13, 2023 at 04:06:59PM -0400, Liang, Kan escreveu:
>>
>>
>> On 2023-06-13 3:35 p.m., Ian Rogers wrote:
>>> On Wed, Jun 7, 2023 at 9:27 AM <[email protected]> wrote:
>>>>
>>>> From: Kan Liang <[email protected]>
>>>>
>>>> The annotation for hardware events is wrong on hybrid. For example,
>>>>
>>>> # ./perf stat -a sleep 1
>>>>
>>>> Performance counter stats for 'system wide':
>>>>
>>>> 32,148.85 msec cpu-clock # 32.000 CPUs utilized
>>>> 374 context-switches # 11.633 /sec
>>>> 33 cpu-migrations # 1.026 /sec
>>>> 295 page-faults # 9.176 /sec
>>>> 18,979,960 cpu_core/cycles/ # 590.378 K/sec
>>>> 261,230,783 cpu_atom/cycles/ # 8.126 M/sec (54.21%)
>>>> 17,019,732 cpu_core/instructions/ # 529.404 K/sec
>>>> 38,020,470 cpu_atom/instructions/ # 1.183 M/sec (63.36%)
>>>> 3,296,743 cpu_core/branches/ # 102.546 K/sec
>>>> 6,692,338 cpu_atom/branches/ # 208.167 K/sec (63.40%)
>>>> 96,421 cpu_core/branch-misses/ # 2.999 K/sec
>>>> 1,016,336 cpu_atom/branch-misses/ # 31.613 K/sec (63.38%)
>>>>
>>>> The hardware events have extended type on hybrid, but the evsel__match()
>>>> doesn't take it into account.
>>>>
>>>> Add a mask to filter the extended type on hybrid when checking the config.
>>>>
>>>> With the patch,
>>>>
>>>> # ./perf stat -a sleep 1
>>>>
>>>> Performance counter stats for 'system wide':
>>>>
>>>> 32,139.90 msec cpu-clock # 32.003 CPUs utilized
>>>> 343 context-switches # 10.672 /sec
>>>> 32 cpu-migrations # 0.996 /sec
>>>> 73 page-faults # 2.271 /sec
>>>> 13,712,841 cpu_core/cycles/ # 0.000 GHz
>>>> 258,301,691 cpu_atom/cycles/ # 0.008 GHz (54.20%)
>>>> 12,428,163 cpu_core/instructions/ # 0.91 insn per cycle
>>>> 37,786,557 cpu_atom/instructions/ # 2.76 insn per cycle (63.35%)
>>>> 2,418,826 cpu_core/branches/ # 75.259 K/sec
>>>> 6,965,962 cpu_atom/branches/ # 216.739 K/sec (63.38%)
>>>> 72,150 cpu_core/branch-misses/ # 2.98% of all branches
>>>> 1,032,746 cpu_atom/branch-misses/ # 42.70% of all branches (63.35%)
>>>>
>>>> Signed-off-by: Kan Liang <[email protected]>
>>>> ---
>>>> tools/perf/util/evsel.h | 12 ++++++-----
>>>> tools/perf/util/stat-shadow.c | 39 +++++++++++++++++++----------------
>>>> 2 files changed, 28 insertions(+), 23 deletions(-)
>>>>
>>>> diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
>>>> index b365b449c6ea..36a32e4ca168 100644
>>>> --- a/tools/perf/util/evsel.h
>>>> +++ b/tools/perf/util/evsel.h
>>>> @@ -350,9 +350,11 @@ u64 format_field__intval(struct tep_format_field *field, struct perf_sample *sam
>>>>
>>>> struct tep_format_field *evsel__field(struct evsel *evsel, const char *name);
>>>>
>>>> -#define evsel__match(evsel, t, c) \
>>>> +#define EVSEL_EVENT_MASK (~0ULL)
>>>> +
>>>> +#define evsel__match(evsel, t, c, m) \
>>>> (evsel->core.attr.type == PERF_TYPE_##t && \
>>>> - evsel->core.attr.config == PERF_COUNT_##c)
>>>> + (evsel->core.attr.config & m) == PERF_COUNT_##c)
>>>
>>> The EVSEL_EVENT_MASK here isn't very intention revealing, perhaps we
>>> can remove it and do something like:
>>>
>>> static inline bool __evsel__match(const struct evsel *evsel, u32 type,
>>> u64 config)
>>> {
>>> if ((type == PERF_TYPE_HARDWARE || type ==PERF_TYPE_HW_CACHE) &&
>>> perf_pmus__supports_extended_type())
>>> return (evsel->core.attr.config & PERF_HW_EVENT_MASK) == config;
>>>
>>> return evsel->core.attr.config == config;
>>> }
>>> #define evsel__match(evsel, t, c) __evsel__match(evsel, PERF_TYPE_##t,
>>> PERF_COUNT_##c)
>>
>> Yes, the above code looks better. I will apply it in V2.
>
> Please base v2 on tmp.perf-tools-next, tests are running and that branch
> will become perf-tools-next.
>
Sure.
> Some patches from your series were cherry-picked there.
Thanks.
Kan