From: Kan Liang <[email protected]>
The metric annotations for hardware events are wrong on hybrid platforms. For example,
# ./perf stat -a sleep 1
Performance counter stats for 'system wide':
32,148.85 msec cpu-clock # 32.000 CPUs utilized
374 context-switches # 11.633 /sec
33 cpu-migrations # 1.026 /sec
295 page-faults # 9.176 /sec
18,979,960 cpu_core/cycles/ # 590.378 K/sec
261,230,783 cpu_atom/cycles/ # 8.126 M/sec (54.21%)
17,019,732 cpu_core/instructions/ # 529.404 K/sec
38,020,470 cpu_atom/instructions/ # 1.183 M/sec (63.36%)
3,296,743 cpu_core/branches/ # 102.546 K/sec
6,692,338 cpu_atom/branches/ # 208.167 K/sec (63.40%)
96,421 cpu_core/branch-misses/ # 2.999 K/sec
1,016,336 cpu_atom/branch-misses/ # 31.613 K/sec (63.38%)
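On hybrid, hardware events carry an extended type (the PMU type ID) in the
upper bits of attr.config, but evsel__match() compares the whole config
value, so the match fails and only generic rates are shown.
Mask the config with PERF_HW_EVENT_MASK before comparing when the PMUs
support the extended type.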
With the patch,
# ./perf stat -a sleep 1
Performance counter stats for 'system wide':
32,139.90 msec cpu-clock # 32.003 CPUs utilized
343 context-switches # 10.672 /sec
32 cpu-migrations # 0.996 /sec
73 page-faults # 2.271 /sec
13,712,841 cpu_core/cycles/ # 0.000 GHz
258,301,691 cpu_atom/cycles/ # 0.008 GHz (54.20%)
12,428,163 cpu_core/instructions/ # 0.91 insn per cycle
37,786,557 cpu_atom/instructions/ # 2.76 insn per cycle (63.35%)
2,418,826 cpu_core/branches/ # 75.259 K/sec
6,965,962 cpu_atom/branches/ # 216.739 K/sec (63.38%)
72,150 cpu_core/branch-misses/ # 2.98% of all branches
1,032,746 cpu_atom/branch-misses/ # 42.70% of all branches (63.35%)
Suggested-by: Ian Rogers <[email protected]>
Signed-off-by: Kan Liang <[email protected]>
---
tools/perf/util/evsel.h | 17 ++++++++++++++---
1 file changed, 14 insertions(+), 3 deletions(-)
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index b365b449c6ea..cc6fb3049b99 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -10,6 +10,7 @@
#include <internal/evsel.h>
#include <perf/evsel.h>
#include "symbol_conf.h"
+#include "pmus.h"
struct bpf_object;
struct cgroup;
@@ -350,9 +351,19 @@ u64 format_field__intval(struct tep_format_field *field, struct perf_sample *sam
struct tep_format_field *evsel__field(struct evsel *evsel, const char *name);
-#define evsel__match(evsel, t, c) \
- (evsel->core.attr.type == PERF_TYPE_##t && \
- evsel->core.attr.config == PERF_COUNT_##c)
+static inline bool __evsel__match(const struct evsel *evsel, u32 type, u64 config)
+{
+ if (evsel->core.attr.type != type)
+ return false;
+
+ if ((type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE) &&
+ perf_pmus__supports_extended_type())
+ return (evsel->core.attr.config & PERF_HW_EVENT_MASK) == config;
+
+ return evsel->core.attr.config == config;
+}
+
+#define evsel__match(evsel, t, c) __evsel__match(evsel, PERF_TYPE_##t, PERF_COUNT_##c)
static inline bool evsel__match2(struct evsel *e1, struct evsel *e2)
{
--
2.35.1
On Wed, Jun 14, 2023 at 5:18 PM <[email protected]> wrote:
>
> From: Kan Liang <[email protected]>
>
> The annotation for hardware events is wrong on hybrid. For example,
>
> # ./perf stat -a sleep 1
>
> Performance counter stats for 'system wide':
>
> 32,148.85 msec cpu-clock # 32.000 CPUs utilized
> 374 context-switches # 11.633 /sec
> 33 cpu-migrations # 1.026 /sec
> 295 page-faults # 9.176 /sec
> 18,979,960 cpu_core/cycles/ # 590.378 K/sec
> 261,230,783 cpu_atom/cycles/ # 8.126 M/sec (54.21%)
> 17,019,732 cpu_core/instructions/ # 529.404 K/sec
> 38,020,470 cpu_atom/instructions/ # 1.183 M/sec (63.36%)
> 3,296,743 cpu_core/branches/ # 102.546 K/sec
> 6,692,338 cpu_atom/branches/ # 208.167 K/sec (63.40%)
> 96,421 cpu_core/branch-misses/ # 2.999 K/sec
> 1,016,336 cpu_atom/branch-misses/ # 31.613 K/sec (63.38%)
>
> The hardware events have extended type on hybrid, but the evsel__match()
> doesn't take it into account.
>
> Filter the config on hybrid before checking.
>
> With the patch,
>
> # ./perf stat -a sleep 1
>
> Performance counter stats for 'system wide':
>
> 32,139.90 msec cpu-clock # 32.003 CPUs utilized
> 343 context-switches # 10.672 /sec
> 32 cpu-migrations # 0.996 /sec
> 73 page-faults # 2.271 /sec
> 13,712,841 cpu_core/cycles/ # 0.000 GHz
> 258,301,691 cpu_atom/cycles/ # 0.008 GHz (54.20%)
> 12,428,163 cpu_core/instructions/ # 0.91 insn per cycle
> 37,786,557 cpu_atom/instructions/ # 2.76 insn per cycle (63.35%)
> 2,418,826 cpu_core/branches/ # 75.259 K/sec
> 6,965,962 cpu_atom/branches/ # 216.739 K/sec (63.38%)
> 72,150 cpu_core/branch-misses/ # 2.98% of all branches
> 1,032,746 cpu_atom/branch-misses/ # 42.70% of all branches (63.35%)
>
> Suggested-by: Ian Rogers <[email protected]>
> Signed-off-by: Kan Liang <[email protected]>
Reviewed-by: Ian Rogers <[email protected]>
Thanks,
Ian
> ---
> tools/perf/util/evsel.h | 17 ++++++++++++++---
> 1 file changed, 14 insertions(+), 3 deletions(-)
>
> diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
> index b365b449c6ea..cc6fb3049b99 100644
> --- a/tools/perf/util/evsel.h
> +++ b/tools/perf/util/evsel.h
> @@ -10,6 +10,7 @@
> #include <internal/evsel.h>
> #include <perf/evsel.h>
> #include "symbol_conf.h"
> +#include "pmus.h"
>
> struct bpf_object;
> struct cgroup;
> @@ -350,9 +351,19 @@ u64 format_field__intval(struct tep_format_field *field, struct perf_sample *sam
>
> struct tep_format_field *evsel__field(struct evsel *evsel, const char *name);
>
> -#define evsel__match(evsel, t, c) \
> - (evsel->core.attr.type == PERF_TYPE_##t && \
> - evsel->core.attr.config == PERF_COUNT_##c)
> +static inline bool __evsel__match(const struct evsel *evsel, u32 type, u64 config)
> +{
> + if (evsel->core.attr.type != type)
> + return false;
> +
> + if ((type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE) &&
> + perf_pmus__supports_extended_type())
> + return (evsel->core.attr.config & PERF_HW_EVENT_MASK) == config;
> +
> + return evsel->core.attr.config == config;
> +}
> +
> +#define evsel__match(evsel, t, c) __evsel__match(evsel, PERF_TYPE_##t, PERF_COUNT_##c)
>
> static inline bool evsel__match2(struct evsel *e1, struct evsel *e2)
> {
> --
> 2.35.1
>