2022-04-20 17:08:17

by Florian Fischer

Subject: [PATCHSET v4 next 0/3] perf stat: add user_time and system_time tool events

This patch series adds new internal events to perf stat exposing the times spent
in user and kernel mode, in nanoseconds, as reported by rusage.

During some benchmarking using perf it bothered me that I could not easily
retrieve those times from perf stat when using the machine-readable output.

But perf clearly knows those values, because they are present in the
human-readable output.

Therefore I exposed the times reported by rusage via the new tool events:
user_time and system_time.

This allows retrieving them in machine-readable output:

$ ./perf stat -x, -e duration_time,user_time,system_time,cache-misses -- grep -q -r duration_time tools/perf
72134524,ns,duration_time:u,72134524,100.00,,
65225000,ns,user_time:u,65225000,100.00,,
6865000,ns,ssystem_time:u,6865000,100.00,,
38705,,cache-misses:u,71189328,100.00,,

The changes are mostly inspired by the code for the only other available
tool event: 'duration_time'.

For now the new user_time and system_time events are not usable with metrics
(as far as I understand).

The patch applies cleanly on linux-next/next-20220420.

[PATCH v4 1/3] perf stat: introduce stats for the user and system
[PATCH v4 2/3] perf stat: add user_time and system_time events
[PATCH v4 3/3] perf list: print all available tool events

[v2]: Split up the changes into separate commits.

[v3]: * Use nanoseconds as suggested by Xing Zhengjun.
* Squash [PATCH v2 2/4] and [PATCH v2 4/4], because the latter only adds the unit string.

[v4]: * Simplify event names to only user_time and system_time as suggested by Ian Rogers.

Florian Fischer


2022-04-22 17:28:16

by Florian Fischer

Subject: [PATCH v4 3/3] perf list: print all available tool events

Introduce names for the new tool events 'user_time' and 'system_time'.

Signed-off-by: Florian Fischer <[email protected]>
---
tools/perf/util/evsel.c | 19 ++++++++++------
tools/perf/util/evsel.h | 1 +
tools/perf/util/parse-events.c | 40 +++++++++++++++++++++++++++++-----
3 files changed, 47 insertions(+), 13 deletions(-)

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 2a1729e7aee4..d38722560e80 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -597,6 +597,17 @@ static int evsel__sw_name(struct evsel *evsel, char *bf, size_t size)
return r + evsel__add_modifiers(evsel, bf + r, size - r);
}

+const char *evsel__tool_names[PERF_TOOL_MAX] = {
+ "duration_time",
+ "user_time",
+ "system_time",
+};
+
+static int evsel__tool_name(enum perf_tool_event ev, char *bf, size_t size)
+{
+ return scnprintf(bf, size, "%s", evsel__tool_names[ev]);
+}
+
static int __evsel__bp_name(char *bf, size_t size, u64 addr, u64 type)
{
int r;
@@ -723,12 +734,6 @@ static int evsel__raw_name(struct evsel *evsel, char *bf, size_t size)
return ret + evsel__add_modifiers(evsel, bf + ret, size - ret);
}

-static int evsel__tool_name(char *bf, size_t size)
-{
- int ret = scnprintf(bf, size, "duration_time");
- return ret;
-}
-
const char *evsel__name(struct evsel *evsel)
{
char bf[128];
@@ -754,7 +759,7 @@ const char *evsel__name(struct evsel *evsel)

case PERF_TYPE_SOFTWARE:
if (evsel->tool_event)
- evsel__tool_name(bf, sizeof(bf));
+ evsel__tool_name(evsel->tool_event, bf, sizeof(bf));
else
evsel__sw_name(evsel, bf, sizeof(bf));
break;
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 7e2209b47b39..45d674812239 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -262,6 +262,7 @@ extern const char *evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX][EVSEL__MAX_ALI
extern const char *evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX][EVSEL__MAX_ALIASES];
extern const char *evsel__hw_names[PERF_COUNT_HW_MAX];
extern const char *evsel__sw_names[PERF_COUNT_SW_MAX];
+extern const char *evsel__tool_names[PERF_TOOL_MAX];
extern char *evsel__bpf_counter_events;
bool evsel__match_bpf_counter_events(const char *name);

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 064ec368a90b..c0e88d3b3e3c 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -154,6 +154,21 @@ struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = {
},
};

+struct event_symbol event_symbols_tool[PERF_TOOL_MAX] = {
+ [PERF_TOOL_DURATION_TIME] = {
+ .symbol = "duration_time",
+ .alias = "",
+ },
+ [PERF_TOOL_USER_TIME] = {
+ .symbol = "user_time",
+ .alias = "",
+ },
+ [PERF_TOOL_SYSTEM_TIME] = {
+ .symbol = "system_time",
+ .alias = "",
+ },
+};
+
#define __PERF_EVENT_FIELD(config, name) \
((config & PERF_EVENT_##name##_MASK) >> PERF_EVENT_##name##_SHIFT)

@@ -3058,21 +3073,34 @@ int print_hwcache_events(const char *event_glob, bool name_only)
return evt_num;
}

-static void print_tool_event(const char *name, const char *event_glob,
+static void print_tool_event(const struct event_symbol *syms, const char *event_glob,
bool name_only)
{
- if (event_glob && !strglobmatch(name, event_glob))
+ if (syms->symbol == NULL)
+ return;
+
+ if (event_glob && !(strglobmatch(syms->symbol, event_glob) ||
+ (syms->alias && strglobmatch(syms->alias, event_glob))))
return;
+
if (name_only)
- printf("%s ", name);
- else
+ printf("%s ", syms->symbol);
+ else {
+ char name[MAX_NAME_LEN];
+ if (syms->alias)
+ snprintf(name, MAX_NAME_LEN, "%s OR %s", syms->symbol, syms->alias);
+ else
+ strlcpy(name, syms->symbol, MAX_NAME_LEN);
printf(" %-50s [%s]\n", name, "Tool event");
-
+ }
}

void print_tool_events(const char *event_glob, bool name_only)
{
- print_tool_event("duration_time", event_glob, name_only);
+ // Start at 1 because the first enum entry symbolizes no tool event
+ for (int i = 1; i < PERF_TOOL_MAX; ++i) {
+ print_tool_event(event_symbols_tool + i, event_glob, name_only);
+ }
if (pager_in_use())
printf("\n");
}
--
2.36.0

2022-04-22 21:01:44

by Florian Fischer

Subject: [PATCH v4 1/3] perf stat: introduce stats for the user and system rusage times

This is preparation for exporting rusage values as tool events.

Add new global stats tracking the values obtained via rusage.
For now only ru_utime and ru_stime are part of the tracked stats.
Both are stored as nanoseconds to be consistent with 'duration_time',
although the finest resolution the struct timeval data in rusage
provides is microseconds.
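
For reference, a stand-alone sketch of this conversion outside the perf tree;
the helper names below are made up for illustration, and only <stdint.h> and
<sys/resource.h> are assumed:

#include <stdint.h>
#include <sys/resource.h>

/* Convert one struct timeval from rusage into nanoseconds. */
static inline uint64_t timeval_to_ns(const struct timeval *tv)
{
	return (uint64_t)tv->tv_sec * 1000000000ULL +
	       (uint64_t)tv->tv_usec * 1000ULL;
}

/* Example use: user and system time of the calling process, in ns. */
static void own_times_ns(uint64_t *user_ns, uint64_t *sys_ns)
{
	struct rusage ru;

	getrusage(RUSAGE_SELF, &ru);
	*user_ns = timeval_to_ns(&ru.ru_utime);
	*sys_ns = timeval_to_ns(&ru.ru_stime);
}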

Signed-off-by: Florian Fischer <[email protected]>
---
tools/perf/builtin-stat.c | 5 ++++-
tools/perf/util/stat-shadow.c | 2 ++
tools/perf/util/stat.h | 21 +++++++++++++++++++++
3 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index a96f106dc93a..61faffb535f5 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -216,6 +216,7 @@ static struct perf_stat_config stat_config = {
.run_count = 1,
.metric_only_len = METRIC_ONLY_LEN,
.walltime_nsecs_stats = &walltime_nsecs_stats,
+ .ru_stats = &ru_stats,
.big_num = true,
.ctl_fd = -1,
.ctl_fd_ack = -1,
@@ -1010,8 +1011,10 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
evlist__reset_prev_raw_counts(evsel_list);
runtime_stat_reset(&stat_config);
perf_stat__reset_shadow_per_stat(&rt_stat);
- } else
+ } else {
update_stats(&walltime_nsecs_stats, t1 - t0);
+ update_rusage_stats(&ru_stats, &stat_config.ru_data);
+ }

/*
* Closing a group leader splits the group, and as we only disable
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 10af7804e482..ea4c35e4f1da 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -26,6 +26,7 @@

struct runtime_stat rt_stat;
struct stats walltime_nsecs_stats;
+struct rusage_stats ru_stats;

struct saved_value {
struct rb_node rb_node;
@@ -199,6 +200,7 @@ void perf_stat__reset_shadow_stats(void)
{
reset_stat(&rt_stat);
memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
+ memset(&ru_stats, 0, sizeof(ru_stats));
}

void perf_stat__reset_shadow_per_stat(struct runtime_stat *st)
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 335d19cc3063..e31c94d952e9 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -108,6 +108,11 @@ struct runtime_stat {
struct rblist value_list;
};

+struct rusage_stats {
+ struct stats ru_utime_usec_stat;
+ struct stats ru_stime_usec_stat;
+};
+
typedef struct aggr_cpu_id (*aggr_get_id_t)(struct perf_stat_config *config, struct perf_cpu cpu);

struct perf_stat_config {
@@ -148,6 +153,7 @@ struct perf_stat_config {
const char *csv_sep;
struct stats *walltime_nsecs_stats;
struct rusage ru_data;
+ struct rusage_stats *ru_stats;
struct cpu_aggr_map *aggr_map;
aggr_get_id_t aggr_get_id;
struct cpu_aggr_map *cpus_aggr_map;
@@ -177,6 +183,20 @@ static inline void init_stats(struct stats *stats)
stats->max = 0;
}

+static inline void init_rusage_stats(struct rusage_stats *ru_stats) {
+ init_stats(&ru_stats->ru_utime_usec_stat);
+ init_stats(&ru_stats->ru_stime_usec_stat);
+}
+
+static inline void update_rusage_stats(struct rusage_stats *ru_stats, struct rusage* rusage) {
+ const u64 us_to_ns = 1000;
+ const u64 s_to_ns = 1000000000;
+ update_stats(&ru_stats->ru_utime_usec_stat,
+ (rusage->ru_utime.tv_usec * us_to_ns + rusage->ru_utime.tv_sec * s_to_ns));
+ update_stats(&ru_stats->ru_stime_usec_stat,
+ (rusage->ru_stime.tv_usec * us_to_ns + rusage->ru_stime.tv_sec * s_to_ns));
+}
+
struct evsel;
struct evlist;

@@ -196,6 +216,7 @@ bool __perf_stat_evsel__is(struct evsel *evsel, enum perf_stat_evsel_id id);

extern struct runtime_stat rt_stat;
extern struct stats walltime_nsecs_stats;
+extern struct rusage_stats ru_stats;

typedef void (*print_metric_t)(struct perf_stat_config *config,
void *ctx, const char *color, const char *unit,
--
2.36.0

2022-04-22 21:32:51

by Arnaldo Carvalho de Melo

Subject: Re: [PATCHSET v4 next 0/3] perf stat: add user_time and system_time tool events

Em Wed, Apr 20, 2022 at 12:23:51PM +0200, Florian Fischer escreveu:
> This patch series adds new internal events to perf stat exposing the times spend
> in user and kernel mode in nanoseconds reported by rusage.
>
> During some benchmarking using perf it bothered me that I could not easily
> retrieve those times from perf stat when using the machine readable output.
>
> But perf definitely knows about those values because in the human readable output
> they are present.
>
> Therefore I exposed the times reported by rusage via the new tool events:
> user_time and system_time.
>
> This allows to retrieved them in machine-readable output:
>
> $ ./perf stat -x, -e duration_time,user_time,system_time,cache-misses -- grep -q -r duration_time tools/perf
> 72134524,ns,duration_time:u,72134524,100.00,,
> 65225000,ns,user_time:u,65225000,100.00,,
> 6865000,ns,ssystem_time:u,6865000,100.00,,
> 38705,,cache-misses:u,71189328,100.00,,
>
> The changes are mostly inspired by the code for the only other available
> tool event: 'duration_time'.
>
> For now the new user_time and system_time events are not usable with metrics
> (as far as I understand).
>
> The patch applies cleanly on linux-next/next-20220420.

I tested and applied the first two patches, please take a look at that
"OR" problem with patch 3.

- Arnaldo

> [PATCH v4 1/3] perf stat: introduce stats for the user and system
> [PATCH v4 2/3] perf stat: add user_time and system_time events
> [PATCH v4 3/3] perf list: print all available tool events
>
> [v2]: Split up the changes into separate commits.
>
> [v3]: * Use nanoseconds as suggested by Xing Zhengjun.
> * Squash [PATCH v2 2/4] and [PATCH v2 4/4], because it only adds the unit string.
>
> [v4]: * Simplify event names to only user_time and system_time as suggested by Ian Rogers.
>
> Florian Fischer

2022-04-22 22:26:14

by Florian Fischer

Subject: [PATCH v4 2/3] perf stat: add user_time and system_time events

It bothered me that during benchmarking using perf stat (to collect
for example CPU cache events) I could not simultaneously retrieve the
times spent in user or kernel mode in a machine-readable format.

When running perf stat the output for humans contains the times
reported by rusage and wait4.

$ perf stat -e cache-misses:u -- true

Performance counter stats for 'true':

4,206 cache-misses:u

0.001113619 seconds time elapsed

0.001175000 seconds user
0.000000000 seconds sys

But perf stat's machine-readable format does not provide this information.

$ perf stat -x, -e cache-misses:u -- true
4282,,cache-misses:u,492859,100.00,,

I found no way to retrieve this information using the available events
while using machine-readable output.

This patch adds two new tool-internal events, 'user_time'
and 'system_time', similar to the already present 'duration_time' event.

Both events use the already collected rusage information obtained by wait4
and tracked in the global ru_stats.
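
For context, a minimal stand-alone example (not perf code) of how wait4(2)
hands the child's rusage back to the parent; error handling is trimmed:

#include <stdio.h>
#include <unistd.h>
#include <sys/resource.h>
#include <sys/wait.h>

int main(void)
{
	int status;
	struct rusage ru;
	pid_t pid = fork();

	if (pid == 0) {			/* child: run the workload */
		execlp("true", "true", (char *)NULL);
		_exit(127);
	}

	/* parent: wait4() reaps the child and fills ru with its usage */
	if (wait4(pid, &status, 0, &ru) < 0) {
		perror("wait4");
		return 1;
	}

	printf("user %ld.%06ld s, sys %ld.%06ld s\n",
	       (long)ru.ru_utime.tv_sec, (long)ru.ru_utime.tv_usec,
	       (long)ru.ru_stime.tv_sec, (long)ru.ru_stime.tv_usec);
	return 0;
}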

Examples presenting cache-misses and rusage information in both human and
machine-readable form:

$ ./perf stat -e duration_time,user_time,system_time,cache-misses -- grep -q -r duration_time .

Performance counter stats for 'grep -q -r duration_time .':

67,422,542 ns duration_time:u
50,517,000 ns user_time:u
16,839,000 ns system_time:u
30,937 cache-misses:u

0.067422542 seconds time elapsed

0.050517000 seconds user
0.016839000 seconds sys

$ ./perf stat -x, -e duration_time,user_time,system_time,cache-misses -- grep -q -r duration_time .
72134524,ns,duration_time:u,72134524,100.00,,
65225000,ns,user_time:u,65225000,100.00,,
6865000,ns,system_time:u,6865000,100.00,,
38705,,cache-misses:u,71189328,100.00,,

Signed-off-by: Florian Fischer <[email protected]>
---
tools/perf/builtin-stat.c | 36 ++++++++++++++++++++++++++--------
tools/perf/util/evsel.h | 4 ++++
tools/perf/util/parse-events.c | 4 +++-
tools/perf/util/parse-events.l | 2 ++
4 files changed, 37 insertions(+), 9 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 61faffb535f5..dea34c8990ae 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -342,15 +342,35 @@ static int evsel__write_stat_event(struct evsel *counter, int cpu_map_idx, u32 t
static int read_single_counter(struct evsel *counter, int cpu_map_idx,
int thread, struct timespec *rs)
{
- if (counter->tool_event == PERF_TOOL_DURATION_TIME) {
- u64 val = rs->tv_nsec + rs->tv_sec*1000000000ULL;
- struct perf_counts_values *count =
- perf_counts(counter->counts, cpu_map_idx, thread);
- count->ena = count->run = val;
- count->val = val;
- return 0;
+ switch(counter->tool_event) {
+ case PERF_TOOL_DURATION_TIME: {
+ u64 val = rs->tv_nsec + rs->tv_sec*1000000000ULL;
+ struct perf_counts_values *count =
+ perf_counts(counter->counts, cpu_map_idx, thread);
+ count->ena = count->run = val;
+ count->val = val;
+ return 0;
+ }
+ case PERF_TOOL_USER_TIME:
+ case PERF_TOOL_SYSTEM_TIME: {
+ u64 val;
+ struct perf_counts_values *count =
+ perf_counts(counter->counts, cpu_map_idx, thread);
+ if (counter->tool_event == PERF_TOOL_USER_TIME)
+ val = ru_stats.ru_utime_usec_stat.mean;
+ else
+ val = ru_stats.ru_stime_usec_stat.mean;
+ count->ena = count->run = val;
+ count->val = val;
+ return 0;
+ }
+ default:
+ case PERF_TOOL_NONE:
+ return evsel__read_counter(counter, cpu_map_idx, thread);
+ case PERF_TOOL_MAX:
+ /* This should never be reached */
+ return 0;
}
- return evsel__read_counter(counter, cpu_map_idx, thread);
}

/*
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 041b42d33bf5..7e2209b47b39 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -30,6 +30,10 @@ typedef int (evsel__sb_cb_t)(union perf_event *event, void *data);
enum perf_tool_event {
PERF_TOOL_NONE = 0,
PERF_TOOL_DURATION_TIME = 1,
+ PERF_TOOL_USER_TIME = 2,
+ PERF_TOOL_SYSTEM_TIME = 3,
+
+ PERF_TOOL_MAX,
};

/** struct evsel - event selector
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index dd84fed698a3..064ec368a90b 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -402,7 +402,9 @@ static int add_event_tool(struct list_head *list, int *idx,
if (!evsel)
return -ENOMEM;
evsel->tool_event = tool_event;
- if (tool_event == PERF_TOOL_DURATION_TIME) {
+ if (tool_event == PERF_TOOL_DURATION_TIME
+ || tool_event == PERF_TOOL_USER_TIME
+ || tool_event == PERF_TOOL_SYSTEM_TIME) {
free((char *)evsel->unit);
evsel->unit = strdup("ns");
}
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 5b6e4b5249cf..3a9ce96c8bce 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -353,6 +353,8 @@ alignment-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_AL
emulation-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS); }
dummy { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); }
duration_time { return tool(yyscanner, PERF_TOOL_DURATION_TIME); }
+user_time { return tool(yyscanner, PERF_TOOL_USER_TIME); }
+system_time { return tool(yyscanner, PERF_TOOL_SYSTEM_TIME); }
bpf-output { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUTPUT); }
cgroup-switches { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CGROUP_SWITCHES); }

--
2.36.0

2022-04-23 00:27:43

by Namhyung Kim

Subject: Re: [PATCHSET v4 next 0/3] perf stat: add user_time and system_time tool events

Hello,

On Fri, Apr 22, 2022 at 3:05 PM Arnaldo Carvalho de Melo
<[email protected]> wrote:
>
> Em Wed, Apr 20, 2022 at 12:23:51PM +0200, Florian Fischer escreveu:
> > This patch series adds new internal events to perf stat exposing the times spend
> > in user and kernel mode in nanoseconds reported by rusage.
> >
> > During some benchmarking using perf it bothered me that I could not easily
> > retrieve those times from perf stat when using the machine readable output.
> >
> > But perf definitely knows about those values because in the human readable output
> > they are present.
> >
> > Therefore I exposed the times reported by rusage via the new tool events:
> > user_time and system_time.
> >
> > This allows to retrieved them in machine-readable output:
> >
> > $ ./perf stat -x, -e duration_time,user_time,system_time,cache-misses -- grep -q -r duration_time tools/perf
> > 72134524,ns,duration_time:u,72134524,100.00,,
> > 65225000,ns,user_time:u,65225000,100.00,,
> > 6865000,ns,ssystem_time:u,6865000,100.00,,

ssystem? Looks like a typo.

Anyway it looks a little bit strange to me if we can get
system time in user mode only (the 'u' modifier).

Thanks,
Namhyung


> > 38705,,cache-misses:u,71189328,100.00,,
> >
> > The changes are mostly inspired by the code for the only other available
> > tool event: 'duration_time'.

2022-04-23 16:11:54

by Florian Fischer

Subject: Re: [PATCHSET v4 next 0/3] perf stat: add user_time and system_time tool events

On 22.04.2022 16:52, Namhyung Kim wrote:
> Hello,
>
> On Fri, Apr 22, 2022 at 3:05 PM Arnaldo Carvalho de Melo
> <[email protected]> wrote:
> >
> > Em Wed, Apr 20, 2022 at 12:23:51PM +0200, Florian Fischer escreveu:
> > > This patch series adds new internal events to perf stat exposing the times spend
> > > in user and kernel mode in nanoseconds reported by rusage.
> > >
> > > During some benchmarking using perf it bothered me that I could not easily
> > > retrieve those times from perf stat when using the machine readable output.
> > >
> > > But perf definitely knows about those values because in the human readable output
> > > they are present.
> > >
> > > Therefore I exposed the times reported by rusage via the new tool events:
> > > user_time and system_time.
> > >
> > > This allows to retrieved them in machine-readable output:
> > >
> > > $ ./perf stat -x, -e duration_time,user_time,system_time,cache-misses -- grep -q -r duration_time tools/perf
> > > 72134524,ns,duration_time:u,72134524,100.00,,
> > > 65225000,ns,user_time:u,65225000,100.00,,
> > > 6865000,ns,ssystem_time:u,6865000,100.00,,
>
> ssystem? Looks like a typo.

Yes thanks. Fortunately I fixed it in the commit that got applied [1].

>
> Anyway it looks a little bit strange to me if we can get
> system time in user mode only (the 'u' modifier).

Sorry but I don't really understand what you mean.
The system_time is reported to userspace via rusage filled by wait4(2).
It will always report the value handed to user space, regardless of which
counters perf has access to.

If you run perf as user you get the same system_time (but with the ':u' suffix)
as when you run perf as root or lower kernel.perf_event_paranoid to allow access
to more counters.

$ perf stat -x, -e system_time sync
31246000,ns,system_time:u,31246000,100.00,,

# perf stat -x, -e system_time sync
33794000,ns,system_time,33794000,100.00,,

>
> Thanks,
> Namhyung
>

Flo Fischer

[1] https://git.kernel.org/pub/scm/linux/kernel/git/acme/linux.git/commit/?h=tmp.perf/core&id=b03b89b350034f220cc24fc77c56990a97a796b2

2022-04-25 20:18:00

by Namhyung Kim

Subject: Re: [PATCHSET v4 next 0/3] perf stat: add user_time and system_time tool events

On Sat, Apr 23, 2022 at 5:16 AM Florian Fischer
<[email protected]> wrote:
>
> On 22.04.2022 16:52, Namhyung Kim wrote:
> > Hello,
> >
> > On Fri, Apr 22, 2022 at 3:05 PM Arnaldo Carvalho de Melo
> > <[email protected]> wrote:
> > >
> > > Em Wed, Apr 20, 2022 at 12:23:51PM +0200, Florian Fischer escreveu:
> > > > This patch series adds new internal events to perf stat exposing the times spend
> > > > in user and kernel mode in nanoseconds reported by rusage.
> > > >
> > > > During some benchmarking using perf it bothered me that I could not easily
> > > > retrieve those times from perf stat when using the machine readable output.
> > > >
> > > > But perf definitely knows about those values because in the human readable output
> > > > they are present.
> > > >
> > > > Therefore I exposed the times reported by rusage via the new tool events:
> > > > user_time and system_time.
> > > >
> > > > This allows to retrieved them in machine-readable output:
> > > >
> > > > $ ./perf stat -x, -e duration_time,user_time,system_time,cache-misses -- grep -q -r duration_time tools/perf
> > > > 72134524,ns,duration_time:u,72134524,100.00,,
> > > > 65225000,ns,user_time:u,65225000,100.00,,
> > > > 6865000,ns,ssystem_time:u,6865000,100.00,,

> > Anyway it looks a little bit strange to me if we can get
> > system time in user mode only (the 'u' modifier).
>
> Sorry but I don't really understand what you mean.
> The system_time is reported to userspace via rusage filled by wait4(2).
> It will always report the value reported to the user space regardless of what
> counters perf has access to.
>
> If you run perf as user you get the same system_time (but with the ':u' suffix)
> as when you run perf as root or lower kernel.perf_event_paranoid to allow access
> to more counters.

The ':u' modifier means that the event should count only in user mode.
So I think system_time:u should be 0 by definition.
Likewise, user_time:k should be handled as such.

But as I said before, we already have the task-clock event, so it's not
clear why we need this too. Also these tool events won't work for
other use cases like perf record.

Thanks,
Namhyung

2022-04-26 12:08:11

by Florian Fischer

Subject: Re: [PATCHSET v4 next 0/3] perf stat: add user_time and system_time tool events

On 25.04.2022 12:06, Namhyung Kim wrote:
> On Sat, Apr 23, 2022 at 5:16 AM Florian Fischer
> <[email protected]> wrote:
> >
> > On 22.04.2022 16:52, Namhyung Kim wrote:
> > > Hello,
> > >
> > > On Fri, Apr 22, 2022 at 3:05 PM Arnaldo Carvalho de Melo
> > > <[email protected]> wrote:
> > > >
> > > > Em Wed, Apr 20, 2022 at 12:23:51PM +0200, Florian Fischer escreveu:
> > > > > This patch series adds new internal events to perf stat exposing the times spend
> > > > > in user and kernel mode in nanoseconds reported by rusage.
> > > > >
> > > > > During some benchmarking using perf it bothered me that I could not easily
> > > > > retrieve those times from perf stat when using the machine readable output.
> > > > >
> > > > > But perf definitely knows about those values because in the human readable output
> > > > > they are present.
> > > > >
> > > > > Therefore I exposed the times reported by rusage via the new tool events:
> > > > > user_time and system_time.
> > > > >
> > > > > This allows to retrieved them in machine-readable output:
> > > > >
> > > > > $ ./perf stat -x, -e duration_time,user_time,system_time,cache-misses -- grep -q -r duration_time tools/perf
> > > > > 72134524,ns,duration_time:u,72134524,100.00,,
> > > > > 65225000,ns,user_time:u,65225000,100.00,,
> > > > > 6865000,ns,ssystem_time:u,6865000,100.00,,
>
> > > Anyway it looks a little bit strange to me if we can get
> > > system time in user mode only (the 'u' modifier).
> >
> > Sorry but I don't really understand what you mean.
> > The system_time is reported to userspace via rusage filled by wait4(2).
> > It will always report the value reported to the user space regardless of what
> > counters perf has access to.
> >
> > If you run perf as user you get the same system_time (but with the ':u' suffix)
> > as when you run perf as root or lower kernel.perf_event_paranoid to allow access
> > to more counters.
>
> The ':u' modifier means that the event should count only in user mode.
> So I think system_time:u should be 0 by definition.
> Likewise, user_time:k should be handled as such.

Not sure if this logic applies to tool events, because the time spent in user mode
or kernel mode stays the same regardless of where you count it.

This is consistent with how duration_time works currently in perf stat.

./perf stat -e duration_time,duration_time:u,duration_time:k -- grep -q -r duration_time .

Performance counter stats for 'grep -q -r duration_time .':

29,012,558 ns duration_time
29,012,558 ns duration_time:u
29,012,558 ns duration_time:k

0.029012558 seconds time elapsed

0.029088000 seconds user
0.000000000 seconds sys

>
> But as I said before, we already have the task-clock event, so it's not
> clear why we need this too. Also these tool events won't work for
> other use cases like perf record.

Maybe I do not know enough about perf, but I think this is not exactly how those
suffixes work. This is the output of perf from next-20220420 with my rusage tool
event patches applied:

$ sysctl kernel.perf_event_paranoid
kernel.perf_event_paranoid = -1

linux/tools/perf$ ./perf stat -e task-clock,task-clock:u,task-clock:k,\
duration_time,duration_time:u,duration_time:k,\
system_time,system_time:u,system_time:k,\
user_time,user_time:u,user_time:k -- grep -q -r duration_time .

Performance counter stats for 'grep -q -r duration_time .':

42.30 msec task-clock # 0.483 CPUs utilized
42.30 msec task-clock:u # 0.483 CPUs utilized
42.30 msec task-clock:k # 0.483 CPUs utilized
87,533,340 ns duration_time # 2.069 G/sec
87,533,340 ns duration_time:u # 2.069 G/sec
87,533,340 ns duration_time:k # 2.069 G/sec
12,841,000 ns system_time # 303.547 M/sec
12,841,000 ns system_time:u # 303.547 M/sec
12,841,000 ns system_time:k # 303.547 M/sec
30,820,000 ns user_time # 728.550 M/sec
30,820,000 ns user_time:u # 728.550 M/sec
30,820,000 ns user_time:k # 728.550 M/sec

0.087533340 seconds time elapsed

0.030820000 seconds user
0.012841000 seconds sys


So task-clock, regardless of its ':u' or ':k' suffix, shows approximately the same
total CPU time and does not split it into user and kernel time.

But for my experiments on a shared system, where kernel.perf_event_paranoid is
not set to -1 and I feel uncomfortable testing research prototypes as root, I
would like a simple and fast way to tell where my program spends its time.

This can be achieved by using user_time and system_time,
which are reported in the human-readable output anyway.

Maybe I am doing something wrong here but as shown above task-clock does not
provide the information I am interested in.

Florian Fischer

2022-04-27 10:44:42

by Ian Rogers

Subject: Re: [PATCH v4 2/3] perf stat: add user_time and system_time events

On Wed, Apr 20, 2022 at 3:24 AM Florian Fischer
<[email protected]> wrote:
>
> It bothered me that during benchmarking using perf stat (to collect
> for example CPU cache events) I could not simultaneously retrieve the
> times spend in user or kernel mode in a machine readable format.
>
> When running perf stat the output for humans contains the times
> reported by rusage and wait4.
>
> $ perf stat -e cache-misses:u -- true
>
> Performance counter stats for 'true':
>
> 4,206 cache-misses:u
>
> 0.001113619 seconds time elapsed
>
> 0.001175000 seconds user
> 0.000000000 seconds sys
>
> But perf stat's machine-readable format does not provide this information.
>
> $ perf stat -x, -e cache-misses:u -- true
> 4282,,cache-misses:u,492859,100.00,,
>
> I found no way to retrieve this information using the available events
> while using machine-readable output.
>
> This patch adds two new tool internal events 'user_time'
> and 'system_time', similarly to the already present 'duration_time' event.
>
> Both events use the already collected rusage information obtained by wait4
> and tracked in the global ru_stats.
>
> Examples presenting cache-misses and rusage information in both human and
> machine-readable form:
>
> $ ./perf stat -e duration_time,user_time,system_time,cache-misses -- grep -q -r duration_time .
>
> Performance counter stats for 'grep -q -r duration_time .':
>
> 67,422,542 ns duration_time:u
> 50,517,000 ns user_time:u
> 16,839,000 ns system_time:u
> 30,937 cache-misses:u

nit: Are the ':u' suffixes on the *_time events accurate here?

> 0.067422542 seconds time elapsed
>
> 0.050517000 seconds user
> 0.016839000 seconds sys
>
> $ ./perf stat -x, -e duration_time,user_time,system_time,cache-misses -- grep -q -r duration_time .
> 72134524,ns,duration_time:u,72134524,100.00,,
> 65225000,ns,user_time:u,65225000,100.00,,
> 6865000,ns,system_time:u,6865000,100.00,,
> 38705,,cache-misses:u,71189328,100.00,,

nit: Similarly.

> Signed-off-by: Florian Fischer <[email protected]>
> ---
> tools/perf/builtin-stat.c | 36 ++++++++++++++++++++++++++--------
> tools/perf/util/evsel.h | 4 ++++
> tools/perf/util/parse-events.c | 4 +++-
> tools/perf/util/parse-events.l | 2 ++
> 4 files changed, 37 insertions(+), 9 deletions(-)
>
> diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
> index 61faffb535f5..dea34c8990ae 100644
> --- a/tools/perf/builtin-stat.c
> +++ b/tools/perf/builtin-stat.c
> @@ -342,15 +342,35 @@ static int evsel__write_stat_event(struct evsel *counter, int cpu_map_idx, u32 t
> static int read_single_counter(struct evsel *counter, int cpu_map_idx,
> int thread, struct timespec *rs)
> {
> - if (counter->tool_event == PERF_TOOL_DURATION_TIME) {
> - u64 val = rs->tv_nsec + rs->tv_sec*1000000000ULL;
> - struct perf_counts_values *count =
> - perf_counts(counter->counts, cpu_map_idx, thread);
> - count->ena = count->run = val;
> - count->val = val;
> - return 0;

When this was one tool event I think the if was okay, but now I think we
should move the reading of the tool events into their own helper,
analogous to evsel__read_counter. Perhaps evsel__read_tool.

Thanks,
Ian
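
Roughly, that split could look something like the sketch below. It only
rearranges the code quoted in this patch; the helper name and the exact
guard are guesses, not code from the tree:

/* Sketch: read tool events in a dedicated helper instead of the switch
 * inside read_single_counter(). Names beyond the existing enum values
 * and functions quoted above are hypothetical.
 */
static int evsel__read_tool(struct evsel *counter, int cpu_map_idx,
			    int thread, struct timespec *rs)
{
	struct perf_counts_values *count =
		perf_counts(counter->counts, cpu_map_idx, thread);
	u64 val;

	switch (counter->tool_event) {
	case PERF_TOOL_DURATION_TIME:
		val = rs->tv_nsec + rs->tv_sec * 1000000000ULL;
		break;
	case PERF_TOOL_USER_TIME:
		val = ru_stats.ru_utime_usec_stat.mean;
		break;
	case PERF_TOOL_SYSTEM_TIME:
		val = ru_stats.ru_stime_usec_stat.mean;
		break;
	default:
		return 0;
	}

	count->ena = count->run = val;
	count->val = val;
	return 0;
}

static int read_single_counter(struct evsel *counter, int cpu_map_idx,
			       int thread, struct timespec *rs)
{
	if (counter->tool_event > PERF_TOOL_NONE &&
	    counter->tool_event < PERF_TOOL_MAX)
		return evsel__read_tool(counter, cpu_map_idx, thread, rs);

	return evsel__read_counter(counter, cpu_map_idx, thread);
}

This would keep read_single_counter() free of tool-event details.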

> + switch(counter->tool_event) {
> + case PERF_TOOL_DURATION_TIME: {
> + u64 val = rs->tv_nsec + rs->tv_sec*1000000000ULL;
> + struct perf_counts_values *count =
> + perf_counts(counter->counts, cpu_map_idx, thread);
> + count->ena = count->run = val;
> + count->val = val;
> + return 0;
> + }
> + case PERF_TOOL_USER_TIME:
> + case PERF_TOOL_SYSTEM_TIME: {
> + u64 val;
> + struct perf_counts_values *count =
> + perf_counts(counter->counts, cpu_map_idx, thread);
> + if (counter->tool_event == PERF_TOOL_USER_TIME)
> + val = ru_stats.ru_utime_usec_stat.mean;
> + else
> + val = ru_stats.ru_stime_usec_stat.mean;
> + count->ena = count->run = val;
> + count->val = val;
> + return 0;
> + }
> + default:
> + case PERF_TOOL_NONE:
> + return evsel__read_counter(counter, cpu_map_idx, thread);
> + case PERF_TOOL_MAX:
> + /* This should never be reached */
> + return 0;
> }
> - return evsel__read_counter(counter, cpu_map_idx, thread);
> }
>
> /*
> diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
> index 041b42d33bf5..7e2209b47b39 100644
> --- a/tools/perf/util/evsel.h
> +++ b/tools/perf/util/evsel.h
> @@ -30,6 +30,10 @@ typedef int (evsel__sb_cb_t)(union perf_event *event, void *data);
> enum perf_tool_event {
> PERF_TOOL_NONE = 0,
> PERF_TOOL_DURATION_TIME = 1,
> + PERF_TOOL_USER_TIME = 2,
> + PERF_TOOL_SYSTEM_TIME = 3,
> +
> + PERF_TOOL_MAX,
> };
>
> /** struct evsel - event selector
> diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
> index dd84fed698a3..064ec368a90b 100644
> --- a/tools/perf/util/parse-events.c
> +++ b/tools/perf/util/parse-events.c
> @@ -402,7 +402,9 @@ static int add_event_tool(struct list_head *list, int *idx,
> if (!evsel)
> return -ENOMEM;
> evsel->tool_event = tool_event;
> - if (tool_event == PERF_TOOL_DURATION_TIME) {
> + if (tool_event == PERF_TOOL_DURATION_TIME
> + || tool_event == PERF_TOOL_USER_TIME
> + || tool_event == PERF_TOOL_SYSTEM_TIME) {
> free((char *)evsel->unit);
> evsel->unit = strdup("ns");
> }
> diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
> index 5b6e4b5249cf..3a9ce96c8bce 100644
> --- a/tools/perf/util/parse-events.l
> +++ b/tools/perf/util/parse-events.l
> @@ -353,6 +353,8 @@ alignment-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_AL
> emulation-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS); }
> dummy { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); }
> duration_time { return tool(yyscanner, PERF_TOOL_DURATION_TIME); }
> +user_time { return tool(yyscanner, PERF_TOOL_USER_TIME); }
> +system_time { return tool(yyscanner, PERF_TOOL_SYSTEM_TIME); }
> bpf-output { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUTPUT); }
> cgroup-switches { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CGROUP_SWITCHES); }
>
> --
> 2.36.0
>

2022-04-27 10:45:13

by Namhyung Kim

Subject: Re: [PATCHSET v4 next 0/3] perf stat: add user_time and system_time tool events

On Tue, Apr 26, 2022 at 1:58 AM Florian Fischer
<[email protected]> wrote:
>
> On 25.04.2022 12:06, Namhyung Kim wrote:
> > On Sat, Apr 23, 2022 at 5:16 AM Florian Fischer
> > <[email protected]> wrote:
> > >
> > > On 22.04.2022 16:52, Namhyung Kim wrote:
> > > > Hello,
> > > >
> > > > On Fri, Apr 22, 2022 at 3:05 PM Arnaldo Carvalho de Melo
> > > > <[email protected]> wrote:
> > > > >
> > > > > Em Wed, Apr 20, 2022 at 12:23:51PM +0200, Florian Fischer escreveu:
> > > > > > This patch series adds new internal events to perf stat exposing the times spend
> > > > > > in user and kernel mode in nanoseconds reported by rusage.
> > > > > >
> > > > > > During some benchmarking using perf it bothered me that I could not easily
> > > > > > retrieve those times from perf stat when using the machine readable output.
> > > > > >
> > > > > > But perf definitely knows about those values because in the human readable output
> > > > > > they are present.
> > > > > >
> > > > > > Therefore I exposed the times reported by rusage via the new tool events:
> > > > > > user_time and system_time.
> > > > > >
> > > > > > This allows to retrieved them in machine-readable output:
> > > > > >
> > > > > > $ ./perf stat -x, -e duration_time,user_time,system_time,cache-misses -- grep -q -r duration_time tools/perf
> > > > > > 72134524,ns,duration_time:u,72134524,100.00,,
> > > > > > 65225000,ns,user_time:u,65225000,100.00,,
> > > > > > 6865000,ns,ssystem_time:u,6865000,100.00,,
> >
> > > > Anyway it looks a little bit strange to me if we can get
> > > > system time in user mode only (the 'u' modifier).
> > >
> > > Sorry but I don't really understand what you mean.
> > > The system_time is reported to userspace via rusage filled by wait4(2).
> > > It will always report the value reported to the user space regardless of what
> > > counters perf has access to.
> > >
> > > If you run perf as user you get the same system_time (but with the ':u' suffix)
> > > as when you run perf as root or lower kernel.perf_event_paranoid to allow access
> > > to more counters.
> >
> > The ':u' modifier means that the event should count only in user mode.
> > So I think system_time:u should be 0 by definition.
> > Likewise, user_time:k should be handled as such.
>
> Not sure if this logic applies to tool events because the time spend in user mode
> or kernel mdoe stays the same regardless of where you count it.
>
> This is consistent with how duration_time works currently in perf stat.
>
> ./perf stat -e duration_time,duration_time:u,duration_time:k -- grep -q -r duration_time .
>
> Performance counter stats for 'grep -q -r duration_time .':
>
> 29,012,558 ns duration_time
> 29,012,558 ns duration_time:u
> 29,012,558 ns duration_time:k
>
> 0.029012558 seconds time elapsed
>
> 0.029088000 seconds user
> 0.000000000 seconds sys
>
> >
> > But as I said before, we already have the task-clock event, so it's not
> > clear why we need this too. Also these tool events won't work for
> > other use cases like perf record.
>
> Maybe I do not know enough about perf but I think this is not exactly how those
> suffixes work. This is the output of perf of next-20220420 and my rusage tool
> event patches applied:
>
> $ sysctl kernel.perf_event_paranoid
> kernel.perf_event_paranoid = -1
>
> linux/tools/perf$ ./perf stat -e task-clock,task-clock:u,task-clock:k,\
> duration_time,duration_time:u,duration_time:k,\
> system_time,system_time:u,system_time:k,\
> user_time,user_time:u,user_time:k -- grep -q -r duration_time .
>
> Performance counter stats for 'grep -q -r duration_time .':
>
> 42.30 msec task-clock # 0.483 CPUs utilized
> 42.30 msec task-clock:u # 0.483 CPUs utilized
> 42.30 msec task-clock:k # 0.483 CPUs utilized
> 87,533,340 ns duration_time # 2.069 G/sec
> 87,533,340 ns duration_time:u # 2.069 G/sec
> 87,533,340 ns duration_time:k # 2.069 G/sec
> 12,841,000 ns system_time # 303.547 M/sec
> 12,841,000 ns system_time:u # 303.547 M/sec
> 12,841,000 ns system_time:k # 303.547 M/sec
> 30,820,000 ns user_time # 728.550 M/sec
> 30,820,000 ns user_time:u # 728.550 M/sec
> 30,820,000 ns user_time:k # 728.550 M/sec
>
> 0.087533340 seconds time elapsed
>
> 0.030820000 seconds user
> 0.012841000 seconds sys
>
>
> So task-clock regardless of its ':u' or ':k' suffix shows approximately the wall
> clock time.

Ok, now I recall that these software events don't work well with modifiers.
Maybe we should reject or ignore setting modifiers on them. But that's
separate work, of course.

>
> But for my experiments on a shared system where kernel.perf_event_paranoid is
> not set to -1 and I feel uncomfortable to test research prototypes as root I
> would like a simple and fast way to tell where my program spends its time.
>
> This can be achieved by using user_time and system_time.
> Which are reported in the human readable output anyway.
>
> Maybe I am doing something wrong here but as shown above task-clock does not
> provide the information I am interested in.

No, it's just me making this noise. Thank you for taking the time to take
a look at the issue.

Thanks,
Namhyung

2022-04-27 11:32:47

by Ian Rogers

Subject: Re: [PATCH v4 1/3] perf stat: introduce stats for the user and system rusage times

On Wed, Apr 20, 2022 at 3:24 AM Florian Fischer
<[email protected]> wrote:
>
> This is preparation for exporting rusage values as tool events.
>
> Add new global stats tracking the values obtained via rusage.
> For now only ru_utime and ru_stime are part of the tracked stats.
> Both are stored as nanoseconds to be consistent with 'duration_time',
> although the finest resolution the struct timeval data in rusage
> provides are microseconds.
>
> Signed-off-by: Florian Fischer <[email protected]>
> ---
> tools/perf/builtin-stat.c | 5 ++++-
> tools/perf/util/stat-shadow.c | 2 ++
> tools/perf/util/stat.h | 21 +++++++++++++++++++++
> 3 files changed, 27 insertions(+), 1 deletion(-)
>
> diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
> index a96f106dc93a..61faffb535f5 100644
> --- a/tools/perf/builtin-stat.c
> +++ b/tools/perf/builtin-stat.c
> @@ -216,6 +216,7 @@ static struct perf_stat_config stat_config = {
> .run_count = 1,
> .metric_only_len = METRIC_ONLY_LEN,
> .walltime_nsecs_stats = &walltime_nsecs_stats,
> + .ru_stats = &ru_stats,
> .big_num = true,
> .ctl_fd = -1,
> .ctl_fd_ack = -1,
> @@ -1010,8 +1011,10 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
> evlist__reset_prev_raw_counts(evsel_list);
> runtime_stat_reset(&stat_config);
> perf_stat__reset_shadow_per_stat(&rt_stat);
> - } else
> + } else {
> update_stats(&walltime_nsecs_stats, t1 - t0);
> + update_rusage_stats(&ru_stats, &stat_config.ru_data);
> + }
>
> /*
> * Closing a group leader splits the group, and as we only disable
> diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
> index 10af7804e482..ea4c35e4f1da 100644
> --- a/tools/perf/util/stat-shadow.c
> +++ b/tools/perf/util/stat-shadow.c
> @@ -26,6 +26,7 @@
>
> struct runtime_stat rt_stat;
> struct stats walltime_nsecs_stats;
> +struct rusage_stats ru_stats;
>
> struct saved_value {
> struct rb_node rb_node;
> @@ -199,6 +200,7 @@ void perf_stat__reset_shadow_stats(void)
> {
> reset_stat(&rt_stat);
> memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
> + memset(&ru_stats, 0, sizeof(ru_stats));
> }
>
> void perf_stat__reset_shadow_per_stat(struct runtime_stat *st)
> diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
> index 335d19cc3063..e31c94d952e9 100644
> --- a/tools/perf/util/stat.h
> +++ b/tools/perf/util/stat.h
> @@ -108,6 +108,11 @@ struct runtime_stat {
> struct rblist value_list;
> };
>
> +struct rusage_stats {
> + struct stats ru_utime_usec_stat;
> + struct stats ru_stime_usec_stat;
> +};
> +
> typedef struct aggr_cpu_id (*aggr_get_id_t)(struct perf_stat_config *config, struct perf_cpu cpu);
>
> struct perf_stat_config {
> @@ -148,6 +153,7 @@ struct perf_stat_config {
> const char *csv_sep;
> struct stats *walltime_nsecs_stats;
> struct rusage ru_data;
> + struct rusage_stats *ru_stats;
> struct cpu_aggr_map *aggr_map;
> aggr_get_id_t aggr_get_id;
> struct cpu_aggr_map *cpus_aggr_map;
> @@ -177,6 +183,20 @@ static inline void init_stats(struct stats *stats)
> stats->max = 0;
> }
>
> +static inline void init_rusage_stats(struct rusage_stats *ru_stats) {
> + init_stats(&ru_stats->ru_utime_usec_stat);
> + init_stats(&ru_stats->ru_stime_usec_stat);
> +}

Is there a missing call to this function?

Thanks,
Ian

> +static inline void update_rusage_stats(struct rusage_stats *ru_stats, struct rusage* rusage) {
> + const u64 us_to_ns = 1000;
> + const u64 s_to_ns = 1000000000;
> + update_stats(&ru_stats->ru_utime_usec_stat,
> + (rusage->ru_utime.tv_usec * us_to_ns + rusage->ru_utime.tv_sec * s_to_ns));
> + update_stats(&ru_stats->ru_stime_usec_stat,
> + (rusage->ru_stime.tv_usec * us_to_ns + rusage->ru_stime.tv_sec * s_to_ns));
> +}
> +
> struct evsel;
> struct evlist;
>
> @@ -196,6 +216,7 @@ bool __perf_stat_evsel__is(struct evsel *evsel, enum perf_stat_evsel_id id);
>
> extern struct runtime_stat rt_stat;
> extern struct stats walltime_nsecs_stats;
> +extern struct rusage_stats ru_stats;
>
> typedef void (*print_metric_t)(struct perf_stat_config *config,
> void *ctx, const char *color, const char *unit,
> --
> 2.36.0
>