2024-02-02 02:25:38

by Ian Rogers

[permalink] [raw]
Subject: [PATCH v1 1/3] perf stat: Pass fewer metric arguments

Pass metric_expr and evsel rather than specific variables from the
struct, thereby reducing the number of arguments. This will enable
later fixes.

Signed-off-by: Ian Rogers <[email protected]>
---
tools/perf/util/stat-shadow.c | 75 +++++++++++++++--------------------
1 file changed, 33 insertions(+), 42 deletions(-)

diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index e31426167852..f6c9d2f98835 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -355,23 +355,22 @@ static void print_nsecs(struct perf_stat_config *config,
print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0);
}

-static int prepare_metric(struct evsel **metric_events,
- struct metric_ref *metric_refs,
+static int prepare_metric(const struct metric_expr *mexp,
struct expr_parse_ctx *pctx,
int aggr_idx)
{
int i;

- for (i = 0; metric_events[i]; i++) {
+ for (i = 0; mexp->metric_events[i]; i++) {
char *n;
double val;
int source_count = 0;

- if (evsel__is_tool(metric_events[i])) {
+ if (evsel__is_tool(mexp->metric_events[i])) {
struct stats *stats;
double scale;

- switch (metric_events[i]->tool_event) {
+ switch (mexp->metric_events[i]->tool_event) {
case PERF_TOOL_DURATION_TIME:
stats = &walltime_nsecs_stats;
scale = 1e-9;
@@ -391,19 +390,20 @@ static int prepare_metric(struct evsel **metric_events,
pr_err("Invalid tool event 'max'");
abort();
default:
- pr_err("Unknown tool event '%s'", evsel__name(metric_events[i]));
+ pr_err("Unknown tool event '%s'",
+ evsel__name(mexp->metric_events[i]));
abort();
}
val = avg_stats(stats) * scale;
source_count = 1;
} else {
- struct perf_stat_evsel *ps = metric_events[i]->stats;
+ struct perf_stat_evsel *ps = mexp->metric_events[i]->stats;
struct perf_stat_aggr *aggr = &ps->aggr[aggr_idx];

if (!aggr)
break;

- if (!metric_events[i]->supported) {
+ if (!mexp->metric_events[i]->supported) {
/*
* Not supported events will have a count of 0,
* which can be confusing in a
@@ -419,19 +419,19 @@ static int prepare_metric(struct evsel **metric_events,
* reverse the scale before computing the
* metric.
*/
- val = aggr->counts.val * (1.0 / metric_events[i]->scale);
- source_count = evsel__source_count(metric_events[i]);
+ val = aggr->counts.val * (1.0 / mexp->metric_events[i]->scale);
+ source_count = evsel__source_count(mexp->metric_events[i]);
}
}
- n = strdup(evsel__metric_id(metric_events[i]));
+ n = strdup(evsel__metric_id(mexp->metric_events[i]));
if (!n)
return -ENOMEM;

expr__add_id_val_source_count(pctx, n, val, source_count);
}

- for (int j = 0; metric_refs && metric_refs[j].metric_name; j++) {
- int ret = expr__add_ref(pctx, &metric_refs[j]);
+ for (int j = 0; mexp->metric_refs && mexp->metric_refs[j].metric_name; j++) {
+ int ret = expr__add_ref(pctx, &mexp->metric_refs[j]);

if (ret)
return ret;
@@ -441,14 +441,8 @@ static int prepare_metric(struct evsel **metric_events,
}

static void generic_metric(struct perf_stat_config *config,
- const char *metric_expr,
- const char *metric_threshold,
- struct evsel **metric_events,
- struct metric_ref *metric_refs,
- char *name,
- const char *metric_name,
- const char *metric_unit,
- int runtime,
+ struct metric_expr *mexp,
+ struct evsel *evsel,
int aggr_idx,
struct perf_stat_output_ctx *out)
{
@@ -465,55 +459,55 @@ static void generic_metric(struct perf_stat_config *config,

if (config->user_requested_cpu_list)
pctx->sctx.user_requested_cpu_list = strdup(config->user_requested_cpu_list);
- pctx->sctx.runtime = runtime;
+ pctx->sctx.runtime = mexp->runtime;
pctx->sctx.system_wide = config->system_wide;
- i = prepare_metric(metric_events, metric_refs, pctx, aggr_idx);
+ i = prepare_metric(mexp, pctx, aggr_idx);
if (i < 0) {
expr__ctx_free(pctx);
return;
}
- if (!metric_events[i]) {
- if (expr__parse(&ratio, pctx, metric_expr) == 0) {
+ if (!mexp->metric_events[i]) {
+ if (expr__parse(&ratio, pctx, mexp->metric_expr) == 0) {
char *unit;
char metric_bf[64];

- if (metric_threshold &&
- expr__parse(&threshold, pctx, metric_threshold) == 0 &&
+ if (mexp->metric_threshold &&
+ expr__parse(&threshold, pctx, mexp->metric_threshold) == 0 &&
!isnan(threshold)) {
color = fpclassify(threshold) == FP_ZERO
? PERF_COLOR_GREEN : PERF_COLOR_RED;
}

- if (metric_unit && metric_name) {
- if (perf_pmu__convert_scale(metric_unit,
+ if (mexp->metric_unit && mexp->metric_name) {
+ if (perf_pmu__convert_scale(mexp->metric_unit,
&unit, &scale) >= 0) {
ratio *= scale;
}
- if (strstr(metric_expr, "?"))
+ if (strstr(mexp->metric_expr, "?"))
scnprintf(metric_bf, sizeof(metric_bf),
- "%s %s_%d", unit, metric_name, runtime);
+ "%s %s_%d", unit, mexp->metric_name, mexp->runtime);
else
scnprintf(metric_bf, sizeof(metric_bf),
- "%s %s", unit, metric_name);
+ "%s %s", unit, mexp->metric_name);

print_metric(config, ctxp, color, "%8.1f",
metric_bf, ratio);
} else {
print_metric(config, ctxp, color, "%8.2f",
- metric_name ?
- metric_name :
- out->force_header ? name : "",
+ mexp->metric_name ?
+ mexp->metric_name :
+ out->force_header ? evsel->name : "",
ratio);
}
} else {
print_metric(config, ctxp, color, /*unit=*/NULL,
out->force_header ?
- (metric_name ? metric_name : name) : "", 0);
+ (mexp->metric_name ?: evsel->name) : "", 0);
}
} else {
print_metric(config, ctxp, color, /*unit=*/NULL,
out->force_header ?
- (metric_name ? metric_name : name) : "", 0);
+ (mexp->metric_name ?: evsel->name) : "", 0);
}

expr__ctx_free(pctx);
@@ -528,7 +522,7 @@ double test_generic_metric(struct metric_expr *mexp, int aggr_idx)
if (!pctx)
return NAN;

- if (prepare_metric(mexp->metric_events, mexp->metric_refs, pctx, aggr_idx) < 0)
+ if (prepare_metric(mexp, pctx, aggr_idx) < 0)
goto out;

if (expr__parse(&ratio, pctx, mexp->metric_expr))
@@ -630,10 +624,7 @@ void *perf_stat__print_shadow_stats_metricgroup(struct perf_stat_config *config,

if ((*num)++ > 0)
out->new_line(config, ctxp);
- generic_metric(config, mexp->metric_expr, mexp->metric_threshold,
- mexp->metric_events, mexp->metric_refs, evsel->name,
- mexp->metric_name, mexp->metric_unit, mexp->runtime,
- aggr_idx, out);
+ generic_metric(config, mexp, evsel, aggr_idx, out);
}

return NULL;
--
2.43.0.594.gd9cf4e227d-goog



2024-02-02 02:25:51

by Ian Rogers

[permalink] [raw]
Subject: [PATCH v1 2/3] perf metrics: Compute unmerged uncore metrics individually

When merging counts from multiple uncore PMUs the metric is only
computed for the metric leader. When merging/aggregation is disabled,
prior to this patch just the leader's metric would be computed. Fix
this by computing the metric for each PMU.

On a SkylakeX:
Before:
```
$ perf stat -A -M memory_bandwidth_total -a sleep 1

Performance counter stats for 'system wide':

CPU0 82,217 UNC_M_CAS_COUNT.RD [uncore_imc_0] # 9.2 MB/s memory_bandwidth_total
CPU18 0 UNC_M_CAS_COUNT.RD [uncore_imc_0] # 0.0 MB/s memory_bandwidth_total
CPU0 61,395 UNC_M_CAS_COUNT.WR [uncore_imc_0]
CPU18 0 UNC_M_CAS_COUNT.WR [uncore_imc_0]
CPU0 0 UNC_M_CAS_COUNT.RD [uncore_imc_1]
CPU18 0 UNC_M_CAS_COUNT.RD [uncore_imc_1]
CPU0 0 UNC_M_CAS_COUNT.WR [uncore_imc_1]
CPU18 0 UNC_M_CAS_COUNT.WR [uncore_imc_1]
CPU0 81,570 UNC_M_CAS_COUNT.RD [uncore_imc_2]
CPU18 113,886 UNC_M_CAS_COUNT.RD [uncore_imc_2]
CPU0 62,330 UNC_M_CAS_COUNT.WR [uncore_imc_2]
CPU18 66,942 UNC_M_CAS_COUNT.WR [uncore_imc_2]
CPU0 75,489 UNC_M_CAS_COUNT.RD [uncore_imc_3]
CPU18 27,958 UNC_M_CAS_COUNT.RD [uncore_imc_3]
CPU0 55,864 UNC_M_CAS_COUNT.WR [uncore_imc_3]
CPU18 38,727 UNC_M_CAS_COUNT.WR [uncore_imc_3]
CPU0 0 UNC_M_CAS_COUNT.RD [uncore_imc_4]
CPU18 0 UNC_M_CAS_COUNT.RD [uncore_imc_4]
CPU0 0 UNC_M_CAS_COUNT.WR [uncore_imc_4]
CPU18 0 UNC_M_CAS_COUNT.WR [uncore_imc_4]
CPU0 75,423 UNC_M_CAS_COUNT.RD [uncore_imc_5]
CPU18 104,527 UNC_M_CAS_COUNT.RD [uncore_imc_5]
CPU0 57,596 UNC_M_CAS_COUNT.WR [uncore_imc_5]
CPU18 56,777 UNC_M_CAS_COUNT.WR [uncore_imc_5]
CPU0 1,003,440,851 ns duration_time

1.003440851 seconds time elapsed
```

After:
```
$ perf stat -A -M memory_bandwidth_total -a sleep 1

Performance counter stats for 'system wide':

CPU0 88,968 UNC_M_CAS_COUNT.RD [uncore_imc_0] # 9.5 MB/s memory_bandwidth_total
CPU18 0 UNC_M_CAS_COUNT.RD [uncore_imc_0] # 0.0 MB/s memory_bandwidth_total
CPU0 59,498 UNC_M_CAS_COUNT.WR [uncore_imc_0]
CPU18 0 UNC_M_CAS_COUNT.WR [uncore_imc_0]
CPU0 0 UNC_M_CAS_COUNT.RD [uncore_imc_1] # 0.0 MB/s memory_bandwidth_total
CPU18 0 UNC_M_CAS_COUNT.RD [uncore_imc_1] # 0.0 MB/s memory_bandwidth_total
CPU0 0 UNC_M_CAS_COUNT.WR [uncore_imc_1]
CPU18 0 UNC_M_CAS_COUNT.WR [uncore_imc_1]
CPU0 88,635 UNC_M_CAS_COUNT.RD [uncore_imc_2] # 9.5 MB/s memory_bandwidth_total
CPU18 117,975 UNC_M_CAS_COUNT.RD [uncore_imc_2] # 11.5 MB/s memory_bandwidth_total
CPU0 60,829 UNC_M_CAS_COUNT.WR [uncore_imc_2]
CPU18 62,105 UNC_M_CAS_COUNT.WR [uncore_imc_2]
CPU0 82,238 UNC_M_CAS_COUNT.RD [uncore_imc_3] # 8.7 MB/s memory_bandwidth_total
CPU18 22,906 UNC_M_CAS_COUNT.RD [uncore_imc_3] # 3.6 MB/s memory_bandwidth_total
CPU0 53,959 UNC_M_CAS_COUNT.WR [uncore_imc_3]
CPU18 32,990 UNC_M_CAS_COUNT.WR [uncore_imc_3]
CPU0 0 UNC_M_CAS_COUNT.RD [uncore_imc_4] # 0.0 MB/s memory_bandwidth_total
CPU18 0 UNC_M_CAS_COUNT.RD [uncore_imc_4] # 0.0 MB/s memory_bandwidth_total
CPU0 0 UNC_M_CAS_COUNT.WR [uncore_imc_4]
CPU18 0 UNC_M_CAS_COUNT.WR [uncore_imc_4]
CPU0 83,595 UNC_M_CAS_COUNT.RD [uncore_imc_5] # 8.9 MB/s memory_bandwidth_total
CPU18 110,151 UNC_M_CAS_COUNT.RD [uncore_imc_5] # 10.5 MB/s memory_bandwidth_total
CPU0 56,540 UNC_M_CAS_COUNT.WR [uncore_imc_5]
CPU18 53,816 UNC_M_CAS_COUNT.WR [uncore_imc_5]
CPU0 1,003,353,416 ns duration_time
```

Signed-off-by: Ian Rogers <[email protected]>
---
tools/perf/util/metricgroup.c | 2 ++
tools/perf/util/stat-shadow.c | 31 +++++++++++++++++++++++++++----
2 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index ca3e0404f187..c33ffee837ca 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -44,6 +44,8 @@ struct metric_event *metricgroup__lookup(struct rblist *metric_events,
if (!metric_events)
return NULL;

+ if (evsel->metric_leader)
+ me.evsel = evsel->metric_leader;
nd = rblist__find(metric_events, &me);
if (nd)
return container_of(nd, struct metric_event, nd);
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index f6c9d2f98835..1be23b0eee2f 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -356,6 +356,7 @@ static void print_nsecs(struct perf_stat_config *config,
}

static int prepare_metric(const struct metric_expr *mexp,
+ const struct evsel *evsel,
struct expr_parse_ctx *pctx,
int aggr_idx)
{
@@ -398,8 +399,29 @@ static int prepare_metric(const struct metric_expr *mexp,
source_count = 1;
} else {
struct perf_stat_evsel *ps = mexp->metric_events[i]->stats;
- struct perf_stat_aggr *aggr = &ps->aggr[aggr_idx];
+ struct perf_stat_aggr *aggr;

+ /*
+ * If there are multiple uncore PMUs and we're not
+ * reading the leader's stats, determine the stats for
+ * the appropriate uncore PMU.
+ */
+ if (evsel && evsel->metric_leader &&
+ evsel->pmu != evsel->metric_leader->pmu &&
+ mexp->metric_events[i]->pmu == evsel->metric_leader->pmu) {
+ struct evsel *pos;
+
+ evlist__for_each_entry(evsel->evlist, pos) {
+ if (pos->pmu != evsel->pmu)
+ continue;
+ if (pos->metric_leader != mexp->metric_events[i])
+ continue;
+ ps = pos->stats;
+ source_count = 1;
+ break;
+ }
+ }
+ aggr = &ps->aggr[aggr_idx];
if (!aggr)
break;

@@ -420,7 +442,8 @@ static int prepare_metric(const struct metric_expr *mexp,
* metric.
*/
val = aggr->counts.val * (1.0 / mexp->metric_events[i]->scale);
- source_count = evsel__source_count(mexp->metric_events[i]);
+ if (!source_count)
+ source_count = evsel__source_count(mexp->metric_events[i]);
}
}
n = strdup(evsel__metric_id(mexp->metric_events[i]));
@@ -461,7 +484,7 @@ static void generic_metric(struct perf_stat_config *config,
pctx->sctx.user_requested_cpu_list = strdup(config->user_requested_cpu_list);
pctx->sctx.runtime = mexp->runtime;
pctx->sctx.system_wide = config->system_wide;
- i = prepare_metric(mexp, pctx, aggr_idx);
+ i = prepare_metric(mexp, evsel, pctx, aggr_idx);
if (i < 0) {
expr__ctx_free(pctx);
return;
@@ -522,7 +545,7 @@ double test_generic_metric(struct metric_expr *mexp, int aggr_idx)
if (!pctx)
return NAN;

- if (prepare_metric(mexp, pctx, aggr_idx) < 0)
+ if (prepare_metric(mexp, /*evsel=*/NULL, pctx, aggr_idx) < 0)
goto out;

if (expr__parse(&ratio, pctx, mexp->metric_expr))
--
2.43.0.594.gd9cf4e227d-goog


2024-02-02 02:27:49

by Ian Rogers

[permalink] [raw]
Subject: [PATCH v1 3/3] perf stat: Fix metric-only aggregation index

Aggregation index was being computed using the evsel's cpumap which
may have a different (typically the same or fewer) entries.

Before:
```
$ perf stat --metric-only -A -M memory_bandwidth_total -a sleep 1

Performance counter stats for 'system wide':

MB/s memory_bandwidth_total MB/s memory_bandwidth_total MB/s memory_bandwidth_total MB/s memory_bandwidth_total MB/s memory_bandwidth_total MB/s memory_bandwidth_total
CPU0 12.8 0.0 12.9 12.7 0.0 12.6
CPU1

1.007806367 seconds time elapsed
```

After:
```
$ perf stat --metric-only -A -M memory_bandwidth_total -a sleep 1

Performance counter stats for 'system wide':

MB/s memory_bandwidth_total MB/s memory_bandwidth_total MB/s memory_bandwidth_total MB/s memory_bandwidth_total MB/s memory_bandwidth_total MB/s memory_bandwidth_total
CPU0 15.4 0.0 15.3 15.0 0.0 14.9
CPU18 0.0 0.0 13.5 5.2 0.0 11.9

1.007858736 seconds time elapsed
```

Signed-off-by: Ian Rogers <[email protected]>
---
tools/perf/util/stat-display.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 8c61f8627ebc..ce830c6afdf2 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -1126,11 +1126,16 @@ static void print_no_aggr_metric(struct perf_stat_config *config,
u64 ena, run, val;
double uval;
struct perf_stat_evsel *ps = counter->stats;
- int aggr_idx = perf_cpu_map__idx(evsel__cpus(counter), cpu);
+ int aggr_idx = 0;

- if (aggr_idx < 0)
+ if (!perf_cpu_map__has(evsel__cpus(counter), cpu))
continue;

+ cpu_aggr_map__for_each_idx(aggr_idx, config->aggr_map) {
+ if (config->aggr_map->map[aggr_idx].cpu.cpu == cpu.cpu)
+ break;
+ }
+
os->evsel = counter;
os->id = aggr_cpu_id__cpu(cpu, /*data=*/NULL);
if (first) {
--
2.43.0.594.gd9cf4e227d-goog


2024-02-06 01:59:34

by Namhyung Kim

[permalink] [raw]
Subject: Re: [PATCH v1 1/3] perf stat: Pass fewer metric arguments

On Thu, Feb 1, 2024 at 6:25 PM Ian Rogers <[email protected]> wrote:
>
> Pass metric_expr and evsel rather than specific variables from the
> struct, thereby reducing the number of arguments. This will enable
> later fixes.
>
> Signed-off-by: Ian Rogers <[email protected]>
> ---
> tools/perf/util/stat-shadow.c | 75 +++++++++++++++--------------------
> 1 file changed, 33 insertions(+), 42 deletions(-)
>
> diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
> index e31426167852..f6c9d2f98835 100644
> --- a/tools/perf/util/stat-shadow.c
> +++ b/tools/perf/util/stat-shadow.c
> @@ -355,23 +355,22 @@ static void print_nsecs(struct perf_stat_config *config,
> print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0);
> }
>
> -static int prepare_metric(struct evsel **metric_events,
> - struct metric_ref *metric_refs,
> +static int prepare_metric(const struct metric_expr *mexp,
> struct expr_parse_ctx *pctx,
> int aggr_idx)
> {
> int i;

You may add local variables with the same name to reduce
the amount of diff.

Thanks,
Namhyung

>
> - for (i = 0; metric_events[i]; i++) {
> + for (i = 0; mexp->metric_events[i]; i++) {
> char *n;
> double val;
> int source_count = 0;
>
> - if (evsel__is_tool(metric_events[i])) {
> + if (evsel__is_tool(mexp->metric_events[i])) {
> struct stats *stats;
> double scale;
>
> - switch (metric_events[i]->tool_event) {
> + switch (mexp->metric_events[i]->tool_event) {
> case PERF_TOOL_DURATION_TIME:
> stats = &walltime_nsecs_stats;
> scale = 1e-9;
> @@ -391,19 +390,20 @@ static int prepare_metric(struct evsel **metric_events,
> pr_err("Invalid tool event 'max'");
> abort();
> default:
> - pr_err("Unknown tool event '%s'", evsel__name(metric_events[i]));
> + pr_err("Unknown tool event '%s'",
> + evsel__name(mexp->metric_events[i]));
> abort();
> }
> val = avg_stats(stats) * scale;
> source_count = 1;
> } else {
> - struct perf_stat_evsel *ps = metric_events[i]->stats;
> + struct perf_stat_evsel *ps = mexp->metric_events[i]->stats;
> struct perf_stat_aggr *aggr = &ps->aggr[aggr_idx];
>
> if (!aggr)
> break;
>
> - if (!metric_events[i]->supported) {
> + if (!mexp->metric_events[i]->supported) {
> /*
> * Not supported events will have a count of 0,
> * which can be confusing in a
> @@ -419,19 +419,19 @@ static int prepare_metric(struct evsel **metric_events,
> * reverse the scale before computing the
> * metric.
> */
> - val = aggr->counts.val * (1.0 / metric_events[i]->scale);
> - source_count = evsel__source_count(metric_events[i]);
> + val = aggr->counts.val * (1.0 / mexp->metric_events[i]->scale);
> + source_count = evsel__source_count(mexp->metric_events[i]);
> }
> }
> - n = strdup(evsel__metric_id(metric_events[i]));
> + n = strdup(evsel__metric_id(mexp->metric_events[i]));
> if (!n)
> return -ENOMEM;
>
> expr__add_id_val_source_count(pctx, n, val, source_count);
> }
>
> - for (int j = 0; metric_refs && metric_refs[j].metric_name; j++) {
> - int ret = expr__add_ref(pctx, &metric_refs[j]);
> + for (int j = 0; mexp->metric_refs && mexp->metric_refs[j].metric_name; j++) {
> + int ret = expr__add_ref(pctx, &mexp->metric_refs[j]);
>
> if (ret)
> return ret;
> @@ -441,14 +441,8 @@ static int prepare_metric(struct evsel **metric_events,
> }
>
> static void generic_metric(struct perf_stat_config *config,
> - const char *metric_expr,
> - const char *metric_threshold,
> - struct evsel **metric_events,
> - struct metric_ref *metric_refs,
> - char *name,
> - const char *metric_name,
> - const char *metric_unit,
> - int runtime,
> + struct metric_expr *mexp,
> + struct evsel *evsel,
> int aggr_idx,
> struct perf_stat_output_ctx *out)
> {
> @@ -465,55 +459,55 @@ static void generic_metric(struct perf_stat_config *config,
>
> if (config->user_requested_cpu_list)
> pctx->sctx.user_requested_cpu_list = strdup(config->user_requested_cpu_list);
> - pctx->sctx.runtime = runtime;
> + pctx->sctx.runtime = mexp->runtime;
> pctx->sctx.system_wide = config->system_wide;
> - i = prepare_metric(metric_events, metric_refs, pctx, aggr_idx);
> + i = prepare_metric(mexp, pctx, aggr_idx);
> if (i < 0) {
> expr__ctx_free(pctx);
> return;
> }
> - if (!metric_events[i]) {
> - if (expr__parse(&ratio, pctx, metric_expr) == 0) {
> + if (!mexp->metric_events[i]) {
> + if (expr__parse(&ratio, pctx, mexp->metric_expr) == 0) {
> char *unit;
> char metric_bf[64];
>
> - if (metric_threshold &&
> - expr__parse(&threshold, pctx, metric_threshold) == 0 &&
> + if (mexp->metric_threshold &&
> + expr__parse(&threshold, pctx, mexp->metric_threshold) == 0 &&
> !isnan(threshold)) {
> color = fpclassify(threshold) == FP_ZERO
> ? PERF_COLOR_GREEN : PERF_COLOR_RED;
> }
>
> - if (metric_unit && metric_name) {
> - if (perf_pmu__convert_scale(metric_unit,
> + if (mexp->metric_unit && mexp->metric_name) {
> + if (perf_pmu__convert_scale(mexp->metric_unit,
> &unit, &scale) >= 0) {
> ratio *= scale;
> }
> - if (strstr(metric_expr, "?"))
> + if (strstr(mexp->metric_expr, "?"))
> scnprintf(metric_bf, sizeof(metric_bf),
> - "%s %s_%d", unit, metric_name, runtime);
> + "%s %s_%d", unit, mexp->metric_name, mexp->runtime);
> else
> scnprintf(metric_bf, sizeof(metric_bf),
> - "%s %s", unit, metric_name);
> + "%s %s", unit, mexp->metric_name);
>
> print_metric(config, ctxp, color, "%8.1f",
> metric_bf, ratio);
> } else {
> print_metric(config, ctxp, color, "%8.2f",
> - metric_name ?
> - metric_name :
> - out->force_header ? name : "",
> + mexp->metric_name ?
> + mexp->metric_name :
> + out->force_header ? evsel->name : "",
> ratio);
> }
> } else {
> print_metric(config, ctxp, color, /*unit=*/NULL,
> out->force_header ?
> - (metric_name ? metric_name : name) : "", 0);
> + (mexp->metric_name ?: evsel->name) : "", 0);
> }
> } else {
> print_metric(config, ctxp, color, /*unit=*/NULL,
> out->force_header ?
> - (metric_name ? metric_name : name) : "", 0);
> + (mexp->metric_name ?: evsel->name) : "", 0);
> }
>
> expr__ctx_free(pctx);
> @@ -528,7 +522,7 @@ double test_generic_metric(struct metric_expr *mexp, int aggr_idx)
> if (!pctx)
> return NAN;
>
> - if (prepare_metric(mexp->metric_events, mexp->metric_refs, pctx, aggr_idx) < 0)
> + if (prepare_metric(mexp, pctx, aggr_idx) < 0)
> goto out;
>
> if (expr__parse(&ratio, pctx, mexp->metric_expr))
> @@ -630,10 +624,7 @@ void *perf_stat__print_shadow_stats_metricgroup(struct perf_stat_config *config,
>
> if ((*num)++ > 0)
> out->new_line(config, ctxp);
> - generic_metric(config, mexp->metric_expr, mexp->metric_threshold,
> - mexp->metric_events, mexp->metric_refs, evsel->name,
> - mexp->metric_name, mexp->metric_unit, mexp->runtime,
> - aggr_idx, out);
> + generic_metric(config, mexp, evsel, aggr_idx, out);
> }
>
> return NULL;
> --
> 2.43.0.594.gd9cf4e227d-goog
>

2024-02-06 02:03:10

by Namhyung Kim

[permalink] [raw]
Subject: Re: [PATCH v1 2/3] perf metrics: Compute unmerged uncore metrics individually

On Thu, Feb 1, 2024 at 6:25 PM Ian Rogers <[email protected]> wrote:
>
> When merging counts from multiple uncore PMUs the metric is only
> computed for the metric leader. When merging/aggregation is disabled,
> prior to this patch just the leader's metric would be computed. Fix
> this by computing the metric for each PMU.
>
> On a SkylakeX:
> Before:
> ```
> $ perf stat -A -M memory_bandwidth_total -a sleep 1
>
> Performance counter stats for 'system wide':
>
> CPU0 82,217 UNC_M_CAS_COUNT.RD [uncore_imc_0] # 92 MB/s memory_bandwidth_total
> CPU18 0 UNC_M_CAS_COUNT.RD [uncore_imc_0] # 00 MB/s memory_bandwidth_total
> CPU0 61,395 UNC_M_CAS_COUNT.WR [uncore_imc_0]
> CPU18 0 UNC_M_CAS_COUNT.WR [uncore_imc_0]
> CPU0 0 UNC_M_CAS_COUNT.RD [uncore_imc_1]
> CPU18 0 UNC_M_CAS_COUNT.RD [uncore_imc_1]
> CPU0 0 UNC_M_CAS_COUNT.WR [uncore_imc_1]
> CPU18 0 UNC_M_CAS_COUNT.WR [uncore_imc_1]
> CPU0 81,570 UNC_M_CAS_COUNT.RD [uncore_imc_2]
> CPU18 113,886 UNC_M_CAS_COUNT.RD [uncore_imc_2]
> CPU0 62,330 UNC_M_CAS_COUNT.WR [uncore_imc_2]
> CPU18 66,942 UNC_M_CAS_COUNT.WR [uncore_imc_2]
> CPU0 75,489 UNC_M_CAS_COUNT.RD [uncore_imc_3]
> CPU18 27,958 UNC_M_CAS_COUNT.RD [uncore_imc_3]
> CPU0 55,864 UNC_M_CAS_COUNT.WR [uncore_imc_3]
> CPU18 38,727 UNC_M_CAS_COUNT.WR [uncore_imc_3]
> CPU0 0 UNC_M_CAS_COUNT.RD [uncore_imc_4]
> CPU18 0 UNC_M_CAS_COUNT.RD [uncore_imc_4]
> CPU0 0 UNC_M_CAS_COUNT.WR [uncore_imc_4]
> CPU18 0 UNC_M_CAS_COUNT.WR [uncore_imc_4]
> CPU0 75,423 UNC_M_CAS_COUNT.RD [uncore_imc_5]
> CPU18 104,527 UNC_M_CAS_COUNT.RD [uncore_imc_5]
> CPU0 57,596 UNC_M_CAS_COUNT.WR [uncore_imc_5]
> CPU18 56,777 UNC_M_CAS_COUNT.WR [uncore_imc_5]
> CPU0 1,003,440,851 ns duration_time
>
> 1.003440851 seconds time elapsed
> ```
>
> After:
> ```
> $ perf stat -A -M memory_bandwidth_total -a sleep 1
>
> Performance counter stats for 'system wide':
>
> CPU0 88,968 UNC_M_CAS_COUNT.RD [uncore_imc_0] # 95 MB/s memory_bandwidth_total
> CPU18 0 UNC_M_CAS_COUNT.RD [uncore_imc_0] # 00 MB/s memory_bandwidth_total
> CPU0 59,498 UNC_M_CAS_COUNT.WR [uncore_imc_0]
> CPU18 0 UNC_M_CAS_COUNT.WR [uncore_imc_0]
> CPU0 0 UNC_M_CAS_COUNT.RD [uncore_imc_1] # 00 MB/s memory_bandwidth_total
> CPU18 0 UNC_M_CAS_COUNT.RD [uncore_imc_1] # 00 MB/s memory_bandwidth_total
> CPU0 0 UNC_M_CAS_COUNT.WR [uncore_imc_1]
> CPU18 0 UNC_M_CAS_COUNT.WR [uncore_imc_1]
> CPU0 88,635 UNC_M_CAS_COUNT.RD [uncore_imc_2] # 95 MB/s memory_bandwidth_total
> CPU18 117,975 UNC_M_CAS_COUNT.RD [uncore_imc_2] # 115 MB/s memory_bandwidth_total
> CPU0 60,829 UNC_M_CAS_COUNT.WR [uncore_imc_2]
> CPU18 62,105 UNC_M_CAS_COUNT.WR [uncore_imc_2]
> CPU0 82,238 UNC_M_CAS_COUNT.RD [uncore_imc_3] # 87 MB/s memory_bandwidth_total
> CPU18 22,906 UNC_M_CAS_COUNT.RD [uncore_imc_3] # 36 MB/s memory_bandwidth_total
> CPU0 53,959 UNC_M_CAS_COUNT.WR [uncore_imc_3]
> CPU18 32,990 UNC_M_CAS_COUNT.WR [uncore_imc_3]
> CPU0 0 UNC_M_CAS_COUNT.RD [uncore_imc_4] # 00 MB/s memory_bandwidth_total
> CPU18 0 UNC_M_CAS_COUNT.RD [uncore_imc_4] # 00 MB/s memory_bandwidth_total
> CPU0 0 UNC_M_CAS_COUNT.WR [uncore_imc_4]
> CPU18 0 UNC_M_CAS_COUNT.WR [uncore_imc_4]
> CPU0 83,595 UNC_M_CAS_COUNT.RD [uncore_imc_5] # 89 MB/s memory_bandwidth_total
> CPU18 110,151 UNC_M_CAS_COUNT.RD [uncore_imc_5] # 105 MB/s memory_bandwidth_total
> CPU0 56,540 UNC_M_CAS_COUNT.WR [uncore_imc_5]
> CPU18 53,816 UNC_M_CAS_COUNT.WR [uncore_imc_5]
> CPU0 1,003,353,416 ns duration_time
> ```
>
> Signed-off-by: Ian Rogers <[email protected]>
> ---
> tools/perf/util/metricgroup.c | 2 ++
> tools/perf/util/stat-shadow.c | 31 +++++++++++++++++++++++++++----
> 2 files changed, 29 insertions(+), 4 deletions(-)
>
> diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
> index ca3e0404f187..c33ffee837ca 100644
> --- a/tools/perf/util/metricgroup.c
> +++ b/tools/perf/util/metricgroup.c
> @@ -44,6 +44,8 @@ struct metric_event *metricgroup__lookup(struct rblist *metric_events,
> if (!metric_events)
> return NULL;
>
> + if (evsel->metric_leader)
> + me.evsel = evsel->metric_leader;
> nd = rblist__find(metric_events, &me);
> if (nd)
> return container_of(nd, struct metric_event, nd);
> diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
> index f6c9d2f98835..1be23b0eee2f 100644
> --- a/tools/perf/util/stat-shadow.c
> +++ b/tools/perf/util/stat-shadow.c
> @@ -356,6 +356,7 @@ static void print_nsecs(struct perf_stat_config *config,
> }
>
> static int prepare_metric(const struct metric_expr *mexp,
> + const struct evsel *evsel,
> struct expr_parse_ctx *pctx,
> int aggr_idx)
> {
> @@ -398,8 +399,29 @@ static int prepare_metric(const struct metric_expr *mexp,
> source_count = 1;
> } else {
> struct perf_stat_evsel *ps = mexp->metric_events[i]->stats;
> - struct perf_stat_aggr *aggr = &ps->aggr[aggr_idx];
> + struct perf_stat_aggr *aggr;
>
> + /*
> + * If there are multiple uncore PMUs and we're not
> + * reading the leader's stats, determine the stats for
> + * the appropriate uncore PMU.
> + */
> + if (evsel && evsel->metric_leader &&
> + evsel->pmu != evsel->metric_leader->pmu &&
> + mexp->metric_events[i]->pmu == evsel->metric_leader->pmu) {

Is it just to check we're in --no-aggr (or --no-merge)?
Then it'd be simpler to use stat_config->aggr_mode.

Thanks,
Namhyung


> + struct evsel *pos;
> +
> + evlist__for_each_entry(evsel->evlist, pos) {
> + if (pos->pmu != evsel->pmu)
> + continue;
> + if (pos->metric_leader != mexp->metric_events[i])
> + continue;
> + ps = pos->stats;
> + source_count = 1;
> + break;
> + }
> + }
> + aggr = &ps->aggr[aggr_idx];
> if (!aggr)
> break;
>
> @@ -420,7 +442,8 @@ static int prepare_metric(const struct metric_expr *mexp,
> * metric.
> */
> val = aggr->counts.val * (1.0 / mexp->metric_events[i]->scale);
> - source_count = evsel__source_count(mexp->metric_events[i]);
> + if (!source_count)
> + source_count = evsel__source_count(mexp->metric_events[i]);
> }
> }
> n = strdup(evsel__metric_id(mexp->metric_events[i]));
> @@ -461,7 +484,7 @@ static void generic_metric(struct perf_stat_config *config,
> pctx->sctx.user_requested_cpu_list = strdup(config->user_requested_cpu_list);
> pctx->sctx.runtime = mexp->runtime;
> pctx->sctx.system_wide = config->system_wide;
> - i = prepare_metric(mexp, pctx, aggr_idx);
> + i = prepare_metric(mexp, evsel, pctx, aggr_idx);
> if (i < 0) {
> expr__ctx_free(pctx);
> return;
> @@ -522,7 +545,7 @@ double test_generic_metric(struct metric_expr *mexp, int aggr_idx)
> if (!pctx)
> return NAN;
>
> - if (prepare_metric(mexp, pctx, aggr_idx) < 0)
> + if (prepare_metric(mexp, /*evsel=*/NULL, pctx, aggr_idx) < 0)
> goto out;
>
> if (expr__parse(&ratio, pctx, mexp->metric_expr))
> --
> 2.43.0.594.gd9cf4e227d-goog
>

2024-02-06 02:21:24

by Ian Rogers

[permalink] [raw]
Subject: Re: [PATCH v1 2/3] perf metrics: Compute unmerged uncore metrics individually

On Mon, Feb 5, 2024 at 6:02 PM Namhyung Kim <[email protected]> wrote:
>
> On Thu, Feb 1, 2024 at 6:25 PM Ian Rogers <[email protected]> wrote:
> >
> > When merging counts from multiple uncore PMUs the metric is only
> > computed for the metric leader. When merging/aggregation is disabled,
> > prior to this patch just the leader's metric would be computed. Fix
> > this by computing the metric for each PMU.
> >
> > On a SkylakeX:
> > Before:
> > ```
> > $ perf stat -A -M memory_bandwidth_total -a sleep 1
> >
> > Performance counter stats for 'system wide':
> >
> > CPU0 82,217 UNC_M_CAS_COUNT.RD [uncore_imc_0] # 9.2 MB/s memory_bandwidth_total
> > CPU18 0 UNC_M_CAS_COUNT.RD [uncore_imc_0] # 0.0 MB/s memory_bandwidth_total
> > CPU0 61,395 UNC_M_CAS_COUNT.WR [uncore_imc_0]
> > CPU18 0 UNC_M_CAS_COUNT.WR [uncore_imc_0]
> > CPU0 0 UNC_M_CAS_COUNT.RD [uncore_imc_1]
> > CPU18 0 UNC_M_CAS_COUNT.RD [uncore_imc_1]
> > CPU0 0 UNC_M_CAS_COUNT.WR [uncore_imc_1]
> > CPU18 0 UNC_M_CAS_COUNT.WR [uncore_imc_1]
> > CPU0 81,570 UNC_M_CAS_COUNT.RD [uncore_imc_2]
> > CPU18 113,886 UNC_M_CAS_COUNT.RD [uncore_imc_2]
> > CPU0 62,330 UNC_M_CAS_COUNT.WR [uncore_imc_2]
> > CPU18 66,942 UNC_M_CAS_COUNT.WR [uncore_imc_2]
> > CPU0 75,489 UNC_M_CAS_COUNT.RD [uncore_imc_3]
> > CPU18 27,958 UNC_M_CAS_COUNT.RD [uncore_imc_3]
> > CPU0 55,864 UNC_M_CAS_COUNT.WR [uncore_imc_3]
> > CPU18 38,727 UNC_M_CAS_COUNT.WR [uncore_imc_3]
> > CPU0 0 UNC_M_CAS_COUNT.RD [uncore_imc_4]
> > CPU18 0 UNC_M_CAS_COUNT.RD [uncore_imc_4]
> > CPU0 0 UNC_M_CAS_COUNT.WR [uncore_imc_4]
> > CPU18 0 UNC_M_CAS_COUNT.WR [uncore_imc_4]
> > CPU0 75,423 UNC_M_CAS_COUNT.RD [uncore_imc_5]
> > CPU18 104,527 UNC_M_CAS_COUNT.RD [uncore_imc_5]
> > CPU0 57,596 UNC_M_CAS_COUNT.WR [uncore_imc_5]
> > CPU18 56,777 UNC_M_CAS_COUNT.WR [uncore_imc_5]
> > CPU0 1,003,440,851 ns duration_time
> >
> > 1.003440851 seconds time elapsed
> > ```
> >
> > After:
> > ```
> > $ perf stat -A -M memory_bandwidth_total -a sleep 1
> >
> > Performance counter stats for 'system wide':
> >
> > CPU0 88,968 UNC_M_CAS_COUNT.RD [uncore_imc_0] # 9.5 MB/s memory_bandwidth_total
> > CPU18 0 UNC_M_CAS_COUNT.RD [uncore_imc_0] # 0.0 MB/s memory_bandwidth_total
> > CPU0 59,498 UNC_M_CAS_COUNT.WR [uncore_imc_0]
> > CPU18 0 UNC_M_CAS_COUNT.WR [uncore_imc_0]
> > CPU0 0 UNC_M_CAS_COUNT.RD [uncore_imc_1] # 0.0 MB/s memory_bandwidth_total
> > CPU18 0 UNC_M_CAS_COUNT.RD [uncore_imc_1] # 0.0 MB/s memory_bandwidth_total
> > CPU0 0 UNC_M_CAS_COUNT.WR [uncore_imc_1]
> > CPU18 0 UNC_M_CAS_COUNT.WR [uncore_imc_1]
> > CPU0 88,635 UNC_M_CAS_COUNT.RD [uncore_imc_2] # 9.5 MB/s memory_bandwidth_total
> > CPU18 117,975 UNC_M_CAS_COUNT.RD [uncore_imc_2] # 11.5 MB/s memory_bandwidth_total
> > CPU0 60,829 UNC_M_CAS_COUNT.WR [uncore_imc_2]
> > CPU18 62,105 UNC_M_CAS_COUNT.WR [uncore_imc_2]
> > CPU0 82,238 UNC_M_CAS_COUNT.RD [uncore_imc_3] # 8.7 MB/s memory_bandwidth_total
> > CPU18 22,906 UNC_M_CAS_COUNT.RD [uncore_imc_3] # 3.6 MB/s memory_bandwidth_total
> > CPU0 53,959 UNC_M_CAS_COUNT.WR [uncore_imc_3]
> > CPU18 32,990 UNC_M_CAS_COUNT.WR [uncore_imc_3]
> > CPU0 0 UNC_M_CAS_COUNT.RD [uncore_imc_4] # 0.0 MB/s memory_bandwidth_total
> > CPU18 0 UNC_M_CAS_COUNT.RD [uncore_imc_4] # 0.0 MB/s memory_bandwidth_total
> > CPU0 0 UNC_M_CAS_COUNT.WR [uncore_imc_4]
> > CPU18 0 UNC_M_CAS_COUNT.WR [uncore_imc_4]
> > CPU0 83,595 UNC_M_CAS_COUNT.RD [uncore_imc_5] # 8.9 MB/s memory_bandwidth_total
> > CPU18 110,151 UNC_M_CAS_COUNT.RD [uncore_imc_5] # 10.5 MB/s memory_bandwidth_total
> > CPU0 56,540 UNC_M_CAS_COUNT.WR [uncore_imc_5]
> > CPU18 53,816 UNC_M_CAS_COUNT.WR [uncore_imc_5]
> > CPU0 1,003,353,416 ns duration_time
> > ```
> >
> > Signed-off-by: Ian Rogers <[email protected]>
> > ---
> > tools/perf/util/metricgroup.c | 2 ++
> > tools/perf/util/stat-shadow.c | 31 +++++++++++++++++++++++++++----
> > 2 files changed, 29 insertions(+), 4 deletions(-)
> >
> > diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
> > index ca3e0404f187..c33ffee837ca 100644
> > --- a/tools/perf/util/metricgroup.c
> > +++ b/tools/perf/util/metricgroup.c
> > @@ -44,6 +44,8 @@ struct metric_event *metricgroup__lookup(struct rblist *metric_events,
> > if (!metric_events)
> > return NULL;
> >
> > + if (evsel->metric_leader)
> > + me.evsel = evsel->metric_leader;
> > nd = rblist__find(metric_events, &me);
> > if (nd)
> > return container_of(nd, struct metric_event, nd);
> > diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
> > index f6c9d2f98835..1be23b0eee2f 100644
> > --- a/tools/perf/util/stat-shadow.c
> > +++ b/tools/perf/util/stat-shadow.c
> > @@ -356,6 +356,7 @@ static void print_nsecs(struct perf_stat_config *config,
> > }
> >
> > static int prepare_metric(const struct metric_expr *mexp,
> > + const struct evsel *evsel,
> > struct expr_parse_ctx *pctx,
> > int aggr_idx)
> > {
> > @@ -398,8 +399,29 @@ static int prepare_metric(const struct metric_expr *mexp,
> > source_count = 1;
> > } else {
> > struct perf_stat_evsel *ps = mexp->metric_events[i]->stats;
> > - struct perf_stat_aggr *aggr = &ps->aggr[aggr_idx];
> > + struct perf_stat_aggr *aggr;
> >
> > + /*
> > + * If there are multiple uncore PMUs and we're not
> > + * reading the leader's stats, determine the stats for
> > + * the appropriate uncore PMU.
> > + */
> > + if (evsel && evsel->metric_leader &&
> > + evsel->pmu != evsel->metric_leader->pmu &&
> > + mexp->metric_events[i]->pmu == evsel->metric_leader->pmu) {
>
> Is it just to check we're in --no-aggr (or --no-merge)?
> Then it'd be simpler to use stat_config->aggr_mode.

For most metrics the events will be on the same PMU, but there is
nothing stopping mixing events from different PMUs (grouping can be
disabled). There may also be software and tool evsels.

Thanks,
Ian

> Thanks,
> Namhyung
>
>
> > + struct evsel *pos;
> > +
> > + evlist__for_each_entry(evsel->evlist, pos) {
> > + if (pos->pmu != evsel->pmu)
> > + continue;
> > + if (pos->metric_leader != mexp->metric_events[i])
> > + continue;
> > + ps = pos->stats;
> > + source_count = 1;
> > + break;
> > + }
> > + }
> > + aggr = &ps->aggr[aggr_idx];
> > if (!aggr)
> > break;
> >
> > @@ -420,7 +442,8 @@ static int prepare_metric(const struct metric_expr *mexp,
> > * metric.
> > */
> > val = aggr->counts.val * (1.0 / mexp->metric_events[i]->scale);
> > - source_count = evsel__source_count(mexp->metric_events[i]);
> > + if (!source_count)
> > + source_count = evsel__source_count(mexp->metric_events[i]);
> > }
> > }
> > n = strdup(evsel__metric_id(mexp->metric_events[i]));
> > @@ -461,7 +484,7 @@ static void generic_metric(struct perf_stat_config *config,
> > pctx->sctx.user_requested_cpu_list = strdup(config->user_requested_cpu_list);
> > pctx->sctx.runtime = mexp->runtime;
> > pctx->sctx.system_wide = config->system_wide;
> > - i = prepare_metric(mexp, pctx, aggr_idx);
> > + i = prepare_metric(mexp, evsel, pctx, aggr_idx);
> > if (i < 0) {
> > expr__ctx_free(pctx);
> > return;
> > @@ -522,7 +545,7 @@ double test_generic_metric(struct metric_expr *mexp, int aggr_idx)
> > if (!pctx)
> > return NAN;
> >
> > - if (prepare_metric(mexp, pctx, aggr_idx) < 0)
> > + if (prepare_metric(mexp, /*evsel=*/NULL, pctx, aggr_idx) < 0)
> > goto out;
> >
> > if (expr__parse(&ratio, pctx, mexp->metric_expr))
> > --
> > 2.43.0.594.gd9cf4e227d-goog
> >

2024-02-06 02:24:16

by Ian Rogers

[permalink] [raw]
Subject: Re: [PATCH v1 1/3] perf stat: Pass fewer metric arguments

On Mon, Feb 5, 2024 at 5:59 PM Namhyung Kim <[email protected]> wrote:
>
> On Thu, Feb 1, 2024 at 6:25 PM Ian Rogers <[email protected]> wrote:
> >
> > Pass metric_expr and evsel rather than specific variables from the
> > struct, thereby reducing the number of arguments. This will enable
> > later fixes.
> >
> > Signed-off-by: Ian Rogers <[email protected]>
> > ---
> > tools/perf/util/stat-shadow.c | 75 +++++++++++++++--------------------
> > 1 file changed, 33 insertions(+), 42 deletions(-)
> >
> > diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
> > index e31426167852..f6c9d2f98835 100644
> > --- a/tools/perf/util/stat-shadow.c
> > +++ b/tools/perf/util/stat-shadow.c
> > @@ -355,23 +355,22 @@ static void print_nsecs(struct perf_stat_config *config,
> > print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0);
> > }
> >
> > -static int prepare_metric(struct evsel **metric_events,
> > - struct metric_ref *metric_refs,
> > +static int prepare_metric(const struct metric_expr *mexp,
> > struct expr_parse_ctx *pctx,
> > int aggr_idx)
> > {
> > int i;
>
> You may add local variables with the same name to reduce
> the amount of diff.

I'll see what I can do in v2.

Thanks,
Ian

> Thanks,
> Namhyung
>
> >
> > - for (i = 0; metric_events[i]; i++) {
> > + for (i = 0; mexp->metric_events[i]; i++) {
> > char *n;
> > double val;
> > int source_count = 0;
> >
> > - if (evsel__is_tool(metric_events[i])) {
> > + if (evsel__is_tool(mexp->metric_events[i])) {
> > struct stats *stats;
> > double scale;
> >
> > - switch (metric_events[i]->tool_event) {
> > + switch (mexp->metric_events[i]->tool_event) {
> > case PERF_TOOL_DURATION_TIME:
> > stats = &walltime_nsecs_stats;
> > scale = 1e-9;
> > @@ -391,19 +390,20 @@ static int prepare_metric(struct evsel **metric_events,
> > pr_err("Invalid tool event 'max'");
> > abort();
> > default:
> > - pr_err("Unknown tool event '%s'", evsel__name(metric_events[i]));
> > + pr_err("Unknown tool event '%s'",
> > + evsel__name(mexp->metric_events[i]));
> > abort();
> > }
> > val = avg_stats(stats) * scale;
> > source_count = 1;
> > } else {
> > - struct perf_stat_evsel *ps = metric_events[i]->stats;
> > + struct perf_stat_evsel *ps = mexp->metric_events[i]->stats;
> > struct perf_stat_aggr *aggr = &ps->aggr[aggr_idx];
> >
> > if (!aggr)
> > break;
> >
> > - if (!metric_events[i]->supported) {
> > + if (!mexp->metric_events[i]->supported) {
> > /*
> > * Not supported events will have a count of 0,
> > * which can be confusing in a
> > @@ -419,19 +419,19 @@ static int prepare_metric(struct evsel **metric_events,
> > * reverse the scale before computing the
> > * metric.
> > */
> > - val = aggr->counts.val * (1.0 / metric_events[i]->scale);
> > - source_count = evsel__source_count(metric_events[i]);
> > + val = aggr->counts.val * (1.0 / mexp->metric_events[i]->scale);
> > + source_count = evsel__source_count(mexp->metric_events[i]);
> > }
> > }
> > - n = strdup(evsel__metric_id(metric_events[i]));
> > + n = strdup(evsel__metric_id(mexp->metric_events[i]));
> > if (!n)
> > return -ENOMEM;
> >
> > expr__add_id_val_source_count(pctx, n, val, source_count);
> > }
> >
> > - for (int j = 0; metric_refs && metric_refs[j].metric_name; j++) {
> > - int ret = expr__add_ref(pctx, &metric_refs[j]);
> > + for (int j = 0; mexp->metric_refs && mexp->metric_refs[j].metric_name; j++) {
> > + int ret = expr__add_ref(pctx, &mexp->metric_refs[j]);
> >
> > if (ret)
> > return ret;
> > @@ -441,14 +441,8 @@ static int prepare_metric(struct evsel **metric_events,
> > }
> >
> > static void generic_metric(struct perf_stat_config *config,
> > - const char *metric_expr,
> > - const char *metric_threshold,
> > - struct evsel **metric_events,
> > - struct metric_ref *metric_refs,
> > - char *name,
> > - const char *metric_name,
> > - const char *metric_unit,
> > - int runtime,
> > + struct metric_expr *mexp,
> > + struct evsel *evsel,
> > int aggr_idx,
> > struct perf_stat_output_ctx *out)
> > {
> > @@ -465,55 +459,55 @@ static void generic_metric(struct perf_stat_config *config,
> >
> > if (config->user_requested_cpu_list)
> > pctx->sctx.user_requested_cpu_list = strdup(config->user_requested_cpu_list);
> > - pctx->sctx.runtime = runtime;
> > + pctx->sctx.runtime = mexp->runtime;
> > pctx->sctx.system_wide = config->system_wide;
> > - i = prepare_metric(metric_events, metric_refs, pctx, aggr_idx);
> > + i = prepare_metric(mexp, pctx, aggr_idx);
> > if (i < 0) {
> > expr__ctx_free(pctx);
> > return;
> > }
> > - if (!metric_events[i]) {
> > - if (expr__parse(&ratio, pctx, metric_expr) == 0) {
> > + if (!mexp->metric_events[i]) {
> > + if (expr__parse(&ratio, pctx, mexp->metric_expr) == 0) {
> > char *unit;
> > char metric_bf[64];
> >
> > - if (metric_threshold &&
> > - expr__parse(&threshold, pctx, metric_threshold) == 0 &&
> > + if (mexp->metric_threshold &&
> > + expr__parse(&threshold, pctx, mexp->metric_threshold) == 0 &&
> > !isnan(threshold)) {
> > color = fpclassify(threshold) == FP_ZERO
> > ? PERF_COLOR_GREEN : PERF_COLOR_RED;
> > }
> >
> > - if (metric_unit && metric_name) {
> > - if (perf_pmu__convert_scale(metric_unit,
> > + if (mexp->metric_unit && mexp->metric_name) {
> > + if (perf_pmu__convert_scale(mexp->metric_unit,
> > &unit, &scale) >= 0) {
> > ratio *= scale;
> > }
> > - if (strstr(metric_expr, "?"))
> > + if (strstr(mexp->metric_expr, "?"))
> > scnprintf(metric_bf, sizeof(metric_bf),
> > - "%s %s_%d", unit, metric_name, runtime);
> > + "%s %s_%d", unit, mexp->metric_name, mexp->runtime);
> > else
> > scnprintf(metric_bf, sizeof(metric_bf),
> > - "%s %s", unit, metric_name);
> > + "%s %s", unit, mexp->metric_name);
> >
> > print_metric(config, ctxp, color, "%8.1f",
> > metric_bf, ratio);
> > } else {
> > print_metric(config, ctxp, color, "%8.2f",
> > - metric_name ?
> > - metric_name :
> > - out->force_header ? name : "",
> > + mexp->metric_name ?
> > + mexp->metric_name :
> > + out->force_header ? evsel->name : "",
> > ratio);
> > }
> > } else {
> > print_metric(config, ctxp, color, /*unit=*/NULL,
> > out->force_header ?
> > - (metric_name ? metric_name : name) : "", 0);
> > + (mexp->metric_name ?: evsel->name) : "", 0);
> > }
> > } else {
> > print_metric(config, ctxp, color, /*unit=*/NULL,
> > out->force_header ?
> > - (metric_name ? metric_name : name) : "", 0);
> > + (mexp->metric_name ?: evsel->name) : "", 0);
> > }
> >
> > expr__ctx_free(pctx);
> > @@ -528,7 +522,7 @@ double test_generic_metric(struct metric_expr *mexp, int aggr_idx)
> > if (!pctx)
> > return NAN;
> >
> > - if (prepare_metric(mexp->metric_events, mexp->metric_refs, pctx, aggr_idx) < 0)
> > + if (prepare_metric(mexp, pctx, aggr_idx) < 0)
> > goto out;
> >
> > if (expr__parse(&ratio, pctx, mexp->metric_expr))
> > @@ -630,10 +624,7 @@ void *perf_stat__print_shadow_stats_metricgroup(struct perf_stat_config *config,
> >
> > if ((*num)++ > 0)
> > out->new_line(config, ctxp);
> > - generic_metric(config, mexp->metric_expr, mexp->metric_threshold,
> > - mexp->metric_events, mexp->metric_refs, evsel->name,
> > - mexp->metric_name, mexp->metric_unit, mexp->runtime,
> > - aggr_idx, out);
> > + generic_metric(config, mexp, evsel, aggr_idx, out);
> > }
> >
> > return NULL;
> > --
> > 2.43.0.594.gd9cf4e227d-goog
> >