Pass metric_expr and evsel rather than specific variables from the
struct, thereby reducing the number of arguments. This will enable
later fixes.
To reduce the size of the diff, local variables are added to match the
previous parameter names. This isn't done in the case of "name" as
evsel->name is more intention revealing. A whitespace issue is also
addressed.
Signed-off-by: Ian Rogers <[email protected]>
---
tools/perf/util/stat-shadow.c | 38 +++++++++++++++++------------------
1 file changed, 18 insertions(+), 20 deletions(-)
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index e31426167852..b3a25659b9e6 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -355,11 +355,12 @@ static void print_nsecs(struct perf_stat_config *config,
print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0);
}
-static int prepare_metric(struct evsel **metric_events,
- struct metric_ref *metric_refs,
+static int prepare_metric(const struct metric_expr *mexp,
struct expr_parse_ctx *pctx,
int aggr_idx)
{
+ struct evsel * const *metric_events = mexp->metric_events;
+ struct metric_ref *metric_refs = mexp->metric_refs;
int i;
for (i = 0; metric_events[i]; i++) {
@@ -403,7 +404,7 @@ static int prepare_metric(struct evsel **metric_events,
if (!aggr)
break;
- if (!metric_events[i]->supported) {
+ if (!metric_events[i]->supported) {
/*
* Not supported events will have a count of 0,
* which can be confusing in a
@@ -441,18 +442,18 @@ static int prepare_metric(struct evsel **metric_events,
}
static void generic_metric(struct perf_stat_config *config,
- const char *metric_expr,
- const char *metric_threshold,
- struct evsel **metric_events,
- struct metric_ref *metric_refs,
- char *name,
- const char *metric_name,
- const char *metric_unit,
- int runtime,
+ struct metric_expr *mexp,
+ struct evsel *evsel,
int aggr_idx,
struct perf_stat_output_ctx *out)
{
print_metric_t print_metric = out->print_metric;
+ const char *metric_name = mexp->metric_name;
+ const char *metric_expr = mexp->metric_expr;
+ const char *metric_threshold = mexp->metric_threshold;
+ const char *metric_unit = mexp->metric_unit;
+ struct evsel * const *metric_events = mexp->metric_events;
+ int runtime = mexp->runtime;
struct expr_parse_ctx *pctx;
double ratio, scale, threshold;
int i;
@@ -467,7 +468,7 @@ static void generic_metric(struct perf_stat_config *config,
pctx->sctx.user_requested_cpu_list = strdup(config->user_requested_cpu_list);
pctx->sctx.runtime = runtime;
pctx->sctx.system_wide = config->system_wide;
- i = prepare_metric(metric_events, metric_refs, pctx, aggr_idx);
+ i = prepare_metric(mexp, pctx, aggr_idx);
if (i < 0) {
expr__ctx_free(pctx);
return;
@@ -502,18 +503,18 @@ static void generic_metric(struct perf_stat_config *config,
print_metric(config, ctxp, color, "%8.2f",
metric_name ?
metric_name :
- out->force_header ? name : "",
+ out->force_header ? evsel->name : "",
ratio);
}
} else {
print_metric(config, ctxp, color, /*unit=*/NULL,
out->force_header ?
- (metric_name ? metric_name : name) : "", 0);
+ (metric_name ?: evsel->name) : "", 0);
}
} else {
print_metric(config, ctxp, color, /*unit=*/NULL,
out->force_header ?
- (metric_name ? metric_name : name) : "", 0);
+ (metric_name ?: evsel->name) : "", 0);
}
expr__ctx_free(pctx);
@@ -528,7 +529,7 @@ double test_generic_metric(struct metric_expr *mexp, int aggr_idx)
if (!pctx)
return NAN;
- if (prepare_metric(mexp->metric_events, mexp->metric_refs, pctx, aggr_idx) < 0)
+ if (prepare_metric(mexp, pctx, aggr_idx) < 0)
goto out;
if (expr__parse(&ratio, pctx, mexp->metric_expr))
@@ -630,10 +631,7 @@ void *perf_stat__print_shadow_stats_metricgroup(struct perf_stat_config *config,
if ((*num)++ > 0)
out->new_line(config, ctxp);
- generic_metric(config, mexp->metric_expr, mexp->metric_threshold,
- mexp->metric_events, mexp->metric_refs, evsel->name,
- mexp->metric_name, mexp->metric_unit, mexp->runtime,
- aggr_idx, out);
+ generic_metric(config, mexp, evsel, aggr_idx, out);
}
return NULL;
--
2.43.0.594.gd9cf4e227d-goog
When merging counts from multiple uncore PMUs the metric is only
computed for the metric leader. When merging/aggregation is disabled,
prior to this patch just the leader's metric would be computed. Fix
this by computing the metric for each PMU.
On a SkylakeX:
Before:
```
$ perf stat -A -M memory_bandwidth_total -a sleep 1
Performance counter stats for 'system wide':
CPU0 82,217 UNC_M_CAS_COUNT.RD [uncore_imc_0] # 9.2 MB/s memory_bandwidth_total
CPU18 0 UNC_M_CAS_COUNT.RD [uncore_imc_0] # 0.0 MB/s memory_bandwidth_total
CPU0 61,395 UNC_M_CAS_COUNT.WR [uncore_imc_0]
CPU18 0 UNC_M_CAS_COUNT.WR [uncore_imc_0]
CPU0 0 UNC_M_CAS_COUNT.RD [uncore_imc_1]
CPU18 0 UNC_M_CAS_COUNT.RD [uncore_imc_1]
CPU0 0 UNC_M_CAS_COUNT.WR [uncore_imc_1]
CPU18 0 UNC_M_CAS_COUNT.WR [uncore_imc_1]
CPU0 81,570 UNC_M_CAS_COUNT.RD [uncore_imc_2]
CPU18 113,886 UNC_M_CAS_COUNT.RD [uncore_imc_2]
CPU0 62,330 UNC_M_CAS_COUNT.WR [uncore_imc_2]
CPU18 66,942 UNC_M_CAS_COUNT.WR [uncore_imc_2]
CPU0 75,489 UNC_M_CAS_COUNT.RD [uncore_imc_3]
CPU18 27,958 UNC_M_CAS_COUNT.RD [uncore_imc_3]
CPU0 55,864 UNC_M_CAS_COUNT.WR [uncore_imc_3]
CPU18 38,727 UNC_M_CAS_COUNT.WR [uncore_imc_3]
CPU0 0 UNC_M_CAS_COUNT.RD [uncore_imc_4]
CPU18 0 UNC_M_CAS_COUNT.RD [uncore_imc_4]
CPU0 0 UNC_M_CAS_COUNT.WR [uncore_imc_4]
CPU18 0 UNC_M_CAS_COUNT.WR [uncore_imc_4]
CPU0 75,423 UNC_M_CAS_COUNT.RD [uncore_imc_5]
CPU18 104,527 UNC_M_CAS_COUNT.RD [uncore_imc_5]
CPU0 57,596 UNC_M_CAS_COUNT.WR [uncore_imc_5]
CPU18 56,777 UNC_M_CAS_COUNT.WR [uncore_imc_5]
CPU0 1,003,440,851 ns duration_time
1.003440851 seconds time elapsed
```
After:
```
$ perf stat -A -M memory_bandwidth_total -a sleep 1
Performance counter stats for 'system wide':
CPU0 88,968 UNC_M_CAS_COUNT.RD [uncore_imc_0] # 9.5 MB/s memory_bandwidth_total
CPU18 0 UNC_M_CAS_COUNT.RD [uncore_imc_0] # 0.0 MB/s memory_bandwidth_total
CPU0 59,498 UNC_M_CAS_COUNT.WR [uncore_imc_0]
CPU18 0 UNC_M_CAS_COUNT.WR [uncore_imc_0]
CPU0 0 UNC_M_CAS_COUNT.RD [uncore_imc_1] # 0.0 MB/s memory_bandwidth_total
CPU18 0 UNC_M_CAS_COUNT.RD [uncore_imc_1] # 0.0 MB/s memory_bandwidth_total
CPU0 0 UNC_M_CAS_COUNT.WR [uncore_imc_1]
CPU18 0 UNC_M_CAS_COUNT.WR [uncore_imc_1]
CPU0 88,635 UNC_M_CAS_COUNT.RD [uncore_imc_2] # 9.5 MB/s memory_bandwidth_total
CPU18 117,975 UNC_M_CAS_COUNT.RD [uncore_imc_2] # 11.5 MB/s memory_bandwidth_total
CPU0 60,829 UNC_M_CAS_COUNT.WR [uncore_imc_2]
CPU18 62,105 UNC_M_CAS_COUNT.WR [uncore_imc_2]
CPU0 82,238 UNC_M_CAS_COUNT.RD [uncore_imc_3] # 8.7 MB/s memory_bandwidth_total
CPU18 22,906 UNC_M_CAS_COUNT.RD [uncore_imc_3] # 3.6 MB/s memory_bandwidth_total
CPU0 53,959 UNC_M_CAS_COUNT.WR [uncore_imc_3]
CPU18 32,990 UNC_M_CAS_COUNT.WR [uncore_imc_3]
CPU0 0 UNC_M_CAS_COUNT.RD [uncore_imc_4] # 0.0 MB/s memory_bandwidth_total
CPU18 0 UNC_M_CAS_COUNT.RD [uncore_imc_4] # 0.0 MB/s memory_bandwidth_total
CPU0 0 UNC_M_CAS_COUNT.WR [uncore_imc_4]
CPU18 0 UNC_M_CAS_COUNT.WR [uncore_imc_4]
CPU0 83,595 UNC_M_CAS_COUNT.RD [uncore_imc_5] # 8.9 MB/s memory_bandwidth_total
CPU18 110,151 UNC_M_CAS_COUNT.RD [uncore_imc_5] # 10.5 MB/s memory_bandwidth_total
CPU0 56,540 UNC_M_CAS_COUNT.WR [uncore_imc_5]
CPU18 53,816 UNC_M_CAS_COUNT.WR [uncore_imc_5]
CPU0 1,003,353,416 ns duration_time
```
Signed-off-by: Ian Rogers <[email protected]>
---
tools/perf/util/metricgroup.c | 2 ++
tools/perf/util/stat-shadow.c | 31 +++++++++++++++++++++++++++----
2 files changed, 29 insertions(+), 4 deletions(-)
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index ca3e0404f187..c33ffee837ca 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -44,6 +44,8 @@ struct metric_event *metricgroup__lookup(struct rblist *metric_events,
if (!metric_events)
return NULL;
+ if (evsel->metric_leader)
+ me.evsel = evsel->metric_leader;
nd = rblist__find(metric_events, &me);
if (nd)
return container_of(nd, struct metric_event, nd);
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index b3a25659b9e6..c745337f50a4 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -356,6 +356,7 @@ static void print_nsecs(struct perf_stat_config *config,
}
static int prepare_metric(const struct metric_expr *mexp,
+ const struct evsel *evsel,
struct expr_parse_ctx *pctx,
int aggr_idx)
{
@@ -399,8 +400,29 @@ static int prepare_metric(const struct metric_expr *mexp,
source_count = 1;
} else {
struct perf_stat_evsel *ps = metric_events[i]->stats;
- struct perf_stat_aggr *aggr = &ps->aggr[aggr_idx];
+ struct perf_stat_aggr *aggr;
+ /*
+ * If there are multiple uncore PMUs and we're not
+ * reading the leader's stats, determine the stats for
+ * the appropriate uncore PMU.
+ */
+ if (evsel && evsel->metric_leader &&
+ evsel->pmu != evsel->metric_leader->pmu &&
+ mexp->metric_events[i]->pmu == evsel->metric_leader->pmu) {
+ struct evsel *pos;
+
+ evlist__for_each_entry(evsel->evlist, pos) {
+ if (pos->pmu != evsel->pmu)
+ continue;
+ if (pos->metric_leader != mexp->metric_events[i])
+ continue;
+ ps = pos->stats;
+ source_count = 1;
+ break;
+ }
+ }
+ aggr = &ps->aggr[aggr_idx];
if (!aggr)
break;
@@ -421,7 +443,8 @@ static int prepare_metric(const struct metric_expr *mexp,
* metric.
*/
val = aggr->counts.val * (1.0 / metric_events[i]->scale);
- source_count = evsel__source_count(metric_events[i]);
+ if (!source_count)
+ source_count = evsel__source_count(metric_events[i]);
}
}
n = strdup(evsel__metric_id(metric_events[i]));
@@ -468,7 +491,7 @@ static void generic_metric(struct perf_stat_config *config,
pctx->sctx.user_requested_cpu_list = strdup(config->user_requested_cpu_list);
pctx->sctx.runtime = runtime;
pctx->sctx.system_wide = config->system_wide;
- i = prepare_metric(mexp, pctx, aggr_idx);
+ i = prepare_metric(mexp, evsel, pctx, aggr_idx);
if (i < 0) {
expr__ctx_free(pctx);
return;
@@ -529,7 +552,7 @@ double test_generic_metric(struct metric_expr *mexp, int aggr_idx)
if (!pctx)
return NAN;
- if (prepare_metric(mexp, pctx, aggr_idx) < 0)
+ if (prepare_metric(mexp, /*evsel=*/NULL, pctx, aggr_idx) < 0)
goto out;
if (expr__parse(&ratio, pctx, mexp->metric_expr))
--
2.43.0.594.gd9cf4e227d-goog
Aggregation index was being computed using the evsel's cpumap which
may have a different (typically the same or fewer) entries.
Before:
```
$ perf stat --metric-only -A -M memory_bandwidth_total -a sleep 1
Performance counter stats for 'system wide':
MB/s memory_bandwidth_total MB/s memory_bandwidth_total MB/s memory_bandwidth_total MB/s memory_bandwidth_total MB/s memory_bandwidth_total MB/s memory_bandwidth_total
CPU0 12.8 0.0 12.9 12.7 0.0 12.6
CPU1
1.007806367 seconds time elapsed
```
After:
```
$ perf stat --metric-only -A -M memory_bandwidth_total -a sleep 1
Performance counter stats for 'system wide':
MB/s memory_bandwidth_total MB/s memory_bandwidth_total MB/s memory_bandwidth_total MB/s memory_bandwidth_total MB/s memory_bandwidth_total MB/s memory_bandwidth_total
CPU0 15.4 0.0 15.3 15.0 0.0 14.9
CPU18 0.0 0.0 13.5 5.2 0.0 11.9
1.007858736 seconds time elapsed
```
Signed-off-by: Ian Rogers <[email protected]>
---
tools/perf/util/stat-display.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 8c61f8627ebc..ce830c6afdf2 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -1126,11 +1126,16 @@ static void print_no_aggr_metric(struct perf_stat_config *config,
u64 ena, run, val;
double uval;
struct perf_stat_evsel *ps = counter->stats;
- int aggr_idx = perf_cpu_map__idx(evsel__cpus(counter), cpu);
+ int aggr_idx = 0;
- if (aggr_idx < 0)
+ if (!perf_cpu_map__has(evsel__cpus(counter), cpu))
continue;
+ cpu_aggr_map__for_each_idx(aggr_idx, config->aggr_map) {
+ if (config->aggr_map->map[aggr_idx].cpu.cpu == cpu.cpu)
+ break;
+ }
+
os->evsel = counter;
os->id = aggr_cpu_id__cpu(cpu, /*data=*/NULL);
if (first) {
--
2.43.0.594.gd9cf4e227d-goog
On Mon, Feb 5, 2024 at 8:32 PM Ian Rogers <[email protected]> wrote:
>
> Pass metric_expr and evsel rather than specific variables from the
> struct, thereby reducing the number of arguments. This will enable
> later fixes.
>
> To reduce the size of the diff, local variables are added to match the
> previous parameter names. This isn't done in the case of "name" as
> evsel->name is more intention revealing. A whitespace issue is also
> addressed.
>
> Signed-off-by: Ian Rogers <[email protected]>
This doesn't apply to perf-tools-next anymore. Can you please refresh?
Anyway the change looks good to me now. For the series:
Acked-by: Namhyung Kim <[email protected]>
Thanks,
Namhyung
> ---
> tools/perf/util/stat-shadow.c | 38 +++++++++++++++++------------------
> 1 file changed, 18 insertions(+), 20 deletions(-)
>
> diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
> index e31426167852..b3a25659b9e6 100644
> --- a/tools/perf/util/stat-shadow.c
> +++ b/tools/perf/util/stat-shadow.c
> @@ -355,11 +355,12 @@ static void print_nsecs(struct perf_stat_config *config,
> print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0);
> }
>
> -static int prepare_metric(struct evsel **metric_events,
> - struct metric_ref *metric_refs,
> +static int prepare_metric(const struct metric_expr *mexp,
> struct expr_parse_ctx *pctx,
> int aggr_idx)
> {
> + struct evsel * const *metric_events = mexp->metric_events;
> + struct metric_ref *metric_refs = mexp->metric_refs;
> int i;
>
> for (i = 0; metric_events[i]; i++) {
> @@ -403,7 +404,7 @@ static int prepare_metric(struct evsel **metric_events,
> if (!aggr)
> break;
>
> - if (!metric_events[i]->supported) {
> + if (!metric_events[i]->supported) {
> /*
> * Not supported events will have a count of 0,
> * which can be confusing in a
> @@ -441,18 +442,18 @@ static int prepare_metric(struct evsel **metric_events,
> }
>
> static void generic_metric(struct perf_stat_config *config,
> - const char *metric_expr,
> - const char *metric_threshold,
> - struct evsel **metric_events,
> - struct metric_ref *metric_refs,
> - char *name,
> - const char *metric_name,
> - const char *metric_unit,
> - int runtime,
> + struct metric_expr *mexp,
> + struct evsel *evsel,
> int aggr_idx,
> struct perf_stat_output_ctx *out)
> {
> print_metric_t print_metric = out->print_metric;
> + const char *metric_name = mexp->metric_name;
> + const char *metric_expr = mexp->metric_expr;
> + const char *metric_threshold = mexp->metric_threshold;
> + const char *metric_unit = mexp->metric_unit;
> + struct evsel * const *metric_events = mexp->metric_events;
> + int runtime = mexp->runtime;
> struct expr_parse_ctx *pctx;
> double ratio, scale, threshold;
> int i;
> @@ -467,7 +468,7 @@ static void generic_metric(struct perf_stat_config *config,
> pctx->sctx.user_requested_cpu_list = strdup(config->user_requested_cpu_list);
> pctx->sctx.runtime = runtime;
> pctx->sctx.system_wide = config->system_wide;
> - i = prepare_metric(metric_events, metric_refs, pctx, aggr_idx);
> + i = prepare_metric(mexp, pctx, aggr_idx);
> if (i < 0) {
> expr__ctx_free(pctx);
> return;
> @@ -502,18 +503,18 @@ static void generic_metric(struct perf_stat_config *config,
> print_metric(config, ctxp, color, "%8.2f",
> metric_name ?
> metric_name :
> - out->force_header ? name : "",
> + out->force_header ? evsel->name : "",
> ratio);
> }
> } else {
> print_metric(config, ctxp, color, /*unit=*/NULL,
> out->force_header ?
> - (metric_name ? metric_name : name) : "", 0);
> + (metric_name ?: evsel->name) : "", 0);
> }
> } else {
> print_metric(config, ctxp, color, /*unit=*/NULL,
> out->force_header ?
> - (metric_name ? metric_name : name) : "", 0);
> + (metric_name ?: evsel->name) : "", 0);
> }
>
> expr__ctx_free(pctx);
> @@ -528,7 +529,7 @@ double test_generic_metric(struct metric_expr *mexp, int aggr_idx)
> if (!pctx)
> return NAN;
>
> - if (prepare_metric(mexp->metric_events, mexp->metric_refs, pctx, aggr_idx) < 0)
> + if (prepare_metric(mexp, pctx, aggr_idx) < 0)
> goto out;
>
> if (expr__parse(&ratio, pctx, mexp->metric_expr))
> @@ -630,10 +631,7 @@ void *perf_stat__print_shadow_stats_metricgroup(struct perf_stat_config *config,
>
> if ((*num)++ > 0)
> out->new_line(config, ctxp);
> - generic_metric(config, mexp->metric_expr, mexp->metric_threshold,
> - mexp->metric_events, mexp->metric_refs, evsel->name,
> - mexp->metric_name, mexp->metric_unit, mexp->runtime,
> - aggr_idx, out);
> + generic_metric(config, mexp, evsel, aggr_idx, out);
> }
>
> return NULL;
> --
> 2.43.0.594.gd9cf4e227d-goog
>