Hello,
The current perf stat code is somewhat hard to follow since it handles
many combinations of PMUs/events for the given display and aggregation
options. This is my attempt to clean it up a little. ;-)
My first concern is that aggregation and display routines are intermixed
and processed differently depending on the aggregation mode. I'd like to
separate them and make the logic clearer.
To do that, I added struct perf_stat_aggr to save the aggregated counter
values and other info. It'll be allocated and processed according to
the aggr_mode, and the display logic will use it.
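For reference, it looks roughly like this (a sketch matching the
description above; the exact fields in the patches may differ slightly):

  /* aggregated counts and related info, kept per evsel in perf_stat_evsel */
  struct perf_stat_aggr {
          struct perf_counts_values counts;  /* aggregated val/ena/run */
          int  nr;       /* number of entries aggregated into this slot */
          bool failed;   /* whether reading any of the entries failed */
  };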
The code is available at 'perf/stat-aggr-v1' branch in
git://git.kernel.org/pub/scm/linux/kernel/git/namhyung/linux-perf.git
Thanks,
Namhyung
Namhyung Kim (19):
perf tools: Save evsel->pmu in parse_events()
perf tools: Use pmu info in evsel__is_hybrid()
perf stat: Use evsel__is_hybrid() more
perf stat: Add aggr id for global mode
perf stat: Add cpu aggr id for no aggregation mode
perf stat: Add 'needs_sort' argument to cpu_aggr_map__new()
perf stat: Add struct perf_stat_aggr to perf_stat_evsel
perf stat: Allocate evsel->stats->aggr properly
perf stat: Aggregate events using evsel->stats->aggr
perf stat: Aggregate per-thread stats using evsel->stats->aggr
perf stat: Allocate aggr counts for recorded data
perf stat: Reset aggr counts for each interval
perf stat: Split process_counters()
perf stat: Add perf_stat_merge_counters()
perf stat: Add perf_stat_process_percore()
perf stat: Add perf_stat_process_shadow_stats()
perf stat: Display event stats using aggr counts
perf stat: Display percore events properly
perf stat: Remove unused perf_counts.aggr field
tools/perf/builtin-script.c | 4 +-
tools/perf/builtin-stat.c | 177 +++++--
tools/perf/tests/parse-metric.c | 2 +-
tools/perf/tests/pmu-events.c | 2 +-
tools/perf/util/counts.c | 1 -
tools/perf/util/counts.h | 1 -
tools/perf/util/cpumap.c | 16 +-
tools/perf/util/cpumap.h | 8 +-
tools/perf/util/evsel.c | 13 +-
tools/perf/util/parse-events.c | 1 +
tools/perf/util/pmu.c | 4 +
.../scripting-engines/trace-event-python.c | 6 -
tools/perf/util/stat-display.c | 472 +++---------------
tools/perf/util/stat.c | 383 +++++++++++---
tools/perf/util/stat.h | 29 +-
15 files changed, 590 insertions(+), 529 deletions(-)
base-commit: d79310700590b8b40d8c867012d6c899ea6fd505
--
2.38.0.rc1.362.ged0d419d3c-goog
If evsel has pmu, it can use pmu->is_hybrid directly.
Signed-off-by: Namhyung Kim <[email protected]>
---
tools/perf/util/evsel.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 196f8e4859d7..a6ea91c72659 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -3132,6 +3132,9 @@ void evsel__zero_per_pkg(struct evsel *evsel)
bool evsel__is_hybrid(struct evsel *evsel)
{
+ if (evsel->pmu)
+ return evsel->pmu->is_hybrid;
+
return evsel->pmu_name && perf_pmu__is_hybrid(evsel->pmu_name);
}
--
2.38.0.rc1.362.ged0d419d3c-goog
The aggr field in the struct perf_counts is to keep the aggregated value
in the AGGR_GLOBAL for the old code. But it's not used anymore.
Signed-off-by: Namhyung Kim <[email protected]>
---
tools/perf/util/counts.c | 1 -
tools/perf/util/counts.h | 1 -
tools/perf/util/stat.c | 35 ++---------------------------------
3 files changed, 2 insertions(+), 35 deletions(-)
diff --git a/tools/perf/util/counts.c b/tools/perf/util/counts.c
index 7a447d918458..11cd85b278a6 100644
--- a/tools/perf/util/counts.c
+++ b/tools/perf/util/counts.c
@@ -48,7 +48,6 @@ void perf_counts__reset(struct perf_counts *counts)
{
xyarray__reset(counts->loaded);
xyarray__reset(counts->values);
- memset(&counts->aggr, 0, sizeof(struct perf_counts_values));
}
void evsel__reset_counts(struct evsel *evsel)
diff --git a/tools/perf/util/counts.h b/tools/perf/util/counts.h
index 5de275194f2b..42760242e0df 100644
--- a/tools/perf/util/counts.h
+++ b/tools/perf/util/counts.h
@@ -11,7 +11,6 @@ struct evsel;
struct perf_counts {
s8 scaled;
- struct perf_counts_values aggr;
struct xyarray *values;
struct xyarray *loaded;
};
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 1652586a4925..0dccfa273fa7 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -307,8 +307,6 @@ static void evsel__copy_prev_raw_counts(struct evsel *evsel)
*perf_counts(evsel->prev_raw_counts, idx, thread);
}
}
-
- evsel->counts->aggr = evsel->prev_raw_counts->aggr;
}
void evlist__copy_prev_raw_counts(struct evlist *evlist)
@@ -319,26 +317,6 @@ void evlist__copy_prev_raw_counts(struct evlist *evlist)
evsel__copy_prev_raw_counts(evsel);
}
-void evlist__save_aggr_prev_raw_counts(struct evlist *evlist)
-{
- struct evsel *evsel;
-
- /*
- * To collect the overall statistics for interval mode,
- * we copy the counts from evsel->prev_raw_counts to
- * evsel->counts. The perf_stat_process_counter creates
- * aggr values from per cpu values, but the per cpu values
- * are 0 for AGGR_GLOBAL. So we use a trick that saves the
- * previous aggr value to the first member of perf_counts,
- * then aggr calculation in process_counter_values can work
- * correctly.
- */
- evlist__for_each_entry(evlist, evsel) {
- *perf_counts(evsel->prev_raw_counts, 0, 0) =
- evsel->prev_raw_counts->aggr;
- }
-}
-
static size_t pkg_id_hash(const void *__key, void *ctx __maybe_unused)
{
uint64_t *key = (uint64_t *) __key;
@@ -422,7 +400,6 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel,
int cpu_map_idx, int thread,
struct perf_counts_values *count)
{
- struct perf_counts_values *aggr = &evsel->counts->aggr;
struct perf_stat_evsel *ps = evsel->stats;
static struct perf_counts_values zero;
bool skip = false;
@@ -491,12 +468,6 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel,
}
}
- if (config->aggr_mode == AGGR_GLOBAL) {
- aggr->val += count->val;
- aggr->ena += count->ena;
- aggr->run += count->run;
- }
-
return 0;
}
@@ -521,13 +492,10 @@ static int process_counter_maps(struct perf_stat_config *config,
int perf_stat_process_counter(struct perf_stat_config *config,
struct evsel *counter)
{
- struct perf_counts_values *aggr = &counter->counts->aggr;
struct perf_stat_evsel *ps = counter->stats;
- u64 *count = counter->counts->aggr.values;
+ u64 *count;
int ret;
- aggr->val = aggr->ena = aggr->run = 0;
-
if (counter->per_pkg)
evsel__zero_per_pkg(counter);
@@ -538,6 +506,7 @@ int perf_stat_process_counter(struct perf_stat_config *config,
if (config->aggr_mode != AGGR_GLOBAL)
return 0;
+ count = ps->aggr[0].counts.values;
update_stats(&ps->res_stats, *count);
if (verbose > 0) {
--
2.38.0.rc1.362.ged0d419d3c-goog
The recent change in perf stat broke the percore event display.
Note that the aggr counts are already processed so that every
sibling thread in the same core will get the per-core counter values.
Check percore evsels and skip the sibling threads in the display.
Signed-off-by: Namhyung Kim <[email protected]>
---
tools/perf/builtin-stat.c | 16 ----------------
tools/perf/util/stat-display.c | 27 +++++++++++++++++++++++++--
2 files changed, 25 insertions(+), 18 deletions(-)
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index d92815f4eae0..b3a39d4c86a7 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1403,18 +1403,6 @@ static struct aggr_cpu_id perf_stat__get_cpu_cached(struct perf_stat_config *con
return perf_stat__get_aggr(config, perf_stat__get_cpu, cpu);
}
-static bool term_percore_set(void)
-{
- struct evsel *counter;
-
- evlist__for_each_entry(evsel_list, counter) {
- if (counter->percore)
- return true;
- }
-
- return false;
-}
-
static aggr_cpu_id_get_t aggr_mode__get_aggr(enum aggr_mode aggr_mode)
{
switch (aggr_mode) {
@@ -1427,8 +1415,6 @@ static aggr_cpu_id_get_t aggr_mode__get_aggr(enum aggr_mode aggr_mode)
case AGGR_NODE:
return aggr_cpu_id__node;
case AGGR_NONE:
- if (term_percore_set())
- return aggr_cpu_id__core;
return aggr_cpu_id__cpu;;
case AGGR_GLOBAL:
return aggr_cpu_id__global;
@@ -1452,8 +1438,6 @@ static aggr_get_id_t aggr_mode__get_id(enum aggr_mode aggr_mode)
case AGGR_NODE:
return perf_stat__get_node_cached;
case AGGR_NONE:
- if (term_percore_set())
- return perf_stat__get_core_cached;
return perf_stat__get_cpu_cached;
case AGGR_GLOBAL:
return perf_stat__get_global_cached;
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 0c0e22c175a1..e0c0df99d40d 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -1094,7 +1094,8 @@ static void print_percore(struct perf_stat_config *config,
{
bool metric_only = config->metric_only;
FILE *output = config->output;
- int s;
+ struct cpu_aggr_map *core_map;
+ int s, c, i;
bool first = true;
if (!config->aggr_map || !config->aggr_get_id)
@@ -1103,13 +1104,35 @@ static void print_percore(struct perf_stat_config *config,
if (config->percore_show_thread)
return print_counter(config, counter, prefix);
- for (s = 0; s < config->aggr_map->nr; s++) {
+ core_map = cpu_aggr_map__empty_new(config->aggr_map->nr);
+ if (core_map == NULL) {
+ fprintf(output, "Cannot allocate per-core aggr map for display\n");
+ return;
+ }
+
+ for (s = 0, c = 0; s < config->aggr_map->nr; s++) {
+ struct perf_cpu curr_cpu = config->aggr_map->map[s].cpu;
+ struct aggr_cpu_id core_id = aggr_cpu_id__core(curr_cpu, NULL);
+ bool found = false;
+
+ for (i = 0; i < c; i++) {
+ if (aggr_cpu_id__equal(&core_map->map[i], &core_id)) {
+ found = true;
+ break;
+ }
+ }
+ if (found)
+ continue;
+
if (prefix && metric_only)
fprintf(output, "%s", prefix);
print_counter_aggrdata(config, counter, s,
prefix, metric_only, &first);
+
+ core_map->map[c++] = core_id;
}
+ free(core_map);
if (metric_only)
fputc('\n', output);
--
2.38.0.rc1.362.ged0d419d3c-goog
On Sun, Oct 9, 2022 at 10:36 PM Namhyung Kim <[email protected]> wrote:
>
> If evsel has pmu, it can use pmu->is_hybrid directly.
>
> Signed-off-by: Namhyung Kim <[email protected]>
> ---
> tools/perf/util/evsel.c | 3 +++
> 1 file changed, 3 insertions(+)
>
> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> index 196f8e4859d7..a6ea91c72659 100644
> --- a/tools/perf/util/evsel.c
> +++ b/tools/perf/util/evsel.c
> @@ -3132,6 +3132,9 @@ void evsel__zero_per_pkg(struct evsel *evsel)
>
> bool evsel__is_hybrid(struct evsel *evsel)
> {
> + if (evsel->pmu)
> + return evsel->pmu->is_hybrid;
> +
> return evsel->pmu_name && perf_pmu__is_hybrid(evsel->pmu_name);
Wow, there's so much duplicated state. Why do evsels have a pmu_name
and a pmu? Why not just pmu->name? I feel always having a pmu would be
cleanest here. That said, what does evsel__is_hybrid even mean? Does it
mean this event is on a PMU normally called cpu and called cpu_core
and cpu_atom on hybrid systems? And of course there are no comments to
explain what this little mystery could be. Anyway, that's not a fault
of this change, and probably later changes will go some way toward
cleaning this up. It was a shame the code wasn't cleaner in the first
place.
Acked-by: Ian Rogers
Thanks,
Ian
> }
>
> --
> 2.38.0.rc1.362.ged0d419d3c-goog
>
On Sun, Oct 9, 2022 at 10:36 PM Namhyung Kim <[email protected]> wrote:
>
> The aggr field in the struct perf_counts is to keep the aggregated value
> in the AGGR_GLOBAL for the old code. But it's not used anymore.
>
> Signed-off-by: Namhyung Kim <[email protected]>
> ---
> tools/perf/util/counts.c | 1 -
> tools/perf/util/counts.h | 1 -
> tools/perf/util/stat.c | 35 ++---------------------------------
> 3 files changed, 2 insertions(+), 35 deletions(-)
Very nice!
Acked-by: Ian Rogers <[email protected]>
Thanks,
Ian
>
> diff --git a/tools/perf/util/counts.c b/tools/perf/util/counts.c
> index 7a447d918458..11cd85b278a6 100644
> --- a/tools/perf/util/counts.c
> +++ b/tools/perf/util/counts.c
> @@ -48,7 +48,6 @@ void perf_counts__reset(struct perf_counts *counts)
> {
> xyarray__reset(counts->loaded);
> xyarray__reset(counts->values);
> - memset(&counts->aggr, 0, sizeof(struct perf_counts_values));
> }
>
> void evsel__reset_counts(struct evsel *evsel)
> diff --git a/tools/perf/util/counts.h b/tools/perf/util/counts.h
> index 5de275194f2b..42760242e0df 100644
> --- a/tools/perf/util/counts.h
> +++ b/tools/perf/util/counts.h
> @@ -11,7 +11,6 @@ struct evsel;
>
> struct perf_counts {
> s8 scaled;
> - struct perf_counts_values aggr;
> struct xyarray *values;
> struct xyarray *loaded;
> };
> diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
> index 1652586a4925..0dccfa273fa7 100644
> --- a/tools/perf/util/stat.c
> +++ b/tools/perf/util/stat.c
> @@ -307,8 +307,6 @@ static void evsel__copy_prev_raw_counts(struct evsel *evsel)
> *perf_counts(evsel->prev_raw_counts, idx, thread);
> }
> }
> -
> - evsel->counts->aggr = evsel->prev_raw_counts->aggr;
> }
>
> void evlist__copy_prev_raw_counts(struct evlist *evlist)
> @@ -319,26 +317,6 @@ void evlist__copy_prev_raw_counts(struct evlist *evlist)
> evsel__copy_prev_raw_counts(evsel);
> }
>
> -void evlist__save_aggr_prev_raw_counts(struct evlist *evlist)
> -{
> - struct evsel *evsel;
> -
> - /*
> - * To collect the overall statistics for interval mode,
> - * we copy the counts from evsel->prev_raw_counts to
> - * evsel->counts. The perf_stat_process_counter creates
> - * aggr values from per cpu values, but the per cpu values
> - * are 0 for AGGR_GLOBAL. So we use a trick that saves the
> - * previous aggr value to the first member of perf_counts,
> - * then aggr calculation in process_counter_values can work
> - * correctly.
> - */
> - evlist__for_each_entry(evlist, evsel) {
> - *perf_counts(evsel->prev_raw_counts, 0, 0) =
> - evsel->prev_raw_counts->aggr;
> - }
> -}
> -
> static size_t pkg_id_hash(const void *__key, void *ctx __maybe_unused)
> {
> uint64_t *key = (uint64_t *) __key;
> @@ -422,7 +400,6 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel,
> int cpu_map_idx, int thread,
> struct perf_counts_values *count)
> {
> - struct perf_counts_values *aggr = &evsel->counts->aggr;
> struct perf_stat_evsel *ps = evsel->stats;
> static struct perf_counts_values zero;
> bool skip = false;
> @@ -491,12 +468,6 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel,
> }
> }
>
> - if (config->aggr_mode == AGGR_GLOBAL) {
> - aggr->val += count->val;
> - aggr->ena += count->ena;
> - aggr->run += count->run;
> - }
> -
> return 0;
> }
>
> @@ -521,13 +492,10 @@ static int process_counter_maps(struct perf_stat_config *config,
> int perf_stat_process_counter(struct perf_stat_config *config,
> struct evsel *counter)
> {
> - struct perf_counts_values *aggr = &counter->counts->aggr;
> struct perf_stat_evsel *ps = counter->stats;
> - u64 *count = counter->counts->aggr.values;
> + u64 *count;
> int ret;
>
> - aggr->val = aggr->ena = aggr->run = 0;
> -
> if (counter->per_pkg)
> evsel__zero_per_pkg(counter);
>
> @@ -538,6 +506,7 @@ int perf_stat_process_counter(struct perf_stat_config *config,
> if (config->aggr_mode != AGGR_GLOBAL)
> return 0;
>
> + count = ps->aggr[0].counts.values;
> update_stats(&ps->res_stats, *count);
>
> if (verbose > 0) {
> --
> 2.38.0.rc1.362.ged0d419d3c-goog
>
On Sun, Oct 9, 2022 at 10:36 PM Namhyung Kim <[email protected]> wrote:
>
> The recent change in the perf stat broke the percore event display.
> Note that the aggr counts are already processed so that the every
> sibling thread in the same core will get the per-core counter values.
Could we add a test given this has broken once?
> Check percore evsels and skip the sibling threads in the display.
>
> Signed-off-by: Namhyung Kim <[email protected]>
Acked-by: Ian Rogers <[email protected]>
Thanks,
Ian
> ---
> tools/perf/builtin-stat.c | 16 ----------------
> tools/perf/util/stat-display.c | 27 +++++++++++++++++++++++++--
> 2 files changed, 25 insertions(+), 18 deletions(-)
>
> diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
> index d92815f4eae0..b3a39d4c86a7 100644
> --- a/tools/perf/builtin-stat.c
> +++ b/tools/perf/builtin-stat.c
> @@ -1403,18 +1403,6 @@ static struct aggr_cpu_id perf_stat__get_cpu_cached(struct perf_stat_config *con
> return perf_stat__get_aggr(config, perf_stat__get_cpu, cpu);
> }
>
> -static bool term_percore_set(void)
> -{
> - struct evsel *counter;
> -
> - evlist__for_each_entry(evsel_list, counter) {
> - if (counter->percore)
> - return true;
> - }
> -
> - return false;
> -}
> -
> static aggr_cpu_id_get_t aggr_mode__get_aggr(enum aggr_mode aggr_mode)
> {
> switch (aggr_mode) {
> @@ -1427,8 +1415,6 @@ static aggr_cpu_id_get_t aggr_mode__get_aggr(enum aggr_mode aggr_mode)
> case AGGR_NODE:
> return aggr_cpu_id__node;
> case AGGR_NONE:
> - if (term_percore_set())
> - return aggr_cpu_id__core;
> return aggr_cpu_id__cpu;;
> case AGGR_GLOBAL:
> return aggr_cpu_id__global;
> @@ -1452,8 +1438,6 @@ static aggr_get_id_t aggr_mode__get_id(enum aggr_mode aggr_mode)
> case AGGR_NODE:
> return perf_stat__get_node_cached;
> case AGGR_NONE:
> - if (term_percore_set())
> - return perf_stat__get_core_cached;
> return perf_stat__get_cpu_cached;
> case AGGR_GLOBAL:
> return perf_stat__get_global_cached;
> diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
> index 0c0e22c175a1..e0c0df99d40d 100644
> --- a/tools/perf/util/stat-display.c
> +++ b/tools/perf/util/stat-display.c
> @@ -1094,7 +1094,8 @@ static void print_percore(struct perf_stat_config *config,
> {
> bool metric_only = config->metric_only;
> FILE *output = config->output;
> - int s;
> + struct cpu_aggr_map *core_map;
> + int s, c, i;
> bool first = true;
>
> if (!config->aggr_map || !config->aggr_get_id)
> @@ -1103,13 +1104,35 @@ static void print_percore(struct perf_stat_config *config,
> if (config->percore_show_thread)
> return print_counter(config, counter, prefix);
>
> - for (s = 0; s < config->aggr_map->nr; s++) {
> + core_map = cpu_aggr_map__empty_new(config->aggr_map->nr);
> + if (core_map == NULL) {
> + fprintf(output, "Cannot allocate per-core aggr map for display\n");
> + return;
> + }
> +
> + for (s = 0, c = 0; s < config->aggr_map->nr; s++) {
> + struct perf_cpu curr_cpu = config->aggr_map->map[s].cpu;
> + struct aggr_cpu_id core_id = aggr_cpu_id__core(curr_cpu, NULL);
> + bool found = false;
> +
> + for (i = 0; i < c; i++) {
> + if (aggr_cpu_id__equal(&core_map->map[i], &core_id)) {
> + found = true;
> + break;
> + }
> + }
> + if (found)
> + continue;
> +
> if (prefix && metric_only)
> fprintf(output, "%s", prefix);
>
> print_counter_aggrdata(config, counter, s,
> prefix, metric_only, &first);
> +
> + core_map->map[c++] = core_id;
> }
> + free(core_map);
>
> if (metric_only)
> fputc('\n', output);
> --
> 2.38.0.rc1.362.ged0d419d3c-goog
>
On 10/10/2022 10:35 PM, Namhyung Kim wrote:
> Hello,
>
> Current perf stat code is somewhat hard to follow since it handles
> many combinations of PMUs/events for given display and aggregation
> options. This is my attempt to clean it up a little. ;-)
My main concern would be subtle regressions since there are so many
different combinations and ways to travel through the code, and a lot of
things are not covered by unit tests. When I worked on the code it was
difficult to keep it all working. I assume you have some way to
enumerate them all and tested that the output is identical?
-Andi
Hi Ian,
On Mon, Oct 10, 2022 at 3:31 PM Ian Rogers <[email protected]> wrote:
>
> On Sun, Oct 9, 2022 at 10:36 PM Namhyung Kim <[email protected]> wrote:
> >
> > If evsel has pmu, it can use pmu->is_hybrid directly.
> >
> > Signed-off-by: Namhyung Kim <[email protected]>
> > ---
> > tools/perf/util/evsel.c | 3 +++
> > 1 file changed, 3 insertions(+)
> >
> > diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> > index 196f8e4859d7..a6ea91c72659 100644
> > --- a/tools/perf/util/evsel.c
> > +++ b/tools/perf/util/evsel.c
> > @@ -3132,6 +3132,9 @@ void evsel__zero_per_pkg(struct evsel *evsel)
> >
> > bool evsel__is_hybrid(struct evsel *evsel)
> > {
> > + if (evsel->pmu)
> > + return evsel->pmu->is_hybrid;
> > +
> > return evsel->pmu_name && perf_pmu__is_hybrid(evsel->pmu_name);
>
> Wow, there's so much duplicated state. Why do evsels have a pmu_name
> and a pmu? Why not just pmu->name? I feel always having a pmu would be
> cleanest here.
Thanks a lot Ian for your detailed review!
The evsel->pmu was added recently for checking missing features.
And I just made parse_events() save the pmu info there.
I guess it has pmu_name because we didn't want to add a pmu.c dependency
to the python module. But this change only adds a pmu.h dependency.
> That said what does evsel__is_hybrid even mean? Does it
> mean this event is on a PMU normally called cpu and called cpu_core
> and cpu_atom on hybrid systems? And of course there are no comments to
> explain what this little mystery could be.
I believe so.
> Anyway, that's not a fault
> of this change, and probably later changes will go someway toward
> cleaning this up. It was a shame the code wasn't cleaner in the first
> place.
>
> Acked-by: Ian Rogers
Thanks,
Namhyung
Hi Andi,
On Mon, Oct 10, 2022 at 5:25 PM Andi Kleen <[email protected]> wrote:
>
>
> On 10/10/2022 10:35 PM, Namhyung Kim wrote:
> > Hello,
> >
> > Current perf stat code is somewhat hard to follow since it handles
> > many combinations of PMUs/events for given display and aggregation
> > options. This is my attempt to clean it up a little. ;-)
>
>
> My main concern would be subtle regressions since there are so many
> different combinations and way to travel through the code, and a lot of
> things are not covered by unit tests. When I worked on the code it was
> difficult to keep it all working. I assume you have some way to
> enumerate them all and tested that the output is identical?
Right, that's my concern too.
I have tested many combinations manually and checked if they
produced similar results. But the problem is that I cannot test
all hardware and, more importantly, it's hard to check
programmatically whether the output is the same or not. The numbers
vary on each run and sometimes fluctuate a lot. I don't have
good test workloads that produce consistent results for every combination.
Any suggestions?
Thanks,
Namhyung
On Mon, Oct 10, 2022 at 10:38 PM Namhyung Kim <[email protected]> wrote:
>
> Hi Andi,
>
> On Mon, Oct 10, 2022 at 5:25 PM Andi Kleen <[email protected]> wrote:
> >
> >
> > On 10/10/2022 10:35 PM, Namhyung Kim wrote:
> > > Hello,
> > >
> > > Current perf stat code is somewhat hard to follow since it handles
> > > many combinations of PMUs/events for given display and aggregation
> > > options. This is my attempt to clean it up a little. ;-)
> >
> >
> > My main concern would be subtle regressions since there are so many
> > different combinations and way to travel through the code, and a lot of
> > things are not covered by unit tests. When I worked on the code it was
> > difficult to keep it all working. I assume you have some way to
> > enumerate them all and tested that the output is identical?
>
> Right, that's my concern too.
>
> I have tested many combinations manually and checked if they
> produced similar results. But the problem is that I cannot test
> all hardwares and more importantly it's hard to check
> programmatically if the output is the same or not. The numbers
> vary on each run and sometimes it fluctuates a lot. I don't have
> good test workloads and the results work for every combination.
>
> Any suggestions?
I don't think there is anything clever we can do here. A few releases
ago summary mode was enabled by default. For CSV output this meant a
summary was printed at the bottom of the perf stat output and,
importantly, the summary printout added a column to the left of all
the other columns.
This caused some tool issues for us. We now have a test that CSV
output has a fixed number of columns. We added the CSV test because
the json output code reformatted the display code and it would be easy
to introduce a regression (in fact I did :-/ ). So my point is that
stat output can change and break things and we've been doing this by
accident for a while now. This isn't a reason to not merge this
change.
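(Roughly, that kind of check boils down to the sketch below -- just an
illustration, not the actual test script:

  $ perf stat -x, -e cycles true 2>&1 | grep -v '^#' | \
        awk -F, 'NF { print NF }' | sort -u

a single value at the end means every row had the same number of
columns.)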
I think the real fix here is for tools to stop using text or CSV
output and switch to the json output; that way the output isn't as
brittle, except for the keys we use. It isn't feasible for the perf
tool to stand still in case there is a script somewhere; we'll just
accumulate bugs and baggage. However, if someone has a script and they
want to
enforce an output, all they need to do is stick a test on it (the
Beyonce principle except s/ring/test/).
Thanks,
Ian
>> My main concern would be subtle regressions since there are so many
>> different combinations and way to travel through the code, and a lot of
>> things are not covered by unit tests. When I worked on the code it was
>> difficult to keep it all working. I assume you have some way to
>> enumerate them all and tested that the output is identical?
> Right, that's my concern too.
>
> I have tested many combinations manually and checked if they
> produced similar results.
I had a script to test many combinations, but had to check the output
manually
> But the problem is that I cannot test
> all hardwares and more importantly it's hard to check
> programmatically if the output is the same or not.
Can use "dummy" or some software event (e.g. a probe on some syscall) to
get stable numbers. I don't think we need to cover all hardware for the
output options, the different events should be similar, but need some
coverage for the different aggregation. Or we could add some more tool
events just for testing purposes, that would allow covering different
core scopes etc. and would easily allow generating known counts.
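Just to illustrate the idea (the event and workload here are only
examples, not a proposal for the actual test):

  $ perf stat -e syscalls:sys_enter_getppid --per-core -a -- \
        perf bench syscall basic

the bench loop makes a fixed number of getppid calls, so the per-core
sums should be (nearly) deterministic and easy to check against the
global count.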
-Andi
On Mon, Oct 10, 2022 at 11:14 PM Ian Rogers <[email protected]> wrote:
>
> On Mon, Oct 10, 2022 at 10:38 PM Namhyung Kim <[email protected]> wrote:
> >
> > Hi Andi,
> >
> > On Mon, Oct 10, 2022 at 5:25 PM Andi Kleen <[email protected]> wrote:
> > >
> > >
> > > On 10/10/2022 10:35 PM, Namhyung Kim wrote:
> > > > Hello,
> > > >
> > > > Current perf stat code is somewhat hard to follow since it handles
> > > > many combinations of PMUs/events for given display and aggregation
> > > > options. This is my attempt to clean it up a little. ;-)
> > >
> > >
> > > My main concern would be subtle regressions since there are so many
> > > different combinations and way to travel through the code, and a lot of
> > > things are not covered by unit tests. When I worked on the code it was
> > > difficult to keep it all working. I assume you have some way to
> > > enumerate them all and tested that the output is identical?
> >
> > Right, that's my concern too.
> >
> > I have tested many combinations manually and checked if they
> > produced similar results. But the problem is that I cannot test
> > all hardwares and more importantly it's hard to check
> > programmatically if the output is the same or not. The numbers
> > vary on each run and sometimes it fluctuates a lot. I don't have
> > good test workloads and the results work for every combination.
> >
> > Any suggestions?
>
> I don't think there is anything clever we can do here. A few releases
> ago summary mode was enabled by default. For CSV output this meant a
> summary was printed at the bottom of perf stat and importantly the
> summary print out added a column on the left of all the other columns.
> This caused some tool issues for us. We now have a test that CSV
> output has a fixed number of columns. We added the CSV test because
> the json output code reformatted the display code and it would be easy
> to introduce a regression (in fact I did :-/ ). So my point is that
> stat output can change and break things and we've been doing this by
> accident for a while now. This isn't a reason to not merge this
> change.
>
> I think the real fix here is for tools to stop using text or CSV
> output and switch to the json output, that way output isn't as brittle
> except to the keys we use. It isn't feasible for the perf tool to
> stand still in case there is a script somewhere, we'll just accumulate
> bugs and baggage. However, if someone has a script and they want to
> enforce an output, all they need to do is stick a test on it (the
> Beyonce principle except s/ring/test/).
Thanks for your opinion.
I agree that it'd be better to use JSON output for machine processing.
Although there are records of historic perf stat breakages, it'd be nice
if we could avoid that for the default output mode. :)
Let me think about if there's a better way.
Thanks,
Namhyung
On Tue, Oct 11, 2022 at 4:57 AM Andi Kleen <[email protected]> wrote:
>
>
> >> My main concern would be subtle regressions since there are so many
> >> different combinations and way to travel through the code, and a lot of
> >> things are not covered by unit tests. When I worked on the code it was
> >> difficult to keep it all working. I assume you have some way to
> >> enumerate them all and tested that the output is identical?
> > Right, that's my concern too.
> >
> > I have tested many combinations manually and checked if they
> > produced similar results.
>
> I had a script to test many combinations, but had to check the output
> manually
>
>
> > But the problem is that I cannot test
> > all hardwares and more importantly it's hard to check
> > programmatically if the output is the same or not.
>
> Can use "dummy" or some software event (e.g. a probe on some syscall) to
> get stable numbers. I don't think we need to cover all hardware for the
> output options, the different events should be similar, but need some
> coverage for the different aggregation. Or we could add some more tool
> events just for testing purposes, that would allow covering different
> core scopes etc. and would easily allow generating known counts.
Even if we can get a stable number, the test still needs to know the cpu
topology in order to verify the counts for the different aggregation
modes. Also I'm afraid that cpu hotplug can affect the aggregation.
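(To illustrate the topology part -- a sketch of the sysfs files such a
test would have to read to know what to sum per core/socket:)

  $ for d in /sys/devices/system/cpu/cpu[0-9]*; do
        printf '%s: socket %s core %s\n' "${d##*/}" \
            "$(cat $d/topology/physical_package_id)" \
            "$(cat $d/topology/core_id)"
    done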
Thanks,
Namhyung
On Sun, Oct 09, 2022 at 10:36:00PM -0700, Namhyung Kim wrote:
> The aggr field in the struct perf_counts is to keep the aggregated value
> in the AGGR_GLOBAL for the old code. But it's not used anymore.
>
> Signed-off-by: Namhyung Kim <[email protected]>
> ---
> tools/perf/util/counts.c | 1 -
> tools/perf/util/counts.h | 1 -
> tools/perf/util/stat.c | 35 ++---------------------------------
> 3 files changed, 2 insertions(+), 35 deletions(-)
>
> diff --git a/tools/perf/util/counts.c b/tools/perf/util/counts.c
> index 7a447d918458..11cd85b278a6 100644
> --- a/tools/perf/util/counts.c
> +++ b/tools/perf/util/counts.c
> @@ -48,7 +48,6 @@ void perf_counts__reset(struct perf_counts *counts)
> {
> xyarray__reset(counts->loaded);
> xyarray__reset(counts->values);
> - memset(&counts->aggr, 0, sizeof(struct perf_counts_values));
> }
>
> void evsel__reset_counts(struct evsel *evsel)
> diff --git a/tools/perf/util/counts.h b/tools/perf/util/counts.h
> index 5de275194f2b..42760242e0df 100644
> --- a/tools/perf/util/counts.h
> +++ b/tools/perf/util/counts.h
> @@ -11,7 +11,6 @@ struct evsel;
>
> struct perf_counts {
> s8 scaled;
> - struct perf_counts_values aggr;
> struct xyarray *values;
> struct xyarray *loaded;
> };
> diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
> index 1652586a4925..0dccfa273fa7 100644
> --- a/tools/perf/util/stat.c
> +++ b/tools/perf/util/stat.c
> @@ -307,8 +307,6 @@ static void evsel__copy_prev_raw_counts(struct evsel *evsel)
> *perf_counts(evsel->prev_raw_counts, idx, thread);
> }
> }
> -
> - evsel->counts->aggr = evsel->prev_raw_counts->aggr;
> }
>
> void evlist__copy_prev_raw_counts(struct evlist *evlist)
> @@ -319,26 +317,6 @@ void evlist__copy_prev_raw_counts(struct evlist *evlist)
> evsel__copy_prev_raw_counts(evsel);
> }
>
> -void evlist__save_aggr_prev_raw_counts(struct evlist *evlist)
> -{
> - struct evsel *evsel;
> -
> - /*
> - * To collect the overall statistics for interval mode,
> - * we copy the counts from evsel->prev_raw_counts to
> - * evsel->counts. The perf_stat_process_counter creates
> - * aggr values from per cpu values, but the per cpu values
> - * are 0 for AGGR_GLOBAL. So we use a trick that saves the
> - * previous aggr value to the first member of perf_counts,
> - * then aggr calculation in process_counter_values can work
> - * correctly.
> - */
> - evlist__for_each_entry(evlist, evsel) {
> - *perf_counts(evsel->prev_raw_counts, 0, 0) =
> - evsel->prev_raw_counts->aggr;
> - }
> -}
> -
> static size_t pkg_id_hash(const void *__key, void *ctx __maybe_unused)
> {
> uint64_t *key = (uint64_t *) __key;
> @@ -422,7 +400,6 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel,
> int cpu_map_idx, int thread,
> struct perf_counts_values *count)
> {
> - struct perf_counts_values *aggr = &evsel->counts->aggr;
> struct perf_stat_evsel *ps = evsel->stats;
> static struct perf_counts_values zero;
> bool skip = false;
> @@ -491,12 +468,6 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel,
> }
> }
>
> - if (config->aggr_mode == AGGR_GLOBAL) {
> - aggr->val += count->val;
> - aggr->ena += count->ena;
> - aggr->run += count->run;
> - }
> -
> return 0;
> }
>
> @@ -521,13 +492,10 @@ static int process_counter_maps(struct perf_stat_config *config,
> int perf_stat_process_counter(struct perf_stat_config *config,
> struct evsel *counter)
> {
> - struct perf_counts_values *aggr = &counter->counts->aggr;
> struct perf_stat_evsel *ps = counter->stats;
> - u64 *count = counter->counts->aggr.values;
> + u64 *count;
> int ret;
>
> - aggr->val = aggr->ena = aggr->run = 0;
> -
> if (counter->per_pkg)
> evsel__zero_per_pkg(counter);
>
> @@ -538,6 +506,7 @@ int perf_stat_process_counter(struct perf_stat_config *config,
> if (config->aggr_mode != AGGR_GLOBAL)
> return 0;
>
> + count = ps->aggr[0].counts.values;
> update_stats(&ps->res_stats, *count);
hi,
for some reason 'count' can be NULL, and I'm getting a crash here:
$ ./perf stat record -o krava.data true
...
$ gdb ./perf
(gdb) r stat report -i krava.data
Starting program: /home/jolsa/kernel/linux-perf/tools/perf/perf stat report -i krava.data
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib64/libthread_db.so.1".
Program received signal SIGSEGV, Segmentation fault.
0x00000000005ae90b in perf_stat_process_counter (config=0xe18d60 <stat_config>, counter=0xecfd00) at util/stat.c:510
510 update_stats(&ps->res_stats, *count);
Missing separate debuginfos, use: dnf debuginfo-install bzip2-libs-1.0.8-11.fc36.x86_64 cyrus-sasl-lib-2.1.27-18.fc36.x86_64 elfutils-debuginfod-client-0.187-4.fc36.x86_64 elfutils-libelf-0.187-4.fc36.x86_64 elfutils-libs-0.187-4.fc36.x86_64 glibc-2.35-15.fc36.x86_64 glibc-2.35-17.fc36.x86_64 keyutils-libs-1.6.1-4.fc36.x86_64 krb5-libs-1.19.2-11.fc36.x86_64 libbrotli-1.0.9-7.fc36.x86_64 libcap-2.48-4.fc36.x86_64 libcom_err-1.46.5-2.fc36.x86_64 libcurl-7.82.0-8.fc36.x86_64 libevent-2.1.12-6.fc36.x86_64 libgcc-12.2.1-2.fc36.x86_64 libidn2-2.3.3-1.fc36.x86_64 libnghttp2-1.46.0-2.fc36.x86_64 libpsl-0.21.1-5.fc36.x86_64 libselinux-3.3-4.fc36.x86_64 libssh-0.9.6-4.fc36.x86_64 libunistring-1.0-1.fc36.x86_64 libunwind-1.6.2-2.fc36.x86_64 libxcrypt-4.4.28-1.fc36.x86_64 libzstd-1.5.2-2.fc36.x86_64 numactl-libs-2.0.14-5.fc36.x86_64 openldap-2.6.3-1.fc36.x86_64 openssl-libs-3.0.5-1.fc36.x86_64 perl-libs-5.34.1-486.fc36.x86_64 python3-libs-3.10.7-1.fc36.x86_64 slang-2.3.2-11.fc36.x86_64 xz-libs-5.2.5-9.fc36.x86_64 zlib-1.2.11-33.fc36.x86_64
(gdb) bt
#0 0x00000000005ae90b in perf_stat_process_counter (config=0xe18d60 <stat_config>, counter=0xecfd00) at util/stat.c:510
#1 0x000000000043b716 in process_counters () at builtin-stat.c:485
#2 0x000000000043f2bf in process_stat_round_event (session=0xec84f0, event=0x7ffff7ffaba8) at builtin-stat.c:2099
#3 0x000000000056c7b6 in perf_session__process_user_event (session=0xec84f0, event=0x7ffff7ffaba8, file_offset=2984, file_path=0xecf220 "krava.data")
at util/session.c:1714
#4 0x000000000056cea5 in perf_session__process_event (session=0xec84f0, event=0x7ffff7ffaba8, file_offset=2984, file_path=0xecf220 "krava.data")
at util/session.c:1857
#5 0x000000000056e4fa in process_simple (session=0xec84f0, event=0x7ffff7ffaba8, file_offset=2984, file_path=0xecf220 "krava.data") at util/session.c:2432
#6 0x000000000056e1b9 in reader__read_event (rd=0x7fffffffb6c0, session=0xec84f0, prog=0x7fffffffb690) at util/session.c:2361
#7 0x000000000056e3ae in reader__process_events (rd=0x7fffffffb6c0, session=0xec84f0, prog=0x7fffffffb690) at util/session.c:2410
#8 0x000000000056e652 in __perf_session__process_events (session=0xec84f0) at util/session.c:2457
#9 0x000000000056eff8 in perf_session__process_events (session=0xec84f0) at util/session.c:2623
#10 0x000000000043fa53 in __cmd_report (argc=0, argv=0x7fffffffdf70) at builtin-stat.c:2265
#11 0x000000000043fd94 in cmd_stat (argc=3, argv=0x7fffffffdf70) at builtin-stat.c:2346
#12 0x00000000004ef495 in run_builtin (p=0xe2f930 <commands+336>, argc=4, argv=0x7fffffffdf70) at perf.c:322
#13 0x00000000004ef709 in handle_internal_command (argc=4, argv=0x7fffffffdf70) at perf.c:376
#14 0x00000000004ef858 in run_argv (argcp=0x7fffffffdd9c, argv=0x7fffffffdd90) at perf.c:420
#15 0x00000000004efc1f in main (argc=4, argv=0x7fffffffdf70) at perf.c:550
(gdb) p count
$1 = (u64 *) 0x0
jirka
>
> if (verbose > 0) {
> --
> 2.38.0.rc1.362.ged0d419d3c-goog
>
Hi Jiri,
On Wed, Oct 12, 2022 at 1:41 AM Jiri Olsa <[email protected]> wrote:
>
> On Sun, Oct 09, 2022 at 10:36:00PM -0700, Namhyung Kim wrote:
> > The aggr field in the struct perf_counts is to keep the aggregated value
> > in the AGGR_GLOBAL for the old code. But it's not used anymore.
> >
> > Signed-off-by: Namhyung Kim <[email protected]>
> > ---
> > tools/perf/util/counts.c | 1 -
> > tools/perf/util/counts.h | 1 -
> > tools/perf/util/stat.c | 35 ++---------------------------------
> > 3 files changed, 2 insertions(+), 35 deletions(-)
> >
> > diff --git a/tools/perf/util/counts.c b/tools/perf/util/counts.c
> > index 7a447d918458..11cd85b278a6 100644
> > --- a/tools/perf/util/counts.c
> > +++ b/tools/perf/util/counts.c
> > @@ -48,7 +48,6 @@ void perf_counts__reset(struct perf_counts *counts)
> > {
> > xyarray__reset(counts->loaded);
> > xyarray__reset(counts->values);
> > - memset(&counts->aggr, 0, sizeof(struct perf_counts_values));
> > }
> >
> > void evsel__reset_counts(struct evsel *evsel)
> > diff --git a/tools/perf/util/counts.h b/tools/perf/util/counts.h
> > index 5de275194f2b..42760242e0df 100644
> > --- a/tools/perf/util/counts.h
> > +++ b/tools/perf/util/counts.h
> > @@ -11,7 +11,6 @@ struct evsel;
> >
> > struct perf_counts {
> > s8 scaled;
> > - struct perf_counts_values aggr;
> > struct xyarray *values;
> > struct xyarray *loaded;
> > };
> > diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
> > index 1652586a4925..0dccfa273fa7 100644
> > --- a/tools/perf/util/stat.c
> > +++ b/tools/perf/util/stat.c
> > @@ -307,8 +307,6 @@ static void evsel__copy_prev_raw_counts(struct evsel *evsel)
> > *perf_counts(evsel->prev_raw_counts, idx, thread);
> > }
> > }
> > -
> > - evsel->counts->aggr = evsel->prev_raw_counts->aggr;
> > }
> >
> > void evlist__copy_prev_raw_counts(struct evlist *evlist)
> > @@ -319,26 +317,6 @@ void evlist__copy_prev_raw_counts(struct evlist *evlist)
> > evsel__copy_prev_raw_counts(evsel);
> > }
> >
> > -void evlist__save_aggr_prev_raw_counts(struct evlist *evlist)
> > -{
> > - struct evsel *evsel;
> > -
> > - /*
> > - * To collect the overall statistics for interval mode,
> > - * we copy the counts from evsel->prev_raw_counts to
> > - * evsel->counts. The perf_stat_process_counter creates
> > - * aggr values from per cpu values, but the per cpu values
> > - * are 0 for AGGR_GLOBAL. So we use a trick that saves the
> > - * previous aggr value to the first member of perf_counts,
> > - * then aggr calculation in process_counter_values can work
> > - * correctly.
> > - */
> > - evlist__for_each_entry(evlist, evsel) {
> > - *perf_counts(evsel->prev_raw_counts, 0, 0) =
> > - evsel->prev_raw_counts->aggr;
> > - }
> > -}
> > -
> > static size_t pkg_id_hash(const void *__key, void *ctx __maybe_unused)
> > {
> > uint64_t *key = (uint64_t *) __key;
> > @@ -422,7 +400,6 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel,
> > int cpu_map_idx, int thread,
> > struct perf_counts_values *count)
> > {
> > - struct perf_counts_values *aggr = &evsel->counts->aggr;
> > struct perf_stat_evsel *ps = evsel->stats;
> > static struct perf_counts_values zero;
> > bool skip = false;
> > @@ -491,12 +468,6 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel,
> > }
> > }
> >
> > - if (config->aggr_mode == AGGR_GLOBAL) {
> > - aggr->val += count->val;
> > - aggr->ena += count->ena;
> > - aggr->run += count->run;
> > - }
> > -
> > return 0;
> > }
> >
> > @@ -521,13 +492,10 @@ static int process_counter_maps(struct perf_stat_config *config,
> > int perf_stat_process_counter(struct perf_stat_config *config,
> > struct evsel *counter)
> > {
> > - struct perf_counts_values *aggr = &counter->counts->aggr;
> > struct perf_stat_evsel *ps = counter->stats;
> > - u64 *count = counter->counts->aggr.values;
> > + u64 *count;
> > int ret;
> >
> > - aggr->val = aggr->ena = aggr->run = 0;
> > -
> > if (counter->per_pkg)
> > evsel__zero_per_pkg(counter);
> >
> > @@ -538,6 +506,7 @@ int perf_stat_process_counter(struct perf_stat_config *config,
> > if (config->aggr_mode != AGGR_GLOBAL)
> > return 0;
> >
> > + count = ps->aggr[0].counts.values;
> > update_stats(&ps->res_stats, *count);
>
> hi,
> for some reason 'count' could be NULL, I'm getting crash in here:
Ouch, will check. Thanks for the test!
Thanks,
Namhyung
>
> $ ./perf stat record -o krava.data true
> ...
>
> $ gdb ./perf
>
> (gdb) r stat report -i krava.data
> Starting program: /home/jolsa/kernel/linux-perf/tools/perf/perf stat report -i krava.data
> [Thread debugging using libthread_db enabled]
> Using host libthread_db library "/lib64/libthread_db.so.1".
>
> Program received signal SIGSEGV, Segmentation fault.
> 0x00000000005ae90b in perf_stat_process_counter (config=0xe18d60 <stat_config>, counter=0xecfd00) at util/stat.c:510
> 510 update_stats(&ps->res_stats, *count);
> Missing separate debuginfos, use: dnf debuginfo-install bzip2-libs-1.0.8-11.fc36.x86_64 cyrus-sasl-lib-2.1.27-18.fc36.x86_64 elfutils-debuginfod-client-0.187-4.fc36.x86_64 elfutils-libelf-0.187-4.fc36.x86_64 elfutils-libs-0.187-4.fc36.x86_64 glibc-2.35-15.fc36.x86_64 glibc-2.35-17.fc36.x86_64 keyutils-libs-1.6.1-4.fc36.x86_64 krb5-libs-1.19.2-11.fc36.x86_64 libbrotli-1.0.9-7.fc36.x86_64 libcap-2.48-4.fc36.x86_64 libcom_err-1.46.5-2.fc36.x86_64 libcurl-7.82.0-8.fc36.x86_64 libevent-2.1.12-6.fc36.x86_64 libgcc-12.2.1-2.fc36.x86_64 libidn2-2.3.3-1.fc36.x86_64 libnghttp2-1.46.0-2.fc36.x86_64 libpsl-0.21.1-5.fc36.x86_64 libselinux-3.3-4.fc36.x86_64 libssh-0.9.6-4.fc36.x86_64 libunistring-1.0-1.fc36.x86_64 libunwind-1.6.2-2.fc36.x86_64 libxcrypt-4.4.28-1.fc36.x86_64 libzstd-1.5.2-2.fc36.x86_64 numactl-libs-2.0.14-5.fc36.x86_64 openldap-2.6.3-1.fc36.x86_64 openssl-libs-3.0.5-1.fc36.x86_64 perl-libs-5.34.1-486.fc36.x86_64 python3-libs-3.10.7-1.fc36.x86_64 slang-2.3.2-11.fc36.x86_64 xz-libs-5.2.5-9.fc36.x86_64 zlib-1.2.11-33.fc36.x86_64
> (gdb) bt
> #0 0x00000000005ae90b in perf_stat_process_counter (config=0xe18d60 <stat_config>, counter=0xecfd00) at util/stat.c:510
> #1 0x000000000043b716 in process_counters () at builtin-stat.c:485
> #2 0x000000000043f2bf in process_stat_round_event (session=0xec84f0, event=0x7ffff7ffaba8) at builtin-stat.c:2099
> #3 0x000000000056c7b6 in perf_session__process_user_event (session=0xec84f0, event=0x7ffff7ffaba8, file_offset=2984, file_path=0xecf220 "krava.data")
> at util/session.c:1714
> #4 0x000000000056cea5 in perf_session__process_event (session=0xec84f0, event=0x7ffff7ffaba8, file_offset=2984, file_path=0xecf220 "krava.data")
> at util/session.c:1857
> #5 0x000000000056e4fa in process_simple (session=0xec84f0, event=0x7ffff7ffaba8, file_offset=2984, file_path=0xecf220 "krava.data") at util/session.c:2432
> #6 0x000000000056e1b9 in reader__read_event (rd=0x7fffffffb6c0, session=0xec84f0, prog=0x7fffffffb690) at util/session.c:2361
> #7 0x000000000056e3ae in reader__process_events (rd=0x7fffffffb6c0, session=0xec84f0, prog=0x7fffffffb690) at util/session.c:2410
> #8 0x000000000056e652 in __perf_session__process_events (session=0xec84f0) at util/session.c:2457
> #9 0x000000000056eff8 in perf_session__process_events (session=0xec84f0) at util/session.c:2623
> #10 0x000000000043fa53 in __cmd_report (argc=0, argv=0x7fffffffdf70) at builtin-stat.c:2265
> #11 0x000000000043fd94 in cmd_stat (argc=3, argv=0x7fffffffdf70) at builtin-stat.c:2346
> #12 0x00000000004ef495 in run_builtin (p=0xe2f930 <commands+336>, argc=4, argv=0x7fffffffdf70) at perf.c:322
> #13 0x00000000004ef709 in handle_internal_command (argc=4, argv=0x7fffffffdf70) at perf.c:376
> #14 0x00000000004ef858 in run_argv (argcp=0x7fffffffdd9c, argv=0x7fffffffdd90) at perf.c:420
> #15 0x00000000004efc1f in main (argc=4, argv=0x7fffffffdf70) at perf.c:550
> (gdb) p count
> $1 = (u64 *) 0x0
>
> jirka
>
> >
> > if (verbose > 0) {
> > --
> > 2.38.0.rc1.362.ged0d419d3c-goog
> >
I'll merge this into the problematic commit.
Signed-off-by: Namhyung Kim <[email protected]>
---
tools/perf/builtin-stat.c | 12 +++++++-----
1 file changed, 7 insertions(+), 5 deletions(-)
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 4cb3ceeb7ba4..9d35a3338976 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1355,7 +1355,11 @@ static struct aggr_cpu_id perf_stat__get_cpu(struct perf_stat_config *config __m
static struct aggr_cpu_id perf_stat__get_aggr(struct perf_stat_config *config,
aggr_get_id_t get_id, struct perf_cpu cpu)
{
- struct aggr_cpu_id id = aggr_cpu_id__empty();
+ struct aggr_cpu_id id;
+
+ /* per-process mode - should use global aggr mode */
+ if (cpu.cpu == -1)
+ return get_id(config, cpu);
if (aggr_cpu_id__is_empty(&config->cpus_aggr_map->map[cpu.cpu]))
config->cpus_aggr_map->map[cpu.cpu] = get_id(config, cpu);
@@ -2120,11 +2124,9 @@ int process_stat_config_event(struct perf_session *session,
if (perf_cpu_map__empty(st->cpus)) {
if (st->aggr_mode != AGGR_UNSET)
pr_warning("warning: processing task data, aggregation mode not set\n");
- return 0;
- }
-
- if (st->aggr_mode != AGGR_UNSET)
+ } else if (st->aggr_mode != AGGR_UNSET) {
stat_config.aggr_mode = st->aggr_mode;
+ }
if (perf_stat.data.is_pipe)
perf_stat_init_aggr_mode();
--
2.38.0.413.g74048e4d9e-goog