Currently perf-stat supports to print counts at regular interval (-I),
but it's not very easy for user to get the overall statistics.
The patch uses 'evsel->prev_raw_counts' to get counts for summary.
Copy the counts to 'evsel->counts' after printing the interval results.
Next, we just follow the non-interval processing.
Let's see some examples,
root@kbl-ppc:~# perf stat -e cycles -I1000 --interval-count 2
# time counts unit events
1.000412064 2,281,114 cycles
2.001383658 2,547,880 cycles
Performance counter stats for 'system wide':
4,828,994 cycles
2.002860349 seconds time elapsed
root@kbl-ppc:~# perf stat -e cycles,instructions -I1000 --interval-count 2
# time counts unit events
1.000389902 1,536,093 cycles
1.000389902 420,226 instructions # 0.27 insn per cycle
2.001433453 2,213,952 cycles
2.001433453 735,465 instructions # 0.33 insn per cycle
Performance counter stats for 'system wide':
3,750,045 cycles
1,155,691 instructions # 0.31 insn per cycle
2.003023361 seconds time elapsed
root@kbl-ppc:~# perf stat -M CPI,IPC -I1000 --interval-count 2
# time counts unit events
1.000435121 905,303 inst_retired.any # 2.9 CPI
1.000435121 2,663,333 cycles
1.000435121 914,702 inst_retired.any # 0.3 IPC
1.000435121 2,676,559 cpu_clk_unhalted.thread
2.001615941 1,951,092 inst_retired.any # 1.8 CPI
2.001615941 3,551,357 cycles
2.001615941 1,950,837 inst_retired.any # 0.5 IPC
2.001615941 3,551,044 cpu_clk_unhalted.thread
Performance counter stats for 'system wide':
2,856,395 inst_retired.any # 2.2 CPI
6,214,690 cycles
2,865,539 inst_retired.any # 0.5 IPC
6,227,603 cpu_clk_unhalted.thread
2.003403078 seconds time elapsed
v3:
---
Use evsel->prev_raw_counts for summary counts
v2:
---
Rebase to perf/core branch
Signed-off-by: Jin Yao <[email protected]>
---
tools/perf/builtin-stat.c | 16 ++++++++++++++--
tools/perf/util/stat.c | 2 +-
tools/perf/util/stat.h | 1 +
3 files changed, 16 insertions(+), 3 deletions(-)
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 97ee941649e6..f67d6a50274e 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -334,7 +334,7 @@ static void read_counters(struct timespec *rs)
evlist__for_each_entry(evsel_list, counter) {
if (evsel__cpu_iter_skip(counter, cpu))
continue;
- if (!counter->err) {
+ if (!counter->err && !stat_config.summary) {
counter->err = read_counter_cpu(counter, rs,
counter->cpu_iter - 1);
}
@@ -364,6 +364,7 @@ static void thread_stats_reset(struct perf_stat_config *config)
static void process_interval(void)
{
struct timespec ts, rs;
+ struct stats walltime_nsecs_stats_bak;
clock_gettime(CLOCK_MONOTONIC, &ts);
diff_timespec(&rs, &ts, &ref_time);
@@ -377,9 +378,11 @@ static void process_interval(void)
pr_err("failed to write stat round event\n");
}
+ walltime_nsecs_stats_bak = walltime_nsecs_stats;
init_stats(&walltime_nsecs_stats);
update_stats(&walltime_nsecs_stats, stat_config.interval * 1000000);
print_counters(&rs, 0, NULL);
+ walltime_nsecs_stats = walltime_nsecs_stats_bak;
}
static void enable_counters(void)
@@ -735,6 +738,15 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
update_stats(&walltime_nsecs_stats, t1 - t0);
+ if (interval) {
+ stat_config.interval = 0;
+ stat_config.summary = true;
+ perf_evlist__copy_prev_raw_counts(evsel_list);
+ perf_evlist__reset_prev_raw_counts(evsel_list);
+ thread_stats_reset(&stat_config);
+ perf_stat__reset_shadow_per_stat(&rt_stat);
+ }
+
/*
* Closing a group leader splits the group, and as we only disable
* group leaders, results in remaining events becoming enabled. To
@@ -2158,7 +2170,7 @@ int cmd_stat(int argc, const char **argv)
}
}
- if (!forever && status != -1 && !interval)
+ if (!forever && status != -1 && (!interval || stat_config.summary))
print_counters(NULL, argc, argv);
if (STAT_RECORD) {
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index ede113805ecd..47b26c024830 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -393,7 +393,7 @@ int perf_stat_process_counter(struct perf_stat_config *config,
* interval mode, otherwise overall avg running
* averages will be shown for each interval.
*/
- if (config->interval) {
+ if (config->interval || config->summary) {
for (i = 0; i < 3; i++)
init_stats(&ps->res_stats[i]);
}
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 62cf72c71869..c60e9e5d6474 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -110,6 +110,7 @@ struct perf_stat_config {
bool all_kernel;
bool all_user;
bool percore_show_thread;
+ bool summary;
FILE *output;
unsigned int interval;
unsigned int timeout;
--
2.17.1
On Thu, May 07, 2020 at 02:58:22PM +0800, Jin Yao wrote:
> Currently perf-stat supports to print counts at regular interval (-I),
> but it's not very easy for user to get the overall statistics.
>
> The patch uses 'evsel->prev_raw_counts' to get counts for summary.
> Copy the counts to 'evsel->counts' after printing the interval results.
> Next, we just follow the non-interval processing.
>
> Let's see some examples,
>
> root@kbl-ppc:~# perf stat -e cycles -I1000 --interval-count 2
> # time counts unit events
> 1.000412064 2,281,114 cycles
> 2.001383658 2,547,880 cycles
>
> Performance counter stats for 'system wide':
>
> 4,828,994 cycles
>
> 2.002860349 seconds time elapsed
>
> root@kbl-ppc:~# perf stat -e cycles,instructions -I1000 --interval-count 2
> # time counts unit events
> 1.000389902 1,536,093 cycles
> 1.000389902 420,226 instructions # 0.27 insn per cycle
> 2.001433453 2,213,952 cycles
> 2.001433453 735,465 instructions # 0.33 insn per cycle
>
> Performance counter stats for 'system wide':
>
> 3,750,045 cycles
> 1,155,691 instructions # 0.31 insn per cycle
>
> 2.003023361 seconds time elapsed
>
> root@kbl-ppc:~# perf stat -M CPI,IPC -I1000 --interval-count 2
> # time counts unit events
> 1.000435121 905,303 inst_retired.any # 2.9 CPI
> 1.000435121 2,663,333 cycles
> 1.000435121 914,702 inst_retired.any # 0.3 IPC
> 1.000435121 2,676,559 cpu_clk_unhalted.thread
> 2.001615941 1,951,092 inst_retired.any # 1.8 CPI
> 2.001615941 3,551,357 cycles
> 2.001615941 1,950,837 inst_retired.any # 0.5 IPC
> 2.001615941 3,551,044 cpu_clk_unhalted.thread
>
> Performance counter stats for 'system wide':
>
> 2,856,395 inst_retired.any # 2.2 CPI
> 6,214,690 cycles
> 2,865,539 inst_retired.any # 0.5 IPC
> 6,227,603 cpu_clk_unhalted.thread
>
> 2.003403078 seconds time elapsed
>
> v3:
> ---
> Use evsel->prev_raw_counts for summary counts
>
> v2:
> ---
> Rebase to perf/core branch
>
> Signed-off-by: Jin Yao <[email protected]>
> ---
> tools/perf/builtin-stat.c | 16 ++++++++++++++--
> tools/perf/util/stat.c | 2 +-
> tools/perf/util/stat.h | 1 +
> 3 files changed, 16 insertions(+), 3 deletions(-)
>
> diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
> index 97ee941649e6..f67d6a50274e 100644
> --- a/tools/perf/builtin-stat.c
> +++ b/tools/perf/builtin-stat.c
> @@ -334,7 +334,7 @@ static void read_counters(struct timespec *rs)
> evlist__for_each_entry(evsel_list, counter) {
> if (evsel__cpu_iter_skip(counter, cpu))
> continue;
> - if (!counter->err) {
> + if (!counter->err && !stat_config.summary) {
you'll go through all the affinity setup and do nothing at the end,
even if you know at the begining that it's the case.. not good
we need to call only the perf_stat_process_counter in summary case
jirka
> counter->err = read_counter_cpu(counter, rs,
> counter->cpu_iter - 1);
> }
> @@ -364,6 +364,7 @@ static void thread_stats_reset(struct perf_stat_config *config)
> static void process_interval(void)
> {
> struct timespec ts, rs;
> + struct stats walltime_nsecs_stats_bak;
>
> clock_gettime(CLOCK_MONOTONIC, &ts);
> diff_timespec(&rs, &ts, &ref_time);
> @@ -377,9 +378,11 @@ static void process_interval(void)
> pr_err("failed to write stat round event\n");
> }
>
> + walltime_nsecs_stats_bak = walltime_nsecs_stats;
> init_stats(&walltime_nsecs_stats);
> update_stats(&walltime_nsecs_stats, stat_config.interval * 1000000);
> print_counters(&rs, 0, NULL);
> + walltime_nsecs_stats = walltime_nsecs_stats_bak;
> }
>
> static void enable_counters(void)
> @@ -735,6 +738,15 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
>
> update_stats(&walltime_nsecs_stats, t1 - t0);
>
> + if (interval) {
> + stat_config.interval = 0;
> + stat_config.summary = true;
> + perf_evlist__copy_prev_raw_counts(evsel_list);
> + perf_evlist__reset_prev_raw_counts(evsel_list);
> + thread_stats_reset(&stat_config);
> + perf_stat__reset_shadow_per_stat(&rt_stat);
> + }
> +
> /*
> * Closing a group leader splits the group, and as we only disable
> * group leaders, results in remaining events becoming enabled. To
> @@ -2158,7 +2170,7 @@ int cmd_stat(int argc, const char **argv)
> }
> }
>
> - if (!forever && status != -1 && !interval)
> + if (!forever && status != -1 && (!interval || stat_config.summary))
> print_counters(NULL, argc, argv);
>
> if (STAT_RECORD) {
> diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
> index ede113805ecd..47b26c024830 100644
> --- a/tools/perf/util/stat.c
> +++ b/tools/perf/util/stat.c
> @@ -393,7 +393,7 @@ int perf_stat_process_counter(struct perf_stat_config *config,
> * interval mode, otherwise overall avg running
> * averages will be shown for each interval.
> */
> - if (config->interval) {
> + if (config->interval || config->summary) {
> for (i = 0; i < 3; i++)
> init_stats(&ps->res_stats[i]);
> }
> diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
> index 62cf72c71869..c60e9e5d6474 100644
> --- a/tools/perf/util/stat.h
> +++ b/tools/perf/util/stat.h
> @@ -110,6 +110,7 @@ struct perf_stat_config {
> bool all_kernel;
> bool all_user;
> bool percore_show_thread;
> + bool summary;
> FILE *output;
> unsigned int interval;
> unsigned int timeout;
> --
> 2.17.1
>
Hi Jiri,
On 5/7/2020 11:18 PM, Jiri Olsa wrote:
> On Thu, May 07, 2020 at 02:58:22PM +0800, Jin Yao wrote:
>> Currently perf-stat supports to print counts at regular interval (-I),
>> but it's not very easy for user to get the overall statistics.
>>
>> The patch uses 'evsel->prev_raw_counts' to get counts for summary.
>> Copy the counts to 'evsel->counts' after printing the interval results.
>> Next, we just follow the non-interval processing.
>>
>> Let's see some examples,
>>
>> root@kbl-ppc:~# perf stat -e cycles -I1000 --interval-count 2
>> # time counts unit events
>> 1.000412064 2,281,114 cycles
>> 2.001383658 2,547,880 cycles
>>
>> Performance counter stats for 'system wide':
>>
>> 4,828,994 cycles
>>
>> 2.002860349 seconds time elapsed
>>
>> root@kbl-ppc:~# perf stat -e cycles,instructions -I1000 --interval-count 2
>> # time counts unit events
>> 1.000389902 1,536,093 cycles
>> 1.000389902 420,226 instructions # 0.27 insn per cycle
>> 2.001433453 2,213,952 cycles
>> 2.001433453 735,465 instructions # 0.33 insn per cycle
>>
>> Performance counter stats for 'system wide':
>>
>> 3,750,045 cycles
>> 1,155,691 instructions # 0.31 insn per cycle
>>
>> 2.003023361 seconds time elapsed
>>
>> root@kbl-ppc:~# perf stat -M CPI,IPC -I1000 --interval-count 2
>> # time counts unit events
>> 1.000435121 905,303 inst_retired.any # 2.9 CPI
>> 1.000435121 2,663,333 cycles
>> 1.000435121 914,702 inst_retired.any # 0.3 IPC
>> 1.000435121 2,676,559 cpu_clk_unhalted.thread
>> 2.001615941 1,951,092 inst_retired.any # 1.8 CPI
>> 2.001615941 3,551,357 cycles
>> 2.001615941 1,950,837 inst_retired.any # 0.5 IPC
>> 2.001615941 3,551,044 cpu_clk_unhalted.thread
>>
>> Performance counter stats for 'system wide':
>>
>> 2,856,395 inst_retired.any # 2.2 CPI
>> 6,214,690 cycles
>> 2,865,539 inst_retired.any # 0.5 IPC
>> 6,227,603 cpu_clk_unhalted.thread
>>
>> 2.003403078 seconds time elapsed
>>
>> v3:
>> ---
>> Use evsel->prev_raw_counts for summary counts
>>
>> v2:
>> ---
>> Rebase to perf/core branch
>>
>> Signed-off-by: Jin Yao <[email protected]>
>> ---
>> tools/perf/builtin-stat.c | 16 ++++++++++++++--
>> tools/perf/util/stat.c | 2 +-
>> tools/perf/util/stat.h | 1 +
>> 3 files changed, 16 insertions(+), 3 deletions(-)
>>
>> diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
>> index 97ee941649e6..f67d6a50274e 100644
>> --- a/tools/perf/builtin-stat.c
>> +++ b/tools/perf/builtin-stat.c
>> @@ -334,7 +334,7 @@ static void read_counters(struct timespec *rs)
>> evlist__for_each_entry(evsel_list, counter) {
>> if (evsel__cpu_iter_skip(counter, cpu))
>> continue;
>> - if (!counter->err) {
>> + if (!counter->err && !stat_config.summary) {
>
> you'll go through all the affinity setup and do nothing at the end,
> even if you know at the begining that it's the case.. not good
>
> we need to call only the perf_stat_process_counter in summary case
>
> jirka
>
The interval processing and non-interval processing both need to call
read_counters.
I need to prevent calling read_counter_cpu in read_counters for non-interval
otherwise the summary counts will be bigger than the sum of interval counts.
e.g.
interval: read_counter_cpu
interval: read_counter_cpu
interval: read_counter_cpu
interval mode is over,
non-interval: read_counter_cpu /* prevent this time read_counter_cpu */
Since read_counter_cpu is called before perf_stat_process_counter, so I can't do
this in perf_stat_process_counter.
Yes, it's not good to check stat_config.summary in loops, low efficiency.
Maybe we can move the codes of affinity setup and read_counter_cpu to a new
function, such as affinity_read_counters and change the read_counters to:
void read_counters()
{
if (!stat_config.summary)
affinity_read_counters();
evlist__for_each_entry(evsel_list, counter) {
perf_stat_process_counter(&stat_config, counter));
}
}
Thanks
Jin Yao
>> counter->err = read_counter_cpu(counter, rs,
>> counter->cpu_iter - 1);
>> }
>> @@ -364,6 +364,7 @@ static void thread_stats_reset(struct perf_stat_config *config)
>> static void process_interval(void)
>> {
>> struct timespec ts, rs;
>> + struct stats walltime_nsecs_stats_bak;
>>
>> clock_gettime(CLOCK_MONOTONIC, &ts);
>> diff_timespec(&rs, &ts, &ref_time);
>> @@ -377,9 +378,11 @@ static void process_interval(void)
>> pr_err("failed to write stat round event\n");
>> }
>>
>> + walltime_nsecs_stats_bak = walltime_nsecs_stats;
>> init_stats(&walltime_nsecs_stats);
>> update_stats(&walltime_nsecs_stats, stat_config.interval * 1000000);
>> print_counters(&rs, 0, NULL);
>> + walltime_nsecs_stats = walltime_nsecs_stats_bak;
>> }
>>
>> static void enable_counters(void)
>> @@ -735,6 +738,15 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
>>
>> update_stats(&walltime_nsecs_stats, t1 - t0);
>>
>> + if (interval) {
>> + stat_config.interval = 0;
>> + stat_config.summary = true;
>> + perf_evlist__copy_prev_raw_counts(evsel_list);
>> + perf_evlist__reset_prev_raw_counts(evsel_list);
>> + thread_stats_reset(&stat_config);
>> + perf_stat__reset_shadow_per_stat(&rt_stat);
>> + }
>> +
>> /*
>> * Closing a group leader splits the group, and as we only disable
>> * group leaders, results in remaining events becoming enabled. To
>> @@ -2158,7 +2170,7 @@ int cmd_stat(int argc, const char **argv)
>> }
>> }
>>
>> - if (!forever && status != -1 && !interval)
>> + if (!forever && status != -1 && (!interval || stat_config.summary))
>> print_counters(NULL, argc, argv);
>>
>> if (STAT_RECORD) {
>> diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
>> index ede113805ecd..47b26c024830 100644
>> --- a/tools/perf/util/stat.c
>> +++ b/tools/perf/util/stat.c
>> @@ -393,7 +393,7 @@ int perf_stat_process_counter(struct perf_stat_config *config,
>> * interval mode, otherwise overall avg running
>> * averages will be shown for each interval.
>> */
>> - if (config->interval) {
>> + if (config->interval || config->summary) {
>> for (i = 0; i < 3; i++)
>> init_stats(&ps->res_stats[i]);
>> }
>> diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
>> index 62cf72c71869..c60e9e5d6474 100644
>> --- a/tools/perf/util/stat.h
>> +++ b/tools/perf/util/stat.h
>> @@ -110,6 +110,7 @@ struct perf_stat_config {
>> bool all_kernel;
>> bool all_user;
>> bool percore_show_thread;
>> + bool summary;
>> FILE *output;
>> unsigned int interval;
>> unsigned int timeout;
>> --
>> 2.17.1
>>
>