On Tue, Mar 02, 2021 at 01:24:02AM +0800, Changbin Du wrote:
> This does follow two changes:
> 1) Select appropriate unit between K/M/G.
> 2) Use 'cpu-sec' instead of 'sec' to state this is not the wall-time.
>
> $ sudo ./perf stat -a -- sleep 1
>
> Before: Unit 'M' is selected even the number is very small.
> Performance counter stats for 'system wide':
>
> 4,003.06 msec cpu-clock # 3.998 CPUs utilized
> 16,179 context-switches # 0.004 M/sec
> 161 cpu-migrations # 0.040 K/sec
> 4,699 page-faults # 0.001 M/sec
> 6,135,801,925 cycles # 1.533 GHz (83.21%)
> 5,783,308,491 stalled-cycles-frontend # 94.26% frontend cycles idle (83.21%)
> 4,543,694,050 stalled-cycles-backend # 74.05% backend cycles idle (66.49%)
> 4,720,130,587 instructions # 0.77 insn per cycle
> # 1.23 stalled cycles per insn (83.28%)
> 753,848,078 branches # 188.318 M/sec (83.61%)
> 37,457,747 branch-misses # 4.97% of all branches (83.48%)
>
> 1.001283725 seconds time elapsed
>
> After:
> $ sudo ./perf stat -a -- sleep 2
>
> Performance counter stats for 'system wide':
>
> 8,003.20 msec cpu-clock # 3.998 CPUs utilized
> 9,768 context-switches # 1.221 K/cpu-sec
> 164 cpu-migrations # 20.492 /cpu-sec
should you remove also the leading '/' in ' /cpu-sec' ?
SNIP
> @@ -1270,18 +1271,14 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
> generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL,
> evsel->name, evsel->metric_name, NULL, 1, cpu, out, st);
> } else if (runtime_stat_n(st, STAT_NSECS, cpu, &rsd) != 0) {
> - char unit = 'M';
> + char unit = ' ';
> char unit_buf[10];
>
> total = runtime_stat_avg(st, STAT_NSECS, cpu, &rsd);
> -
> if (total)
> - ratio = 1000.0 * avg / total;
> - if (ratio < 0.001) {
> - ratio *= 1000;
> - unit = 'K';
> - }
> - snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
> + ratio = convert_unit_double(1000000000.0 * avg / total, &unit);
> +
> + snprintf(unit_buf, sizeof(unit_buf), "%c/cpu-sec", unit);
> print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio);
hum this will change -x output that people parse, so I don't think we can do that
> } else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
> print_smi_cost(config, cpu, out, st, &rsd);
> diff --git a/tools/perf/util/units.c b/tools/perf/util/units.c
> index a46762aec4c9..ac13b5ecde31 100644
> --- a/tools/perf/util/units.c
> +++ b/tools/perf/util/units.c
> @@ -55,6 +55,28 @@ unsigned long convert_unit(unsigned long value, char *unit)
> return value;
> }
>
> +double convert_unit_double(double value, char *unit)
> +{
> + *unit = ' ';
> +
> + if (value > 1000.0) {
> + value /= 1000.0;
> + *unit = 'K';
> + }
> +
> + if (value > 1000.0) {
> + value /= 1000.0;
> + *unit = 'M';
> + }
> +
> + if (value > 1000.0) {
> + value /= 1000.0;
> + *unit = 'G';
> + }
> +
> + return value;
> +}
we have convert_unit function just above doing the same only with
unsigned long.. let's have one base function with double values and
another one casting the result to unsigned long
jirka
On Tue, Mar 2, 2021 at 4:19 AM Jiri Olsa <[email protected]> wrote:
>
> On Tue, Mar 02, 2021 at 01:24:02AM +0800, Changbin Du wrote:
> > This does follow two changes:
> > 1) Select appropriate unit between K/M/G.
> > 2) Use 'cpu-sec' instead of 'sec' to state this is not the wall-time.
> >
> > $ sudo ./perf stat -a -- sleep 1
> >
> > Before: Unit 'M' is selected even the number is very small.
> > Performance counter stats for 'system wide':
> >
> > 4,003.06 msec cpu-clock # 3.998 CPUs utilized
> > 16,179 context-switches # 0.004 M/sec
> > 161 cpu-migrations # 0.040 K/sec
> > 4,699 page-faults # 0.001 M/sec
> > 6,135,801,925 cycles # 1.533 GHz (83.21%)
> > 5,783,308,491 stalled-cycles-frontend # 94.26% frontend cycles idle (83.21%)
> > 4,543,694,050 stalled-cycles-backend # 74.05% backend cycles idle (66.49%)
> > 4,720,130,587 instructions # 0.77 insn per cycle
> > # 1.23 stalled cycles per insn (83.28%)
> > 753,848,078 branches # 188.318 M/sec (83.61%)
> > 37,457,747 branch-misses # 4.97% of all branches (83.48%)
> >
> > 1.001283725 seconds time elapsed
> >
> > After:
> > $ sudo ./perf stat -a -- sleep 2
> >
> > Performance counter stats for 'system wide':
> >
> > 8,003.20 msec cpu-clock # 3.998 CPUs utilized
> > 9,768 context-switches # 1.221 K/cpu-sec
> > 164 cpu-migrations # 20.492 /cpu-sec
>
> should you remove also the leading '/' in ' /cpu-sec' ?
The change looks good. And I think we should keep '/' otherwise it'd be
more confusing.
>
>
> SNIP
>
> > @@ -1270,18 +1271,14 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
> > generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL,
> > evsel->name, evsel->metric_name, NULL, 1, cpu, out, st);
> > } else if (runtime_stat_n(st, STAT_NSECS, cpu, &rsd) != 0) {
> > - char unit = 'M';
> > + char unit = ' ';
> > char unit_buf[10];
> >
> > total = runtime_stat_avg(st, STAT_NSECS, cpu, &rsd);
> > -
> > if (total)
> > - ratio = 1000.0 * avg / total;
> > - if (ratio < 0.001) {
> > - ratio *= 1000;
> > - unit = 'K';
> > - }
> > - snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
> > + ratio = convert_unit_double(1000000000.0 * avg / total, &unit);
> > +
> > + snprintf(unit_buf, sizeof(unit_buf), "%c/cpu-sec", unit);
> > print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio);
>
> hum this will change -x output that people parse, so I don't think we can do that
Agreed.
>
> > } else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
> > print_smi_cost(config, cpu, out, st, &rsd);
> > diff --git a/tools/perf/util/units.c b/tools/perf/util/units.c
> > index a46762aec4c9..ac13b5ecde31 100644
> > --- a/tools/perf/util/units.c
> > +++ b/tools/perf/util/units.c
> > @@ -55,6 +55,28 @@ unsigned long convert_unit(unsigned long value, char *unit)
> > return value;
> > }
> >
> > +double convert_unit_double(double value, char *unit)
> > +{
> > + *unit = ' ';
> > +
> > + if (value > 1000.0) {
> > + value /= 1000.0;
> > + *unit = 'K';
> > + }
> > +
> > + if (value > 1000.0) {
> > + value /= 1000.0;
> > + *unit = 'M';
> > + }
> > +
> > + if (value > 1000.0) {
> > + value /= 1000.0;
> > + *unit = 'G';
> > + }
> > +
> > + return value;
> > +}
>
> we have convert_unit function just above doing the same only with
> unsigned long.. let's have one base function with double values and
> another one casting the result to unsigned long
Sounds good.
Thanks,
Namhyung
Em Tue, Mar 02, 2021 at 11:08:40AM +0900, Namhyung Kim escreveu:
> On Tue, Mar 2, 2021 at 4:19 AM Jiri Olsa <[email protected]> wrote:
> >
> > On Tue, Mar 02, 2021 at 01:24:02AM +0800, Changbin Du wrote:
> > > This does follow two changes:
> > > 1) Select appropriate unit between K/M/G.
> > > 2) Use 'cpu-sec' instead of 'sec' to state this is not the wall-time.
> > >
> > > $ sudo ./perf stat -a -- sleep 1
> > >
> > > Before: Unit 'M' is selected even the number is very small.
> > > Performance counter stats for 'system wide':
> > >
> > > 4,003.06 msec cpu-clock # 3.998 CPUs utilized
> > > 16,179 context-switches # 0.004 M/sec
> > > 161 cpu-migrations # 0.040 K/sec
> > > 4,699 page-faults # 0.001 M/sec
> > > 6,135,801,925 cycles # 1.533 GHz (83.21%)
> > > 5,783,308,491 stalled-cycles-frontend # 94.26% frontend cycles idle (83.21%)
> > > 4,543,694,050 stalled-cycles-backend # 74.05% backend cycles idle (66.49%)
> > > 4,720,130,587 instructions # 0.77 insn per cycle
> > > # 1.23 stalled cycles per insn (83.28%)
> > > 753,848,078 branches # 188.318 M/sec (83.61%)
> > > 37,457,747 branch-misses # 4.97% of all branches (83.48%)
> > >
> > > 1.001283725 seconds time elapsed
> > >
> > > After:
> > > $ sudo ./perf stat -a -- sleep 2
> > >
> > > Performance counter stats for 'system wide':
> > >
> > > 8,003.20 msec cpu-clock # 3.998 CPUs utilized
> > > 9,768 context-switches # 1.221 K/cpu-sec
> > > 164 cpu-migrations # 20.492 /cpu-sec
> >
> > should you remove also the leading '/' in ' /cpu-sec' ?
>
> The change looks good. And I think we should keep '/' otherwise it'd be
> more confusing.
Perhaps:
8,003.20 msec cpu-clock # 3.998 CPUs utilized
9,768 context-switches # 1.221 K/cpu-sec
164 cpu-migrations # 20.492 /cpu-sec
?
- Arnaldo
> >
> >
> > SNIP
> >
> > > @@ -1270,18 +1271,14 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
> > > generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL,
> > > evsel->name, evsel->metric_name, NULL, 1, cpu, out, st);
> > > } else if (runtime_stat_n(st, STAT_NSECS, cpu, &rsd) != 0) {
> > > - char unit = 'M';
> > > + char unit = ' ';
> > > char unit_buf[10];
> > >
> > > total = runtime_stat_avg(st, STAT_NSECS, cpu, &rsd);
> > > -
> > > if (total)
> > > - ratio = 1000.0 * avg / total;
> > > - if (ratio < 0.001) {
> > > - ratio *= 1000;
> > > - unit = 'K';
> > > - }
> > > - snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
> > > + ratio = convert_unit_double(1000000000.0 * avg / total, &unit);
> > > +
> > > + snprintf(unit_buf, sizeof(unit_buf), "%c/cpu-sec", unit);
> > > print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio);
> >
> > hum this will change -x output that people parse, so I don't think we can do that
>
> Agreed.
>
> >
> > > } else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
> > > print_smi_cost(config, cpu, out, st, &rsd);
> > > diff --git a/tools/perf/util/units.c b/tools/perf/util/units.c
> > > index a46762aec4c9..ac13b5ecde31 100644
> > > --- a/tools/perf/util/units.c
> > > +++ b/tools/perf/util/units.c
> > > @@ -55,6 +55,28 @@ unsigned long convert_unit(unsigned long value, char *unit)
> > > return value;
> > > }
> > >
> > > +double convert_unit_double(double value, char *unit)
> > > +{
> > > + *unit = ' ';
> > > +
> > > + if (value > 1000.0) {
> > > + value /= 1000.0;
> > > + *unit = 'K';
> > > + }
> > > +
> > > + if (value > 1000.0) {
> > > + value /= 1000.0;
> > > + *unit = 'M';
> > > + }
> > > +
> > > + if (value > 1000.0) {
> > > + value /= 1000.0;
> > > + *unit = 'G';
> > > + }
> > > +
> > > + return value;
> > > +}
> >
> > we have convert_unit function just above doing the same only with
> > unsigned long.. let's have one base function with double values and
> > another one casting the result to unsigned long
>
> Sounds good.
>
> Thanks,
> Namhyung
--
- Arnaldo
On Tue, Mar 02, 2021 at 09:25:24AM -0300, Arnaldo Carvalho de Melo wrote:
> Em Tue, Mar 02, 2021 at 11:08:40AM +0900, Namhyung Kim escreveu:
> > On Tue, Mar 2, 2021 at 4:19 AM Jiri Olsa <[email protected]> wrote:
> > >
> > > On Tue, Mar 02, 2021 at 01:24:02AM +0800, Changbin Du wrote:
> > > > This does follow two changes:
> > > > 1) Select appropriate unit between K/M/G.
> > > > 2) Use 'cpu-sec' instead of 'sec' to state this is not the wall-time.
> > > >
> > > > $ sudo ./perf stat -a -- sleep 1
> > > >
> > > > Before: Unit 'M' is selected even the number is very small.
> > > > Performance counter stats for 'system wide':
> > > >
> > > > 4,003.06 msec cpu-clock # 3.998 CPUs utilized
> > > > 16,179 context-switches # 0.004 M/sec
> > > > 161 cpu-migrations # 0.040 K/sec
> > > > 4,699 page-faults # 0.001 M/sec
> > > > 6,135,801,925 cycles # 1.533 GHz (83.21%)
> > > > 5,783,308,491 stalled-cycles-frontend # 94.26% frontend cycles idle (83.21%)
> > > > 4,543,694,050 stalled-cycles-backend # 74.05% backend cycles idle (66.49%)
> > > > 4,720,130,587 instructions # 0.77 insn per cycle
> > > > # 1.23 stalled cycles per insn (83.28%)
> > > > 753,848,078 branches # 188.318 M/sec (83.61%)
> > > > 37,457,747 branch-misses # 4.97% of all branches (83.48%)
> > > >
> > > > 1.001283725 seconds time elapsed
> > > >
> > > > After:
> > > > $ sudo ./perf stat -a -- sleep 2
> > > >
> > > > Performance counter stats for 'system wide':
> > > >
> > > > 8,003.20 msec cpu-clock # 3.998 CPUs utilized
> > > > 9,768 context-switches # 1.221 K/cpu-sec
> > > > 164 cpu-migrations # 20.492 /cpu-sec
> > >
> > > should you remove also the leading '/' in ' /cpu-sec' ?
> >
> > The change looks good. And I think we should keep '/' otherwise it'd be
> > more confusing.
>
> Perhaps:
>
> 8,003.20 msec cpu-clock # 3.998 CPUs utilized
> 9,768 context-switches # 1.221 K/cpu-sec
> 164 cpu-migrations # 20.492 /cpu-sec
>
> ?
>
sure. Now it is:
$ sudo ./perf stat
[sudo] password for changbin:
^C
Performance counter stats for 'system wide':
6,046.61 msec cpu-clock # 3.999 CPUs utilized
7,569 context-switches # 1.252 K/sec
587 cpu-migrations # 97.079 /sec
229 page-faults # 37.872 /sec
1,484,855,385 cycles # 0.246 GHz
2,112,863,483 stalled-cycles-frontend # 142.29% frontend cycles idle
1,980,746,623 stalled-cycles-backend # 133.40% backend cycles idle
454,477,035 instructions # 0.31 insn per cycle
# 4.65 stalled cycles per insn
89,992,521 branches # 14.883 M/sec
6,196,599 branch-misses # 6.89% of all branches
1.512029866 seconds time elapsed
> - Arnaldo
>
> > >
> > >
> > > SNIP
> > >
> > > > @@ -1270,18 +1271,14 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
> > > > generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL,
> > > > evsel->name, evsel->metric_name, NULL, 1, cpu, out, st);
> > > > } else if (runtime_stat_n(st, STAT_NSECS, cpu, &rsd) != 0) {
> > > > - char unit = 'M';
> > > > + char unit = ' ';
> > > > char unit_buf[10];
> > > >
> > > > total = runtime_stat_avg(st, STAT_NSECS, cpu, &rsd);
> > > > -
> > > > if (total)
> > > > - ratio = 1000.0 * avg / total;
> > > > - if (ratio < 0.001) {
> > > > - ratio *= 1000;
> > > > - unit = 'K';
> > > > - }
> > > > - snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
> > > > + ratio = convert_unit_double(1000000000.0 * avg / total, &unit);
> > > > +
> > > > + snprintf(unit_buf, sizeof(unit_buf), "%c/cpu-sec", unit);
> > > > print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio);
> > >
> > > hum this will change -x output that people parse, so I don't think we can do that
> >
> > Agreed.
> >
> > >
> > > > } else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
> > > > print_smi_cost(config, cpu, out, st, &rsd);
> > > > diff --git a/tools/perf/util/units.c b/tools/perf/util/units.c
> > > > index a46762aec4c9..ac13b5ecde31 100644
> > > > --- a/tools/perf/util/units.c
> > > > +++ b/tools/perf/util/units.c
> > > > @@ -55,6 +55,28 @@ unsigned long convert_unit(unsigned long value, char *unit)
> > > > return value;
> > > > }
> > > >
> > > > +double convert_unit_double(double value, char *unit)
> > > > +{
> > > > + *unit = ' ';
> > > > +
> > > > + if (value > 1000.0) {
> > > > + value /= 1000.0;
> > > > + *unit = 'K';
> > > > + }
> > > > +
> > > > + if (value > 1000.0) {
> > > > + value /= 1000.0;
> > > > + *unit = 'M';
> > > > + }
> > > > +
> > > > + if (value > 1000.0) {
> > > > + value /= 1000.0;
> > > > + *unit = 'G';
> > > > + }
> > > > +
> > > > + return value;
> > > > +}
> > >
> > > we have convert_unit function just above doing the same only with
> > > unsigned long.. let's have one base function with double values and
> > > another one casting the result to unsigned long
> >
> > Sounds good.
> >
> > Thanks,
> > Namhyung
>
> --
>
> - Arnaldo
--
Cheers,
Changbin Du
On Mon, Mar 01, 2021 at 08:19:20PM +0100, Jiri Olsa wrote:
> On Tue, Mar 02, 2021 at 01:24:02AM +0800, Changbin Du wrote:
> > This does follow two changes:
> > 1) Select appropriate unit between K/M/G.
> > 2) Use 'cpu-sec' instead of 'sec' to state this is not the wall-time.
> >
> > $ sudo ./perf stat -a -- sleep 1
> >
> > Before: Unit 'M' is selected even the number is very small.
> > Performance counter stats for 'system wide':
> >
> > 4,003.06 msec cpu-clock # 3.998 CPUs utilized
> > 16,179 context-switches # 0.004 M/sec
> > 161 cpu-migrations # 0.040 K/sec
> > 4,699 page-faults # 0.001 M/sec
> > 6,135,801,925 cycles # 1.533 GHz (83.21%)
> > 5,783,308,491 stalled-cycles-frontend # 94.26% frontend cycles idle (83.21%)
> > 4,543,694,050 stalled-cycles-backend # 74.05% backend cycles idle (66.49%)
> > 4,720,130,587 instructions # 0.77 insn per cycle
> > # 1.23 stalled cycles per insn (83.28%)
> > 753,848,078 branches # 188.318 M/sec (83.61%)
> > 37,457,747 branch-misses # 4.97% of all branches (83.48%)
> >
> > 1.001283725 seconds time elapsed
> >
> > After:
> > $ sudo ./perf stat -a -- sleep 2
> >
> > Performance counter stats for 'system wide':
> >
> > 8,003.20 msec cpu-clock # 3.998 CPUs utilized
> > 9,768 context-switches # 1.221 K/cpu-sec
> > 164 cpu-migrations # 20.492 /cpu-sec
>
> should you remove also the leading '/' in ' /cpu-sec' ?
>
>
> SNIP
>
> > @@ -1270,18 +1271,14 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
> > generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL,
> > evsel->name, evsel->metric_name, NULL, 1, cpu, out, st);
> > } else if (runtime_stat_n(st, STAT_NSECS, cpu, &rsd) != 0) {
> > - char unit = 'M';
> > + char unit = ' ';
> > char unit_buf[10];
> >
> > total = runtime_stat_avg(st, STAT_NSECS, cpu, &rsd);
> > -
> > if (total)
> > - ratio = 1000.0 * avg / total;
> > - if (ratio < 0.001) {
> > - ratio *= 1000;
> > - unit = 'K';
> > - }
> > - snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
> > + ratio = convert_unit_double(1000000000.0 * avg / total, &unit);
> > +
> > + snprintf(unit_buf, sizeof(unit_buf), "%c/cpu-sec", unit);
> > print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio);
>
> hum this will change -x output that people parse, so I don't think we can do that
okay, so skip this change and keep another.
--
Cheers,
Changbin Du