2020-09-25 23:58:39

by Joel Fernandes

[permalink] [raw]
Subject: [PATCH] perf: sched: Show start of latency as well

perf sched latency is really useful at showing worst-case latencies that task
encountered since wakeup. However it shows only the end of the latency. Often
times the start of a latency is interesting as it can show what else was going
on at the time to cause the latency. I certainly myself spending a lot of time
backtracking to the start of the latency in "perf sched script" which wastes a
lot of time.

This patch therefore adds a new column "Max delay start". Considering this,
also rename "Maximum delay at" to "Max delay end" as its easier to understand.

Signed-off-by: Joel Fernandes (Google) <[email protected]>


---
A sample output can be seen after applying patch:
https://hastebin.com/raw/ivinimaler

tools/perf/builtin-sched.c | 24 ++++++++++++++----------
1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 459e4229945e..2791da1fe5f7 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -130,7 +130,8 @@ struct work_atoms {
struct thread *thread;
struct rb_node node;
u64 max_lat;
- u64 max_lat_at;
+ u64 max_lat_start;
+ u64 max_lat_end;
u64 total_lat;
u64 nb_atoms;
u64 total_runtime;
@@ -1096,7 +1097,8 @@ add_sched_in_event(struct work_atoms *atoms, u64 timestamp)
atoms->total_lat += delta;
if (delta > atoms->max_lat) {
atoms->max_lat = delta;
- atoms->max_lat_at = timestamp;
+ atoms->max_lat_start = atom->wake_up_time;
+ atoms->max_lat_end = timestamp;
}
atoms->nb_atoms++;
}
@@ -1322,7 +1324,7 @@ static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_
int i;
int ret;
u64 avg;
- char max_lat_at[32];
+ char max_lat_start[32], max_lat_end[32];

if (!work_list->nb_atoms)
return;
@@ -1344,13 +1346,14 @@ static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_
printf(" ");

avg = work_list->total_lat / work_list->nb_atoms;
- timestamp__scnprintf_usec(work_list->max_lat_at, max_lat_at, sizeof(max_lat_at));
+ timestamp__scnprintf_usec(work_list->max_lat_start, max_lat_start, sizeof(max_lat_start));
+ timestamp__scnprintf_usec(work_list->max_lat_end, max_lat_end, sizeof(max_lat_end));

- printf("|%11.3f ms |%9" PRIu64 " | avg:%9.3f ms | max:%9.3f ms | max at: %13s s\n",
+ printf("|%11.3f ms |%9" PRIu64 " | avg:%8.3f ms | max:%8.3f ms | max start: %12s s | max end: %12s s\n",
(double)work_list->total_runtime / NSEC_PER_MSEC,
work_list->nb_atoms, (double)avg / NSEC_PER_MSEC,
(double)work_list->max_lat / NSEC_PER_MSEC,
- max_lat_at);
+ max_lat_start, max_lat_end);
}

static int pid_cmp(struct work_atoms *l, struct work_atoms *r)
@@ -3118,7 +3121,8 @@ static void __merge_work_atoms(struct rb_root_cached *root, struct work_atoms *d
list_splice(&data->work_list, &this->work_list);
if (this->max_lat < data->max_lat) {
this->max_lat = data->max_lat;
- this->max_lat_at = data->max_lat_at;
+ this->max_lat_start = data->max_lat_start;
+ this->max_lat_end = data->max_lat_end;
}
zfree(&data);
return;
@@ -3157,9 +3161,9 @@ static int perf_sched__lat(struct perf_sched *sched)
perf_sched__merge_lat(sched);
perf_sched__sort_lat(sched);

- printf("\n -----------------------------------------------------------------------------------------------------------------\n");
- printf(" Task | Runtime ms | Switches | Average delay ms | Maximum delay ms | Maximum delay at |\n");
- printf(" -----------------------------------------------------------------------------------------------------------------\n");
+ printf("\n -------------------------------------------------------------------------------------------------------------------------------------------\n");
+ printf(" Task | Runtime ms | Switches | Avg delay ms | Max delay ms | Max delay start | Max delay end |\n");
+ printf(" -------------------------------------------------------------------------------------------------------------------------------------------\n");

next = rb_first_cached(&sched->sorted_atom_root);

--
2.28.0.709.gb0816b6eb0-goog


2020-09-26 14:12:24

by Namhyung Kim

[permalink] [raw]
Subject: Re: [PATCH] perf: sched: Show start of latency as well

Hi Joel,

On Sat, Sep 26, 2020 at 8:56 AM Joel Fernandes (Google)
<[email protected]> wrote:
>
> perf sched latency is really useful at showing worst-case latencies that task
> encountered since wakeup. However it shows only the end of the latency. Often
> times the start of a latency is interesting as it can show what else was going
> on at the time to cause the latency. I certainly myself spending a lot of time
> backtracking to the start of the latency in "perf sched script" which wastes a
> lot of time.
>
> This patch therefore adds a new column "Max delay start". Considering this,
> also rename "Maximum delay at" to "Max delay end" as its easier to understand.

Oh, I thought we print start time not the end time. I think it's better
to print start time but others may think differently.

Actually we can calculate the start time from the end time and the
latency but it'd be convenient if the tool does that for us (as they are
printed in different units). Then the remaining concern is the screen
width (of 114 or 115?) but I think it should be fine for most of us.

Acked-by: Namhyung Kim <[email protected]>

Thanks
Namhyung

>
> Signed-off-by: Joel Fernandes (Google) <[email protected]>
>
>
> ---
> A sample output can be seen after applying patch:
> https://hastebin.com/raw/ivinimaler
>
> tools/perf/builtin-sched.c | 24 ++++++++++++++----------
> 1 file changed, 14 insertions(+), 10 deletions(-)
>
> diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
> index 459e4229945e..2791da1fe5f7 100644
> --- a/tools/perf/builtin-sched.c
> +++ b/tools/perf/builtin-sched.c
> @@ -130,7 +130,8 @@ struct work_atoms {
> struct thread *thread;
> struct rb_node node;
> u64 max_lat;
> - u64 max_lat_at;
> + u64 max_lat_start;
> + u64 max_lat_end;
> u64 total_lat;
> u64 nb_atoms;
> u64 total_runtime;
> @@ -1096,7 +1097,8 @@ add_sched_in_event(struct work_atoms *atoms, u64 timestamp)
> atoms->total_lat += delta;
> if (delta > atoms->max_lat) {
> atoms->max_lat = delta;
> - atoms->max_lat_at = timestamp;
> + atoms->max_lat_start = atom->wake_up_time;
> + atoms->max_lat_end = timestamp;
> }
> atoms->nb_atoms++;
> }
> @@ -1322,7 +1324,7 @@ static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_
> int i;
> int ret;
> u64 avg;
> - char max_lat_at[32];
> + char max_lat_start[32], max_lat_end[32];
>
> if (!work_list->nb_atoms)
> return;
> @@ -1344,13 +1346,14 @@ static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_
> printf(" ");
>
> avg = work_list->total_lat / work_list->nb_atoms;
> - timestamp__scnprintf_usec(work_list->max_lat_at, max_lat_at, sizeof(max_lat_at));
> + timestamp__scnprintf_usec(work_list->max_lat_start, max_lat_start, sizeof(max_lat_start));
> + timestamp__scnprintf_usec(work_list->max_lat_end, max_lat_end, sizeof(max_lat_end));
>
> - printf("|%11.3f ms |%9" PRIu64 " | avg:%9.3f ms | max:%9.3f ms | max at: %13s s\n",
> + printf("|%11.3f ms |%9" PRIu64 " | avg:%8.3f ms | max:%8.3f ms | max start: %12s s | max end: %12s s\n",
> (double)work_list->total_runtime / NSEC_PER_MSEC,
> work_list->nb_atoms, (double)avg / NSEC_PER_MSEC,
> (double)work_list->max_lat / NSEC_PER_MSEC,
> - max_lat_at);
> + max_lat_start, max_lat_end);
> }
>
> static int pid_cmp(struct work_atoms *l, struct work_atoms *r)
> @@ -3118,7 +3121,8 @@ static void __merge_work_atoms(struct rb_root_cached *root, struct work_atoms *d
> list_splice(&data->work_list, &this->work_list);
> if (this->max_lat < data->max_lat) {
> this->max_lat = data->max_lat;
> - this->max_lat_at = data->max_lat_at;
> + this->max_lat_start = data->max_lat_start;
> + this->max_lat_end = data->max_lat_end;
> }
> zfree(&data);
> return;
> @@ -3157,9 +3161,9 @@ static int perf_sched__lat(struct perf_sched *sched)
> perf_sched__merge_lat(sched);
> perf_sched__sort_lat(sched);
>
> - printf("\n -----------------------------------------------------------------------------------------------------------------\n");
> - printf(" Task | Runtime ms | Switches | Average delay ms | Maximum delay ms | Maximum delay at |\n");
> - printf(" -----------------------------------------------------------------------------------------------------------------\n");
> + printf("\n -------------------------------------------------------------------------------------------------------------------------------------------\n");
> + printf(" Task | Runtime ms | Switches | Avg delay ms | Max delay ms | Max delay start | Max delay end |\n");
> + printf(" -------------------------------------------------------------------------------------------------------------------------------------------\n");
>
> next = rb_first_cached(&sched->sorted_atom_root);
>
> --
> 2.28.0.709.gb0816b6eb0-goog

2020-09-26 15:47:38

by Joel Fernandes

[permalink] [raw]
Subject: Re: [PATCH] perf: sched: Show start of latency as well

On Sat, Sep 26, 2020 at 10:10 AM Namhyung Kim <[email protected]> wrote:
[...]
> On Sat, Sep 26, 2020 at 8:56 AM Joel Fernandes (Google)
> <[email protected]> wrote:
> >
> > perf sched latency is really useful at showing worst-case latencies that task
> > encountered since wakeup. However it shows only the end of the latency. Often
> > times the start of a latency is interesting as it can show what else was going
> > on at the time to cause the latency. I certainly myself spending a lot of time
> > backtracking to the start of the latency in "perf sched script" which wastes a
> > lot of time.
> >
> > This patch therefore adds a new column "Max delay start". Considering this,
> > also rename "Maximum delay at" to "Max delay end" as its easier to understand.
>
> Oh, I thought we print start time not the end time. I think it's better
> to print start time but others may think differently.

Right, glad you think so too.

> Actually we can calculate the start time from the end time and the
> latency but it'd be convenient if the tool does that for us (as they are
> printed in different units).

Correct, but as you mention it is more burdensome to calculate each time.

> Then the remaining concern is the screen
> width (of 114 or 115?) but I think it should be fine for most of us.

It is 114 without the patch and 140 with it. I tried my best to trim
it a little. It fits fine on my screen with the patch. So I think we
should be good!

> Acked-by: Namhyung Kim <[email protected]>

Thanks, Namyhung!

- Joel

> > Signed-off-by: Joel Fernandes (Google) <[email protected]>
> >
> >
> > ---
> > A sample output can be seen after applying patch:
> > https://hastebin.com/raw/ivinimaler
> >
> > tools/perf/builtin-sched.c | 24 ++++++++++++++----------
> > 1 file changed, 14 insertions(+), 10 deletions(-)
> >
> > diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
> > index 459e4229945e..2791da1fe5f7 100644
> > --- a/tools/perf/builtin-sched.c
> > +++ b/tools/perf/builtin-sched.c
> > @@ -130,7 +130,8 @@ struct work_atoms {
> > struct thread *thread;
> > struct rb_node node;
> > u64 max_lat;
> > - u64 max_lat_at;
> > + u64 max_lat_start;
> > + u64 max_lat_end;
> > u64 total_lat;
> > u64 nb_atoms;
> > u64 total_runtime;
> > @@ -1096,7 +1097,8 @@ add_sched_in_event(struct work_atoms *atoms, u64 timestamp)
> > atoms->total_lat += delta;
> > if (delta > atoms->max_lat) {
> > atoms->max_lat = delta;
> > - atoms->max_lat_at = timestamp;
> > + atoms->max_lat_start = atom->wake_up_time;
> > + atoms->max_lat_end = timestamp;
> > }
> > atoms->nb_atoms++;
> > }
> > @@ -1322,7 +1324,7 @@ static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_
> > int i;
> > int ret;
> > u64 avg;
> > - char max_lat_at[32];
> > + char max_lat_start[32], max_lat_end[32];
> >
> > if (!work_list->nb_atoms)
> > return;
> > @@ -1344,13 +1346,14 @@ static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_
> > printf(" ");
> >
> > avg = work_list->total_lat / work_list->nb_atoms;
> > - timestamp__scnprintf_usec(work_list->max_lat_at, max_lat_at, sizeof(max_lat_at));
> > + timestamp__scnprintf_usec(work_list->max_lat_start, max_lat_start, sizeof(max_lat_start));
> > + timestamp__scnprintf_usec(work_list->max_lat_end, max_lat_end, sizeof(max_lat_end));
> >
> > - printf("|%11.3f ms |%9" PRIu64 " | avg:%9.3f ms | max:%9.3f ms | max at: %13s s\n",
> > + printf("|%11.3f ms |%9" PRIu64 " | avg:%8.3f ms | max:%8.3f ms | max start: %12s s | max end: %12s s\n",
> > (double)work_list->total_runtime / NSEC_PER_MSEC,
> > work_list->nb_atoms, (double)avg / NSEC_PER_MSEC,
> > (double)work_list->max_lat / NSEC_PER_MSEC,
> > - max_lat_at);
> > + max_lat_start, max_lat_end);
> > }
> >
> > static int pid_cmp(struct work_atoms *l, struct work_atoms *r)
> > @@ -3118,7 +3121,8 @@ static void __merge_work_atoms(struct rb_root_cached *root, struct work_atoms *d
> > list_splice(&data->work_list, &this->work_list);
> > if (this->max_lat < data->max_lat) {
> > this->max_lat = data->max_lat;
> > - this->max_lat_at = data->max_lat_at;
> > + this->max_lat_start = data->max_lat_start;
> > + this->max_lat_end = data->max_lat_end;
> > }
> > zfree(&data);
> > return;
> > @@ -3157,9 +3161,9 @@ static int perf_sched__lat(struct perf_sched *sched)
> > perf_sched__merge_lat(sched);
> > perf_sched__sort_lat(sched);
> >
> > - printf("\n -----------------------------------------------------------------------------------------------------------------\n");
> > - printf(" Task | Runtime ms | Switches | Average delay ms | Maximum delay ms | Maximum delay at |\n");
> > - printf(" -----------------------------------------------------------------------------------------------------------------\n");
> > + printf("\n -------------------------------------------------------------------------------------------------------------------------------------------\n");
> > + printf(" Task | Runtime ms | Switches | Avg delay ms | Max delay ms | Max delay start | Max delay end |\n");
> > + printf(" -------------------------------------------------------------------------------------------------------------------------------------------\n");
> >
> > next = rb_first_cached(&sched->sorted_atom_root);
> >
> > --
> > 2.28.0.709.gb0816b6eb0-goog

2020-10-13 16:28:06

by Arnaldo Carvalho de Melo

[permalink] [raw]
Subject: Re: [PATCH] perf: sched: Show start of latency as well

Em Sat, Sep 26, 2020 at 11:10:46PM +0900, Namhyung Kim escreveu:
> Hi Joel,
>
> On Sat, Sep 26, 2020 at 8:56 AM Joel Fernandes (Google)
> <[email protected]> wrote:
> >
> > perf sched latency is really useful at showing worst-case latencies that task
> > encountered since wakeup. However it shows only the end of the latency. Often
> > times the start of a latency is interesting as it can show what else was going
> > on at the time to cause the latency. I certainly myself spending a lot of time
> > backtracking to the start of the latency in "perf sched script" which wastes a
> > lot of time.
> >
> > This patch therefore adds a new column "Max delay start". Considering this,
> > also rename "Maximum delay at" to "Max delay end" as its easier to understand.
>
> Oh, I thought we print start time not the end time. I think it's better
> to print start time but others may think differently.
>
> Actually we can calculate the start time from the end time and the
> latency but it'd be convenient if the tool does that for us (as they are
> printed in different units). Then the remaining concern is the screen
> width (of 114 or 115?) but I think it should be fine for most of us.
>
> Acked-by: Namhyung Kim <[email protected]>

Thanks, applied.

- Arnaldo

>
> Thanks
> Namhyung
>
> >
> > Signed-off-by: Joel Fernandes (Google) <[email protected]>
> >
> >
> > ---
> > A sample output can be seen after applying patch:
> > https://hastebin.com/raw/ivinimaler
> >
> > tools/perf/builtin-sched.c | 24 ++++++++++++++----------
> > 1 file changed, 14 insertions(+), 10 deletions(-)
> >
> > diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
> > index 459e4229945e..2791da1fe5f7 100644
> > --- a/tools/perf/builtin-sched.c
> > +++ b/tools/perf/builtin-sched.c
> > @@ -130,7 +130,8 @@ struct work_atoms {
> > struct thread *thread;
> > struct rb_node node;
> > u64 max_lat;
> > - u64 max_lat_at;
> > + u64 max_lat_start;
> > + u64 max_lat_end;
> > u64 total_lat;
> > u64 nb_atoms;
> > u64 total_runtime;
> > @@ -1096,7 +1097,8 @@ add_sched_in_event(struct work_atoms *atoms, u64 timestamp)
> > atoms->total_lat += delta;
> > if (delta > atoms->max_lat) {
> > atoms->max_lat = delta;
> > - atoms->max_lat_at = timestamp;
> > + atoms->max_lat_start = atom->wake_up_time;
> > + atoms->max_lat_end = timestamp;
> > }
> > atoms->nb_atoms++;
> > }
> > @@ -1322,7 +1324,7 @@ static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_
> > int i;
> > int ret;
> > u64 avg;
> > - char max_lat_at[32];
> > + char max_lat_start[32], max_lat_end[32];
> >
> > if (!work_list->nb_atoms)
> > return;
> > @@ -1344,13 +1346,14 @@ static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_
> > printf(" ");
> >
> > avg = work_list->total_lat / work_list->nb_atoms;
> > - timestamp__scnprintf_usec(work_list->max_lat_at, max_lat_at, sizeof(max_lat_at));
> > + timestamp__scnprintf_usec(work_list->max_lat_start, max_lat_start, sizeof(max_lat_start));
> > + timestamp__scnprintf_usec(work_list->max_lat_end, max_lat_end, sizeof(max_lat_end));
> >
> > - printf("|%11.3f ms |%9" PRIu64 " | avg:%9.3f ms | max:%9.3f ms | max at: %13s s\n",
> > + printf("|%11.3f ms |%9" PRIu64 " | avg:%8.3f ms | max:%8.3f ms | max start: %12s s | max end: %12s s\n",
> > (double)work_list->total_runtime / NSEC_PER_MSEC,
> > work_list->nb_atoms, (double)avg / NSEC_PER_MSEC,
> > (double)work_list->max_lat / NSEC_PER_MSEC,
> > - max_lat_at);
> > + max_lat_start, max_lat_end);
> > }
> >
> > static int pid_cmp(struct work_atoms *l, struct work_atoms *r)
> > @@ -3118,7 +3121,8 @@ static void __merge_work_atoms(struct rb_root_cached *root, struct work_atoms *d
> > list_splice(&data->work_list, &this->work_list);
> > if (this->max_lat < data->max_lat) {
> > this->max_lat = data->max_lat;
> > - this->max_lat_at = data->max_lat_at;
> > + this->max_lat_start = data->max_lat_start;
> > + this->max_lat_end = data->max_lat_end;
> > }
> > zfree(&data);
> > return;
> > @@ -3157,9 +3161,9 @@ static int perf_sched__lat(struct perf_sched *sched)
> > perf_sched__merge_lat(sched);
> > perf_sched__sort_lat(sched);
> >
> > - printf("\n -----------------------------------------------------------------------------------------------------------------\n");
> > - printf(" Task | Runtime ms | Switches | Average delay ms | Maximum delay ms | Maximum delay at |\n");
> > - printf(" -----------------------------------------------------------------------------------------------------------------\n");
> > + printf("\n -------------------------------------------------------------------------------------------------------------------------------------------\n");
> > + printf(" Task | Runtime ms | Switches | Avg delay ms | Max delay ms | Max delay start | Max delay end |\n");
> > + printf(" -------------------------------------------------------------------------------------------------------------------------------------------\n");
> >
> > next = rb_first_cached(&sched->sorted_atom_root);
> >
> > --
> > 2.28.0.709.gb0816b6eb0-goog

--

- Arnaldo

2020-10-13 18:28:04

by Arnaldo Carvalho de Melo

[permalink] [raw]
Subject: Re: [PATCH] perf: sched: Show start of latency as well

Em Sat, Sep 26, 2020 at 11:45:39AM -0400, Joel Fernandes escreveu:
> On Sat, Sep 26, 2020 at 10:10 AM Namhyung Kim <[email protected]> wrote:
> [...]
> > On Sat, Sep 26, 2020 at 8:56 AM Joel Fernandes (Google)
> > <[email protected]> wrote:
> > >
> > > perf sched latency is really useful at showing worst-case latencies that task
> > > encountered since wakeup. However it shows only the end of the latency. Often
> > > times the start of a latency is interesting as it can show what else was going
> > > on at the time to cause the latency. I certainly myself spending a lot of time
> > > backtracking to the start of the latency in "perf sched script" which wastes a
> > > lot of time.
> > >
> > > This patch therefore adds a new column "Max delay start". Considering this,
> > > also rename "Maximum delay at" to "Max delay end" as its easier to understand.
> >
> > Oh, I thought we print start time not the end time. I think it's better
> > to print start time but others may think differently.
>
> Right, glad you think so too.
>
> > Actually we can calculate the start time from the end time and the
> > latency but it'd be convenient if the tool does that for us (as they are
> > printed in different units).
>
> Correct, but as you mention it is more burdensome to calculate each time.
>
> > Then the remaining concern is the screen
> > width (of 114 or 115?) but I think it should be fine for most of us.
>
> It is 114 without the patch and 140 with it. I tried my best to trim
> it a little. It fits fine on my screen with the patch. So I think we
> should be good!

So, what do you think of removing all the redundant info so that we can
get it in a more compact way, i.e.:

| Delays
-----------------------------------------------------------------------------------------------------
Task | Runtime | Switches | Avg | Max | Max start | Max end |
-----------------------------------------------------------------------------------------------------
MediaScannerSer:11936 | 651.296 ms | 67978 | 0.113 ms | 77.250 ms | 477.691360 s | 477.768610 s
[email protected]:(3) | 0.000 ms | 3440 | 0.034 ms | 72.267 ms | 477.697051 s | 477.769318 s
AudioOut_1D:8112 | 0.000 ms | 2588 | 0.083 ms | 64.020 ms | 477.710740 s | 477.774760 s
Time-limited te:14973 | 7966.090 ms | 24807 | 0.073 ms | 15.563 ms | 477.162746 s | 477.178309 s
surfaceflinger:8049 | 9.680 ms | 603 | 0.063 ms | 13.275 ms | 476.931791 s | 476.945067 s
HeapTaskDaemon:(3) | 1588.830 ms | 7040 | 0.065 ms | 6.880 ms | 473.666043 s | 473.672922 s
mount-passthrou:(3) | 1370.809 ms | 68904 | 0.011 ms | 6.524 ms | 478.090630 s | 478.097154 s
ReferenceQueueD:(3) | 11.794 ms | 1725 | 0.014 ms | 6.521 ms | 476.119782 s | 476.126303 s
writer:14077 | 18.410 ms | 1427 | 0.036 ms | 6.131 ms | 474.169675 s | 474.175805 s

> > Acked-by: Namhyung Kim <[email protected]>
>
> Thanks, Namyhung!
>
> - Joel
>
> > > Signed-off-by: Joel Fernandes (Google) <[email protected]>
> > >
> > >
> > > ---
> > > A sample output can be seen after applying patch:
> > > https://hastebin.com/raw/ivinimaler
> > >
> > > tools/perf/builtin-sched.c | 24 ++++++++++++++----------
> > > 1 file changed, 14 insertions(+), 10 deletions(-)
> > >
> > > diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
> > > index 459e4229945e..2791da1fe5f7 100644
> > > --- a/tools/perf/builtin-sched.c
> > > +++ b/tools/perf/builtin-sched.c
> > > @@ -130,7 +130,8 @@ struct work_atoms {
> > > struct thread *thread;
> > > struct rb_node node;
> > > u64 max_lat;
> > > - u64 max_lat_at;
> > > + u64 max_lat_start;
> > > + u64 max_lat_end;
> > > u64 total_lat;
> > > u64 nb_atoms;
> > > u64 total_runtime;
> > > @@ -1096,7 +1097,8 @@ add_sched_in_event(struct work_atoms *atoms, u64 timestamp)
> > > atoms->total_lat += delta;
> > > if (delta > atoms->max_lat) {
> > > atoms->max_lat = delta;
> > > - atoms->max_lat_at = timestamp;
> > > + atoms->max_lat_start = atom->wake_up_time;
> > > + atoms->max_lat_end = timestamp;
> > > }
> > > atoms->nb_atoms++;
> > > }
> > > @@ -1322,7 +1324,7 @@ static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_
> > > int i;
> > > int ret;
> > > u64 avg;
> > > - char max_lat_at[32];
> > > + char max_lat_start[32], max_lat_end[32];
> > >
> > > if (!work_list->nb_atoms)
> > > return;
> > > @@ -1344,13 +1346,14 @@ static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_
> > > printf(" ");
> > >
> > > avg = work_list->total_lat / work_list->nb_atoms;
> > > - timestamp__scnprintf_usec(work_list->max_lat_at, max_lat_at, sizeof(max_lat_at));
> > > + timestamp__scnprintf_usec(work_list->max_lat_start, max_lat_start, sizeof(max_lat_start));
> > > + timestamp__scnprintf_usec(work_list->max_lat_end, max_lat_end, sizeof(max_lat_end));
> > >
> > > - printf("|%11.3f ms |%9" PRIu64 " | avg:%9.3f ms | max:%9.3f ms | max at: %13s s\n",
> > > + printf("|%11.3f ms |%9" PRIu64 " | avg:%8.3f ms | max:%8.3f ms | max start: %12s s | max end: %12s s\n",
> > > (double)work_list->total_runtime / NSEC_PER_MSEC,
> > > work_list->nb_atoms, (double)avg / NSEC_PER_MSEC,
> > > (double)work_list->max_lat / NSEC_PER_MSEC,
> > > - max_lat_at);
> > > + max_lat_start, max_lat_end);
> > > }
> > >
> > > static int pid_cmp(struct work_atoms *l, struct work_atoms *r)
> > > @@ -3118,7 +3121,8 @@ static void __merge_work_atoms(struct rb_root_cached *root, struct work_atoms *d
> > > list_splice(&data->work_list, &this->work_list);
> > > if (this->max_lat < data->max_lat) {
> > > this->max_lat = data->max_lat;
> > > - this->max_lat_at = data->max_lat_at;
> > > + this->max_lat_start = data->max_lat_start;
> > > + this->max_lat_end = data->max_lat_end;
> > > }
> > > zfree(&data);
> > > return;
> > > @@ -3157,9 +3161,9 @@ static int perf_sched__lat(struct perf_sched *sched)
> > > perf_sched__merge_lat(sched);
> > > perf_sched__sort_lat(sched);
> > >
> > > - printf("\n -----------------------------------------------------------------------------------------------------------------\n");
> > > - printf(" Task | Runtime ms | Switches | Average delay ms | Maximum delay ms | Maximum delay at |\n");
> > > - printf(" -----------------------------------------------------------------------------------------------------------------\n");
> > > + printf("\n -------------------------------------------------------------------------------------------------------------------------------------------\n");
> > > + printf(" Task | Runtime ms | Switches | Avg delay ms | Max delay ms | Max delay start | Max delay end |\n");
> > > + printf(" -------------------------------------------------------------------------------------------------------------------------------------------\n");
> > >
> > > next = rb_first_cached(&sched->sorted_atom_root);
> > >
> > > --
> > > 2.28.0.709.gb0816b6eb0-goog

--

- Arnaldo

2020-10-14 17:39:11

by Arnaldo Carvalho de Melo

[permalink] [raw]
Subject: Re: [PATCH] perf: sched: Show start of latency as well

Em Wed, Oct 14, 2020 at 11:05:17AM -0400, [email protected] escreveu:
> On Tue, Oct 13, 2020 at 09:37:48AM -0300, Arnaldo Carvalho de Melo wrote:
> > Em Sat, Sep 26, 2020 at 11:45:39AM -0400, Joel Fernandes escreveu:
> > > On Sat, Sep 26, 2020 at 10:10 AM Namhyung Kim <[email protected]> wrote:
> > > [...]
> > > > On Sat, Sep 26, 2020 at 8:56 AM Joel Fernandes (Google)
> > > > Then the remaining concern is the screen
> > > > width (of 114 or 115?) but I think it should be fine for most of us.

> > > It is 114 without the patch and 140 with it. I tried my best to trim
> > > it a little. It fits fine on my screen with the patch. So I think we
> > > should be good!

> > So, what do you think of removing all the redundant info so that we can
> > get it in a more compact way, i.e.:

> Doing it this way looks good to me too!

Ingo, do you have a problem with that? I see that if you have it as it
is now one can just copy a line out of the output and have the relevant
column tags in each line, like with cyclictest, so there is value in
keeping it as is.

- Arnaldo

> > -----------------------------------------------------------------------------------------------------
> > Task | Runtime | Switches | Avg | Max | Max start | Max end |
> > -----------------------------------------------------------------------------------------------------
> > MediaScannerSer:11936 | 651.296 ms | 67978 | 0.113 ms | 77.250 ms | 477.691360 s | 477.768610 s
> > [email protected]:(3) | 0.000 ms | 3440 | 0.034 ms | 72.267 ms | 477.697051 s | 477.769318 s
> > AudioOut_1D:8112 | 0.000 ms | 2588 | 0.083 ms | 64.020 ms | 477.710740 s | 477.774760 s
> > Time-limited te:14973 | 7966.090 ms | 24807 | 0.073 ms | 15.563 ms | 477.162746 s | 477.178309 s
> > surfaceflinger:8049 | 9.680 ms | 603 | 0.063 ms | 13.275 ms | 476.931791 s | 476.945067 s
> > HeapTaskDaemon:(3) | 1588.830 ms | 7040 | 0.065 ms | 6.880 ms | 473.666043 s | 473.672922 s
> > mount-passthrou:(3) | 1370.809 ms | 68904 | 0.011 ms | 6.524 ms | 478.090630 s | 478.097154 s
> > ReferenceQueueD:(3) | 11.794 ms | 1725 | 0.014 ms | 6.521 ms | 476.119782 s | 476.126303 s
> > writer:14077 | 18.410 ms | 1427 | 0.036 ms | 6.131 ms | 474.169675 s | 474.175805 s
> >
> > > > Acked-by: Namhyung Kim <[email protected]>
> > >
> > > Thanks, Namyhung!
> > >
> > > - Joel
> > >
> > > > > Signed-off-by: Joel Fernandes (Google) <[email protected]>
> > > > >
> > > > >
> > > > > ---
> > > > > A sample output can be seen after applying patch:
> > > > > https://hastebin.com/raw/ivinimaler
> > > > >
> > > > > tools/perf/builtin-sched.c | 24 ++++++++++++++----------
> > > > > 1 file changed, 14 insertions(+), 10 deletions(-)
> > > > >
> > > > > diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
> > > > > index 459e4229945e..2791da1fe5f7 100644
> > > > > --- a/tools/perf/builtin-sched.c
> > > > > +++ b/tools/perf/builtin-sched.c
> > > > > @@ -130,7 +130,8 @@ struct work_atoms {
> > > > > struct thread *thread;
> > > > > struct rb_node node;
> > > > > u64 max_lat;
> > > > > - u64 max_lat_at;
> > > > > + u64 max_lat_start;
> > > > > + u64 max_lat_end;
> > > > > u64 total_lat;
> > > > > u64 nb_atoms;
> > > > > u64 total_runtime;
> > > > > @@ -1096,7 +1097,8 @@ add_sched_in_event(struct work_atoms *atoms, u64 timestamp)
> > > > > atoms->total_lat += delta;
> > > > > if (delta > atoms->max_lat) {
> > > > > atoms->max_lat = delta;
> > > > > - atoms->max_lat_at = timestamp;
> > > > > + atoms->max_lat_start = atom->wake_up_time;
> > > > > + atoms->max_lat_end = timestamp;
> > > > > }
> > > > > atoms->nb_atoms++;
> > > > > }
> > > > > @@ -1322,7 +1324,7 @@ static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_
> > > > > int i;
> > > > > int ret;
> > > > > u64 avg;
> > > > > - char max_lat_at[32];
> > > > > + char max_lat_start[32], max_lat_end[32];
> > > > >
> > > > > if (!work_list->nb_atoms)
> > > > > return;
> > > > > @@ -1344,13 +1346,14 @@ static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_
> > > > > printf(" ");
> > > > >
> > > > > avg = work_list->total_lat / work_list->nb_atoms;
> > > > > - timestamp__scnprintf_usec(work_list->max_lat_at, max_lat_at, sizeof(max_lat_at));
> > > > > + timestamp__scnprintf_usec(work_list->max_lat_start, max_lat_start, sizeof(max_lat_start));
> > > > > + timestamp__scnprintf_usec(work_list->max_lat_end, max_lat_end, sizeof(max_lat_end));
> > > > >
> > > > > - printf("|%11.3f ms |%9" PRIu64 " | avg:%9.3f ms | max:%9.3f ms | max at: %13s s\n",
> > > > > + printf("|%11.3f ms |%9" PRIu64 " | avg:%8.3f ms | max:%8.3f ms | max start: %12s s | max end: %12s s\n",
> > > > > (double)work_list->total_runtime / NSEC_PER_MSEC,
> > > > > work_list->nb_atoms, (double)avg / NSEC_PER_MSEC,
> > > > > (double)work_list->max_lat / NSEC_PER_MSEC,
> > > > > - max_lat_at);
> > > > > + max_lat_start, max_lat_end);
> > > > > }
> > > > >
> > > > > static int pid_cmp(struct work_atoms *l, struct work_atoms *r)
> > > > > @@ -3118,7 +3121,8 @@ static void __merge_work_atoms(struct rb_root_cached *root, struct work_atoms *d
> > > > > list_splice(&data->work_list, &this->work_list);
> > > > > if (this->max_lat < data->max_lat) {
> > > > > this->max_lat = data->max_lat;
> > > > > - this->max_lat_at = data->max_lat_at;
> > > > > + this->max_lat_start = data->max_lat_start;
> > > > > + this->max_lat_end = data->max_lat_end;
> > > > > }
> > > > > zfree(&data);
> > > > > return;
> > > > > @@ -3157,9 +3161,9 @@ static int perf_sched__lat(struct perf_sched *sched)
> > > > > perf_sched__merge_lat(sched);
> > > > > perf_sched__sort_lat(sched);
> > > > >
> > > > > - printf("\n -----------------------------------------------------------------------------------------------------------------\n");
> > > > > - printf(" Task | Runtime ms | Switches | Average delay ms | Maximum delay ms | Maximum delay at |\n");
> > > > > - printf(" -----------------------------------------------------------------------------------------------------------------\n");
> > > > > + printf("\n -------------------------------------------------------------------------------------------------------------------------------------------\n");
> > > > > + printf(" Task | Runtime ms | Switches | Avg delay ms | Max delay ms | Max delay start | Max delay end |\n");
> > > > > + printf(" -------------------------------------------------------------------------------------------------------------------------------------------\n");
> > > > >
> > > > > next = rb_first_cached(&sched->sorted_atom_root);
> > > > >
> > > > > --
> > > > > 2.28.0.709.gb0816b6eb0-goog
> >
> > --
> >
> > - Arnaldo

--

- Arnaldo

2020-10-14 23:04:00

by Joel Fernandes

[permalink] [raw]
Subject: Re: [PATCH] perf: sched: Show start of latency as well

Hi Arnaldo,

On Tue, Oct 13, 2020 at 09:37:48AM -0300, Arnaldo Carvalho de Melo wrote:
> Em Sat, Sep 26, 2020 at 11:45:39AM -0400, Joel Fernandes escreveu:
> > On Sat, Sep 26, 2020 at 10:10 AM Namhyung Kim <[email protected]> wrote:
> > [...]
> > > On Sat, Sep 26, 2020 at 8:56 AM Joel Fernandes (Google)
> > > <[email protected]> wrote:
> > > >
> > > > perf sched latency is really useful at showing worst-case latencies that task
> > > > encountered since wakeup. However it shows only the end of the latency. Often
> > > > times the start of a latency is interesting as it can show what else was going
> > > > on at the time to cause the latency. I certainly myself spending a lot of time
> > > > backtracking to the start of the latency in "perf sched script" which wastes a
> > > > lot of time.
> > > >
> > > > This patch therefore adds a new column "Max delay start". Considering this,
> > > > also rename "Maximum delay at" to "Max delay end" as its easier to understand.
> > >
> > > Oh, I thought we print start time not the end time. I think it's better
> > > to print start time but others may think differently.
> >
> > Right, glad you think so too.
> >
> > > Actually we can calculate the start time from the end time and the
> > > latency but it'd be convenient if the tool does that for us (as they are
> > > printed in different units).
> >
> > Correct, but as you mention it is more burdensome to calculate each time.
> >
> > > Then the remaining concern is the screen
> > > width (of 114 or 115?) but I think it should be fine for most of us.
> >
> > It is 114 without the patch and 140 with it. I tried my best to trim
> > it a little. It fits fine on my screen with the patch. So I think we
> > should be good!
>
> So, what do you think of removing all the redundant info so that we can
> get it in a more compact way, i.e.:
> D

Doing it this way looks good to me too!

thanks,

- Joel


> | Delays
> -----------------------------------------------------------------------------------------------------
> Task | Runtime | Switches | Avg | Max | Max start | Max end |
> -----------------------------------------------------------------------------------------------------
> MediaScannerSer:11936 | 651.296 ms | 67978 | 0.113 ms | 77.250 ms | 477.691360 s | 477.768610 s
> [email protected]:(3) | 0.000 ms | 3440 | 0.034 ms | 72.267 ms | 477.697051 s | 477.769318 s
> AudioOut_1D:8112 | 0.000 ms | 2588 | 0.083 ms | 64.020 ms | 477.710740 s | 477.774760 s
> Time-limited te:14973 | 7966.090 ms | 24807 | 0.073 ms | 15.563 ms | 477.162746 s | 477.178309 s
> surfaceflinger:8049 | 9.680 ms | 603 | 0.063 ms | 13.275 ms | 476.931791 s | 476.945067 s
> HeapTaskDaemon:(3) | 1588.830 ms | 7040 | 0.065 ms | 6.880 ms | 473.666043 s | 473.672922 s
> mount-passthrou:(3) | 1370.809 ms | 68904 | 0.011 ms | 6.524 ms | 478.090630 s | 478.097154 s
> ReferenceQueueD:(3) | 11.794 ms | 1725 | 0.014 ms | 6.521 ms | 476.119782 s | 476.126303 s
> writer:14077 | 18.410 ms | 1427 | 0.036 ms | 6.131 ms | 474.169675 s | 474.175805 s
>
> > > Acked-by: Namhyung Kim <[email protected]>
> >
> > Thanks, Namyhung!
> >
> > - Joel
> >
> > > > Signed-off-by: Joel Fernandes (Google) <[email protected]>
> > > >
> > > >
> > > > ---
> > > > A sample output can be seen after applying patch:
> > > > https://hastebin.com/raw/ivinimaler
> > > >
> > > > tools/perf/builtin-sched.c | 24 ++++++++++++++----------
> > > > 1 file changed, 14 insertions(+), 10 deletions(-)
> > > >
> > > > diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
> > > > index 459e4229945e..2791da1fe5f7 100644
> > > > --- a/tools/perf/builtin-sched.c
> > > > +++ b/tools/perf/builtin-sched.c
> > > > @@ -130,7 +130,8 @@ struct work_atoms {
> > > > struct thread *thread;
> > > > struct rb_node node;
> > > > u64 max_lat;
> > > > - u64 max_lat_at;
> > > > + u64 max_lat_start;
> > > > + u64 max_lat_end;
> > > > u64 total_lat;
> > > > u64 nb_atoms;
> > > > u64 total_runtime;
> > > > @@ -1096,7 +1097,8 @@ add_sched_in_event(struct work_atoms *atoms, u64 timestamp)
> > > > atoms->total_lat += delta;
> > > > if (delta > atoms->max_lat) {
> > > > atoms->max_lat = delta;
> > > > - atoms->max_lat_at = timestamp;
> > > > + atoms->max_lat_start = atom->wake_up_time;
> > > > + atoms->max_lat_end = timestamp;
> > > > }
> > > > atoms->nb_atoms++;
> > > > }
> > > > @@ -1322,7 +1324,7 @@ static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_
> > > > int i;
> > > > int ret;
> > > > u64 avg;
> > > > - char max_lat_at[32];
> > > > + char max_lat_start[32], max_lat_end[32];
> > > >
> > > > if (!work_list->nb_atoms)
> > > > return;
> > > > @@ -1344,13 +1346,14 @@ static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_
> > > > printf(" ");
> > > >
> > > > avg = work_list->total_lat / work_list->nb_atoms;
> > > > - timestamp__scnprintf_usec(work_list->max_lat_at, max_lat_at, sizeof(max_lat_at));
> > > > + timestamp__scnprintf_usec(work_list->max_lat_start, max_lat_start, sizeof(max_lat_start));
> > > > + timestamp__scnprintf_usec(work_list->max_lat_end, max_lat_end, sizeof(max_lat_end));
> > > >
> > > > - printf("|%11.3f ms |%9" PRIu64 " | avg:%9.3f ms | max:%9.3f ms | max at: %13s s\n",
> > > > + printf("|%11.3f ms |%9" PRIu64 " | avg:%8.3f ms | max:%8.3f ms | max start: %12s s | max end: %12s s\n",
> > > > (double)work_list->total_runtime / NSEC_PER_MSEC,
> > > > work_list->nb_atoms, (double)avg / NSEC_PER_MSEC,
> > > > (double)work_list->max_lat / NSEC_PER_MSEC,
> > > > - max_lat_at);
> > > > + max_lat_start, max_lat_end);
> > > > }
> > > >
> > > > static int pid_cmp(struct work_atoms *l, struct work_atoms *r)
> > > > @@ -3118,7 +3121,8 @@ static void __merge_work_atoms(struct rb_root_cached *root, struct work_atoms *d
> > > > list_splice(&data->work_list, &this->work_list);
> > > > if (this->max_lat < data->max_lat) {
> > > > this->max_lat = data->max_lat;
> > > > - this->max_lat_at = data->max_lat_at;
> > > > + this->max_lat_start = data->max_lat_start;
> > > > + this->max_lat_end = data->max_lat_end;
> > > > }
> > > > zfree(&data);
> > > > return;
> > > > @@ -3157,9 +3161,9 @@ static int perf_sched__lat(struct perf_sched *sched)
> > > > perf_sched__merge_lat(sched);
> > > > perf_sched__sort_lat(sched);
> > > >
> > > > - printf("\n -----------------------------------------------------------------------------------------------------------------\n");
> > > > - printf(" Task | Runtime ms | Switches | Average delay ms | Maximum delay ms | Maximum delay at |\n");
> > > > - printf(" -----------------------------------------------------------------------------------------------------------------\n");
> > > > + printf("\n -------------------------------------------------------------------------------------------------------------------------------------------\n");
> > > > + printf(" Task | Runtime ms | Switches | Avg delay ms | Max delay ms | Max delay start | Max delay end |\n");
> > > > + printf(" -------------------------------------------------------------------------------------------------------------------------------------------\n");
> > > >
> > > > next = rb_first_cached(&sched->sorted_atom_root);
> > > >
> > > > --
> > > > 2.28.0.709.gb0816b6eb0-goog
>
> --
>
> - Arnaldo