perf sched map supports cpu filter.
However, even with cpu filters active, any context switch currently
corresponds to a separate line.
As result, context switches on irrelevant cpus result to redundant lines,
which makes the output particlularly difficult to read on wide
architectures.
Fix it by skipping printing for irrelevant CPUs.
Example snippet of output before fix:
*B0 1.461147 secs
B0
B0
B0
*G0 1.517139 secs
After fix:
*B0 1.461147 secs
*G0 1.517139 secs
Signed-off-by: Fernand Sieber <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Arnaldo Carvalho de Melo <[email protected]>
Cc: Alexander Shishkin <[email protected]>
---
tools/perf/builtin-sched.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 7422c930abaf..aa59f763ca46 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -1594,8 +1594,6 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel,
sched->curr_thread[this_cpu.cpu] = thread__get(sched_in);
- printf(" ");
-
new_shortname = 0;
if (!tr->shortname[0]) {
if (!strcmp(thread__comm_str(sched_in), "swapper")) {
@@ -1622,6 +1620,11 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel,
new_shortname = 1;
}
+ if (sched->map.cpus && !perf_cpu_map__has(sched->map.cpus, this_cpu))
+ goto out;
+
+ printf(" ");
+
for (i = 0; i < cpus_nr; i++) {
struct perf_cpu cpu = {
.cpu = sched->map.comp ? sched->map.comp_cpus[i].cpu : i,
@@ -1656,9 +1659,6 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel,
color_fprintf(stdout, color, " ");
}
- if (sched->map.cpus && !perf_cpu_map__has(sched->map.cpus, this_cpu))
- goto out;
-
timestamp__scnprintf_usec(timestamp, stimestamp, sizeof(stimestamp));
color_fprintf(stdout, color, " %12s secs ", stimestamp);
if (new_shortname || tr->comm_changed || (verbose > 0 && thread__tid(sched_in))) {
@@ -1675,9 +1675,9 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel,
if (sched->map.comp && new_cpu)
color_fprintf(stdout, color, " (CPU %d)", this_cpu);
-out:
color_fprintf(stdout, color, "\n");
+out:
thread__put(sched_in);
return 0;
--
2.40.1
Hello,
On Fri, Jun 14, 2024 at 12:49 AM Fernand Sieber <[email protected]> wrote:
>
> perf sched map supports cpu filter.
> However, even with cpu filters active, any context switch currently
> corresponds to a separate line.
> As result, context switches on irrelevant cpus result to redundant lines,
> which makes the output particlularly difficult to read on wide
> architectures.
>
> Fix it by skipping printing for irrelevant CPUs.
>
> Example snippet of output before fix:
>
> *B0 1.461147 secs
> B0
> B0
> B0
> *G0 1.517139 secs
>
> After fix:
>
> *B0 1.461147 secs
> *G0 1.517139 secs
>
> Signed-off-by: Fernand Sieber <[email protected]>
> Cc: Peter Zijlstra <[email protected]>
> Cc: Ingo Molnar <[email protected]>
> Cc: Arnaldo Carvalho de Melo <[email protected]>
> Cc: Alexander Shishkin <[email protected]>
Acked-by: Namhyung Kim <[email protected]>
Thanks,
Namhyung
> ---
> tools/perf/builtin-sched.c | 12 ++++++------
> 1 file changed, 6 insertions(+), 6 deletions(-)
>
> diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
> index 7422c930abaf..aa59f763ca46 100644
> --- a/tools/perf/builtin-sched.c
> +++ b/tools/perf/builtin-sched.c
> @@ -1594,8 +1594,6 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel,
>
> sched->curr_thread[this_cpu.cpu] = thread__get(sched_in);
>
> - printf(" ");
> -
> new_shortname = 0;
> if (!tr->shortname[0]) {
> if (!strcmp(thread__comm_str(sched_in), "swapper")) {
> @@ -1622,6 +1620,11 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel,
> new_shortname = 1;
> }
>
> + if (sched->map.cpus && !perf_cpu_map__has(sched->map.cpus, this_cpu))
> + goto out;
> +
> + printf(" ");
> +
> for (i = 0; i < cpus_nr; i++) {
> struct perf_cpu cpu = {
> .cpu = sched->map.comp ? sched->map.comp_cpus[i].cpu : i,
> @@ -1656,9 +1659,6 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel,
> color_fprintf(stdout, color, " ");
> }
>
> - if (sched->map.cpus && !perf_cpu_map__has(sched->map.cpus, this_cpu))
> - goto out;
> -
> timestamp__scnprintf_usec(timestamp, stimestamp, sizeof(stimestamp));
> color_fprintf(stdout, color, " %12s secs ", stimestamp);
> if (new_shortname || tr->comm_changed || (verbose > 0 && thread__tid(sched_in))) {
> @@ -1675,9 +1675,9 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel,
> if (sched->map.comp && new_cpu)
> color_fprintf(stdout, color, " (CPU %d)", this_cpu);
>
> -out:
> color_fprintf(stdout, color, "\n");
>
> +out:
> thread__put(sched_in);
>
> return 0;
> --
> 2.40.1
>
>
Hi Fernand,
On 14/06/24 13:05, Fernand Sieber wrote:
> perf sched map supports cpu filter.
> However, even with cpu filters active, any context switch currently
> corresponds to a separate line.
> As result, context switches on irrelevant cpus result to redundant lines,
> which makes the output particlularly difficult to read on wide
> architectures.
>
> Fix it by skipping printing for irrelevant CPUs.
>
> Example snippet of output before fix:
>
> *B0 1.461147 secs
> B0
> B0
> B0
> *G0 1.517139 secs
>
> After fix:
>
> *B0 1.461147 secs
> *G0 1.517139 secs
Yes, this makes sense. The current implementation doesn't even print timestamp
for the redundant lines as shown in the example below.
. *F0 708529.114889 secs F0 => schbench:278841
. F0
. F0
. *. 708529.114919 secs
It makes sense to remove them entirely since we can still infer the sched-out
time for the selected CPUs implicitly.
Reviewed-and-tested-by: Madadi Vineeth Reddy <[email protected]>
Thanks and Regards
Madadi Vineeth Reddy
>
> Signed-off-by: Fernand Sieber <[email protected]>
> Cc: Peter Zijlstra <[email protected]>
> Cc: Ingo Molnar <[email protected]>
> Cc: Arnaldo Carvalho de Melo <[email protected]>
> Cc: Alexander Shishkin <[email protected]>
> ---
> tools/perf/builtin-sched.c | 12 ++++++------
> 1 file changed, 6 insertions(+), 6 deletions(-)
>
> diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
> index 7422c930abaf..aa59f763ca46 100644
> --- a/tools/perf/builtin-sched.c
> +++ b/tools/perf/builtin-sched.c
> @@ -1594,8 +1594,6 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel,
>
> sched->curr_thread[this_cpu.cpu] = thread__get(sched_in);
>
> - printf(" ");
> -
> new_shortname = 0;
> if (!tr->shortname[0]) {
> if (!strcmp(thread__comm_str(sched_in), "swapper")) {
> @@ -1622,6 +1620,11 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel,
> new_shortname = 1;
> }
>
> + if (sched->map.cpus && !perf_cpu_map__has(sched->map.cpus, this_cpu))
> + goto out;
> +
> + printf(" ");
> +
> for (i = 0; i < cpus_nr; i++) {
> struct perf_cpu cpu = {
> .cpu = sched->map.comp ? sched->map.comp_cpus[i].cpu : i,
> @@ -1656,9 +1659,6 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel,
> color_fprintf(stdout, color, " ");
> }
>
> - if (sched->map.cpus && !perf_cpu_map__has(sched->map.cpus, this_cpu))
> - goto out;
> -
> timestamp__scnprintf_usec(timestamp, stimestamp, sizeof(stimestamp));
> color_fprintf(stdout, color, " %12s secs ", stimestamp);
> if (new_shortname || tr->comm_changed || (verbose > 0 && thread__tid(sched_in))) {
> @@ -1675,9 +1675,9 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel,
> if (sched->map.comp && new_cpu)
> color_fprintf(stdout, color, " (CPU %d)", this_cpu);
>
> -out:
> color_fprintf(stdout, color, "\n");
>
> +out:
> thread__put(sched_in);
>
> return 0;
Hi Fernand,
On 14/06/24 13:05, Fernand Sieber wrote:
> perf sched map supports cpu filter.
> However, even with cpu filters active, any context switch currently
> corresponds to a separate line.
> As result, context switches on irrelevant cpus result to redundant lines,
> which makes the output particlularly difficult to read on wide
> architectures.
>
> Fix it by skipping printing for irrelevant CPUs.
>
> Example snippet of output before fix:
>
> *B0 1.461147 secs
> B0
> B0
> B0
> *G0 1.517139 secs
>
> After fix:
>
> *B0 1.461147 secs
> *G0 1.517139 secs
Yes, this makes sense. The current implementation doesn't even print timestamp
for the redundant lines as shown in the example below.
. *F0 708529.114889 secs F0 => schbench:278841
. F0
. F0
. *. 708529.114919 secs
It makes sense to remove them entirely since we can still infer the sched-out
time for the selected CPUs implicitly.
Reviewed-and-tested-by: Madadi Vineeth Reddy <[email protected]>
Thanks and Regards
Madadi Vineeth Reddy
>
> Signed-off-by: Fernand Sieber <[email protected]>
> Cc: Peter Zijlstra <[email protected]>
> Cc: Ingo Molnar <[email protected]>
> Cc: Arnaldo Carvalho de Melo <[email protected]>
> Cc: Alexander Shishkin <[email protected]>
> ---
> tools/perf/builtin-sched.c | 12 ++++++------
> 1 file changed, 6 insertions(+), 6 deletions(-)
>
> diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
> index 7422c930abaf..aa59f763ca46 100644
> --- a/tools/perf/builtin-sched.c
> +++ b/tools/perf/builtin-sched.c
> @@ -1594,8 +1594,6 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel,
>
> sched->curr_thread[this_cpu.cpu] = thread__get(sched_in);
>
> - printf(" ");
> -
> new_shortname = 0;
> if (!tr->shortname[0]) {
> if (!strcmp(thread__comm_str(sched_in), "swapper")) {
> @@ -1622,6 +1620,11 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel,
> new_shortname = 1;
> }
>
> + if (sched->map.cpus && !perf_cpu_map__has(sched->map.cpus, this_cpu))
> + goto out;
> +
> + printf(" ");
> +
> for (i = 0; i < cpus_nr; i++) {
> struct perf_cpu cpu = {
> .cpu = sched->map.comp ? sched->map.comp_cpus[i].cpu : i,
> @@ -1656,9 +1659,6 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel,
> color_fprintf(stdout, color, " ");
> }
>
> - if (sched->map.cpus && !perf_cpu_map__has(sched->map.cpus, this_cpu))
> - goto out;
> -
> timestamp__scnprintf_usec(timestamp, stimestamp, sizeof(stimestamp));
> color_fprintf(stdout, color, " %12s secs ", stimestamp);
> if (new_shortname || tr->comm_changed || (verbose > 0 && thread__tid(sched_in))) {
> @@ -1675,9 +1675,9 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel,
> if (sched->map.comp && new_cpu)
> color_fprintf(stdout, color, " (CPU %d)", this_cpu);
>
> -out:
> color_fprintf(stdout, color, "\n");
>
> +out:
> thread__put(sched_in);
>
> return 0;
On Fri, 14 Jun 2024 09:35:17 +0200, Fernand Sieber wrote:
> perf sched map supports cpu filter.
> However, even with cpu filters active, any context switch currently
> corresponds to a separate line.
> As result, context switches on irrelevant cpus result to redundant lines,
> which makes the output particlularly difficult to read on wide
> architectures.
>
> [...]
Applied to perf-tools-next, thanks!
Best regards,
Namhyung