From: kan.liang@intel.com
To: peterz@infradead.org, mingo@redhat.com, acme@kernel.org,
        linux-kernel@vger.kernel.org
Cc: alexander.shishkin@linux.intel.com, tglx@linutronix.de,
        namhyung@kernel.org, jolsa@kernel.org, adrian.hunter@intel.com,
        wangnan0@huawei.com, mark.rutland@arm.com, andi@firstfloor.org,
        Kan Liang <kan.liang@intel.com>
Subject: [PATCH V2 08/13] perf tools: show kernel overhead
Date: Fri,  2 Dec 2016 16:19:16 -0500
Message-Id: <1480713561-6617-9-git-send-email-kan.liang@intel.com>
In-Reply-To: <1480713561-6617-1-git-send-email-kan.liang@intel.com>
References: <1480713561-6617-1-git-send-email-kan.liang@intel.com>
Sender: linux-kernel-owner@vger.kernel.org
Content-Length: 9012
Lines: 268

From: Kan Liang <kan.liang@intel.com>

The kernel overhead include sample overhead, multiplexing overhead and
side-band events overhead. All the information is printed in the head of
output file if the show profiling cost option is applied.
The result is sorted by CPU. If there is no available CPU information in
sample, using -1 instead.

Output when CPU is available
 # To display the perf.data header info, please use
 --header/--header-only options.
 #
 # ========
 # CPU       SAM    SAM cost(ns)       MUX    MUX cost(ns)        SB
 SB cost(ns)
 #   0    2261446     1991216491     122174     1128716680          0
 0
 #   1      17360       67130324     122174     4498665956          0
 0
 #   2      25279      112575486     122174     4460407877          0
 0
 #   3      35532      157216449     122174     4411951469          0
 0
... ...
 #  61      16248       78159121     122172     4495896103          0
 0
 #  62      18911       90880108     122172     4488922500          0
 0
 #  63      63292      322048750     122171     4380579279          0
 0
 # ========
 #
 #
 # Total Lost Samples: 0

Output when CPU is not available
 # To display the perf.data header info, please use
 --header/--header-only options.
 #
 # ========
 # CPU       SAM    SAM cost(ns)       MUX    MUX cost(ns)        SB
 SB cost(ns)
 #  -1    2044182     1558283028     102380     2146692288    1237358
 7556703763
 # ========
 #
 #
 # Total Lost Samples: 0

Signed-off-by: Kan Liang <kan.liang@intel.com>
---
 tools/perf/Documentation/perf-report.txt |  7 +++
 tools/perf/util/event.h                  | 10 ++++
 tools/perf/util/machine.c                | 28 +++++++++--
 tools/perf/util/session.c                | 85 +++++++++++++++++++++++++++++---
 4 files changed, 120 insertions(+), 10 deletions(-)

diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 36d196c..50a89ff 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -414,6 +414,13 @@ include::itrace.txt[]
 
 --show-profiling-cost:
 	Show extra time cost during perf profiling
+	Sort the extra time cost by CPU
+	If CPU information is not available in perf_sample, using -1 instead.
+	The time cost include:
+	- SAM: sample overhead. For x86, it's the time cost in perf NMI handler.
+	- MUX: multiplexing overhead. The time cost spends on rotate context.
+	- SB: side-band events overhead. The time cost spends on iterating all
+	      events that need to receive side-band events.
 
 include::callchain-overhead-calculation.txt[]
 
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index d96e215..dd4ec5c 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -245,6 +245,12 @@ enum auxtrace_error_type {
 	PERF_AUXTRACE_ERROR_MAX
 };
 
+struct total_overhead {
+	struct perf_overhead_entry	total_sample[MAX_NR_CPUS + 1];
+	struct perf_overhead_entry	total_mux[MAX_NR_CPUS + 1];
+	struct perf_overhead_entry	total_sb[MAX_NR_CPUS + 1];
+};
+
 /*
  * The kernel collects the number of events it couldn't send in a stretch and
  * when possible sends this number in a PERF_RECORD_LOST event. The number of
@@ -262,6 +268,9 @@ enum auxtrace_error_type {
  * multipling nr_events[PERF_EVENT_SAMPLE] by a frequency isn't possible to get
  * the total number of low level events, it is necessary to to sum all struct
  * sample_event.period and stash the result in total_period.
+ *
+ * The overhead tells the perf profiling time cost on each CPU. The cost can be
+ * divided into sampling cost, multiplexing cost and side-band events cost.
  */
 struct events_stats {
 	u64 total_period;
@@ -270,6 +279,7 @@ struct events_stats {
 	u64 total_lost_samples;
 	u64 total_aux_lost;
 	u64 total_invalid_chains;
+	struct total_overhead overhead;
 	u32 nr_events[PERF_RECORD_HEADER_MAX];
 	u32 nr_non_filtered_samples;
 	u32 nr_lost_warned;
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 02c8f7a..72c4412 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -557,9 +557,31 @@ int machine__process_switch_event(struct machine *machine __maybe_unused,
 
 int machine__process_overhead_event(struct machine *machine __maybe_unused,
 				    union perf_event *event,
-				    struct perf_sample *sample __maybe_unused)
-{
-	dump_printf("\tUNSUPPORT TYPE 0x%lx!\n", event->overhead.type);
+				    struct perf_sample *sample)
+{
+	switch (event->overhead.type) {
+	case PERF_PMU_SAMPLE_OVERHEAD:
+		dump_printf(" SAMPLE nr: %llu  time: %llu cpu %d\n",
+			    event->overhead.entry.nr,
+			    event->overhead.entry.time,
+			    sample->cpu);
+		break;
+	case PERF_CORE_MUX_OVERHEAD:
+		dump_printf(" MULTIPLEXING nr: %llu  time: %llu cpu %d\n",
+			    event->overhead.entry.nr,
+			    event->overhead.entry.time,
+			    sample->cpu);
+		break;
+	case PERF_CORE_SB_OVERHEAD:
+		dump_printf(" SIDE-BAND nr: %llu  time: %llu cpu %d\n",
+			    event->overhead.entry.nr,
+			    event->overhead.entry.time,
+			    sample->cpu);
+		break;
+	default:
+		dump_printf("\tUNSUPPORT TYPE 0x%lx!\n", event->overhead.type);
+		return 0;
+	}
 	return 0;
 }
 
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 9de4f74..1dc32e3 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1207,6 +1207,36 @@ static int
 					    &sample->read.one, machine);
 }
 
+static void
+overhead_stats_update(struct perf_tool *tool,
+		      struct perf_evlist *evlist,
+		      union perf_event *event,
+		      int cpu)
+{
+	if (tool->overhead != perf_event__process_overhead)
+		return;
+
+	if (cpu < 0)
+		cpu = MAX_NR_CPUS;
+
+	switch (event->overhead.type) {
+	case PERF_PMU_SAMPLE_OVERHEAD:
+		evlist->stats.overhead.total_sample[cpu].nr += event->overhead.entry.nr;
+		evlist->stats.overhead.total_sample[cpu].time += event->overhead.entry.time;
+		break;
+	case PERF_CORE_MUX_OVERHEAD:
+		evlist->stats.overhead.total_mux[cpu].nr += event->overhead.entry.nr;
+		evlist->stats.overhead.total_mux[cpu].time += event->overhead.entry.time;
+		break;
+	case PERF_CORE_SB_OVERHEAD:
+		evlist->stats.overhead.total_sb[cpu].nr += event->overhead.entry.nr;
+		evlist->stats.overhead.total_sb[cpu].time += event->overhead.entry.time;
+		break;
+	default:
+		break;
+	}
+}
+
 static int machines__deliver_event(struct machines *machines,
 				   struct perf_evlist *evlist,
 				   union perf_event *event,
@@ -1271,6 +1301,7 @@ static int machines__deliver_event(struct machines *machines,
 	case PERF_RECORD_SWITCH_CPU_WIDE:
 		return tool->context_switch(tool, event, sample, machine);
 	case PERF_RECORD_OVERHEAD:
+		overhead_stats_update(tool, evlist, event, sample->cpu);
 		return tool->overhead(tool, event, sample, machine);
 	default:
 		++evlist->stats.nr_unknown_events;
@@ -2038,20 +2069,60 @@ void perf_session__fprintf_info(struct perf_session *session, FILE *fp,
 	fprintf(fp, "# ========\n#\n");
 }
 
-void perf_session__fprintf_overhead_info(struct perf_session *session, FILE *fp,
-					 const char *cpu_list __maybe_unused,
-					 unsigned long *cpu_bitmap __maybe_unused)
+static void
+perf_session__fprintf_overhead_body(struct perf_evlist *evlist,
+				    FILE *fp, int cpu)
 {
-	if (session == NULL || fp == NULL)
+	if (!evlist->stats.overhead.total_sample[cpu].nr &&
+	    !evlist->stats.overhead.total_mux[cpu].nr &&
+	    !evlist->stats.overhead.total_sb[cpu].nr)
 		return;
 
-	if (!perf_evlist__overhead(session->evlist)) {
-		fprintf(fp, "#\n# No profiling time cost information available.\n#\n");
+	fprintf(stdout, "# %3d ", (cpu == MAX_NR_CPUS) ? -1 : cpu);
+
+	fprintf(stdout, "%10llu %14llu ",
+		evlist->stats.overhead.total_sample[cpu].nr,
+		evlist->stats.overhead.total_sample[cpu].time);
+	fprintf(stdout, "%10llu %14llu ",
+		evlist->stats.overhead.total_mux[cpu].nr,
+		evlist->stats.overhead.total_mux[cpu].time);
+	fprintf(stdout, "%10llu %14llu ",
+		evlist->stats.overhead.total_sb[cpu].nr,
+		evlist->stats.overhead.total_sb[cpu].time);
+	fprintf(fp, "\n");
+}
+
+void perf_session__fprintf_overhead_info(struct perf_session *session,
+					 FILE *fp,
+					 const char *cpu_list,
+					 unsigned long *cpu_bitmap)
+{
+	struct perf_evlist *evlist;
+	int cpu;
+
+	if (session == NULL || fp == NULL)
 		return;
+	evlist = session->evlist;
+	if (!perf_evlist__overhead(evlist)) {
+		fprintf(fp, "#\n# No profiling time cost information available.\n#\n");
 	}
 
 	fprintf(fp, "# ========\n");
-
+	fprintf(fp, "# CPU");
+	fprintf(fp, "       SAM    SAM cost(ns)");
+	fprintf(fp, "       MUX    MUX cost(ns)");
+	fprintf(fp, "        SB     SB cost(ns)");
+	fprintf(fp, "\n");
+
+	perf_session__fprintf_overhead_body(evlist, fp, MAX_NR_CPUS);
+
+	if (perf_evlist__sample_id_all(evlist)) {
+		for (cpu = 0; cpu < session->header.env.nr_cpus_online; cpu++) {
+			if (cpu_list && !test_bit(cpu, cpu_bitmap))
+				continue;
+			perf_session__fprintf_overhead_body(evlist, fp, cpu);
+		}
+	}
 	fprintf(fp, "# ========\n#\n");
 }
 
-- 
2.5.5