by tip-bot for Vasyl Gomonovych

[permalink] [raw]

Subject: [tip:perf/core] perf/x86/intel: Introduce PERF_RECORD_LOST_SAMPLES

Commit-ID: f38b0dbb491a6987e198aa6b428db8692a6480f8
Gitweb: http://git.kernel.org/tip/f38b0dbb491a6987e198aa6b428db8692a6480f8
Author: Kan Liang <[email protected]>
AuthorDate: Sun, 10 May 2015 15:13:14 -0400
Committer: Ingo Molnar <[email protected]>
CommitDate: Sun, 7 Jun 2015 16:09:02 +0200

perf/x86/intel: Introduce PERF_RECORD_LOST_SAMPLES

After enlarging the PEBS interrupt threshold, there may be some mixed up
PEBS samples which are discarded by the kernel.

This patch makes the kernel emit a PERF_RECORD_LOST_SAMPLES record with
the number of possibly discarded records when it is impossible to demux
the samples.

It makes sure the user is not left in the dark about such discards.

Signed-off-by: Kan Liang <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: H. Peter Anvin <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: [email protected]
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/kernel/cpu/perf_event_intel_ds.c | 20 ++++++++++++++++---
include/linux/perf_event.h | 3 +++
include/uapi/linux/perf_event.h | 12 +++++++++++
kernel/events/core.c | 33 +++++++++++++++++++++++++++++++
4 files changed, 65 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 266079a..34d0c48 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -1126,6 +1126,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
void *base, *at, *top;
int bit;
short counts[MAX_PEBS_EVENTS] = {};
+ short error[MAX_PEBS_EVENTS] = {};

if (!x86_pmu.pebs_active)
return;
@@ -1169,20 +1170,33 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
/* slow path */
pebs_status = p->status & cpuc->pebs_enabled;
pebs_status &= (1ULL << MAX_PEBS_EVENTS) - 1;
- if (pebs_status != (1 << bit))
+ if (pebs_status != (1 << bit)) {
+ u8 i;
+
+ for_each_set_bit(i, (unsigned long *)&pebs_status,
+ MAX_PEBS_EVENTS)
+ error[i]++;
continue;
+ }
}
counts[bit]++;
}

for (bit = 0; bit < x86_pmu.max_pebs_events; bit++) {
- if (counts[bit] == 0)
+ if ((counts[bit] == 0) && (error[bit] == 0))
continue;
event = cpuc->events[bit];
WARN_ON_ONCE(!event);
WARN_ON_ONCE(!event->attr.precise_ip);

- __intel_pmu_pebs_event(event, iregs, base, top, bit, counts[bit]);
+ /* log dropped samples number */
+ if (error[bit])
+ perf_log_lost_samples(event, error[bit]);
+
+ if (counts[bit]) {
+ __intel_pmu_pebs_event(event, iregs, base,
+ top, bit, counts[bit]);
+ }
}
}

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 5f192e1..a204d52 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -743,6 +743,9 @@ perf_event__output_id_sample(struct perf_event *event,
struct perf_output_handle *handle,
struct perf_sample_data *sample);

+extern void
+perf_log_lost_samples(struct perf_event *event, u64 lost);
+
static inline bool is_sampling_event(struct perf_event *event)
{
return event->attr.sample_period != 0;
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index c4622f1..613ed9a 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -802,6 +802,18 @@ enum perf_event_type {
*/
PERF_RECORD_ITRACE_START = 12,

+ /*
+ * Records the dropped/lost sample number.
+ *
+ * struct {
+ * struct perf_event_header header;
+ *
+ * u64 lost;
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_LOST_SAMPLES = 13,
+
PERF_RECORD_MAX, /* non-ABI */
};

diff --git a/kernel/events/core.c b/kernel/events/core.c
index e499b4e..9e0773d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5975,6 +5975,39 @@ void perf_event_aux_event(struct perf_event *event, unsigned long head,
}

/*
+ * Lost/dropped samples logging
+ */
+void perf_log_lost_samples(struct perf_event *event, u64 lost)
+{
+ struct perf_output_handle handle;
+ struct perf_sample_data sample;
+ int ret;
+
+ struct {
+ struct perf_event_header header;
+ u64 lost;
+ } lost_samples_event = {
+ .header = {
+ .type = PERF_RECORD_LOST_SAMPLES,
+ .misc = 0,
+ .size = sizeof(lost_samples_event),
+ },
+ .lost = lost,
+ };
+
+ perf_event_header__init_id(&lost_samples_event.header, &sample, event);
+
+ ret = perf_output_begin(&handle, event,
+ lost_samples_event.header.size);
+ if (ret)
+ return;
+
+ perf_output_put(&handle, lost_samples_event);
+ perf_event__output_id_sample(event, &handle, &sample);
+ perf_output_end(&handle);
+}
+
+/*
* IRQ throttle logging
*/

2015-06-07 17:51:51

by tip-bot for Vasyl Gomonovych

[permalink] [raw]

Subject: [tip:perf/core] perf tools: handle PERF_RECORD_LOST_SAMPLES

Commit-ID: c4937a91ea56b546234b0608a413ebad90536d26
Gitweb: http://git.kernel.org/tip/c4937a91ea56b546234b0608a413ebad90536d26
Author: Kan Liang <[email protected]>
AuthorDate: Sun, 10 May 2015 15:13:15 -0400
Committer: Ingo Molnar <[email protected]>
CommitDate: Sun, 7 Jun 2015 16:09:06 +0200

perf tools: handle PERF_RECORD_LOST_SAMPLES

This patch modifies the perf tool to handle the new RECORD type,
PERF_RECORD_LOST_SAMPLES.

The number of lost-sample events is stored in
.nr_events[PERF_RECORD_LOST_SAMPLES]. The exact number of samples
which the kernel dropped is stored in total_lost_samples.

When the percentage of dropped samples is greater than 5%, a warning
is printed.

Here are some examples:

Eg 1, Recording different frequently-occurring events is safe with the
patch. Only a very low drop rate is associated with such actions.

$ perf record -e '{cycles:p,instructions:p}' -c 20003 --no-time ~/tchain ~/tchain

$ perf report -D | tail
SAMPLE events: 120243
MMAP2 events: 5
LOST_SAMPLES events: 24
FINISHED_ROUND events: 15
cycles:p stats:
TOTAL events: 59348
SAMPLE events: 59348
instructions:p stats:
TOTAL events: 60895
SAMPLE events: 60895

$ perf report --stdio --group
# To display the perf.data header info, please use --header/--header-only options.
#
#
# Total Lost Samples: 24
#
# Samples: 120K of event 'anon group { cycles:p, instructions:p }'
# Event count (approx.): 24048600000
#
# Overhead Command Shared Object Symbol
# ................ ........... ................ ..................................
#
99.74% 99.86% tchain_edit tchain_edit [.] f3
0.09% 0.02% tchain_edit tchain_edit [.] f2
0.04% 0.00% tchain_edit [kernel.vmlinux] [k] ixgbe_read_reg

Eg 2, Recording the same thing multiple times can lead to a high drop
rate, but it is not a useful configuration.

$ perf record -e '{cycles:p,cycles:p}' -c 20003 --no-time ~/tchain
Warning: Processed 600592 samples and lost 99.73% samples!
[perf record: Woken up 148 times to write data]
[perf record: Captured and wrote 36.922 MB perf.data (1206322 samples)]
[perf record: Woken up 1 times to write data]
[perf record: Captured and wrote 0.121 MB perf.data (1629 samples)]

Signed-off-by: Kan Liang <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: H. Peter Anvin <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: [email protected]
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
tools/perf/builtin-report.c | 1 +
tools/perf/util/event.c | 9 +++++++++
tools/perf/util/event.h | 17 +++++++++++++++++
tools/perf/util/machine.c | 10 ++++++++++
tools/perf/util/machine.h | 2 ++
tools/perf/util/session.c | 19 +++++++++++++++++++
tools/perf/util/tool.h | 1 +
7 files changed, 59 insertions(+)

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 56025d9..628090b 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -320,6 +320,7 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist,
{
struct perf_evsel *pos;

+ fprintf(stdout, "#\n# Total Lost Samples: %lu\n#\n", evlist->stats.total_lost_samples);
evlist__for_each(evlist, pos) {
struct hists *hists = evsel__hists(pos);
const char *evname = perf_evsel__name(pos);
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index c192596..793b150 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -25,6 +25,7 @@ static const char *perf_event__names[] = {
[PERF_RECORD_SAMPLE] = "SAMPLE",
[PERF_RECORD_AUX] = "AUX",
[PERF_RECORD_ITRACE_START] = "ITRACE_START",
+ [PERF_RECORD_LOST_SAMPLES] = "LOST_SAMPLES",
[PERF_RECORD_HEADER_ATTR] = "ATTR",
[PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE",
[PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA",
@@ -712,6 +713,14 @@ int perf_event__process_itrace_start(struct perf_tool *tool __maybe_unused,
return machine__process_itrace_start_event(machine, event);
}

+int perf_event__process_lost_samples(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ return machine__process_lost_samples_event(machine, event, sample);
+}
+
size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp)
{
return fprintf(fp, " %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64 "]: %c %s\n",
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 97179ab..5dc51ad 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -52,6 +52,11 @@ struct lost_event {
u64 lost;
};

+struct lost_samples_event {
+ struct perf_event_header header;
+ u64 lost;
+};
+
/*
* PERF_FORMAT_ENABLED | PERF_FORMAT_RUNNING | PERF_FORMAT_ID
*/
@@ -235,6 +240,12 @@ enum auxtrace_error_type {
* total_lost tells exactly how many events the kernel in fact lost, i.e. it is
* the sum of all struct lost_event.lost fields reported.
*
+ * The kernel discards mixed up samples and sends the number in a
+ * PERF_RECORD_LOST_SAMPLES event. The number of lost-samples events is stored
+ * in .nr_events[PERF_RECORD_LOST_SAMPLES] while total_lost_samples tells
+ * exactly how many samples the kernel in fact dropped, i.e. it is the sum of
+ * all struct lost_samples_event.lost fields reported.
+ *
* The total_period is needed because by default auto-freq is used, so
* multipling nr_events[PERF_EVENT_SAMPLE] by a frequency isn't possible to get
* the total number of low level events, it is necessary to to sum all struct
@@ -244,6 +255,7 @@ struct events_stats {
u64 total_period;
u64 total_non_filtered_period;
u64 total_lost;
+ u64 total_lost_samples;
u64 total_invalid_chains;
u32 nr_events[PERF_RECORD_HEADER_MAX];
u32 nr_non_filtered_samples;
@@ -342,6 +354,7 @@ union perf_event {
struct comm_event comm;
struct fork_event fork;
struct lost_event lost;
+ struct lost_samples_event lost_samples;
struct read_event read;
struct throttle_event throttle;
struct sample_event sample;
@@ -390,6 +403,10 @@ int perf_event__process_lost(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
+int perf_event__process_lost_samples(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
int perf_event__process_aux(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 9e02c86..f15ed24 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -482,6 +482,14 @@ int machine__process_lost_event(struct machine *machine __maybe_unused,
return 0;
}

+int machine__process_lost_samples_event(struct machine *machine __maybe_unused,
+ union perf_event *event, struct perf_sample *sample)
+{
+ dump_printf(": id:%" PRIu64 ": lost samples :%" PRIu64 "\n",
+ sample->id, event->lost_samples.lost);
+ return 0;
+}
+
static struct dso*
machine__module_dso(struct machine *machine, struct kmod_path *m,
const char *filename)
@@ -1419,6 +1427,8 @@ int machine__process_event(struct machine *machine, union perf_event *event,
ret = machine__process_aux_event(machine, event); break;
case PERF_RECORD_ITRACE_START:
ret = machine__process_itrace_start_event(machine, event);
+ case PERF_RECORD_LOST_SAMPLES:
+ ret = machine__process_lost_samples_event(machine, event, sample); break;
break;
default:
ret = -1;
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index 39a0ca0..8e1f796 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -81,6 +81,8 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event
struct perf_sample *sample);
int machine__process_lost_event(struct machine *machine, union perf_event *event,
struct perf_sample *sample);
+int machine__process_lost_samples_event(struct machine *machine, union perf_event *event,
+ struct perf_sample *sample);
int machine__process_aux_event(struct machine *machine,
union perf_event *event);
int machine__process_itrace_start_event(struct machine *machine,
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 39fe09d..88d87bf 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -325,6 +325,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool)
tool->exit = process_event_stub;
if (tool->lost == NULL)
tool->lost = perf_event__process_lost;
+ if (tool->lost_samples == NULL)
+ tool->lost_samples = perf_event__process_lost_samples;
if (tool->aux == NULL)
tool->aux = perf_event__process_aux;
if (tool->itrace_start == NULL)
@@ -606,6 +608,7 @@ static perf_event__swap_op perf_event__swap_ops[] = {
[PERF_RECORD_SAMPLE] = perf_event__all64_swap,
[PERF_RECORD_AUX] = perf_event__aux_swap,
[PERF_RECORD_ITRACE_START] = perf_event__itrace_start_swap,
+ [PERF_RECORD_LOST_SAMPLES] = perf_event__all64_swap,
[PERF_RECORD_HEADER_ATTR] = perf_event__hdr_attr_swap,
[PERF_RECORD_HEADER_EVENT_TYPE] = perf_event__event_type_swap,
[PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap,
@@ -1049,6 +1052,10 @@ static int machines__deliver_event(struct machines *machines,
if (tool->lost == perf_event__process_lost)
evlist->stats.total_lost += event->lost.lost;
return tool->lost(tool, event, sample, machine);
+ case PERF_RECORD_LOST_SAMPLES:
+ if (tool->lost_samples == perf_event__process_lost_samples)
+ evlist->stats.total_lost_samples += event->lost_samples.lost;
+ return tool->lost_samples(tool, event, sample, machine);
case PERF_RECORD_READ:
return tool->read(tool, event, sample, evsel, machine);
case PERF_RECORD_THROTTLE:
@@ -1286,6 +1293,18 @@ static void perf_session__warn_about_errors(const struct perf_session *session)
stats->nr_events[PERF_RECORD_LOST]);
}

+ if (session->tool->lost_samples == perf_event__process_lost_samples) {
+ double drop_rate;
+
+ drop_rate = (double)stats->total_lost_samples /
+ (double) (stats->nr_events[PERF_RECORD_SAMPLE] + stats->total_lost_samples);
+ if (drop_rate > 0.05) {
+ ui__warning("Processed %lu samples and lost %3.2f%% samples!\n\n",
+ stats->nr_events[PERF_RECORD_SAMPLE] + stats->total_lost_samples,
+ drop_rate * 100.0);
+ }
+ }
+
if (stats->nr_unknown_events != 0) {
ui__warning("Found %u unknown events!\n\n"
"Is this an older tool processing a perf.data "
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
index 7f282ad..c307dd4 100644
--- a/tools/perf/util/tool.h
+++ b/tools/perf/util/tool.h
@@ -43,6 +43,7 @@ struct perf_tool {
fork,
exit,
lost,
+ lost_samples,
aux,
itrace_start,
throttle,