Commit 9ecda41acb97 ("perf/core: Add ::write_backward attribute to
perf event") introduces the backward ring buffer. These 5 patches add basic
support for reading from it, and add a new test case for it.
Wang Nan (5):
perf tools: Enforce ring buffer reading
perf tools: Extract __perf_evlist__mmap_read()
perf tools: Rename variable in __perf_evlist__mmap_read()
perf tools: Support reading from backward ring buffer
perf tests: Add test to check backward ring buffer
tools/perf/tests/Build | 1 +
tools/perf/tests/backward-ring-buffer.c | 151 ++++++++++++++++++++++++++++++++
tools/perf/tests/builtin-test.c | 4 +
tools/perf/tests/tests.h | 1 +
tools/perf/util/evlist.c | 104 +++++++++++++++++-----
tools/perf/util/evlist.h | 4 +
6 files changed, 242 insertions(+), 23 deletions(-)
create mode 100644 tools/perf/tests/backward-ring-buffer.c
Signed-off-by: Wang Nan <[email protected]>
Cc: Arnaldo Carvalho de Melo <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Zefan Li <[email protected]>
Cc: [email protected]
--
1.8.3.4
Don't read broken data after 'head' pointer.
Following commits will feed perf_evlist__mmap_read() with some 'head'
pointers not maintained by kernel. If 'head' pointer breaks an event,
we should avoid reading from the broken event. This can happen in
backward ring buffer.
For example:
                           old   head
                            |     |
                            V     V
+---+------+----------+----+-----+--+
|..E|D....D|C........C|B..B|A....|E.|
+---+------+----------+----+-----+--+
'old' pointer points to the beginning of 'A' and tries to read from it,
but 'A' has been overwritten. In this case, don't try to read from 'A',
simply return NULL.
Signed-off-by: Wang Nan <[email protected]>
Cc: Arnaldo Carvalho de Melo <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Zefan Li <[email protected]>
Cc: [email protected]
---
tools/perf/util/evlist.c | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 6fb5725..85271e5 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -684,6 +684,7 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
struct perf_mmap *md = &evlist->mmap[idx];
u64 head;
u64 old = md->prev;
+ int diff;
unsigned char *data = md->base + page_size;
union perf_event *event = NULL;
@@ -694,6 +695,7 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
return NULL;
head = perf_mmap__read_head(md);
+ diff = head - old;
if (evlist->overwrite) {
/*
* If we're further behind than half the buffer, there's a chance
@@ -703,7 +705,6 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
*
* In either case, truncate and restart at head.
*/
- int diff = head - old;
if (diff > md->mask / 2 || diff < 0) {
fprintf(stderr, "WARNING: failed to keep up with mmap data.\n");
@@ -711,15 +712,21 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
* head points to a known good entry, start there.
*/
old = head;
+ diff = 0;
}
}
- if (old != head) {
+ if (diff >= (int)sizeof(event->header)) {
size_t size;
event = (union perf_event *)&data[old & md->mask];
size = event->header.size;
+ if (size < sizeof(event->header) || diff < (int)size) {
+ event = NULL;
+ goto broken_event;
+ }
+
/*
* Event straddles the mmap boundary -- header should always
* be inside due to u64 alignment of output.
@@ -743,6 +750,7 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
old += size;
}
+broken_event:
md->prev = old;
return event;
--
1.8.3.4
perf_evlist__mmap_read_backward() is introduced for reading a backward
ring buffer. Unlike reading forward, the caller needs to call
perf_evlist__mmap_read_catchup() before reading.
Backward ring buffer should be read from 'head' pointer, not '0'.
perf_evlist__mmap_read_catchup() saves 'head' to 'md->prev', which then
remembers the start position after each read.
Signed-off-by: Wang Nan <[email protected]>
Cc: Arnaldo Carvalho de Melo <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Zefan Li <[email protected]>
Cc: [email protected]
---
tools/perf/util/evlist.c | 39 +++++++++++++++++++++++++++++++++++++++
tools/perf/util/evlist.h | 4 ++++
2 files changed, 43 insertions(+)
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index f680983..b0f1d8f 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -767,6 +767,45 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
head, &md->prev);
}
+union perf_event *
+perf_evlist__mmap_read_backward(struct perf_evlist *evlist, int idx)
+{
+ struct perf_mmap *md = &evlist->mmap[idx];
+ u64 head, end;
+ u64 start = md->prev;
+
+ /*
+ * Check if event was unmapped due to a POLLHUP/POLLERR.
+ */
+ if (!atomic_read(&md->refcnt))
+ return NULL;
+
+ /* NOTE: head is negative in this case */
+ head = perf_mmap__read_head(md);
+
+ if (!head)
+ return NULL;
+
+ end = head + md->mask + 1;
+
+ if ((end - head) > -head)
+ end = 0;
+
+ return __perf_evlist__mmap_read(md, false, start, end, &md->prev);
+}
+
+void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx)
+{
+ struct perf_mmap *md = &evlist->mmap[idx];
+ u64 head;
+
+ if (!atomic_read(&md->refcnt))
+ return;
+
+ head = perf_mmap__read_head(md);
+ md->prev = head;
+}
+
static bool perf_mmap__empty(struct perf_mmap *md)
{
return perf_mmap__read_head(md) == md->prev && !md->auxtrace_mmap.base;
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 208897a..85d1b59 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -129,6 +129,10 @@ struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id);
union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx);
+union perf_event *perf_evlist__mmap_read_backward(struct perf_evlist *evlist,
+ int idx);
+void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx);
+
void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx);
int perf_evlist__open(struct perf_evlist *evlist);
--
1.8.3.4
Extract the event reader into __perf_evlist__mmap_read(). A future commit
will feed it with manually computed 'head' and 'old' pointers.
Signed-off-by: Wang Nan <[email protected]>
Cc: Arnaldo Carvalho de Melo <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Zefan Li <[email protected]>
Cc: [email protected]
---
tools/perf/util/evlist.c | 40 +++++++++++++++++++++++++---------------
1 file changed, 25 insertions(+), 15 deletions(-)
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 85271e5..3ee94b2 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -679,24 +679,15 @@ static struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist,
return NULL;
}
-union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
+static union perf_event *
+__perf_evlist__mmap_read(struct perf_mmap *md, bool overwrite, u64 head,
+ u64 old, u64 *prev)
{
- struct perf_mmap *md = &evlist->mmap[idx];
- u64 head;
- u64 old = md->prev;
- int diff;
unsigned char *data = md->base + page_size;
union perf_event *event = NULL;
+ int diff = head - old;
- /*
- * Check if event was unmapped due to a POLLHUP/POLLERR.
- */
- if (!atomic_read(&md->refcnt))
- return NULL;
-
- head = perf_mmap__read_head(md);
- diff = head - old;
- if (evlist->overwrite) {
+ if (overwrite) {
/*
* If we're further behind than half the buffer, there's a chance
* the writer will bite our tail and mess up the samples under us.
@@ -751,11 +742,30 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
}
broken_event:
- md->prev = old;
+ if (prev)
+ *prev = old;
return event;
}
+union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
+{
+ struct perf_mmap *md = &evlist->mmap[idx];
+ u64 head;
+ u64 old = md->prev;
+
+ /*
+ * Check if event was unmapped due to a POLLHUP/POLLERR.
+ */
+ if (!atomic_read(&md->refcnt))
+ return NULL;
+
+ head = perf_mmap__read_head(md);
+
+ return __perf_evlist__mmap_read(md, evlist->overwrite, head,
+ old, &md->prev);
+}
+
static bool perf_mmap__empty(struct perf_mmap *md)
{
return perf_mmap__read_head(md) == md->prev && !md->auxtrace_mmap.base;
--
1.8.3.4
In __perf_evlist__mmap_read(), give better names to pointers. The original
names 'old' and 'head' are directly related to pointers in the ring buffer
control page. For a backward ring buffer, the meaning of the 'head' pointer
is not 'the first byte of free space', but 'the first byte of the last
record'. To reduce confusion, rename 'old' to 'start' and 'head' to 'end'.
'start' -> 'end' is the range the records should be read from.
Change parameter order.
Change 'overwrite' to 'check_messup'. When reading from 'head', there is
no need to check for messup on a backward ring buffer.
Signed-off-by: Wang Nan <[email protected]>
Cc: Arnaldo Carvalho de Melo <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Zefan Li <[email protected]>
Cc: [email protected]
---
tools/perf/util/evlist.c | 31 ++++++++++++++++---------------
1 file changed, 16 insertions(+), 15 deletions(-)
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 3ee94b2..f680983 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -679,30 +679,31 @@ static struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist,
return NULL;
}
+/* When check_messup is true, 'end' must points to a known good entry */
static union perf_event *
-__perf_evlist__mmap_read(struct perf_mmap *md, bool overwrite, u64 head,
- u64 old, u64 *prev)
+__perf_evlist__mmap_read(struct perf_mmap *md, bool check_messup, u64 start,
+ u64 end, u64 *prev)
{
unsigned char *data = md->base + page_size;
union perf_event *event = NULL;
- int diff = head - old;
+ int diff = end - start;
- if (overwrite) {
+ if (check_messup) {
/*
* If we're further behind than half the buffer, there's a chance
* the writer will bite our tail and mess up the samples under us.
*
- * If we somehow ended up ahead of the head, we got messed up.
+ * If we somehow ended up ahead of the 'end', we got messed up.
*
- * In either case, truncate and restart at head.
+ * In either case, truncate and restart at 'end'.
*/
if (diff > md->mask / 2 || diff < 0) {
fprintf(stderr, "WARNING: failed to keep up with mmap data.\n");
/*
- * head points to a known good entry, start there.
+ * 'end' points to a known good entry, start there.
*/
- old = head;
+ start = end;
diff = 0;
}
}
@@ -710,7 +711,7 @@ __perf_evlist__mmap_read(struct perf_mmap *md, bool overwrite, u64 head,
if (diff >= (int)sizeof(event->header)) {
size_t size;
- event = (union perf_event *)&data[old & md->mask];
+ event = (union perf_event *)&data[start & md->mask];
size = event->header.size;
if (size < sizeof(event->header) || diff < (int)size) {
@@ -722,8 +723,8 @@ __perf_evlist__mmap_read(struct perf_mmap *md, bool overwrite, u64 head,
* Event straddles the mmap boundary -- header should always
* be inside due to u64 alignment of output.
*/
- if ((old & md->mask) + size != ((old + size) & md->mask)) {
- unsigned int offset = old;
+ if ((start & md->mask) + size != ((start + size) & md->mask)) {
+ unsigned int offset = start;
unsigned int len = min(sizeof(*event), size), cpy;
void *dst = md->event_copy;
@@ -738,12 +739,12 @@ __perf_evlist__mmap_read(struct perf_mmap *md, bool overwrite, u64 head,
event = (union perf_event *) md->event_copy;
}
- old += size;
+ start += size;
}
broken_event:
if (prev)
- *prev = old;
+ *prev = start;
return event;
}
@@ -762,8 +763,8 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
head = perf_mmap__read_head(md);
- return __perf_evlist__mmap_read(md, evlist->overwrite, head,
- old, &md->prev);
+ return __perf_evlist__mmap_read(md, evlist->overwrite, old,
+ head, &md->prev);
}
static bool perf_mmap__empty(struct perf_mmap *md)
--
1.8.3.4
This test checks reading from backward ring buffer.
Test result:
# ~/perf test 'ring buffer'
45: Test backward reading from ring buffer : Ok
Test case is a while loop which calls prctl(PR_SET_NAME) multiple
times. Each prctl should issue 2 events: one PERF_RECORD_SAMPLE,
one PERF_RECORD_COMM.
The first round creates a relatively large ring buffer (256 pages). It
can hold all events. Read from it and check the count of each type of
event.
The second round creates a small ring buffer (1 page) and makes it
overwritable. Check the correctness of the buffer.
Signed-off-by: Wang Nan <[email protected]>
Cc: Arnaldo Carvalho de Melo <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Zefan Li <[email protected]>
Cc: [email protected]
---
tools/perf/tests/Build | 1 +
tools/perf/tests/backward-ring-buffer.c | 151 ++++++++++++++++++++++++++++++++
tools/perf/tests/builtin-test.c | 4 +
tools/perf/tests/tests.h | 1 +
4 files changed, 157 insertions(+)
create mode 100644 tools/perf/tests/backward-ring-buffer.c
diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index 449fe97..66a2898 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -38,6 +38,7 @@ perf-y += cpumap.o
perf-y += stat.o
perf-y += event_update.o
perf-y += event-times.o
+perf-y += backward-ring-buffer.o
$(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build
$(call rule_mkdir)
diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c
new file mode 100644
index 0000000..d9ba991
--- /dev/null
+++ b/tools/perf/tests/backward-ring-buffer.c
@@ -0,0 +1,151 @@
+/*
+ * Test backward bit in event attribute, read ring buffer from end to
+ * beginning
+ */
+
+#include <perf.h>
+#include <evlist.h>
+#include <sys/prctl.h>
+#include "tests.h"
+#include "debug.h"
+
+#define NR_ITERS 111
+
+static void testcase(void)
+{
+ int i;
+
+ for (i = 0; i < NR_ITERS; i++) {
+ char proc_name[10];
+
+ snprintf(proc_name, sizeof(proc_name), "p:%d\n", i);
+ prctl(PR_SET_NAME, proc_name);
+ }
+}
+
+static int count_samples(struct perf_evlist *evlist, int *sample_count,
+ int *comm_count)
+{
+ int i;
+
+ for (i = 0; i < evlist->nr_mmaps; i++) {
+ union perf_event *event;
+
+ perf_evlist__mmap_read_catchup(evlist, i);
+ while ((event = perf_evlist__mmap_read_backward(evlist, i)) != NULL) {
+ const u32 type = event->header.type;
+
+ switch (type) {
+ case PERF_RECORD_SAMPLE:
+ (*sample_count)++;
+ break;
+ case PERF_RECORD_COMM:
+ (*comm_count)++;
+ break;
+ default:
+ pr_err("Unexpected record of type %d\n", type);
+ return TEST_FAIL;
+ }
+ }
+ }
+ return TEST_OK;
+}
+
+static int do_test(struct perf_evlist *evlist, int mmap_pages,
+ int *sample_count, int *comm_count)
+{
+ int err;
+ char sbuf[STRERR_BUFSIZE];
+
+ err = perf_evlist__mmap(evlist, mmap_pages, true);
+ if (err < 0) {
+ pr_debug("perf_evlist__mmap: %s\n",
+ strerror_r(errno, sbuf, sizeof(sbuf)));
+ return TEST_FAIL;
+ }
+
+ perf_evlist__enable(evlist);
+ testcase();
+ perf_evlist__disable(evlist);
+
+ err = count_samples(evlist, sample_count, comm_count);
+ perf_evlist__munmap(evlist);
+ return err;
+}
+
+
+int test__backward_ring_buffer(int subtest __maybe_unused)
+{
+ int ret = TEST_SKIP, err, sample_count = 0, comm_count = 0;
+ char pid[16], sbuf[STRERR_BUFSIZE];
+ struct perf_evlist *evlist;
+ struct perf_evsel *evsel __maybe_unused;
+ struct parse_events_error parse_error;
+ struct record_opts opts = {
+ .target = {
+ .uid = UINT_MAX,
+ .uses_mmap = true,
+ },
+ .freq = 0,
+ .mmap_pages = 256,
+ .default_interval = 1,
+ };
+
+ snprintf(pid, sizeof(pid), "%d", getpid());
+ pid[sizeof(pid) - 1] = '\0';
+ opts.target.tid = opts.target.pid = pid;
+
+ evlist = perf_evlist__new();
+ if (!evlist) {
+ pr_debug("No ehough memory to create evlist\n");
+ return TEST_FAIL;
+ }
+
+ err = perf_evlist__create_maps(evlist, &opts.target);
+ if (err < 0) {
+ pr_debug("Not enough memory to create thread/cpu maps\n");
+ goto out_delete_evlist;
+ }
+
+ bzero(&parse_error, sizeof(parse_error));
+ err = parse_events(evlist, "syscalls:sys_enter_prctl", &parse_error);
+ if (err) {
+ pr_debug("Failed to parse tracepoint event, try use root\n");
+ ret = TEST_SKIP;
+ goto out_delete_evlist;
+ }
+
+ perf_evlist__config(evlist, &opts, NULL);
+
+ /* Set backward bit, ring buffer should be writing from end */
+ evlist__for_each(evlist, evsel)
+ evsel->attr.write_backward = 1;
+
+ err = perf_evlist__open(evlist);
+ if (err < 0) {
+ pr_debug("perf_evlist__open: %s\n",
+ strerror_r(errno, sbuf, sizeof(sbuf)));
+ goto out_delete_evlist;
+ }
+
+ ret = TEST_FAIL;
+ err = do_test(evlist, opts.mmap_pages, &sample_count,
+ &comm_count);
+ if (err != TEST_OK)
+ goto out_delete_evlist;
+
+ if ((sample_count != NR_ITERS) || (comm_count != NR_ITERS)) {
+ pr_err("Unexpected counter: sample_count=%d, comm_count=%d\n",
+ sample_count, comm_count);
+ goto out_delete_evlist;
+ }
+
+ err = do_test(evlist, 1, &sample_count, &comm_count);
+ if (err != TEST_OK)
+ goto out_delete_evlist;
+
+ ret = TEST_OK;
+out_delete_evlist:
+ perf_evlist__delete(evlist);
+ return ret;
+}
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 93c4670..0e95c20 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -208,6 +208,10 @@ static struct test generic_tests[] = {
.func = test__event_times,
},
{
+ .desc = "Test backward reading from ring buffer",
+ .func = test__backward_ring_buffer,
+ },
+ {
.func = NULL,
},
};
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index 0fc9469..c57e72c 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -86,6 +86,7 @@ int test__synthesize_stat(int subtest);
int test__synthesize_stat_round(int subtest);
int test__event_update(int subtest);
int test__event_times(int subtest);
+int test__backward_ring_buffer(int subtest);
#if defined(__arm__) || defined(__aarch64__)
#ifdef HAVE_DWARF_UNWIND_SUPPORT
--
1.8.3.4
Em Tue, Apr 26, 2016 at 02:28:55AM +0000, Wang Nan escreveu:
> Extract event reader to __perf_evlist__mmap_read(). Future commit will
> feed it with manually computed 'head' and 'old' pointers.
>
> Signed-off-by: Wang Nan <[email protected]>
> Cc: Arnaldo Carvalho de Melo <[email protected]>
> Cc: Peter Zijlstra <[email protected]>
> Cc: Zefan Li <[email protected]>
> Cc: [email protected]
> ---
> tools/perf/util/evlist.c | 40 +++++++++++++++++++++++++---------------
> 1 file changed, 25 insertions(+), 15 deletions(-)
>
> diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
> index 85271e5..3ee94b2 100644
> --- a/tools/perf/util/evlist.c
> +++ b/tools/perf/util/evlist.c
> @@ -679,24 +679,15 @@ static struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist,
> return NULL;
> }
>
> -union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
> +static union perf_event *
> +__perf_evlist__mmap_read(struct perf_mmap *md, bool overwrite, u64 head,
> + u64 old, u64 *prev)
> {
Ok, now this is not a perf_evlist method anymore, see the first
parameter? It handles perf_mmap instances, so please rename it to:
static union perf_event *perf_mmap__read(struct perf_mmap *md, bool overwrite,
u64 head, u64 *prev)
And thanks for leaving the renaming of those variables to the next
patch, that eases reviewing indeed.
> - struct perf_mmap *md = &evlist->mmap[idx];
> - u64 head;
> - u64 old = md->prev;
>
> +union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
> +{
> + struct perf_mmap *md = &evlist->mmap[idx];
> + u64 head;
> + u64 old = md->prev;
> +
> + if (!atomic_read(&md->refcnt))
> + return NULL;
> +
> + head = perf_mmap__read_head(md);
> +
> + return __perf_evlist__mmap_read(md, evlist->overwrite, head,
> + old, &md->prev);
See that perf_mmap__read_head()? It acts on a struct perf_mmap too, thus
the prefix. This will end up as:
head = perf_mmap__read_head(md);
return perf_mmap__read(md, evlist->overwrite, head, old, &md->prev);
And see below yet another perf_mmap method, perf_mmap__empty()
Regards,
- Arnaldo
> +}
> +
> static bool perf_mmap__empty(struct perf_mmap *md)
> {
> return perf_mmap__read_head(md) == md->prev && !md->auxtrace_mmap.base;
> --
> 1.8.3.4
Em Tue, Apr 26, 2016 at 02:28:54AM +0000, Wang Nan escreveu:
> Don't read broken data after 'head' pointer.
>
> Following commits will feed perf_evlist__mmap_read() with some 'head'
> pointers not maintained by kernel. If 'head' pointer breaks an event,
> we should avoid reading from the broken event. This can happen in
> backward ring buffer.
Looks good, applied.
- Arnaldo
> For example:
>
>                            old   head
>                             |     |
>                             V     V
> +---+------+----------+----+-----+--+
> |..E|D....D|C........C|B..B|A....|E.|
> +---+------+----------+----+-----+--+
>
> 'old' pointer points to the beginning of 'A' and trying read from it,
> but 'A' has been overwritten. In this case, don't try to read from 'A',
> simply return NULL.
>
> Signed-off-by: Wang Nan <[email protected]>
> Cc: Arnaldo Carvalho de Melo <[email protected]>
> Cc: Peter Zijlstra <[email protected]>
> Cc: Zefan Li <[email protected]>
> Cc: [email protected]
> ---
> tools/perf/util/evlist.c | 12 ++++++++++--
> 1 file changed, 10 insertions(+), 2 deletions(-)
>
> diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
> index 6fb5725..85271e5 100644
> --- a/tools/perf/util/evlist.c
> +++ b/tools/perf/util/evlist.c
> @@ -684,6 +684,7 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
> struct perf_mmap *md = &evlist->mmap[idx];
> u64 head;
> u64 old = md->prev;
> + int diff;
> unsigned char *data = md->base + page_size;
> union perf_event *event = NULL;
>
> @@ -694,6 +695,7 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
> return NULL;
>
> head = perf_mmap__read_head(md);
> + diff = head - old;
> if (evlist->overwrite) {
> /*
> * If we're further behind than half the buffer, there's a chance
> @@ -703,7 +705,6 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
> *
> * In either case, truncate and restart at head.
> */
> - int diff = head - old;
> if (diff > md->mask / 2 || diff < 0) {
> fprintf(stderr, "WARNING: failed to keep up with mmap data.\n");
>
> @@ -711,15 +712,21 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
> * head points to a known good entry, start there.
> */
> old = head;
> + diff = 0;
> }
> }
>
> - if (old != head) {
> + if (diff >= (int)sizeof(event->header)) {
> size_t size;
>
> event = (union perf_event *)&data[old & md->mask];
> size = event->header.size;
>
> + if (size < sizeof(event->header) || diff < (int)size) {
> + event = NULL;
> + goto broken_event;
> + }
> +
> /*
> * Event straddles the mmap boundary -- header should always
> * be inside due to u64 alignment of output.
> @@ -743,6 +750,7 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
> old += size;
> }
>
> +broken_event:
> md->prev = old;
>
> return event;
> --
> 1.8.3.4
Commit-ID: b04b7023751bf6519eee64467b6477f0e7fb82a1
Gitweb: http://git.kernel.org/tip/b04b7023751bf6519eee64467b6477f0e7fb82a1
Author: Wang Nan <[email protected]>
AuthorDate: Tue, 26 Apr 2016 02:28:54 +0000
Committer: Arnaldo Carvalho de Melo <[email protected]>
CommitDate: Tue, 26 Apr 2016 10:56:08 -0300
perf evlist: Enforce ring buffer reading
Don't read broken data after 'head' pointer.
Following commits will feed perf_evlist__mmap_read() with some 'head'
pointers not maintained by kernel. If 'head' pointer breaks an event, we
should avoid reading from the broken event. This can happen in backward
ring buffer.
For example:
                           old   head
                            |     |
                            V     V
+---+------+----------+----+-----+--+
|..E|D....D|C........C|B..B|A....|E.|
+---+------+----------+----+-----+--+
'old' pointer points to the beginning of 'A' and trying read from it,
but 'A' has been overwritten. In this case, don't try to read from 'A',
simply return NULL.
Signed-off-by: Wang Nan <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Zefan Li <[email protected]>
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/util/evlist.c | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 6fb5725..85271e5 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -684,6 +684,7 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
struct perf_mmap *md = &evlist->mmap[idx];
u64 head;
u64 old = md->prev;
+ int diff;
unsigned char *data = md->base + page_size;
union perf_event *event = NULL;
@@ -694,6 +695,7 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
return NULL;
head = perf_mmap__read_head(md);
+ diff = head - old;
if (evlist->overwrite) {
/*
* If we're further behind than half the buffer, there's a chance
@@ -703,7 +705,6 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
*
* In either case, truncate and restart at head.
*/
- int diff = head - old;
if (diff > md->mask / 2 || diff < 0) {
fprintf(stderr, "WARNING: failed to keep up with mmap data.\n");
@@ -711,15 +712,21 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
* head points to a known good entry, start there.
*/
old = head;
+ diff = 0;
}
}
- if (old != head) {
+ if (diff >= (int)sizeof(event->header)) {
size_t size;
event = (union perf_event *)&data[old & md->mask];
size = event->header.size;
+ if (size < sizeof(event->header) || diff < (int)size) {
+ event = NULL;
+ goto broken_event;
+ }
+
/*
* Event straddles the mmap boundary -- header should always
* be inside due to u64 alignment of output.
@@ -743,6 +750,7 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
old += size;
}
+broken_event:
md->prev = old;
return event;