2017-12-04 16:53:20

by Wang Nan

[permalink] [raw]
Subject: [PATCH v3 0/3] perf tools: Clarify overwrite and backward, bugfix

Simplify patch 1/3 following Namhyung's suggestion.

Context adjustment for patch 2 and 3.

Wang Nan (3):
perf mmap: Fix perf backward recording
perf tools: Don't discard prev in backward mode
perf tools: Replace 'backward' with 'overwrite' in evlist, mmap and
record

tools/perf/builtin-record.c | 14 +++++-----
tools/perf/tests/backward-ring-buffer.c | 4 +--
tools/perf/util/evlist.c | 41 +++++++++++++++++-------------
tools/perf/util/evlist.h | 2 +-
tools/perf/util/mmap.c | 45 +++++++++++++++------------------
5 files changed, 54 insertions(+), 52 deletions(-)

--
2.10.1


2017-12-04 16:53:14

by Wang Nan

[permalink] [raw]
Subject: [PATCH v3 2/3] perf tools: Don't discard prev in backward mode

Perf record can switch output. The new output should only store the
data recorded after switching. However, in overwrite backward mode, the
new output still has the data from the old output. That also brings
extra overhead.

At the end of mmap_read, the position of the processed ring buffer is
saved in md->prev. The next mmap_read should end at md->prev if it has
not been overwritten. That avoids processing duplicate data.
However, md->prev is discarded. So the next mmap_read has to process
the whole valid ring buffer, which probably includes the old, already
processed data.

Avoid calling backward_rb_find_range() when md->prev is still
available.

Signed-off-by: Wang Nan <[email protected]>
Tested-by: Kan Liang <[email protected]>
---
tools/perf/util/mmap.c | 33 +++++++++++++++------------------
1 file changed, 15 insertions(+), 18 deletions(-)

diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index 3f262e7..5f8cb15 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -267,18 +267,6 @@ static int backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64
return -1;
}

-static int rb_find_range(void *data, int mask, u64 head, u64 old,
- u64 *start, u64 *end, bool backward)
-{
- if (!backward) {
- *start = old;
- *end = head;
- return 0;
- }
-
- return backward_rb_find_range(data, mask, head, start, end);
-}
-
int perf_mmap__push(struct perf_mmap *md, bool backward,
void *to, int push(void *to, void *buf, size_t size))
{
@@ -290,19 +278,28 @@ int perf_mmap__push(struct perf_mmap *md, bool backward,
void *buf;
int rc = 0;

- if (rb_find_range(data, md->mask, head, old, &start, &end, backward))
- return -1;
+ start = backward ? head : old;
+ end = backward ? old : head;

if (start == end)
return 0;

size = end - start;
if (size > (unsigned long)(md->mask) + 1) {
- WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");
+ if (!backward) {
+ WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");

- md->prev = head;
- perf_mmap__consume(md, backward);
- return 0;
+ md->prev = head;
+ perf_mmap__consume(md, backward);
+ return 0;
+ }
+
+ /*
+ * Backward ring buffer is full. We still have a chance to read
+ * most of data from it.
+ */
+ if (backward_rb_find_range(data, md->mask, head, &start, &end))
+ return -1;
}

if ((start & md->mask) + size != (end & md->mask)) {
--
2.10.1

2017-12-04 16:53:17

by Wang Nan

[permalink] [raw]
Subject: [PATCH v3 3/3] perf tools: Replace 'backward' with 'overwrite' in evlist, mmap and record

Remove the backward/forward concept to make it uniform with user
interface (the '--overwrite' option).

Signed-off-by: Wang Nan <[email protected]>
---
tools/perf/builtin-record.c | 14 +++++++-------
tools/perf/tests/backward-ring-buffer.c | 4 ++--
tools/perf/util/evlist.c | 30 +++++++++++++++---------------
tools/perf/util/evlist.h | 2 +-
tools/perf/util/mmap.c | 22 +++++++++++-----------
5 files changed, 36 insertions(+), 36 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 26b8571..0a5749e 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -479,7 +479,7 @@ static struct perf_event_header finished_round_event = {
};

static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
- bool backward)
+ bool overwrite)
{
u64 bytes_written = rec->bytes_written;
int i;
@@ -489,18 +489,18 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli
if (!evlist)
return 0;

- maps = backward ? evlist->backward_mmap : evlist->mmap;
+ maps = overwrite ? evlist->overwrite_mmap : evlist->mmap;
if (!maps)
return 0;

- if (backward && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
+ if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
return 0;

for (i = 0; i < evlist->nr_mmaps; i++) {
struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap;

if (maps[i].base) {
- if (perf_mmap__push(&maps[i], backward, rec, record__pushfn) != 0) {
+ if (perf_mmap__push(&maps[i], overwrite, rec, record__pushfn) != 0) {
rc = -1;
goto out;
}
@@ -520,7 +520,7 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli
if (bytes_written != rec->bytes_written)
rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));

- if (backward)
+ if (overwrite)
perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
out:
return rc;
@@ -692,8 +692,8 @@ perf_evlist__pick_pc(struct perf_evlist *evlist)
if (evlist) {
if (evlist->mmap && evlist->mmap[0].base)
return evlist->mmap[0].base;
- if (evlist->backward_mmap && evlist->backward_mmap[0].base)
- return evlist->backward_mmap[0].base;
+ if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].base)
+ return evlist->overwrite_mmap[0].base;
}
return NULL;
}
diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c
index cf37e43..4035d43 100644
--- a/tools/perf/tests/backward-ring-buffer.c
+++ b/tools/perf/tests/backward-ring-buffer.c
@@ -33,8 +33,8 @@ static int count_samples(struct perf_evlist *evlist, int *sample_count,
for (i = 0; i < evlist->nr_mmaps; i++) {
union perf_event *event;

- perf_mmap__read_catchup(&evlist->backward_mmap[i]);
- while ((event = perf_mmap__read_backward(&evlist->backward_mmap[i])) != NULL) {
+ perf_mmap__read_catchup(&evlist->overwrite_mmap[i]);
+ while ((event = perf_mmap__read_backward(&evlist->overwrite_mmap[i])) != NULL) {
const u32 type = event->header.type;

switch (type) {
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index b1cea71..3570355 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -125,7 +125,7 @@ static void perf_evlist__purge(struct perf_evlist *evlist)
void perf_evlist__exit(struct perf_evlist *evlist)
{
zfree(&evlist->mmap);
- zfree(&evlist->backward_mmap);
+ zfree(&evlist->overwrite_mmap);
fdarray__exit(&evlist->pollfd);
}

@@ -675,11 +675,11 @@ static int perf_evlist__set_paused(struct perf_evlist *evlist, bool value)
{
int i;

- if (!evlist->backward_mmap)
+ if (!evlist->overwrite_mmap)
return 0;

for (i = 0; i < evlist->nr_mmaps; i++) {
- int fd = evlist->backward_mmap[i].fd;
+ int fd = evlist->overwrite_mmap[i].fd;
int err;

if (fd < 0)
@@ -749,16 +749,16 @@ static void perf_evlist__munmap_nofree(struct perf_evlist *evlist)
for (i = 0; i < evlist->nr_mmaps; i++)
perf_mmap__munmap(&evlist->mmap[i]);

- if (evlist->backward_mmap)
+ if (evlist->overwrite_mmap)
for (i = 0; i < evlist->nr_mmaps; i++)
- perf_mmap__munmap(&evlist->backward_mmap[i]);
+ perf_mmap__munmap(&evlist->overwrite_mmap[i]);
}

void perf_evlist__munmap(struct perf_evlist *evlist)
{
perf_evlist__munmap_nofree(evlist);
zfree(&evlist->mmap);
- zfree(&evlist->backward_mmap);
+ zfree(&evlist->overwrite_mmap);
}

static struct perf_mmap *perf_evlist__alloc_mmap(struct perf_evlist *evlist)
@@ -800,7 +800,7 @@ perf_evlist__should_poll(struct perf_evlist *evlist __maybe_unused,

static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
struct mmap_params *mp, int cpu_idx,
- int thread, int *_output, int *_output_backward)
+ int thread, int *_output, int *_output_overwrite)
{
struct perf_evsel *evsel;
int revent;
@@ -814,14 +814,14 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,

mp->prot = PROT_READ | PROT_WRITE;
if (evsel->attr.write_backward) {
- output = _output_backward;
- maps = evlist->backward_mmap;
+ output = _output_overwrite;
+ maps = evlist->overwrite_mmap;

if (!maps) {
maps = perf_evlist__alloc_mmap(evlist);
if (!maps)
return -1;
- evlist->backward_mmap = maps;
+ evlist->overwrite_mmap = maps;
if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY)
perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
}
@@ -886,14 +886,14 @@ static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist,
pr_debug2("perf event ring buffer mmapped per cpu\n");
for (cpu = 0; cpu < nr_cpus; cpu++) {
int output = -1;
- int output_backward = -1;
+ int output_overwrite = -1;

auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu,
true);

for (thread = 0; thread < nr_threads; thread++) {
if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu,
- thread, &output, &output_backward))
+ thread, &output, &output_overwrite))
goto out_unmap;
}
}
@@ -914,13 +914,13 @@ static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist,
pr_debug2("perf event ring buffer mmapped per thread\n");
for (thread = 0; thread < nr_threads; thread++) {
int output = -1;
- int output_backward = -1;
+ int output_overwrite = -1;

auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread,
false);

if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread,
- &output, &output_backward))
+ &output, &output_overwrite))
goto out_unmap;
}

@@ -1753,7 +1753,7 @@ void perf_evlist__toggle_bkw_mmap(struct perf_evlist *evlist,
RESUME,
} action = NONE;

- if (!evlist->backward_mmap)
+ if (!evlist->overwrite_mmap)
return;

switch (old_state) {
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index eec3377..7516066 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -44,7 +44,7 @@ struct perf_evlist {
} workload;
struct fdarray pollfd;
struct perf_mmap *mmap;
- struct perf_mmap *backward_mmap;
+ struct perf_mmap *overwrite_mmap;
struct thread_map *threads;
struct cpu_map *cpus;
struct perf_evsel *selected;
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index 5f8cb15..05076e6 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -234,18 +234,18 @@ int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd)
return 0;
}

-static int backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end)
+static int overwrite_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end)
{
struct perf_event_header *pheader;
u64 evt_head = head;
int size = mask + 1;

- pr_debug2("backward_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head);
+ pr_debug2("overwrite_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head);
pheader = (struct perf_event_header *)(buf + (head & mask));
*start = head;
while (true) {
if (evt_head - head >= (unsigned int)size) {
- pr_debug("Finished reading backward ring buffer: rewind\n");
+ pr_debug("Finished reading overwrite ring buffer: rewind\n");
if (evt_head - head > (unsigned int)size)
evt_head -= pheader->size;
*end = evt_head;
@@ -255,7 +255,7 @@ static int backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64
pheader = (struct perf_event_header *)(buf + (evt_head & mask));

if (pheader->size == 0) {
- pr_debug("Finished reading backward ring buffer: get start\n");
+ pr_debug("Finished reading overwrite ring buffer: get start\n");
*end = evt_head;
return 0;
}
@@ -267,7 +267,7 @@ static int backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64
return -1;
}

-int perf_mmap__push(struct perf_mmap *md, bool backward,
+int perf_mmap__push(struct perf_mmap *md, bool overwrite,
void *to, int push(void *to, void *buf, size_t size))
{
u64 head = perf_mmap__read_head(md);
@@ -278,19 +278,19 @@ int perf_mmap__push(struct perf_mmap *md, bool backward,
void *buf;
int rc = 0;

- start = backward ? head : old;
- end = backward ? old : head;
+ start = overwrite ? head : old;
+ end = overwrite ? old : head;

if (start == end)
return 0;

size = end - start;
if (size > (unsigned long)(md->mask) + 1) {
- if (!backward) {
+ if (!overwrite) {
WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");

md->prev = head;
- perf_mmap__consume(md, backward);
+ perf_mmap__consume(md, overwrite);
return 0;
}

@@ -298,7 +298,7 @@ int perf_mmap__push(struct perf_mmap *md, bool backward,
* Backward ring buffer is full. We still have a chance to read
* most of data from it.
*/
- if (backward_rb_find_range(data, md->mask, head, &start, &end))
+ if (overwrite_rb_find_range(data, md->mask, head, &start, &end))
return -1;
}

@@ -323,7 +323,7 @@ int perf_mmap__push(struct perf_mmap *md, bool backward,
}

md->prev = head;
- perf_mmap__consume(md, backward);
+ perf_mmap__consume(md, overwrite);
out:
return rc;
}
--
2.10.1

2017-12-04 16:53:25

by Wang Nan

[permalink] [raw]
Subject: [PATCH v3 1/3] perf mmap: Fix perf backward recording

'perf record' backward recording doesn't work as we expected: it never
overwrites when the ring buffer gets full.

Test:

(Run a busy Python printing task in the background like this:

while True:
print 123

send SIGUSR2 to perf to capture a snapshot.)

# ./perf record --overwrite -e raw_syscalls:sys_enter -e raw_syscalls:sys_exit --exclude-perf -a --switch-output
[ perf record: dump data: Woken up 1 times ]
[ perf record: Dump perf.data.2017110101520743 ]
[ perf record: dump data: Woken up 1 times ]
[ perf record: Dump perf.data.2017110101521251 ]
[ perf record: dump data: Woken up 1 times ]
[ perf record: Dump perf.data.2017110101521692 ]
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Dump perf.data.2017110101521936 ]
[ perf record: Captured and wrote 0.826 MB perf.data.<timestamp> ]

# ./perf script -i ./perf.data.2017110101520743 | head -n3
perf 2717 [000] 12449.310785: raw_syscalls:sys_enter: NR 16 (5, 2400, 0, 59, 100, 0)
perf 2717 [000] 12449.310790: raw_syscalls:sys_enter: NR 7 (4112340, 2, ffffffff, 3df, 100, 0)
python 2545 [000] 12449.310800: raw_syscalls:sys_exit: NR 1 = 4
# ./perf script -i ./perf.data.2017110101521251 | head -n3
perf 2717 [000] 12449.310785: raw_syscalls:sys_enter: NR 16 (5, 2400, 0, 59, 100, 0)
perf 2717 [000] 12449.310790: raw_syscalls:sys_enter: NR 7 (4112340, 2, ffffffff, 3df, 100, 0)
python 2545 [000] 12449.310800: raw_syscalls:sys_exit: NR 1 = 4
# ./perf script -i ./perf.data.2017110101521692 | head -n3
perf 2717 [000] 12449.310785: raw_syscalls:sys_enter: NR 16 (5, 2400, 0, 59, 100, 0)
perf 2717 [000] 12449.310790: raw_syscalls:sys_enter: NR 7 (4112340, 2, ffffffff, 3df, 100, 0)
python 2545 [000] 12449.310800: raw_syscalls:sys_exit: NR 1 = 4

Timestamps never change, but my background task is a dead loop, which
can easily overwhelm the ring buffer.

This patch fixes it by forcibly unsetting PROT_WRITE for backward ring
buffers, so all backward ring buffers become overwrite ring buffers.

Test result:

# ./perf record --overwrite -e raw_syscalls:sys_enter -e raw_syscalls:sys_exit --exclude-perf -a --switch-output
[ perf record: dump data: Woken up 1 times ]
[ perf record: Dump perf.data.2017110101285323 ]
[ perf record: dump data: Woken up 1 times ]
[ perf record: Dump perf.data.2017110101290053 ]
[ perf record: dump data: Woken up 1 times ]
[ perf record: Dump perf.data.2017110101290446 ]
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Dump perf.data.2017110101290837 ]
[ perf record: Captured and wrote 0.826 MB perf.data.<timestamp> ]
# ./perf script -i ./perf.data.2017110101285323 | head -n3
python 2545 [000] 11064.268083: raw_syscalls:sys_exit: NR 1 = 4
python 2545 [000] 11064.268084: raw_syscalls:sys_enter: NR 1 (1, 12cc330, 4, 7fc237280370, 7fc2373d0700, 2c7b0)
python 2545 [000] 11064.268086: raw_syscalls:sys_exit: NR 1 = 4
# ./perf script -i ./perf.data.2017110101290 | head -n3
failed to open ./perf.data.2017110101290: No such file or directory
# ./perf script -i ./perf.data.2017110101290053 | head -n3
python 2545 [000] 11071.564062: raw_syscalls:sys_enter: NR 1 (1, 12cc330, 4, 7fc237280370, 7fc2373d0700, 2c7b0)
python 2545 [000] 11071.564064: raw_syscalls:sys_exit: NR 1 = 4
python 2545 [000] 11071.564066: raw_syscalls:sys_enter: NR 1 (1, 12cc330, 4, 7fc237280370, 7fc2373d0700, 2c7b0)
# ./perf script -i ./perf.data.2017110101290 | head -n3
perf.data.2017110101290053 perf.data.2017110101290446 perf.data.2017110101290837
# ./perf script -i ./perf.data.2017110101290446 | head -n3
sshd 1321 [000] 11075.499473: raw_syscalls:sys_exit: NR 14 = 0
sshd 1321 [000] 11075.499474: raw_syscalls:sys_enter: NR 14 (2, 7ffe98899490, 0, 8, 0, 3000)
sshd 1321 [000] 11075.499474: raw_syscalls:sys_exit: NR 14 = 0
# ./perf script -i ./perf.data.2017110101290837 | head -n3
python 2545 [000] 11079.280844: raw_syscalls:sys_exit: NR 1 = 4
python 2545 [000] 11079.280847: raw_syscalls:sys_enter: NR 1 (1, 12cc330, 4, 7fc237280370, 7fc2373d0700, 2c7b0)
python 2545 [000] 11079.280850: raw_syscalls:sys_exit: NR 1 = 4

Signed-off-by: Wang Nan <[email protected]>
---
tools/perf/util/evlist.c | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 68c1f95..b1cea71 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -812,6 +812,7 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
int fd;
int cpu;

+ mp->prot = PROT_READ | PROT_WRITE;
if (evsel->attr.write_backward) {
output = _output_backward;
maps = evlist->backward_mmap;
@@ -824,6 +825,7 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY)
perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
}
+ mp->prot &= ~PROT_WRITE;
}

if (evsel->system_wide && thread)
@@ -1058,9 +1060,12 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
struct perf_evsel *evsel;
const struct cpu_map *cpus = evlist->cpus;
const struct thread_map *threads = evlist->threads;
- struct mmap_params mp = {
- .prot = PROT_READ | PROT_WRITE,
- };
+ /*
+ * Delay setting mp.prot: set it before calling perf_mmap__mmap.
+ * Its value is decided by evsel's write_backward.
+ * So &mp should not be passed through const pointer.
+ */
+ struct mmap_params mp;

if (!evlist->mmap)
evlist->mmap = perf_evlist__alloc_mmap(evlist);
--
2.10.1

2017-12-05 00:04:36

by Namhyung Kim

[permalink] [raw]
Subject: Re: [PATCH v3 0/3] perf tools: perf tools: Clarify overwrite and backward, bugfix

On Mon, Dec 04, 2017 at 04:51:04PM +0000, Wang Nan wrote:
> Simplify patch 1/3 following Namhyung's suggestion.
>
> Context adjustment for patch 2 and 3.
>
> Wang Nan (3):
> perf mmap: Fix perf backward recording
> perf tools: Don't discard prev in backward mode
> perf tools: Replace 'backward' to 'overwrite' in evlist. mmap and
> record

Acked-by: Namhyung Kim <[email protected]>

Thanks,
Namhyung


>
> tools/perf/builtin-record.c | 14 +++++-----
> tools/perf/tests/backward-ring-buffer.c | 4 +--
> tools/perf/util/evlist.c | 41 +++++++++++++++++-------------
> tools/perf/util/evlist.h | 2 +-
> tools/perf/util/mmap.c | 45 +++++++++++++++------------------
> 5 files changed, 54 insertions(+), 52 deletions(-)
>
> --
> 2.10.1
>

Subject: [tip:perf/core] perf mmap: Fix perf backward recording

Commit-ID: 71f566a34986f4a86a8c546c7a36f70f0132b8a9
Gitweb: https://git.kernel.org/tip/71f566a34986f4a86a8c546c7a36f70f0132b8a9
Author: Wang Nan <[email protected]>
AuthorDate: Mon, 4 Dec 2017 16:51:05 +0000
Committer: Arnaldo Carvalho de Melo <[email protected]>
CommitDate: Tue, 5 Dec 2017 15:45:36 -0300

perf mmap: Fix perf backward recording

'perf record' backward recording doesn't work as we expected: it never
overwrites when ring buffer gets full.

Test:

Run a busy Python printing task in the background like this:

while True:
print 123

send SIGUSR2 to perf to capture snapshot, then:

# ./perf record --overwrite -e raw_syscalls:sys_enter -e raw_syscalls:sys_exit --exclude-perf -a --switch-output
[ perf record: dump data: Woken up 1 times ]
[ perf record: Dump perf.data.2017110101520743 ]
[ perf record: dump data: Woken up 1 times ]
[ perf record: Dump perf.data.2017110101521251 ]
[ perf record: dump data: Woken up 1 times ]
[ perf record: Dump perf.data.2017110101521692 ]
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Dump perf.data.2017110101521936 ]
[ perf record: Captured and wrote 0.826 MB perf.data.<timestamp> ]

# ./perf script -i ./perf.data.2017110101520743 | head -n3
perf 2717 [000] 12449.310785: raw_syscalls:sys_enter: NR 16 (5, 2400, 0, 59, 100, 0)
perf 2717 [000] 12449.310790: raw_syscalls:sys_enter: NR 7 (4112340, 2, ffffffff, 3df, 100, 0)
python 2545 [000] 12449.310800: raw_syscalls:sys_exit: NR 1 = 4
# ./perf script -i ./perf.data.2017110101521251 | head -n3
perf 2717 [000] 12449.310785: raw_syscalls:sys_enter: NR 16 (5, 2400, 0, 59, 100, 0)
perf 2717 [000] 12449.310790: raw_syscalls:sys_enter: NR 7 (4112340, 2, ffffffff, 3df, 100, 0)
python 2545 [000] 12449.310800: raw_syscalls:sys_exit: NR 1 = 4
# ./perf script -i ./perf.data.2017110101521692 | head -n3
perf 2717 [000] 12449.310785: raw_syscalls:sys_enter: NR 16 (5, 2400, 0, 59, 100, 0)
perf 2717 [000] 12449.310790: raw_syscalls:sys_enter: NR 7 (4112340, 2, ffffffff, 3df, 100, 0)
python 2545 [000] 12449.310800: raw_syscalls:sys_exit: NR 1 = 4

Timestamps never change, but my background task is a dead loop, which
can easily overwhelm the ring buffer.

This patch fixes it by forcibly unsetting PROT_WRITE for a backward ring
buffer, so all backward ring buffers become overwrite ring buffers.

Test result:

# ./perf record --overwrite -e raw_syscalls:sys_enter -e raw_syscalls:sys_exit --exclude-perf -a --switch-output
[ perf record: dump data: Woken up 1 times ]
[ perf record: Dump perf.data.2017110101285323 ]
[ perf record: dump data: Woken up 1 times ]
[ perf record: Dump perf.data.2017110101290053 ]
[ perf record: dump data: Woken up 1 times ]
[ perf record: Dump perf.data.2017110101290446 ]
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Dump perf.data.2017110101290837 ]
[ perf record: Captured and wrote 0.826 MB perf.data.<timestamp> ]
# ./perf script -i ./perf.data.2017110101285323 | head -n3
python 2545 [000] 11064.268083: raw_syscalls:sys_exit: NR 1 = 4
python 2545 [000] 11064.268084: raw_syscalls:sys_enter: NR 1 (1, 12cc330, 4, 7fc237280370, 7fc2373d0700, 2c7b0)
python 2545 [000] 11064.268086: raw_syscalls:sys_exit: NR 1 = 4
# ./perf script -i ./perf.data.2017110101290 | head -n3
failed to open ./perf.data.2017110101290: No such file or directory
# ./perf script -i ./perf.data.2017110101290053 | head -n3
python 2545 [000] 11071.564062: raw_syscalls:sys_enter: NR 1 (1, 12cc330, 4, 7fc237280370, 7fc2373d0700, 2c7b0)
python 2545 [000] 11071.564064: raw_syscalls:sys_exit: NR 1 = 4
python 2545 [000] 11071.564066: raw_syscalls:sys_enter: NR 1 (1, 12cc330, 4, 7fc237280370, 7fc2373d0700, 2c7b0)
# ./perf script -i ./perf.data.2017110101290 | head -n3
perf.data.2017110101290053 perf.data.2017110101290446 perf.data.2017110101290837
# ./perf script -i ./perf.data.2017110101290446 | head -n3
sshd 1321 [000] 11075.499473: raw_syscalls:sys_exit: NR 14 = 0
sshd 1321 [000] 11075.499474: raw_syscalls:sys_enter: NR 14 (2, 7ffe98899490, 0, 8, 0, 3000)
sshd 1321 [000] 11075.499474: raw_syscalls:sys_exit: NR 14 = 0
# ./perf script -i ./perf.data.2017110101290837 | head -n3
python 2545 [000] 11079.280844: raw_syscalls:sys_exit: NR 1 = 4
python 2545 [000] 11079.280847: raw_syscalls:sys_enter: NR 1 (1, 12cc330, 4, 7fc237280370, 7fc2373d0700, 2c7b0)
python 2545 [000] 11079.280850: raw_syscalls:sys_exit: NR 1 = 4

Signed-off-by: Wang Nan <[email protected]>
Acked-by: Namhyung Kim <[email protected]>
Cc: Jiri Olsa <[email protected]>
Cc: Kan Liang <[email protected]>
Cc: Mengting Zhang <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/util/evlist.c | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 68c1f95..b1cea71 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -812,6 +812,7 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
int fd;
int cpu;

+ mp->prot = PROT_READ | PROT_WRITE;
if (evsel->attr.write_backward) {
output = _output_backward;
maps = evlist->backward_mmap;
@@ -824,6 +825,7 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY)
perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
}
+ mp->prot &= ~PROT_WRITE;
}

if (evsel->system_wide && thread)
@@ -1058,9 +1060,12 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
struct perf_evsel *evsel;
const struct cpu_map *cpus = evlist->cpus;
const struct thread_map *threads = evlist->threads;
- struct mmap_params mp = {
- .prot = PROT_READ | PROT_WRITE,
- };
+ /*
+ * Delay setting mp.prot: set it before calling perf_mmap__mmap.
+ * Its value is decided by evsel's write_backward.
+ * So &mp should not be passed through const pointer.
+ */
+ struct mmap_params mp;

if (!evlist->mmap)
evlist->mmap = perf_evlist__alloc_mmap(evlist);

Subject: [tip:perf/core] perf tools: Rename 'backward' to 'overwrite' in evlist, mmap and record

Commit-ID: 0b72d69a542873ee098867deeb37d27ad4629c64
Gitweb: https://git.kernel.org/tip/0b72d69a542873ee098867deeb37d27ad4629c64
Author: Wang Nan <[email protected]>
AuthorDate: Mon, 4 Dec 2017 16:51:07 +0000
Committer: Arnaldo Carvalho de Melo <[email protected]>
CommitDate: Tue, 5 Dec 2017 16:02:39 -0300

perf tools: Rename 'backward' to 'overwrite' in evlist, mmap and record

Remove the backward/forward concept to make it uniform with user
interface (the '--overwrite' option).

Signed-off-by: Wang Nan <[email protected]>
Acked-by: Namhyung Kim <[email protected]>
Cc: Jiri Olsa <[email protected]>
Cc: Kan Liang <[email protected]>
Cc: Mengting Zhang <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/builtin-record.c | 14 +++++++-------
tools/perf/tests/backward-ring-buffer.c | 4 ++--
tools/perf/util/evlist.c | 30 +++++++++++++++---------------
tools/perf/util/evlist.h | 2 +-
tools/perf/util/mmap.c | 22 +++++++++++-----------
5 files changed, 36 insertions(+), 36 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 26b8571..0a5749e 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -479,7 +479,7 @@ static struct perf_event_header finished_round_event = {
};

static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
- bool backward)
+ bool overwrite)
{
u64 bytes_written = rec->bytes_written;
int i;
@@ -489,18 +489,18 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli
if (!evlist)
return 0;

- maps = backward ? evlist->backward_mmap : evlist->mmap;
+ maps = overwrite ? evlist->overwrite_mmap : evlist->mmap;
if (!maps)
return 0;

- if (backward && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
+ if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
return 0;

for (i = 0; i < evlist->nr_mmaps; i++) {
struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap;

if (maps[i].base) {
- if (perf_mmap__push(&maps[i], backward, rec, record__pushfn) != 0) {
+ if (perf_mmap__push(&maps[i], overwrite, rec, record__pushfn) != 0) {
rc = -1;
goto out;
}
@@ -520,7 +520,7 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli
if (bytes_written != rec->bytes_written)
rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));

- if (backward)
+ if (overwrite)
perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
out:
return rc;
@@ -692,8 +692,8 @@ perf_evlist__pick_pc(struct perf_evlist *evlist)
if (evlist) {
if (evlist->mmap && evlist->mmap[0].base)
return evlist->mmap[0].base;
- if (evlist->backward_mmap && evlist->backward_mmap[0].base)
- return evlist->backward_mmap[0].base;
+ if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].base)
+ return evlist->overwrite_mmap[0].base;
}
return NULL;
}
diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c
index cf37e43..4035d43 100644
--- a/tools/perf/tests/backward-ring-buffer.c
+++ b/tools/perf/tests/backward-ring-buffer.c
@@ -33,8 +33,8 @@ static int count_samples(struct perf_evlist *evlist, int *sample_count,
for (i = 0; i < evlist->nr_mmaps; i++) {
union perf_event *event;

- perf_mmap__read_catchup(&evlist->backward_mmap[i]);
- while ((event = perf_mmap__read_backward(&evlist->backward_mmap[i])) != NULL) {
+ perf_mmap__read_catchup(&evlist->overwrite_mmap[i]);
+ while ((event = perf_mmap__read_backward(&evlist->overwrite_mmap[i])) != NULL) {
const u32 type = event->header.type;

switch (type) {
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index b1cea71..3570355 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -125,7 +125,7 @@ static void perf_evlist__purge(struct perf_evlist *evlist)
void perf_evlist__exit(struct perf_evlist *evlist)
{
zfree(&evlist->mmap);
- zfree(&evlist->backward_mmap);
+ zfree(&evlist->overwrite_mmap);
fdarray__exit(&evlist->pollfd);
}

@@ -675,11 +675,11 @@ static int perf_evlist__set_paused(struct perf_evlist *evlist, bool value)
{
int i;

- if (!evlist->backward_mmap)
+ if (!evlist->overwrite_mmap)
return 0;

for (i = 0; i < evlist->nr_mmaps; i++) {
- int fd = evlist->backward_mmap[i].fd;
+ int fd = evlist->overwrite_mmap[i].fd;
int err;

if (fd < 0)
@@ -749,16 +749,16 @@ static void perf_evlist__munmap_nofree(struct perf_evlist *evlist)
for (i = 0; i < evlist->nr_mmaps; i++)
perf_mmap__munmap(&evlist->mmap[i]);

- if (evlist->backward_mmap)
+ if (evlist->overwrite_mmap)
for (i = 0; i < evlist->nr_mmaps; i++)
- perf_mmap__munmap(&evlist->backward_mmap[i]);
+ perf_mmap__munmap(&evlist->overwrite_mmap[i]);
}

void perf_evlist__munmap(struct perf_evlist *evlist)
{
perf_evlist__munmap_nofree(evlist);
zfree(&evlist->mmap);
- zfree(&evlist->backward_mmap);
+ zfree(&evlist->overwrite_mmap);
}

static struct perf_mmap *perf_evlist__alloc_mmap(struct perf_evlist *evlist)
@@ -800,7 +800,7 @@ perf_evlist__should_poll(struct perf_evlist *evlist __maybe_unused,

static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
struct mmap_params *mp, int cpu_idx,
- int thread, int *_output, int *_output_backward)
+ int thread, int *_output, int *_output_overwrite)
{
struct perf_evsel *evsel;
int revent;
@@ -814,14 +814,14 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,

mp->prot = PROT_READ | PROT_WRITE;
if (evsel->attr.write_backward) {
- output = _output_backward;
- maps = evlist->backward_mmap;
+ output = _output_overwrite;
+ maps = evlist->overwrite_mmap;

if (!maps) {
maps = perf_evlist__alloc_mmap(evlist);
if (!maps)
return -1;
- evlist->backward_mmap = maps;
+ evlist->overwrite_mmap = maps;
if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY)
perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
}
@@ -886,14 +886,14 @@ static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist,
pr_debug2("perf event ring buffer mmapped per cpu\n");
for (cpu = 0; cpu < nr_cpus; cpu++) {
int output = -1;
- int output_backward = -1;
+ int output_overwrite = -1;

auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu,
true);

for (thread = 0; thread < nr_threads; thread++) {
if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu,
- thread, &output, &output_backward))
+ thread, &output, &output_overwrite))
goto out_unmap;
}
}
@@ -914,13 +914,13 @@ static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist,
pr_debug2("perf event ring buffer mmapped per thread\n");
for (thread = 0; thread < nr_threads; thread++) {
int output = -1;
- int output_backward = -1;
+ int output_overwrite = -1;

auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread,
false);

if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread,
- &output, &output_backward))
+ &output, &output_overwrite))
goto out_unmap;
}

@@ -1753,7 +1753,7 @@ void perf_evlist__toggle_bkw_mmap(struct perf_evlist *evlist,
RESUME,
} action = NONE;

- if (!evlist->backward_mmap)
+ if (!evlist->overwrite_mmap)
return;

switch (old_state) {
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index eec3377..7516066 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -44,7 +44,7 @@ struct perf_evlist {
} workload;
struct fdarray pollfd;
struct perf_mmap *mmap;
- struct perf_mmap *backward_mmap;
+ struct perf_mmap *overwrite_mmap;
struct thread_map *threads;
struct cpu_map *cpus;
struct perf_evsel *selected;
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index 5f8cb15..05076e6 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -234,18 +234,18 @@ int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd)
return 0;
}

-static int backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end)
+static int overwrite_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end)
{
struct perf_event_header *pheader;
u64 evt_head = head;
int size = mask + 1;

- pr_debug2("backward_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head);
+ pr_debug2("overwrite_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head);
pheader = (struct perf_event_header *)(buf + (head & mask));
*start = head;
while (true) {
if (evt_head - head >= (unsigned int)size) {
- pr_debug("Finished reading backward ring buffer: rewind\n");
+ pr_debug("Finished reading overwrite ring buffer: rewind\n");
if (evt_head - head > (unsigned int)size)
evt_head -= pheader->size;
*end = evt_head;
@@ -255,7 +255,7 @@ static int backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64
pheader = (struct perf_event_header *)(buf + (evt_head & mask));

if (pheader->size == 0) {
- pr_debug("Finished reading backward ring buffer: get start\n");
+ pr_debug("Finished reading overwrite ring buffer: get start\n");
*end = evt_head;
return 0;
}
@@ -267,7 +267,7 @@ static int backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64
return -1;
}

-int perf_mmap__push(struct perf_mmap *md, bool backward,
+int perf_mmap__push(struct perf_mmap *md, bool overwrite,
void *to, int push(void *to, void *buf, size_t size))
{
u64 head = perf_mmap__read_head(md);
@@ -278,19 +278,19 @@ int perf_mmap__push(struct perf_mmap *md, bool backward,
void *buf;
int rc = 0;

- start = backward ? head : old;
- end = backward ? old : head;
+ start = overwrite ? head : old;
+ end = overwrite ? old : head;

if (start == end)
return 0;

size = end - start;
if (size > (unsigned long)(md->mask) + 1) {
- if (!backward) {
+ if (!overwrite) {
WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");

md->prev = head;
- perf_mmap__consume(md, backward);
+ perf_mmap__consume(md, overwrite);
return 0;
}

@@ -298,7 +298,7 @@ int perf_mmap__push(struct perf_mmap *md, bool backward,
* Backward ring buffer is full. We still have a chance to read
* most of data from it.
*/
- if (backward_rb_find_range(data, md->mask, head, &start, &end))
+ if (overwrite_rb_find_range(data, md->mask, head, &start, &end))
return -1;
}

@@ -323,7 +323,7 @@ int perf_mmap__push(struct perf_mmap *md, bool backward,
}

md->prev = head;
- perf_mmap__consume(md, backward);
+ perf_mmap__consume(md, overwrite);
out:
return rc;
}

Subject: [tip:perf/core] perf mmap: Don't discard prev in backward mode

Commit-ID: 7fb4b407a1242dbc85ea3ed1be065dca8f9a6f5b
Gitweb: https://git.kernel.org/tip/7fb4b407a1242dbc85ea3ed1be065dca8f9a6f5b
Author: Wang Nan <[email protected]>
AuthorDate: Mon, 4 Dec 2017 16:51:06 +0000
Committer: Arnaldo Carvalho de Melo <[email protected]>
CommitDate: Tue, 5 Dec 2017 15:59:37 -0300

perf mmap: Don't discard prev in backward mode

'perf record' can switch its output data file. The new output should
only store the data after switching. However, in overwrite backward
mode, the new output still can have data from before switching. That
also brings extra overhead.

At the end of mmap_read(), the position of the processed ring buffer is
saved in md->prev. The next mmap_read() should end at md->prev if that
position has not been overwritten, which avoids processing duplicate
data. However, md->prev is discarded, so the next mmap_read() has to
process the whole valid ring buffer, which probably includes old,
already-processed data.

Avoid calling backward_rb_find_range() when md->prev is still
available.

Signed-off-by: Wang Nan <[email protected]>
Tested-by: Kan Liang <[email protected]>
Acked-by: Namhyung Kim <[email protected]>
Cc: Jiri Olsa <[email protected]>
Cc: Mengting Zhang <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/util/mmap.c | 33 +++++++++++++++------------------
1 file changed, 15 insertions(+), 18 deletions(-)

diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index 3f262e7..5f8cb15 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -267,18 +267,6 @@ static int backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64
return -1;
}

-static int rb_find_range(void *data, int mask, u64 head, u64 old,
- u64 *start, u64 *end, bool backward)
-{
- if (!backward) {
- *start = old;
- *end = head;
- return 0;
- }
-
- return backward_rb_find_range(data, mask, head, start, end);
-}
-
int perf_mmap__push(struct perf_mmap *md, bool backward,
void *to, int push(void *to, void *buf, size_t size))
{
@@ -290,19 +278,28 @@ int perf_mmap__push(struct perf_mmap *md, bool backward,
void *buf;
int rc = 0;

- if (rb_find_range(data, md->mask, head, old, &start, &end, backward))
- return -1;
+ start = backward ? head : old;
+ end = backward ? old : head;

if (start == end)
return 0;

size = end - start;
if (size > (unsigned long)(md->mask) + 1) {
- WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");
+ if (!backward) {
+ WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");

- md->prev = head;
- perf_mmap__consume(md, backward);
- return 0;
+ md->prev = head;
+ perf_mmap__consume(md, backward);
+ return 0;
+ }
+
+ /*
+ * Backward ring buffer is full. We still have a chance to read
+ * most of data from it.
+ */
+ if (backward_rb_find_range(data, md->mask, head, &start, &end))
+ return -1;
}

if ((start & md->mask) + size != (end & md->mask)) {