Hi,
In the perf wiki todo-list, there is an entry regarding initial-delay and
perf trace; the following small patch tries to fulfill this point. It
has been generated against the branch tip/perf/core.
It has only been implemented in the "trace__run" case.
Ex.:
$ sudo strace -- ./perf trace --delay 5 sleep 1 2>&1
...
fcntl(7, F_SETFL, O_RDONLY|O_NONBLOCK) = 0
ioctl(7, PERF_EVENT_IOC_ID, 0x7ffc8fd35718) = 0
ioctl(11, PERF_EVENT_IOC_SET_OUTPUT, 0x7) = 0
fcntl(11, F_SETFL, O_RDONLY|O_NONBLOCK) = 0
ioctl(11, PERF_EVENT_IOC_ID, 0x7ffc8fd35718) = 0
write(6, "\0", 1) = 1
close(6) = 0
nanosleep({0, 5000000}, NULL) = 0 # DELAY OF 5 MS BEFORE ENABLING THE EVNTS
ioctl(3, PERF_EVENT_IOC_ENABLE, 0) = 0
ioctl(4, PERF_EVENT_IOC_ENABLE, 0) = 0
ioctl(5, PERF_EVENT_IOC_ENABLE, 0) = 0
ioctl(7, PERF_EVENT_IOC_ENABLE, 0) = 0
...
Alexis.
Alexis Berlemont (1):
perf: implement --delay on perf trace
tools/perf/builtin-trace.c | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
--
2.10.0
Signed-off-by: Alexis Berlemont <[email protected]>
---
tools/perf/builtin-trace.c | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index c298bd3..0bae454 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -2310,12 +2310,17 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
if (err < 0)
goto out_error_mmap;
- if (!target__none(&trace->opts.target))
+ if (!target__none(&trace->opts.target) && !trace->opts.initial_delay)
perf_evlist__enable(evlist);
if (forks)
perf_evlist__start_workload(evlist);
+ if (trace->opts.initial_delay) {
+ usleep(trace->opts.initial_delay * 1000);
+ perf_evlist__enable(evlist);
+ }
+
trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
evlist->threads->nr > 1 ||
perf_evlist__first(evlist)->attr.inherit;
@@ -2816,6 +2821,9 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
"Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
"per thread proc mmap processing timeout in ms"),
+ OPT_UINTEGER('D', "delay", &trace.opts.initial_delay,
+ "ms to wait before starting measurement after program "
+ "start"),
OPT_END()
};
bool __maybe_unused max_stack_user_set = true;
--
2.10.0
Em Mon, Oct 10, 2016 at 07:43:27AM +0200, Alexis Berlemont escreveu:
> Hi,
>
> In the perf wiki todo-list, there is an entry regarding initial-delay and
> perf trace; the following small patch tries to fulfill this point. It
> has been generated against the branch tip/perf/core.
>
> It has only been implemented in the "trace__run" case.
Ok, moved the description to the cset comment and added the entry to
tools/perf/Documentation/perf-trace.txt, i.e. to its man page, just
copying from the exact same description in 'perf stat' and 'perf
record's ones.
What is next? :-)
- Arnaldo
> Ex.:
>
> $ sudo strace -- ./perf trace --delay 5 sleep 1 2>&1
> ...
> fcntl(7, F_SETFL, O_RDONLY|O_NONBLOCK) = 0
> ioctl(7, PERF_EVENT_IOC_ID, 0x7ffc8fd35718) = 0
> ioctl(11, PERF_EVENT_IOC_SET_OUTPUT, 0x7) = 0
> fcntl(11, F_SETFL, O_RDONLY|O_NONBLOCK) = 0
> ioctl(11, PERF_EVENT_IOC_ID, 0x7ffc8fd35718) = 0
> write(6, "\0", 1) = 1
> close(6) = 0
> nanosleep({0, 5000000}, NULL) = 0 # DELAY OF 5 MS BEFORE ENABLING THE EVNTS
> ioctl(3, PERF_EVENT_IOC_ENABLE, 0) = 0
> ioctl(4, PERF_EVENT_IOC_ENABLE, 0) = 0
> ioctl(5, PERF_EVENT_IOC_ENABLE, 0) = 0
> ioctl(7, PERF_EVENT_IOC_ENABLE, 0) = 0
> ...
>
> Alexis.
>
> Alexis Berlemont (1):
> perf: implement --delay on perf trace
>
> tools/perf/builtin-trace.c | 10 +++++++++-
> 1 file changed, 9 insertions(+), 1 deletion(-)
>
> --
> 2.10.0
Commit-ID: e36b7821a985325dd7074de96deface5c9c6d700
Gitweb: http://git.kernel.org/tip/e36b7821a985325dd7074de96deface5c9c6d700
Author: Alexis Berlemont <[email protected]>
AuthorDate: Mon, 10 Oct 2016 07:43:28 +0200
Committer: Arnaldo Carvalho de Melo <[email protected]>
CommitDate: Mon, 24 Oct 2016 11:07:43 -0300
perf trace: Implement --delay
In the perf wiki todo-list[1], there is an entry regarding initial-delay
and 'perf trace'; the following small patch tries to fulfill this point.
It has been generated against the branch tip/perf/core.
It has only been implemented in the "trace__run" case.
Ex.:
$ sudo strace -- ./perf trace --delay 5 sleep 1 2>&1
...
fcntl(7, F_SETFL, O_RDONLY|O_NONBLOCK) = 0
ioctl(7, PERF_EVENT_IOC_ID, 0x7ffc8fd35718) = 0
ioctl(11, PERF_EVENT_IOC_SET_OUTPUT, 0x7) = 0
fcntl(11, F_SETFL, O_RDONLY|O_NONBLOCK) = 0
ioctl(11, PERF_EVENT_IOC_ID, 0x7ffc8fd35718) = 0
write(6, "\0", 1) = 1
close(6) = 0
nanosleep({0, 5000000}, NULL) = 0 # DELAY OF 5 MS BEFORE ENABLING THE EVENTS
ioctl(3, PERF_EVENT_IOC_ENABLE, 0) = 0
ioctl(4, PERF_EVENT_IOC_ENABLE, 0) = 0
ioctl(5, PERF_EVENT_IOC_ENABLE, 0) = 0
ioctl(7, PERF_EVENT_IOC_ENABLE, 0) = 0
...
[1]: https://perf.wiki.kernel.org/index.php/Todo
Signed-off-by: Alexis Berlemont <[email protected]>
Suggested-and-Tested-by: Arnaldo Carvalho de Melo <[email protected]>
Cc: Alexander Shishkin <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
[ Add entry to the manpage, cut'n'pasted from stat's and record's ]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/Documentation/perf-trace.txt | 5 +++++
tools/perf/builtin-trace.c | 10 +++++++++-
2 files changed, 14 insertions(+), 1 deletion(-)
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index 1ab0782..781b019 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -39,6 +39,11 @@ OPTIONS
Prefixing with ! shows all syscalls but the ones specified. You may
need to escape it.
+-D msecs::
+--delay msecs::
+After starting the program, wait msecs before measuring. This is useful to
+filter out the startup phase of the program, which is often very different.
+
-o::
--output=::
Output file name.
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index c298bd3..0bae454 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -2310,12 +2310,17 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
if (err < 0)
goto out_error_mmap;
- if (!target__none(&trace->opts.target))
+ if (!target__none(&trace->opts.target) && !trace->opts.initial_delay)
perf_evlist__enable(evlist);
if (forks)
perf_evlist__start_workload(evlist);
+ if (trace->opts.initial_delay) {
+ usleep(trace->opts.initial_delay * 1000);
+ perf_evlist__enable(evlist);
+ }
+
trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
evlist->threads->nr > 1 ||
perf_evlist__first(evlist)->attr.inherit;
@@ -2816,6 +2821,9 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
"Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
"per thread proc mmap processing timeout in ms"),
+ OPT_UINTEGER('D', "delay", &trace.opts.initial_delay,
+ "ms to wait before starting measurement after program "
+ "start"),
OPT_END()
};
bool __maybe_unused max_stack_user_set = true;