Depending on the use case, some kinds of event synthesis are needed
and some are not. Make it controllable so that heavy operations, such
as synthesizing MMAP events for all tasks, can be turned off.
For now, all callers are converted to enable all synthesis by
default. That will be updated in a later patch.
Signed-off-by: Namhyung Kim <[email protected]>
---
tools/perf/bench/synthesize.c | 4 +--
tools/perf/builtin-kvm.c | 2 +-
tools/perf/builtin-record.c | 6 ++--
tools/perf/builtin-top.c | 2 +-
tools/perf/builtin-trace.c | 4 +--
tools/perf/tests/code-reading.c | 3 +-
tools/perf/tests/mmap-thread-lookup.c | 4 +--
tools/perf/util/synthetic-events.c | 45 ++++++++++++++++-----------
tools/perf/util/synthetic-events.h | 8 ++---
9 files changed, 44 insertions(+), 34 deletions(-)
diff --git a/tools/perf/bench/synthesize.c b/tools/perf/bench/synthesize.c
index 05f7c923c745..7401ebbac100 100644
--- a/tools/perf/bench/synthesize.c
+++ b/tools/perf/bench/synthesize.c
@@ -80,7 +80,7 @@ static int do_run_single_threaded(struct perf_session *session,
NULL,
target, threads,
process_synthesized_event,
- data_mmap,
+ true, data_mmap,
nr_threads_synthesize);
if (err)
return err;
@@ -171,7 +171,7 @@ static int do_run_multi_threaded(struct target *target,
NULL,
target, NULL,
process_synthesized_event,
- false,
+ true, false,
nr_threads_synthesize);
if (err) {
perf_session__delete(session);
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index aa1b127ffb5b..c6f352ee57e6 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -1456,7 +1456,7 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
perf_session__set_id_hdr_size(kvm->session);
ordered_events__set_copy_on_queue(&kvm->session->ordered_events, true);
machine__synthesize_threads(&kvm->session->machines.host, &kvm->opts.target,
- kvm->evlist->core.threads, false, 1);
+ kvm->evlist->core.threads, true, false, 1);
err = kvm_live_open_events(kvm);
if (err)
goto out;
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 548c1dbde6c5..764e391e89f8 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -1265,6 +1265,7 @@ static int record__synthesize_workload(struct record *rec, bool tail)
err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
process_synthesized_event,
&rec->session->machines.host,
+ true,
rec->opts.sample_address);
perf_thread_map__put(thread_map);
return err;
@@ -1479,8 +1480,9 @@ static int record__synthesize(struct record *rec, bool tail)
f = process_locked_synthesized_event;
}
- err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->core.threads,
- f, opts->sample_address,
+ err = __machine__synthesize_threads(machine, tool, &opts->target,
+ rec->evlist->core.threads,
+ f, true, opts->sample_address,
rec->opts.nr_threads_synthesize);
if (rec->opts.nr_threads_synthesize > 1)
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index a3ae9176a83e..020c4f110c10 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1271,7 +1271,7 @@ static int __cmd_top(struct perf_top *top)
pr_debug("Couldn't synthesize cgroup events.\n");
machine__synthesize_threads(&top->session->machines.host, &opts->target,
- top->evlist->core.threads, false,
+ top->evlist->core.threads, true, false,
top->nr_threads_synthesize);
if (top->nr_threads_synthesize > 1)
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 2bf21194c7b3..2f1d20553a0a 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1628,8 +1628,8 @@ static int trace__symbols_init(struct trace *trace, struct evlist *evlist)
goto out;
err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
- evlist->core.threads, trace__tool_process, false,
- 1);
+ evlist->core.threads, trace__tool_process,
+ true, false, 1);
out:
if (err)
symbol__exit();
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index 9866cddebf23..3a4d932e7ffc 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -606,7 +606,8 @@ static int do_test_code_reading(bool try_kcore)
}
ret = perf_event__synthesize_thread_map(NULL, threads,
- perf_event__process, machine, false);
+ perf_event__process, machine,
+ true, false);
if (ret < 0) {
pr_debug("perf_event__synthesize_thread_map failed\n");
goto out_err;
diff --git a/tools/perf/tests/mmap-thread-lookup.c b/tools/perf/tests/mmap-thread-lookup.c
index 8d9d4cbff76d..6f2da7a72f67 100644
--- a/tools/perf/tests/mmap-thread-lookup.c
+++ b/tools/perf/tests/mmap-thread-lookup.c
@@ -135,7 +135,7 @@ static int synth_all(struct machine *machine)
{
return perf_event__synthesize_threads(NULL,
perf_event__process,
- machine, 0, 1);
+ machine, 1, 0, 1);
}
static int synth_process(struct machine *machine)
@@ -147,7 +147,7 @@ static int synth_process(struct machine *machine)
err = perf_event__synthesize_thread_map(NULL, map,
perf_event__process,
- machine, 0);
+ machine, 1, 0);
perf_thread_map__put(map);
return err;
diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
index a7e981b2d7de..a7a2825356d6 100644
--- a/tools/perf/util/synthetic-events.c
+++ b/tools/perf/util/synthetic-events.c
@@ -715,7 +715,8 @@ static int __event__synthesize_thread(union perf_event *comm_event,
union perf_event *fork_event,
union perf_event *namespaces_event,
pid_t pid, int full, perf_event__handler_t process,
- struct perf_tool *tool, struct machine *machine, bool mmap_data)
+ struct perf_tool *tool, struct machine *machine,
+ bool needs_mmap, bool mmap_data)
{
char filename[PATH_MAX];
struct dirent **dirent;
@@ -739,7 +740,7 @@ static int __event__synthesize_thread(union perf_event *comm_event,
* send mmap only for thread group leader
* see thread__init_maps()
*/
- if (pid == tgid &&
+ if (pid == tgid && needs_mmap &&
perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid,
process, machine, mmap_data))
return -1;
@@ -786,7 +787,7 @@ static int __event__synthesize_thread(union perf_event *comm_event,
break;
rc = 0;
- if (_pid == pid && !kernel_thread) {
+ if (_pid == pid && !kernel_thread && needs_mmap) {
/* process the parent's maps too */
rc = perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid,
process, machine, mmap_data);
@@ -806,7 +807,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
struct perf_thread_map *threads,
perf_event__handler_t process,
struct machine *machine,
- bool mmap_data)
+ bool needs_mmap, bool mmap_data)
{
union perf_event *comm_event, *mmap_event, *fork_event;
union perf_event *namespaces_event;
@@ -836,7 +837,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
fork_event, namespaces_event,
perf_thread_map__pid(threads, thread), 0,
process, tool, machine,
- mmap_data)) {
+ needs_mmap, mmap_data)) {
err = -1;
break;
}
@@ -862,7 +863,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
fork_event, namespaces_event,
comm_event->comm.pid, 0,
process, tool, machine,
- mmap_data)) {
+ needs_mmap, mmap_data)) {
err = -1;
break;
}
@@ -882,6 +883,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
static int __perf_event__synthesize_threads(struct perf_tool *tool,
perf_event__handler_t process,
struct machine *machine,
+ bool needs_mmap,
bool mmap_data,
struct dirent **dirent,
int start,
@@ -926,7 +928,7 @@ static int __perf_event__synthesize_threads(struct perf_tool *tool,
*/
__event__synthesize_thread(comm_event, mmap_event, fork_event,
namespaces_event, pid, 1, process,
- tool, machine, mmap_data);
+ tool, machine, needs_mmap, mmap_data);
}
err = 0;
@@ -945,6 +947,7 @@ struct synthesize_threads_arg {
struct perf_tool *tool;
perf_event__handler_t process;
struct machine *machine;
+ bool needs_mmap;
bool mmap_data;
struct dirent **dirent;
int num;
@@ -956,7 +959,8 @@ static void *synthesize_threads_worker(void *arg)
struct synthesize_threads_arg *args = arg;
__perf_event__synthesize_threads(args->tool, args->process,
- args->machine, args->mmap_data,
+ args->machine,
+ args->needs_mmap, args->mmap_data,
args->dirent,
args->start, args->num);
return NULL;
@@ -965,7 +969,7 @@ static void *synthesize_threads_worker(void *arg)
int perf_event__synthesize_threads(struct perf_tool *tool,
perf_event__handler_t process,
struct machine *machine,
- bool mmap_data,
+ bool needs_mmap, bool mmap_data,
unsigned int nr_threads_synthesize)
{
struct synthesize_threads_arg *args = NULL;
@@ -994,7 +998,8 @@ int perf_event__synthesize_threads(struct perf_tool *tool,
if (thread_nr <= 1) {
err = __perf_event__synthesize_threads(tool, process,
- machine, mmap_data,
+ machine,
+ needs_mmap, mmap_data,
dirent, base, n);
goto free_dirent;
}
@@ -1015,6 +1020,7 @@ int perf_event__synthesize_threads(struct perf_tool *tool,
args[i].tool = tool;
args[i].process = process;
args[i].machine = machine;
+ args[i].needs_mmap = needs_mmap;
args[i].mmap_data = mmap_data;
args[i].dirent = dirent;
}
@@ -1775,26 +1781,27 @@ int perf_event__synthesize_id_index(struct perf_tool *tool, perf_event__handler_
int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool,
struct target *target, struct perf_thread_map *threads,
- perf_event__handler_t process, bool data_mmap,
- unsigned int nr_threads_synthesize)
+ perf_event__handler_t process, bool needs_mmap,
+ bool data_mmap, unsigned int nr_threads_synthesize)
{
if (target__has_task(target))
- return perf_event__synthesize_thread_map(tool, threads, process, machine, data_mmap);
+ return perf_event__synthesize_thread_map(tool, threads, process, machine,
+ needs_mmap, data_mmap);
else if (target__has_cpu(target))
- return perf_event__synthesize_threads(tool, process,
- machine, data_mmap,
+ return perf_event__synthesize_threads(tool, process, machine,
+ needs_mmap, data_mmap,
nr_threads_synthesize);
/* command specified */
return 0;
}
int machine__synthesize_threads(struct machine *machine, struct target *target,
- struct perf_thread_map *threads, bool data_mmap,
- unsigned int nr_threads_synthesize)
+ struct perf_thread_map *threads, bool needs_mmap,
+ bool data_mmap, unsigned int nr_threads_synthesize)
{
return __machine__synthesize_threads(machine, NULL, target, threads,
- perf_event__process, data_mmap,
- nr_threads_synthesize);
+ perf_event__process, needs_mmap,
+ data_mmap, nr_threads_synthesize);
}
static struct perf_record_event_update *event_update_event__new(size_t size, u64 type, u64 id)
diff --git a/tools/perf/util/synthetic-events.h b/tools/perf/util/synthetic-events.h
index c845e2b9b444..44f72d56ca4d 100644
--- a/tools/perf/util/synthetic-events.h
+++ b/tools/perf/util/synthetic-events.h
@@ -53,8 +53,8 @@ int perf_event__synthesize_stat_events(struct perf_stat_config *config, struct p
int perf_event__synthesize_stat_round(struct perf_tool *tool, u64 time, u64 type, perf_event__handler_t process, struct machine *machine);
int perf_event__synthesize_stat(struct perf_tool *tool, u32 cpu, u32 thread, u64 id, struct perf_counts_values *count, perf_event__handler_t process, struct machine *machine);
int perf_event__synthesize_thread_map2(struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine);
-int perf_event__synthesize_thread_map(struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine, bool mmap_data);
-int perf_event__synthesize_threads(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine, bool mmap_data, unsigned int nr_threads_synthesize);
+int perf_event__synthesize_thread_map(struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine, bool needs_mmap, bool mmap_data);
+int perf_event__synthesize_threads(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine, bool needs_mmap, bool mmap_data, unsigned int nr_threads_synthesize);
int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, struct evlist *evlist, perf_event__handler_t process);
int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc, struct perf_tool *tool, perf_event__handler_t process, struct machine *machine);
pid_t perf_event__synthesize_comm(struct perf_tool *tool, union perf_event *event, pid_t pid, perf_event__handler_t process, struct machine *machine);
@@ -65,10 +65,10 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool,
struct target *target, struct perf_thread_map *threads,
- perf_event__handler_t process, bool data_mmap,
+ perf_event__handler_t process, bool needs_mmap, bool data_mmap,
unsigned int nr_threads_synthesize);
int machine__synthesize_threads(struct machine *machine, struct target *target,
- struct perf_thread_map *threads, bool data_mmap,
+ struct perf_thread_map *threads, bool needs_mmap, bool data_mmap,
unsigned int nr_threads_synthesize);
#ifdef HAVE_AUXTRACE_SUPPORT
--
2.32.0.605.g8dce9f2422-goog
Add an option to control event synthesis behavior.
--synth <no|all|task|mmap|cgroup>
Fine-tune event synthesis: default=all
This can be useful when we know that some synthesis is not needed,
as in a specific use case and/or when using a pipe:
$ perf record -a --all-cgroups --synth cgroup -o- sleep 1 | \
> perf report -i- -s cgroup
Signed-off-by: Namhyung Kim <[email protected]>
---
tools/perf/Documentation/perf-record.txt | 14 +++++++
tools/perf/builtin-record.c | 48 +++++++++++++++++++-----
tools/perf/util/record.h | 1 +
tools/perf/util/synthetic-events.c | 28 ++++++++++++++
tools/perf/util/synthetic-events.h | 12 ++++++
5 files changed, 94 insertions(+), 9 deletions(-)
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index d71bac847936..ffa110488431 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -596,6 +596,20 @@ options.
'perf record --dry-run -e' can act as a BPF script compiler if llvm.dump-obj
in config file is set to true.
+--synth=TYPE::
+Collect and synthesize given type of events (comma separated). Note that
+this option controls the synthesis from the /proc filesystem which represent
+task status. Kernel (and some other) events are recorded regardless of the
+choice in this option. For example, --synth=no would have MMAP events for
+kernel and modules.
+
+Available types are:
+ 'task' - synthesize FORK and COMM events for each task
+ 'mmap' - synthesize MMAP events for each process (implies 'task')
+ 'cgroup' - synthesize CGROUP events for each cgroup
+ 'all' - synthesize all events (default)
+ 'no' - do not synthesize any of the above events
+
--tail-synthesize::
Instead of collecting non-sample events (for example, fork, comm, mmap) at
the beginning of record, collect them during finalizing an output file.
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 764e391e89f8..46ea7bd09fb1 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -1254,6 +1254,7 @@ static int record__synthesize_workload(struct record *rec, bool tail)
{
int err;
struct perf_thread_map *thread_map;
+ bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;
if (rec->opts.tail_synthesize != tail)
return 0;
@@ -1265,7 +1266,7 @@ static int record__synthesize_workload(struct record *rec, bool tail)
err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
process_synthesized_event,
&rec->session->machines.host,
- true,
+ needs_mmap,
rec->opts.sample_address);
perf_thread_map__put(thread_map);
return err;
@@ -1470,20 +1471,26 @@ static int record__synthesize(struct record *rec, bool tail)
if (err < 0)
pr_warning("Couldn't synthesize bpf events.\n");
- err = perf_event__synthesize_cgroups(tool, process_synthesized_event,
- machine);
- if (err < 0)
- pr_warning("Couldn't synthesize cgroup events.\n");
+ if (rec->opts.synth & PERF_SYNTH_CGROUP) {
+ err = perf_event__synthesize_cgroups(tool, process_synthesized_event,
+ machine);
+ if (err < 0)
+ pr_warning("Couldn't synthesize cgroup events.\n");
+ }
if (rec->opts.nr_threads_synthesize > 1) {
perf_set_multithreaded();
f = process_locked_synthesized_event;
}
- err = __machine__synthesize_threads(machine, tool, &opts->target,
- rec->evlist->core.threads,
- f, true, opts->sample_address,
- rec->opts.nr_threads_synthesize);
+ if (rec->opts.synth & PERF_SYNTH_TASK) {
+ bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;
+
+ err = __machine__synthesize_threads(machine, tool, &opts->target,
+ rec->evlist->core.threads,
+ f, needs_mmap, opts->sample_address,
+ rec->opts.nr_threads_synthesize);
+ }
if (rec->opts.nr_threads_synthesize > 1)
perf_set_singlethreaded();
@@ -2392,6 +2399,26 @@ static int process_timestamp_boundary(struct perf_tool *tool,
return 0;
}
+static int parse_record_synth_option(const struct option *opt,
+ const char *str,
+ int unset __maybe_unused)
+{
+ struct record_opts *opts = opt->value;
+ char *p = strdup(str);
+
+ if (p == NULL)
+ return -1;
+
+ opts->synth = parse_synth_opt(p);
+ free(p);
+
+ if (opts->synth < 0) {
+ pr_err("Invalid synth option: %s\n", str);
+ return -1;
+ }
+ return 0;
+}
+
/*
* XXX Ideally would be local to cmd_record() and passed to a record__new
* because we need to have access to it in record__exit, that is called
@@ -2417,6 +2444,7 @@ static struct record record = {
.nr_threads_synthesize = 1,
.ctl_fd = -1,
.ctl_fd_ack = -1,
+ .synth = PERF_SYNTH_ALL,
},
.tool = {
.sample = process_sample_event,
@@ -2632,6 +2660,8 @@ static struct option __record_options[] = {
"\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n"
"\t\t\t Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.",
parse_control_option),
+ OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup",
+ "Fine-tune event synthesis: default=all", parse_record_synth_option),
OPT_END()
};
diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h
index 68f471d9a88b..ef6c2715fdd9 100644
--- a/tools/perf/util/record.h
+++ b/tools/perf/util/record.h
@@ -77,6 +77,7 @@ struct record_opts {
int ctl_fd;
int ctl_fd_ack;
bool ctl_fd_close;
+ int synth;
};
extern const char * const *record_usage;
diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
index a7a2825356d6..198982109f0f 100644
--- a/tools/perf/util/synthetic-events.c
+++ b/tools/perf/util/synthetic-events.c
@@ -2237,3 +2237,31 @@ int perf_event__synthesize_for_pipe(struct perf_tool *tool,
return ret;
}
+
+int parse_synth_opt(char *synth)
+{
+ char *p, *q;
+ int ret = 0;
+
+ if (synth == NULL)
+ return -1;
+
+ for (q = synth; (p = strsep(&q, ",")); p = q) {
+ if (!strcasecmp(p, "no") || !strcasecmp(p, "none"))
+ return 0;
+
+ if (!strcasecmp(p, "all"))
+ return PERF_SYNTH_ALL;
+
+ if (!strcasecmp(p, "task"))
+ ret |= PERF_SYNTH_TASK;
+ else if (!strcasecmp(p, "mmap"))
+ ret |= PERF_SYNTH_TASK | PERF_SYNTH_MMAP;
+ else if (!strcasecmp(p, "cgroup"))
+ ret |= PERF_SYNTH_CGROUP;
+ else
+ return -1;
+ }
+
+ return ret;
+}
diff --git a/tools/perf/util/synthetic-events.h b/tools/perf/util/synthetic-events.h
index 44f72d56ca4d..c931433bacbf 100644
--- a/tools/perf/util/synthetic-events.h
+++ b/tools/perf/util/synthetic-events.h
@@ -27,6 +27,18 @@ struct target;
union perf_event;
+enum perf_record_synth {
+ PERF_SYNTH_TASK = 1 << 0,
+ PERF_SYNTH_MMAP = 1 << 1,
+ PERF_SYNTH_CGROUP = 1 << 2,
+
+ /* last element */
+ PERF_SYNTH_MAX = 1 << 3,
+};
+#define PERF_SYNTH_ALL (PERF_SYNTH_MAX - 1)
+
+int parse_synth_opt(char *str);
+
typedef int (*perf_event__handler_t)(struct perf_tool *tool, union perf_event *event,
struct perf_sample *sample, struct machine *machine);
--
2.32.0.605.g8dce9f2422-goog
On Tue, Aug 10, 2021 at 09:46:57PM -0700, Namhyung Kim wrote:
> Depending on the use case, it might require some kind of synthesize
> and some not. Make it controllable to turn off heavy operations like
> MMAP for all tasks.
>
> Currently all users are converted to enable all the synthesis by
> default. It'll be updated in the later patch.
>
> Signed-off-by: Namhyung Kim <[email protected]>
for both patches
Acked-by: Jiri Olsa <[email protected]>
thanks,
jirka
> ---
> tools/perf/bench/synthesize.c | 4 +--
> tools/perf/builtin-kvm.c | 2 +-
> tools/perf/builtin-record.c | 6 ++--
> tools/perf/builtin-top.c | 2 +-
> tools/perf/builtin-trace.c | 4 +--
> tools/perf/tests/code-reading.c | 3 +-
> tools/perf/tests/mmap-thread-lookup.c | 4 +--
> tools/perf/util/synthetic-events.c | 45 ++++++++++++++++-----------
> tools/perf/util/synthetic-events.h | 8 ++---
> 9 files changed, 44 insertions(+), 34 deletions(-)
>
> diff --git a/tools/perf/bench/synthesize.c b/tools/perf/bench/synthesize.c
> index 05f7c923c745..7401ebbac100 100644
> --- a/tools/perf/bench/synthesize.c
> +++ b/tools/perf/bench/synthesize.c
> @@ -80,7 +80,7 @@ static int do_run_single_threaded(struct perf_session *session,
> NULL,
> target, threads,
> process_synthesized_event,
> - data_mmap,
> + true, data_mmap,
> nr_threads_synthesize);
> if (err)
> return err;
> @@ -171,7 +171,7 @@ static int do_run_multi_threaded(struct target *target,
> NULL,
> target, NULL,
> process_synthesized_event,
> - false,
> + true, false,
> nr_threads_synthesize);
> if (err) {
> perf_session__delete(session);
> diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
> index aa1b127ffb5b..c6f352ee57e6 100644
> --- a/tools/perf/builtin-kvm.c
> +++ b/tools/perf/builtin-kvm.c
> @@ -1456,7 +1456,7 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
> perf_session__set_id_hdr_size(kvm->session);
> ordered_events__set_copy_on_queue(&kvm->session->ordered_events, true);
> machine__synthesize_threads(&kvm->session->machines.host, &kvm->opts.target,
> - kvm->evlist->core.threads, false, 1);
> + kvm->evlist->core.threads, true, false, 1);
> err = kvm_live_open_events(kvm);
> if (err)
> goto out;
> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> index 548c1dbde6c5..764e391e89f8 100644
> --- a/tools/perf/builtin-record.c
> +++ b/tools/perf/builtin-record.c
> @@ -1265,6 +1265,7 @@ static int record__synthesize_workload(struct record *rec, bool tail)
> err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
> process_synthesized_event,
> &rec->session->machines.host,
> + true,
> rec->opts.sample_address);
> perf_thread_map__put(thread_map);
> return err;
> @@ -1479,8 +1480,9 @@ static int record__synthesize(struct record *rec, bool tail)
> f = process_locked_synthesized_event;
> }
>
> - err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->core.threads,
> - f, opts->sample_address,
> + err = __machine__synthesize_threads(machine, tool, &opts->target,
> + rec->evlist->core.threads,
> + f, true, opts->sample_address,
> rec->opts.nr_threads_synthesize);
>
> if (rec->opts.nr_threads_synthesize > 1)
> diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
> index a3ae9176a83e..020c4f110c10 100644
> --- a/tools/perf/builtin-top.c
> +++ b/tools/perf/builtin-top.c
> @@ -1271,7 +1271,7 @@ static int __cmd_top(struct perf_top *top)
> pr_debug("Couldn't synthesize cgroup events.\n");
>
> machine__synthesize_threads(&top->session->machines.host, &opts->target,
> - top->evlist->core.threads, false,
> + top->evlist->core.threads, true, false,
> top->nr_threads_synthesize);
>
> if (top->nr_threads_synthesize > 1)
> diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
> index 2bf21194c7b3..2f1d20553a0a 100644
> --- a/tools/perf/builtin-trace.c
> +++ b/tools/perf/builtin-trace.c
> @@ -1628,8 +1628,8 @@ static int trace__symbols_init(struct trace *trace, struct evlist *evlist)
> goto out;
>
> err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
> - evlist->core.threads, trace__tool_process, false,
> - 1);
> + evlist->core.threads, trace__tool_process,
> + true, false, 1);
> out:
> if (err)
> symbol__exit();
> diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
> index 9866cddebf23..3a4d932e7ffc 100644
> --- a/tools/perf/tests/code-reading.c
> +++ b/tools/perf/tests/code-reading.c
> @@ -606,7 +606,8 @@ static int do_test_code_reading(bool try_kcore)
> }
>
> ret = perf_event__synthesize_thread_map(NULL, threads,
> - perf_event__process, machine, false);
> + perf_event__process, machine,
> + true, false);
> if (ret < 0) {
> pr_debug("perf_event__synthesize_thread_map failed\n");
> goto out_err;
> diff --git a/tools/perf/tests/mmap-thread-lookup.c b/tools/perf/tests/mmap-thread-lookup.c
> index 8d9d4cbff76d..6f2da7a72f67 100644
> --- a/tools/perf/tests/mmap-thread-lookup.c
> +++ b/tools/perf/tests/mmap-thread-lookup.c
> @@ -135,7 +135,7 @@ static int synth_all(struct machine *machine)
> {
> return perf_event__synthesize_threads(NULL,
> perf_event__process,
> - machine, 0, 1);
> + machine, 1, 0, 1);
> }
>
> static int synth_process(struct machine *machine)
> @@ -147,7 +147,7 @@ static int synth_process(struct machine *machine)
>
> err = perf_event__synthesize_thread_map(NULL, map,
> perf_event__process,
> - machine, 0);
> + machine, 1, 0);
>
> perf_thread_map__put(map);
> return err;
> diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
> index a7e981b2d7de..a7a2825356d6 100644
> --- a/tools/perf/util/synthetic-events.c
> +++ b/tools/perf/util/synthetic-events.c
> @@ -715,7 +715,8 @@ static int __event__synthesize_thread(union perf_event *comm_event,
> union perf_event *fork_event,
> union perf_event *namespaces_event,
> pid_t pid, int full, perf_event__handler_t process,
> - struct perf_tool *tool, struct machine *machine, bool mmap_data)
> + struct perf_tool *tool, struct machine *machine,
> + bool needs_mmap, bool mmap_data)
> {
> char filename[PATH_MAX];
> struct dirent **dirent;
> @@ -739,7 +740,7 @@ static int __event__synthesize_thread(union perf_event *comm_event,
> * send mmap only for thread group leader
> * see thread__init_maps()
> */
> - if (pid == tgid &&
> + if (pid == tgid && needs_mmap &&
> perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid,
> process, machine, mmap_data))
> return -1;
> @@ -786,7 +787,7 @@ static int __event__synthesize_thread(union perf_event *comm_event,
> break;
>
> rc = 0;
> - if (_pid == pid && !kernel_thread) {
> + if (_pid == pid && !kernel_thread && needs_mmap) {
> /* process the parent's maps too */
> rc = perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid,
> process, machine, mmap_data);
> @@ -806,7 +807,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
> struct perf_thread_map *threads,
> perf_event__handler_t process,
> struct machine *machine,
> - bool mmap_data)
> + bool needs_mmap, bool mmap_data)
> {
> union perf_event *comm_event, *mmap_event, *fork_event;
> union perf_event *namespaces_event;
> @@ -836,7 +837,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
> fork_event, namespaces_event,
> perf_thread_map__pid(threads, thread), 0,
> process, tool, machine,
> - mmap_data)) {
> + needs_mmap, mmap_data)) {
> err = -1;
> break;
> }
> @@ -862,7 +863,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
> fork_event, namespaces_event,
> comm_event->comm.pid, 0,
> process, tool, machine,
> - mmap_data)) {
> + needs_mmap, mmap_data)) {
> err = -1;
> break;
> }
> @@ -882,6 +883,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
> static int __perf_event__synthesize_threads(struct perf_tool *tool,
> perf_event__handler_t process,
> struct machine *machine,
> + bool needs_mmap,
> bool mmap_data,
> struct dirent **dirent,
> int start,
> @@ -926,7 +928,7 @@ static int __perf_event__synthesize_threads(struct perf_tool *tool,
> */
> __event__synthesize_thread(comm_event, mmap_event, fork_event,
> namespaces_event, pid, 1, process,
> - tool, machine, mmap_data);
> + tool, machine, needs_mmap, mmap_data);
> }
> err = 0;
>
> @@ -945,6 +947,7 @@ struct synthesize_threads_arg {
> struct perf_tool *tool;
> perf_event__handler_t process;
> struct machine *machine;
> + bool needs_mmap;
> bool mmap_data;
> struct dirent **dirent;
> int num;
> @@ -956,7 +959,8 @@ static void *synthesize_threads_worker(void *arg)
> struct synthesize_threads_arg *args = arg;
>
> __perf_event__synthesize_threads(args->tool, args->process,
> - args->machine, args->mmap_data,
> + args->machine,
> + args->needs_mmap, args->mmap_data,
> args->dirent,
> args->start, args->num);
> return NULL;
> @@ -965,7 +969,7 @@ static void *synthesize_threads_worker(void *arg)
> int perf_event__synthesize_threads(struct perf_tool *tool,
> perf_event__handler_t process,
> struct machine *machine,
> - bool mmap_data,
> + bool needs_mmap, bool mmap_data,
> unsigned int nr_threads_synthesize)
> {
> struct synthesize_threads_arg *args = NULL;
> @@ -994,7 +998,8 @@ int perf_event__synthesize_threads(struct perf_tool *tool,
>
> if (thread_nr <= 1) {
> err = __perf_event__synthesize_threads(tool, process,
> - machine, mmap_data,
> + machine,
> + needs_mmap, mmap_data,
> dirent, base, n);
> goto free_dirent;
> }
> @@ -1015,6 +1020,7 @@ int perf_event__synthesize_threads(struct perf_tool *tool,
> args[i].tool = tool;
> args[i].process = process;
> args[i].machine = machine;
> + args[i].needs_mmap = needs_mmap;
> args[i].mmap_data = mmap_data;
> args[i].dirent = dirent;
> }
> @@ -1775,26 +1781,27 @@ int perf_event__synthesize_id_index(struct perf_tool *tool, perf_event__handler_
>
> int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool,
> struct target *target, struct perf_thread_map *threads,
> - perf_event__handler_t process, bool data_mmap,
> - unsigned int nr_threads_synthesize)
> + perf_event__handler_t process, bool needs_mmap,
> + bool data_mmap, unsigned int nr_threads_synthesize)
> {
> if (target__has_task(target))
> - return perf_event__synthesize_thread_map(tool, threads, process, machine, data_mmap);
> + return perf_event__synthesize_thread_map(tool, threads, process, machine,
> + needs_mmap, data_mmap);
> else if (target__has_cpu(target))
> - return perf_event__synthesize_threads(tool, process,
> - machine, data_mmap,
> + return perf_event__synthesize_threads(tool, process, machine,
> + needs_mmap, data_mmap,
> nr_threads_synthesize);
> /* command specified */
> return 0;
> }
>
> int machine__synthesize_threads(struct machine *machine, struct target *target,
> - struct perf_thread_map *threads, bool data_mmap,
> - unsigned int nr_threads_synthesize)
> + struct perf_thread_map *threads, bool needs_mmap,
> + bool data_mmap, unsigned int nr_threads_synthesize)
> {
> return __machine__synthesize_threads(machine, NULL, target, threads,
> - perf_event__process, data_mmap,
> - nr_threads_synthesize);
> + perf_event__process, needs_mmap,
> + data_mmap, nr_threads_synthesize);
> }
>
> static struct perf_record_event_update *event_update_event__new(size_t size, u64 type, u64 id)
> diff --git a/tools/perf/util/synthetic-events.h b/tools/perf/util/synthetic-events.h
> index c845e2b9b444..44f72d56ca4d 100644
> --- a/tools/perf/util/synthetic-events.h
> +++ b/tools/perf/util/synthetic-events.h
> @@ -53,8 +53,8 @@ int perf_event__synthesize_stat_events(struct perf_stat_config *config, struct p
> int perf_event__synthesize_stat_round(struct perf_tool *tool, u64 time, u64 type, perf_event__handler_t process, struct machine *machine);
> int perf_event__synthesize_stat(struct perf_tool *tool, u32 cpu, u32 thread, u64 id, struct perf_counts_values *count, perf_event__handler_t process, struct machine *machine);
> int perf_event__synthesize_thread_map2(struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine);
> -int perf_event__synthesize_thread_map(struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine, bool mmap_data);
> -int perf_event__synthesize_threads(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine, bool mmap_data, unsigned int nr_threads_synthesize);
> +int perf_event__synthesize_thread_map(struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine, bool needs_mmap, bool mmap_data);
> +int perf_event__synthesize_threads(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine, bool needs_mmap, bool mmap_data, unsigned int nr_threads_synthesize);
> int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, struct evlist *evlist, perf_event__handler_t process);
> int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc, struct perf_tool *tool, perf_event__handler_t process, struct machine *machine);
> pid_t perf_event__synthesize_comm(struct perf_tool *tool, union perf_event *event, pid_t pid, perf_event__handler_t process, struct machine *machine);
> @@ -65,10 +65,10 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
>
> int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool,
> struct target *target, struct perf_thread_map *threads,
> - perf_event__handler_t process, bool data_mmap,
> + perf_event__handler_t process, bool needs_mmap, bool data_mmap,
> unsigned int nr_threads_synthesize);
> int machine__synthesize_threads(struct machine *machine, struct target *target,
> - struct perf_thread_map *threads, bool data_mmap,
> + struct perf_thread_map *threads, bool needs_mmap, bool data_mmap,
> unsigned int nr_threads_synthesize);
>
> #ifdef HAVE_AUXTRACE_SUPPORT
> --
> 2.32.0.605.g8dce9f2422-goog
>
Hi Arnaldo,
On Thu, Aug 12, 2021 at 2:15 AM Jiri Olsa <[email protected]> wrote:
>
> On Tue, Aug 10, 2021 at 09:46:57PM -0700, Namhyung Kim wrote:
> > Depending on the use case, it might require some kind of synthesize
> > and some not. Make it controllable to turn off heavy operations like
> > MMAP for all tasks.
> >
> > Currently all users are converted to enable all the synthesis by
> > default. It'll be updated in the later patch.
> >
> > Signed-off-by: Namhyung Kim <[email protected]>
>
> for both patches
>
> Acked-by: Jiri Olsa <[email protected]>
Could you please take these patches?
Thanks,
Namhyung
Em Tue, Aug 10, 2021 at 09:46:58PM -0700, Namhyung Kim escreveu:
> Add an option to control synthesize behavior.
>
> --synth <no|all|task|mmap|cgroup>
> Fine-tune event synthesis: default=all
>
> This can be useful when we know it doesn't need some synthesis like
> in a specific usecase and/or when using pipe:
>
> $ perf record -a --all-cgroups --synth cgroup -o- sleep 1 | \
> > perf report -i- -s cgroup
I added this note to your patch to clarify what is being synthesized, as
threads started after the record session begins will also have entries in /proc.
- Arnaldo
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 50d7d9d5d70addf0..2d7df8703cf2077d 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -599,7 +599,9 @@ in config file is set to true.
--synth=TYPE::
Collect and synthesize given type of events (comma separated). Note that
this option controls the synthesis from the /proc filesystem which represent
-task status. Kernel (and some other) events are recorded regardless of the
+task status for pre-existing threads.
+
+Kernel (and some other) events are recorded regardless of the
choice in this option. For example, --synth=no would have MMAP events for
kernel and modules.
> Signed-off-by: Namhyung Kim <[email protected]>
> ---
> tools/perf/Documentation/perf-record.txt | 14 +++++++
> tools/perf/builtin-record.c | 48 +++++++++++++++++++-----
> tools/perf/util/record.h | 1 +
> tools/perf/util/synthetic-events.c | 28 ++++++++++++++
> tools/perf/util/synthetic-events.h | 12 ++++++
> 5 files changed, 94 insertions(+), 9 deletions(-)
>
> diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
> index d71bac847936..ffa110488431 100644
> --- a/tools/perf/Documentation/perf-record.txt
> +++ b/tools/perf/Documentation/perf-record.txt
> @@ -596,6 +596,20 @@ options.
> 'perf record --dry-run -e' can act as a BPF script compiler if llvm.dump-obj
> in config file is set to true.
>
> +--synth=TYPE::
> +Collect and synthesize given type of events (comma separated). Note that
> +this option controls the synthesis from the /proc filesystem which represent
> +task status. Kernel (and some other) events are recorded regardless of the
> +choice in this option. For example, --synth=no would have MMAP events for
> +kernel and modules.
> +
> +Available types are:
> + 'task' - synthesize FORK and COMM events for each task
> + 'mmap' - synthesize MMAP events for each process (implies 'task')
> + 'cgroup' - synthesize CGROUP events for each cgroup
> + 'all' - synthesize all events (default)
> + 'no' - do not synthesize any of the above events
> +
> --tail-synthesize::
> Instead of collecting non-sample events (for example, fork, comm, mmap) at
> the beginning of record, collect them during finalizing an output file.
> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> index 764e391e89f8..46ea7bd09fb1 100644
> --- a/tools/perf/builtin-record.c
> +++ b/tools/perf/builtin-record.c
> @@ -1254,6 +1254,7 @@ static int record__synthesize_workload(struct record *rec, bool tail)
> {
> int err;
> struct perf_thread_map *thread_map;
> + bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;
>
> if (rec->opts.tail_synthesize != tail)
> return 0;
> @@ -1265,7 +1266,7 @@ static int record__synthesize_workload(struct record *rec, bool tail)
> err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
> process_synthesized_event,
> &rec->session->machines.host,
> - true,
> + needs_mmap,
> rec->opts.sample_address);
> perf_thread_map__put(thread_map);
> return err;
> @@ -1470,20 +1471,26 @@ static int record__synthesize(struct record *rec, bool tail)
> if (err < 0)
> pr_warning("Couldn't synthesize bpf events.\n");
>
> - err = perf_event__synthesize_cgroups(tool, process_synthesized_event,
> - machine);
> - if (err < 0)
> - pr_warning("Couldn't synthesize cgroup events.\n");
> + if (rec->opts.synth & PERF_SYNTH_CGROUP) {
> + err = perf_event__synthesize_cgroups(tool, process_synthesized_event,
> + machine);
> + if (err < 0)
> + pr_warning("Couldn't synthesize cgroup events.\n");
> + }
>
> if (rec->opts.nr_threads_synthesize > 1) {
> perf_set_multithreaded();
> f = process_locked_synthesized_event;
> }
>
> - err = __machine__synthesize_threads(machine, tool, &opts->target,
> - rec->evlist->core.threads,
> - f, true, opts->sample_address,
> - rec->opts.nr_threads_synthesize);
> + if (rec->opts.synth & PERF_SYNTH_TASK) {
> + bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;
> +
> + err = __machine__synthesize_threads(machine, tool, &opts->target,
> + rec->evlist->core.threads,
> + f, needs_mmap, opts->sample_address,
> + rec->opts.nr_threads_synthesize);
> + }
>
> if (rec->opts.nr_threads_synthesize > 1)
> perf_set_singlethreaded();
> @@ -2392,6 +2399,26 @@ static int process_timestamp_boundary(struct perf_tool *tool,
> return 0;
> }
>
> +static int parse_record_synth_option(const struct option *opt,
> + const char *str,
> + int unset __maybe_unused)
> +{
> + struct record_opts *opts = opt->value;
> + char *p = strdup(str);
> +
> + if (p == NULL)
> + return -1;
> +
> + opts->synth = parse_synth_opt(p);
> + free(p);
> +
> + if (opts->synth < 0) {
> + pr_err("Invalid synth option: %s\n", str);
> + return -1;
> + }
> + return 0;
> +}
> +
> /*
> * XXX Ideally would be local to cmd_record() and passed to a record__new
> * because we need to have access to it in record__exit, that is called
> @@ -2417,6 +2444,7 @@ static struct record record = {
> .nr_threads_synthesize = 1,
> .ctl_fd = -1,
> .ctl_fd_ack = -1,
> + .synth = PERF_SYNTH_ALL,
> },
> .tool = {
> .sample = process_sample_event,
> @@ -2632,6 +2660,8 @@ static struct option __record_options[] = {
> "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n"
> "\t\t\t Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.",
> parse_control_option),
> + OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup",
> + "Fine-tune event synthesis: default=all", parse_record_synth_option),
> OPT_END()
> };
>
> diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h
> index 68f471d9a88b..ef6c2715fdd9 100644
> --- a/tools/perf/util/record.h
> +++ b/tools/perf/util/record.h
> @@ -77,6 +77,7 @@ struct record_opts {
> int ctl_fd;
> int ctl_fd_ack;
> bool ctl_fd_close;
> + int synth;
> };
>
> extern const char * const *record_usage;
> diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
> index a7a2825356d6..198982109f0f 100644
> --- a/tools/perf/util/synthetic-events.c
> +++ b/tools/perf/util/synthetic-events.c
> @@ -2237,3 +2237,31 @@ int perf_event__synthesize_for_pipe(struct perf_tool *tool,
>
> return ret;
> }
> +
> +int parse_synth_opt(char *synth)
> +{
> + char *p, *q;
> + int ret = 0;
> +
> + if (synth == NULL)
> + return -1;
> +
> + for (q = synth; (p = strsep(&q, ",")); p = q) {
> + if (!strcasecmp(p, "no") || !strcasecmp(p, "none"))
> + return 0;
> +
> + if (!strcasecmp(p, "all"))
> + return PERF_SYNTH_ALL;
> +
> + if (!strcasecmp(p, "task"))
> + ret |= PERF_SYNTH_TASK;
> + else if (!strcasecmp(p, "mmap"))
> + ret |= PERF_SYNTH_TASK | PERF_SYNTH_MMAP;
> + else if (!strcasecmp(p, "cgroup"))
> + ret |= PERF_SYNTH_CGROUP;
> + else
> + return -1;
> + }
> +
> + return ret;
> +}
> diff --git a/tools/perf/util/synthetic-events.h b/tools/perf/util/synthetic-events.h
> index 44f72d56ca4d..c931433bacbf 100644
> --- a/tools/perf/util/synthetic-events.h
> +++ b/tools/perf/util/synthetic-events.h
> @@ -27,6 +27,18 @@ struct target;
>
> union perf_event;
>
> +enum perf_record_synth {
> + PERF_SYNTH_TASK = 1 << 0,
> + PERF_SYNTH_MMAP = 1 << 1,
> + PERF_SYNTH_CGROUP = 1 << 2,
> +
> + /* last element */
> + PERF_SYNTH_MAX = 1 << 3,
> +};
> +#define PERF_SYNTH_ALL (PERF_SYNTH_MAX - 1)
> +
> +int parse_synth_opt(char *str);
> +
> typedef int (*perf_event__handler_t)(struct perf_tool *tool, union perf_event *event,
> struct perf_sample *sample, struct machine *machine);
>
> --
> 2.32.0.605.g8dce9f2422-goog
>
--
- Arnaldo