Currently, there is no trivial mechanism to analyze events based on
containers. perf -G can be used, but it will not filter events for the
containers created after perf is invoked, making it difficult to assess/
analyze performance issues of multiple containers at once.
This patch-set is aimed at addressing this limitation by introducing a
new PERF_RECORD_NAMESPACES event that records namespaces related info.
As containers are created with namespaces, the new data can be used
in the assessment/analysis of multiple containers.
The first patch introduces PERF_RECORD_NAMESPACES in kernel while the
second patch makes the corresponding changes in perf tool to read these
PERF_RECORD_NAMESPACES events. The third patch demonstrates analysis
of containers with this data by adding a cgroup identifier column in
perf report, which contains the cgroup namespace's device and inode
numbers. This is based on the assumption that each container is created
with its own cgroup namespace. The third patch has scope for improvement
based on the conventions a container is attributed with, going forward.
Changes from v7:
* Folded doc related patches into corresponding code changes
* Got rid of NAMESPACES_MAX macro and used a zero-length array instead
* Updated changelog of a few patches
Changes from v6:
* Updated changelog of patch 1
* Split patch 2 into smaller patches
* Updated record and script documentation
* Dropped name field from ns_link_info struct
Changes from v5:
* Updated changelogs of patches 1 & 3
* Rebased the patches on perf/core in tip
---
Hari Bathini (6):
perf: Add PERF_RECORD_NAMESPACES to include namespaces related info
perf tools: Add PERF_RECORD_NAMESPACES to include namespaces related info
perf record: Synthesize namespace events for current processes
perf tools: Add print support for namespace events
perf script: Add script print support for namespace events
perf tools: Add cgroup identifier sort order keyword
include/linux/perf_event.h | 2
include/uapi/linux/perf_event.h | 32 ++++++
kernel/events/core.c | 139 ++++++++++++++++++++++++++++
kernel/fork.c | 2
kernel/nsproxy.c | 3 +
tools/include/uapi/linux/perf_event.h | 32 ++++++
tools/perf/Documentation/perf-record.txt | 3 +
tools/perf/Documentation/perf-report.txt | 4 +
tools/perf/Documentation/perf-script.txt | 3 +
tools/perf/builtin-annotate.c | 1
tools/perf/builtin-diff.c | 1
tools/perf/builtin-inject.c | 13 +++
tools/perf/builtin-kmem.c | 1
tools/perf/builtin-kvm.c | 2
tools/perf/builtin-lock.c | 1
tools/perf/builtin-mem.c | 1
tools/perf/builtin-record.c | 35 ++++++-
tools/perf/builtin-report.c | 1
tools/perf/builtin-sched.c | 1
tools/perf/builtin-script.c | 41 ++++++++
tools/perf/builtin-trace.c | 3 -
tools/perf/perf.h | 1
tools/perf/util/Build | 1
tools/perf/util/data-convert-bt.c | 1
tools/perf/util/event.c | 150 +++++++++++++++++++++++++++++-
tools/perf/util/event.h | 19 ++++
tools/perf/util/evsel.c | 3 +
tools/perf/util/hist.c | 7 +
tools/perf/util/hist.h | 1
tools/perf/util/machine.c | 34 +++++++
tools/perf/util/machine.h | 3 +
tools/perf/util/namespaces.c | 36 +++++++
tools/perf/util/namespaces.h | 26 +++++
tools/perf/util/session.c | 7 +
tools/perf/util/sort.c | 41 ++++++++
tools/perf/util/sort.h | 7 +
tools/perf/util/thread.c | 44 ++++++++-
tools/perf/util/thread.h | 6 +
tools/perf/util/tool.h | 2
39 files changed, 694 insertions(+), 16 deletions(-)
create mode 100644 tools/perf/util/namespaces.c
create mode 100644 tools/perf/util/namespaces.h
Introduce a new option to record PERF_RECORD_NAMESPACES events emitted
by the kernel when fork, clone, setns or unshare are invoked. Also update
the perf-record documentation with the new option to record namespace events.
Acked-by: Jiri Olsa <[email protected]>
Signed-off-by: Hari Bathini <[email protected]>
---
tools/include/uapi/linux/perf_event.h | 32 +++++++++++++++++++++-
tools/perf/Documentation/perf-record.txt | 3 ++
tools/perf/builtin-annotate.c | 1 +
tools/perf/builtin-diff.c | 1 +
tools/perf/builtin-inject.c | 13 +++++++++
tools/perf/builtin-kmem.c | 1 +
tools/perf/builtin-kvm.c | 2 +
tools/perf/builtin-lock.c | 1 +
tools/perf/builtin-mem.c | 1 +
tools/perf/builtin-record.c | 6 ++++
tools/perf/builtin-report.c | 1 +
tools/perf/builtin-sched.c | 1 +
tools/perf/builtin-script.c | 1 +
tools/perf/builtin-trace.c | 3 +-
tools/perf/perf.h | 1 +
tools/perf/util/Build | 1 +
tools/perf/util/data-convert-bt.c | 1 +
tools/perf/util/event.c | 9 ++++++
tools/perf/util/event.h | 12 ++++++++
tools/perf/util/evsel.c | 3 ++
tools/perf/util/machine.c | 31 +++++++++++++++++++++
tools/perf/util/machine.h | 3 ++
tools/perf/util/namespaces.c | 36 +++++++++++++++++++++++++
tools/perf/util/namespaces.h | 26 ++++++++++++++++++
tools/perf/util/session.c | 7 +++++
tools/perf/util/thread.c | 44 +++++++++++++++++++++++++++++-
tools/perf/util/thread.h | 6 ++++
tools/perf/util/tool.h | 2 +
28 files changed, 245 insertions(+), 4 deletions(-)
create mode 100644 tools/perf/util/namespaces.c
create mode 100644 tools/perf/util/namespaces.h
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index c66a485..bec0aad 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -344,7 +344,8 @@ struct perf_event_attr {
use_clockid : 1, /* use @clockid for time fields */
context_switch : 1, /* context switch data */
write_backward : 1, /* Write ring buffer from end to beginning */
- __reserved_1 : 36;
+ namespaces : 1, /* include namespaces data */
+ __reserved_1 : 35;
union {
__u32 wakeup_events; /* wakeup every n events */
@@ -610,6 +611,23 @@ struct perf_event_header {
__u16 size;
};
+struct perf_ns_link_info { /* identifies one namespace by a device/inode number pair */
+ __u64 dev; /* device number */
+ __u64 ino; /* inode number */
+};
+
+enum { /* index of each namespace type within a link_info[] array */
+ NET_NS_INDEX = 0,
+ UTS_NS_INDEX = 1,
+ IPC_NS_INDEX = 2,
+ PID_NS_INDEX = 3,
+ USER_NS_INDEX = 4,
+ MNT_NS_INDEX = 5,
+ CGROUP_NS_INDEX = 6,
+
+ NR_NAMESPACES, /* number of available namespaces */
+};
+
enum perf_event_type {
/*
@@ -862,6 +880,18 @@ enum perf_event_type {
*/
PERF_RECORD_SWITCH_CPU_WIDE = 15,
+ /*
+ * struct {
+ * struct perf_event_header header;
+ * u32 pid;
+ * u32 tid;
+ * u64 nr_namespaces;
+ * { u64 dev, inode; } [nr_namespaces];
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_NAMESPACES = 16,
+
PERF_RECORD_MAX, /* non-ABI */
};
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index b16003e..ea3789d 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -347,6 +347,9 @@ Enable weightened sampling. An additional weight is recorded per sample and can
displayed with the weight and local_weight sort keys. This currently works for TSX
abort events and some memory events in precise mode on modern Intel CPUs.
+--namespaces::
+Record events of type PERF_RECORD_NAMESPACES.
+
--transaction::
Record transaction flags for transaction related events.
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 4f52d85..e54b1f9 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -393,6 +393,7 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
.comm = perf_event__process_comm,
.exit = perf_event__process_exit,
.fork = perf_event__process_fork,
+ .namespaces = perf_event__process_namespaces,
.ordered_events = true,
.ordering_requires_timestamps = true,
},
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 1b96a31..5e480315 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -364,6 +364,7 @@ static struct perf_tool tool = {
.exit = perf_event__process_exit,
.fork = perf_event__process_fork,
.lost = perf_event__process_lost,
+ .namespaces = perf_event__process_namespaces,
.ordered_events = true,
.ordering_requires_timestamps = true,
};
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index b9bc7e3..8d1d13b 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -333,6 +333,18 @@ static int perf_event__repipe_comm(struct perf_tool *tool,
return err;
}
+static int perf_event__repipe_namespaces(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{ /* process a namespaces event locally, then repipe it */
+ int err = perf_event__process_namespaces(tool, event, sample, machine);
+
+ perf_event__repipe(tool, event, sample, machine); /* called even when processing failed; its return value is ignored */
+
+ return err; /* result of the processing step, not of the repipe */
+}
+
static int perf_event__repipe_exit(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
@@ -660,6 +672,7 @@ static int __cmd_inject(struct perf_inject *inject)
session->itrace_synth_opts = &inject->itrace_synth_opts;
inject->itrace_synth_opts.inject = true;
inject->tool.comm = perf_event__repipe_comm;
+ inject->tool.namespaces = perf_event__repipe_namespaces;
inject->tool.exit = perf_event__repipe_exit;
inject->tool.id_index = perf_event__repipe_id_index;
inject->tool.auxtrace_info = perf_event__process_auxtrace_info;
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 6da8d08..d509e74 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -964,6 +964,7 @@ static struct perf_tool perf_kmem = {
.comm = perf_event__process_comm,
.mmap = perf_event__process_mmap,
.mmap2 = perf_event__process_mmap2,
+ .namespaces = perf_event__process_namespaces,
.ordered_events = true,
};
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 08fa88f..18e6c38 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -1044,6 +1044,7 @@ static int read_events(struct perf_kvm_stat *kvm)
struct perf_tool eops = {
.sample = process_sample_event,
.comm = perf_event__process_comm,
+ .namespaces = perf_event__process_namespaces,
.ordered_events = true,
};
struct perf_data_file file = {
@@ -1348,6 +1349,7 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
kvm->tool.exit = perf_event__process_exit;
kvm->tool.fork = perf_event__process_fork;
kvm->tool.lost = process_lost_event;
+ kvm->tool.namespaces = perf_event__process_namespaces;
kvm->tool.ordered_events = true;
perf_tool__fill_defaults(&kvm->tool);
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index ce3bfb4..d750cca 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -858,6 +858,7 @@ static int __cmd_report(bool display_info)
struct perf_tool eops = {
.sample = process_sample_event,
.comm = perf_event__process_comm,
+ .namespaces = perf_event__process_namespaces,
.ordered_events = true,
};
struct perf_data_file file = {
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index 6114e07..030a6cf 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -342,6 +342,7 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused)
.lost = perf_event__process_lost,
.fork = perf_event__process_fork,
.build_id = perf_event__process_build_id,
+ .namespaces = perf_event__process_namespaces,
.ordered_events = true,
},
.input_name = "perf.data",
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index bc84a37..99562c7 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -876,6 +876,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
signal(SIGTERM, sig_handler);
signal(SIGSEGV, sigsegv_handler);
+ if (rec->opts.record_namespaces)
+ tool->namespace_events = true;
+
if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
signal(SIGUSR2, snapshot_sig_handler);
if (rec->opts.auxtrace_snapshot_mode)
@@ -1497,6 +1500,7 @@ static struct record record = {
.fork = perf_event__process_fork,
.exit = perf_event__process_exit,
.comm = perf_event__process_comm,
+ .namespaces = perf_event__process_namespaces,
.mmap = perf_event__process_mmap,
.mmap2 = perf_event__process_mmap2,
.ordered_events = true,
@@ -1611,6 +1615,8 @@ static struct option __record_options[] = {
"opts", "AUX area tracing Snapshot Mode", ""),
OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
"per thread proc mmap processing timeout in ms"),
+ OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
+ "Record namespaces events"),
OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
"Record context switch events"),
OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 0a88670..4e68eec 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -701,6 +701,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
.mmap = perf_event__process_mmap,
.mmap2 = perf_event__process_mmap2,
.comm = perf_event__process_comm,
+ .namespaces = perf_event__process_namespaces,
.exit = perf_event__process_exit,
.fork = perf_event__process_fork,
.lost = perf_event__process_lost,
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index b94cf0d..16170e9 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -3272,6 +3272,7 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
.tool = {
.sample = perf_sched__process_tracepoint_sample,
.comm = perf_event__process_comm,
+ .namespaces = perf_event__process_namespaces,
.lost = perf_event__process_lost,
.fork = perf_sched__process_fork_event,
.ordered_events = true,
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index c0783b4..f1ce806 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -2097,6 +2097,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
.mmap = perf_event__process_mmap,
.mmap2 = perf_event__process_mmap2,
.comm = perf_event__process_comm,
+ .namespaces = perf_event__process_namespaces,
.exit = perf_event__process_exit,
.fork = perf_event__process_fork,
.attr = process_attr,
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 256f1fa..912fedc 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -2415,8 +2415,9 @@ static int trace__replay(struct trace *trace)
trace->tool.exit = perf_event__process_exit;
trace->tool.fork = perf_event__process_fork;
trace->tool.attr = perf_event__process_attr;
- trace->tool.tracing_data = perf_event__process_tracing_data;
+ trace->tool.tracing_data = perf_event__process_tracing_data;
trace->tool.build_id = perf_event__process_build_id;
+ trace->tool.namespaces = perf_event__process_namespaces;
trace->tool.ordered_events = true;
trace->tool.ordering_requires_timestamps = true;
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 1c27d94..806c216 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -50,6 +50,7 @@ struct record_opts {
bool running_time;
bool full_auxtrace;
bool auxtrace_snapshot_mode;
+ bool record_namespaces;
bool record_switch_events;
bool all_kernel;
bool all_user;
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 5da376b..2ea5ee1 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -42,6 +42,7 @@ libperf-y += pstack.o
libperf-y += session.o
libperf-$(CONFIG_AUDIT) += syscalltbl.o
libperf-y += ordered-events.o
+libperf-y += namespaces.o
libperf-y += comm.o
libperf-y += thread.o
libperf-y += thread_map.o
diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index 4e6cbc9..89ece24 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -1468,6 +1468,7 @@ int bt_convert__perf2ctf(const char *input, const char *path,
.lost = perf_event__process_lost,
.tracing_data = perf_event__process_tracing_data,
.build_id = perf_event__process_build_id,
+ .namespaces = perf_event__process_namespaces,
.ordered_events = true,
.ordering_requires_timestamps = true,
},
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 4ea7ce7..f118eac 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -31,6 +31,7 @@ static const char *perf_event__names[] = {
[PERF_RECORD_LOST_SAMPLES] = "LOST_SAMPLES",
[PERF_RECORD_SWITCH] = "SWITCH",
[PERF_RECORD_SWITCH_CPU_WIDE] = "SWITCH_CPU_WIDE",
+ [PERF_RECORD_NAMESPACES] = "NAMESPACES",
[PERF_RECORD_HEADER_ATTR] = "ATTR",
[PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE",
[PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA",
@@ -1016,6 +1017,14 @@ int perf_event__process_comm(struct perf_tool *tool __maybe_unused,
return machine__process_comm_event(machine, event, sample);
}
+int perf_event__process_namespaces(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{ /* perf_tool .namespaces handler: forwards to the machine-level handler; tool is not used */
+ return machine__process_namespaces_event(machine, event, sample);
+}
+
int perf_event__process_lost(struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index c735c53..26efc77 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -39,6 +39,13 @@ struct comm_event {
char comm[16];
};
+struct namespaces_event { /* in-memory layout of a PERF_RECORD_NAMESPACES record */
+ struct perf_event_header header;
+ u32 pid, tid;
+ u64 nr_namespaces; /* number of entries in link_info[] */
+ struct perf_ns_link_info link_info[]; /* one dev/inode pair per namespace */
+};
+
struct fork_event {
struct perf_event_header header;
u32 pid, ppid;
@@ -485,6 +492,7 @@ union perf_event {
struct mmap_event mmap;
struct mmap2_event mmap2;
struct comm_event comm;
+ struct namespaces_event namespaces;
struct fork_event fork;
struct lost_event lost;
struct lost_samples_event lost_samples;
@@ -587,6 +595,10 @@ int perf_event__process_switch(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
+int perf_event__process_namespaces(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
int perf_event__process_mmap(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index ac59710..175dc23 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -932,6 +932,9 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts,
attr->mmap2 = track && !perf_missing_features.mmap2;
attr->comm = track;
+ if (opts->record_namespaces)
+ attr->namespaces = track;
+
if (opts->record_switch_events)
attr->context_switch = track;
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index b9974fe..16e7325 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -13,6 +13,7 @@
#include <symbol/kallsyms.h>
#include "unwind.h"
#include "linux/hash.h"
+#include "asm/bug.h"
static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock);
@@ -501,6 +502,34 @@ int machine__process_comm_event(struct machine *machine, union perf_event *event
return err;
}
+int machine__process_namespaces_event(struct machine *machine __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused)
+{ /* record the namespaces carried by event on the thread it names; returns 0, or -1 on failure */
+ struct thread *thread = machine__findnew_thread(machine, /* NOTE(review): machine and sample are marked __maybe_unused but are both used in this function */
+ event->namespaces.pid,
+ event->namespaces.tid);
+ int err = 0;
+
+ WARN_ONCE(event->namespaces.nr_namespaces > NR_NAMESPACES, /* warning only: processing continues */
+ "\nWARNING: kernel seems to support more namespaces than perf"
+ " tool.\nTry updating the perf tool..\n\n");
+
+ WARN_ONCE(event->namespaces.nr_namespaces < NR_NAMESPACES, /* warning only: processing continues */
+ "\nWARNING: perf tool seems to support more namespaces than"
+ " the kernel.\nTry updating the kernel..\n\n");
+
+ if (thread == NULL ||
+ thread__set_namespaces(thread, sample->time, &event->namespaces)) {
+ dump_printf("problem processing PERF_RECORD_NAMESPACES, skipping event.\n");
+ err = -1;
+ }
+
+ thread__put(thread); /* also reached with thread == NULL; presumably thread__put() tolerates that - confirm */
+
+ return err;
+}
+
int machine__process_lost_event(struct machine *machine __maybe_unused,
union perf_event *event, struct perf_sample *sample __maybe_unused)
{
@@ -1538,6 +1567,8 @@ int machine__process_event(struct machine *machine, union perf_event *event,
ret = machine__process_comm_event(machine, event, sample); break;
case PERF_RECORD_MMAP:
ret = machine__process_mmap_event(machine, event, sample); break;
+ case PERF_RECORD_NAMESPACES:
+ ret = machine__process_namespaces_event(machine, event, sample); break;
case PERF_RECORD_MMAP2:
ret = machine__process_mmap2_event(machine, event, sample); break;
case PERF_RECORD_FORK:
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index a283050..3cdb134 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -97,6 +97,9 @@ int machine__process_itrace_start_event(struct machine *machine,
union perf_event *event);
int machine__process_switch_event(struct machine *machine,
union perf_event *event);
+int machine__process_namespaces_event(struct machine *machine,
+ union perf_event *event,
+ struct perf_sample *sample);
int machine__process_mmap_event(struct machine *machine, union perf_event *event,
struct perf_sample *sample);
int machine__process_mmap2_event(struct machine *machine, union perf_event *event,
diff --git a/tools/perf/util/namespaces.c b/tools/perf/util/namespaces.c
new file mode 100644
index 0000000..2de8da6
--- /dev/null
+++ b/tools/perf/util/namespaces.c
@@ -0,0 +1,36 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * Copyright (C) 2017 Hari Bathini, IBM Corporation
+ */
+
+#include "namespaces.h"
+#include "util.h"
+#include "event.h"
+#include <stdlib.h>
+#include <stdio.h>
+
+struct namespaces *namespaces__new(struct namespaces_event *event)
+{ /* allocate a struct namespaces, copying link_info[] from event when one is given; NULL on allocation failure */
+ struct namespaces *namespaces;
+ u64 link_info_size = ((event ? event->nr_namespaces : NR_NAMESPACES) * /* NR_NAMESPACES entries when event is NULL */
+ sizeof(struct perf_ns_link_info)); /* NOTE(review): the event-supplied count is not bounded here - confirm it is validated elsewhere */
+
+ namespaces = zalloc(sizeof(struct namespaces) + link_info_size); /* presumably zero-filled, so link_info[] stays zero when event is NULL */
+ if (!namespaces)
+ return NULL;
+
+ namespaces->end_time = -1; /* end_time is u64, so this stores the all-ones value */
+
+ if (event)
+ memcpy(namespaces->link_info, event->link_info, link_info_size);
+
+ return namespaces;
+}
+
+void namespaces__free(struct namespaces *namespaces)
+{ /* release an object obtained from namespaces__new(); it is a single allocation */
+ free(namespaces);
+}
diff --git a/tools/perf/util/namespaces.h b/tools/perf/util/namespaces.h
new file mode 100644
index 0000000..468f1e9
--- /dev/null
+++ b/tools/perf/util/namespaces.h
@@ -0,0 +1,26 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * Copyright (C) 2017 Hari Bathini, IBM Corporation
+ */
+
+#ifndef __PERF_NAMESPACES_H
+#define __PERF_NAMESPACES_H
+
+#include "../perf.h"
+#include <linux/list.h>
+
+struct namespaces_event;
+
+struct namespaces { /* one recorded set of namespaces for a thread */
+ struct list_head list; /* node in thread->namespaces_list */
+ u64 end_time; /* -1 when created; overwritten with a timestamp by thread__set_namespaces() once a newer entry is added */
+ struct perf_ns_link_info link_info[]; /* entry count is fixed at allocation in namespaces__new() */
+};
+
+struct namespaces *namespaces__new(struct namespaces_event *event);
+void namespaces__free(struct namespaces *namespaces);
+
+#endif /* __PERF_NAMESPACES_H */
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 1dd617d..ae42e74 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1239,6 +1239,8 @@ static int machines__deliver_event(struct machines *machines,
return tool->mmap2(tool, event, sample, machine);
case PERF_RECORD_COMM:
return tool->comm(tool, event, sample, machine);
+ case PERF_RECORD_NAMESPACES:
+ return tool->namespaces(tool, event, sample, machine);
case PERF_RECORD_FORK:
return tool->fork(tool, event, sample, machine);
case PERF_RECORD_EXIT:
@@ -1494,6 +1496,11 @@ int perf_session__register_idle_thread(struct perf_session *session)
err = -1;
}
+ if (thread == NULL || thread__set_namespaces(thread, 0, NULL)) {
+ pr_err("problem inserting idle task.\n");
+ err = -1;
+ }
+
/* machine__findnew_thread() got the thread, so put it */
thread__put(thread);
return err;
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 74e79d2..dcdb87a 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -7,6 +7,7 @@
#include "thread-stack.h"
#include "util.h"
#include "debug.h"
+#include "namespaces.h"
#include "comm.h"
#include "unwind.h"
@@ -40,6 +41,7 @@ struct thread *thread__new(pid_t pid, pid_t tid)
thread->tid = tid;
thread->ppid = -1;
thread->cpu = -1;
+ INIT_LIST_HEAD(&thread->namespaces_list);
INIT_LIST_HEAD(&thread->comm_list);
comm_str = malloc(32);
@@ -66,7 +68,8 @@ struct thread *thread__new(pid_t pid, pid_t tid)
void thread__delete(struct thread *thread)
{
- struct comm *comm, *tmp;
+ struct namespaces *namespaces, *tmp_namespaces;
+ struct comm *comm, *tmp_comm;
BUG_ON(!RB_EMPTY_NODE(&thread->rb_node));
@@ -76,7 +79,12 @@ void thread__delete(struct thread *thread)
map_groups__put(thread->mg);
thread->mg = NULL;
}
- list_for_each_entry_safe(comm, tmp, &thread->comm_list, list) {
+ list_for_each_entry_safe(namespaces, tmp_namespaces,
+ &thread->namespaces_list, list) {
+ list_del(&namespaces->list);
+ namespaces__free(namespaces);
+ }
+ list_for_each_entry_safe(comm, tmp_comm, &thread->comm_list, list) {
list_del(&comm->list);
comm__free(comm);
}
@@ -104,6 +112,38 @@ void thread__put(struct thread *thread)
}
}
+struct namespaces *thread__namespaces(const struct thread *thread)
+{ /* first entry of thread->namespaces_list, or NULL when the list is empty */
+ if (list_empty(&thread->namespaces_list))
+ return NULL;
+
+ return list_first_entry(&thread->namespaces_list, struct namespaces, list);
+}
+
+int thread__set_namespaces(struct thread *thread, u64 timestamp,
+ struct namespaces_event *event)
+{ /* add a new namespaces entry built from event (which may be NULL) to thread; returns 0 or -ENOMEM */
+ struct namespaces *new, *curr = thread__namespaces(thread); /* curr: entry in effect before this call, if any */
+
+ new = namespaces__new(event);
+ if (!new)
+ return -ENOMEM;
+
+ list_add(&new->list, &thread->namespaces_list);
+
+ if (timestamp && curr) { /* skipped when timestamp is 0 or when there was no earlier entry */
+ /*
+ * The setns syscall must have changed a few or all of the
+ * namespaces of this thread. Update the end time of the
+ * namespaces previously used.
+ */
+ curr = list_next_entry(new, list); /* the entry that follows the newly added one */
+ curr->end_time = timestamp;
+ }
+
+ return 0;
+}
+
struct comm *thread__comm(const struct thread *thread)
{
if (list_empty(&thread->comm_list))
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index e571885..4eb849e 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -28,6 +28,7 @@ struct thread {
bool comm_set;
int comm_len;
bool dead; /* if set thread has exited */
+ struct list_head namespaces_list;
struct list_head comm_list;
u64 db_id;
@@ -40,6 +41,7 @@ struct thread {
};
struct machine;
+struct namespaces;
struct comm;
struct thread *thread__new(pid_t pid, pid_t tid);
@@ -62,6 +64,10 @@ static inline void thread__exited(struct thread *thread)
thread->dead = true;
}
+struct namespaces *thread__namespaces(const struct thread *thread);
+int thread__set_namespaces(struct thread *thread, u64 timestamp,
+ struct namespaces_event *event);
+
int __thread__set_comm(struct thread *thread, const char *comm, u64 timestamp,
bool exec);
static inline int thread__set_comm(struct thread *thread, const char *comm,
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
index ac2590a..829471a 100644
--- a/tools/perf/util/tool.h
+++ b/tools/perf/util/tool.h
@@ -40,6 +40,7 @@ struct perf_tool {
event_op mmap,
mmap2,
comm,
+ namespaces,
fork,
exit,
lost,
@@ -66,6 +67,7 @@ struct perf_tool {
event_op3 auxtrace;
bool ordered_events;
bool ordering_requires_timestamps;
+ bool namespace_events;
};
#endif /* __PERF_TOOL_H */
Add print support for events of type PERF_RECORD_NAMESPACES.
Acked-by: Jiri Olsa <[email protected]>
Signed-off-by: Hari Bathini <[email protected]>
---
tools/perf/util/event.c | 30 ++++++++++++++++++++++++++++++
tools/perf/util/event.h | 1 +
tools/perf/util/machine.c | 3 +++
3 files changed, 34 insertions(+)
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 5a99af8..022587e 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -1108,6 +1108,33 @@ size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp)
return fprintf(fp, "%s: %s:%d/%d\n", s, event->comm.comm, event->comm.pid, event->comm.tid);
}
+/*
+ * Print a PERF_RECORD_NAMESPACES event to @fp: the pid/tid and the namespace
+ * count on the first line, followed by a bracketed list of
+ * "index/name: dev/0xinode" entries, four per output line.
+ *
+ * Returns the sum of the fprintf() return values.
+ */
+size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp)
+{
+	size_t ret = 0;
+	struct perf_ns_link_info *ns_link_info;
+	u32 nr_namespaces, idx;
+
+	ns_link_info = event->namespaces.link_info;
+	nr_namespaces = event->namespaces.nr_namespaces;
+
+	ret += fprintf(fp, " %d/%d - nr_namespaces: %u\n\t\t[",
+		       event->namespaces.pid,
+		       event->namespaces.tid,
+		       nr_namespaces);
+
+	for (idx = 0; idx < nr_namespaces; idx++) {
+		/* Break the list onto a new line after every four entries. */
+		if (idx && (idx % 4 == 0))
+			ret += fprintf(fp, "\n\t\t ");
+
+		/*
+		 * dev and ino are 64-bit values. "%lu"/"%lx" only match a
+		 * 64-bit argument where long is 64 bits wide, so convert to
+		 * unsigned long long and use the "ll" length modifier, which
+		 * is correct on every target.
+		 */
+		ret += fprintf(fp, "%u/%s: %llu/0x%llx%s", idx,
+			       perf_ns__name(idx),
+			       (unsigned long long)ns_link_info[idx].dev,
+			       (unsigned long long)ns_link_info[idx].ino,
+			       ((idx + 1) != nr_namespaces) ? ", " : "]\n");
+	}
+
+	return ret;
+}
+
int perf_event__process_comm(struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
@@ -1304,6 +1331,9 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp)
case PERF_RECORD_MMAP:
ret += perf_event__fprintf_mmap(event, fp);
break;
+ case PERF_RECORD_NAMESPACES:
+ ret += perf_event__fprintf_namespaces(event, fp);
+ break;
case PERF_RECORD_MMAP2:
ret += perf_event__fprintf_mmap2(event, fp);
break;
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index a46a1b4..e1d8166 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -671,6 +671,7 @@ size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_thread_map(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_cpu_map(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp);
size_t perf_event__fprintf(union perf_event *event, FILE *fp);
u64 kallsyms__get_function_start(const char *kallsyms_filename,
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 16e7325..dfc6004 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -519,6 +519,9 @@ int machine__process_namespaces_event(struct machine *machine __maybe_unused,
"\nWARNING: perf tool seems to support more namespaces than"
" the kernel.\nTry updating the kernel..\n\n");
+ if (dump_trace)
+ perf_event__fprintf_namespaces(event, stdout);
+
if (thread == NULL ||
thread__set_namespaces(thread, sample->time, &event->namespaces)) {
dump_printf("problem processing PERF_RECORD_NAMESPACES, skipping event.\n");
Synthesize PERF_RECORD_NAMESPACES events for processes that were
running prior to invocation of perf record. The data for this is
taken from /proc/$PID/ns. These changes make way for analyzing
events with regard to namespaces.
Acked-by: Jiri Olsa <[email protected]>
Signed-off-by: Hari Bathini <[email protected]>
---
tools/perf/builtin-record.c | 29 ++++++++++-
tools/perf/util/event.c | 111 +++++++++++++++++++++++++++++++++++++++++--
tools/perf/util/event.h | 6 ++
3 files changed, 136 insertions(+), 10 deletions(-)
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 99562c7..04faef7 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -986,6 +986,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
*/
if (forks) {
union perf_event *event;
+ pid_t tgid;
event = malloc(sizeof(event->comm) + machine->id_hdr_size);
if (event == NULL) {
@@ -999,10 +1000,30 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
* cannot see a correct process name for those events.
* Synthesize COMM event to prevent it.
*/
- perf_event__synthesize_comm(tool, event,
- rec->evlist->workload.pid,
- process_synthesized_event,
- machine);
+ tgid = perf_event__synthesize_comm(tool, event,
+ rec->evlist->workload.pid,
+ process_synthesized_event,
+ machine);
+ free(event);
+
+ if (tgid == -1)
+ goto out_child;
+
+ event = malloc(sizeof(event->namespaces) +
+ (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
+ machine->id_hdr_size);
+ if (event == NULL) {
+ err = -ENOMEM;
+ goto out_child;
+ }
+
+ /*
+ * Synthesize NAMESPACES event for the command specified.
+ */
+ perf_event__synthesize_namespaces(tool, event,
+ rec->evlist->workload.pid,
+ tgid, process_synthesized_event,
+ machine);
free(event);
perf_evlist__start_workload(rec->evlist);
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index f118eac..5a99af8 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -50,6 +50,16 @@ static const char *perf_event__names[] = {
[PERF_RECORD_TIME_CONV] = "TIME_CONV",
};
+static const char *perf_ns__names[] = {
+ [NET_NS_INDEX] = "net",
+ [UTS_NS_INDEX] = "uts",
+ [IPC_NS_INDEX] = "ipc",
+ [PID_NS_INDEX] = "pid",
+ [USER_NS_INDEX] = "user",
+ [MNT_NS_INDEX] = "mnt",
+ [CGROUP_NS_INDEX] = "cgroup",
+};
+
const char *perf_event__name(unsigned int id)
{
if (id >= ARRAY_SIZE(perf_event__names))
@@ -59,6 +69,13 @@ const char *perf_event__name(unsigned int id)
return perf_event__names[id];
}
+static const char *perf_ns__name(unsigned int id)
+{
+ if (id >= ARRAY_SIZE(perf_ns__names))
+ return "UNKNOWN";
+ return perf_ns__names[id];
+}
+
static int perf_tool__process_synth_event(struct perf_tool *tool,
union perf_event *event,
struct machine *machine,
@@ -204,6 +221,58 @@ pid_t perf_event__synthesize_comm(struct perf_tool *tool,
return tgid;
}
+static void perf_event__get_ns_link_info(pid_t pid, const char *ns,
+ struct perf_ns_link_info *ns_link_info)
+{
+ struct stat64 st;
+ char proc_ns[128];
+
+ sprintf(proc_ns, "/proc/%u/ns/%s", pid, ns);
+ if (stat64(proc_ns, &st) == 0) {
+ ns_link_info->dev = st.st_dev;
+ ns_link_info->ino = st.st_ino;
+ }
+}
+
+int perf_event__synthesize_namespaces(struct perf_tool *tool,
+ union perf_event *event,
+ pid_t pid, pid_t tgid,
+ perf_event__handler_t process,
+ struct machine *machine)
+{
+ u32 idx;
+ struct perf_ns_link_info *ns_link_info;
+
+ if (!tool->namespace_events)
+ return 0;
+
+ memset(&event->namespaces, 0, (sizeof(event->namespaces) +
+ (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
+ machine->id_hdr_size));
+
+ event->namespaces.pid = tgid;
+ event->namespaces.tid = pid;
+
+ event->namespaces.nr_namespaces = NR_NAMESPACES;
+
+ ns_link_info = event->namespaces.link_info;
+
+ for (idx = 0; idx < event->namespaces.nr_namespaces; idx++)
+ perf_event__get_ns_link_info(pid, perf_ns__name(idx),
+ &ns_link_info[idx]);
+
+ event->namespaces.header.type = PERF_RECORD_NAMESPACES;
+
+ event->namespaces.header.size = (sizeof(event->namespaces) +
+ (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
+ machine->id_hdr_size);
+
+ if (perf_tool__process_synth_event(tool, event, machine, process) != 0)
+ return -1;
+
+ return 0;
+}
+
static int perf_event__synthesize_fork(struct perf_tool *tool,
union perf_event *event,
pid_t pid, pid_t tgid, pid_t ppid,
@@ -435,8 +504,9 @@ int perf_event__synthesize_modules(struct perf_tool *tool,
static int __event__synthesize_thread(union perf_event *comm_event,
union perf_event *mmap_event,
union perf_event *fork_event,
+ union perf_event *namespaces_event,
pid_t pid, int full,
- perf_event__handler_t process,
+ perf_event__handler_t process,
struct perf_tool *tool,
struct machine *machine,
bool mmap_data,
@@ -456,6 +526,11 @@ static int __event__synthesize_thread(union perf_event *comm_event,
if (tgid == -1)
return -1;
+ if (perf_event__synthesize_namespaces(tool, namespaces_event, pid,
+ tgid, process, machine) < 0)
+ return -1;
+
+
return perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid,
process, machine, mmap_data,
proc_map_timeout);
@@ -489,6 +564,11 @@ static int __event__synthesize_thread(union perf_event *comm_event,
if (perf_event__synthesize_fork(tool, fork_event, _pid, tgid,
ppid, process, machine) < 0)
break;
+
+ if (perf_event__synthesize_namespaces(tool, namespaces_event, _pid,
+ tgid, process, machine) < 0)
+ break;
+
/*
* Send the prepared comm event
*/
@@ -517,6 +597,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
unsigned int proc_map_timeout)
{
union perf_event *comm_event, *mmap_event, *fork_event;
+ union perf_event *namespaces_event;
int err = -1, thread, j;
comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size);
@@ -531,10 +612,16 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
if (fork_event == NULL)
goto out_free_mmap;
+ namespaces_event = malloc(sizeof(namespaces_event->namespaces) +
+ (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
+ machine->id_hdr_size);
+ if (namespaces_event == NULL)
+ goto out_free_fork;
+
err = 0;
for (thread = 0; thread < threads->nr; ++thread) {
if (__event__synthesize_thread(comm_event, mmap_event,
- fork_event,
+ fork_event, namespaces_event,
thread_map__pid(threads, thread), 0,
process, tool, machine,
mmap_data, proc_map_timeout)) {
@@ -560,7 +647,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
/* if not, generate events for it */
if (need_leader &&
__event__synthesize_thread(comm_event, mmap_event,
- fork_event,
+ fork_event, namespaces_event,
comm_event->comm.pid, 0,
process, tool, machine,
mmap_data, proc_map_timeout)) {
@@ -569,6 +656,8 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
}
}
}
+ free(namespaces_event);
+out_free_fork:
free(fork_event);
out_free_mmap:
free(mmap_event);
@@ -588,6 +677,7 @@ int perf_event__synthesize_threads(struct perf_tool *tool,
char proc_path[PATH_MAX];
struct dirent *dirent;
union perf_event *comm_event, *mmap_event, *fork_event;
+ union perf_event *namespaces_event;
int err = -1;
if (machine__is_default_guest(machine))
@@ -605,11 +695,17 @@ int perf_event__synthesize_threads(struct perf_tool *tool,
if (fork_event == NULL)
goto out_free_mmap;
+ namespaces_event = malloc(sizeof(namespaces_event->namespaces) +
+ (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
+ machine->id_hdr_size);
+ if (namespaces_event == NULL)
+ goto out_free_fork;
+
snprintf(proc_path, sizeof(proc_path), "%s/proc", machine->root_dir);
proc = opendir(proc_path);
if (proc == NULL)
- goto out_free_fork;
+ goto out_free_namespaces;
while ((dirent = readdir(proc)) != NULL) {
char *end;
@@ -621,13 +717,16 @@ int perf_event__synthesize_threads(struct perf_tool *tool,
* We may race with exiting thread, so don't stop just because
* one thread couldn't be synthesized.
*/
- __event__synthesize_thread(comm_event, mmap_event, fork_event, pid,
- 1, process, tool, machine, mmap_data,
+ __event__synthesize_thread(comm_event, mmap_event, fork_event,
+ namespaces_event, pid, 1, process,
+ tool, machine, mmap_data,
proc_map_timeout);
}
err = 0;
closedir(proc);
+out_free_namespaces:
+ free(namespaces_event);
out_free_fork:
free(fork_event);
out_free_mmap:
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 26efc77..a46a1b4 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -648,6 +648,12 @@ pid_t perf_event__synthesize_comm(struct perf_tool *tool,
perf_event__handler_t process,
struct machine *machine);
+int perf_event__synthesize_namespaces(struct perf_tool *tool,
+ union perf_event *event,
+ pid_t pid, pid_t tgid,
+ perf_event__handler_t process,
+ struct machine *machine);
+
int perf_event__synthesize_mmap_events(struct perf_tool *tool,
union perf_event *event,
pid_t pid, pid_t tgid,
Introduce a new option to display events of type PERF_RECORD_NAMESPACES
and update perf-script documentation accordingly. Shown below is output
(trimmed) of perf script command with the newly introduced option, on
perf.data generated with perf record command using --namespaces option.
$ perf script --show-namespace-events
swapper 0 [000] 0.000000: PERF_RECORD_NAMESPACES 1/1 - nr_namespaces: 7
[0/net: 3/0xf000001c, 1/uts: 3/0xeffffffe, 2/ipc: 3/0xefffffff, 3/pid: 3/0xeffffffc,
4/user: 3/0xeffffffd, 5/mnt: 3/0xf0000000, 6/cgroup: 3/0xeffffffb]
swapper 0 [000] 0.000000: PERF_RECORD_NAMESPACES 2/2 - nr_namespaces: 7
[0/net: 3/0xf000001c, 1/uts: 3/0xeffffffe, 2/ipc: 3/0xefffffff, 3/pid: 3/0xeffffffc,
4/user: 3/0xeffffffd, 5/mnt: 3/0xf0000000, 6/cgroup: 3/0xeffffffb]
Acked-by: Jiri Olsa <[email protected]>
Signed-off-by: Hari Bathini <[email protected]>
---
tools/perf/Documentation/perf-script.txt | 3 ++
tools/perf/builtin-script.c | 40 ++++++++++++++++++++++++++++++
2 files changed, 43 insertions(+)
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 4ed5f23..62c9b0c 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -248,6 +248,9 @@ OPTIONS
--show-mmap-events
Display mmap related events (e.g. MMAP, MMAP2).
+--show-namespace-events
+ Display namespace events i.e. events of type PERF_RECORD_NAMESPACES.
+
--show-switch-events
Display context switch events i.e. events of type PERF_RECORD_SWITCH or
PERF_RECORD_SWITCH_CPU_WIDE.
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index f1ce806..66d62c9 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -830,6 +830,7 @@ struct perf_script {
bool show_task_events;
bool show_mmap_events;
bool show_switch_events;
+ bool show_namespace_events;
bool allocated;
struct cpu_map *cpus;
struct thread_map *threads;
@@ -1118,6 +1119,41 @@ static int process_comm_event(struct perf_tool *tool,
return ret;
}
+static int process_namespaces_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ struct thread *thread;
+ struct perf_script *script = container_of(tool, struct perf_script, tool);
+ struct perf_session *session = script->session;
+ struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
+ int ret = -1;
+
+ thread = machine__findnew_thread(machine, event->namespaces.pid,
+ event->namespaces.tid);
+ if (thread == NULL) {
+ pr_debug("problem processing NAMESPACES event, skipping it.\n");
+ return -1;
+ }
+
+ if (perf_event__process_namespaces(tool, event, sample, machine) < 0)
+ goto out;
+
+ if (!evsel->attr.sample_id_all) {
+ sample->cpu = 0;
+ sample->time = 0;
+ sample->tid = event->namespaces.tid;
+ sample->pid = event->namespaces.pid;
+ }
+ print_sample_start(sample, thread, evsel);
+ perf_event__fprintf(event, stdout);
+ ret = 0;
+out:
+ thread__put(thread);
+ return ret;
+}
+
static int process_fork_event(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
@@ -1293,6 +1329,8 @@ static int __cmd_script(struct perf_script *script)
}
if (script->show_switch_events)
script->tool.context_switch = process_switch_event;
+ if (script->show_namespace_events)
+ script->tool.namespaces = process_namespaces_event;
ret = perf_session__process_events(script->session);
@@ -2181,6 +2219,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
"Show the mmap events"),
OPT_BOOLEAN('\0', "show-switch-events", &script.show_switch_events,
"Show context switch events (if recorded)"),
+ OPT_BOOLEAN('\0', "show-namespace-events", &script.show_namespace_events,
+ "Show namespace events (if recorded)"),
OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"),
OPT_BOOLEAN(0, "ns", &nanosecs,
"Use 9 decimal places when displaying time"),
This patch introduces a cgroup identifier entry field in perf report to
identify or distinguish data of different cgroups. It uses the device
number and inode number of cgroup namespace, included in perf data with
the new PERF_RECORD_NAMESPACES event, as cgroup identifier.
With the assumption that each container is created with its own cgroup
namespace, this allows assessment/analysis of multiple containers at
once. A simple test for this would be to clone a few processes passing
SIGCHLD & CLONE_NEWCGROUP flags to each of them, execute shell and run
different workloads on each of those contexts, while running perf
record command with --namespaces option. Shown below is the output of
perf report, sorted with cgroup identifier, on perf.data generated with
the above test scenario, clearly indicating one context's considerable
use of kernel memory in comparison with others:
$ perf report -s cgroup_id,sample --stdio
#
# Total Lost Samples: 0
#
# Samples: 5K of event 'kmem:kmalloc'
# Event count (approx.): 5965
#
# Overhead cgroup id (dev/inode) Samples
# ........ ..................... ............
#
81.27% 3/0xeffffffb 4848
16.24% 3/0xf00000d0 969
1.16% 3/0xf00000ce 69
0.82% 3/0xf00000cf 49
0.50% 0/0x0 30
While this is a start, there is further scope for improving this. For
example, instead of the cgroup namespace's device and inode numbers, the
device and inode numbers of some or all namespaces may be used to
distinguish which processes are running in a given container context.
Also, scripts to map device and inode info to containers sound plausible
for better tracing of containers.
Signed-off-by: Hari Bathini <[email protected]>
---
tools/perf/Documentation/perf-report.txt | 4 ++-
tools/perf/util/hist.c | 7 +++++
tools/perf/util/hist.h | 1 +
tools/perf/util/sort.c | 41 ++++++++++++++++++++++++++++++
tools/perf/util/sort.h | 7 +++++
5 files changed, 59 insertions(+), 1 deletion(-)
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 33f9190..9eaea2e 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -72,7 +72,8 @@ OPTIONS
--sort=::
Sort histogram entries by given key(s) - multiple keys can be specified
in CSV format. Following sort keys are available:
- pid, comm, dso, symbol, parent, cpu, socket, srcline, weight, local_weight.
+ pid, comm, dso, symbol, parent, cpu, socket, srcline, weight,
+ local_weight, cgroup_id.
Each key has following meaning:
@@ -92,6 +93,7 @@ OPTIONS
- weight: Event specific weight, e.g. memory latency or transaction
abort cost. This is the global weight.
- local_weight: Local weight version of the weight above.
+ - cgroup_id: ID derived from cgroup namespace device and inode numbers.
- transaction: Transaction abort flags.
- overhead: Overhead percentage of sample
- overhead_sys: Overhead percentage of sample running in system mode
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index eaf72a9..e3b38f6 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -3,6 +3,7 @@
#include "hist.h"
#include "map.h"
#include "session.h"
+#include "namespaces.h"
#include "sort.h"
#include "evlist.h"
#include "evsel.h"
@@ -169,6 +170,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
hists__set_unres_dso_col_len(hists, HISTC_MEM_DADDR_DSO);
}
+ hists__new_col_len(hists, HISTC_CGROUP_ID, 20);
hists__new_col_len(hists, HISTC_CPU, 3);
hists__new_col_len(hists, HISTC_SOCKET, 6);
hists__new_col_len(hists, HISTC_MEM_LOCKED, 6);
@@ -574,9 +576,14 @@ __hists__add_entry(struct hists *hists,
bool sample_self,
struct hist_entry_ops *ops)
{
+ struct namespaces *ns = thread__namespaces(al->thread);
struct hist_entry entry = {
.thread = al->thread,
.comm = thread__comm(al->thread),
+ .cgroup_id = {
+ .dev = ns ? ns->link_info[CGROUP_NS_INDEX].dev : 0,
+ .ino = ns ? ns->link_info[CGROUP_NS_INDEX].ino : 0,
+ },
.ms = {
.map = al->map,
.sym = al->sym,
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 2e839bf..ee3670a 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -30,6 +30,7 @@ enum hist_column {
HISTC_DSO,
HISTC_THREAD,
HISTC_COMM,
+ HISTC_CGROUP_ID,
HISTC_PARENT,
HISTC_CPU,
HISTC_SOCKET,
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index f8f16c0..d8c7489 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -536,6 +536,46 @@ struct sort_entry sort_cpu = {
.se_width_idx = HISTC_CPU,
};
+/* --sort cgroup_id */
+
+static int64_t _sort__cgroup_dev_cmp(u64 left_dev, u64 right_dev)
+{
+ return (int64_t)(right_dev - left_dev);
+}
+
+static int64_t _sort__cgroup_inode_cmp(u64 left_ino, u64 right_ino)
+{
+ return (int64_t)(right_ino - left_ino);
+}
+
+static int64_t
+sort__cgroup_id_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ int64_t ret;
+
+ ret = _sort__cgroup_dev_cmp(right->cgroup_id.dev, left->cgroup_id.dev);
+ if (ret != 0)
+ return ret;
+
+ return _sort__cgroup_inode_cmp(right->cgroup_id.ino,
+ left->cgroup_id.ino);
+}
+
+static int hist_entry__cgroup_id_snprintf(struct hist_entry *he,
+ char *bf, size_t size,
+ unsigned int width __maybe_unused)
+{
+ return repsep_snprintf(bf, size, "%lu/0x%lx", he->cgroup_id.dev,
+ he->cgroup_id.ino);
+}
+
+struct sort_entry sort_cgroup_id = {
+ .se_header = "cgroup id (dev/inode)",
+ .se_cmp = sort__cgroup_id_cmp,
+ .se_snprintf = hist_entry__cgroup_id_snprintf,
+ .se_width_idx = HISTC_CGROUP_ID,
+};
+
/* --sort socket */
static int64_t
@@ -1459,6 +1499,7 @@ static struct sort_dimension common_sort_dimensions[] = {
DIM(SORT_TRANSACTION, "transaction", sort_transaction),
DIM(SORT_TRACE, "trace", sort_trace),
DIM(SORT_SYM_SIZE, "symbol_size", sort_sym_size),
+ DIM(SORT_CGROUP_ID, "cgroup_id", sort_cgroup_id),
};
#undef DIM
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index f583325..baf20a3 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -54,6 +54,11 @@ struct he_stat {
u32 nr_events;
};
+struct namespace_id {
+ u64 dev;
+ u64 ino;
+};
+
struct hist_entry_diff {
bool computed;
union {
@@ -91,6 +96,7 @@ struct hist_entry {
struct map_symbol ms;
struct thread *thread;
struct comm *comm;
+ struct namespace_id cgroup_id;
u64 ip;
u64 transaction;
s32 socket;
@@ -212,6 +218,7 @@ enum sort_type {
SORT_TRANSACTION,
SORT_TRACE,
SORT_SYM_SIZE,
+ SORT_CGROUP_ID,
/* branch stack specific sort keys */
__SORT_BRANCH_STACK,
With the advent of container technologies like docker, that depend
on namespaces for isolation, there is a need for tracing support for
namespaces. This patch introduces new PERF_RECORD_NAMESPACES event
for recording namespaces related info. By recording info for every
namespace, it is left to userspace to take a call on the definition
of a container and trace containers by updating perf tool accordingly.
Each namespace has a combination of device and inode numbers. Though
every namespace has the same device number currently, that may change
in future to avoid the need for a namespace of namespaces. Considering
such possibility, record both device and inode numbers separately for
each namespace.
Acked-by: Jiri Olsa <[email protected]>
Acked-by: Peter Zijlstra (Intel) <[email protected]>
Signed-off-by: Hari Bathini <[email protected]>
---
include/linux/perf_event.h | 2 +
include/uapi/linux/perf_event.h | 32 +++++++++
kernel/events/core.c | 139 +++++++++++++++++++++++++++++++++++++++
kernel/fork.c | 2 +
kernel/nsproxy.c | 3 +
5 files changed, 177 insertions(+), 1 deletion(-)
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 000fdb2..f19a823 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1112,6 +1112,7 @@ extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks
extern void perf_event_exec(void);
extern void perf_event_comm(struct task_struct *tsk, bool exec);
+extern void perf_event_namespaces(struct task_struct *tsk);
extern void perf_event_fork(struct task_struct *tsk);
/* Callchains */
@@ -1315,6 +1316,7 @@ static inline int perf_unregister_guest_info_callbacks
static inline void perf_event_mmap(struct vm_area_struct *vma) { }
static inline void perf_event_exec(void) { }
static inline void perf_event_comm(struct task_struct *tsk, bool exec) { }
+static inline void perf_event_namespaces(struct task_struct *tsk) { }
static inline void perf_event_fork(struct task_struct *tsk) { }
static inline void perf_event_init(void) { }
static inline int perf_swevent_get_recursion_context(void) { return -1; }
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index c66a485..bec0aad 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -344,7 +344,8 @@ struct perf_event_attr {
use_clockid : 1, /* use @clockid for time fields */
context_switch : 1, /* context switch data */
write_backward : 1, /* Write ring buffer from end to beginning */
- __reserved_1 : 36;
+ namespaces : 1, /* include namespaces data */
+ __reserved_1 : 35;
union {
__u32 wakeup_events; /* wakeup every n events */
@@ -610,6 +611,23 @@ struct perf_event_header {
__u16 size;
};
+struct perf_ns_link_info {
+ __u64 dev;
+ __u64 ino;
+};
+
+enum {
+ NET_NS_INDEX = 0,
+ UTS_NS_INDEX = 1,
+ IPC_NS_INDEX = 2,
+ PID_NS_INDEX = 3,
+ USER_NS_INDEX = 4,
+ MNT_NS_INDEX = 5,
+ CGROUP_NS_INDEX = 6,
+
+ NR_NAMESPACES, /* number of available namespaces */
+};
+
enum perf_event_type {
/*
@@ -862,6 +880,18 @@ enum perf_event_type {
*/
PERF_RECORD_SWITCH_CPU_WIDE = 15,
+ /*
+ * struct {
+ * struct perf_event_header header;
+ * u32 pid;
+ * u32 tid;
+ * u64 nr_namespaces;
+ * { u64 dev, inode; } [nr_namespaces];
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_NAMESPACES = 16,
+
PERF_RECORD_MAX, /* non-ABI */
};
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 6f41548f..16c877a 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -48,6 +48,8 @@
#include <linux/parser.h>
#include <linux/sched/clock.h>
#include <linux/sched/mm.h>
+#include <linux/proc_ns.h>
+#include <linux/mount.h>
#include "internal.h"
@@ -379,6 +381,7 @@ static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
static atomic_t nr_mmap_events __read_mostly;
static atomic_t nr_comm_events __read_mostly;
+static atomic_t nr_namespaces_events __read_mostly;
static atomic_t nr_task_events __read_mostly;
static atomic_t nr_freq_events __read_mostly;
static atomic_t nr_switch_events __read_mostly;
@@ -3991,6 +3994,8 @@ static void unaccount_event(struct perf_event *event)
atomic_dec(&nr_mmap_events);
if (event->attr.comm)
atomic_dec(&nr_comm_events);
+ if (event->attr.namespaces)
+ atomic_dec(&nr_namespaces_events);
if (event->attr.task)
atomic_dec(&nr_task_events);
if (event->attr.freq)
@@ -6491,6 +6496,7 @@ static void perf_event_task(struct task_struct *task,
void perf_event_fork(struct task_struct *task)
{
perf_event_task(task, NULL, 1);
+ perf_event_namespaces(task);
}
/*
@@ -6593,6 +6599,132 @@ void perf_event_comm(struct task_struct *task, bool exec)
}
/*
+ * namespaces tracking
+ */
+
+struct perf_namespaces_event {
+ struct task_struct *task;
+
+ struct {
+ struct perf_event_header header;
+
+ u32 pid;
+ u32 tid;
+ u64 nr_namespaces;
+ struct perf_ns_link_info link_info[NR_NAMESPACES];
+ } event_id;
+};
+
+static int perf_event_namespaces_match(struct perf_event *event)
+{
+ return event->attr.namespaces;
+}
+
+static void perf_event_namespaces_output(struct perf_event *event,
+ void *data)
+{
+ struct perf_namespaces_event *namespaces_event = data;
+ struct perf_output_handle handle;
+ struct perf_sample_data sample;
+ int ret;
+
+ if (!perf_event_namespaces_match(event))
+ return;
+
+ perf_event_header__init_id(&namespaces_event->event_id.header,
+ &sample, event);
+ ret = perf_output_begin(&handle, event,
+ namespaces_event->event_id.header.size);
+ if (ret)
+ return;
+
+ namespaces_event->event_id.pid = perf_event_pid(event,
+ namespaces_event->task);
+ namespaces_event->event_id.tid = perf_event_tid(event,
+ namespaces_event->task);
+
+ perf_output_put(&handle, namespaces_event->event_id);
+
+ perf_event__output_id_sample(event, &handle, &sample);
+
+ perf_output_end(&handle);
+}
+
+static void perf_fill_ns_link_info(struct perf_ns_link_info *ns_link_info,
+ struct task_struct *task,
+ const struct proc_ns_operations *ns_ops)
+{
+ struct path ns_path;
+ struct inode *ns_inode;
+ void *error;
+
+ error = ns_get_path(&ns_path, task, ns_ops);
+ if (!error) {
+ ns_inode = ns_path.dentry->d_inode;
+ ns_link_info->dev = new_encode_dev(ns_inode->i_sb->s_dev);
+ ns_link_info->ino = ns_inode->i_ino;
+ }
+}
+
+void perf_event_namespaces(struct task_struct *task)
+{
+ struct perf_namespaces_event namespaces_event;
+ struct perf_ns_link_info *ns_link_info;
+
+ if (!atomic_read(&nr_namespaces_events))
+ return;
+
+ namespaces_event = (struct perf_namespaces_event){
+ .task = task,
+ .event_id = {
+ .header = {
+ .type = PERF_RECORD_NAMESPACES,
+ .misc = 0,
+ .size = sizeof(namespaces_event.event_id),
+ },
+ /* .pid */
+ /* .tid */
+ .nr_namespaces = NR_NAMESPACES,
+ /* .link_info[NR_NAMESPACES] */
+ },
+ };
+
+ ns_link_info = namespaces_event.event_id.link_info;
+
+ perf_fill_ns_link_info(&ns_link_info[MNT_NS_INDEX],
+ task, &mntns_operations);
+
+#ifdef CONFIG_USER_NS
+ perf_fill_ns_link_info(&ns_link_info[USER_NS_INDEX],
+ task, &userns_operations);
+#endif
+#ifdef CONFIG_NET_NS
+ perf_fill_ns_link_info(&ns_link_info[NET_NS_INDEX],
+ task, &netns_operations);
+#endif
+#ifdef CONFIG_UTS_NS
+ perf_fill_ns_link_info(&ns_link_info[UTS_NS_INDEX],
+ task, &utsns_operations);
+#endif
+#ifdef CONFIG_IPC_NS
+ perf_fill_ns_link_info(&ns_link_info[IPC_NS_INDEX],
+ task, &ipcns_operations);
+#endif
+#ifdef CONFIG_PID_NS
+ perf_fill_ns_link_info(&ns_link_info[PID_NS_INDEX],
+ task, &pidns_operations);
+#endif
+#ifdef CONFIG_CGROUPS
+ perf_fill_ns_link_info(&ns_link_info[CGROUP_NS_INDEX],
+ task, &cgroupns_operations);
+#endif
+
+ perf_iterate_sb(perf_event_namespaces_output,
+ &namespaces_event,
+ NULL);
+}
+
+/*
* mmap tracking
*/
@@ -9146,6 +9278,8 @@ static void account_event(struct perf_event *event)
atomic_inc(&nr_mmap_events);
if (event->attr.comm)
atomic_inc(&nr_comm_events);
+ if (event->attr.namespaces)
+ atomic_inc(&nr_namespaces_events);
if (event->attr.task)
atomic_inc(&nr_task_events);
if (event->attr.freq)
@@ -9691,6 +9825,11 @@ SYSCALL_DEFINE5(perf_event_open,
return -EACCES;
}
+ if (attr.namespaces) {
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+ }
+
if (attr.freq) {
if (attr.sample_freq > sysctl_perf_event_sample_rate)
return -EINVAL;
diff --git a/kernel/fork.c b/kernel/fork.c
index 6c463c80..afa2947 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2352,6 +2352,8 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
}
}
+ perf_event_namespaces(current);
+
bad_unshare_cleanup_cred:
if (new_cred)
put_cred(new_cred);
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 782102e..f6c5d33 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -26,6 +26,7 @@
#include <linux/file.h>
#include <linux/syscalls.h>
#include <linux/cgroup.h>
+#include <linux/perf_event.h>
static struct kmem_cache *nsproxy_cachep;
@@ -262,6 +263,8 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype)
goto out;
}
switch_task_namespaces(tsk, new_nsproxy);
+
+ perf_event_namespaces(tsk);
out:
fput(file);
return err;
Em Wed, Mar 08, 2017 at 02:11:36AM +0530, Hari Bathini escreveu:
> @@ -9691,6 +9825,11 @@ SYSCALL_DEFINE5(perf_event_open,
> return -EACCES;
> }
>
> + if (attr.namespaces) {
> + if (!capable(CAP_SYS_ADMIN))
> + return -EACCES;
> + }
> +
So, testing the first tooling patch I get:
[acme@jouet linux]$ perf record --namespaces ls
Error:
You may not have permission to collect stats.
Consider tweaking /proc/sys/kernel/perf_event_paranoid,
which controls use of the performance events system by
unprivileged users (without CAP_SYS_ADMIN).
The current value is 2:
-1: Allow use of (almost) all events by all users
>= 0: Disallow raw tracepoint access by users without CAP_IOC_LOCK
>= 1: Disallow CPU event access by users without CAP_SYS_ADMIN
>= 2: Disallow kernel profiling by users without CAP_SYS_ADMIN
To make this setting permanent, edit /etc/sysctl.conf too, e.g.:
kernel.perf_event_paranoid = -1
[acme@jouet linux]$
----------------------- 8< ---------------------------------------
So this specific case is not related to perf_event_paranoid, we need to
improve error handling here, i.e. see attr.namespaces, !CAP_SYS_ADMIN,
-EACCES at sys_perf_event_open() -> better error message.
And perhaps we make an extra check about perf_event_paranoid... Ingo? Peter?
Can be done as a follow up patch tho, just adding entry to the collective TODO
list.
- Arnaldo
Em Wed, Mar 08, 2017 at 02:11:51AM +0530, Hari Bathini escreveu:
> Synthesize PERF_RECORD_NAMESPACES events for processes that were
> running prior to invocation of perf record. The data for this is
> taken from /proc/$PID/ns. These changes make way for analyzing
> events with regard to namespaces.
Will investigate...
[root@jouet ~]# perf test Lookup
26: Lookup mmap thread : FAILED!
[root@jouet ~]# perf test -v Lookup
26: Lookup mmap thread :
--- start ---
test child forked, pid 3413
tid = 3413, map = 0x7fd99225d000
tid = 3414, map = 0x7fd99225c000
tid = 3415, map = 0x7fd99225b000
tid = 3416, map = 0x7fd99225a000
perf: Segmentation fault
Obtained 16 stack frames.
perf(dump_stack+0x2d) [0x53dee6]
perf(sighandler_dump_stack+0x2d) [0x53dfc6]
/lib64/libc.so.6(+0x3598f) [0x7fd98f48898f]
perf(perf_event__synthesize_namespaces+0x32) [0x4c808b]
perf() [0x4c8de1]
perf(perf_event__synthesize_threads+0x20c) [0x4c935d]
perf() [0x4aa6e8]
perf() [0x4aa7fe]
perf(test__mmap_thread_lookup+0x23) [0x4aa9e9]
perf() [0x48daf8]
perf() [0x48dc2e]
perf() [0x48deb5]
perf(cmd_test+0x233) [0x48e340]
perf() [0x4b90fa]
perf() [0x4b9367]
perf() [0x4b94ac]
test child interrupted
---- end ----
Lookup mmap thread: FAILED!
[root@jouet ~]#
> Acked-by: Jiri Olsa <[email protected]>
> Signed-off-by: Hari Bathini <[email protected]>
> ---
> tools/perf/builtin-record.c | 29 ++++++++++-
> tools/perf/util/event.c | 111 +++++++++++++++++++++++++++++++++++++++++--
> tools/perf/util/event.h | 6 ++
> 3 files changed, 136 insertions(+), 10 deletions(-)
>
> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> index 99562c7..04faef7 100644
> --- a/tools/perf/builtin-record.c
> +++ b/tools/perf/builtin-record.c
> @@ -986,6 +986,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
> */
> if (forks) {
> union perf_event *event;
> + pid_t tgid;
>
> event = malloc(sizeof(event->comm) + machine->id_hdr_size);
> if (event == NULL) {
> @@ -999,10 +1000,30 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
> * cannot see a correct process name for those events.
> * Synthesize COMM event to prevent it.
> */
> - perf_event__synthesize_comm(tool, event,
> - rec->evlist->workload.pid,
> - process_synthesized_event,
> - machine);
> + tgid = perf_event__synthesize_comm(tool, event,
> + rec->evlist->workload.pid,
> + process_synthesized_event,
> + machine);
> + free(event);
> +
> + if (tgid == -1)
> + goto out_child;
> +
> + event = malloc(sizeof(event->namespaces) +
> + (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
> + machine->id_hdr_size);
> + if (event == NULL) {
> + err = -ENOMEM;
> + goto out_child;
> + }
> +
> + /*
> + * Synthesize NAMESPACES event for the command specified.
> + */
> + perf_event__synthesize_namespaces(tool, event,
> + rec->evlist->workload.pid,
> + tgid, process_synthesized_event,
> + machine);
> free(event);
>
> perf_evlist__start_workload(rec->evlist);
> diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
> index f118eac..5a99af8 100644
> --- a/tools/perf/util/event.c
> +++ b/tools/perf/util/event.c
> @@ -50,6 +50,16 @@ static const char *perf_event__names[] = {
> [PERF_RECORD_TIME_CONV] = "TIME_CONV",
> };
>
> +static const char *perf_ns__names[] = {
> + [NET_NS_INDEX] = "net",
> + [UTS_NS_INDEX] = "uts",
> + [IPC_NS_INDEX] = "ipc",
> + [PID_NS_INDEX] = "pid",
> + [USER_NS_INDEX] = "user",
> + [MNT_NS_INDEX] = "mnt",
> + [CGROUP_NS_INDEX] = "cgroup",
> +};
> +
> const char *perf_event__name(unsigned int id)
> {
> if (id >= ARRAY_SIZE(perf_event__names))
> @@ -59,6 +69,13 @@ const char *perf_event__name(unsigned int id)
> return perf_event__names[id];
> }
>
> +static const char *perf_ns__name(unsigned int id)
> +{
> + if (id >= ARRAY_SIZE(perf_ns__names))
> + return "UNKNOWN";
> + return perf_ns__names[id];
> +}
> +
> static int perf_tool__process_synth_event(struct perf_tool *tool,
> union perf_event *event,
> struct machine *machine,
> @@ -204,6 +221,58 @@ pid_t perf_event__synthesize_comm(struct perf_tool *tool,
> return tgid;
> }
>
> +static void perf_event__get_ns_link_info(pid_t pid, const char *ns,
> + struct perf_ns_link_info *ns_link_info)
> +{
> + struct stat64 st;
> + char proc_ns[128];
> +
> + sprintf(proc_ns, "/proc/%u/ns/%s", pid, ns);
> + if (stat64(proc_ns, &st) == 0) {
> + ns_link_info->dev = st.st_dev;
> + ns_link_info->ino = st.st_ino;
> + }
> +}
> +
> +int perf_event__synthesize_namespaces(struct perf_tool *tool,
> + union perf_event *event,
> + pid_t pid, pid_t tgid,
> + perf_event__handler_t process,
> + struct machine *machine)
> +{
> + u32 idx;
> + struct perf_ns_link_info *ns_link_info;
> +
> + if (!tool->namespace_events)
> + return 0;
> +
> + memset(&event->namespaces, 0, (sizeof(event->namespaces) +
> + (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
> + machine->id_hdr_size));
> +
> + event->namespaces.pid = tgid;
> + event->namespaces.tid = pid;
> +
> + event->namespaces.nr_namespaces = NR_NAMESPACES;
> +
> + ns_link_info = event->namespaces.link_info;
> +
> + for (idx = 0; idx < event->namespaces.nr_namespaces; idx++)
> + perf_event__get_ns_link_info(pid, perf_ns__name(idx),
> + &ns_link_info[idx]);
> +
> + event->namespaces.header.type = PERF_RECORD_NAMESPACES;
> +
> + event->namespaces.header.size = (sizeof(event->namespaces) +
> + (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
> + machine->id_hdr_size);
> +
> + if (perf_tool__process_synth_event(tool, event, machine, process) != 0)
> + return -1;
> +
> + return 0;
> +}
> +
> static int perf_event__synthesize_fork(struct perf_tool *tool,
> union perf_event *event,
> pid_t pid, pid_t tgid, pid_t ppid,
> @@ -435,8 +504,9 @@ int perf_event__synthesize_modules(struct perf_tool *tool,
> static int __event__synthesize_thread(union perf_event *comm_event,
> union perf_event *mmap_event,
> union perf_event *fork_event,
> + union perf_event *namespaces_event,
> pid_t pid, int full,
> - perf_event__handler_t process,
> + perf_event__handler_t process,
> struct perf_tool *tool,
> struct machine *machine,
> bool mmap_data,
> @@ -456,6 +526,11 @@ static int __event__synthesize_thread(union perf_event *comm_event,
> if (tgid == -1)
> return -1;
>
> + if (perf_event__synthesize_namespaces(tool, namespaces_event, pid,
> + tgid, process, machine) < 0)
> + return -1;
> +
> +
> return perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid,
> process, machine, mmap_data,
> proc_map_timeout);
> @@ -489,6 +564,11 @@ static int __event__synthesize_thread(union perf_event *comm_event,
> if (perf_event__synthesize_fork(tool, fork_event, _pid, tgid,
> ppid, process, machine) < 0)
> break;
> +
> + if (perf_event__synthesize_namespaces(tool, namespaces_event, _pid,
> + tgid, process, machine) < 0)
> + break;
> +
> /*
> * Send the prepared comm event
> */
> @@ -517,6 +597,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
> unsigned int proc_map_timeout)
> {
> union perf_event *comm_event, *mmap_event, *fork_event;
> + union perf_event *namespaces_event;
> int err = -1, thread, j;
>
> comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size);
> @@ -531,10 +612,16 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
> if (fork_event == NULL)
> goto out_free_mmap;
>
> + namespaces_event = malloc(sizeof(namespaces_event->namespaces) +
> + (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
> + machine->id_hdr_size);
> + if (namespaces_event == NULL)
> + goto out_free_fork;
> +
> err = 0;
> for (thread = 0; thread < threads->nr; ++thread) {
> if (__event__synthesize_thread(comm_event, mmap_event,
> - fork_event,
> + fork_event, namespaces_event,
> thread_map__pid(threads, thread), 0,
> process, tool, machine,
> mmap_data, proc_map_timeout)) {
> @@ -560,7 +647,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
> /* if not, generate events for it */
> if (need_leader &&
> __event__synthesize_thread(comm_event, mmap_event,
> - fork_event,
> + fork_event, namespaces_event,
> comm_event->comm.pid, 0,
> process, tool, machine,
> mmap_data, proc_map_timeout)) {
> @@ -569,6 +656,8 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
> }
> }
> }
> + free(namespaces_event);
> +out_free_fork:
> free(fork_event);
> out_free_mmap:
> free(mmap_event);
> @@ -588,6 +677,7 @@ int perf_event__synthesize_threads(struct perf_tool *tool,
> char proc_path[PATH_MAX];
> struct dirent *dirent;
> union perf_event *comm_event, *mmap_event, *fork_event;
> + union perf_event *namespaces_event;
> int err = -1;
>
> if (machine__is_default_guest(machine))
> @@ -605,11 +695,17 @@ int perf_event__synthesize_threads(struct perf_tool *tool,
> if (fork_event == NULL)
> goto out_free_mmap;
>
> + namespaces_event = malloc(sizeof(namespaces_event->namespaces) +
> + (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
> + machine->id_hdr_size);
> + if (namespaces_event == NULL)
> + goto out_free_fork;
> +
> snprintf(proc_path, sizeof(proc_path), "%s/proc", machine->root_dir);
> proc = opendir(proc_path);
>
> if (proc == NULL)
> - goto out_free_fork;
> + goto out_free_namespaces;
>
> while ((dirent = readdir(proc)) != NULL) {
> char *end;
> @@ -621,13 +717,16 @@ int perf_event__synthesize_threads(struct perf_tool *tool,
> * We may race with exiting thread, so don't stop just because
> * one thread couldn't be synthesized.
> */
> - __event__synthesize_thread(comm_event, mmap_event, fork_event, pid,
> - 1, process, tool, machine, mmap_data,
> + __event__synthesize_thread(comm_event, mmap_event, fork_event,
> + namespaces_event, pid, 1, process,
> + tool, machine, mmap_data,
> proc_map_timeout);
> }
>
> err = 0;
> closedir(proc);
> +out_free_namespaces:
> + free(namespaces_event);
> out_free_fork:
> free(fork_event);
> out_free_mmap:
> diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
> index 26efc77..a46a1b4 100644
> --- a/tools/perf/util/event.h
> +++ b/tools/perf/util/event.h
> @@ -648,6 +648,12 @@ pid_t perf_event__synthesize_comm(struct perf_tool *tool,
> perf_event__handler_t process,
> struct machine *machine);
>
> +int perf_event__synthesize_namespaces(struct perf_tool *tool,
> + union perf_event *event,
> + pid_t pid, pid_t tgid,
> + perf_event__handler_t process,
> + struct machine *machine);
> +
> int perf_event__synthesize_mmap_events(struct perf_tool *tool,
> union perf_event *event,
> pid_t pid, pid_t tgid,
Em Wed, Mar 08, 2017 at 02:11:59AM +0530, Hari Bathini escreveu:
> +size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp)
> +{
> + size_t ret = 0;
> + struct perf_ns_link_info *ns_link_info;
> + u32 nr_namespaces, idx;
> +
> + ns_link_info = event->namespaces.link_info;
> + nr_namespaces = event->namespaces.nr_namespaces;
> +
> + ret += fprintf(fp, " %d/%d - nr_namespaces: %u\n\t\t[",
> + event->namespaces.pid,
> + event->namespaces.tid,
> + nr_namespaces);
> +
> + for (idx = 0; idx < nr_namespaces; idx++) {
> + if (idx && (idx % 4 == 0))
> + ret += fprintf(fp, "\n\t\t ");
> +
> + ret += fprintf(fp, "%u/%s: %lu/0x%lx%s", idx,
> + perf_ns__name(idx), (u64)ns_link_info[idx].dev,
> + (u64)ns_link_info[idx].ino,
> + ((idx + 1) != nr_namespaces) ? ", " : "]\n");
Fails in some environments, such as debian:experimental-x-mipsel:
CC /tmp/build/perf/util/evlist.o
util/event.c: In function 'perf_event__fprintf_namespaces':
util/event.c:1129:33: error: format '%lu' expects argument of type 'long unsigned int', but argument 5 has type 'long long unsigned int' [-Werror=format=]
ret += fprintf(fp, "%u/%s: %lu/0x%lx%s", idx,
^
util/event.c:1129:39: error: format '%lx' expects argument of type 'long unsigned int', but argument 6 has type 'long long unsigned int' [-Werror=format=]
ret += fprintf(fp, "%u/%s: %lu/0x%lx%s", idx,
^
CC /tmp/build/perf/builtin-list.o
-------------------
Fixing this up to use PRIu64...
- Arnaldo
Commit-ID: f3b3614a284deb124018155a618a7b19694c8b5c
Gitweb: http://git.kernel.org/tip/f3b3614a284deb124018155a618a7b19694c8b5c
Author: Hari Bathini <[email protected]>
AuthorDate: Wed, 8 Mar 2017 02:11:43 +0530
Committer: Arnaldo Carvalho de Melo <[email protected]>
CommitDate: Tue, 14 Mar 2017 11:38:23 -0300
perf tools: Add PERF_RECORD_NAMESPACES to include namespaces related info
Introduce a new option to record PERF_RECORD_NAMESPACES events emitted
by the kernel when fork, clone, setns or unshare are invoked. And update
perf-record documentation with the new option to record namespace
events.
Committer notes:
Combined it with a later patch to allow printing it via 'perf report -D'
and to be able to test the feature introduced in this patch. Also had to
move perf_ns__name() here, as it was introduced in another later patch.
Also used PRIu64 and PRIx64 to fix the build in some environments wrt:
util/event.c:1129:39: error: format '%lx' expects argument of type 'long unsigned int', but argument 6 has type 'long long unsigned int' [-Werror=format=]
ret += fprintf(fp, "%u/%s: %lu/0x%lx%s", idx
^
Testing it:
# perf record --namespaces -a
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 1.083 MB perf.data (423 samples) ]
#
# perf report -D
<SNIP>
3 2028902078892 0x115140 [0xa0]: PERF_RECORD_NAMESPACES 14783/14783 - nr_namespaces: 7
[0/net: 3/0xf0000081, 1/uts: 3/0xeffffffe, 2/ipc: 3/0xefffffff, 3/pid: 3/0xeffffffc,
4/user: 3/0xeffffffd, 5/mnt: 3/0xf0000000, 6/cgroup: 3/0xeffffffb]
0x1151e0 [0x30]: event: 9
.
. ... raw event: size 48 bytes
. 0000: 09 00 00 00 02 00 30 00 c4 71 82 68 0c 7f 00 00 ......0..q.h....
. 0010: a9 39 00 00 a9 39 00 00 94 28 fe 63 d8 01 00 00 .9...9...(.c....
. 0020: 03 00 00 00 00 00 00 00 ce c4 02 00 00 00 00 00 ................
<SNIP>
NAMESPACES events: 1
<SNIP>
#
Signed-off-by: Hari Bathini <[email protected]>
Acked-by: Jiri Olsa <[email protected]>
Tested-by: Arnaldo Carvalho de Melo <[email protected]>
Cc: Alexander Shishkin <[email protected]>
Cc: Alexei Starovoitov <[email protected]>
Cc: Ananth N Mavinakayanahalli <[email protected]>
Cc: Aravinda Prasad <[email protected]>
Cc: Brendan Gregg <[email protected]>
Cc: Daniel Borkmann <[email protected]>
Cc: Eric Biederman <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Sargun Dhillon <[email protected]>
Cc: Steven Rostedt <[email protected]>
Link: http://lkml.kernel.org/r/148891930386.25309.18412039920746995488.stgit@hbathini.in.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/include/uapi/linux/perf_event.h | 32 +++++++++++++++++-
tools/perf/Documentation/perf-record.txt | 3 ++
tools/perf/builtin-annotate.c | 1 +
tools/perf/builtin-diff.c | 1 +
tools/perf/builtin-inject.c | 13 ++++++++
tools/perf/builtin-kmem.c | 1 +
tools/perf/builtin-kvm.c | 2 ++
tools/perf/builtin-lock.c | 1 +
tools/perf/builtin-mem.c | 1 +
tools/perf/builtin-record.c | 6 ++++
tools/perf/builtin-report.c | 1 +
tools/perf/builtin-sched.c | 1 +
tools/perf/builtin-script.c | 1 +
tools/perf/builtin-trace.c | 3 +-
tools/perf/perf.h | 1 +
tools/perf/util/Build | 1 +
tools/perf/util/data-convert-bt.c | 1 +
tools/perf/util/event.c | 56 ++++++++++++++++++++++++++++++++
tools/perf/util/event.h | 13 ++++++++
tools/perf/util/evsel.c | 3 ++
tools/perf/util/machine.c | 34 +++++++++++++++++++
tools/perf/util/machine.h | 3 ++
tools/perf/util/namespaces.c | 36 ++++++++++++++++++++
tools/perf/util/namespaces.h | 26 +++++++++++++++
tools/perf/util/session.c | 7 ++++
tools/perf/util/thread.c | 44 +++++++++++++++++++++++--
tools/perf/util/thread.h | 6 ++++
tools/perf/util/tool.h | 2 ++
28 files changed, 296 insertions(+), 4 deletions(-)
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index c66a485..bec0aad 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -344,7 +344,8 @@ struct perf_event_attr {
use_clockid : 1, /* use @clockid for time fields */
context_switch : 1, /* context switch data */
write_backward : 1, /* Write ring buffer from end to beginning */
- __reserved_1 : 36;
+ namespaces : 1, /* include namespaces data */
+ __reserved_1 : 35;
union {
__u32 wakeup_events; /* wakeup every n events */
@@ -610,6 +611,23 @@ struct perf_event_header {
__u16 size;
};
+struct perf_ns_link_info {
+ __u64 dev;
+ __u64 ino;
+};
+
+enum {
+ NET_NS_INDEX = 0,
+ UTS_NS_INDEX = 1,
+ IPC_NS_INDEX = 2,
+ PID_NS_INDEX = 3,
+ USER_NS_INDEX = 4,
+ MNT_NS_INDEX = 5,
+ CGROUP_NS_INDEX = 6,
+
+ NR_NAMESPACES, /* number of available namespaces */
+};
+
enum perf_event_type {
/*
@@ -862,6 +880,18 @@ enum perf_event_type {
*/
PERF_RECORD_SWITCH_CPU_WIDE = 15,
+ /*
+ * struct {
+ * struct perf_event_header header;
+ * u32 pid;
+ * u32 tid;
+ * u64 nr_namespaces;
+ * { u64 dev, inode; } [nr_namespaces];
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_NAMESPACES = 16,
+
PERF_RECORD_MAX, /* non-ABI */
};
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index b16003e..ea3789d 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -347,6 +347,9 @@ Enable weightened sampling. An additional weight is recorded per sample and can
displayed with the weight and local_weight sort keys. This currently works for TSX
abort events and some memory events in precise mode on modern Intel CPUs.
+--namespaces::
+Record events of type PERF_RECORD_NAMESPACES.
+
--transaction::
Record transaction flags for transaction related events.
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 4f52d85..e54b1f9 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -393,6 +393,7 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
.comm = perf_event__process_comm,
.exit = perf_event__process_exit,
.fork = perf_event__process_fork,
+ .namespaces = perf_event__process_namespaces,
.ordered_events = true,
.ordering_requires_timestamps = true,
},
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 1b96a31..5e480315 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -364,6 +364,7 @@ static struct perf_tool tool = {
.exit = perf_event__process_exit,
.fork = perf_event__process_fork,
.lost = perf_event__process_lost,
+ .namespaces = perf_event__process_namespaces,
.ordered_events = true,
.ordering_requires_timestamps = true,
};
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index b9bc7e3..8d1d13b 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -333,6 +333,18 @@ static int perf_event__repipe_comm(struct perf_tool *tool,
return err;
}
+static int perf_event__repipe_namespaces(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ int err = perf_event__process_namespaces(tool, event, sample, machine);
+
+ perf_event__repipe(tool, event, sample, machine);
+
+ return err;
+}
+
static int perf_event__repipe_exit(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
@@ -660,6 +672,7 @@ static int __cmd_inject(struct perf_inject *inject)
session->itrace_synth_opts = &inject->itrace_synth_opts;
inject->itrace_synth_opts.inject = true;
inject->tool.comm = perf_event__repipe_comm;
+ inject->tool.namespaces = perf_event__repipe_namespaces;
inject->tool.exit = perf_event__repipe_exit;
inject->tool.id_index = perf_event__repipe_id_index;
inject->tool.auxtrace_info = perf_event__process_auxtrace_info;
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 6da8d08..d509e74 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -964,6 +964,7 @@ static struct perf_tool perf_kmem = {
.comm = perf_event__process_comm,
.mmap = perf_event__process_mmap,
.mmap2 = perf_event__process_mmap2,
+ .namespaces = perf_event__process_namespaces,
.ordered_events = true,
};
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 08fa88f..18e6c38 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -1044,6 +1044,7 @@ static int read_events(struct perf_kvm_stat *kvm)
struct perf_tool eops = {
.sample = process_sample_event,
.comm = perf_event__process_comm,
+ .namespaces = perf_event__process_namespaces,
.ordered_events = true,
};
struct perf_data_file file = {
@@ -1348,6 +1349,7 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
kvm->tool.exit = perf_event__process_exit;
kvm->tool.fork = perf_event__process_fork;
kvm->tool.lost = process_lost_event;
+ kvm->tool.namespaces = perf_event__process_namespaces;
kvm->tool.ordered_events = true;
perf_tool__fill_defaults(&kvm->tool);
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index ce3bfb4..d750cca 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -858,6 +858,7 @@ static int __cmd_report(bool display_info)
struct perf_tool eops = {
.sample = process_sample_event,
.comm = perf_event__process_comm,
+ .namespaces = perf_event__process_namespaces,
.ordered_events = true,
};
struct perf_data_file file = {
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index 6114e07..030a6cf 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -342,6 +342,7 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused)
.lost = perf_event__process_lost,
.fork = perf_event__process_fork,
.build_id = perf_event__process_build_id,
+ .namespaces = perf_event__process_namespaces,
.ordered_events = true,
},
.input_name = "perf.data",
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index bc84a37..99562c7 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -876,6 +876,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
signal(SIGTERM, sig_handler);
signal(SIGSEGV, sigsegv_handler);
+ if (rec->opts.record_namespaces)
+ tool->namespace_events = true;
+
if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
signal(SIGUSR2, snapshot_sig_handler);
if (rec->opts.auxtrace_snapshot_mode)
@@ -1497,6 +1500,7 @@ static struct record record = {
.fork = perf_event__process_fork,
.exit = perf_event__process_exit,
.comm = perf_event__process_comm,
+ .namespaces = perf_event__process_namespaces,
.mmap = perf_event__process_mmap,
.mmap2 = perf_event__process_mmap2,
.ordered_events = true,
@@ -1611,6 +1615,8 @@ static struct option __record_options[] = {
"opts", "AUX area tracing Snapshot Mode", ""),
OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
"per thread proc mmap processing timeout in ms"),
+ OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
+ "Record namespaces events"),
OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
"Record context switch events"),
OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index f03a5ea..5ab8117 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -700,6 +700,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
.mmap = perf_event__process_mmap,
.mmap2 = perf_event__process_mmap2,
.comm = perf_event__process_comm,
+ .namespaces = perf_event__process_namespaces,
.exit = perf_event__process_exit,
.fork = perf_event__process_fork,
.lost = perf_event__process_lost,
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index b94cf0d..16170e9 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -3272,6 +3272,7 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
.tool = {
.sample = perf_sched__process_tracepoint_sample,
.comm = perf_event__process_comm,
+ .namespaces = perf_event__process_namespaces,
.lost = perf_event__process_lost,
.fork = perf_sched__process_fork_event,
.ordered_events = true,
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index c0783b4..f1ce806 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -2097,6 +2097,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
.mmap = perf_event__process_mmap,
.mmap2 = perf_event__process_mmap2,
.comm = perf_event__process_comm,
+ .namespaces = perf_event__process_namespaces,
.exit = perf_event__process_exit,
.fork = perf_event__process_fork,
.attr = process_attr,
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 256f1fa..912fedc 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -2415,8 +2415,9 @@ static int trace__replay(struct trace *trace)
trace->tool.exit = perf_event__process_exit;
trace->tool.fork = perf_event__process_fork;
trace->tool.attr = perf_event__process_attr;
- trace->tool.tracing_data = perf_event__process_tracing_data;
+ trace->tool.tracing_data = perf_event__process_tracing_data;
trace->tool.build_id = perf_event__process_build_id;
+ trace->tool.namespaces = perf_event__process_namespaces;
trace->tool.ordered_events = true;
trace->tool.ordering_requires_timestamps = true;
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 1c27d94..806c216 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -50,6 +50,7 @@ struct record_opts {
bool running_time;
bool full_auxtrace;
bool auxtrace_snapshot_mode;
+ bool record_namespaces;
bool record_switch_events;
bool all_kernel;
bool all_user;
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 5da376b..2ea5ee1 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -42,6 +42,7 @@ libperf-y += pstack.o
libperf-y += session.o
libperf-$(CONFIG_AUDIT) += syscalltbl.o
libperf-y += ordered-events.o
+libperf-y += namespaces.o
libperf-y += comm.o
libperf-y += thread.o
libperf-y += thread_map.o
diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index 4e6cbc9..89ece24 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -1468,6 +1468,7 @@ int bt_convert__perf2ctf(const char *input, const char *path,
.lost = perf_event__process_lost,
.tracing_data = perf_event__process_tracing_data,
.build_id = perf_event__process_build_id,
+ .namespaces = perf_event__process_namespaces,
.ordered_events = true,
.ordering_requires_timestamps = true,
},
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 4ea7ce7..fb52819 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -31,6 +31,7 @@ static const char *perf_event__names[] = {
[PERF_RECORD_LOST_SAMPLES] = "LOST_SAMPLES",
[PERF_RECORD_SWITCH] = "SWITCH",
[PERF_RECORD_SWITCH_CPU_WIDE] = "SWITCH_CPU_WIDE",
+ [PERF_RECORD_NAMESPACES] = "NAMESPACES",
[PERF_RECORD_HEADER_ATTR] = "ATTR",
[PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE",
[PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA",
@@ -49,6 +50,16 @@ static const char *perf_event__names[] = {
[PERF_RECORD_TIME_CONV] = "TIME_CONV",
};
+static const char *perf_ns__names[] = {
+ [NET_NS_INDEX] = "net",
+ [UTS_NS_INDEX] = "uts",
+ [IPC_NS_INDEX] = "ipc",
+ [PID_NS_INDEX] = "pid",
+ [USER_NS_INDEX] = "user",
+ [MNT_NS_INDEX] = "mnt",
+ [CGROUP_NS_INDEX] = "cgroup",
+};
+
const char *perf_event__name(unsigned int id)
{
if (id >= ARRAY_SIZE(perf_event__names))
@@ -58,6 +69,13 @@ const char *perf_event__name(unsigned int id)
return perf_event__names[id];
}
+static const char *perf_ns__name(unsigned int id)
+{
+ if (id >= ARRAY_SIZE(perf_ns__names))
+ return "UNKNOWN";
+ return perf_ns__names[id];
+}
+
static int perf_tool__process_synth_event(struct perf_tool *tool,
union perf_event *event,
struct machine *machine,
@@ -1008,6 +1026,33 @@ size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp)
return fprintf(fp, "%s: %s:%d/%d\n", s, event->comm.comm, event->comm.pid, event->comm.tid);
}
+size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp)
+{
+ size_t ret = 0;
+ struct perf_ns_link_info *ns_link_info;
+ u32 nr_namespaces, idx;
+
+ ns_link_info = event->namespaces.link_info;
+ nr_namespaces = event->namespaces.nr_namespaces;
+
+ ret += fprintf(fp, " %d/%d - nr_namespaces: %u\n\t\t[",
+ event->namespaces.pid,
+ event->namespaces.tid,
+ nr_namespaces);
+
+ for (idx = 0; idx < nr_namespaces; idx++) {
+ if (idx && (idx % 4 == 0))
+ ret += fprintf(fp, "\n\t\t ");
+
+ ret += fprintf(fp, "%u/%s: %" PRIu64 "/%#" PRIx64 "%s", idx,
+ perf_ns__name(idx), (u64)ns_link_info[idx].dev,
+ (u64)ns_link_info[idx].ino,
+ ((idx + 1) != nr_namespaces) ? ", " : "]\n");
+ }
+
+ return ret;
+}
+
int perf_event__process_comm(struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
@@ -1016,6 +1061,14 @@ int perf_event__process_comm(struct perf_tool *tool __maybe_unused,
return machine__process_comm_event(machine, event, sample);
}
+int perf_event__process_namespaces(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ return machine__process_namespaces_event(machine, event, sample);
+}
+
int perf_event__process_lost(struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
@@ -1196,6 +1249,9 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp)
case PERF_RECORD_MMAP:
ret += perf_event__fprintf_mmap(event, fp);
break;
+ case PERF_RECORD_NAMESPACES:
+ ret += perf_event__fprintf_namespaces(event, fp);
+ break;
case PERF_RECORD_MMAP2:
ret += perf_event__fprintf_mmap2(event, fp);
break;
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index c735c53..b39ff79 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -39,6 +39,13 @@ struct comm_event {
char comm[16];
};
+struct namespaces_event {
+ struct perf_event_header header;
+ u32 pid, tid;
+ u64 nr_namespaces;
+ struct perf_ns_link_info link_info[];
+};
+
struct fork_event {
struct perf_event_header header;
u32 pid, ppid;
@@ -485,6 +492,7 @@ union perf_event {
struct mmap_event mmap;
struct mmap2_event mmap2;
struct comm_event comm;
+ struct namespaces_event namespaces;
struct fork_event fork;
struct lost_event lost;
struct lost_samples_event lost_samples;
@@ -587,6 +595,10 @@ int perf_event__process_switch(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
+int perf_event__process_namespaces(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
int perf_event__process_mmap(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
@@ -653,6 +665,7 @@ size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_thread_map(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_cpu_map(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp);
size_t perf_event__fprintf(union perf_event *event, FILE *fp);
u64 kallsyms__get_function_start(const char *kallsyms_filename,
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index ac59710..175dc23 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -932,6 +932,9 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts,
attr->mmap2 = track && !perf_missing_features.mmap2;
attr->comm = track;
+ if (opts->record_namespaces)
+ attr->namespaces = track;
+
if (opts->record_switch_events)
attr->context_switch = track;
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index b9974fe..dfc6004 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -13,6 +13,7 @@
#include <symbol/kallsyms.h>
#include "unwind.h"
#include "linux/hash.h"
+#include "asm/bug.h"
static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock);
@@ -501,6 +502,37 @@ int machine__process_comm_event(struct machine *machine, union perf_event *event
return err;
}
+int machine__process_namespaces_event(struct machine *machine __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused)
+{
+ struct thread *thread = machine__findnew_thread(machine,
+ event->namespaces.pid,
+ event->namespaces.tid);
+ int err = 0;
+
+ WARN_ONCE(event->namespaces.nr_namespaces > NR_NAMESPACES,
+ "\nWARNING: kernel seems to support more namespaces than perf"
+ " tool.\nTry updating the perf tool..\n\n");
+
+ WARN_ONCE(event->namespaces.nr_namespaces < NR_NAMESPACES,
+ "\nWARNING: perf tool seems to support more namespaces than"
+ " the kernel.\nTry updating the kernel..\n\n");
+
+ if (dump_trace)
+ perf_event__fprintf_namespaces(event, stdout);
+
+ if (thread == NULL ||
+ thread__set_namespaces(thread, sample->time, &event->namespaces)) {
+ dump_printf("problem processing PERF_RECORD_NAMESPACES, skipping event.\n");
+ err = -1;
+ }
+
+ thread__put(thread);
+
+ return err;
+}
+
int machine__process_lost_event(struct machine *machine __maybe_unused,
union perf_event *event, struct perf_sample *sample __maybe_unused)
{
@@ -1538,6 +1570,8 @@ int machine__process_event(struct machine *machine, union perf_event *event,
ret = machine__process_comm_event(machine, event, sample); break;
case PERF_RECORD_MMAP:
ret = machine__process_mmap_event(machine, event, sample); break;
+ case PERF_RECORD_NAMESPACES:
+ ret = machine__process_namespaces_event(machine, event, sample); break;
case PERF_RECORD_MMAP2:
ret = machine__process_mmap2_event(machine, event, sample); break;
case PERF_RECORD_FORK:
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index a283050..3cdb134 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -97,6 +97,9 @@ int machine__process_itrace_start_event(struct machine *machine,
union perf_event *event);
int machine__process_switch_event(struct machine *machine,
union perf_event *event);
+int machine__process_namespaces_event(struct machine *machine,
+ union perf_event *event,
+ struct perf_sample *sample);
int machine__process_mmap_event(struct machine *machine, union perf_event *event,
struct perf_sample *sample);
int machine__process_mmap2_event(struct machine *machine, union perf_event *event,
diff --git a/tools/perf/util/namespaces.c b/tools/perf/util/namespaces.c
new file mode 100644
index 0000000..2de8da6
--- /dev/null
+++ b/tools/perf/util/namespaces.c
@@ -0,0 +1,36 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * Copyright (C) 2017 Hari Bathini, IBM Corporation
+ */
+
+#include "namespaces.h"
+#include "util.h"
+#include "event.h"
+#include <stdlib.h>
+#include <stdio.h>
+
+struct namespaces *namespaces__new(struct namespaces_event *event)
+{
+ struct namespaces *namespaces;
+ u64 link_info_size = ((event ? event->nr_namespaces : NR_NAMESPACES) *
+ sizeof(struct perf_ns_link_info));
+
+ namespaces = zalloc(sizeof(struct namespaces) + link_info_size);
+ if (!namespaces)
+ return NULL;
+
+ namespaces->end_time = -1;
+
+ if (event)
+ memcpy(namespaces->link_info, event->link_info, link_info_size);
+
+ return namespaces;
+}
+
+void namespaces__free(struct namespaces *namespaces)
+{
+ free(namespaces);
+}
diff --git a/tools/perf/util/namespaces.h b/tools/perf/util/namespaces.h
new file mode 100644
index 0000000..468f1e9
--- /dev/null
+++ b/tools/perf/util/namespaces.h
@@ -0,0 +1,26 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * Copyright (C) 2017 Hari Bathini, IBM Corporation
+ */
+
+#ifndef __PERF_NAMESPACES_H
+#define __PERF_NAMESPACES_H
+
+#include "../perf.h"
+#include <linux/list.h>
+
+struct namespaces_event;
+
+struct namespaces {
+ struct list_head list;
+ u64 end_time;
+ struct perf_ns_link_info link_info[];
+};
+
+struct namespaces *namespaces__new(struct namespaces_event *event);
+void namespaces__free(struct namespaces *namespaces);
+
+#endif /* __PERF_NAMESPACES_H */
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 1dd617d..ae42e74 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1239,6 +1239,8 @@ static int machines__deliver_event(struct machines *machines,
return tool->mmap2(tool, event, sample, machine);
case PERF_RECORD_COMM:
return tool->comm(tool, event, sample, machine);
+ case PERF_RECORD_NAMESPACES:
+ return tool->namespaces(tool, event, sample, machine);
case PERF_RECORD_FORK:
return tool->fork(tool, event, sample, machine);
case PERF_RECORD_EXIT:
@@ -1494,6 +1496,11 @@ int perf_session__register_idle_thread(struct perf_session *session)
err = -1;
}
+ if (thread == NULL || thread__set_namespaces(thread, 0, NULL)) {
+ pr_err("problem inserting idle task.\n");
+ err = -1;
+ }
+
/* machine__findnew_thread() got the thread, so put it */
thread__put(thread);
return err;
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 74e79d2..dcdb87a 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -7,6 +7,7 @@
#include "thread-stack.h"
#include "util.h"
#include "debug.h"
+#include "namespaces.h"
#include "comm.h"
#include "unwind.h"
@@ -40,6 +41,7 @@ struct thread *thread__new(pid_t pid, pid_t tid)
thread->tid = tid;
thread->ppid = -1;
thread->cpu = -1;
+ INIT_LIST_HEAD(&thread->namespaces_list);
INIT_LIST_HEAD(&thread->comm_list);
comm_str = malloc(32);
@@ -66,7 +68,8 @@ err_thread:
void thread__delete(struct thread *thread)
{
- struct comm *comm, *tmp;
+ struct namespaces *namespaces, *tmp_namespaces;
+ struct comm *comm, *tmp_comm;
BUG_ON(!RB_EMPTY_NODE(&thread->rb_node));
@@ -76,7 +79,12 @@ void thread__delete(struct thread *thread)
map_groups__put(thread->mg);
thread->mg = NULL;
}
- list_for_each_entry_safe(comm, tmp, &thread->comm_list, list) {
+ list_for_each_entry_safe(namespaces, tmp_namespaces,
+ &thread->namespaces_list, list) {
+ list_del(&namespaces->list);
+ namespaces__free(namespaces);
+ }
+ list_for_each_entry_safe(comm, tmp_comm, &thread->comm_list, list) {
list_del(&comm->list);
comm__free(comm);
}
@@ -104,6 +112,38 @@ void thread__put(struct thread *thread)
}
}
+/*
+ * Return the entry at the head of the thread's namespaces_list, or NULL
+ * when the list is empty.
+ */
+struct namespaces *thread__namespaces(const struct thread *thread)
+{
+	if (!list_empty(&thread->namespaces_list))
+		return list_first_entry(&thread->namespaces_list,
+					struct namespaces, list);
+
+	return NULL;
+}
+
+/*
+ * Record a new set of namespaces for @thread.
+ *
+ * A record built from @event (or an empty one when @event is NULL) is put at
+ * the head of the thread's namespaces_list. If @timestamp is non-zero and the
+ * thread already had a record, that previous record's end_time is set to
+ * @timestamp.
+ *
+ * Returns 0 on success, -ENOMEM if the new record cannot be allocated.
+ */
+int thread__set_namespaces(struct thread *thread, u64 timestamp,
+			   struct namespaces_event *event)
+{
+	struct namespaces *prev = thread__namespaces(thread);
+	struct namespaces *fresh = namespaces__new(event);
+
+	if (fresh == NULL)
+		return -ENOMEM;
+
+	list_add(&fresh->list, &thread->namespaces_list);
+
+	if (!timestamp || prev == NULL)
+		return 0;
+
+	/*
+	 * A setns syscall must have changed some or all of the namespaces
+	 * of this thread. The previous head now sits right after the new
+	 * entry; close its validity window.
+	 */
+	prev->end_time = timestamp;
+
+	return 0;
+}
+
struct comm *thread__comm(const struct thread *thread)
{
if (list_empty(&thread->comm_list))
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index e571885..4eb849e 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -28,6 +28,7 @@ struct thread {
bool comm_set;
int comm_len;
bool dead; /* if set thread has exited */
+ struct list_head namespaces_list;
struct list_head comm_list;
u64 db_id;
@@ -40,6 +41,7 @@ struct thread {
};
struct machine;
+struct namespaces;
struct comm;
struct thread *thread__new(pid_t pid, pid_t tid);
@@ -62,6 +64,10 @@ static inline void thread__exited(struct thread *thread)
thread->dead = true;
}
+struct namespaces *thread__namespaces(const struct thread *thread);
+int thread__set_namespaces(struct thread *thread, u64 timestamp,
+ struct namespaces_event *event);
+
int __thread__set_comm(struct thread *thread, const char *comm, u64 timestamp,
bool exec);
static inline int thread__set_comm(struct thread *thread, const char *comm,
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
index ac2590a..829471a 100644
--- a/tools/perf/util/tool.h
+++ b/tools/perf/util/tool.h
@@ -40,6 +40,7 @@ struct perf_tool {
event_op mmap,
mmap2,
comm,
+ namespaces,
fork,
exit,
lost,
@@ -66,6 +67,7 @@ struct perf_tool {
event_op3 auxtrace;
bool ordered_events;
bool ordering_requires_timestamps;
+ bool namespace_events;
};
#endif /* __PERF_TOOL_H */
Commit-ID: e422267322cd319e2695a535e47c5b1feeac45eb
Gitweb: http://git.kernel.org/tip/e422267322cd319e2695a535e47c5b1feeac45eb
Author: Hari Bathini <[email protected]>
AuthorDate: Wed, 8 Mar 2017 02:11:36 +0530
Committer: Arnaldo Carvalho de Melo <[email protected]>
CommitDate: Mon, 13 Mar 2017 15:57:41 -0300
perf: Add PERF_RECORD_NAMESPACES to include namespaces related info
With the advent of container technologies like docker, that depend on
namespaces for isolation, there is a need for tracing support for
namespaces. This patch introduces new PERF_RECORD_NAMESPACES event for
recording namespaces related info. By recording info for every
namespace, it is left to userspace to take a call on the definition of a
container and trace containers by updating perf tool accordingly.
Each namespace has a combination of device and inode numbers. Though
every namespace has the same device number currently, that may change in
future to avoid the need for a namespace of namespaces. Considering such
possibility, record both device and inode numbers separately for each
namespace.
Signed-off-by: Hari Bathini <[email protected]>
Acked-by: Jiri Olsa <[email protected]>
Acked-by: Peter Zijlstra <[email protected]>
Cc: Alexander Shishkin <[email protected]>
Cc: Alexei Starovoitov <[email protected]>
Cc: Ananth N Mavinakayanahalli <[email protected]>
Cc: Aravinda Prasad <[email protected]>
Cc: Brendan Gregg <[email protected]>
Cc: Daniel Borkmann <[email protected]>
Cc: Eric Biederman <[email protected]>
Cc: Sargun Dhillon <[email protected]>
Cc: Steven Rostedt <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
include/linux/perf_event.h | 2 +
include/uapi/linux/perf_event.h | 32 ++++++++-
kernel/events/core.c | 139 ++++++++++++++++++++++++++++++++++++++++
kernel/fork.c | 2 +
kernel/nsproxy.c | 3 +
5 files changed, 177 insertions(+), 1 deletion(-)
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 000fdb2..f19a823 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1112,6 +1112,7 @@ extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks
extern void perf_event_exec(void);
extern void perf_event_comm(struct task_struct *tsk, bool exec);
+extern void perf_event_namespaces(struct task_struct *tsk);
extern void perf_event_fork(struct task_struct *tsk);
/* Callchains */
@@ -1315,6 +1316,7 @@ static inline int perf_unregister_guest_info_callbacks
static inline void perf_event_mmap(struct vm_area_struct *vma) { }
static inline void perf_event_exec(void) { }
static inline void perf_event_comm(struct task_struct *tsk, bool exec) { }
+static inline void perf_event_namespaces(struct task_struct *tsk) { }
static inline void perf_event_fork(struct task_struct *tsk) { }
static inline void perf_event_init(void) { }
static inline int perf_swevent_get_recursion_context(void) { return -1; }
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index c66a485..bec0aad 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -344,7 +344,8 @@ struct perf_event_attr {
use_clockid : 1, /* use @clockid for time fields */
context_switch : 1, /* context switch data */
write_backward : 1, /* Write ring buffer from end to beginning */
- __reserved_1 : 36;
+ namespaces : 1, /* include namespaces data */
+ __reserved_1 : 35;
union {
__u32 wakeup_events; /* wakeup every n events */
@@ -610,6 +611,23 @@ struct perf_event_header {
__u16 size;
};
+/* Device and inode number pair recorded for one namespace. */
+struct perf_ns_link_info {
+ __u64 dev;
+ __u64 ino;
+};
+
+/*
+ * Slot of each namespace type within the link_info[] array carried by a
+ * PERF_RECORD_NAMESPACES record.
+ */
+enum {
+ NET_NS_INDEX = 0,
+ UTS_NS_INDEX = 1,
+ IPC_NS_INDEX = 2,
+ PID_NS_INDEX = 3,
+ USER_NS_INDEX = 4,
+ MNT_NS_INDEX = 5,
+ CGROUP_NS_INDEX = 6,
+
+ NR_NAMESPACES, /* number of available namespaces */
+};
+
enum perf_event_type {
/*
@@ -862,6 +880,18 @@ enum perf_event_type {
*/
PERF_RECORD_SWITCH_CPU_WIDE = 15,
+ /*
+ * struct {
+ * struct perf_event_header header;
+ * u32 pid;
+ * u32 tid;
+ * u64 nr_namespaces;
+ * { u64 dev, inode; } [nr_namespaces];
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_NAMESPACES = 16,
+
PERF_RECORD_MAX, /* non-ABI */
};
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 6f41548f..16c877a 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -48,6 +48,8 @@
#include <linux/parser.h>
#include <linux/sched/clock.h>
#include <linux/sched/mm.h>
+#include <linux/proc_ns.h>
+#include <linux/mount.h>
#include "internal.h"
@@ -379,6 +381,7 @@ static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
static atomic_t nr_mmap_events __read_mostly;
static atomic_t nr_comm_events __read_mostly;
+static atomic_t nr_namespaces_events __read_mostly;
static atomic_t nr_task_events __read_mostly;
static atomic_t nr_freq_events __read_mostly;
static atomic_t nr_switch_events __read_mostly;
@@ -3991,6 +3994,8 @@ static void unaccount_event(struct perf_event *event)
atomic_dec(&nr_mmap_events);
if (event->attr.comm)
atomic_dec(&nr_comm_events);
+ if (event->attr.namespaces)
+ atomic_dec(&nr_namespaces_events);
if (event->attr.task)
atomic_dec(&nr_task_events);
if (event->attr.freq)
@@ -6491,6 +6496,7 @@ static void perf_event_task(struct task_struct *task,
void perf_event_fork(struct task_struct *task)
{
perf_event_task(task, NULL, 1);
+ perf_event_namespaces(task);
}
/*
@@ -6593,6 +6599,132 @@ void perf_event_comm(struct task_struct *task, bool exec)
}
/*
+ * namespaces tracking
+ */
+
+/*
+ * Working state for emitting one PERF_RECORD_NAMESPACES record: the task
+ * being described plus the record payload (event_id) that is written out.
+ */
+struct perf_namespaces_event {
+ /* task whose pid/tid are reported in the record */
+ struct task_struct *task;
+
+ /* payload copied to the output buffer as-is */
+ struct {
+ struct perf_event_header header;
+
+ u32 pid;
+ u32 tid;
+ u64 nr_namespaces;
+ struct perf_ns_link_info link_info[NR_NAMESPACES];
+ } event_id;
+};
+
+/* Non-zero when @event was opened with attr.namespaces set. */
+static int perf_event_namespaces_match(struct perf_event *event)
+{
+ return event->attr.namespaces;
+}
+
+/*
+ * Write the prepared PERF_RECORD_NAMESPACES record to @event's buffer.
+ *
+ * @data points at the struct perf_namespaces_event shared by every event
+ * this callback is invoked for. perf_event_header__init_id() adjusts
+ * header.size in that shared record for the current event's sample_id
+ * data, so the size is saved on entry and restored on exit; otherwise the
+ * adjustment would accumulate from one event to the next and later
+ * records would carry a wrong size.
+ */
+static void perf_event_namespaces_output(struct perf_event *event,
+					 void *data)
+{
+	struct perf_namespaces_event *namespaces_event = data;
+	struct perf_output_handle handle;
+	struct perf_sample_data sample;
+	/* size of the bare record, before any per-event sample_id growth */
+	u16 header_size = namespaces_event->event_id.header.size;
+	int ret;
+
+	if (!perf_event_namespaces_match(event))
+		return;
+
+	perf_event_header__init_id(&namespaces_event->event_id.header,
+				   &sample, event);
+	ret = perf_output_begin(&handle, event,
+				namespaces_event->event_id.header.size);
+	if (ret)
+		goto out;
+
+	/* pid/tid are resolved per event */
+	namespaces_event->event_id.pid = perf_event_pid(event,
+							namespaces_event->task);
+	namespaces_event->event_id.tid = perf_event_tid(event,
+							namespaces_event->task);
+
+	perf_output_put(&handle, namespaces_event->event_id);
+
+	perf_event__output_id_sample(event, &handle, &sample);
+
+	perf_output_end(&handle);
+out:
+	namespaces_event->event_id.header.size = header_size;
+}
+
+/*
+ * Fill @ns_link_info with the device and inode number of the namespace of
+ * @task selected by @ns_ops.
+ *
+ * If the namespace path cannot be obtained, @ns_link_info is left
+ * untouched. On success the path reference taken by ns_get_path() is
+ * dropped again before returning, so nothing is leaked per record.
+ */
+static void perf_fill_ns_link_info(struct perf_ns_link_info *ns_link_info,
+				   struct task_struct *task,
+				   const struct proc_ns_operations *ns_ops)
+{
+	struct path ns_path;
+	struct inode *ns_inode;
+	void *error;
+
+	error = ns_get_path(&ns_path, task, ns_ops);
+	if (!error) {
+		ns_inode = ns_path.dentry->d_inode;
+		ns_link_info->dev = new_encode_dev(ns_inode->i_sb->s_dev);
+		ns_link_info->ino = ns_inode->i_ino;
+		/* release the reference ns_get_path() handed us */
+		path_put(&ns_path);
+	}
+}
+
+/*
+ * Build a PERF_RECORD_NAMESPACES record for @task and pass it to
+ * perf_iterate_sb() with perf_event_namespaces_output() as the callback.
+ *
+ * Does nothing when no event currently has attr.namespaces accounted.
+ * Slots of namespace types that are compiled out stay zeroed.
+ */
+void perf_event_namespaces(struct task_struct *task)
+{
+	struct perf_namespaces_event ns_event;
+	struct perf_ns_link_info *info;
+
+	if (atomic_read(&nr_namespaces_events) == 0)
+		return;
+
+	/* pid, tid and link_info[] are zero-initialised here and set later */
+	ns_event = (struct perf_namespaces_event){
+		.task = task,
+		.event_id = {
+			.header = {
+				.type = PERF_RECORD_NAMESPACES,
+				.misc = 0,
+				.size = sizeof(ns_event.event_id),
+			},
+			.nr_namespaces = NR_NAMESPACES,
+		},
+	};
+
+	info = ns_event.event_id.link_info;
+
+	perf_fill_ns_link_info(&info[MNT_NS_INDEX], task, &mntns_operations);
+
+#ifdef CONFIG_USER_NS
+	perf_fill_ns_link_info(&info[USER_NS_INDEX], task, &userns_operations);
+#endif
+#ifdef CONFIG_NET_NS
+	perf_fill_ns_link_info(&info[NET_NS_INDEX], task, &netns_operations);
+#endif
+#ifdef CONFIG_UTS_NS
+	perf_fill_ns_link_info(&info[UTS_NS_INDEX], task, &utsns_operations);
+#endif
+#ifdef CONFIG_IPC_NS
+	perf_fill_ns_link_info(&info[IPC_NS_INDEX], task, &ipcns_operations);
+#endif
+#ifdef CONFIG_PID_NS
+	perf_fill_ns_link_info(&info[PID_NS_INDEX], task, &pidns_operations);
+#endif
+#ifdef CONFIG_CGROUPS
+	perf_fill_ns_link_info(&info[CGROUP_NS_INDEX], task,
+			       &cgroupns_operations);
+#endif
+
+	perf_iterate_sb(perf_event_namespaces_output, &ns_event, NULL);
+}
+
+/*
* mmap tracking
*/
@@ -9146,6 +9278,8 @@ static void account_event(struct perf_event *event)
atomic_inc(&nr_mmap_events);
if (event->attr.comm)
atomic_inc(&nr_comm_events);
+ if (event->attr.namespaces)
+ atomic_inc(&nr_namespaces_events);
if (event->attr.task)
atomic_inc(&nr_task_events);
if (event->attr.freq)
@@ -9691,6 +9825,11 @@ SYSCALL_DEFINE5(perf_event_open,
return -EACCES;
}
+ if (attr.namespaces) {
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+ }
+
if (attr.freq) {
if (attr.sample_freq > sysctl_perf_event_sample_rate)
return -EINVAL;
diff --git a/kernel/fork.c b/kernel/fork.c
index 6c463c80..afa2947 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2352,6 +2352,8 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
}
}
+ perf_event_namespaces(current);
+
bad_unshare_cleanup_cred:
if (new_cred)
put_cred(new_cred);
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 782102e..f6c5d33 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -26,6 +26,7 @@
#include <linux/file.h>
#include <linux/syscalls.h>
#include <linux/cgroup.h>
+#include <linux/perf_event.h>
static struct kmem_cache *nsproxy_cachep;
@@ -262,6 +263,8 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype)
goto out;
}
switch_task_namespaces(tsk, new_nsproxy);
+
+ perf_event_namespaces(tsk);
out:
fput(file);
return err;
Commit-ID: e907caf3a07ee42ef08ba689a436fd1eb99fbf62
Gitweb: http://git.kernel.org/tip/e907caf3a07ee42ef08ba689a436fd1eb99fbf62
Author: Hari Bathini <[email protected]>
AuthorDate: Wed, 8 Mar 2017 02:11:51 +0530
Committer: Arnaldo Carvalho de Melo <[email protected]>
CommitDate: Tue, 14 Mar 2017 15:16:09 -0300
perf record: Synthesize namespace events for current processes
Synthesize PERF_RECORD_NAMESPACES events for processes that were running prior
to invocation of perf record. The data for this is taken from /proc/$PID/ns.
These changes make way for analyzing events with regard to namespaces.
Committer notes:
Check if 'tool' is NULL in perf_event__synthesize_namespaces(), as in the
test__mmap_thread_lookup case, i.e. 'perf test Lookup mmap thread'.
Testing it:
# ps axH > /tmp/allthreads
# perf record -a --namespaces usleep 1
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 1.169 MB perf.data (8 samples) ]
# perf report -D | grep PERF_RECORD_NAMESPACES | wc -l
602
# wc -l /tmp/allthreads
601 /tmp/allthreads
# tail /tmp/allthreads
16951 pts/4 T 0:00 git rebase -i a033bf1bfacdaa25642e6bcc857a7d0f67cc3c92^
16952 pts/4 T 0:00 /bin/sh /usr/libexec/git-core/git-rebase -i a033bf1bfacdaa25642e6bcc857a7d0f67cc3c92^
17176 pts/4 T 0:00 git commit --amend --no-post-rewrite
17204 pts/4 T 0:00 vim /home/acme/git/linux/.git/COMMIT_EDITMSG
18939 ? S 0:00 [kworker/2:1]
18947 ? S 0:00 [kworker/3:0]
18974 ? S 0:00 [kworker/1:0]
19047 ? S 0:00 [kworker/0:1]
19152 pts/6 S+ 0:00 weechat
19153 pts/7 R+ 0:00 ps axH
# perf report -D | grep PERF_RECORD_NAMESPACES | tail
0 0 0x125068 [0xa0]: PERF_RECORD_NAMESPACES 17176/17176 - nr_namespaces: 7
0 0 0x1255b8 [0xa0]: PERF_RECORD_NAMESPACES 17204/17204 - nr_namespaces: 7
0 0 0x125df0 [0xa0]: PERF_RECORD_NAMESPACES 18939/18939 - nr_namespaces: 7
0 0 0x125f00 [0xa0]: PERF_RECORD_NAMESPACES 18947/18947 - nr_namespaces: 7
0 0 0x126010 [0xa0]: PERF_RECORD_NAMESPACES 18974/18974 - nr_namespaces: 7
0 0 0x126120 [0xa0]: PERF_RECORD_NAMESPACES 19047/19047 - nr_namespaces: 7
0 0 0x126230 [0xa0]: PERF_RECORD_NAMESPACES 19152/19152 - nr_namespaces: 7
0 0 0x129330 [0xa0]: PERF_RECORD_NAMESPACES 19154/19154 - nr_namespaces: 7
0 0 0x12a1f8 [0xa0]: PERF_RECORD_NAMESPACES 19155/19155 - nr_namespaces: 7
0 0 0x12b0b8 [0xa0]: PERF_RECORD_NAMESPACES 19155/19155 - nr_namespaces: 7
#
Humm, investigate why we got two records for the 19155 pid/tid...
Signed-off-by: Hari Bathini <[email protected]>
Tested-by: Arnaldo Carvalho de Melo <[email protected]>
Acked-by: Jiri Olsa <[email protected]>
Cc: Alexander Shishkin <[email protected]>
Cc: Alexei Starovoitov <[email protected]>
Cc: Ananth N Mavinakayanahalli <[email protected]>
Cc: Aravinda Prasad <[email protected]>
Cc: Brendan Gregg <[email protected]>
Cc: Daniel Borkmann <[email protected]>
Cc: Eric Biederman <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Sargun Dhillon <[email protected]>
Cc: Steven Rostedt <[email protected]>
Link: http://lkml.kernel.org/r/148891931111.25309.11073854609798681633.stgit@hbathini.in.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/builtin-record.c | 29 ++++++++++++--
tools/perf/util/event.c | 94 ++++++++++++++++++++++++++++++++++++++++++---
tools/perf/util/event.h | 6 +++
3 files changed, 119 insertions(+), 10 deletions(-)
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 99562c7..04faef7 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -986,6 +986,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
*/
if (forks) {
union perf_event *event;
+ pid_t tgid;
event = malloc(sizeof(event->comm) + machine->id_hdr_size);
if (event == NULL) {
@@ -999,10 +1000,30 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
* cannot see a correct process name for those events.
* Synthesize COMM event to prevent it.
*/
- perf_event__synthesize_comm(tool, event,
- rec->evlist->workload.pid,
- process_synthesized_event,
- machine);
+ tgid = perf_event__synthesize_comm(tool, event,
+ rec->evlist->workload.pid,
+ process_synthesized_event,
+ machine);
+ free(event);
+
+ if (tgid == -1)
+ goto out_child;
+
+ event = malloc(sizeof(event->namespaces) +
+ (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
+ machine->id_hdr_size);
+ if (event == NULL) {
+ err = -ENOMEM;
+ goto out_child;
+ }
+
+ /*
+ * Synthesize NAMESPACES event for the command specified.
+ */
+ perf_event__synthesize_namespaces(tool, event,
+ rec->evlist->workload.pid,
+ tgid, process_synthesized_event,
+ machine);
free(event);
perf_evlist__start_workload(rec->evlist);
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index fb52819..d082cb7 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -221,6 +221,58 @@ pid_t perf_event__synthesize_comm(struct perf_tool *tool,
return tgid;
}
+/*
+ * Look up /proc/<pid>/ns/<ns> and store its device and inode numbers in
+ * @ns_link_info.
+ *
+ * @ns_link_info is left untouched if the path does not fit the local
+ * buffer or if stat64() fails.
+ */
+static void perf_event__get_ns_link_info(pid_t pid, const char *ns,
+					 struct perf_ns_link_info *ns_link_info)
+{
+	struct stat64 st;
+	char proc_ns[128];
+	int len;
+
+	/* bounded formatting; pid_t is signed, hence %d */
+	len = snprintf(proc_ns, sizeof(proc_ns), "/proc/%d/ns/%s", pid, ns);
+	if (len < 0 || (size_t)len >= sizeof(proc_ns))
+		return;
+
+	if (stat64(proc_ns, &st) == 0) {
+		ns_link_info->dev = st.st_dev;
+		ns_link_info->ino = st.st_ino;
+	}
+}
+
+/*
+ * Synthesize a PERF_RECORD_NAMESPACES event for thread @pid of process @tgid
+ * and hand it to perf_tool__process_synth_event().
+ *
+ * @event must have room for the namespaces event, NR_NAMESPACES link_info
+ * entries and machine->id_hdr_size bytes; that whole area is zeroed first.
+ *
+ * Returns 0 when @tool is NULL, when namespace events are not enabled on
+ * @tool, or when the event was processed; -1 if processing failed.
+ */
+int perf_event__synthesize_namespaces(struct perf_tool *tool,
+				      union perf_event *event,
+				      pid_t pid, pid_t tgid,
+				      perf_event__handler_t process,
+				      struct machine *machine)
+{
+	size_t record_size;
+	u32 i;
+
+	if (tool == NULL || !tool->namespace_events)
+		return 0;
+
+	record_size = (sizeof(event->namespaces) +
+		       (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
+		       machine->id_hdr_size);
+
+	memset(&event->namespaces, 0, record_size);
+
+	event->namespaces.header.type = PERF_RECORD_NAMESPACES;
+	event->namespaces.header.size = record_size;
+
+	event->namespaces.pid = tgid;
+	event->namespaces.tid = pid;
+	event->namespaces.nr_namespaces = NR_NAMESPACES;
+
+	for (i = 0; i < event->namespaces.nr_namespaces; i++)
+		perf_event__get_ns_link_info(pid, perf_ns__name(i),
+					     &event->namespaces.link_info[i]);
+
+	if (perf_tool__process_synth_event(tool, event, machine, process) != 0)
+		return -1;
+
+	return 0;
+}
+
static int perf_event__synthesize_fork(struct perf_tool *tool,
union perf_event *event,
pid_t pid, pid_t tgid, pid_t ppid,
@@ -452,8 +504,9 @@ int perf_event__synthesize_modules(struct perf_tool *tool,
static int __event__synthesize_thread(union perf_event *comm_event,
union perf_event *mmap_event,
union perf_event *fork_event,
+ union perf_event *namespaces_event,
pid_t pid, int full,
- perf_event__handler_t process,
+ perf_event__handler_t process,
struct perf_tool *tool,
struct machine *machine,
bool mmap_data,
@@ -473,6 +526,11 @@ static int __event__synthesize_thread(union perf_event *comm_event,
if (tgid == -1)
return -1;
+ if (perf_event__synthesize_namespaces(tool, namespaces_event, pid,
+ tgid, process, machine) < 0)
+ return -1;
+
+
return perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid,
process, machine, mmap_data,
proc_map_timeout);
@@ -506,6 +564,11 @@ static int __event__synthesize_thread(union perf_event *comm_event,
if (perf_event__synthesize_fork(tool, fork_event, _pid, tgid,
ppid, process, machine) < 0)
break;
+
+ if (perf_event__synthesize_namespaces(tool, namespaces_event, _pid,
+ tgid, process, machine) < 0)
+ break;
+
/*
* Send the prepared comm event
*/
@@ -534,6 +597,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
unsigned int proc_map_timeout)
{
union perf_event *comm_event, *mmap_event, *fork_event;
+ union perf_event *namespaces_event;
int err = -1, thread, j;
comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size);
@@ -548,10 +612,16 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
if (fork_event == NULL)
goto out_free_mmap;
+ namespaces_event = malloc(sizeof(namespaces_event->namespaces) +
+ (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
+ machine->id_hdr_size);
+ if (namespaces_event == NULL)
+ goto out_free_fork;
+
err = 0;
for (thread = 0; thread < threads->nr; ++thread) {
if (__event__synthesize_thread(comm_event, mmap_event,
- fork_event,
+ fork_event, namespaces_event,
thread_map__pid(threads, thread), 0,
process, tool, machine,
mmap_data, proc_map_timeout)) {
@@ -577,7 +647,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
/* if not, generate events for it */
if (need_leader &&
__event__synthesize_thread(comm_event, mmap_event,
- fork_event,
+ fork_event, namespaces_event,
comm_event->comm.pid, 0,
process, tool, machine,
mmap_data, proc_map_timeout)) {
@@ -586,6 +656,8 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
}
}
}
+ free(namespaces_event);
+out_free_fork:
free(fork_event);
out_free_mmap:
free(mmap_event);
@@ -605,6 +677,7 @@ int perf_event__synthesize_threads(struct perf_tool *tool,
char proc_path[PATH_MAX];
struct dirent *dirent;
union perf_event *comm_event, *mmap_event, *fork_event;
+ union perf_event *namespaces_event;
int err = -1;
if (machine__is_default_guest(machine))
@@ -622,11 +695,17 @@ int perf_event__synthesize_threads(struct perf_tool *tool,
if (fork_event == NULL)
goto out_free_mmap;
+ namespaces_event = malloc(sizeof(namespaces_event->namespaces) +
+ (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
+ machine->id_hdr_size);
+ if (namespaces_event == NULL)
+ goto out_free_fork;
+
snprintf(proc_path, sizeof(proc_path), "%s/proc", machine->root_dir);
proc = opendir(proc_path);
if (proc == NULL)
- goto out_free_fork;
+ goto out_free_namespaces;
while ((dirent = readdir(proc)) != NULL) {
char *end;
@@ -638,13 +717,16 @@ int perf_event__synthesize_threads(struct perf_tool *tool,
* We may race with exiting thread, so don't stop just because
* one thread couldn't be synthesized.
*/
- __event__synthesize_thread(comm_event, mmap_event, fork_event, pid,
- 1, process, tool, machine, mmap_data,
+ __event__synthesize_thread(comm_event, mmap_event, fork_event,
+ namespaces_event, pid, 1, process,
+ tool, machine, mmap_data,
proc_map_timeout);
}
err = 0;
closedir(proc);
+out_free_namespaces:
+ free(namespaces_event);
out_free_fork:
free(fork_event);
out_free_mmap:
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index b39ff79..e1d8166 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -648,6 +648,12 @@ pid_t perf_event__synthesize_comm(struct perf_tool *tool,
perf_event__handler_t process,
struct machine *machine);
+int perf_event__synthesize_namespaces(struct perf_tool *tool,
+ union perf_event *event,
+ pid_t pid, pid_t tgid,
+ perf_event__handler_t process,
+ struct machine *machine);
+
int perf_event__synthesize_mmap_events(struct perf_tool *tool,
union perf_event *event,
pid_t pid, pid_t tgid,
Commit-ID: 96a44bbccdd8ff263829d42fe934e6094ea5bb20
Gitweb: http://git.kernel.org/tip/96a44bbccdd8ff263829d42fe934e6094ea5bb20
Author: Hari Bathini <[email protected]>
AuthorDate: Wed, 8 Mar 2017 02:12:06 +0530
Committer: Arnaldo Carvalho de Melo <[email protected]>
CommitDate: Tue, 14 Mar 2017 15:17:36 -0300
perf script: Add script print support for namespace events
Introduce a new option to display events of type PERF_RECORD_NAMESPACES
and update perf-script documentation accordingly.
Shown below is output (trimmed) of perf script command with the newly
introduced option, on perf.data generated with perf record command using
--namespaces option.
$ perf script --show-namespace-events
swapper 0 [000] 0.000000: PERF_RECORD_NAMESPACES 1/1 - nr_namespaces: 7
[0/net: 3/0xf000001c, 1/uts: 3/0xeffffffe, 2/ipc: 3/0xefffffff, 3/pid: 3/0xeffffffc,
4/user: 3/0xeffffffd, 5/mnt: 3/0xf0000000, 6/cgroup: 3/0xeffffffb]
swapper 0 [000] 0.000000: PERF_RECORD_NAMESPACES 2/2 - nr_namespaces: 7
[0/net: 3/0xf000001c, 1/uts: 3/0xeffffffe, 2/ipc: 3/0xefffffff, 3/pid: 3/0xeffffffc,
4/user: 3/0xeffffffd, 5/mnt: 3/0xf0000000, 6/cgroup: 3/0xeffffffb]
Committer notes:
Testing it:
Investigating that double PERF_RECORD_NAMESPACES for the 19155
pid/tid... It's more than that, there are two PERF_RECORD_COMM as well,
and with zeroed timestamps, so probably a synthesizing artifact...
# perf script --show-task --show-namespace
<SNIP>
perf 0 [000] 0.000000: PERF_RECORD_COMM: perf:19154/19154
perf 0 [000] 0.000000: PERF_RECORD_FORK(19155:19155):(19154:19154)
perf 0 [000] 0.000000: PERF_RECORD_NAMESPACES 19155/19155 - nr_namespaces: 7
[0/net: 3/0xf0000081, 1/uts: 3/0xeffffffe, 2/ipc: 3/0xefffffff, 3/pid: 3/0xeffffffc,
4/user: 3/0xeffffffd, 5/mnt: 3/0xf0000000, 6/cgroup: 3/0xeffffffb]
perf 0 [000] 0.000000: PERF_RECORD_COMM: perf:19155/19155
perf 0 [000] 0.000000: PERF_RECORD_COMM: perf:19155/19155
perf 0 [000] 0.000000: PERF_RECORD_NAMESPACES 19155/19155 - nr_namespaces: 7
[0/net: 3/0xf0000081, 1/uts: 3/0xeffffffe, 2/ipc: 3/0xefffffff, 3/pid: 3/0xeffffffc,
4/user: 3/0xeffffffd, 5/mnt: 3/0xf0000000, 6/cgroup: 3/0xeffffffb]
swapper 0 [000] 3110.881834: 1 cycles: ffffffffa7060bf6 native_write_msr (/lib/modules/4.11.0-rc1+/build/vmlinux)
<SNIP>
Signed-off-by: Hari Bathini <[email protected]>
Tested-by: Arnaldo Carvalho de Melo <[email protected]>
Acked-by: Jiri Olsa <[email protected]>
Cc: Alexander Shishkin <[email protected]>
Cc: Alexei Starovoitov <[email protected]>
Cc: Ananth N Mavinakayanahalli <[email protected]>
Cc: Aravinda Prasad <[email protected]>
Cc: Brendan Gregg <[email protected]>
Cc: Daniel Borkmann <[email protected]>
Cc: Eric Biederman <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Sargun Dhillon <[email protected]>
Cc: Steven Rostedt <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/Documentation/perf-script.txt | 3 +++
tools/perf/builtin-script.c | 40 ++++++++++++++++++++++++++++++++
2 files changed, 43 insertions(+)
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 4ed5f23..62c9b0c 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -248,6 +248,9 @@ OPTIONS
--show-mmap-events
Display mmap related events (e.g. MMAP, MMAP2).
+--show-namespace-events
+ Display namespace events i.e. events of type PERF_RECORD_NAMESPACES.
+
--show-switch-events
Display context switch events i.e. events of type PERF_RECORD_SWITCH or
PERF_RECORD_SWITCH_CPU_WIDE.
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index f1ce806..66d62c9 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -830,6 +830,7 @@ struct perf_script {
bool show_task_events;
bool show_mmap_events;
bool show_switch_events;
+ bool show_namespace_events;
bool allocated;
struct cpu_map *cpus;
struct thread_map *threads;
@@ -1118,6 +1119,41 @@ out:
return ret;
}
+/*
+ * perf script handler for PERF_RECORD_NAMESPACES: run the common
+ * namespaces processing, then print the event after the usual sample
+ * prefix.
+ *
+ * Returns 0 on success, -1 if the thread cannot be found/created or the
+ * common processing fails.
+ */
+static int process_namespaces_event(struct perf_tool *tool,
+				    union perf_event *event,
+				    struct perf_sample *sample,
+				    struct machine *machine)
+{
+	struct perf_script *script = container_of(tool, struct perf_script, tool);
+	struct perf_evsel *evsel = perf_evlist__id2evsel(script->session->evlist,
+							 sample->id);
+	struct thread *thread = machine__findnew_thread(machine,
+							event->namespaces.pid,
+							event->namespaces.tid);
+	int err = -1;
+
+	if (!thread) {
+		pr_debug("problem processing NAMESPACES event, skipping it.\n");
+		return -1;
+	}
+
+	if (perf_event__process_namespaces(tool, event, sample, machine) >= 0) {
+		/* no sample_id on the record: fill in what the event itself tells us */
+		if (!evsel->attr.sample_id_all) {
+			sample->cpu = 0;
+			sample->time = 0;
+			sample->tid = event->namespaces.tid;
+			sample->pid = event->namespaces.pid;
+		}
+		print_sample_start(sample, thread, evsel);
+		perf_event__fprintf(event, stdout);
+		err = 0;
+	}
+
+	/* drop the reference taken by machine__findnew_thread() */
+	thread__put(thread);
+	return err;
+}
+
static int process_fork_event(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
@@ -1293,6 +1329,8 @@ static int __cmd_script(struct perf_script *script)
}
if (script->show_switch_events)
script->tool.context_switch = process_switch_event;
+ if (script->show_namespace_events)
+ script->tool.namespaces = process_namespaces_event;
ret = perf_session__process_events(script->session);
@@ -2181,6 +2219,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
"Show the mmap events"),
OPT_BOOLEAN('\0', "show-switch-events", &script.show_switch_events,
"Show context switch events (if recorded)"),
+ OPT_BOOLEAN('\0', "show-namespace-events", &script.show_namespace_events,
+ "Show namespace events (if recorded)"),
OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"),
OPT_BOOLEAN(0, "ns", &nanosecs,
"Use 9 decimal places when displaying time"),
Commit-ID: d890a98c9217892575761d0c1311c41612844c4d
Gitweb: http://git.kernel.org/tip/d890a98c9217892575761d0c1311c41612844c4d
Author: Hari Bathini <[email protected]>
AuthorDate: Wed, 8 Mar 2017 02:12:13 +0530
Committer: Arnaldo Carvalho de Melo <[email protected]>
CommitDate: Tue, 14 Mar 2017 15:17:37 -0300
perf tools: Add 'cgroup_id' sort order keyword
This patch introduces a cgroup identifier entry field in perf report to
identify or distinguish data of different cgroups. It uses the device
number and inode number of cgroup namespace, included in perf data with
the new PERF_RECORD_NAMESPACES event, as cgroup identifier.
With the assumption that each container is created with its own cgroup
namespace, this allows assessment/analysis of multiple containers at
once.
A simple test for this would be to clone a few processes passing
SIGCHLD & CLONE_NEWCGROUP flags to each of them, execute shell and run
different workloads on each of those contexts, while running perf
record command with --namespaces option.
Shown below is the output of perf report, sorted with cgroup identifier,
on perf.data generated with the above test scenario, clearly indicating
one context's considerable use of kernel memory in comparison with
others:
$ perf report -s cgroup_id,sample --stdio
#
# Total Lost Samples: 0
#
# Samples: 5K of event 'kmem:kmalloc'
# Event count (approx.): 5965
#
# Overhead cgroup id (dev/inode) Samples
# ........ ..................... ............
#
81.27% 3/0xeffffffb 4848
16.24% 3/0xf00000d0 969
1.16% 3/0xf00000ce 69
0.82% 3/0xf00000cf 49
0.50% 0/0x0 30
While this is a start, there is further scope of improving this. For
example, instead of cgroup namespace's device and inode numbers, dev
and inode numbers of some or all namespaces may be used to distinguish
which processes are running in a given container context.
Also, scripts to map device and inode info to containers sound
plausible for better tracing of containers.
Signed-off-by: Hari Bathini <[email protected]>
Tested-by: Arnaldo Carvalho de Melo <[email protected]>
Cc: Alexander Shishkin <[email protected]>
Cc: Alexei Starovoitov <[email protected]>
Cc: Ananth N Mavinakayanahalli <[email protected]>
Cc: Aravinda Prasad <[email protected]>
Cc: Brendan Gregg <[email protected]>
Cc: Daniel Borkmann <[email protected]>
Cc: Eric Biederman <[email protected]>
Cc: Jiri Olsa <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Sargun Dhillon <[email protected]>
Cc: Steven Rostedt <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/Documentation/perf-report.txt | 4 +++-
tools/perf/util/hist.c | 7 ++++++
tools/perf/util/hist.h | 1 +
tools/perf/util/sort.c | 41 ++++++++++++++++++++++++++++++++
tools/perf/util/sort.h | 7 ++++++
5 files changed, 59 insertions(+), 1 deletion(-)
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 672b149a..e9a61f5 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -72,7 +72,8 @@ OPTIONS
--sort=::
Sort histogram entries by given key(s) - multiple keys can be specified
in CSV format. Following sort keys are available:
- pid, comm, dso, symbol, parent, cpu, socket, srcline, weight, local_weight.
+ pid, comm, dso, symbol, parent, cpu, socket, srcline, weight,
+ local_weight, cgroup_id.
Each key has following meaning:
@@ -92,6 +93,7 @@ OPTIONS
- weight: Event specific weight, e.g. memory latency or transaction
abort cost. This is the global weight.
- local_weight: Local weight version of the weight above.
+ - cgroup_id: ID derived from cgroup namespace device and inode numbers.
- transaction: Transaction abort flags.
- overhead: Overhead percentage of sample
- overhead_sys: Overhead percentage of sample running in system mode
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index eaf72a9..e3b38f6 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -3,6 +3,7 @@
#include "hist.h"
#include "map.h"
#include "session.h"
+#include "namespaces.h"
#include "sort.h"
#include "evlist.h"
#include "evsel.h"
@@ -169,6 +170,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
hists__set_unres_dso_col_len(hists, HISTC_MEM_DADDR_DSO);
}
+ hists__new_col_len(hists, HISTC_CGROUP_ID, 20);
hists__new_col_len(hists, HISTC_CPU, 3);
hists__new_col_len(hists, HISTC_SOCKET, 6);
hists__new_col_len(hists, HISTC_MEM_LOCKED, 6);
@@ -574,9 +576,14 @@ __hists__add_entry(struct hists *hists,
bool sample_self,
struct hist_entry_ops *ops)
{
+ struct namespaces *ns = thread__namespaces(al->thread);
struct hist_entry entry = {
.thread = al->thread,
.comm = thread__comm(al->thread),
+ .cgroup_id = {
+ .dev = ns ? ns->link_info[CGROUP_NS_INDEX].dev : 0,
+ .ino = ns ? ns->link_info[CGROUP_NS_INDEX].ino : 0,
+ },
.ms = {
.map = al->map,
.sym = al->sym,
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 2e839bf..ee3670a 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -30,6 +30,7 @@ enum hist_column {
HISTC_DSO,
HISTC_THREAD,
HISTC_COMM,
+ HISTC_CGROUP_ID,
HISTC_PARENT,
HISTC_CPU,
HISTC_SOCKET,
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 93f755a..8b0d4e3 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -536,6 +536,46 @@ struct sort_entry sort_cpu = {
.se_width_idx = HISTC_CPU,
};
+/* --sort cgroup_id */
+
+static int64_t _sort__cgroup_dev_cmp(u64 left_dev, u64 right_dev)
+{
+ return (int64_t)(right_dev - left_dev);
+}
+
+static int64_t _sort__cgroup_inode_cmp(u64 left_ino, u64 right_ino)
+{
+ return (int64_t)(right_ino - left_ino);
+}
+
+static int64_t
+sort__cgroup_id_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ int64_t ret;
+
+ ret = _sort__cgroup_dev_cmp(right->cgroup_id.dev, left->cgroup_id.dev);
+ if (ret != 0)
+ return ret;
+
+ return _sort__cgroup_inode_cmp(right->cgroup_id.ino,
+ left->cgroup_id.ino);
+}
+
+static int hist_entry__cgroup_id_snprintf(struct hist_entry *he,
+ char *bf, size_t size,
+ unsigned int width __maybe_unused)
+{
+ return repsep_snprintf(bf, size, "%lu/0x%lx", he->cgroup_id.dev,
+ he->cgroup_id.ino);
+}
+
+struct sort_entry sort_cgroup_id = {
+ .se_header = "cgroup id (dev/inode)",
+ .se_cmp = sort__cgroup_id_cmp,
+ .se_snprintf = hist_entry__cgroup_id_snprintf,
+ .se_width_idx = HISTC_CGROUP_ID,
+};
+
/* --sort socket */
static int64_t
@@ -1464,6 +1504,7 @@ static struct sort_dimension common_sort_dimensions[] = {
DIM(SORT_TRANSACTION, "transaction", sort_transaction),
DIM(SORT_TRACE, "trace", sort_trace),
DIM(SORT_SYM_SIZE, "symbol_size", sort_sym_size),
+ DIM(SORT_CGROUP_ID, "cgroup_id", sort_cgroup_id),
};
#undef DIM
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index f583325..baf20a3 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -54,6 +54,11 @@ struct he_stat {
u32 nr_events;
};
+struct namespace_id {
+ u64 dev;
+ u64 ino;
+};
+
struct hist_entry_diff {
bool computed;
union {
@@ -91,6 +96,7 @@ struct hist_entry {
struct map_symbol ms;
struct thread *thread;
struct comm *comm;
+ struct namespace_id cgroup_id;
u64 ip;
u64 transaction;
s32 socket;
@@ -212,6 +218,7 @@ enum sort_type {
SORT_TRANSACTION,
SORT_TRACE,
SORT_SYM_SIZE,
+ SORT_CGROUP_ID,
/* branch stack specific sort keys */
__SORT_BRANCH_STACK,