Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1755464AbcKJLi2 (ORCPT ); Thu, 10 Nov 2016 06:38:28 -0500 Received: from mx0a-001b2d01.pphosted.com ([148.163.156.1]:60879 "EHLO mx0a-001b2d01.pphosted.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1755200AbcKJLiZ (ORCPT ); Thu, 10 Nov 2016 06:38:25 -0500 Subject: [PATCH 1/3] perf: add PERF_RECORD_NAMESPACES to include namespaces related info From: Hari Bathini To: ast@fb.com, peterz@infradead.org, lkml , acme@kernel.org, alexander.shishkin@linux.intel.com, mingo@redhat.com Cc: daniel@iogearbox.net, rostedt@goodmis.org, Ananth N Mavinakayanahalli , ebiederm@xmission.com, sargun@sargun.me, Aravinda Prasad , brendan.d.gregg@gmail.com Date: Thu, 10 Nov 2016 17:08:06 +0530 In-Reply-To: <147877784354.29988.8570048236764105701.stgit@hbathini.in.ibm.com> References: <147877784354.29988.8570048236764105701.stgit@hbathini.in.ibm.com> User-Agent: StGit/0.17.1-dirty MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: 7bit X-TM-AS-MML: disable X-Content-Scanned: Fidelis XPS MAILER x-cbid: 16111011-0008-0000-0000-000000E083E6 X-IBM-AV-DETECTION: SAVI=unused REMOTE=unused XFE=unused x-cbparentid: 16111011-0009-0000-0000-00000878A9EA Message-Id: <147877788475.29988.17221764769834489874.stgit@hbathini.in.ibm.com> X-Proofpoint-Virus-Version: vendor=fsecure engine=2.50.10432:,, definitions=2016-11-10_03:,, signatures=0 X-Proofpoint-Spam-Details: rule=outbound_notspam policy=outbound score=0 spamscore=0 suspectscore=2 malwarescore=0 phishscore=0 adultscore=0 bulkscore=0 classifier=spam adjust=0 reason=mlx scancount=1 engine=8.0.1-1609300000 definitions=main-1611100218 Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 9547 Lines: 347 With the advent of container technologies like docker, that depend on namespaces for isolation, there is a need for tracing support for namespaces. 
This patch introduces new PERF_RECORD_NAMESPACES event for tracing based on namespaces related info. Signed-off-by: Hari Bathini --- fs/mount.h | 17 ----- include/linux/mnt_namespace.h | 18 +++++ include/linux/perf_event.h | 1 include/uapi/linux/perf_event.h | 21 ++++++ kernel/events/core.c | 136 +++++++++++++++++++++++++++++++++++++++ kernel/fork.c | 4 + kernel/nsproxy.c | 5 + 7 files changed, 184 insertions(+), 18 deletions(-) diff --git a/fs/mount.h b/fs/mount.h index d2e25d7..5ec592b 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -1,22 +1,7 @@ #include #include -#include -#include #include - -struct mnt_namespace { - atomic_t count; - struct ns_common ns; - struct mount * root; - struct list_head list; - struct user_namespace *user_ns; - struct ucounts *ucounts; - u64 seq; /* Sequence number to prevent loops */ - wait_queue_head_t poll; - u64 event; - unsigned int mounts; /* # of mounts in the namespace */ - unsigned int pending_mounts; -}; +#include struct mnt_pcp { int mnt_count; diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h index 12b2ab5..b911ca6 100644 --- a/include/linux/mnt_namespace.h +++ b/include/linux/mnt_namespace.h @@ -2,10 +2,26 @@ #define _NAMESPACE_H_ #ifdef __KERNEL__ -struct mnt_namespace; +#include +#include + struct fs_struct; struct user_namespace; +struct mnt_namespace { + atomic_t count; + struct ns_common ns; + struct mount *root; + struct list_head list; + struct user_namespace *user_ns; + struct ucounts *ucounts; + u64 seq; /* Sequence number to prevent loops */ + wait_queue_head_t poll; + u64 event; + unsigned int mounts; /* # of mounts in the namespace */ + unsigned int pending_mounts; +}; + extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *, struct user_namespace *, struct fs_struct *); extern void put_mnt_ns(struct mnt_namespace *ns); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 4741ecd..243b988 100644 --- a/include/linux/perf_event.h +++ 
b/include/linux/perf_event.h @@ -1110,6 +1110,7 @@ extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks extern void perf_event_exec(void); extern void perf_event_comm(struct task_struct *tsk, bool exec); +extern void perf_event_namespaces(struct task_struct *tsk); extern void perf_event_fork(struct task_struct *tsk); /* Callchains */ diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index c66a485..575aed6 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -344,7 +344,8 @@ struct perf_event_attr { use_clockid : 1, /* use @clockid for time fields */ context_switch : 1, /* context switch data */ write_backward : 1, /* Write ring buffer from end to beginning */ - __reserved_1 : 36; + namespaces : 1, /* include namespaces data */ + __reserved_1 : 35; union { __u32 wakeup_events; /* wakeup every n events */ @@ -862,6 +863,24 @@ enum perf_event_type { */ PERF_RECORD_SWITCH_CPU_WIDE = 15, + /* + * struct { + * struct perf_event_header header; + * + * u32 pid, tid; + * u64 time; + * u32 uts_ns_inum; + * u32 ipc_ns_inum; + * u32 mnt_ns_inum; + * u32 pid_ns_inum; + * u32 net_ns_inum; + * u32 cgroup_ns_inum; + * u32 user_ns_inum; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_NAMESPACES = 16, + PERF_RECORD_MAX, /* non-ABI */ }; diff --git a/kernel/events/core.c b/kernel/events/core.c index 0e29213..3bdc2e7 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -46,6 +46,9 @@ #include #include #include +#include +#include +#include #include "internal.h" @@ -375,6 +378,7 @@ static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events); static atomic_t nr_mmap_events __read_mostly; static atomic_t nr_comm_events __read_mostly; +static atomic_t nr_namespaces_events __read_mostly; static atomic_t nr_task_events __read_mostly; static atomic_t nr_freq_events __read_mostly; static atomic_t nr_switch_events __read_mostly; @@ -3874,6 +3878,8 @@ static void unaccount_event(struct 
perf_event *event) atomic_dec(&nr_mmap_events); if (event->attr.comm) atomic_dec(&nr_comm_events); + if (event->attr.namespaces) + atomic_dec(&nr_namespaces_events); if (event->attr.task) atomic_dec(&nr_task_events); if (event->attr.freq) @@ -6476,6 +6482,134 @@ void perf_event_comm(struct task_struct *task, bool exec) } /* + * namespaces tracking + */ + +struct perf_namespaces_event { + struct task_struct *task; + + struct { + struct perf_event_header header; + u32 pid; + u32 tid; + u64 time; + u32 uts_ns_inum; + u32 ipc_ns_inum; + u32 mnt_ns_inum; + u32 pid_ns_inum; + u32 net_ns_inum; + u32 cgroup_ns_inum; + u32 user_ns_inum; + } event_id; +}; + +static int perf_event_namespaces_match(struct perf_event *event) +{ + return event->attr.namespaces; +} + +static void perf_event_namespaces_output(struct perf_event *event, + void *data) +{ + struct perf_namespaces_event *namespaces_event = data; + struct perf_output_handle handle; + struct perf_sample_data sample; + int size = namespaces_event->event_id.header.size; + struct nsproxy *nsproxy; + int ret; + + if (!perf_event_namespaces_match(event)) + return; + + perf_event_header__init_id(&namespaces_event->event_id.header, + &sample, event); + ret = perf_output_begin(&handle, event, + namespaces_event->event_id.header.size); + + if (ret) + goto out; + + namespaces_event->event_id.pid = perf_event_pid(event, + namespaces_event->task); + namespaces_event->event_id.tid = perf_event_tid(event, + namespaces_event->task); + + if (namespaces_event->task != current) + task_lock(namespaces_event->task); + + nsproxy = namespaces_event->task->nsproxy; + if (nsproxy != NULL) { + namespaces_event->event_id.uts_ns_inum = + nsproxy->uts_ns->ns.inum; +#ifdef CONFIG_IPC_NS + namespaces_event->event_id.ipc_ns_inum = + nsproxy->ipc_ns->ns.inum; +#endif + namespaces_event->event_id.mnt_ns_inum = + nsproxy->mnt_ns->ns.inum; + namespaces_event->event_id.pid_ns_inum = + nsproxy->pid_ns_for_children->ns.inum; +#ifdef CONFIG_NET + 
namespaces_event->event_id.net_ns_inum = + nsproxy->net_ns->ns.inum; +#endif +#ifdef CONFIG_CGROUPS + namespaces_event->event_id.cgroup_ns_inum = + nsproxy->cgroup_ns->ns.inum; +#endif + } + + namespaces_event->event_id.user_ns_inum = + __task_cred(namespaces_event->task)->user_ns->ns.inum; + + if (namespaces_event->task != current) + task_unlock(namespaces_event->task); + + namespaces_event->event_id.time = perf_event_clock(event); + + perf_output_put(&handle, namespaces_event->event_id); + + perf_event__output_id_sample(event, &handle, &sample); + + perf_output_end(&handle); +out: + namespaces_event->event_id.header.size = size; +} + +void perf_event_namespaces(struct task_struct *task) +{ + struct perf_namespaces_event namespaces_event; + + if (!atomic_read(&nr_namespaces_events)) + return; + + namespaces_event = (struct perf_namespaces_event){ + .task = task, + .event_id = { + .header = { + .type = PERF_RECORD_NAMESPACES, + .misc = 0, + .size = sizeof(namespaces_event.event_id), + }, + /* .pid */ + /* .tid */ + /* .time */ + /* .uts_ns_inum */ + /* .ipc_ns_inum */ + /* .mnt_ns_inum */ + /* .pid_ns_inum */ + /* .net_ns_inum */ + /* .cgroup_ns_inum */ + /* .user_ns_inum */ + }, + }; + + perf_iterate_sb(perf_event_namespaces_output, + &namespaces_event, + NULL); +} + +/* * mmap tracking */ @@ -9018,6 +9152,8 @@ static void account_event(struct perf_event *event) atomic_inc(&nr_mmap_events); if (event->attr.comm) atomic_inc(&nr_comm_events); + if (event->attr.namespaces) + atomic_inc(&nr_namespaces_events); if (event->attr.task) atomic_inc(&nr_task_events); if (event->attr.freq) diff --git a/kernel/fork.c b/kernel/fork.c index 997ac1d..3faca3d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1818,6 +1818,7 @@ static __latent_entropy struct task_struct *copy_process( cgroup_post_fork(p); threadgroup_change_end(current); perf_event_fork(p); + perf_event_namespaces(p); trace_task_newtask(p, clone_flags); uprobe_copy_process(p, clone_flags); @@ -2280,6 +2281,9 @@ 
SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) free_fs_struct(new_fs); bad_unshare_out: + if (!err) + perf_event_namespaces(current); + return err; } diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index 782102e..4c25e6e 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -26,6 +26,7 @@ #include #include #include +#include static struct kmem_cache *nsproxy_cachep; @@ -264,6 +265,10 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype) switch_task_namespaces(tsk, new_nsproxy); out: fput(file); + + if (!err) + perf_event_namespaces(tsk); + return err; }