Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1754105AbbG2Xae (ORCPT ); Wed, 29 Jul 2015 19:30:34 -0400 Received: from www62.your-server.de ([213.133.104.62]:38877 "EHLO www62.your-server.de" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753499AbbG2Xad (ORCPT ); Wed, 29 Jul 2015 19:30:33 -0400 Message-ID: <55B96209.9070406@iogearbox.net> Date: Thu, 30 Jul 2015 01:30:17 +0200 From: Daniel Borkmann User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:31.0) Gecko/20100101 Thunderbird/31.4.0 MIME-Version: 1.0 To: Kaixu Xia , ast@plumgrid.com, davem@davemloft.net, acme@kernel.org, mingo@redhat.com, a.p.zijlstra@chello.nl, masami.hiramatsu.pt@hitachi.com, jolsa@kernel.org CC: wangnan0@huawei.com, linux-kernel@vger.kernel.org, pi3orama@163.com, hekuang@huawei.com Subject: Re: [PATCH v4 2/4] bpf: Add new bpf map type to store the pointer to struct perf_event References: <1438082255-60683-1-git-send-email-xiakaixu@huawei.com> <1438082255-60683-3-git-send-email-xiakaixu@huawei.com> In-Reply-To: <1438082255-60683-3-git-send-email-xiakaixu@huawei.com> Content-Type: text/plain; charset=windows-1252; format=flowed Content-Transfer-Encoding: 7bit X-Authenticated-Sender: daniel@iogearbox.net Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 5814 Lines: 170 On 07/28/2015 01:17 PM, Kaixu Xia wrote: > Introduce a new bpf map type 'BPF_MAP_TYPE_PERF_EVENT_ARRAY'. > This map only stores the pointer to struct perf_event. The > user space event FDs from perf_event_open() syscall are converted > to the pointer to struct perf_event and stored in map. > > Signed-off-by: Kaixu Xia > --- > include/linux/bpf.h | 1 + > include/linux/perf_event.h | 2 ++ > include/uapi/linux/bpf.h | 1 + > kernel/bpf/arraymap.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++ > kernel/bpf/verifier.c | 15 ++++++++++++ > kernel/events/core.c | 17 ++++++++++++++ > 6 files changed, 93 insertions(+) > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h > index 610b730..3c9c0eb 100644 > --- a/include/linux/bpf.h > +++ b/include/linux/bpf.h > @@ -10,6 +10,7 @@ > #include > #include > #include > +#include > > struct bpf_map; > > diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h > index 2027809..2ea4067 100644 > --- a/include/linux/perf_event.h > +++ b/include/linux/perf_event.h > @@ -641,6 +641,7 @@ extern int perf_event_init_task(struct task_struct *child); > extern void perf_event_exit_task(struct task_struct *child); > extern void perf_event_free_task(struct task_struct *task); > extern void perf_event_delayed_put(struct task_struct *task); > +extern struct perf_event *perf_event_get(unsigned int fd); > extern void perf_event_print_debug(void); > extern void perf_pmu_disable(struct pmu *pmu); > extern void perf_pmu_enable(struct pmu *pmu); > @@ -979,6 +980,7 @@ static inline int perf_event_init_task(struct task_struct *child) { return 0; } > static inline void perf_event_exit_task(struct task_struct *child) { } > static inline void perf_event_free_task(struct task_struct *task) { } > static inline void perf_event_delayed_put(struct task_struct *task) { } > +static struct perf_event *perf_event_get(unsigned int fd) { return NULL; } > static inline void perf_event_print_debug(void) { } > static inline int perf_event_task_disable(void) { return -EINVAL; } > static inline int perf_event_task_enable(void) { return -EINVAL; } > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index 29ef6f9..69a1f6b 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -114,6 +114,7 @@ enum bpf_map_type { > BPF_MAP_TYPE_HASH, > BPF_MAP_TYPE_ARRAY, > BPF_MAP_TYPE_PROG_ARRAY, > + BPF_MAP_TYPE_PERF_EVENT_ARRAY, > }; > > enum bpf_prog_type { > diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c > index 4784cdc..a9e0235 100644 > --- a/kernel/bpf/arraymap.c > +++ b/kernel/bpf/arraymap.c > @@ -297,3 +297,60 @@ static int __init register_prog_array_map(void) > return 0; > } > late_initcall(register_prog_array_map); > + > +static void *perf_event_fd_array_get_ptr(struct bpf_array *array, int fd) > +{ > + struct perf_event *event; > + > + event = perf_event_get(fd); > + if (IS_ERR(event)) > + return event; So in case of !CONFIG_PERF_EVENTS, you define perf_event_get() to return NULL. Seems like this will give a nice NULL ptr deref below. ;) > + /* > + * prevent some crazy events so we can make our life easier > + */ > + if (event->attr.type != PERF_TYPE_RAW && > + event->attr.type != PERF_TYPE_HARDWARE) { > + perf_event_release_kernel(event); > + return ERR_PTR(-EINVAL); > + } > + return event; > +} > + > +static void perf_event_fd_array_put_ptr(struct bpf_array *array, void *ptr) > +{ > + struct perf_event *event = (struct perf_event *)ptr; ( Same comment as in previous patch. ) > + perf_event_release_kernel(event); > +} > + > +static const struct fd_array_map_ops perf_event_fd_array_map_ops = { > + .get_ptr = perf_event_fd_array_get_ptr, > + .put_ptr = perf_event_fd_array_put_ptr, > +}; ... > +late_initcall(register_perf_event_array_map); > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c > index 039d866..c70f7e7 100644 > --- a/kernel/bpf/verifier.c > +++ b/kernel/bpf/verifier.c > @@ -924,6 +924,21 @@ static int check_call(struct verifier_env *env, int func_id) > */ > return -EINVAL; > > + if (map && map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && > + func_id != BPF_FUNC_perf_event_read) > + /* perf_event_array map type needs extra care: > + * only allow to pass it into bpf_perf_event_read() for now. > + * bpf_map_update/delete_elem() must only be done via syscall > + */ > + return -EINVAL; > + > + if (func_id == BPF_FUNC_perf_event_read && > + map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) > + /* don't allow any other map type to be passed into > + * bpf_perf_event_read() > + */ > + return -EINVAL; Maybe a function as we do similar checks with BPF_MAP_TYPE_PROG_ARRAY? > return 0; > } > > diff --git a/kernel/events/core.c b/kernel/events/core.c > index d3dae34..58f0d47 100644 > --- a/kernel/events/core.c > +++ b/kernel/events/core.c > @@ -8574,6 +8574,23 @@ void perf_event_delayed_put(struct task_struct *task) > WARN_ON_ONCE(task->perf_event_ctxp[ctxn]); > } > > +struct perf_event *perf_event_get(unsigned int fd) > +{ > + int err; > + struct fd f; > + struct perf_event *event; > + > + err = perf_fget_light(fd, &f); > + if (err) > + return ERR_PTR(err); > + > + event = f.file->private_data; > + atomic_long_inc(&event->refcount); > + fdput(f); > + > + return event; > +} > + > /* > * inherit a event from parent task to child task: > */ > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/