Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S933388AbbGVIMt (ORCPT ); Wed, 22 Jul 2015 04:12:49 -0400 Received: from szxga01-in.huawei.com ([58.251.152.64]:36803 "EHLO szxga01-in.huawei.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1756131AbbGVIJ6 (ORCPT ); Wed, 22 Jul 2015 04:09:58 -0400 From: Kaixu Xia To: , , , , , , CC: , , , , Subject: [PATCH v2 3/5] bpf: Save the pointer to struct perf_event to map Date: Wed, 22 Jul 2015 08:09:30 +0000 Message-ID: <1437552572-84748-4-git-send-email-xiakaixu@huawei.com> X-Mailer: git-send-email 1.8.3.4 In-Reply-To: <1437552572-84748-1-git-send-email-xiakaixu@huawei.com> References: <1437552572-84748-1-git-send-email-xiakaixu@huawei.com> MIME-Version: 1.0 Content-Type: text/plain X-Originating-IP: [10.107.193.250] X-CFilter-Loop: Reflected Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 5936 Lines: 205 The user-space event FDs returned by the perf_event_open() syscall are converted to pointers to struct perf_event, which are then stored in the map. 
Signed-off-by: Kaixu Xia --- include/linux/perf_event.h | 2 ++ kernel/bpf/arraymap.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++ kernel/bpf/syscall.c | 26 +++++++++++++++++++++++ kernel/events/core.c | 26 +++++++++++++++++++++++ 4 files changed, 107 insertions(+) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 2027809..2ea4067 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -641,6 +641,7 @@ extern int perf_event_init_task(struct task_struct *child); extern void perf_event_exit_task(struct task_struct *child); extern void perf_event_free_task(struct task_struct *task); extern void perf_event_delayed_put(struct task_struct *task); +extern struct perf_event *perf_event_get(unsigned int fd); extern void perf_event_print_debug(void); extern void perf_pmu_disable(struct pmu *pmu); extern void perf_pmu_enable(struct pmu *pmu); @@ -979,6 +980,7 @@ static inline int perf_event_init_task(struct task_struct *child) { return 0; } static inline void perf_event_exit_task(struct task_struct *child) { } static inline void perf_event_free_task(struct task_struct *task) { } static inline void perf_event_delayed_put(struct task_struct *task) { } +static struct perf_event *perf_event_get(unsigned int fd) { return NULL; } static inline void perf_event_print_debug(void) { } static inline int perf_event_task_disable(void) { return -EINVAL; } static inline int perf_event_task_enable(void) { return -EINVAL; } diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 410bc40..a7475ae 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -15,6 +15,7 @@ #include #include #include +#include /* Called from syscall */ static struct bpf_map *array_map_alloc(union bpf_attr *attr) @@ -276,6 +277,57 @@ static int perf_event_array_map_get_next_key(struct bpf_map *map, void *key, return -EINVAL; } +static int replace_map_with_perf_event(void *value) +{ + struct perf_event *event; + u32 fd; + + fd = *(u32 *)value; + + 
event = perf_event_get(fd); + if (IS_ERR(event)) + return PTR_ERR(event); + + /* limit the event type to PERF_TYPE_RAW + * and PERF_TYPE_HARDWARE + */ + if (event->attr.type != PERF_TYPE_RAW && + event->attr.type != PERF_TYPE_HARDWARE) + return -EINVAL; + + memcpy(value, &event, sizeof(struct perf_event *)); + + return 0; +} + +static bool check_map_perf_event_stored(struct bpf_map *map, void *key) +{ + void *event; + bool is_stored = false; + + rcu_read_lock(); + event = array_map_lookup_elem(map, key); + if (event && (*(unsigned long *)event)) + is_stored = true; + rcu_read_unlock(); + + return is_stored; +} + +/* only called from syscall */ +static int perf_event_array_map_update_elem(struct bpf_map *map, void *key, + void *value, u64 map_flags) +{ + /* check if the value is already stored */ + if (check_map_perf_event_stored(map, key)) + return -EINVAL; + + if (replace_map_with_perf_event(value)) + return -EBADF; + + return array_map_update_elem(map, key, value, map_flags); +} + static int perf_event_array_map_traverse_elem(bpf_map_traverse_callback func, struct bpf_map *map) { @@ -297,6 +349,7 @@ static const struct bpf_map_ops perf_event_array_ops = { .map_free = array_map_free, .map_get_next_key = perf_event_array_map_get_next_key, .map_lookup_elem = array_map_lookup_elem, + .map_update_elem = perf_event_array_map_update_elem, .map_delete_elem = array_map_delete_elem, .map_traverse_elem = perf_event_array_map_traverse_elem, }; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index a1b14d1..854f351 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -17,6 +17,7 @@ #include #include #include +#include static LIST_HEAD(bpf_map_types); @@ -64,6 +65,19 @@ void bpf_map_put(struct bpf_map *map) } } +static int bpf_map_perf_event_put(void *value) +{ + struct perf_event *event; + + event = (struct perf_event *)(*(unsigned long *)value); + if (!event) + return -EBADF; + + perf_event_release_kernel(event); + + return 0; +} + static int 
bpf_map_release(struct inode *inode, struct file *filp) { struct bpf_map *map = filp->private_data; @@ -74,6 +88,18 @@ static int bpf_map_release(struct inode *inode, struct file *filp) */ bpf_prog_array_map_clear(map); + if (map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY) { + if (!map->ops->map_traverse_elem) + return -EPERM; + + rcu_read_lock(); + if (map->ops->map_traverse_elem(bpf_map_perf_event_put, map) < 0) { + rcu_read_unlock(); + return -EINVAL; + } + rcu_read_unlock(); + } + bpf_map_put(map); return 0; } diff --git a/kernel/events/core.c b/kernel/events/core.c index d3dae34..14a9924 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -8574,6 +8574,32 @@ void perf_event_delayed_put(struct task_struct *task) WARN_ON_ONCE(task->perf_event_ctxp[ctxn]); } +struct perf_event *perf_event_get(unsigned int fd) +{ + struct perf_event *event; + struct fd f; + + f = fdget(fd); + + if (!f.file) + return ERR_PTR(-EBADF); + + if (f.file->f_op != &perf_fops) { + fdput(f); + return ERR_PTR(-EINVAL); + } + + event = f.file->private_data; + + if (!atomic_long_inc_not_zero(&event->refcount)) { + fdput(f); + return ERR_PTR(-ENOENT); + } + + fdput(f); + return event; +} + /* * inherit a event from parent task to child task: */ -- 1.8.3.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/