Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752326AbbGBNwF (ORCPT ); Thu, 2 Jul 2015 09:52:05 -0400 Received: from szxga02-in.huawei.com ([119.145.14.65]:29706 "EHLO szxga02-in.huawei.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752928AbbGBNvU (ORCPT ); Thu, 2 Jul 2015 09:51:20 -0400 From: He Kuang To: , , , , , , , CC: , , Subject: [RFC PATCH v2 3/4] bpf: Introduce function for outputting data to perf event Date: Thu, 2 Jul 2015 13:50:42 +0000 Message-ID: <1435845043-107196-4-git-send-email-hekuang@huawei.com> X-Mailer: git-send-email 1.8.3.4 In-Reply-To: <1435845043-107196-1-git-send-email-hekuang@huawei.com> References: <559386D7.1020208@huawei.com> <1435845043-107196-1-git-send-email-hekuang@huawei.com> MIME-Version: 1.0 Content-Type: text/plain X-Originating-IP: [10.107.197.210] X-CFilter-Loop: Reflected Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 7982 Lines: 230 Add a function to receive data from eBPF programs and fill it into the perf trace buffer of the current context. In the previous patch we made sure that the recursion counter protecting the perf trace buffer is checked when bpf_prog is executed, so here we can safely fill the trace buffer. The data is temporarily stored at the end of perf_trace_buf; the last 4 bytes of the buffer are used as a valid flag and contain the temporary buffer length. In order to get the corresponding trace buffer of the context, a new function perf_swevent_current_context_type() is added; this function only gets the current context type but does not increase the recursion count. 
Signed-off-by: He Kuang --- include/linux/ftrace_event.h | 2 ++ include/linux/perf_event.h | 2 ++ include/uapi/linux/bpf.h | 3 +++ kernel/events/core.c | 6 ++++++ kernel/events/internal.h | 17 ++++++++++------- kernel/trace/bpf_trace.c | 29 +++++++++++++++++++++++++++++ kernel/trace/trace_event_perf.c | 29 +++++++++++++++++++++++++++++ samples/bpf/bpf_helpers.h | 2 ++ 8 files changed, 83 insertions(+), 7 deletions(-) diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index d54f11d..1c1f3ad 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -616,6 +616,8 @@ extern void *perf_trace_buf_prepare(int size, unsigned short type, struct pt_regs **regs, int *rctxp); extern void *perf_trace_buf_prepare_rctx(int size, unsigned short type, struct pt_regs **regs, int rctx); +extern void *perf_trace_buf_prepare_rctx_tail(int size, int rctx); +extern void *get_perf_trace_buf(int rctx); static inline void perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr, diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index a204d52..984c89c 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -939,6 +939,7 @@ extern unsigned int perf_output_copy(struct perf_output_handle *handle, const void *buf, unsigned int len); extern unsigned int perf_output_skip(struct perf_output_handle *handle, unsigned int len); +extern int perf_swevent_current_context_type(void); extern int perf_swevent_get_recursion_context(void); extern void perf_swevent_put_recursion_context(int rctx); extern u64 perf_swevent_set_period(struct perf_event *event); @@ -995,6 +996,7 @@ static inline void perf_event_exec(void) { } static inline void perf_event_comm(struct task_struct *tsk, bool exec) { } static inline void perf_event_fork(struct task_struct *tsk) { } static inline void perf_event_init(void) { } +static inline int perf_swevent_current_context_type(void) { return -1; } static inline int 
perf_swevent_get_recursion_context(void) { return -1; } static inline void perf_swevent_put_recursion_context(int rctx) { } static inline u64 perf_swevent_set_period(struct perf_event *event) { return 0; } diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a9ebdf5..13d3e46 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -210,6 +210,9 @@ enum bpf_func_id { * Return: 0 on success */ BPF_FUNC_l4_csum_replace, + + /* int bpf_output_data(void *src, int size) */ + BPF_FUNC_output_data, __BPF_FUNC_MAX_ID, }; diff --git a/kernel/events/core.c b/kernel/events/core.c index 9e0773d..0224d5b 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6376,6 +6376,12 @@ end: DEFINE_PER_CPU(struct pt_regs, __perf_regs[4]); +int perf_swevent_current_context_type(void) +{ + return current_context_type(); +} +EXPORT_SYMBOL_GPL(perf_swevent_current_context_type); + int perf_swevent_get_recursion_context(void) { struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable); diff --git a/kernel/events/internal.h b/kernel/events/internal.h index 2deb24c..5cabce5 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -175,18 +175,21 @@ perf_callchain(struct perf_event *event, struct pt_regs *regs); extern int get_callchain_buffers(void); extern void put_callchain_buffers(void); -static inline int get_recursion_context(int *recursion) +static inline int current_context_type(void) { - int rctx; - if (in_nmi()) - rctx = 3; + return 3; else if (in_irq()) - rctx = 2; + return 2; else if (in_softirq()) - rctx = 1; + return 1; else - rctx = 0; + return 0; +} + +static inline int get_recursion_context(int *recursion) +{ + int rctx = current_context_type(); if (recursion[rctx]) return -1; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 2d56ce5..9159b5e 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -79,6 +79,33 @@ static const struct bpf_func_proto bpf_probe_read_proto = { .arg3_type 
= ARG_ANYTHING, }; +static u64 bpf_output_data(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + void *src = (void *) (long) r1; + int size = (int) r2; + void *buf; + int rctx = perf_swevent_current_context_type(); + + if (rctx < 0) + return -EINVAL; + + buf = perf_trace_buf_prepare_rctx_tail(size, rctx); + if (!buf) + return -ENOMEM; + + memcpy(buf, src, size); + + return 0; +} + +static const struct bpf_func_proto bpf_output_data_proto = { + .func = bpf_output_data, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_STACK, + .arg2_type = ARG_CONST_STACK_SIZE, +}; + static u64 bpf_ktime_get_ns(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) { /* NMI safe access to clock monotonic */ @@ -170,6 +197,8 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func return &bpf_map_delete_elem_proto; case BPF_FUNC_probe_read: return &bpf_probe_read_proto; + case BPF_FUNC_output_data: + return &bpf_output_data_proto; case BPF_FUNC_ktime_get_ns: return &bpf_ktime_get_ns_proto; diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 344b601..2eeb59b 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -260,6 +260,35 @@ void perf_trace_del(struct perf_event *p_event, int flags) tp_event->class->reg(tp_event, TRACE_REG_PERF_DEL, p_event); } +void *perf_trace_buf_prepare_rctx_tail(int size, int rctx) +{ + char *raw_data; + + BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long)); + + if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, + "perf buffer not large enough")) + return NULL; + + raw_data = this_cpu_ptr(perf_trace_buf[rctx]); + raw_data += (PERF_MAX_TRACE_SIZE - sizeof(u32)); + + /* The last 4 bytes is raw_data size and it is used as a valid flag */ + *(u32 *)raw_data = size; + raw_data -= size; + + return raw_data; +} +EXPORT_SYMBOL_GPL(perf_trace_buf_prepare_rctx_tail); +NOKPROBE_SYMBOL(perf_trace_buf_prepare_rctx_tail); + +void *get_perf_trace_buf(int rctx) +{ + return 
this_cpu_ptr(perf_trace_buf[rctx]); +} +EXPORT_SYMBOL_GPL(get_perf_trace_buf); +NOKPROBE_SYMBOL(get_perf_trace_buf); + static void *__perf_trace_buf_prepare(int size, unsigned short type, struct pt_regs **regs, int *rctxp, bool update_rctx) diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h index f960b5f..44bfbeb 100644 --- a/samples/bpf/bpf_helpers.h +++ b/samples/bpf/bpf_helpers.h @@ -17,6 +17,8 @@ static int (*bpf_map_delete_elem)(void *map, void *key) = (void *) BPF_FUNC_map_delete_elem; static int (*bpf_probe_read)(void *dst, int size, void *unsafe_ptr) = (void *) BPF_FUNC_probe_read; +static int (*bpf_output_data)(void *src, int size) = + (void *) BPF_FUNC_output_data; static unsigned long long (*bpf_ktime_get_ns)(void) = (void *) BPF_FUNC_ktime_get_ns; static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) = -- 1.8.5.2 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/