Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1754596AbbGJKEb (ORCPT ); Fri, 10 Jul 2015 06:04:31 -0400 Received: from szxga03-in.huawei.com ([119.145.14.66]:57095 "EHLO szxga03-in.huawei.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1754426AbbGJKEP (ORCPT ); Fri, 10 Jul 2015 06:04:15 -0400 From: He Kuang To: , , , , , , , CC: , , , Subject: [RFC PATCH v4 3/3] bpf: Introduce function for outputing data to perf event Date: Fri, 10 Jul 2015 10:03:07 +0000 Message-ID: <1436522587-136825-4-git-send-email-hekuang@huawei.com> X-Mailer: git-send-email 1.8.3.4 In-Reply-To: <1436522587-136825-1-git-send-email-hekuang@huawei.com> References: <1436522587-136825-1-git-send-email-hekuang@huawei.com> MIME-Version: 1.0 Content-Type: text/plain X-Originating-IP: [10.107.197.210] X-CFilter-Loop: Reflected X-Mirapoint-Virus-RAPID-Raw: score=unknown(0), refid=str=0001.0A020203.559F9888.000E,ss=1,re=0.000,recu=0.000,reip=0.000,cl=1,cld=1,fgs=0, ip=0.0.0.0, so=2013-05-26 15:14:31, dmn=2013-03-21 17:37:32 X-Mirapoint-Loop-Id: 80bcb7550a6159f22bce495008776f2d Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 4798 Lines: 158 There are scenarios in which we need an eBPF program to record not only kprobe point args, but also the PMU counters, time latencies or the number of cache misses between two probe points and other information when the probe point is entered. This patch adds a new trace event to establish infrastructure for bpf to output data to perf. Userspace perf tools can detect and use this event in the same way as the existing tracepoint events. 
New bpf trace event entry in debugfs: /sys/kernel/debug/tracing/events/bpf/bpf_output_data Userspace perf tools detect the new tracepoint event as: bpf:bpf_output_data [Tracepoint event] Data in the ring buffer of perf events added to this event will be polled out; sample types and other attributes can be adjusted on those events directly without touching the original kprobe events. The bpf helper function gives an eBPF program the ability to output data as a perf sample event. This helper simply calls the new trace event, and userspace perf tools can record the BPF ftrace event to collect those records. Signed-off-by: He Kuang --- include/trace/events/bpf.h | 30 ++++++++++++++++++++++++++++++ include/uapi/linux/bpf.h | 7 +++++++ kernel/trace/bpf_trace.c | 23 +++++++++++++++++++++++ samples/bpf/bpf_helpers.h | 2 ++ 4 files changed, 62 insertions(+) create mode 100644 include/trace/events/bpf.h diff --git a/include/trace/events/bpf.h b/include/trace/events/bpf.h new file mode 100644 index 0000000..a659a91 --- /dev/null +++ b/include/trace/events/bpf.h @@ -0,0 +1,30 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM bpf + +#if !defined(_TRACE_BPF_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_BPF_H + +#include + +TRACE_EVENT(bpf_output_data, + + TP_PROTO(u64 *src, int len), + + TP_ARGS(src, len), + + TP_STRUCT__entry( + __dynamic_array(u64, buf, len) + ), + + TP_fast_assign( + memcpy(__get_dynamic_array(buf), src, len * sizeof(u64)); + ), + + TP_printk("%s", __print_array(__get_dynamic_array(buf), + __get_dynamic_array_len(buf), 8)) +); + +#endif /* _TRACE_BPF_H */ + +/* This part must be outside protection */ +#include diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 29ef6f9..5068ab1 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -249,6 +249,13 @@ enum bpf_func_id { * Return: 0 on success */ BPF_FUNC_get_current_comm, + + /** + * int bpf_output_trace_data(void *src, int size) + * Return: 0 on success + */ + 
BPF_FUNC_output_trace_data, + __BPF_FUNC_MAX_ID, }; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 88a041a..31fc31a 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -11,7 +11,10 @@ #include #include #include + #include "trace.h" +#define CREATE_TRACE_POINTS +#include static DEFINE_PER_CPU(int, bpf_prog_active); @@ -79,6 +82,24 @@ static const struct bpf_func_proto bpf_probe_read_proto = { .arg3_type = ARG_ANYTHING, }; +static u64 bpf_output_trace_data(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + void *src = (void *) (long) r1; + int size = (int) r2; + + trace_bpf_output_data(src, size / sizeof(u64)); + + return 0; +} + +static const struct bpf_func_proto bpf_output_trace_data_proto = { + .func = bpf_output_trace_data, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_STACK, + .arg2_type = ARG_CONST_STACK_SIZE, +}; + /* * limited trace_printk() * only %d %u %x %ld %lu %lx %lld %llu %llx %p conversion specifiers allowed @@ -169,6 +190,8 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func return &bpf_map_delete_elem_proto; case BPF_FUNC_probe_read: return &bpf_probe_read_proto; + case BPF_FUNC_output_trace_data: + return &bpf_output_trace_data_proto; case BPF_FUNC_ktime_get_ns: return &bpf_ktime_get_ns_proto; case BPF_FUNC_tail_call: diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h index bdf1c16..0aeaebe 100644 --- a/samples/bpf/bpf_helpers.h +++ b/samples/bpf/bpf_helpers.h @@ -59,5 +59,7 @@ static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flag (void *) BPF_FUNC_l3_csum_replace; static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) = (void *) BPF_FUNC_l4_csum_replace; +static int (*bpf_output_trace_data)(void *src, int size) = + (void *) BPF_FUNC_output_trace_data; #endif -- 1.8.5.2 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to 
majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/