Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S933118AbbGUPPe (ORCPT ); Tue, 21 Jul 2015 11:15:34 -0400 Received: from mail-pd0-f181.google.com ([209.85.192.181]:33226 "EHLO mail-pd0-f181.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S933100AbbGUPPb (ORCPT ); Tue, 21 Jul 2015 11:15:31 -0400 Date: Wed, 22 Jul 2015 00:13:09 +0900 From: Namhyung Kim To: He Kuang Cc: rostedt@goodmis.org, ast@plumgrid.com, masami.hiramatsu.pt@hitachi.com, acme@kernel.org, a.p.zijlstra@chello.nl, mingo@redhat.com, jolsa@kernel.org, wangnan0@huawei.com, pi3orama@163.com, linux-kernel@vger.kernel.org Subject: Re: [RFC PATCH v6 2/2] bpf: Introduce function for outputing data to perf event Message-ID: <20150721151309.GC10689@danjae.kornet> References: <1437448130-134621-1-git-send-email-hekuang@huawei.com> <1437448130-134621-3-git-send-email-hekuang@huawei.com> MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Disposition: inline In-Reply-To: <1437448130-134621-3-git-send-email-hekuang@huawei.com> User-Agent: Mutt/1.5.23+89 (0255b37be491) (2014-03-12) Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 5246 Lines: 167 On Tue, Jul 21, 2015 at 03:08:50AM +0000, He Kuang wrote: > There're scenarios that we need an eBPF program to record not only > kprobe point args, but also the PMU counters, time latencies or the > number of cache misses between two probe points and other information > when the probe point is entered. > > This patch adds a new trace event to establish infrastructure for bpf to > output data to perf. Userspace perf tools can detect and use this event > as using the existing tracepoint events. 
> > New bpf trace event entry in debugfs: > > /sys/kernel/debug/tracing/events/bpf/bpf_output_data > > Userspace perf tools detect the new tracepoint event as: > > bpf:bpf_output_data [Tracepoint event] > > Data in ring-buffer of perf events added to this event will be polled > out, sample types and other attributes can be adjusted to those events > directly without touching the original kprobe events. > > The bpf helper function gives eBPF program ability to output data as > perf sample event. This helper simply calls the new trace event and > userspace perf tools can record the BPF ftrace event to collect those > records. > > Signed-off-by: He Kuang > Acked-by: Alexei Starovoitov Acked-by: Namhyung Kim Thanks, Namhyung > --- > include/trace/events/bpf.h | 30 ++++++++++++++++++++++++++++++ > include/uapi/linux/bpf.h | 7 +++++++ > kernel/trace/bpf_trace.c | 23 +++++++++++++++++++++++ > samples/bpf/bpf_helpers.h | 2 ++ > 4 files changed, 62 insertions(+) > create mode 100644 include/trace/events/bpf.h > > diff --git a/include/trace/events/bpf.h b/include/trace/events/bpf.h > new file mode 100644 > index 0000000..6b739b8 > --- /dev/null > +++ b/include/trace/events/bpf.h > @@ -0,0 +1,30 @@ > +#undef TRACE_SYSTEM > +#define TRACE_SYSTEM bpf > + > +#if !defined(_TRACE_BPF_H) || defined(TRACE_HEADER_MULTI_READ) > +#define _TRACE_BPF_H > + > +#include > + > +TRACE_EVENT(bpf_output_data, > + > + TP_PROTO(u64 *src, int size), > + > + TP_ARGS(src, size), > + > + TP_STRUCT__entry( > + __dynamic_array(u8, buf, size) > + ), > + > + TP_fast_assign( > + memcpy(__get_dynamic_array(buf), src, size); > + ), > + > + TP_printk("%s", __print_hex(__get_dynamic_array(buf), > + __get_dynamic_array_len(buf))) > +); > + > +#endif /* _TRACE_BPF_H */ > + > +/* This part must be outside protection */ > +#include > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index 29ef6f9..5068ab1 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -249,6 +249,13 
@@ enum bpf_func_id { > * Return: 0 on success > */ > BPF_FUNC_get_current_comm, > + > + /** > + * int bpf_output_trace_data(void *src, int size) > + * Return: 0 on success > + */ > + BPF_FUNC_output_trace_data, > + > __BPF_FUNC_MAX_ID, > }; > > diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c > index 88a041a..219f670 100644 > --- a/kernel/trace/bpf_trace.c > +++ b/kernel/trace/bpf_trace.c > @@ -11,7 +11,10 @@ > #include > #include > #include > + > #include "trace.h" > +#define CREATE_TRACE_POINTS > +#include > > static DEFINE_PER_CPU(int, bpf_prog_active); > > @@ -79,6 +82,24 @@ static const struct bpf_func_proto bpf_probe_read_proto = { > .arg3_type = ARG_ANYTHING, > }; > > +static u64 bpf_output_trace_data(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) > +{ > + void *src = (void *) (long) r1; > + int size = (int) r2; > + > + trace_bpf_output_data(src, size); > + > + return 0; > +} > + > +static const struct bpf_func_proto bpf_output_trace_data_proto = { > + .func = bpf_output_trace_data, > + .gpl_only = true, > + .ret_type = RET_INTEGER, > + .arg1_type = ARG_PTR_TO_STACK, > + .arg2_type = ARG_CONST_STACK_SIZE, > +}; > + > /* > * limited trace_printk() > * only %d %u %x %ld %lu %lx %lld %llu %llx %p conversion specifiers allowed > @@ -169,6 +190,8 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func > return &bpf_map_delete_elem_proto; > case BPF_FUNC_probe_read: > return &bpf_probe_read_proto; > + case BPF_FUNC_output_trace_data: > + return &bpf_output_trace_data_proto; > case BPF_FUNC_ktime_get_ns: > return &bpf_ktime_get_ns_proto; > case BPF_FUNC_tail_call: > diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h > index bdf1c16..0aeaebe 100644 > --- a/samples/bpf/bpf_helpers.h > +++ b/samples/bpf/bpf_helpers.h > @@ -59,5 +59,7 @@ static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flag > (void *) BPF_FUNC_l3_csum_replace; > static int (*bpf_l4_csum_replace)(void *ctx, int off, int 
from, int to, int flags) = > (void *) BPF_FUNC_l4_csum_replace; > +static int (*bpf_output_trace_data)(void *src, int size) = > + (void *) BPF_FUNC_output_trace_data; > > #endif > -- > 1.8.5.2 > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/