Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1753193AbbGNCBQ (ORCPT ); Mon, 13 Jul 2015 22:01:16 -0400 Received: from szxga02-in.huawei.com ([119.145.14.65]:12347 "EHLO szxga02-in.huawei.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751741AbbGNCAF (ORCPT ); Mon, 13 Jul 2015 22:00:05 -0400 From: He Kuang To: , , , , , , , CC: , , , Subject: [RFC PATCH v5 3/3] bpf: Introduce function for outputing trace event data Date: Tue, 14 Jul 2015 01:59:31 +0000 Message-ID: <1436839171-31527-4-git-send-email-hekuang@huawei.com> X-Mailer: git-send-email 1.8.3.4 In-Reply-To: <1436839171-31527-1-git-send-email-hekuang@huawei.com> References: <1436839171-31527-1-git-send-email-hekuang@huawei.com> MIME-Version: 1.0 Content-Type: text/plain X-Originating-IP: [10.107.197.210] X-CFilter-Loop: Reflected Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 4767 Lines: 158 There are scenarios in which we need an eBPF program to record not only kprobe point args, but also the PMU counters, time latencies or the number of cache misses between two probe points and other information when the probe point is entered. This patch adds a new trace event to establish the infrastructure for bpf to output data to perf. Userspace perf tools can detect and use this event in the same way as the existing tracepoint events. New bpf trace event entry in debugfs: /sys/kernel/debug/tracing/events/bpf/bpf_output_data Userspace perf tools detect the new tracepoint event as: bpf:bpf_output_data [Tracepoint event] Data in the ring buffer of perf events added to this event will be polled out; sample types and other attributes can be adjusted on those events directly without touching the original kprobe events. The bpf helper function gives an eBPF program the ability to output data as a perf sample event. 
This helper simply calls the new trace event and userspace perf tools can record the BPF ftrace event to collect those records. Signed-off-by: He Kuang --- include/trace/events/bpf.h | 30 ++++++++++++++++++++++++++++++ include/uapi/linux/bpf.h | 7 +++++++ kernel/trace/bpf_trace.c | 23 +++++++++++++++++++++++ samples/bpf/bpf_helpers.h | 2 ++ 4 files changed, 62 insertions(+) create mode 100644 include/trace/events/bpf.h diff --git a/include/trace/events/bpf.h b/include/trace/events/bpf.h new file mode 100644 index 0000000..82ace8a --- /dev/null +++ b/include/trace/events/bpf.h @@ -0,0 +1,30 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM bpf + +#if !defined(_TRACE_BPF_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_BPF_H + +#include + +TRACE_EVENT(bpf_output_data, + + TP_PROTO(void *src, int size), + + TP_ARGS(src, size), + + TP_STRUCT__entry( + __dynamic_array(u8, buf, size) + ), + + TP_fast_assign( + memcpy(__get_dynamic_array(buf), src, size); + ), + + TP_printk("%s", __print_hex(__get_dynamic_array(buf), + __get_dynamic_array_len(buf))) +); + +#endif /* _TRACE_BPF_H */ + +/* This part must be outside protection */ +#include diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 29ef6f9..5068ab1 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -249,6 +249,13 @@ enum bpf_func_id { * Return: 0 on success */ BPF_FUNC_get_current_comm, + + /** + * int bpf_output_trace_data(void *src, int size) + * Return: 0 on success + */ + BPF_FUNC_output_trace_data, + __BPF_FUNC_MAX_ID, }; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 88a041a..219f670 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -11,7 +11,10 @@ #include #include #include + #include "trace.h" +#define CREATE_TRACE_POINTS +#include static DEFINE_PER_CPU(int, bpf_prog_active); @@ -79,6 +82,24 @@ static const struct bpf_func_proto bpf_probe_read_proto = { .arg3_type = ARG_ANYTHING, }; +static u64 bpf_output_trace_data(u64 
r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + void *src = (void *) (long) r1; + int size = (int) r2; + + trace_bpf_output_data(src, size); + + return 0; +} + +static const struct bpf_func_proto bpf_output_trace_data_proto = { + .func = bpf_output_trace_data, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_STACK, + .arg2_type = ARG_CONST_STACK_SIZE, +}; + /* * limited trace_printk() * only %d %u %x %ld %lu %lx %lld %llu %llx %p conversion specifiers allowed @@ -169,6 +190,8 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func return &bpf_map_delete_elem_proto; case BPF_FUNC_probe_read: return &bpf_probe_read_proto; + case BPF_FUNC_output_trace_data: + return &bpf_output_trace_data_proto; case BPF_FUNC_ktime_get_ns: return &bpf_ktime_get_ns_proto; case BPF_FUNC_tail_call: diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h index bdf1c16..0aeaebe 100644 --- a/samples/bpf/bpf_helpers.h +++ b/samples/bpf/bpf_helpers.h @@ -59,5 +59,7 @@ static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flag (void *) BPF_FUNC_l3_csum_replace; static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) = (void *) BPF_FUNC_l4_csum_replace; +static int (*bpf_output_trace_data)(void *src, int size) = + (void *) BPF_FUNC_output_trace_data; #endif -- 1.8.5.2 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/