From: He Kuang <hekuang@huawei.com>
To: <rostedt@goodmis.org>, <ast@plumgrid.com>,
        <masami.hiramatsu.pt@hitachi.com>, <acme@kernel.org>,
        <a.p.zijlstra@chello.nl>, <mingo@redhat.com>, <namhyung@kernel.org>,
        <jolsa@kernel.org>
CC: <wangnan0@huawei.com>, <pi3orama@163.com>, <linux-kernel@vger.kernel.org>,
        <hekuang@huawei.com>
Subject: [RFC PATCH v5 3/3] bpf: Introduce function for outputting trace event data
Date: Tue, 14 Jul 2015 01:59:31 +0000
Message-ID: <1436839171-31527-4-git-send-email-hekuang@huawei.com>
In-Reply-To: <1436839171-31527-1-git-send-email-hekuang@huawei.com>
References: <1436839171-31527-1-git-send-email-hekuang@huawei.com>
MIME-Version: 1.0
Content-Type: text/plain
Sender: linux-kernel-owner@vger.kernel.org
Content-Length: 4767
Lines: 158

There are scenarios in which we need an eBPF program to record not only
the kprobe point arguments, but also PMU counters, time latencies, the
number of cache misses between two probe points, and other information
when the probe point is entered.

This patch adds a new trace event to establish the infrastructure for bpf
to output data to perf. Userspace perf tools can detect and use this event
in the same way as the existing tracepoint events.

New bpf trace event entry in debugfs:

     /sys/kernel/debug/tracing/events/bpf/bpf_output_data

Userspace perf tools detect the new tracepoint event as:

     bpf:bpf_output_data                          [Tracepoint event]

Data in the ring buffer of perf events added to this event will be polled
out; sample types and other attributes can be adjusted on those events
directly without touching the original kprobe events.

The bpf helper function gives eBPF programs the ability to output data as
perf sample events. This helper simply calls the new trace event, and
userspace perf tools can record the BPF ftrace event to collect those
records.

Signed-off-by: He Kuang <hekuang@huawei.com>
---
 include/trace/events/bpf.h | 30 ++++++++++++++++++++++++++++++
 include/uapi/linux/bpf.h   |  7 +++++++
 kernel/trace/bpf_trace.c   | 23 +++++++++++++++++++++++
 samples/bpf/bpf_helpers.h  |  2 ++
 4 files changed, 62 insertions(+)
 create mode 100644 include/trace/events/bpf.h

diff --git a/include/trace/events/bpf.h b/include/trace/events/bpf.h
new file mode 100644
index 0000000..82ace8a
--- /dev/null
+++ b/include/trace/events/bpf.h
@@ -0,0 +1,30 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM bpf
+
+#if !defined(_TRACE_BPF_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_BPF_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(bpf_output_data,
+	/* Event payload: a copy of size bytes starting at src. */
+	TP_PROTO(void *src, int size),
+
+	TP_ARGS(src, size),
+
+	TP_STRUCT__entry(
+		__dynamic_array(u8,		buf,		size)
+	),
+
+	TP_fast_assign(
+		memcpy(__get_dynamic_array(buf), src, size);
+	),
+	/* Text output: the captured bytes passed through __print_hex(). */
+	TP_printk("%s", __print_hex(__get_dynamic_array(buf),
+				    __get_dynamic_array_len(buf)))
+);
+
+#endif /* _TRACE_BPF_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 29ef6f9..5068ab1 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -249,6 +249,13 @@ enum bpf_func_id {
 	 * Return: 0 on success
 	 */
 	BPF_FUNC_get_current_comm,
+
+	/**
+	 * int bpf_output_trace_data(void *src, int size)
+	 * Return: 0 on success
+	 */
+	BPF_FUNC_output_trace_data,
+
 	__BPF_FUNC_MAX_ID,
 };
 
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 88a041a..219f670 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -11,7 +11,10 @@
 #include <linux/filter.h>
 #include <linux/uaccess.h>
 #include <linux/ctype.h>
+
 #include "trace.h"
+#define CREATE_TRACE_POINTS
+#include <trace/events/bpf.h>
 
 static DEFINE_PER_CPU(int, bpf_prog_active);
 
@@ -79,6 +82,24 @@ static const struct bpf_func_proto bpf_probe_read_proto = {
 	.arg3_type	= ARG_ANYTHING,
 };
 
+static u64 bpf_output_trace_data(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+	void *src = (void *) (long) r1;	/* r1: address of the data */
+	int size = (int) r2;		/* r2: length in bytes */
+
+	trace_bpf_output_data(src, size);	/* r3-r5 are unused */
+
+	return 0;	/* unconditionally reports success */
+}
+
+static const struct bpf_func_proto bpf_output_trace_data_proto = {
+	.func		= bpf_output_trace_data,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_STACK,	/* r1: src */
+	.arg2_type	= ARG_CONST_STACK_SIZE,	/* r2: size */
+};
+
 /*
  * limited trace_printk()
  * only %d %u %x %ld %lu %lx %lld %llu %llx %p conversion specifiers allowed
@@ -169,6 +190,8 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func
 		return &bpf_map_delete_elem_proto;
 	case BPF_FUNC_probe_read:
 		return &bpf_probe_read_proto;
+	case BPF_FUNC_output_trace_data:
+		return &bpf_output_trace_data_proto;
 	case BPF_FUNC_ktime_get_ns:
 		return &bpf_ktime_get_ns_proto;
 	case BPF_FUNC_tail_call:
diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
index bdf1c16..0aeaebe 100644
--- a/samples/bpf/bpf_helpers.h
+++ b/samples/bpf/bpf_helpers.h
@@ -59,5 +59,7 @@ static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flag
 	(void *) BPF_FUNC_l3_csum_replace;
 static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) =
 	(void *) BPF_FUNC_l4_csum_replace;
+static int (*bpf_output_trace_data)(void *src, int size) =
+	(void *) BPF_FUNC_output_trace_data;	/* helper id used as call target */
 
 #endif
-- 
1.8.5.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/