Date: Wed, 22 Jul 2015 00:13:09 +0900
From: Namhyung Kim <namhyung@kernel.org>
To: He Kuang <hekuang@huawei.com>
Cc: rostedt@goodmis.org, ast@plumgrid.com, masami.hiramatsu.pt@hitachi.com,
        acme@kernel.org, a.p.zijlstra@chello.nl, mingo@redhat.com,
        jolsa@kernel.org, wangnan0@huawei.com, pi3orama@163.com,
        linux-kernel@vger.kernel.org
Subject: Re: [RFC PATCH v6 2/2] bpf: Introduce function for outputing data to
 perf event
Message-ID: <20150721151309.GC10689@danjae.kornet>
References: <1437448130-134621-1-git-send-email-hekuang@huawei.com>
 <1437448130-134621-3-git-send-email-hekuang@huawei.com>
MIME-Version: 1.0
Content-Type: text/plain; charset=utf-8
Content-Disposition: inline
In-Reply-To: <1437448130-134621-3-git-send-email-hekuang@huawei.com>
User-Agent: Mutt/1.5.23+89 (0255b37be491) (2014-03-12)
Sender: linux-kernel-owner@vger.kernel.org
Content-Length: 5246
Lines: 167

On Tue, Jul 21, 2015 at 03:08:50AM +0000, He Kuang wrote:
> There are scenarios where we need an eBPF program to record not only
> kprobe point args, but also the PMU counters, time latencies or the
> number of cache misses between two probe points and other information
> when the probe point is entered.
> 
> This patch adds a new trace event to establish infrastructure for bpf to
> output data to perf. Userspace perf tools can detect and use this event
> in the same way as the existing tracepoint events.
> 
> New bpf trace event entry in debugfs:
> 
>      /sys/kernel/debug/tracing/events/bpf/bpf_output_data
> 
> Userspace perf tools detect the new tracepoint event as:
> 
>      bpf:bpf_output_data                          [Tracepoint event]
> 
> Data in the ring buffer of perf events added to this event will be
> polled out; sample types and other attributes can be adjusted on those
> events directly without touching the original kprobe events.
> 
> The bpf helper function gives eBPF programs the ability to output data
> as a perf sample event. This helper simply calls the new trace event, and
> userspace perf tools can record the BPF ftrace event to collect those
> records.
> 
> Signed-off-by: He Kuang <hekuang@huawei.com>
> Acked-by: Alexei Starovoitov <ast@plumgrid.com>

Acked-by: Namhyung Kim <namhyung@kernel.org>

Thanks,
Namhyung


> ---
>  include/trace/events/bpf.h | 30 ++++++++++++++++++++++++++++++
>  include/uapi/linux/bpf.h   |  7 +++++++
>  kernel/trace/bpf_trace.c   | 23 +++++++++++++++++++++++
>  samples/bpf/bpf_helpers.h  |  2 ++
>  4 files changed, 62 insertions(+)
>  create mode 100644 include/trace/events/bpf.h
> 
> diff --git a/include/trace/events/bpf.h b/include/trace/events/bpf.h
> new file mode 100644
> index 0000000..6b739b8
> --- /dev/null
> +++ b/include/trace/events/bpf.h
> @@ -0,0 +1,30 @@
> +#undef TRACE_SYSTEM
> +#define TRACE_SYSTEM bpf
> +
> +#if !defined(_TRACE_BPF_H) || defined(TRACE_HEADER_MULTI_READ)
> +#define _TRACE_BPF_H
> +
> +#include <linux/tracepoint.h>
> +
> +TRACE_EVENT(bpf_output_data,
> +
> +	TP_PROTO(u64 *src, int size),
> +
> +	TP_ARGS(src, size),
> +
> +	TP_STRUCT__entry(
> +		__dynamic_array(u8,		buf,		size)
> +	),
> +
> +	TP_fast_assign(
> +		memcpy(__get_dynamic_array(buf), src, size);
> +	),
> +
> +	TP_printk("%s", __print_hex(__get_dynamic_array(buf),
> +				    __get_dynamic_array_len(buf)))
> +);
> +
> +#endif /* _TRACE_BPF_H */
> +
> +/* This part must be outside protection */
> +#include <trace/define_trace.h>
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 29ef6f9..5068ab1 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -249,6 +249,13 @@ enum bpf_func_id {
>  	 * Return: 0 on success
>  	 */
>  	BPF_FUNC_get_current_comm,
> +
> +	/**
> +	 * int bpf_output_trace_data(void *src, int size)
> +	 * Return: 0 on success
> +	 */
> +	BPF_FUNC_output_trace_data,
> +
>  	__BPF_FUNC_MAX_ID,
>  };
>  
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index 88a041a..219f670 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -11,7 +11,10 @@
>  #include <linux/filter.h>
>  #include <linux/uaccess.h>
>  #include <linux/ctype.h>
> +
>  #include "trace.h"
> +#define CREATE_TRACE_POINTS
> +#include <trace/events/bpf.h>
>  
>  static DEFINE_PER_CPU(int, bpf_prog_active);
>  
> @@ -79,6 +82,24 @@ static const struct bpf_func_proto bpf_probe_read_proto = {
>  	.arg3_type	= ARG_ANYTHING,
>  };
>  
> +static u64 bpf_output_trace_data(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
> +{
> +	void *src = (void *) (long) r1;
> +	int size = (int) r2;
> +
> +	trace_bpf_output_data(src, size);
> +
> +	return 0;
> +}
> +
> +static const struct bpf_func_proto bpf_output_trace_data_proto = {
> +	.func		= bpf_output_trace_data,
> +	.gpl_only	= true,
> +	.ret_type	= RET_INTEGER,
> +	.arg1_type	= ARG_PTR_TO_STACK,
> +	.arg2_type	= ARG_CONST_STACK_SIZE,
> +};
> +
>  /*
>   * limited trace_printk()
>   * only %d %u %x %ld %lu %lx %lld %llu %llx %p conversion specifiers allowed
> @@ -169,6 +190,8 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func
>  		return &bpf_map_delete_elem_proto;
>  	case BPF_FUNC_probe_read:
>  		return &bpf_probe_read_proto;
> +	case BPF_FUNC_output_trace_data:
> +		return &bpf_output_trace_data_proto;
>  	case BPF_FUNC_ktime_get_ns:
>  		return &bpf_ktime_get_ns_proto;
>  	case BPF_FUNC_tail_call:
> diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
> index bdf1c16..0aeaebe 100644
> --- a/samples/bpf/bpf_helpers.h
> +++ b/samples/bpf/bpf_helpers.h
> @@ -59,5 +59,7 @@ static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flag
>  	(void *) BPF_FUNC_l3_csum_replace;
>  static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) =
>  	(void *) BPF_FUNC_l4_csum_replace;
> +static int (*bpf_output_trace_data)(void *src, int size) =
> +	(void *) BPF_FUNC_output_trace_data;
>  
>  #endif
> -- 
> 1.8.5.2
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/