This 3 patches demonstrate how to attach BPF programs on uprobe events.
They are based on '[RFC PATCH v8 00/49] perf tools: filtering events
using eBPF programs'.
The first patch fix a bug in perf probe.
The second patch is a kernel side patch, which minimic procedures for kprobe
events processing on uprobe events.
The final patch improve BPF object section name to allow perf probing at
uprobe string.
With this 3 patches perf now can connect user space events and kernel
space events with eBPF maps. I believe some useful things could be done.
Wang Nan (3):
perf probe: Init symbol as kprobe if any pev is kprobe
tracing, perf: Implement BPF programs attached to uprobes
perf tools: Support attach BPF program on uprobe events
include/linux/ftrace_event.h | 5 +++
kernel/events/core.c | 4 +--
kernel/trace/trace_uprobe.c | 5 +++
tools/perf/util/bpf-loader.c | 78 +++++++++++++++++++++++++++++++++++++++----
tools/perf/util/probe-event.c | 15 ++++++++-
5 files changed, 97 insertions(+), 10 deletions(-)
--
1.8.3.4
Before this patch, add_perf_probe_events() init symbol maps only for
uprobe if the first pev passed to it is a uprobe event. However, with
the incoming BPF uprobe support, now it will be possible to pass an
array with combined kprobe and uprobe events to add_perf_probe_events().
This patch check all pevs instead of the first one, and init kernel
symbol if any events is not uprobe.
Signed-off-by: Wang Nan <[email protected]>
---
tools/perf/util/probe-event.c | 15 ++++++++++++++-
1 file changed, 14 insertions(+), 1 deletion(-)
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index b386d2f..a2b3026 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -2802,8 +2802,21 @@ int cleanup_perf_probe_event(struct perf_probe_event *pev)
int add_perf_probe_events(struct perf_probe_event *pevs, int npevs, bool cleanup)
{
int i, ret;
+ bool user_only = true;
- ret = init_symbol_maps(pevs->uprobes);
+ /* If any pev is kprobe, init kernel symbols. */
+ for (i = 0; i < npevs; i++) {
+ if (!pevs[i].uprobes) {
+ user_only = false;
+ break;
+ }
+ }
+
+ /*
+ * Compiler can drop user_only:
+ * ret = init_symbol_maps(i >= npevs);
+ */
+ ret = init_symbol_maps(user_only);
if (ret < 0)
return ret;
--
1.8.3.4
By copying BPF related operation to uprobe processing path, this patch
allow users attach BPF programs to uprobes like what they are already
doing on kprobes.
After this patch, users are allowed to use PERF_EVENT_IOC_SET_BPF on a
uprobe perf event. Which make it possible to profile user space programs
and kernel events together using BPF.
Signed-off-by: Wang Nan <[email protected]>
---
include/linux/ftrace_event.h | 5 +++++
kernel/events/core.c | 4 ++--
kernel/trace/trace_uprobe.c | 5 +++++
3 files changed, 12 insertions(+), 2 deletions(-)
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index f9ecf63..eec6f27 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -254,6 +254,7 @@ enum {
TRACE_EVENT_FL_USE_CALL_FILTER_BIT,
TRACE_EVENT_FL_TRACEPOINT_BIT,
TRACE_EVENT_FL_KPROBE_BIT,
+ TRACE_EVENT_FL_UPROBE_BIT,
};
/*
@@ -268,6 +269,7 @@ enum {
* USE_CALL_FILTER - For ftrace internal events, don't use file filter
* TRACEPOINT - Event is a tracepoint
* KPROBE - Event is a kprobe
+ * UPROBE - Event is a uprobe
*/
enum {
TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT),
@@ -278,8 +280,11 @@ enum {
TRACE_EVENT_FL_USE_CALL_FILTER = (1 << TRACE_EVENT_FL_USE_CALL_FILTER_BIT),
TRACE_EVENT_FL_TRACEPOINT = (1 << TRACE_EVENT_FL_TRACEPOINT_BIT),
TRACE_EVENT_FL_KPROBE = (1 << TRACE_EVENT_FL_KPROBE_BIT),
+ TRACE_EVENT_FL_UPROBE = (1 << TRACE_EVENT_FL_UPROBE_BIT),
};
+#define TRACE_EVENT_FL_UKPROBE (TRACE_EVENT_FL_KPROBE | TRACE_EVENT_FL_UPROBE)
+
struct ftrace_event_call {
struct list_head list;
struct ftrace_event_class *class;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 9e0773d..18835de 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6775,8 +6775,8 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
if (event->tp_event->prog)
return -EEXIST;
- if (!(event->tp_event->flags & TRACE_EVENT_FL_KPROBE))
- /* bpf programs can only be attached to kprobes */
+ if (!(event->tp_event->flags & TRACE_EVENT_FL_UKPROBE))
+ /* bpf programs can only be attached to u/kprobes */
return -EINVAL;
prog = bpf_prog_get(prog_fd);
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 6dd022c..0b580bd 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -1095,11 +1095,15 @@ static void __uprobe_perf_func(struct trace_uprobe *tu,
{
struct ftrace_event_call *call = &tu->tp.call;
struct uprobe_trace_entry_head *entry;
+ struct bpf_prog *prog = call->prog;
struct hlist_head *head;
void *data;
int size, esize;
int rctx;
+ if (prog && !trace_call_bpf(prog, regs))
+ return;
+
esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
size = esize + tu->tp.size + dsize;
@@ -1289,6 +1293,7 @@ static int register_uprobe_event(struct trace_uprobe *tu)
return -ENODEV;
}
+ call->flags = TRACE_EVENT_FL_UPROBE;
call->class->reg = trace_uprobe_register;
call->data = tu;
ret = trace_add_event_call(call);
--
1.8.3.4
This patch append new syntax to BPF object section name to support
probing at uprobe event. Now we can use BPF program like this:
SEC(
"target:/lib64/libc.so.6\n"
"libcwrite=__write"
)
int libcwrite(void *ctx)
{
return 1;
}
Where, in section name of a program, before the main config string,
we can use 'key:value' style options. Now the only option key "target"
is for uprobe probing.
Signed-off-by: Wang Nan <[email protected]>
---
tools/perf/util/bpf-loader.c | 78 ++++++++++++++++++++++++++++++++++++++++----
1 file changed, 71 insertions(+), 7 deletions(-)
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index 55843d8..2898f9c 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -73,6 +73,74 @@ bpf_prog_priv__clear(struct bpf_program *prog __maybe_unused,
}
static int
+do_config(const char *key, const char *value,
+ struct perf_probe_event *pev)
+{
+ pr_debug("config bpf program: %s:%s\n", key, value);
+ if (strcmp(key, "target") == 0) {
+ pev->uprobes = true;
+ pev->target = strdup(value);
+ }
+
+ return 0;
+}
+
+static const char *
+parse_config_kvpair(const char *config_str, struct perf_probe_event *pev)
+{
+ char *text = strdup(config_str);
+ char *sep, *line;
+ const char *main_str = NULL;
+ int err = 0;
+
+ if (!text) {
+ pr_err("No enough memory: dup config_str failed\n");
+ return NULL;
+ }
+
+ line = text;
+ while ((sep = strchr(line, '\n'))) {
+ char *semi;
+
+ *sep = '\0';
+ semi = strchr(line, ':');
+ if (!semi)
+ goto nextline;
+ *semi = '\0';
+ if ((err = do_config(line, semi + 1, pev)))
+ break;
+nextline:
+ line = sep + 1;
+ }
+
+ if (!err)
+ main_str = config_str + (line - text);
+ free(text);
+
+ return main_str;
+}
+
+static int
+parse_config_pev(const char *config_str, struct perf_probe_event *pev)
+{
+ const char *main_str;
+ int err;
+
+ main_str = parse_config_kvpair(config_str, pev);
+ if (!main_str)
+ return -EINVAL;
+
+ err = parse_perf_probe_command(main_str, pev);
+ if (err < 0) {
+ pr_err("bpf: '%s' is not a valid config string\n",
+ config_str);
+ /* parse failed, don't need clear pev. */
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int
config_bpf_program(struct bpf_program *prog)
{
struct perf_probe_event *pev = alloc_perf_probe_event();
@@ -91,13 +159,9 @@ config_bpf_program(struct bpf_program *prog)
}
pr_debug("bpf: config program '%s'\n", config_str);
- err = parse_perf_probe_command(config_str, pev);
- if (err < 0) {
- pr_err("bpf: '%s' is not a valid config string\n",
- config_str);
- /* parse failed, don't need clear pev. */
- return -EINVAL;
- }
+ err = parse_config_pev(config_str, pev);
+ if (err)
+ return err;
if (pev->group && strcmp(pev->group, PERF_BPF_PROBE_GROUP)) {
pr_err("bpf: '%s': group for event is set and not '%s'.\n",
--
1.8.3.4
On 6/25/15 3:37 AM, Wang Nan wrote:
> By copying BPF related operation to uprobe processing path, this patch
> allow users attach BPF programs to uprobes like what they are already
> doing on kprobes.
>
> After this patch, users are allowed to use PERF_EVENT_IOC_SET_BPF on a
> uprobe perf event. Which make it possible to profile user space programs
> and kernel events together using BPF.
>
> Signed-off-by: Wang Nan <[email protected]>
That's great.
Acked-by: Alexei Starovoitov <[email protected]>
On 6/25/15 3:37 AM, Wang Nan wrote:
> This patch append new syntax to BPF object section name to support
> probing at uprobe event. Now we can use BPF program like this:
>
> SEC(
> "target:/lib64/libc.so.6\n"
> "libcwrite=__write"
> )
> int libcwrite(void *ctx)
> {
> return 1;
> }
>
> Where, in section name of a program, before the main config string,
> we can use 'key:value' style options. Now the only option key "target"
> is for uprobe probing.
>
> Signed-off-by: Wang Nan<[email protected]>
looks good.
why use ':' separator? '=' should work as well and will be more
consistent ?
On 6/25/15 3:37 AM, Wang Nan wrote:
> Before this patch, add_perf_probe_events() init symbol maps only for
> uprobe if the first pev passed to it is a uprobe event. However, with
> the incoming BPF uprobe support, now it will be possible to pass an
> array with combined kprobe and uprobe events to add_perf_probe_events().
>
> This patch check all pevs instead of the first one, and init kernel
> symbol if any events is not uprobe.
>
> Signed-off-by: Wang Nan<[email protected]>
same as other patches: please decipher 'pev'
On 2015/6/26 16:47, Alexei Starovoitov wrote:
> On 6/25/15 3:37 AM, Wang Nan wrote:
>> This patch append new syntax to BPF object section name to support
>> probing at uprobe event. Now we can use BPF program like this:
>>
>> SEC(
>> "target:/lib64/libc.so.6\n"
>> "libcwrite=__write"
>> )
>> int libcwrite(void *ctx)
>> {
>> return 1;
>> }
>>
>> Where, in section name of a program, before the main config string,
>> we can use 'key:value' style options. Now the only option key "target"
>> is for uprobe probing.
>>
>> Signed-off-by: Wang Nan<[email protected]>
>
> looks good.
> why use ':' separator? '=' should work as well and will be more
> consistent ?
Will do.
Thank you.
On 2015/06/25 19:37, Wang Nan wrote:
> Before this patch, add_perf_probe_events() init symbol maps only for
> uprobe if the first pev passed to it is a uprobe event. However, with
> the incoming BPF uprobe support, now it will be possible to pass an
> array with combined kprobe and uprobe events to add_perf_probe_events().
This description is not correct. Actually, add_perf_probe_events already
supports mix of uprobes and kprobes. However, from the command line syntax
constrains the first elements of the probe_event arrays must be kprobes.
So, if the array starts with uprobes, no kprobes should be there.
>
> This patch check all pevs instead of the first one, and init kernel
> symbol if any events is not uprobe.
Anyway, I prefer to call init_symbol_maps() with "false" :)
Thank you,
>
> Signed-off-by: Wang Nan <[email protected]>
> ---
> tools/perf/util/probe-event.c | 15 ++++++++++++++-
> 1 file changed, 14 insertions(+), 1 deletion(-)
>
> diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
> index b386d2f..a2b3026 100644
> --- a/tools/perf/util/probe-event.c
> +++ b/tools/perf/util/probe-event.c
> @@ -2802,8 +2802,21 @@ int cleanup_perf_probe_event(struct perf_probe_event *pev)
> int add_perf_probe_events(struct perf_probe_event *pevs, int npevs, bool cleanup)
> {
> int i, ret;
> + bool user_only = true;
>
> - ret = init_symbol_maps(pevs->uprobes);
> + /* If any pev is kprobe, init kernel symbols. */
> + for (i = 0; i < npevs; i++) {
> + if (!pevs[i].uprobes) {
> + user_only = false;
> + break;
> + }
> + }
> +
> + /*
> + * Compiler can drop user_only:
> + * ret = init_symbol_maps(i >= npevs);
> + */
> + ret = init_symbol_maps(user_only);
> if (ret < 0)
> return ret;
>
>
--
Masami HIRAMATSU
Linux Technology Research Center, System Productivity Research Dept.
Center for Technology Innovation - Systems Engineering
Hitachi, Ltd., Research & Development Group
E-mail: [email protected]
On 2015/6/27 15:29, Masami Hiramatsu wrote:
> On 2015/06/25 19:37, Wang Nan wrote:
>> Before this patch, add_perf_probe_events() init symbol maps only for
>> uprobe if the first pev passed to it is a uprobe event. However, with
>> the incoming BPF uprobe support, now it will be possible to pass an
>> array with combined kprobe and uprobe events to add_perf_probe_events().
> This description is not correct. Actually, add_perf_probe_events already
> supports mix of uprobes and kprobes. However, from the command line syntax
> constrains the first elements of the probe_event arrays must be kprobes.
> So, if the array starts with uprobes, no kprobes should be there.
>
>> This patch check all pevs instead of the first one, and init kernel
>> symbol if any events is not uprobe.
> Anyway, I prefer to call init_symbol_maps() with "false" :)
I also prefer "false", so I try to find whether all events are uprobe
instead
of init_symbol_maps(true).
So for this patch only commit message needs to be corrected, the code is
no problem, right?
Thank you.
> Thank you,
>
>> Signed-off-by: Wang Nan <[email protected]>
>> ---
>> tools/perf/util/probe-event.c | 15 ++++++++++++++-
>> 1 file changed, 14 insertions(+), 1 deletion(-)
>>
>> diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
>> index b386d2f..a2b3026 100644
>> --- a/tools/perf/util/probe-event.c
>> +++ b/tools/perf/util/probe-event.c
>> @@ -2802,8 +2802,21 @@ int cleanup_perf_probe_event(struct perf_probe_event *pev)
>> int add_perf_probe_events(struct perf_probe_event *pevs, int npevs, bool cleanup)
>> {
>> int i, ret;
>> + bool user_only = true;
>>
>> - ret = init_symbol_maps(pevs->uprobes);
>> + /* If any pev is kprobe, init kernel symbols. */
>> + for (i = 0; i < npevs; i++) {
>> + if (!pevs[i].uprobes) {
>> + user_only = false;
>> + break;
>> + }
>> + }
>> +
>> + /*
>> + * Compiler can drop user_only:
>> + * ret = init_symbol_maps(i >= npevs);
>> + */
>> + ret = init_symbol_maps(user_only);
>> if (ret < 0)
>> return ret;
>>
>>
>
Hi Wang,
On 2015/06/27 16:34, Wangnan (F) wrote:
>
>
> On 2015/6/27 15:29, Masami Hiramatsu wrote:
>> On 2015/06/25 19:37, Wang Nan wrote:
>>> Before this patch, add_perf_probe_events() init symbol maps only for
>>> uprobe if the first pev passed to it is a uprobe event. However, with
>>> the incoming BPF uprobe support, now it will be possible to pass an
>>> array with combined kprobe and uprobe events to add_perf_probe_events().
>> This description is not correct. Actually, add_perf_probe_events already
>> supports mix of uprobes and kprobes. However, from the command line syntax
>> constrains the first elements of the probe_event arrays must be kprobes.
>> So, if the array starts with uprobes, no kprobes should be there.
>>
>>> This patch check all pevs instead of the first one, and init kernel
>>> symbol if any events is not uprobe.
>> Anyway, I prefer to call init_symbol_maps() with "false" :)
>
> I also prefer "false", so I try to find whether all events are uprobe
> instead
> of init_symbol_maps(true).
>
> So for this patch only commit message needs to be corrected, the code is
> no problem, right?
>
No, I meant you just need to change one line, as below.
- ret = init_symbol_maps(pevs->uprobes);
+ ret = init_symbol_maps(false);
That's enough, I don't like to introduce a bool flag and loop.
Thanks,
> Thank you.
>
>> Thank you,
>>
>>> Signed-off-by: Wang Nan <[email protected]>
>>> ---
>>> tools/perf/util/probe-event.c | 15 ++++++++++++++-
>>> 1 file changed, 14 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
>>> index b386d2f..a2b3026 100644
>>> --- a/tools/perf/util/probe-event.c
>>> +++ b/tools/perf/util/probe-event.c
>>> @@ -2802,8 +2802,21 @@ int cleanup_perf_probe_event(struct perf_probe_event *pev)
>>> int add_perf_probe_events(struct perf_probe_event *pevs, int npevs, bool cleanup)
>>> {
>>> int i, ret;
>>> + bool user_only = true;
>>>
>>> - ret = init_symbol_maps(pevs->uprobes);
>>> + /* If any pev is kprobe, init kernel symbols. */
>>> + for (i = 0; i < npevs; i++) {
>>> + if (!pevs[i].uprobes) {
>>> + user_only = false;
>>> + break;
>>> + }
>>> + }
>>> +
>>> + /*
>>> + * Compiler can drop user_only:
>>> + * ret = init_symbol_maps(i >= npevs);
>>> + */
>>> + ret = init_symbol_maps(user_only);
>>> if (ret < 0)
>>> return ret;
>>>
>>>
>>
>
>
>
--
Masami HIRAMATSU
Linux Technology Research Center, System Productivity Research Dept.
Center for Technology Innovation - Systems Engineering
Hitachi, Ltd., Research & Development Group
E-mail: [email protected]
On 2015/6/27 16:30, Masami Hiramatsu wrote:
> Hi Wang,
>
> On 2015/06/27 16:34, Wangnan (F) wrote:
>>
>> On 2015/6/27 15:29, Masami Hiramatsu wrote:
>>> On 2015/06/25 19:37, Wang Nan wrote:
>>>> Before this patch, add_perf_probe_events() init symbol maps only for
>>>> uprobe if the first pev passed to it is a uprobe event. However, with
>>>> the incoming BPF uprobe support, now it will be possible to pass an
>>>> array with combined kprobe and uprobe events to add_perf_probe_events().
>>> This description is not correct. Actually, add_perf_probe_events already
>>> supports mix of uprobes and kprobes. However, from the command line syntax
>>> constrains the first elements of the probe_event arrays must be kprobes.
>>> So, if the array starts with uprobes, no kprobes should be there.
>>>
>>>> This patch check all pevs instead of the first one, and init kernel
>>>> symbol if any events is not uprobe.
>>> Anyway, I prefer to call init_symbol_maps() with "false" :)
>> I also prefer "false", so I try to find whether all events are uprobe
>> instead
>> of init_symbol_maps(true).
>>
>> So for this patch only commit message needs to be corrected, the code is
>> no problem, right?
>>
> No, I meant you just need to change one line, as below.
>
> - ret = init_symbol_maps(pevs->uprobes);
> + ret = init_symbol_maps(false);
>
> That's enough, I don't like to introduce a bool flag and loop.
>
> Thanks,
>
Then basic structure of kernel map will be build even if all probe
points are
uprobes. It costs some memory. But sure, we can skip the loop.
Will post an updated version.
Thank you.
>
>> Thank you.
>>
>>> Thank you,
>>>
>>>> Signed-off-by: Wang Nan <[email protected]>
>>>> ---
>>>> tools/perf/util/probe-event.c | 15 ++++++++++++++-
>>>> 1 file changed, 14 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
>>>> index b386d2f..a2b3026 100644
>>>> --- a/tools/perf/util/probe-event.c
>>>> +++ b/tools/perf/util/probe-event.c
>>>> @@ -2802,8 +2802,21 @@ int cleanup_perf_probe_event(struct perf_probe_event *pev)
>>>> int add_perf_probe_events(struct perf_probe_event *pevs, int npevs, bool cleanup)
>>>> {
>>>> int i, ret;
>>>> + bool user_only = true;
>>>>
>>>> - ret = init_symbol_maps(pevs->uprobes);
>>>> + /* If any pev is kprobe, init kernel symbols. */
>>>> + for (i = 0; i < npevs; i++) {
>>>> + if (!pevs[i].uprobes) {
>>>> + user_only = false;
>>>> + break;
>>>> + }
>>>> + }
>>>> +
>>>> + /*
>>>> + * Compiler can drop user_only:
>>>> + * ret = init_symbol_maps(i >= npevs);
>>>> + */
>>>> + ret = init_symbol_maps(user_only);
>>>> if (ret < 0)
>>>> return ret;
>>>>
>>>>
>>
>>
>