This 4 patches will be appended into my next perf BPF pull request.
Without them, current perf BPF implementation has a small problem that,
if passing '--filter' after '--event test_bpf.o' event selection, the
filter won't be correctly applied.
Patch 1/4 append a new function to libbpf which allow fetching
object name from bpf_object.
Patch 2/4 add a safety check to avoid access invalid memory if the list
is empty during event parsing.
Patch 3/4 introduces a dummy evsel mechanism.
Patch 4/4 utilizes dummy evsel mechanism, make BPF event collect filter
options by dummy evsel and sync with them after real evsel generated.
The 4th patch should be applied after patch 'perf tools: Add bpf_fd
field to evsel and config it'. Patch 'perf tools: Enable passing bpf
object file to --event' should also be modified to utilize dummy evsel.
Please see my following pull request.
Thank you.
--
1.8.3.4
Before this patch there's no way to connect a loaded bpf object
to its source file. However, during applying perf's '--filter' to BPF
object, without this connection makes things harder, because perf loads
all programs together, but '--filter' setting is for each object.
API of bpf_object__open_buffer() is changed to allow passing a name.
Fortunately, at this time there's only one user of it (perf test LLVM),
so we change it together.
Signed-off-by: Wang Nan <[email protected]>
Cc: Alexei Starovoitov <[email protected]>
Cc: Brendan Gregg <[email protected]>
Cc: Daniel Borkmann <[email protected]>
Cc: David Ahern <[email protected]>
Cc: He Kuang <[email protected]>
Cc: Jiri Olsa <[email protected]>
Cc: Kaixu Xia <[email protected]>
Cc: Masami Hiramatsu <[email protected]>
Cc: Namhyung Kim <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Zefan Li <[email protected]>
Cc: [email protected]
Cc: Arnaldo Carvalho de Melo <[email protected]>
---
tools/lib/bpf/libbpf.c | 25 ++++++++++++++++++++++---
tools/lib/bpf/libbpf.h | 4 +++-
tools/perf/tests/llvm.c | 2 +-
3 files changed, 26 insertions(+), 5 deletions(-)
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 4fa4bc4..4252fc2 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -880,15 +880,26 @@ struct bpf_object *bpf_object__open(const char *path)
}
struct bpf_object *bpf_object__open_buffer(void *obj_buf,
- size_t obj_buf_sz)
+ size_t obj_buf_sz,
+ const char *name)
{
+ char tmp_name[64];
+
/* param validation */
if (!obj_buf || obj_buf_sz <= 0)
return NULL;
- pr_debug("loading object from buffer\n");
+ if (!name) {
+ snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx",
+ (unsigned long)obj_buf,
+ (unsigned long)obj_buf_sz);
+ tmp_name[sizeof(tmp_name) - 1] = '\0';
+ name = tmp_name;
+ }
+ pr_debug("loading object '%s' from buffer\n",
+ name);
- return __bpf_object__open("[buffer]", obj_buf, obj_buf_sz);
+ return __bpf_object__open(name, obj_buf, obj_buf_sz);
}
int bpf_object__unload(struct bpf_object *obj)
@@ -975,6 +986,14 @@ bpf_object__next(struct bpf_object *prev)
return next;
}
+const char *
+bpf_object__get_name(struct bpf_object *obj)
+{
+ if (!obj)
+ return NULL;
+ return obj->path;
+}
+
struct bpf_program *
bpf_program__next(struct bpf_program *prev, struct bpf_object *obj)
{
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index ea8adc2..f16170c 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -28,12 +28,14 @@ struct bpf_object;
struct bpf_object *bpf_object__open(const char *path);
struct bpf_object *bpf_object__open_buffer(void *obj_buf,
- size_t obj_buf_sz);
+ size_t obj_buf_sz,
+ const char *name);
void bpf_object__close(struct bpf_object *object);
/* Load/unload object into/from kernel */
int bpf_object__load(struct bpf_object *obj);
int bpf_object__unload(struct bpf_object *obj);
+const char *bpf_object__get_name(struct bpf_object *obj);
struct bpf_object *bpf_object__next(struct bpf_object *prev);
#define bpf_object__for_each_safe(pos, tmp) \
diff --git a/tools/perf/tests/llvm.c b/tools/perf/tests/llvm.c
index a337356..52d5597 100644
--- a/tools/perf/tests/llvm.c
+++ b/tools/perf/tests/llvm.c
@@ -26,7 +26,7 @@ static int test__bpf_parsing(void *obj_buf, size_t obj_buf_sz)
{
struct bpf_object *obj;
- obj = bpf_object__open_buffer(obj_buf, obj_buf_sz);
+ obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, NULL);
if (!obj)
return -1;
bpf_object__close(obj);
--
1.8.3.4
If parse_events__scanner() collects no entry, perf_evlist__last(evlist)
is invalid. Then setting of cmdline_group_boundary touches invalid.
It could happend in currect BPF implementation. See [1]. Although it
can be fixed, for safety reason it whould be better to introduce this
check.
Instead of checking number of entries, check data.list instead, so we
can add dummy evsel here.
[1]: http://lkml.kernel.org/n/[email protected]
Signed-off-by: Wang Nan <[email protected]>
Cc: Alexei Starovoitov <[email protected]>
Cc: Masami Hiramatsu <[email protected]>
Cc: Namhyung Kim <[email protected]>
Cc: Zefan Li <[email protected]>
Cc: [email protected]
Cc: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/util/parse-events.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index d826e6f..14cd7e3 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -1143,10 +1143,14 @@ int parse_events(struct perf_evlist *evlist, const char *str,
int entries = data.idx - evlist->nr_entries;
struct perf_evsel *last;
+ if (!list_empty(&data.list)) {
+ last = list_entry(data.list.prev,
+ struct perf_evsel, node);
+ last->cmdline_group_boundary = true;
+ }
+
perf_evlist__splice_list_tail(evlist, &data.list, entries);
evlist->nr_groups += data.nr_groups;
- last = perf_evlist__last(evlist);
- last->cmdline_group_boundary = true;
return 0;
}
--
1.8.3.4
This patch allows linking dummy evsel onto evlist as a placeholder. It
is for following patch which allows passing BPF object using '--event
object.o'.
Doesn't link other event selectors, if passing a BPF object file to
'--event', nothing is linked onto evlist. Instead, events described in
BPF object file are probed and linked in a delayed manner because we
want do all probing work together. Therefore, evsel for events in BPF
object would be linked at the end of evlist. Which causes a small
problem that, if passing '--filter' setting after object file, the
filter option won't be correctly applied to those events.
This patch links dummy onto evlist, so following --filter can be
collected by the dummy evsel. For this reason dummy evsels are set to
PERF_TYPE_TRACEPOINT.
Due to the possibility of existance of dummy evsel,
perf_evlist__purge_dummy() must be called right after parse_options().
This patch adds it to record, top, trace and stat builtin commands.
Further patch moves it down after real BPF events are processed with.
Signed-off-by: Wang Nan <[email protected]>
Cc: Arnaldo Carvalho de Melo <[email protected]>
Cc: Alexei Starovoitov <[email protected]>
Cc: Brendan Gregg <[email protected]>
Cc: Daniel Borkmann <[email protected]>
Cc: David Ahern <[email protected]>
Cc: He Kuang <[email protected]>
Cc: Jiri Olsa <[email protected]>
Cc: Kaixu Xia <[email protected]>
Cc: Masami Hiramatsu <[email protected]>
Cc: Namhyung Kim <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Zefan Li <[email protected]>
Cc: [email protected]
---
tools/perf/builtin-record.c | 2 ++
tools/perf/builtin-stat.c | 1 +
tools/perf/builtin-top.c | 1 +
tools/perf/builtin-trace.c | 1 +
tools/perf/util/evlist.c | 19 +++++++++++++++++++
tools/perf/util/evlist.h | 1 +
tools/perf/util/evsel.c | 32 ++++++++++++++++++++++++++++++++
tools/perf/util/evsel.h | 6 ++++++
tools/perf/util/parse-events.c | 25 +++++++++++++++++++++----
9 files changed, 84 insertions(+), 4 deletions(-)
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index a660022..81829de 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -1112,6 +1112,8 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
argc = parse_options(argc, argv, record_options, record_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
+ perf_evlist__purge_dummy(rec->evlist);
+
if (!argc && target__none(&rec->opts.target))
usage_with_options(record_usage, record_options);
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 7aa039b..99b62f1 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1208,6 +1208,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
argc = parse_options(argc, argv, options, stat_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
+ perf_evlist__purge_dummy(evsel_list);
interval = stat_config.interval;
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 8c465c8..246203b 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1198,6 +1198,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
perf_config(perf_top_config, &top);
argc = parse_options(argc, argv, options, top_usage, 0);
+ perf_evlist__purge_dummy(top.evlist);
if (argc)
usage_with_options(top_usage, options);
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 2f1162d..ef5fde6 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -3080,6 +3080,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
+ perf_evlist__purge_dummy(trace.evlist);
if (trace.trace_pgfaults) {
trace.opts.sample_address = true;
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index e9a5d43..30fc327 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1694,3 +1694,22 @@ void perf_evlist__set_tracking_event(struct perf_evlist *evlist,
tracking_evsel->tracking = true;
}
+
+void perf_evlist__purge_dummy(struct perf_evlist *evlist)
+{
+ struct perf_evsel *pos, *n;
+
+ /*
+ * Remove all dummy events.
+ * During linking, we don't touch anything except link
+ * it into evlist. As a result, we don't
+ * need to adjust evlist->nr_entries during removal.
+ */
+
+ evlist__for_each_safe(evlist, n, pos) {
+ if (perf_evsel__is_dummy(pos)) {
+ list_del_init(&pos->node);
+ perf_evsel__delete(pos);
+ }
+ }
+}
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 436e358..df4820e 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -180,6 +180,7 @@ bool perf_evlist__valid_read_format(struct perf_evlist *evlist);
void perf_evlist__splice_list_tail(struct perf_evlist *evlist,
struct list_head *list,
int nr_entries);
+void perf_evlist__purge_dummy(struct perf_evlist *evlist);
static inline struct perf_evsel *perf_evlist__first(struct perf_evlist *evlist)
{
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index b096ef7..35947f5 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -212,6 +212,7 @@ void perf_evsel__init(struct perf_evsel *evsel,
evsel->sample_size = __perf_evsel__sample_size(attr->sample_type);
perf_evsel__calc_id_pos(evsel);
evsel->cmdline_group_boundary = false;
+ evsel->is_dummy = false;
}
struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx)
@@ -224,6 +225,37 @@ struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx)
return evsel;
}
+struct perf_evsel *perf_evsel__new_dummy(const char *name)
+{
+ struct perf_evsel *evsel = zalloc(perf_evsel__object.size);
+
+ if (!evsel)
+ return NULL;
+
+ /*
+ * Don't need call perf_evsel__init() for dummy evsel.
+ * Keep it simple.
+ */
+ evsel->name = strdup(name);
+ if (!evsel->name)
+ goto out_free;
+
+ INIT_LIST_HEAD(&evsel->node);
+ INIT_LIST_HEAD(&evsel->config_terms);
+
+ evsel->cmdline_group_boundary = false;
+ /*
+ * Set dummy evsel as TRACEPOINT event so it can collect filter
+ * options.
+ */
+ evsel->attr.type = PERF_TYPE_TRACEPOINT;
+ evsel->is_dummy = true;
+ return evsel;
+out_free:
+ free(evsel);
+ return NULL;
+}
+
struct perf_evsel *perf_evsel__newtp_idx(const char *sys, const char *name, int idx)
{
struct perf_evsel *evsel = zalloc(perf_evsel__object.size);
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 93ac6b1..443995b 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -114,6 +114,7 @@ struct perf_evsel {
struct perf_evsel *leader;
char *group_name;
bool cmdline_group_boundary;
+ bool is_dummy;
struct list_head config_terms;
};
@@ -149,6 +150,11 @@ int perf_evsel__object_config(size_t object_size,
void (*fini)(struct perf_evsel *evsel));
struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx);
+struct perf_evsel *perf_evsel__new_dummy(const char *name);
+static inline bool perf_evsel__is_dummy(struct perf_evsel *evsel)
+{
+ return evsel->is_dummy;
+}
static inline struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr)
{
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 14cd7e3..71d91fb 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -1141,7 +1141,7 @@ int parse_events(struct perf_evlist *evlist, const char *str,
perf_pmu__parse_cleanup();
if (!ret) {
int entries = data.idx - evlist->nr_entries;
- struct perf_evsel *last;
+ struct perf_evsel *last = NULL;
if (!list_empty(&data.list)) {
last = list_entry(data.list.prev,
@@ -1149,8 +1149,25 @@ int parse_events(struct perf_evlist *evlist, const char *str,
last->cmdline_group_boundary = true;
}
- perf_evlist__splice_list_tail(evlist, &data.list, entries);
- evlist->nr_groups += data.nr_groups;
+ if (last && perf_evsel__is_dummy(last)) {
+ if (!list_is_singular(&data.list)) {
+ parse_events_evlist_error(&data, 0,
+ "Dummy evsel error: not on a singular list");
+ return -1;
+ }
+ /*
+ * We are introducing a dummy event. Don't touch
+ * anything, just link it.
+ *
+ * Don't use perf_evlist__splice_list_tail() since
+ * it alerts evlist->nr_entries, which affect header
+ * of resulting perf.data.
+ */
+ list_splice_tail(&data.list, &evlist->entries);
+ } else {
+ perf_evlist__splice_list_tail(evlist, &data.list, entries);
+ evlist->nr_groups += data.nr_groups;
+ }
return 0;
}
@@ -1256,7 +1273,7 @@ foreach_evsel_in_last_glob(struct perf_evlist *evlist,
struct perf_evsel *last = NULL;
int err;
- if (evlist->nr_entries > 0)
+ if (!list_empty(&evlist->entries))
last = perf_evlist__last(evlist);
do {
--
1.8.3.4
Before this patch, --filter options can't be applied to BPF object
'events'. For example, the following command:
# perf record -e cycles -e test_bpf.o --exclude-perf -a sleep 1
doesn't apply '--exclude-perf' to events in test_bpf.o. Instead, the
filter will be applied to 'cycles' event. This is caused by the delay
manner of adding real BPF events. Because all BPF probing points are
probed by one call, we can't add real events until all BPF objects
are collected. In previous patch (perf tools: Enable passing bpf object
file to --event), nothing is appended to evlist.
This patch fixes this by utilizing the dummy event linked during
parse_events(). Filter settings goes to dummy event, and be synced with
real events in add_bpf_event().
Signed-off-by: Wang Nan <[email protected]>
Cc: Alexei Starovoitov <[email protected]>
Cc: Brendan Gregg <[email protected]>
Cc: Daniel Borkmann <[email protected]>
Cc: David Ahern <[email protected]>
Cc: He Kuang <[email protected]>
Cc: Jiri Olsa <[email protected]>
Cc: Kaixu Xia <[email protected]>
Cc: Masami Hiramatsu <[email protected]>
Cc: Namhyung Kim <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Zefan Li <[email protected]>
Cc: [email protected]
Cc: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/builtin-record.c | 6 ++++-
tools/perf/util/bpf-loader.c | 8 ++++++-
tools/perf/util/bpf-loader.h | 2 ++
tools/perf/util/evlist.c | 53 +++++++++++++++++++++++++++++++++++++++++---
4 files changed, 64 insertions(+), 5 deletions(-)
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 5051d3b..fd56a5b 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -1113,7 +1113,6 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
argc = parse_options(argc, argv, record_options, record_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
- perf_evlist__purge_dummy(rec->evlist);
if (!argc && target__none(&rec->opts.target))
usage_with_options(record_usage, record_options);
@@ -1178,6 +1177,11 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
pr_err("Failed to add events from BPF object(s)\n");
goto out_symbol_exit;
}
+ /*
+ * Until now let's purge dummy event. Filter options should
+ * have been attached to real events by perf_evlist__add_bpf().
+ */
+ perf_evlist__purge_dummy(rec->evlist);
symbol__init(NULL);
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index 126aa71..c3bc0a8 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -293,6 +293,12 @@ int bpf__foreach_tev(bpf_prog_iter_callback_t func, void *arg)
int err;
bpf_object__for_each_safe(obj, tmp) {
+ const char *obj_name;
+
+ obj_name = bpf_object__get_name(obj);
+ if (!obj_name)
+ obj_name = "[unknown]";
+
bpf_object__for_each_program(prog, obj) {
struct probe_trace_event *tev;
struct perf_probe_event *pev;
@@ -316,7 +322,7 @@ int bpf__foreach_tev(bpf_prog_iter_callback_t func, void *arg)
return fd;
}
- err = func(tev, fd, arg);
+ err = func(tev, obj_name, fd, arg);
if (err) {
pr_debug("bpf: call back failed, stop iterate\n");
return err;
diff --git a/tools/perf/util/bpf-loader.h b/tools/perf/util/bpf-loader.h
index 34656f8..323e664 100644
--- a/tools/perf/util/bpf-loader.h
+++ b/tools/perf/util/bpf-loader.h
@@ -6,6 +6,7 @@
#define __BPF_LOADER_H
#include <linux/compiler.h>
+#include <linux/perf_event.h>
#include <string.h>
#include "probe-event.h"
#include "debug.h"
@@ -13,6 +14,7 @@
#define PERF_BPF_PROBE_GROUP "perf_bpf_probe"
typedef int (*bpf_prog_iter_callback_t)(struct probe_trace_event *tev,
+ const char *obj_name,
int fd, void *arg);
#ifdef HAVE_LIBBPF_SUPPORT
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 3bedf64..21a11c9 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -195,7 +195,45 @@ error:
return -ENOMEM;
}
-static int add_bpf_event(struct probe_trace_event *tev, int fd,
+static void
+sync_with_dummy(struct perf_evlist *evlist, const char *obj_name,
+ struct list_head *list)
+{
+ struct perf_evsel *dummy_evsel, *pos;
+ const char *filter;
+ bool found = false;
+ int err;
+
+ evlist__for_each(evlist, dummy_evsel) {
+ if (!perf_evsel__is_dummy(dummy_evsel))
+ continue;
+
+ if (strcmp(dummy_evsel->name, obj_name) == 0) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ pr_debug("Failed to find dummy event of '%s'\n",
+ obj_name);
+ return;
+ }
+
+ filter = dummy_evsel->filter;
+ if (!filter)
+ return;
+
+ list_for_each_entry(pos, list, node) {
+ err = perf_evsel__set_filter(pos, filter);
+ if (err)
+ pr_debug("Failed to set filter '%s' to evsel %s\n",
+ filter, pos->name);
+ }
+}
+
+static int add_bpf_event(struct probe_trace_event *tev,
+ const char *obj_name, int fd,
void *arg)
{
struct perf_evlist *evlist = arg;
@@ -203,8 +241,8 @@ static int add_bpf_event(struct probe_trace_event *tev, int fd,
struct list_head list;
int err, idx, entries;
- pr_debug("add bpf event %s:%s and attach bpf program %d\n",
- tev->group, tev->event, fd);
+ pr_debug("add bpf event %s:%s and attach bpf program %d (from %s)\n",
+ tev->group, tev->event, fd, obj_name);
INIT_LIST_HEAD(&list);
idx = evlist->nr_entries;
@@ -226,6 +264,15 @@ static int add_bpf_event(struct probe_trace_event *tev, int fd,
list_for_each_entry(pos, &list, node)
pos->bpf_fd = fd;
entries = idx - evlist->nr_entries;
+
+ sync_with_dummy(evlist, obj_name, &list);
+
+ /*
+ * Currectly we don't need to link those new events at the
+ * same place where dummy node reside because order of
+ * events in cmdline won't be used after
+ * 'perf_evlist__add_bpf'.
+ */
perf_evlist__splice_list_tail(evlist, &list, entries);
return 0;
}
--
1.8.3.4