2022-04-07 20:27:35

by Jiri Olsa

[permalink] [raw]
Subject: [RFC bpf-next 4/4] selftests/bpf: Add attach bench test

Adding test that reads all functions from ftrace available_filter_functions
file and attach them all through kprobe_multi API.

It checks that the attach and detach times is under 2 seconds
and printf stats info with -v option, like on my setup:

test_bench_attach: found 48712 functions
test_bench_attach: attached in 1.069s
test_bench_attach: detached in 0.373s

Signed-off-by: Jiri Olsa <[email protected]>
---
.../bpf/prog_tests/kprobe_multi_test.c | 141 ++++++++++++++++++
.../selftests/bpf/progs/kprobe_multi_empty.c | 12 ++
2 files changed, 153 insertions(+)
create mode 100644 tools/testing/selftests/bpf/progs/kprobe_multi_empty.c

diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
index b9876b55fc0c..6798b54416de 100644
--- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
@@ -2,6 +2,9 @@
#include <test_progs.h>
#include "kprobe_multi.skel.h"
#include "trace_helpers.h"
+#include "kprobe_multi_empty.skel.h"
+#include "bpf/libbpf_internal.h"
+#include "bpf/hashmap.h"

static void kprobe_multi_test_run(struct kprobe_multi *skel, bool test_return)
{
@@ -301,6 +304,142 @@ static void test_attach_api_fails(void)
kprobe_multi__destroy(skel);
}

+static inline __u64 get_time_ns(void)
+{
+ struct timespec t;
+
+ clock_gettime(CLOCK_MONOTONIC, &t);
+ return (__u64) t.tv_sec * 1000000000 + t.tv_nsec;
+}
+
+static size_t symbol_hash(const void *key, void *ctx __maybe_unused)
+{
+ return str_hash((const char *) key);
+}
+
+static bool symbol_equal(const void *key1, const void *key2, void *ctx __maybe_unused)
+{
+ return strcmp((const char *) key1, (const char *) key2) == 0;
+}
+
+#define DEBUGFS "/sys/kernel/debug/tracing/"
+
+static int get_syms(char ***symsp, size_t *cntp)
+{
+ size_t cap = 0, cnt = 0, i;
+ char *name, **syms = NULL;
+ struct hashmap *map;
+ char buf[256];
+ FILE *f;
+ int err;
+
+ /*
+ * The available_filter_functions contains many duplicates,
+ * but other than that all symbols are usable in kprobe multi
+ * interface.
+ * Filtering out duplicates by using hashmap__add, which won't
+ * add existing entry.
+ */
+ f = fopen(DEBUGFS "available_filter_functions", "r");
+ if (!f)
+ return -EINVAL;
+
+ map = hashmap__new(symbol_hash, symbol_equal, NULL);
+ err = libbpf_get_error(map);
+ if (err)
+ goto error;
+
+ while (fgets(buf, sizeof(buf), f)) {
+ /* skip modules */
+ if (strchr(buf, '['))
+ continue;
+ if (sscanf(buf, "%ms$*[^\n]\n", &name) != 1)
+ continue;
+ err = hashmap__add(map, name, NULL);
+ if (err) {
+ free(name);
+ if (err == -EEXIST)
+ continue;
+ goto error;
+ }
+ err = libbpf_ensure_mem((void **) &syms, &cap,
+ sizeof(*syms), cnt + 1);
+ if (err) {
+ free(name);
+ goto error;
+ }
+ syms[cnt] = name;
+ cnt++;
+ }
+
+ *symsp = syms;
+ *cntp = cnt;
+
+error:
+ fclose(f);
+ hashmap__free(map);
+ if (err) {
+ for (i = 0; i < cnt; i++)
+ free(syms[cnt]);
+ free(syms);
+ }
+ return err;
+}
+
+static void test_bench_attach(void)
+{
+ double attach_delta_ns, detach_delta_ns;
+ LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
+ struct kprobe_multi_empty *skel = NULL;
+ long attach_start_ns, attach_end_ns;
+ long detach_start_ns, detach_end_ns;
+ struct bpf_link *link = NULL;
+ char **syms = NULL;
+ size_t cnt, i;
+
+ if (!ASSERT_OK(get_syms(&syms, &cnt), "get_syms"))
+ return;
+
+ skel = kprobe_multi_empty__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "kprobe_multi_empty__open_and_load"))
+ goto cleanup;
+
+ opts.syms = (const char **) syms;
+ opts.cnt = cnt;
+
+ attach_start_ns = get_time_ns();
+ link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_empty,
+ NULL, &opts);
+ attach_end_ns = get_time_ns();
+
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_kprobe_multi_opts"))
+ goto cleanup;
+
+ detach_start_ns = get_time_ns();
+ bpf_link__destroy(link);
+ detach_end_ns = get_time_ns();
+
+ attach_delta_ns = (attach_end_ns - attach_start_ns) / 1000000000.0;
+ detach_delta_ns = (detach_end_ns - detach_start_ns) / 1000000000.0;
+
+ fprintf(stderr, "%s: found %lu functions\n", __func__, cnt);
+ fprintf(stderr, "%s: attached in %7.3lfs\n", __func__, attach_delta_ns);
+ fprintf(stderr, "%s: detached in %7.3lfs\n", __func__, detach_delta_ns);
+
+ if (attach_delta_ns > 2.0)
+ PRINT_FAIL("attach time above 2 seconds\n");
+ if (detach_delta_ns > 2.0)
+ PRINT_FAIL("detach time above 2 seconds\n");
+
+cleanup:
+ kprobe_multi_empty__destroy(skel);
+ if (syms) {
+ for (i = 0; i < cnt; i++)
+ free(syms[i]);
+ free(syms);
+ }
+}
+
void test_kprobe_multi_test(void)
{
if (!ASSERT_OK(load_kallsyms(), "load_kallsyms"))
@@ -320,4 +459,6 @@ void test_kprobe_multi_test(void)
test_attach_api_syms();
if (test__start_subtest("attach_api_fails"))
test_attach_api_fails();
+ if (test__start_subtest("bench_attach"))
+ test_bench_attach();
}
diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi_empty.c b/tools/testing/selftests/bpf/progs/kprobe_multi_empty.c
new file mode 100644
index 000000000000..be9e3d891d46
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kprobe_multi_empty.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("kprobe.multi/*")
+int test_kprobe_empty(struct pt_regs *ctx)
+{
+ return 0;
+}
--
2.35.1


2022-04-12 09:36:28

by Andrii Nakryiko

[permalink] [raw]
Subject: Re: [RFC bpf-next 4/4] selftests/bpf: Add attach bench test

On Thu, Apr 7, 2022 at 5:53 AM Jiri Olsa <[email protected]> wrote:
>
> Adding test that reads all functions from ftrace available_filter_functions
> file and attach them all through kprobe_multi API.
>
> It checks that the attach and detach times is under 2 seconds
> and printf stats info with -v option, like on my setup:
>
> test_bench_attach: found 48712 functions
> test_bench_attach: attached in 1.069s
> test_bench_attach: detached in 0.373s
>
> Signed-off-by: Jiri Olsa <[email protected]>
> ---
> .../bpf/prog_tests/kprobe_multi_test.c | 141 ++++++++++++++++++
> .../selftests/bpf/progs/kprobe_multi_empty.c | 12 ++
> 2 files changed, 153 insertions(+)
> create mode 100644 tools/testing/selftests/bpf/progs/kprobe_multi_empty.c
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
> index b9876b55fc0c..6798b54416de 100644
> --- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
> +++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
> @@ -2,6 +2,9 @@
> #include <test_progs.h>
> #include "kprobe_multi.skel.h"
> #include "trace_helpers.h"
> +#include "kprobe_multi_empty.skel.h"
> +#include "bpf/libbpf_internal.h"
> +#include "bpf/hashmap.h"
>
> static void kprobe_multi_test_run(struct kprobe_multi *skel, bool test_return)
> {
> @@ -301,6 +304,142 @@ static void test_attach_api_fails(void)
> kprobe_multi__destroy(skel);
> }
>
> +static inline __u64 get_time_ns(void)
> +{
> + struct timespec t;
> +
> + clock_gettime(CLOCK_MONOTONIC, &t);
> + return (__u64) t.tv_sec * 1000000000 + t.tv_nsec;
> +}
> +
> +static size_t symbol_hash(const void *key, void *ctx __maybe_unused)
> +{
> + return str_hash((const char *) key);
> +}
> +
> +static bool symbol_equal(const void *key1, const void *key2, void *ctx __maybe_unused)
> +{
> + return strcmp((const char *) key1, (const char *) key2) == 0;
> +}
> +
> +#define DEBUGFS "/sys/kernel/debug/tracing/"
> +
> +static int get_syms(char ***symsp, size_t *cntp)
> +{
> + size_t cap = 0, cnt = 0, i;
> + char *name, **syms = NULL;
> + struct hashmap *map;
> + char buf[256];
> + FILE *f;
> + int err;
> +
> + /*
> + * The available_filter_functions contains many duplicates,
> + * but other than that all symbols are usable in kprobe multi
> + * interface.
> + * Filtering out duplicates by using hashmap__add, which won't
> + * add existing entry.
> + */
> + f = fopen(DEBUGFS "available_filter_functions", "r");

I'm really curious how did you manage to attach to everything in
available_filter_functions because when I'm trying to do that I fail.
available_filter_functions has a bunch of functions that should not be
attachable (e.g., notrace functions). Look just at __bpf_tramp_exit:

void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr);

So first, curious what I am doing wrong or rather why it succeeds in
your case ;)

But second, just wanted to plea to "fix" available_filter_functions to
not list stuff that should not be attachable. Can you please take a
look and checks what's going on there and why do we have notrace
functions (and what else should *NOT* be there)?


> + if (!f)
> + return -EINVAL;
> +
> + map = hashmap__new(symbol_hash, symbol_equal, NULL);
> + err = libbpf_get_error(map);
> + if (err)
> + goto error;
> +

[...]

> +
> + attach_delta_ns = (attach_end_ns - attach_start_ns) / 1000000000.0;
> + detach_delta_ns = (detach_end_ns - detach_start_ns) / 1000000000.0;
> +
> + fprintf(stderr, "%s: found %lu functions\n", __func__, cnt);
> + fprintf(stderr, "%s: attached in %7.3lfs\n", __func__, attach_delta_ns);
> + fprintf(stderr, "%s: detached in %7.3lfs\n", __func__, detach_delta_ns);
> +
> + if (attach_delta_ns > 2.0)
> + PRINT_FAIL("attach time above 2 seconds\n");
> + if (detach_delta_ns > 2.0)
> + PRINT_FAIL("detach time above 2 seconds\n");

see my reply on the cover letter, any such "2 second" assumption are
guaranteed to bite us. We've dealt with a lot of timing issues due to
CI being slower and more unpredictable in terms of performance, I'd
like to avoid dealing with one more case like that.


> +
> +cleanup:
> + kprobe_multi_empty__destroy(skel);
> + if (syms) {
> + for (i = 0; i < cnt; i++)
> + free(syms[i]);
> + free(syms);
> + }
> +}
> +
> void test_kprobe_multi_test(void)
> {
> if (!ASSERT_OK(load_kallsyms(), "load_kallsyms"))
> @@ -320,4 +459,6 @@ void test_kprobe_multi_test(void)
> test_attach_api_syms();
> if (test__start_subtest("attach_api_fails"))
> test_attach_api_fails();
> + if (test__start_subtest("bench_attach"))
> + test_bench_attach();
> }
> diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi_empty.c b/tools/testing/selftests/bpf/progs/kprobe_multi_empty.c
> new file mode 100644
> index 000000000000..be9e3d891d46
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/kprobe_multi_empty.c
> @@ -0,0 +1,12 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <linux/bpf.h>
> +#include <bpf/bpf_helpers.h>
> +#include <bpf/bpf_tracing.h>
> +
> +char _license[] SEC("license") = "GPL";
> +
> +SEC("kprobe.multi/*")
> +int test_kprobe_empty(struct pt_regs *ctx)
> +{
> + return 0;
> +}
> --
> 2.35.1
>

2022-04-12 20:27:48

by Masami Hiramatsu

[permalink] [raw]
Subject: Re: [RFC bpf-next 4/4] selftests/bpf: Add attach bench test

On Mon, 11 Apr 2022 15:15:40 -0700
Andrii Nakryiko <[email protected]> wrote:

> > +#define DEBUGFS "/sys/kernel/debug/tracing/"
> > +
> > +static int get_syms(char ***symsp, size_t *cntp)
> > +{
> > + size_t cap = 0, cnt = 0, i;
> > + char *name, **syms = NULL;
> > + struct hashmap *map;
> > + char buf[256];
> > + FILE *f;
> > + int err;
> > +
> > + /*
> > + * The available_filter_functions contains many duplicates,
> > + * but other than that all symbols are usable in kprobe multi
> > + * interface.
> > + * Filtering out duplicates by using hashmap__add, which won't
> > + * add existing entry.
> > + */
> > + f = fopen(DEBUGFS "available_filter_functions", "r");
>
> I'm really curious how did you manage to attach to everything in
> available_filter_functions because when I'm trying to do that I fail.
> available_filter_functions has a bunch of functions that should not be
> attachable (e.g., notrace functions). Look just at __bpf_tramp_exit:
>
> void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr);

Hmm, this sounds like a bug in ftrace side. IIUC, the
"available_filter_functions" only shows the functions which is NOT
instrumented by mcount, we should not see any notrace functions on it.

Technically, this is done by __no_instrument_function__ attribute.

#if defined(CC_USING_HOTPATCH)
#define notrace __attribute__((hotpatch(0, 0)))
#elif defined(CC_USING_PATCHABLE_FUNCTION_ENTRY)
#define notrace __attribute__((patchable_function_entry(0, 0)))
#else
#define notrace __attribute__((__no_instrument_function__))
#endif

>
> So first, curious what I am doing wrong or rather why it succeeds in
> your case ;)
>
> But second, just wanted to plea to "fix" available_filter_functions to
> not list stuff that should not be attachable. Can you please take a
> look and checks what's going on there and why do we have notrace
> functions (and what else should *NOT* be there)?

Can you share how did you reproduce the issue? I'll check it.

Thank you,


--
Masami Hiramatsu <[email protected]>

2022-04-12 23:38:06

by Jiri Olsa

[permalink] [raw]
Subject: Re: [RFC bpf-next 4/4] selftests/bpf: Add attach bench test

On Mon, Apr 11, 2022 at 03:15:40PM -0700, Andrii Nakryiko wrote:

SNIP

> > +static int get_syms(char ***symsp, size_t *cntp)
> > +{
> > + size_t cap = 0, cnt = 0, i;
> > + char *name, **syms = NULL;
> > + struct hashmap *map;
> > + char buf[256];
> > + FILE *f;
> > + int err;
> > +
> > + /*
> > + * The available_filter_functions contains many duplicates,
> > + * but other than that all symbols are usable in kprobe multi
> > + * interface.
> > + * Filtering out duplicates by using hashmap__add, which won't
> > + * add existing entry.
> > + */
> > + f = fopen(DEBUGFS "available_filter_functions", "r");
>
> I'm really curious how did you manage to attach to everything in
> available_filter_functions because when I'm trying to do that I fail.

the new code makes the differece ;-) so the main problem I could not
use available_filter_functions functions before were cases like:

# cat available_filter_functions | grep sys_ni_syscall
sys_ni_syscall
sys_ni_syscall
sys_ni_syscall
sys_ni_syscall
sys_ni_syscall
sys_ni_syscall
sys_ni_syscall
sys_ni_syscall
sys_ni_syscall
sys_ni_syscall
sys_ni_syscall
sys_ni_syscall
sys_ni_syscall
sys_ni_syscall
sys_ni_syscall

which when you try to resolve you'll find just one address:

# cat /proc/kallsyms | egrep 'T sys_ni_syscall'
ffffffff81170020 T sys_ni_syscall

this is caused by entries like:
__SYSCALL(156, sys_ni_syscall)

when generating syscalls for given arch

this is handled by the new code by removing duplicates when
reading available_filter_functions



another case is the other way round, like with:

# cat /proc/kallsyms | grep 't t_next'
ffffffff8125c3f0 t t_next
ffffffff8126a320 t t_next
ffffffff81275de0 t t_next
ffffffff8127efd0 t t_next
ffffffff814d6660 t t_next

that has just one 'ftrace-able' instance:

# cat available_filter_functions | grep '^t_next$'
t_next

and this is handled by calling ftrace_location on address when
resolving symbols, to ensure each reasolved symbol lives in ftrace

> available_filter_functions has a bunch of functions that should not be
> attachable (e.g., notrace functions). Look just at __bpf_tramp_exit:
>
> void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr);
>
> So first, curious what I am doing wrong or rather why it succeeds in
> your case ;)
>
> But second, just wanted to plea to "fix" available_filter_functions to
> not list stuff that should not be attachable. Can you please take a
> look and checks what's going on there and why do we have notrace
> functions (and what else should *NOT* be there)?

yes, seems like a bug ;-) it's in available_filter_functions
but it does not have 'call __fentry__' at the entry..

I was going to check on that, because you brought that up before,
but did not get to it yet

>
>
> > + if (!f)
> > + return -EINVAL;
> > +
> > + map = hashmap__new(symbol_hash, symbol_equal, NULL);
> > + err = libbpf_get_error(map);
> > + if (err)
> > + goto error;
> > +
>
> [...]
>
> > +
> > + attach_delta_ns = (attach_end_ns - attach_start_ns) / 1000000000.0;
> > + detach_delta_ns = (detach_end_ns - detach_start_ns) / 1000000000.0;
> > +
> > + fprintf(stderr, "%s: found %lu functions\n", __func__, cnt);
> > + fprintf(stderr, "%s: attached in %7.3lfs\n", __func__, attach_delta_ns);
> > + fprintf(stderr, "%s: detached in %7.3lfs\n", __func__, detach_delta_ns);
> > +
> > + if (attach_delta_ns > 2.0)
> > + PRINT_FAIL("attach time above 2 seconds\n");
> > + if (detach_delta_ns > 2.0)
> > + PRINT_FAIL("detach time above 2 seconds\n");
>
> see my reply on the cover letter, any such "2 second" assumption are
> guaranteed to bite us. We've dealt with a lot of timing issues due to
> CI being slower and more unpredictable in terms of performance, I'd
> like to avoid dealing with one more case like that.

right, I'll remove the check

thanks,
jirka

2022-04-13 01:42:38

by Andrii Nakryiko

[permalink] [raw]
Subject: Re: [RFC bpf-next 4/4] selftests/bpf: Add attach bench test

On Tue, Apr 12, 2022 at 8:44 AM Jiri Olsa <[email protected]> wrote:
>
> On Mon, Apr 11, 2022 at 03:15:40PM -0700, Andrii Nakryiko wrote:
>
> SNIP
>
> > > +static int get_syms(char ***symsp, size_t *cntp)
> > > +{
> > > + size_t cap = 0, cnt = 0, i;
> > > + char *name, **syms = NULL;
> > > + struct hashmap *map;
> > > + char buf[256];
> > > + FILE *f;
> > > + int err;
> > > +
> > > + /*
> > > + * The available_filter_functions contains many duplicates,
> > > + * but other than that all symbols are usable in kprobe multi
> > > + * interface.
> > > + * Filtering out duplicates by using hashmap__add, which won't
> > > + * add existing entry.
> > > + */
> > > + f = fopen(DEBUGFS "available_filter_functions", "r");
> >
> > I'm really curious how did you manage to attach to everything in
> > available_filter_functions because when I'm trying to do that I fail.
>
> the new code makes the differece ;-) so the main problem I could not
> use available_filter_functions functions before were cases like:
>
> # cat available_filter_functions | grep sys_ni_syscall
> sys_ni_syscall
> sys_ni_syscall
> sys_ni_syscall
> sys_ni_syscall
> sys_ni_syscall
> sys_ni_syscall
> sys_ni_syscall
> sys_ni_syscall
> sys_ni_syscall
> sys_ni_syscall
> sys_ni_syscall
> sys_ni_syscall
> sys_ni_syscall
> sys_ni_syscall
> sys_ni_syscall
>
> which when you try to resolve you'll find just one address:
>
> # cat /proc/kallsyms | egrep 'T sys_ni_syscall'
> ffffffff81170020 T sys_ni_syscall
>
> this is caused by entries like:
> __SYSCALL(156, sys_ni_syscall)
>
> when generating syscalls for given arch
>
> this is handled by the new code by removing duplicates when
> reading available_filter_functions
>
>
>
> another case is the other way round, like with:
>
> # cat /proc/kallsyms | grep 't t_next'
> ffffffff8125c3f0 t t_next
> ffffffff8126a320 t t_next
> ffffffff81275de0 t t_next
> ffffffff8127efd0 t t_next
> ffffffff814d6660 t t_next
>
> that has just one 'ftrace-able' instance:
>
> # cat available_filter_functions | grep '^t_next$'
> t_next
>
> and this is handled by calling ftrace_location on address when
> resolving symbols, to ensure each reasolved symbol lives in ftrace
>
> > available_filter_functions has a bunch of functions that should not be
> > attachable (e.g., notrace functions). Look just at __bpf_tramp_exit:
> >
> > void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr);
> >
> > So first, curious what I am doing wrong or rather why it succeeds in
> > your case ;)
> >
> > But second, just wanted to plea to "fix" available_filter_functions to
> > not list stuff that should not be attachable. Can you please take a
> > look and checks what's going on there and why do we have notrace
> > functions (and what else should *NOT* be there)?
>
> yes, seems like a bug ;-) it's in available_filter_functions
> but it does not have 'call __fentry__' at the entry..
>
> I was going to check on that, because you brought that up before,
> but did not get to it yet

yeah, see also my reply to Masami. __bpf_tramp_exit and
__bpf_tramp_enter are two specific examples. Both are marked notrace,
but one is in available_filter_functions and another is not. Neither
should be attachable, but doing this local change you can see that one
of them (__bpf_tramp_exit) is:

$ git diff
diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
index b9876b55fc0c..77cff034d427 100644
--- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
@@ -165,8 +165,8 @@ static void test_attach_api_pattern(void)
{
LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);

- test_attach_api("bpf_fentry_test*", &opts);
- test_attach_api("bpf_fentry_test?", NULL);
+ test_attach_api("__bpf_tramp_enter", &opts);
+ test_attach_api("__bpf_tramp_exit", NULL);
}


$ sudo ./test_progs -t kprobe_multi/attach_api_pattern -v
bpf_testmod.ko is already unloaded.
Loading bpf_testmod.ko...
Successfully loaded bpf_testmod.ko.
test_kprobe_multi_test:PASS:load_kallsyms 0 nsec
test_attach_api:PASS:fentry_raw_skel_load 0 nsec
libbpf: prog 'test_kprobe': failed to attach: Invalid argument
test_attach_api:FAIL:bpf_program__attach_kprobe_multi_opts unexpected error: -22
test_attach_api:PASS:fentry_raw_skel_load 0 nsec
test_attach_api:PASS:bpf_program__attach_kprobe_multi_opts 0 nsec

Quite weird.



>
> >
> >
> > > + if (!f)
> > > + return -EINVAL;
> > > +
> > > + map = hashmap__new(symbol_hash, symbol_equal, NULL);
> > > + err = libbpf_get_error(map);
> > > + if (err)
> > > + goto error;
> > > +
> >
> > [...]
> >
> > > +
> > > + attach_delta_ns = (attach_end_ns - attach_start_ns) / 1000000000.0;
> > > + detach_delta_ns = (detach_end_ns - detach_start_ns) / 1000000000.0;
> > > +
> > > + fprintf(stderr, "%s: found %lu functions\n", __func__, cnt);
> > > + fprintf(stderr, "%s: attached in %7.3lfs\n", __func__, attach_delta_ns);
> > > + fprintf(stderr, "%s: detached in %7.3lfs\n", __func__, detach_delta_ns);
> > > +
> > > + if (attach_delta_ns > 2.0)
> > > + PRINT_FAIL("attach time above 2 seconds\n");
> > > + if (detach_delta_ns > 2.0)
> > > + PRINT_FAIL("detach time above 2 seconds\n");
> >
> > see my reply on the cover letter, any such "2 second" assumption are
> > guaranteed to bite us. We've dealt with a lot of timing issues due to
> > CI being slower and more unpredictable in terms of performance, I'd
> > like to avoid dealing with one more case like that.
>
> right, I'll remove the check
>
> thanks,
> jirka

2022-04-13 01:52:58

by Andrii Nakryiko

[permalink] [raw]
Subject: Re: [RFC bpf-next 4/4] selftests/bpf: Add attach bench test

On Mon, Apr 11, 2022 at 5:49 PM Masami Hiramatsu <[email protected]> wrote:
>
> On Mon, 11 Apr 2022 15:15:40 -0700
> Andrii Nakryiko <[email protected]> wrote:
>
> > > +#define DEBUGFS "/sys/kernel/debug/tracing/"
> > > +
> > > +static int get_syms(char ***symsp, size_t *cntp)
> > > +{
> > > + size_t cap = 0, cnt = 0, i;
> > > + char *name, **syms = NULL;
> > > + struct hashmap *map;
> > > + char buf[256];
> > > + FILE *f;
> > > + int err;
> > > +
> > > + /*
> > > + * The available_filter_functions contains many duplicates,
> > > + * but other than that all symbols are usable in kprobe multi
> > > + * interface.
> > > + * Filtering out duplicates by using hashmap__add, which won't
> > > + * add existing entry.
> > > + */
> > > + f = fopen(DEBUGFS "available_filter_functions", "r");
> >
> > I'm really curious how did you manage to attach to everything in
> > available_filter_functions because when I'm trying to do that I fail.
> > available_filter_functions has a bunch of functions that should not be
> > attachable (e.g., notrace functions). Look just at __bpf_tramp_exit:
> >
> > void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr);
>
> Hmm, this sounds like a bug in ftrace side. IIUC, the
> "available_filter_functions" only shows the functions which is NOT
> instrumented by mcount, we should not see any notrace functions on it.
>
> Technically, this is done by __no_instrument_function__ attribute.
>
> #if defined(CC_USING_HOTPATCH)
> #define notrace __attribute__((hotpatch(0, 0)))
> #elif defined(CC_USING_PATCHABLE_FUNCTION_ENTRY)
> #define notrace __attribute__((patchable_function_entry(0, 0)))
> #else
> #define notrace __attribute__((__no_instrument_function__))
> #endif
>
> >
> > So first, curious what I am doing wrong or rather why it succeeds in
> > your case ;)
> >
> > But second, just wanted to plea to "fix" available_filter_functions to
> > not list stuff that should not be attachable. Can you please take a
> > look and checks what's going on there and why do we have notrace
> > functions (and what else should *NOT* be there)?
>
> Can you share how did you reproduce the issue? I'll check it.
>

$ sudo cat /sys/kernel/debug/tracing/available_filter_functions | grep
__bpf_tramp
__bpf_tramp_image_release
__bpf_tramp_image_put_rcu_tasks
__bpf_tramp_image_put_rcu
__bpf_tramp_image_put_deferred
__bpf_tramp_exit


__bpf_tramp_exit is notrace function, so shouldn't be here. Notice
that __bpf_tramp_enter (which is also notrace) are not in
available_filter_functions.

So it's quite bizarre and inconsistent.

> Thank you,
>
>
> --
> Masami Hiramatsu <[email protected]>

2022-04-14 11:30:49

by Andrii Nakryiko

[permalink] [raw]
Subject: Re: [RFC bpf-next 4/4] selftests/bpf: Add attach bench test

On Wed, Apr 13, 2022 at 9:59 AM Steven Rostedt <[email protected]> wrote:
>
> On Wed, 13 Apr 2022 09:45:52 -0700
> Andrii Nakryiko <[email protected]> wrote:
>
> > > Did you only use the "notrace" on the prototype? I see the semicolon at
> > > the end of your comment. It only affects the actual function itself,
> > > not the prototype.
> >
> > notrace is both on declaration and on definition, see kernel/bpf/trampoline.c:
>
> OK. Note, it only needs to be on the function, the prototype doesn't do
> anything. But that shouldn't be the issue.
>
> >
> > void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr)
> > {
> > percpu_ref_put(&tr->pcref);
> > }
> >
>
> What compiler are you using? as this seems to be a compiler bug.
> Because it's not ftrace that picks what functions to trace, but the
> compiler itself.

I build my local kernel with

$ gcc --version
gcc (GCC) 11.1.1 20210623 (Red Hat 11.1.1-6)


But we have the same issue in our production kernels which are most
probably built with some other version of GCC, but I don't know which
one.


>
> -- Steve
>
>

2022-04-14 14:34:27

by Andrii Nakryiko

[permalink] [raw]
Subject: Re: [RFC bpf-next 4/4] selftests/bpf: Add attach bench test

On Wed, Apr 13, 2022 at 9:44 AM Steven Rostedt <[email protected]> wrote:
>
> On Tue, 12 Apr 2022 09:49:23 +0900
> Masami Hiramatsu <[email protected]> wrote:
>
> > > I'm really curious how did you manage to attach to everything in
> > > available_filter_functions because when I'm trying to do that I fail.
> > > available_filter_functions has a bunch of functions that should not be
> > > attachable (e.g., notrace functions). Look just at __bpf_tramp_exit:
> > >
> > > void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr);
>
> Did you only use the "notrace" on the prototype? I see the semicolon at
> the end of your comment. It only affects the actual function itself,
> not the prototype.

notrace is both on declaration and on definition, see kernel/bpf/trampoline.c:

void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr)
{
percpu_ref_put(&tr->pcref);
}


>
> -- Steve

2022-04-14 14:34:27

by Steven Rostedt

[permalink] [raw]
Subject: Re: [RFC bpf-next 4/4] selftests/bpf: Add attach bench test

On Tue, 12 Apr 2022 09:49:23 +0900
Masami Hiramatsu <[email protected]> wrote:

> > I'm really curious how did you manage to attach to everything in
> > available_filter_functions because when I'm trying to do that I fail.
> > available_filter_functions has a bunch of functions that should not be
> > attachable (e.g., notrace functions). Look just at __bpf_tramp_exit:
> >
> > void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr);

Did you only use the "notrace" on the prototype? I see the semicolon at
the end of your comment. It only affects the actual function itself,
not the prototype.

-- Steve

2022-04-14 15:48:26

by Steven Rostedt

[permalink] [raw]
Subject: Re: [RFC bpf-next 4/4] selftests/bpf: Add attach bench test

On Wed, 13 Apr 2022 09:45:52 -0700
Andrii Nakryiko <[email protected]> wrote:

> > Did you only use the "notrace" on the prototype? I see the semicolon at
> > the end of your comment. It only affects the actual function itself,
> > not the prototype.
>
> notrace is both on declaration and on definition, see kernel/bpf/trampoline.c:

OK. Note, it only needs to be on the function, the prototype doesn't do
anything. But that shouldn't be the issue.

>
> void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr)
> {
> percpu_ref_put(&tr->pcref);
> }
>

What compiler are you using? as this seems to be a compiler bug.
Because it's not ftrace that picks what functions to trace, but the
compiler itself.

-- Steve


2022-04-17 00:13:05

by Masami Hiramatsu

[permalink] [raw]
Subject: Re: [RFC bpf-next 4/4] selftests/bpf: Add attach bench test

Hi,

On Tue, 12 Apr 2022 15:51:43 -0700
Andrii Nakryiko <[email protected]> wrote:

> On Mon, Apr 11, 2022 at 5:49 PM Masami Hiramatsu <[email protected]> wrote:
> >
> > On Mon, 11 Apr 2022 15:15:40 -0700
> > Andrii Nakryiko <[email protected]> wrote:
> >
> > > > +#define DEBUGFS "/sys/kernel/debug/tracing/"
> > > > +
> > > > +static int get_syms(char ***symsp, size_t *cntp)
> > > > +{
> > > > + size_t cap = 0, cnt = 0, i;
> > > > + char *name, **syms = NULL;
> > > > + struct hashmap *map;
> > > > + char buf[256];
> > > > + FILE *f;
> > > > + int err;
> > > > +
> > > > + /*
> > > > + * The available_filter_functions contains many duplicates,
> > > > + * but other than that all symbols are usable in kprobe multi
> > > > + * interface.
> > > > + * Filtering out duplicates by using hashmap__add, which won't
> > > > + * add existing entry.
> > > > + */
> > > > + f = fopen(DEBUGFS "available_filter_functions", "r");
> > >
> > > I'm really curious how did you manage to attach to everything in
> > > available_filter_functions because when I'm trying to do that I fail.
> > > available_filter_functions has a bunch of functions that should not be
> > > attachable (e.g., notrace functions). Look just at __bpf_tramp_exit:
> > >
> > > void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr);
> >
> > Hmm, this sounds like a bug in ftrace side. IIUC, the
> > "available_filter_functions" only shows the functions which is NOT
> > instrumented by mcount, we should not see any notrace functions on it.
> >
> > Technically, this is done by __no_instrument_function__ attribute.
> >
> > #if defined(CC_USING_HOTPATCH)
> > #define notrace __attribute__((hotpatch(0, 0)))
> > #elif defined(CC_USING_PATCHABLE_FUNCTION_ENTRY)
> > #define notrace __attribute__((patchable_function_entry(0, 0)))
> > #else
> > #define notrace __attribute__((__no_instrument_function__))
> > #endif
> >
> > >
> > > So first, curious what I am doing wrong or rather why it succeeds in
> > > your case ;)
> > >
> > > But second, just wanted to plea to "fix" available_filter_functions to
> > > not list stuff that should not be attachable. Can you please take a
> > > look and checks what's going on there and why do we have notrace
> > > functions (and what else should *NOT* be there)?
> >
> > Can you share how did you reproduce the issue? I'll check it.
> >
>
> $ sudo cat /sys/kernel/debug/tracing/available_filter_functions | grep
> __bpf_tramp
> __bpf_tramp_image_release
> __bpf_tramp_image_put_rcu_tasks
> __bpf_tramp_image_put_rcu
> __bpf_tramp_image_put_deferred
> __bpf_tramp_exit
>
>
> __bpf_tramp_exit is notrace function, so shouldn't be here. Notice
> that __bpf_tramp_enter (which is also notrace) are not in
> available_filter_functions.

OK, I also confirmed that __bpf_tramp_exit is listed. (others seems no notrace)

/sys/kernel/tracing # cat available_filter_functions | grep __bpf_tramp
__bpf_tramp_image_release
__bpf_tramp_image_put_rcu
__bpf_tramp_image_put_rcu_tasks
__bpf_tramp_image_put_deferred
__bpf_tramp_exit

My gcc is older one.
gcc version 9.4.0 (Ubuntu 9.4.0-1ubuntu1~20.04.1)

But it seems that __bpf_tramp_exit() doesn't call __fentry__. (I objdump'ed)

ffffffff81208270 <__bpf_tramp_exit>:
ffffffff81208270: 55 push %rbp
ffffffff81208271: 48 89 e5 mov %rsp,%rbp
ffffffff81208274: 53 push %rbx
ffffffff81208275: 48 89 fb mov %rdi,%rbx
ffffffff81208278: e8 83 70 ef ff callq ffffffff810ff300 <__rcu_read_lock>
ffffffff8120827d: 31 d2 xor %edx,%edx


>
> So it's quite bizarre and inconsistent.

Indeed. I guess there is a bug in scripts/recordmcount.pl.

Thank you,

>
> > Thank you,
> >
> >
> > --
> > Masami Hiramatsu <[email protected]>


--
Masami Hiramatsu <[email protected]>

2022-04-19 00:14:11

by Steven Rostedt

[permalink] [raw]
Subject: Re: [RFC bpf-next 4/4] selftests/bpf: Add attach bench test

On Sat, 16 Apr 2022 23:21:03 +0900
Masami Hiramatsu <[email protected]> wrote:

> But it seems that __bpf_tramp_exit() doesn't call __fentry__. (I objdump'ed)
>
> ffffffff81208270 <__bpf_tramp_exit>:
> ffffffff81208270: 55 push %rbp
> ffffffff81208271: 48 89 e5 mov %rsp,%rbp
> ffffffff81208274: 53 push %rbx
> ffffffff81208275: 48 89 fb mov %rdi,%rbx
> ffffffff81208278: e8 83 70 ef ff callq ffffffff810ff300 <__rcu_read_lock>
> ffffffff8120827d: 31 d2 xor %edx,%edx
>
>
> >
> > So it's quite bizarre and inconsistent.
>
> Indeed. I guess there is a bug in scripts/recordmcount.pl.

Actually, x86 doesn't use that script. It either uses the C version, or
with latest gcc, it is created by the compiler itself.

I'll look deeper into it.

-- Steve

2022-04-29 09:17:40

by Steven Rostedt

[permalink] [raw]
Subject: Re: [RFC bpf-next 4/4] selftests/bpf: Add attach bench test

On Thu, 28 Apr 2022 16:32:20 -0700
Andrii Nakryiko <[email protected]> wrote:
> >
> > The job of recordmcount is to create a section of all the locations that
> > call fentry. That is EXACTLY what it did. No bug there! It did its job.
>
> But that __fentry__ call is not part of __bpf_tramp_exit, actually.
> Whether to call it a bug or limitation is secondary. It marks
> __bpf_tramp_exit as attachable through kprobe/ftrace while it really
> isn't.

I'm confused by what you mean by "marks __bpf_tramp_exit as attachable"?
What does? Where does it get that information? Does it read
available_filter_functions?

recordmcount isn't responsible for any of that, you are thinking of
kallsyms. Specifically *printf("%ps"). Because that's where the name comes
from. Anytime you print an address with "%ps" on a weak function that has
been overridden, it will give you the symbol before it. I guess you can
call it a bug in the "%ps" logic.


>
> Below you are saying there is only user confusion. It's not just
> confusion. You'll get an error when you try to attach to
> __bpf_tramp_exit because __bpf_tramp_exit doesn't really have
> __fentry__ preamble and thus the kernel itself will reject it as a
> target. So when you build a generic tracing tool that fetches all the
> attachable kprobes, filters out all the blacklisted ones, you still
> end up with kprobe targets that are not attachable. It's definitely
> more than an inconvenience which I experienced first hand.
>
> Can recordmcount or whoever does this be taught to use proper FUNC
> symbol size to figure out boundaries of the function?

kallsyms needs to do it. All recordmcount does is to give the locations of
the calls to fentry. It only gives addresses and does not give any symbol
information. Stop blaming recordcmcount!

>
> $ readelf -s ~/linux-build/default/vmlinux | rg __bpf_tramp_exit
> 129408: ffffffff811b2ba0 63 FUNC GLOBAL DEFAULT 1 __bpf_tramp_exit
>
> So only the first 63 bytes of instruction after __bpf_tramp_exit
> should be taken into account. Everything else doesn't belong to
> __bpf_tramp_exit. So even though objdump pretends that call __fentry__
> is part of __bpf_tramp_exit, it's not.
>
> ffffffff811b2ba0 <__bpf_tramp_exit>:
> ffffffff811b2ba0: 53 push %rbx
> ffffffff811b2ba1: 48 89 fb mov %rdi,%rbx
> ffffffff811b2ba4: e8 97 d2 f2 ff call
> ffffffff810dfe40 <__rcu_read_lock>
> ffffffff811b2ba9: 48 8b 83 e0 00 00 00 mov 0xe0(%rbx),%rax
> ffffffff811b2bb0: a8 03 test $0x3,%al
> ffffffff811b2bb2: 75 0a jne
> ffffffff811b2bbe <__bpf_tramp_exit+0x1e>
> ffffffff811b2bb4: 65 48 ff 08 decq %gs:(%rax)
> ffffffff811b2bb8: 5b pop %rbx
> ffffffff811b2bb9: e9 d2 0e f3 ff jmp
> ffffffff810e3a90 <__rcu_read_unlock>
> ffffffff811b2bbe: 48 8b 83 e8 00 00 00 mov 0xe8(%rbx),%rax
> ffffffff811b2bc5: f0 48 83 28 01 lock subq $0x1,(%rax)
> ffffffff811b2bca: 75 ec jne
> ffffffff811b2bb8 <__bpf_tramp_exit+0x18>
> ffffffff811b2bcc: 48 8b 83 e8 00 00 00 mov 0xe8(%rbx),%rax
> ffffffff811b2bd3: 48 8d bb e0 00 00 00 lea 0xe0(%rbx),%rdi
> ffffffff811b2bda: ff 50 08 call *0x8(%rax)
> ffffffff811b2bdd: eb d9 jmp
> ffffffff811b2bb8 <__bpf_tramp_exit+0x18>
> ffffffff811b2bdf: 90 nop
>
> ^^^ ffffffff811b2ba0 + 63 = ffffffff811b2bdf -- this is the end of
> __bpf_tramp_exit
>
> ffffffff811b2be0: e8 3b 9c e9 ff call
> ffffffff8104c820 <__fentry__>
> ffffffff811b2be5: b8 f4 fd ff ff mov $0xfffffdf4,%eax
> ffffffff811b2bea: c3 ret
> ffffffff811b2beb: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1)
>
>


> > One solution is to simply get the end of the function that is provided by
> > kallsyms to make sure the fentry call location is inside the function, and
> > if it is not, then not show that function in available_filter_functions but
> > instead show something like "** unnamed function **" or whatever.

Do the above. The names in available_filter_functions are derived from
kallsyms. There's a way to also ask kallsyms to give you the end pointer
of the function address. The only thing that avaliable_filter_functions
does is to print the location found by recordmcount with a "%ps".

If you don't want it to show up in available_filter_functions, then you
need to open code the %ps onto kallsyms lookup and then compare the
function with the end (if it is found). Or fix %ps.

-- Steve

2022-04-29 09:41:38

by Steven Rostedt

[permalink] [raw]
Subject: Re: [RFC bpf-next 4/4] selftests/bpf: Add attach bench test

On Thu, 28 Apr 2022 09:58:03 -0400
Steven Rostedt <[email protected]> wrote:

> I added the addresses it was mapping and found this:
>
> ffffffffa828f680 T __bpf_tramp_exit

cut and pasted the kallsyms not the avaliable filter functions, which had
this:

__bpf_tramp_exit (ffffffffa828f780)

-- Steve

2022-04-29 12:19:12

by Steven Rostedt

[permalink] [raw]
Subject: Re: [RFC bpf-next 4/4] selftests/bpf: Add attach bench test

On Thu, 28 Apr 2022 11:59:55 -0700
Alexei Starovoitov <[email protected]> wrote:

> > The weak function gets a call to ftrace, but it still gets compiled into
> > vmlinux but its symbol is dropped due to it being overridden. Thus, the
> > mcount_loc finds this call to fentry, and maps it to the symbol that is
> > before it, which just happened to be __bpf_tramp_exit.
>
> Ouch. That _is_ a bug in recordmocount.

Exactly HOW is it a bug in recordmcount?

The job of recordmcount is to create a section of all the locations that
call fentry. That is EXACTLY what it did. No bug there! It did its job.

In fact, recordmcount probably didn't even get called. If you see this on
x86 with gcc version greater than 8 (which I do), recordmcount is not even
used. gcc creates this section internally instead.

>
> > I made that weak function "notrace" and the __bpf_tramp_exit disappeared
> > from the available_filter_functions list.
>
> That's a hack. We cannot rely on such hacks for all weak functions.

Then don't do anything. The only thing this bug causes is perhaps some
confusion, because functions before weak functions that are overridden will
be listed incorrectly in the available_filter_functions file. And that's
because of the way it is created with respect to kallsyms.

If you enable __bpf_tramp_exit, it will not do anything to that function.
What it will do is enable the location inside of the weak function that no
longer has its symbol shown.

One solution is to simply get the end of the function that is provided by
kallsyms to make sure the fentry call location is inside the function, and
if it is not, then not show that function in available_filter_functions but
instead show something like "** unnamed function **" or whatever.

I could write a patch to do that when I get the time. But because the only
issue that this causes is some confusion among the users and does not cause
any issue with functionality, then it is low priority.

-- Steve

2022-04-29 12:39:17

by Steven Rostedt

[permalink] [raw]
Subject: Re: [RFC bpf-next 4/4] selftests/bpf: Add attach bench test

On Thu, 28 Apr 2022 19:53:03 -0400
Steven Rostedt <[email protected]> wrote:

> > But that __fentry__ call is not part of __bpf_tramp_exit, actually.
> > Whether to call it a bug or limitation is secondary. It marks
> > __bpf_tramp_exit as attachable through kprobe/ftrace while it really
> > isn't.
>
> I'm confused by what you mean by "marks __bpf_tramp_exit as attachable"?
> What does? Where does it get that information? Does it read
> available_filter_functions?

OK, I think I see the issue you have. Because the functions shown in
available_filter_functions which uses the simple "%ps" to show the function
name:

https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/kernel/trace/ftrace.c#n3692

And the code that does the actual matching uses kallsyms_lookup()

https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/kernel/trace/ftrace.c#n4017

Which appears not to match the function for the address, you can't pass in
__bpf_tramp_exit because it wont match the symbol returned by
kallsyms_lookup.

This does indeed look like a bug in %ps.

But in the mean time, I could open code %ps and see if that fixes it. I'll
give it try.

-- Steve

2022-04-29 13:37:44

by Andrii Nakryiko

[permalink] [raw]
Subject: Re: [RFC bpf-next 4/4] selftests/bpf: Add attach bench test

On Thu, Apr 28, 2022 at 1:05 PM Steven Rostedt <[email protected]> wrote:
>
> On Thu, 28 Apr 2022 11:59:55 -0700
> Alexei Starovoitov <[email protected]> wrote:
>
> > > The weak function gets a call to ftrace, but it still gets compiled into
> > > vmlinux but its symbol is dropped due to it being overridden. Thus, the
> > > mcount_loc finds this call to fentry, and maps it to the symbol that is
> > > before it, which just happened to be __bpf_tramp_exit.
> >
> > Ouch. That _is_ a bug in recordmocount.
>
> Exactly HOW is it a bug in recordmcount?
>
> The job of recordmcount is to create a section of all the locations that
> call fentry. That is EXACTLY what it did. No bug there! It did its job.

But that __fentry__ call is not part of __bpf_tramp_exit, actually.
Whether to call it a bug or limitation is secondary. It marks
__bpf_tramp_exit as attachable through kprobe/ftrace while it really
isn't.

Below you are saying there is only user confusion. It's not just
confusion. You'll get an error when you try to attach to
__bpf_tramp_exit because __bpf_tramp_exit doesn't really have
__fentry__ preamble and thus the kernel itself will reject it as a
target. So when you build a generic tracing tool that fetches all the
attachable kprobes, filters out all the blacklisted ones, you still
end up with kprobe targets that are not attachable. It's definitely
more than an inconvenience which I experienced first hand.

Can recordmcount or whoever does this be taught to use proper FUNC
symbol size to figure out boundaries of the function?

$ readelf -s ~/linux-build/default/vmlinux | rg __bpf_tramp_exit
129408: ffffffff811b2ba0 63 FUNC GLOBAL DEFAULT 1 __bpf_tramp_exit

So only the first 63 bytes of instruction after __bpf_tramp_exit
should be taken into account. Everything else doesn't belong to
__bpf_tramp_exit. So even though objdump pretends that call __fentry__
is part of __bpf_tramp_exit, it's not.

ffffffff811b2ba0 <__bpf_tramp_exit>:
ffffffff811b2ba0: 53 push %rbx
ffffffff811b2ba1: 48 89 fb mov %rdi,%rbx
ffffffff811b2ba4: e8 97 d2 f2 ff call
ffffffff810dfe40 <__rcu_read_lock>
ffffffff811b2ba9: 48 8b 83 e0 00 00 00 mov 0xe0(%rbx),%rax
ffffffff811b2bb0: a8 03 test $0x3,%al
ffffffff811b2bb2: 75 0a jne
ffffffff811b2bbe <__bpf_tramp_exit+0x1e>
ffffffff811b2bb4: 65 48 ff 08 decq %gs:(%rax)
ffffffff811b2bb8: 5b pop %rbx
ffffffff811b2bb9: e9 d2 0e f3 ff jmp
ffffffff810e3a90 <__rcu_read_unlock>
ffffffff811b2bbe: 48 8b 83 e8 00 00 00 mov 0xe8(%rbx),%rax
ffffffff811b2bc5: f0 48 83 28 01 lock subq $0x1,(%rax)
ffffffff811b2bca: 75 ec jne
ffffffff811b2bb8 <__bpf_tramp_exit+0x18>
ffffffff811b2bcc: 48 8b 83 e8 00 00 00 mov 0xe8(%rbx),%rax
ffffffff811b2bd3: 48 8d bb e0 00 00 00 lea 0xe0(%rbx),%rdi
ffffffff811b2bda: ff 50 08 call *0x8(%rax)
ffffffff811b2bdd: eb d9 jmp
ffffffff811b2bb8 <__bpf_tramp_exit+0x18>
ffffffff811b2bdf: 90 nop

^^^ ffffffff811b2ba0 + 63 = ffffffff811b2bdf -- this is the end of
__bpf_tramp_exit

ffffffff811b2be0: e8 3b 9c e9 ff call
ffffffff8104c820 <__fentry__>
ffffffff811b2be5: b8 f4 fd ff ff mov $0xfffffdf4,%eax
ffffffff811b2bea: c3 ret
ffffffff811b2beb: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1)


>
> In fact, recordmcount probably didn't even get called. If you see this on
> x86 with gcc version greater than 8 (which I do), recordmcount is not even
> used. gcc creates this section internally instead.
>
> >
> > > I made that weak function "notrace" and the __bpf_tramp_exit disappeared
> > > from the available_filter_functions list.
> >
> > That's a hack. We cannot rely on such hacks for all weak functions.
>
> Then don't do anything. The only thing this bug causes is perhaps some
> confusion, because functions before weak functions that are overridden will
> be listed incorrectly in the available_filter_functions file. And that's
> because of the way it is created with respect to kallsyms.
>
> If you enable __bpf_tramp_exit, it will not do anything to that function.
> What it will do is enable the location inside of the weak function that no
> longer has its symbol shown.
>
> One solution is to simply get the end of the function that is provided by
> kallsyms to make sure the fentry call location is inside the function, and
> if it is not, then not show that function in available_filter_functions but
> instead show something like "** unnamed function **" or whatever.
>
> I could write a patch to do that when I get the time. But because the only
> issue that this causes is some confusion among the users and does not cause
> any issue with functionality, then it is low priority.
>
> -- Steve

2022-04-29 21:14:03

by Steven Rostedt

[permalink] [raw]
Subject: Re: [RFC bpf-next 4/4] selftests/bpf: Add attach bench test

On Sat, 16 Apr 2022 23:21:03 +0900
Masami Hiramatsu <[email protected]> wrote:

> OK, I also confirmed that __bpf_tramp_exit is listed. (others seems no notrace)
>
> /sys/kernel/tracing # cat available_filter_functions | grep __bpf_tramp
> __bpf_tramp_image_release
> __bpf_tramp_image_put_rcu
> __bpf_tramp_image_put_rcu_tasks
> __bpf_tramp_image_put_deferred
> __bpf_tramp_exit
>
> My gcc is older one.
> gcc version 9.4.0 (Ubuntu 9.4.0-1ubuntu1~20.04.1)
>
> But it seems that __bpf_tramp_exit() doesn't call __fentry__. (I objdump'ed)
>
> ffffffff81208270 <__bpf_tramp_exit>:
> ffffffff81208270: 55 push %rbp
> ffffffff81208271: 48 89 e5 mov %rsp,%rbp
> ffffffff81208274: 53 push %rbx
> ffffffff81208275: 48 89 fb mov %rdi,%rbx
> ffffffff81208278: e8 83 70 ef ff callq ffffffff810ff300 <__rcu_read_lock>
> ffffffff8120827d: 31 d2 xor %edx,%edx

You need to look deeper ;-)
>
>
> >
> > So it's quite bizarre and inconsistent.
>
> Indeed. I guess there is a bug in scripts/recordmcount.pl.

No there isn't.

I added the addresses it was mapping and found this:

ffffffffa828f680 T __bpf_tramp_exit

(which is relocated, but it's trivial to map it with the actual function).

At the end of that function we have:

ffffffff8128f767: 48 8d bb e0 00 00 00 lea 0xe0(%rbx),%rdi
ffffffff8128f76e: 48 8b 40 08 mov 0x8(%rax),%rax
ffffffff8128f772: e8 89 28 d7 00 call ffffffff82002000 <__x86_indirect_thunk_array>
ffffffff8128f773: R_X86_64_PLT32 __x86_indirect_thunk_rax-0x4
ffffffff8128f777: e9 4a ff ff ff jmp ffffffff8128f6c6 <__bpf_tramp_exit+0x46>
ffffffff8128f77c: 0f 1f 40 00 nopl 0x0(%rax)
ffffffff8128f780: e8 8b df dc ff call ffffffff8105d710 <__fentry__>
ffffffff8128f781: R_X86_64_PLT32 __fentry__-0x4
ffffffff8128f785: b8 f4 fd ff ff mov $0xfffffdf4,%eax
ffffffff8128f78a: c3 ret
ffffffff8128f78b: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1)


Notice the call to fentry!

It's due to this:

void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr)
{
percpu_ref_put(&tr->pcref);
}

int __weak
arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *image_end,
const struct btf_func_model *m, u32 flags,
struct bpf_tramp_progs *tprogs,
void *orig_call)
{
return -ENOTSUPP;
}

The weak function gets a call to ftrace, but it still gets compiled into
vmlinux but its symbol is dropped due to it being overridden. Thus, the
mcount_loc finds this call to fentry, and maps it to the symbol that is
before it, which just happened to be __bpf_tramp_exit.

I made that weak function "notrace" and the __bpf_tramp_exit disappeared
from the available_filter_functions list.

-- Steve

2022-04-29 22:12:03

by Steven Rostedt

[permalink] [raw]
Subject: Re: [RFC bpf-next 4/4] selftests/bpf: Add attach bench test

On Thu, 28 Apr 2022 20:09:45 -0400
Steven Rostedt <[email protected]> wrote:

> OK, I think I see the issue you have. Because the functions shown in
> available_filter_functions which uses the simple "%ps" to show the function
> name:
>
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/kernel/trace/ftrace.c#n3692
>
> And the code that does the actual matching uses kallsyms_lookup()
>
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/kernel/trace/ftrace.c#n4017
>
> Which appears not to match the function for the address, you can't pass in
> __bpf_tramp_exit because it wont match the symbol returned by
> kallsyms_lookup.

Never mind, in testing this I had marked the weak function as notrace,
which was the reason I couldn't add it to the set_ftrace_notrace.

After removing the notrace, kallsyms_lookup() doesn't make a difference. It
appears that kallsyms will include overridden weak functions into the size
of the function before it. I tried:

ret = kallsyms_lookup(rec->ip, &size, &offset, &modname, str);
if (!ret || offset > size) {
seq_printf(m, "no function at %lx", rec->ip);
} else {
seq_printf(m, "%s", str);
if (modname)
seq_printf(m, " [%s]", modname);
}

And it made no difference.

>
> This does indeed look like a bug in %ps.
>

Yes, this does appear to be a issue with kallsyms in general.

-- Steve

2022-04-29 22:36:20

by Alexei Starovoitov

[permalink] [raw]
Subject: Re: [RFC bpf-next 4/4] selftests/bpf: Add attach bench test

On Thu, Apr 28, 2022 at 6:58 AM Steven Rostedt <[email protected]> wrote:
>
> On Sat, 16 Apr 2022 23:21:03 +0900
> Masami Hiramatsu <[email protected]> wrote:
>
> > OK, I also confirmed that __bpf_tramp_exit is listed. (others seems no notrace)
> >
> > /sys/kernel/tracing # cat available_filter_functions | grep __bpf_tramp
> > __bpf_tramp_image_release
> > __bpf_tramp_image_put_rcu
> > __bpf_tramp_image_put_rcu_tasks
> > __bpf_tramp_image_put_deferred
> > __bpf_tramp_exit
> >
> > My gcc is older one.
> > gcc version 9.4.0 (Ubuntu 9.4.0-1ubuntu1~20.04.1)
> >
> > But it seems that __bpf_tramp_exit() doesn't call __fentry__. (I objdump'ed)
> >
> > ffffffff81208270 <__bpf_tramp_exit>:
> > ffffffff81208270: 55 push %rbp
> > ffffffff81208271: 48 89 e5 mov %rsp,%rbp
> > ffffffff81208274: 53 push %rbx
> > ffffffff81208275: 48 89 fb mov %rdi,%rbx
> > ffffffff81208278: e8 83 70 ef ff callq ffffffff810ff300 <__rcu_read_lock>
> > ffffffff8120827d: 31 d2 xor %edx,%edx
>
> You need to look deeper ;-)
> >
> >
> > >
> > > So it's quite bizarre and inconsistent.
> >
> > Indeed. I guess there is a bug in scripts/recordmcount.pl.
>
> No there isn't.
>
> I added the addresses it was mapping and found this:
>
> ffffffffa828f680 T __bpf_tramp_exit
>
> (which is relocated, but it's trivial to map it with the actual function).
>
> At the end of that function we have:
>
> ffffffff8128f767: 48 8d bb e0 00 00 00 lea 0xe0(%rbx),%rdi
> ffffffff8128f76e: 48 8b 40 08 mov 0x8(%rax),%rax
> ffffffff8128f772: e8 89 28 d7 00 call ffffffff82002000 <__x86_indirect_thunk_array>
> ffffffff8128f773: R_X86_64_PLT32 __x86_indirect_thunk_rax-0x4
> ffffffff8128f777: e9 4a ff ff ff jmp ffffffff8128f6c6 <__bpf_tramp_exit+0x46>
> ffffffff8128f77c: 0f 1f 40 00 nopl 0x0(%rax)
> ffffffff8128f780: e8 8b df dc ff call ffffffff8105d710 <__fentry__>
> ffffffff8128f781: R_X86_64_PLT32 __fentry__-0x4
> ffffffff8128f785: b8 f4 fd ff ff mov $0xfffffdf4,%eax
> ffffffff8128f78a: c3 ret
> ffffffff8128f78b: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1)
>
>
> Notice the call to fentry!
>
> It's due to this:
>
> void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr)
> {
> percpu_ref_put(&tr->pcref);
> }
>
> int __weak
> arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *image_end,
> const struct btf_func_model *m, u32 flags,
> struct bpf_tramp_progs *tprogs,
> void *orig_call)
> {
> return -ENOTSUPP;
> }
>
> The weak function gets a call to ftrace, but it still gets compiled into
> vmlinux but its symbol is dropped due to it being overridden. Thus, the
> mcount_loc finds this call to fentry, and maps it to the symbol that is
> before it, which just happened to be __bpf_tramp_exit.

Ouch. That _is_ a bug in recordmocount.

> I made that weak function "notrace" and the __bpf_tramp_exit disappeared
> from the available_filter_functions list.

That's a hack. We cannot rely on such hacks for all weak functions.