hi,
this patchset adds a new link type BPF_TRACE_KPROBE_MULTI that attaches
a kprobe program through the fprobe API [1] introduced by Masami.
The fprobe API allows attaching a probe to multiple functions at once very
quickly, because it works on top of ftrace. On the other hand this limits
the probe point to the function entry or return.
With bpftrace support I see the following attach speed:
# perf stat --null -r 5 ./src/bpftrace -e 'kprobe:x* { } i:ms:1 { exit(); } '
Attaching 2 probes...
Attaching 3342 functions
...
1.4960 +- 0.0285 seconds time elapsed ( +- 1.91% )
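For illustration, usage of the new interface looks roughly like the sketch
below. It is based on the kprobe.multi section name and the
bpf_program__attach_kprobe_multi_opts API added later in this series;
program, function and symbol names are only examples, not taken from the
selftests.

/* BPF side (needs vmlinux.h and bpf_helpers.h) */
SEC("kprobe.multi/bpf_fentry_test*")
int test_kprobe(struct pt_regs *ctx)
{
	/* entry address of the traced function and its attach cookie */
	bpf_printk("ip %llx cookie %llx",
		   bpf_get_func_ip(ctx), bpf_get_attach_cookie(ctx));
	return 0;
}

/* user space side (needs bpf/libbpf.h): attach by symbol names with
 * per-symbol cookies
 */
static struct bpf_link *attach_multi(struct bpf_program *prog)
{
	const char *syms[] = { "bpf_fentry_test1", "bpf_fentry_test2" };
	__u64 cookies[] = { 1, 2 };
	LIBBPF_OPTS(bpf_kprobe_multi_opts, opts,
		.syms = syms,
		.cookies = cookies,
		.cnt = 2,
		/* .retprobe = true attaches to function returns instead */
	);

	/* on failure the result should be checked with libbpf_get_error() */
	return bpf_program__attach_kprobe_multi_opts(prog, NULL, &opts);
}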
v3 changes:
- based on latest fprobe post from Masami [2]
- add acks
- add extra comment to kprobe_multi_link_handler wrt entry ip setup [Masami]
- keep swap_words_64 static and swap values directly in
bpf_kprobe_multi_cookie_swap [Andrii]
- rearrange locking/migrate setup in kprobe_multi_link_prog_run [Andrii]
- move uapi fields [Andrii]
- add bpf_program__attach_kprobe_multi_opts function [Andrii]
- many small test changes [Andrii]
- added tests for bpf_program__attach_kprobe_multi_opts
- make kallsyms_lookup_name check for empty string [Andrii]
v2 changes:
- based on latest fprobe changes [1]
- renaming the uapi interface to kprobe multi
- adding support for sort_r to pass user pointer for swap functions
and using that in cookie support to keep just single functions array
- moving new link to kernel/trace/bpf_trace.c file
- using single fprobe callback function for entry and exit
- using kvzalloc, libbpf_ensure_mem functions
- adding new k[ret]probe.multi sections instead of using current kprobe
- used glob_match from test_progs.c, added '?' matching
- move bpf_get_func_ip verifier inline change to a separate change
- couple of other minor fixes
Also available at:
https://git.kernel.org/pub/scm/linux/kernel/git/jolsa/perf.git
bpf/kprobe_multi
thanks,
jirka
[1] https://lore.kernel.org/bpf/164458044634.586276.3261555265565111183.stgit@devnote2/
[2] https://lore.kernel.org/bpf/164735281449.1084943.12438881786173547153.stgit@devnote2/
---
Jiri Olsa (13):
lib/sort: Add priv pointer to swap function
kallsyms: Skip the name search for empty string
bpf: Add multi kprobe link
bpf: Add bpf_get_func_ip kprobe helper for multi kprobe link
bpf: Add support to inline bpf_get_func_ip helper on x86
bpf: Add cookie support to programs attached with kprobe multi link
libbpf: Add libbpf_kallsyms_parse function
libbpf: Add bpf_link_create support for multi kprobes
libbpf: Add bpf_program__attach_kprobe_multi_opts function
selftests/bpf: Add kprobe_multi attach test
selftests/bpf: Add kprobe_multi bpf_cookie test
selftests/bpf: Add attach test for bpf_program__attach_kprobe_multi_opts
selftests/bpf: Add cookie test for bpf_program__attach_kprobe_multi_opts
include/linux/bpf_types.h | 1 +
include/linux/sort.h | 2 +-
include/linux/trace_events.h | 7 ++
include/linux/types.h | 1 +
include/uapi/linux/bpf.h | 14 ++++
kernel/bpf/syscall.c | 26 +++++--
kernel/bpf/verifier.c | 21 +++++-
kernel/kallsyms.c | 4 ++
kernel/trace/bpf_trace.c | 342 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
lib/sort.c | 40 ++++++++---
tools/include/uapi/linux/bpf.h | 14 ++++
tools/lib/bpf/bpf.c | 9 +++
tools/lib/bpf/bpf.h | 9 ++-
tools/lib/bpf/libbpf.c | 222 +++++++++++++++++++++++++++++++++++++++++++++++++++++-------
tools/lib/bpf/libbpf.h | 23 +++++++
tools/lib/bpf/libbpf.map | 1 +
tools/lib/bpf/libbpf_internal.h | 5 ++
tools/testing/selftests/bpf/prog_tests/bpf_cookie.c | 177 ++++++++++++++++++++++++++++++++++++++++++++++++
tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c | 323 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
tools/testing/selftests/bpf/progs/kprobe_multi.c | 98 +++++++++++++++++++++++++++
tools/testing/selftests/bpf/trace_helpers.c | 7 ++
21 files changed, 1302 insertions(+), 44 deletions(-)
create mode 100644 tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
create mode 100644 tools/testing/selftests/bpf/progs/kprobe_multi.c
Adding bpf_cookie test for programs attached by kprobe_multi links.
Signed-off-by: Jiri Olsa <[email protected]>
---
.../selftests/bpf/prog_tests/bpf_cookie.c | 109 ++++++++++++++++++
.../selftests/bpf/progs/kprobe_multi.c | 41 ++++---
2 files changed, 131 insertions(+), 19 deletions(-)
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
index 0612e79a9281..6671d4dc0b5d 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
@@ -7,6 +7,7 @@
#include <unistd.h>
#include <test_progs.h>
#include "test_bpf_cookie.skel.h"
+#include "kprobe_multi.skel.h"
/* uprobe attach point */
static void trigger_func(void)
@@ -63,6 +64,112 @@ static void kprobe_subtest(struct test_bpf_cookie *skel)
bpf_link__destroy(retlink2);
}
+static void kprobe_multi_test_run(struct kprobe_multi *skel)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ int err, prog_fd;
+
+ prog_fd = bpf_program__fd(skel->progs.trigger);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, 0, "test_run");
+
+ ASSERT_EQ(skel->bss->kprobe_test1_result, 1, "kprobe_test1_result");
+ ASSERT_EQ(skel->bss->kprobe_test2_result, 1, "kprobe_test2_result");
+ ASSERT_EQ(skel->bss->kprobe_test3_result, 1, "kprobe_test3_result");
+ ASSERT_EQ(skel->bss->kprobe_test4_result, 1, "kprobe_test4_result");
+ ASSERT_EQ(skel->bss->kprobe_test5_result, 1, "kprobe_test5_result");
+ ASSERT_EQ(skel->bss->kprobe_test6_result, 1, "kprobe_test6_result");
+ ASSERT_EQ(skel->bss->kprobe_test7_result, 1, "kprobe_test7_result");
+ ASSERT_EQ(skel->bss->kprobe_test8_result, 1, "kprobe_test8_result");
+
+ ASSERT_EQ(skel->bss->kretprobe_test1_result, 1, "kretprobe_test1_result");
+ ASSERT_EQ(skel->bss->kretprobe_test2_result, 1, "kretprobe_test2_result");
+ ASSERT_EQ(skel->bss->kretprobe_test3_result, 1, "kretprobe_test3_result");
+ ASSERT_EQ(skel->bss->kretprobe_test4_result, 1, "kretprobe_test4_result");
+ ASSERT_EQ(skel->bss->kretprobe_test5_result, 1, "kretprobe_test5_result");
+ ASSERT_EQ(skel->bss->kretprobe_test6_result, 1, "kretprobe_test6_result");
+ ASSERT_EQ(skel->bss->kretprobe_test7_result, 1, "kretprobe_test7_result");
+ ASSERT_EQ(skel->bss->kretprobe_test8_result, 1, "kretprobe_test8_result");
+}
+
+static void kprobe_multi_link_api_subtest(void)
+{
+ int prog_fd, link1_fd = -1, link2_fd = -1;
+ struct kprobe_multi *skel = NULL;
+ LIBBPF_OPTS(bpf_link_create_opts, opts);
+ unsigned long long addrs[8];
+ __u64 cookies[8];
+
+ if (!ASSERT_OK(load_kallsyms(), "load_kallsyms"))
+ goto cleanup;
+
+ skel = kprobe_multi__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "fentry_raw_skel_load"))
+ goto cleanup;
+
+ skel->bss->pid = getpid();
+ skel->bss->test_cookie = true;
+
+#define GET_ADDR(__sym, __addr) ({ \
+ __addr = ksym_get_addr(__sym); \
+ if (!ASSERT_NEQ(__addr, 0, "ksym_get_addr " #__sym)) \
+ goto cleanup; \
+})
+
+ GET_ADDR("bpf_fentry_test1", addrs[0]);
+ GET_ADDR("bpf_fentry_test2", addrs[1]);
+ GET_ADDR("bpf_fentry_test3", addrs[2]);
+ GET_ADDR("bpf_fentry_test4", addrs[3]);
+ GET_ADDR("bpf_fentry_test5", addrs[4]);
+ GET_ADDR("bpf_fentry_test6", addrs[5]);
+ GET_ADDR("bpf_fentry_test7", addrs[6]);
+ GET_ADDR("bpf_fentry_test8", addrs[7]);
+
+#undef GET_ADDR
+
+ cookies[0] = 1;
+ cookies[1] = 2;
+ cookies[2] = 3;
+ cookies[3] = 4;
+ cookies[4] = 5;
+ cookies[5] = 6;
+ cookies[6] = 7;
+ cookies[7] = 8;
+
+ opts.kprobe_multi.addrs = (const unsigned long *) &addrs;
+ opts.kprobe_multi.cnt = ARRAY_SIZE(addrs);
+ opts.kprobe_multi.cookies = (const __u64 *) &cookies;
+ prog_fd = bpf_program__fd(skel->progs.test_kprobe);
+
+ link1_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_KPROBE_MULTI, &opts);
+ if (!ASSERT_GE(link1_fd, 0, "link1_fd"))
+ goto cleanup;
+
+ cookies[0] = 8;
+ cookies[1] = 7;
+ cookies[2] = 6;
+ cookies[3] = 5;
+ cookies[4] = 4;
+ cookies[5] = 3;
+ cookies[6] = 2;
+ cookies[7] = 1;
+
+ opts.kprobe_multi.flags = BPF_F_KPROBE_MULTI_RETURN;
+ prog_fd = bpf_program__fd(skel->progs.test_kretprobe);
+
+ link2_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_KPROBE_MULTI, &opts);
+ if (!ASSERT_GE(link2_fd, 0, "link2_fd"))
+ goto cleanup;
+
+ kprobe_multi_test_run(skel);
+
+cleanup:
+ close(link1_fd);
+ close(link2_fd);
+ kprobe_multi__destroy(skel);
+}
+
static void uprobe_subtest(struct test_bpf_cookie *skel)
{
DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts);
@@ -249,6 +356,8 @@ void test_bpf_cookie(void)
if (test__start_subtest("kprobe"))
kprobe_subtest(skel);
+ if (test__start_subtest("multi_kprobe_link_api"))
+ kprobe_multi_link_api_subtest();
if (test__start_subtest("uprobe"))
uprobe_subtest(skel);
if (test__start_subtest("tracepoint"))
diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi.c b/tools/testing/selftests/bpf/progs/kprobe_multi.c
index 6616c082c8c2..af27d2c6fce8 100644
--- a/tools/testing/selftests/bpf/progs/kprobe_multi.c
+++ b/tools/testing/selftests/bpf/progs/kprobe_multi.c
@@ -16,6 +16,7 @@ extern const void bpf_fentry_test7 __ksym;
extern const void bpf_fentry_test8 __ksym;
int pid = 0;
+bool test_cookie = false;
__u64 kprobe_test1_result = 0;
__u64 kprobe_test2_result = 0;
@@ -40,31 +41,33 @@ static void kprobe_multi_check(void *ctx, bool is_return)
if (bpf_get_current_pid_tgid() >> 32 != pid)
return;
+ __u64 cookie = test_cookie ? bpf_get_attach_cookie(ctx) : 0;
__u64 addr = bpf_get_func_ip(ctx);
-#define SET(__var, __addr) ({ \
- if ((const void *) addr == __addr) \
- __var = 1; \
+#define SET(__var, __addr, __cookie) ({ \
+ if (((const void *) addr == __addr) && \
+ (!test_cookie || (cookie == __cookie))) \
+ __var = 1; \
})
if (is_return) {
- SET(kretprobe_test1_result, &bpf_fentry_test1);
- SET(kretprobe_test2_result, &bpf_fentry_test2);
- SET(kretprobe_test3_result, &bpf_fentry_test3);
- SET(kretprobe_test4_result, &bpf_fentry_test4);
- SET(kretprobe_test5_result, &bpf_fentry_test5);
- SET(kretprobe_test6_result, &bpf_fentry_test6);
- SET(kretprobe_test7_result, &bpf_fentry_test7);
- SET(kretprobe_test8_result, &bpf_fentry_test8);
+ SET(kretprobe_test1_result, &bpf_fentry_test1, 8);
+ SET(kretprobe_test2_result, &bpf_fentry_test2, 7);
+ SET(kretprobe_test3_result, &bpf_fentry_test3, 6);
+ SET(kretprobe_test4_result, &bpf_fentry_test4, 5);
+ SET(kretprobe_test5_result, &bpf_fentry_test5, 4);
+ SET(kretprobe_test6_result, &bpf_fentry_test6, 3);
+ SET(kretprobe_test7_result, &bpf_fentry_test7, 2);
+ SET(kretprobe_test8_result, &bpf_fentry_test8, 1);
} else {
- SET(kprobe_test1_result, &bpf_fentry_test1);
- SET(kprobe_test2_result, &bpf_fentry_test2);
- SET(kprobe_test3_result, &bpf_fentry_test3);
- SET(kprobe_test4_result, &bpf_fentry_test4);
- SET(kprobe_test5_result, &bpf_fentry_test5);
- SET(kprobe_test6_result, &bpf_fentry_test6);
- SET(kprobe_test7_result, &bpf_fentry_test7);
- SET(kprobe_test8_result, &bpf_fentry_test8);
+ SET(kprobe_test1_result, &bpf_fentry_test1, 1);
+ SET(kprobe_test2_result, &bpf_fentry_test2, 2);
+ SET(kprobe_test3_result, &bpf_fentry_test3, 3);
+ SET(kprobe_test4_result, &bpf_fentry_test4, 4);
+ SET(kprobe_test5_result, &bpf_fentry_test5, 5);
+ SET(kprobe_test6_result, &bpf_fentry_test6, 6);
+ SET(kprobe_test7_result, &bpf_fentry_test7, 7);
+ SET(kprobe_test8_result, &bpf_fentry_test8, 8);
}
#undef SET
--
2.35.1
Hello:
This series was applied to bpf/bpf-next.git (master)
by Alexei Starovoitov <[email protected]>:
On Wed, 16 Mar 2022 13:24:06 +0100 you wrote:
> hi,
> this patchset adds new link type BPF_TRACE_KPROBE_MULTI that attaches
> kprobe program through fprobe API [1] introduced by Masami.
>
> The fprobe API allows to attach probe on multiple functions at once very
> fast, because it works on top of ftrace. On the other hand this limits
> the probe point to the function entry or return.
>
> [...]
Here is the summary with links:
- [PATCHv3,bpf-next,01/13] lib/sort: Add priv pointer to swap function
https://git.kernel.org/bpf/bpf-next/c/a0019cd7d41a
- [PATCHv3,bpf-next,02/13] kallsyms: Skip the name search for empty string
https://git.kernel.org/bpf/bpf-next/c/aecf489f2ce5
- [PATCHv3,bpf-next,03/13] bpf: Add multi kprobe link
https://git.kernel.org/bpf/bpf-next/c/0dcac2725406
- [PATCHv3,bpf-next,04/13] bpf: Add bpf_get_func_ip kprobe helper for multi kprobe link
https://git.kernel.org/bpf/bpf-next/c/42a5712094e8
- [PATCHv3,bpf-next,05/13] bpf: Add support to inline bpf_get_func_ip helper on x86
https://git.kernel.org/bpf/bpf-next/c/97ee4d20ee67
- [PATCHv3,bpf-next,06/13] bpf: Add cookie support to programs attached with kprobe multi link
https://git.kernel.org/bpf/bpf-next/c/ca74823c6e16
- [PATCHv3,bpf-next,07/13] libbpf: Add libbpf_kallsyms_parse function
https://git.kernel.org/bpf/bpf-next/c/85153ac06283
- [PATCHv3,bpf-next,08/13] libbpf: Add bpf_link_create support for multi kprobes
https://git.kernel.org/bpf/bpf-next/c/5117c26e8773
- [PATCHv3,bpf-next,09/13] libbpf: Add bpf_program__attach_kprobe_multi_opts function
https://git.kernel.org/bpf/bpf-next/c/ddc6b04989eb
- [PATCHv3,bpf-next,10/13] selftests/bpf: Add kprobe_multi attach test
https://git.kernel.org/bpf/bpf-next/c/f7a11eeccb11
- [PATCHv3,bpf-next,11/13] selftests/bpf: Add kprobe_multi bpf_cookie test
https://git.kernel.org/bpf/bpf-next/c/2c6401c966ae
- [PATCHv3,bpf-next,12/13] selftests/bpf: Add attach test for bpf_program__attach_kprobe_multi_opts
https://git.kernel.org/bpf/bpf-next/c/9271a0c7ae7a
- [PATCHv3,bpf-next,13/13] selftests/bpf: Add cookie test for bpf_program__attach_kprobe_multi_opts
https://git.kernel.org/bpf/bpf-next/c/318c812cebfc
You are awesome, thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html
On Wed, Mar 16, 2022 at 5:24 AM Jiri Olsa <[email protected]> wrote:
>
> hi,
> this patchset adds new link type BPF_TRACE_KPROBE_MULTI that attaches
> kprobe program through fprobe API [1] introduced by Masami.
>
> The fprobe API allows to attach probe on multiple functions at once very
> fast, because it works on top of ftrace. On the other hand this limits
> the probe point to the function entry or return.
>
>
> [...]
>
Ok, so I've integrated multi-attach kprobes into retsnoop. It was
pretty straightforward. Here are some numbers for the speed of
attaching and, even more importantly, detaching for a set of almost
400 functions. There are a bit fewer functions for fentry-based mode due to
more limited BTF information for static functions. Note that retsnoop
attaches two BPF programs for each kernel function, so it's actually
two multi-attach kprobes, each attaching to 397 functions. For
single-attach kprobe, we perform 397 * 2 = 794 attachments.
I've been invoking retsnoop with the following specified set of
functions: -e '*sys_bpf*' -a ':kernel/bpf/syscall.c' -a
':kernel/bpf/verifier.c' -a ':kernel/bpf/btf.c' -a
':kernel/bpf/core.c'. So basically bpf syscall entry functions and all
the discoverable functions from syscall.c, verifier.c, core.c and
btf.c from kernel/bpf subdirectory.
Results:
fentry attach/detach (263 kfuncs): 2133 ms / 31671 ms (33 seconds)
kprobe attach/detach (397 kfuncs): 3121 ms / 13195 ms (16 seconds)
multi-kprobe attach/detach (397 kfuncs): 9 ms / 248 ms (0.25 seconds)
So as you can see, the speed up is tremendous! API is also very
convenient, I didn't have to modify retsnoop internals much to
accommodate multi-attach API. Great job!
Now for the bad news. :(
Stack traces from multi-attach kretprobe are broken, which makes all
this way less useful.
Below, see stack traces captured with multi- and single- kretprobes
for two different use cases. Single kprobe stack traces make much more
sense. Ignore that the last function doesn't have an actual address
associated with it (i.e. for __sys_bpf and bpf_tracing_prog_attach,
respectively). That's just how retsnoop is doing things, I think. We
actually were capturing stack traces from inside __sys_bpf (first
case) and bpf_tracing_prog_attach (second case).
MULTI KPROBE:
ffffffff81185a80 __sys_bpf+0x0
(kernel/bpf/syscall.c:4622:1)
SINGLE KPROBE:
ffffffff81e0007c entry_SYSCALL_64_after_hwframe+0x44
(arch/x86/entry/entry_64.S:113:0)
ffffffff81cd2b15 do_syscall_64+0x35
(arch/x86/entry/common.c:80:7)
. do_syscall_x64
(arch/x86/entry/common.c:50:12)
ffffffff811881aa __x64_sys_bpf+0x1a
(kernel/bpf/syscall.c:4765:1)
__sys_bpf
MULTI KPROBE:
ffffffff811851b0 bpf_tracing_prog_attach+0x0
(kernel/bpf/syscall.c:2708:1)
SINGLE KPROBE:
ffffffff81e0007c entry_SYSCALL_64_after_hwframe+0x44
(arch/x86/entry/entry_64.S:113:0)
ffffffff81cd2b15 do_syscall_64+0x35
(arch/x86/entry/common.c:80:7)
. do_syscall_x64
(arch/x86/entry/common.c:50:12)
ffffffff811881aa __x64_sys_bpf+0x1a
(kernel/bpf/syscall.c:4765:1)
ffffffff81185e79 __sys_bpf+0x3f9
(kernel/bpf/syscall.c:4705:9)
ffffffff8118583a bpf_raw_tracepoint_open+0x19a
(kernel/bpf/syscall.c:3069:6)
bpf_tracing_prog_attach
You can see that in multi-attach kprobe we only get one entry, which
is the very last function in the stack trace. We have no parent
function captured whatsoever. Jiri, Masami, any ideas what's wrong and
how to fix this? Let's try to figure this out and fix it before the
feature makes it into the kernel release. Thanks in advance!
[...]
On Sat, Mar 19, 2022 at 01:27:56PM +0100, Jiri Olsa wrote:
> On Fri, Mar 18, 2022 at 10:50:46PM -0700, Andrii Nakryiko wrote:
> > On Wed, Mar 16, 2022 at 5:24 AM Jiri Olsa <[email protected]> wrote:
> > >
> > > [...]
> > >
> >
> > Ok, so I've integrated multi-attach kprobes into retsnoop. It was
> > pretty straightforward. Here are some numbers for the speed of
> > attaching and, even more importantly, detaching for a set of almost
> > 400 functions. It's a bit less functions for fentry-based mode due to
> > more limited BTF information for static functions. Note that retsnoop
> > attaches two BPF programs for each kernel function, so it's actually
> > two multi-attach kprobes, each attaching to 397 functions. For
> > single-attach kprobe, we perform 397 * 2 = 794 attachments.
> >
> > I've been invoking retsnoop with the following specified set of
> > functions: -e '*sys_bpf*' -a ':kernel/bpf/syscall.c' -a
> > ':kernel/bpf/verifier.c' -a ':kernel/bpf/btf.c' -a
> > ':kernel/bpf/core.c'. So basically bpf syscall entry functions and all
> > the discoverable functions from syscall.c, verifier.c, core.c and
> > btf.c from kernel/bpf subdirectory.
> >
> > Results:
> >
> > fentry attach/detach (263 kfuncs): 2133 ms / 31671 ms (33 seconds)
> > kprobe attach/detach (397 kfuncs): 3121 ms / 13195 ms (16 seconds)
> > multi-kprobe attach/detach (397 kfuncs): 9 ms / 248 ms (0.25 seconds)
> >
> > So as you can see, the speed up is tremendous! API is also very
> > convenient, I didn't have to modify retsnoop internals much to
> > accommodate multi-attach API. Great job!
>
> nice! thanks for doing that so quickly
>
> >
> > Now for the bad news. :(
> >
> > Stack traces from multi-attach kretprobe are broken, which makes all
> > this way less useful.
> >
> > Below, see stack traces captured with multi- and single- kretprobes
> > for two different use cases. Single kprobe stack traces make much more
> > sense. Ignore that last function doesn't have actual address
> > associated with it (i.e. for __sys_bpf and bpf_tracing_prog_attach,
> > respectively). That's just how retsnoop is doing things, I think. We
> > actually were capturing stack traces from inside __sys_bpf (first
> > case) and bpf_tracing_prog_attach (second case).
> >
> > MULTI KPROBE:
> > ffffffff81185a80 __sys_bpf+0x0
> > (kernel/bpf/syscall.c:4622:1)
> >
> > SINGLE KPROBE:
> > ffffffff81e0007c entry_SYSCALL_64_after_hwframe+0x44
> > (arch/x86/entry/entry_64.S:113:0)
> > ffffffff81cd2b15 do_syscall_64+0x35
> > (arch/x86/entry/common.c:80:7)
> > . do_syscall_x64
> > (arch/x86/entry/common.c:50:12)
> > ffffffff811881aa __x64_sys_bpf+0x1a
> > (kernel/bpf/syscall.c:4765:1)
> > __sys_bpf
> >
> >
> > MULTI KPROBE:
> > ffffffff811851b0 bpf_tracing_prog_attach+0x0
> > (kernel/bpf/syscall.c:2708:1)
> >
> > SINGLE KPROBE:
> > ffffffff81e0007c entry_SYSCALL_64_after_hwframe+0x44
> > (arch/x86/entry/entry_64.S:113:0)
> > ffffffff81cd2b15 do_syscall_64+0x35
> > (arch/x86/entry/common.c:80:7)
> > . do_syscall_x64
> > (arch/x86/entry/common.c:50:12)
> > ffffffff811881aa __x64_sys_bpf+0x1a
> > (kernel/bpf/syscall.c:4765:1)
> > ffffffff81185e79 __sys_bpf+0x3f9
> > (kernel/bpf/syscall.c:4705:9)
> > ffffffff8118583a bpf_raw_tracepoint_open+0x19a
> > (kernel/bpf/syscall.c:3069:6)
> > bpf_tracing_prog_attach
> >
> > You can see that in multi-attach kprobe we only get one entry, which
> > is the very last function in the stack trace. We have no parent
> > function captured whatsoever. Jiri, Masami, any ideas what's wrong and
> > how to fix this? Let's try to figure this out and fix it before the
> > feature makes it into the kernel release. Thanks in advance!
>
> oops, I should have tried kstack with the bpftrace's kretprobe, I see the same:
>
> # ./src/bpftrace -e 'kretprobe:x* { @[kstack] = count(); }'
> Attaching 1 probe...
> Attaching 3340probes
> ^C
>
> @[
> xfs_trans_apply_dquot_deltas+0
> ]: 22
> @[
> xlog_cil_commit+0
> ]: 22
> @[
> xlog_grant_push_threshold+0
> ]: 22
> @[
> xfs_trans_add_item+0
> ]: 22
> @[
> xfs_log_reserve+0
> ]: 22
> @[
> xlog_space_left+0
> ]: 22
> @[
> xfs_buf_offset+0
> ]: 22
> @[
> xfs_trans_free_dqinfo+0
> ]: 22
> @[
> xlog_ticket_alloc+0
> xfs_log_reserve+5
> ]: 22
> @[
> xfs_cil_prepare_item+0
>
>
> I think it's because we set original ip for return probe to have
> bpf_get_func_ip working properly, but it breaks backtrace of course
>
> I'm not sure we could bring along the original regs for return probe,
> but I have an idea how to workaround the bpf_get_func_ip issue and
> keep the registers intact for other helpers
the change below uses bpf_run_ctx to store the link and entry ip on the stack,
where helpers can find it.. it fixed the retprobe backtrace for me
I had to revert the get_func_ip inline.. it's squashed in the change
below for quick testing.. I'll send the revert in a separate patch with the
formal change
could you please test?
thanks,
jirka
---
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 0287176bfe9a..cf92f9c01556 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -13678,7 +13678,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
continue;
}
- /* Implement tracing bpf_get_func_ip inline. */
+ /* Implement bpf_get_func_ip inline. */
if (prog_type == BPF_PROG_TYPE_TRACING &&
insn->imm == BPF_FUNC_get_func_ip) {
/* Load IP address from ctx - 16 */
@@ -13693,25 +13693,6 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
continue;
}
-#ifdef CONFIG_X86
- /* Implement kprobe_multi bpf_get_func_ip inline. */
- if (prog_type == BPF_PROG_TYPE_KPROBE &&
- eatype == BPF_TRACE_KPROBE_MULTI &&
- insn->imm == BPF_FUNC_get_func_ip) {
- /* Load IP address from ctx (struct pt_regs) ip */
- insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
- offsetof(struct pt_regs, ip));
-
- new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1);
- if (!new_prog)
- return -ENOMEM;
-
- env->prog = prog = new_prog;
- insn = new_prog->insnsi + i + delta;
- continue;
- }
-#endif
-
patch_call_imm:
fn = env->ops->get_func_proto(insn->imm, env->prog);
/* all functions that have prototype and verifier allowed
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 9a7b6be655e4..aefe33c08296 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -80,7 +80,8 @@ u64 bpf_get_stack(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
static int bpf_btf_printf_prepare(struct btf_ptr *ptr, u32 btf_ptr_size,
u64 flags, const struct btf **btf,
s32 *btf_id);
-static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx, u64 ip);
+static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx);
+static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx);
/**
* trace_call_bpf - invoke BPF program
@@ -1042,8 +1043,7 @@ static const struct bpf_func_proto bpf_get_func_ip_proto_kprobe = {
BPF_CALL_1(bpf_get_func_ip_kprobe_multi, struct pt_regs *, regs)
{
- /* This helper call is inlined by verifier on x86. */
- return instruction_pointer(regs);
+ return bpf_kprobe_multi_entry_ip(current->bpf_ctx);
}
static const struct bpf_func_proto bpf_get_func_ip_proto_kprobe_multi = {
@@ -1055,7 +1055,7 @@ static const struct bpf_func_proto bpf_get_func_ip_proto_kprobe_multi = {
BPF_CALL_1(bpf_get_attach_cookie_kprobe_multi, struct pt_regs *, regs)
{
- return bpf_kprobe_multi_cookie(current->bpf_ctx, instruction_pointer(regs));
+ return bpf_kprobe_multi_cookie(current->bpf_ctx);
}
static const struct bpf_func_proto bpf_get_attach_cookie_proto_kmulti = {
@@ -2220,15 +2220,16 @@ struct bpf_kprobe_multi_link {
struct bpf_link link;
struct fprobe fp;
unsigned long *addrs;
- /*
- * The run_ctx here is used to get struct bpf_kprobe_multi_link in
- * get_attach_cookie helper, so it can't be used to store data.
- */
- struct bpf_run_ctx run_ctx;
u64 *cookies;
u32 cnt;
};
+struct bpf_kprobe_multi_run_ctx {
+ struct bpf_run_ctx run_ctx;
+ struct bpf_kprobe_multi_link *link;
+ unsigned long entry_ip;
+};
+
static void bpf_kprobe_multi_link_release(struct bpf_link *link)
{
struct bpf_kprobe_multi_link *kmulti_link;
@@ -2282,18 +2283,21 @@ static int bpf_kprobe_multi_cookie_cmp(const void *a, const void *b, const void
return __bpf_kprobe_multi_cookie_cmp(a, b);
}
-static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx, u64 ip)
+static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx)
{
+ struct bpf_kprobe_multi_run_ctx *run_ctx;
struct bpf_kprobe_multi_link *link;
+ u64 *cookie, entry_ip;
unsigned long *addr;
- u64 *cookie;
if (WARN_ON_ONCE(!ctx))
return 0;
- link = container_of(ctx, struct bpf_kprobe_multi_link, run_ctx);
+ run_ctx = container_of(current->bpf_ctx, struct bpf_kprobe_multi_run_ctx, run_ctx);
+ link = run_ctx->link;
+ entry_ip = run_ctx->entry_ip;
if (!link->cookies)
return 0;
- addr = bsearch(&ip, link->addrs, link->cnt, sizeof(ip),
+ addr = bsearch(&entry_ip, link->addrs, link->cnt, sizeof(entry_ip),
__bpf_kprobe_multi_cookie_cmp);
if (!addr)
return 0;
@@ -2301,10 +2305,22 @@ static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx, u64 ip)
return *cookie;
}
+static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx)
+{
+ struct bpf_kprobe_multi_run_ctx *run_ctx;
+
+ run_ctx = container_of(current->bpf_ctx, struct bpf_kprobe_multi_run_ctx, run_ctx);
+ return run_ctx->entry_ip;
+}
+
static int
kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link,
- struct pt_regs *regs)
+ unsigned long entry_ip, struct pt_regs *regs)
{
+ struct bpf_kprobe_multi_run_ctx run_ctx = {
+ .link = link,
+ .entry_ip = entry_ip,
+ };
struct bpf_run_ctx *old_run_ctx;
int err;
@@ -2315,7 +2331,7 @@ kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link,
migrate_disable();
rcu_read_lock();
- old_run_ctx = bpf_set_run_ctx(&link->run_ctx);
+ old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
err = bpf_prog_run(link->link.prog, regs);
bpf_reset_run_ctx(old_run_ctx);
rcu_read_unlock();
@@ -2330,24 +2346,10 @@ static void
kprobe_multi_link_handler(struct fprobe *fp, unsigned long entry_ip,
struct pt_regs *regs)
{
- unsigned long saved_ip = instruction_pointer(regs);
struct bpf_kprobe_multi_link *link;
- /*
- * Because fprobe's regs->ip is set to the next instruction of
- * dynamic-ftrace instruction, correct entry ip must be set, so
- * that the bpf program can access entry address via regs as same
- * as kprobes.
- *
- * Both kprobe and kretprobe see the entry ip of traced function
- * as instruction pointer.
- */
- instruction_pointer_set(regs, entry_ip);
-
link = container_of(fp, struct bpf_kprobe_multi_link, fp);
- kprobe_multi_link_prog_run(link, regs);
-
- instruction_pointer_set(regs, saved_ip);
+ kprobe_multi_link_prog_run(link, entry_ip, regs);
}
static int
@@ -2514,7 +2516,11 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
{
return -EOPNOTSUPP;
}
-static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx, u64 ip)
+static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx)
+{
+ return 0;
+}
+static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx)
{
return 0;
}
On Fri, Mar 18, 2022 at 10:50:46PM -0700, Andrii Nakryiko wrote:
> On Wed, Mar 16, 2022 at 5:24 AM Jiri Olsa <[email protected]> wrote:
> >
> > [...]
> >
>
> Ok, so I've integrated multi-attach kprobes into retsnoop. It was
> pretty straightforward. Here are some numbers for the speed of
> attaching and, even more importantly, detaching for a set of almost
> 400 functions. It's a bit less functions for fentry-based mode due to
> more limited BTF information for static functions. Note that retsnoop
> attaches two BPF programs for each kernel function, so it's actually
> two multi-attach kprobes, each attaching to 397 functions. For
> single-attach kprobe, we perform 397 * 2 = 794 attachments.
>
> I've been invoking retsnoop with the following specified set of
> functions: -e '*sys_bpf*' -a ':kernel/bpf/syscall.c' -a
> ':kernel/bpf/verifier.c' -a ':kernel/bpf/btf.c' -a
> ':kernel/bpf/core.c'. So basically bpf syscall entry functions and all
> the discoverable functions from syscall.c, verifier.c, core.c and
> btf.c from kernel/bpf subdirectory.
>
> Results:
>
> fentry attach/detach (263 kfuncs): 2133 ms / 31671 ms (33 seconds)
> kprobe attach/detach (397 kfuncs): 3121 ms / 13195 ms (16 seconds)
> multi-kprobe attach/detach (397 kfuncs): 9 ms / 248 ms (0.25 seconds)
>
> So as you can see, the speed up is tremendous! API is also very
> convenient, I didn't have to modify retsnoop internals much to
> accommodate multi-attach API. Great job!
nice! thanks for doing that so quickly
>
> Now for the bad news. :(
>
> Stack traces from multi-attach kretprobe are broken, which makes all
> this way less useful.
>
> Below, see stack traces captured with multi- and single- kretprobes
> for two different use cases. Single kprobe stack traces make much more
> sense. Ignore that last function doesn't have actual address
> associated with it (i.e. for __sys_bpf and bpf_tracing_prog_attach,
> respectively). That's just how retsnoop is doing things, I think. We
> actually were capturing stack traces from inside __sys_bpf (first
> case) and bpf_tracing_prog_attach (second case).
>
> MULTI KPROBE:
> ffffffff81185a80 __sys_bpf+0x0
> (kernel/bpf/syscall.c:4622:1)
>
> SINGLE KPROBE:
> ffffffff81e0007c entry_SYSCALL_64_after_hwframe+0x44
> (arch/x86/entry/entry_64.S:113:0)
> ffffffff81cd2b15 do_syscall_64+0x35
> (arch/x86/entry/common.c:80:7)
> . do_syscall_x64
> (arch/x86/entry/common.c:50:12)
> ffffffff811881aa __x64_sys_bpf+0x1a
> (kernel/bpf/syscall.c:4765:1)
> __sys_bpf
>
>
> MULTI KPROBE:
> ffffffff811851b0 bpf_tracing_prog_attach+0x0
> (kernel/bpf/syscall.c:2708:1)
>
> SINGLE KPROBE:
> ffffffff81e0007c entry_SYSCALL_64_after_hwframe+0x44
> (arch/x86/entry/entry_64.S:113:0)
> ffffffff81cd2b15 do_syscall_64+0x35
> (arch/x86/entry/common.c:80:7)
> . do_syscall_x64
> (arch/x86/entry/common.c:50:12)
> ffffffff811881aa __x64_sys_bpf+0x1a
> (kernel/bpf/syscall.c:4765:1)
> ffffffff81185e79 __sys_bpf+0x3f9
> (kernel/bpf/syscall.c:4705:9)
> ffffffff8118583a bpf_raw_tracepoint_open+0x19a
> (kernel/bpf/syscall.c:3069:6)
> bpf_tracing_prog_attach
>
> You can see that in multi-attach kprobe we only get one entry, which
> is the very last function in the stack trace. We have no parent
> function captured whatsoever. Jiri, Masami, any ideas what's wrong and
> how to fix this? Let's try to figure this out and fix it before the
> feature makes it into the kernel release. Thanks in advance!
oops, I should have tried kstack with the bpftrace's kretprobe, I see the same:
# ./src/bpftrace -e 'kretprobe:x* { @[kstack] = count(); }'
Attaching 1 probe...
Attaching 3340 probes
^C
@[
xfs_trans_apply_dquot_deltas+0
]: 22
@[
xlog_cil_commit+0
]: 22
@[
xlog_grant_push_threshold+0
]: 22
@[
xfs_trans_add_item+0
]: 22
@[
xfs_log_reserve+0
]: 22
@[
xlog_space_left+0
]: 22
@[
xfs_buf_offset+0
]: 22
@[
xfs_trans_free_dqinfo+0
]: 22
@[
xlog_ticket_alloc+0
xfs_log_reserve+5
]: 22
@[
xfs_cil_prepare_item+0
I think it's because we set original ip for return probe to have
bpf_get_func_ip working properly, but it breaks backtrace of course
I'm not sure we could bring along the original regs for return probe,
but I have an idea how to workaround the bpf_get_func_ip issue and
keep the registers intact for other helpers
jirka
On Sat, Mar 19, 2022 at 11:26 AM Andrii Nakryiko
<[email protected]> wrote:
>
> On Sat, Mar 19, 2022 at 7:31 AM Jiri Olsa <[email protected]> wrote:
> >
> > On Sat, Mar 19, 2022 at 01:27:56PM +0100, Jiri Olsa wrote:
> > > On Fri, Mar 18, 2022 at 10:50:46PM -0700, Andrii Nakryiko wrote:
> > > > On Wed, Mar 16, 2022 at 5:24 AM Jiri Olsa <[email protected]> wrote:
> > > > >
> > > > > [...]
> > > > >
> > > >
> > > > [...]
> > >
> > > nice! thanks for doing that so quickly
> > >
> > > >
> > > > [...]
> > >
> > > oops, I should have tried kstack with the bpftrace's kretprobe, I see the same:
> > >
> > > # ./src/bpftrace -e 'kretprobe:x* { @[kstack] = count(); }'
> > > Attaching 1 probe...
> > > Attaching 3340probes
> > > ^C
> > >
> > > @[
> > > xfs_trans_apply_dquot_deltas+0
> > > ]: 22
> > > @[
> > > xlog_cil_commit+0
> > > ]: 22
> > > @[
> > > xlog_grant_push_threshold+0
> > > ]: 22
> > > @[
> > > xfs_trans_add_item+0
> > > ]: 22
> > > @[
> > > xfs_log_reserve+0
> > > ]: 22
> > > @[
> > > xlog_space_left+0
> > > ]: 22
> > > @[
> > > xfs_buf_offset+0
> > > ]: 22
> > > @[
> > > xfs_trans_free_dqinfo+0
> > > ]: 22
> > > @[
> > > xlog_ticket_alloc+0
> > > xfs_log_reserve+5
> > > ]: 22
> > > @[
> > > xfs_cil_prepare_item+0
> > >
> > >
> > > I think it's because we set original ip for return probe to have
> > > bpf_get_func_ip working properly, but it breaks backtrace of course
> > >
> > > I'm not sure we could bring along the original regs for return probe,
> > > but I have an idea how to workaround the bpf_get_func_ip issue and
> > > keep the registers intact for other helpers
> >
> > change below is using bpf_run_ctx to store link and entry ip on stack,
> > where helpers can find it.. it fixed the retprobe backtrace for me
> >
> > I had to revert the get_func_ip inline.. it's squashed in the change
> > below for quick testing.. I'll send revert in separate patch with the
> > formal change
> >
> > could you please test?
> >
>
> Yep, tried locally and now stack traces work as expected. Thanks!
BTW, there is a separate and unrelated problem, forgot to mention it yesterday.
It is possible to attach to some functions by using .syms (resolving by
name), but the attach is rejected when specifying .addrs (which retsnoop
derives directly from kallsyms). One such example is "__bpf_tramp_exit".
Interestingly, this function is marked notrace, but is still attachable
by name (including with a single-attach kprobe), while I'd assume it
shouldn't be.
It is all kind of broken, tbh: this notrace function is present in
/sys/kernel/debug/tracing/available_filter_functions, but it's not in
/sys/kernel/debug/kprobes/blacklist, so the whole notrace and
blacklisting doesn't seem to work well.
So anyway, it's worth looking into this as well and figuring out why this
is inconsistent between the .syms and .addrs approaches. Please take a
look when you get a chance. And I'll try to figure out if there is any
way to determine notrace functions from outside the kernel.
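For reference, the two attach flavors being compared are roughly as in the
sketch below, using the new opts API (whether retsnoop goes through this
exact entry point is not stated above, and tramp_exit_addr stands in for
the address looked up from kallsyms):

	/* by name -- reported to succeed for __bpf_tramp_exit */
	const char *syms[] = { "__bpf_tramp_exit" };
	LIBBPF_OPTS(bpf_kprobe_multi_opts, sym_opts,
		.syms = syms,
		.cnt = 1,
	);
	link = bpf_program__attach_kprobe_multi_opts(prog, NULL, &sym_opts);

	/* by address resolved from kallsyms -- reported to be rejected */
	unsigned long addrs[] = { tramp_exit_addr };
	LIBBPF_OPTS(bpf_kprobe_multi_opts, addr_opts,
		.addrs = addrs,
		.cnt = 1,
	);
	link = bpf_program__attach_kprobe_multi_opts(prog, NULL, &addr_opts);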
> Please resubmit as a proper patch and add my ack:
>
> Acked-by: Andrii Nakryiko <[email protected]>
>
> > thanks,
> > jirka
> >
> >
> > ---
>
> [...]
>
> > -static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx, u64 ip)
> > +static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx)
> > {
> > + struct bpf_kprobe_multi_run_ctx *run_ctx;
> > struct bpf_kprobe_multi_link *link;
> > + u64 *cookie, entry_ip;
> > unsigned long *addr;
> > - u64 *cookie;
> >
> > if (WARN_ON_ONCE(!ctx))
> > return 0;
> > - link = container_of(ctx, struct bpf_kprobe_multi_link, run_ctx);
> > + run_ctx = container_of(current->bpf_ctx, struct bpf_kprobe_multi_run_ctx, run_ctx);
> > + link = run_ctx->link;
> > + entry_ip = run_ctx->entry_ip;
>
> nit: this can be assigned after we checked that we have link->cookies
>
> > if (!link->cookies)
> > return 0;
> > - addr = bsearch(&ip, link->addrs, link->cnt, sizeof(ip),
> > + addr = bsearch(&entry_ip, link->addrs, link->cnt, sizeof(entry_ip),
> > __bpf_kprobe_multi_cookie_cmp);
> > if (!addr)
> > return 0;
>
> [...]
On Sat, Mar 19, 2022 at 7:31 AM Jiri Olsa <[email protected]> wrote:
>
> On Sat, Mar 19, 2022 at 01:27:56PM +0100, Jiri Olsa wrote:
> > On Fri, Mar 18, 2022 at 10:50:46PM -0700, Andrii Nakryiko wrote:
> > > On Wed, Mar 16, 2022 at 5:24 AM Jiri Olsa <[email protected]> wrote:
> > > >
> > > > [...]
> > > >
> > >
> > > [...]
> >
> > nice! thanks for doing that so quickly
> >
> > >
> > > [...]
> >
> > oops, I should have tried kstack with the bpftrace's kretprobe, I see the same:
> >
> > # ./src/bpftrace -e 'kretprobe:x* { @[kstack] = count(); }'
> > Attaching 1 probe...
> > Attaching 3340probes
> > ^C
> >
> > @[
> > xfs_trans_apply_dquot_deltas+0
> > ]: 22
> > @[
> > xlog_cil_commit+0
> > ]: 22
> > @[
> > xlog_grant_push_threshold+0
> > ]: 22
> > @[
> > xfs_trans_add_item+0
> > ]: 22
> > @[
> > xfs_log_reserve+0
> > ]: 22
> > @[
> > xlog_space_left+0
> > ]: 22
> > @[
> > xfs_buf_offset+0
> > ]: 22
> > @[
> > xfs_trans_free_dqinfo+0
> > ]: 22
> > @[
> > xlog_ticket_alloc+0
> > xfs_log_reserve+5
> > ]: 22
> > @[
> > xfs_cil_prepare_item+0
> >
> >
> > I think it's because we set original ip for return probe to have
> > bpf_get_func_ip working properly, but it breaks backtrace of course
> >
> > I'm not sure we could bring along the original regs for return probe,
> > but I have an idea how to workaround the bpf_get_func_ip issue and
> > keep the registers intact for other helpers
>
> change below is using bpf_run_ctx to store link and entry ip on stack,
> where helpers can find it.. it fixed the retprobe backtrace for me
>
> I had to revert the get_func_ip inline.. it's squashed in the change
> below for quick testing.. I'll send revert in separate patch with the
> formal change
>
> could you please test?
>
Yep, tried locally and now stack traces work as expected. Thanks!
Please resubmit as a proper patch and add my ack:
Acked-by: Andrii Nakryiko <[email protected]>
> thanks,
> jirka
>
>
> ---
[...]
> -static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx, u64 ip)
> +static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx)
> {
> + struct bpf_kprobe_multi_run_ctx *run_ctx;
> struct bpf_kprobe_multi_link *link;
> + u64 *cookie, entry_ip;
> unsigned long *addr;
> - u64 *cookie;
>
> if (WARN_ON_ONCE(!ctx))
> return 0;
> - link = container_of(ctx, struct bpf_kprobe_multi_link, run_ctx);
> + run_ctx = container_of(current->bpf_ctx, struct bpf_kprobe_multi_run_ctx, run_ctx);
> + link = run_ctx->link;
> + entry_ip = run_ctx->entry_ip;
nit: this can be assigned after we checked that we have link->cookies
> if (!link->cookies)
> return 0;
> - addr = bsearch(&ip, link->addrs, link->cnt, sizeof(ip),
> + addr = bsearch(&entry_ip, link->addrs, link->cnt, sizeof(entry_ip),
> __bpf_kprobe_multi_cookie_cmp);
> if (!addr)
> return 0;
[...]