2022-03-10 09:54:32

by Namhyung Kim

Subject: [PATCH 1/2] bpf: Adjust BPF stack helper functions to accommodate skip > 0

Let's say that the caller has storage for num_elem stack frames. Then,
the BPF stack helper functions walk the stack for only num_elem frames.
This means that if skip > 0, one keeps only 'num_elem - skip' frames.

This is because it sets init_nr in the perf_callchain_entry to the end
of the buffer to save num_elem entries only. I believe it was because
the perf callchain code unwound the stack frames until it reached the
global max size (sysctl_perf_event_max_stack).

However, the perf callchain code now has perf_callchain_entry_ctx.max_stack
to limit the iteration locally. This allows the init_nr handling to be
dropped from the BPF callstack entries and removes the confusion with the
perf_event's __PERF_SAMPLE_CALLCHAIN_EARLY, which sets init_nr to 0.

Also change the comment on bpf_get_stack() in the header file to be
more explicit about what the return value means.
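
As a concrete example (illustrative numbers, assuming the default
sysctl_perf_event_max_stack of 127): with num_elem = 2 and skip = 1,
the old code set init_nr = 127 - 2 = 125, so perf saved at most 2
entries and, after subtracting skip, only 1 frame reached the caller
even if the stack was deeper. With this change, max_depth becomes
num_elem + skip = 3 (clamped to sysctl_perf_event_max_stack), the
unwinder collects up to 3 frames starting from entry 0, and after
skipping 1 frame the caller still gets the full 2 frames.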

Based-on-patch-by: Eugene Loh <[email protected]>
Signed-off-by: Namhyung Kim <[email protected]>
---
include/uapi/linux/bpf.h | 4 +--
kernel/bpf/stackmap.c | 56 +++++++++++++++++-----------------------
2 files changed, 26 insertions(+), 34 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index b0383d371b9a..77f4a022c60c 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2975,8 +2975,8 @@ union bpf_attr {
*
* # sysctl kernel.perf_event_max_stack=<new value>
* Return
- * A non-negative value equal to or less than *size* on success,
- * or a negative error in case of failure.
+ * The non-negative copied *buf* length equal to or less than
+ * *size* on success, or a negative error in case of failure.
*
* long bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header)
* Description
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 22c8ae94e4c1..2823dcefae10 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -166,7 +166,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
}

static struct perf_callchain_entry *
-get_callchain_entry_for_task(struct task_struct *task, u32 init_nr)
+get_callchain_entry_for_task(struct task_struct *task, u32 max_depth)
{
#ifdef CONFIG_STACKTRACE
struct perf_callchain_entry *entry;
@@ -177,9 +177,8 @@ get_callchain_entry_for_task(struct task_struct *task, u32 init_nr)
if (!entry)
return NULL;

- entry->nr = init_nr +
- stack_trace_save_tsk(task, (unsigned long *)(entry->ip + init_nr),
- sysctl_perf_event_max_stack - init_nr, 0);
+ entry->nr = stack_trace_save_tsk(task, (unsigned long *)entry->ip,
+ max_depth, 0);

/* stack_trace_save_tsk() works on unsigned long array, while
* perf_callchain_entry uses u64 array. For 32-bit systems, it is
@@ -191,7 +190,7 @@ get_callchain_entry_for_task(struct task_struct *task, u32 init_nr)
int i;

/* copy data from the end to avoid using extra buffer */
- for (i = entry->nr - 1; i >= (int)init_nr; i--)
+ for (i = entry->nr - 1; i >= 0; i--)
to[i] = (u64)(from[i]);
}

@@ -208,27 +207,19 @@ static long __bpf_get_stackid(struct bpf_map *map,
{
struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
struct stack_map_bucket *bucket, *new_bucket, *old_bucket;
- u32 max_depth = map->value_size / stack_map_data_size(map);
- /* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */
- u32 init_nr = sysctl_perf_event_max_stack - max_depth;
u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
u32 hash, id, trace_nr, trace_len;
bool user = flags & BPF_F_USER_STACK;
u64 *ips;
bool hash_matches;

- /* get_perf_callchain() guarantees that trace->nr >= init_nr
- * and trace-nr <= sysctl_perf_event_max_stack, so trace_nr <= max_depth
- */
- trace_nr = trace->nr - init_nr;
-
- if (trace_nr <= skip)
+ if (trace->nr <= skip)
/* skipping more than usable stack trace */
return -EFAULT;

- trace_nr -= skip;
+ trace_nr = trace->nr - skip;
trace_len = trace_nr * sizeof(u64);
- ips = trace->ip + skip + init_nr;
+ ips = trace->ip + skip;
hash = jhash2((u32 *)ips, trace_len / sizeof(u32), 0);
id = hash & (smap->n_buckets - 1);
bucket = READ_ONCE(smap->buckets[id]);
@@ -285,8 +276,7 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
u64, flags)
{
u32 max_depth = map->value_size / stack_map_data_size(map);
- /* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */
- u32 init_nr = sysctl_perf_event_max_stack - max_depth;
+ u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
bool user = flags & BPF_F_USER_STACK;
struct perf_callchain_entry *trace;
bool kernel = !user;
@@ -295,8 +285,12 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID)))
return -EINVAL;

- trace = get_perf_callchain(regs, init_nr, kernel, user,
- sysctl_perf_event_max_stack, false, false);
+ max_depth += skip;
+ if (max_depth > sysctl_perf_event_max_stack)
+ max_depth = sysctl_perf_event_max_stack;
+
+ trace = get_perf_callchain(regs, 0, kernel, user, max_depth,
+ false, false);

if (unlikely(!trace))
/* couldn't fetch the stack trace */
@@ -387,7 +381,7 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
struct perf_callchain_entry *trace_in,
void *buf, u32 size, u64 flags)
{
- u32 init_nr, trace_nr, copy_len, elem_size, num_elem;
+ u32 trace_nr, copy_len, elem_size, num_elem, max_depth;
bool user_build_id = flags & BPF_F_USER_BUILD_ID;
u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
bool user = flags & BPF_F_USER_STACK;
@@ -412,30 +406,28 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
goto err_fault;

num_elem = size / elem_size;
- if (sysctl_perf_event_max_stack < num_elem)
- init_nr = 0;
- else
- init_nr = sysctl_perf_event_max_stack - num_elem;
+ max_depth = num_elem + skip;
+ if (sysctl_perf_event_max_stack < max_depth)
+ max_depth = sysctl_perf_event_max_stack;

if (trace_in)
trace = trace_in;
else if (kernel && task)
- trace = get_callchain_entry_for_task(task, init_nr);
+ trace = get_callchain_entry_for_task(task, max_depth);
else
- trace = get_perf_callchain(regs, init_nr, kernel, user,
- sysctl_perf_event_max_stack,
+ trace = get_perf_callchain(regs, 0, kernel, user, max_depth,
false, false);
if (unlikely(!trace))
goto err_fault;

- trace_nr = trace->nr - init_nr;
- if (trace_nr < skip)
+ if (trace->nr < skip)
goto err_fault;

- trace_nr -= skip;
+ trace_nr = trace->nr - skip;
trace_nr = (trace_nr <= num_elem) ? trace_nr : num_elem;
copy_len = trace_nr * elem_size;
- ips = trace->ip + skip + init_nr;
+
+ ips = trace->ip + skip;
if (user && user_build_id)
stack_map_get_build_id_offset(buf, ips, trace_nr, user);
else
--
2.35.1.723.g4982287a31-goog


2022-03-10 14:33:48

by Namhyung Kim

Subject: [PATCH 2/2] bpf/selftests: Test skipping stacktrace

Add a test case for stacktrace with skip > 0 using a small-sized
buffer. Previously, skipping entries greater than or equal to the
buffer size was not supported, and the skipped part was filled with
zeros instead.
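
For reference, the new test can be run through the usual BPF selftests
harness, e.g. (assuming the standard test_progs name filter):

  $ cd tools/testing/selftests/bpf
  $ ./test_progs -t stacktrace_map_skip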

Signed-off-by: Namhyung Kim <[email protected]>
---
.../bpf/prog_tests/stacktrace_map_skip.c | 72 ++++++++++++++++
.../selftests/bpf/progs/stacktrace_map_skip.c | 82 +++++++++++++++++++
2 files changed, 154 insertions(+)
create mode 100644 tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c
create mode 100644 tools/testing/selftests/bpf/progs/stacktrace_map_skip.c

diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c
new file mode 100644
index 000000000000..bcb244aa3c78
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include "stacktrace_map_skip.skel.h"
+
+#define TEST_STACK_DEPTH 2
+
+void test_stacktrace_map_skip(void)
+{
+ struct stacktrace_map_skip *skel;
+ int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
+ int err, stack_trace_len;
+ __u32 key, val, duration = 0;
+
+ skel = stacktrace_map_skip__open_and_load();
+ if (CHECK(!skel, "skel_open_and_load", "skeleton open failed\n"))
+ return;
+
+ /* find map fds */
+ control_map_fd = bpf_map__fd(skel->maps.control_map);
+ if (CHECK_FAIL(control_map_fd < 0))
+ goto out;
+
+ stackid_hmap_fd = bpf_map__fd(skel->maps.stackid_hmap);
+ if (CHECK_FAIL(stackid_hmap_fd < 0))
+ goto out;
+
+ stackmap_fd = bpf_map__fd(skel->maps.stackmap);
+ if (CHECK_FAIL(stackmap_fd < 0))
+ goto out;
+
+ stack_amap_fd = bpf_map__fd(skel->maps.stack_amap);
+ if (CHECK_FAIL(stack_amap_fd < 0))
+ goto out;
+
+ err = stacktrace_map_skip__attach(skel);
+ if (CHECK(err, "skel_attach", "skeleton attach failed\n"))
+ goto out;
+
+ /* give some time for bpf program run */
+ sleep(1);
+
+ /* disable stack trace collection */
+ key = 0;
+ val = 1;
+ bpf_map_update_elem(control_map_fd, &key, &val, 0);
+
+ /* for every element in stackid_hmap, we can find a corresponding one
+ * in stackmap, and vice versa.
+ */
+ err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
+ if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
+ "err %d errno %d\n", err, errno))
+ goto out;
+
+ err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
+ if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
+ "err %d errno %d\n", err, errno))
+ goto out;
+
+ stack_trace_len = TEST_STACK_DEPTH * sizeof(__u64);
+ err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len);
+ if (CHECK(err, "compare_stack_ips stackmap vs. stack_amap",
+ "err %d errno %d\n", err, errno))
+ goto out;
+
+ if (CHECK(skel->bss->failed, "check skip",
+ "failed to skip some depth: %d", skel->bss->failed))
+ goto out;
+
+out:
+ stacktrace_map_skip__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/stacktrace_map_skip.c b/tools/testing/selftests/bpf/progs/stacktrace_map_skip.c
new file mode 100644
index 000000000000..323248b17ae4
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/stacktrace_map_skip.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+#define TEST_STACK_DEPTH 2
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u32);
+} control_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 16384);
+ __type(key, __u32);
+ __type(value, __u32);
+} stackid_hmap SEC(".maps");
+
+typedef __u64 stack_trace_t[TEST_STACK_DEPTH];
+
+struct {
+ __uint(type, BPF_MAP_TYPE_STACK_TRACE);
+ __uint(max_entries, 16384);
+ __type(key, __u32);
+ __type(value, stack_trace_t);
+} stackmap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 16384);
+ __type(key, __u32);
+ __type(value, stack_trace_t);
+} stack_amap SEC(".maps");
+
+/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
+struct sched_switch_args {
+ unsigned long long pad;
+ char prev_comm[TASK_COMM_LEN];
+ int prev_pid;
+ int prev_prio;
+ long long prev_state;
+ char next_comm[TASK_COMM_LEN];
+ int next_pid;
+ int next_prio;
+};
+
+int failed = 0;
+
+SEC("tracepoint/sched/sched_switch")
+int oncpu(struct sched_switch_args *ctx)
+{
+ __u32 max_len = TEST_STACK_DEPTH * sizeof(__u64);
+ __u32 key = 0, val = 0, *value_p;
+ __u64 *stack_p;
+
+ value_p = bpf_map_lookup_elem(&control_map, &key);
+ if (value_p && *value_p)
+ return 0; /* skip if non-zero *value_p */
+
+ /* it should allow skipping whole buffer size entries */
+ key = bpf_get_stackid(ctx, &stackmap, TEST_STACK_DEPTH);
+ if ((int)key >= 0) {
+ /* The size of stackmap and stack_amap should be the same */
+ bpf_map_update_elem(&stackid_hmap, &key, &val, 0);
+ stack_p = bpf_map_lookup_elem(&stack_amap, &key);
+ if (stack_p) {
+ bpf_get_stack(ctx, stack_p, max_len, TEST_STACK_DEPTH);
+ /* it wrongly skipped all the entries and filled zero */
+ if (stack_p[0] == 0)
+ failed = 1;
+ }
+ } else if ((int)key == -14/*EFAULT*/) {
+ /* old kernel doesn't support skipping that many entries */
+ failed = 2;
+ }
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
--
2.35.1.723.g4982287a31-goog

2022-03-11 07:25:51

by Yonghong Song

Subject: Re: [PATCH 1/2] bpf: Adjust BPF stack helper functions to accommodate skip > 0



On 3/10/22 12:22 AM, Namhyung Kim wrote:
> Let's say that the caller has storage for num_elem stack frames. Then,
> the BPF stack helper functions walk the stack for only num_elem frames.
> This means that if skip > 0, one keeps only 'num_elem - skip' frames.
>
> This is because it sets init_nr in the perf_callchain_entry to the end
> of the buffer to save num_elem entries only. I believe it was because
> the perf callchain code unwound the stack frames until it reached the
> global max size (sysctl_perf_event_max_stack).
>
> However it now has perf_callchain_entry_ctx.max_stack to limit the
> iteration locally. This simplifies the code to handle init_nr in the
> BPF callstack entries and removes the confusion with the perf_event's
> __PERF_SAMPLE_CALLCHAIN_EARLY which sets init_nr to 0.
>
> Also change the comment on bpf_get_stack() in the header file to be
> more explicit what the return value means.
>
> Based-on-patch-by: Eugene Loh <[email protected]>
> Signed-off-by: Namhyung Kim <[email protected]>

The change looks good to me. This patch actually fixed a bug
discussed below:


https://lore.kernel.org/bpf/[email protected]/

A reference to the above link in the commit message
will be useful for people to understand better with an
example.

Also, the following fixes tag should be added:

Fixes: c195651e565a ("bpf: add bpf_get_stack helper")

Since the bug needs skip > 0, which is seldom used,
and the currently returned stack is still correct although
with fewer entries, I guess that is why fewer people
complain.

Anyway, ack the patch:
Acked-by: Yonghong Song <[email protected]>


> ---
> include/uapi/linux/bpf.h | 4 +--
> kernel/bpf/stackmap.c | 56 +++++++++++++++++-----------------------
> 2 files changed, 26 insertions(+), 34 deletions(-)
>
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index b0383d371b9a..77f4a022c60c 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -2975,8 +2975,8 @@ union bpf_attr {
> *
> * # sysctl kernel.perf_event_max_stack=<new value>
> * Return
> - * A non-negative value equal to or less than *size* on success,
> - * or a negative error in case of failure.
> + * The non-negative copied *buf* length equal to or less than
> + * *size* on success, or a negative error in case of failure.
> *
> * long bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header)
[...]

2022-03-11 08:46:36

by Yonghong Song

Subject: Re: [PATCH 2/2] bpf/selftests: Test skipping stacktrace



On 3/10/22 12:22 AM, Namhyung Kim wrote:
> Add a test case for stacktrace with skip > 0 using a small sized
> buffer. It didn't support skipping entries greater than or equal to
> the size of buffer and filled the skipped part with 0.
>
> Signed-off-by: Namhyung Kim <[email protected]>
> ---
> .../bpf/prog_tests/stacktrace_map_skip.c | 72 ++++++++++++++++
> .../selftests/bpf/progs/stacktrace_map_skip.c | 82 +++++++++++++++++++
> 2 files changed, 154 insertions(+)
> create mode 100644 tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c
> create mode 100644 tools/testing/selftests/bpf/progs/stacktrace_map_skip.c
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c
> new file mode 100644
> index 000000000000..bcb244aa3c78
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c
> @@ -0,0 +1,72 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <test_progs.h>
> +#include "stacktrace_map_skip.skel.h"
> +
> +#define TEST_STACK_DEPTH 2
> +
> +void test_stacktrace_map_skip(void)
> +{
> + struct stacktrace_map_skip *skel;
> + int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
> + int err, stack_trace_len;
> + __u32 key, val, duration = 0;
> +
> + skel = stacktrace_map_skip__open_and_load();
> + if (CHECK(!skel, "skel_open_and_load", "skeleton open failed\n"))
> + return;

Please use ASSERT_* macros instead of CHECK* macros.
You can see other prog_tests/*.c files for examples.
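
For example, something along these lines (just a sketch):

	skel = stacktrace_map_skip__open_and_load();
	if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
		return;
	...
	err = stacktrace_map_skip__attach(skel);
	if (!ASSERT_OK(err, "skel_attach"))
		goto out;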

> +
> + /* find map fds */
> + control_map_fd = bpf_map__fd(skel->maps.control_map);
> + if (CHECK_FAIL(control_map_fd < 0))
> + goto out;
> +
> + stackid_hmap_fd = bpf_map__fd(skel->maps.stackid_hmap);
> + if (CHECK_FAIL(stackid_hmap_fd < 0))
> + goto out;
> +
> + stackmap_fd = bpf_map__fd(skel->maps.stackmap);
> + if (CHECK_FAIL(stackmap_fd < 0))
> + goto out;
> +
> + stack_amap_fd = bpf_map__fd(skel->maps.stack_amap);
> + if (CHECK_FAIL(stack_amap_fd < 0))
> + goto out;
> +
> + err = stacktrace_map_skip__attach(skel);
> + if (CHECK(err, "skel_attach", "skeleton attach failed\n"))
> + goto out;
> +
> + /* give some time for bpf program run */
> + sleep(1);
> +
> + /* disable stack trace collection */
> + key = 0;
> + val = 1;
> + bpf_map_update_elem(control_map_fd, &key, &val, 0);
> +
> + /* for every element in stackid_hmap, we can find a corresponding one
> + * in stackmap, and vise versa.
> + */
> + err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
> + if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
> + "err %d errno %d\n", err, errno))
> + goto out;
> +
> + err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
> + if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
> + "err %d errno %d\n", err, errno))
> + goto out;
> +
> + stack_trace_len = TEST_STACK_DEPTH * sizeof(__u64);
> + err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len);
> + if (CHECK(err, "compare_stack_ips stackmap vs. stack_amap",
> + "err %d errno %d\n", err, errno))
> + goto out;
> +
> + if (CHECK(skel->bss->failed, "check skip",
> + "failed to skip some depth: %d", skel->bss->failed))
> + goto out;
> +
> +out:
> + stacktrace_map_skip__destroy(skel);
> +}
> diff --git a/tools/testing/selftests/bpf/progs/stacktrace_map_skip.c b/tools/testing/selftests/bpf/progs/stacktrace_map_skip.c
> new file mode 100644
> index 000000000000..323248b17ae4
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/stacktrace_map_skip.c
> @@ -0,0 +1,82 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <vmlinux.h>
> +#include <bpf/bpf_helpers.h>
> +
> +#define TEST_STACK_DEPTH 2
> +
> +struct {
> + __uint(type, BPF_MAP_TYPE_ARRAY);
> + __uint(max_entries, 1);
> + __type(key, __u32);
> + __type(value, __u32);
> +} control_map SEC(".maps");

You can use a global variable for this.
The global variable can be assigned a value (if needed, e.g., non-zero)
before skeleton open and load.
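
For example, something like below (untested sketch, names are just
illustrative):

	/* in progs/stacktrace_map_skip.c, replacing control_map */
	int control = 0;

	...
	if (control)
		return 0; /* skip if collection is disabled */

	/* in the test, instead of updating control_map_fd */
	skel->bss->control = 1;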

> +
> +struct {
> + __uint(type, BPF_MAP_TYPE_HASH);
> + __uint(max_entries, 16384);
> + __type(key, __u32);
> + __type(value, __u32);
> +} stackid_hmap SEC(".maps");
> +
> +typedef __u64 stack_trace_t[TEST_STACK_DEPTH];
> +
> +struct {
> + __uint(type, BPF_MAP_TYPE_STACK_TRACE);
> + __uint(max_entries, 16384);
> + __type(key, __u32);
> + __type(value, stack_trace_t);
> +} stackmap SEC(".maps");
> +
> +struct {
> + __uint(type, BPF_MAP_TYPE_ARRAY);
> + __uint(max_entries, 16384);
> + __type(key, __u32);
> + __type(value, stack_trace_t);
> +} stack_amap SEC(".maps");
> +
> +/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
> +struct sched_switch_args {
> + unsigned long long pad;
> + char prev_comm[TASK_COMM_LEN];
> + int prev_pid;
> + int prev_prio;
> + long long prev_state;
> + char next_comm[TASK_COMM_LEN];
> + int next_pid;
> + int next_prio;
> +};

You can use this structure in vmlinux.h instead of the above:
struct trace_event_raw_sched_switch {
	struct trace_entry ent;
	char prev_comm[16];
	pid_t prev_pid;
	int prev_prio;
	long int prev_state;
	char next_comm[16];
	pid_t next_pid;
	int next_prio;
	char __data[0];
};


> +
> +int failed = 0;
> +
> +SEC("tracepoint/sched/sched_switch")
> +int oncpu(struct sched_switch_args *ctx)
> +{
> + __u32 max_len = TEST_STACK_DEPTH * sizeof(__u64);
> + __u32 key = 0, val = 0, *value_p;
> + __u64 *stack_p;
> +
> + value_p = bpf_map_lookup_elem(&control_map, &key);
> + if (value_p && *value_p)
> + return 0; /* skip if non-zero *value_p */
> +
> + /* it should allow skipping whole buffer size entries */
> + key = bpf_get_stackid(ctx, &stackmap, TEST_STACK_DEPTH);
> + if ((int)key >= 0) {
> + /* The size of stackmap and stack_amap should be the same */
> + bpf_map_update_elem(&stackid_hmap, &key, &val, 0);
> + stack_p = bpf_map_lookup_elem(&stack_amap, &key);
> + if (stack_p) {
> + bpf_get_stack(ctx, stack_p, max_len, TEST_STACK_DEPTH);
> + /* it wrongly skipped all the entries and filled zero */
> + if (stack_p[0] == 0)
> + failed = 1;
> + }
> + } else if ((int)key == -14/*EFAULT*/) {
> + /* old kernel doesn't support skipping that many entries */
> + failed = 2;

The selftest is supposed to run with the kernel in the same code base.
So it is okay to skip the above 'if' test and just set failed = 2.

> + }
> +
> + return 0;
> +}
> +
> +char _license[] SEC("license") = "GPL";

2022-03-11 09:39:53

by Hao Luo

Subject: Re: [PATCH 1/2] bpf: Adjust BPF stack helper functions to accommodate skip > 0

On Thu, Mar 10, 2022 at 2:54 PM Yonghong Song <[email protected]> wrote:
>
>
>
> On 3/10/22 12:22 AM, Namhyung Kim wrote:
> > Let's say that the caller has storage for num_elem stack frames. Then,
> > the BPF stack helper functions walk the stack for only num_elem frames.
> > This means that if skip > 0, one keeps only 'num_elem - skip' frames.
> >
> > This is because it sets init_nr in the perf_callchain_entry to the end
> > of the buffer to save num_elem entries only. I believe it was because
> > the perf callchain code unwound the stack frames until it reached the
> > global max size (sysctl_perf_event_max_stack).
> >
> > However it now has perf_callchain_entry_ctx.max_stack to limit the
> > iteration locally. This simplifies the code to handle init_nr in the
> > BPF callstack entries and removes the confusion with the perf_event's
> > __PERF_SAMPLE_CALLCHAIN_EARLY which sets init_nr to 0.
> >
> > Also change the comment on bpf_get_stack() in the header file to be
> > more explicit what the return value means.
> >
> > Based-on-patch-by: Eugene Loh <[email protected]>
> > Signed-off-by: Namhyung Kim <[email protected]>
>
> The change looks good to me. This patch actually fixed a bug
> discussed below:
>
>
> https://lore.kernel.org/bpf/[email protected]/
>
> A reference to the above link in the commit message
> will be useful for people to understand better with an
> example.
>
> Also, the following fixes tag should be added:
>
> Fixes: c195651e565a ("bpf: add bpf_get_stack helper")
>
> Since the bug needs skip > 0 which is seldomly used,
> and the current returned stack is still correct although
> with less entries, I guess that is why less people
> complains.
>
> Anyway, ack the patch:
> Acked-by: Yonghong Song <[email protected]>
>
>
> > ---
> > include/uapi/linux/bpf.h | 4 +--
> > kernel/bpf/stackmap.c | 56 +++++++++++++++++-----------------------
> > 2 files changed, 26 insertions(+), 34 deletions(-)
> >
> > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > index b0383d371b9a..77f4a022c60c 100644
> > --- a/include/uapi/linux/bpf.h
> > +++ b/include/uapi/linux/bpf.h
> > @@ -2975,8 +2975,8 @@ union bpf_attr {
> > *
> > * # sysctl kernel.perf_event_max_stack=<new value>
> > * Return
> > - * A non-negative value equal to or less than *size* on success,
> > - * or a negative error in case of failure.
> > + * The non-negative copied *buf* length equal to or less than
> > + * *size* on success, or a negative error in case of failure.
> > *
> > * long bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header)

Namhyung, I think you also need to mirror the change in
tools/include/uapi/linux/bpf.h

2022-03-11 11:37:06

by Namhyung Kim

Subject: Re: [PATCH 2/2] bpf/selftests: Test skipping stacktrace

On Thu, Mar 10, 2022 at 3:22 PM Yonghong Song <[email protected]> wrote:
>
>
>
> On 3/10/22 12:22 AM, Namhyung Kim wrote:
> > Add a test case for stacktrace with skip > 0 using a small sized
> > buffer. It didn't support skipping entries greater than or equal to
> > the size of buffer and filled the skipped part with 0.
> >
> > Signed-off-by: Namhyung Kim <[email protected]>
> > ---
> > .../bpf/prog_tests/stacktrace_map_skip.c | 72 ++++++++++++++++
> > .../selftests/bpf/progs/stacktrace_map_skip.c | 82 +++++++++++++++++++
> > 2 files changed, 154 insertions(+)
> > create mode 100644 tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c
> > create mode 100644 tools/testing/selftests/bpf/progs/stacktrace_map_skip.c
> >
> > diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c
> > new file mode 100644
> > index 000000000000..bcb244aa3c78
> > --- /dev/null
> > +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c
> > @@ -0,0 +1,72 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +#include <test_progs.h>
> > +#include "stacktrace_map_skip.skel.h"
> > +
> > +#define TEST_STACK_DEPTH 2
> > +
> > +void test_stacktrace_map_skip(void)
> > +{
> > + struct stacktrace_map_skip *skel;
> > + int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
> > + int err, stack_trace_len;
> > + __u32 key, val, duration = 0;
> > +
> > + skel = stacktrace_map_skip__open_and_load();
> > + if (CHECK(!skel, "skel_open_and_load", "skeleton open failed\n"))
> > + return;
>
> Please use ASSERT_* macros instead of CHECK* macros.
> You can see other prog_tests/*.c files for examples.

I'll take a look and make the changes.

>
> > +
> > + /* find map fds */
> > + control_map_fd = bpf_map__fd(skel->maps.control_map);
> > + if (CHECK_FAIL(control_map_fd < 0))
> > + goto out;
> > +
> > + stackid_hmap_fd = bpf_map__fd(skel->maps.stackid_hmap);
> > + if (CHECK_FAIL(stackid_hmap_fd < 0))
> > + goto out;
> > +
> > + stackmap_fd = bpf_map__fd(skel->maps.stackmap);
> > + if (CHECK_FAIL(stackmap_fd < 0))
> > + goto out;
> > +
> > + stack_amap_fd = bpf_map__fd(skel->maps.stack_amap);
> > + if (CHECK_FAIL(stack_amap_fd < 0))
> > + goto out;
> > +
> > + err = stacktrace_map_skip__attach(skel);
> > + if (CHECK(err, "skel_attach", "skeleton attach failed\n"))
> > + goto out;
> > +
> > + /* give some time for bpf program run */
> > + sleep(1);
> > +
> > + /* disable stack trace collection */
> > + key = 0;
> > + val = 1;
> > + bpf_map_update_elem(control_map_fd, &key, &val, 0);
> > +
> > + /* for every element in stackid_hmap, we can find a corresponding one
> > + * in stackmap, and vise versa.
> > + */
> > + err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
> > + if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
> > + "err %d errno %d\n", err, errno))
> > + goto out;
> > +
> > + err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
> > + if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
> > + "err %d errno %d\n", err, errno))
> > + goto out;
> > +
> > + stack_trace_len = TEST_STACK_DEPTH * sizeof(__u64);
> > + err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len);
> > + if (CHECK(err, "compare_stack_ips stackmap vs. stack_amap",
> > + "err %d errno %d\n", err, errno))
> > + goto out;
> > +
> > + if (CHECK(skel->bss->failed, "check skip",
> > + "failed to skip some depth: %d", skel->bss->failed))
> > + goto out;
> > +
> > +out:
> > + stacktrace_map_skip__destroy(skel);
> > +}
> > diff --git a/tools/testing/selftests/bpf/progs/stacktrace_map_skip.c b/tools/testing/selftests/bpf/progs/stacktrace_map_skip.c
> > new file mode 100644
> > index 000000000000..323248b17ae4
> > --- /dev/null
> > +++ b/tools/testing/selftests/bpf/progs/stacktrace_map_skip.c
> > @@ -0,0 +1,82 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +#include <vmlinux.h>
> > +#include <bpf/bpf_helpers.h>
> > +
> > +#define TEST_STACK_DEPTH 2
> > +
> > +struct {
> > + __uint(type, BPF_MAP_TYPE_ARRAY);
> > + __uint(max_entries, 1);
> > + __type(key, __u32);
> > + __type(value, __u32);
> > +} control_map SEC(".maps");
>
> You can use a global variable for this.
> The global variable can be assigned a value (if needed, e.g., non-zero)
> before skeleton open and load.

Right, will change.

>
> > +
> > +struct {
> > + __uint(type, BPF_MAP_TYPE_HASH);
> > + __uint(max_entries, 16384);
> > + __type(key, __u32);
> > + __type(value, __u32);
> > +} stackid_hmap SEC(".maps");
> > +
> > +typedef __u64 stack_trace_t[TEST_STACK_DEPTH];
> > +
> > +struct {
> > + __uint(type, BPF_MAP_TYPE_STACK_TRACE);
> > + __uint(max_entries, 16384);
> > + __type(key, __u32);
> > + __type(value, stack_trace_t);
> > +} stackmap SEC(".maps");
> > +
> > +struct {
> > + __uint(type, BPF_MAP_TYPE_ARRAY);
> > + __uint(max_entries, 16384);
> > + __type(key, __u32);
> > + __type(value, stack_trace_t);
> > +} stack_amap SEC(".maps");
> > +
> > +/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
> > +struct sched_switch_args {
> > + unsigned long long pad;
> > + char prev_comm[TASK_COMM_LEN];
> > + int prev_pid;
> > + int prev_prio;
> > + long long prev_state;
> > + char next_comm[TASK_COMM_LEN];
> > + int next_pid;
> > + int next_prio;
> > +};
>
> You can use this structure in vmlinux.h instead of the above:
> struct trace_event_raw_sched_switch {
> 	struct trace_entry ent;
> 	char prev_comm[16];
> 	pid_t prev_pid;
> 	int prev_prio;
> 	long int prev_state;
> 	char next_comm[16];
> 	pid_t next_pid;
> 	int next_prio;
> 	char __data[0];
> };

Looks good, will change.

>
> > +
> > +int failed = 0;
> > +
> > +SEC("tracepoint/sched/sched_switch")
> > +int oncpu(struct sched_switch_args *ctx)
> > +{
> > + __u32 max_len = TEST_STACK_DEPTH * sizeof(__u64);
> > + __u32 key = 0, val = 0, *value_p;
> > + __u64 *stack_p;
> > +
> > + value_p = bpf_map_lookup_elem(&control_map, &key);
> > + if (value_p && *value_p)
> > + return 0; /* skip if non-zero *value_p */
> > +
> > + /* it should allow skipping whole buffer size entries */
> > + key = bpf_get_stackid(ctx, &stackmap, TEST_STACK_DEPTH);
> > + if ((int)key >= 0) {
> > + /* The size of stackmap and stack_amap should be the same */
> > + bpf_map_update_elem(&stackid_hmap, &key, &val, 0);
> > + stack_p = bpf_map_lookup_elem(&stack_amap, &key);
> > + if (stack_p) {
> > + bpf_get_stack(ctx, stack_p, max_len, TEST_STACK_DEPTH);
> > + /* it wrongly skipped all the entries and filled zero */
> > + if (stack_p[0] == 0)
> > + failed = 1;
> > + }
> > + } else if ((int)key == -14/*EFAULT*/) {
> > + /* old kernel doesn't support skipping that many entries */
> > + failed = 2;
>
> The selftest is supposed to run with the kernel in the same code base.
> So it is okay to skip the above 'if' test and just set failed = 2.

I see. I will make the change.

Thanks,
Namhyung

>
> > + }
> > +
> > + return 0;
> > +}
> > +
> > +char _license[] SEC("license") = "GPL";

2022-03-11 13:23:34

by Namhyung Kim

Subject: Re: [PATCH 1/2] bpf: Adjust BPF stack helper functions to accommodate skip > 0

Hello,

On Thu, Mar 10, 2022 at 2:55 PM Yonghong Song <[email protected]> wrote:
>
>
>
> On 3/10/22 12:22 AM, Namhyung Kim wrote:
> > Let's say that the caller has storage for num_elem stack frames. Then,
> > the BPF stack helper functions walk the stack for only num_elem frames.
> > This means that if skip > 0, one keeps only 'num_elem - skip' frames.
> >
> > This is because it sets init_nr in the perf_callchain_entry to the end
> > of the buffer to save num_elem entries only. I believe it was because
> > the perf callchain code unwound the stack frames until it reached the
> > global max size (sysctl_perf_event_max_stack).
> >
> > However it now has perf_callchain_entry_ctx.max_stack to limit the
> > iteration locally. This simplifies the code to handle init_nr in the
> > BPF callstack entries and removes the confusion with the perf_event's
> > __PERF_SAMPLE_CALLCHAIN_EARLY which sets init_nr to 0.
> >
> > Also change the comment on bpf_get_stack() in the header file to be
> > more explicit what the return value means.
> >
> > Based-on-patch-by: Eugene Loh <[email protected]>
> > Signed-off-by: Namhyung Kim <[email protected]>
>
> The change looks good to me. This patch actually fixed a bug
> discussed below:
>
>
> https://lore.kernel.org/bpf/[email protected]/
>
> A reference to the above link in the commit message
> will be useful for people to understand better with an
> example.

Ok, will do.

>
> Also, the following fixes tag should be added:
>
> Fixes: c195651e565a ("bpf: add bpf_get_stack helper")

Sure.

>
> Since the bug needs skip > 0, which is seldom used,
> and the currently returned stack is still correct although
> with fewer entries, I guess that is why fewer people
> complain.
>
> Anyway, ack the patch:
> Acked-by: Yonghong Song <[email protected]>

Thanks for your review!

Namhyung


>
>
> > ---
> > include/uapi/linux/bpf.h | 4 +--
> > kernel/bpf/stackmap.c | 56 +++++++++++++++++-----------------------
> > 2 files changed, 26 insertions(+), 34 deletions(-)
> >
> > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > index b0383d371b9a..77f4a022c60c 100644
> > --- a/include/uapi/linux/bpf.h
> > +++ b/include/uapi/linux/bpf.h
> > @@ -2975,8 +2975,8 @@ union bpf_attr {
> > *
> > * # sysctl kernel.perf_event_max_stack=<new value>
> > * Return
> > - * A non-negative value equal to or less than *size* on success,
> > - * or a negative error in case of failure.
> > + * The non-negative copied *buf* length equal to or less than
> > + * *size* on success, or a negative error in case of failure.
> > *
> > * long bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header)
> [...]

2022-03-11 21:37:30

by Namhyung Kim

Subject: Re: [PATCH 1/2] bpf: Adjust BPF stack helper functions to accommodate skip > 0

Hi Hao,

On Thu, Mar 10, 2022 at 4:24 PM Hao Luo <[email protected]> wrote:
> > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > > index b0383d371b9a..77f4a022c60c 100644
> > > --- a/include/uapi/linux/bpf.h
> > > +++ b/include/uapi/linux/bpf.h
> > > @@ -2975,8 +2975,8 @@ union bpf_attr {
> > > *
> > > * # sysctl kernel.perf_event_max_stack=<new value>
> > > * Return
> > > - * A non-negative value equal to or less than *size* on success,
> > > - * or a negative error in case of failure.
> > > + * The non-negative copied *buf* length equal to or less than
> > > + * *size* on success, or a negative error in case of failure.
> > > *
> > > * long bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header)
>
> Namhyung, I think you also need to mirror the change in
> tools/include/uapi/linux/bpf.h

Oh, right. Will update.

Thanks,
Namhyung

2022-03-11 23:37:03

by Andrii Nakryiko

Subject: Re: [PATCH 2/2] bpf/selftests: Test skipping stacktrace

On Thu, Mar 10, 2022 at 12:22 AM Namhyung Kim <[email protected]> wrote:
>
> Add a test case for stacktrace with skip > 0 using a small sized
> buffer. It didn't support skipping entries greater than or equal to
> the size of buffer and filled the skipped part with 0.
>
> Signed-off-by: Namhyung Kim <[email protected]>
> ---
> .../bpf/prog_tests/stacktrace_map_skip.c | 72 ++++++++++++++++
> .../selftests/bpf/progs/stacktrace_map_skip.c | 82 +++++++++++++++++++
> 2 files changed, 154 insertions(+)
> create mode 100644 tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c
> create mode 100644 tools/testing/selftests/bpf/progs/stacktrace_map_skip.c
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c
> new file mode 100644
> index 000000000000..bcb244aa3c78
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c
> @@ -0,0 +1,72 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <test_progs.h>
> +#include "stacktrace_map_skip.skel.h"
> +
> +#define TEST_STACK_DEPTH 2
> +
> +void test_stacktrace_map_skip(void)
> +{
> + struct stacktrace_map_skip *skel;
> + int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
> + int err, stack_trace_len;
> + __u32 key, val, duration = 0;
> +
> + skel = stacktrace_map_skip__open_and_load();
> + if (CHECK(!skel, "skel_open_and_load", "skeleton open failed\n"))
> + return;
> +
> + /* find map fds */
> + control_map_fd = bpf_map__fd(skel->maps.control_map);
> + if (CHECK_FAIL(control_map_fd < 0))
> + goto out;
> +
> + stackid_hmap_fd = bpf_map__fd(skel->maps.stackid_hmap);
> + if (CHECK_FAIL(stackid_hmap_fd < 0))
> + goto out;
> +
> + stackmap_fd = bpf_map__fd(skel->maps.stackmap);
> + if (CHECK_FAIL(stackmap_fd < 0))
> + goto out;
> +
> + stack_amap_fd = bpf_map__fd(skel->maps.stack_amap);
> + if (CHECK_FAIL(stack_amap_fd < 0))
> + goto out;
> +
> + err = stacktrace_map_skip__attach(skel);
> + if (CHECK(err, "skel_attach", "skeleton attach failed\n"))
> + goto out;
> +
> + /* give some time for bpf program run */
> + sleep(1);
> +
> + /* disable stack trace collection */
> + key = 0;
> + val = 1;
> + bpf_map_update_elem(control_map_fd, &key, &val, 0);
> +
> + /* for every element in stackid_hmap, we can find a corresponding one
> + * in stackmap, and vise versa.
> + */
> + err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
> + if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
> + "err %d errno %d\n", err, errno))
> + goto out;
> +
> + err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
> + if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
> + "err %d errno %d\n", err, errno))
> + goto out;
> +
> + stack_trace_len = TEST_STACK_DEPTH * sizeof(__u64);
> + err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len);
> + if (CHECK(err, "compare_stack_ips stackmap vs. stack_amap",
> + "err %d errno %d\n", err, errno))
> + goto out;
> +
> + if (CHECK(skel->bss->failed, "check skip",
> + "failed to skip some depth: %d", skel->bss->failed))
> + goto out;
> +
> +out:
> + stacktrace_map_skip__destroy(skel);
> +}
> diff --git a/tools/testing/selftests/bpf/progs/stacktrace_map_skip.c b/tools/testing/selftests/bpf/progs/stacktrace_map_skip.c
> new file mode 100644
> index 000000000000..323248b17ae4
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/stacktrace_map_skip.c
> @@ -0,0 +1,82 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <vmlinux.h>
> +#include <bpf/bpf_helpers.h>
> +
> +#define TEST_STACK_DEPTH 2
> +
> +struct {
> + __uint(type, BPF_MAP_TYPE_ARRAY);
> + __uint(max_entries, 1);
> + __type(key, __u32);
> + __type(value, __u32);
> +} control_map SEC(".maps");
> +
> +struct {
> + __uint(type, BPF_MAP_TYPE_HASH);
> + __uint(max_entries, 16384);
> + __type(key, __u32);
> + __type(value, __u32);
> +} stackid_hmap SEC(".maps");
> +
> +typedef __u64 stack_trace_t[TEST_STACK_DEPTH];
> +
> +struct {
> + __uint(type, BPF_MAP_TYPE_STACK_TRACE);
> + __uint(max_entries, 16384);
> + __type(key, __u32);
> + __type(value, stack_trace_t);
> +} stackmap SEC(".maps");
> +
> +struct {
> + __uint(type, BPF_MAP_TYPE_ARRAY);
> + __uint(max_entries, 16384);
> + __type(key, __u32);
> + __type(value, stack_trace_t);
> +} stack_amap SEC(".maps");
> +
> +/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
> +struct sched_switch_args {
> + unsigned long long pad;
> + char prev_comm[TASK_COMM_LEN];
> + int prev_pid;
> + int prev_prio;
> + long long prev_state;
> + char next_comm[TASK_COMM_LEN];
> + int next_pid;
> + int next_prio;
> +};
> +
> +int failed = 0;
> +
> +SEC("tracepoint/sched/sched_switch")
> +int oncpu(struct sched_switch_args *ctx)
> +{
> + __u32 max_len = TEST_STACK_DEPTH * sizeof(__u64);
> + __u32 key = 0, val = 0, *value_p;
> + __u64 *stack_p;
> +

please also add filtering by PID to avoid interference from other
selftests when run in parallel mode
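
Something like below should work (untested sketch, the 'pid' global is
illustrative):

	/* in the BPF program */
	int pid = 0;

	SEC("tracepoint/sched/sched_switch")
	int oncpu(struct trace_event_raw_sched_switch *ctx)
	{
		if ((__u32)(bpf_get_current_pid_tgid() >> 32) != pid)
			return 0;
		...
	}

	/* in the test, before stacktrace_map_skip__attach(skel) */
	skel->bss->pid = getpid();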

> + value_p = bpf_map_lookup_elem(&control_map, &key);
> + if (value_p && *value_p)
> + return 0; /* skip if non-zero *value_p */
> +
> + /* it should allow skipping whole buffer size entries */
> + key = bpf_get_stackid(ctx, &stackmap, TEST_STACK_DEPTH);
> + if ((int)key >= 0) {
> + /* The size of stackmap and stack_amap should be the same */
> + bpf_map_update_elem(&stackid_hmap, &key, &val, 0);
> + stack_p = bpf_map_lookup_elem(&stack_amap, &key);
> + if (stack_p) {
> + bpf_get_stack(ctx, stack_p, max_len, TEST_STACK_DEPTH);
> + /* it wrongly skipped all the entries and filled zero */
> + if (stack_p[0] == 0)
> + failed = 1;
> + }
> + } else if ((int)key == -14/*EFAULT*/) {
> + /* old kernel doesn't support skipping that many entries */
> + failed = 2;
> + }
> +
> + return 0;
> +}
> +
> +char _license[] SEC("license") = "GPL";
> --
> 2.35.1.723.g4982287a31-goog
>

2022-03-15 03:07:04

by Namhyung Kim

Subject: Re: [PATCH 2/2] bpf/selftests: Test skipping stacktrace

Hello,

On Fri, Mar 11, 2022 at 2:23 PM Andrii Nakryiko
<[email protected]> wrote:
>
> On Thu, Mar 10, 2022 at 12:22 AM Namhyung Kim <[email protected]> wrote:
> > +SEC("tracepoint/sched/sched_switch")
> > +int oncpu(struct sched_switch_args *ctx)
> > +{
> > + __u32 max_len = TEST_STACK_DEPTH * sizeof(__u64);
> > + __u32 key = 0, val = 0, *value_p;
> > + __u64 *stack_p;
> > +
>
> please also add filtering by PID to avoid interference from other
> selftests when run in parallel mode

Will do!

Thanks,
Namhyung

>
> > + value_p = bpf_map_lookup_elem(&control_map, &key);
> > + if (value_p && *value_p)
> > + return 0; /* skip if non-zero *value_p */
> > +
> > + /* it should allow skipping whole buffer size entries */
> > + key = bpf_get_stackid(ctx, &stackmap, TEST_STACK_DEPTH);
> > + if ((int)key >= 0) {
> > + /* The size of stackmap and stack_amap should be the same */
> > + bpf_map_update_elem(&stackid_hmap, &key, &val, 0);
> > + stack_p = bpf_map_lookup_elem(&stack_amap, &key);
> > + if (stack_p) {
> > + bpf_get_stack(ctx, stack_p, max_len, TEST_STACK_DEPTH);
> > + /* it wrongly skipped all the entries and filled zero */
> > + if (stack_p[0] == 0)
> > + failed = 1;
> > + }
> > + } else if ((int)key == -14/*EFAULT*/) {
> > + /* old kernel doesn't support skipping that many entries */
> > + failed = 2;
> > + }
> > +
> > + return 0;
> > +}
> > +
> > +char _license[] SEC("license") = "GPL";
> > --
> > 2.35.1.723.g4982287a31-goog
> >