2020-07-15 06:10:03

by Song Liu

Subject: [PATCH v2 bpf-next 0/2] bpf: fix stackmap on perf_events with PEBS

Calling get_perf_callchain() on perf_events from PEBS entries may cause
unwinder errors. To fix this issue, the perf subsystem fetches the
callchain early and marks such perf_events with
__PERF_SAMPLE_CALLCHAIN_EARLY. A similar issue exists when a BPF program
calls get_perf_callchain() via helper functions. For more information
about this issue, please refer to the discussion in [1].

This set fixes the issue by introducing helper protos bpf_get_stackid_pe
and bpf_get_stack_pe.

[1] https://lore.kernel.org/lkml/[email protected]/

Changes v1 => v2:
1. Simplify the design and avoid introducing a new helper function. (Andrii)

Song Liu (2):
bpf: separate bpf_get_[stack|stackid] for perf events BPF
selftests/bpf: add callchain_stackid

include/linux/bpf.h | 2 +
kernel/bpf/stackmap.c | 204 ++++++++++++++++--
kernel/trace/bpf_trace.c | 4 +-
.../bpf/prog_tests/perf_event_stackmap.c | 120 +++++++++++
.../selftests/bpf/progs/perf_event_stackmap.c | 64 ++++++
5 files changed, 374 insertions(+), 20 deletions(-)
create mode 100644 tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c
create mode 100644 tools/testing/selftests/bpf/progs/perf_event_stackmap.c

--
2.24.1


2020-07-15 06:10:07

by Song Liu

Subject: [PATCH v2 bpf-next 2/2] selftests/bpf: add callchain_stackid

This tests the new helper functions bpf_get_stackid_pe and bpf_get_stack_pe.
These two helpers have a different implementation for perf_events with PEBS
entries.

Signed-off-by: Song Liu <[email protected]>
---
.../bpf/prog_tests/perf_event_stackmap.c | 120 ++++++++++++++++++
.../selftests/bpf/progs/perf_event_stackmap.c | 64 ++++++++++
2 files changed, 184 insertions(+)
create mode 100644 tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c
create mode 100644 tools/testing/selftests/bpf/progs/perf_event_stackmap.c

diff --git a/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c b/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c
new file mode 100644
index 0000000000000..6dcc67572afc7
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+#define _GNU_SOURCE
+#include <pthread.h>
+#include <sched.h>
+#include <test_progs.h>
+#include "perf_event_stackmap.skel.h"
+
+#ifndef noinline
+#define noinline __attribute__((noinline))
+#endif
+
+noinline int func_1(void)
+{
+ static int val = 1;
+
+ val += 1;
+
+ usleep(100);
+ return val;
+}
+
+noinline int func_2(void)
+{
+ return func_1();
+}
+
+noinline int func_3(void)
+{
+ return func_2();
+}
+
+noinline int func_4(void)
+{
+ return func_3();
+}
+
+noinline int func_5(void)
+{
+ return func_4();
+}
+
+noinline int func_6(void)
+{
+ int i, val = 1;
+
+ for (i = 0; i < 100; i++)
+ val += func_5();
+
+ return val;
+}
+
+void test_perf_event_stackmap(void)
+{
+ struct perf_event_attr attr = {
+ /* .type = PERF_TYPE_SOFTWARE, */
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_CPU_CYCLES,
+ .precise_ip = 2,
+ .sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK |
+ PERF_SAMPLE_CALLCHAIN,
+ .branch_sample_type = PERF_SAMPLE_BRANCH_USER |
+ PERF_SAMPLE_BRANCH_NO_FLAGS |
+ PERF_SAMPLE_BRANCH_NO_CYCLES |
+ PERF_SAMPLE_BRANCH_CALL_STACK,
+ .sample_period = 5000,
+ .size = sizeof(struct perf_event_attr),
+ };
+ struct perf_event_stackmap *skel;
+ __u32 duration = 0;
+ cpu_set_t cpu_set;
+ int pmu_fd, err;
+
+ skel = perf_event_stackmap__open();
+
+ if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
+ return;
+
+ /* override program type */
+ bpf_program__set_perf_event(skel->progs.oncpu);
+
+ err = perf_event_stackmap__load(skel);
+ if (CHECK(err, "skel_load", "skeleton load failed: %d\n", err))
+ goto cleanup;
+
+ CPU_ZERO(&cpu_set);
+ CPU_SET(0, &cpu_set);
+ err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set);
+ if (CHECK(err, "set_affinity", "err %d, errno %d\n", err, errno))
+ goto cleanup;
+
+ pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
+ 0 /* cpu 0 */, -1 /* group id */,
+ 0 /* flags */);
+ if (pmu_fd < 0) {
+ printf("%s:SKIP:cpu doesn't support the event\n", __func__);
+ test__skip();
+ goto cleanup;
+ }
+
+ skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
+ pmu_fd);
+ if (CHECK(IS_ERR(skel->links.oncpu), "attach_perf_event",
+ "err %ld\n", PTR_ERR(skel->links.oncpu))) {
+ close(pmu_fd);
+ goto cleanup;
+ }
+
+ /* create kernel and user stack traces for testing */
+ func_6();
+
+ CHECK(skel->data->stackid_kernel != 2, "get_stackid_kernel", "failed\n");
+ CHECK(skel->data->stackid_user != 2, "get_stackid_user", "failed\n");
+ CHECK(skel->data->stack_kernel != 2, "get_stack_kernel", "failed\n");
+ CHECK(skel->data->stack_user != 2, "get_stack_user", "failed\n");
+ close(pmu_fd);
+
+cleanup:
+ perf_event_stackmap__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/perf_event_stackmap.c b/tools/testing/selftests/bpf/progs/perf_event_stackmap.c
new file mode 100644
index 0000000000000..1b0457efeedec
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/perf_event_stackmap.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+#ifndef PERF_MAX_STACK_DEPTH
+#define PERF_MAX_STACK_DEPTH 127
+#endif
+
+#ifndef BPF_F_USER_STACK
+#define BPF_F_USER_STACK (1ULL << 8)
+#endif
+
+typedef __u64 stack_trace_t[PERF_MAX_STACK_DEPTH];
+struct {
+ __uint(type, BPF_MAP_TYPE_STACK_TRACE);
+ __uint(max_entries, 16384);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(stack_trace_t));
+} stackmap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, stack_trace_t);
+} stackdata_map SEC(".maps");
+
+long stackid_kernel = 1;
+long stackid_user = 1;
+long stack_kernel = 1;
+long stack_user = 1;
+
+SEC("perf_event")
+int oncpu(void *ctx)
+{
+ int max_len = PERF_MAX_STACK_DEPTH * sizeof(__u64);
+ stack_trace_t *trace;
+ __u32 key = 0;
+ long val;
+
+ val = bpf_get_stackid(ctx, &stackmap, 0);
+ if (val > 0)
+ stackid_kernel = 2;
+ val = bpf_get_stackid(ctx, &stackmap, BPF_F_USER_STACK);
+ if (val > 0)
+ stackid_user = 2;
+
+ trace = bpf_map_lookup_elem(&stackdata_map, &key);
+ if (!trace)
+ return 0;
+
+ val = bpf_get_stack(ctx, trace, max_len, 0);
+ if (val > 0)
+ stack_kernel = 2;
+
+ val = bpf_get_stack(ctx, trace, max_len, BPF_F_USER_STACK);
+ if (val > 0)
+ stack_user = 2;
+
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
--
2.24.1

2020-07-15 06:11:00

by Song Liu

Subject: [PATCH v2 bpf-next 1/2] bpf: separate bpf_get_[stack|stackid] for perf events BPF

Calling get_perf_callchain() on perf_events from PEBS entries may cause
unwinder errors. To fix this issue, the callchain is fetched early. Such
perf_events are marked with __PERF_SAMPLE_CALLCHAIN_EARLY.

Similarly, calling bpf_get_[stack|stackid] on perf_events from PEBS may
also cause unwinder errors. To fix this, add separate versions of these
two helpers, bpf_get_[stack|stackid]_pe. These two helpers use the
callchain stored in bpf_perf_event_data_kern->data->callchain.

Signed-off-by: Song Liu <[email protected]>
---
include/linux/bpf.h | 2 +
kernel/bpf/stackmap.c | 204 +++++++++++++++++++++++++++++++++++----
kernel/trace/bpf_trace.c | 4 +-
3 files changed, 190 insertions(+), 20 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index c67c88ad35f85..bfc7a283c0f93 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1637,6 +1637,8 @@ extern const struct bpf_func_proto bpf_get_current_comm_proto;
extern const struct bpf_func_proto bpf_get_stackid_proto;
extern const struct bpf_func_proto bpf_get_stack_proto;
extern const struct bpf_func_proto bpf_get_task_stack_proto;
+extern const struct bpf_func_proto bpf_get_stackid_proto_pe;
+extern const struct bpf_func_proto bpf_get_stack_proto_pe;
extern const struct bpf_func_proto bpf_sock_map_update_proto;
extern const struct bpf_func_proto bpf_sock_hash_update_proto;
extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto;
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 48d8e739975fa..0587d4ddb06ce 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -4,6 +4,7 @@
#include <linux/bpf.h>
#include <linux/jhash.h>
#include <linux/filter.h>
+#include <linux/kernel.h>
#include <linux/stacktrace.h>
#include <linux/perf_event.h>
#include <linux/elf.h>
@@ -387,11 +388,10 @@ get_callchain_entry_for_task(struct task_struct *task, u32 init_nr)
#endif
}

-BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
- u64, flags)
+static long __bpf_get_stackid(struct bpf_map *map,
+ struct perf_callchain_entry *trace, u64 flags)
{
struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
- struct perf_callchain_entry *trace;
struct stack_map_bucket *bucket, *new_bucket, *old_bucket;
u32 max_depth = map->value_size / stack_map_data_size(map);
/* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */
@@ -399,21 +399,9 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
u32 hash, id, trace_nr, trace_len;
bool user = flags & BPF_F_USER_STACK;
- bool kernel = !user;
u64 *ips;
bool hash_matches;

- if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
- BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID)))
- return -EINVAL;
-
- trace = get_perf_callchain(regs, init_nr, kernel, user,
- sysctl_perf_event_max_stack, false, false);
-
- if (unlikely(!trace))
- /* couldn't fetch the stack trace */
- return -EFAULT;
-
/* get_perf_callchain() guarantees that trace->nr >= init_nr
* and trace-nr <= sysctl_perf_event_max_stack, so trace_nr <= max_depth
*/
@@ -478,6 +466,30 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
return id;
}

+BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
+ u64, flags)
+{
+ u32 max_depth = map->value_size / stack_map_data_size(map);
+ /* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */
+ u32 init_nr = sysctl_perf_event_max_stack - max_depth;
+ bool user = flags & BPF_F_USER_STACK;
+ struct perf_callchain_entry *trace;
+ bool kernel = !user;
+
+ if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
+ BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID)))
+ return -EINVAL;
+
+ trace = get_perf_callchain(regs, init_nr, kernel, user,
+ sysctl_perf_event_max_stack, false, false);
+
+ if (unlikely(!trace))
+ /* couldn't fetch the stack trace */
+ return -EFAULT;
+
+ return __bpf_get_stackid(map, trace, flags);
+}
+
const struct bpf_func_proto bpf_get_stackid_proto = {
.func = bpf_get_stackid,
.gpl_only = true,
@@ -487,7 +499,87 @@ const struct bpf_func_proto bpf_get_stackid_proto = {
.arg3_type = ARG_ANYTHING,
};

+static __u64 count_kernel_ip(struct perf_callchain_entry *trace)
+{
+ __u64 nr_kernel = 0;
+
+ while (nr_kernel < trace->nr) {
+ if (trace->ip[nr_kernel] == PERF_CONTEXT_USER)
+ break;
+ nr_kernel++;
+ }
+ return nr_kernel;
+}
+
+BPF_CALL_3(bpf_get_stackid_pe, struct bpf_perf_event_data_kern *, ctx,
+ struct bpf_map *, map, u64, flags)
+{
+ struct perf_event *event = ctx->event;
+ struct perf_callchain_entry *trace;
+ bool has_kernel, has_user;
+ bool kernel, user;
+
+ /* perf_sample_data doesn't have callchain, use bpf_get_stackid */
+ if (!(event->attr.sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY))
+ return bpf_get_stackid((unsigned long)(ctx->regs),
+ (unsigned long) map, flags, 0, 0);
+
+ if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
+ BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID)))
+ return -EINVAL;
+
+ user = flags & BPF_F_USER_STACK;
+ kernel = !user;
+
+ has_kernel = !event->attr.exclude_callchain_kernel;
+ has_user = !event->attr.exclude_callchain_user;
+
+ if ((kernel && !has_kernel) || (user && !has_user))
+ return -EINVAL;
+
+ trace = ctx->data->callchain;
+ if (!trace || (!has_kernel && !has_user))
+ return -EFAULT;
+
+ if (has_kernel && has_user) {
+ __u64 nr_kernel = count_kernel_ip(trace);
+ int ret;
+
+ if (kernel) {
+ __u64 nr = trace->nr;
+
+ trace->nr = nr_kernel;
+ ret = __bpf_get_stackid(map, trace, flags);
+
+ /* restore nr */
+ trace->nr = nr;
+ } else { /* user */
+ u64 skip = flags & BPF_F_SKIP_FIELD_MASK;
+
+ skip += nr_kernel;
+ if (skip > ~BPF_F_SKIP_FIELD_MASK)
+ return -EFAULT;
+
+ flags = (flags & ~BPF_F_SKIP_FIELD_MASK) |
+ (skip & BPF_F_SKIP_FIELD_MASK);
+ ret = __bpf_get_stackid(map, trace, flags);
+ }
+ return ret;
+ }
+ return __bpf_get_stackid(map, trace, flags);
+}
+
+const struct bpf_func_proto bpf_get_stackid_proto_pe = {
+ .func = bpf_get_stackid_pe,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_CONST_MAP_PTR,
+ .arg3_type = ARG_ANYTHING,
+};
+
static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
+ struct perf_callchain_entry *trace_in,
void *buf, u32 size, u64 flags)
{
u32 init_nr, trace_nr, copy_len, elem_size, num_elem;
@@ -520,7 +612,9 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
else
init_nr = sysctl_perf_event_max_stack - num_elem;

- if (kernel && task)
+ if (trace_in)
+ trace = trace_in;
+ else if (kernel && task)
trace = get_callchain_entry_for_task(task, init_nr);
else
trace = get_perf_callchain(regs, init_nr, kernel, user,
@@ -556,7 +650,7 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
BPF_CALL_4(bpf_get_stack, struct pt_regs *, regs, void *, buf, u32, size,
u64, flags)
{
- return __bpf_get_stack(regs, NULL, buf, size, flags);
+ return __bpf_get_stack(regs, NULL, NULL, buf, size, flags);
}

const struct bpf_func_proto bpf_get_stack_proto = {
@@ -574,7 +668,7 @@ BPF_CALL_4(bpf_get_task_stack, struct task_struct *, task, void *, buf,
{
struct pt_regs *regs = task_pt_regs(task);

- return __bpf_get_stack(regs, task, buf, size, flags);
+ return __bpf_get_stack(regs, task, NULL, buf, size, flags);
}

BTF_ID_LIST(bpf_get_task_stack_btf_ids)
@@ -591,6 +685,80 @@ const struct bpf_func_proto bpf_get_task_stack_proto = {
.btf_id = bpf_get_task_stack_btf_ids,
};

+BPF_CALL_4(bpf_get_stack_pe, struct bpf_perf_event_data_kern *, ctx,
+ void *, buf, u32, size, u64, flags)
+{
+ struct perf_event *event = ctx->event;
+ struct perf_callchain_entry *trace;
+ bool has_kernel, has_user;
+ bool kernel, user;
+ int err = -EINVAL;
+
+ if (!(event->attr.sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY))
+ return __bpf_get_stack(ctx->regs, NULL, NULL, buf, size, flags);
+
+ if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
+ BPF_F_USER_BUILD_ID)))
+ goto clear;
+
+ user = flags & BPF_F_USER_STACK;
+ kernel = !user;
+
+ has_kernel = !event->attr.exclude_callchain_kernel;
+ has_user = !event->attr.exclude_callchain_user;
+
+ if ((kernel && !has_kernel) || (user && !has_user))
+ goto clear;
+
+ err = -EFAULT;
+ trace = ctx->data->callchain;
+ if (!trace || (!has_kernel && !has_user))
+ goto clear;
+
+ if (has_kernel && has_user) {
+ __u64 nr_kernel = count_kernel_ip(trace);
+ int ret;
+
+ if (kernel) {
+ __u64 nr = trace->nr;
+
+ trace->nr = nr_kernel;
+ ret = __bpf_get_stack(ctx->regs, NULL, trace, buf,
+ size, flags);
+
+ /* restore nr */
+ trace->nr = nr;
+ } else { /* user */
+ u64 skip = flags & BPF_F_SKIP_FIELD_MASK;
+
+ skip += nr_kernel;
+ if (skip > ~BPF_F_SKIP_FIELD_MASK)
+ goto clear;
+
+ flags = (flags & ~BPF_F_SKIP_FIELD_MASK) |
+ (skip & BPF_F_SKIP_FIELD_MASK);
+ ret = __bpf_get_stack(ctx->regs, NULL, trace, buf,
+ size, flags);
+ }
+ return ret;
+ }
+ return __bpf_get_stack(ctx->regs, NULL, trace, buf, size, flags);
+clear:
+ memset(buf, 0, size);
+ return err;
+
+}
+
+const struct bpf_func_proto bpf_get_stack_proto_pe = {
+ .func = bpf_get_stack_pe,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg3_type = ARG_CONST_SIZE_OR_ZERO,
+ .arg4_type = ARG_ANYTHING,
+};
+
/* Called from eBPF program */
static void *stack_map_lookup_elem(struct bpf_map *map, void *key)
{
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 3cc0dcb60ca20..cb91ef902cc43 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1411,9 +1411,9 @@ pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_perf_event_output:
return &bpf_perf_event_output_proto_tp;
case BPF_FUNC_get_stackid:
- return &bpf_get_stackid_proto_tp;
+ return &bpf_get_stackid_proto_pe;
case BPF_FUNC_get_stack:
- return &bpf_get_stack_proto_tp;
+ return &bpf_get_stack_proto_pe;
case BPF_FUNC_perf_prog_read_value:
return &bpf_perf_prog_read_value_proto;
case BPF_FUNC_read_branch_records:
--
2.24.1

2020-07-16 05:56:05

by Andrii Nakryiko

Subject: Re: [PATCH v2 bpf-next 1/2] bpf: separate bpf_get_[stack|stackid] for perf events BPF

On Tue, Jul 14, 2020 at 11:08 PM Song Liu <[email protected]> wrote:
>
> Calling get_perf_callchain() on perf_events from PEBS entries may cause
> unwinder errors. To fix this issue, the callchain is fetched early. Such
> perf_events are marked with __PERF_SAMPLE_CALLCHAIN_EARLY.
>
> Similarly, calling bpf_get_[stack|stackid] on perf_events from PEBS may
> also cause unwinder errors. To fix this, add separate version of these
> two helpers, bpf_get_[stack|stackid]_pe. These two hepers use callchain in
> bpf_perf_event_data_kern->data->callchain.
>
> Signed-off-by: Song Liu <[email protected]>
> ---
> include/linux/bpf.h | 2 +
> kernel/bpf/stackmap.c | 204 +++++++++++++++++++++++++++++++++++----
> kernel/trace/bpf_trace.c | 4 +-
> 3 files changed, 190 insertions(+), 20 deletions(-)
>

Glad this approach worked out! A few minor bugs below, though.

[...]

> + if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
> + BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID)))
> + return -EINVAL;
> +
> + user = flags & BPF_F_USER_STACK;
> + kernel = !user;
> +
> + has_kernel = !event->attr.exclude_callchain_kernel;
> + has_user = !event->attr.exclude_callchain_user;
> +
> + if ((kernel && !has_kernel) || (user && !has_user))
> + return -EINVAL;
> +
> + trace = ctx->data->callchain;
> + if (!trace || (!has_kernel && !has_user))

(!has_kernel && !has_user) can never happen; it's already ruled out by the
if above (exactly one of kernel/user is true, so at least one of
has_kernel/has_user must be true).
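
A minimal sketch of the simplified check (assuming the kernel/user vs.
exclude_callchain_* validation above stays as is):

	trace = ctx->data->callchain;
	if (!trace)
		return -EFAULT;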

> + return -EFAULT;
> +
> + if (has_kernel && has_user) {
> + __u64 nr_kernel = count_kernel_ip(trace);
> + int ret;
> +
> + if (kernel) {
> + __u64 nr = trace->nr;
> +
> + trace->nr = nr_kernel;
> + ret = __bpf_get_stackid(map, trace, flags);
> +
> + /* restore nr */
> + trace->nr = nr;
> + } else { /* user */
> + u64 skip = flags & BPF_F_SKIP_FIELD_MASK;
> +
> + skip += nr_kernel;
> + if (skip > ~BPF_F_SKIP_FIELD_MASK)

something fishy here: ~BPF_F_SKIP_FIELD_MASK is a really big number; did
you mean to check that skip is not bigger than 255 (i.e., that it fits
within BPF_F_SKIP_FIELD_MASK)?

> + return -EFAULT;
> +
> + flags = (flags & ~BPF_F_SKIP_FIELD_MASK) |
> + (skip & BPF_F_SKIP_FIELD_MASK);
> + ret = __bpf_get_stackid(map, trace, flags);
> + }
> + return ret;
> + }
> + return __bpf_get_stackid(map, trace, flags);
> +}
> +

[...]

> +
> + has_kernel = !event->attr.exclude_callchain_kernel;
> + has_user = !event->attr.exclude_callchain_user;
> +
> + if ((kernel && !has_kernel) || (user && !has_user))
> + goto clear;
> +
> + err = -EFAULT;
> + trace = ctx->data->callchain;
> + if (!trace || (!has_kernel && !has_user))
> + goto clear;

same as above for bpf_get_stackid, probably can be simplified

> +
> + if (has_kernel && has_user) {
> + __u64 nr_kernel = count_kernel_ip(trace);
> + int ret;
> +
> + if (kernel) {
> + __u64 nr = trace->nr;
> +
> + trace->nr = nr_kernel;
> + ret = __bpf_get_stack(ctx->regs, NULL, trace, buf,
> + size, flags);
> +
> + /* restore nr */
> + trace->nr = nr;
> + } else { /* user */
> + u64 skip = flags & BPF_F_SKIP_FIELD_MASK;
> +
> + skip += nr_kernel;
> + if (skip > ~BPF_F_SKIP_FIELD_MASK)
> + goto clear;
> +

and here

> + flags = (flags & ~BPF_F_SKIP_FIELD_MASK) |
> + (skip & BPF_F_SKIP_FIELD_MASK);

actually, if you check that skip <= BPF_F_SKIP_FIELD_MASK, you don't
need to mask it here; just `| skip`
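
Folding both points together, a sketch of the adjusted user-stack branch
(assuming skip is meant to fit in the 8-bit field covered by
BPF_F_SKIP_FIELD_MASK):

	} else { /* user */
		u64 skip = flags & BPF_F_SKIP_FIELD_MASK;

		skip += nr_kernel;
		if (skip > BPF_F_SKIP_FIELD_MASK)
			goto clear;

		/* skip already fits in the mask, no need to re-mask it */
		flags = (flags & ~BPF_F_SKIP_FIELD_MASK) | skip;
		ret = __bpf_get_stack(ctx->regs, NULL, trace, buf,
				      size, flags);
	}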

> + ret = __bpf_get_stack(ctx->regs, NULL, trace, buf,
> + size, flags);
> + }
> + return ret;
> + }
> + return __bpf_get_stack(ctx->regs, NULL, trace, buf, size, flags);
> +clear:
> + memset(buf, 0, size);
> + return err;
> +
> +}
> +

[...]

2020-07-16 06:05:43

by Andrii Nakryiko

Subject: Re: [PATCH v2 bpf-next 2/2] selftests/bpf: add callchain_stackid

On Tue, Jul 14, 2020 at 11:09 PM Song Liu <[email protected]> wrote:
>
> This tests new helper function bpf_get_stackid_pe and bpf_get_stack_pe.
> These two helpers have different implementation for perf_event with PEB
> entries.
>
> Signed-off-by: Song Liu <[email protected]>
> ---
> .../bpf/prog_tests/perf_event_stackmap.c | 120 ++++++++++++++++++
> .../selftests/bpf/progs/perf_event_stackmap.c | 64 ++++++++++
> 2 files changed, 184 insertions(+)
> create mode 100644 tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c
> create mode 100644 tools/testing/selftests/bpf/progs/perf_event_stackmap.c
>

Just a few simplification suggestions, but overall looks good, so please add:

Acked-by: Andrii Nakryiko <[email protected]>

[...]

> +
> +void test_perf_event_stackmap(void)
> +{
> + struct perf_event_attr attr = {
> + /* .type = PERF_TYPE_SOFTWARE, */
> + .type = PERF_TYPE_HARDWARE,
> + .config = PERF_COUNT_HW_CPU_CYCLES,
> + .precise_ip = 2,
> + .sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK |
> + PERF_SAMPLE_CALLCHAIN,
> + .branch_sample_type = PERF_SAMPLE_BRANCH_USER |
> + PERF_SAMPLE_BRANCH_NO_FLAGS |
> + PERF_SAMPLE_BRANCH_NO_CYCLES |
> + PERF_SAMPLE_BRANCH_CALL_STACK,
> + .sample_period = 5000,
> + .size = sizeof(struct perf_event_attr),
> + };
> + struct perf_event_stackmap *skel;
> + __u32 duration = 0;
> + cpu_set_t cpu_set;
> + int pmu_fd, err;
> +
> + skel = perf_event_stackmap__open();
> +
> + if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
> + return;
> +
> + /* override program type */
> + bpf_program__set_perf_event(skel->progs.oncpu);

this should be unnecessary; doesn't libbpf detect the program type
correctly from the SEC() annotation? If not, let's fix that.
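
A sketch of the open/load sequence without the override (assuming libbpf
maps the "perf_event" section name to BPF_PROG_TYPE_PERF_EVENT at open
time):

	skel = perf_event_stackmap__open();
	if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
		return;

	/* program type already set from the section name, no
	 * bpf_program__set_perf_event() override needed
	 */
	err = perf_event_stackmap__load(skel);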

> +
> + err = perf_event_stackmap__load(skel);
> + if (CHECK(err, "skel_load", "skeleton load failed: %d\n", err))
> + goto cleanup;
> +
> + CPU_ZERO(&cpu_set);
> + CPU_SET(0, &cpu_set);
> + err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set);
> + if (CHECK(err, "set_affinity", "err %d, errno %d\n", err, errno))
> + goto cleanup;
> +
> + pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
> + 0 /* cpu 0 */, -1 /* group id */,
> + 0 /* flags */);
> + if (pmu_fd < 0) {
> + printf("%s:SKIP:cpu doesn't support the event\n", __func__);
> + test__skip();
> + goto cleanup;
> + }
> +
> + skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
> + pmu_fd);
> + if (CHECK(IS_ERR(skel->links.oncpu), "attach_perf_event",
> + "err %ld\n", PTR_ERR(skel->links.oncpu))) {
> + close(pmu_fd);
> + goto cleanup;
> + }
> +
> + /* create kernel and user stack traces for testing */
> + func_6();
> +
> + CHECK(skel->data->stackid_kernel != 2, "get_stackid_kernel", "failed\n");
> + CHECK(skel->data->stackid_user != 2, "get_stackid_user", "failed\n");
> + CHECK(skel->data->stack_kernel != 2, "get_stack_kernel", "failed\n");
> + CHECK(skel->data->stack_user != 2, "get_stack_user", "failed\n");
> + close(pmu_fd);

I think pmu_fd will be closed by perf_event_stackmap__destroy (through
closing the link)

> +
> +cleanup:
> + perf_event_stackmap__destroy(skel);
> +}
> diff --git a/tools/testing/selftests/bpf/progs/perf_event_stackmap.c b/tools/testing/selftests/bpf/progs/perf_event_stackmap.c
> new file mode 100644
> index 0000000000000..1b0457efeedec
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/perf_event_stackmap.c
> @@ -0,0 +1,64 @@
> +// SPDX-License-Identifier: GPL-2.0
> +// Copyright (c) 2020 Facebook
> +#include "vmlinux.h"
> +#include <bpf/bpf_helpers.h>
> +
> +#ifndef PERF_MAX_STACK_DEPTH
> +#define PERF_MAX_STACK_DEPTH 127
> +#endif
> +
> +#ifndef BPF_F_USER_STACK
> +#define BPF_F_USER_STACK (1ULL << 8)
> +#endif

BPF_F_USER_STACK should be in vmlinux.h already, similarly to BPF_F_CURRENT_CPU

> +
> +typedef __u64 stack_trace_t[PERF_MAX_STACK_DEPTH];
> +struct {
> + __uint(type, BPF_MAP_TYPE_STACK_TRACE);
> + __uint(max_entries, 16384);
> + __uint(key_size, sizeof(__u32));
> + __uint(value_size, sizeof(stack_trace_t));
> +} stackmap SEC(".maps");
> +
> +struct {
> + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
> + __uint(max_entries, 1);
> + __type(key, __u32);
> + __type(value, stack_trace_t);
> +} stackdata_map SEC(".maps");
> +
> +long stackid_kernel = 1;
> +long stackid_user = 1;
> +long stack_kernel = 1;
> +long stack_user = 1;
> +

nit: kind of unusual to go from 1 -> 2, why not zero to one as a flag?
those variables will be available through skel->bss, btw

> +SEC("perf_event")
> +int oncpu(void *ctx)
> +{
> + int max_len = PERF_MAX_STACK_DEPTH * sizeof(__u64);
> + stack_trace_t *trace;
> + __u32 key = 0;
> + long val;
> +
> + val = bpf_get_stackid(ctx, &stackmap, 0);
> + if (val > 0)
> + stackid_kernel = 2;
> + val = bpf_get_stackid(ctx, &stackmap, BPF_F_USER_STACK);
> + if (val > 0)
> + stackid_user = 2;
> +
> + trace = bpf_map_lookup_elem(&stackdata_map, &key);
> + if (!trace)
> + return 0;
> +
> + val = bpf_get_stack(ctx, trace, max_len, 0);

given you don't care about the contents of trace, you could have used a
`stack_trace_t trace = {}` global variable instead of the PERCPU_ARRAY.
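
A sketch of that variant, assuming the includes, defines, and the
stackid half of the program stay as they are:

	typedef __u64 stack_trace_t[PERF_MAX_STACK_DEPTH];

	/* global scratch buffer instead of the PERCPU_ARRAY map;
	 * contents don't matter, only the helper return value does
	 */
	stack_trace_t trace = {};

	long stack_kernel = 1;
	long stack_user = 1;

	SEC("perf_event")
	int oncpu(void *ctx)
	{
		long val;

		val = bpf_get_stack(ctx, trace, sizeof(trace), 0);
		if (val > 0)
			stack_kernel = 2;

		val = bpf_get_stack(ctx, trace, sizeof(trace), BPF_F_USER_STACK);
		if (val > 0)
			stack_user = 2;

		return 0;
	}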

> + if (val > 0)
> + stack_kernel = 2;
> +
> + val = bpf_get_stack(ctx, trace, max_len, BPF_F_USER_STACK);

nit: max_len == sizeof(stack_trace_t) ?

> + if (val > 0)
> + stack_user = 2;
> +
> + return 0;
> +}
> +
> +char LICENSE[] SEC("license") = "GPL";
> --
> 2.24.1
>

2020-07-17 01:10:18

by kernel test robot

Subject: Re: [PATCH v2 bpf-next 1/2] bpf: separate bpf_get_[stack|stackid] for perf events BPF

Hi Song,

I love your patch! Yet something to improve:

[auto build test ERROR on bpf-next/master]

url: https://github.com/0day-ci/linux/commits/Song-Liu/bpf-fix-stackmap-on-perf_events-with-PEBS/20200715-133118
base: https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
config: arm64-randconfig-r004-20200716 (attached as .config)
compiler: clang version 12.0.0 (https://github.com/llvm/llvm-project ed6b578040a85977026c93bf4188f996148f3218)
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# install arm64 cross compiling tool for clang build
# apt-get install binutils-aarch64-linux-gnu
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=arm64

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <[email protected]>

All errors (new ones prefixed by >>):

>> kernel/bpf/stackmap.c:698:26: error: incompatible pointer types passing 'bpf_user_pt_regs_t *' (aka 'struct user_pt_regs *') to parameter of type 'struct pt_regs *' [-Werror,-Wincompatible-pointer-types]
return __bpf_get_stack(ctx->regs, NULL, NULL, buf, size, flags);
^~~~~~~~~
kernel/bpf/stackmap.c:581:45: note: passing argument to parameter 'regs' here
static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
^
kernel/bpf/stackmap.c:726:26: error: incompatible pointer types passing 'bpf_user_pt_regs_t *' (aka 'struct user_pt_regs *') to parameter of type 'struct pt_regs *' [-Werror,-Wincompatible-pointer-types]
ret = __bpf_get_stack(ctx->regs, NULL, trace, buf,
^~~~~~~~~
kernel/bpf/stackmap.c:581:45: note: passing argument to parameter 'regs' here
static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
^
kernel/bpf/stackmap.c:740:26: error: incompatible pointer types passing 'bpf_user_pt_regs_t *' (aka 'struct user_pt_regs *') to parameter of type 'struct pt_regs *' [-Werror,-Wincompatible-pointer-types]
ret = __bpf_get_stack(ctx->regs, NULL, trace, buf,
^~~~~~~~~
kernel/bpf/stackmap.c:581:45: note: passing argument to parameter 'regs' here
static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
^
kernel/bpf/stackmap.c:745:25: error: incompatible pointer types passing 'bpf_user_pt_regs_t *' (aka 'struct user_pt_regs *') to parameter of type 'struct pt_regs *' [-Werror,-Wincompatible-pointer-types]
return __bpf_get_stack(ctx->regs, NULL, trace, buf, size, flags);
^~~~~~~~~
kernel/bpf/stackmap.c:581:45: note: passing argument to parameter 'regs' here
static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
^
4 errors generated.

vim +698 kernel/bpf/stackmap.c

687
688 BPF_CALL_4(bpf_get_stack_pe, struct bpf_perf_event_data_kern *, ctx,
689 void *, buf, u32, size, u64, flags)
690 {
691 struct perf_event *event = ctx->event;
692 struct perf_callchain_entry *trace;
693 bool has_kernel, has_user;
694 bool kernel, user;
695 int err = -EINVAL;
696
697 if (!(event->attr.sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY))
> 698 return __bpf_get_stack(ctx->regs, NULL, NULL, buf, size, flags);
699
700 if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
701 BPF_F_USER_BUILD_ID)))
702 goto clear;
703
704 user = flags & BPF_F_USER_STACK;
705 kernel = !user;
706
707 has_kernel = !event->attr.exclude_callchain_kernel;
708 has_user = !event->attr.exclude_callchain_user;
709
710 if ((kernel && !has_kernel) || (user && !has_user))
711 goto clear;
712
713 err = -EFAULT;
714 trace = ctx->data->callchain;
715 if (!trace || (!has_kernel && !has_user))
716 goto clear;
717
718 if (has_kernel && has_user) {
719 __u64 nr_kernel = count_kernel_ip(trace);
720 int ret;
721
722 if (kernel) {
723 __u64 nr = trace->nr;
724
725 trace->nr = nr_kernel;
726 ret = __bpf_get_stack(ctx->regs, NULL, trace, buf,
727 size, flags);
728
729 /* restore nr */
730 trace->nr = nr;
731 } else { /* user */
732 u64 skip = flags & BPF_F_SKIP_FIELD_MASK;
733
734 skip += nr_kernel;
735 if (skip > ~BPF_F_SKIP_FIELD_MASK)
736 goto clear;
737
738 flags = (flags & ~BPF_F_SKIP_FIELD_MASK) |
739 (skip & BPF_F_SKIP_FIELD_MASK);
740 ret = __bpf_get_stack(ctx->regs, NULL, trace, buf,
741 size, flags);
742 }
743 return ret;
744 }
745 return __bpf_get_stack(ctx->regs, NULL, trace, buf, size, flags);
746 clear:
747 memset(buf, 0, size);
748 return err;
749

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/[email protected]

