If the perf_event has PERF_SAMPLE_CALLCHAIN, BPF can use it for the stack trace.
The problematic cases like PEBS and IBS are already handled in the PMU driver and
they fill the callchain info in the sample data. For others, we can call
perf_callchain() before the BPF handler.
Signed-off-by: Namhyung Kim <[email protected]>
---
kernel/bpf/stackmap.c | 4 ++--
kernel/events/core.c | 12 ++++++++++--
2 files changed, 12 insertions(+), 4 deletions(-)
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 1adbe67cdb95..aecea7451b61 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -338,7 +338,7 @@ BPF_CALL_3(bpf_get_stackid_pe, struct bpf_perf_event_data_kern *, ctx,
int ret;
/* perf_sample_data doesn't have callchain, use bpf_get_stackid */
- if (!(event->attr.sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY))
+ if (!(event->attr.sample_type & PERF_SAMPLE_CALLCHAIN))
return bpf_get_stackid((unsigned long)(ctx->regs),
(unsigned long) map, flags, 0, 0);
@@ -506,7 +506,7 @@ BPF_CALL_4(bpf_get_stack_pe, struct bpf_perf_event_data_kern *, ctx,
int err = -EINVAL;
__u64 nr_kernel;
- if (!(event->attr.sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY))
+ if (!(event->attr.sample_type & PERF_SAMPLE_CALLCHAIN))
return __bpf_get_stack(regs, NULL, NULL, buf, size, flags);
if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
diff --git a/kernel/events/core.c b/kernel/events/core.c
index b8af9fdbf26f..2ea93ce75ad4 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -10003,8 +10003,16 @@ static void bpf_overflow_handler(struct perf_event *event,
goto out;
rcu_read_lock();
prog = READ_ONCE(event->prog);
- if (prog)
+ if (prog) {
+ if (prog->call_get_stack &&
+ (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) &&
+ !(data->sample_flags & PERF_SAMPLE_CALLCHAIN)) {
+ data->callchain = perf_callchain(event, regs);
+ data->sample_flags |= PERF_SAMPLE_CALLCHAIN;
+ }
+
ret = bpf_prog_run(prog, &ctx);
+ }
rcu_read_unlock();
out:
__this_cpu_dec(bpf_prog_active);
@@ -10030,7 +10038,7 @@ static int perf_event_set_bpf_handler(struct perf_event *event,
if (event->attr.precise_ip &&
prog->call_get_stack &&
- (!(event->attr.sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY) ||
+ (!(event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) ||
event->attr.exclude_callchain_kernel ||
event->attr.exclude_callchain_user)) {
/*
--
2.37.2.789.g6183377224-goog
On 09/08, Namhyung Kim wrote:
> If the perf_event has PERF_SAMPLE_CALLCHAIN, BPF can use it for stack
> trace.
> The problematic cases like PEBS and IBS already handled in the PMU driver
> and
> they filled the callchain info in the sample data. For others, we can
> call
> perf_callchain() before the BPF handler.
> Signed-off-by: Namhyung Kim <[email protected]>
Reviewed-by: Stanislav Fomichev <[email protected]>
At least from the description it makes sense. We're filling a callchain
when it's been requested by the event, but it's missing on the
sample data (aka, software fallback?). perf_callchain also seems to
always fall back to &__empty_callchain in case of an error, so it seems
safe.
> ---
> kernel/bpf/stackmap.c | 4 ++--
> kernel/events/core.c | 12 ++++++++++--
> 2 files changed, 12 insertions(+), 4 deletions(-)
> diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
> index 1adbe67cdb95..aecea7451b61 100644
> --- a/kernel/bpf/stackmap.c
> +++ b/kernel/bpf/stackmap.c
> @@ -338,7 +338,7 @@ BPF_CALL_3(bpf_get_stackid_pe, struct
> bpf_perf_event_data_kern *, ctx,
> int ret;
> /* perf_sample_data doesn't have callchain, use bpf_get_stackid */
> - if (!(event->attr.sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY))
> + if (!(event->attr.sample_type & PERF_SAMPLE_CALLCHAIN))
> return bpf_get_stackid((unsigned long)(ctx->regs),
> (unsigned long) map, flags, 0, 0);
> @@ -506,7 +506,7 @@ BPF_CALL_4(bpf_get_stack_pe, struct
> bpf_perf_event_data_kern *, ctx,
> int err = -EINVAL;
> __u64 nr_kernel;
> - if (!(event->attr.sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY))
> + if (!(event->attr.sample_type & PERF_SAMPLE_CALLCHAIN))
> return __bpf_get_stack(regs, NULL, NULL, buf, size, flags);
> if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index b8af9fdbf26f..2ea93ce75ad4 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -10003,8 +10003,16 @@ static void bpf_overflow_handler(struct
> perf_event *event,
> goto out;
> rcu_read_lock();
> prog = READ_ONCE(event->prog);
> - if (prog)
> + if (prog) {
> + if (prog->call_get_stack &&
> + (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) &&
> + !(data->sample_flags & PERF_SAMPLE_CALLCHAIN)) {
> + data->callchain = perf_callchain(event, regs);
> + data->sample_flags |= PERF_SAMPLE_CALLCHAIN;
> + }
> +
> ret = bpf_prog_run(prog, &ctx);
> + }
> rcu_read_unlock();
> out:
> __this_cpu_dec(bpf_prog_active);
> @@ -10030,7 +10038,7 @@ static int perf_event_set_bpf_handler(struct
> perf_event *event,
> if (event->attr.precise_ip &&
> prog->call_get_stack &&
> - (!(event->attr.sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY) ||
> + (!(event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) ||
> event->attr.exclude_callchain_kernel ||
> event->attr.exclude_callchain_user)) {
> /*
> --
> 2.37.2.789.g6183377224-goog
The following commit has been merged into the perf/core branch of tip:
Commit-ID: 16817ad7e8b31728b44ff9f17d8d894ed8a450d0
Gitweb: https://git.kernel.org/tip/16817ad7e8b31728b44ff9f17d8d894ed8a450d0
Author: Namhyung Kim <[email protected]>
AuthorDate: Thu, 08 Sep 2022 14:41:03 -07:00
Committer: Peter Zijlstra <[email protected]>
CommitterDate: Tue, 13 Sep 2022 15:03:22 +02:00
perf/bpf: Always use perf callchains if exist
If the perf_event has PERF_SAMPLE_CALLCHAIN, BPF can use it for the stack trace.
The problematic cases like PEBS and IBS are already handled in the PMU driver and
they fill the callchain info in the sample data. For others, we can call
perf_callchain() before the BPF handler.
Signed-off-by: Namhyung Kim <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
---
kernel/bpf/stackmap.c | 4 ++--
kernel/events/core.c | 12 ++++++++++--
2 files changed, 12 insertions(+), 4 deletions(-)
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 1adbe67..aecea74 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -338,7 +338,7 @@ BPF_CALL_3(bpf_get_stackid_pe, struct bpf_perf_event_data_kern *, ctx,
int ret;
/* perf_sample_data doesn't have callchain, use bpf_get_stackid */
- if (!(event->attr.sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY))
+ if (!(event->attr.sample_type & PERF_SAMPLE_CALLCHAIN))
return bpf_get_stackid((unsigned long)(ctx->regs),
(unsigned long) map, flags, 0, 0);
@@ -506,7 +506,7 @@ BPF_CALL_4(bpf_get_stack_pe, struct bpf_perf_event_data_kern *, ctx,
int err = -EINVAL;
__u64 nr_kernel;
- if (!(event->attr.sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY))
+ if (!(event->attr.sample_type & PERF_SAMPLE_CALLCHAIN))
return __bpf_get_stack(regs, NULL, NULL, buf, size, flags);
if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
diff --git a/kernel/events/core.c b/kernel/events/core.c
index c98ecf3..7da5515 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -10000,8 +10000,16 @@ static void bpf_overflow_handler(struct perf_event *event,
goto out;
rcu_read_lock();
prog = READ_ONCE(event->prog);
- if (prog)
+ if (prog) {
+ if (prog->call_get_stack &&
+ (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) &&
+ !(data->sample_flags & PERF_SAMPLE_CALLCHAIN)) {
+ data->callchain = perf_callchain(event, regs);
+ data->sample_flags |= PERF_SAMPLE_CALLCHAIN;
+ }
+
ret = bpf_prog_run(prog, &ctx);
+ }
rcu_read_unlock();
out:
__this_cpu_dec(bpf_prog_active);
@@ -10027,7 +10035,7 @@ static int perf_event_set_bpf_handler(struct perf_event *event,
if (event->attr.precise_ip &&
prog->call_get_stack &&
- (!(event->attr.sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY) ||
+ (!(event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) ||
event->attr.exclude_callchain_kernel ||
event->attr.exclude_callchain_user)) {
/*