Hello,
I'm working on perf event sample filtering using BPF. To do that BPF needs
to access perf sample data and return 0 or 1 to drop or keep the samples.
Changes in v2)
- reuse perf_prepare_sample() instead of adding new bpf_prepare_sample()
- drop bpf_perf_event_read_helper() and access ctx->data directly using
bpf_cast_to_kern_ctx().
v1) https://lore.kernel.org/r/[email protected]
Thanks to the bpf_cast_to_kern_ctx() kfunc, a BPF program can easily access the
sample data now. But the problem is that perf hasn't populated the sample data
at the time it calls bpf_prog_run(). I changed the code to simply call
perf_prepare_sample() before calling the BPF program.
But it also checks whether the BPF program calls bpf_cast_to_kern_ctx(), since
calling perf_prepare_sample() is unnecessary if the program doesn't access the
sample.
The perf_prepare_sample() was only called right before putting it to the perf
ring buffer. I think I can add a little optimization not to fill already set
fields as it can be called twice now. It can be a separate patch for perf.
Another issue is that the perf sample data only has the fields selected by
the sample_type flags in the perf_event_attr. Accessing other fields can
result in an uninitialized read. I'm not sure how much of a problem that is
going to be, but it seems there's no way to prevent it completely. So properly
written programs should check the sample_type flags first when reading the
sample data.
The code is available at 'bpf/perf-sample-v2' branch in
git://git.kernel.org/pub/scm/linux/kernel/git/namhyung/linux-perf.git
Thanks,
Namhyung
Namhyung Kim (2):
bpf/perf: Call perf_prepare_sample() before bpf_prog_run()
selftests/bpf: Add perf_event_read_sample test cases
include/linux/bpf.h | 1 +
kernel/bpf/verifier.c | 1 +
kernel/events/core.c | 3 +
.../selftests/bpf/prog_tests/perf_sample.c | 167 ++++++++++++++++++
.../selftests/bpf/progs/test_perf_sample.c | 33 ++++
5 files changed, 205 insertions(+)
create mode 100644 tools/testing/selftests/bpf/prog_tests/perf_sample.c
create mode 100644 tools/testing/selftests/bpf/progs/test_perf_sample.c
--
2.39.0.314.g84b9a713c41-goog
It checks the perf event sample access with bpf_cast_to_kern_ctx().
It should only access the sample data that event->attr.sample_type allows.
Other fields might not be initialized.
$ ./vmtest.sh ./test_progs -t perf_event_read_sample
...
#135/1 perf_event_read_sample/perf_event_read_sample_ok:OK
#135/2 perf_event_read_sample/perf_event_read_sample_invalid:OK
#135 perf_event_read_sample:OK
Signed-off-by: Namhyung Kim <[email protected]>
---
.../selftests/bpf/prog_tests/perf_sample.c | 167 ++++++++++++++++++
.../selftests/bpf/progs/test_perf_sample.c | 33 ++++
2 files changed, 200 insertions(+)
create mode 100644 tools/testing/selftests/bpf/prog_tests/perf_sample.c
create mode 100644 tools/testing/selftests/bpf/progs/test_perf_sample.c
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_sample.c b/tools/testing/selftests/bpf/prog_tests/perf_sample.c
new file mode 100644
index 000000000000..dc1e88711e23
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/perf_sample.c
@@ -0,0 +1,167 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <linux/perf_event.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+
+#include <test_progs.h>
+#include "test_perf_sample.skel.h"
+
+#ifndef noinline
+#define noinline __attribute__((noinline))
+#endif
+
+/* treat user-stack data as invalid (for testing only): it is not in the BPF filter's SAMPLE_FLAGS */
+#define PERF_SAMPLE_INVALID PERF_SAMPLE_STACK_USER
+
+#define PERF_MMAP_SIZE 8192 /* bytes mapped from the perf event fd */
+#define DATA_MMAP_SIZE 4096 /* bytes of anonymous memory mapped to provoke a page fault */
+
+static int perf_fd = -1; /* fd of the open perf event, -1 when none is open */
+static void *perf_ringbuf; /* PERF_MMAP_SIZE mapping of perf_fd, NULL after close */
+static struct test_perf_sample *skel; /* skeleton set by setup_perf_sample_bpf_skel() */
+
+/* open a disabled page-fault event and mmap it into perf_fd/perf_ringbuf; returns 0 or -1 */
+static int open_perf_event(u64 sample_flags)
+{
+	struct perf_event_attr attr = {
+		.type = PERF_TYPE_SOFTWARE,
+		.config = PERF_COUNT_SW_PAGE_FAULTS,
+		.sample_type = sample_flags,
+		.sample_period = 1,
+		.disabled = 1,
+		.size = sizeof(attr),
+	};
+	void *ptr;
+	int fd;
+
+	fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
+	if (!ASSERT_GE(fd, 0, "perf_event_open")) /* 0 is a valid fd; only -1 is an error */
+		return -1;
+
+	ptr = mmap(NULL, PERF_MMAP_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
+	if (!ASSERT_NEQ(ptr, MAP_FAILED, "mmap")) {
+		close(fd);
+		return -1;
+	}
+
+	perf_fd = fd;
+	perf_ringbuf = ptr;
+	return 0;
+}
+
+/* release what open_perf_event() set up; does nothing if no event is open */
+static void close_perf_event(void)
+{
+	if (perf_fd != -1) {
+		munmap(perf_ringbuf, PERF_MMAP_SIZE);
+		close(perf_fd);
+
+		perf_ringbuf = NULL;
+		perf_fd = -1;
+	}
+}
+
+static noinline void *trigger_perf_event(void)
+{
+ int *buf = mmap(NULL, DATA_MMAP_SIZE, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0);
+ /* provoke one page fault while perf_fd is enabled; returns the faulting address, NULL if mmap fails */
+ if (!ASSERT_NEQ(buf, MAP_FAILED, "mmap"))
+ return NULL;
+
+ ioctl(perf_fd, PERF_EVENT_IOC_ENABLE); /* the event was opened disabled */
+
+ /* the first store to the fresh anonymous mapping should generate a page fault which triggers the perf_event */
+ *buf = 1;
+
+ ioctl(perf_fd, PERF_EVENT_IOC_DISABLE); /* presumably so that the cleanup below is not sampled */
+
+ munmap(buf, DATA_MMAP_SIZE);
+
+ /* buf is unmapped here: return the map address only as a value to check the sample addr */
+ return buf;
+}
+
+/* return 1 if the perf ring buffer starts with a sample record, 0 otherwise */
+static int check_perf_event(void)
+{
+	struct perf_event_mmap_page *meta = perf_ringbuf;
+	struct perf_event_header *first;
+	int has_sample = 0;
+
+	/* head == tail means nothing was written to the ring buffer */
+	if (meta->data_head != meta->data_tail) {
+		/* only the record at the start of the data area is inspected */
+		first = perf_ringbuf + meta->data_offset;
+		has_sample = (first->type == PERF_RECORD_SAMPLE);
+	}
+
+	return has_sample;
+}
+
+/* load the skeleton into skel and attach its filter program to perf_fd */
+static void setup_perf_sample_bpf_skel(void)
+{
+	skel = test_perf_sample__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "test_perf_sample_open_and_load"))
+		return;
+
+	/* keep the link in the skeleton so that skeleton detach/destroy releases it */
+	skel->links.perf_sample_filter =
+		bpf_program__attach_perf_event(skel->progs.perf_sample_filter, perf_fd);
+	ASSERT_OK_PTR(skel->links.perf_sample_filter, "bpf_program__attach_perf_event");
+}
+
+/* destroy the skeleton; __destroy() also detaches and, unlike __detach(), tolerates a NULL skel */
+static void clean_perf_sample_bpf_skel(void)
+{
+	test_perf_sample__destroy(skel);
+}
+
+/* expect one sample; its IP is just assumed to be in (trigger_perf_event, +4096) */
+static void test_perf_event_read_sample_ok(void)
+{
+	uintptr_t map_addr;
+
+	if (open_perf_event(PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR) < 0)
+		return;
+	setup_perf_sample_bpf_skel();
+	if (!skel) /* load failed and was reported; skel->bss cannot be read */
+		goto out;
+	map_addr = (uintptr_t)trigger_perf_event();
+	ASSERT_EQ(check_perf_event(), 1, "number of sample");
+	ASSERT_EQ(skel->bss->sample_addr, map_addr, "sample addr");
+	ASSERT_EQ((int)skel->bss->sample_pid, getpid(), "sample pid");
+	ASSERT_GT(skel->bss->sample_ip, (uintptr_t)trigger_perf_event, "sample ip");
+	ASSERT_LT(skel->bss->sample_ip, (uintptr_t)trigger_perf_event + 4096, "sample ip");
+	clean_perf_sample_bpf_skel();
+out:
+	close_perf_event();
+}
+
+/* sample_type lacks the filter's SAMPLE_FLAGS: the filter returns 0, so expect no sample */
+static void test_perf_event_read_sample_invalid(void)
+{
+	if (open_perf_event(PERF_SAMPLE_INVALID) < 0)
+		return;
+	setup_perf_sample_bpf_skel();
+	if (!skel) /* load failed and was reported; there is no skeleton to clean up */
+		goto out;
+	trigger_perf_event();
+	ASSERT_EQ(check_perf_event(), 0, "number of sample");
+	clean_perf_sample_bpf_skel();
+out:
+	close_perf_event();
+}
+
+void test_perf_event_read_sample(void)
+{ /* runs each scenario below as its own subtest */
+ if (test__start_subtest("perf_event_read_sample_ok")) /* sample_type covers every field the filter reads */
+ test_perf_event_read_sample_ok();
+ if (test__start_subtest("perf_event_read_sample_invalid")) /* sample_type has none of them: expect no sample */
+ test_perf_event_read_sample_invalid();
+}
diff --git a/tools/testing/selftests/bpf/progs/test_perf_sample.c b/tools/testing/selftests/bpf/progs/test_perf_sample.c
new file mode 100644
index 000000000000..b1f498d447b9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_perf_sample.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2022 Google
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+unsigned long long sample_ip; /* data->ip of the last sample that passed the filter */
+unsigned long long sample_pid; /* data->tid_entry.pid of that sample */
+unsigned long long sample_addr; /* data->addr of that sample */
+
+void *bpf_cast_to_kern_ctx(void *) __ksym; /* kfunc: its result is used as the kernel-side ctx below */
+
+#define SAMPLE_FLAGS (PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR) /* sample_type bits required before the fields above are read */
+
+/* keep (return 1) only samples that carry IP, TID and ADDR, recording those fields */
+SEC("perf_event")
+int perf_sample_filter(void *ctx)
+{
+	struct bpf_perf_event_data_kern *kctx = bpf_cast_to_kern_ctx(ctx);
+	__u64 enabled = kctx->event->attr.sample_type;
+
+	/* fields not selected by sample_type might not be initialized: drop the sample */
+	if ((enabled & SAMPLE_FLAGS) != SAMPLE_FLAGS)
+		return 0;
+
+	sample_ip = kctx->data->ip;
+	sample_pid = kctx->data->tid_entry.pid;
+	sample_addr = kctx->data->addr;
+
+	return 1;
+}
+
+char _license[] SEC("license") = "GPL";
--
2.39.0.314.g84b9a713c41-goog