hi,
we have a use cases for bpf programs to use binary file's build id.
After some attempts to add helpers/kfuncs [1] [2] Andrii had an idea [3]
to store build id directly in the file object. That would solve our use
case and might be beneficial for other profiling/tracing use cases with
bpf programs.
This RFC patchset adds new config CONFIG_FILE_BUILD_ID option, which adds
build id object pointer to the file object when enabled. The build id is
read/populated when the file is mmap-ed.
I also added bpf and perf changes that would benefit from this.
I'm not sure what's the policy on adding stuff to file object, so apologies
if that's out of line. I'm open to any feedback or suggestions if there's
better place or way to do this.
thanks,
jirka
[1] https://lore.kernel.org/bpf/[email protected]/
[2] https://lore.kernel.org/bpf/[email protected]/
[3] https://lore.kernel.org/bpf/CAEf4BzaZCUoxN_X2ALXwQeFTCwtL17R4P_B_-hUCcidfyO2xyQ@mail.gmail.com/
---
Jiri Olsa (5):
mm: Store build id in file object
bpf: Use file object build id in stackmap
perf: Use file object build id in perf_event_mmap_event
selftests/bpf: Add file_build_id test
selftests/bpf: Add iter_task_vma_buildid test
fs/file_table.c | 3 +++
include/linux/buildid.h | 17 +++++++++++++++++
include/linux/fs.h | 3 +++
kernel/bpf/stackmap.c | 8 ++++++++
kernel/events/core.c | 43 +++++++++++++++++++++++++++++++++++++++----
lib/buildid.c | 44 ++++++++++++++++++++++++++++++++++++++++++++
mm/Kconfig | 7 +++++++
mm/mmap.c | 15 +++++++++++++++
tools/testing/selftests/bpf/prog_tests/bpf_iter.c | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
tools/testing/selftests/bpf/prog_tests/file_build_id.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
tools/testing/selftests/bpf/progs/bpf_iter_task_vma_buildid.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
tools/testing/selftests/bpf/progs/file_build_id.c | 34 ++++++++++++++++++++++++++++++++++
tools/testing/selftests/bpf/trace_helpers.c | 35 +++++++++++++++++++++++++++++++++++
tools/testing/selftests/bpf/trace_helpers.h | 1 +
14 files changed, 413 insertions(+), 4 deletions(-)
create mode 100644 tools/testing/selftests/bpf/prog_tests/file_build_id.c
create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_task_vma_buildid.c
create mode 100644 tools/testing/selftests/bpf/progs/file_build_id.c
Storing build id in file object for elf executable with build
id defined. The build id is stored when file is mmaped.
The build id object assignment to the file is locked with existing
file->f_mapping semaphore.
It's hidden behind new config option CONFIG_FILE_BUILD_ID.
Signed-off-by: Jiri Olsa <[email protected]>
---
fs/file_table.c | 3 +++
include/linux/buildid.h | 17 ++++++++++++++++
include/linux/fs.h | 3 +++
lib/buildid.c | 44 +++++++++++++++++++++++++++++++++++++++++
mm/Kconfig | 7 +++++++
mm/mmap.c | 15 ++++++++++++++
6 files changed, 89 insertions(+)
diff --git a/fs/file_table.c b/fs/file_table.c
index dd88701e54a9..d1c814cdb623 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -28,6 +28,7 @@
#include <linux/ima.h>
#include <linux/swap.h>
#include <linux/kmemleak.h>
+#include <linux/buildid.h>
#include <linux/atomic.h>
@@ -47,6 +48,7 @@ static void file_free_rcu(struct rcu_head *head)
{
struct file *f = container_of(head, struct file, f_rcuhead);
+ file_build_id_free(f);
put_cred(f->f_cred);
kmem_cache_free(filp_cachep, f);
}
@@ -412,6 +414,7 @@ void __init files_init(void)
filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT, NULL);
percpu_counter_init(&nr_files, 0, GFP_KERNEL);
+ build_id_init();
}
/*
diff --git a/include/linux/buildid.h b/include/linux/buildid.h
index 3b7a0ff4642f..7c818085ad2c 100644
--- a/include/linux/buildid.h
+++ b/include/linux/buildid.h
@@ -3,9 +3,15 @@
#define _LINUX_BUILDID_H
#include <linux/mm_types.h>
+#include <linux/slab.h>
#define BUILD_ID_SIZE_MAX 20
+struct build_id {
+ u32 sz;
+ char data[BUILD_ID_SIZE_MAX];
+};
+
int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id,
__u32 *size);
int build_id_parse_buf(const void *buf, unsigned char *build_id, u32 buf_size);
@@ -17,4 +23,15 @@ void init_vmlinux_build_id(void);
static inline void init_vmlinux_build_id(void) { }
#endif
+#ifdef CONFIG_FILE_BUILD_ID
+void __init build_id_init(void);
+void build_id_free(struct build_id *bid);
+int vma_get_build_id(struct vm_area_struct *vma, struct build_id **bidp);
+void file_build_id_free(struct file *f);
+#else
+static inline void __init build_id_init(void) { }
+static inline void build_id_free(struct build_id *bid) { }
+static inline void file_build_id_free(struct file *f) { }
+#endif /* CONFIG_FILE_BUILD_ID */
+
#endif
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c1769a2c5d70..9ad5e5fbf680 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -975,6 +975,9 @@ struct file {
struct address_space *f_mapping;
errseq_t f_wb_err;
errseq_t f_sb_err; /* for syncfs */
+#ifdef CONFIG_FILE_BUILD_ID
+ struct build_id *f_bid;
+#endif
} __randomize_layout
__attribute__((aligned(4))); /* lest something weird decides that 2 is OK */
diff --git a/lib/buildid.c b/lib/buildid.c
index dfc62625cae4..7f6c3ca7b257 100644
--- a/lib/buildid.c
+++ b/lib/buildid.c
@@ -5,6 +5,7 @@
#include <linux/elf.h>
#include <linux/kernel.h>
#include <linux/pagemap.h>
+#include <linux/slab.h>
#define BUILD_ID 3
@@ -189,3 +190,46 @@ void __init init_vmlinux_build_id(void)
build_id_parse_buf(&__start_notes, vmlinux_build_id, size);
}
#endif
+
+#ifdef CONFIG_FILE_BUILD_ID
+
+/* SLAB cache for build_id structures */
+static struct kmem_cache *build_id_cachep;
+
+int vma_get_build_id(struct vm_area_struct *vma, struct build_id **bidp)
+{
+ struct build_id *bid;
+ int err;
+
+ bid = kmem_cache_alloc(build_id_cachep, GFP_KERNEL);
+ if (!bid)
+ return -ENOMEM;
+ err = build_id_parse(vma, bid->data, &bid->sz);
+ if (err) {
+ build_id_free(bid);
+ /* ignore parsing error */
+ return 0;
+ }
+ *bidp = bid;
+ return 0;
+}
+
+void file_build_id_free(struct file *f)
+{
+ build_id_free(f->f_bid);
+}
+
+void build_id_free(struct build_id *bid)
+{
+ if (!bid)
+ return;
+ kmem_cache_free(build_id_cachep, bid);
+}
+
+void __init build_id_init(void)
+{
+ build_id_cachep = kmem_cache_create("build_id", sizeof(struct build_id), 0,
+ SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT, NULL);
+}
+
+#endif /* CONFIG_FILE_BUILD_ID */
diff --git a/mm/Kconfig b/mm/Kconfig
index ff7b209dec05..68911c3780c4 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -1183,6 +1183,13 @@ config LRU_GEN_STATS
This option has a per-memcg and per-node memory overhead.
# }
+config FILE_BUILD_ID
+ bool "Store build id in file object"
+ default n
+ help
+ Store build id in file object for elf executable with build id
+ defined. The build id is stored when file is mmaped.
+
source "mm/damon/Kconfig"
endmenu
diff --git a/mm/mmap.c b/mm/mmap.c
index 425a9349e610..a06f744206e3 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2530,6 +2530,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
pgoff_t vm_pgoff;
int error;
MA_STATE(mas, &mm->mm_mt, addr, end - 1);
+ struct build_id *bid = NULL;
/* Check against address space limit. */
if (!may_expand_vm(mm, vm_flags, len >> PAGE_SHIFT)) {
@@ -2626,6 +2627,13 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
if (error)
goto unmap_and_free_vma;
+#ifdef CONFIG_FILE_BUILD_ID
+ if (vma->vm_flags & VM_EXEC && !file->f_bid) {
+ error = vma_get_build_id(vma, &bid);
+ if (error)
+ goto close_and_free_vma;
+ }
+#endif
/*
* Expansion is handled above, merging is handled below.
* Drivers should not alter the address of the VMA.
@@ -2699,6 +2707,12 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
if (vma->vm_flags & VM_SHARED)
mapping_allow_writable(vma->vm_file->f_mapping);
+#ifdef CONFIG_FILE_BUILD_ID
+ if (bid && !file->f_bid)
+ file->f_bid = bid;
+ else
+ build_id_free(bid);
+#endif
flush_dcache_mmap_lock(vma->vm_file->f_mapping);
vma_interval_tree_insert(vma, &vma->vm_file->f_mapping->i_mmap);
flush_dcache_mmap_unlock(vma->vm_file->f_mapping);
@@ -2759,6 +2773,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
mapping_unmap_writable(file->f_mapping);
free_vma:
vm_area_free(vma);
+ build_id_free(bid);
unacct_error:
if (charged)
vm_unacct_memory(charged);
--
2.39.1
Use build id from file object in stackmap if it's available.
Signed-off-by: Jiri Olsa <[email protected]>
---
kernel/bpf/stackmap.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index aecea7451b61..944cb260a42c 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -156,7 +156,15 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
goto build_id_valid;
}
vma = find_vma(current->mm, ips[i]);
+#ifdef CONFIG_FILE_BUILD_ID
+ if (vma && vma->vm_file && vma->vm_file->f_bid) {
+ memcpy(id_offs[i].build_id,
+ vma->vm_file->f_bid->data,
+ vma->vm_file->f_bid->sz);
+ } else {
+#else
if (!vma || build_id_parse(vma, id_offs[i].build_id, NULL)) {
+#endif
/* per entry fall back to ips */
id_offs[i].status = BPF_STACK_BUILD_ID_IP;
id_offs[i].ip = ips[i];
--
2.39.1
Use build id from file object when available for perf's MMAP2
event build id data.
Signed-off-by: Jiri Olsa <[email protected]>
---
kernel/events/core.c | 43 +++++++++++++++++++++++++++++++++++++++----
1 file changed, 39 insertions(+), 4 deletions(-)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index d56328e5080e..44001fc7edb7 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -8508,6 +8508,9 @@ struct perf_mmap_event {
u32 prot, flags;
u8 build_id[BUILD_ID_SIZE_MAX];
u32 build_id_size;
+#ifdef CONFIG_FILE_BUILD_ID
+ struct build_id *f_bid;
+#endif
struct {
struct perf_event_header header;
@@ -8520,6 +8523,38 @@ struct perf_mmap_event {
} event_id;
};
+#ifdef CONFIG_FILE_BUILD_ID
+static void build_id_read(struct perf_mmap_event *mmap_event)
+{
+ struct vm_area_struct *vma = mmap_event->vma;
+
+ mmap_event->f_bid = vma->vm_file ? vma->vm_file->f_bid : NULL;
+}
+
+static bool has_build_id(struct perf_mmap_event *mmap_event)
+{
+ return mmap_event->f_bid;
+}
+
+#define build_id_data mmap_event->f_bid->data
+#define build_id_size mmap_event->f_bid->sz
+#else
+static void build_id_read(struct perf_mmap_event *mmap_event)
+{
+ struct vm_area_struct *vma = mmap_event->vma;
+
+ build_id_parse(vma, mmap_event->build_id, &mmap_event->build_id_size);
+}
+
+static bool has_build_id(struct perf_mmap_event *mmap_event)
+{
+ return mmap_event->build_id_size;
+}
+
+#define build_id_data mmap_event->build_id
+#define build_id_size mmap_event->build_id_size
+#endif
+
static int perf_event_mmap_match(struct perf_event *event,
void *data)
{
@@ -8564,7 +8599,7 @@ static void perf_event_mmap_output(struct perf_event *event,
mmap_event->event_id.pid = perf_event_pid(event, current);
mmap_event->event_id.tid = perf_event_tid(event, current);
- use_build_id = event->attr.build_id && mmap_event->build_id_size;
+ use_build_id = event->attr.build_id && has_build_id(mmap_event);
if (event->attr.mmap2 && use_build_id)
mmap_event->event_id.header.misc |= PERF_RECORD_MISC_MMAP_BUILD_ID;
@@ -8573,10 +8608,10 @@ static void perf_event_mmap_output(struct perf_event *event,
if (event->attr.mmap2) {
if (use_build_id) {
- u8 size[4] = { (u8) mmap_event->build_id_size, 0, 0, 0 };
+ u8 size[4] = { (u8) build_id_size, 0, 0, 0 };
__output_copy(&handle, size, 4);
- __output_copy(&handle, mmap_event->build_id, BUILD_ID_SIZE_MAX);
+ __output_copy(&handle, build_id_data, BUILD_ID_SIZE_MAX);
} else {
perf_output_put(&handle, mmap_event->maj);
perf_output_put(&handle, mmap_event->min);
@@ -8708,7 +8743,7 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size;
if (atomic_read(&nr_build_id_events))
- build_id_parse(vma, mmap_event->build_id, &mmap_event->build_id_size);
+ build_id_read(mmap_event);
perf_iterate_sb(perf_event_mmap_output,
mmap_event,
--
2.39.1
The test attaches bpf program to sched_process_exec tracepoint
and gets build of executed file from bprm->file object.
Signed-off-by: Jiri Olsa <[email protected]>
---
.../selftests/bpf/prog_tests/file_build_id.c | 70 +++++++++++++++++++
.../selftests/bpf/progs/file_build_id.c | 34 +++++++++
tools/testing/selftests/bpf/trace_helpers.c | 35 ++++++++++
tools/testing/selftests/bpf/trace_helpers.h | 1 +
4 files changed, 140 insertions(+)
create mode 100644 tools/testing/selftests/bpf/prog_tests/file_build_id.c
create mode 100644 tools/testing/selftests/bpf/progs/file_build_id.c
diff --git a/tools/testing/selftests/bpf/prog_tests/file_build_id.c b/tools/testing/selftests/bpf/prog_tests/file_build_id.c
new file mode 100644
index 000000000000..a7b6307cc0f7
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/file_build_id.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <unistd.h>
+#include <test_progs.h>
+#include "file_build_id.skel.h"
+#include "trace_helpers.h"
+
+#define BUILDID_STR_SIZE (BPF_BUILD_ID_SIZE*2 + 1)
+
+void test_file_build_id(void)
+{
+ int go[2], err, child_pid, child_status, c = 1, i;
+ char bpf_build_id[BUILDID_STR_SIZE] = {};
+ struct file_build_id *skel;
+ char *bid = NULL;
+
+ skel = file_build_id__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "file_build_id__open_and_load"))
+ return;
+
+ if (!ASSERT_OK(pipe(go), "pipe"))
+ goto out;
+
+ child_pid = fork();
+ if (child_pid < 0)
+ goto out;
+
+ /* child */
+ if (child_pid == 0) {
+ /* wait for parent's pid update */
+ err = read(go[0], &c, 1);
+ if (!ASSERT_EQ(err, 1, "child_read_pipe"))
+ exit(err);
+
+ execle("/bin/bash", "bash", "-c", "exit 0", NULL, NULL);
+ exit(errno);
+ }
+
+ /* parent, update child's pid and kick it */
+ skel->bss->pid = child_pid;
+
+ err = file_build_id__attach(skel);
+ if (!ASSERT_OK(err, "file_build_id__attach"))
+ goto out;
+
+ err = write(go[1], &c, 1);
+ if (!ASSERT_EQ(err, 1, "child_write_pipe"))
+ goto out;
+
+ /* wait for child to exit */
+ waitpid(child_pid, &child_status, 0);
+ if (!ASSERT_EQ(WEXITSTATUS(child_status), 0, "child_exit_value"))
+ goto out;
+
+ if (!ASSERT_OK(read_buildid("/bin/bash", &bid), "read_buildid"))
+ goto out;
+
+ ASSERT_EQ(skel->bss->build_id_size, strlen(bid)/2, "build_id_size");
+
+ /* Convert bpf build id to string, so we can compare it later. */
+ for (i = 0; i < skel->bss->build_id_size; i++) {
+ sprintf(bpf_build_id + i*2, "%02x",
+ (unsigned char) skel->bss->build_id[i]);
+ }
+ ASSERT_STREQ(bpf_build_id, bid, "build_id_data");
+
+out:
+ file_build_id__destroy(skel);
+ free(bid);
+}
diff --git a/tools/testing/selftests/bpf/progs/file_build_id.c b/tools/testing/selftests/bpf/progs/file_build_id.c
new file mode 100644
index 000000000000..639a7217a927
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/file_build_id.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <linux/string.h>
+
+char _license[] SEC("license") = "GPL";
+
+int pid;
+u32 build_id_size;
+char build_id[20];
+
+SEC("tp_btf/sched_process_exec")
+int BPF_PROG(prog, struct task_struct *p, pid_t old_pid, struct linux_binprm *bprm)
+{
+ int cur_pid = bpf_get_current_pid_tgid() >> 32;
+ struct build_id *bid;
+
+ if (pid != cur_pid)
+ return 0;
+
+ if (!bprm->file || !bprm->file->f_bid)
+ return 0;
+
+ bid = bprm->file->f_bid;
+ build_id_size = bid->sz;
+
+ if (build_id_size > 20)
+ return 0;
+
+ memcpy(build_id, bid->data, 20);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
index 09a16a77bae4..f5557890e383 100644
--- a/tools/testing/selftests/bpf/trace_helpers.c
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -9,6 +9,7 @@
#include <poll.h>
#include <unistd.h>
#include <linux/perf_event.h>
+#include <linux/limits.h>
#include <sys/mman.h>
#include "trace_helpers.h"
@@ -230,3 +231,37 @@ ssize_t get_rel_offset(uintptr_t addr)
fclose(f);
return -EINVAL;
}
+
+int read_buildid(const char *path, char **build_id)
+{
+ char tmp[] = "/tmp/dataXXXXXX";
+ char buf[PATH_MAX + 200];
+ int err, fd;
+ FILE *f;
+
+ fd = mkstemp(tmp);
+ if (fd == -1)
+ return -1;
+ close(fd);
+
+ snprintf(buf, sizeof(buf),
+ "readelf -n %s 2>/dev/null | grep 'Build ID' | awk '{print $3}' > %s",
+ path, tmp);
+
+ err = system(buf);
+ if (err)
+ goto out;
+
+ f = fopen(tmp, "r");
+ if (f) {
+ if (fscanf(f, "%ms$*\n", build_id) != 1) {
+ *build_id = NULL;
+ err = -1;
+ }
+ fclose(f);
+ }
+
+out:
+ unlink(tmp);
+ return err;
+}
diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h
index 53efde0e2998..1a38c808b6c2 100644
--- a/tools/testing/selftests/bpf/trace_helpers.h
+++ b/tools/testing/selftests/bpf/trace_helpers.h
@@ -23,4 +23,5 @@ void read_trace_pipe(void);
ssize_t get_uprobe_offset(const void *addr);
ssize_t get_rel_offset(uintptr_t addr);
+int read_buildid(const char *path, char **build_id);
#endif
--
2.39.1
Testing iterator access to build id in vma->vm_file object
by storing each binary with buildid into map and checking
it against buildid retrieved in user space.
Signed-off-by: Jiri Olsa <[email protected]>
---
.../selftests/bpf/prog_tests/bpf_iter.c | 88 +++++++++++++++++++
.../bpf/progs/bpf_iter_task_vma_buildid.c | 49 +++++++++++
2 files changed, 137 insertions(+)
create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_task_vma_buildid.c
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 3af6450763e9..fd3217b68c2e 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -33,6 +33,7 @@
#include "bpf_iter_bpf_link.skel.h"
#include "bpf_iter_ksym.skel.h"
#include "bpf_iter_sockmap.skel.h"
+#include "bpf_iter_task_vma_buildid.skel.h"
static int duration;
@@ -1536,6 +1537,91 @@ static void test_task_vma_dead_task(void)
bpf_iter_task_vma__destroy(skel);
}
+#define D_PATH_BUF_SIZE 1024
+#define BUILD_ID_SIZE_MAX 20
+
+struct build_id {
+ u32 sz;
+ char data[BUILD_ID_SIZE_MAX];
+};
+
+#define BUILDID_STR_SIZE (BPF_BUILD_ID_SIZE*2 + 1)
+
+static void test_task_vma_buildid(void)
+{
+ int err, iter_fd = -1, proc_maps_fd = -1;
+ struct bpf_iter_task_vma_buildid *skel;
+ char key[D_PATH_BUF_SIZE], *prev_key;
+ char bpf_build_id[BUILDID_STR_SIZE];
+ int len, files_fd, i, cnt = 0;
+ struct build_id val;
+ char *build_id;
+ char c;
+
+ skel = bpf_iter_task_vma_buildid__open();
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_task_vma_buildid__open"))
+ return;
+
+ err = bpf_iter_task_vma_buildid__load(skel);
+ if (!ASSERT_OK(err, "bpf_iter_task_vma_buildid__load"))
+ goto out;
+
+ skel->links.proc_maps = bpf_program__attach_iter(
+ skel->progs.proc_maps, NULL);
+
+ if (!ASSERT_OK_PTR(skel->links.proc_maps, "bpf_program__attach_iter")) {
+ skel->links.proc_maps = NULL;
+ goto out;
+ }
+
+ iter_fd = bpf_iter_create(bpf_link__fd(skel->links.proc_maps));
+ if (!ASSERT_GE(iter_fd, 0, "create_iter"))
+ goto out;
+
+ /* trigger the iterator, there's no output, just map */
+ len = read(iter_fd, &c, 1);
+ ASSERT_EQ(len, 0, "len_check");
+
+ files_fd = bpf_map__fd(skel->maps.files);
+
+ prev_key = NULL;
+
+ while (true) {
+ err = bpf_map_get_next_key(files_fd, prev_key, &key);
+ if (err) {
+ if (errno == ENOENT)
+ err = 0;
+ break;
+ }
+ if (bpf_map_lookup_elem(files_fd, key, &val))
+ break;
+ if (!ASSERT_LE(val.sz, BUILD_ID_SIZE_MAX, "buildid_size"))
+ break;
+
+ memset(bpf_build_id, 0x0, sizeof(bpf_build_id));
+ for (i = 0; i < val.sz; i++) {
+ sprintf(bpf_build_id + i*2, "%02x",
+ (unsigned char) val.data[i]);
+ }
+
+ if (!ASSERT_OK(read_buildid(key, &build_id), "read_buildid"))
+ break;
+
+ printf("BUILDID %s %s %s\n", bpf_build_id, build_id, key);
+ ASSERT_OK(strncmp(bpf_build_id, build_id, strlen(bpf_build_id)), "buildid_cmp");
+
+ free(build_id);
+ prev_key = key;
+ cnt++;
+ }
+
+ printf("checked %d files\n", cnt);
+out:
+ close(proc_maps_fd);
+ close(iter_fd);
+ bpf_iter_task_vma_buildid__destroy(skel);
+}
+
void test_bpf_sockmap_map_iter_fd(void)
{
struct bpf_iter_sockmap *skel;
@@ -1659,6 +1745,8 @@ void test_bpf_iter(void)
test_task_vma();
if (test__start_subtest("task_vma_dead_task"))
test_task_vma_dead_task();
+ if (test__start_subtest("task_vma_buildid"))
+ test_task_vma_buildid();
if (test__start_subtest("task_btf"))
test_task_btf();
if (test__start_subtest("tcp4"))
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_vma_buildid.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_vma_buildid.c
new file mode 100644
index 000000000000..25e2179ae5f4
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_vma_buildid.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+#include <string.h>
+
+char _license[] SEC("license") = "GPL";
+
+#define VM_EXEC 0x00000004
+#define D_PATH_BUF_SIZE 1024
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 10000);
+ __type(key, char[D_PATH_BUF_SIZE]);
+ __type(value, struct build_id);
+} files SEC(".maps");
+
+static char tmp_key[D_PATH_BUF_SIZE];
+static struct build_id tmp_data;
+
+SEC("iter/task_vma") int proc_maps(struct bpf_iter__task_vma *ctx)
+{
+ struct vm_area_struct *vma = ctx->vma;
+ struct seq_file *seq = ctx->meta->seq;
+ struct task_struct *task = ctx->task;
+ unsigned long file_key;
+ struct file *file;
+
+ if (task == (void *)0 || vma == (void *)0)
+ return 0;
+
+ if (!(vma->vm_flags & VM_EXEC))
+ return 0;
+
+ file = vma->vm_file;
+ if (!file)
+ return 0;
+
+ memset(tmp_key, 0x0, D_PATH_BUF_SIZE);
+ bpf_d_path(&file->f_path, (char *) &tmp_key, D_PATH_BUF_SIZE);
+
+ if (bpf_map_lookup_elem(&files, &tmp_key))
+ return 0;
+
+ memcpy(&tmp_data, file->f_bid, sizeof(*file->f_bid));
+ bpf_map_update_elem(&files, &tmp_key, &tmp_data, 0);
+ return 0;
+}
--
2.39.1
On Wed, Feb 1, 2023 at 5:57 AM Jiri Olsa <[email protected]> wrote:
>
> hi,
> we have a use cases for bpf programs to use binary file's build id.
>
> After some attempts to add helpers/kfuncs [1] [2] Andrii had an idea [3]
> to store build id directly in the file object. That would solve our use
> case and might be beneficial for other profiling/tracing use cases with
> bpf programs.
>
> This RFC patchset adds new config CONFIG_FILE_BUILD_ID option, which adds
> build id object pointer to the file object when enabled. The build id is
> read/populated when the file is mmap-ed.
>
> I also added bpf and perf changes that would benefit from this.
>
> I'm not sure what's the policy on adding stuff to file object, so apologies
> if that's out of line. I'm open to any feedback or suggestions if there's
> better place or way to do this.
struct file represents all files while build_id is for executables only,
and not all executables, but those currently running, so
I think it's cleaner to put it into vm_area_struct.
On Thu, Feb 02, 2023 at 03:15:39AM -0800, Alexei Starovoitov wrote:
> On Wed, Feb 1, 2023 at 5:57 AM Jiri Olsa <[email protected]> wrote:
> >
> > hi,
> > we have a use cases for bpf programs to use binary file's build id.
> >
> > After some attempts to add helpers/kfuncs [1] [2] Andrii had an idea [3]
> > to store build id directly in the file object. That would solve our use
> > case and might be beneficial for other profiling/tracing use cases with
> > bpf programs.
> >
> > This RFC patchset adds new config CONFIG_FILE_BUILD_ID option, which adds
> > build id object pointer to the file object when enabled. The build id is
> > read/populated when the file is mmap-ed.
> >
> > I also added bpf and perf changes that would benefit from this.
> >
> > I'm not sure what's the policy on adding stuff to file object, so apologies
> > if that's out of line. I'm open to any feedback or suggestions if there's
> > better place or way to do this.
>
> struct file represents all files while build_id is for executables only,
> and not all executables, but those currently running, so
> I think it's cleaner to put it into vm_area_struct.
I thought file objects would be shared to some extend and we might save
some memory keeping the build id objects there, but not sure it's really
the case now.. will check, using vma might be also easier
jirka
On Wed, Feb 01, 2023 at 02:57:32PM +0100, Jiri Olsa wrote:
> hi,
> we have a use cases for bpf programs to use binary file's build id.
What is your use case? Is it some hobbyist thing or is it something
that distro kernels are all going to enable?
On Thu, Feb 02, 2023 at 03:10:30PM +0000, Matthew Wilcox wrote:
> On Wed, Feb 01, 2023 at 02:57:32PM +0100, Jiri Olsa wrote:
> > hi,
> > we have a use cases for bpf programs to use binary file's build id.
>
> What is your use case? Is it some hobbyist thing or is it something
> that distro kernels are all going to enable?
>
our use case is for hubble/tetragon [1] and we are asked to report
buildid of executed binary.. but the monitoring process is running
in its own pod and can't access the the binaries outside of it, so
we need to be able to read it in kernel
I understand Hao Luo has also use case for that [2]
jirka
[1] https://github.com/cilium/tetragon/
[2] https://lore.kernel.org/bpf/CA+khW7gAYHmoUkq0UqTiZjdOqARLG256USj3uFwi6z_FyZf31w@mail.gmail.com/
On Thu, Feb 02, 2023 at 03:15:39AM -0800, Alexei Starovoitov wrote:
> On Wed, Feb 1, 2023 at 5:57 AM Jiri Olsa <[email protected]> wrote:
> >
> > hi,
> > we have a use cases for bpf programs to use binary file's build id.
> >
> > After some attempts to add helpers/kfuncs [1] [2] Andrii had an idea [3]
> > to store build id directly in the file object. That would solve our use
> > case and might be beneficial for other profiling/tracing use cases with
> > bpf programs.
> >
> > This RFC patchset adds new config CONFIG_FILE_BUILD_ID option, which adds
> > build id object pointer to the file object when enabled. The build id is
> > read/populated when the file is mmap-ed.
> >
> > I also added bpf and perf changes that would benefit from this.
> >
> > I'm not sure what's the policy on adding stuff to file object, so apologies
> > if that's out of line. I'm open to any feedback or suggestions if there's
> > better place or way to do this.
>
> struct file represents all files while build_id is for executables only,
> and not all executables, but those currently running, so
> I think it's cleaner to put it into vm_area_struct.
There can be many vm_area_structs per file, and like for struct file,
there's vm_area_structs for non-executable ranges too.
Given there's only one buildid per file, struct file seems most
appropriate to me.
On Wed, Feb 1, 2023 at 5:57 AM Jiri Olsa <[email protected]> wrote:
>
> Storing build id in file object for elf executable with build
> id defined. The build id is stored when file is mmaped.
>
> The build id object assignment to the file is locked with existing
> file->f_mapping semaphore.
>
> It's hidden behind new config option CONFIG_FILE_BUILD_ID.
>
> Signed-off-by: Jiri Olsa <[email protected]>
> ---
> fs/file_table.c | 3 +++
> include/linux/buildid.h | 17 ++++++++++++++++
> include/linux/fs.h | 3 +++
> lib/buildid.c | 44 +++++++++++++++++++++++++++++++++++++++++
> mm/Kconfig | 7 +++++++
> mm/mmap.c | 15 ++++++++++++++
> 6 files changed, 89 insertions(+)
>
> diff --git a/fs/file_table.c b/fs/file_table.c
> index dd88701e54a9..d1c814cdb623 100644
> --- a/fs/file_table.c
> +++ b/fs/file_table.c
> @@ -28,6 +28,7 @@
> #include <linux/ima.h>
> #include <linux/swap.h>
> #include <linux/kmemleak.h>
> +#include <linux/buildid.h>
>
> #include <linux/atomic.h>
>
> @@ -47,6 +48,7 @@ static void file_free_rcu(struct rcu_head *head)
> {
> struct file *f = container_of(head, struct file, f_rcuhead);
>
> + file_build_id_free(f);
> put_cred(f->f_cred);
> kmem_cache_free(filp_cachep, f);
> }
> @@ -412,6 +414,7 @@ void __init files_init(void)
> filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
> SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT, NULL);
> percpu_counter_init(&nr_files, 0, GFP_KERNEL);
> + build_id_init();
> }
>
> /*
> diff --git a/include/linux/buildid.h b/include/linux/buildid.h
> index 3b7a0ff4642f..7c818085ad2c 100644
> --- a/include/linux/buildid.h
> +++ b/include/linux/buildid.h
> @@ -3,9 +3,15 @@
> #define _LINUX_BUILDID_H
>
> #include <linux/mm_types.h>
> +#include <linux/slab.h>
>
> #define BUILD_ID_SIZE_MAX 20
>
> +struct build_id {
> + u32 sz;
> + char data[BUILD_ID_SIZE_MAX];
don't know if 21 vs 24 matters for kmem_cache_create(), but we don't
need 4 bytes to store build_id size, given max size is 20, so maybe
use u8 for sz?
> +};
> +
> int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id,
> __u32 *size);
> int build_id_parse_buf(const void *buf, unsigned char *build_id, u32 buf_size);
> @@ -17,4 +23,15 @@ void init_vmlinux_build_id(void);
> static inline void init_vmlinux_build_id(void) { }
> #endif
>
> +#ifdef CONFIG_FILE_BUILD_ID
> +void __init build_id_init(void);
> +void build_id_free(struct build_id *bid);
> +int vma_get_build_id(struct vm_area_struct *vma, struct build_id **bidp);
> +void file_build_id_free(struct file *f);
> +#else
> +static inline void __init build_id_init(void) { }
> +static inline void build_id_free(struct build_id *bid) { }
> +static inline void file_build_id_free(struct file *f) { }
> +#endif /* CONFIG_FILE_BUILD_ID */
> +
> #endif
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index c1769a2c5d70..9ad5e5fbf680 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -975,6 +975,9 @@ struct file {
> struct address_space *f_mapping;
> errseq_t f_wb_err;
> errseq_t f_sb_err; /* for syncfs */
> +#ifdef CONFIG_FILE_BUILD_ID
> + struct build_id *f_bid;
naming nit: anything wrong with f_buildid or f_build_id? all the
related APIs use fully spelled out "build_id"
> +#endif
> } __randomize_layout
> __attribute__((aligned(4))); /* lest something weird decides that 2 is OK */
>
> diff --git a/lib/buildid.c b/lib/buildid.c
> index dfc62625cae4..7f6c3ca7b257 100644
> --- a/lib/buildid.c
> +++ b/lib/buildid.c
> @@ -5,6 +5,7 @@
> #include <linux/elf.h>
> #include <linux/kernel.h>
> #include <linux/pagemap.h>
> +#include <linux/slab.h>
>
> #define BUILD_ID 3
>
> @@ -189,3 +190,46 @@ void __init init_vmlinux_build_id(void)
> build_id_parse_buf(&__start_notes, vmlinux_build_id, size);
> }
> #endif
> +
> +#ifdef CONFIG_FILE_BUILD_ID
> +
> +/* SLAB cache for build_id structures */
> +static struct kmem_cache *build_id_cachep;
> +
> +int vma_get_build_id(struct vm_area_struct *vma, struct build_id **bidp)
> +{
> + struct build_id *bid;
> + int err;
> +
> + bid = kmem_cache_alloc(build_id_cachep, GFP_KERNEL);
> + if (!bid)
> + return -ENOMEM;
> + err = build_id_parse(vma, bid->data, &bid->sz);
> + if (err) {
> + build_id_free(bid);
> + /* ignore parsing error */
> + return 0;
> + }
> + *bidp = bid;
> + return 0;
> +}
> +
> +void file_build_id_free(struct file *f)
> +{
> + build_id_free(f->f_bid);
> +}
> +
> +void build_id_free(struct build_id *bid)
> +{
> + if (!bid)
> + return;
> + kmem_cache_free(build_id_cachep, bid);
> +}
> +
> +void __init build_id_init(void)
> +{
> + build_id_cachep = kmem_cache_create("build_id", sizeof(struct build_id), 0,
> + SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT, NULL);
> +}
> +
> +#endif /* CONFIG_FILE_BUILD_ID */
> diff --git a/mm/Kconfig b/mm/Kconfig
> index ff7b209dec05..68911c3780c4 100644
> --- a/mm/Kconfig
> +++ b/mm/Kconfig
> @@ -1183,6 +1183,13 @@ config LRU_GEN_STATS
> This option has a per-memcg and per-node memory overhead.
> # }
>
> +config FILE_BUILD_ID
> + bool "Store build id in file object"
> + default n
> + help
> + Store build id in file object for elf executable with build id
> + defined. The build id is stored when file is mmaped.
> +
> source "mm/damon/Kconfig"
>
> endmenu
> diff --git a/mm/mmap.c b/mm/mmap.c
> index 425a9349e610..a06f744206e3 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -2530,6 +2530,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
> pgoff_t vm_pgoff;
> int error;
> MA_STATE(mas, &mm->mm_mt, addr, end - 1);
> + struct build_id *bid = NULL;
>
> /* Check against address space limit. */
> if (!may_expand_vm(mm, vm_flags, len >> PAGE_SHIFT)) {
> @@ -2626,6 +2627,13 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
> if (error)
> goto unmap_and_free_vma;
>
> +#ifdef CONFIG_FILE_BUILD_ID
> + if (vma->vm_flags & VM_EXEC && !file->f_bid) {
> + error = vma_get_build_id(vma, &bid);
> + if (error)
> + goto close_and_free_vma;
do we want to fail mmap_region() if we get -ENOMEM from
vma_get_build_id()? can't we just store ERR_PTR(error) in f_bid field?
So we'll have f_bid == NULL for non-exec files, ERR_PTR() for when we
tried and failed to get build ID, and a valid pointer if we succeeded?
> + }
> +#endif
> /*
> * Expansion is handled above, merging is handled below.
> * Drivers should not alter the address of the VMA.
> @@ -2699,6 +2707,12 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
> if (vma->vm_flags & VM_SHARED)
> mapping_allow_writable(vma->vm_file->f_mapping);
>
> +#ifdef CONFIG_FILE_BUILD_ID
> + if (bid && !file->f_bid)
> + file->f_bid = bid;
> + else
> + build_id_free(bid);
> +#endif
> flush_dcache_mmap_lock(vma->vm_file->f_mapping);
> vma_interval_tree_insert(vma, &vma->vm_file->f_mapping->i_mmap);
> flush_dcache_mmap_unlock(vma->vm_file->f_mapping);
> @@ -2759,6 +2773,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
> mapping_unmap_writable(file->f_mapping);
> free_vma:
> vm_area_free(vma);
> + build_id_free(bid);
> unacct_error:
> if (charged)
> vm_unacct_memory(charged);
> --
> 2.39.1
>
On Wed, Feb 1, 2023 at 5:58 AM Jiri Olsa <[email protected]> wrote:
>
> The test attaches bpf program to sched_process_exec tracepoint
> and gets build of executed file from bprm->file object.
>
> Signed-off-by: Jiri Olsa <[email protected]>
> ---
> .../selftests/bpf/prog_tests/file_build_id.c | 70 +++++++++++++++++++
> .../selftests/bpf/progs/file_build_id.c | 34 +++++++++
> tools/testing/selftests/bpf/trace_helpers.c | 35 ++++++++++
> tools/testing/selftests/bpf/trace_helpers.h | 1 +
> 4 files changed, 140 insertions(+)
> create mode 100644 tools/testing/selftests/bpf/prog_tests/file_build_id.c
> create mode 100644 tools/testing/selftests/bpf/progs/file_build_id.c
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/file_build_id.c b/tools/testing/selftests/bpf/prog_tests/file_build_id.c
> new file mode 100644
> index 000000000000..a7b6307cc0f7
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/prog_tests/file_build_id.c
> @@ -0,0 +1,70 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +#include <unistd.h>
> +#include <test_progs.h>
> +#include "file_build_id.skel.h"
> +#include "trace_helpers.h"
> +
> +#define BUILDID_STR_SIZE (BPF_BUILD_ID_SIZE*2 + 1)
> +
> +void test_file_build_id(void)
> +{
> + int go[2], err, child_pid, child_status, c = 1, i;
> + char bpf_build_id[BUILDID_STR_SIZE] = {};
> + struct file_build_id *skel;
> + char *bid = NULL;
> +
> + skel = file_build_id__open_and_load();
> + if (!ASSERT_OK_PTR(skel, "file_build_id__open_and_load"))
> + return;
> +
> + if (!ASSERT_OK(pipe(go), "pipe"))
> + goto out;
> +
> + child_pid = fork();
> + if (child_pid < 0)
> + goto out;
> +
> + /* child */
> + if (child_pid == 0) {
> + /* wait for parent's pid update */
> + err = read(go[0], &c, 1);
> + if (!ASSERT_EQ(err, 1, "child_read_pipe"))
> + exit(err);
> +
> + execle("/bin/bash", "bash", "-c", "exit 0", NULL, NULL);
> + exit(errno);
> + }
> +
> + /* parent, update child's pid and kick it */
> + skel->bss->pid = child_pid;
> +
> + err = file_build_id__attach(skel);
> + if (!ASSERT_OK(err, "file_build_id__attach"))
> + goto out;
> +
> + err = write(go[1], &c, 1);
> + if (!ASSERT_EQ(err, 1, "child_write_pipe"))
> + goto out;
> +
> + /* wait for child to exit */
> + waitpid(child_pid, &child_status, 0);
> + if (!ASSERT_EQ(WEXITSTATUS(child_status), 0, "child_exit_value"))
> + goto out;
> +
> + if (!ASSERT_OK(read_buildid("/bin/bash", &bid), "read_buildid"))
can we use urandom_read for build_id ? And it would also be nice to
check that build id fetching works for liburandom_read.so as well.
> + goto out;
> +
> + ASSERT_EQ(skel->bss->build_id_size, strlen(bid)/2, "build_id_size");
> +
> + /* Convert bpf build id to string, so we can compare it later. */
> + for (i = 0; i < skel->bss->build_id_size; i++) {
> + sprintf(bpf_build_id + i*2, "%02x",
> + (unsigned char) skel->bss->build_id[i]);
> + }
> + ASSERT_STREQ(bpf_build_id, bid, "build_id_data");
> +
> +out:
> + file_build_id__destroy(skel);
> + free(bid);
> +}
> diff --git a/tools/testing/selftests/bpf/progs/file_build_id.c b/tools/testing/selftests/bpf/progs/file_build_id.c
> new file mode 100644
> index 000000000000..639a7217a927
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/file_build_id.c
> @@ -0,0 +1,34 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +#include "vmlinux.h"
> +#include <bpf/bpf_helpers.h>
> +#include <bpf/bpf_tracing.h>
> +#include <linux/string.h>
> +
> +char _license[] SEC("license") = "GPL";
> +
> +int pid;
> +u32 build_id_size;
> +char build_id[20];
> +
> +SEC("tp_btf/sched_process_exec")
> +int BPF_PROG(prog, struct task_struct *p, pid_t old_pid, struct linux_binprm *bprm)
> +{
> + int cur_pid = bpf_get_current_pid_tgid() >> 32;
> + struct build_id *bid;
> +
> + if (pid != cur_pid)
> + return 0;
> +
> + if (!bprm->file || !bprm->file->f_bid)
> + return 0;
> +
> + bid = bprm->file->f_bid;
> + build_id_size = bid->sz;
> +
> + if (build_id_size > 20)
> + return 0;
> +
> + memcpy(build_id, bid->data, 20);
> + return 0;
> +}
> diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
> index 09a16a77bae4..f5557890e383 100644
> --- a/tools/testing/selftests/bpf/trace_helpers.c
> +++ b/tools/testing/selftests/bpf/trace_helpers.c
> @@ -9,6 +9,7 @@
> #include <poll.h>
> #include <unistd.h>
> #include <linux/perf_event.h>
> +#include <linux/limits.h>
> #include <sys/mman.h>
> #include "trace_helpers.h"
>
> @@ -230,3 +231,37 @@ ssize_t get_rel_offset(uintptr_t addr)
> fclose(f);
> return -EINVAL;
> }
> +
> +int read_buildid(const char *path, char **build_id)
> +{
> + char tmp[] = "/tmp/dataXXXXXX";
> + char buf[PATH_MAX + 200];
> + int err, fd;
> + FILE *f;
> +
> + fd = mkstemp(tmp);
> + if (fd == -1)
> + return -1;
> + close(fd);
> +
> + snprintf(buf, sizeof(buf),
> + "readelf -n %s 2>/dev/null | grep 'Build ID' | awk '{print $3}' > %s",
> + path, tmp);
> +
shelling out to readelf for this is unfortunate... maybe let's write a
libelf-based helper to fetch build ID from .note section?
> + err = system(buf);
> + if (err)
> + goto out;
> +
> + f = fopen(tmp, "r");
> + if (f) {
> + if (fscanf(f, "%ms$*\n", build_id) != 1) {
> + *build_id = NULL;
> + err = -1;
> + }
> + fclose(f);
> + }
> +
> +out:
> + unlink(tmp);
> + return err;
> +}
> diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h
> index 53efde0e2998..1a38c808b6c2 100644
> --- a/tools/testing/selftests/bpf/trace_helpers.h
> +++ b/tools/testing/selftests/bpf/trace_helpers.h
> @@ -23,4 +23,5 @@ void read_trace_pipe(void);
> ssize_t get_uprobe_offset(const void *addr);
> ssize_t get_rel_offset(uintptr_t addr);
>
> +int read_buildid(const char *path, char **build_id);
> #endif
> --
> 2.39.1
>
On Wed, Feb 1, 2023 at 5:58 AM Jiri Olsa <[email protected]> wrote:
>
> Testing iterator access to build id in vma->vm_file object
> by storing each binary with buildid into map and checking
> it against buildid retrieved in user space.
>
> Signed-off-by: Jiri Olsa <[email protected]>
> ---
> .../selftests/bpf/prog_tests/bpf_iter.c | 88 +++++++++++++++++++
> .../bpf/progs/bpf_iter_task_vma_buildid.c | 49 +++++++++++
> 2 files changed, 137 insertions(+)
> create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_task_vma_buildid.c
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> index 3af6450763e9..fd3217b68c2e 100644
> --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> @@ -33,6 +33,7 @@
> #include "bpf_iter_bpf_link.skel.h"
> #include "bpf_iter_ksym.skel.h"
> #include "bpf_iter_sockmap.skel.h"
> +#include "bpf_iter_task_vma_buildid.skel.h"
>
> static int duration;
>
> @@ -1536,6 +1537,91 @@ static void test_task_vma_dead_task(void)
> bpf_iter_task_vma__destroy(skel);
> }
>
> +#define D_PATH_BUF_SIZE 1024
> +#define BUILD_ID_SIZE_MAX 20
> +
> +struct build_id {
> + u32 sz;
> + char data[BUILD_ID_SIZE_MAX];
> +};
> +
> +#define BUILDID_STR_SIZE (BPF_BUILD_ID_SIZE*2 + 1)
> +
> +static void test_task_vma_buildid(void)
> +{
> + int err, iter_fd = -1, proc_maps_fd = -1;
> + struct bpf_iter_task_vma_buildid *skel;
> + char key[D_PATH_BUF_SIZE], *prev_key;
> + char bpf_build_id[BUILDID_STR_SIZE];
> + int len, files_fd, i, cnt = 0;
> + struct build_id val;
> + char *build_id;
> + char c;
> +
> + skel = bpf_iter_task_vma_buildid__open();
> + if (!ASSERT_OK_PTR(skel, "bpf_iter_task_vma_buildid__open"))
> + return;
> +
> + err = bpf_iter_task_vma_buildid__load(skel);
> + if (!ASSERT_OK(err, "bpf_iter_task_vma_buildid__load"))
> + goto out;
minor: you can do __open_and_load() in one step
> +
> + skel->links.proc_maps = bpf_program__attach_iter(
> + skel->progs.proc_maps, NULL);
> +
> + if (!ASSERT_OK_PTR(skel->links.proc_maps, "bpf_program__attach_iter")) {
> + skel->links.proc_maps = NULL;
> + goto out;
> + }
> +
> + iter_fd = bpf_iter_create(bpf_link__fd(skel->links.proc_maps));
> + if (!ASSERT_GE(iter_fd, 0, "create_iter"))
> + goto out;
> +
> + /* trigger the iterator, there's no output, just map */
> + len = read(iter_fd, &c, 1);
> + ASSERT_EQ(len, 0, "len_check");
> +
> + files_fd = bpf_map__fd(skel->maps.files);
> +
> + prev_key = NULL;
> +
> + while (true) {
> + err = bpf_map_get_next_key(files_fd, prev_key, &key);
> + if (err) {
> + if (errno == ENOENT)
> + err = 0;
> + break;
> + }
> + if (bpf_map_lookup_elem(files_fd, key, &val))
> + break;
> + if (!ASSERT_LE(val.sz, BUILD_ID_SIZE_MAX, "buildid_size"))
> + break;
> +
> + memset(bpf_build_id, 0x0, sizeof(bpf_build_id));
> + for (i = 0; i < val.sz; i++) {
> + sprintf(bpf_build_id + i*2, "%02x",
> + (unsigned char) val.data[i]);
> + }
> +
> + if (!ASSERT_OK(read_buildid(key, &build_id), "read_buildid"))
> + break;
> +
> + printf("BUILDID %s %s %s\n", bpf_build_id, build_id, key);
debugging leftover or intentional?
> + ASSERT_OK(strncmp(bpf_build_id, build_id, strlen(bpf_build_id)), "buildid_cmp");
> +
> + free(build_id);
> + prev_key = key;
> + cnt++;
> + }
> +
> + printf("checked %d files\n", cnt);
ditto
> +out:
> + close(proc_maps_fd);
> + close(iter_fd);
> + bpf_iter_task_vma_buildid__destroy(skel);
> +}
> +
> void test_bpf_sockmap_map_iter_fd(void)
> {
> struct bpf_iter_sockmap *skel;
> @@ -1659,6 +1745,8 @@ void test_bpf_iter(void)
> test_task_vma();
> if (test__start_subtest("task_vma_dead_task"))
> test_task_vma_dead_task();
> + if (test__start_subtest("task_vma_buildid"))
> + test_task_vma_buildid();
> if (test__start_subtest("task_btf"))
> test_task_btf();
> if (test__start_subtest("tcp4"))
> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_vma_buildid.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_vma_buildid.c
> new file mode 100644
> index 000000000000..25e2179ae5f4
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_vma_buildid.c
> @@ -0,0 +1,49 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +#include "bpf_iter.h"
> +#include <bpf/bpf_helpers.h>
> +#include <string.h>
> +
> +char _license[] SEC("license") = "GPL";
> +
> +#define VM_EXEC 0x00000004
> +#define D_PATH_BUF_SIZE 1024
> +
> +struct {
> + __uint(type, BPF_MAP_TYPE_HASH);
> + __uint(max_entries, 10000);
> + __type(key, char[D_PATH_BUF_SIZE]);
> + __type(value, struct build_id);
> +} files SEC(".maps");
> +
> +static char tmp_key[D_PATH_BUF_SIZE];
> +static struct build_id tmp_data;
> +
> +SEC("iter/task_vma") int proc_maps(struct bpf_iter__task_vma *ctx)
nit: let's keep SEC() on separate line from function itself
> +{
> + struct vm_area_struct *vma = ctx->vma;
> + struct seq_file *seq = ctx->meta->seq;
> + struct task_struct *task = ctx->task;
> + unsigned long file_key;
> + struct file *file;
> +
> + if (task == (void *)0 || vma == (void *)0)
> + return 0;
> +
> + if (!(vma->vm_flags & VM_EXEC))
> + return 0;
> +
> + file = vma->vm_file;
> + if (!file)
> + return 0;
> +
> + memset(tmp_key, 0x0, D_PATH_BUF_SIZE);
__builtin_memset() to not rely on compiler optimization?
> + bpf_d_path(&file->f_path, (char *) &tmp_key, D_PATH_BUF_SIZE);
> +
> + if (bpf_map_lookup_elem(&files, &tmp_key))
> + return 0;
> +
> + memcpy(&tmp_data, file->f_bid, sizeof(*file->f_bid));
same about __builtin_memcpy()
> + bpf_map_update_elem(&files, &tmp_key, &tmp_data, 0);
> + return 0;
> +}
> --
> 2.39.1
>
On Thu, Feb 2, 2023 at 7:33 AM Jiri Olsa <[email protected]> wrote:
>
> On Thu, Feb 02, 2023 at 03:10:30PM +0000, Matthew Wilcox wrote:
> > On Wed, Feb 01, 2023 at 02:57:32PM +0100, Jiri Olsa wrote:
> > > hi,
> > > we have a use cases for bpf programs to use binary file's build id.
> >
> > What is your use case? Is it some hobbyist thing or is it something
> > that distro kernels are all going to enable?
> >
>
> our use case is for hubble/tetragon [1] and we are asked to report
> buildid of executed binary.. but the monitoring process is running
> in its own pod and can't access the the binaries outside of it, so
> we need to be able to read it in kernel
>
> I understand Hao Luo has also use case for that [2]
>
Sorry for the late reply.
We use BPF to profile stacktraces and build id is more useful than
instruction addresses. However, sometimes we need to record
stacktraces from an atomic context. In that case, if the page that
contains the build id string is not in the page cache, we would fail
to get build id. Storing the build id in file object solves this
problem and helps us get build id more reliably.
> jirka
>
>
> [1] https://github.com/cilium/tetragon/
> [2] https://lore.kernel.org/bpf/CA+khW7gAYHmoUkq0UqTiZjdOqARLG256USj3uFwi6z_FyZf31w@mail.gmail.com/
On Wed, Feb 1, 2023 at 5:58 AM Jiri Olsa <[email protected]> wrote:
>
> Use build id from file object in stackmap if it's available.
>
> Signed-off-by: Jiri Olsa <[email protected]>
> ---
Can we insert the lookup from vma->vm_file in build_id_parse() rather
than its callers?
> kernel/bpf/stackmap.c | 8 ++++++++
> 1 file changed, 8 insertions(+)
>
> diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
> index aecea7451b61..944cb260a42c 100644
> --- a/kernel/bpf/stackmap.c
> +++ b/kernel/bpf/stackmap.c
> @@ -156,7 +156,15 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
> goto build_id_valid;
> }
> vma = find_vma(current->mm, ips[i]);
> +#ifdef CONFIG_FILE_BUILD_ID
> + if (vma && vma->vm_file && vma->vm_file->f_bid) {
> + memcpy(id_offs[i].build_id,
> + vma->vm_file->f_bid->data,
> + vma->vm_file->f_bid->sz);
> + } else {
> +#else
> if (!vma || build_id_parse(vma, id_offs[i].build_id, NULL)) {
> +#endif
> /* per entry fall back to ips */
> id_offs[i].status = BPF_STACK_BUILD_ID_IP;
> id_offs[i].ip = ips[i];
> --
> 2.39.1
>
On Wed, Feb 08, 2023 at 11:23:13PM -0800, Hao Luo wrote:
> On Wed, Feb 1, 2023 at 5:58 AM Jiri Olsa <[email protected]> wrote:
> >
> > Use build id from file object in stackmap if it's available.
> >
> > Signed-off-by: Jiri Olsa <[email protected]>
> > ---
>
> Can we insert the lookup from vma->vm_file in build_id_parse() rather
> than its callers?
that might simplify also the perf code.. we might need to rename
it though.. maybe build_id_read(vma,...), I'll check
thanks,
jirka
>
> > kernel/bpf/stackmap.c | 8 ++++++++
> > 1 file changed, 8 insertions(+)
> >
> > diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
> > index aecea7451b61..944cb260a42c 100644
> > --- a/kernel/bpf/stackmap.c
> > +++ b/kernel/bpf/stackmap.c
> > @@ -156,7 +156,15 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
> > goto build_id_valid;
> > }
> > vma = find_vma(current->mm, ips[i]);
> > +#ifdef CONFIG_FILE_BUILD_ID
> > + if (vma && vma->vm_file && vma->vm_file->f_bid) {
> > + memcpy(id_offs[i].build_id,
> > + vma->vm_file->f_bid->data,
> > + vma->vm_file->f_bid->sz);
> > + } else {
> > +#else
> > if (!vma || build_id_parse(vma, id_offs[i].build_id, NULL)) {
> > +#endif
> > /* per entry fall back to ips */
> > id_offs[i].status = BPF_STACK_BUILD_ID_IP;
> > id_offs[i].ip = ips[i];
> > --
> > 2.39.1
> >
On Wed, Feb 08, 2023 at 04:01:42PM -0800, Andrii Nakryiko wrote:
SNIP
> > +static void test_task_vma_buildid(void)
> > +{
> > + int err, iter_fd = -1, proc_maps_fd = -1;
> > + struct bpf_iter_task_vma_buildid *skel;
> > + char key[D_PATH_BUF_SIZE], *prev_key;
> > + char bpf_build_id[BUILDID_STR_SIZE];
> > + int len, files_fd, i, cnt = 0;
> > + struct build_id val;
> > + char *build_id;
> > + char c;
> > +
> > + skel = bpf_iter_task_vma_buildid__open();
> > + if (!ASSERT_OK_PTR(skel, "bpf_iter_task_vma_buildid__open"))
> > + return;
> > +
> > + err = bpf_iter_task_vma_buildid__load(skel);
> > + if (!ASSERT_OK(err, "bpf_iter_task_vma_buildid__load"))
> > + goto out;
>
> minor: you can do __open_and_load() in one step
right, I copied that from another test, but removed all the
setup in between, so we can actually call just __open_and_load
SNIP
> > + memset(bpf_build_id, 0x0, sizeof(bpf_build_id));
> > + for (i = 0; i < val.sz; i++) {
> > + sprintf(bpf_build_id + i*2, "%02x",
> > + (unsigned char) val.data[i]);
> > + }
> > +
> > + if (!ASSERT_OK(read_buildid(key, &build_id), "read_buildid"))
> > + break;
> > +
> > + printf("BUILDID %s %s %s\n", bpf_build_id, build_id, key);
>
> debugging leftover or intentional?
>
> > + ASSERT_OK(strncmp(bpf_build_id, build_id, strlen(bpf_build_id)), "buildid_cmp");
> > +
> > + free(build_id);
> > + prev_key = key;
> > + cnt++;
> > + }
> > +
> > + printf("checked %d files\n", cnt);
>
> ditto
both intentional, first one can go out I guess, but the
number of checked files seemed interesting to me ;-)
SNIP
> > diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_vma_buildid.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_vma_buildid.c
> > new file mode 100644
> > index 000000000000..25e2179ae5f4
> > --- /dev/null
> > +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_vma_buildid.c
> > @@ -0,0 +1,49 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +
> > +#include "bpf_iter.h"
> > +#include <bpf/bpf_helpers.h>
> > +#include <string.h>
> > +
> > +char _license[] SEC("license") = "GPL";
> > +
> > +#define VM_EXEC 0x00000004
> > +#define D_PATH_BUF_SIZE 1024
> > +
> > +struct {
> > + __uint(type, BPF_MAP_TYPE_HASH);
> > + __uint(max_entries, 10000);
> > + __type(key, char[D_PATH_BUF_SIZE]);
> > + __type(value, struct build_id);
> > +} files SEC(".maps");
> > +
> > +static char tmp_key[D_PATH_BUF_SIZE];
> > +static struct build_id tmp_data;
> > +
> > +SEC("iter/task_vma") int proc_maps(struct bpf_iter__task_vma *ctx)
>
> nit: let's keep SEC() on separate line from function itself
ok
>
> > +{
> > + struct vm_area_struct *vma = ctx->vma;
> > + struct seq_file *seq = ctx->meta->seq;
> > + struct task_struct *task = ctx->task;
> > + unsigned long file_key;
> > + struct file *file;
> > +
> > + if (task == (void *)0 || vma == (void *)0)
> > + return 0;
> > +
> > + if (!(vma->vm_flags & VM_EXEC))
> > + return 0;
> > +
> > + file = vma->vm_file;
> > + if (!file)
> > + return 0;
> > +
> > + memset(tmp_key, 0x0, D_PATH_BUF_SIZE);
>
> __builtin_memset() to not rely on compiler optimization?
>
> > + bpf_d_path(&file->f_path, (char *) &tmp_key, D_PATH_BUF_SIZE);
> > +
> > + if (bpf_map_lookup_elem(&files, &tmp_key))
> > + return 0;
> > +
> > + memcpy(&tmp_data, file->f_bid, sizeof(*file->f_bid));
>
> same about __builtin_memcpy()
ah ok, did not know that, will check.. curious what could
go wrong by using not '__builtin_...' version?
thanks,
jirka
On Wed, Feb 08, 2023 at 03:58:06PM -0800, Andrii Nakryiko wrote:
SNIP
> > +
> > + /* parent, update child's pid and kick it */
> > + skel->bss->pid = child_pid;
> > +
> > + err = file_build_id__attach(skel);
> > + if (!ASSERT_OK(err, "file_build_id__attach"))
> > + goto out;
> > +
> > + err = write(go[1], &c, 1);
> > + if (!ASSERT_EQ(err, 1, "child_write_pipe"))
> > + goto out;
> > +
> > + /* wait for child to exit */
> > + waitpid(child_pid, &child_status, 0);
> > + if (!ASSERT_EQ(WEXITSTATUS(child_status), 0, "child_exit_value"))
> > + goto out;
> > +
> > + if (!ASSERT_OK(read_buildid("/bin/bash", &bid), "read_buildid"))
>
> can we use urandom_read for build_id ? And it would also be nice to
> check that build id fetching works for liburandom_read.so as well.
ok, will be better together with the shared library
SNIP
> > diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
> > index 09a16a77bae4..f5557890e383 100644
> > --- a/tools/testing/selftests/bpf/trace_helpers.c
> > +++ b/tools/testing/selftests/bpf/trace_helpers.c
> > @@ -9,6 +9,7 @@
> > #include <poll.h>
> > #include <unistd.h>
> > #include <linux/perf_event.h>
> > +#include <linux/limits.h>
> > #include <sys/mman.h>
> > #include "trace_helpers.h"
> >
> > @@ -230,3 +231,37 @@ ssize_t get_rel_offset(uintptr_t addr)
> > fclose(f);
> > return -EINVAL;
> > }
> > +
> > +int read_buildid(const char *path, char **build_id)
> > +{
> > + char tmp[] = "/tmp/dataXXXXXX";
> > + char buf[PATH_MAX + 200];
> > + int err, fd;
> > + FILE *f;
> > +
> > + fd = mkstemp(tmp);
> > + if (fd == -1)
> > + return -1;
> > + close(fd);
> > +
> > + snprintf(buf, sizeof(buf),
> > + "readelf -n %s 2>/dev/null | grep 'Build ID' | awk '{print $3}' > %s",
> > + path, tmp);
> > +
>
> shelling out to readelf for this is unfortunate... maybe let's write a
> libelf-based helper to fetch build ID from .note section?
right, I was thinking of that, shouldn't be that hard
and will speed things up
thanks,
jirka
On Wed, Feb 08, 2023 at 03:52:40PM -0800, Andrii Nakryiko wrote:
SNIP
> > diff --git a/include/linux/buildid.h b/include/linux/buildid.h
> > index 3b7a0ff4642f..7c818085ad2c 100644
> > --- a/include/linux/buildid.h
> > +++ b/include/linux/buildid.h
> > @@ -3,9 +3,15 @@
> > #define _LINUX_BUILDID_H
> >
> > #include <linux/mm_types.h>
> > +#include <linux/slab.h>
> >
> > #define BUILD_ID_SIZE_MAX 20
> >
> > +struct build_id {
> > + u32 sz;
> > + char data[BUILD_ID_SIZE_MAX];
>
> don't know if 21 vs 24 matters for kmem_cache_create(), but we don't
> need 4 bytes to store build_id size, given max size is 20, so maybe
> use u8 for sz?
ok
>
> > +};
> > +
> > int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id,
> > __u32 *size);
> > int build_id_parse_buf(const void *buf, unsigned char *build_id, u32 buf_size);
> > @@ -17,4 +23,15 @@ void init_vmlinux_build_id(void);
> > static inline void init_vmlinux_build_id(void) { }
> > #endif
> >
> > +#ifdef CONFIG_FILE_BUILD_ID
> > +void __init build_id_init(void);
> > +void build_id_free(struct build_id *bid);
> > +int vma_get_build_id(struct vm_area_struct *vma, struct build_id **bidp);
> > +void file_build_id_free(struct file *f);
> > +#else
> > +static inline void __init build_id_init(void) { }
> > +static inline void build_id_free(struct build_id *bid) { }
> > +static inline void file_build_id_free(struct file *f) { }
> > +#endif /* CONFIG_FILE_BUILD_ID */
> > +
> > #endif
> > diff --git a/include/linux/fs.h b/include/linux/fs.h
> > index c1769a2c5d70..9ad5e5fbf680 100644
> > --- a/include/linux/fs.h
> > +++ b/include/linux/fs.h
> > @@ -975,6 +975,9 @@ struct file {
> > struct address_space *f_mapping;
> > errseq_t f_wb_err;
> > errseq_t f_sb_err; /* for syncfs */
> > +#ifdef CONFIG_FILE_BUILD_ID
> > + struct build_id *f_bid;
>
> naming nit: anything wrong with f_buildid or f_build_id? all the
> related APIs use fully spelled out "build_id"
ok
SNIP
> > diff --git a/mm/mmap.c b/mm/mmap.c
> > index 425a9349e610..a06f744206e3 100644
> > --- a/mm/mmap.c
> > +++ b/mm/mmap.c
> > @@ -2530,6 +2530,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
> > pgoff_t vm_pgoff;
> > int error;
> > MA_STATE(mas, &mm->mm_mt, addr, end - 1);
> > + struct build_id *bid = NULL;
> >
> > /* Check against address space limit. */
> > if (!may_expand_vm(mm, vm_flags, len >> PAGE_SHIFT)) {
> > @@ -2626,6 +2627,13 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
> > if (error)
> > goto unmap_and_free_vma;
> >
> > +#ifdef CONFIG_FILE_BUILD_ID
> > + if (vma->vm_flags & VM_EXEC && !file->f_bid) {
> > + error = vma_get_build_id(vma, &bid);
> > + if (error)
> > + goto close_and_free_vma;
>
> do we want to fail mmap_region() if we get -ENOMEM from
> vma_get_build_id()? can't we just store ERR_PTR(error) in f_bid field?
> So we'll have f_bid == NULL for non-exec files, ERR_PTR() for when we
> tried and failed to get build ID, and a valid pointer if we succeeded?
I guess we can do that.. might be handy for debugging
also build_id_parse might fail on missing build id, so you're right,
we should not fail mmap_region in here
thanks,
jirka
On Wed, Feb 01, 2023 at 02:57:32PM +0100, Jiri Olsa wrote:
> hi,
> we have a use cases for bpf programs to use binary file's build id.
>
> After some attempts to add helpers/kfuncs [1] [2] Andrii had an idea [3]
> to store build id directly in the file object. That would solve our use
> case and might be beneficial for other profiling/tracing use cases with
> bpf programs.
>
> This RFC patchset adds new config CONFIG_FILE_BUILD_ID option, which adds
> build id object pointer to the file object when enabled. The build id is
> read/populated when the file is mmap-ed.
>
> I also added bpf and perf changes that would benefit from this.
>
> I'm not sure what's the policy on adding stuff to file object, so apologies
> if that's out of line. I'm open to any feedback or suggestions if there's
> better place or way to do this.
hi,
Matthew suggested on irc to consider inode for storing build id
I tried that and it seems to have better stats wrt allocated build
id objects, because inode is being shared among file objects
I took /proc/slabinfo output after running bpf tests
- build id stored in file:
# name <active_objs> <num_objs> <objsize> <objperslab> <pagesperslab> : tunables <limit> <batchcount> <sharedfactor> : slabdata <active_slabs> <num_slabs> <sharedavail>
build_id 668 775 160 25 1 : tunables 0 0 0 : slabdata 31 31 0
- build id stored in inode:
# name <active_objs> <num_objs> <objsize> <objperslab> <pagesperslab> : tunables <limit> <batchcount> <sharedfactor> : slabdata <active_slabs> <num_slabs> <sharedavail>
build_id 222 225 160 25 1 : tunables 0 0 0 : slabdata 9 9 0
I'm stranger to inode/fs/mm code so I'll spend some time checking on
what I possibly broke in there before I send it, but I'd appreciate
any early feedback ;-)
the code is in here:
git://git.kernel.org/pub/scm/linux/kernel/git/jolsa/perf.git
inode_build_id
I'll send another version with inode if there's no objection
thanks,
jirka
>
> thanks,
> jirka
>
>
> [1] https://lore.kernel.org/bpf/[email protected]/
> [2] https://lore.kernel.org/bpf/[email protected]/
> [3] https://lore.kernel.org/bpf/CAEf4BzaZCUoxN_X2ALXwQeFTCwtL17R4P_B_-hUCcidfyO2xyQ@mail.gmail.com/
> ---
> Jiri Olsa (5):
> mm: Store build id in file object
> bpf: Use file object build id in stackmap
> perf: Use file object build id in perf_event_mmap_event
> selftests/bpf: Add file_build_id test
> selftests/bpf: Add iter_task_vma_buildid test
>
> fs/file_table.c | 3 +++
> include/linux/buildid.h | 17 +++++++++++++++++
> include/linux/fs.h | 3 +++
> kernel/bpf/stackmap.c | 8 ++++++++
> kernel/events/core.c | 43 +++++++++++++++++++++++++++++++++++++++----
> lib/buildid.c | 44 ++++++++++++++++++++++++++++++++++++++++++++
> mm/Kconfig | 7 +++++++
> mm/mmap.c | 15 +++++++++++++++
> tools/testing/selftests/bpf/prog_tests/bpf_iter.c | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> tools/testing/selftests/bpf/prog_tests/file_build_id.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> tools/testing/selftests/bpf/progs/bpf_iter_task_vma_buildid.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
> tools/testing/selftests/bpf/progs/file_build_id.c | 34 ++++++++++++++++++++++++++++++++++
> tools/testing/selftests/bpf/trace_helpers.c | 35 +++++++++++++++++++++++++++++++++++
> tools/testing/selftests/bpf/trace_helpers.h | 1 +
> 14 files changed, 413 insertions(+), 4 deletions(-)
> create mode 100644 tools/testing/selftests/bpf/prog_tests/file_build_id.c
> create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_task_vma_buildid.c
> create mode 100644 tools/testing/selftests/bpf/progs/file_build_id.c
On Thu, Feb 9, 2023 at 6:04 AM Jiri Olsa <[email protected]> wrote:
>
> On Wed, Feb 08, 2023 at 04:01:42PM -0800, Andrii Nakryiko wrote:
>
> SNIP
>
> > > +static void test_task_vma_buildid(void)
> > > +{
> > > + int err, iter_fd = -1, proc_maps_fd = -1;
> > > + struct bpf_iter_task_vma_buildid *skel;
> > > + char key[D_PATH_BUF_SIZE], *prev_key;
> > > + char bpf_build_id[BUILDID_STR_SIZE];
> > > + int len, files_fd, i, cnt = 0;
> > > + struct build_id val;
> > > + char *build_id;
> > > + char c;
> > > +
> > > + skel = bpf_iter_task_vma_buildid__open();
> > > + if (!ASSERT_OK_PTR(skel, "bpf_iter_task_vma_buildid__open"))
> > > + return;
> > > +
> > > + err = bpf_iter_task_vma_buildid__load(skel);
> > > + if (!ASSERT_OK(err, "bpf_iter_task_vma_buildid__load"))
> > > + goto out;
> >
> > minor: you can do __open_and_load() in one step
>
> right, I copied that from another test, but removed all the
> setup in between, so we can actually call just __open_and_load
>
> SNIP
>
> > > + memset(bpf_build_id, 0x0, sizeof(bpf_build_id));
> > > + for (i = 0; i < val.sz; i++) {
> > > + sprintf(bpf_build_id + i*2, "%02x",
> > > + (unsigned char) val.data[i]);
> > > + }
> > > +
> > > + if (!ASSERT_OK(read_buildid(key, &build_id), "read_buildid"))
> > > + break;
> > > +
> > > + printf("BUILDID %s %s %s\n", bpf_build_id, build_id, key);
> >
> > debugging leftover or intentional?
> >
> > > + ASSERT_OK(strncmp(bpf_build_id, build_id, strlen(bpf_build_id)), "buildid_cmp");
> > > +
> > > + free(build_id);
> > > + prev_key = key;
> > > + cnt++;
> > > + }
> > > +
> > > + printf("checked %d files\n", cnt);
> >
> > ditto
>
> both intentional, first one can go out I guess, but the
> number of checked files seemed interesting to me ;-)
>
> SNIP
>
> > > diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_vma_buildid.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_vma_buildid.c
> > > new file mode 100644
> > > index 000000000000..25e2179ae5f4
> > > --- /dev/null
> > > +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_vma_buildid.c
> > > @@ -0,0 +1,49 @@
> > > +// SPDX-License-Identifier: GPL-2.0
> > > +
> > > +#include "bpf_iter.h"
> > > +#include <bpf/bpf_helpers.h>
> > > +#include <string.h>
> > > +
> > > +char _license[] SEC("license") = "GPL";
> > > +
> > > +#define VM_EXEC 0x00000004
> > > +#define D_PATH_BUF_SIZE 1024
> > > +
> > > +struct {
> > > + __uint(type, BPF_MAP_TYPE_HASH);
> > > + __uint(max_entries, 10000);
> > > + __type(key, char[D_PATH_BUF_SIZE]);
> > > + __type(value, struct build_id);
> > > +} files SEC(".maps");
> > > +
> > > +static char tmp_key[D_PATH_BUF_SIZE];
> > > +static struct build_id tmp_data;
> > > +
> > > +SEC("iter/task_vma") int proc_maps(struct bpf_iter__task_vma *ctx)
> >
> > nit: let's keep SEC() on separate line from function itself
>
> ok
>
> >
> > > +{
> > > + struct vm_area_struct *vma = ctx->vma;
> > > + struct seq_file *seq = ctx->meta->seq;
> > > + struct task_struct *task = ctx->task;
> > > + unsigned long file_key;
> > > + struct file *file;
> > > +
> > > + if (task == (void *)0 || vma == (void *)0)
> > > + return 0;
> > > +
> > > + if (!(vma->vm_flags & VM_EXEC))
> > > + return 0;
> > > +
> > > + file = vma->vm_file;
> > > + if (!file)
> > > + return 0;
> > > +
> > > + memset(tmp_key, 0x0, D_PATH_BUF_SIZE);
> >
> > __builtin_memset() to not rely on compiler optimization?
> >
> > > + bpf_d_path(&file->f_path, (char *) &tmp_key, D_PATH_BUF_SIZE);
> > > +
> > > + if (bpf_map_lookup_elem(&files, &tmp_key))
> > > + return 0;
> > > +
> > > + memcpy(&tmp_data, file->f_bid, sizeof(*file->f_bid));
> >
> > same about __builtin_memcpy()
>
> ah ok, did not know that, will check.. curious what could
> go wrong by using not '__builtin_...' version?
if compiler doesn't optimize it into __builtin_memcpy() (which results
in just explicit assembly code to copy/set data word-by-word), then
BPF program will do actual call to memset(), which with C rules would
be inferred as extern symbol, which would fail BPF object loading with
error along the lines of "couldn't resolve memset extern".
>
> thanks,
> jirka
Hi Jiri,
On Thu, Feb 9, 2023 at 6:25 AM Jiri Olsa <[email protected]> wrote:
>
> On Wed, Feb 01, 2023 at 02:57:32PM +0100, Jiri Olsa wrote:
> > hi,
> > we have a use cases for bpf programs to use binary file's build id.
> >
> > After some attempts to add helpers/kfuncs [1] [2] Andrii had an idea [3]
> > to store build id directly in the file object. That would solve our use
> > case and might be beneficial for other profiling/tracing use cases with
> > bpf programs.
> >
> > This RFC patchset adds new config CONFIG_FILE_BUILD_ID option, which adds
> > build id object pointer to the file object when enabled. The build id is
> > read/populated when the file is mmap-ed.
> >
> > I also added bpf and perf changes that would benefit from this.
Thanks for working on this!
> >
> > I'm not sure what's the policy on adding stuff to file object, so apologies
> > if that's out of line. I'm open to any feedback or suggestions if there's
> > better place or way to do this.
>
> hi,
> Matthew suggested on irc to consider inode for storing build id
Yeah, that's my idea too.
>
> I tried that and it seems to have better stats wrt allocated build
> id objects, because inode is being shared among file objects
>
> I took /proc/slabinfo output after running bpf tests
>
> - build id stored in file:
>
> # name <active_objs> <num_objs> <objsize> <objperslab> <pagesperslab> : tunables <limit> <batchcount> <sharedfactor> : slabdata <active_slabs> <num_slabs> <sharedavail>
> build_id 668 775 160 25 1 : tunables 0 0 0 : slabdata 31 31 0
>
> - build id stored in inode:
>
> # name <active_objs> <num_objs> <objsize> <objperslab> <pagesperslab> : tunables <limit> <batchcount> <sharedfactor> : slabdata <active_slabs> <num_slabs> <sharedavail>
> build_id 222 225 160 25 1 : tunables 0 0 0 : slabdata 9 9 0
Cool!
>
>
> I'm stranger to inode/fs/mm code so I'll spend some time checking on
> what I possibly broke in there before I send it, but I'd appreciate
> any early feedback ;-)
>
> the code is in here:
> git://git.kernel.org/pub/scm/linux/kernel/git/jolsa/perf.git
> inode_build_id
>
> I'll send another version with inode if there's no objection
I'll take a look.
Thanks,
Namhyung
> >
> > [1] https://lore.kernel.org/bpf/[email protected]/
> > [2] https://lore.kernel.org/bpf/[email protected]/
> > [3] https://lore.kernel.org/bpf/CAEf4BzaZCUoxN_X2ALXwQeFTCwtL17R4P_B_-hUCcidfyO2xyQ@mail.gmail.com/
> > ---
> > Jiri Olsa (5):
> > mm: Store build id in file object
> > bpf: Use file object build id in stackmap
> > perf: Use file object build id in perf_event_mmap_event
> > selftests/bpf: Add file_build_id test
> > selftests/bpf: Add iter_task_vma_buildid test
> >
> > fs/file_table.c | 3 +++
> > include/linux/buildid.h | 17 +++++++++++++++++
> > include/linux/fs.h | 3 +++
> > kernel/bpf/stackmap.c | 8 ++++++++
> > kernel/events/core.c | 43 +++++++++++++++++++++++++++++++++++++++----
> > lib/buildid.c | 44 ++++++++++++++++++++++++++++++++++++++++++++
> > mm/Kconfig | 7 +++++++
> > mm/mmap.c | 15 +++++++++++++++
> > tools/testing/selftests/bpf/prog_tests/bpf_iter.c | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> > tools/testing/selftests/bpf/prog_tests/file_build_id.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> > tools/testing/selftests/bpf/progs/bpf_iter_task_vma_buildid.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
> > tools/testing/selftests/bpf/progs/file_build_id.c | 34 ++++++++++++++++++++++++++++++++++
> > tools/testing/selftests/bpf/trace_helpers.c | 35 +++++++++++++++++++++++++++++++++++
> > tools/testing/selftests/bpf/trace_helpers.h | 1 +
> > 14 files changed, 413 insertions(+), 4 deletions(-)
> > create mode 100644 tools/testing/selftests/bpf/prog_tests/file_build_id.c
> > create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_task_vma_buildid.c
> > create mode 100644 tools/testing/selftests/bpf/progs/file_build_id.c