Currently, we are only using the linear search method to find the type id
by the name, which has a time complexity of O(n). This change involves
sorting the names of btf types in ascending order and using binary search,
which has a time complexity of O(log(n)). This idea was inspired by the
following patch:
60443c88f3a8 ("kallsyms: Improve the performance of kallsyms_lookup_name()").
At present, this improvement is only for searching in vmlinux's and
module's BTFs.
Another change is the search direction: we search the BTF first and then
its base, and the type id of the first matching btf_type is returned.
Here is a timing result for finding 87590 type ids by their names in
vmlinux's BTF:
Before: 158426 ms
After: 114 ms
The average lookup performance has improved more than 1000x in the above scenario.
Signed-off-by: Donglin Peng <[email protected]>
---
Changes in RFC v3:
- Sort the btf types during the build process in order to reduce memory usage
and decrease boot time.
RFC v2:
- https://lore.kernel.org/all/[email protected]
---
include/linux/btf.h | 1 +
kernel/bpf/btf.c | 160 +++++++++++++++++++++++++++++++++---
tools/lib/bpf/btf.c | 195 ++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 345 insertions(+), 11 deletions(-)
diff --git a/include/linux/btf.h b/include/linux/btf.h
index f9e56fd12a9f..1dc1000a7dc9 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -214,6 +214,7 @@ bool btf_is_kernel(const struct btf *btf);
bool btf_is_module(const struct btf *btf);
struct module *btf_try_get_module(const struct btf *btf);
u32 btf_nr_types(const struct btf *btf);
+u32 btf_type_cnt(const struct btf *btf);
bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s,
const struct btf_member *m,
u32 expected_offset, u32 expected_size);
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 821063660d9f..5b7b464204bf 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -262,6 +262,7 @@ struct btf {
u32 data_size;
refcount_t refcnt;
u32 id;
+ u32 nr_types_sorted;
struct rcu_head rcu;
struct btf_kfunc_set_tab *kfunc_set_tab;
struct btf_id_dtor_kfunc_tab *dtor_kfunc_tab;
@@ -542,23 +543,102 @@ u32 btf_nr_types(const struct btf *btf)
return total;
}
-s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind)
+u32 btf_type_cnt(const struct btf *btf)
+{
+ return btf->start_id + btf->nr_types;
+}
+
+static s32 btf_find_by_name_bsearch(const struct btf *btf, const char *name,
+ int *start, int *end)
{
const struct btf_type *t;
- const char *tname;
- u32 i, total;
+ const char *name_buf;
+ int low, low_start, mid, high, high_end;
+ int ret, start_id;
+
+ start_id = btf->base_btf ? btf->start_id : 1;
+ low_start = low = start_id;
+ high_end = high = start_id + btf->nr_types_sorted - 1;
+
+ while (low <= high) {
+ mid = low + (high - low) / 2;
+ t = btf_type_by_id(btf, mid);
+ name_buf = btf_name_by_offset(btf, t->name_off);
+ ret = strcmp(name, name_buf);
+ if (ret > 0)
+ low = mid + 1;
+ else if (ret < 0)
+ high = mid - 1;
+ else
+ break;
+ }
- total = btf_nr_types(btf);
- for (i = 1; i < total; i++) {
- t = btf_type_by_id(btf, i);
- if (BTF_INFO_KIND(t->info) != kind)
- continue;
+ if (low > high)
+ return -ESRCH;
- tname = btf_name_by_offset(btf, t->name_off);
- if (!strcmp(tname, name))
- return i;
+ if (start) {
+ low = mid;
+ while (low > low_start) {
+ t = btf_type_by_id(btf, low-1);
+ name_buf = btf_name_by_offset(btf, t->name_off);
+ if (strcmp(name, name_buf))
+ break;
+ low--;
+ }
+ *start = low;
}
+ if (end) {
+ high = mid;
+ while (high < high_end) {
+ t = btf_type_by_id(btf, high+1);
+ name_buf = btf_name_by_offset(btf, t->name_off);
+ if (strcmp(name, name_buf))
+ break;
+ high++;
+ }
+ *end = high;
+ }
+
+ return mid;
+}
+
+s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind)
+{
+ const struct btf_type *t;
+ const char *tname;
+ int start, end;
+ s32 id, total;
+
+ do {
+ if (btf->nr_types_sorted) {
+ /* binary search */
+ id = btf_find_by_name_bsearch(btf, name, &start, &end);
+ if (id > 0) {
+ while (start <= end) {
+ t = btf_type_by_id(btf, start);
+ if (BTF_INFO_KIND(t->info) == kind)
+ return start;
+ start++;
+ }
+ }
+ } else {
+ /* linear search */
+ total = btf_type_cnt(btf);
+ for (id = btf->base_btf ? btf->start_id : 1;
+ id < total; id++) {
+ t = btf_type_by_id(btf, id);
+ if (BTF_INFO_KIND(t->info) != kind)
+ continue;
+
+ tname = btf_name_by_offset(btf, t->name_off);
+ if (!strcmp(tname, name))
+ return id;
+ }
+ }
+ btf = btf->base_btf;
+ } while (btf);
+
return -ENOENT;
}
@@ -5979,6 +6059,56 @@ int get_kern_ctx_btf_id(struct bpf_verifier_log *log, enum bpf_prog_type prog_ty
return kctx_type_id;
}
+static int btf_check_sort(struct btf *btf, int start_id)
+{
+ int i, n, nr_names = 0;
+
+ n = btf_nr_types(btf);
+ for (i = start_id; i < n; i++) {
+ const struct btf_type *t;
+ const char *name;
+
+ t = btf_type_by_id(btf, i);
+ if (!t)
+ return -EINVAL;
+
+ name = btf_str_by_offset(btf, t->name_off);
+ if (!str_is_empty(name))
+ nr_names++;
+ }
+
+ if (nr_names < 3)
+ goto out;
+
+ for (i = 0; i < nr_names - 1; i++) {
+ const struct btf_type *t1, *t2;
+ const char *s1, *s2;
+
+ t1 = btf_type_by_id(btf, start_id + i);
+ if (!t1)
+ return -EINVAL;
+
+ s1 = btf_str_by_offset(btf, t1->name_off);
+ if (str_is_empty(s1))
+ goto out;
+
+ t2 = btf_type_by_id(btf, start_id + i + 1);
+ if (!t2)
+ return -EINVAL;
+
+ s2 = btf_str_by_offset(btf, t2->name_off);
+ if (str_is_empty(s2))
+ goto out;
+
+ if (strcmp(s1, s2) > 0)
+ goto out;
+ }
+
+ btf->nr_types_sorted = nr_names;
+out:
+ return 0;
+}
+
BTF_ID_LIST(bpf_ctx_convert_btf_id)
BTF_ID(struct, bpf_ctx_convert)
@@ -6029,6 +6159,10 @@ struct btf *btf_parse_vmlinux(void)
if (err)
goto errout;
+ err = btf_check_sort(btf, 1);
+ if (err)
+ goto errout;
+
/* btf_parse_vmlinux() runs under bpf_verifier_lock */
bpf_ctx_convert.t = btf_type_by_id(btf, bpf_ctx_convert_btf_id[0]);
@@ -6111,6 +6245,10 @@ static struct btf *btf_parse_module(const char *module_name, const void *data, u
if (err)
goto errout;
+ err = btf_check_sort(btf, btf_nr_types(base_btf));
+ if (err)
+ goto errout;
+
btf_verifier_env_free(env);
refcount_set(&btf->refcnt, 1);
return btf;
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 2d0840ef599a..93c1ab677bfa 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -1,6 +1,9 @@
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
/* Copyright (c) 2018 Facebook */
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
#include <byteswap.h>
#include <endian.h>
#include <stdio.h>
@@ -3072,6 +3075,7 @@ static int btf_dedup_ref_types(struct btf_dedup *d);
static int btf_dedup_resolve_fwds(struct btf_dedup *d);
static int btf_dedup_compact_types(struct btf_dedup *d);
static int btf_dedup_remap_types(struct btf_dedup *d);
+static int btf_sort_type_by_name(struct btf *btf);
/*
* Deduplicate BTF types and strings.
@@ -3270,6 +3274,11 @@ int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts)
pr_debug("btf_dedup_remap_types failed:%d\n", err);
goto done;
}
+ err = btf_sort_type_by_name(btf);
+ if (err < 0) {
+ pr_debug("btf_sort_type_by_name failed:%d\n", err);
+ goto done;
+ }
done:
btf_dedup_free(d);
@@ -5212,3 +5221,189 @@ int btf_ext_visit_str_offs(struct btf_ext *btf_ext, str_off_visit_fn visit, void
return 0;
}
+
+static int btf_compare_type_name(const void *a, const void *b, void *priv)
+{
+ struct btf *btf = (struct btf *)priv;
+ __u32 ta = *(const __u32 *)a;
+ __u32 tb = *(const __u32 *)b;
+ struct btf_type *bta, *btb;
+ const char *na, *nb;
+
+ bta = (struct btf_type *)(btf->types_data + ta);
+ btb = (struct btf_type *)(btf->types_data + tb);
+ na = btf__str_by_offset(btf, bta->name_off);
+ nb = btf__str_by_offset(btf, btb->name_off);
+
+ return strcmp(na, nb);
+}
+
+static int btf_compare_offs(const void *o1, const void *o2)
+{
+ __u32 *offs1 = (__u32 *)o1;
+ __u32 *offs2 = (__u32 *)o2;
+
+ return *offs1 - *offs2;
+}
+
+static inline __u32 btf_get_mapped_type(struct btf *btf, __u32 *maps, __u32 type)
+{
+ if (type < btf->start_id)
+ return type;
+ return maps[type - btf->start_id] + btf->start_id;
+}
+
+/*
+ * Collect and move the btf_types with names to the start location, and
+ * sort them in ascending order by name, so we can use the binary search
+ * method.
+ */
+static int btf_sort_type_by_name(struct btf *btf)
+{
+ struct btf_type *bt;
+ __u32 *new_type_offs = NULL, *new_type_offs_noname = NULL;
+ __u32 *maps = NULL, *found_offs;
+ void *new_types_data = NULL, *loc_data;
+ int i, j, k, type_cnt, ret = 0, type_size;
+ __u32 data_size;
+
+ if (btf_ensure_modifiable(btf))
+ return libbpf_err(-ENOMEM);
+
+ type_cnt = btf->nr_types;
+ data_size = btf->type_offs_cap * sizeof(*new_type_offs);
+
+ maps = (__u32 *)malloc(type_cnt * sizeof(__u32));
+ if (!maps) {
+ ret = -ENOMEM;
+ goto err_out;
+ }
+
+ new_type_offs = (__u32 *)malloc(data_size);
+ if (!new_type_offs) {
+ ret = -ENOMEM;
+ goto err_out;
+ }
+
+ new_type_offs_noname = (__u32 *)malloc(data_size);
+ if (!new_type_offs_noname) {
+ ret = -ENOMEM;
+ goto err_out;
+ }
+
+ new_types_data = malloc(btf->types_data_cap);
+ if (!new_types_data) {
+ ret = -ENOMEM;
+ goto err_out;
+ }
+
+ memset(new_type_offs, 0, data_size);
+
+ for (i = 0, j = 0, k = 0; i < type_cnt; i++) {
+ const char *name;
+
+ bt = (struct btf_type *)(btf->types_data + btf->type_offs[i]);
+ name = btf__str_by_offset(btf, bt->name_off);
+ if (!name || !name[0])
+ new_type_offs_noname[k++] = btf->type_offs[i];
+ else
+ new_type_offs[j++] = btf->type_offs[i];
+ }
+
+ memmove(new_type_offs + j, new_type_offs_noname, sizeof(__u32) * k);
+
+ qsort_r(new_type_offs, j, sizeof(*new_type_offs),
+ btf_compare_type_name, btf);
+
+ for (i = 0; i < type_cnt; i++) {
+ found_offs = bsearch(&new_type_offs[i], btf->type_offs, type_cnt,
+ sizeof(__u32), btf_compare_offs);
+ if (!found_offs) {
+ ret = -EINVAL;
+ goto err_out;
+ }
+ maps[found_offs - btf->type_offs] = i;
+ }
+
+ loc_data = new_types_data;
+ for (i = 0; i < type_cnt; i++, loc_data += type_size) {
+ bt = (struct btf_type *)(btf->types_data + new_type_offs[i]);
+ type_size = btf_type_size(bt);
+ if (type_size < 0) {
+ ret = type_size;
+ goto err_out;
+ }
+
+ memcpy(loc_data, bt, type_size);
+
+ bt = (struct btf_type *)loc_data;
+ switch (btf_kind(bt)) {
+ case BTF_KIND_PTR:
+ case BTF_KIND_CONST:
+ case BTF_KIND_VOLATILE:
+ case BTF_KIND_RESTRICT:
+ case BTF_KIND_TYPEDEF:
+ case BTF_KIND_TYPE_TAG:
+ case BTF_KIND_FUNC:
+ case BTF_KIND_VAR:
+ case BTF_KIND_DECL_TAG:
+ bt->type = btf_get_mapped_type(btf, maps, bt->type);
+ break;
+ case BTF_KIND_ARRAY: {
+ struct btf_array *arr = (void *)(bt + 1);
+
+ arr->type = btf_get_mapped_type(btf, maps, arr->type);
+ arr->index_type = btf_get_mapped_type(btf, maps, arr->index_type);
+ break;
+ }
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION: {
+ struct btf_member *m = (void *)(bt + 1);
+ __u16 vlen = BTF_INFO_VLEN(bt->info);
+
+ for (j = 0; j < vlen; j++, m++)
+ m->type = btf_get_mapped_type(btf, maps, m->type);
+ break;
+ }
+ case BTF_KIND_FUNC_PROTO: {
+ struct btf_param *p = (void *)(bt + 1);
+ __u16 vlen = BTF_INFO_VLEN(bt->info);
+
+ bt->type = btf_get_mapped_type(btf, maps, bt->type);
+ for (j = 0; j < vlen; j++, p++)
+ p->type = btf_get_mapped_type(btf, maps, p->type);
+
+ break;
+ }
+ case BTF_KIND_DATASEC: {
+ struct btf_var_secinfo *v = (void *)(bt + 1);
+ __u16 vlen = BTF_INFO_VLEN(bt->info);
+
+ for (j = 0; j < vlen; j++, v++)
+ v->type = btf_get_mapped_type(btf, maps, v->type);
+ break;
+ }
+ default:
+ break;
+ }
+ }
+
+ free(btf->type_offs);
+ btf->type_offs = new_type_offs;
+ free(btf->types_data);
+ btf->types_data = new_types_data;
+ free(maps);
+ free(new_type_offs_noname);
+ return 0;
+
+err_out:
+ if (maps)
+ free(maps);
+ if (new_type_offs)
+ free(new_type_offs);
+ if (new_type_offs_noname)
+ free(new_type_offs_noname);
+ if (new_types_data)
+ free(new_types_data);
+ return libbpf_err(ret);
+}
--
2.25.1
Hi
On Sat, 8 Jun 2024 07:08:35 -0700
Donglin Peng <[email protected]> wrote:
> Currently, we are only using the linear search method to find the type id
> by the name, which has a time complexity of O(n). This change involves
> sorting the names of btf types in ascending order and using binary search,
> which has a time complexity of O(log(n)). This idea was inspired by the
> following patch:
>
> 60443c88f3a8 ("kallsyms: Improve the performance of kallsyms_lookup_name()").
>
> At present, this improvement is only for searching in vmlinux's and
> module's BTFs.
>
> Another change is the search direction: we search the BTF first and then
> its base, and the type id of the first matching btf_type is returned.
>
> Here is a timing result for finding 87590 type ids by their names in
> vmlinux's BTF:
>
> Before: 158426 ms
> After: 114 ms
>
> The average lookup performance has improved more than 1000x in the above scenario.
This looks like a great improvement! So searching a function entry takes ~2us?
BTW, I have some comments below, but basically it looks good to me and
it passed my ftracetest.
Tested-by: Masami Hiramatsu (Google) <[email protected]>
>
> Signed-off-by: Donglin Peng <[email protected]>
> ---
> Changes in RFC v3:
> - Sort the btf types during the build process in order to reduce memory usage
> and decrease boot time.
>
> RFC v2:
> - https://lore.kernel.org/all/[email protected]
> ---
> include/linux/btf.h | 1 +
> kernel/bpf/btf.c | 160 +++++++++++++++++++++++++++++++++---
> tools/lib/bpf/btf.c | 195 ++++++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 345 insertions(+), 11 deletions(-)
>
> diff --git a/include/linux/btf.h b/include/linux/btf.h
> index f9e56fd12a9f..1dc1000a7dc9 100644
> --- a/include/linux/btf.h
> +++ b/include/linux/btf.h
> @@ -214,6 +214,7 @@ bool btf_is_kernel(const struct btf *btf);
> bool btf_is_module(const struct btf *btf);
> struct module *btf_try_get_module(const struct btf *btf);
> u32 btf_nr_types(const struct btf *btf);
> +u32 btf_type_cnt(const struct btf *btf);
> bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s,
> const struct btf_member *m,
> u32 expected_offset, u32 expected_size);
> diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
> index 821063660d9f..5b7b464204bf 100644
> --- a/kernel/bpf/btf.c
> +++ b/kernel/bpf/btf.c
> @@ -262,6 +262,7 @@ struct btf {
> u32 data_size;
> refcount_t refcnt;
> u32 id;
> + u32 nr_types_sorted;
> struct rcu_head rcu;
> struct btf_kfunc_set_tab *kfunc_set_tab;
> struct btf_id_dtor_kfunc_tab *dtor_kfunc_tab;
> @@ -542,23 +543,102 @@ u32 btf_nr_types(const struct btf *btf)
> return total;
> }
>
> -s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind)
> +u32 btf_type_cnt(const struct btf *btf)
> +{
> + return btf->start_id + btf->nr_types;
> +}
> +
> +static s32 btf_find_by_name_bsearch(const struct btf *btf, const char *name,
> + int *start, int *end)
> {
> const struct btf_type *t;
> - const char *tname;
> - u32 i, total;
> + const char *name_buf;
> + int low, low_start, mid, high, high_end;
> + int ret, start_id;
> +
> + start_id = btf->base_btf ? btf->start_id : 1;
> + low_start = low = start_id;
> + high_end = high = start_id + btf->nr_types_sorted - 1;
> +
> + while (low <= high) {
> + mid = low + (high - low) / 2;
> + t = btf_type_by_id(btf, mid);
> + name_buf = btf_name_by_offset(btf, t->name_off);
> + ret = strcmp(name, name_buf);
> + if (ret > 0)
> + low = mid + 1;
> + else if (ret < 0)
> + high = mid - 1;
> + else
> + break;
> + }
>
> - total = btf_nr_types(btf);
> - for (i = 1; i < total; i++) {
> - t = btf_type_by_id(btf, i);
> - if (BTF_INFO_KIND(t->info) != kind)
> - continue;
> + if (low > high)
> + return -ESRCH;
nit: -ENOENT ?
>
> - tname = btf_name_by_offset(btf, t->name_off);
> - if (!strcmp(tname, name))
> - return i;
> + if (start) {
> + low = mid;
> + while (low > low_start) {
> + t = btf_type_by_id(btf, low-1);
> + name_buf = btf_name_by_offset(btf, t->name_off);
> + if (strcmp(name, name_buf))
> + break;
> + low--;
> + }
> + *start = low;
> }
>
> + if (end) {
> + high = mid;
> + while (high < high_end) {
> + t = btf_type_by_id(btf, high+1);
> + name_buf = btf_name_by_offset(btf, t->name_off);
> + if (strcmp(name, name_buf))
> + break;
> + high++;
> + }
> + *end = high;
> + }
> +
> + return mid;
> +}
> +
> +s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind)
> +{
> + const struct btf_type *t;
> + const char *tname;
> + int start, end;
> + s32 id, total;
> +
> + do {
> + if (btf->nr_types_sorted) {
> + /* binary search */
> + id = btf_find_by_name_bsearch(btf, name, &start, &end);
> + if (id > 0) {
> + while (start <= end) {
> + t = btf_type_by_id(btf, start);
> + if (BTF_INFO_KIND(t->info) == kind)
> + return start;
> + start++;
> + }
> + }
> + } else {
> + /* linear search */
> + total = btf_type_cnt(btf);
> + for (id = btf->base_btf ? btf->start_id : 1;
> + id < total; id++) {
> + t = btf_type_by_id(btf, id);
> + if (BTF_INFO_KIND(t->info) != kind)
> + continue;
> +
> + tname = btf_name_by_offset(btf, t->name_off);
> + if (!strcmp(tname, name))
> + return id;
> + }
> + }
> + btf = btf->base_btf;
> + } while (btf);
> +
> return -ENOENT;
> }
>
> @@ -5979,6 +6059,56 @@ int get_kern_ctx_btf_id(struct bpf_verifier_log *log, enum bpf_prog_type prog_ty
> return kctx_type_id;
> }
>
> +static int btf_check_sort(struct btf *btf, int start_id)
> +{
> + int i, n, nr_names = 0;
> +
> + n = btf_nr_types(btf);
> + for (i = start_id; i < n; i++) {
> + const struct btf_type *t;
> + const char *name;
> +
> + t = btf_type_by_id(btf, i);
> + if (!t)
> + return -EINVAL;
> +
> + name = btf_str_by_offset(btf, t->name_off);
> + if (!str_is_empty(name))
> + nr_names++;
else {
goto out;
}
? (*)
> + }
> +
> + if (nr_names < 3)
> + goto out;
What does this `3` mean?
> +
> + for (i = 0; i < nr_names - 1; i++) {
> + const struct btf_type *t1, *t2;
> + const char *s1, *s2;
> +
> + t1 = btf_type_by_id(btf, start_id + i);
> + if (!t1)
> + return -EINVAL;
> +
> + s1 = btf_str_by_offset(btf, t1->name_off);
> + if (str_is_empty(s1))
> + goto out;
Why not continue? This case is expected, or the previous block (*)
must go `out` at that point.
> +
> + t2 = btf_type_by_id(btf, start_id + i + 1);
> + if (!t2)
> + return -EINVAL;
> +
> + s2 = btf_str_by_offset(btf, t2->name_off);
> + if (str_is_empty(s2))
> + goto out;
Ditto.
> +
> + if (strcmp(s1, s2) > 0)
> + goto out;
> + }
> +
> + btf->nr_types_sorted = nr_names;
> +out:
> + return 0;
> +}
> +
> BTF_ID_LIST(bpf_ctx_convert_btf_id)
> BTF_ID(struct, bpf_ctx_convert)
>
> @@ -6029,6 +6159,10 @@ struct btf *btf_parse_vmlinux(void)
> if (err)
> goto errout;
>
> + err = btf_check_sort(btf, 1);
Why `1`?
> + if (err)
> + goto errout;
> +
> /* btf_parse_vmlinux() runs under bpf_verifier_lock */
> bpf_ctx_convert.t = btf_type_by_id(btf, bpf_ctx_convert_btf_id[0]);
>
> @@ -6111,6 +6245,10 @@ static struct btf *btf_parse_module(const char *module_name, const void *data, u
> if (err)
> goto errout;
>
> + err = btf_check_sort(btf, btf_nr_types(base_btf));
> + if (err)
> + goto errout;
> +
> btf_verifier_env_free(env);
> refcount_set(&btf->refcnt, 1);
> return btf;
> diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
> index 2d0840ef599a..93c1ab677bfa 100644
> --- a/tools/lib/bpf/btf.c
> +++ b/tools/lib/bpf/btf.c
> @@ -1,6 +1,9 @@
> // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
> /* Copyright (c) 2018 Facebook */
>
> +#ifndef _GNU_SOURCE
> +#define _GNU_SOURCE
> +#endif
> #include <byteswap.h>
> #include <endian.h>
> #include <stdio.h>
> @@ -3072,6 +3075,7 @@ static int btf_dedup_ref_types(struct btf_dedup *d);
> static int btf_dedup_resolve_fwds(struct btf_dedup *d);
> static int btf_dedup_compact_types(struct btf_dedup *d);
> static int btf_dedup_remap_types(struct btf_dedup *d);
> +static int btf_sort_type_by_name(struct btf *btf);
>
> /*
> * Deduplicate BTF types and strings.
> @@ -3270,6 +3274,11 @@ int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts)
> pr_debug("btf_dedup_remap_types failed:%d\n", err);
> goto done;
> }
> + err = btf_sort_type_by_name(btf);
> + if (err < 0) {
> + pr_debug("btf_sort_type_by_name failed:%d\n", err);
> + goto done;
> + }
>
> done:
> btf_dedup_free(d);
> @@ -5212,3 +5221,189 @@ int btf_ext_visit_str_offs(struct btf_ext *btf_ext, str_off_visit_fn visit, void
>
> return 0;
> }
> +
> +static int btf_compare_type_name(const void *a, const void *b, void *priv)
> +{
> + struct btf *btf = (struct btf *)priv;
> + __u32 ta = *(const __u32 *)a;
> + __u32 tb = *(const __u32 *)b;
> + struct btf_type *bta, *btb;
> + const char *na, *nb;
> +
> + bta = (struct btf_type *)(btf->types_data + ta);
> + btb = (struct btf_type *)(btf->types_data + tb);
> + na = btf__str_by_offset(btf, bta->name_off);
> + nb = btf__str_by_offset(btf, btb->name_off);
> +
> + return strcmp(na, nb);
> +}
> +
> +static int btf_compare_offs(const void *o1, const void *o2)
> +{
> + __u32 *offs1 = (__u32 *)o1;
> + __u32 *offs2 = (__u32 *)o2;
> +
> + return *offs1 - *offs2;
> +}
> +
> +static inline __u32 btf_get_mapped_type(struct btf *btf, __u32 *maps, __u32 type)
> +{
> + if (type < btf->start_id)
> + return type;
> + return maps[type - btf->start_id] + btf->start_id;
> +}
> +
> +/*
> + * Collect and move the btf_types with names to the start location, and
> + * sort them in ascending order by name, so we can use the binary search
> + * method.
> + */
> +static int btf_sort_type_by_name(struct btf *btf)
> +{
> + struct btf_type *bt;
> + __u32 *new_type_offs = NULL, *new_type_offs_noname = NULL;
> + __u32 *maps = NULL, *found_offs;
> + void *new_types_data = NULL, *loc_data;
> + int i, j, k, type_cnt, ret = 0, type_size;
> + __u32 data_size;
> +
> + if (btf_ensure_modifiable(btf))
> + return libbpf_err(-ENOMEM);
> +
> + type_cnt = btf->nr_types;
> + data_size = btf->type_offs_cap * sizeof(*new_type_offs);
> +
> + maps = (__u32 *)malloc(type_cnt * sizeof(__u32));
> + if (!maps) {
> + ret = -ENOMEM;
> + goto err_out;
> + }
> +
> + new_type_offs = (__u32 *)malloc(data_size);
nit:
new_type_offs = (__u32 *)calloc(btf->type_offs_cap, sizeof(*new_type_offs))
> + if (!new_type_offs) {
> + ret = -ENOMEM;
> + goto err_out;
> + }
> +
> + new_type_offs_noname = (__u32 *)malloc(data_size);
> + if (!new_type_offs_noname) {
> + ret = -ENOMEM;
> + goto err_out;
> + }
> +
> + new_types_data = malloc(btf->types_data_cap);
> + if (!new_types_data) {
> + ret = -ENOMEM;
> + goto err_out;
> + }
> +
> + memset(new_type_offs, 0, data_size);
Then this memset is not required.
Thank you,
--
Masami Hiramatsu (Google) <[email protected]>
On Sun, Jun 9, 2024 at 3:23 PM Masami Hiramatsu <[email protected]> wrote:
>
> Hi
>
> On Sat, 8 Jun 2024 07:08:35 -0700
> Donglin Peng <[email protected]> wrote:
>
> > Currently, we are only using the linear search method to find the type id
> > by the name, which has a time complexity of O(n). This change involves
> > sorting the names of btf types in ascending order and using binary search,
> > which has a time complexity of O(log(n)). This idea was inspired by the
> > following patch:
> >
> > 60443c88f3a8 ("kallsyms: Improve the performance of kallsyms_lookup_name()").
> >
> > At present, this improvement is only for searching in vmlinux's and
> > module's BTFs.
> >
> > Another change is the search direction: we search the BTF first and then
> > its base, and the type id of the first matching btf_type is returned.
> >
> > Here is a timing result for finding 87590 type ids by their names in
> > vmlinux's BTF:
> >
> > Before: 158426 ms
> > After: 114 ms
> >
> > The average lookup performance has improved more than 1000x in the above scenario.
>
> This looks like a great improvement! So searching a function entry takes ~2us?
Yes, it is the average test result on an Ubuntu VM. However, if you are
specifically searching for a particular btf_type, it may take more time
due to cache misses.
The data is obtained from the following test code:

	/*
	 * total: the number of btf_type
	 * nr_names: the number of btf_type that has a name
	 */
	pr_info("Begin to test ... total: %d, nr_names: %d\n", total, nr_names);
	t0 = ktime_get_ns();
	for (i = 0; i < nr_names; i++) {
		if (btf_find_by_name_kind(btf, tn[i].name, tn[i].kind) <= 0) {
			pr_err("Find failed: name: %s, kind: %d\n",
			       tn[i].name, tn[i].kind);
			break;
		}
		if ((i & 0xfff) == 0)
			touch_nmi_watchdog();
	}
	delta = ktime_get_ns() - t0;
	pr_info("Consume total: %llu ms, Per btf_type: %llu ns\n",
		delta / NSEC_PER_MSEC, delta / nr_names);
Using binary search:
[ 40.381095] Begin to test ... total: 155010, nr_names: 87596
[ 40.493024] Consume total: 111 ms, Per btf_type: 1275 ns
Using linear search:
[ 56.464520] Begin to test ... total: 155003, nr_names: 87591
[ 213.344919] Consume total: 156880 ms, Per btf_type: 1791054 ns
> BTW, I have some comments below, but basically it looks good to me and
> it passed my ftracetest.
Thank you.
>
> Tested-by: Masami Hiramatsu (Google) <[email protected]>
>
I will include this in v4.
> >
> > Signed-off-by: Donglin Peng <[email protected]>
> > ---
> > Changes in RFC v3:
> > - Sort the btf types during the build process in order to reduce memory usage
> > and decrease boot time.
> >
> > RFC v2:
> > - https://lore.kernel.org/all/[email protected]
> > ---
> > include/linux/btf.h | 1 +
> > kernel/bpf/btf.c | 160 +++++++++++++++++++++++++++++++++---
> > tools/lib/bpf/btf.c | 195 ++++++++++++++++++++++++++++++++++++++++++++
> > 3 files changed, 345 insertions(+), 11 deletions(-)
> >
> > diff --git a/include/linux/btf.h b/include/linux/btf.h
> > index f9e56fd12a9f..1dc1000a7dc9 100644
> > --- a/include/linux/btf.h
> > +++ b/include/linux/btf.h
> > @@ -214,6 +214,7 @@ bool btf_is_kernel(const struct btf *btf);
> > bool btf_is_module(const struct btf *btf);
> > struct module *btf_try_get_module(const struct btf *btf);
> > u32 btf_nr_types(const struct btf *btf);
> > +u32 btf_type_cnt(const struct btf *btf);
> > bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s,
> > const struct btf_member *m,
> > u32 expected_offset, u32 expected_size);
> > diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
> > index 821063660d9f..5b7b464204bf 100644
> > --- a/kernel/bpf/btf.c
> > +++ b/kernel/bpf/btf.c
> > @@ -262,6 +262,7 @@ struct btf {
> > u32 data_size;
> > refcount_t refcnt;
> > u32 id;
> > + u32 nr_types_sorted;
> > struct rcu_head rcu;
> > struct btf_kfunc_set_tab *kfunc_set_tab;
> > struct btf_id_dtor_kfunc_tab *dtor_kfunc_tab;
> > @@ -542,23 +543,102 @@ u32 btf_nr_types(const struct btf *btf)
> > return total;
> > }
> >
> > -s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind)
> > +u32 btf_type_cnt(const struct btf *btf)
> > +{
> > + return btf->start_id + btf->nr_types;
> > +}
> > +
> > +static s32 btf_find_by_name_bsearch(const struct btf *btf, const char *name,
> > + int *start, int *end)
> > {
> > const struct btf_type *t;
> > - const char *tname;
> > - u32 i, total;
> > + const char *name_buf;
> > + int low, low_start, mid, high, high_end;
> > + int ret, start_id;
> > +
> > + start_id = btf->base_btf ? btf->start_id : 1;
> > + low_start = low = start_id;
> > + high_end = high = start_id + btf->nr_types_sorted - 1;
> > +
> > + while (low <= high) {
> > + mid = low + (high - low) / 2;
> > + t = btf_type_by_id(btf, mid);
> > + name_buf = btf_name_by_offset(btf, t->name_off);
> > + ret = strcmp(name, name_buf);
> > + if (ret > 0)
> > + low = mid + 1;
> > + else if (ret < 0)
> > + high = mid - 1;
> > + else
> > + break;
> > + }
> >
> > - total = btf_nr_types(btf);
> > - for (i = 1; i < total; i++) {
> > - t = btf_type_by_id(btf, i);
> > - if (BTF_INFO_KIND(t->info) != kind)
> > - continue;
> > + if (low > high)
> > + return -ESRCH;
>
> nit: -ENOENT ?
Good, I will update it in the v4.
>
> >
> > - tname = btf_name_by_offset(btf, t->name_off);
> > - if (!strcmp(tname, name))
> > - return i;
> > + if (start) {
> > + low = mid;
> > + while (low > low_start) {
> > + t = btf_type_by_id(btf, low-1);
> > + name_buf = btf_name_by_offset(btf, t->name_off);
> > + if (strcmp(name, name_buf))
> > + break;
> > + low--;
> > + }
> > + *start = low;
> > }
> >
> > + if (end) {
> > + high = mid;
> > + while (high < high_end) {
> > + t = btf_type_by_id(btf, high+1);
> > + name_buf = btf_name_by_offset(btf, t->name_off);
> > + if (strcmp(name, name_buf))
> > + break;
> > + high++;
> > + }
> > + *end = high;
> > + }
> > +
> > + return mid;
> > +}
> > +
> > +s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind)
> > +{
> > + const struct btf_type *t;
> > + const char *tname;
> > + int start, end;
> > + s32 id, total;
> > +
> > + do {
> > + if (btf->nr_types_sorted) {
> > + /* binary search */
> > + id = btf_find_by_name_bsearch(btf, name, &start, &end);
> > + if (id > 0) {
> > + while (start <= end) {
> > + t = btf_type_by_id(btf, start);
> > + if (BTF_INFO_KIND(t->info) == kind)
> > + return start;
> > + start++;
> > + }
> > + }
> > + } else {
> > + /* linear search */
> > + total = btf_type_cnt(btf);
> > + for (id = btf->base_btf ? btf->start_id : 1;
> > + id < total; id++) {
> > + t = btf_type_by_id(btf, id);
> > + if (BTF_INFO_KIND(t->info) != kind)
> > + continue;
> > +
> > + tname = btf_name_by_offset(btf, t->name_off);
> > + if (!strcmp(tname, name))
> > + return id;
> > + }
> > + }
> > + btf = btf->base_btf;
> > + } while (btf);
> > +
> > return -ENOENT;
> > }
> >
> > @@ -5979,6 +6059,56 @@ int get_kern_ctx_btf_id(struct bpf_verifier_log *log, enum bpf_prog_type prog_ty
> > return kctx_type_id;
> > }
> >
> > +static int btf_check_sort(struct btf *btf, int start_id)
> > +{
> > + int i, n, nr_names = 0;
> > +
> > + n = btf_nr_types(btf);
> > + for (i = start_id; i < n; i++) {
> > + const struct btf_type *t;
> > + const char *name;
> > +
> > + t = btf_type_by_id(btf, i);
> > + if (!t)
> > + return -EINVAL;
> > +
> > + name = btf_str_by_offset(btf, t->name_off);
> > + if (!str_is_empty(name))
> > + nr_names++;
>
> else {
> goto out;
> }
> ? (*)
>
No need for that. The purpose of the for loop here is to count the
number of btf_types with names. If the BTF file is sorted, the btf_types
with names will be placed at the beginning, while the btf_types without
names will be appended at the end.
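For reference, the layout that btf_check_sort() expects looks like this
(a sketch; the IDs are illustrative, and nr_types_sorted is the field
this patch adds):

	/*
	 * Sorted vmlinux BTF layout:
	 *
	 *   ID 0                      void (btf_void)
	 *   ID 1..nr_types_sorted     named types, ascending by name
	 *   ID nr_types_sorted+1...   anonymous (unnamed) types
	 *
	 * For a module's split BTF, the sorted run starts at
	 * btf->start_id instead of 1.
	 */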
> > + }
> > +
> > + if (nr_names < 3)
> > + goto out;
>
> What does this `3` mean?
It is just my own heuristic: binary search does not offer any advantage
if there are only 1 or 2 btf_types with names.
>
> > +
> > + for (i = 0; i < nr_names - 1; i++) {
> > + const struct btf_type *t1, *t2;
> > + const char *s1, *s2;
> > +
> > + t1 = btf_type_by_id(btf, start_id + i);
> > + if (!t1)
> > + return -EINVAL;
> > +
> > + s1 = btf_str_by_offset(btf, t1->name_off);
> > + if (str_is_empty(s1))
> > + goto out;
>
> Why not continue? This case is expected, or the previous block (*)
> must go `out` at that point.
The purpose of the for loop here is to verify whether the first
nr_names btf_types are sorted. If the BTF file is indeed sorted,
btf_types with names will be placed at the beginning, and the number of
btf_types with names is equal to nr_names. However, if a btf_type
without a name is found within the first nr_names btf_types, it
indicates that the BTF file is not sorted.
>
> > +
> > + t2 = btf_type_by_id(btf, start_id + i + 1);
> > + if (!t2)
> > + return -EINVAL;
> > +
> > + s2 = btf_str_by_offset(btf, t2->name_off);
> > + if (str_is_empty(s2))
> > + goto out;
>
> Ditto.
>
Ditto.
> > +
> > + if (strcmp(s1, s2) > 0)
> > + goto out;
> > + }
> > +
> > + btf->nr_types_sorted = nr_names;
> > +out:
> > + return 0;
> > +}
> > +
> > BTF_ID_LIST(bpf_ctx_convert_btf_id)
> > BTF_ID(struct, bpf_ctx_convert)
> >
> > @@ -6029,6 +6159,10 @@ struct btf *btf_parse_vmlinux(void)
> > if (err)
> > goto errout;
> >
> > + err = btf_check_sort(btf, 1);
>
> Why `1`?
The start ID for the sorted btf_types in vmlinux's BTF is 1, as the
btf_void is placed before the sorted types.
>
> > + if (err)
> > + goto errout;
> > +
> > /* btf_parse_vmlinux() runs under bpf_verifier_lock */
> > bpf_ctx_convert.t = btf_type_by_id(btf, bpf_ctx_convert_btf_id[0]);
> >
> > @@ -6111,6 +6245,10 @@ static struct btf *btf_parse_module(const char *module_name, const void *data, u
> > if (err)
> > goto errout;
> >
> > + err = btf_check_sort(btf, btf_nr_types(base_btf));
> > + if (err)
> > + goto errout;
> > +
> > btf_verifier_env_free(env);
> > refcount_set(&btf->refcnt, 1);
> > return btf;
> > diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
> > index 2d0840ef599a..93c1ab677bfa 100644
> > --- a/tools/lib/bpf/btf.c
> > +++ b/tools/lib/bpf/btf.c
> > @@ -1,6 +1,9 @@
> > // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
> > /* Copyright (c) 2018 Facebook */
> >
> > +#ifndef _GNU_SOURCE
> > +#define _GNU_SOURCE
> > +#endif
> > #include <byteswap.h>
> > #include <endian.h>
> > #include <stdio.h>
> > @@ -3072,6 +3075,7 @@ static int btf_dedup_ref_types(struct btf_dedup *d);
> > static int btf_dedup_resolve_fwds(struct btf_dedup *d);
> > static int btf_dedup_compact_types(struct btf_dedup *d);
> > static int btf_dedup_remap_types(struct btf_dedup *d);
> > +static int btf_sort_type_by_name(struct btf *btf);
> >
> > /*
> > * Deduplicate BTF types and strings.
> > @@ -3270,6 +3274,11 @@ int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts)
> > pr_debug("btf_dedup_remap_types failed:%d\n", err);
> > goto done;
> > }
> > + err = btf_sort_type_by_name(btf);
> > + if (err < 0) {
> > + pr_debug("btf_sort_type_by_name failed:%d\n", err);
> > + goto done;
> > + }
> >
> > done:
> > btf_dedup_free(d);
> > @@ -5212,3 +5221,189 @@ int btf_ext_visit_str_offs(struct btf_ext *btf_ext, str_off_visit_fn visit, void
> >
> > return 0;
> > }
> > +
> > +static int btf_compare_type_name(const void *a, const void *b, void *priv)
> > +{
> > + struct btf *btf = (struct btf *)priv;
> > + __u32 ta = *(const __u32 *)a;
> > + __u32 tb = *(const __u32 *)b;
> > + struct btf_type *bta, *btb;
> > + const char *na, *nb;
> > +
> > + bta = (struct btf_type *)(btf->types_data + ta);
> > + btb = (struct btf_type *)(btf->types_data + tb);
> > + na = btf__str_by_offset(btf, bta->name_off);
> > + nb = btf__str_by_offset(btf, btb->name_off);
> > +
> > + return strcmp(na, nb);
> > +}
> > +
> > +static int btf_compare_offs(const void *o1, const void *o2)
> > +{
> > + __u32 *offs1 = (__u32 *)o1;
> > + __u32 *offs2 = (__u32 *)o2;
> > +
> > + return *offs1 - *offs2;
> > +}
> > +
> > +static inline __u32 btf_get_mapped_type(struct btf *btf, __u32 *maps, __u32 type)
> > +{
> > + if (type < btf->start_id)
> > + return type;
> > + return maps[type - btf->start_id] + btf->start_id;
> > +}
> > +
> > +/*
> > + * Collect and move the btf_types with names to the start location, and
> > + * sort them in ascending order by name, so we can use the binary search
> > + * method.
> > + */
> > +static int btf_sort_type_by_name(struct btf *btf)
> > +{
> > + struct btf_type *bt;
> > + __u32 *new_type_offs = NULL, *new_type_offs_noname = NULL;
> > + __u32 *maps = NULL, *found_offs;
> > + void *new_types_data = NULL, *loc_data;
> > + int i, j, k, type_cnt, ret = 0, type_size;
> > + __u32 data_size;
> > +
> > + if (btf_ensure_modifiable(btf))
> > + return libbpf_err(-ENOMEM);
> > +
> > + type_cnt = btf->nr_types;
> > + data_size = btf->type_offs_cap * sizeof(*new_type_offs);
> > +
> > + maps = (__u32 *)malloc(type_cnt * sizeof(__u32));
> > + if (!maps) {
> > + ret = -ENOMEM;
> > + goto err_out;
> > + }
> > +
> > + new_type_offs = (__u32 *)malloc(data_size);
>
> nit:
> new_type_offs = (__u32 *)calloc(btf->type_offs_cap, sizeof(*new_type_offs))
Okay, I will update it in v4.
>
> > + if (!new_type_offs) {
> > + ret = -ENOMEM;
> > + goto err_out;
> > + }
> > +
> > + new_type_offs_noname = (__u32 *)malloc(data_size);
> > + if (!new_type_offs_noname) {
> > + ret = -ENOMEM;
> > + goto err_out;
> > + }
> > +
> > + new_types_data = malloc(btf->types_data_cap);
> > + if (!new_types_data) {
> > + ret = -ENOMEM;
> > + goto err_out;
> > + }
> > +
> > + memset(new_type_offs, 0, data_size);
>
> Then this memset is not required.
Good, I will update it in v4.
>
> Thank you,
>
>
> --
> Masami Hiramatsu (Google) <[email protected]>
On Sat, 2024-06-08 at 07:08 -0700, Donglin Peng wrote:
[...]
> Changes in RFC v3:
> - Sort the btf types during the build process in order to reduce memory usage
> and decrease boot time.
>
> RFC v2:
> - https://lore.kernel.org/all/[email protected]
> ---
> include/linux/btf.h | 1 +
> kernel/bpf/btf.c | 160 +++++++++++++++++++++++++++++++++---
I think that kernel part is in a good shape,
please split it as a separate commit.
> tools/lib/bpf/btf.c | 195 ++++++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 345 insertions(+), 11 deletions(-)
[...]
> diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
> index 2d0840ef599a..93c1ab677bfa 100644
I'm not sure that libbpf is the best place to put this functionality,
as there might be different kinds of orderings
(e.g. see a fresh commit to bpftool to output stable vmlinux.h:
94133cf24bb3 "bpftool: Introduce btf c dump sorting").
I'm curious what Andrii, Alan and Arnaldo think on libbpf vs pahole
for this feature.
Also, I have a selftests build failure with this patch-set
(and I suspect that a bunch of dedup test cases would need an update):
$ pwd
/home/eddy/work/bpf-next/tools/testing/selftests/bpf
$ make -j14 test_progs
...
GEN-SKEL [test_progs] access_map_in_map.skel.h
Binary files /home/eddy/work/bpf-next/tools/testing/selftests/bpf/access_map_in_map.bpf.linked2.o and /home/eddy/work/bpf-next/tools/testing/selftests/bpf/access_map_in_map.bpf.linked3.o differ
make: *** [Makefile:658: /home/eddy/work/bpf-next/tools/testing/selftests/bpf/access_map_in_map.skel.h] Error 1
make: *** Waiting for unfinished jobs....
If this change remains in libbpf, I think it would be great to update
btf_find_by_name_kind() to work the same way as the kernel one.
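For example, a minimal sketch of a sorted-aware lookup on the libbpf
side (hypothetical: it assumes struct btf grows a nr_types_sorted field
mirroring the kernel one, and relies on duplicate names being adjacent
after sorting):

	static __s32 btf_find_by_name_kind_sorted(const struct btf *btf,
						  const char *name, __u32 kind)
	{
		int first = btf->start_id;
		int last = first + btf->nr_types_sorted - 1;
		int low = first, high = last, mid, i, cmp;
		const struct btf_type *t;

		while (low <= high) {
			mid = low + (high - low) / 2;
			t = btf__type_by_id(btf, mid);
			cmp = strcmp(name, btf__str_by_offset(btf, t->name_off));
			if (cmp > 0)
				low = mid + 1;
			else if (cmp < 0)
				high = mid - 1;
			else
				goto found;
		}
		return libbpf_err(-ENOENT);
	found:
		/* rewind to the first type with this name... */
		for (i = mid; i > first; i--) {
			t = btf__type_by_id(btf, i - 1);
			if (strcmp(name, btf__str_by_offset(btf, t->name_off)))
				break;
		}
		/* ...then scan forward over the duplicates for the kind */
		for (; i <= last; i++) {
			t = btf__type_by_id(btf, i);
			if (strcmp(name, btf__str_by_offset(btf, t->name_off)))
				break;
			if (btf_kind(t) == kind)
				return i;
		}
		return libbpf_err(-ENOENT);
	}

A full version would also fall back to btf->base_btf, as the kernel
btf_find_by_name_kind() in this patch does.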
> --- a/tools/lib/bpf/btf.c
> +++ b/tools/lib/bpf/btf.c
[...]
> +static int btf_sort_type_by_name(struct btf *btf)
> +{
> + struct btf_type *bt;
> + __u32 *new_type_offs = NULL, *new_type_offs_noname = NULL;
> + __u32 *maps = NULL, *found_offs;
> + void *new_types_data = NULL, *loc_data;
> + int i, j, k, type_cnt, ret = 0, type_size;
> + __u32 data_size;
> +
> + if (btf_ensure_modifiable(btf))
> + return libbpf_err(-ENOMEM);
> +
> + type_cnt = btf->nr_types;
> + data_size = btf->type_offs_cap * sizeof(*new_type_offs);
> +
> + maps = (__u32 *)malloc(type_cnt * sizeof(__u32));
> + if (!maps) {
> + ret = -ENOMEM;
> + goto err_out;
> + }
> +
> + new_type_offs = (__u32 *)malloc(data_size);
> + if (!new_type_offs) {
> + ret = -ENOMEM;
> + goto err_out;
> + }
> +
> + new_type_offs_noname = (__u32 *)malloc(data_size);
> + if (!new_type_offs_noname) {
> + ret = -ENOMEM;
> + goto err_out;
> + }
What is the point of separating offsets in new_type_offs vs
new_type_offs_noname? It should be possible to use a single offsets
array and have a comparison function that puts all named types before
unnamed.
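E.g. extending the patch's btf_compare_type_name() along these lines
(untested sketch):

	static int btf_compare_type_name(const void *a, const void *b, void *priv)
	{
		struct btf *btf = priv;
		const struct btf_type *ta, *tb;
		const char *na, *nb;
		bool named_a, named_b;

		ta = btf->types_data + *(const __u32 *)a;
		tb = btf->types_data + *(const __u32 *)b;
		na = btf__str_by_offset(btf, ta->name_off);
		nb = btf__str_by_offset(btf, tb->name_off);
		named_a = na && na[0];
		named_b = nb && nb[0];

		if (named_a != named_b)
			return named_a ? -1 : 1; /* named types sort first */
		if (!named_a)
			return 0; /* relative order of unnamed types is irrelevant */
		return strcmp(na, nb);
	}

That way the memmove() and the second offsets array go away entirely.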
> +
> + new_types_data = malloc(btf->types_data_cap);
> + if (!new_types_data) {
> + ret = -ENOMEM;
> + goto err_out;
> + }
> +
> + memset(new_type_offs, 0, data_size);
> +
> + for (i = 0, j = 0, k = 0; i < type_cnt; i++) {
> + const char *name;
> +
> + bt = (struct btf_type *)(btf->types_data + btf->type_offs[i]);
> + name = btf__str_by_offset(btf, bt->name_off);
> + if (!name || !name[0])
> + new_type_offs_noname[k++] = btf->type_offs[i];
> + else
> + new_type_offs[j++] = btf->type_offs[i];
> + }
> +
> + memmove(new_type_offs + j, new_type_offs_noname, sizeof(__u32) * k);
> +
> + qsort_r(new_type_offs, j, sizeof(*new_type_offs),
> + btf_compare_type_name, btf);
> +
> + for (i = 0; i < type_cnt; i++) {
> + found_offs = bsearch(&new_type_offs[i], btf->type_offs, type_cnt,
> + sizeof(__u32), btf_compare_offs);
> + if (!found_offs) {
> + ret = -EINVAL;
> + goto err_out;
> + }
> + maps[found_offs - btf->type_offs] = i;
> + }
> +
> + loc_data = new_types_data;
> + for (i = 0; i < type_cnt; i++, loc_data += type_size) {
> + bt = (struct btf_type *)(btf->types_data + new_type_offs[i]);
> + type_size = btf_type_size(bt);
> + if (type_size < 0) {
> + ret = type_size;
> + goto err_out;
> + }
> +
> + memcpy(loc_data, bt, type_size);
> +
> + bt = (struct btf_type *)loc_data;
> + switch (btf_kind(bt)) {
Please take a look at btf_dedup_remap_types(): it uses newly added
iterator interface to enumerate all ID references in the type.
It could be used here to avoid enumerating every BTF kind.
Also, the d->hypot_map could be used instead of `maps`.
And if so, I think that it should be possible to put this pass before
btf_dedup_remap_types() in order for it to do the remapping.
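Roughly (a sketch assuming the internal btf_type_visit_type_ids()
helper and the maps[] array from this patch):

	struct sort_remap_ctx {
		struct btf *btf;
		__u32 *maps;
	};

	static int btf_sort_remap_type_id(__u32 *type_id, void *ctx)
	{
		struct sort_remap_ctx *r = ctx;

		*type_id = btf_get_mapped_type(r->btf, r->maps, *type_id);
		return 0;
	}

	/* ...and in place of the switch over btf_kind(bt): */
	struct sort_remap_ctx remap_ctx = { .btf = btf, .maps = maps };

	ret = btf_type_visit_type_ids(bt, btf_sort_remap_type_id, &remap_ctx);
	if (ret < 0)
		goto err_out;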
Alternatively, it might make sense to merge this pass with
btf_dedup_compact_types() in order to minimize number of operations,
e.g. as in my crude attempt:
https://github.com/eddyz87/bpf/tree/binsort-btf-dedup
(fails with similar selftests issue).
> + case BTF_KIND_PTR:
> + case BTF_KIND_CONST:
> + case BTF_KIND_VOLATILE:
> + case BTF_KIND_RESTRICT:
> + case BTF_KIND_TYPEDEF:
> + case BTF_KIND_TYPE_TAG:
> + case BTF_KIND_FUNC:
> + case BTF_KIND_VAR:
> + case BTF_KIND_DECL_TAG:
> + bt->type = btf_get_mapped_type(btf, maps, bt->type);
> + break;
[...]
On Tue, Jun 11, 2024 at 6:13 PM Eduard Zingerman <[email protected]> wrote:
>
> On Sat, 2024-06-08 at 07:08 -0700, Donglin Peng wrote:
>
> [...]
>
> > Changes in RFC v3:
> > - Sort the btf types during the build process in order to reduce memory usage
> > and decrease boot time.
> >
> > RFC v2:
> > - https://lore.kernel.org/all/[email protected]
> > ---
> > include/linux/btf.h | 1 +
> > kernel/bpf/btf.c | 160 +++++++++++++++++++++++++++++++++---
>
> I think that kernel part is in a good shape,
> please split it as a separate commit.
Okay, thanks.
>
> > tools/lib/bpf/btf.c | 195 ++++++++++++++++++++++++++++++++++++++++++++
> > 3 files changed, 345 insertions(+), 11 deletions(-)
>
> [...]
>
> > diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
> > index 2d0840ef599a..93c1ab677bfa 100644
>
> I'm not sure that libbpf is the best place to put this functionality,
> as there might be different kinds of orderings
> (e.g. see a fresh commit to bpftool to output stable vmlinux.h:
> 94133cf24bb3 "bpftool: Introduce btf c dump sorting").
Thanks, I think it would be better to put it into libbpf. However, I
would also like to hear the opinions of others.
>
> I'm curious what Andrii, Alan and Arnaldo think on libbpf vs pahole
> for this feature.
>
> Also, I have a selftests build failure with this patch-set
> (and I suspect that a bunch of dedup test cases would need an update):
I apologize for the bug in my patch that caused the issue. I will fix it.
>
> $ pwd
> /home/eddy/work/bpf-next/tools/testing/selftests/bpf
> $ make -j14 test_progs
> ...
>
> GEN-SKEL [test_progs] access_map_in_map.skel.h
> Binary files /home/eddy/work/bpf-next/tools/testing/selftests/bpf/access_map_in_map.bpf.linked2.o and /home/eddy/work/bpf-next/tools/testing/selftests/bpf/access_map_in_map.bpf.linked3.o differ
> make: *** [Makefile:658: /home/eddy/work/bpf-next/tools/testing/selftests/bpf/access_map_in_map.skel.h] Error 1
> make: *** Waiting for unfinished jobs....
Sorry, I neglected to perform an ID remap for the btf_types in the BTF.ext
section. I will fix it.
>
> If this change remains in libbpf, I think it would be great to update
> btf_find_by_name_kind() to work the same way as the kernel one.
Sounds good, we might do it later.
>
> > --- a/tools/lib/bpf/btf.c
> > +++ b/tools/lib/bpf/btf.c
>
> [...]
>
> > +static int btf_sort_type_by_name(struct btf *btf)
> > +{
> > + struct btf_type *bt;
> > + __u32 *new_type_offs = NULL, *new_type_offs_noname = NULL;
> > + __u32 *maps = NULL, *found_offs;
> > + void *new_types_data = NULL, *loc_data;
> > + int i, j, k, type_cnt, ret = 0, type_size;
> > + __u32 data_size;
> > +
> > + if (btf_ensure_modifiable(btf))
> > + return libbpf_err(-ENOMEM);
> > +
> > + type_cnt = btf->nr_types;
> > + data_size = btf->type_offs_cap * sizeof(*new_type_offs);
> > +
> > + maps = (__u32 *)malloc(type_cnt * sizeof(__u32));
> > + if (!maps) {
> > + ret = -ENOMEM;
> > + goto err_out;
> > + }
> > +
> > + new_type_offs = (__u32 *)malloc(data_size);
> > + if (!new_type_offs) {
> > + ret = -ENOMEM;
> > + goto err_out;
> > + }
> > +
> > + new_type_offs_noname = (__u32 *)malloc(data_size);
> > + if (!new_type_offs_noname) {
> > + ret = -ENOMEM;
> > + goto err_out;
> > + }
>
> What is the point of separating offsets in new_type_offs vs
> new_type_offs_noname? It should be possible to use a single offsets
> array and have a comparison function that puts all named types before
> unnamed.
Great, you are right.
>
> > +
> > + new_types_data = malloc(btf->types_data_cap);
> > + if (!new_types_data) {
> > + ret = -ENOMEM;
> > + goto err_out;
> > + }
> > +
> > + memset(new_type_offs, 0, data_size);
> > +
> > + for (i = 0, j = 0, k = 0; i < type_cnt; i++) {
> > + const char *name;
> > +
> > + bt = (struct btf_type *)(btf->types_data + btf->type_offs[i]);
> > + name = btf__str_by_offset(btf, bt->name_off);
> > + if (!name || !name[0])
> > + new_type_offs_noname[k++] = btf->type_offs[i];
> > + else
> > + new_type_offs[j++] = btf->type_offs[i];
> > + }
> > +
> > + memmove(new_type_offs + j, new_type_offs_noname, sizeof(__u32) * k);
> > +
> > + qsort_r(new_type_offs, j, sizeof(*new_type_offs),
> > + btf_compare_type_name, btf);
> > +
> > + for (i = 0; i < type_cnt; i++) {
> > + found_offs = bsearch(&new_type_offs[i], btf->type_offs, type_cnt,
> > + sizeof(__u32), btf_compare_offs);
> > + if (!found_offs) {
> > + ret = -EINVAL;
> > + goto err_out;
> > + }
> > + maps[found_offs - btf->type_offs] = i;
> > + }
> > +
> > + loc_data = new_types_data;
> > + for (i = 0; i < type_cnt; i++, loc_data += type_size) {
> > + bt = (struct btf_type *)(btf->types_data + new_type_offs[i]);
> > + type_size = btf_type_size(bt);
> > + if (type_size < 0) {
> > + ret = type_size;
> > + goto err_out;
> > + }
> > +
> > + memcpy(loc_data, bt, type_size);
> > +
> > + bt = (struct btf_type *)loc_data;
> > + switch (btf_kind(bt)) {
>
> Please take a look at btf_dedup_remap_types(): it uses newly added
> iterator interface to enumerate all ID references in the type.
> It could be used here to avoid enumerating every BTF kind.
> Also, the d->hypot_map could be used instead of `maps`.
> And if so, I think that it should be possible to put this pass before
> btf_dedup_remap_types() in order for it to do the remapping.
Thank you. I will revise the code.
>
> Alternatively, it might make sense to merge this pass with
> btf_dedup_compact_types() in order to minimize number of operations,
> e.g. as in my crude attempt:
> https://github.com/eddyz87/bpf/tree/binsort-btf-dedup
Thank you. I would refer to your patch.
> (fails with similar selftests issue).
In addition to the bug in my patch, I have also identified a bug in
linker_fixup_btf in libbpf. After resolving the issue, the selftests
pass successfully, and I will create a new patch to address the bug.
>
> > + case BTF_KIND_PTR:
> > + case BTF_KIND_CONST:
> > + case BTF_KIND_VOLATILE:
> > + case BTF_KIND_RESTRICT:
> > + case BTF_KIND_TYPEDEF:
> > + case BTF_KIND_TYPE_TAG:
> > + case BTF_KIND_FUNC:
> > + case BTF_KIND_VAR:
> > + case BTF_KIND_DECL_TAG:
> > + bt->type = btf_get_mapped_type(btf, maps, bt->type);
> > + break;
>
> [...]
On Sat, Jun 15, 2024 at 7:49 PM Donglin Peng <[email protected]> wrote:
>
> On Tue, Jun 11, 2024 at 6:13 PM Eduard Zingerman <[email protected]> wrote:
> >
> > On Sat, 2024-06-08 at 07:08 -0700, Donglin Peng wrote:
> >
> > [...]
> >
> > > Changes in RFC v3:
> > > - Sort the btf types during the build process in order to reduce memory usage
> > > and decrease boot time.
> > >
> > > RFC v2:
> > > - https://lore.kernel.org/all/[email protected]
> > > ---
> > > include/linux/btf.h | 1 +
> > > kernel/bpf/btf.c | 160 +++++++++++++++++++++++++++++++++---
> >
> > I think that kernel part is in a good shape,
> > please split it as a separate commit.
>
> Okay, thanks.
>
> >
> > > tools/lib/bpf/btf.c | 195 ++++++++++++++++++++++++++++++++++++++++++++
> > > 3 files changed, 345 insertions(+), 11 deletions(-)
> >
> > [...]
> >
> > > diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
> > > index 2d0840ef599a..93c1ab677bfa 100644
> >
> > I'm not sure that libbpf is the best place to put this functionality,
> > as there might be different kinds of orderings
> > (e.g. see a fresh commit to bpftool to output stable vmlinux.h:
> > 94133cf24bb3 "bpftool: Introduce btf c dump sorting").
>
> Thanks, I think it would be better to put it into the libbpf. However, I would
> also like to hear the opinions of others.
>
> >
> > I'm curious what Andrii, Alan and Arnaldo think on libbpf vs pahole
> > for this feature.
> >
> > Also, I have a selftests build failure with this patch-set
> > (and I suspect that a bunch of dedup test cases would need an update):
Yes, many test cases need to be updated, as the BTF layout is modified
unconditionally.
>
> I apologize for the bug in my patch that caused the issue. I will fix it.
>
> >
> > $ pwd
> > /home/eddy/work/bpf-next/tools/testing/selftests/bpf
> > $ make -j14 test_progs
> > ...
> >
> > GEN-SKEL [test_progs] access_map_in_map.skel.h
> > Binary files /home/eddy/work/bpf-next/tools/testing/selftests/bpf/access_map_in_map.bpf.linked2.o and /home/eddy/work/bpf-next/tools/testing/selftests/bpf/access_map_in_map.bpf.linked3.o differ
> > make: *** [Makefile:658: /home/eddy/work/bpf-next/tools/testing/selftests/bpf/access_map_in_map.skel.h] Error 1
> > make: *** Waiting for unfinished jobs....
>
> Sorry, I neglected to perform an ID remap for the btf_types in the BTF.ext
> section. I will fix it.
>
> >
> > If this change remains in libbpf, I think it would be great to update
> > btf_find_by_name_kind() to work the same way as the kernel one.
>
> Sounds good, we might do it later.
>
> >
> > > --- a/tools/lib/bpf/btf.c
> > > +++ b/tools/lib/bpf/btf.c
> >
> > [...]
> >
> > > +static int btf_sort_type_by_name(struct btf *btf)
> > > +{
> > > + struct btf_type *bt;
> > > + __u32 *new_type_offs = NULL, *new_type_offs_noname = NULL;
> > > + __u32 *maps = NULL, *found_offs;
> > > + void *new_types_data = NULL, *loc_data;
> > > + int i, j, k, type_cnt, ret = 0, type_size;
> > > + __u32 data_size;
> > > +
> > > + if (btf_ensure_modifiable(btf))
> > > + return libbpf_err(-ENOMEM);
> > > +
> > > + type_cnt = btf->nr_types;
> > > + data_size = btf->type_offs_cap * sizeof(*new_type_offs);
> > > +
> > > + maps = (__u32 *)malloc(type_cnt * sizeof(__u32));
> > > + if (!maps) {
> > > + ret = -ENOMEM;
> > > + goto err_out;
> > > + }
> > > +
> > > + new_type_offs = (__u32 *)malloc(data_size);
> > > + if (!new_type_offs) {
> > > + ret = -ENOMEM;
> > > + goto err_out;
> > > + }
> > > +
> > > + new_type_offs_noname = (__u32 *)malloc(data_size);
> > > + if (!new_type_offs_noname) {
> > > + ret = -ENOMEM;
> > > + goto err_out;
> > > + }
> >
> > What is the point of separating offsets in new_type_offs vs
> > new_type_offs_noname? It should be possible to use a single offsets
> > array and have a comparison function that puts all named types before
> > unnamed.
>
> Great, you are right.
>
> >
> > > +
> > > + new_types_data = malloc(btf->types_data_cap);
> > > + if (!new_types_data) {
> > > + ret = -ENOMEM;
> > > + goto err_out;
> > > + }
> > > +
> > > + memset(new_type_offs, 0, data_size);
> > > +
> > > + for (i = 0, j = 0, k = 0; i < type_cnt; i++) {
> > > + const char *name;
> > > +
> > > + bt = (struct btf_type *)(btf->types_data + btf->type_offs[i]);
> > > + name = btf__str_by_offset(btf, bt->name_off);
> > > + if (!name || !name[0])
> > > + new_type_offs_noname[k++] = btf->type_offs[i];
> > > + else
> > > + new_type_offs[j++] = btf->type_offs[i];
> > > + }
> > > +
> > > + memmove(new_type_offs + j, new_type_offs_noname, sizeof(__u32) * k);
> > > +
> > > + qsort_r(new_type_offs, j, sizeof(*new_type_offs),
> > > + btf_compare_type_name, btf);
> > > +
> > > + for (i = 0; i < type_cnt; i++) {
> > > + found_offs = bsearch(&new_type_offs[i], btf->type_offs, type_cnt,
> > > + sizeof(__u32), btf_compare_offs);
> > > + if (!found_offs) {
> > > + ret = -EINVAL;
> > > + goto err_out;
> > > + }
> > > + maps[found_offs - btf->type_offs] = i;
> > > + }
> > > +
> > > + loc_data = new_types_data;
> > > + for (i = 0; i < type_cnt; i++, loc_data += type_size) {
> > > + bt = (struct btf_type *)(btf->types_data + new_type_offs[i]);
> > > + type_size = btf_type_size(bt);
> > > + if (type_size < 0) {
> > > + ret = type_size;
> > > + goto err_out;
> > > + }
> > > +
> > > + memcpy(loc_data, bt, type_size);
> > > +
> > > + bt = (struct btf_type *)loc_data;
> > > + switch (btf_kind(bt)) {
> >
> > Please take a look at btf_dedup_remap_types(): it uses newly added
> > iterator interface to enumerate all ID references in the type.
> > It could be used here to avoid enumerating every BTF kind.
> > Also, the d->hypot_map could be used instead of `maps`.
> > And if so, I think that it should be possible to put this pass before
> > btf_dedup_remap_types() in order for it to do the remapping.
>
> Thank you. I will revise the code.
>
> >
> > Alternatively, it might make sense to merge this pass with
> > btf_dedup_compact_types() in order to minimize number of operations,
> > e.g. as in my crude attempt:
> > https://github.com/eddyz87/bpf/tree/binsort-btf-dedup
Could you please provide me with the patch?
>
> Thank you. I would refer to your patch.
>
> > (fails with similar selftests issue).
>
> In addition to the bug in my patch, I have also identified a bug in
> linker_fixup_btf
> in the libbpf. After resolving the issue, the selftests successfully
> passed, and I will
> create a new patch to address the bug.
After fixing the bug, the "make test_progs" command passes successfully.
However, the dedup test cases are still failing.
>
> >
> > > + case BTF_KIND_PTR:
> > > + case BTF_KIND_CONST:
> > > + case BTF_KIND_VOLATILE:
> > > + case BTF_KIND_RESTRICT:
> > > + case BTF_KIND_TYPEDEF:
> > > + case BTF_KIND_TYPE_TAG:
> > > + case BTF_KIND_FUNC:
> > > + case BTF_KIND_VAR:
> > > + case BTF_KIND_DECL_TAG:
> > > + bt->type = btf_get_mapped_type(btf, maps, bt->type);
> > > + break;
> >
> > [...]