LinuxLists.cc - [RFC PATCH] bpftool btf: Add prefix option to dump C

2020-07-22 05:43:53

Subject: [RFC PATCH] bpftool btf: Add prefix option to dump C

When bpftool dumps types and enum members into a header file for
inclusion the names match those in the original source. If the same
header file needs to be included in the original source and the bpf
program, the names of structs, unions, typedefs and enum members will
have naming collisions.

To avoid these collisions an approach is to redeclare the header file
types and enum members, which leads to duplication and possible
inconsistencies. Another approach is to use preprocessor macros
to rename conflicting names, but this can be cumbersome if there are
many conflicts.

This patch adds a prefix option for the dumped names. Use of this option
can avoid name conflicts and compile time errors.

Signed-off-by: Ian Rogers <[email protected]>
---
.../bpf/bpftool/Documentation/bpftool-btf.rst | 7 ++++++-
tools/bpf/bpftool/btf.c | 18 ++++++++++++++---
tools/lib/bpf/btf.h | 1 +
tools/lib/bpf/btf_dump.c | 20 +++++++++++++------
4 files changed, 36 insertions(+), 10 deletions(-)

diff --git a/tools/bpf/bpftool/Documentation/bpftool-btf.rst b/tools/bpf/bpftool/Documentation/bpftool-btf.rst
index 896f4c6c2870..85d66bc69634 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-btf.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-btf.rst
@@ -20,7 +20,7 @@ BTF COMMANDS
=============

| **bpftool** **btf** { **show** | **list** } [**id** *BTF_ID*]
-| **bpftool** **btf dump** *BTF_SRC* [**format** *FORMAT*]
+| **bpftool** **btf dump** *BTF_SRC* [**format** *FORMAT*] [**prefix** *PREFIX*]
| **bpftool** **btf help**
|
| *BTF_SRC* := { **id** *BTF_ID* | **prog** *PROG* | **map** *MAP* [{**key** | **value** | **kv** | **all**}] | **file** *FILE* }
@@ -66,6 +66,11 @@ DESCRIPTION
output format. Raw (**raw**) or C-syntax (**c**) output
formats are supported.

+ With the C-syntax format the **prefix** option can
+ be used to prefix all identifiers and enum members
+ with *PREFIX*. This is useful to avoid naming
+ collisions.
+
**bpftool btf help**
Print short help message.

diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index fc9bc7a23db6..6a428636fa6f 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -379,12 +379,15 @@ static void __printf(2, 0) btf_dump_printf(void *ctx,
}

static int dump_btf_c(const struct btf *btf,
- __u32 *root_type_ids, int root_type_cnt)
+ __u32 *root_type_ids, int root_type_cnt, const char *name_prefix)
{
struct btf_dump *d;
int err = 0, i;
+ struct btf_dump_opts opts = {
+ .name_prefix = name_prefix,
+ };

- d = btf_dump__new(btf, NULL, NULL, btf_dump_printf);
+ d = btf_dump__new(btf, NULL, &opts, btf_dump_printf);
if (IS_ERR(d))
return PTR_ERR(d);

@@ -478,6 +481,7 @@ static int do_dump(int argc, char **argv)
bool dump_c = false;
__u32 btf_id = -1;
const char *src;
+ const char *c_prefix = NULL;
int fd = -1;
int err;

@@ -583,6 +587,14 @@ static int do_dump(int argc, char **argv)
goto done;
}
NEXT_ARG();
+ } else if (is_prefix(*argv, "prefix")) {
+ NEXT_ARG();
+ if (argc < 1 || !*argv) {
+ p_err("expecting value for 'prefix' option\n");
+ goto done;
+ }
+ c_prefix = *argv;
+ NEXT_ARG();
} else {
p_err("unrecognized option: '%s'", *argv);
goto done;
@@ -608,7 +620,7 @@ static int do_dump(int argc, char **argv)
err = -ENOTSUP;
goto done;
}
- err = dump_btf_c(btf, root_type_ids, root_type_cnt);
+ err = dump_btf_c(btf, root_type_ids, root_type_cnt, c_prefix);
} else {
err = dump_btf_raw(btf, root_type_ids, root_type_cnt);
}
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 491c7b41ffdc..fea4baab00bd 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -117,6 +117,7 @@ struct btf_dump;

struct btf_dump_opts {
void *ctx;
+ const char *name_prefix;
};

typedef void (*btf_dump_printf_fn_t)(void *ctx, const char *fmt, va_list args);
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index e1c344504cae..baf2b4d82e1e 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -138,6 +138,7 @@ struct btf_dump *btf_dump__new(const struct btf *btf,
d->btf_ext = btf_ext;
d->printf_fn = printf_fn;
d->opts.ctx = opts ? opts->ctx : NULL;
+ d->opts.name_prefix = opts ? opts->name_prefix : NULL;

d->type_names = hashmap__new(str_hash_fn, str_equal_fn, NULL);
if (IS_ERR(d->type_names)) {
@@ -903,6 +904,7 @@ static void btf_dump_emit_enum_def(struct btf_dump *d, __u32 id,
const struct btf_enum *v = btf_enum(t);
__u16 vlen = btf_vlen(t);
const char *name;
+ const char *name_prefix = d->opts.name_prefix;
size_t dup_cnt;
int i;

@@ -912,17 +914,19 @@ static void btf_dump_emit_enum_def(struct btf_dump *d, __u32 id,

if (vlen) {
btf_dump_printf(d, " {");
+ if (!name_prefix)
+ name_prefix = "";
for (i = 0; i < vlen; i++, v++) {
name = btf_name_of(d, v->name_off);
/* enumerators share namespace with typedef idents */
dup_cnt = btf_dump_name_dups(d, d->ident_names, name);
if (dup_cnt > 1) {
- btf_dump_printf(d, "\n%s%s___%zu = %u,",
- pfx(lvl + 1), name, dup_cnt,
+ btf_dump_printf(d, "\n%s%s%s___%zu = %u,",
+ pfx(lvl + 1), name_prefix, name, dup_cnt,
(__u32)v->val);
} else {
- btf_dump_printf(d, "\n%s%s = %u,",
- pfx(lvl + 1), name,
+ btf_dump_printf(d, "\n%s%s%s = %u,",
+ pfx(lvl + 1), name_prefix, name,
(__u32)v->val);
}
}
@@ -1360,6 +1364,7 @@ static const char *btf_dump_resolve_name(struct btf_dump *d, __u32 id,
const struct btf_type *t = btf__type_by_id(d->btf, id);
const char *orig_name = btf_name_of(d, t->name_off);
const char **cached_name = &d->cached_names[id];
+ const char *prefix = d->opts.name_prefix;
size_t dup_cnt;

if (t->name_off == 0)
@@ -1369,11 +1374,14 @@ static const char *btf_dump_resolve_name(struct btf_dump *d, __u32 id,
return *cached_name ? *cached_name : orig_name;

dup_cnt = btf_dump_name_dups(d, name_map, orig_name);
- if (dup_cnt > 1) {
+ if (dup_cnt > 1 || prefix) {
const size_t max_len = 256;
char new_name[max_len];

- snprintf(new_name, max_len, "%s___%zu", orig_name, dup_cnt);
+ if (dup_cnt > 1)
+ snprintf(new_name, max_len, "%s%s___%zu", prefix, orig_name, dup_cnt);
+ else
+ snprintf(new_name, max_len, "%s%s", prefix, orig_name);
*cached_name = strdup(new_name);
}

--
2.28.0.rc0.105.gf9edc3c819-goog

2020-07-22 06:58:32

by Andrii Nakryiko

[permalink] [raw]

Subject: Re: [RFC PATCH] bpftool btf: Add prefix option to dump C

On Tue, Jul 21, 2020 at 10:44 PM Ian Rogers <[email protected]> wrote:
>
> When bpftool dumps types and enum members into a header file for
> inclusion the names match those in the original source. If the same
> header file needs to be included in the original source and the bpf
> program, the names of structs, unions, typedefs and enum members will
> have naming collisions.

vmlinux.h is not really intended to be used from user-space, because
it's incompatible with pretty much any other header that declares any
type. Ideally we should make this better, but that might require some
compiler support. We've been discussing with Yonghong extending Clang
with a compile-time check for whether some type is defined or not,
which would allow to guard every type and only declare it
conditionally, if it's missing. But that's just an idea at this point.

Regardless, vmlinux.h is also very much Clang-specific, and shouldn't
work well with GCC. Could you elaborate on the specifics of the use
case you have in mind? That could help me see what might be the right
solution. Thanks!

>
> To avoid these collisions an approach is to redeclare the header file
> types and enum members, which leads to duplication and possible
> inconsistencies. Another approach is to use preprocessor macros
> to rename conflicting names, but this can be cumbersome if there are
> many conflicts.
>
> This patch adds a prefix option for the dumped names. Use of this option
> can avoid name conflicts and compile time errors.
>
> Signed-off-by: Ian Rogers <[email protected]>
> ---
> .../bpf/bpftool/Documentation/bpftool-btf.rst | 7 ++++++-
> tools/bpf/bpftool/btf.c | 18 ++++++++++++++---
> tools/lib/bpf/btf.h | 1 +
> tools/lib/bpf/btf_dump.c | 20 +++++++++++++------
> 4 files changed, 36 insertions(+), 10 deletions(-)
>

[...]

> diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
> index 491c7b41ffdc..fea4baab00bd 100644
> --- a/tools/lib/bpf/btf.h
> +++ b/tools/lib/bpf/btf.h
> @@ -117,6 +117,7 @@ struct btf_dump;
>
> struct btf_dump_opts {
> void *ctx;
> + const char *name_prefix;
> };

BTW, we can't do that, this breaks ABI. btf_dump_opts were added
before we understood the problem of backward/forward compatibility of
libbpf APIs, unfortunately.

>
> typedef void (*btf_dump_printf_fn_t)(void *ctx, const char *fmt, va_list args);
> diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
> index e1c344504cae..baf2b4d82e1e 100644
> --- a/tools/lib/bpf/btf_dump.c
> +++ b/tools/lib/bpf/btf_dump.c

[...]

2020-08-01 01:48:26

by Ian Rogers

[permalink] [raw]

Subject: Re: [RFC PATCH] bpftool btf: Add prefix option to dump C

On Tue, Jul 21, 2020 at 11:58 PM Andrii Nakryiko
<[email protected]> wrote:
>
> On Tue, Jul 21, 2020 at 10:44 PM Ian Rogers <[email protected]> wrote:
> >
> > When bpftool dumps types and enum members into a header file for
> > inclusion the names match those in the original source. If the same
> > header file needs to be included in the original source and the bpf
> > program, the names of structs, unions, typedefs and enum members will
> > have naming collisions.
>
> vmlinux.h is not really intended to be used from user-space, because
> it's incompatible with pretty much any other header that declares any
> type. Ideally we should make this better, but that might require some
> compiler support. We've been discussing with Yonghong extending Clang
> with a compile-time check for whether some type is defined or not,
> which would allow to guard every type and only declare it
> conditionally, if it's missing. But that's just an idea at this point.

Thanks Andrii! We're not looking at user-space code but the BPF code.
The prefix idea comes from a way to solve this problem in C++ with
namespaces:

namespace vmlinux {
#include "vmlinux.h"
}

As the BPF programs are C code then the prefix acts like the
namespace. It seems strange to need to extend the language.

> Regardless, vmlinux.h is also very much Clang-specific, and shouldn't
> work well with GCC. Could you elaborate on the specifics of the use
> case you have in mind? That could help me see what might be the right
> solution. Thanks!

So the use-case is similar to btf_iter.h:
https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/tree/tools/testing/selftests/bpf/progs/bpf_iter.h
To avoid collisions with somewhat cleaner macro or not games.

Prompted by your concern I was looking into changing bpf_iter.h to use
a prefix to show what the difference would be like. I also think that
there may be issues with our kernel and tool set up that may mean that
the prefix is unnecessary, if I fix something else. Anyway, to give an
example I needed to build the selftests but this is failing for me.
What I see is:

$ git clone git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git
$ cd bpf-next
$ make defconfig
$ cat >>.config <<EOF
CONFIG_DEBUG_INFO=y
CONFIG_DEBUG_INFO_BTF=y
EOF
$ make -j all
$ mkdir /tmp/selftests
$ make O=/tmp/selftests/ TARGETS=bpf kselftest
...
CLANG /tmp/selftests//kselftest/bpf/tools/build/bpftool/profiler.bpf.o
skeleton/profiler.bpf.c:18:21: error: invalid application of 'sizeof'
to an incomplete type 'struct bpf_perf_event_value'
__uint(value_size, sizeof(struct bpf_perf_event_value));
^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Checking with bpftool the vmlinux lacks struct bpf_perf_event_value
but as this is unconditionally defined in bpf.h this seems wrong. Do
you have any suggestions and getting a working build?

> > To avoid these collisions an approach is to redeclare the header file
> > types and enum members, which leads to duplication and possible
> > inconsistencies. Another approach is to use preprocessor macros
> > to rename conflicting names, but this can be cumbersome if there are
> > many conflicts.
> >
> > This patch adds a prefix option for the dumped names. Use of this option
> > can avoid name conflicts and compile time errors.
> >
> > Signed-off-by: Ian Rogers <[email protected]>
> > ---
> > .../bpf/bpftool/Documentation/bpftool-btf.rst | 7 ++++++-
> > tools/bpf/bpftool/btf.c | 18 ++++++++++++++---
> > tools/lib/bpf/btf.h | 1 +
> > tools/lib/bpf/btf_dump.c | 20 +++++++++++++------
> > 4 files changed, 36 insertions(+), 10 deletions(-)
> >
>
> [...]
>
> > diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
> > index 491c7b41ffdc..fea4baab00bd 100644
> > --- a/tools/lib/bpf/btf.h
> > +++ b/tools/lib/bpf/btf.h
> > @@ -117,6 +117,7 @@ struct btf_dump;
> >
> > struct btf_dump_opts {
> > void *ctx;
> > + const char *name_prefix;
> > };
>
> BTW, we can't do that, this breaks ABI. btf_dump_opts were added
> before we understood the problem of backward/forward compatibility of
> libbpf APIs, unfortunately.

This could be fixed by adding a "new" API for the parameter, which
would be unfortunate compared to just amending the existing API. There
may be solutions that are less duplicative.

Thanks,
Ian

> >
> > typedef void (*btf_dump_printf_fn_t)(void *ctx, const char *fmt, va_list args);
> > diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
> > index e1c344504cae..baf2b4d82e1e 100644
> > --- a/tools/lib/bpf/btf_dump.c
> > +++ b/tools/lib/bpf/btf_dump.c
>
> [...]

2020-08-01 03:48:54

by Andrii Nakryiko

[permalink] [raw]

Subject: Re: [RFC PATCH] bpftool btf: Add prefix option to dump C

On Fri, Jul 31, 2020 at 6:47 PM Ian Rogers <[email protected]> wrote:
>
> On Tue, Jul 21, 2020 at 11:58 PM Andrii Nakryiko
> <[email protected]> wrote:
> >
> > On Tue, Jul 21, 2020 at 10:44 PM Ian Rogers <[email protected]> wrote:
> > >
> > > When bpftool dumps types and enum members into a header file for
> > > inclusion the names match those in the original source. If the same
> > > header file needs to be included in the original source and the bpf
> > > program, the names of structs, unions, typedefs and enum members will
> > > have naming collisions.
> >
> > vmlinux.h is not really intended to be used from user-space, because
> > it's incompatible with pretty much any other header that declares any
> > type. Ideally we should make this better, but that might require some
> > compiler support. We've been discussing with Yonghong extending Clang
> > with a compile-time check for whether some type is defined or not,
> > which would allow to guard every type and only declare it
> > conditionally, if it's missing. But that's just an idea at this point.
>
> Thanks Andrii! We're not looking at user-space code but the BPF code.
> The prefix idea comes from a way to solve this problem in C++ with
> namespaces:
>
> namespace vmlinux {
> #include "vmlinux.h"
> }
>
> As the BPF programs are C code then the prefix acts like the
> namespace. It seems strange to need to extend the language.

This is a classic case of jumping to designing a solution without
discussing a real problem first :)

You don't need to use any of the kernel headers together with
vmlinux.h (and it won't work as well), because vmlinux.h is supposed
to have all the **used** types from the kernel. So BPF programs only
include vmlinux.h and few libbpf-provided headers with helpers. Which
is why I assumed that you are trying to use it from user-space. But
see below on what went wrong.

>
> > Regardless, vmlinux.h is also very much Clang-specific, and shouldn't
> > work well with GCC. Could you elaborate on the specifics of the use
> > case you have in mind? That could help me see what might be the right
> > solution. Thanks!
>
> So the use-case is similar to btf_iter.h:
> https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/tree/tools/testing/selftests/bpf/progs/bpf_iter.h
> To avoid collisions with somewhat cleaner macro or not games.
>
> Prompted by your concern I was looking into changing bpf_iter.h to use
> a prefix to show what the difference would be like. I also think that
> there may be issues with our kernel and tool set up that may mean that
> the prefix is unnecessary, if I fix something else. Anyway, to give an
> example I needed to build the selftests but this is failing for me.
> What I see is:
>
> $ git clone git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git
> $ cd bpf-next
> $ make defconfig
> $ cat >>.config <<EOF
> CONFIG_DEBUG_INFO=y
> CONFIG_DEBUG_INFO_BTF=y
> EOF
> $ make -j all
> $ mkdir /tmp/selftests
> $ make O=/tmp/selftests/ TARGETS=bpf kselftest
> ...
> CLANG /tmp/selftests//kselftest/bpf/tools/build/bpftool/profiler.bpf.o
> skeleton/profiler.bpf.c:18:21: error: invalid application of 'sizeof'
> to an incomplete type 'struct bpf_perf_event_value'
> __uint(value_size, sizeof(struct bpf_perf_event_value));
> ^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
>
> Checking with bpftool the vmlinux lacks struct bpf_perf_event_value
> but as this is unconditionally defined in bpf.h this seems wrong. Do
> you have any suggestions and getting a working build?

It is unconditionally defined in bpf.h, but unless kernel code really
uses that type for something, compiler won't generate DWARF
information for that type, which subsequently won't get into BTF.
Adding CONFIG_DEBUG_INFO_BTF=y ensures you get BTF type info
generated, but only for subsystems that were compiled into vmlinux
according to your kernel config.

In this case, default config doesn't enable CONFIG_BPF_EVENTS, which
is a requirement to compile kernel/trace/bpf_trace.c, which in turn
uses struct bpf_perf_event_value in the helper signature.

So the solution in your case would be to use a slightly richer kernel
config, which enables more of the BPF subsystem. You can check
selftests/bpf/config for a list of options we typically enable to run
of selftests, for instance.

>
> > > To avoid these collisions an approach is to redeclare the header file
> > > types and enum members, which leads to duplication and possible
> > > inconsistencies. Another approach is to use preprocessor macros
> > > to rename conflicting names, but this can be cumbersome if there are
> > > many conflicts.
> > >
> > > This patch adds a prefix option for the dumped names. Use of this option
> > > can avoid name conflicts and compile time errors.
> > >
> > > Signed-off-by: Ian Rogers <[email protected]>
> > > ---
> > > .../bpf/bpftool/Documentation/bpftool-btf.rst | 7 ++++++-
> > > tools/bpf/bpftool/btf.c | 18 ++++++++++++++---
> > > tools/lib/bpf/btf.h | 1 +
> > > tools/lib/bpf/btf_dump.c | 20 +++++++++++++------
> > > 4 files changed, 36 insertions(+), 10 deletions(-)
> > >
> >
> > [...]
> >
> > > diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
> > > index 491c7b41ffdc..fea4baab00bd 100644
> > > --- a/tools/lib/bpf/btf.h
> > > +++ b/tools/lib/bpf/btf.h
> > > @@ -117,6 +117,7 @@ struct btf_dump;
> > >
> > > struct btf_dump_opts {
> > > void *ctx;
> > > + const char *name_prefix;
> > > };
> >
> > BTW, we can't do that, this breaks ABI. btf_dump_opts were added
> > before we understood the problem of backward/forward compatibility of
> > libbpf APIs, unfortunately.
>
> This could be fixed by adding a "new" API for the parameter, which
> would be unfortunate compared to just amending the existing API. There
> may be solutions that are less duplicative.
>

Does ABI stability sucks for maintainers of the library? It absolutely
does! But we can't just go and break it.

> Thanks,
> Ian
>
> > >
> > > typedef void (*btf_dump_printf_fn_t)(void *ctx, const char *fmt, va_list args);
> > > diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
> > > index e1c344504cae..baf2b4d82e1e 100644
> > > --- a/tools/lib/bpf/btf_dump.c
> > > +++ b/tools/lib/bpf/btf_dump.c
> >
> > [...]

2020-08-06 17:58:56

by Andrii Nakryiko

[permalink] [raw]

Subject: Re: [RFC PATCH] bpftool btf: Add prefix option to dump C

On Fri, Jul 31, 2020 at 8:47 PM Andrii Nakryiko
<[email protected]> wrote:
>
> On Fri, Jul 31, 2020 at 6:47 PM Ian Rogers <[email protected]> wrote:
> >
> > On Tue, Jul 21, 2020 at 11:58 PM Andrii Nakryiko
> > <[email protected]> wrote:
> > >
> > > On Tue, Jul 21, 2020 at 10:44 PM Ian Rogers <[email protected]> wrote:
> > > >
> > > > When bpftool dumps types and enum members into a header file for
> > > > inclusion the names match those in the original source. If the same
> > > > header file needs to be included in the original source and the bpf
> > > > program, the names of structs, unions, typedefs and enum members will
> > > > have naming collisions.
> > >
> > > vmlinux.h is not really intended to be used from user-space, because
> > > it's incompatible with pretty much any other header that declares any
> > > type. Ideally we should make this better, but that might require some
> > > compiler support. We've been discussing with Yonghong extending Clang
> > > with a compile-time check for whether some type is defined or not,
> > > which would allow to guard every type and only declare it
> > > conditionally, if it's missing. But that's just an idea at this point.
> >
> > Thanks Andrii! We're not looking at user-space code but the BPF code.
> > The prefix idea comes from a way to solve this problem in C++ with
> > namespaces:
> >
> > namespace vmlinux {
> > #include "vmlinux.h"
> > }
> >
> > As the BPF programs are C code then the prefix acts like the
> > namespace. It seems strange to need to extend the language.
>
> This is a classic case of jumping to designing a solution without
> discussing a real problem first :)
>
> You don't need to use any of the kernel headers together with
> vmlinux.h (and it won't work as well), because vmlinux.h is supposed
> to have all the **used** types from the kernel. So BPF programs only
> include vmlinux.h and few libbpf-provided headers with helpers. Which
> is why I assumed that you are trying to use it from user-space. But
> see below on what went wrong.
>
> >
> > > Regardless, vmlinux.h is also very much Clang-specific, and shouldn't
> > > work well with GCC. Could you elaborate on the specifics of the use
> > > case you have in mind? That could help me see what might be the right
> > > solution. Thanks!
> >
> > So the use-case is similar to btf_iter.h:
> > https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/tree/tools/testing/selftests/bpf/progs/bpf_iter.h
> > To avoid collisions with somewhat cleaner macro or not games.
> >
> > Prompted by your concern I was looking into changing bpf_iter.h to use
> > a prefix to show what the difference would be like. I also think that
> > there may be issues with our kernel and tool set up that may mean that
> > the prefix is unnecessary, if I fix something else. Anyway, to give an
> > example I needed to build the selftests but this is failing for me.
> > What I see is:
> >
> > $ git clone git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git
> > $ cd bpf-next
> > $ make defconfig
> > $ cat >>.config <<EOF
> > CONFIG_DEBUG_INFO=y
> > CONFIG_DEBUG_INFO_BTF=y
> > EOF
> > $ make -j all
> > $ mkdir /tmp/selftests
> > $ make O=/tmp/selftests/ TARGETS=bpf kselftest
> > ...
> > CLANG /tmp/selftests//kselftest/bpf/tools/build/bpftool/profiler.bpf.o
> > skeleton/profiler.bpf.c:18:21: error: invalid application of 'sizeof'
> > to an incomplete type 'struct bpf_perf_event_value'
> > __uint(value_size, sizeof(struct bpf_perf_event_value));
> > ^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> >
> > Checking with bpftool the vmlinux lacks struct bpf_perf_event_value
> > but as this is unconditionally defined in bpf.h this seems wrong. Do
> > you have any suggestions and getting a working build?
>
> It is unconditionally defined in bpf.h, but unless kernel code really
> uses that type for something, compiler won't generate DWARF
> information for that type, which subsequently won't get into BTF.
> Adding CONFIG_DEBUG_INFO_BTF=y ensures you get BTF type info
> generated, but only for subsystems that were compiled into vmlinux
> according to your kernel config.
>
> In this case, default config doesn't enable CONFIG_BPF_EVENTS, which
> is a requirement to compile kernel/trace/bpf_trace.c, which in turn
> uses struct bpf_perf_event_value in the helper signature.
>
> So the solution in your case would be to use a slightly richer kernel
> config, which enables more of the BPF subsystem. You can check
> selftests/bpf/config for a list of options we typically enable to run
> of selftests, for instance.
>

So we've discussed this and related issues today at BPF office hours
and few more thoughts occurred to me after I left the call.

You don't really have to use vmlinux.h, if it's inconvenient. Unless
you want to use some internal kernel type that's not available in
kernel-headers. Otherwise feel free to use normal kernel header
includes and don't use vmlinux.h. If you are using BPF_CORE_READ(),
any type is automatically CO-RE-relocatable, even if they come from
#include <linux/whatever.h>. If you need to use direct memory accesses
with programs like fentry/fexit, then adding:

#pragma clang attribute push (__attribute__((preserve_access_index)),
apply_to = record)

before you include any headers would make types in those headers
automatically CO-RE-relocatable even for direct memory accesses. So
this is just something to keep in mind.

But the way we've been handling this was like this.

On BPF program side:

#include "vmlinux.h"
#include "my_custom_types.h"

...

On user-space program side:

#include <stdint.h> /* and whatever else is needed */
#include "my_custom_types.h"

Then in my_custom_types.h you just assume all the needed types are
defined (in either vmlinux.h or in user-space header includes):

struct my_struct {
uint64_t whatever;
};

So far worked fine. It still sucks you can't include some of the
kernel headers to get some useful macro, but to solve that we'd need
Clang extension to check that some type X is already defined, as we
discussed in the call.

Hope this helps a bit.

[...]

2020-08-06 19:44:14

by Ian Rogers

[permalink] [raw]

Subject: Re: [RFC PATCH] bpftool btf: Add prefix option to dump C

On Thu, Aug 6, 2020 at 10:58 AM Andrii Nakryiko
<[email protected]> wrote:
>
> On Fri, Jul 31, 2020 at 8:47 PM Andrii Nakryiko
> <[email protected]> wrote:
> >
> > On Fri, Jul 31, 2020 at 6:47 PM Ian Rogers <[email protected]> wrote:
> > >
> > > On Tue, Jul 21, 2020 at 11:58 PM Andrii Nakryiko
> > > <[email protected]> wrote:
> > > >
> > > > On Tue, Jul 21, 2020 at 10:44 PM Ian Rogers <[email protected]> wrote:
> > > > >
> > > > > When bpftool dumps types and enum members into a header file for
> > > > > inclusion the names match those in the original source. If the same
> > > > > header file needs to be included in the original source and the bpf
> > > > > program, the names of structs, unions, typedefs and enum members will
> > > > > have naming collisions.
> > > >
> > > > vmlinux.h is not really intended to be used from user-space, because
> > > > it's incompatible with pretty much any other header that declares any
> > > > type. Ideally we should make this better, but that might require some
> > > > compiler support. We've been discussing with Yonghong extending Clang
> > > > with a compile-time check for whether some type is defined or not,
> > > > which would allow to guard every type and only declare it
> > > > conditionally, if it's missing. But that's just an idea at this point.
> > >
> > > Thanks Andrii! We're not looking at user-space code but the BPF code.
> > > The prefix idea comes from a way to solve this problem in C++ with
> > > namespaces:
> > >
> > > namespace vmlinux {
> > > #include "vmlinux.h"
> > > }
> > >
> > > As the BPF programs are C code then the prefix acts like the
> > > namespace. It seems strange to need to extend the language.
> >
> > This is a classic case of jumping to designing a solution without
> > discussing a real problem first :)
> >
> > You don't need to use any of the kernel headers together with
> > vmlinux.h (and it won't work as well), because vmlinux.h is supposed
> > to have all the **used** types from the kernel. So BPF programs only
> > include vmlinux.h and few libbpf-provided headers with helpers. Which
> > is why I assumed that you are trying to use it from user-space. But
> > see below on what went wrong.
> >
> > >
> > > > Regardless, vmlinux.h is also very much Clang-specific, and shouldn't
> > > > work well with GCC. Could you elaborate on the specifics of the use
> > > > case you have in mind? That could help me see what might be the right
> > > > solution. Thanks!
> > >
> > > So the use-case is similar to btf_iter.h:
> > > https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/tree/tools/testing/selftests/bpf/progs/bpf_iter.h
> > > To avoid collisions with somewhat cleaner macro or not games.
> > >
> > > Prompted by your concern I was looking into changing bpf_iter.h to use
> > > a prefix to show what the difference would be like. I also think that
> > > there may be issues with our kernel and tool set up that may mean that
> > > the prefix is unnecessary, if I fix something else. Anyway, to give an
> > > example I needed to build the selftests but this is failing for me.
> > > What I see is:
> > >
> > > $ git clone git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git
> > > $ cd bpf-next
> > > $ make defconfig
> > > $ cat >>.config <<EOF
> > > CONFIG_DEBUG_INFO=y
> > > CONFIG_DEBUG_INFO_BTF=y
> > > EOF
> > > $ make -j all
> > > $ mkdir /tmp/selftests
> > > $ make O=/tmp/selftests/ TARGETS=bpf kselftest
> > > ...
> > > CLANG /tmp/selftests//kselftest/bpf/tools/build/bpftool/profiler.bpf.o
> > > skeleton/profiler.bpf.c:18:21: error: invalid application of 'sizeof'
> > > to an incomplete type 'struct bpf_perf_event_value'
> > > __uint(value_size, sizeof(struct bpf_perf_event_value));
> > > ^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> > >
> > > Checking with bpftool the vmlinux lacks struct bpf_perf_event_value
> > > but as this is unconditionally defined in bpf.h this seems wrong. Do
> > > you have any suggestions and getting a working build?
> >
> > It is unconditionally defined in bpf.h, but unless kernel code really
> > uses that type for something, compiler won't generate DWARF
> > information for that type, which subsequently won't get into BTF.
> > Adding CONFIG_DEBUG_INFO_BTF=y ensures you get BTF type info
> > generated, but only for subsystems that were compiled into vmlinux
> > according to your kernel config.
> >
> > In this case, default config doesn't enable CONFIG_BPF_EVENTS, which
> > is a requirement to compile kernel/trace/bpf_trace.c, which in turn
> > uses struct bpf_perf_event_value in the helper signature.
> >
> > So the solution in your case would be to use a slightly richer kernel
> > config, which enables more of the BPF subsystem. You can check
> > selftests/bpf/config for a list of options we typically enable to run
> > of selftests, for instance.
> >
>
> So we've discussed this and related issues today at BPF office hours
> and few more thoughts occurred to me after I left the call.

Thanks for the follow-up. I need to add the office hours to my schedule.

> You don't really have to use vmlinux.h, if it's inconvenient. Unless
> you want to use some internal kernel type that's not available in
> kernel-headers. Otherwise feel free to use normal kernel header
> includes and don't use vmlinux.h. If you are using BPF_CORE_READ(),
> any type is automatically CO-RE-relocatable, even if they come from
> #include <linux/whatever.h>. If you need to use direct memory accesses
> with programs like fentry/fexit, then adding:
>
> #pragma clang attribute push (__attribute__((preserve_access_index)),
> apply_to = record)
>
> before you include any headers would make types in those headers
> automatically CO-RE-relocatable even for direct memory accesses. So
> this is just something to keep in mind.
>
>
> But the way we've been handling this was like this.
>
> On BPF program side:
>
> #include "vmlinux.h"
> #include "my_custom_types.h"
>
> ...
>
>
> On user-space program side:
>
> #include <stdint.h> /* and whatever else is needed */
> #include "my_custom_types.h"
>
> Then in my_custom_types.h you just assume all the needed types are
> defined (in either vmlinux.h or in user-space header includes):
>
>
> struct my_struct {
> uint64_t whatever;
> };
>
> So far worked fine. It still sucks you can't include some of the
> kernel headers to get some useful macro, but to solve that we'd need
> Clang extension to check that some type X is already defined, as we
> discussed in the call.
>
> Hope this helps a bit.

Thanks, I was scratching around for examples because I was using a
kernel that wasn't providing me even the values present in bpf.h. I
looked at the bpf selftests as hopeful best practice, but that's where
I saw the use of macros to move definitions out of the way:
https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/tree/tools/testing/selftests/bpf/progs/bpf_iter.h
This felt like my point of pain, so perhaps that code needs to carry a
warning. Using #define to rename types, as in that file, doesn't scale
for something like bpf.h and so this patch - which is intended to feel
like a use of namespaces. There are related style issues (from the
#define renaming) in Google's build system because of the use of
modules [1] where this kind of "textual" use of headers is considered
an issue.

I'm wondering, following this conversation whether there is some tech
debt cleanup that could be done. For example, on the perf side I found
that BPF errors were being swallowed:
https://lore.kernel.org/lkml/[email protected]/
Perf is defining its own bpf.h:
https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/tree/tools/perf/include/bpf/bpf.h
and perhaps that needs to be rethought to be more aligned with CO-RE
and vmlinux.h.
It would be nice if selftests could do a better job of building
dependencies with the necessary config requirements. There's a lot of
feeling around to make these things work, which seems less than ideal.

Thanks,
Ian

[1] https://clang.llvm.org/docs/Modules.html

> [...]

2020-08-07 00:30:45

by Andrii Nakryiko

[permalink] [raw]

Subject: Re: [RFC PATCH] bpftool btf: Add prefix option to dump C

On Thu, Aug 6, 2020 at 12:42 PM Ian Rogers <[email protected]> wrote:
>
> On Thu, Aug 6, 2020 at 10:58 AM Andrii Nakryiko
> <[email protected]> wrote:
> >
> > On Fri, Jul 31, 2020 at 8:47 PM Andrii Nakryiko
> > <[email protected]> wrote:
> > >
> > > On Fri, Jul 31, 2020 at 6:47 PM Ian Rogers <[email protected]> wrote:
> > > >
> > > > On Tue, Jul 21, 2020 at 11:58 PM Andrii Nakryiko
> > > > <[email protected]> wrote:
> > > > >
> > > > > On Tue, Jul 21, 2020 at 10:44 PM Ian Rogers <[email protected]> wrote:
> > > > > >
> > > > > > When bpftool dumps types and enum members into a header file for
> > > > > > inclusion the names match those in the original source. If the same
> > > > > > header file needs to be included in the original source and the bpf
> > > > > > program, the names of structs, unions, typedefs and enum members will
> > > > > > have naming collisions.
> > > > >
> > > > > vmlinux.h is not really intended to be used from user-space, because
> > > > > it's incompatible with pretty much any other header that declares any
> > > > > type. Ideally we should make this better, but that might require some
> > > > > compiler support. We've been discussing with Yonghong extending Clang
> > > > > with a compile-time check for whether some type is defined or not,
> > > > > which would allow to guard every type and only declare it
> > > > > conditionally, if it's missing. But that's just an idea at this point.
> > > >
> > > > Thanks Andrii! We're not looking at user-space code but the BPF code.
> > > > The prefix idea comes from a way to solve this problem in C++ with
> > > > namespaces:
> > > >
> > > > namespace vmlinux {
> > > > #include "vmlinux.h"
> > > > }
> > > >
> > > > As the BPF programs are C code then the prefix acts like the
> > > > namespace. It seems strange to need to extend the language.
> > >
> > > This is a classic case of jumping to designing a solution without
> > > discussing a real problem first :)
> > >
> > > You don't need to use any of the kernel headers together with
> > > vmlinux.h (and it won't work as well), because vmlinux.h is supposed
> > > to have all the **used** types from the kernel. So BPF programs only
> > > include vmlinux.h and few libbpf-provided headers with helpers. Which
> > > is why I assumed that you are trying to use it from user-space. But
> > > see below on what went wrong.
> > >
> > > >
> > > > > Regardless, vmlinux.h is also very much Clang-specific, and shouldn't
> > > > > work well with GCC. Could you elaborate on the specifics of the use
> > > > > case you have in mind? That could help me see what might be the right
> > > > > solution. Thanks!
> > > >
> > > > So the use-case is similar to btf_iter.h:
> > > > https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/tree/tools/testing/selftests/bpf/progs/bpf_iter.h
> > > > To avoid collisions with somewhat cleaner macro or not games.
> > > >
> > > > Prompted by your concern I was looking into changing bpf_iter.h to use
> > > > a prefix to show what the difference would be like. I also think that
> > > > there may be issues with our kernel and tool set up that may mean that
> > > > the prefix is unnecessary, if I fix something else. Anyway, to give an
> > > > example I needed to build the selftests but this is failing for me.
> > > > What I see is:
> > > >
> > > > $ git clone git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git
> > > > $ cd bpf-next
> > > > $ make defconfig
> > > > $ cat >>.config <<EOF
> > > > CONFIG_DEBUG_INFO=y
> > > > CONFIG_DEBUG_INFO_BTF=y
> > > > EOF
> > > > $ make -j all
> > > > $ mkdir /tmp/selftests
> > > > $ make O=/tmp/selftests/ TARGETS=bpf kselftest
> > > > ...
> > > > CLANG /tmp/selftests//kselftest/bpf/tools/build/bpftool/profiler.bpf.o
> > > > skeleton/profiler.bpf.c:18:21: error: invalid application of 'sizeof'
> > > > to an incomplete type 'struct bpf_perf_event_value'
> > > > __uint(value_size, sizeof(struct bpf_perf_event_value));
> > > > ^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> > > >
> > > > Checking with bpftool the vmlinux lacks struct bpf_perf_event_value
> > > > but as this is unconditionally defined in bpf.h this seems wrong. Do
> > > > you have any suggestions and getting a working build?
> > >
> > > It is unconditionally defined in bpf.h, but unless kernel code really
> > > uses that type for something, compiler won't generate DWARF
> > > information for that type, which subsequently won't get into BTF.
> > > Adding CONFIG_DEBUG_INFO_BTF=y ensures you get BTF type info
> > > generated, but only for subsystems that were compiled into vmlinux
> > > according to your kernel config.
> > >
> > > In this case, default config doesn't enable CONFIG_BPF_EVENTS, which
> > > is a requirement to compile kernel/trace/bpf_trace.c, which in turn
> > > uses struct bpf_perf_event_value in the helper signature.
> > >
> > > So the solution in your case would be to use a slightly richer kernel
> > > config, which enables more of the BPF subsystem. You can check
> > > selftests/bpf/config for a list of options we typically enable to run
> > > of selftests, for instance.
> > >
> >
> > So we've discussed this and related issues today at BPF office hours
> > and few more thoughts occurred to me after I left the call.
>
> Thanks for the follow-up. I need to add the office hours to my schedule.
>
> > You don't really have to use vmlinux.h, if it's inconvenient. Unless
> > you want to use some internal kernel type that's not available in
> > kernel-headers. Otherwise feel free to use normal kernel header
> > includes and don't use vmlinux.h. If you are using BPF_CORE_READ(),
> > any type is automatically CO-RE-relocatable, even if they come from
> > #include <linux/whatever.h>. If you need to use direct memory accesses
> > with programs like fentry/fexit, then adding:
> >
> > #pragma clang attribute push (__attribute__((preserve_access_index)),
> > apply_to = record)
> >
> > before you include any headers would make types in those headers
> > automatically CO-RE-relocatable even for direct memory accesses. So
> > this is just something to keep in mind.
> >
> >
> > But the way we've been handling this was like this.
> >
> > On BPF program side:
> >
> > #include "vmlinux.h"
> > #include "my_custom_types.h"
> >
> > ...
> >
> >
> > On user-space program side:
> >
> > #include <stdint.h> /* and whatever else is needed */
> > #include "my_custom_types.h"
> >
> > Then in my_custom_types.h you just assume all the needed types are
> > defined (in either vmlinux.h or in user-space header includes):
> >
> >
> > struct my_struct {
> > uint64_t whatever;
> > };
> >
> > So far worked fine. It still sucks you can't include some of the
> > kernel headers to get some useful macro, but to solve that we'd need
> > Clang extension to check that some type X is already defined, as we
> > discussed in the call.
> >
> > Hope this helps a bit.
>
> Thanks, I was scratching around for examples because I was using a
> kernel that wasn't providing me even the values present in bpf.h. I
> looked at the bpf selftests as hopeful best practice, but that's where
> I saw the use of macros to move definitions out of the way:
> https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/tree/tools/testing/selftests/bpf/progs/bpf_iter.h
> This felt like my point of pain, so perhaps that code needs to carry a
> warning. Using #define to rename types, as in that file, doesn't scale
> for something like bpf.h and so this patch - which is intended to feel
> like a use of namespaces. There are related style issues (from the
> #define renaming) in Google's build system because of the use of
> modules [1] where this kind of "textual" use of headers is considered
> an issue.

This #define rename approach is definitely not a "best practice", but
we need it for selftests to be able to **compile** them against older
kernels. We need that as part of libbpf CI in its Github mirror. So we
unconditionally undefine those bpf_iter types, just in case we are
compiling on the latest kernels that already have those types.
selftests/bpf are purposefully testing all the latest bleeding-edge
features and generally assume latest Clang and kernel, so it has its
own specifics.

If you are looking for more realistic examples, consider looking at
libbpf-tools in BCC repo ([0]). Those are nice self-contained
libbpf/CO-RE-based tools. They use pre-generated and checked-in
vmlinux.h, which is much more convenient logistically, than generating
vmlinux.h on the fly. That, of course, depends on specific build
system organization, but we do pre-generate vmlinux.h at Facebook for
our production use-cases as well.

[0] https://github.com/iovisor/bcc/tree/master/libbpf-tools

>
> I'm wondering, following this conversation whether there is some tech
> debt cleanup that could be done. For example, on the perf side I found
> that BPF errors were being swallowed:
> https://lore.kernel.org/lkml/[email protected]/
> Perf is defining its own bpf.h:
> https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/tree/tools/perf/include/bpf/bpf.h
> and perhaps that needs to be rethought to be more aligned with CO-RE
> and vmlinux.h.
> It would be nice if selftests could do a better job of building
> dependencies with the necessary config requirements. There's a lot of
> feeling around to make these things work, which seems less than ideal.

There is always some amount of tech debt, for sure. But I'm also not
sure how selftests/bpf can force kernel config on users. It provides
required config in selftests/bpf/config, but it's really easy to miss
it, if you don't know about it already. But then again, even that is
not enough for real-world-applicable vmlinux.h, you need to use one of
real production kernels to generate vmlinux.h that would work well for
production use cases. That's what we are also doing at Facebook, we
try to follow the latest production kernel releases and periodically
re-generate vmlinux.h to have all the new types. Hope this helps to
clarify a bit.

>
> Thanks,
> Ian
>
> [1] https://clang.llvm.org/docs/Modules.html
>
> > [...]