2021-01-14 13:44:13

by Jiri Olsa

[permalink] [raw]
Subject: [PATCH bpf-next 2/3] bpf: Add size arg to build_id_parse function

It's possible to have other build id types (other than default SHA1).
Currently there's also ld support for MD5 build id.

Adding a size argument to the build_id_parse function, which returns (if the
size pointer is non-NULL) the size of the parsed build id, so we can recognize
the build id type.

Cc: Alexei Starovoitov <[email protected]>
Cc: Song Liu <[email protected]>
Signed-off-by: Jiri Olsa <[email protected]>
---
include/linux/buildid.h | 3 ++-
kernel/bpf/stackmap.c | 2 +-
lib/buildid.c | 29 +++++++++++++++++++++--------
3 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/include/linux/buildid.h b/include/linux/buildid.h
index 08028a212589..40232f90db6e 100644
--- a/include/linux/buildid.h
+++ b/include/linux/buildid.h
@@ -6,6 +6,7 @@

#define BUILD_ID_SIZE_MAX 20

-int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id);
+int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id,
+ __u32 *size);

#endif
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 55d254a59f07..cabaf7db8efc 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -189,7 +189,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,

for (i = 0; i < trace_nr; i++) {
vma = find_vma(current->mm, ips[i]);
- if (!vma || build_id_parse(vma, id_offs[i].build_id)) {
+ if (!vma || build_id_parse(vma, id_offs[i].build_id, NULL)) {
/* per entry fall back to ips */
id_offs[i].status = BPF_STACK_BUILD_ID_IP;
id_offs[i].ip = ips[i];
diff --git a/lib/buildid.c b/lib/buildid.c
index 4a4f520c0e29..6156997c3895 100644
--- a/lib/buildid.c
+++ b/lib/buildid.c
@@ -12,6 +12,7 @@
*/
static inline int parse_build_id(void *page_addr,
unsigned char *build_id,
+ __u32 *size,
void *note_start,
Elf32_Word note_size)
{
@@ -38,6 +39,8 @@ static inline int parse_build_id(void *page_addr,
nhdr->n_descsz);
memset(build_id + nhdr->n_descsz, 0,
BUILD_ID_SIZE_MAX - nhdr->n_descsz);
+ if (size)
+ *size = nhdr->n_descsz;
return 0;
}
new_offs = note_offs + sizeof(Elf32_Nhdr) +
@@ -50,7 +53,8 @@ static inline int parse_build_id(void *page_addr,
}

/* Parse build ID from 32-bit ELF */
-static int get_build_id_32(void *page_addr, unsigned char *build_id)
+static int get_build_id_32(void *page_addr, unsigned char *build_id,
+ __u32 *size)
{
Elf32_Ehdr *ehdr = (Elf32_Ehdr *)page_addr;
Elf32_Phdr *phdr;
@@ -65,7 +69,7 @@ static int get_build_id_32(void *page_addr, unsigned char *build_id)

for (i = 0; i < ehdr->e_phnum; ++i) {
if (phdr[i].p_type == PT_NOTE &&
- !parse_build_id(page_addr, build_id,
+ !parse_build_id(page_addr, build_id, size,
page_addr + phdr[i].p_offset,
phdr[i].p_filesz))
return 0;
@@ -74,7 +78,8 @@ static int get_build_id_32(void *page_addr, unsigned char *build_id)
}

/* Parse build ID from 64-bit ELF */
-static int get_build_id_64(void *page_addr, unsigned char *build_id)
+static int get_build_id_64(void *page_addr, unsigned char *build_id,
+ __u32 *size)
{
Elf64_Ehdr *ehdr = (Elf64_Ehdr *)page_addr;
Elf64_Phdr *phdr;
@@ -89,7 +94,7 @@ static int get_build_id_64(void *page_addr, unsigned char *build_id)

for (i = 0; i < ehdr->e_phnum; ++i) {
if (phdr[i].p_type == PT_NOTE &&
- !parse_build_id(page_addr, build_id,
+ !parse_build_id(page_addr, build_id, size,
page_addr + phdr[i].p_offset,
phdr[i].p_filesz))
return 0;
@@ -97,8 +102,16 @@ static int get_build_id_64(void *page_addr, unsigned char *build_id)
return -EINVAL;
}

-/* Parse build ID of ELF file mapped to vma */
-int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id)
+/*
+ * Parse build ID of ELF file mapped to vma
+ * @vma: vma object
+ * @build_id: buffer to store build id, at least BUILD_ID_SIZE_MAX long
+ * @size: returns actual build id size in case of success
+ *
+ * Returns 0 on success, otherwise error (< 0).
+ */
+int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id,
+ __u32 *size)
{
Elf32_Ehdr *ehdr;
struct page *page;
@@ -126,9 +139,9 @@ int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id)
goto out;

if (ehdr->e_ident[EI_CLASS] == ELFCLASS32)
- ret = get_build_id_32(page_addr, build_id);
+ ret = get_build_id_32(page_addr, build_id, size);
else if (ehdr->e_ident[EI_CLASS] == ELFCLASS64)
- ret = get_build_id_64(page_addr, build_id);
+ ret = get_build_id_64(page_addr, build_id, size);
out:
kunmap_atomic(page_addr);
put_page(page);
--
2.26.2


2021-01-14 19:00:27

by Yonghong Song

[permalink] [raw]
Subject: Re: [PATCH bpf-next 2/3] bpf: Add size arg to build_id_parse function



On 1/14/21 5:40 AM, Jiri Olsa wrote:
> It's possible to have other build id types (other than default SHA1).
> Currently there's also ld support for MD5 build id.

Currently, bpf build_id based stackmap does not return the size of
the build_id. Did you see an issue here? I guess user space can check
the length of non-zero bits of the build id to decide what kind of
type it is, right?

>
> Adding size argument to build_id_parse function, that returns (if defined)
> size of the parsed build id, so we can recognize the build id type.
>
> Cc: Alexei Starovoitov <[email protected]>
> Cc: Song Liu <[email protected]>
> Signed-off-by: Jiri Olsa <[email protected]>
> ---
> include/linux/buildid.h | 3 ++-
> kernel/bpf/stackmap.c | 2 +-
> lib/buildid.c | 29 +++++++++++++++++++++--------
> 3 files changed, 24 insertions(+), 10 deletions(-)
>
> diff --git a/include/linux/buildid.h b/include/linux/buildid.h
> index 08028a212589..40232f90db6e 100644
> --- a/include/linux/buildid.h
> +++ b/include/linux/buildid.h
> @@ -6,6 +6,7 @@
>
> #define BUILD_ID_SIZE_MAX 20
>
> -int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id);
> +int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id,
> + __u32 *size);
>
> #endif
> diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
> index 55d254a59f07..cabaf7db8efc 100644
> --- a/kernel/bpf/stackmap.c
> +++ b/kernel/bpf/stackmap.c
> @@ -189,7 +189,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
>
> for (i = 0; i < trace_nr; i++) {
> vma = find_vma(current->mm, ips[i]);
> - if (!vma || build_id_parse(vma, id_offs[i].build_id)) {
> + if (!vma || build_id_parse(vma, id_offs[i].build_id, NULL)) {
> /* per entry fall back to ips */
> id_offs[i].status = BPF_STACK_BUILD_ID_IP;
> id_offs[i].ip = ips[i];
> diff --git a/lib/buildid.c b/lib/buildid.c
> index 4a4f520c0e29..6156997c3895 100644
> --- a/lib/buildid.c
> +++ b/lib/buildid.c
> @@ -12,6 +12,7 @@
> */
> static inline int parse_build_id(void *page_addr,
> unsigned char *build_id,
> + __u32 *size,
> void *note_start,
> Elf32_Word note_size)
> {
> @@ -38,6 +39,8 @@ static inline int parse_build_id(void *page_addr,
> nhdr->n_descsz);
> memset(build_id + nhdr->n_descsz, 0,
> BUILD_ID_SIZE_MAX - nhdr->n_descsz);
> + if (size)
> + *size = nhdr->n_descsz;
> return 0;
> }
> new_offs = note_offs + sizeof(Elf32_Nhdr) +
> @@ -50,7 +53,8 @@ static inline int parse_build_id(void *page_addr,
> }
>
[...]

2021-01-14 20:05:43

by Jiri Olsa

[permalink] [raw]
Subject: Re: [PATCH bpf-next 2/3] bpf: Add size arg to build_id_parse function

On Thu, Jan 14, 2021 at 10:56:33AM -0800, Yonghong Song wrote:
>
>
> On 1/14/21 5:40 AM, Jiri Olsa wrote:
> > It's possible to have other build id types (other than default SHA1).
> > Currently there's also ld support for MD5 build id.
>
> Currently, bpf build_id based stackmap does not returns the size of
> the build_id. Did you see an issue here? I guess user space can check
> the length of non-zero bits of the build id to decide what kind of
> type it is, right?

you can have zero bytes in the build id hash, so you need to get the size

I never saw MD5 being used in practice, just SHA1, but we added the
size to be complete and make sure we'll fit with the build id, because
there's only limited space in the mmap2 event

jirka

>
> >
> > Adding size argument to build_id_parse function, that returns (if defined)
> > size of the parsed build id, so we can recognize the build id type.
> >
> > Cc: Alexei Starovoitov <[email protected]>
> > Cc: Song Liu <[email protected]>
> > Signed-off-by: Jiri Olsa <[email protected]>
> > ---
> > include/linux/buildid.h | 3 ++-
> > kernel/bpf/stackmap.c | 2 +-
> > lib/buildid.c | 29 +++++++++++++++++++++--------
> > 3 files changed, 24 insertions(+), 10 deletions(-)
> >
> > diff --git a/include/linux/buildid.h b/include/linux/buildid.h
> > index 08028a212589..40232f90db6e 100644
> > --- a/include/linux/buildid.h
> > +++ b/include/linux/buildid.h
> > @@ -6,6 +6,7 @@
> > #define BUILD_ID_SIZE_MAX 20
> > -int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id);
> > +int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id,
> > + __u32 *size);
> > #endif
> > diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
> > index 55d254a59f07..cabaf7db8efc 100644
> > --- a/kernel/bpf/stackmap.c
> > +++ b/kernel/bpf/stackmap.c
> > @@ -189,7 +189,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
> > for (i = 0; i < trace_nr; i++) {
> > vma = find_vma(current->mm, ips[i]);
> > - if (!vma || build_id_parse(vma, id_offs[i].build_id)) {
> > + if (!vma || build_id_parse(vma, id_offs[i].build_id, NULL)) {
> > /* per entry fall back to ips */
> > id_offs[i].status = BPF_STACK_BUILD_ID_IP;
> > id_offs[i].ip = ips[i];
> > diff --git a/lib/buildid.c b/lib/buildid.c
> > index 4a4f520c0e29..6156997c3895 100644
> > --- a/lib/buildid.c
> > +++ b/lib/buildid.c
> > @@ -12,6 +12,7 @@
> > */
> > static inline int parse_build_id(void *page_addr,
> > unsigned char *build_id,
> > + __u32 *size,
> > void *note_start,
> > Elf32_Word note_size)
> > {
> > @@ -38,6 +39,8 @@ static inline int parse_build_id(void *page_addr,
> > nhdr->n_descsz);
> > memset(build_id + nhdr->n_descsz, 0,
> > BUILD_ID_SIZE_MAX - nhdr->n_descsz);
> > + if (size)
> > + *size = nhdr->n_descsz;
> > return 0;
> > }
> > new_offs = note_offs + sizeof(Elf32_Nhdr) +
> > @@ -50,7 +53,8 @@ static inline int parse_build_id(void *page_addr,
> > }
> [...]
>

2021-01-14 21:08:50

by Yonghong Song

[permalink] [raw]
Subject: Re: [PATCH bpf-next 2/3] bpf: Add size arg to build_id_parse function



On 1/14/21 12:01 PM, Jiri Olsa wrote:
> On Thu, Jan 14, 2021 at 10:56:33AM -0800, Yonghong Song wrote:
>>
>>
>> On 1/14/21 5:40 AM, Jiri Olsa wrote:
>>> It's possible to have other build id types (other than default SHA1).
>>> Currently there's also ld support for MD5 build id.
>>
>> Currently, bpf build_id based stackmap does not returns the size of
>> the build_id. Did you see an issue here? I guess user space can check
>> the length of non-zero bits of the build id to decide what kind of
>> type it is, right?
>
> you can have zero bytes in the build id hash, so you need to get the size
>
> I never saw MD5 being used in practise just SHA1, but we added the
> size to be complete and make sure we'll fit with build id, because
> there's only limited space in mmap2 event

I am asking to check whether we should extend uapi struct
bpf_stack_build_id to include build_id_size as well. I guess
we can delay this until a real use case.


>
> jirka
>
>>
>>>
>>> Adding size argument to build_id_parse function, that returns (if defined)
>>> size of the parsed build id, so we can recognize the build id type.
>>>
>>> Cc: Alexei Starovoitov <[email protected]>
>>> Cc: Song Liu <[email protected]>
>>> Signed-off-by: Jiri Olsa <[email protected]>
>>> ---
>>> include/linux/buildid.h | 3 ++-
>>> kernel/bpf/stackmap.c | 2 +-
>>> lib/buildid.c | 29 +++++++++++++++++++++--------
>>> 3 files changed, 24 insertions(+), 10 deletions(-)
>>>
>>> diff --git a/include/linux/buildid.h b/include/linux/buildid.h
>>> index 08028a212589..40232f90db6e 100644
>>> --- a/include/linux/buildid.h
>>> +++ b/include/linux/buildid.h
>>> @@ -6,6 +6,7 @@
>>> #define BUILD_ID_SIZE_MAX 20
>>> -int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id);
>>> +int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id,
>>> + __u32 *size);
>>> #endif
>>> diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
>>> index 55d254a59f07..cabaf7db8efc 100644
>>> --- a/kernel/bpf/stackmap.c
>>> +++ b/kernel/bpf/stackmap.c
>>> @@ -189,7 +189,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
>>> for (i = 0; i < trace_nr; i++) {
>>> vma = find_vma(current->mm, ips[i]);
>>> - if (!vma || build_id_parse(vma, id_offs[i].build_id)) {
>>> + if (!vma || build_id_parse(vma, id_offs[i].build_id, NULL)) {
>>> /* per entry fall back to ips */
>>> id_offs[i].status = BPF_STACK_BUILD_ID_IP;
>>> id_offs[i].ip = ips[i];
>>> diff --git a/lib/buildid.c b/lib/buildid.c
>>> index 4a4f520c0e29..6156997c3895 100644
>>> --- a/lib/buildid.c
>>> +++ b/lib/buildid.c
>>> @@ -12,6 +12,7 @@
>>> */
>>> static inline int parse_build_id(void *page_addr,
>>> unsigned char *build_id,
>>> + __u32 *size,
>>> void *note_start,
>>> Elf32_Word note_size)
>>> {
>>> @@ -38,6 +39,8 @@ static inline int parse_build_id(void *page_addr,
>>> nhdr->n_descsz);
>>> memset(build_id + nhdr->n_descsz, 0,
>>> BUILD_ID_SIZE_MAX - nhdr->n_descsz);
>>> + if (size)
>>> + *size = nhdr->n_descsz;
>>> return 0;
>>> }
>>> new_offs = note_offs + sizeof(Elf32_Nhdr) +
>>> @@ -50,7 +53,8 @@ static inline int parse_build_id(void *page_addr,
>>> }
>> [...]
>>
>

2021-01-14 22:06:12

by Jiri Olsa

[permalink] [raw]
Subject: Re: [PATCH bpf-next 2/3] bpf: Add size arg to build_id_parse function

On Thu, Jan 14, 2021 at 01:05:33PM -0800, Yonghong Song wrote:
>
>
> On 1/14/21 12:01 PM, Jiri Olsa wrote:
> > On Thu, Jan 14, 2021 at 10:56:33AM -0800, Yonghong Song wrote:
> > >
> > >
> > > On 1/14/21 5:40 AM, Jiri Olsa wrote:
> > > > It's possible to have other build id types (other than default SHA1).
> > > > Currently there's also ld support for MD5 build id.
> > >
> > > Currently, bpf build_id based stackmap does not returns the size of
> > > the build_id. Did you see an issue here? I guess user space can check
> > > the length of non-zero bits of the build id to decide what kind of
> > > type it is, right?
> >
> > you can have zero bytes in the build id hash, so you need to get the size
> >
> > I never saw MD5 being used in practise just SHA1, but we added the
> > size to be complete and make sure we'll fit with build id, because
> > there's only limited space in mmap2 event
>
> I am asking to check whether we should extend uapi struct
> bpf_stack_build_id to include build_id_size as well. I guess
> we can delay this until a real use case.

right, we can try to make some MD5 build id binaries and check if it
explodes with some bcc tools, but I don't expect that. I'll try
to find some time for that

the perf tool uses build ids in the .debug cache as file links, and we had
a few issues there

jirka

2021-01-14 23:47:52

by Yonghong Song

[permalink] [raw]
Subject: Re: [PATCH bpf-next 2/3] bpf: Add size arg to build_id_parse function



On 1/14/21 2:02 PM, Jiri Olsa wrote:
> On Thu, Jan 14, 2021 at 01:05:33PM -0800, Yonghong Song wrote:
>>
>>
>> On 1/14/21 12:01 PM, Jiri Olsa wrote:
>>> On Thu, Jan 14, 2021 at 10:56:33AM -0800, Yonghong Song wrote:
>>>>
>>>>
>>>> On 1/14/21 5:40 AM, Jiri Olsa wrote:
>>>>> It's possible to have other build id types (other than default SHA1).
>>>>> Currently there's also ld support for MD5 build id.
>>>>
>>>> Currently, bpf build_id based stackmap does not returns the size of
>>>> the build_id. Did you see an issue here? I guess user space can check
>>>> the length of non-zero bits of the build id to decide what kind of
>>>> type it is, right?
>>>
>>> you can have zero bytes in the build id hash, so you need to get the size
>>>
>>> I never saw MD5 being used in practise just SHA1, but we added the
>>> size to be complete and make sure we'll fit with build id, because
>>> there's only limited space in mmap2 event
>>
>> I am asking to check whether we should extend uapi struct
>> bpf_stack_build_id to include build_id_size as well. I guess
>> we can delay this until a real use case.
>
> right, we can try make some MD5 build id binaries and check if it
> explodes with some bcc tools, but I don't expect that.. I'll try
> to find some time for that

Thanks. We may have issues on bcc side. For build_id collected in
kernel, bcc always generates a length-20 string. But for user
binaries, the build_id string length is equal to actual size of
the build_id. They may not match (MD5 length is 16).
The fix is probably to append '0's (up to length 20) for user
binary build_id's.

I guess MD5 is very seldom used. I will wait if you can reproduce
the issue and then we might fix it.

>
> perf tool uses build ids in .debug cache as file links, and we had
> few isues there
>
> jirka
>

2021-01-15 05:47:35

by Alexei Starovoitov

[permalink] [raw]
Subject: Re: [PATCH bpf-next 2/3] bpf: Add size arg to build_id_parse function

On Thu, Jan 14, 2021 at 3:44 PM Yonghong Song <[email protected]> wrote:
>
>
>
> On 1/14/21 2:02 PM, Jiri Olsa wrote:
> > On Thu, Jan 14, 2021 at 01:05:33PM -0800, Yonghong Song wrote:
> >>
> >>
> >> On 1/14/21 12:01 PM, Jiri Olsa wrote:
> >>> On Thu, Jan 14, 2021 at 10:56:33AM -0800, Yonghong Song wrote:
> >>>>
> >>>>
> >>>> On 1/14/21 5:40 AM, Jiri Olsa wrote:
> >>>>> It's possible to have other build id types (other than default SHA1).
> >>>>> Currently there's also ld support for MD5 build id.
> >>>>
> >>>> Currently, bpf build_id based stackmap does not returns the size of
> >>>> the build_id. Did you see an issue here? I guess user space can check
> >>>> the length of non-zero bits of the build id to decide what kind of
> >>>> type it is, right?
> >>>
> >>> you can have zero bytes in the build id hash, so you need to get the size
> >>>
> >>> I never saw MD5 being used in practise just SHA1, but we added the
> >>> size to be complete and make sure we'll fit with build id, because
> >>> there's only limited space in mmap2 event
> >>
> >> I am asking to check whether we should extend uapi struct
> >> bpf_stack_build_id to include build_id_size as well. I guess
> >> we can delay this until a real use case.
> >
> > right, we can try make some MD5 build id binaries and check if it
> > explodes with some bcc tools, but I don't expect that.. I'll try
> > to find some time for that
>
> Thanks. We may have issues on bcc side. For build_id collected in
> kernel, bcc always generates a length-20 string. But for user
> binaries, the build_id string length is equal to actual size of
> the build_id. They may not match (MD5 length is 16).
> The fix is probably to append '0's (up to length 20) for user
> binary build_id's.
>
> I guess MD5 is very seldom used. I will wait if you can reproduce
> the issue and then we might fix it.

Indeed.
Jiri, please check whether md5 is really an issue.
Sounds like we have to do something on the kernel side.
Hopefully zero padding will be enough.
I would prefer to avoid extending uapi struct to cover rare case.

I've applied the series, since this issue sounds orthogonal.

2021-01-27 19:51:32

by Jiri Olsa

[permalink] [raw]
Subject: Re: [PATCH bpf-next 2/3] bpf: Add size arg to build_id_parse function

On Thu, Jan 14, 2021 at 07:47:20PM -0800, Alexei Starovoitov wrote:
> On Thu, Jan 14, 2021 at 3:44 PM Yonghong Song <[email protected]> wrote:
> >
> >
> >
> > On 1/14/21 2:02 PM, Jiri Olsa wrote:
> > > On Thu, Jan 14, 2021 at 01:05:33PM -0800, Yonghong Song wrote:
> > >>
> > >>
> > >> On 1/14/21 12:01 PM, Jiri Olsa wrote:
> > >>> On Thu, Jan 14, 2021 at 10:56:33AM -0800, Yonghong Song wrote:
> > >>>>
> > >>>>
> > >>>> On 1/14/21 5:40 AM, Jiri Olsa wrote:
> > >>>>> It's possible to have other build id types (other than default SHA1).
> > >>>>> Currently there's also ld support for MD5 build id.
> > >>>>
> > >>>> Currently, bpf build_id based stackmap does not returns the size of
> > >>>> the build_id. Did you see an issue here? I guess user space can check
> > >>>> the length of non-zero bits of the build id to decide what kind of
> > >>>> type it is, right?
> > >>>
> > >>> you can have zero bytes in the build id hash, so you need to get the size
> > >>>
> > >>> I never saw MD5 being used in practise just SHA1, but we added the
> > >>> size to be complete and make sure we'll fit with build id, because
> > >>> there's only limited space in mmap2 event
> > >>
> > >> I am asking to check whether we should extend uapi struct
> > >> bpf_stack_build_id to include build_id_size as well. I guess
> > >> we can delay this until a real use case.
> > >
> > > right, we can try make some MD5 build id binaries and check if it
> > > explodes with some bcc tools, but I don't expect that.. I'll try
> > > to find some time for that
> >
> > Thanks. We may have issues on bcc side. For build_id collected in
> > kernel, bcc always generates a length-20 string. But for user
> > binaries, the build_id string length is equal to actual size of
> > the build_id. They may not match (MD5 length is 16).
> > The fix is probably to append '0's (up to length 20) for user
> > binary build_id's.
> >
> > I guess MD5 is very seldom used. I will wait if you can reproduce
> > the issue and then we might fix it.
>
> Indeed.
> Jiri, please check whether md5 is really an issue.
> Sounds like we have to do something on the kernel side.
> Hopefully zero padding will be enough.
> I would prefer to avoid extending uapi struct to cover rare case.

build_id_parse is already doing the zero padding, so we are ok

I tried several bcc tools over perf bench with md5 buildid and
the results looked ok

jirka

2021-01-27 19:52:24

by Yonghong Song

[permalink] [raw]
Subject: Re: [PATCH bpf-next 2/3] bpf: Add size arg to build_id_parse function



On 1/26/21 12:52 PM, Jiri Olsa wrote:
> On Thu, Jan 14, 2021 at 07:47:20PM -0800, Alexei Starovoitov wrote:
>> On Thu, Jan 14, 2021 at 3:44 PM Yonghong Song <[email protected]> wrote:
>>>
>>>
>>>
>>> On 1/14/21 2:02 PM, Jiri Olsa wrote:
>>>> On Thu, Jan 14, 2021 at 01:05:33PM -0800, Yonghong Song wrote:
>>>>>
>>>>>
>>>>> On 1/14/21 12:01 PM, Jiri Olsa wrote:
>>>>>> On Thu, Jan 14, 2021 at 10:56:33AM -0800, Yonghong Song wrote:
>>>>>>>
>>>>>>>
>>>>>>> On 1/14/21 5:40 AM, Jiri Olsa wrote:
>>>>>>>> It's possible to have other build id types (other than default SHA1).
>>>>>>>> Currently there's also ld support for MD5 build id.
>>>>>>>
>>>>>>> Currently, bpf build_id based stackmap does not returns the size of
>>>>>>> the build_id. Did you see an issue here? I guess user space can check
>>>>>>> the length of non-zero bits of the build id to decide what kind of
>>>>>>> type it is, right?
>>>>>>
>>>>>> you can have zero bytes in the build id hash, so you need to get the size
>>>>>>
>>>>>> I never saw MD5 being used in practise just SHA1, but we added the
>>>>>> size to be complete and make sure we'll fit with build id, because
>>>>>> there's only limited space in mmap2 event
>>>>>
>>>>> I am asking to check whether we should extend uapi struct
>>>>> bpf_stack_build_id to include build_id_size as well. I guess
>>>>> we can delay this until a real use case.
>>>>
>>>> right, we can try make some MD5 build id binaries and check if it
>>>> explodes with some bcc tools, but I don't expect that.. I'll try
>>>> to find some time for that
>>>
>>> Thanks. We may have issues on bcc side. For build_id collected in
>>> kernel, bcc always generates a length-20 string. But for user
>>> binaries, the build_id string length is equal to actual size of
>>> the build_id. They may not match (MD5 length is 16).
>>> The fix is probably to append '0's (up to length 20) for user
>>> binary build_id's.
>>>
>>> I guess MD5 is very seldom used. I will wait if you can reproduce
>>> the issue and then we might fix it.
>>
>> Indeed.
>> Jiri, please check whether md5 is really an issue.
>> Sounds like we have to do something on the kernel side.
>> Hopefully zero padding will be enough.
>> I would prefer to avoid extending uapi struct to cover rare case.
>
> build_id_parse is already doing the zero padding, so we are ok
>
> I tried several bcc tools over perf bench with md5 buildid and
> the results looked ok

Great. Thanks for confirmation!

>
> jirka
>