2024-06-04 14:13:37

by Arnaldo Carvalho de Melo

[permalink] [raw]
Subject: Re: perf 6.9-1 (archlinux) crashes during recording of cycles + raw_syscalls

On Tue, Jun 04, 2024 at 10:50:00AM -0300, Arnaldo Carvalho de Melo wrote:
> On Tue, Jun 04, 2024 at 01:44:18PM +0200, Milian Wolff wrote:
> > the following command crashes perf for me, is this still an issue upstream and
> > should I report it to arch for backporting? I cannot build the official perf/
> > core branch, so I am afraid I cannot test it myself there:

> > ```
> > sudo /usr/bin/perf record -z --call-graph dwarf -e cycles -e
> > raw_syscalls:sys_enter ls
> > ...
> > [ perf record: Woken up 3 times to write data ]
> > malloc(): invalid next size (unsorted)
> > Aborted
> > ```
> > Backtrace with GDB + debuginfod:
> > ```
> > malloc(): invalid next size (unsorted)

> I reproduced this all the way back to 6.8, trying to bisect now, thanks
> for the report,

Can you please try with the attached and perhaps provide your Tested-by?

Thanks,

- Arnaldo

From ab355e2c6b4cf641a9fff7af38059cf69ac712d5 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <[email protected]>
Date: Tue, 4 Jun 2024 11:00:22 -0300
Subject: [PATCH 1/1] Revert "perf record: Reduce memory for recording
PERF_RECORD_LOST_SAMPLES event"

This reverts commit 7d1405c71df21f6c394b8a885aa8a133f749fa22.

This causes segfaults in some cases, as reported by Milian:

```
sudo /usr/bin/perf record -z --call-graph dwarf -e cycles -e
raw_syscalls:sys_enter ls
...
[ perf record: Woken up 3 times to write data ]
malloc(): invalid next size (unsorted)
Aborted
```

Backtrace with GDB + debuginfod:

```
malloc(): invalid next size (unsorted)

Thread 1 "perf" received signal SIGABRT, Aborted.
__pthread_kill_implementation (threadid=<optimized out>, signo=signo@entry=6,
no_tid=no_tid@entry=0) at pthread_kill.c:44
Downloading source file /usr/src/debug/glibc/glibc/nptl/pthread_kill.c
44 return INTERNAL_SYSCALL_ERROR_P (ret) ? INTERNAL_SYSCALL_ERRNO
(ret) : 0;
(gdb) bt
#0 __pthread_kill_implementation (threadid=<optimized out>,
signo=signo@entry=6, no_tid=no_tid@entry=0) at pthread_kill.c:44
#1 0x00007ffff6ea8eb3 in __pthread_kill_internal (threadid=<optimized out>,
signo=6) at pthread_kill.c:78
#2 0x00007ffff6e50a30 in __GI_raise (sig=sig@entry=6) at ../sysdeps/posix/
raise.c:26
#3 0x00007ffff6e384c3 in __GI_abort () at abort.c:79
#4 0x00007ffff6e39354 in __libc_message_impl (fmt=fmt@entry=0x7ffff6fc22ea
"%s\n") at ../sysdeps/posix/libc_fatal.c:132
#5 0x00007ffff6eb3085 in malloc_printerr (str=str@entry=0x7ffff6fc5850
"malloc(): invalid next size (unsorted)") at malloc.c:5772
#6 0x00007ffff6eb657c in _int_malloc (av=av@entry=0x7ffff6ff6ac0
<main_arena>, bytes=bytes@entry=368) at malloc.c:4081
#7 0x00007ffff6eb877e in __libc_calloc (n=<optimized out>,
elem_size=<optimized out>) at malloc.c:3754
#8 0x000055555569bdb6 in perf_session.do_write_header ()
#9 0x00005555555a373a in __cmd_record.constprop.0 ()
#10 0x00005555555a6846 in cmd_record ()
#11 0x000055555564db7f in run_builtin ()
#12 0x000055555558ed77 in main ()
```

Valgrind memcheck:
```
==45136== Invalid write of size 8
==45136== at 0x2B38A5: perf_event__synthesize_id_sample (in /usr/bin/perf)
==45136== by 0x157069: __cmd_record.constprop.0 (in /usr/bin/perf)
==45136== by 0x15A845: cmd_record (in /usr/bin/perf)
==45136== by 0x201B7E: run_builtin (in /usr/bin/perf)
==45136== by 0x142D76: main (in /usr/bin/perf)
==45136== Address 0x6a866a8 is 0 bytes after a block of size 40 alloc'd
==45136== at 0x4849BF3: calloc (vg_replace_malloc.c:1675)
==45136== by 0x3574AB: zalloc (in /usr/bin/perf)
==45136== by 0x1570E0: __cmd_record.constprop.0 (in /usr/bin/perf)
==45136== by 0x15A845: cmd_record (in /usr/bin/perf)
==45136== by 0x201B7E: run_builtin (in /usr/bin/perf)
==45136== by 0x142D76: main (in /usr/bin/perf)
==45136==
==45136== Syscall param write(buf) points to unaddressable byte(s)
==45136== at 0x575953D: __libc_write (write.c:26)
==45136== by 0x575953D: write (write.c:24)
==45136== by 0x35761F: ion (in /usr/bin/perf)
==45136== by 0x357778: writen (in /usr/bin/perf)
==45136== by 0x1548F7: record__write (in /usr/bin/perf)
==45136== by 0x15708A: __cmd_record.constprop.0 (in /usr/bin/perf)
==45136== by 0x15A845: cmd_record (in /usr/bin/perf)
==45136== by 0x201B7E: run_builtin (in /usr/bin/perf)
==45136== by 0x142D76: main (in /usr/bin/perf)
==45136== Address 0x6a866a8 is 0 bytes after a block of size 40 alloc'd
==45136== at 0x4849BF3: calloc (vg_replace_malloc.c:1675)
==45136== by 0x3574AB: zalloc (in /usr/bin/perf)
==45136== by 0x1570E0: __cmd_record.constprop.0 (in /usr/bin/perf)
==45136== by 0x15A845: cmd_record (in /usr/bin/perf)
==45136== by 0x201B7E: run_builtin (in /usr/bin/perf)
==45136== by 0x142D76: main (in /usr/bin/perf)
==45136==
-----

Closes: https://lore.kernel.org/linux-perf-users/23879991.0LEYPuXRzz@milian-workstation/
Reported-by: Milian Wolff <[email protected]>
Cc: Adrian Hunter <[email protected]>
Cc: Ian Rogers <[email protected]>
Cc: Jiri Olsa <[email protected]>
Cc: Kan Liang <[email protected]>
Cc: Namhyung Kim <[email protected]>
Cc: [email protected] # 6.8+
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/builtin-record.c | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 66a3de8ac66186b0..0a8ba1323d64be6b 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -1956,8 +1956,7 @@ static void record__read_lost_samples(struct record *rec)

if (count.lost) {
if (!lost) {
- lost = zalloc(sizeof(*lost) +
- session->machines.host.id_hdr_size);
+ lost = zalloc(PERF_SAMPLE_MAX_SIZE);
if (!lost) {
pr_debug("Memory allocation failed\n");
return;
@@ -1973,8 +1972,7 @@ static void record__read_lost_samples(struct record *rec)
lost_count = perf_bpf_filter__lost_count(evsel);
if (lost_count) {
if (!lost) {
- lost = zalloc(sizeof(*lost) +
- session->machines.host.id_hdr_size);
+ lost = zalloc(PERF_SAMPLE_MAX_SIZE);
if (!lost) {
pr_debug("Memory allocation failed\n");
return;
--
2.45.1



2024-06-04 18:48:32

by Ian Rogers

[permalink] [raw]
Subject: Re: perf 6.9-1 (archlinux) crashes during recording of cycles + raw_syscalls

On Tue, Jun 4, 2024 at 7:12 AM Arnaldo Carvalho de Melo <[email protected]> wrote:
>
> On Tue, Jun 04, 2024 at 10:50:00AM -0300, Arnaldo Carvalho de Melo wrote:
> > On Tue, Jun 04, 2024 at 01:44:18PM +0200, Milian Wolff wrote:
> > > the following command crashes perf for me, is this still an issue upstream and
> > > should I report it to arch for backporting? I cannot build the official perf/
> > > core branch, so I am afraid I cannot test it myself there:
>
> > > ```
> > > sudo /usr/bin/perf record -z --call-graph dwarf -e cycles -e
> > > raw_syscalls:sys_enter ls
> > > ...
> > > [ perf record: Woken up 3 times to write data ]
> > > malloc(): invalid next size (unsorted)
> > > Aborted
> > > ```
> > > Backtrace with GDB + debuginfod:
> > > ```
> > > malloc(): invalid next size (unsorted)
>
> > I reproduced this all the way back to 6.8, trying to bisect now, thanks
> > for the report,
>
> Can you please try with the attached and perhaps provide your Tested-by?
>
> Thanks,
>
> - Arnaldo
>
> From ab355e2c6b4cf641a9fff7af38059cf69ac712d5 Mon Sep 17 00:00:00 2001
> From: Arnaldo Carvalho de Melo <[email protected]>
> Date: Tue, 4 Jun 2024 11:00:22 -0300
> Subject: [PATCH 1/1] Revert "perf record: Reduce memory for recording
> PERF_RECORD_LOST_SAMPLES event"
>
> This reverts commit 7d1405c71df21f6c394b8a885aa8a133f749fa22.

I think we should try to fight back reverts when possible. Reverts are
removing something somebody poured time and attention into. When a
regression has occurred then I think we should add the regression case
as a test.

> This causes segfaults in some cases, as reported by Milian:
>
> ```
> sudo /usr/bin/perf record -z --call-graph dwarf -e cycles -e
> raw_syscalls:sys_enter ls
> ...
> [ perf record: Woken up 3 times to write data ]
> malloc(): invalid next size (unsorted)
> Aborted
> ```
>
> Backtrace with GDB + debuginfod:
>
> ```
> malloc(): invalid next size (unsorted)
>
> Thread 1 "perf" received signal SIGABRT, Aborted.
> __pthread_kill_implementation (threadid=<optimized out>, signo=signo@entry=6,
> no_tid=no_tid@entry=0) at pthread_kill.c:44
> Downloading source file /usr/src/debug/glibc/glibc/nptl/pthread_kill.c
> 44 return INTERNAL_SYSCALL_ERROR_P (ret) ? INTERNAL_SYSCALL_ERRNO
> (ret) : 0;
> (gdb) bt
> #0 __pthread_kill_implementation (threadid=<optimized out>,
> signo=signo@entry=6, no_tid=no_tid@entry=0) at pthread_kill.c:44
> #1 0x00007ffff6ea8eb3 in __pthread_kill_internal (threadid=<optimized out>,
> signo=6) at pthread_kill.c:78
> #2 0x00007ffff6e50a30 in __GI_raise (sig=sig@entry=6) at ../sysdeps/posix/
> raise.c:26
> #3 0x00007ffff6e384c3 in __GI_abort () at abort.c:79
> #4 0x00007ffff6e39354 in __libc_message_impl (fmt=fmt@entry=0x7ffff6fc22ea
> "%s\n") at ../sysdeps/posix/libc_fatal.c:132
> #5 0x00007ffff6eb3085 in malloc_printerr (str=str@entry=0x7ffff6fc5850
> "malloc(): invalid next size (unsorted)") at malloc.c:5772
> #6 0x00007ffff6eb657c in _int_malloc (av=av@entry=0x7ffff6ff6ac0
> <main_arena>, bytes=bytes@entry=368) at malloc.c:4081
> #7 0x00007ffff6eb877e in __libc_calloc (n=<optimized out>,
> elem_size=<optimized out>) at malloc.c:3754
> #8 0x000055555569bdb6 in perf_session.do_write_header ()
> #9 0x00005555555a373a in __cmd_record.constprop.0 ()
> #10 0x00005555555a6846 in cmd_record ()
> #11 0x000055555564db7f in run_builtin ()
> #12 0x000055555558ed77 in main ()
> ```
>
> Valgrind memcheck:
> ```
> ==45136== Invalid write of size 8
> ==45136== at 0x2B38A5: perf_event__synthesize_id_sample (in /usr/bin/perf)

The write that is happening here is because an empty sample except for
the id value is written out:
https://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git/tree/tools/perf/util/synthetic-events.c?h=perf-tools-next#n1805
https://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git/tree/tools/perf/builtin-record.c?h=perf-tools-next#n1910
It'd been missed that the sample wasn't quite empty and needed space.

Rather than making the space 64kb again, can we not use
perf_event__sample_event_size or some max constant?

Thanks,
Ian

> ==45136== by 0x157069: __cmd_record.constprop.0 (in /usr/bin/perf)
> ==45136== by 0x15A845: cmd_record (in /usr/bin/perf)
> ==45136== by 0x201B7E: run_builtin (in /usr/bin/perf)
> ==45136== by 0x142D76: main (in /usr/bin/perf)
> ==45136== Address 0x6a866a8 is 0 bytes after a block of size 40 alloc'd
> ==45136== at 0x4849BF3: calloc (vg_replace_malloc.c:1675)
> ==45136== by 0x3574AB: zalloc (in /usr/bin/perf)
> ==45136== by 0x1570E0: __cmd_record.constprop.0 (in /usr/bin/perf)
> ==45136== by 0x15A845: cmd_record (in /usr/bin/perf)
> ==45136== by 0x201B7E: run_builtin (in /usr/bin/perf)
> ==45136== by 0x142D76: main (in /usr/bin/perf)
> ==45136==
> ==45136== Syscall param write(buf) points to unaddressable byte(s)
> ==45136== at 0x575953D: __libc_write (write.c:26)
> ==45136== by 0x575953D: write (write.c:24)
> ==45136== by 0x35761F: ion (in /usr/bin/perf)
> ==45136== by 0x357778: writen (in /usr/bin/perf)
> ==45136== by 0x1548F7: record__write (in /usr/bin/perf)
> ==45136== by 0x15708A: __cmd_record.constprop.0 (in /usr/bin/perf)
> ==45136== by 0x15A845: cmd_record (in /usr/bin/perf)
> ==45136== by 0x201B7E: run_builtin (in /usr/bin/perf)
> ==45136== by 0x142D76: main (in /usr/bin/perf)
> ==45136== Address 0x6a866a8 is 0 bytes after a block of size 40 alloc'd
> ==45136== at 0x4849BF3: calloc (vg_replace_malloc.c:1675)
> ==45136== by 0x3574AB: zalloc (in /usr/bin/perf)
> ==45136== by 0x1570E0: __cmd_record.constprop.0 (in /usr/bin/perf)
> ==45136== by 0x15A845: cmd_record (in /usr/bin/perf)
> ==45136== by 0x201B7E: run_builtin (in /usr/bin/perf)
> ==45136== by 0x142D76: main (in /usr/bin/perf)
> ==45136==
> -----
>
> Closes: https://lore.kernel.org/linux-perf-users/23879991.0LEYPuXRzz@milian-workstation/
> Reported-by: Milian Wolff <[email protected]>
> Cc: Adrian Hunter <[email protected]>
> Cc: Ian Rogers <[email protected]>
> Cc: Jiri Olsa <[email protected]>
> Cc: Kan Liang <[email protected]>
> Cc: Namhyung Kim <[email protected]>
> Cc: [email protected] # 6.8+
> Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
> ---
> tools/perf/builtin-record.c | 6 ++----
> 1 file changed, 2 insertions(+), 4 deletions(-)
>
> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> index 66a3de8ac66186b0..0a8ba1323d64be6b 100644
> --- a/tools/perf/builtin-record.c
> +++ b/tools/perf/builtin-record.c
> @@ -1956,8 +1956,7 @@ static void record__read_lost_samples(struct record *rec)
>
> if (count.lost) {
> if (!lost) {
> - lost = zalloc(sizeof(*lost) +
> - session->machines.host.id_hdr_size);
> + lost = zalloc(PERF_SAMPLE_MAX_SIZE);
> if (!lost) {
> pr_debug("Memory allocation failed\n");
> return;
> @@ -1973,8 +1972,7 @@ static void record__read_lost_samples(struct record *rec)
> lost_count = perf_bpf_filter__lost_count(evsel);
> if (lost_count) {
> if (!lost) {
> - lost = zalloc(sizeof(*lost) +
> - session->machines.host.id_hdr_size);
> + lost = zalloc(PERF_SAMPLE_MAX_SIZE);
> if (!lost) {
> pr_debug("Memory allocation failed\n");
> return;
> --
> 2.45.1
>

2024-06-04 19:02:24

by Arnaldo Carvalho de Melo

[permalink] [raw]
Subject: Re: perf 6.9-1 (archlinux) crashes during recording of cycles + raw_syscalls

On Tue, Jun 04, 2024 at 11:48:09AM -0700, Ian Rogers wrote:
> On Tue, Jun 4, 2024 at 7:12 AM Arnaldo Carvalho de Melo <[email protected]> wrote:
> > Can you please try with the attached and perhaps provide your Tested-by?

> > From ab355e2c6b4cf641a9fff7af38059cf69ac712d5 Mon Sep 17 00:00:00 2001
> > From: Arnaldo Carvalho de Melo <[email protected]>
> > Date: Tue, 4 Jun 2024 11:00:22 -0300
> > Subject: [PATCH 1/1] Revert "perf record: Reduce memory for recording
> > PERF_RECORD_LOST_SAMPLES event"

> > This reverts commit 7d1405c71df21f6c394b8a885aa8a133f749fa22.

> I think we should try to fight back reverts when possible. Reverts are
> removing something somebody poured time and attention into. When a

While in the development phase, yeah, but when we find a regression and
the revert makes it go away, that is the way to go.

The person who poured time on the development gets notified and can
decide if/when to try again.

Millian had to pour time to figure out why something stopped working,
was kind enough to provide the output from multiple tools to help in
fixing the problem and I had to do the bisect to figure out when the
problem happened and to check if reverting it we would have the tool
working again.

If we try to fix this for v6.10 we may end up adding yet another bug, so
the safe thing to do at this point is to do the revert.

We can try improving this once again for v6.11.

> regression has occurred then I think we should add the regression case
> as a test.

Sure, I thought about that as well, will try and have one shell test
with that, referring to this case, for v6.11.

- Arnaldo

2024-06-04 20:17:09

by Milian Wolff

[permalink] [raw]
Subject: Re: perf 6.9-1 (archlinux) crashes during recording of cycles + raw_syscalls

On Dienstag, 4. Juni 2024 16:12:36 MESZ Arnaldo Carvalho de Melo wrote:
> On Tue, Jun 04, 2024 at 10:50:00AM -0300, Arnaldo Carvalho de Melo wrote:
> > On Tue, Jun 04, 2024 at 01:44:18PM +0200, Milian Wolff wrote:
> > > the following command crashes perf for me, is this still an issue
> > > upstream and should I report it to arch for backporting? I cannot build
> > > the official perf/ core branch, so I am afraid I cannot test it myself
> > > there:
> > >
> > > ```
> > > sudo /usr/bin/perf record -z --call-graph dwarf -e cycles -e
> > > raw_syscalls:sys_enter ls
> > > ...
> > > [ perf record: Woken up 3 times to write data ]
> > > malloc(): invalid next size (unsorted)
> > > Aborted
> > > ```
> > > Backtrace with GDB + debuginfod:
> > > ```
> > > malloc(): invalid next size (unsorted)
> >
> > I reproduced this all the way back to 6.8, trying to bisect now, thanks
> > for the report,
>
> Can you please try with the attached and perhaps provide your Tested-by?

Works a charm, thanks

Tested-by: Milian Wolff <[email protected]>

--
Milian Wolff | [email protected] | Senior Software Engineer
KDAB (Deutschland) GmbH, a KDAB Group company
Tel: +49-30-521325470
KDAB - The Qt, C++ and OpenGL Experts


2024-06-06 22:20:20

by Namhyung Kim

[permalink] [raw]
Subject: Re: perf 6.9-1 (archlinux) crashes during recording of cycles + raw_syscalls

On Tue, Jun 04, 2024 at 04:02:08PM -0300, Arnaldo Carvalho de Melo wrote:
> On Tue, Jun 04, 2024 at 11:48:09AM -0700, Ian Rogers wrote:
> > On Tue, Jun 4, 2024 at 7:12 AM Arnaldo Carvalho de Melo <[email protected]> wrote:
> > > Can you please try with the attached and perhaps provide your Tested-by?
>
> > > From ab355e2c6b4cf641a9fff7af38059cf69ac712d5 Mon Sep 17 00:00:00 2001
> > > From: Arnaldo Carvalho de Melo <[email protected]>
> > > Date: Tue, 4 Jun 2024 11:00:22 -0300
> > > Subject: [PATCH 1/1] Revert "perf record: Reduce memory for recording
> > > PERF_RECORD_LOST_SAMPLES event"
>
> > > This reverts commit 7d1405c71df21f6c394b8a885aa8a133f749fa22.
>
> > I think we should try to fight back reverts when possible. Reverts are
> > removing something somebody poured time and attention into. When a
>
> While in the development phase, yeah, but when we find a regression and
> the revert makes it go away, that is the way to go.
>
> The person who poured time on the development gets notified and can
> decide if/when to try again.
>
> Millian had to pour time to figure out why something stopped working,
> was kind enough to provide the output from multiple tools to help in
> fixing the problem and I had to do the bisect to figure out when the
> problem happened and to check if reverting it we would have the tool
> working again.
>
> If we try to fix this for v6.10 we may end up adding yet another bug, so
> the safe thing to do at this point is to do the revert.
>
> We can try improving this once again for v6.11.

I think I found a couple of problems with this issue. :(

1. perf_session__set_id_hdr_size() uses the first evsel in the session
But I think it should pick the tracking event. I guess we assume
all events have the same set of sample_type wrt the sample_id_all
but I'm not sure if it's correct.

2. With --call-graph dwarf, it seems to set unrelated sample type bits
in the attr like ADDR and DATA_SRC.

3. For tracepoint events, evsel__newtp_idx() sets a couple of sample
type regardless of the configuration. This includes RAW, TIME and
CPU. This one changes the format of the id headers.

4. PERF_RECORD_LOST_SAMPLES is for the sampling event, so it should
use the event's sample_type. But the event parsing looks up the
event using evlist->is_pos which is set for the first event.

5. I think we can remove some sample type (i.e. TID and CPU) from the
tracking event in most cases. ID(ENTIFIER) will be used for LOST_
SAMPLES and TIME is needed anyway. TID is might be used for SWITCH
but others already contain necessary information in the type. I
wish we could add id field to PERF_RECORD_LOST_SAMPLES and tid/pid
to PERF_RECORD_SWITCH.

Thanks,
Namhyung

>
> > regression has occurred then I think we should add the regression case
> > as a test.
>
> Sure, I thought about that as well, will try and have one shell test
> with that, referring to this case, for v6.11.
>
> - Arnaldo

2024-06-06 23:18:11

by Ian Rogers

[permalink] [raw]
Subject: Re: perf 6.9-1 (archlinux) crashes during recording of cycles + raw_syscalls

On Thu, Jun 6, 2024 at 3:20 PM Namhyung Kim <[email protected]> wrote:
>
> On Tue, Jun 04, 2024 at 04:02:08PM -0300, Arnaldo Carvalho de Melo wrote:
> > On Tue, Jun 04, 2024 at 11:48:09AM -0700, Ian Rogers wrote:
> > > On Tue, Jun 4, 2024 at 7:12 AM Arnaldo Carvalho de Melo <[email protected]> wrote:
> > > > Can you please try with the attached and perhaps provide your Tested-by?
> >
> > > > From ab355e2c6b4cf641a9fff7af38059cf69ac712d5 Mon Sep 17 00:00:00 2001
> > > > From: Arnaldo Carvalho de Melo <[email protected]>
> > > > Date: Tue, 4 Jun 2024 11:00:22 -0300
> > > > Subject: [PATCH 1/1] Revert "perf record: Reduce memory for recording
> > > > PERF_RECORD_LOST_SAMPLES event"
> >
> > > > This reverts commit 7d1405c71df21f6c394b8a885aa8a133f749fa22.
> >
> > > I think we should try to fight back reverts when possible. Reverts are
> > > removing something somebody poured time and attention into. When a
> >
> > While in the development phase, yeah, but when we find a regression and
> > the revert makes it go away, that is the way to go.
> >
> > The person who poured time on the development gets notified and can
> > decide if/when to try again.
> >
> > Millian had to pour time to figure out why something stopped working,
> > was kind enough to provide the output from multiple tools to help in
> > fixing the problem and I had to do the bisect to figure out when the
> > problem happened and to check if reverting it we would have the tool
> > working again.
> >
> > If we try to fix this for v6.10 we may end up adding yet another bug, so
> > the safe thing to do at this point is to do the revert.
> >
> > We can try improving this once again for v6.11.
>
> I think I found a couple of problems with this issue. :(
>
> 1. perf_session__set_id_hdr_size() uses the first evsel in the session
> But I think it should pick the tracking event. I guess we assume
> all events have the same set of sample_type wrt the sample_id_all
> but I'm not sure if it's correct.
>
> 2. With --call-graph dwarf, it seems to set unrelated sample type bits
> in the attr like ADDR and DATA_SRC.
>
> 3. For tracepoint events, evsel__newtp_idx() sets a couple of sample
> type regardless of the configuration. This includes RAW, TIME and
> CPU. This one changes the format of the id headers.
>
> 4. PERF_RECORD_LOST_SAMPLES is for the sampling event, so it should
> use the event's sample_type. But the event parsing looks up the
> event using evlist->is_pos which is set for the first event.
>
> 5. I think we can remove some sample type (i.e. TID and CPU) from the
> tracking event in most cases. ID(ENTIFIER) will be used for LOST_
> SAMPLES and TIME is needed anyway. TID is might be used for SWITCH
> but others already contain necessary information in the type. I
> wish we could add id field to PERF_RECORD_LOST_SAMPLES and tid/pid
> to PERF_RECORD_SWITCH.

Right, this is good. To clear up the immediate error we just need to
increase the memory allocation size by 48 bytes to account for the
sample ID being written. Here is a change doing that and removing the
memory allocation altogether:
```
diff --git a/tools/lib/perf/include/perf/event.h
b/tools/lib/perf/include/perf/event.h
index ae64090184d3..a2dfaff26fb7 100644
--- a/tools/lib/perf/include/perf/event.h
+++ b/tools/lib/perf/include/perf/event.h
@@ -77,6 +77,8 @@ struct perf_record_lost_samples {
__u64 lost;
};

+#define PERF_RECORD_MAX_LOST_SAMPLE_AND_ID_SIZE \
+ (sizeof(struct perf_record_lost_samples) + 6 * sizeof(__u64))
/*
* PERF_FORMAT_ENABLED | PERF_FORMAT_RUNNING | PERF_FORMAT_ID | PERF_FORMAT_LOST
*/
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 66a3de8ac661..1615a1723fb9 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -1926,7 +1926,10 @@ static void __record__save_lost_samples(struct
record *rec, struct evsel *evs
el,
static void record__read_lost_samples(struct record *rec)
{
struct perf_session *session = rec->session;
- struct perf_record_lost_samples *lost = NULL;
+ union {
+ struct perf_record_lost_samples lost;
+ char
lost_and_sample_id[PERF_RECORD_MAX_LOST_SAMPLE_AND_ID_SIZE];
+ } lost;
struct evsel *evsel;

/* there was an error during record__open */
@@ -1951,20 +1954,13 @@ static void record__read_lost_samples(struct
record *rec)

if (perf_evsel__read(&evsel->core, x,
y, &count) < 0) {
pr_debug("read LOST count failed\n");
- goto out;
+ return;
}

if (count.lost) {
- if (!lost) {
- lost = zalloc(sizeof(*lost) +
-
session->machines.host.id_hdr_size);
- if (!lost) {
-
pr_debug("Memory allocation failed\n");
- return;
- }
- lost->header.type =
PERF_RECORD_LOST_SAMPLES;
- }
-
__record__save_lost_samples(rec, evsel, lost,
+ memset(&lost, 0, sizeof(lost));
+ lost.lost.header.type =
PERF_RECORD_LOST_SAMPLES;
+
__record__save_lost_samples(rec, evsel, &lost.lost,
x,
y, count.lost, 0);
}
}
@@ -1972,21 +1968,12 @@ static void record__read_lost_samples(struct
record *rec)

lost_count = perf_bpf_filter__lost_count(evsel);
if (lost_count) {
- if (!lost) {
- lost = zalloc(sizeof(*lost) +
-
session->machines.host.id_hdr_size);
- if (!lost) {
- pr_debug("Memory allocation failed\n");
- return;
- }
- lost->header.type = PERF_RECORD_LOST_SAMPLES;
- }
- __record__save_lost_samples(rec, evsel, lost,
0, 0, lost_count,
+ memset(&lost, 0, sizeof(lost));
+ lost.lost.header.type = PERF_RECORD_LOST_SAMPLES;
+ __record__save_lost_samples(rec, evsel,
&lost.lost, 0, 0, lost_count,

PERF_RECORD_MISC_LOST_SAMPLES_BPF);
}
}
-out:
- free(lost);
}

static volatile sig_atomic_t workload_exec_errno;
```

Thanks,
Ian

> Thanks,
> Namhyung
>
> >
> > > regression has occurred then I think we should add the regression case
> > > as a test.
> >
> > Sure, I thought about that as well, will try and have one shell test
> > with that, referring to this case, for v6.11.
> >
> > - Arnaldo

2024-06-07 18:26:49

by Namhyung Kim

[permalink] [raw]
Subject: Re: perf 6.9-1 (archlinux) crashes during recording of cycles + raw_syscalls

On Thu, Jun 06, 2024 at 04:17:43PM -0700, Ian Rogers wrote:
> On Thu, Jun 6, 2024 at 3:20 PM Namhyung Kim <[email protected]> wrote:
> >
> > On Tue, Jun 04, 2024 at 04:02:08PM -0300, Arnaldo Carvalho de Melo wrote:
> > > On Tue, Jun 04, 2024 at 11:48:09AM -0700, Ian Rogers wrote:
> > > > On Tue, Jun 4, 2024 at 7:12 AM Arnaldo Carvalho de Melo <[email protected]> wrote:
> > > > > Can you please try with the attached and perhaps provide your Tested-by?
> > >
> > > > > From ab355e2c6b4cf641a9fff7af38059cf69ac712d5 Mon Sep 17 00:00:00 2001
> > > > > From: Arnaldo Carvalho de Melo <[email protected]>
> > > > > Date: Tue, 4 Jun 2024 11:00:22 -0300
> > > > > Subject: [PATCH 1/1] Revert "perf record: Reduce memory for recording
> > > > > PERF_RECORD_LOST_SAMPLES event"
> > >
> > > > > This reverts commit 7d1405c71df21f6c394b8a885aa8a133f749fa22.
> > >
> > > > I think we should try to fight back reverts when possible. Reverts are
> > > > removing something somebody poured time and attention into. When a
> > >
> > > While in the development phase, yeah, but when we find a regression and
> > > the revert makes it go away, that is the way to go.
> > >
> > > The person who poured time on the development gets notified and can
> > > decide if/when to try again.
> > >
> > > Millian had to pour time to figure out why something stopped working,
> > > was kind enough to provide the output from multiple tools to help in
> > > fixing the problem and I had to do the bisect to figure out when the
> > > problem happened and to check if reverting it we would have the tool
> > > working again.
> > >
> > > If we try to fix this for v6.10 we may end up adding yet another bug, so
> > > the safe thing to do at this point is to do the revert.
> > >
> > > We can try improving this once again for v6.11.
> >
> > I think I found a couple of problems with this issue. :(
> >
> > 1. perf_session__set_id_hdr_size() uses the first evsel in the session
> > But I think it should pick the tracking event. I guess we assume
> > all events have the same set of sample_type wrt the sample_id_all
> > but I'm not sure if it's correct.
> >
> > 2. With --call-graph dwarf, it seems to set unrelated sample type bits
> > in the attr like ADDR and DATA_SRC.
> >
> > 3. For tracepoint events, evsel__newtp_idx() sets a couple of sample
> > type regardless of the configuration. This includes RAW, TIME and
> > CPU. This one changes the format of the id headers.
> >
> > 4. PERF_RECORD_LOST_SAMPLES is for the sampling event, so it should
> > use the event's sample_type. But the event parsing looks up the
> > event using evlist->is_pos which is set for the first event.
> >
> > 5. I think we can remove some sample type (i.e. TID and CPU) from the
> > tracking event in most cases. ID(ENTIFIER) will be used for LOST_
> > SAMPLES and TIME is needed anyway. TID is might be used for SWITCH
> > but others already contain necessary information in the type. I
> > wish we could add id field to PERF_RECORD_LOST_SAMPLES and tid/pid
> > to PERF_RECORD_SWITCH.
>
> Right, this is good. To clear up the immediate error we just need to
> increase the memory allocation size by 48 bytes to account for the
> sample ID being written. Here is a change doing that and removing the
> memory allocation altogether:

Looks ok. I think we can have this as a quick fix and I can work on the
above issues separately. Can you please send a formal patch?

> ```
> diff --git a/tools/lib/perf/include/perf/event.h
> b/tools/lib/perf/include/perf/event.h
> index ae64090184d3..a2dfaff26fb7 100644
> --- a/tools/lib/perf/include/perf/event.h
> +++ b/tools/lib/perf/include/perf/event.h
> @@ -77,6 +77,8 @@ struct perf_record_lost_samples {
> __u64 lost;
> };
>
> +#define PERF_RECORD_MAX_LOST_SAMPLE_AND_ID_SIZE \
> + (sizeof(struct perf_record_lost_samples) + 6 * sizeof(__u64))

For possible other uses, I'd suggest to add

#define MAX_ID_HDR_ENTRIES 6

Thanks,
Namhyung

> /*
> * PERF_FORMAT_ENABLED | PERF_FORMAT_RUNNING | PERF_FORMAT_ID | PERF_FORMAT_LOST
> */
> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> index 66a3de8ac661..1615a1723fb9 100644
> --- a/tools/perf/builtin-record.c
> +++ b/tools/perf/builtin-record.c
> @@ -1926,7 +1926,10 @@ static void __record__save_lost_samples(struct
> record *rec, struct evsel *evs
> el,
> static void record__read_lost_samples(struct record *rec)
> {
> struct perf_session *session = rec->session;
> - struct perf_record_lost_samples *lost = NULL;
> + union {
> + struct perf_record_lost_samples lost;
> + char
> lost_and_sample_id[PERF_RECORD_MAX_LOST_SAMPLE_AND_ID_SIZE];
> + } lost;
> struct evsel *evsel;
>
> /* there was an error during record__open */
> @@ -1951,20 +1954,13 @@ static void record__read_lost_samples(struct
> record *rec)
>
> if (perf_evsel__read(&evsel->core, x,
> y, &count) < 0) {
> pr_debug("read LOST count failed\n");
> - goto out;
> + return;
> }
>
> if (count.lost) {
> - if (!lost) {
> - lost = zalloc(sizeof(*lost) +
> -
> session->machines.host.id_hdr_size);
> - if (!lost) {
> -
> pr_debug("Memory allocation failed\n");
> - return;
> - }
> - lost->header.type =
> PERF_RECORD_LOST_SAMPLES;
> - }
> -
> __record__save_lost_samples(rec, evsel, lost,
> + memset(&lost, 0, sizeof(lost));
> + lost.lost.header.type =
> PERF_RECORD_LOST_SAMPLES;
> +
> __record__save_lost_samples(rec, evsel, &lost.lost,
> x,
> y, count.lost, 0);
> }
> }
> @@ -1972,21 +1968,12 @@ static void record__read_lost_samples(struct
> record *rec)
>
> lost_count = perf_bpf_filter__lost_count(evsel);
> if (lost_count) {
> - if (!lost) {
> - lost = zalloc(sizeof(*lost) +
> -
> session->machines.host.id_hdr_size);
> - if (!lost) {
> - pr_debug("Memory allocation failed\n");
> - return;
> - }
> - lost->header.type = PERF_RECORD_LOST_SAMPLES;
> - }
> - __record__save_lost_samples(rec, evsel, lost,
> 0, 0, lost_count,
> + memset(&lost, 0, sizeof(lost));
> + lost.lost.header.type = PERF_RECORD_LOST_SAMPLES;
> + __record__save_lost_samples(rec, evsel,
> &lost.lost, 0, 0, lost_count,
>
> PERF_RECORD_MISC_LOST_SAMPLES_BPF);
> }
> }
> -out:
> - free(lost);
> }
>
> static volatile sig_atomic_t workload_exec_errno;
> ```
>
> Thanks,
> Ian
>
> > Thanks,
> > Namhyung
> >
> > >
> > > > regression has occurred then I think we should add the regression case
> > > > as a test.
> > >
> > > Sure, I thought about that as well, will try and have one shell test
> > > with that, referring to this case, for v6.11.
> > >
> > > - Arnaldo