2024-02-16 17:24:11

by Ian Rogers

[permalink] [raw]
Subject: [PATCH v1] perf trace: Disable syscall augmentation with record

Syscall augmentation is causing samples not to be written to the
perf.data file with "perf trace record". Disabling augmentation is
sub-optimal, but it beats having a totally broken perf trace record.

Closes: https://lore.kernel.org/lkml/CAP-5=fV9Gd1Teak+EOcUSxe13KqSyfZyPNagK97GbLiOQRgGaw@mail.gmail.com/
Signed-off-by: Ian Rogers <[email protected]>
---
tools/perf/builtin-trace.c | 5 +++++
1 file changed, 5 insertions(+)

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 109b8e64fe69..192721261098 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -4864,6 +4864,11 @@ int cmd_trace(int argc, const char **argv)
if (!trace.trace_syscalls)
goto skip_augmentation;

+ if ((argc >= 1) && (strcmp(argv[0], "record") == 0)) {
+ pr_debug("Syscall augmentation fails with record, disabling augmentation");
+ goto skip_augmentation;
+ }
+
trace.skel = augmented_raw_syscalls_bpf__open();
if (!trace.skel) {
pr_debug("Failed to open augmented syscalls BPF skeleton");
--
2.44.0.rc0.258.g7320e95886-goog



2024-02-16 17:38:18

by Arnaldo Carvalho de Melo

[permalink] [raw]
Subject: Re: [PATCH v1] perf trace: Disable syscall augmentation with record

On Fri, Feb 16, 2024 at 09:23:57AM -0800, Ian Rogers wrote:
> Syscall augmentation is causing samples not to be written to the
> perf.data file with "perf trace record". Disabling augmentation is
> sub-optimal, but it beats having a totally broken perf trace record.

Tested-by: Arnaldo Carvalho de Melo <[email protected]>

'perf trace record' needs to be more tested, we need to have a shell
'perf test' for it, maybe Michael has something on his test suite?

Testing your patch:

root@number:~# perf trace record ls
anaconda-ks.cfg bin perf.all-number-20231219-104854.tar.bz2 perfconfig.off perf.data perf.data.baseline perf.data.old
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.050 MB perf.data (170 samples) ]
root@number:~# perf evlist -v
raw_syscalls:sys_enter: type: 2 (PERF_TYPE_TRACEPOINT), size: 136, config: 0x168 (raw_syscalls:sys_enter), { sample_period, sample_freq }: 1, sample_type: IP|TID|TIME|CPU|PERIOD|RAW|IDENTIFIER, read_format: ID|LOST, disabled: 1, inherit: 1, enable_on_exec: 1, sample_id_all: 1, exclude_guest: 1
raw_syscalls:sys_exit: type: 2 (PERF_TYPE_TRACEPOINT), size: 136, config: 0x167 (raw_syscalls:sys_exit), { sample_period, sample_freq }: 1, sample_type: IP|TID|TIME|CPU|PERIOD|RAW|IDENTIFIER, read_format: ID|LOST, disabled: 1, inherit: 1, enable_on_exec: 1, sample_id_all: 1, exclude_guest: 1
dummy:u: type: 1 (PERF_TYPE_SOFTWARE), size: 136, config: 0x9 (PERF_COUNT_SW_DUMMY), { sample_period, sample_freq }: 1, sample_type: IP|TID|TIME|IDENTIFIER, read_format: ID|LOST, inherit: 1, exclude_kernel: 1, exclude_hv: 1, mmap: 1, comm: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1, ksymbol: 1, bpf_event: 1
# Tip: use 'perf evlist --trace-fields' to show fields for tracepoint events
root@number:~# perf script | head -5
ls 145954 [021] 64113.916047: raw_syscalls:sys_exit: NR 59 = 0
ls 145954 [021] 64113.916065: raw_syscalls:sys_enter: NR 12 (0, 528, 0, 0, 0, 27)
ls 145954 [021] 64113.916065: raw_syscalls:sys_exit: NR 12 = 94098335809536
ls 145954 [021] 64113.916077: raw_syscalls:sys_enter: NR 158 (3001, 7fff9bc13790, 7f4bd6f4b4c0, 1, 3, 800)
ls 145954 [021] 64113.916078: raw_syscalls:sys_exit: NR 158 = -22
root@number:~# perf trace -i perf.data |& head
? ( ): ls/145954 ... [continued]: execve()) = 0
0.018 ( 0.000 ms): ls/145954 brk() = 0x5594f9e14000
0.030 ( 0.000 ms): ls/145954 arch_prctl(option: 0x3001, arg2: 0x7fff9bc13790) = -1 EINVAL (Invalid argument)
0.045 ( 0.002 ms): ls/145954 access(filename: 0xd6f5b2c0, mode: R) = -1 ENOENT (No such file or directory)
0.050 ( 0.003 ms): ls/145954 openat(dfd: CWD, filename: 0xd6f5a144, flags: RDONLY|CLOEXEC) = 3
0.053 ( 0.001 ms): ls/145954 newfstatat(dfd: 3, filename: 0xd6f5ace8, statbuf: 0x7fff9bc129a0, flag: 4096) = 0
0.055 ( 0.004 ms): ls/145954 mmap(len: 78087, prot: READ, flags: PRIVATE, fd: 3) = 0x7f4bd6f1c000
0.059 ( 0.000 ms): ls/145954 close(fd: 3) = 0
0.066 ( 0.003 ms): ls/145954 openat(dfd: CWD, filename: 0xd6f65fa0, flags: RDONLY|CLOEXEC) = 3
0.068 ( 0.001 ms): ls/145954 read(fd: 3, buf: 0x7fff9bc12b08, count: 832) = 832
root@number:~#

root@number:~# perf trace -v record ls
Syscall augmentation fails with record, disabling augmentationUsing CPUID GenuineIntel-6-B7-1
DEBUGINFOD_URLS=
nr_cblocks: 0
affinity: SYS
mmap flush: 1
comp level: 0
mmap size 4198400B
Control descriptor is not initialized
mmap size 528384B
anaconda-ks.cfg bin perf.all-number-20231219-104854.tar.bz2 perfconfig.off perf.data perf.data.baseline perf.data.old
[ perf record: Woken up 1 times to write data ]
failed to write feature CPU_PMU_CAPS
[ perf record: Captured and wrote 0.050 MB perf.data (170 samples) ]
root@number:~#

There is a missing newline and we can avoid repeating a term, and I also
added some extra explanation about "augmentation":

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 192721261098832e..3f77dd3eb70dc37a 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -4865,7 +4865,7 @@ int cmd_trace(int argc, const char **argv)
goto skip_augmentation;

if ((argc >= 1) && (strcmp(argv[0], "record") == 0)) {
- pr_debug("Syscall augmentation fails with record, disabling augmentation");
+ pr_debug("Syscall augmentation (reading pointer args using BPF) fails with record, disabling it.\n");
goto skip_augmentation;
}

I'll see if I come up with the test and a fix that allows augmentation
to work with 'perf trace record'.

- Arnaldo


> Closes: https://lore.kernel.org/lkml/CAP-5=fV9Gd1Teak+EOcUSxe13KqSyfZyPNagK97GbLiOQRgGaw@mail.gmail.com/
> Signed-off-by: Ian Rogers <[email protected]>
> ---
> tools/perf/builtin-trace.c | 5 +++++
> 1 file changed, 5 insertions(+)
>
> diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
> index 109b8e64fe69..192721261098 100644
> --- a/tools/perf/builtin-trace.c
> +++ b/tools/perf/builtin-trace.c
> @@ -4864,6 +4864,11 @@ int cmd_trace(int argc, const char **argv)
> if (!trace.trace_syscalls)
> goto skip_augmentation;
>
> + if ((argc >= 1) && (strcmp(argv[0], "record") == 0)) {
> + pr_debug("Syscall augmentation fails with record, disabling augmentation");
> + goto skip_augmentation;
> + }
> +
> trace.skel = augmented_raw_syscalls_bpf__open();
> if (!trace.skel) {
> pr_debug("Failed to open augmented syscalls BPF skeleton");
> --
> 2.44.0.rc0.258.g7320e95886-goog
>

2024-05-04 18:04:56

by Arnaldo Carvalho de Melo

[permalink] [raw]
Subject: Re: [PATCH v1] perf trace: Disable syscall augmentation with record

On Fri, Feb 16, 2024 at 02:37:38PM -0300, Arnaldo Carvalho de Melo wrote:
> On Fri, Feb 16, 2024 at 09:23:57AM -0800, Ian Rogers wrote:
> > Syscall augmentation is causing samples not to be written to the
> > perf.data file with "perf trace record". Disabling augmentation is
> > sub-optimal, but it beats having a totally broken perf trace record.
>
> Tested-by: Arnaldo Carvalho de Melo <[email protected]>

I thought Namhyung had collected this one, doing it now.

- Arnaldo

> 'perf trace record' needs to be more tested, we need to have a shell
> 'perf test' for it, maybe Michael has something on his test suite?
>
> Testing your patch:
>
> root@number:~# perf trace record ls
> anaconda-ks.cfg bin perf.all-number-20231219-104854.tar.bz2 perfconfig.off perf.data perf.data.baseline perf.data.old
> [ perf record: Woken up 1 times to write data ]
> [ perf record: Captured and wrote 0.050 MB perf.data (170 samples) ]
> root@number:~# perf evlist -v
> raw_syscalls:sys_enter: type: 2 (PERF_TYPE_TRACEPOINT), size: 136, config: 0x168 (raw_syscalls:sys_enter), { sample_period, sample_freq }: 1, sample_type: IP|TID|TIME|CPU|PERIOD|RAW|IDENTIFIER, read_format: ID|LOST, disabled: 1, inherit: 1, enable_on_exec: 1, sample_id_all: 1, exclude_guest: 1
> raw_syscalls:sys_exit: type: 2 (PERF_TYPE_TRACEPOINT), size: 136, config: 0x167 (raw_syscalls:sys_exit), { sample_period, sample_freq }: 1, sample_type: IP|TID|TIME|CPU|PERIOD|RAW|IDENTIFIER, read_format: ID|LOST, disabled: 1, inherit: 1, enable_on_exec: 1, sample_id_all: 1, exclude_guest: 1
> dummy:u: type: 1 (PERF_TYPE_SOFTWARE), size: 136, config: 0x9 (PERF_COUNT_SW_DUMMY), { sample_period, sample_freq }: 1, sample_type: IP|TID|TIME|IDENTIFIER, read_format: ID|LOST, inherit: 1, exclude_kernel: 1, exclude_hv: 1, mmap: 1, comm: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1, ksymbol: 1, bpf_event: 1
> # Tip: use 'perf evlist --trace-fields' to show fields for tracepoint events
> root@number:~# perf script | head -5
> ls 145954 [021] 64113.916047: raw_syscalls:sys_exit: NR 59 = 0
> ls 145954 [021] 64113.916065: raw_syscalls:sys_enter: NR 12 (0, 528, 0, 0, 0, 27)
> ls 145954 [021] 64113.916065: raw_syscalls:sys_exit: NR 12 = 94098335809536
> ls 145954 [021] 64113.916077: raw_syscalls:sys_enter: NR 158 (3001, 7fff9bc13790, 7f4bd6f4b4c0, 1, 3, 800)
> ls 145954 [021] 64113.916078: raw_syscalls:sys_exit: NR 158 = -22
> root@number:~# perf trace -i perf.data |& head
> ? ( ): ls/145954 ... [continued]: execve()) = 0
> 0.018 ( 0.000 ms): ls/145954 brk() = 0x5594f9e14000
> 0.030 ( 0.000 ms): ls/145954 arch_prctl(option: 0x3001, arg2: 0x7fff9bc13790) = -1 EINVAL (Invalid argument)
> 0.045 ( 0.002 ms): ls/145954 access(filename: 0xd6f5b2c0, mode: R) = -1 ENOENT (No such file or directory)
> 0.050 ( 0.003 ms): ls/145954 openat(dfd: CWD, filename: 0xd6f5a144, flags: RDONLY|CLOEXEC) = 3
> 0.053 ( 0.001 ms): ls/145954 newfstatat(dfd: 3, filename: 0xd6f5ace8, statbuf: 0x7fff9bc129a0, flag: 4096) = 0
> 0.055 ( 0.004 ms): ls/145954 mmap(len: 78087, prot: READ, flags: PRIVATE, fd: 3) = 0x7f4bd6f1c000
> 0.059 ( 0.000 ms): ls/145954 close(fd: 3) = 0
> 0.066 ( 0.003 ms): ls/145954 openat(dfd: CWD, filename: 0xd6f65fa0, flags: RDONLY|CLOEXEC) = 3
> 0.068 ( 0.001 ms): ls/145954 read(fd: 3, buf: 0x7fff9bc12b08, count: 832) = 832
> root@number:~#
>
> root@number:~# perf trace -v record ls
> Syscall augmentation fails with record, disabling augmentationUsing CPUID GenuineIntel-6-B7-1
> DEBUGINFOD_URLS=
> nr_cblocks: 0
> affinity: SYS
> mmap flush: 1
> comp level: 0
> mmap size 4198400B
> Control descriptor is not initialized
> mmap size 528384B
> anaconda-ks.cfg bin perf.all-number-20231219-104854.tar.bz2 perfconfig.off perf.data perf.data.baseline perf.data.old
> [ perf record: Woken up 1 times to write data ]
> failed to write feature CPU_PMU_CAPS
> [ perf record: Captured and wrote 0.050 MB perf.data (170 samples) ]
> root@number:~#
>
> There is a missing newline and we can avoid repeating a term, and I also
> added some extra explanation about "augmentation":
>
> diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
> index 192721261098832e..3f77dd3eb70dc37a 100644
> --- a/tools/perf/builtin-trace.c
> +++ b/tools/perf/builtin-trace.c
> @@ -4865,7 +4865,7 @@ int cmd_trace(int argc, const char **argv)
> goto skip_augmentation;
>
> if ((argc >= 1) && (strcmp(argv[0], "record") == 0)) {
> - pr_debug("Syscall augmentation fails with record, disabling augmentation");
> + pr_debug("Syscall augmentation (reading pointer args using BPF) fails with record, disabling it.\n");
> goto skip_augmentation;
> }
>
> I'll see if I come up with the test and a fix that allows augmentation
> to work with 'perf trace record'.
>
> - Arnaldo
>
>
> > Closes: https://lore.kernel.org/lkml/CAP-5=fV9Gd1Teak+EOcUSxe13KqSyfZyPNagK97GbLiOQRgGaw@mail.gmail.com/
> > Signed-off-by: Ian Rogers <[email protected]>
> > ---
> > tools/perf/builtin-trace.c | 5 +++++
> > 1 file changed, 5 insertions(+)
> >
> > diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
> > index 109b8e64fe69..192721261098 100644
> > --- a/tools/perf/builtin-trace.c
> > +++ b/tools/perf/builtin-trace.c
> > @@ -4864,6 +4864,11 @@ int cmd_trace(int argc, const char **argv)
> > if (!trace.trace_syscalls)
> > goto skip_augmentation;
> >
> > + if ((argc >= 1) && (strcmp(argv[0], "record") == 0)) {
> > + pr_debug("Syscall augmentation fails with record, disabling augmentation");
> > + goto skip_augmentation;
> > + }
> > +
> > trace.skel = augmented_raw_syscalls_bpf__open();
> > if (!trace.skel) {
> > pr_debug("Failed to open augmented syscalls BPF skeleton");
> > --
> > 2.44.0.rc0.258.g7320e95886-goog
> >