2023-10-19 08:27:55

by Thomas Richter

[permalink] [raw]
Subject: [PATCH] perf trace: Use the right bpf_probe_read(_str) variant for reading user data

Perf test case 111 Check open filename arg using perf trace + vfs_getname
fails on s390. This is caused by a failing function
bpf_probe_read() in file util/bpf_skel/augmented_raw_syscalls.bpf.c.

The root cause is the lookup by address. Function bpf_probe_read()
is used. This function works only for architectures
with ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE.

On s390 is not possible to determine from the address to which
address space the address belongs to (user or kernel space).

Replace bpf_probe_read() by bpf_probe_read_kernel()
and bpf_probe_read_str() by bpf_probe_read_user_str() to
explicity specify the address space the address refers to.

Output before:
# ./perf trace -eopen,openat -- touch /tmp/111
libbpf: prog 'sys_enter': BPF program load failed: Invalid argument
libbpf: prog 'sys_enter': -- BEGIN PROG LOAD LOG --
reg type unsupported for arg#0 function sys_enter#75
0: R1=ctx(off=0,imm=0) R10=fp0
; int sys_enter(struct syscall_enter_args *args)
0: (bf) r6 = r1 ; R1=ctx(off=0,imm=0) R6_w=ctx(off=0,imm=0)
; return bpf_get_current_pid_tgid();
1: (85) call bpf_get_current_pid_tgid#14 ; R0_w=scalar()
2: (63) *(u32 *)(r10 -8) = r0 ; R0_w=scalar() R10=fp0 fp-8=????mmmm
3: (bf) r2 = r10 ; R2_w=fp0 R10=fp0
;
.....
lines deleted here
.....
23: (bf) r3 = r6 ; R3_w=ctx(off=0,imm=0) R6=ctx(off=0,imm=0)
24: (85) call bpf_probe_read#4
unknown func bpf_probe_read#4
processed 23 insns (limit 1000000) max_states_per_insn 0 \
total_states 2 peak_states 2 mark_read 2
-- END PROG LOAD LOG --
libbpf: prog 'sys_enter': failed to load: -22
libbpf: failed to load object 'augmented_raw_syscalls_bpf'
libbpf: failed to load BPF skeleton 'augmented_raw_syscalls_bpf': -22
....

Output after:
# ./perf test -Fv 111
111: Check open filename arg using perf trace + vfs_getname :
--- start ---
1.085 ( 0.011 ms): touch/320753 openat(dfd: CWD, filename: \
"/tmp/temporary_file.SWH85", \
flags: CREAT|NOCTTY|NONBLOCK|WRONLY, mode: IRUGO|IWUGO) = 3
---- end ----
Check open filename arg using perf trace + vfs_getname: Ok
#

Test with the sleep command shows:
Output before:
# ./perf trace -e *sleep sleep 1.234567890
0.000 (1234.681 ms): sleep/63114 clock_nanosleep(rqtp: \
{ .tv_sec: 0, .tv_nsec: 0 }, rmtp: 0x3ffe0979720) = 0
#

Output after:
# ./perf trace -e *sleep sleep 1.234567890
0.000 (1234.686 ms): sleep/64277 clock_nanosleep(rqtp: \
{ .tv_sec: 1, .tv_nsec: 234567890 }, rmtp: 0x3fff3df9ea0) = 0
#

Fixes: 14e4b9f4289a ("perf trace: Raw augmented syscalls fix libbpf 1.0+ compatibility")
Signed-off-by: Thomas Richter <[email protected]>
Co-developed-by: Arnaldo Carvalho de Melo <[email protected]>
Acked-by: Ilya Leoshkevich <[email protected]>
Cc: Ian Rogers <[email protected]>
---
.../util/bpf_skel/augmented_raw_syscalls.bpf.c | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
index 939ec769bf4a..52c270330ae0 100644
--- a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
+++ b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
@@ -153,7 +153,7 @@ static inline
unsigned int augmented_arg__read_str(struct augmented_arg *augmented_arg, const void *arg, unsigned int arg_len)
{
unsigned int augmented_len = sizeof(*augmented_arg);
- int string_len = bpf_probe_read_str(&augmented_arg->value, arg_len, arg);
+ int string_len = bpf_probe_read_user_str(&augmented_arg->value, arg_len, arg);

augmented_arg->size = augmented_arg->err = 0;
/*
@@ -203,7 +203,7 @@ int sys_enter_connect(struct syscall_enter_args *args)
_Static_assert(is_power_of_2(sizeof(augmented_args->saddr)), "sizeof(augmented_args->saddr) needs to be a power of two");
socklen &= sizeof(augmented_args->saddr) - 1;

- bpf_probe_read(&augmented_args->saddr, socklen, sockaddr_arg);
+ bpf_probe_read_user(&augmented_args->saddr, socklen, sockaddr_arg);

return augmented__output(args, augmented_args, len + socklen);
}
@@ -221,7 +221,7 @@ int sys_enter_sendto(struct syscall_enter_args *args)

socklen &= sizeof(augmented_args->saddr) - 1;

- bpf_probe_read(&augmented_args->saddr, socklen, sockaddr_arg);
+ bpf_probe_read_user(&augmented_args->saddr, socklen, sockaddr_arg);

return augmented__output(args, augmented_args, len + socklen);
}
@@ -311,7 +311,7 @@ int sys_enter_perf_event_open(struct syscall_enter_args *args)
if (augmented_args == NULL)
goto failure;

- if (bpf_probe_read(&augmented_args->__data, sizeof(*attr), attr) < 0)
+ if (bpf_probe_read_user(&augmented_args->__data, sizeof(*attr), attr) < 0)
goto failure;

attr_read = (const struct perf_event_attr_size *)augmented_args->__data;
@@ -325,7 +325,7 @@ int sys_enter_perf_event_open(struct syscall_enter_args *args)
goto failure;

// Now that we read attr->size and tested it against the size limits, read it completely
- if (bpf_probe_read(&augmented_args->__data, size, attr) < 0)
+ if (bpf_probe_read_user(&augmented_args->__data, size, attr) < 0)
goto failure;

return augmented__output(args, augmented_args, len + size);
@@ -347,7 +347,7 @@ int sys_enter_clock_nanosleep(struct syscall_enter_args *args)
if (size > sizeof(augmented_args->__data))
goto failure;

- bpf_probe_read(&augmented_args->__data, size, rqtp_arg);
+ bpf_probe_read_user(&augmented_args->__data, size, rqtp_arg);

return augmented__output(args, augmented_args, len + size);
failure:
@@ -385,7 +385,7 @@ int sys_enter(struct syscall_enter_args *args)
if (augmented_args == NULL)
return 1;

- bpf_probe_read(&augmented_args->args, sizeof(augmented_args->args), args);
+ bpf_probe_read_kernel(&augmented_args->args, sizeof(augmented_args->args), args);

/*
* Jump to syscall specific augmenter, even if the default one,
@@ -406,7 +406,7 @@ int sys_exit(struct syscall_exit_args *args)
if (pid_filter__has(&pids_filtered, getpid()))
return 0;

- bpf_probe_read(&exit_args, sizeof(exit_args), args);
+ bpf_probe_read_kernel(&exit_args, sizeof(exit_args), args);
/*
* Jump to syscall specific return augmenter, even if the default one,
* "!raw_syscalls:unaugmented" that will just return 1 to return the
--
2.41.0


2023-10-19 19:35:12

by Arnaldo Carvalho de Melo

[permalink] [raw]
Subject: Re: [PATCH] perf trace: Use the right bpf_probe_read(_str) variant for reading user data

Em Thu, Oct 19, 2023 at 10:26:42AM +0200, Thomas Richter escreveu:
> Perf test case 111 Check open filename arg using perf trace + vfs_getname
> fails on s390. This is caused by a failing function
> bpf_probe_read() in file util/bpf_skel/augmented_raw_syscalls.bpf.c.
>
> The root cause is the lookup by address. Function bpf_probe_read()
> is used. This function works only for architectures
> with ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE.
>
> On s390 is not possible to determine from the address to which
> address space the address belongs to (user or kernel space).
>
> Replace bpf_probe_read() by bpf_probe_read_kernel()
> and bpf_probe_read_str() by bpf_probe_read_user_str() to
> explicity specify the address space the address refers to.

Thanks,

Tested-by: Arnaldo Carvalho de Melo <[email protected]>

- Arnaldo

> Output before:
> # ./perf trace -eopen,openat -- touch /tmp/111
> libbpf: prog 'sys_enter': BPF program load failed: Invalid argument
> libbpf: prog 'sys_enter': -- BEGIN PROG LOAD LOG --
> reg type unsupported for arg#0 function sys_enter#75
> 0: R1=ctx(off=0,imm=0) R10=fp0
> ; int sys_enter(struct syscall_enter_args *args)
> 0: (bf) r6 = r1 ; R1=ctx(off=0,imm=0) R6_w=ctx(off=0,imm=0)
> ; return bpf_get_current_pid_tgid();
> 1: (85) call bpf_get_current_pid_tgid#14 ; R0_w=scalar()
> 2: (63) *(u32 *)(r10 -8) = r0 ; R0_w=scalar() R10=fp0 fp-8=????mmmm
> 3: (bf) r2 = r10 ; R2_w=fp0 R10=fp0
> ;
> .....
> lines deleted here
> .....
> 23: (bf) r3 = r6 ; R3_w=ctx(off=0,imm=0) R6=ctx(off=0,imm=0)
> 24: (85) call bpf_probe_read#4
> unknown func bpf_probe_read#4
> processed 23 insns (limit 1000000) max_states_per_insn 0 \
> total_states 2 peak_states 2 mark_read 2
> -- END PROG LOAD LOG --
> libbpf: prog 'sys_enter': failed to load: -22
> libbpf: failed to load object 'augmented_raw_syscalls_bpf'
> libbpf: failed to load BPF skeleton 'augmented_raw_syscalls_bpf': -22
> ....
>
> Output after:
> # ./perf test -Fv 111
> 111: Check open filename arg using perf trace + vfs_getname :
> --- start ---
> 1.085 ( 0.011 ms): touch/320753 openat(dfd: CWD, filename: \
> "/tmp/temporary_file.SWH85", \
> flags: CREAT|NOCTTY|NONBLOCK|WRONLY, mode: IRUGO|IWUGO) = 3
> ---- end ----
> Check open filename arg using perf trace + vfs_getname: Ok
> #
>
> Test with the sleep command shows:
> Output before:
> # ./perf trace -e *sleep sleep 1.234567890
> 0.000 (1234.681 ms): sleep/63114 clock_nanosleep(rqtp: \
> { .tv_sec: 0, .tv_nsec: 0 }, rmtp: 0x3ffe0979720) = 0
> #
>
> Output after:
> # ./perf trace -e *sleep sleep 1.234567890
> 0.000 (1234.686 ms): sleep/64277 clock_nanosleep(rqtp: \
> { .tv_sec: 1, .tv_nsec: 234567890 }, rmtp: 0x3fff3df9ea0) = 0
> #
>
> Fixes: 14e4b9f4289a ("perf trace: Raw augmented syscalls fix libbpf 1.0+ compatibility")
> Signed-off-by: Thomas Richter <[email protected]>
> Co-developed-by: Arnaldo Carvalho de Melo <[email protected]>
> Acked-by: Ilya Leoshkevich <[email protected]>
> Cc: Ian Rogers <[email protected]>
> ---
> .../util/bpf_skel/augmented_raw_syscalls.bpf.c | 16 ++++++++--------
> 1 file changed, 8 insertions(+), 8 deletions(-)
>
> diff --git a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
> index 939ec769bf4a..52c270330ae0 100644
> --- a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
> +++ b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
> @@ -153,7 +153,7 @@ static inline
> unsigned int augmented_arg__read_str(struct augmented_arg *augmented_arg, const void *arg, unsigned int arg_len)
> {
> unsigned int augmented_len = sizeof(*augmented_arg);
> - int string_len = bpf_probe_read_str(&augmented_arg->value, arg_len, arg);
> + int string_len = bpf_probe_read_user_str(&augmented_arg->value, arg_len, arg);
>
> augmented_arg->size = augmented_arg->err = 0;
> /*
> @@ -203,7 +203,7 @@ int sys_enter_connect(struct syscall_enter_args *args)
> _Static_assert(is_power_of_2(sizeof(augmented_args->saddr)), "sizeof(augmented_args->saddr) needs to be a power of two");
> socklen &= sizeof(augmented_args->saddr) - 1;
>
> - bpf_probe_read(&augmented_args->saddr, socklen, sockaddr_arg);
> + bpf_probe_read_user(&augmented_args->saddr, socklen, sockaddr_arg);
>
> return augmented__output(args, augmented_args, len + socklen);
> }
> @@ -221,7 +221,7 @@ int sys_enter_sendto(struct syscall_enter_args *args)
>
> socklen &= sizeof(augmented_args->saddr) - 1;
>
> - bpf_probe_read(&augmented_args->saddr, socklen, sockaddr_arg);
> + bpf_probe_read_user(&augmented_args->saddr, socklen, sockaddr_arg);
>
> return augmented__output(args, augmented_args, len + socklen);
> }
> @@ -311,7 +311,7 @@ int sys_enter_perf_event_open(struct syscall_enter_args *args)
> if (augmented_args == NULL)
> goto failure;
>
> - if (bpf_probe_read(&augmented_args->__data, sizeof(*attr), attr) < 0)
> + if (bpf_probe_read_user(&augmented_args->__data, sizeof(*attr), attr) < 0)
> goto failure;
>
> attr_read = (const struct perf_event_attr_size *)augmented_args->__data;
> @@ -325,7 +325,7 @@ int sys_enter_perf_event_open(struct syscall_enter_args *args)
> goto failure;
>
> // Now that we read attr->size and tested it against the size limits, read it completely
> - if (bpf_probe_read(&augmented_args->__data, size, attr) < 0)
> + if (bpf_probe_read_user(&augmented_args->__data, size, attr) < 0)
> goto failure;
>
> return augmented__output(args, augmented_args, len + size);
> @@ -347,7 +347,7 @@ int sys_enter_clock_nanosleep(struct syscall_enter_args *args)
> if (size > sizeof(augmented_args->__data))
> goto failure;
>
> - bpf_probe_read(&augmented_args->__data, size, rqtp_arg);
> + bpf_probe_read_user(&augmented_args->__data, size, rqtp_arg);
>
> return augmented__output(args, augmented_args, len + size);
> failure:
> @@ -385,7 +385,7 @@ int sys_enter(struct syscall_enter_args *args)
> if (augmented_args == NULL)
> return 1;
>
> - bpf_probe_read(&augmented_args->args, sizeof(augmented_args->args), args);
> + bpf_probe_read_kernel(&augmented_args->args, sizeof(augmented_args->args), args);
>
> /*
> * Jump to syscall specific augmenter, even if the default one,
> @@ -406,7 +406,7 @@ int sys_exit(struct syscall_exit_args *args)
> if (pid_filter__has(&pids_filtered, getpid()))
> return 0;
>
> - bpf_probe_read(&exit_args, sizeof(exit_args), args);
> + bpf_probe_read_kernel(&exit_args, sizeof(exit_args), args);
> /*
> * Jump to syscall specific return augmenter, even if the default one,
> * "!raw_syscalls:unaugmented" that will just return 1 to return the
> --
> 2.41.0
>

--

- Arnaldo

2023-10-20 21:21:56

by Namhyung Kim

[permalink] [raw]
Subject: Re: [PATCH] perf trace: Use the right bpf_probe_read(_str) variant for reading user data

On Thu, 19 Oct 2023 10:26:42 +0200, Thomas Richter wrote:
> Perf test case 111 Check open filename arg using perf trace + vfs_getname
> fails on s390. This is caused by a failing function
> bpf_probe_read() in file util/bpf_skel/augmented_raw_syscalls.bpf.c.
>
> The root cause is the lookup by address. Function bpf_probe_read()
> is used. This function works only for architectures
> with ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE.
>
> [...]

Applied to perf-tools-next, thanks!