2023-10-27 12:09:18

by Naresh Kamboju

[permalink] [raw]
Subject: selftests: user_events: ftrace_test - RIP: 0010:tracing_update_buffers (kernel/trace/trace.c:6470)

Following kernel crash noticed on x86_64 while running selftests: user_events:
ftrace_test running 6.6.0-rc7-next-20231026.

Reported-by: Linux Kernel Functional Testing <[email protected]>

kselftest: Running tests in user_events
TAP version 13
1..4
# timeout set to 90
# selftests: user_events: ftrace_test
[ 2391.606817] general protection fault, probably for non-canonical
address 0x6b6b6b6b6b6b8a83: 0000 [#1] PREEMPT SMP PTI
[ 2391.617519] CPU: 1 PID: 34662 Comm: ftrace_test Not tainted
6.6.0-rc7-next-20231026 #1
[ 2391.625428] Hardware name: Supermicro SYS-5019S-ML/X11SSH-F, BIOS
2.7 12/07/2021
[ 2391.632811] RIP: 0010:tracing_update_buffers (kernel/trace/trace.c:6470)
[ 2391.637952] Code: 90 90 90 90 90 90 90 90 90 90 90 90 66 0f 1f 00
55 31 f6 48 89 e5 41 55 41 54 53 48 89 fb 48 c7 c7 40 8c 61 94 e8 92
d3 5a 01 <44> 0f b6 a3 18 1f 00 00 41 80 fc 01 0f 87 c8 dc 4e 01 45 31
ed 41
All code
========
0: 90 nop
1: 90 nop
2: 90 nop
3: 90 nop
4: 90 nop
5: 90 nop
6: 90 nop
7: 90 nop
8: 90 nop
9: 90 nop
a: 90 nop
b: 90 nop
c: 66 0f 1f 00 nopw (%rax)
10: 55 push %rbp
11: 31 f6 xor %esi,%esi
13: 48 89 e5 mov %rsp,%rbp
16: 41 55 push %r13
18: 41 54 push %r12
1a: 53 push %rbx
1b: 48 89 fb mov %rdi,%rbx
1e: 48 c7 c7 40 8c 61 94 mov $0xffffffff94618c40,%rdi
25: e8 92 d3 5a 01 callq 0x15ad3bc
2a:* 44 0f b6 a3 18 1f 00 movzbl 0x1f18(%rbx),%r12d <-- trapping instruction
31: 00
32: 41 80 fc 01 cmp $0x1,%r12b
36: 0f 87 c8 dc 4e 01 ja 0x14edd04
3c: 45 31 ed xor %r13d,%r13d
3f: 41 rex.B

Code starting with the faulting instruction
===========================================
0: 44 0f b6 a3 18 1f 00 movzbl 0x1f18(%rbx),%r12d
7: 00
8: 41 80 fc 01 cmp $0x1,%r12b
c: 0f 87 c8 dc 4e 01 ja 0x14edcda
12: 45 31 ed xor %r13d,%r13d
15: 41 rex.B
[ 2391.656696] RSP: 0018:ffffb36e0a477d80 EFLAGS: 00010246
[ 2391.661937] RAX: 0000000000000000 RBX: 6b6b6b6b6b6b6b6b RCX: 0000000080000000
[ 2391.669064] RDX: 0000000000000000 RSI: ffffffff9299b722 RDI: ffffffff9299b722
[ 2391.676195] RBP: ffffb36e0a477d98 R08: 000000000000002f R09: 0000000000000002
[ 2391.683321] R10: ffffb36e0a477d70 R11: 0000000000000000 R12: 0000000000000002
[ 2391.690453] R13: ffffb36e0a477e88 R14: ffff99c5803a2230 R15: ffff99c581c39000
[ 2391.697586] FS: 00007fb4b9681740(0000) GS:ffff99c6efa80000(0000)
knlGS:0000000000000000
[ 2391.705670] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 2391.711410] CR2: 00007fb4b96ab5e0 CR3: 000000010635c002 CR4: 00000000003706f0
[ 2391.718540] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 2391.725665] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 2391.732797] Call Trace:
[ 2391.735240] <TASK>
[ 2391.737339] ? show_regs (arch/x86/kernel/dumpstack.c:479)
[ 2391.740744] ? die_addr (arch/x86/kernel/dumpstack.c:421
arch/x86/kernel/dumpstack.c:460)
[ 2391.744056] ? exc_general_protection (arch/x86/kernel/traps.c:697
arch/x86/kernel/traps.c:642)
[ 2391.748766] ? asm_exc_general_protection
(arch/x86/include/asm/idtentry.h:564)
[ 2391.753652] ? __mutex_lock (kernel/locking/mutex.c:613
(discriminator 3) kernel/locking/mutex.c:747 (discriminator 3))
[ 2391.757487] ? __mutex_lock (kernel/locking/mutex.c:613
(discriminator 3) kernel/locking/mutex.c:747 (discriminator 3))
[ 2391.761318] ? tracing_update_buffers (kernel/trace/trace.c:6470)
[ 2391.765851] event_enable_write (kernel/trace/trace_events.c:1408)
[ 2391.769976] vfs_write (fs/read_write.c:582)
[ 2391.773296] ? close_fd_get_file (fs/file.c:821)
[ 2391.777396] ? preempt_count_sub (kernel/sched/core.c:5857
kernel/sched/core.c:5853 kernel/sched/core.c:5875)
[ 2391.781496] ksys_write (fs/read_write.c:638)
[ 2391.784918] __x64_sys_write (fs/read_write.c:646)
[ 2391.788671] do_syscall_64 (arch/x86/entry/common.c:51
arch/x86/entry/common.c:82)
[ 2391.792248] ? do_syscall_64 (arch/x86/entry/common.c:101)
[ 2391.795995] ? syscall_exit_to_user_mode (kernel/entry/common.c:299)
[ 2391.800785] ? do_syscall_64 (arch/x86/entry/common.c:101)
[ 2391.804529] ? do_syscall_64 (arch/x86/entry/common.c:101)
[ 2391.808275] entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:129)
[ 2391.813327] RIP: 0033:0x7fb4b977c140
[ 2391.816920] Code: 40 00 48 8b 15 c1 9c 0d 00 f7 d8 64 89 02 48 c7
c0 ff ff ff ff eb b7 0f 1f 00 80 3d a1 24 0e 00 00 74 17 b8 01 00 00
00 0f 05 <48> 3d 00 f0 ff ff 77 58 c3 0f 1f 80 00 00 00 00 48 83 ec 28
48 89
All code
========
0: 40 00 48 8b add %cl,-0x75(%rax)
4: 15 c1 9c 0d 00 adc $0xd9cc1,%eax
9: f7 d8 neg %eax
b: 64 89 02 mov %eax,%fs:(%rdx)
e: 48 c7 c0 ff ff ff ff mov $0xffffffffffffffff,%rax
15: eb b7 jmp 0xffffffffffffffce
17: 0f 1f 00 nopl (%rax)
1a: 80 3d a1 24 0e 00 00 cmpb $0x0,0xe24a1(%rip) # 0xe24c2
21: 74 17 je 0x3a
23: b8 01 00 00 00 mov $0x1,%eax
28: 0f 05 syscall
2a:* 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax <--
trapping instruction
30: 77 58 ja 0x8a
32: c3 retq
33: 0f 1f 80 00 00 00 00 nopl 0x0(%rax)
3a: 48 83 ec 28 sub $0x28,%rsp
3e: 48 rex.W
3f: 89 .byte 0x89

Code starting with the faulting instruction
===========================================
0: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax
6: 77 58 ja 0x60
8: c3 retq
9: 0f 1f 80 00 00 00 00 nopl 0x0(%rax)
10: 48 83 ec 28 sub $0x28,%rsp
14: 48 rex.W
15: 89 .byte 0x89
[ 2391.835660] RSP: 002b:00007ffc43b05b38 EFLAGS: 00000202 ORIG_RAX:
0000000000000001
[ 2391.843225] RAX: ffffffffffffffda RBX: 00007ffc43b05d88 RCX: 00007fb4b977c140
[ 2391.850350] RDX: 0000000000000002 RSI: 000056376b59b7d4 RDI: 0000000000000007
[ 2391.857482] RBP: 00007ffc43b05b60 R08: 0000000000000000 R09: 00007fb4b9681740
[ 2391.864615] R10: 0000000000000000 R11: 0000000000000202 R12: 0000000000000000
[ 2391.871747] R13: 00007ffc43b05d98 R14: 000056376b59ddc8 R15: 00007fb4b9981020
[ 2391.878907] </TASK>
[ 2391.881106] Modules linked in: x86_pkg_temp_thermal fuse configfs
[ 2391.887288] ---[ end trace 0000000000000000 ]---
[ 2391.891915] RIP: 0010:tracing_update_buffers (kernel/trace/trace.c:6470)
[ 2391.897231] Code: 90 90 90 90 90 90 90 90 90 90 90 90 66 0f 1f 00
55 31 f6 48 89 e5 41 55 41 54 53 48 89 fb 48 c7 c7 40 8c 61 94 e8 92
d3 5a 01 <44> 0f b6 a3 18 1f 00 00 41 80 fc 01 0f 87 c8 dc 4e 01 45 31
ed 41
All code
========
0: 90 nop
1: 90 nop
2: 90 nop
3: 90 nop
4: 90 nop
5: 90 nop
6: 90 nop
7: 90 nop
8: 90 nop
9: 90 nop
a: 90 nop
b: 90 nop
c: 66 0f 1f 00 nopw (%rax)
10: 55 push %rbp
11: 31 f6 xor %esi,%esi
13: 48 89 e5 mov %rsp,%rbp
16: 41 55 push %r13
18: 41 54 push %r12
1a: 53 push %rbx
1b: 48 89 fb mov %rdi,%rbx
1e: 48 c7 c7 40 8c 61 94 mov $0xffffffff94618c40,%rdi
25: e8 92 d3 5a 01 callq 0x15ad3bc
2a:* 44 0f b6 a3 18 1f 00 movzbl 0x1f18(%rbx),%r12d <-- trapping instruction
31: 00
32: 41 80 fc 01 cmp $0x1,%r12b
36: 0f 87 c8 dc 4e 01 ja 0x14edd04
3c: 45 31 ed xor %r13d,%r13d
3f: 41 rex.B

Code starting with the faulting instruction
===========================================
0: 44 0f b6 a3 18 1f 00 movzbl 0x1f18(%rbx),%r12d
7: 00
8: 41 80 fc 01 cmp $0x1,%r12b
c: 0f 87 c8 dc 4e 01 ja 0x14edcda
12: 45 31 ed xor %r13d,%r13d
15: 41 rex.B
[ 2391.916120] RSP: 0018:ffffb36e0a477d80 EFLAGS: 00010246
[ 2391.921569] RAX: 0000000000000000 RBX: 6b6b6b6b6b6b6b6b RCX: 0000000080000000
[ 2391.928872] RDX: 0000000000000000 RSI: ffffffff9299b722 RDI: ffffffff9299b722
[ 2391.936237] RBP: ffffb36e0a477d98 R08: 000000000000002f R09: 0000000000000002
[ 2391.943388] R10: ffffb36e0a477d70 R11: 0000000000000000 R12: 0000000000000002
[ 2391.950527] R13: ffffb36e0a477e88 R14: ffff99c5803a2230 R15: ffff99c581c39000
[ 2391.957670] FS: 00007fb4b9681740(0000) GS:ffff99c6efa80000(0000)
knlGS:0000000000000000
[ 2391.965822] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 2391.971579] CR2: 00007fb4b96ab5e0 CR3: 000000010635c002 CR4: 00000000003706f0
[ 2391.978721] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 2391.985879] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 2391.993028] Kernel panic - not syncing: Fatal exception
[ 2391.998287] Kernel Offset: 0x10000000 from 0xffffffff81000000
(relocation range: 0xffffffff80000000-0xffffffffbfffffff)
[ 2392.009066] ---[ end Kernel panic - not syncing: Fatal exception ]---

Links:
- https://qa-reports.linaro.org/lkft/linux-next-master/build/next-20231026/testrun/20823454/suite/log-parser-test/tests/
- https://lkft.validation.linaro.org/scheduler/job/6974179#L5053

metadata:
git_ref: master
git_repo: https://gitlab.com/Linaro/lkft/mirrors/next/linux-next
git_sha: 2ef7141596eed0b4b45ef18b3626f428a6b0a822
git_describe: next-20231026
kernel_version: 6.6.0-rc7
kernel-config:
https://storage.tuxsuite.com/public/linaro/lkft/builds/2XHt24sNSdog7DYY3FLKFZpZmjG/config
artifact-location:
https://storage.tuxsuite.com/public/linaro/lkft/builds/2XHt24sNSdog7DYY3FLKFZpZmjG/
toolchain: gcc-13


--
Linaro LKFT
https://lkft.linaro.org


2023-10-27 19:20:30

by Beau Belgrave

[permalink] [raw]
Subject: Re: selftests: user_events: ftrace_test - RIP: 0010:tracing_update_buffers (kernel/trace/trace.c:6470)

On Fri, Oct 27, 2023 at 05:38:41PM +0530, Naresh Kamboju wrote:
> Following kernel crash noticed on x86_64 while running selftests: user_events:
> ftrace_test running 6.6.0-rc7-next-20231026.
>
> Reported-by: Linux Kernel Functional Testing <[email protected]>
>
> kselftest: Running tests in user_events
> TAP version 13
> 1..4
> # timeout set to 90
> # selftests: user_events: ftrace_test
> [ 2391.606817] general protection fault, probably for non-canonical
> address 0x6b6b6b6b6b6b8a83: 0000 [#1] PREEMPT SMP PTI
> [ 2391.617519] CPU: 1 PID: 34662 Comm: ftrace_test Not tainted
> 6.6.0-rc7-next-20231026 #1
> [ 2391.625428] Hardware name: Supermicro SYS-5019S-ML/X11SSH-F, BIOS
> 2.7 12/07/2021
> [ 2391.632811] RIP: 0010:tracing_update_buffers (kernel/trace/trace.c:6470)
> [ 2391.637952] Code: 90 90 90 90 90 90 90 90 90 90 90 90 66 0f 1f 00
> 55 31 f6 48 89 e5 41 55 41 54 53 48 89 fb 48 c7 c7 40 8c 61 94 e8 92
> d3 5a 01 <44> 0f b6 a3 18 1f 00 00 41 80 fc 01 0f 87 c8 dc 4e 01 45 31
> ed 41
> All code
> ========
> 0: 90 nop
> 1: 90 nop
> 2: 90 nop
> 3: 90 nop
> 4: 90 nop
> 5: 90 nop
> 6: 90 nop
> 7: 90 nop
> 8: 90 nop
> 9: 90 nop
> a: 90 nop
> b: 90 nop
> c: 66 0f 1f 00 nopw (%rax)
> 10: 55 push %rbp
> 11: 31 f6 xor %esi,%esi
> 13: 48 89 e5 mov %rsp,%rbp
> 16: 41 55 push %r13
> 18: 41 54 push %r12
> 1a: 53 push %rbx
> 1b: 48 89 fb mov %rdi,%rbx
> 1e: 48 c7 c7 40 8c 61 94 mov $0xffffffff94618c40,%rdi
> 25: e8 92 d3 5a 01 callq 0x15ad3bc
> 2a:* 44 0f b6 a3 18 1f 00 movzbl 0x1f18(%rbx),%r12d <-- trapping instruction
> 31: 00
> 32: 41 80 fc 01 cmp $0x1,%r12b
> 36: 0f 87 c8 dc 4e 01 ja 0x14edd04
> 3c: 45 31 ed xor %r13d,%r13d
> 3f: 41 rex.B
>
> Code starting with the faulting instruction
> ===========================================
> 0: 44 0f b6 a3 18 1f 00 movzbl 0x1f18(%rbx),%r12d
> 7: 00
> 8: 41 80 fc 01 cmp $0x1,%r12b
> c: 0f 87 c8 dc 4e 01 ja 0x14edcda
> 12: 45 31 ed xor %r13d,%r13d
> 15: 41 rex.B
> [ 2391.656696] RSP: 0018:ffffb36e0a477d80 EFLAGS: 00010246
> [ 2391.661937] RAX: 0000000000000000 RBX: 6b6b6b6b6b6b6b6b RCX: 0000000080000000
> [ 2391.669064] RDX: 0000000000000000 RSI: ffffffff9299b722 RDI: ffffffff9299b722
> [ 2391.676195] RBP: ffffb36e0a477d98 R08: 000000000000002f R09: 0000000000000002
> [ 2391.683321] R10: ffffb36e0a477d70 R11: 0000000000000000 R12: 0000000000000002
> [ 2391.690453] R13: ffffb36e0a477e88 R14: ffff99c5803a2230 R15: ffff99c581c39000
> [ 2391.697586] FS: 00007fb4b9681740(0000) GS:ffff99c6efa80000(0000)
> knlGS:0000000000000000
> [ 2391.705670] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [ 2391.711410] CR2: 00007fb4b96ab5e0 CR3: 000000010635c002 CR4: 00000000003706f0
> [ 2391.718540] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> [ 2391.725665] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> [ 2391.732797] Call Trace:
> [ 2391.735240] <TASK>
> [ 2391.737339] ? show_regs (arch/x86/kernel/dumpstack.c:479)
> [ 2391.740744] ? die_addr (arch/x86/kernel/dumpstack.c:421
> arch/x86/kernel/dumpstack.c:460)
> [ 2391.744056] ? exc_general_protection (arch/x86/kernel/traps.c:697
> arch/x86/kernel/traps.c:642)
> [ 2391.748766] ? asm_exc_general_protection
> (arch/x86/include/asm/idtentry.h:564)
> [ 2391.753652] ? __mutex_lock (kernel/locking/mutex.c:613
> (discriminator 3) kernel/locking/mutex.c:747 (discriminator 3))
> [ 2391.757487] ? __mutex_lock (kernel/locking/mutex.c:613
> (discriminator 3) kernel/locking/mutex.c:747 (discriminator 3))
> [ 2391.761318] ? tracing_update_buffers (kernel/trace/trace.c:6470)
> [ 2391.765851] event_enable_write (kernel/trace/trace_events.c:1408)
> [ 2391.769976] vfs_write (fs/read_write.c:582)
> [ 2391.773296] ? close_fd_get_file (fs/file.c:821)
> [ 2391.777396] ? preempt_count_sub (kernel/sched/core.c:5857
> kernel/sched/core.c:5853 kernel/sched/core.c:5875)
> [ 2391.781496] ksys_write (fs/read_write.c:638)
> [ 2391.784918] __x64_sys_write (fs/read_write.c:646)
> [ 2391.788671] do_syscall_64 (arch/x86/entry/common.c:51
> arch/x86/entry/common.c:82)
> [ 2391.792248] ? do_syscall_64 (arch/x86/entry/common.c:101)
> [ 2391.795995] ? syscall_exit_to_user_mode (kernel/entry/common.c:299)
> [ 2391.800785] ? do_syscall_64 (arch/x86/entry/common.c:101)
> [ 2391.804529] ? do_syscall_64 (arch/x86/entry/common.c:101)
> [ 2391.808275] entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:129)
> [ 2391.813327] RIP: 0033:0x7fb4b977c140
> [ 2391.816920] Code: 40 00 48 8b 15 c1 9c 0d 00 f7 d8 64 89 02 48 c7
> c0 ff ff ff ff eb b7 0f 1f 00 80 3d a1 24 0e 00 00 74 17 b8 01 00 00
> 00 0f 05 <48> 3d 00 f0 ff ff 77 58 c3 0f 1f 80 00 00 00 00 48 83 ec 28
> 48 89
> All code
> ========
> 0: 40 00 48 8b add %cl,-0x75(%rax)
> 4: 15 c1 9c 0d 00 adc $0xd9cc1,%eax
> 9: f7 d8 neg %eax
> b: 64 89 02 mov %eax,%fs:(%rdx)
> e: 48 c7 c0 ff ff ff ff mov $0xffffffffffffffff,%rax
> 15: eb b7 jmp 0xffffffffffffffce
> 17: 0f 1f 00 nopl (%rax)
> 1a: 80 3d a1 24 0e 00 00 cmpb $0x0,0xe24a1(%rip) # 0xe24c2
> 21: 74 17 je 0x3a
> 23: b8 01 00 00 00 mov $0x1,%eax
> 28: 0f 05 syscall
> 2a:* 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax <--
> trapping instruction
> 30: 77 58 ja 0x8a
> 32: c3 retq
> 33: 0f 1f 80 00 00 00 00 nopl 0x0(%rax)
> 3a: 48 83 ec 28 sub $0x28,%rsp
> 3e: 48 rex.W
> 3f: 89 .byte 0x89
>
> Code starting with the faulting instruction
> ===========================================
> 0: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax
> 6: 77 58 ja 0x60
> 8: c3 retq
> 9: 0f 1f 80 00 00 00 00 nopl 0x0(%rax)
> 10: 48 83 ec 28 sub $0x28,%rsp
> 14: 48 rex.W
> 15: 89 .byte 0x89
> [ 2391.835660] RSP: 002b:00007ffc43b05b38 EFLAGS: 00000202 ORIG_RAX:
> 0000000000000001
> [ 2391.843225] RAX: ffffffffffffffda RBX: 00007ffc43b05d88 RCX: 00007fb4b977c140
> [ 2391.850350] RDX: 0000000000000002 RSI: 000056376b59b7d4 RDI: 0000000000000007
> [ 2391.857482] RBP: 00007ffc43b05b60 R08: 0000000000000000 R09: 00007fb4b9681740
> [ 2391.864615] R10: 0000000000000000 R11: 0000000000000202 R12: 0000000000000000
> [ 2391.871747] R13: 00007ffc43b05d98 R14: 000056376b59ddc8 R15: 00007fb4b9981020
> [ 2391.878907] </TASK>
> [ 2391.881106] Modules linked in: x86_pkg_temp_thermal fuse configfs
> [ 2391.887288] ---[ end trace 0000000000000000 ]---
> [ 2391.891915] RIP: 0010:tracing_update_buffers (kernel/trace/trace.c:6470)
> [ 2391.897231] Code: 90 90 90 90 90 90 90 90 90 90 90 90 66 0f 1f 00
> 55 31 f6 48 89 e5 41 55 41 54 53 48 89 fb 48 c7 c7 40 8c 61 94 e8 92
> d3 5a 01 <44> 0f b6 a3 18 1f 00 00 41 80 fc 01 0f 87 c8 dc 4e 01 45 31
> ed 41
> All code
> ========
> 0: 90 nop
> 1: 90 nop
> 2: 90 nop
> 3: 90 nop
> 4: 90 nop
> 5: 90 nop
> 6: 90 nop
> 7: 90 nop
> 8: 90 nop
> 9: 90 nop
> a: 90 nop
> b: 90 nop
> c: 66 0f 1f 00 nopw (%rax)
> 10: 55 push %rbp
> 11: 31 f6 xor %esi,%esi
> 13: 48 89 e5 mov %rsp,%rbp
> 16: 41 55 push %r13
> 18: 41 54 push %r12
> 1a: 53 push %rbx
> 1b: 48 89 fb mov %rdi,%rbx
> 1e: 48 c7 c7 40 8c 61 94 mov $0xffffffff94618c40,%rdi
> 25: e8 92 d3 5a 01 callq 0x15ad3bc
> 2a:* 44 0f b6 a3 18 1f 00 movzbl 0x1f18(%rbx),%r12d <-- trapping instruction
> 31: 00
> 32: 41 80 fc 01 cmp $0x1,%r12b
> 36: 0f 87 c8 dc 4e 01 ja 0x14edd04
> 3c: 45 31 ed xor %r13d,%r13d
> 3f: 41 rex.B
>
> Code starting with the faulting instruction
> ===========================================
> 0: 44 0f b6 a3 18 1f 00 movzbl 0x1f18(%rbx),%r12d
> 7: 00
> 8: 41 80 fc 01 cmp $0x1,%r12b
> c: 0f 87 c8 dc 4e 01 ja 0x14edcda
> 12: 45 31 ed xor %r13d,%r13d
> 15: 41 rex.B
> [ 2391.916120] RSP: 0018:ffffb36e0a477d80 EFLAGS: 00010246
> [ 2391.921569] RAX: 0000000000000000 RBX: 6b6b6b6b6b6b6b6b RCX: 0000000080000000
> [ 2391.928872] RDX: 0000000000000000 RSI: ffffffff9299b722 RDI: ffffffff9299b722
> [ 2391.936237] RBP: ffffb36e0a477d98 R08: 000000000000002f R09: 0000000000000002
> [ 2391.943388] R10: ffffb36e0a477d70 R11: 0000000000000000 R12: 0000000000000002
> [ 2391.950527] R13: ffffb36e0a477e88 R14: ffff99c5803a2230 R15: ffff99c581c39000
> [ 2391.957670] FS: 00007fb4b9681740(0000) GS:ffff99c6efa80000(0000)
> knlGS:0000000000000000
> [ 2391.965822] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [ 2391.971579] CR2: 00007fb4b96ab5e0 CR3: 000000010635c002 CR4: 00000000003706f0
> [ 2391.978721] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> [ 2391.985879] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> [ 2391.993028] Kernel panic - not syncing: Fatal exception
> [ 2391.998287] Kernel Offset: 0x10000000 from 0xffffffff81000000
> (relocation range: 0xffffffff80000000-0xffffffffbfffffff)
> [ 2392.009066] ---[ end Kernel panic - not syncing: Fatal exception ]---
>

It's hard to repro this, it does repro after running several times for
me.

Steven, this is firing when the self test writes 1 to the enable file
for a user_event created event.

Sometimes I get this splat instead of a panic:
[ 956.819778] ------------[ cut here ]------------
[ 956.820526] WARNING: CPU: 12 PID: 914 at kernel/trace/trace.c:516 tracing_release_file_tr+0x46/0x50
[ 956.821389] Modules linked in:
[ 956.821708] CPU: 12 PID: 914 Comm: ftrace_test Not tainted 6.6.0-rc7-next-20231026 #1
[ 956.822501] RIP: 0010:tracing_release_file_tr+0x46/0x50
[ 956.822902] Code: d1 03 01 8b 83 c0 1e 00 00 85 c0 74 1d 83 e8 01 48 c7 c7 80 5b ef 84 89 83 c0 1e 00 00 e8 b2 b5 03 01 31 c0 5b e9 75 ee 27 01 <0f> 0b eb df 66 0f 1f 44 00 00 90 90 90 90 90 90 90 90 90 90 90 90
[ 956.824261] RSP: 0018:ffffb59d879e3ee8 EFLAGS: 00010246
[ 956.824656] RAX: 0000000000000000 RBX: ffff9e386a82c058 RCX: 0000000000000000
[ 956.825239] RDX: 0000000000000000 RSI: ffffffff84b6b383 RDI: 00000000ffffffff
[ 956.825835] RBP: ffff9e3844076200 R08: 0000000000000000 R09: 0000000000000000
[ 956.826425] R10: ffffb59d879e3ee8 R11: ffffffff8328526f R12: 00000000000f801f
[ 956.827359] R13: ffff9e3846134020 R14: ffff9e3843fd03a8 R15: 0000000000000000
[ 956.828732] FS: 00007eff4f818740(0000) GS:ffff9e3f26300000(0000) knlGS:0000000000000000
[ 956.829436] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 956.829968] CR2: 00007eff4f42a250 CR3: 00000007d0744004 CR4: 0000000000370eb0
[ 956.830591] Call Trace:
[ 956.830804] <TASK>
[ 956.831113] ? __warn+0x7f/0x160
[ 956.831469] ? tracing_release_file_tr+0x46/0x50
[ 956.831891] ? report_bug+0x1c3/0x1d0
[ 956.832224] ? handle_bug+0x3c/0x70
[ 956.832544] ? exc_invalid_op+0x14/0x70
[ 956.832861] ? asm_exc_invalid_op+0x16/0x20
[ 956.833190] ? tracing_release_file_tr+0x1f/0x50
[ 956.833727] ? tracing_release_file_tr+0x46/0x50
[ 956.834159] ? tracing_release_file_tr+0x1f/0x50
[ 956.834579] __fput+0xab/0x300
[ 956.834903] __x64_sys_close+0x38/0x80
[ 956.835235] do_syscall_64+0x41/0xf0
[ 956.835554] entry_SYSCALL_64_after_hwframe+0x6c/0x74
[ 956.835974] RIP: 0033:0x7eff4f515157
[ 956.836291] Code: ff e8 0d 16 02 00 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 03 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 41 c3 48 83 ec 18 89 7c 24 0c e8 83 b8 f7 ff
[ 956.837728] RSP: 002b:00007ffc021d8158 EFLAGS: 00000246 ORIG_RAX: 0000000000000003
[ 956.838351] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007eff4f515157
[ 956.838975] RDX: 0000000000000002 RSI: 000055637921d7d4 RDI: 0000000000000005
[ 956.839586] RBP: 00007ffc021d8180 R08: 0000000000000000 R09: 00007eff4f818740
[ 956.840201] R10: 0000000000000000 R11: 0000000000000246 R12: 00007ffc021d83b8
[ 956.840810] R13: 000055637921cae7 R14: 000055637921fc38 R15: 00007eff4f946040
[ 956.841562] </TASK>
[ 956.841787] irq event stamp: 881
[ 956.842145] hardirqs last enabled at (889): [<ffffffff831d7f49>] console_unlock+0x109/0x130
[ 956.843013] hardirqs last disabled at (896): [<ffffffff831d7f2e>] console_unlock+0xee/0x130
[ 956.843648] softirqs last enabled at (694): [<ffffffff8314ba2a>] irq_exit_rcu+0x8a/0xe0
[ 956.844276] softirqs last disabled at (689): [<ffffffff8314ba2a>] irq_exit_rcu+0x8a/0xe0
[ 956.844893] ---[ end trace 0000000000000000 ]---

Warning is from this code:
static void __trace_array_put(struct trace_array *this_tr)
{
WARN_ON(!this_tr->ref);
this_tr->ref--;
}

It seems like there might be a timing window or an incorrect call to
trace_array_put() somewhere. Do you think this is related to the eventfs
work?

Thanks,
-Beau

> Links:
> - https://qa-reports.linaro.org/lkft/linux-next-master/build/next-20231026/testrun/20823454/suite/log-parser-test/tests/
> - https://lkft.validation.linaro.org/scheduler/job/6974179#L5053
>
> metadata:
> git_ref: master
> git_repo: https://gitlab.com/Linaro/lkft/mirrors/next/linux-next
> git_sha: 2ef7141596eed0b4b45ef18b3626f428a6b0a822
> git_describe: next-20231026
> kernel_version: 6.6.0-rc7
> kernel-config:
> https://storage.tuxsuite.com/public/linaro/lkft/builds/2XHt24sNSdog7DYY3FLKFZpZmjG/config
> artifact-location:
> https://storage.tuxsuite.com/public/linaro/lkft/builds/2XHt24sNSdog7DYY3FLKFZpZmjG/
> toolchain: gcc-13
>
>
> --
> Linaro LKFT
> https://lkft.linaro.org

2023-10-27 22:37:05

by Steven Rostedt

[permalink] [raw]
Subject: Re: selftests: user_events: ftrace_test - RIP: 0010:tracing_update_buffers (kernel/trace/trace.c:6470)

On Fri, 27 Oct 2023 12:20:11 -0700
Beau Belgrave <[email protected]> wrote:

> On Fri, Oct 27, 2023 at 05:38:41PM +0530, Naresh Kamboju wrote:
> > Following kernel crash noticed on x86_64 while running selftests: user_events:
> > ftrace_test running 6.6.0-rc7-next-20231026.
> >
> > Reported-by: Linux Kernel Functional Testing <[email protected]>
> >
> > kselftest: Running tests in user_events
> > TAP version 13
> > 1..4
> > # timeout set to 90
> > # selftests: user_events: ftrace_test
> > [ 2391.606817] general protection fault, probably for non-canonical
> > address 0x6b6b6b6b6b6b8a83: 0000 [#1] PREEMPT SMP PTI
> > [ 2391.617519] CPU: 1 PID: 34662 Comm: ftrace_test Not tainted
> > 6.6.0-rc7-next-20231026 #1
> > [ 2391.625428] Hardware name: Supermicro SYS-5019S-ML/X11SSH-F, BIOS
> > 2.7 12/07/2021
> > [ 2391.632811] RIP: 0010:tracing_update_buffers (kernel/trace/trace.c:6470)
> > [ 2391.637952] Code: 90 90 90 90 90 90 90 90 90 90 90 90 66 0f 1f 00
> > 55 31 f6 48 89 e5 41 55 41 54 53 48 89 fb 48 c7 c7 40 8c 61 94 e8 92
> > d3 5a 01 <44> 0f b6 a3 18 1f 00 00 41 80 fc 01 0f 87 c8 dc 4e 01 45 31



> Warning is from this code:
> static void __trace_array_put(struct trace_array *this_tr)
> {
> WARN_ON(!this_tr->ref);
> this_tr->ref--;
> }
>
> It seems like there might be a timing window or an incorrect call to
> trace_array_put() somewhere. Do you think this is related to the eventfs
> work?

No, I think this is was probably introduced by:

a1f157c7a3bb ("tracing: Expand all ring buffers individually")

Or possibly a mixture of the two changes? But anyway I think I need to look
at this one first.

-- Steve

2023-10-28 02:34:29

by Steven Rostedt

[permalink] [raw]
Subject: Re: selftests: user_events: ftrace_test - RIP: 0010:tracing_update_buffers (kernel/trace/trace.c:6470)

On Fri, 27 Oct 2023 18:36:40 -0400
Steven Rostedt <[email protected]> wrote:

> On Fri, 27 Oct 2023 12:20:11 -0700
> Beau Belgrave <[email protected]> wrote:
>
> > On Fri, Oct 27, 2023 at 05:38:41PM +0530, Naresh Kamboju wrote:
> > > Following kernel crash noticed on x86_64 while running selftests: user_events:
> > > ftrace_test running 6.6.0-rc7-next-20231026.
> > >
> > > Reported-by: Linux Kernel Functional Testing <[email protected]>
> > >
> > > kselftest: Running tests in user_events
> > > TAP version 13
> > > 1..4
> > > # timeout set to 90
> > > # selftests: user_events: ftrace_test
> > > [ 2391.606817] general protection fault, probably for non-canonical
> > > address 0x6b6b6b6b6b6b8a83: 0000 [#1] PREEMPT SMP PTI
> > > [ 2391.617519] CPU: 1 PID: 34662 Comm: ftrace_test Not tainted
> > > 6.6.0-rc7-next-20231026 #1
> > > [ 2391.625428] Hardware name: Supermicro SYS-5019S-ML/X11SSH-F, BIOS
> > > 2.7 12/07/2021
> > > [ 2391.632811] RIP: 0010:tracing_update_buffers (kernel/trace/trace.c:6470)
> > > [ 2391.637952] Code: 90 90 90 90 90 90 90 90 90 90 90 90 66 0f 1f 00
> > > 55 31 f6 48 89 e5 41 55 41 54 53 48 89 fb 48 c7 c7 40 8c 61 94 e8 92
> > > d3 5a 01 <44> 0f b6 a3 18 1f 00 00 41 80 fc 01 0f 87 c8 dc 4e 01 45 31
>
>
>
> > Warning is from this code:
> > static void __trace_array_put(struct trace_array *this_tr)
> > {
> > WARN_ON(!this_tr->ref);
> > this_tr->ref--;
> > }
> >
> > It seems like there might be a timing window or an incorrect call to
> > trace_array_put() somewhere. Do you think this is related to the eventfs
> > work?
>
> No, I think this is was probably introduced by:
>
> a1f157c7a3bb ("tracing: Expand all ring buffers individually")
>
> Or possibly a mixture of the two changes? But anyway I think I need to look
> at this one first.

Not sure if the bug Beau hit is the same as this one, but the one Beau
hit I think is fixed by this:

-- Steve

diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c
index 4d2da7480e5f..4a54493b8419 100644
--- a/fs/tracefs/event_inode.c
+++ b/fs/tracefs/event_inode.c
@@ -234,6 +234,10 @@ create_file_dentry(struct eventfs_inode *ei, struct dentry **e_dentry,
bool invalidate = false;

mutex_lock(&eventfs_mutex);
+ if (ei->is_freed) {
+ mutex_unlock(&eventfs_mutex);
+ return NULL;
+ }
/* If the e_dentry already has a dentry, use it */
if (*e_dentry) {
/* lookup does not need to up the ref count */

2023-10-30 16:32:04

by Beau Belgrave

[permalink] [raw]
Subject: Re: selftests: user_events: ftrace_test - RIP: 0010:tracing_update_buffers (kernel/trace/trace.c:6470)

On Fri, Oct 27, 2023 at 10:33:44PM -0400, Steven Rostedt wrote:
> On Fri, 27 Oct 2023 18:36:40 -0400
> Steven Rostedt <[email protected]> wrote:
>
> > On Fri, 27 Oct 2023 12:20:11 -0700
> > Beau Belgrave <[email protected]> wrote:
> >
> > > On Fri, Oct 27, 2023 at 05:38:41PM +0530, Naresh Kamboju wrote:
> > > > Following kernel crash noticed on x86_64 while running selftests: user_events:
> > > > ftrace_test running 6.6.0-rc7-next-20231026.
> > > >
> > > > Reported-by: Linux Kernel Functional Testing <[email protected]>
> > > >
> > > > kselftest: Running tests in user_events
> > > > TAP version 13
> > > > 1..4
> > > > # timeout set to 90
> > > > # selftests: user_events: ftrace_test
> > > > [ 2391.606817] general protection fault, probably for non-canonical
> > > > address 0x6b6b6b6b6b6b8a83: 0000 [#1] PREEMPT SMP PTI
> > > > [ 2391.617519] CPU: 1 PID: 34662 Comm: ftrace_test Not tainted
> > > > 6.6.0-rc7-next-20231026 #1
> > > > [ 2391.625428] Hardware name: Supermicro SYS-5019S-ML/X11SSH-F, BIOS
> > > > 2.7 12/07/2021
> > > > [ 2391.632811] RIP: 0010:tracing_update_buffers (kernel/trace/trace.c:6470)
> > > > [ 2391.637952] Code: 90 90 90 90 90 90 90 90 90 90 90 90 66 0f 1f 00
> > > > 55 31 f6 48 89 e5 41 55 41 54 53 48 89 fb 48 c7 c7 40 8c 61 94 e8 92
> > > > d3 5a 01 <44> 0f b6 a3 18 1f 00 00 41 80 fc 01 0f 87 c8 dc 4e 01 45 31
> >
> >
> >
> > > Warning is from this code:
> > > static void __trace_array_put(struct trace_array *this_tr)
> > > {
> > > WARN_ON(!this_tr->ref);
> > > this_tr->ref--;
> > > }
> > >
> > > It seems like there might be a timing window or an incorrect call to
> > > trace_array_put() somewhere. Do you think this is related to the eventfs
> > > work?
> >
> > No, I think this is was probably introduced by:
> >
> > a1f157c7a3bb ("tracing: Expand all ring buffers individually")
> >
> > Or possibly a mixture of the two changes? But anyway I think I need to look
> > at this one first.
>

I applied both [1][2] patches, and I no longer get any panics. However,
I still get the splat about the trace_array_put when running
user_event's ftrace selftest:

[ 26.665931] ------------[ cut here ]------------
[ 26.666663] WARNING: CPU: 12 PID: 291 at kernel/trace/trace.c:516 tracing_release_file_tr+0x46/0x50
[ 26.667470] Modules linked in:
[ 26.667808] CPU: 12 PID: 291 Comm: ftrace_test Not tainted 6.6.0-rc7-next-20231026 #3
[ 26.668665] RIP: 0010:tracing_release_file_tr+0x46/0x50
[ 26.669093] Code: d1 03 01 8b 83 c0 1e 00 00 85 c0 74 1d 83 e8 01 48 c7 c7 80 5b ef bc 89 83 c0 1e 00 00 e8 f2 b5 03 01 31 c0 5b e9 75 ee 27 01 <0f> 0b eb df 66 0f 1f 44 00 00 90 90 90 90 90 90 90 90 90 90 90 90
[ 26.670580] RSP: 0018:ffffb6ef858ffee8 EFLAGS: 00010246
[ 26.671128] RAX: 0000000000000000 RBX: ffff9d7ae2364058 RCX: 0000000000000000
[ 26.671793] RDX: 0000000000000000 RSI: ffffffffbcb6b38b RDI: 00000000ffffffff
[ 26.672444] RBP: ffff9d7ac3e72200 R08: 0000000000000000 R09: 0000000000000000
[ 26.673072] R10: ffffb6ef858ffee8 R11: ffffffffbb28526f R12: 00000000000f801f
[ 26.673705] R13: ffff9d7b661a2020 R14: ffff9d7ac6057728 R15: 0000000000000000
[ 26.674339] FS: 00007fa852fa6740(0000) GS:ffff9d81a6300000(0000) knlGS:0000000000000000
[ 26.674978] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 26.675506] CR2: 00007fa852c2a250 CR3: 0000000105d92001 CR4: 0000000000370eb0
[ 26.676142] Call Trace:
[ 26.676357] <TASK>
[ 26.676572] ? __warn+0x7f/0x160
[ 26.677092] ? tracing_release_file_tr+0x46/0x50
[ 26.677540] ? report_bug+0x1c3/0x1d0
[ 26.677871] ? handle_bug+0x3c/0x70
[ 26.678196] ? exc_invalid_op+0x14/0x70
[ 26.678520] ? asm_exc_invalid_op+0x16/0x20
[ 26.678845] ? tracing_release_file_tr+0x1f/0x50
[ 26.679268] ? tracing_release_file_tr+0x46/0x50
[ 26.679691] ? tracing_release_file_tr+0x1f/0x50
[ 26.680105] __fput+0xab/0x300
[ 26.680437] __x64_sys_close+0x38/0x80
[ 26.680757] do_syscall_64+0x41/0xf0
[ 26.681329] entry_SYSCALL_64_after_hwframe+0x6c/0x74
[ 26.681784] RIP: 0033:0x7fa852d15157
[ 26.682126] Code: ff e8 0d 16 02 00 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 03 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 41 c3 48 83 ec 18 89 7c 24 0c e8 83 b8 f7 ff
[ 26.684255] RSP: 002b:00007ffd226914f8 EFLAGS: 00000246 ORIG_RAX: 0000000000000003
[ 26.684939] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fa852d15157
[ 26.685602] RDX: 0000000000000002 RSI: 0000560ad54567d4 RDI: 0000000000000005
[ 26.686257] RBP: 00007ffd22691520 R08: 0000000000000000 R09: 00007fa852fa6740
[ 26.686877] R10: 0000000000000000 R11: 0000000000000246 R12: 00007ffd22691758
[ 26.687480] R13: 0000560ad5455ae7 R14: 0000560ad5458c38 R15: 00007fa8530d4040
[ 26.688107] </TASK>
[ 26.688315] irq event stamp: 1361
[ 26.688618] hardirqs last enabled at (1369): [<ffffffffbb1d7f49>] console_unlock+0x109/0x130
[ 26.689339] hardirqs last disabled at (1376): [<ffffffffbb1d7f2e>] console_unlock+0xee/0x130
[ 26.690062] softirqs last enabled at (1272): [<ffffffffbb14ba2a>] irq_exit_rcu+0x8a/0xe0
[ 26.690774] softirqs last disabled at (1267): [<ffffffffbb14ba2a>] irq_exit_rcu+0x8a/0xe0
[ 26.691620] ---[ end trace 0000000000000000 ]---

Thanks,
-Beau

1. https://lore.kernel.org/linux-trace-kernel/[email protected]/
2. https://lore.kernel.org/linux-trace-kernel/[email protected]/

> Not sure if the bug Beau hit is the same as this one, but the one Beau
> hit I think is fixed by this:
>
> -- Steve
>
> diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c
> index 4d2da7480e5f..4a54493b8419 100644
> --- a/fs/tracefs/event_inode.c
> +++ b/fs/tracefs/event_inode.c
> @@ -234,6 +234,10 @@ create_file_dentry(struct eventfs_inode *ei, struct dentry **e_dentry,
> bool invalidate = false;
>
> mutex_lock(&eventfs_mutex);
> + if (ei->is_freed) {
> + mutex_unlock(&eventfs_mutex);
> + return NULL;
> + }
> /* If the e_dentry already has a dentry, use it */
> if (*e_dentry) {
> /* lookup does not need to up the ref count */

2023-10-30 16:42:36

by Steven Rostedt

[permalink] [raw]
Subject: Re: selftests: user_events: ftrace_test - RIP: 0010:tracing_update_buffers (kernel/trace/trace.c:6470)

On Mon, 30 Oct 2023 09:31:02 -0700
Beau Belgrave <[email protected]> wrote:

> I applied both [1][2] patches, and I no longer get any panics. However,

Great! Can I add "Tested-by" from you on those patches?

> I still get the splat about the trace_array_put when running
> user_event's ftrace selftest:
>
> [ 26.665931] ------------[ cut here ]------------
> [ 26.666663] WARNING: CPU: 12 PID: 291 at kernel/trace/trace.c:516 tracing_release_file_tr+0x46/0x50
> [ 26.667470] Modules linked in:
> [ 26.667808] CPU: 12 PID: 291 Comm: ftrace_test Not tainted 6.6.0-rc7-next-20231026 #3
> [ 26.668665] RIP: 0010:tracing_release_file_tr+0x46/0x50
> [ 26.669093] Code: d1 03 01 8b 83 c0 1e 00 00 85 c0 74 1d 83 e8 01 48 c7 c7 80 5b ef bc 89 83 c0 1e 00 00 e8 f2 b5 03 01 31 c0 5b e9 75 ee 27 01 <0f> 0b eb df 66 0f 1f 44 00 00 90 90 90 90 90 90 90 90 90 90 90 90
> [ 26.670580] RSP: 0018:ffffb6ef858ffee8 EFLAGS: 00010246
> [ 26.671128] RAX: 0000000000000000 RBX: ffff9d7ae2364058 RCX: 0000000000000000
> [ 26.671793] RDX: 0000000000000000 RSI: ffffffffbcb6b38b RDI: 00000000ffffffff
> [ 26.672444] RBP: ffff9d7ac3e72200 R08: 0000000000000000 R09: 0000000000000000
> [ 26.673072] R10: ffffb6ef858ffee8 R11: ffffffffbb28526f R12: 00000000000f801f
> [ 26.673705] R13: ffff9d7b661a2020 R14: ffff9d7ac6057728 R15: 0000000000000000
> [ 26.674339] FS: 00007fa852fa6740(0000) GS:ffff9d81a6300000(0000) knlGS:0000000000000000
> [ 26.674978] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [ 26.675506] CR2: 00007fa852c2a250 CR3: 0000000105d92001 CR4: 0000000000370eb0
> [ 26.676142] Call Trace:
> [ 26.676357] <TASK>
> [ 26.676572] ? __warn+0x7f/0x160
> [ 26.677092] ? tracing_release_file_tr+0x46/0x50
> [ 26.677540] ? report_bug+0x1c3/0x1d0
> [ 26.677871] ? handle_bug+0x3c/0x70
> [ 26.678196] ? exc_invalid_op+0x14/0x70
> [ 26.678520] ? asm_exc_invalid_op+0x16/0x20
> [ 26.678845] ? tracing_release_file_tr+0x1f/0x50
> [ 26.679268] ? tracing_release_file_tr+0x46/0x50
> [ 26.679691] ? tracing_release_file_tr+0x1f/0x50
> [ 26.680105] __fput+0xab/0x300
> [ 26.680437] __x64_sys_close+0x38/0x80

Hmm, this doesn't tell me much. Let me go play with the user_event self
tests.

Thanks Beau!

-- Steve


> [ 26.680757] do_syscall_64+0x41/0xf0
> [ 26.681329] entry_SYSCALL_64_after_hwframe+0x6c/0x74
> [ 26.681784] RIP: 0033:0x7fa852d15157
> [ 26.682126] Code: ff e8 0d 16 02 00 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 03 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 41 c3 48 83 ec 18 89 7c 24 0c e8 83 b8 f7 ff
> [ 26.684255] RSP: 002b:00007ffd226914f8 EFLAGS: 00000246 ORIG_RAX: 0000000000000003
> [ 26.684939] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fa852d15157
> [ 26.685602] RDX: 0000000000000002 RSI: 0000560ad54567d4 RDI: 0000000000000005
> [ 26.686257] RBP: 00007ffd22691520 R08: 0000000000000000 R09: 00007fa852fa6740
> [ 26.686877] R10: 0000000000000000 R11: 0000000000000246 R12: 00007ffd22691758
> [ 26.687480] R13: 0000560ad5455ae7 R14: 0000560ad5458c38 R15: 00007fa8530d4040
> [ 26.688107] </TASK>
> [ 26.688315] irq event stamp: 1361
> [ 26.688618] hardirqs last enabled at (1369): [<ffffffffbb1d7f49>] console_unlock+0x109/0x130
> [ 26.689339] hardirqs last disabled at (1376): [<ffffffffbb1d7f2e>] console_unlock+0xee/0x130
> [ 26.690062] softirqs last enabled at (1272): [<ffffffffbb14ba2a>] irq_exit_rcu+0x8a/0xe0
> [ 26.690774] softirqs last disabled at (1267): [<ffffffffbb14ba2a>] irq_exit_rcu+0x8a/0xe0
> [ 26.691620] ---[ end trace 0000000000000000 ]---
>
> Thanks,
> -Beau
>
> 1. https://lore.kernel.org/linux-trace-kernel/[email protected]/
> 2. https://lore.kernel.org/linux-trace-kernel/[email protected]/

2023-10-30 16:50:26

by Beau Belgrave

[permalink] [raw]
Subject: Re: selftests: user_events: ftrace_test - RIP: 0010:tracing_update_buffers (kernel/trace/trace.c:6470)

On Mon, Oct 30, 2023 at 12:42:23PM -0400, Steven Rostedt wrote:
> On Mon, 30 Oct 2023 09:31:02 -0700
> Beau Belgrave <[email protected]> wrote:
>
> > I applied both [1][2] patches, and I no longer get any panics. However,
>
> Great! Can I add "Tested-by" from you on those patches?
>

Yep, please do.

> > I still get the splat about the trace_array_put when running
> > user_event's ftrace selftest:
> >
> > [ 26.665931] ------------[ cut here ]------------
> > [ 26.666663] WARNING: CPU: 12 PID: 291 at kernel/trace/trace.c:516 tracing_release_file_tr+0x46/0x50
> > [ 26.667470] Modules linked in:
> > [ 26.667808] CPU: 12 PID: 291 Comm: ftrace_test Not tainted 6.6.0-rc7-next-20231026 #3
> > [ 26.668665] RIP: 0010:tracing_release_file_tr+0x46/0x50
> > [ 26.669093] Code: d1 03 01 8b 83 c0 1e 00 00 85 c0 74 1d 83 e8 01 48 c7 c7 80 5b ef bc 89 83 c0 1e 00 00 e8 f2 b5 03 01 31 c0 5b e9 75 ee 27 01 <0f> 0b eb df 66 0f 1f 44 00 00 90 90 90 90 90 90 90 90 90 90 90 90
> > [ 26.670580] RSP: 0018:ffffb6ef858ffee8 EFLAGS: 00010246
> > [ 26.671128] RAX: 0000000000000000 RBX: ffff9d7ae2364058 RCX: 0000000000000000
> > [ 26.671793] RDX: 0000000000000000 RSI: ffffffffbcb6b38b RDI: 00000000ffffffff
> > [ 26.672444] RBP: ffff9d7ac3e72200 R08: 0000000000000000 R09: 0000000000000000
> > [ 26.673072] R10: ffffb6ef858ffee8 R11: ffffffffbb28526f R12: 00000000000f801f
> > [ 26.673705] R13: ffff9d7b661a2020 R14: ffff9d7ac6057728 R15: 0000000000000000
> > [ 26.674339] FS: 00007fa852fa6740(0000) GS:ffff9d81a6300000(0000) knlGS:0000000000000000
> > [ 26.674978] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > [ 26.675506] CR2: 00007fa852c2a250 CR3: 0000000105d92001 CR4: 0000000000370eb0
> > [ 26.676142] Call Trace:
> > [ 26.676357] <TASK>
> > [ 26.676572] ? __warn+0x7f/0x160
> > [ 26.677092] ? tracing_release_file_tr+0x46/0x50
> > [ 26.677540] ? report_bug+0x1c3/0x1d0
> > [ 26.677871] ? handle_bug+0x3c/0x70
> > [ 26.678196] ? exc_invalid_op+0x14/0x70
> > [ 26.678520] ? asm_exc_invalid_op+0x16/0x20
> > [ 26.678845] ? tracing_release_file_tr+0x1f/0x50
> > [ 26.679268] ? tracing_release_file_tr+0x46/0x50
> > [ 26.679691] ? tracing_release_file_tr+0x1f/0x50
> > [ 26.680105] __fput+0xab/0x300
> > [ 26.680437] __x64_sys_close+0x38/0x80
>
> Hmm, this doesn't tell me much. Let me go play with the user_event self
> tests.
>
> Thanks Beau!
>
> -- Steve

I get a 100% repro, so hopefully it's easy. Let me know if you can't or
if you need something tested.

Thanks,
-Beau

>
>
> > [ 26.680757] do_syscall_64+0x41/0xf0
> > [ 26.681329] entry_SYSCALL_64_after_hwframe+0x6c/0x74
> > [ 26.681784] RIP: 0033:0x7fa852d15157
> > [ 26.682126] Code: ff e8 0d 16 02 00 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 03 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 41 c3 48 83 ec 18 89 7c 24 0c e8 83 b8 f7 ff
> > [ 26.684255] RSP: 002b:00007ffd226914f8 EFLAGS: 00000246 ORIG_RAX: 0000000000000003
> > [ 26.684939] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fa852d15157
> > [ 26.685602] RDX: 0000000000000002 RSI: 0000560ad54567d4 RDI: 0000000000000005
> > [ 26.686257] RBP: 00007ffd22691520 R08: 0000000000000000 R09: 00007fa852fa6740
> > [ 26.686877] R10: 0000000000000000 R11: 0000000000000246 R12: 00007ffd22691758
> > [ 26.687480] R13: 0000560ad5455ae7 R14: 0000560ad5458c38 R15: 00007fa8530d4040
> > [ 26.688107] </TASK>
> > [ 26.688315] irq event stamp: 1361
> > [ 26.688618] hardirqs last enabled at (1369): [<ffffffffbb1d7f49>] console_unlock+0x109/0x130
> > [ 26.689339] hardirqs last disabled at (1376): [<ffffffffbb1d7f2e>] console_unlock+0xee/0x130
> > [ 26.690062] softirqs last enabled at (1272): [<ffffffffbb14ba2a>] irq_exit_rcu+0x8a/0xe0
> > [ 26.690774] softirqs last disabled at (1267): [<ffffffffbb14ba2a>] irq_exit_rcu+0x8a/0xe0
> > [ 26.691620] ---[ end trace 0000000000000000 ]---
> >
> > Thanks,
> > -Beau
> >
> > 1. https://lore.kernel.org/linux-trace-kernel/[email protected]/
> > 2. https://lore.kernel.org/linux-trace-kernel/[email protected]/

2023-10-30 21:32:31

by Steven Rostedt

[permalink] [raw]
Subject: Re: selftests: user_events: ftrace_test - RIP: 0010:tracing_update_buffers (kernel/trace/trace.c:6470)

On Mon, 30 Oct 2023 12:42:23 -0400
Steven Rostedt <[email protected]> wrote:

> > I still get the splat about the trace_array_put when running
> > user_event's ftrace selftest:
> >
> > [ 26.665931] ------------[ cut here ]------------
> > [ 26.666663] WARNING: CPU: 12 PID: 291 at kernel/trace/trace.c:516 tracing_release_file_tr+0x46/0x50
> > [ 26.667470] Modules linked in:
> > [ 26.667808] CPU: 12 PID: 291 Comm: ftrace_test Not tainted 6.6.0-rc7-next-20231026 #3
> > [ 26.668665] RIP: 0010:tracing_release_file_tr+0x46/0x50
> > [ 26.669093] Code: d1 03 01 8b 83 c0 1e 00 00 85 c0 74 1d 83 e8 01 48 c7 c7 80 5b ef bc 89 83 c0 1e 00 00 e8 f2 b5 03 01 31 c0 5b e9 75 ee 27 01 <0f> 0b eb df 66 0f 1f 44 00 00 90 90 90 90 90 90 90 90 90 90 90 90
> > [ 26.670580] RSP: 0018:ffffb6ef858ffee8 EFLAGS: 00010246
> > [ 26.671128] RAX: 0000000000000000 RBX: ffff9d7ae2364058 RCX: 0000000000000000
> > [ 26.671793] RDX: 0000000000000000 RSI: ffffffffbcb6b38b RDI: 00000000ffffffff
> > [ 26.672444] RBP: ffff9d7ac3e72200 R08: 0000000000000000 R09: 0000000000000000
> > [ 26.673072] R10: ffffb6ef858ffee8 R11: ffffffffbb28526f R12: 00000000000f801f
> > [ 26.673705] R13: ffff9d7b661a2020 R14: ffff9d7ac6057728 R15: 0000000000000000
> > [ 26.674339] FS: 00007fa852fa6740(0000) GS:ffff9d81a6300000(0000) knlGS:0000000000000000
> > [ 26.674978] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > [ 26.675506] CR2: 00007fa852c2a250 CR3: 0000000105d92001 CR4: 0000000000370eb0
> > [ 26.676142] Call Trace:
> > [ 26.676357] <TASK>
> > [ 26.676572] ? __warn+0x7f/0x160
> > [ 26.677092] ? tracing_release_file_tr+0x46/0x50
> > [ 26.677540] ? report_bug+0x1c3/0x1d0
> > [ 26.677871] ? handle_bug+0x3c/0x70
> > [ 26.678196] ? exc_invalid_op+0x14/0x70
> > [ 26.678520] ? asm_exc_invalid_op+0x16/0x20
> > [ 26.678845] ? tracing_release_file_tr+0x1f/0x50
> > [ 26.679268] ? tracing_release_file_tr+0x46/0x50
> > [ 26.679691] ? tracing_release_file_tr+0x1f/0x50
> > [ 26.680105] __fput+0xab/0x300
> > [ 26.680437] __x64_sys_close+0x38/0x80
>
> Hmm, this doesn't tell me much. Let me go play with the user_event self
> tests.

I added a bunch of printk()s and I'm thinking there's a race in user event
(or dynamic event) code.

I put a printk in the open, write and release call to record the filp and
the file in the open and this is what I hit:

[ 32.603954] open ffff8d05488bf000 file=ffff8d0484f7a688
[ 32.607026] write ffff8d05488bf000
[ 32.608829] update file = ffff8d0484f7a688
[ 32.610100] update tr = ffffffffb2bebda0
[ 32.622203] write ffff8d05488bf000
[ 32.623231] update file = ffff8d0484f7a688
[ 32.624397] update tr = ffffffffb2bebda0
[ 32.625975] call delayed destroy
[ 32.627241] open ffff8d048510fc00 file=ffff8d0484f7a688

Another open with a different filp, but has the same file pointer (which is
the meta data that matches the eventfs files, but not part of eventfs).

[ 32.628720] release ffff8d048510fc00

Only one release is called and then we call:

[ 32.630785] Remove event call ffff8d04809d7c58

The above "Remove event call" came from user_event_set_call_visible()

if (visible) {
printk("show event call %px\n", &user->call);
ret = trace_add_event_call(&user->call);
} else {
printk("Remove event call %px\n", &user->call);
ret = trace_remove_event_call(&user->call);
}

Where trace_remove_event_call() calls:

probe_remove_event_call() {
__trace_remove_event_call() {
event_remove() {
remove_event_from_tracers() {
remove_event_file_dir() {
remove_event_file_dir() {
kmem_cache_free(file_cachep, file);

That is, call->file is freed at this point. Now any access to the file
pointer is going to be garbage, which a write to enable will cause.

So I see it is freed without seeing a release called.

[ 32.632323] Removing __test_event

The above is the dentry being released (unlinked).

[ 32.633618] Removing enable
[ 32.634453] Removing user_events
[ 32.634948] write ffff8d05488bf000

Another write is happening to the user event file (it may have been
unlinked, but the release was never called, so it is still valid.

[ 32.636440] update file = ffff8d0484f7a688
[ 32.637685] update tr = dc64cc323d943921

The above is the file pointer that we freed, and you can see the file->tr
is now garbage.

[ 32.638827] general protection fault, probably for non-canonical address 0xdc64cc323d9457f9: 0000 [#1] PREEMPT SMP PTI
[ 32.641712] CPU: 4 PID: 911 Comm: ftrace_test Not tainted 6.6.0-rc4-test-00024-gd402dc722a1b-dirty #158
[ 32.644220] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
[ 32.646717] RIP: 0010:tracing_update_buffers+0x19/0x50
[ 32.648133] Code: 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 66 0f 1f 00 55 31 f6 31 ed 53 48 89 fb 48 c7 c7 e0 bc be b2 e8 d7 b6 cb 00 <80> bb d8 1e 00 00 00 74 15 48 c7 c7 e0 bc be b2 e8 42 9f cb 00 89
[ 32.653125] RSP: 0018:ffffb7b34172be20 EFLAGS: 00010246
[ 32.654573] RAX: 0000000000000000 RBX: dc64cc323d943921 RCX: 0000000000000000
[ 32.656503] RDX: 0000000000000000 RSI: ffffffffb1e09128 RDI: ffffffffb1e09128
[ 32.658445] RBP: 0000000000000000 R08: 000000000000002f R09: 0000000000000000
[ 32.660378] R10: ffffb7b34172be20 R11: 0000000000000001 R12: ffffb7b34172bf08
[ 32.662322] R13: ffff8d0484f7a688 R14: ffffb7b34172bf08 R15: 0000000000000000
[ 32.664249] FS: 00007f1769a10740(0000) GS:ffff8d05f7c00000(0000) knlGS:0000000000000000
[ 32.666438] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 32.668009] CR2: 000055c0cd72fc70 CR3: 000000010e3c6003 CR4: 0000000000170ee0
[ 32.669954] Call Trace:
[ 32.670653] <TASK>
[ 32.671263] ? die_addr+0x36/0x90
[ 32.672203] ? exc_general_protection+0x236/0x4a0
[ 32.673521] ? asm_exc_general_protection+0x26/0x30
[ 32.674872] ? __mutex_lock+0x1c8/0xb40
[ 32.675942] ? __mutex_lock+0x1c8/0xb40
[ 32.677009] ? tracing_update_buffers+0x19/0x50
[ 32.678259] event_enable_write+0xb4/0x140
[ 32.679311] vfs_write+0xf2/0x530
[ 32.680172] ? find_held_lock+0x2b/0x80
[ 32.681159] ? _raw_spin_unlock+0x2d/0x50
[ 32.682711] ? rcu_is_watching+0x11/0x50
[ 32.684159] ? _raw_spin_unlock+0x2d/0x50
[ 32.685642] ? trace_preempt_on+0x78/0x80
[ 32.687027] ksys_write+0x75/0x100
[ 32.688257] do_syscall_64+0x3f/0xc0
[ 32.689561] entry_SYSCALL_64_after_hwframe+0x6e/0xd8
[ 32.691048] RIP: 0033:0x7f1769b0ab00

Note, I think I can simplify some of the code (but not fix this bug) by
letting the dput free the eventfs_inode as well (I think that is required).
But the above looks to be a bug in the implementation of user_events.

-- Steve

2023-10-31 00:27:34

by Beau Belgrave

[permalink] [raw]
Subject: Re: selftests: user_events: ftrace_test - RIP: 0010:tracing_update_buffers (kernel/trace/trace.c:6470)

On Mon, Oct 30, 2023 at 05:31:51PM -0400, Steven Rostedt wrote:
> On Mon, 30 Oct 2023 12:42:23 -0400
> Steven Rostedt <[email protected]> wrote:
>
> > > I still get the splat about the trace_array_put when running
> > > user_event's ftrace selftest:
> > >
> > > [ 26.665931] ------------[ cut here ]------------
> > > [ 26.666663] WARNING: CPU: 12 PID: 291 at kernel/trace/trace.c:516 tracing_release_file_tr+0x46/0x50
> > > [ 26.667470] Modules linked in:
> > > [ 26.667808] CPU: 12 PID: 291 Comm: ftrace_test Not tainted 6.6.0-rc7-next-20231026 #3
> > > [ 26.668665] RIP: 0010:tracing_release_file_tr+0x46/0x50
> > > [ 26.669093] Code: d1 03 01 8b 83 c0 1e 00 00 85 c0 74 1d 83 e8 01 48 c7 c7 80 5b ef bc 89 83 c0 1e 00 00 e8 f2 b5 03 01 31 c0 5b e9 75 ee 27 01 <0f> 0b eb df 66 0f 1f 44 00 00 90 90 90 90 90 90 90 90 90 90 90 90
> > > [ 26.670580] RSP: 0018:ffffb6ef858ffee8 EFLAGS: 00010246
> > > [ 26.671128] RAX: 0000000000000000 RBX: ffff9d7ae2364058 RCX: 0000000000000000
> > > [ 26.671793] RDX: 0000000000000000 RSI: ffffffffbcb6b38b RDI: 00000000ffffffff
> > > [ 26.672444] RBP: ffff9d7ac3e72200 R08: 0000000000000000 R09: 0000000000000000
> > > [ 26.673072] R10: ffffb6ef858ffee8 R11: ffffffffbb28526f R12: 00000000000f801f
> > > [ 26.673705] R13: ffff9d7b661a2020 R14: ffff9d7ac6057728 R15: 0000000000000000
> > > [ 26.674339] FS: 00007fa852fa6740(0000) GS:ffff9d81a6300000(0000) knlGS:0000000000000000
> > > [ 26.674978] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > > [ 26.675506] CR2: 00007fa852c2a250 CR3: 0000000105d92001 CR4: 0000000000370eb0
> > > [ 26.676142] Call Trace:
> > > [ 26.676357] <TASK>
> > > [ 26.676572] ? __warn+0x7f/0x160
> > > [ 26.677092] ? tracing_release_file_tr+0x46/0x50
> > > [ 26.677540] ? report_bug+0x1c3/0x1d0
> > > [ 26.677871] ? handle_bug+0x3c/0x70
> > > [ 26.678196] ? exc_invalid_op+0x14/0x70
> > > [ 26.678520] ? asm_exc_invalid_op+0x16/0x20
> > > [ 26.678845] ? tracing_release_file_tr+0x1f/0x50
> > > [ 26.679268] ? tracing_release_file_tr+0x46/0x50
> > > [ 26.679691] ? tracing_release_file_tr+0x1f/0x50
> > > [ 26.680105] __fput+0xab/0x300
> > > [ 26.680437] __x64_sys_close+0x38/0x80
> >
> > Hmm, this doesn't tell me much. Let me go play with the user_event self
> > tests.
>
> I added a bunch of printk()s and I'm thinking there's a race in user event
> (or dynamic event) code.
>

I did as well, however, I don't see how user events would be involved
other than allowing a trace_remove_event_call() with open enable fds?

I believe the scenario is open the enable file and keep the fd open.

While the fd is open to the enable file, call trace_remove_event_call().

If trace_remove_event_call() is called for an event with a tr->ref > 0,
should it fail or work? (It currently works without issue.)

Should writes to the fd still work after the event it is related to has
been removed?

I don't see how user_events could prevent this, it seems
trace_remove_event_call() should fail if files for it are still open?

Thanks,
-Beau

> I put a printk in the open, write and release call to record the filp and
> the file in the open and this is what I hit:
>
> [ 32.603954] open ffff8d05488bf000 file=ffff8d0484f7a688
> [ 32.607026] write ffff8d05488bf000
> [ 32.608829] update file = ffff8d0484f7a688
> [ 32.610100] update tr = ffffffffb2bebda0
> [ 32.622203] write ffff8d05488bf000
> [ 32.623231] update file = ffff8d0484f7a688
> [ 32.624397] update tr = ffffffffb2bebda0
> [ 32.625975] call delayed destroy
> [ 32.627241] open ffff8d048510fc00 file=ffff8d0484f7a688
>
> Another open with a different filp, but has the same file pointer (which is
> the meta data that matches the eventfs files, but not part of eventfs).
>
> [ 32.628720] release ffff8d048510fc00
>
> Only one release is called and then we call:
>
> [ 32.630785] Remove event call ffff8d04809d7c58
>
> The above "Remove event call" came from user_event_set_call_visible()
>
> if (visible) {
> printk("show event call %px\n", &user->call);
> ret = trace_add_event_call(&user->call);
> } else {
> printk("Remove event call %px\n", &user->call);
> ret = trace_remove_event_call(&user->call);
> }
>
> Where trace_remove_event_call() calls:
>
> probe_remove_event_call() {
> __trace_remove_event_call() {
> event_remove() {
> remove_event_from_tracers() {
> remove_event_file_dir() {
> remove_event_file_dir() {
> kmem_cache_free(file_cachep, file);
>
> That is, call->file is freed at this point. Now any access to the file
> pointer is going to be garbage, which a write to enable will cause.
>
> So I see it is freed without seeing a release called.
>
> [ 32.632323] Removing __test_event
>
> The above is the dentry being released (unlinked).
>
> [ 32.633618] Removing enable
> [ 32.634453] Removing user_events
> [ 32.634948] write ffff8d05488bf000
>
> Another write is happening to the user event file (it may have been
> unlinked, but the release was never called, so it is still valid.
>
> [ 32.636440] update file = ffff8d0484f7a688
> [ 32.637685] update tr = dc64cc323d943921
>
> The above is the file pointer that we freed, and you can see the file->tr
> is now garbage.
>
> [ 32.638827] general protection fault, probably for non-canonical address 0xdc64cc323d9457f9: 0000 [#1] PREEMPT SMP PTI
> [ 32.641712] CPU: 4 PID: 911 Comm: ftrace_test Not tainted 6.6.0-rc4-test-00024-gd402dc722a1b-dirty #158
> [ 32.644220] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
> [ 32.646717] RIP: 0010:tracing_update_buffers+0x19/0x50
> [ 32.648133] Code: 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 66 0f 1f 00 55 31 f6 31 ed 53 48 89 fb 48 c7 c7 e0 bc be b2 e8 d7 b6 cb 00 <80> bb d8 1e 00 00 00 74 15 48 c7 c7 e0 bc be b2 e8 42 9f cb 00 89
> [ 32.653125] RSP: 0018:ffffb7b34172be20 EFLAGS: 00010246
> [ 32.654573] RAX: 0000000000000000 RBX: dc64cc323d943921 RCX: 0000000000000000
> [ 32.656503] RDX: 0000000000000000 RSI: ffffffffb1e09128 RDI: ffffffffb1e09128
> [ 32.658445] RBP: 0000000000000000 R08: 000000000000002f R09: 0000000000000000
> [ 32.660378] R10: ffffb7b34172be20 R11: 0000000000000001 R12: ffffb7b34172bf08
> [ 32.662322] R13: ffff8d0484f7a688 R14: ffffb7b34172bf08 R15: 0000000000000000
> [ 32.664249] FS: 00007f1769a10740(0000) GS:ffff8d05f7c00000(0000) knlGS:0000000000000000
> [ 32.666438] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [ 32.668009] CR2: 000055c0cd72fc70 CR3: 000000010e3c6003 CR4: 0000000000170ee0
> [ 32.669954] Call Trace:
> [ 32.670653] <TASK>
> [ 32.671263] ? die_addr+0x36/0x90
> [ 32.672203] ? exc_general_protection+0x236/0x4a0
> [ 32.673521] ? asm_exc_general_protection+0x26/0x30
> [ 32.674872] ? __mutex_lock+0x1c8/0xb40
> [ 32.675942] ? __mutex_lock+0x1c8/0xb40
> [ 32.677009] ? tracing_update_buffers+0x19/0x50
> [ 32.678259] event_enable_write+0xb4/0x140
> [ 32.679311] vfs_write+0xf2/0x530
> [ 32.680172] ? find_held_lock+0x2b/0x80
> [ 32.681159] ? _raw_spin_unlock+0x2d/0x50
> [ 32.682711] ? rcu_is_watching+0x11/0x50
> [ 32.684159] ? _raw_spin_unlock+0x2d/0x50
> [ 32.685642] ? trace_preempt_on+0x78/0x80
> [ 32.687027] ksys_write+0x75/0x100
> [ 32.688257] do_syscall_64+0x3f/0xc0
> [ 32.689561] entry_SYSCALL_64_after_hwframe+0x6e/0xd8
> [ 32.691048] RIP: 0033:0x7f1769b0ab00
>
> Note, I think I can simplify some of the code (but not fix this bug) by
> letting the dput free the eventfs_inode as well (I think that is required).
> But the above looks to be a bug in the implementation of user_events.
>
> -- Steve

2023-10-31 04:02:17

by Steven Rostedt

[permalink] [raw]
Subject: Re: selftests: user_events: ftrace_test - RIP: 0010:tracing_update_buffers (kernel/trace/trace.c:6470)

[ Adding Masami and stable ]

On Tue, 31 Oct 2023 00:27:07 +0000
Beau Belgrave <[email protected]> wrote:

> On Mon, Oct 30, 2023 at 05:31:51PM -0400, Steven Rostedt wrote:
> > On Mon, 30 Oct 2023 12:42:23 -0400
> > Steven Rostedt <[email protected]> wrote:
> >
> > > > I still get the splat about the trace_array_put when running
> > > > user_event's ftrace selftest:
> > > >
> > > > [ 26.665931] ------------[ cut here ]------------
> > > > [ 26.666663] WARNING: CPU: 12 PID: 291 at kernel/trace/trace.c:516 tracing_release_file_tr+0x46/0x50
> > > > [ 26.667470] Modules linked in:
> > > > [ 26.667808] CPU: 12 PID: 291 Comm: ftrace_test Not tainted 6.6.0-rc7-next-20231026 #3
> > > > [ 26.668665] RIP: 0010:tracing_release_file_tr+0x46/0x50
> > > > [ 26.669093] Code: d1 03 01 8b 83 c0 1e 00 00 85 c0 74 1d 83 e8 01 48 c7 c7 80 5b ef bc 89 83 c0 1e 00 00 e8 f2 b5 03 01 31 c0 5b e9 75 ee 27 01 <0f> 0b eb df 66 0f 1f 44 00 00 90 90 90 90 90 90 90 90 90 90 90 90
> > > > [ 26.670580] RSP: 0018:ffffb6ef858ffee8 EFLAGS: 00010246
> > > > [ 26.671128] RAX: 0000000000000000 RBX: ffff9d7ae2364058 RCX: 0000000000000000
> > > > [ 26.671793] RDX: 0000000000000000 RSI: ffffffffbcb6b38b RDI: 00000000ffffffff
> > > > [ 26.672444] RBP: ffff9d7ac3e72200 R08: 0000000000000000 R09: 0000000000000000
> > > > [ 26.673072] R10: ffffb6ef858ffee8 R11: ffffffffbb28526f R12: 00000000000f801f
> > > > [ 26.673705] R13: ffff9d7b661a2020 R14: ffff9d7ac6057728 R15: 0000000000000000
> > > > [ 26.674339] FS: 00007fa852fa6740(0000) GS:ffff9d81a6300000(0000) knlGS:0000000000000000
> > > > [ 26.674978] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > > > [ 26.675506] CR2: 00007fa852c2a250 CR3: 0000000105d92001 CR4: 0000000000370eb0
> > > > [ 26.676142] Call Trace:
> > > > [ 26.676357] <TASK>
> > > > [ 26.676572] ? __warn+0x7f/0x160
> > > > [ 26.677092] ? tracing_release_file_tr+0x46/0x50
> > > > [ 26.677540] ? report_bug+0x1c3/0x1d0
> > > > [ 26.677871] ? handle_bug+0x3c/0x70
> > > > [ 26.678196] ? exc_invalid_op+0x14/0x70
> > > > [ 26.678520] ? asm_exc_invalid_op+0x16/0x20
> > > > [ 26.678845] ? tracing_release_file_tr+0x1f/0x50
> > > > [ 26.679268] ? tracing_release_file_tr+0x46/0x50
> > > > [ 26.679691] ? tracing_release_file_tr+0x1f/0x50
> > > > [ 26.680105] __fput+0xab/0x300
> > > > [ 26.680437] __x64_sys_close+0x38/0x80
> > >
> > > Hmm, this doesn't tell me much. Let me go play with the user_event self
> > > tests.
> >
> > I added a bunch of printk()s and I'm thinking there's a race in user event
> > (or dynamic event) code.
> >
>
> I did as well, however, I don't see how user events would be involved
> other than allowing a trace_remove_event_call() with open enable fds?
>
> I believe the scenario is open the enable file and keep the fd open.
>
> While the fd is open to the enable file, call trace_remove_event_call().
>
> If trace_remove_event_call() is called for an event with a tr->ref > 0,
> should it fail or work? (It currently works without issue.)
>
> Should writes to the fd still work after the event it is related to has
> been removed?
>
> I don't see how user_events could prevent this, it seems
> trace_remove_event_call() should fail if files for it are still open?
>

This is a separate issue from eventfs (good, because I think I have solved
all the known bugs for that one - phew!).

Anyway, I checkout the code just before adding the eventfs, and did the following:

# echo 'p:sched schedule' > /sys/kernel/tracing/kprobe_events
# exec 5>>/sys/kernel/tracing/events/kprobes/sched/enable
# > /sys/kernel/tracing/kprobe_events
# exec 5>&-

And it worked fine. The above creates a kprobe event, opens the enable file
of that event with the bash file descriptor #5, removes the kprobe event,
and then closes the file descriptor #5.

But the I applied:

f5ca233e2e66d ("tracing: Increase trace array ref count on enable and filter files")

And do the above commands again and BOOM! it crashes with:

[ 217.879087] BUG: kernel NULL pointer dereference, address: 0000000000000028
[ 217.881121] #PF: supervisor read access in kernel mode
[ 217.882532] #PF: error_code(0x0000) - not-present page
[ 217.883932] PGD 0 P4D 0
[ 217.884672] Oops: 0000 [#1] PREEMPT SMP PTI
[ 217.885821] CPU: 6 PID: 877 Comm: bash Not tainted 6.5.0-rc4-test-00008-g2c6b6b1029d4-dirty #186
[ 217.888178] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
[ 217.890684] RIP: 0010:tracing_release_file_tr+0xc/0x50
[ 217.892097] Code: 5d 41 5c c3 cc cc cc cc 66 0f 1f 44 00 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 f3 0f 1e fa 53 48 8b 87 80 04 00 00 <48> 8b 58 28 48 85 db 74 2d 31 f6 48 c7 c7 c0 b3 1e 93 e8 2d 48 ca
[ 217.897102] RSP: 0018:ffffa5d400587eb0 EFLAGS: 00010282
[ 217.898531] RAX: 0000000000000000 RBX: ffff907d06aa6c00 RCX: 0000000000000000
[ 217.900471] RDX: 0000000000000000 RSI: ffff907d06aa6c00 RDI: ffff907d0bf21bd0
[ 217.902403] RBP: 00000000000d801e R08: 0000000000000001 R09: ffff907d0bf21bd0
[ 217.904350] R10: 0000000000000001 R11: 0000000000000001 R12: ffff907d0bf21bd0
[ 217.906282] R13: ffff907d103708e0 R14: ffff907d0a178c30 R15: 0000000000000000
[ 217.908215] FS: 00007ff49c150740(0000) GS:ffff907e77d00000(0000) knlGS:0000000000000000
[ 217.910405] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 217.911970] CR2: 0000000000000028 CR3: 00000001051ec005 CR4: 0000000000170ee0
[ 217.913924] Call Trace:
[ 217.914624] <TASK>
[ 217.915232] ? __die+0x23/0x70
[ 217.916105] ? page_fault_oops+0x17d/0x4d0
[ 217.917262] ? exc_page_fault+0x7f/0x200
[ 217.918350] ? asm_exc_page_fault+0x26/0x30
[ 217.919513] ? tracing_release_file_tr+0xc/0x50
[ 217.920780] __fput+0xfb/0x2a0
[ 217.921651] task_work_run+0x5d/0x90
[ 217.922652] exit_to_user_mode_prepare+0x231/0x240
[ 217.923981] syscall_exit_to_user_mode+0x1a/0x50
[ 217.925248] do_syscall_64+0x4b/0xc0
[ 217.926176] entry_SYSCALL_64_after_hwframe+0x6e/0xd8

Look familiar?

It's now midnight and I've been at this all day. I'm going to look more at
this tomorrow. It's not going to be easy :-( I'm not sure what exactly to
do. We may need to prevent dynamic events from being deleted if there's any
of its files opened (enable, format, etc).

That is, if you try to delete the event, it will give you an -EBUSY, just
like having them enabled would.

More good news, that commit is in 6.6 *and* marked for stable :-p

-- Steve

2023-10-31 14:46:20

by Steven Rostedt

[permalink] [raw]
Subject: Re: selftests: user_events: ftrace_test - RIP: 0010:tracing_update_buffers (kernel/trace/trace.c:6470)

On Tue, 31 Oct 2023 00:00:31 -0400
Steven Rostedt <[email protected]> wrote:

> But the I applied:
>
> f5ca233e2e66d ("tracing: Increase trace array ref count on enable and filter files")
>
> And do the above commands again and BOOM! it crashes with:

The below patch appears to solve this. Care to test it?

-- Steve

Signed-off-by: Steven Rostedt (Google) <[email protected]>
---
include/linux/trace_events.h | 4 ++++
kernel/trace/trace.c | 15 +++++++++++++++
kernel/trace/trace.h | 4 ++++
kernel/trace/trace_events.c | 29 +++++++++++++++++++++++++----
kernel/trace/trace_events_filter.c | 3 +++
5 files changed, 51 insertions(+), 4 deletions(-)

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 12207dc6722d..696f8dc4aa53 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -492,6 +492,7 @@ enum {
EVENT_FILE_FL_TRIGGER_COND_BIT,
EVENT_FILE_FL_PID_FILTER_BIT,
EVENT_FILE_FL_WAS_ENABLED_BIT,
+ EVENT_FILE_FL_FREED_BIT,
};

extern struct trace_event_file *trace_get_event_file(const char *instance,
@@ -630,6 +631,7 @@ extern int __kprobe_event_add_fields(struct dynevent_cmd *cmd, ...);
* TRIGGER_COND - When set, one or more triggers has an associated filter
* PID_FILTER - When set, the event is filtered based on pid
* WAS_ENABLED - Set when enabled to know to clear trace on module removal
+ * FREED - File descriptor is freed, all fields should be considered invalid
*/
enum {
EVENT_FILE_FL_ENABLED = (1 << EVENT_FILE_FL_ENABLED_BIT),
@@ -643,6 +645,7 @@ enum {
EVENT_FILE_FL_TRIGGER_COND = (1 << EVENT_FILE_FL_TRIGGER_COND_BIT),
EVENT_FILE_FL_PID_FILTER = (1 << EVENT_FILE_FL_PID_FILTER_BIT),
EVENT_FILE_FL_WAS_ENABLED = (1 << EVENT_FILE_FL_WAS_ENABLED_BIT),
+ EVENT_FILE_FL_FREED = (1 << EVENT_FILE_FL_FREED_BIT),
};

struct trace_event_file {
@@ -671,6 +674,7 @@ struct trace_event_file {
* caching and such. Which is mostly OK ;-)
*/
unsigned long flags;
+ atomic_t ref; /* ref count for opened files */
atomic_t sm_ref; /* soft-mode reference counter */
atomic_t tm_ref; /* trigger-mode reference counter */
};
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 2539cfc20a97..9aebf904ff97 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4978,6 +4978,20 @@ int tracing_open_file_tr(struct inode *inode, struct file *filp)
if (ret)
return ret;

+ mutex_lock(&event_mutex);
+
+ /* Fail if the file is marked for removal */
+ if (file->flags & EVENT_FILE_FL_FREED) {
+ trace_array_put(file->tr);
+ ret = -ENODEV;
+ } else {
+ event_file_get(file);
+ }
+
+ mutex_unlock(&event_mutex);
+ if (ret)
+ return ret;
+
filp->private_data = inode->i_private;

return 0;
@@ -4988,6 +5002,7 @@ int tracing_release_file_tr(struct inode *inode, struct file *filp)
struct trace_event_file *file = inode->i_private;

trace_array_put(file->tr);
+ event_file_put(file);

return 0;
}
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 0e1405abf4f7..4fcd293ecbd4 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -1669,6 +1669,10 @@ extern void event_trigger_unregister(struct event_command *cmd_ops,
char *glob,
struct event_trigger_data *trigger_data);

+extern void event_file_get(struct trace_event_file *file);
+extern void event_file_put(struct trace_event_file *file);
+
+
/**
* struct event_trigger_ops - callbacks for trace event triggers
*
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 70a029ae3c65..ec86f4894455 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -990,13 +990,33 @@ static void remove_subsystem(struct trace_subsystem_dir *dir)
}
}

+void event_file_get(struct trace_event_file *file)
+{
+ atomic_inc(&file->ref);
+}
+
+void event_file_put(struct trace_event_file *file)
+{
+ if (WARN_ON_ONCE(!atomic_read(&file->ref))) {
+ if (file->flags & EVENT_FILE_FL_FREED)
+ kmem_cache_free(file_cachep, file);
+ return;
+ }
+
+ if (atomic_dec_and_test(&file->ref) &&
+ (file->flags & EVENT_FILE_FL_FREED)) {
+ kmem_cache_free(file_cachep, file);
+ }
+}
+
static void remove_event_file_dir(struct trace_event_file *file)
{
eventfs_remove_dir(file->ei);
list_del(&file->list);
remove_subsystem(file->system);
free_event_filter(file->filter);
- kmem_cache_free(file_cachep, file);
+ file->flags |= EVENT_FILE_FL_FREED;
+ event_file_put(file);
}

/*
@@ -1369,7 +1389,7 @@ event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
flags = file->flags;
mutex_unlock(&event_mutex);

- if (!file)
+ if (!file || flags & EVENT_FILE_FL_FREED)
return -ENODEV;

if (flags & EVENT_FILE_FL_ENABLED &&
@@ -1404,7 +1424,7 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
ret = -ENODEV;
mutex_lock(&event_mutex);
file = event_file_data(filp);
- if (likely(file)) {
+ if (likely(file && !(file->flags & EVENT_FILE_FL_FREED))) {
printk("update file = %px\n", file);
printk("update tr = %px\n", file->tr);
ret = tracing_update_buffers(file->tr);
@@ -1686,7 +1706,7 @@ event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,

mutex_lock(&event_mutex);
file = event_file_data(filp);
- if (file)
+ if (file && !(file->flags & EVENT_FILE_FL_FREED))
print_event_filter(file, s);
mutex_unlock(&event_mutex);

@@ -2905,6 +2925,7 @@ trace_create_new_event(struct trace_event_call *call,
atomic_set(&file->tm_ref, 0);
INIT_LIST_HEAD(&file->triggers);
list_add(&file->list, &tr->events);
+ event_file_get(file);

return file;
}
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 33264e510d16..0c611b281a5b 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -2349,6 +2349,9 @@ int apply_event_filter(struct trace_event_file *file, char *filter_string)
struct event_filter *filter = NULL;
int err;

+ if (file->flags & EVENT_FILE_FL_FREED)
+ return -ENODEV;
+
if (!strcmp(strstrip(filter_string), "0")) {
filter_disable(file);
filter = event_filter(file);
--
2.42.0

2023-10-31 16:07:23

by Steven Rostedt

[permalink] [raw]
Subject: Re: selftests: user_events: ftrace_test - RIP: 0010:tracing_update_buffers (kernel/trace/trace.c:6470)

On Tue, 31 Oct 2023 10:45:51 -0400
Steven Rostedt <[email protected]> wrote:

> @@ -1404,7 +1424,7 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
> ret = -ENODEV;
> mutex_lock(&event_mutex);
> file = event_file_data(filp);
> - if (likely(file)) {
> + if (likely(file && !(file->flags & EVENT_FILE_FL_FREED))) {
> printk("update file = %px\n", file);
> printk("update tr = %px\n", file->tr);
> ret = tracing_update_buffers(file->tr);

Well, this won't apply because I still had debugging in it when I added
these changes.

Here's a better version:

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 12207dc6722d..696f8dc4aa53 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -492,6 +492,7 @@ enum {
EVENT_FILE_FL_TRIGGER_COND_BIT,
EVENT_FILE_FL_PID_FILTER_BIT,
EVENT_FILE_FL_WAS_ENABLED_BIT,
+ EVENT_FILE_FL_FREED_BIT,
};

extern struct trace_event_file *trace_get_event_file(const char *instance,
@@ -630,6 +631,7 @@ extern int __kprobe_event_add_fields(struct dynevent_cmd *cmd, ...);
* TRIGGER_COND - When set, one or more triggers has an associated filter
* PID_FILTER - When set, the event is filtered based on pid
* WAS_ENABLED - Set when enabled to know to clear trace on module removal
+ * FREED - File descriptor is freed, all fields should be considered invalid
*/
enum {
EVENT_FILE_FL_ENABLED = (1 << EVENT_FILE_FL_ENABLED_BIT),
@@ -643,6 +645,7 @@ enum {
EVENT_FILE_FL_TRIGGER_COND = (1 << EVENT_FILE_FL_TRIGGER_COND_BIT),
EVENT_FILE_FL_PID_FILTER = (1 << EVENT_FILE_FL_PID_FILTER_BIT),
EVENT_FILE_FL_WAS_ENABLED = (1 << EVENT_FILE_FL_WAS_ENABLED_BIT),
+ EVENT_FILE_FL_FREED = (1 << EVENT_FILE_FL_FREED_BIT),
};

struct trace_event_file {
@@ -671,6 +674,7 @@ struct trace_event_file {
* caching and such. Which is mostly OK ;-)
*/
unsigned long flags;
+ atomic_t ref; /* ref count for opened files */
atomic_t sm_ref; /* soft-mode reference counter */
atomic_t tm_ref; /* trigger-mode reference counter */
};
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 2539cfc20a97..9aebf904ff97 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4978,6 +4978,20 @@ int tracing_open_file_tr(struct inode *inode, struct file *filp)
if (ret)
return ret;

+ mutex_lock(&event_mutex);
+
+ /* Fail if the file is marked for removal */
+ if (file->flags & EVENT_FILE_FL_FREED) {
+ trace_array_put(file->tr);
+ ret = -ENODEV;
+ } else {
+ event_file_get(file);
+ }
+
+ mutex_unlock(&event_mutex);
+ if (ret)
+ return ret;
+
filp->private_data = inode->i_private;

return 0;
@@ -4988,6 +5002,7 @@ int tracing_release_file_tr(struct inode *inode, struct file *filp)
struct trace_event_file *file = inode->i_private;

trace_array_put(file->tr);
+ event_file_put(file);

return 0;
}
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 0e1405abf4f7..b7f4ea25a194 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -1669,6 +1669,9 @@ extern void event_trigger_unregister(struct event_command *cmd_ops,
char *glob,
struct event_trigger_data *trigger_data);

+extern void event_file_get(struct trace_event_file *file);
+extern void event_file_put(struct trace_event_file *file);
+
/**
* struct event_trigger_ops - callbacks for trace event triggers
*
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index f9e3e24d8796..f29e815ca5b2 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -990,13 +990,35 @@ static void remove_subsystem(struct trace_subsystem_dir *dir)
}
}

+void event_file_get(struct trace_event_file *file)
+{
+ atomic_inc(&file->ref);
+}
+
+void event_file_put(struct trace_event_file *file)
+{
+ if (WARN_ON_ONCE(!atomic_read(&file->ref))) {
+ if (file->flags & EVENT_FILE_FL_FREED)
+ kmem_cache_free(file_cachep, file);
+ return;
+ }
+
+ if (atomic_dec_and_test(&file->ref)) {
+ /* Count should only go to zero when it is freed */
+ if (WARN_ON_ONCE(!(file->flags & EVENT_FILE_FL_FREED)))
+ return;
+ kmem_cache_free(file_cachep, file);
+ }
+}
+
static void remove_event_file_dir(struct trace_event_file *file)
{
eventfs_remove_dir(file->ei);
list_del(&file->list);
remove_subsystem(file->system);
free_event_filter(file->filter);
- kmem_cache_free(file_cachep, file);
+ file->flags |= EVENT_FILE_FL_FREED;
+ event_file_put(file);
}

/*
@@ -1369,7 +1391,7 @@ event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
flags = file->flags;
mutex_unlock(&event_mutex);

- if (!file)
+ if (!file || flags & EVENT_FILE_FL_FREED)
return -ENODEV;

if (flags & EVENT_FILE_FL_ENABLED &&
@@ -1403,7 +1425,7 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
ret = -ENODEV;
mutex_lock(&event_mutex);
file = event_file_data(filp);
- if (likely(file)) {
+ if (likely(file && !(file->flags & EVENT_FILE_FL_FREED))) {
ret = tracing_update_buffers(file->tr);
if (ret < 0) {
mutex_unlock(&event_mutex);
@@ -1683,7 +1705,7 @@ event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,

mutex_lock(&event_mutex);
file = event_file_data(filp);
- if (file)
+ if (file && !(file->flags & EVENT_FILE_FL_FREED))
print_event_filter(file, s);
mutex_unlock(&event_mutex);

@@ -2902,6 +2924,7 @@ trace_create_new_event(struct trace_event_call *call,
atomic_set(&file->tm_ref, 0);
INIT_LIST_HEAD(&file->triggers);
list_add(&file->list, &tr->events);
+ event_file_get(file);

return file;
}
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 33264e510d16..0c611b281a5b 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -2349,6 +2349,9 @@ int apply_event_filter(struct trace_event_file *file, char *filter_string)
struct event_filter *filter = NULL;
int err;

+ if (file->flags & EVENT_FILE_FL_FREED)
+ return -ENODEV;
+
if (!strcmp(strstrip(filter_string), "0")) {
filter_disable(file);
filter = event_filter(file);

2023-10-31 17:07:38

by Beau Belgrave

[permalink] [raw]
Subject: Re: selftests: user_events: ftrace_test - RIP: 0010:tracing_update_buffers (kernel/trace/trace.c:6470)

On Tue, Oct 31, 2023 at 12:07:02PM -0400, Steven Rostedt wrote:
> On Tue, 31 Oct 2023 10:45:51 -0400
> Steven Rostedt <[email protected]> wrote:
>
> > @@ -1404,7 +1424,7 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
> > ret = -ENODEV;
> > mutex_lock(&event_mutex);
> > file = event_file_data(filp);
> > - if (likely(file)) {
> > + if (likely(file && !(file->flags & EVENT_FILE_FL_FREED))) {
> > printk("update file = %px\n", file);
> > printk("update tr = %px\n", file->tr);
> > ret = tracing_update_buffers(file->tr);
>
> Well, this won't apply because I still had debugging in it when I added
> these changes.
>
> Here's a better version:
>
Thanks!

I ended up testing the official patch [1] instead of this one along with
patches [2][3] in the next branch.

My printk's are now good, everything looks back to normal for my repro.

I added my Tested-by onto the official patch thread.

Thanks,
-Beau

1. https://lore.kernel.org/linux-trace-kernel/[email protected]/
2. https://lore.kernel.org/linux-trace-kernel/[email protected]
3. https://lore.kernel.org/linux-trace-kernel/[email protected]/

> diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
> index 12207dc6722d..696f8dc4aa53 100644
> --- a/include/linux/trace_events.h
> +++ b/include/linux/trace_events.h
> @@ -492,6 +492,7 @@ enum {
> EVENT_FILE_FL_TRIGGER_COND_BIT,
> EVENT_FILE_FL_PID_FILTER_BIT,
> EVENT_FILE_FL_WAS_ENABLED_BIT,
> + EVENT_FILE_FL_FREED_BIT,
> };
>
> extern struct trace_event_file *trace_get_event_file(const char *instance,
> @@ -630,6 +631,7 @@ extern int __kprobe_event_add_fields(struct dynevent_cmd *cmd, ...);
> * TRIGGER_COND - When set, one or more triggers has an associated filter
> * PID_FILTER - When set, the event is filtered based on pid
> * WAS_ENABLED - Set when enabled to know to clear trace on module removal
> + * FREED - File descriptor is freed, all fields should be considered invalid
> */
> enum {
> EVENT_FILE_FL_ENABLED = (1 << EVENT_FILE_FL_ENABLED_BIT),
> @@ -643,6 +645,7 @@ enum {
> EVENT_FILE_FL_TRIGGER_COND = (1 << EVENT_FILE_FL_TRIGGER_COND_BIT),
> EVENT_FILE_FL_PID_FILTER = (1 << EVENT_FILE_FL_PID_FILTER_BIT),
> EVENT_FILE_FL_WAS_ENABLED = (1 << EVENT_FILE_FL_WAS_ENABLED_BIT),
> + EVENT_FILE_FL_FREED = (1 << EVENT_FILE_FL_FREED_BIT),
> };
>
> struct trace_event_file {
> @@ -671,6 +674,7 @@ struct trace_event_file {
> * caching and such. Which is mostly OK ;-)
> */
> unsigned long flags;
> + atomic_t ref; /* ref count for opened files */
> atomic_t sm_ref; /* soft-mode reference counter */
> atomic_t tm_ref; /* trigger-mode reference counter */
> };
> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
> index 2539cfc20a97..9aebf904ff97 100644
> --- a/kernel/trace/trace.c
> +++ b/kernel/trace/trace.c
> @@ -4978,6 +4978,20 @@ int tracing_open_file_tr(struct inode *inode, struct file *filp)
> if (ret)
> return ret;
>
> + mutex_lock(&event_mutex);
> +
> + /* Fail if the file is marked for removal */
> + if (file->flags & EVENT_FILE_FL_FREED) {
> + trace_array_put(file->tr);
> + ret = -ENODEV;
> + } else {
> + event_file_get(file);
> + }
> +
> + mutex_unlock(&event_mutex);
> + if (ret)
> + return ret;
> +
> filp->private_data = inode->i_private;
>
> return 0;
> @@ -4988,6 +5002,7 @@ int tracing_release_file_tr(struct inode *inode, struct file *filp)
> struct trace_event_file *file = inode->i_private;
>
> trace_array_put(file->tr);
> + event_file_put(file);
>
> return 0;
> }
> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> index 0e1405abf4f7..b7f4ea25a194 100644
> --- a/kernel/trace/trace.h
> +++ b/kernel/trace/trace.h
> @@ -1669,6 +1669,9 @@ extern void event_trigger_unregister(struct event_command *cmd_ops,
> char *glob,
> struct event_trigger_data *trigger_data);
>
> +extern void event_file_get(struct trace_event_file *file);
> +extern void event_file_put(struct trace_event_file *file);
> +
> /**
> * struct event_trigger_ops - callbacks for trace event triggers
> *
> diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
> index f9e3e24d8796..f29e815ca5b2 100644
> --- a/kernel/trace/trace_events.c
> +++ b/kernel/trace/trace_events.c
> @@ -990,13 +990,35 @@ static void remove_subsystem(struct trace_subsystem_dir *dir)
> }
> }
>
> +void event_file_get(struct trace_event_file *file)
> +{
> + atomic_inc(&file->ref);
> +}
> +
> +void event_file_put(struct trace_event_file *file)
> +{
> + if (WARN_ON_ONCE(!atomic_read(&file->ref))) {
> + if (file->flags & EVENT_FILE_FL_FREED)
> + kmem_cache_free(file_cachep, file);
> + return;
> + }
> +
> + if (atomic_dec_and_test(&file->ref)) {
> + /* Count should only go to zero when it is freed */
> + if (WARN_ON_ONCE(!(file->flags & EVENT_FILE_FL_FREED)))
> + return;
> + kmem_cache_free(file_cachep, file);
> + }
> +}
> +
> static void remove_event_file_dir(struct trace_event_file *file)
> {
> eventfs_remove_dir(file->ei);
> list_del(&file->list);
> remove_subsystem(file->system);
> free_event_filter(file->filter);
> - kmem_cache_free(file_cachep, file);
> + file->flags |= EVENT_FILE_FL_FREED;
> + event_file_put(file);
> }
>
> /*
> @@ -1369,7 +1391,7 @@ event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
> flags = file->flags;
> mutex_unlock(&event_mutex);
>
> - if (!file)
> + if (!file || flags & EVENT_FILE_FL_FREED)
> return -ENODEV;
>
> if (flags & EVENT_FILE_FL_ENABLED &&
> @@ -1403,7 +1425,7 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
> ret = -ENODEV;
> mutex_lock(&event_mutex);
> file = event_file_data(filp);
> - if (likely(file)) {
> + if (likely(file && !(file->flags & EVENT_FILE_FL_FREED))) {
> ret = tracing_update_buffers(file->tr);
> if (ret < 0) {
> mutex_unlock(&event_mutex);
> @@ -1683,7 +1705,7 @@ event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
>
> mutex_lock(&event_mutex);
> file = event_file_data(filp);
> - if (file)
> + if (file && !(file->flags & EVENT_FILE_FL_FREED))
> print_event_filter(file, s);
> mutex_unlock(&event_mutex);
>
> @@ -2902,6 +2924,7 @@ trace_create_new_event(struct trace_event_call *call,
> atomic_set(&file->tm_ref, 0);
> INIT_LIST_HEAD(&file->triggers);
> list_add(&file->list, &tr->events);
> + event_file_get(file);
>
> return file;
> }
> diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
> index 33264e510d16..0c611b281a5b 100644
> --- a/kernel/trace/trace_events_filter.c
> +++ b/kernel/trace/trace_events_filter.c
> @@ -2349,6 +2349,9 @@ int apply_event_filter(struct trace_event_file *file, char *filter_string)
> struct event_filter *filter = NULL;
> int err;
>
> + if (file->flags & EVENT_FILE_FL_FREED)
> + return -ENODEV;
> +
> if (!strcmp(strstrip(filter_string), "0")) {
> filter_disable(file);
> filter = event_filter(file);