I was clumsily trying to use "perf ftrace" from git master (I might
very well be using it wrong), and it's falling over with a NULL deref.
I don't really understand the perf code, but it looks to me like it
might be related to Andi Kleen's refactoring that introduced
evlist->all_cpus?
I think the problem is that evlist_close() assumes that ->cpus!=NULL
implies ->all_cpus!=NULL, but perf_evlist__propagate_maps() doesn't
set ->all_cpus if the evlist is empty.
Here's the crash I encountered:
root@vm:~# /linux-llvm/tools/perf/perf ftrace -a -T kmem_cache_alloc
failed to reset ftrace
Segmentation fault (core dumped)
root@vm:~# gdb /linux-llvm/tools/perf/perf core
[...]
Core was generated by `/linux-llvm/tools/perf/perf ftrace -a -T
kmem_cache_alloc'.
Program terminated with signal SIGSEGV, Segmentation fault.
#0 perf_cpu_map__cpu (cpus=0x0, idx=idx@entry=0) at cpumap.c:250
[...]
(gdb) list
245 return cpus;
246 }
247
248 int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx)
249 {
250 if (idx < cpus->nr)
251 return cpus->map[idx];
252
253 return -1;
254 }
(gdb) print cpus
$1 = (const struct perf_cpu_map *) 0x0
(gdb) bt
#0 perf_cpu_map__cpu (cpus=0x0, idx=idx@entry=0) at cpumap.c:250
#1 0x00005579cbf38731 in evlist__close
(evlist=evlist@entry=0x5579cd1c22c0) at util/evlist.c:1222
#2 0x00005579cbf388f6 in evlist__delete (evlist=0x5579cd1c22c0) at
util/evlist.c:152
#3 0x00005579cbf389c1 in evlist__delete (evlist=<optimized out>) at
util/evlist.c:148
#4 0x00005579cbe9e3b5 in cmd_ftrace (argc=0, argv=0x7ffe818a16e0) at
builtin-ftrace.c:520
#5 0x00005579cbf2287d in run_builtin (p=0x5579cc2a9740
<commands+672>, argc=4, argv=0x7ffe818a16e0) at perf.c:312
#6 0x00005579cbe96baa in handle_internal_command
(argv=0x7ffe818a16e0, argc=4) at perf.c:364
#7 run_argv (argcp=<synthetic pointer>, argv=<synthetic pointer>) at perf.c:408
#8 main (argc=4, argv=0x7ffe818a16e0) at perf.c:538
(gdb) frame 1
#1 0x00005579cbf38731 in evlist__close
(evlist=evlist@entry=0x5579cd1c22c0) at util/evlist.c:1222
1222 evlist__for_each_cpu(evlist, i, cpu) {
(gdb) print evlist
$2 = (struct evlist *) 0x5579cd1c22c0
(gdb) print *evlist
$3 = {core = {entries = {next = 0x5579cd1c22c0, prev =
0x5579cd1c22c0}, nr_entries = 0, has_user_cpus = false, cpus =
0x5579cd1b8fe0, all_cpus = 0x0, threads = 0x5579cd1c2d60,
nr_mmaps = 0, mmap_len = 0, pollfd = {nr = 0, nr_alloc = 0,
nr_autogrow = 64, entries = 0x0, priv = 0x0}, heads = {{first = 0x0}
<repeats 256 times>}, mmap = 0x0, mmap_ovw = 0x0,
mmap_first = 0x0, mmap_ovw_first = 0x0}, nr_groups = 0, enabled =
false, id_pos = 0, is_pos = 0, combined_sample_type = 0,
bkw_mmap_state = BKW_MMAP_NOTREADY, workload = {cork_fd = 0,
pid = -1}, mmap = 0x0, overwrite_mmap = 0x0, selected = 0x0, stats
= {total_period = 0, total_non_filtered_period = 0, total_lost = 0,
total_lost_samples = 0, total_aux_lost = 0,
total_aux_partial = 0, total_invalid_chains = 0, nr_events = {0
<repeats 82 times>}, nr_non_filtered_samples = 0, nr_lost_warned = 0,
nr_unknown_events = 0, nr_invalid_chains = 0,
nr_unknown_id = 0, nr_unprocessable_samples = 0,
nr_auxtrace_errors = {0, 0}, nr_proc_map_timeout = 0}, env = 0x0,
trace_event_sample_raw = 0x0, first_sample_time = 0,
last_sample_time = 0, thread = {th = 0, done = 0}}
(gdb)
On Thu, Jan 09, 2020 at 12:37:14PM +0100, Jann Horn wrote:
> I was clumsily trying to use "perf ftrace" from git master (I might
> very well be using it wrong), and it's falling over with a NULL deref.
> I don't really understand the perf code, but it looks to me like it
> might be related to Andi Kleen's refactoring that introduced
> evlist->all_cpus?
>
> I think the problem is that evlist_close() assumes that ->cpus!=NULL
> implies ->all_cpus!=NULL, but perf_evlist__propagate_maps() doesn't
> set ->all_cpus if the evlist is empty.
>
> Here's the crash I encountered:
>
> root@vm:~# /linux-llvm/tools/perf/perf ftrace -a -T kmem_cache_alloc
> failed to reset ftrace
> Segmentation fault (core dumped)
> root@vm:~# gdb /linux-llvm/tools/perf/perf core
> [...]
> Core was generated by `/linux-llvm/tools/perf/perf ftrace -a -T
> kmem_cache_alloc'.
> Program terminated with signal SIGSEGV, Segmentation fault.
> #0 perf_cpu_map__cpu (cpus=0x0, idx=idx@entry=0) at cpumap.c:250
> [...]
> (gdb) list
> 245 return cpus;
> 246 }
> 247
> 248 int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx)
> 249 {
> 250 if (idx < cpus->nr)
> 251 return cpus->map[idx];
> 252
> 253 return -1;
> 254 }
> (gdb) print cpus
> $1 = (const struct perf_cpu_map *) 0x0
> (gdb) bt
> #0 perf_cpu_map__cpu (cpus=0x0, idx=idx@entry=0) at cpumap.c:250
> #1 0x00005579cbf38731 in evlist__close
> (evlist=evlist@entry=0x5579cd1c22c0) at util/evlist.c:1222
> #2 0x00005579cbf388f6 in evlist__delete (evlist=0x5579cd1c22c0) at
> util/evlist.c:152
> #3 0x00005579cbf389c1 in evlist__delete (evlist=<optimized out>) at
> util/evlist.c:148
> #4 0x00005579cbe9e3b5 in cmd_ftrace (argc=0, argv=0x7ffe818a16e0) at
> builtin-ftrace.c:520
> #5 0x00005579cbf2287d in run_builtin (p=0x5579cc2a9740
> <commands+672>, argc=4, argv=0x7ffe818a16e0) at perf.c:312
> #6 0x00005579cbe96baa in handle_internal_command
> (argv=0x7ffe818a16e0, argc=4) at perf.c:364
> #7 run_argv (argcp=<synthetic pointer>, argv=<synthetic pointer>) at perf.c:408
> #8 main (argc=4, argv=0x7ffe818a16e0) at perf.c:538
> (gdb) frame 1
> #1 0x00005579cbf38731 in evlist__close
> (evlist=evlist@entry=0x5579cd1c22c0) at util/evlist.c:1222
> 1222 evlist__for_each_cpu(evlist, i, cpu) {
> (gdb) print evlist
> $2 = (struct evlist *) 0x5579cd1c22c0
> (gdb) print *evlist
> $3 = {core = {entries = {next = 0x5579cd1c22c0, prev =
> 0x5579cd1c22c0}, nr_entries = 0, has_user_cpus = false, cpus =
> 0x5579cd1b8fe0, all_cpus = 0x0, threads = 0x5579cd1c2d60,
> nr_mmaps = 0, mmap_len = 0, pollfd = {nr = 0, nr_alloc = 0,
> nr_autogrow = 64, entries = 0x0, priv = 0x0}, heads = {{first = 0x0}
> <repeats 256 times>}, mmap = 0x0, mmap_ovw = 0x0,
> mmap_first = 0x0, mmap_ovw_first = 0x0}, nr_groups = 0, enabled =
> false, id_pos = 0, is_pos = 0, combined_sample_type = 0,
> bkw_mmap_state = BKW_MMAP_NOTREADY, workload = {cork_fd = 0,
> pid = -1}, mmap = 0x0, overwrite_mmap = 0x0, selected = 0x0, stats
> = {total_period = 0, total_non_filtered_period = 0, total_lost = 0,
> total_lost_samples = 0, total_aux_lost = 0,
> total_aux_partial = 0, total_invalid_chains = 0, nr_events = {0
> <repeats 82 times>}, nr_non_filtered_samples = 0, nr_lost_warned = 0,
> nr_unknown_events = 0, nr_invalid_chains = 0,
> nr_unknown_id = 0, nr_unprocessable_samples = 0,
> nr_auxtrace_errors = {0, 0}, nr_proc_map_timeout = 0}, env = 0x0,
> trace_event_sample_raw = 0x0, first_sample_time = 0,
> last_sample_time = 0, thread = {th = 0, done = 0}}
> (gdb)
oops, ftrace is using evlist without event ;-) so all_cpus
will not get initialized..
Andi, should we initialize evlist::all_cpus with the first
cpus we get, like in the patch below?
thanks,
jirka
---
diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c
index ae9e65aa2491..5b9f2ca50591 100644
--- a/tools/lib/perf/evlist.c
+++ b/tools/lib/perf/evlist.c
@@ -164,6 +164,9 @@ void perf_evlist__set_maps(struct perf_evlist *evlist,
evlist->threads = perf_thread_map__get(threads);
}
+ if (!evlist->all_cpus && cpus)
+ evlist->all_cpus = perf_cpu_map__get(cpus);
+
perf_evlist__propagate_maps(evlist);
}
> oops, ftrace is using evlist without event ;-) so all_cpus
> will not get initialized..
>
> Andi, should we initialize evlist::all_cpus with the first
> cpus we get, like in the patch below?
I assume perf stat -A still works for all CPUs? If yes it's ok.
-andi
On Thu, Jan 9, 2020 at 4:11 PM Arnaldo Carvalho de Melo
<[email protected]> wrote:
> Em Thu, Jan 09, 2020 at 12:37:14PM +0100, Jann Horn escreveu:
> > I was clumsily trying to use "perf ftrace" from git master (I might
> > very well be using it wrong), and it's falling over with a NULL deref.
> > I don't really understand the perf code, but it looks to me like it
> > might be related to Andi Kleen's refactoring that introduced
> > evlist->all_cpus?
>
> > I think the problem is that evlist_close() assumes that ->cpus!=NULL
> > implies ->all_cpus!=NULL, but perf_evlist__propagate_maps() doesn't
> > set ->all_cpus if the evlist is empty.
>
> > Here's the crash I encountered:
>
> I've reproduced it and Jiri provided a patch, I'll test it, meanwhile
> you could alternatively drop an 'f' and try 'perf trace' + 'perf probe'
> instead, perhaps that could be enough, some examples:
>
> [root@quaco ~]# perf probe kmem_cache_alloc
> Added new event:
> probe:kmem_cache_alloc (on kmem_cache_alloc)
>
> You can now use it in all perf tools, such as:
>
> perf record -e probe:kmem_cache_alloc -aR sleep 1
Ah, thanks for the help. :)
Em Thu, Jan 09, 2020 at 12:37:14PM +0100, Jann Horn escreveu:
> I was clumsily trying to use "perf ftrace" from git master (I might
> very well be using it wrong), and it's falling over with a NULL deref.
> I don't really understand the perf code, but it looks to me like it
> might be related to Andi Kleen's refactoring that introduced
> evlist->all_cpus?
> I think the problem is that evlist_close() assumes that ->cpus!=NULL
> implies ->all_cpus!=NULL, but perf_evlist__propagate_maps() doesn't
> set ->all_cpus if the evlist is empty.
> Here's the crash I encountered:
I've reproduced it and Jiri provided a patch, I'll test it, meanwhile
you could alternatively drop an 'f' and try 'perf trace' + 'perf probe'
instead, perhaps that could be enough, some examples:
[root@quaco ~]# perf probe kmem_cache_alloc
Added new event:
probe:kmem_cache_alloc (on kmem_cache_alloc)
You can now use it in all perf tools, such as:
perf record -e probe:kmem_cache_alloc -aR sleep 1
[root@quaco ~]#
Bump the ring buffer size with -m for high freq events, limit the
backtrace size at the kernel so as to reduce the flow of events, limit
the number of events:
[root@quaco ~]# perf trace -m1024 -e probe:kmem_cache_alloc/max-stack=8/ --max-events=4
0.000 :0/0 probe:kmem_cache_alloc(__probe_ip: -1842593216)
kmem_cache_alloc ([kernel.kallsyms])
__build_skb ([kernel.kallsyms])
__napi_alloc_skb ([kernel.kallsyms])
r8152_poll ([r8152])
net_rx_action ([kernel.kallsyms])
__do_softirq ([kernel.kallsyms])
irq_exit ([kernel.kallsyms])
do_IRQ ([kernel.kallsyms])
0.869 :0/0 probe:kmem_cache_alloc(__probe_ip: -1842593216)
kmem_cache_alloc ([kernel.kallsyms])
__build_skb ([kernel.kallsyms])
__napi_alloc_skb ([kernel.kallsyms])
r8152_poll ([r8152])
net_rx_action ([kernel.kallsyms])
__do_softirq ([kernel.kallsyms])
irq_exit ([kernel.kallsyms])
do_IRQ ([kernel.kallsyms])
37.427 :0/0 probe:kmem_cache_alloc(__probe_ip: -1842593216)
kmem_cache_alloc ([kernel.kallsyms])
__build_skb ([kernel.kallsyms])
__napi_alloc_skb ([kernel.kallsyms])
r8152_poll ([r8152])
net_rx_action ([kernel.kallsyms])
__do_softirq ([kernel.kallsyms])
irq_exit ([kernel.kallsyms])
do_IRQ ([kernel.kallsyms])
37.450 :0/0 probe:kmem_cache_alloc(__probe_ip: -1842593216)
kmem_cache_alloc ([kernel.kallsyms])
__build_skb ([kernel.kallsyms])
__napi_alloc_skb ([kernel.kallsyms])
r8152_poll ([r8152])
net_rx_action ([kernel.kallsyms])
__do_softirq ([kernel.kallsyms])
irq_exit ([kernel.kallsyms])
do_IRQ ([kernel.kallsyms])
[root@quaco ~]#
Mix and match with syscall names, using globs, limit stacks for multiple
events, i.e. you can use 'perf probe kmem_*' to add all kernel functions
with that prefix, for instance:
[root@quaco ~]# perf trace -m1024 -e inotify*,stat*,probe:kmem_*/max-stack=8/ --max-events=4
0.000 ( ): :0/0 probe:kmem_cache_alloc(__probe_ip: -1842593216)
kmem_cache_alloc ([kernel.kallsyms])
__build_skb ([kernel.kallsyms])
__netdev_alloc_skb ([kernel.kallsyms])
br_send_bpdu.isra.0.constprop.0 ([bridge])
br_send_config_bpdu ([bridge])
br_transmit_config.part.0 ([bridge])
br_config_bpdu_generation ([bridge])
br_hello_timer_expired ([bridge])
51.901 ( ): weechat/6630 stat(filename: 0xc5be4063, statbuf: 0x7ffdfe73f050) ...
51.905 ( ): weechat/6630 probe:kmem_cache_alloc(__probe_ip: -1842593216)
kmem_cache_alloc ([kernel.kallsyms])
getname_flags ([kernel.kallsyms])
user_path_at_empty ([kernel.kallsyms])
vfs_statx ([kernel.kallsyms])
__do_sys_newstat ([kernel.kallsyms])
do_syscall_64 ([kernel.kallsyms])
entry_SYSCALL_64 ([kernel.kallsyms])
__xstat64 (/usr/lib64/libc-2.29.so)
51.901 ( 0.018 ms): weechat/6630 ... [continued]: stat()) = 0
[root@quaco ~]#
Are you interested only in some specific syscall? Use
switch-on/switch-off to mark when to show events when to stop showing,
works for any event, even one that you mark in the middle of some
function and then at some other point in time, need to make it work with
plain syscall names:
[root@quaco ~]# perf trace -m1024 -e syscalls:sys_*_newstat,probe:kmem_*/max-stack=8/ --max-events=4 --switch-on=syscalls:sys_enter_newstat --switch-off=syscalls:sys_exit_newstat
0.000 weechat/6630 probe:kmem_cache_alloc(__probe_ip: -1842593216)
kmem_cache_alloc ([kernel.kallsyms])
getname_flags ([kernel.kallsyms])
user_path_at_empty ([kernel.kallsyms])
vfs_statx ([kernel.kallsyms])
__do_sys_newstat ([kernel.kallsyms])
do_syscall_64 ([kernel.kallsyms])
entry_SYSCALL_64 ([kernel.kallsyms])
__xstat64 (/usr/lib64/libc-2.29.so)
75.843 :9367/9367 probe:kmem_cache_alloc(__probe_ip: -1842593216)
kmem_cache_alloc ([kernel.kallsyms])
getname_flags ([kernel.kallsyms])
user_path_at_empty ([kernel.kallsyms])
vfs_statx ([kernel.kallsyms])
__do_sys_newstat ([kernel.kallsyms])
do_syscall_64 ([kernel.kallsyms])
entry_SYSCALL_64 ([kernel.kallsyms])
__xstat64 (/usr/lib64/libc-2.29.so)
768.592 NetworkManager/9368 probe:kmem_cache_alloc(__probe_ip: -1842593216)
kmem_cache_alloc ([kernel.kallsyms])
getname_flags ([kernel.kallsyms])
user_path_at_empty ([kernel.kallsyms])
vfs_statx ([kernel.kallsyms])
__do_sys_newstat ([kernel.kallsyms])
do_syscall_64 ([kernel.kallsyms])
entry_SYSCALL_64 ([kernel.kallsyms])
__xstat64 (/usr/lib64/libc-2.29.so)
999.927 weechat/6630 probe:kmem_cache_alloc(__probe_ip: -1842593216)
kmem_cache_alloc ([kernel.kallsyms])
getname_flags ([kernel.kallsyms])
user_path_at_empty ([kernel.kallsyms])
vfs_statx ([kernel.kallsyms])
__do_sys_newstat ([kernel.kallsyms])
do_syscall_64 ([kernel.kallsyms])
entry_SYSCALL_64 ([kernel.kallsyms])
__xstat64 (/usr/lib64/libc-2.29.so)
[root@quaco ~]#
- Arnaldo