2019-04-16 16:02:45

by Jiri Olsa

Subject: [PATCH 00/12] perf tools: Display eBPF code in intel_pt trace

hi,
this patchset adds dso support for reading and displaying
bpf code in the intel_pt trace output. I had to change
some of the kernel maps processing code, so hopefully
I did not break too many things ;-)

It's now possible to see bpf code flow via:

# perf-with-kcore record pt -e intel_pt//ku -- sleep 1
# perf-with-kcore script pt --insn-trace --xed
...
sleep 36660 [016] 1057036.806464404: ffffffff811dfba5 trace_call_bpf+0x65 ([kernel.kallsyms]) nopl %eax, (%rax,%rax,1)
sleep 36660 [016] 1057036.806464404: ffffffff811dfbaa trace_call_bpf+0x6a ([kernel.kallsyms]) movq 0x30(%rbx), %rax
sleep 36660 [016] 1057036.806464404: ffffffff811dfbae trace_call_bpf+0x6e ([kernel.kallsyms]) leaq 0x38(%rbx), %rsi
sleep 36660 [016] 1057036.806464404: ffffffff811dfbb2 trace_call_bpf+0x72 ([kernel.kallsyms]) mov %r13, %rdi
sleep 36660 [016] 1057036.806464404: ffffffff811dfbb5 trace_call_bpf+0x75 ([kernel.kallsyms]) callq 0xffffffff81c00c30
sleep 36660 [016] 1057036.806464404: ffffffff81c00c30 __x86_indirect_thunk_rax+0x0 ([kernel.kallsyms]) callq 0xffffffff81c00c3c
sleep 36660 [016] 1057036.806464404: ffffffff81c00c3c __x86_indirect_thunk_rax+0xc ([kernel.kallsyms]) movq %rax, (%rsp)
sleep 36660 [016] 1057036.806464404: ffffffff81c00c40 __x86_indirect_thunk_rax+0x10 ([kernel.kallsyms]) retq
sleep 36660 [016] 1057036.806464725: ffffffffa05e89ef bpf_prog_da4fe6b3d2c29b25_trace_return+0x0 (bpf_prog_da4fe6b3d2c29b25_trace_return) pushq %rbp
sleep 36660 [016] 1057036.806464725: ffffffffa05e89f0 bpf_prog_da4fe6b3d2c29b25_trace_return+0x1 (bpf_prog_da4fe6b3d2c29b25_trace_return) mov %rsp, %rbp
sleep 36660 [016] 1057036.806464725: ffffffffa05e89f3 bpf_prog_da4fe6b3d2c29b25_trace_return+0x4 (bpf_prog_da4fe6b3d2c29b25_trace_return) sub $0x158, %rsp
sleep 36660 [016] 1057036.806464725: ffffffffa05e89fa bpf_prog_da4fe6b3d2c29b25_trace_return+0xb (bpf_prog_da4fe6b3d2c29b25_trace_return) sub $0x28, %rbp
sleep 36660 [016] 1057036.806464725: ffffffffa05e89fe bpf_prog_da4fe6b3d2c29b25_trace_return+0xf (bpf_prog_da4fe6b3d2c29b25_trace_return) movq %rbx, (%rbp)
sleep 36660 [016] 1057036.806464725: ffffffffa05e8a02 bpf_prog_da4fe6b3d2c29b25_trace_return+0x13 (bpf_prog_da4fe6b3d2c29b25_trace_return) movq %r13, 0x8(%rbp)
sleep 36660 [016] 1057036.806464725: ffffffffa05e8a06 bpf_prog_da4fe6b3d2c29b25_trace_return+0x17 (bpf_prog_da4fe6b3d2c29b25_trace_return) movq %r14, 0x10(%rbp)
sleep 36660 [016] 1057036.806464725: ffffffffa05e8a0a bpf_prog_da4fe6b3d2c29b25_trace_return+0x1b (bpf_prog_da4fe6b3d2c29b25_trace_return) movq %r15, 0x18(%rbp)
sleep 36660 [016] 1057036.806464725: ffffffffa05e8a0e bpf_prog_da4fe6b3d2c29b25_trace_return+0x1f (bpf_prog_da4fe6b3d2c29b25_trace_return) xor %eax, %eax
sleep 36660 [016] 1057036.806464725: ffffffffa05e8a10 bpf_prog_da4fe6b3d2c29b25_trace_return+0x21 (bpf_prog_da4fe6b3d2c29b25_trace_return) movq %rax, 0x20(%rbp)
sleep 36660 [016] 1057036.806464725: ffffffffa05e8a14 bpf_prog_da4fe6b3d2c29b25_trace_return+0x25 (bpf_prog_da4fe6b3d2c29b25_trace_return) mov %rdi, %rbx
sleep 36660 [016] 1057036.806464725: ffffffffa05e8a17 bpf_prog_da4fe6b3d2c29b25_trace_return+0x28 (bpf_prog_da4fe6b3d2c29b25_trace_return) callq 0xffffffff811fed50
sleep 36660 [016] 1057036.806464725: ffffffff811fed50 bpf_get_current_pid_tgid+0x0 ([kernel.kallsyms]) nopl %eax, (%rax,%rax,1)
sleep 36660 [016] 1057036.806464725: ffffffff811fed55 bpf_get_current_pid_tgid+0x5 ([kernel.kallsyms]) movq %gs:0x15c00, %rdx
sleep 36660 [016] 1057036.806464725: ffffffff811fed5e bpf_get_current_pid_tgid+0xe ([kernel.kallsyms]) test %rdx, %rdx
sleep 36660 [016] 1057036.806464725: ffffffff811fed61 bpf_get_current_pid_tgid+0x11 ([kernel.kallsyms]) jz 0xffffffff811fed79
sleep 36660 [016] 1057036.806464725: ffffffff811fed63 bpf_get_current_pid_tgid+0x13 ([kernel.kallsyms]) movsxdl 0x494(%rdx), %rax
sleep 36660 [016] 1057036.806464725: ffffffff811fed6a bpf_get_current_pid_tgid+0x1a ([kernel.kallsyms]) movsxdl 0x490(%rdx), %rdx
sleep 36660 [016] 1057036.806464725: ffffffff811fed71 bpf_get_current_pid_tgid+0x21 ([kernel.kallsyms]) shl $0x20, %rax
sleep 36660 [016] 1057036.806464725: ffffffff811fed75 bpf_get_current_pid_tgid+0x25 ([kernel.kallsyms]) or %rdx, %rax
sleep 36660 [016] 1057036.806464725: ffffffff811fed78 bpf_get_current_pid_tgid+0x28 ([kernel.kallsyms]) retq
sleep 36660 [016] 1057036.806464725: ffffffffa05e8a1c bpf_prog_da4fe6b3d2c29b25_trace_return+0x2d (bpf_prog_da4fe6b3d2c29b25_trace_return) movq %rax, -0x8(%rbp)
sleep 36660 [016] 1057036.806464725: ffffffffa05e8a20 bpf_prog_da4fe6b3d2c29b25_trace_return+0x31 (bpf_prog_da4fe6b3d2c29b25_trace_return) xor %edi, %edi
sleep 36660 [016] 1057036.806464725: ffffffffa05e8a22 bpf_prog_da4fe6b3d2c29b25_trace_return+0x33 (bpf_prog_da4fe6b3d2c29b25_trace_return) movq %rdi, -0x10(%rbp)
sleep 36660 [016] 1057036.806464725: ffffffffa05e8a26 bpf_prog_da4fe6b3d2c29b25_trace_return+0x37 (bpf_prog_da4fe6b3d2c29b25_trace_return) movq %rdi, -0x18(%rbp)

# perf-core/perf-with-kcore script pt --call-trace
...
sleep 36660 [016] 1057036.806464404: ([kernel.kallsyms] ) kretprobe_perf_func
sleep 36660 [016] 1057036.806464404: ([kernel.kallsyms] ) trace_call_bpf
sleep 36660 [016] 1057036.806464404: ([kernel.kallsyms] ) __x86_indirect_thunk_rax
sleep 36660 [016] 1057036.806464404: ([kernel.kallsyms] ) __x86_indirect_thunk_rax
sleep 36660 [016] 1057036.806464725: (bpf_prog_da4fe6b3d2c29b25_trace_return ) bpf_get_current_pid_tgid
sleep 36660 [016] 1057036.806464725: (bpf_prog_da4fe6b3d2c29b25_trace_return ) bpf_ktime_get_ns
sleep 36660 [016] 1057036.806464725: ([kernel.kallsyms] ) __x86_indirect_thunk_rax
sleep 36660 [016] 1057036.806464725: ([kernel.kallsyms] ) __x86_indirect_thunk_rax
sleep 36660 [016] 1057036.806465045: (bpf_prog_da4fe6b3d2c29b25_trace_return ) __htab_map_lookup_elem
sleep 36660 [016] 1057036.806465366: ([kernel.kallsyms] ) memcmp
sleep 36660 [016] 1057036.806465687: (bpf_prog_da4fe6b3d2c29b25_trace_return ) bpf_probe_read
sleep 36660 [016] 1057036.806465687: ([kernel.kallsyms] ) probe_kernel_read
sleep 36660 [016] 1057036.806465687: ([kernel.kallsyms] ) __check_object_size
sleep 36660 [016] 1057036.806465687: ([kernel.kallsyms] ) check_stack_object
sleep 36660 [016] 1057036.806465687: ([kernel.kallsyms] ) copy_user_enhanced_fast_string
sleep 36660 [016] 1057036.806465687: (bpf_prog_da4fe6b3d2c29b25_trace_return ) bpf_probe_read
sleep 36660 [016] 1057036.806465687: ([kernel.kallsyms] ) probe_kernel_read
sleep 36660 [016] 1057036.806465687: ([kernel.kallsyms] ) __check_object_size
sleep 36660 [016] 1057036.806465687: ([kernel.kallsyms] ) check_stack_object
sleep 36660 [016] 1057036.806465687: ([kernel.kallsyms] ) copy_user_enhanced_fast_string
sleep 36660 [016] 1057036.806466008: (bpf_prog_da4fe6b3d2c29b25_trace_return ) bpf_get_current_uid_gid
sleep 36660 [016] 1057036.806466008: ([kernel.kallsyms] ) from_kgid
sleep 36660 [016] 1057036.806466008: ([kernel.kallsyms] ) from_kuid
sleep 36660 [016] 1057036.806466008: (bpf_prog_da4fe6b3d2c29b25_trace_return ) bpf_perf_event_output
sleep 36660 [016] 1057036.806466328: ([kernel.kallsyms] ) perf_event_output
sleep 36660 [016] 1057036.806466328: ([kernel.kallsyms] ) perf_prepare_sample
sleep 36660 [016] 1057036.806466328: ([kernel.kallsyms] ) perf_misc_flags
sleep 36660 [016] 1057036.806466328: ([kernel.kallsyms] ) __x86_indirect_thunk_rax
sleep 36660 [016] 1057036.806466328: ([kernel.kallsyms] ) __x86_indirect_thunk_rax


It's also available in here:
git://git.kernel.org/pub/scm/linux/kernel/git/jolsa/perf.git
perf/fixes

thanks,
jirka


Cc: Jesper Dangaard Brouer <[email protected]>
Cc: Alexei Starovoitov <[email protected]>
Cc: Daniel Borkmann <[email protected]>
Cc: Stanislav Fomichev <[email protected]>
---
Jiri Olsa (11):
perf tools: Separate generic code in dso__data_file_size
perf tools: Separate generic code in dso_cache__read
perf tools: Simplify dso_cache__read function
perf tools: Add bpf dso read and size hooks
perf tools: Read also the end of the kernel
perf tools: Do not erase uncovered maps by kcore
perf tools: Fix side band thread draining
perf tools: Fix map reference counting
perf tools: Keep zero in pgoff bpf map
perf tools: Reuse shared eBPF dso objects
perf script: Pad dso name for --call-trace

Song Liu (1):
perf tools: Check maps for bpf programs

 tools/include/linux/kernel.h     |   1 +
 tools/lib/vsprintf.c             |  19 +++++++++++++++++++
 tools/perf/builtin-script.c      |   1 +
 tools/perf/util/dso.c            | 124 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------
 tools/perf/util/evlist.c         |  14 +++++++++-----
 tools/perf/util/machine.c        |  45 +++++++++++++++++++++++++++--------------
 tools/perf/util/map.c            |  26 +++++++++++++++++++++++---
 tools/perf/util/map.h            |   4 +++-
 tools/perf/util/symbol.c         |  14 +++++++++++++-
 tools/perf/util/symbol_conf.h    |   1 +
 tools/perf/util/symbol_fprintf.c |   1 +
 11 files changed, 190 insertions(+), 60 deletions(-)


2019-04-16 16:03:10

by Jiri Olsa

Subject: [PATCH 04/12] perf tools: Add bpf dso read and size hooks

Adding bpf-related code into the dso reading paths. For dsos with
binary type DSO_BINARY_TYPE__BPF_PROG_INFO, read the data from the
jited program image kept in the bpf prog info (found via
perf_env__find_bpf_prog_info) instead of from a file.

Link: http://lkml.kernel.org/n/[email protected]
Signed-off-by: Jiri Olsa <[email protected]>
---
tools/perf/util/dso.c | 49 ++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 48 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 6fdff723e55a..0cc6702c6442 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -9,6 +9,8 @@
#include <errno.h>
#include <fcntl.h>
#include <libgen.h>
+#include <bpf/libbpf.h>
+#include "bpf-event.h"
#include "compress.h"
#include "namespaces.h"
#include "path.h"
@@ -706,6 +708,44 @@ bool dso__data_status_seen(struct dso *dso, enum dso_data_status_seen by)
return false;
}

+static ssize_t bpf_read(struct dso *dso, u64 offset, char *data)
+{
+ struct bpf_prog_info_node *node;
+ ssize_t size = DSO__DATA_CACHE_SIZE;
+ u64 len;
+ u8 *buf;
+
+ node = perf_env__find_bpf_prog_info(dso->bpf_prog.env, dso->bpf_prog.id);
+ if (!node || !node->info_linear) {
+ dso->data.status = DSO_DATA_STATUS_ERROR;
+ return -1;
+ }
+
+ len = node->info_linear->info.jited_prog_len;
+ buf = (u8 *) node->info_linear->info.jited_prog_insns;
+
+ if (offset >= len)
+ return -1;
+
+ size = (ssize_t) min(len - offset, (u64) size);
+ memcpy(data, buf + offset, size);
+ return size;
+}
+
+static int bpf_size(struct dso *dso)
+{
+ struct bpf_prog_info_node *node;
+
+ node = perf_env__find_bpf_prog_info(dso->bpf_prog.env, dso->bpf_prog.id);
+ if (!node || !node->info_linear) {
+ dso->data.status = DSO_DATA_STATUS_ERROR;
+ return -1;
+ }
+
+ dso->data.file_size = node->info_linear->info.jited_prog_len;
+ return 0;
+}
+
static void
dso_cache__free(struct dso *dso)
{
@@ -831,7 +871,11 @@ dso_cache__read(struct dso *dso, struct machine *machine,
if (!cache)
return -ENOMEM;

- ret = file_read(dso, machine, cache_offset, cache->data);
+ if (dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO)
+ ret = bpf_read(dso, cache_offset, cache->data);
+ else
+ ret = file_read(dso, machine, cache_offset, cache->data);
+
if (ret > 0) {
cache->offset = cache_offset;
cache->size = ret;
@@ -940,6 +984,9 @@ int dso__data_file_size(struct dso *dso, struct machine *machine)
if (dso->data.status == DSO_DATA_STATUS_ERROR)
return -1;

+ if (dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO)
+ return bpf_size(dso);
+
return file_size(dso, machine);
}

--
2.17.2

2019-04-16 16:03:18

by Jiri Olsa

Subject: [PATCH 06/12] perf tools: Do not erase uncovered maps by kcore

Maps in kcore do not cover the bpf program maps, so we can't
just remove everything. Keep all kernel maps that are not
covered by kcore maps.

Link: http://lkml.kernel.org/n/[email protected]
Signed-off-by: Jiri Olsa <[email protected]>
---
tools/perf/util/symbol.c | 14 +++++++++++++-
1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 5cbad55cd99d..96738a7a8c14 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1166,6 +1166,18 @@ static int kcore_mapfn(u64 start, u64 len, u64 pgoff, void *data)
return 0;
}

+static bool in_kcore(struct kcore_mapfn_data *md, struct map *map)
+{
+ struct map *iter;
+
+ list_for_each_entry(iter, &md->maps, node) {
+ if ((map->start >= iter->start) && (map->start < iter->end))
+ return true;
+ }
+
+ return false;
+}
+
static int dso__load_kcore(struct dso *dso, struct map *map,
const char *kallsyms_filename)
{
@@ -1222,7 +1234,7 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
while (old_map) {
struct map *next = map_groups__next(old_map);

- if (old_map != map)
+ if (old_map != map && !in_kcore(&md, old_map))
map_groups__remove(kmaps, old_map);
old_map = next;
}
--
2.17.2

2019-04-16 16:03:24

by Jiri Olsa

Subject: [PATCH 10/12] perf tools: Keep zero in pgoff bpf map

With pgoff set to zero, the map__map_ip function
will return the bpf address relative to 0, which is
what we need when we read the data from the bpf dso.

Adding the bpf symbols with mapped ip addresses as well.
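
For context, a sketch of the address translation this relies on, as the
map__map_ip helper is defined in tools/perf/util/map.h around this series:

  /* translate a memory address into a dso-relative offset */
  static inline u64 map__map_ip(struct map *map, u64 ip)
  {
          return ip - map->start + map->pgoff;
  }

With pgoff kept at zero, the translated address is simply the offset from
the start of the bpf program, which matches the offsets bpf_read() uses
into the jited program image.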

Link: http://lkml.kernel.org/n/[email protected]
Signed-off-by: Jiri Olsa <[email protected]>
---
tools/perf/util/machine.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index ad0205fbb506..d4aa44489011 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -704,12 +704,12 @@ static int machine__process_ksymbol_register(struct machine *machine,
return -ENOMEM;

map->start = event->ksymbol_event.addr;
- map->pgoff = map->start;
map->end = map->start + event->ksymbol_event.len;
map_groups__insert(&machine->kmaps, map);
}

- sym = symbol__new(event->ksymbol_event.addr, event->ksymbol_event.len,
+ sym = symbol__new(map->map_ip(map, map->start),
+ event->ksymbol_event.len,
0, 0, event->ksymbol_event.name);
if (!sym)
return -ENOMEM;
--
2.17.2

2019-04-16 16:03:25

by Jiri Olsa

Subject: [PATCH 11/12] perf tools: Reuse shared eBPF dso objects

An eBPF program can be loaded multiple times
with the same name (tag). We can share dso
objects for those programs.

Link: http://lkml.kernel.org/n/[email protected]
Signed-off-by: Jiri Olsa <[email protected]>
---
tools/perf/util/machine.c | 14 +++++++++++---
1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index d4aa44489011..a60653827891 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -23,6 +23,7 @@
#include "linux/hash.h"
#include "asm/bug.h"
#include "bpf-event.h"
+#include "dso.h"

#include "sane_ctype.h"
#include <symbol/kallsyms.h>
@@ -699,11 +700,18 @@ static int machine__process_ksymbol_register(struct machine *machine,

map = map_groups__find(&machine->kmaps, event->ksymbol_event.addr);
if (!map) {
- map = dso__new_map(event->ksymbol_event.name);
- if (!map)
+ struct dso *dso;
+
+ dso = dsos__findnew(&machine->dsos, event->ksymbol_event.name);
+ if (!dso)
return -ENOMEM;

- map->start = event->ksymbol_event.addr;
+ map = map__new2(event->ksymbol_event.addr, dso);
+ if (!map) {
+ dso__put(dso);
+ return -ENOMEM;
+ }
+
map->end = map->start + event->ksymbol_event.len;
map_groups__insert(&machine->kmaps, map);
}
--
2.17.2

2019-04-16 16:03:29

by Jiri Olsa

Subject: [PATCH 12/12] perf script: Pad dso name for --call-trace

so we don't have the indent broken by different dso name lengths,
as the kernel output now also displays bpf code.
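
For illustration, a usage sketch of the scnprintf_pad() helper added
below (hypothetical buffer and width):

  char buf[PATH_MAX];

  /* format the dso name and pad it with spaces up to 50 characters */
  scnprintf_pad(buf, 50, "%s", "[kernel.kallsyms]");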

# perf-with-kcore record pt -e intel_pt//ku -- sleep 1
# perf-core/perf-with-kcore script pt --call-trace

Before:
sleep 36660 [016] 1057036.806464404: ([kernel.kallsyms]) kretprobe_perf_func
sleep 36660 [016] 1057036.806464404: ([kernel.kallsyms]) trace_call_bpf
sleep 36660 [016] 1057036.806464404: ([kernel.kallsyms]) __x86_indirect_thunk_rax
sleep 36660 [016] 1057036.806464404: ([kernel.kallsyms]) __x86_indirect_thunk_rax
sleep 36660 [016] 1057036.806464725: (bpf_prog_da4fe6b3d2c29b25_trace_return) bpf_get_current_pid_tgid
sleep 36660 [016] 1057036.806464725: (bpf_prog_da4fe6b3d2c29b25_trace_return) bpf_ktime_get_ns
sleep 36660 [016] 1057036.806464725: ([kernel.kallsyms]) __x86_indirect_thunk_rax
sleep 36660 [016] 1057036.806464725: ([kernel.kallsyms]) __x86_indirect_thunk_rax
sleep 36660 [016] 1057036.806465045: (bpf_prog_da4fe6b3d2c29b25_trace_return) __htab_map_lookup_elem
sleep 36660 [016] 1057036.806465366: ([kernel.kallsyms]) memcmp
sleep 36660 [016] 1057036.806465687: (bpf_prog_da4fe6b3d2c29b25_trace_return) bpf_probe_read
sleep 36660 [016] 1057036.806465687: ([kernel.kallsyms]) probe_kernel_read
sleep 36660 [016] 1057036.806465687: ([kernel.kallsyms]) __check_object_size
sleep 36660 [016] 1057036.806465687: ([kernel.kallsyms]) check_stack_object
sleep 36660 [016] 1057036.806465687: ([kernel.kallsyms]) copy_user_enhanced_fast_string
sleep 36660 [016] 1057036.806465687: (bpf_prog_da4fe6b3d2c29b25_trace_return) bpf_probe_read
sleep 36660 [016] 1057036.806465687: ([kernel.kallsyms]) probe_kernel_read
sleep 36660 [016] 1057036.806465687: ([kernel.kallsyms]) __check_object_size
sleep 36660 [016] 1057036.806465687: ([kernel.kallsyms]) check_stack_object
sleep 36660 [016] 1057036.806465687: ([kernel.kallsyms]) copy_user_enhanced_fast_string
sleep 36660 [016] 1057036.806466008: (bpf_prog_da4fe6b3d2c29b25_trace_return) bpf_get_current_uid_gid
sleep 36660 [016] 1057036.806466008: ([kernel.kallsyms]) from_kgid
sleep 36660 [016] 1057036.806466008: ([kernel.kallsyms]) from_kuid
sleep 36660 [016] 1057036.806466008: (bpf_prog_da4fe6b3d2c29b25_trace_return) bpf_perf_event_output
sleep 36660 [016] 1057036.806466328: ([kernel.kallsyms]) perf_event_output
sleep 36660 [016] 1057036.806466328: ([kernel.kallsyms]) perf_prepare_sample
sleep 36660 [016] 1057036.806466328: ([kernel.kallsyms]) perf_misc_flags
sleep 36660 [016] 1057036.806466328: ([kernel.kallsyms]) __x86_indirect_thunk_rax
sleep 36660 [016] 1057036.806466328: ([kernel.kallsyms]) __x86_indirect_thunk_rax
sleep 36660 [016] 1057036.806466328: ([kvm]) kvm_is_in_guest
sleep 36660 [016] 1057036.806466649: ([kernel.kallsyms]) __perf_event_header__init_id.isra.0
sleep 36660 [016] 1057036.806466649: ([kernel.kallsyms]) perf_output_begin

After:
sleep 36660 [016] 1057036.806464404: ([kernel.kallsyms] ) kretprobe_perf_func
sleep 36660 [016] 1057036.806464404: ([kernel.kallsyms] ) trace_call_bpf
sleep 36660 [016] 1057036.806464404: ([kernel.kallsyms] ) __x86_indirect_thunk_rax
sleep 36660 [016] 1057036.806464404: ([kernel.kallsyms] ) __x86_indirect_thunk_rax
sleep 36660 [016] 1057036.806464725: (bpf_prog_da4fe6b3d2c29b25_trace_return ) bpf_get_current_pid_tgid
sleep 36660 [016] 1057036.806464725: (bpf_prog_da4fe6b3d2c29b25_trace_return ) bpf_ktime_get_ns
sleep 36660 [016] 1057036.806464725: ([kernel.kallsyms] ) __x86_indirect_thunk_rax
sleep 36660 [016] 1057036.806464725: ([kernel.kallsyms] ) __x86_indirect_thunk_rax
sleep 36660 [016] 1057036.806465045: (bpf_prog_da4fe6b3d2c29b25_trace_return ) __htab_map_lookup_elem
sleep 36660 [016] 1057036.806465366: ([kernel.kallsyms] ) memcmp
sleep 36660 [016] 1057036.806465687: (bpf_prog_da4fe6b3d2c29b25_trace_return ) bpf_probe_read
sleep 36660 [016] 1057036.806465687: ([kernel.kallsyms] ) probe_kernel_read
sleep 36660 [016] 1057036.806465687: ([kernel.kallsyms] ) __check_object_size
sleep 36660 [016] 1057036.806465687: ([kernel.kallsyms] ) check_stack_object
sleep 36660 [016] 1057036.806465687: ([kernel.kallsyms] ) copy_user_enhanced_fast_string
sleep 36660 [016] 1057036.806465687: (bpf_prog_da4fe6b3d2c29b25_trace_return ) bpf_probe_read
sleep 36660 [016] 1057036.806465687: ([kernel.kallsyms] ) probe_kernel_read
sleep 36660 [016] 1057036.806465687: ([kernel.kallsyms] ) __check_object_size
sleep 36660 [016] 1057036.806465687: ([kernel.kallsyms] ) check_stack_object
sleep 36660 [016] 1057036.806465687: ([kernel.kallsyms] ) copy_user_enhanced_fast_string
sleep 36660 [016] 1057036.806466008: (bpf_prog_da4fe6b3d2c29b25_trace_return ) bpf_get_current_uid_gid
sleep 36660 [016] 1057036.806466008: ([kernel.kallsyms] ) from_kgid
sleep 36660 [016] 1057036.806466008: ([kernel.kallsyms] ) from_kuid
sleep 36660 [016] 1057036.806466008: (bpf_prog_da4fe6b3d2c29b25_trace_return ) bpf_perf_event_output
sleep 36660 [016] 1057036.806466328: ([kernel.kallsyms] ) perf_event_output
sleep 36660 [016] 1057036.806466328: ([kernel.kallsyms] ) perf_prepare_sample
sleep 36660 [016] 1057036.806466328: ([kernel.kallsyms] ) perf_misc_flags
sleep 36660 [016] 1057036.806466328: ([kernel.kallsyms] ) __x86_indirect_thunk_rax
sleep 36660 [016] 1057036.806466328: ([kernel.kallsyms] ) __x86_indirect_thunk_rax

Link: http://lkml.kernel.org/n/[email protected]
Signed-off-by: Jiri Olsa <[email protected]>
---
tools/include/linux/kernel.h | 1 +
tools/lib/vsprintf.c | 19 +++++++++++++++++++
tools/perf/builtin-script.c | 1 +
tools/perf/util/map.c | 6 ++++++
tools/perf/util/symbol_conf.h | 1 +
5 files changed, 28 insertions(+)

diff --git a/tools/include/linux/kernel.h b/tools/include/linux/kernel.h
index 857d9e22826e..cba226948a0c 100644
--- a/tools/include/linux/kernel.h
+++ b/tools/include/linux/kernel.h
@@ -102,6 +102,7 @@

int vscnprintf(char *buf, size_t size, const char *fmt, va_list args);
int scnprintf(char * buf, size_t size, const char * fmt, ...);
+int scnprintf_pad(char * buf, size_t size, const char * fmt, ...);

#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr))

diff --git a/tools/lib/vsprintf.c b/tools/lib/vsprintf.c
index e08ee147eab4..149a15013b23 100644
--- a/tools/lib/vsprintf.c
+++ b/tools/lib/vsprintf.c
@@ -23,3 +23,22 @@ int scnprintf(char * buf, size_t size, const char * fmt, ...)

return (i >= ssize) ? (ssize - 1) : i;
}
+
+int scnprintf_pad(char * buf, size_t size, const char * fmt, ...)
+{
+ ssize_t ssize = size;
+ va_list args;
+ int i;
+
+ va_start(args, fmt);
+ i = vsnprintf(buf, size, fmt, args);
+ va_end(args);
+
+ if (i < (int) size) {
+ for (; i < (int) size; i++)
+ buf[i] = ' ';
+ buf[i] = 0x0;
+ }
+
+ return (i >= ssize) ? (ssize - 1) : i;
+}
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 61cfd8f70989..7adaa6c63a0b 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -3297,6 +3297,7 @@ static int parse_call_trace(const struct option *opt __maybe_unused,
parse_output_fields(NULL, "-ip,-addr,-event,-period,+callindent", 0);
itrace_parse_synth_opts(opt, "cewp", 0);
symbol_conf.nanosecs = true;
+ symbol_conf.pad_output_len_dso = 50;
return 0;
}

diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index ee71efb9db62..c3fbd6e556b0 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -405,6 +405,7 @@ size_t map__fprintf(struct map *map, FILE *fp)

size_t map__fprintf_dsoname(struct map *map, FILE *fp)
{
+ char buf[PATH_MAX];
const char *dsoname = "[unknown]";

if (map && map->dso) {
@@ -414,6 +415,11 @@ size_t map__fprintf_dsoname(struct map *map, FILE *fp)
dsoname = map->dso->name;
}

+ if (symbol_conf.pad_output_len_dso) {
+ scnprintf_pad(buf, symbol_conf.pad_output_len_dso, "%s", dsoname);
+ dsoname = buf;
+ }
+
return fprintf(fp, "%s", dsoname);
}

diff --git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h
index 6c55fa6fccec..382ba63fc554 100644
--- a/tools/perf/util/symbol_conf.h
+++ b/tools/perf/util/symbol_conf.h
@@ -69,6 +69,7 @@ struct symbol_conf {
*tid_list;
const char *symfs;
int res_sample;
+ int pad_output_len_dso;
};

extern struct symbol_conf symbol_conf;
--
2.17.2

2019-04-16 16:03:39

by Jiri Olsa

Subject: [PATCH 08/12] perf tools: Fix side band thread draining

The current perf_evlist__poll_thread code could finish
without draining the data. Adding logic that makes sure
we won't finish before the drain.
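
The resulting loop shape, condensed from the diff below:

  while (!done) {
          bool got_data = false;

          if (evlist->thread.done)
                  draining = true;        /* one final pass to flush leftovers */

          /* ... poll and read all mmaps, setting got_data
             whenever an event is consumed ... */

          if (draining && !got_data)
                  break;  /* nothing left after thread.done was seen */
  }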

Fixes: 657ee5531903 ("perf evlist: Introduce side band thread")
Link: http://lkml.kernel.org/n/[email protected]
Signed-off-by: Jiri Olsa <[email protected]>
---
tools/perf/util/evlist.c | 14 +++++++++-----
1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index f2bbae38278d..4b6783ff5813 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1868,12 +1868,12 @@ static void *perf_evlist__poll_thread(void *arg)
{
struct perf_evlist *evlist = arg;
bool draining = false;
- int i;
+ int i, done = 0;
+
+ while (!done) {
+ bool got_data = false;

- while (draining || !(evlist->thread.done)) {
- if (draining)
- draining = false;
- else if (evlist->thread.done)
+ if (evlist->thread.done)
draining = true;

if (!draining)
@@ -1894,9 +1894,13 @@ static void *perf_evlist__poll_thread(void *arg)
pr_warning("cannot locate proper evsel for the side band event\n");

perf_mmap__consume(map);
+ got_data = true;
}
perf_mmap__read_done(map);
}
+
+ if (draining && !got_data)
+ break;
}
return NULL;
}
--
2.17.2

2019-04-16 16:03:52

by Jiri Olsa

Subject: [PATCH 09/12] perf tools: Fix map reference counting

By calling maps__insert we expect to take 2 references
on the map, which we release within the maps__remove call.

However if there's already a map with the same name, we
currently don't bump the reference and can crash, like:

Program received signal SIGABRT, Aborted.
0x00007ffff75e60f5 in raise () from /lib64/libc.so.6

(gdb) bt
#0 0x00007ffff75e60f5 in raise () from /lib64/libc.so.6
#1 0x00007ffff75d0895 in abort () from /lib64/libc.so.6
#2 0x00007ffff75d0769 in __assert_fail_base.cold () from /lib64/libc.so.6
#3 0x00007ffff75de596 in __assert_fail () from /lib64/libc.so.6
#4 0x00000000004fc006 in refcount_sub_and_test (i=1, r=0x1224e88) at tools/include/linux/refcount.h:131
#5 refcount_dec_and_test (r=0x1224e88) at tools/include/linux/refcount.h:148
#6 map__put (map=0x1224df0) at util/map.c:299
#7 0x00000000004fdb95 in __maps__remove (map=0x1224df0, maps=0xb17d80) at util/map.c:953
#8 maps__remove (maps=0xb17d80, map=0x1224df0) at util/map.c:959
#9 0x00000000004f7d8a in map_groups__remove (map=<optimized out>, mg=<optimized out>) at util/map_groups.h:65
#10 machine__process_ksymbol_unregister (sample=<optimized out>, event=0x7ffff7279670, machine=<optimized out>) at util/machine.c:728
#11 machine__process_ksymbol (machine=<optimized out>, event=0x7ffff7279670, sample=<optimized out>) at util/machine.c:741
#12 0x00000000004fffbb in perf_session__deliver_event (session=0xb11390, event=0x7ffff7279670, tool=0x7fffffffc7b0, file_offset=13936) at util/session.c:1362
#13 0x00000000005039bb in do_flush (show_progress=false, oe=0xb17e80) at util/ordered-events.c:243
#14 __ordered_events__flush (oe=0xb17e80, how=OE_FLUSH__ROUND, timestamp=<optimized out>) at util/ordered-events.c:322
#15 0x00000000005005e4 in perf_session__process_user_event (session=session@entry=0xb11390, event=event@entry=0x7ffff72a4af8,
...

Adding the map to the list and taking the reference even
if we find a map with the same name (equal names now take
the right-side tree branch instead of returning early).

Cc: Song Liu <[email protected]>
Link: http://lkml.kernel.org/n/[email protected]
Signed-off-by: Jiri Olsa <[email protected]>
---
tools/perf/util/map.c | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 28d484ef74ae..ee71efb9db62 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -926,10 +926,8 @@ static void __maps__insert_name(struct maps *maps, struct map *map)
rc = strcmp(m->dso->short_name, map->dso->short_name);
if (rc < 0)
p = &(*p)->rb_left;
- else if (rc > 0)
- p = &(*p)->rb_right;
else
- return;
+ p = &(*p)->rb_right;
}
rb_link_node(&map->rb_node_name, parent, p);
rb_insert_color(&map->rb_node_name, &maps->names);
--
2.17.2

2019-04-16 16:04:00

by Jiri Olsa

Subject: [PATCH 07/12] perf tools: Check maps for bpf programs

From: Song Liu <[email protected]>

As reported by Jiri Olsa in:

"[BUG] perf: intel_pt won't display kernel function"
https://lore.kernel.org/lkml/20190403143738.GB32001@krava

Recent changes to support PERF_RECORD_KSYMBOL and PERF_RECORD_BPF_EVENT
broke the --kallsyms option, because they broke the __map__is_kmodule test.

This patch fixes that by adding a check for bpf programs, so that their
maps are not mistaken for kernel modules.
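
For example, the jited programs in the cover letter traces show up with
dso names like bpf_prog_da4fe6b3d2c29b25_trace_return, i.e. carrying the
"bpf_prog_" prefix that the new __map__is_bpf_prog() helper checks for.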

Reported-by: Jiri Olsa <[email protected]>
Cc: Adrian Hunter <[email protected]>
Cc: Alexei Starovoitov <[email protected]>
Cc: Andrii Nakryiko <[email protected]>
Cc: Daniel Borkmann <[email protected]>
Cc: Martin KaFai Lau <[email protected]>
Cc: Namhyung Kim <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Song Liu <[email protected]>
Cc: Yonghong Song <[email protected]>
Cc: Arnaldo Carvalho de Melo <[email protected]>
Fixes: 76193a94522f ("perf, bpf: Introduce PERF_RECORD_KSYMBOL")
Link: https://lore.kernel.org/lkml/[email protected]

Signed-off-by: Song Liu <[email protected]>
Link: http://lkml.kernel.org/n/[email protected]
Signed-off-by: Jiri Olsa <[email protected]>
---
tools/perf/util/map.c | 16 ++++++++++++++++
tools/perf/util/map.h | 4 +++-
2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index e32628cd20a7..28d484ef74ae 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -261,6 +261,22 @@ bool __map__is_extra_kernel_map(const struct map *map)
return kmap && kmap->name[0];
}

+bool __map__is_bpf_prog(const struct map *map)
+{
+ const char *name;
+
+ if (map->dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO)
+ return true;
+
+ /*
+ * If PERF_RECORD_BPF_EVENT is not included, the dso will not have
+ * type of DSO_BINARY_TYPE__BPF_PROG_INFO. In such cases, we can
+ * guess the type based on name.
+ */
+ name = map->dso->short_name;
+ return name && (strstr(name, "bpf_prog_") == name);
+}
+
bool map__has_symbols(const struct map *map)
{
return dso__has_symbols(map->dso);
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index 0e20749f2c55..dc93787c74f0 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -159,10 +159,12 @@ int map__set_kallsyms_ref_reloc_sym(struct map *map, const char *symbol_name,

bool __map__is_kernel(const struct map *map);
bool __map__is_extra_kernel_map(const struct map *map);
+bool __map__is_bpf_prog(const struct map *map);

static inline bool __map__is_kmodule(const struct map *map)
{
- return !__map__is_kernel(map) && !__map__is_extra_kernel_map(map);
+ return !__map__is_kernel(map) && !__map__is_extra_kernel_map(map) &&
+ !__map__is_bpf_prog(map);
}

bool map__has_symbols(const struct map *map);
--
2.17.2

2019-04-16 16:04:02

by Jiri Olsa

Subject: [PATCH 05/12] perf tools: Read also the end of the kernel

We mark the end of the kernel based on the start of the
first module, but that range could cover some bpf program
maps. Read the _etext symbol, if it's present, to get the
precise kernel map end.
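
The symbol can be inspected directly; an example (addresses vary per
boot and configuration):

  # grep -w -e _text -e _etext /proc/kallsyms
  ffffffff81000000 T _text
  ffffffff81e010e5 T _etext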

Link: http://lkml.kernel.org/n/[email protected]
Signed-off-by: Jiri Olsa <[email protected]>
---
tools/perf/util/machine.c | 27 ++++++++++++++++++---------
1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 3c520baa198c..ad0205fbb506 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -924,7 +924,8 @@ const char *ref_reloc_sym_names[] = {"_text", "_stext", NULL};
* symbol_name if it's not that important.
*/
static int machine__get_running_kernel_start(struct machine *machine,
- const char **symbol_name, u64 *start)
+ const char **symbol_name,
+ u64 *start, u64 *end)
{
char filename[PATH_MAX];
int i, err = -1;
@@ -949,6 +950,11 @@ static int machine__get_running_kernel_start(struct machine *machine,
*symbol_name = name;

*start = addr;
+
+ err = kallsyms__get_function_start(filename, "_etext", &addr);
+ if (!err)
+ *end = addr;
+
return 0;
}

@@ -1440,7 +1446,7 @@ int machine__create_kernel_maps(struct machine *machine)
struct dso *kernel = machine__get_kernel(machine);
const char *name = NULL;
struct map *map;
- u64 addr = 0;
+ u64 start = 0, end = ~0ULL;
int ret;

if (kernel == NULL)
@@ -1459,9 +1465,9 @@ int machine__create_kernel_maps(struct machine *machine)
"continuing anyway...\n", machine->pid);
}

- if (!machine__get_running_kernel_start(machine, &name, &addr)) {
+ if (!machine__get_running_kernel_start(machine, &name, &start, &end)) {
if (name &&
- map__set_kallsyms_ref_reloc_sym(machine->vmlinux_map, name, addr)) {
+ map__set_kallsyms_ref_reloc_sym(machine->vmlinux_map, name, start)) {
machine__destroy_kernel_maps(machine);
ret = -1;
goto out_put;
@@ -1471,16 +1477,19 @@ int machine__create_kernel_maps(struct machine *machine)
* we have a real start address now, so re-order the kmaps
* assume it's the last in the kmaps
*/
- machine__update_kernel_mmap(machine, addr, ~0ULL);
+ machine__update_kernel_mmap(machine, start, end);
}

if (machine__create_extra_kernel_maps(machine, kernel))
pr_debug("Problems creating extra kernel maps, continuing anyway...\n");

- /* update end address of the kernel map using adjacent module address */
- map = map__next(machine__kernel_map(machine));
- if (map)
- machine__set_kernel_mmap(machine, addr, map->start);
+ if (end == ~0ULL) {
+ /* update end address of the kernel map using adjacent module address */
+ map = map__next(machine__kernel_map(machine));
+ if (map)
+ machine__set_kernel_mmap(machine, start, map->start);
+ }
+
out_put:
dso__put(kernel);
return ret;
--
2.17.2

2019-04-16 16:04:08

by Jiri Olsa

Subject: [PATCH 01/12] perf tools: Separate generic code in dso__data_file_size

Moving the file-specific code in the dso__data_file_size function
into a separate file_size function. I'll add bpf-specific
code in the following patches.

Link: http://lkml.kernel.org/n/[email protected]
Signed-off-by: Jiri Olsa <[email protected]>
---
tools/perf/util/dso.c | 19 ++++++++++++-------
1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index e059976d9d93..cb6199c1390a 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -898,18 +898,12 @@ static ssize_t cached_read(struct dso *dso, struct machine *machine,
return r;
}

-int dso__data_file_size(struct dso *dso, struct machine *machine)
+static int file_size(struct dso *dso, struct machine *machine)
{
int ret = 0;
struct stat st;
char sbuf[STRERR_BUFSIZE];

- if (dso->data.file_size)
- return 0;
-
- if (dso->data.status == DSO_DATA_STATUS_ERROR)
- return -1;
-
pthread_mutex_lock(&dso__data_open_lock);

/*
@@ -938,6 +932,17 @@ int dso__data_file_size(struct dso *dso, struct machine *machine)
return ret;
}

+int dso__data_file_size(struct dso *dso, struct machine *machine)
+{
+ if (dso->data.file_size)
+ return 0;
+
+ if (dso->data.status == DSO_DATA_STATUS_ERROR)
+ return -1;
+
+ return file_size(dso, machine);
+}
+
/**
* dso__data_size - Return dso data size
* @dso: dso object
--
2.17.2

2019-04-16 16:04:10

by Jiri Olsa

Subject: [PATCH 02/12] perf tools: Separate generic code in dso_cache__read

Moving the file-specific code in the dso_cache__read function
into a separate file_read function. I'll add bpf-specific
code in the following patches.

Link: http://lkml.kernel.org/n/[email protected]
Signed-off-by: Jiri Olsa <[email protected]>
---
tools/perf/util/dso.c | 47 ++++++++++++++++++++++++-------------------
1 file changed, 26 insertions(+), 21 deletions(-)

diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index cb6199c1390a..6baad22ec8a9 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -794,6 +794,30 @@ dso_cache__memcpy(struct dso_cache *cache, u64 offset,
return cache_size;
}

+static ssize_t file_read(struct dso *dso, struct machine *machine,
+ u64 offset, char *data)
+{
+ ssize_t ret;
+
+ pthread_mutex_lock(&dso__data_open_lock);
+
+ /*
+ * dso->data.fd might be closed if other thread opened another
+ * file (dso) due to open file limit (RLIMIT_NOFILE).
+ */
+ try_to_open_dso(dso, machine);
+
+ if (dso->data.fd < 0) {
+ dso->data.status = DSO_DATA_STATUS_ERROR;
+ return -errno;
+ }
+
+ ret = pread(dso->data.fd, data, DSO__DATA_CACHE_SIZE, offset);
+ pthread_mutex_unlock(&dso__data_open_lock);
+
+ return ret;
+}
+
static ssize_t
dso_cache__read(struct dso *dso, struct machine *machine,
u64 offset, u8 *data, ssize_t size)
@@ -803,37 +827,18 @@ dso_cache__read(struct dso *dso, struct machine *machine,
ssize_t ret;

do {
- u64 cache_offset;
+ u64 cache_offset = offset & DSO__DATA_CACHE_MASK;

cache = zalloc(sizeof(*cache) + DSO__DATA_CACHE_SIZE);
if (!cache)
return -ENOMEM;

- pthread_mutex_lock(&dso__data_open_lock);
-
- /*
- * dso->data.fd might be closed if other thread opened another
- * file (dso) due to open file limit (RLIMIT_NOFILE).
- */
- try_to_open_dso(dso, machine);
-
- if (dso->data.fd < 0) {
- ret = -errno;
- dso->data.status = DSO_DATA_STATUS_ERROR;
- break;
- }
-
- cache_offset = offset & DSO__DATA_CACHE_MASK;
-
- ret = pread(dso->data.fd, cache->data, DSO__DATA_CACHE_SIZE, cache_offset);
- if (ret <= 0)
- break;
+ ret = file_read(dso, machine, cache_offset, cache->data);

cache->offset = cache_offset;
cache->size = ret;
} while (0);

- pthread_mutex_unlock(&dso__data_open_lock);

if (ret > 0) {
old = dso_cache__insert(dso, cache);
--
2.17.2

2019-04-16 16:05:26

by Jiri Olsa

Subject: [PATCH 03/12] perf tools: Simplify dso_cache__read function

There's no need for the do-while loop now, and we can also
merge the two (ret > 0) condition legs together.

Link: http://lkml.kernel.org/n/[email protected]
Signed-off-by: Jiri Olsa <[email protected]>
---
tools/perf/util/dso.c | 17 ++++++-----------
1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 6baad22ec8a9..6fdff723e55a 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -822,25 +822,20 @@ static ssize_t
dso_cache__read(struct dso *dso, struct machine *machine,
u64 offset, u8 *data, ssize_t size)
{
+ u64 cache_offset = offset & DSO__DATA_CACHE_MASK;
struct dso_cache *cache;
struct dso_cache *old;
ssize_t ret;

- do {
- u64 cache_offset = offset & DSO__DATA_CACHE_MASK;
-
- cache = zalloc(sizeof(*cache) + DSO__DATA_CACHE_SIZE);
- if (!cache)
- return -ENOMEM;
-
- ret = file_read(dso, machine, cache_offset, cache->data);
+ cache = zalloc(sizeof(*cache) + DSO__DATA_CACHE_SIZE);
+ if (!cache)
+ return -ENOMEM;

+ ret = file_read(dso, machine, cache_offset, cache->data);
+ if (ret > 0) {
cache->offset = cache_offset;
cache->size = ret;
- } while (0);
-

- if (ret > 0) {
old = dso_cache__insert(dso, cache);
if (old) {
/* we lose the race */
--
2.17.2

2019-04-16 17:18:31

by Stanislav Fomichev

Subject: Re: [PATCH 02/12] perf tools: Separate generic code in dso_cache__read

On 04/16, Jiri Olsa wrote:
> Moving file specific code in dso_cache__read function
> into separate file_read function. I'll add bpf specific
> code in following patches.
>
> Link: http://lkml.kernel.org/n/[email protected]
> Signed-off-by: Jiri Olsa <[email protected]>
> ---
> tools/perf/util/dso.c | 47 ++++++++++++++++++++++++-------------------
> 1 file changed, 26 insertions(+), 21 deletions(-)
>
> diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
> index cb6199c1390a..6baad22ec8a9 100644
> --- a/tools/perf/util/dso.c
> +++ b/tools/perf/util/dso.c
> @@ -794,6 +794,30 @@ dso_cache__memcpy(struct dso_cache *cache, u64 offset,
> return cache_size;
> }
>
> +static ssize_t file_read(struct dso *dso, struct machine *machine,
> + u64 offset, char *data)
> +{
> + ssize_t ret;
> +
> + pthread_mutex_lock(&dso__data_open_lock);
> +
> + /*
> + * dso->data.fd might be closed if other thread opened another
> + * file (dso) due to open file limit (RLIMIT_NOFILE).
> + */
> + try_to_open_dso(dso, machine);
> +
> + if (dso->data.fd < 0) {
> + dso->data.status = DSO_DATA_STATUS_ERROR;
pthread_mutex_unlock(&dso__data_open_lock) here?
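e.g. a sketch of the fixed error path that also keeps errno intact
across the unlock:

	if (dso->data.fd < 0) {
		dso->data.status = DSO_DATA_STATUS_ERROR;
		ret = -errno;	/* save errno before the unlock can clobber it */
		pthread_mutex_unlock(&dso__data_open_lock);
		return ret;
	}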

> + return -errno;
> + }
> +
> + ret = pread(dso->data.fd, data, DSO__DATA_CACHE_SIZE, offset);
> + pthread_mutex_unlock(&dso__data_open_lock);
> +
> + return ret;
> +}
> +
> static ssize_t
> dso_cache__read(struct dso *dso, struct machine *machine,
> u64 offset, u8 *data, ssize_t size)
> @@ -803,37 +827,18 @@ dso_cache__read(struct dso *dso, struct machine *machine,
> ssize_t ret;
>
> do {
> - u64 cache_offset;
> + u64 cache_offset = offset & DSO__DATA_CACHE_MASK;
>
> cache = zalloc(sizeof(*cache) + DSO__DATA_CACHE_SIZE);
> if (!cache)
> return -ENOMEM;
>
> - pthread_mutex_lock(&dso__data_open_lock);
> -
> - /*
> - * dso->data.fd might be closed if other thread opened another
> - * file (dso) due to open file limit (RLIMIT_NOFILE).
> - */
> - try_to_open_dso(dso, machine);
> -
> - if (dso->data.fd < 0) {
> - ret = -errno;
> - dso->data.status = DSO_DATA_STATUS_ERROR;
> - break;
> - }
> -
> - cache_offset = offset & DSO__DATA_CACHE_MASK;
> -
> - ret = pread(dso->data.fd, cache->data, DSO__DATA_CACHE_SIZE, cache_offset);
> - if (ret <= 0)
> - break;
> + ret = file_read(dso, machine, cache_offset, cache->data);
>
> cache->offset = cache_offset;
> cache->size = ret;
> } while (0);
>
> - pthread_mutex_unlock(&dso__data_open_lock);
>
> if (ret > 0) {
> old = dso_cache__insert(dso, cache);
> --
> 2.17.2
>

2019-04-16 18:22:39

by Jiri Olsa

Subject: Re: [PATCH 02/12] perf tools: Separate generic code in dso_cache__read

On Tue, Apr 16, 2019 at 10:17:13AM -0700, Stanislav Fomichev wrote:
> On 04/16, Jiri Olsa wrote:
> > Moving file specific code in dso_cache__read function
> > into separate file_read function. I'll add bpf specific
> > code in following patches.
> >
> > Link: http://lkml.kernel.org/n/[email protected]
> > Signed-off-by: Jiri Olsa <[email protected]>
> > ---
> > tools/perf/util/dso.c | 47 ++++++++++++++++++++++++-------------------
> > 1 file changed, 26 insertions(+), 21 deletions(-)
> >
> > diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
> > index cb6199c1390a..6baad22ec8a9 100644
> > --- a/tools/perf/util/dso.c
> > +++ b/tools/perf/util/dso.c
> > @@ -794,6 +794,30 @@ dso_cache__memcpy(struct dso_cache *cache, u64 offset,
> > return cache_size;
> > }
> >
> > +static ssize_t file_read(struct dso *dso, struct machine *machine,
> > + u64 offset, char *data)
> > +{
> > + ssize_t ret;
> > +
> > + pthread_mutex_lock(&dso__data_open_lock);
> > +
> > + /*
> > + * dso->data.fd might be closed if other thread opened another
> > + * file (dso) due to open file limit (RLIMIT_NOFILE).
> > + */
> > + try_to_open_dso(dso, machine);
> > +
> > + if (dso->data.fd < 0) {
> > + dso->data.status = DSO_DATA_STATUS_ERROR;
> pthread_mutex_unlock(&dso__data_open_lock) here?

oops, yea.. thanks

jirka

2019-04-16 19:33:20

by Arnaldo Carvalho de Melo

Subject: Re: [PATCH 07/12] perf tools: Check maps for bpf programs

Em Tue, Apr 16, 2019 at 06:01:22PM +0200, Jiri Olsa escreveu:
> From: Song Liu <[email protected]>
>
> As reported by Jiri Olsa in:
>
> "[BUG] perf: intel_pt won't display kernel function"
> https://lore.kernel.org/lkml/20190403143738.GB32001@krava
>
> Recent changes to support PERF_RECORD_KSYMBOL and PERF_RECORD_BPF_EVENT
> broke --kallsyms option. This is because it broke test __map__is_kmodule.
>
> This patch fixes this by adding check for bpf program, so that these maps
> are not mistaken as kernel modules.

Thanks, applied to perf/urgent.

- Arnaldo

2019-04-16 19:34:35

by Arnaldo Carvalho de Melo

Subject: Re: [PATCH 08/12] perf tools: Fix side band thread draining

Em Tue, Apr 16, 2019 at 06:01:23PM +0200, Jiri Olsa escreveu:
> Current perf_evlist__poll_thread code could finish
> without draining the data. Adding the logic that
> makes sure we won't finish before the drain.
>
> Fixes: 657ee5531903 ("perf evlist: Introduce side band thread")

Thanks, applied to perf/urgent.

- Arnaldo

> Link: http://lkml.kernel.org/n/[email protected]
> Signed-off-by: Jiri Olsa <[email protected]>
> ---
> tools/perf/util/evlist.c | 14 +++++++++-----
> 1 file changed, 9 insertions(+), 5 deletions(-)
>
> diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
> index f2bbae38278d..4b6783ff5813 100644
> --- a/tools/perf/util/evlist.c
> +++ b/tools/perf/util/evlist.c
> @@ -1868,12 +1868,12 @@ static void *perf_evlist__poll_thread(void *arg)
> {
> struct perf_evlist *evlist = arg;
> bool draining = false;
> - int i;
> + int i, done = 0;
> +
> + while (!done) {
> + bool got_data = false;
>
> - while (draining || !(evlist->thread.done)) {
> - if (draining)
> - draining = false;
> - else if (evlist->thread.done)
> + if (evlist->thread.done)
> draining = true;
>
> if (!draining)
> @@ -1894,9 +1894,13 @@ static void *perf_evlist__poll_thread(void *arg)
> pr_warning("cannot locate proper evsel for the side band event\n");
>
> perf_mmap__consume(map);
> + got_data = true;
> }
> perf_mmap__read_done(map);
> }
> +
> + if (draining && !got_data)
> + break;
> }
> return NULL;
> }
> --
> 2.17.2

--

- Arnaldo

2019-04-16 19:37:45

by Arnaldo Carvalho de Melo

Subject: Re: [PATCH 09/12] perf tools: Fix map reference counting

Em Tue, Apr 16, 2019 at 06:01:24PM +0200, Jiri Olsa escreveu:
> By calling maps__insert we assume to get 2 references
> on the map, which we relese within maps__remove call.
>
> However if there's already same map name, we currently
> don't bump the reference and can crash, like:
>
> Program received signal SIGABRT, Aborted.
> 0x00007ffff75e60f5 in raise () from /lib64/libc.so.6
>
> (gdb) bt
> #0 0x00007ffff75e60f5 in raise () from /lib64/libc.so.6
> #1 0x00007ffff75d0895 in abort () from /lib64/libc.so.6
> #2 0x00007ffff75d0769 in __assert_fail_base.cold () from /lib64/libc.so.6
> #3 0x00007ffff75de596 in __assert_fail () from /lib64/libc.so.6
> #4 0x00000000004fc006 in refcount_sub_and_test (i=1, r=0x1224e88) at tools/include/linux/refcount.h:131
> #5 refcount_dec_and_test (r=0x1224e88) at tools/include/linux/refcount.h:148
> #6 map__put (map=0x1224df0) at util/map.c:299
> #7 0x00000000004fdb95 in __maps__remove (map=0x1224df0, maps=0xb17d80) at util/map.c:953
> #8 maps__remove (maps=0xb17d80, map=0x1224df0) at util/map.c:959
> #9 0x00000000004f7d8a in map_groups__remove (map=<optimized out>, mg=<optimized out>) at util/map_groups.h:65
> #10 machine__process_ksymbol_unregister (sample=<optimized out>, event=0x7ffff7279670, machine=<optimized out>) at util/machine.c:728
> #11 machine__process_ksymbol (machine=<optimized out>, event=0x7ffff7279670, sample=<optimized out>) at util/machine.c:741
> #12 0x00000000004fffbb in perf_session__deliver_event (session=0xb11390, event=0x7ffff7279670, tool=0x7fffffffc7b0, file_offset=13936) at util/session.c:1362
> #13 0x00000000005039bb in do_flush (show_progress=false, oe=0xb17e80) at util/ordered-events.c:243
> #14 __ordered_events__flush (oe=0xb17e80, how=OE_FLUSH__ROUND, timestamp=<optimized out>) at util/ordered-events.c:322
> #15 0x00000000005005e4 in perf_session__process_user_event (session=session@entry=0xb11390, event=event@entry=0x7ffff72a4af8,
> ...
>
> Adding the map on the list and getting the reference event
> if we find the map with same name.

Added:

Fixes: 1e6285699b30 ("perf symbols: Fix slowness due to -ffunction-section")

Ccing Eric Saint-Etienne <[email protected]>, the author of
the patch that introduced that function.

And applied to perf/urgent

> Cc: Song Liu <[email protected]>
> Link: http://lkml.kernel.org/n/[email protected]
> Signed-off-by: Jiri Olsa <[email protected]>
> ---
> tools/perf/util/map.c | 4 +---
> 1 file changed, 1 insertion(+), 3 deletions(-)
>
> diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
> index 28d484ef74ae..ee71efb9db62 100644
> --- a/tools/perf/util/map.c
> +++ b/tools/perf/util/map.c
> @@ -926,10 +926,8 @@ static void __maps__insert_name(struct maps *maps, struct map *map)
> rc = strcmp(m->dso->short_name, map->dso->short_name);
> if (rc < 0)
> p = &(*p)->rb_left;
> - else if (rc > 0)
> - p = &(*p)->rb_right;
> else
> - return;
> + p = &(*p)->rb_right;
> }
> rb_link_node(&map->rb_node_name, parent, p);
> rb_insert_color(&map->rb_node_name, &maps->names);
> --
> 2.17.2

--

- Arnaldo

2019-04-16 20:42:23

by Song Liu

Subject: Re: [PATCH 08/12] perf tools: Fix side band thread draining



> On Apr 16, 2019, at 9:01 AM, Jiri Olsa <[email protected]> wrote:
>
> Current perf_evlist__poll_thread code could finish
> without draining the data. Adding the logic that
> makes sure we won't finish before the drain.
>
> Fixes: 657ee5531903 ("perf evlist: Introduce side band thread")
> Link: http://lkml.kernel.org/n/[email protected]
> Signed-off-by: Jiri Olsa <[email protected]>

Thanks for the fix!

Acked-by: Song Liu <[email protected]>

> ---
> tools/perf/util/evlist.c | 14 +++++++++-----
> 1 file changed, 9 insertions(+), 5 deletions(-)
>
> diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
> index f2bbae38278d..4b6783ff5813 100644
> --- a/tools/perf/util/evlist.c
> +++ b/tools/perf/util/evlist.c
> @@ -1868,12 +1868,12 @@ static void *perf_evlist__poll_thread(void *arg)
> {
> struct perf_evlist *evlist = arg;
> bool draining = false;
> - int i;
> + int i, done = 0;
> +
> + while (!done) {
> + bool got_data = false;
>
> - while (draining || !(evlist->thread.done)) {
> - if (draining)
> - draining = false;
> - else if (evlist->thread.done)
> + if (evlist->thread.done)
> draining = true;
>
> if (!draining)
> @@ -1894,9 +1894,13 @@ static void *perf_evlist__poll_thread(void *arg)
> pr_warning("cannot locate proper evsel for the side band event\n");
>
> perf_mmap__consume(map);
> + got_data = true;
> }
> perf_mmap__read_done(map);
> }
> +
> + if (draining && !got_data)
> + break;
> }
> return NULL;
> }
> --
> 2.17.2
>

2019-04-17 06:38:06

by Adrian Hunter

Subject: Re: [PATCH 11/12] perf tools: Reuse shared eBPF dso objects

On 16/04/19 7:01 PM, Jiri Olsa wrote:
> The eBPF program can be loaded multiple times
> with the same name (tag). We can share dso
> objects for those programs.

Doesn't an eBPF program get recompiled differently every time it is loaded?

>
> Link: http://lkml.kernel.org/n/[email protected]
> Signed-off-by: Jiri Olsa <[email protected]>
> ---
> tools/perf/util/machine.c | 14 +++++++++++---
> 1 file changed, 11 insertions(+), 3 deletions(-)
>
> diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
> index d4aa44489011..a60653827891 100644
> --- a/tools/perf/util/machine.c
> +++ b/tools/perf/util/machine.c
> @@ -23,6 +23,7 @@
> #include "linux/hash.h"
> #include "asm/bug.h"
> #include "bpf-event.h"
> +#include "dso.h"
>
> #include "sane_ctype.h"
> #include <symbol/kallsyms.h>
> @@ -699,11 +700,18 @@ static int machine__process_ksymbol_register(struct machine *machine,
>
> map = map_groups__find(&machine->kmaps, event->ksymbol_event.addr);
> if (!map) {
> - map = dso__new_map(event->ksymbol_event.name);
> - if (!map)
> + struct dso *dso;
> +
> + dso = dsos__findnew(&machine->dsos, event->ksymbol_event.name);
> + if (!dso)
> return -ENOMEM;
>
> - map->start = event->ksymbol_event.addr;
> + map = map__new2(event->ksymbol_event.addr, dso);
> + if (!map) {
> + dso__put(dso);
> + return -ENOMEM;
> + }
> +
> map->end = map->start + event->ksymbol_event.len;
> map_groups__insert(&machine->kmaps, map);
> }
>

2019-04-17 06:52:40

by Jiri Olsa

Subject: Re: [PATCH 11/12] perf tools: Reuse shared eBPF dso objects

On Wed, Apr 17, 2019 at 09:35:32AM +0300, Adrian Hunter wrote:
> On 16/04/19 7:01 PM, Jiri Olsa wrote:
> > The eBPF program can be loaded multiple times
> > with the same name (tag). We can share dso
> > objects for those programs.
>
> Doesn't a eBPF program get recompiled differently every time it is loaded?

yes, but those with the same tag are identical

jirka

>
> >
> > Link: http://lkml.kernel.org/n/[email protected]
> > Signed-off-by: Jiri Olsa <[email protected]>
> > ---
> > tools/perf/util/machine.c | 14 +++++++++++---
> > 1 file changed, 11 insertions(+), 3 deletions(-)
> >
> > diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
> > index d4aa44489011..a60653827891 100644
> > --- a/tools/perf/util/machine.c
> > +++ b/tools/perf/util/machine.c
> > @@ -23,6 +23,7 @@
> > #include "linux/hash.h"
> > #include "asm/bug.h"
> > #include "bpf-event.h"
> > +#include "dso.h"
> >
> > #include "sane_ctype.h"
> > #include <symbol/kallsyms.h>
> > @@ -699,11 +700,18 @@ static int machine__process_ksymbol_register(struct machine *machine,
> >
> > map = map_groups__find(&machine->kmaps, event->ksymbol_event.addr);
> > if (!map) {
> > - map = dso__new_map(event->ksymbol_event.name);
> > - if (!map)
> > + struct dso *dso;
> > +
> > + dso = dsos__findnew(&machine->dsos, event->ksymbol_event.name);
> > + if (!dso)
> > return -ENOMEM;
> >
> > - map->start = event->ksymbol_event.addr;
> > + map = map__new2(event->ksymbol_event.addr, dso);
> > + if (!map) {
> > + dso__put(dso);
> > + return -ENOMEM;
> > + }
> > +
> > map->end = map->start + event->ksymbol_event.len;
> > map_groups__insert(&machine->kmaps, map);
> > }
> >
>

2019-04-17 06:57:54

by Adrian Hunter

Subject: Re: [PATCH 11/12] perf tools: Reuse shared eBPF dso objects

On 17/04/19 9:51 AM, Jiri Olsa wrote:
> On Wed, Apr 17, 2019 at 09:35:32AM +0300, Adrian Hunter wrote:
>> On 16/04/19 7:01 PM, Jiri Olsa wrote:
>>> The eBPF program can be loaded multiple times
>>> with the same name (tag). We can share dso
>>> objects for those programs.
>>
>> Doesn't a eBPF program get recompiled differently every time it is loaded?
>
> yes, but those with same tag are identical

But won't recompiled eBPF programs be different due to blinded constants?
Or perhaps in the future other code randomization?

>
> jirka
>
>>
>>>
>>> Link: http://lkml.kernel.org/n/[email protected]
>>> Signed-off-by: Jiri Olsa <[email protected]>
>>> ---
>>> tools/perf/util/machine.c | 14 +++++++++++---
>>> 1 file changed, 11 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
>>> index d4aa44489011..a60653827891 100644
>>> --- a/tools/perf/util/machine.c
>>> +++ b/tools/perf/util/machine.c
>>> @@ -23,6 +23,7 @@
>>> #include "linux/hash.h"
>>> #include "asm/bug.h"
>>> #include "bpf-event.h"
>>> +#include "dso.h"
>>>
>>> #include "sane_ctype.h"
>>> #include <symbol/kallsyms.h>
>>> @@ -699,11 +700,18 @@ static int machine__process_ksymbol_register(struct machine *machine,
>>>
>>> map = map_groups__find(&machine->kmaps, event->ksymbol_event.addr);
>>> if (!map) {
>>> - map = dso__new_map(event->ksymbol_event.name);
>>> - if (!map)
>>> + struct dso *dso;
>>> +
>>> + dso = dsos__findnew(&machine->dsos, event->ksymbol_event.name);
>>> + if (!dso)
>>> return -ENOMEM;
>>>
>>> - map->start = event->ksymbol_event.addr;
>>> + map = map__new2(event->ksymbol_event.addr, dso);
>>> + if (!map) {
>>> + dso__put(dso);
>>> + return -ENOMEM;
>>> + }
>>> +
>>> map->end = map->start + event->ksymbol_event.len;
>>> map_groups__insert(&machine->kmaps, map);
>>> }
>>>
>>
>

2019-04-17 07:35:09

by Jiri Olsa

Subject: Re: [PATCH 11/12] perf tools: Reuse shared eBPF dso objects

On Wed, Apr 17, 2019 at 09:55:25AM +0300, Adrian Hunter wrote:
> On 17/04/19 9:51 AM, Jiri Olsa wrote:
> > On Wed, Apr 17, 2019 at 09:35:32AM +0300, Adrian Hunter wrote:
> >> On 16/04/19 7:01 PM, Jiri Olsa wrote:
> >>> The eBPF program can be loaded multiple times
> >>> with the same name (tag). We can share dso
> >>> objects for those programs.
> >>
> >> Doesn't a eBPF program get recompiled differently every time it is loaded?
> >
> > yes, but those with same tag are identical
>
> But won't recompiled eBPF programs be different due to blinded constants?
> Or perhaps in the future other code randomization?

ah right.. that can happen, let's skip this one then

perhaps we could add the bpf prog id as part of the name
to make it unique.. I'll check

thanks,
jirka

>
> >
> > jirka
> >
> >>
> >>>
> >>> Link: http://lkml.kernel.org/n/[email protected]
> >>> Signed-off-by: Jiri Olsa <[email protected]>
> >>> ---
> >>> tools/perf/util/machine.c | 14 +++++++++++---
> >>> 1 file changed, 11 insertions(+), 3 deletions(-)
> >>>
> >>> diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
> >>> index d4aa44489011..a60653827891 100644
> >>> --- a/tools/perf/util/machine.c
> >>> +++ b/tools/perf/util/machine.c
> >>> @@ -23,6 +23,7 @@
> >>> #include "linux/hash.h"
> >>> #include "asm/bug.h"
> >>> #include "bpf-event.h"
> >>> +#include "dso.h"
> >>>
> >>> #include "sane_ctype.h"
> >>> #include <symbol/kallsyms.h>
> >>> @@ -699,11 +700,18 @@ static int machine__process_ksymbol_register(struct machine *machine,
> >>>
> >>> map = map_groups__find(&machine->kmaps, event->ksymbol_event.addr);
> >>> if (!map) {
> >>> - map = dso__new_map(event->ksymbol_event.name);
> >>> - if (!map)
> >>> + struct dso *dso;
> >>> +
> >>> + dso = dsos__findnew(&machine->dsos, event->ksymbol_event.name);
> >>> + if (!dso)
> >>> return -ENOMEM;
> >>>
> >>> - map->start = event->ksymbol_event.addr;
> >>> + map = map__new2(event->ksymbol_event.addr, dso);
> >>> + if (!map) {
> >>> + dso__put(dso);
> >>> + return -ENOMEM;
> >>> + }
> >>> +
> >>> map->end = map->start + event->ksymbol_event.len;
> >>> map_groups__insert(&machine->kmaps, map);
> >>> }
> >>>
> >>
> >
>

2019-04-17 16:59:18

by Song Liu

[permalink] [raw]
Subject: Re: [PATCH 11/12] perf tools: Reuse shared eBPF dso objects



> On Apr 17, 2019, at 12:32 AM, Jiri Olsa <[email protected]> wrote:
>
> On Wed, Apr 17, 2019 at 09:55:25AM +0300, Adrian Hunter wrote:
>> On 17/04/19 9:51 AM, Jiri Olsa wrote:
>>> On Wed, Apr 17, 2019 at 09:35:32AM +0300, Adrian Hunter wrote:
>>>> On 16/04/19 7:01 PM, Jiri Olsa wrote:
>>>>> The eBPF program can be loaded multiple times
>>>>> with the same name (tag). We can share dso
>>>>> objects for those programs.
>>>>
>>>> Doesn't an eBPF program get recompiled differently every time it is loaded?
>>>
>>> yes, but those with the same tag are identical
>>
>> But won't recompiled eBPF programs be different due to blinded constants?
>> Or perhaps in the future other code randomization?
>
> ah right.. that can happen, let's skip this one then
>
> perhaps we could add bpf prog id to be part of the name
> to make it unique.. I'll check
>
> thanks,
> jirka

I thought about similar optimizations. However, this is not easy.

The tag of a BPF program is calculated _before_ JIT, so we really cannot
say two programs with the same tag are identical. The current
implementation iterates over all bpf program IDs, so the same program id
will not appear twice in the same perf session.
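
(For context, the walk over program IDs looks roughly like the sketch
below, using libbpf's bpf_prog_get_next_id()/bpf_prog_get_fd_by_id();
this is an illustration of the iteration, not perf's exact code.)

#include <unistd.h>
#include <bpf/bpf.h>

/* Visit every BPF program currently loaded in the kernel.  Each id
 * comes back exactly once, which is why a given program cannot be
 * recorded twice in the same session. */
static void for_each_bpf_prog(void (*cb)(int fd))
{
	__u32 id = 0;

	while (bpf_prog_get_next_id(id, &id) == 0) {
		int fd = bpf_prog_get_fd_by_id(id);

		if (fd < 0)
			continue;	/* program went away; skip it */
		cb(fd);
		close(fd);
	}
}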

I think the more realistic optimization opportunity comes from sharing
the dsos across multiple perf.data files.

Thanks,
Song

>>
>>>
>>> jirka
>>>
>>>>
>>>>>
>>>>> Link: http://lkml.kernel.org/n/[email protected]
>>>>> Signed-off-by: Jiri Olsa <[email protected]>
>>>>> ---
>>>>> tools/perf/util/machine.c | 14 +++++++++++---
>>>>> 1 file changed, 11 insertions(+), 3 deletions(-)
>>>>>
>>>>> diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
>>>>> index d4aa44489011..a60653827891 100644
>>>>> --- a/tools/perf/util/machine.c
>>>>> +++ b/tools/perf/util/machine.c
>>>>> @@ -23,6 +23,7 @@
>>>>> #include "linux/hash.h"
>>>>> #include "asm/bug.h"
>>>>> #include "bpf-event.h"
>>>>> +#include "dso.h"
>>>>>
>>>>> #include "sane_ctype.h"
>>>>> #include <symbol/kallsyms.h>
>>>>> @@ -699,11 +700,18 @@ static int machine__process_ksymbol_register(struct machine *machine,
>>>>>
>>>>> map = map_groups__find(&machine->kmaps, event->ksymbol_event.addr);
>>>>> if (!map) {
>>>>> - map = dso__new_map(event->ksymbol_event.name);
>>>>> - if (!map)
>>>>> + struct dso *dso;
>>>>> +
>>>>> + dso = dsos__findnew(&machine->dsos, event->ksymbol_event.name);
>>>>> + if (!dso)
>>>>> return -ENOMEM;
>>>>>
>>>>> - map->start = event->ksymbol_event.addr;
>>>>> + map = map__new2(event->ksymbol_event.addr, dso);
>>>>> + if (!map) {
>>>>> + dso__put(dso);
>>>>> + return -ENOMEM;
>>>>> + }
>>>>> +
>>>>> map->end = map->start + event->ksymbol_event.len;
>>>>> map_groups__insert(&machine->kmaps, map);
>>>>> }

Subject: [tip:perf/urgent] perf tools: Check maps for bpf programs

Commit-ID: a93e0b2365e81e5a5b61f25e269b5dc73d242cba
Gitweb: https://git.kernel.org/tip/a93e0b2365e81e5a5b61f25e269b5dc73d242cba
Author: Song Liu <[email protected]>
AuthorDate: Tue, 16 Apr 2019 18:01:22 +0200
Committer: Arnaldo Carvalho de Melo <[email protected]>
CommitDate: Wed, 17 Apr 2019 14:30:11 -0300

perf tools: Check maps for bpf programs

As reported by Jiri Olsa in:

"[BUG] perf: intel_pt won't display kernel function"
https://lore.kernel.org/lkml/20190403143738.GB32001@krava

Recent changes to support PERF_RECORD_KSYMBOL and PERF_RECORD_BPF_EVENT
broke the --kallsyms option. This is because they broke the
__map__is_kmodule() test.

Fix this by adding a check for bpf programs, so that these maps are not
mistaken for kernel modules.

Signed-off-by: Song Liu <[email protected]>
Reported-by: Jiri Olsa <[email protected]>
Cc: Adrian Hunter <[email protected]>
Cc: Alexander Shishkin <[email protected]>
Cc: Alexei Starovoitov <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Andrii Nakryiko <[email protected]>
Cc: Daniel Borkmann <[email protected]>
Cc: Martin KaFai Lau <[email protected]>
Cc: Namhyung Kim <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Yonghong Song <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Fixes: 76193a94522f ("perf, bpf: Introduce PERF_RECORD_KSYMBOL")
Signed-off-by: Jiri Olsa <[email protected]>
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/util/map.c | 16 ++++++++++++++++
tools/perf/util/map.h | 4 +++-
2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index e32628cd20a7..28d484ef74ae 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -261,6 +261,22 @@ bool __map__is_extra_kernel_map(const struct map *map)
return kmap && kmap->name[0];
}

+bool __map__is_bpf_prog(const struct map *map)
+{
+ const char *name;
+
+ if (map->dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO)
+ return true;
+
+ /*
+ * If PERF_RECORD_BPF_EVENT is not included, the dso will not have
+ * type of DSO_BINARY_TYPE__BPF_PROG_INFO. In such cases, we can
+ * guess the type based on name.
+ */
+ name = map->dso->short_name;
+ return name && (strstr(name, "bpf_prog_") == name);
+}
+
bool map__has_symbols(const struct map *map)
{
return dso__has_symbols(map->dso);
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index 0e20749f2c55..dc93787c74f0 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -159,10 +159,12 @@ int map__set_kallsyms_ref_reloc_sym(struct map *map, const char *symbol_name,

bool __map__is_kernel(const struct map *map);
bool __map__is_extra_kernel_map(const struct map *map);
+bool __map__is_bpf_prog(const struct map *map);

static inline bool __map__is_kmodule(const struct map *map)
{
- return !__map__is_kernel(map) && !__map__is_extra_kernel_map(map);
+ return !__map__is_kernel(map) && !__map__is_extra_kernel_map(map) &&
+ !__map__is_bpf_prog(map);
}

bool map__has_symbols(const struct map *map);

Subject: [tip:perf/urgent] perf evlist: Fix side band thread draining

Commit-ID: adc6257c4a6f23ff97dca8314fcd33828e2d8db5
Gitweb: https://git.kernel.org/tip/adc6257c4a6f23ff97dca8314fcd33828e2d8db5
Author: Jiri Olsa <[email protected]>
AuthorDate: Tue, 16 Apr 2019 18:01:23 +0200
Committer: Arnaldo Carvalho de Melo <[email protected]>
CommitDate: Wed, 17 Apr 2019 14:30:11 -0300

perf evlist: Fix side band thread draining

The current perf_evlist__poll_thread() code could finish without
draining the data. Add logic to make sure we don't finish before the
drain.

Signed-off-by: Jiri Olsa <[email protected]>
Cc: Adrian Hunter <[email protected]>
Cc: Alexander Shishkin <[email protected]>
Cc: Alexei Starovoitov <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Daniel Borkmann <[email protected]>
Cc: Namhyung Kim <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Song Liu <[email protected]>
Fixes: 657ee5531903 ("perf evlist: Introduce side band thread")
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/util/evlist.c | 14 +++++++++-----
1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 6689378ee577..51ead577533f 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1868,12 +1868,12 @@ static void *perf_evlist__poll_thread(void *arg)
{
struct perf_evlist *evlist = arg;
bool draining = false;
- int i;
+ int i, done = 0;
+
+ while (!done) {
+ bool got_data = false;

- while (draining || !(evlist->thread.done)) {
- if (draining)
- draining = false;
- else if (evlist->thread.done)
+ if (evlist->thread.done)
draining = true;

if (!draining)
@@ -1894,9 +1894,13 @@ static void *perf_evlist__poll_thread(void *arg)
pr_warning("cannot locate proper evsel for the side band event\n");

perf_mmap__consume(map);
+ got_data = true;
}
perf_mmap__read_done(map);
}
+
+ if (draining && !got_data)
+ break;
}
return NULL;
}

Subject: [tip:perf/urgent] perf tools: Fix map reference counting

Commit-ID: b9abbdfa88024d52c8084d8f46ea4f161606c692
Gitweb: https://git.kernel.org/tip/b9abbdfa88024d52c8084d8f46ea4f161606c692
Author: Jiri Olsa <[email protected]>
AuthorDate: Tue, 16 Apr 2019 18:01:24 +0200
Committer: Arnaldo Carvalho de Melo <[email protected]>
CommitDate: Wed, 17 Apr 2019 14:30:11 -0300

perf tools: Fix map reference counting

By calling maps__insert() we assume we take 2 references on the map,
which we release within the maps__remove() call.

However, if there's already a map with the same name, we currently don't
bump the reference and can crash, like:

Program received signal SIGABRT, Aborted.
0x00007ffff75e60f5 in raise () from /lib64/libc.so.6

(gdb) bt
#0 0x00007ffff75e60f5 in raise () from /lib64/libc.so.6
#1 0x00007ffff75d0895 in abort () from /lib64/libc.so.6
#2 0x00007ffff75d0769 in __assert_fail_base.cold () from /lib64/libc.so.6
#3 0x00007ffff75de596 in __assert_fail () from /lib64/libc.so.6
#4 0x00000000004fc006 in refcount_sub_and_test (i=1, r=0x1224e88) at tools/include/linux/refcount.h:131
#5 refcount_dec_and_test (r=0x1224e88) at tools/include/linux/refcount.h:148
#6 map__put (map=0x1224df0) at util/map.c:299
#7 0x00000000004fdb95 in __maps__remove (map=0x1224df0, maps=0xb17d80) at util/map.c:953
#8 maps__remove (maps=0xb17d80, map=0x1224df0) at util/map.c:959
#9 0x00000000004f7d8a in map_groups__remove (map=<optimized out>, mg=<optimized out>) at util/map_groups.h:65
#10 machine__process_ksymbol_unregister (sample=<optimized out>, event=0x7ffff7279670, machine=<optimized out>) at util/machine.c:728
#11 machine__process_ksymbol (machine=<optimized out>, event=0x7ffff7279670, sample=<optimized out>) at util/machine.c:741
#12 0x00000000004fffbb in perf_session__deliver_event (session=0xb11390, event=0x7ffff7279670, tool=0x7fffffffc7b0, file_offset=13936) at util/session.c:1362
#13 0x00000000005039bb in do_flush (show_progress=false, oe=0xb17e80) at util/ordered-events.c:243
#14 __ordered_events__flush (oe=0xb17e80, how=OE_FLUSH__ROUND, timestamp=<optimized out>) at util/ordered-events.c:322
#15 0x00000000005005e4 in perf_session__process_user_event (session=session@entry=0xb11390, event=event@entry=0x7ffff72a4af8,
...

Add the map to the list and take the reference even if we find a map
with the same name.

Signed-off-by: Jiri Olsa <[email protected]>
Cc: Adrian Hunter <[email protected]>
Cc: Alexander Shishkin <[email protected]>
Cc: Alexei Starovoitov <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Daniel Borkmann <[email protected]>
Cc: Eric Saint-Etienne <[email protected]>
Cc: Namhyung Kim <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Song Liu <[email protected]>
Fixes: 1e6285699b30 ("perf symbols: Fix slowness due to -ffunction-section")
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/util/map.c | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 28d484ef74ae..ee71efb9db62 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -926,10 +926,8 @@ static void __maps__insert_name(struct maps *maps, struct map *map)
rc = strcmp(m->dso->short_name, map->dso->short_name);
if (rc < 0)
p = &(*p)->rb_left;
- else if (rc > 0)
- p = &(*p)->rb_right;
else
- return;
+ p = &(*p)->rb_right;
}
rb_link_node(&map->rb_node_name, parent, p);
rb_insert_color(&map->rb_node_name, &maps->names);

2019-04-23 09:35:02

by Adrian Hunter

[permalink] [raw]
Subject: Re: [PATCH 06/12] perf tools: Do not erase uncovered maps by kcore

On 16/04/19 7:01 PM, Jiri Olsa wrote:
> Maps in kcore do not cover bpf maps, so we can't just
> remove everything. Keep all kernel maps that are
> not covered by kcore maps.

Memory for jited-bpf is allocated from the same area that is used for
modules. In the case of /proc/kcore, that entire area is mapped, so there
won't be any bpf-maps that are not covered. For copies of kcore made by
'perf buildid-cache' the same would be true for any bpf that got allocated
in between modules.

But shouldn't the bpf map supersede the kcore map for the address range that
it maps? I guess that would mean splitting the kcore map, truncating the
first piece and inserting the bpf map in between.
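
(Roughly along these lines, as a sketch against perf's util/map.h types;
kcore_map__split_around() is a hypothetical helper and glosses over the
pgoff fixup a real version would need:)

/* Split a kcore map around a bpf program's [bpf_start, bpf_end) range:
 * truncate the original map to end where the bpf map starts and, if the
 * bpf range ends before the kcore map does, insert a clone covering the
 * tail. */
static int kcore_map__split_around(struct map_groups *kmaps,
				   struct map *kcore_map,
				   u64 bpf_start, u64 bpf_end)
{
	if (bpf_end < kcore_map->end) {
		struct map *tail = map__clone(kcore_map);

		if (!tail)
			return -ENOMEM;
		tail->start = bpf_end;
		map_groups__insert(kmaps, tail);
	}
	kcore_map->end = bpf_start;	/* first piece ends at the bpf map */
	return 0;
}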

>
> Link: http://lkml.kernel.org/n/[email protected]
> Signed-off-by: Jiri Olsa <[email protected]>
> ---
> tools/perf/util/symbol.c | 14 +++++++++++++-
> 1 file changed, 13 insertions(+), 1 deletion(-)
>
> diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
> index 5cbad55cd99d..96738a7a8c14 100644
> --- a/tools/perf/util/symbol.c
> +++ b/tools/perf/util/symbol.c
> @@ -1166,6 +1166,18 @@ static int kcore_mapfn(u64 start, u64 len, u64 pgoff, void *data)
> return 0;
> }
>
> +static bool in_kcore(struct kcore_mapfn_data *md, struct map *map)
> +{
> + struct map *iter;
> +
> + list_for_each_entry(iter, &md->maps, node) {
> + if ((map->start >= iter->start) && (map->start < iter->end))
> + return true;
> + }
> +
> + return false;
> +}
> +
> static int dso__load_kcore(struct dso *dso, struct map *map,
> const char *kallsyms_filename)
> {
> @@ -1222,7 +1234,7 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
> while (old_map) {
> struct map *next = map_groups__next(old_map);
>
> - if (old_map != map)
> + if (old_map != map && !in_kcore(&md, old_map))
> map_groups__remove(kmaps, old_map);
> old_map = next;
> }
>

2019-04-23 14:56:24

by Jiri Olsa

[permalink] [raw]
Subject: Re: [PATCH 06/12] perf tools: Do not erase uncovered maps by kcore

On Tue, Apr 23, 2019 at 12:32:12PM +0300, Adrian Hunter wrote:
> On 16/04/19 7:01 PM, Jiri Olsa wrote:
> > Maps in kcore do not cover bpf maps, so we can't just
> > remove everything. Keep all kernel maps that are
> > not covered by kcore maps.
>
> Memory for jited-bpf is allocated from the same area that is used for
> modules. In the case of /proc/kcore, that entire area is mapped, so there
> won't be any bpf-maps that are not covered. For copies of kcore made by
> 'perf buildid-cache' the same would be true for any bpf that got allocated
> in between modules.
>
> But shouldn't the bpf map supersede the kcore map for the address range that
> it maps? I guess that would mean splitting the kcore map, truncating the
> first piece and inserting the bpf map in between.

I hadn't considered that it could get in between modules,
I think you're right and we need to cut the kcore maps
in case bpf is mapped within them.. I'll submit a new version

thanks,
jirka

>
> >
> > Link: http://lkml.kernel.org/n/[email protected]
> > Signed-off-by: Jiri Olsa <[email protected]>
> > ---
> > tools/perf/util/symbol.c | 14 +++++++++++++-
> > 1 file changed, 13 insertions(+), 1 deletion(-)
> >
> > diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
> > index 5cbad55cd99d..96738a7a8c14 100644
> > --- a/tools/perf/util/symbol.c
> > +++ b/tools/perf/util/symbol.c
> > @@ -1166,6 +1166,18 @@ static int kcore_mapfn(u64 start, u64 len, u64 pgoff, void *data)
> > return 0;
> > }
> >
> > +static bool in_kcore(struct kcore_mapfn_data *md, struct map *map)
> > +{
> > + struct map *iter;
> > +
> > + list_for_each_entry(iter, &md->maps, node) {
> > + if ((map->start >= iter->start) && (map->start < iter->end))
> > + return true;
> > + }
> > +
> > + return false;
> > +}
> > +
> > static int dso__load_kcore(struct dso *dso, struct map *map,
> > const char *kallsyms_filename)
> > {
> > @@ -1222,7 +1234,7 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
> > while (old_map) {
> > struct map *next = map_groups__next(old_map);
> >
> > - if (old_map != map)
> > + if (old_map != map && !in_kcore(&md, old_map))
> > map_groups__remove(kmaps, old_map);
> > old_map = next;
> > }
> >
>