2022-02-03 18:55:46

by Eelco Chaudron

[permalink] [raw]
Subject: [PATCH] perf scripting python: expose symbol offset and source information

This change adds the symbol offset to the data exported for each
call-chain entry. The offset cannot be calculated in the script from
the ip value alone, as no related mmap information is available there.

In addition, also export the source file and line information, if
available, to avoid an external lookup if this information is needed.

Signed-off-by: Eelco Chaudron <[email protected]>
---
.../util/scripting-engines/trace-event-python.c | 42 ++++++++++++++------
1 file changed, 30 insertions(+), 12 deletions(-)

diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index e752e1f4a5f0..0f392b4ff663 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -392,6 +392,18 @@ static const char *get_dsoname(struct map *map)
return dsoname;
}

+static unsigned long get_offset(struct symbol *sym, struct addr_location *al)
+{
+ unsigned long offset;
+
+ if (al->addr < sym->end)
+ offset = al->addr - sym->start;
+ else
+ offset = al->addr - al->map->start - sym->start;
+
+ return offset;
+}
+
static PyObject *python_process_callchain(struct perf_sample *sample,
struct evsel *evsel,
struct addr_location *al)
@@ -443,6 +455,24 @@ static PyObject *python_process_callchain(struct perf_sample *sample,
_PyUnicode_FromStringAndSize(node->ms.sym->name,
node->ms.sym->namelen));
pydict_set_item_string_decref(pyelem, "sym", pysym);
+
+ if (node->ms.map) {
+ struct map *map = node->ms.map;
+ struct addr_location node_al;
+ unsigned long offset;
+
+ node_al.addr = map->map_ip(map, node->ip);
+ node_al.map = map;
+ offset = get_offset(node->ms.sym, &node_al);
+
+ pydict_set_item_string_decref(
+ pyelem, "sym_off",
+ PyLong_FromUnsignedLongLong(offset));
+ }
+ if (node->srcline && strcmp(":0", node->srcline))
+ pydict_set_item_string_decref(
+ pyelem, "sym_srcline",
+ _PyUnicode_FromString(node->srcline));
}

if (node->ms.map) {
@@ -520,18 +550,6 @@ static PyObject *python_process_brstack(struct perf_sample *sample,
return pylist;
}

-static unsigned long get_offset(struct symbol *sym, struct addr_location *al)
-{
- unsigned long offset;
-
- if (al->addr < sym->end)
- offset = al->addr - sym->start;
- else
- offset = al->addr - al->map->start - sym->start;
-
- return offset;
-}
-
static int get_symoff(struct symbol *sym, struct addr_location *al,
bool print_off, char *bf, int size)
{


2022-02-10 09:29:38

by Jiri Olsa

[permalink] [raw]
Subject: Re: [PATCH] perf scripting python: expose symbol offset and source information

On Thu, Feb 03, 2022 at 09:44:33AM -0500, Eelco Chaudron wrote:
> This change adds the symbol offset to the data exported for each
> call-chain entry. This can not be calculated from the script and
> only the ip value, and no related mmap information.
>
> In addition, also export the source file and line information, if
> available, to avoid an external lookup if this information is needed.

could you please update Documentation/perf-script-python.txt with that?

any example script under scripts/python would be great

thanks,
jirka

>
> Signed-off-by: Eelco Chaudron <[email protected]>
> ---
> .../util/scripting-engines/trace-event-python.c | 42 ++++++++++++++------
> 1 file changed, 30 insertions(+), 12 deletions(-)
>
> diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
> index e752e1f4a5f0..0f392b4ff663 100644
> --- a/tools/perf/util/scripting-engines/trace-event-python.c
> +++ b/tools/perf/util/scripting-engines/trace-event-python.c
> @@ -392,6 +392,18 @@ static const char *get_dsoname(struct map *map)
> return dsoname;
> }
>
> +static unsigned long get_offset(struct symbol *sym, struct addr_location *al)
> +{
> + unsigned long offset;
> +
> + if (al->addr < sym->end)
> + offset = al->addr - sym->start;
> + else
> + offset = al->addr - al->map->start - sym->start;
> +
> + return offset;
> +}
> +
> static PyObject *python_process_callchain(struct perf_sample *sample,
> struct evsel *evsel,
> struct addr_location *al)
> @@ -443,6 +455,24 @@ static PyObject *python_process_callchain(struct perf_sample *sample,
> _PyUnicode_FromStringAndSize(node->ms.sym->name,
> node->ms.sym->namelen));
> pydict_set_item_string_decref(pyelem, "sym", pysym);
> +
> + if (node->ms.map) {
> + struct map *map = node->ms.map;
> + struct addr_location node_al;
> + unsigned long offset;
> +
> + node_al.addr = map->map_ip(map, node->ip);
> + node_al.map = map;
> + offset = get_offset(node->ms.sym, &node_al);
> +
> + pydict_set_item_string_decref(
> + pyelem, "sym_off",
> + PyLong_FromUnsignedLongLong(offset));
> + }
> + if (node->srcline && strcmp(":0", node->srcline))
> + pydict_set_item_string_decref(
> + pyelem, "sym_srcline",
> + _PyUnicode_FromString(node->srcline));

nit missing { } for multiline if code

> }
>
> if (node->ms.map) {
> @@ -520,18 +550,6 @@ static PyObject *python_process_brstack(struct perf_sample *sample,
> return pylist;
> }
>
> -static unsigned long get_offset(struct symbol *sym, struct addr_location *al)
> -{
> - unsigned long offset;
> -
> - if (al->addr < sym->end)
> - offset = al->addr - sym->start;
> - else
> - offset = al->addr - al->map->start - sym->start;
> -
> - return offset;
> -}
> -
> static int get_symoff(struct symbol *sym, struct addr_location *al,
> bool print_off, char *bf, int size)
> {
>

2022-02-22 17:52:19

by Eelco Chaudron

[permalink] [raw]
Subject: Re: [PATCH] perf scripting python: expose symbol offset and source information



On 10 Feb 2022, at 8:52, Jiri Olsa wrote:

> On Thu, Feb 03, 2022 at 09:44:33AM -0500, Eelco Chaudron wrote:
>> This change adds the symbol offset to the data exported for each
>> call-chain entry. This can not be calculated from the script and
>> only the ip value, and no related mmap information.
>>
>> In addition, also export the source file and line information, if
>> available, to avoid an external lookup if this information is needed.
>
> could you please update Documentation/perf-script-python.txt with that?
>
> any example script under scripts/python would be great

Looks like there is no documentation at all regarding the call chains, only the auto-generated script.
So rather than trying to describe this, I decided to update the auto-generated script to include this information as an example.

The current output, if it includes a callchain, is:

[ffffffff99e03f0e] syscall_trace_enter
[ffffffff99e03f0e] syscall_trace_enter
[ffffffff99e043a9] do_syscall_64
[ffffffff9a8000ad] entry_SYSCALL_64_after_hwframe
[7f5b552280db] __GI_getrusage
[17a4d78] getrusage_thread
[17a4d78] refresh_rusage
[17a4d78] time_poll
[178f683] poll_block
[16bceb5] udpif_revalidator
[177b71c] ovsthread_wrapper
[7f5b57a66179] start_thread
[7f5b55231dc2] __GI___clone

The new output will be (depending on the information available):

[ffffffff99e0392f] syscall_slow_exit_work+0xaf ([kernel.kallsyms])
[ffffffff99e0392f] syscall_slow_exit_work+0xaf ([kernel.kallsyms])
[ffffffff99e043d0] do_syscall_64+0x170 ([kernel.kallsyms])
[ffffffff9a8000ad] entry_SYSCALL_64_after_hwframe+0x65 ([kernel.kallsyms])
[7f5b55226a41] __GI___poll+0x51 (/usr/lib64/libc-2.28.so)
[17a4c50] time_poll+0x190 (/usr/sbin/ovs-vswitchd) timeval.c:326
[178f683] poll_block+0x83 (/usr/sbin/ovs-vswitchd) poll-loop.c:364
[16bceb5] udpif_revalidator+0x185 (/usr/sbin/ovs-vswitchd) ofproto-dpif-upcall.c:1024
[177b71c] ovsthread_wrapper+0x5c (/usr/sbin/ovs-vswitchd) ovs-thread.c:422
[7f5b57a66179] start_thread+0xe9 (/usr/lib64/libpthread-2.28.so)
[7f5b55231dc2] __GI___clone+0x42 (/usr/lib64/libc-2.28.so)

I will send out the v2 soon.

>>
>> Signed-off-by: Eelco Chaudron <[email protected]>
>> ---
>> .../util/scripting-engines/trace-event-python.c | 42 ++++++++++++++------
>> 1 file changed, 30 insertions(+), 12 deletions(-)
>>
>> diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
>> index e752e1f4a5f0..0f392b4ff663 100644
>> --- a/tools/perf/util/scripting-engines/trace-event-python.c
>> +++ b/tools/perf/util/scripting-engines/trace-event-python.c
>> @@ -392,6 +392,18 @@ static const char *get_dsoname(struct map *map)
>> return dsoname;
>> }
>>
>> +static unsigned long get_offset(struct symbol *sym, struct addr_location *al)
>> +{
>> + unsigned long offset;
>> +
>> + if (al->addr < sym->end)
>> + offset = al->addr - sym->start;
>> + else
>> + offset = al->addr - al->map->start - sym->start;
>> +
>> + return offset;
>> +}
>> +
>> static PyObject *python_process_callchain(struct perf_sample *sample,
>> struct evsel *evsel,
>> struct addr_location *al)
>> @@ -443,6 +455,24 @@ static PyObject *python_process_callchain(struct perf_sample *sample,
>> _PyUnicode_FromStringAndSize(node->ms.sym->name,
>> node->ms.sym->namelen));
>> pydict_set_item_string_decref(pyelem, "sym", pysym);
>> +
>> + if (node->ms.map) {
>> + struct map *map = node->ms.map;
>> + struct addr_location node_al;
>> + unsigned long offset;
>> +
>> + node_al.addr = map->map_ip(map, node->ip);
>> + node_al.map = map;
>> + offset = get_offset(node->ms.sym, &node_al);
>> +
>> + pydict_set_item_string_decref(
>> + pyelem, "sym_off",
>> + PyLong_FromUnsignedLongLong(offset));
>> + }
>> + if (node->srcline && strcmp(":0", node->srcline))
>> + pydict_set_item_string_decref(
>> + pyelem, "sym_srcline",
>> + _PyUnicode_FromString(node->srcline));
>
> nit missing { } for multiline if code
>

Will fix in v2
>> }
>>
>> if (node->ms.map) {
>> @@ -520,18 +550,6 @@ static PyObject *python_process_brstack(struct perf_sample *sample,
>> return pylist;
>> }
>>
>> -static unsigned long get_offset(struct symbol *sym, struct addr_location *al)
>> -{
>> - unsigned long offset;
>> -
>> - if (al->addr < sym->end)
>> - offset = al->addr - sym->start;
>> - else
>> - offset = al->addr - al->map->start - sym->start;
>> -
>> - return offset;
>> -}
>> -
>> static int get_symoff(struct symbol *sym, struct addr_location *al,
>> bool print_off, char *bf, int size)
>> {
>>