2022-02-22 15:22:09

by Eelco Chaudron

[permalink] [raw]
Subject: [PATCH v2] perf scripting python: expose symbol offset and source information

This change adds the symbol offset to the data exported for each
call-chain entry. The offset cannot be calculated from within the
script, which has only the ip value and no related mmap information.

In addition, also export the source file and line information, if
available, to avoid an external lookup if this information is needed.

Signed-off-by: Eelco Chaudron <[email protected]>
---
v2:
- Fixed small code nit
- Included new features in auto generated scripts

.../util/scripting-engines/trace-event-python.c | 49 +++++++++++++++-----
1 file changed, 36 insertions(+), 13 deletions(-)

diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index e752e1f4a5f0..86a9c8614231 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -392,6 +392,18 @@ static const char *get_dsoname(struct map *map)
return dsoname;
}

+static unsigned long get_offset(struct symbol *sym, struct addr_location *al)
+{
+ unsigned long offset;
+
+ if (al->addr < sym->end)
+ offset = al->addr - sym->start;
+ else
+ offset = al->addr - al->map->start - sym->start;
+
+ return offset;
+}
+
static PyObject *python_process_callchain(struct perf_sample *sample,
struct evsel *evsel,
struct addr_location *al)
@@ -443,6 +455,25 @@ static PyObject *python_process_callchain(struct perf_sample *sample,
_PyUnicode_FromStringAndSize(node->ms.sym->name,
node->ms.sym->namelen));
pydict_set_item_string_decref(pyelem, "sym", pysym);
+
+ if (node->ms.map) {
+ struct map *map = node->ms.map;
+ struct addr_location node_al;
+ unsigned long offset;
+
+ node_al.addr = map->map_ip(map, node->ip);
+ node_al.map = map;
+ offset = get_offset(node->ms.sym, &node_al);
+
+ pydict_set_item_string_decref(
+ pyelem, "sym_off",
+ PyLong_FromUnsignedLongLong(offset));
+ }
+ if (node->srcline && strcmp(":0", node->srcline)) {
+ pydict_set_item_string_decref(
+ pyelem, "sym_srcline",
+ _PyUnicode_FromString(node->srcline));
+ }
}

if (node->ms.map) {
@@ -520,18 +551,6 @@ static PyObject *python_process_brstack(struct perf_sample *sample,
return pylist;
}

-static unsigned long get_offset(struct symbol *sym, struct addr_location *al)
-{
- unsigned long offset;
-
- if (al->addr < sym->end)
- offset = al->addr - sym->start;
- else
- offset = al->addr - al->map->start - sym->start;
-
- return offset;
-}
-
static int get_symoff(struct symbol *sym, struct addr_location *al,
bool print_off, char *bf, int size)
{
@@ -2073,7 +2092,11 @@ static int python_generate_script(struct tep_handle *pevent, const char *outfile

fprintf(ofp, "\t\tfor node in common_callchain:");
fprintf(ofp, "\n\t\t\tif 'sym' in node:");
- fprintf(ofp, "\n\t\t\t\tprint(\"\\t[%%x] %%s\" %% (node['ip'], node['sym']['name']))");
+ fprintf(ofp, "\n\t\t\t\tprint(\"\t[%%x] %%s%%s%%s%%s\" %% (");
+ fprintf(ofp, "\n\t\t\t\t\tnode['ip'], node['sym']['name'],");
+ fprintf(ofp, "\n\t\t\t\t\t\"+0x{:x}\".format(node['sym_off']) if 'sym_off' in node else \"\",");
+ fprintf(ofp, "\n\t\t\t\t\t\" ({})\".format(node['dso']) if 'dso' in node else \"\",");
+ fprintf(ofp, "\n\t\t\t\t\t\" \" + node['sym_srcline'] if 'sym_srcline' in node else \"\"))");
fprintf(ofp, "\n\t\t\telse:");
fprintf(ofp, "\n\t\t\t\tprint(\"\t[%%x]\" %% (node['ip']))\n\n");
fprintf(ofp, "\t\tprint()\n\n");


2022-04-08 18:40:00

by Eelco Chaudron

[permalink] [raw]
Subject: Re: [PATCH v2] perf scripting python: expose symbol offset and source information



On 22 Feb 2022, at 16:11, Eelco Chaudron wrote:

> This change adds the symbol offset to the data exported for each
> call-chain entry. The offset cannot be calculated from within the
> script, which has only the ip value and no related mmap information.
>
> In addition, also export the source file and line information, if
> available, to avoid an external lookup if this information is needed.
>
> Signed-off-by: Eelco Chaudron <[email protected]>

Hi All, was wondering if this patch got lost?

> ---
> v2:
> - Fixed small code nit
> - Included new features in auto generated scripts
>
> .../util/scripting-engines/trace-event-python.c | 49 +++++++++++++++-----
> 1 file changed, 36 insertions(+), 13 deletions(-)
>
> diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
> index e752e1f4a5f0..86a9c8614231 100644
> --- a/tools/perf/util/scripting-engines/trace-event-python.c
> +++ b/tools/perf/util/scripting-engines/trace-event-python.c
> @@ -392,6 +392,18 @@ static const char *get_dsoname(struct map *map)
> return dsoname;
> }
>
> +static unsigned long get_offset(struct symbol *sym, struct addr_location *al)
> +{
> + unsigned long offset;
> +
> + if (al->addr < sym->end)
> + offset = al->addr - sym->start;
> + else
> + offset = al->addr - al->map->start - sym->start;
> +
> + return offset;
> +}
> +
> static PyObject *python_process_callchain(struct perf_sample *sample,
> struct evsel *evsel,
> struct addr_location *al)
> @@ -443,6 +455,25 @@ static PyObject *python_process_callchain(struct perf_sample *sample,
> _PyUnicode_FromStringAndSize(node->ms.sym->name,
> node->ms.sym->namelen));
> pydict_set_item_string_decref(pyelem, "sym", pysym);
> +
> + if (node->ms.map) {
> + struct map *map = node->ms.map;
> + struct addr_location node_al;
> + unsigned long offset;
> +
> + node_al.addr = map->map_ip(map, node->ip);
> + node_al.map = map;
> + offset = get_offset(node->ms.sym, &node_al);
> +
> + pydict_set_item_string_decref(
> + pyelem, "sym_off",
> + PyLong_FromUnsignedLongLong(offset));
> + }
> + if (node->srcline && strcmp(":0", node->srcline)) {
> + pydict_set_item_string_decref(
> + pyelem, "sym_srcline",
> + _PyUnicode_FromString(node->srcline));
> + }
> }
>
> if (node->ms.map) {
> @@ -520,18 +551,6 @@ static PyObject *python_process_brstack(struct perf_sample *sample,
> return pylist;
> }
>
> -static unsigned long get_offset(struct symbol *sym, struct addr_location *al)
> -{
> - unsigned long offset;
> -
> - if (al->addr < sym->end)
> - offset = al->addr - sym->start;
> - else
> - offset = al->addr - al->map->start - sym->start;
> -
> - return offset;
> -}
> -
> static int get_symoff(struct symbol *sym, struct addr_location *al,
> bool print_off, char *bf, int size)
> {
> @@ -2073,7 +2092,11 @@ static int python_generate_script(struct tep_handle *pevent, const char *outfile
>
> fprintf(ofp, "\t\tfor node in common_callchain:");
> fprintf(ofp, "\n\t\t\tif 'sym' in node:");
> - fprintf(ofp, "\n\t\t\t\tprint(\"\\t[%%x] %%s\" %% (node['ip'], node['sym']['name']))");
> + fprintf(ofp, "\n\t\t\t\tprint(\"\t[%%x] %%s%%s%%s%%s\" %% (");
> + fprintf(ofp, "\n\t\t\t\t\tnode['ip'], node['sym']['name'],");
> + fprintf(ofp, "\n\t\t\t\t\t\"+0x{:x}\".format(node['sym_off']) if 'sym_off' in node else \"\",");
> + fprintf(ofp, "\n\t\t\t\t\t\" ({})\".format(node['dso']) if 'dso' in node else \"\",");
> + fprintf(ofp, "\n\t\t\t\t\t\" \" + node['sym_srcline'] if 'sym_srcline' in node else \"\"))");
> fprintf(ofp, "\n\t\t\telse:");
> fprintf(ofp, "\n\t\t\t\tprint(\"\t[%%x]\" %% (node['ip']))\n\n");
> fprintf(ofp, "\t\tprint()\n\n");

2022-04-11 03:42:22

by Arnaldo Carvalho de Melo

[permalink] [raw]
Subject: Re: [PATCH v2] perf scripting python: expose symbol offset and source information

Em Fri, Apr 08, 2022 at 04:18:38PM +0200, Eelco Chaudron escreveu:
>
>
> On 22 Feb 2022, at 16:11, Eelco Chaudron wrote:
>
> > This change adds the symbol offset to the data exported for each
> > call-chain entry. The offset cannot be calculated from within the
> > script, which has only the ip value and no related mmap information.
> >
> > In addition, also export the source file and line information, if
> > available, to avoid an external lookup if this information is needed.
> >
> > Signed-off-by: Eelco Chaudron <[email protected]>
>
> Hi All, was wondering if this patch got lost?

Applied now to perf/core, heading to 5.19,

- Arnaldo

> > ---
> > v2:
> > - Fixed small code nit
> > - Included new features in auto generated scripts
> >
> > .../util/scripting-engines/trace-event-python.c | 49 +++++++++++++++-----
> > 1 file changed, 36 insertions(+), 13 deletions(-)
> >
> > diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
> > index e752e1f4a5f0..86a9c8614231 100644
> > --- a/tools/perf/util/scripting-engines/trace-event-python.c
> > +++ b/tools/perf/util/scripting-engines/trace-event-python.c
> > @@ -392,6 +392,18 @@ static const char *get_dsoname(struct map *map)
> > return dsoname;
> > }
> >
> > +static unsigned long get_offset(struct symbol *sym, struct addr_location *al)
> > +{
> > + unsigned long offset;
> > +
> > + if (al->addr < sym->end)
> > + offset = al->addr - sym->start;
> > + else
> > + offset = al->addr - al->map->start - sym->start;
> > +
> > + return offset;
> > +}
> > +
> > static PyObject *python_process_callchain(struct perf_sample *sample,
> > struct evsel *evsel,
> > struct addr_location *al)
> > @@ -443,6 +455,25 @@ static PyObject *python_process_callchain(struct perf_sample *sample,
> > _PyUnicode_FromStringAndSize(node->ms.sym->name,
> > node->ms.sym->namelen));
> > pydict_set_item_string_decref(pyelem, "sym", pysym);
> > +
> > + if (node->ms.map) {
> > + struct map *map = node->ms.map;
> > + struct addr_location node_al;
> > + unsigned long offset;
> > +
> > + node_al.addr = map->map_ip(map, node->ip);
> > + node_al.map = map;
> > + offset = get_offset(node->ms.sym, &node_al);
> > +
> > + pydict_set_item_string_decref(
> > + pyelem, "sym_off",
> > + PyLong_FromUnsignedLongLong(offset));
> > + }
> > + if (node->srcline && strcmp(":0", node->srcline)) {
> > + pydict_set_item_string_decref(
> > + pyelem, "sym_srcline",
> > + _PyUnicode_FromString(node->srcline));
> > + }
> > }
> >
> > if (node->ms.map) {
> > @@ -520,18 +551,6 @@ static PyObject *python_process_brstack(struct perf_sample *sample,
> > return pylist;
> > }
> >
> > -static unsigned long get_offset(struct symbol *sym, struct addr_location *al)
> > -{
> > - unsigned long offset;
> > -
> > - if (al->addr < sym->end)
> > - offset = al->addr - sym->start;
> > - else
> > - offset = al->addr - al->map->start - sym->start;
> > -
> > - return offset;
> > -}
> > -
> > static int get_symoff(struct symbol *sym, struct addr_location *al,
> > bool print_off, char *bf, int size)
> > {
> > @@ -2073,7 +2092,11 @@ static int python_generate_script(struct tep_handle *pevent, const char *outfile
> >
> > fprintf(ofp, "\t\tfor node in common_callchain:");
> > fprintf(ofp, "\n\t\t\tif 'sym' in node:");
> > - fprintf(ofp, "\n\t\t\t\tprint(\"\\t[%%x] %%s\" %% (node['ip'], node['sym']['name']))");
> > + fprintf(ofp, "\n\t\t\t\tprint(\"\t[%%x] %%s%%s%%s%%s\" %% (");
> > + fprintf(ofp, "\n\t\t\t\t\tnode['ip'], node['sym']['name'],");
> > + fprintf(ofp, "\n\t\t\t\t\t\"+0x{:x}\".format(node['sym_off']) if 'sym_off' in node else \"\",");
> > + fprintf(ofp, "\n\t\t\t\t\t\" ({})\".format(node['dso']) if 'dso' in node else \"\",");
> > + fprintf(ofp, "\n\t\t\t\t\t\" \" + node['sym_srcline'] if 'sym_srcline' in node else \"\"))");
> > fprintf(ofp, "\n\t\t\telse:");
> > fprintf(ofp, "\n\t\t\t\tprint(\"\t[%%x]\" %% (node['ip']))\n\n");
> > fprintf(ofp, "\t\tprint()\n\n");

--

- Arnaldo