Hi Taeung,
On Wed, Mar 01, 2017 at 04:59:51AM +0900, Taeung Song wrote:
> Currently perf-annotate show wrong line numbers.
>
> For example,
> Actual source code is as below
>
> ...
> 21 };
> 22
> 23 unsigned int limited_wgt;
> 24
> 25 unsigned int get_cond_maxprice(int wgt)
> 26 {
> ...
>
> However, the output of perf-annotate is as below.
>
> 4 Disassembly of section .text:
>
> 6 0000000000400966 <get_cond_maxprice>:
> 7 get_cond_maxprice():
> 26 };
>
> 28 unsigned int limited_wgt;
>
> 30 unsigned int get_cond_maxprice(int wgt)
> 31 {
>
> The cause is the wrong way counting line numbers
> in symbol__parse_objdump_line().
> So remove wrong current code counting line number and
> use other method for it using functions related to addr2line
> instead of the output of '-l' of objdump.
Hmm.. do you think it's a bug of objdump or it's perf failing to parse
the line number correctly? I'd like to see the output of `objdump -l`
>
> However, despite the correct line numbers,
> we can't show proper source code view
> because of limitations from output of 'objdump -S'.
>
> So, next commit will resolve the limitations from 'objdump -S'
> with the new source code view.
It seems not related with this commit..
Thanks,
Namhyung
>
> Cc: Namhyung Kim <[email protected]>
> Cc: Jiri Olsa <[email protected]>
> Signed-off-by: Taeung Song <[email protected]>
> ---
> tools/perf/util/annotate.c | 59 +++++++++++++++++++++++++++-------------------
> 1 file changed, 35 insertions(+), 24 deletions(-)
>
> diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
> index 273f21f..42b752e 100644
> --- a/tools/perf/util/annotate.c
> +++ b/tools/perf/util/annotate.c
> @@ -19,14 +19,12 @@
> #include "evsel.h"
> #include "block-range.h"
> #include "arch/common.h"
> -#include <regex.h>
> #include <pthread.h>
> #include <linux/bitops.h>
> #include <sys/utsname.h>
>
> const char *disassembler_style;
> const char *objdump_path;
> -static regex_t file_lineno;
>
> static struct ins_ops *ins__find(struct arch *arch, const char *name);
> static void ins__sort(struct arch *arch);
> @@ -814,7 +812,7 @@ static int disasm_line__parse(char *line, const char **namep, char **rawp)
> }
>
> static struct disasm_line *disasm_line__new(s64 offset, char *line,
> - size_t privsize, int line_nr,
> + size_t privsize,
> struct arch *arch,
> struct map *map)
> {
> @@ -823,7 +821,6 @@ static struct disasm_line *disasm_line__new(s64 offset, char *line,
> if (dl != NULL) {
> dl->offset = offset;
> dl->line = strdup(line);
> - dl->line_nr = line_nr;
> if (dl->line == NULL)
> goto out_delete;
>
> @@ -1121,6 +1118,29 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st
> return 0;
> }
>
> +static int parse_srcline(char *srcline, char **path, int *line_nr)
> +{
> + char *sep;
> +
> + if (path != NULL)
> + *path = NULL;
> +
> + if (!strcmp(srcline, SRCLINE_UNKNOWN))
> + return -1;
> +
> + sep = strchr(srcline, ':');
> + if (sep) {
> + *sep = '\0';
> + if (path != NULL)
> + *path = srcline;
> + if (line_nr != NULL)
> + *line_nr = strtoul(++sep, NULL, 0);
> + } else
> + return -1;
> +
> + return 0;
> +}
> +
> /*
> * symbol__parse_objdump_line() parses objdump output (with -d --no-show-raw)
> * which looks like following
> @@ -1143,15 +1163,14 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st
> */
> static int symbol__parse_objdump_line(struct symbol *sym, struct map *map,
> struct arch *arch,
> - FILE *file, size_t privsize,
> - int *line_nr)
> + FILE *file, size_t privsize)
> {
> struct annotation *notes = symbol__annotation(sym);
> struct disasm_line *dl;
> char *line = NULL, *parsed_line, *tmp, *tmp2, *c;
> size_t line_len;
> s64 line_ip, offset = -1;
> - regmatch_t match[2];
> + bool has_src_code = true;
>
> if (getline(&line, &line_len, file) < 0)
> return -1;
> @@ -1169,12 +1188,6 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map,
> line_ip = -1;
> parsed_line = line;
>
> - /* /filename:linenr ? Save line number and ignore. */
> - if (regexec(&file_lineno, line, 2, match, 0) == 0) {
> - *line_nr = atoi(line + match[1].rm_so);
> - return 0;
> - }
> -
> /*
> * Strip leading spaces:
> */
> @@ -1205,13 +1218,18 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map,
> parsed_line = tmp2 + 1;
> }
>
> - dl = disasm_line__new(offset, parsed_line, privsize, *line_nr, arch, map);
> + dl = disasm_line__new(offset, parsed_line, privsize, arch, map);
> free(line);
> - (*line_nr)++;
>
> if (dl == NULL)
> return -1;
>
> + if (has_src_code && dl->offset != -1 && map->dso->symsrc_filename) {
> + if (parse_srcline(get_srcline(map->dso, line_ip, NULL, false),
> + NULL, &dl->line_nr) < 0)
> + has_src_code = false;
> + }
> +
> if (!disasm_line__has_offset(dl)) {
> dl->ops.target.offset = dl->ops.target.addr -
> map__rip_2objdump(map, sym->start);
> @@ -1235,11 +1253,6 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map,
> return 0;
> }
>
> -static __attribute__((constructor)) void symbol__init_regexpr(void)
> -{
> - regcomp(&file_lineno, "^/[^:]+:([0-9]+)", REG_EXTENDED);
> -}
> -
> static void delete_last_nop(struct symbol *sym)
> {
> struct annotation *notes = symbol__annotation(sym);
> @@ -1360,7 +1373,6 @@ int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_na
> struct kcore_extract kce;
> bool delete_extract = false;
> int stdout_fd[2];
> - int lineno = 0;
> int nline;
> pid_t pid;
> int err = dso__disassemble_filename(dso, symfs_filename, sizeof(symfs_filename));
> @@ -1435,7 +1447,7 @@ int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_na
> snprintf(command, sizeof(command),
> "%s %s%s --start-address=0x%016" PRIx64
> " --stop-address=0x%016" PRIx64
> - " -l -d %s %s -C %s 2>/dev/null|grep -v %s|expand",
> + " -d %s %s -C %s 2>/dev/null|grep -v %s|expand",
> objdump_path ? objdump_path : "objdump",
> disassembler_style ? "-M " : "",
> disassembler_style ? disassembler_style : "",
> @@ -1482,8 +1494,7 @@ int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_na
>
> nline = 0;
> while (!feof(file)) {
> - if (symbol__parse_objdump_line(sym, map, arch, file, privsize,
> - &lineno) < 0)
> + if (symbol__parse_objdump_line(sym, map, arch, file, privsize) < 0)
> break;
> nline++;
> }
> --
> 2.7.4
>
On 03/01/2017 10:17 PM, Namhyung Kim wrote:
> Hi Taeung,
>
> On Wed, Mar 01, 2017 at 04:59:51AM +0900, Taeung Song wrote:
>> Currently perf-annotate show wrong line numbers.
>>
>> For example,
>> Actual source code is as below
>>
>> ...
>> 21 };
>> 22
>> 23 unsigned int limited_wgt;
>> 24
>> 25 unsigned int get_cond_maxprice(int wgt)
>> 26 {
>> ...
>>
>> However, the output of perf-annotate is as below.
>>
>> 4 Disassembly of section .text:
>>
>> 6 0000000000400966 <get_cond_maxprice>:
>> 7 get_cond_maxprice():
>> 26 };
>>
>> 28 unsigned int limited_wgt;
>>
>> 30 unsigned int get_cond_maxprice(int wgt)
>> 31 {
>>
>> The cause is the wrong way counting line numbers
>> in symbol__parse_objdump_line().
>> So remove wrong current code counting line number and
>> use other method for it using functions related to addr2line
>> instead of the output of '-l' of objdump.
>
> Hmm.. do you think it's a bug of objdump or it's perf failing to parse
> the line number correctly? I'd like to see the output of `objdump -l`
>
Both are ok.
'objdump -l' hasn't a bug related to line number
and perf's method parsing the line number is ok.
But symbol__parse_objdump_line() wrongly count line numbers
after parsing it as below.
1172 /* /filename:linenr ? Save line number and ignore. */
1173 if (regexec(&file_lineno, line, 2, match, 0) == 0) {
1174 *line_nr = atoi(line + match[1].rm_so);
1175 return 0;
1176 }
...
1208 dl = disasm_line__new(offset, parsed_line, privsize,
*line_nr, arch, map);
1209 free(line);
1210 (*line_nr)++;
Increasing line_nr each asm line is wrong method.
Because 'line_nr' means actual source code line number.
Sure, I can fix only the wrong counting way.
But the above parsing method(1172~1176) is never used because of 'grep -v'
in command as below.
(the grep already remove lines containing filename:linenr of output)
1435 snprintf(command, sizeof(command),
1436 "%s %s%s --start-address=0x%016" PRIx64
1437 " --stop-address=0x%016" PRIx64
1438 " -l -d %s %s -C %s 2>/dev/null|grep -v %s|expand",
1439 objdump_path ? objdump_path : "objdump",
1440 disassembler_style ? "-M " : "",
1441 disassembler_style ? disassembler_style : "",
1442 map__rip_2objdump(map, sym->start),
1443 map__rip_2objdump(map, sym->end),
1444 symbol_conf.annotate_asm_raw ? "" : "--no-show-raw",
1445 symbol_conf.annotate_src ? "-S" : "",
1446 symfs_filename, symfs_filename);
Therefore, I think it is better to do three things
1) fix the wrong counting line number problem
2) remove unused the line number parsing method
3) In addtion, a bit reduce objdump dependency
using functions related to addr2line of perf.
What do you think about that ?
Is it bad idea ?
>>
>> However, despite the correct line numbers,
>> we can't show proper source code view
>> because of limitations from output of 'objdump -S'.
>>
>> So, next commit will resolve the limitations from 'objdump -S'
>> with the new source code view.
>
> It seems not related with this commit..
>
Okey, will remove the mention.
Thanks,
Taeung
+ Andi Kleen who wrote the code.
On Thu, Mar 02, 2017 at 03:05:14PM +0900, Taeung Song wrote:
>
>
> On 03/01/2017 10:17 PM, Namhyung Kim wrote:
> > Hi Taeung,
> >
> > On Wed, Mar 01, 2017 at 04:59:51AM +0900, Taeung Song wrote:
> > > Currently perf-annotate show wrong line numbers.
> > >
> > > For example,
> > > Actual source code is as below
> > >
> > > ...
> > > 21 };
> > > 22
> > > 23 unsigned int limited_wgt;
> > > 24
> > > 25 unsigned int get_cond_maxprice(int wgt)
> > > 26 {
> > > ...
> > >
> > > However, the output of perf-annotate is as below.
> > >
> > > 4 Disassembly of section .text:
> > >
> > > 6 0000000000400966 <get_cond_maxprice>:
> > > 7 get_cond_maxprice():
> > > 26 };
> > >
> > > 28 unsigned int limited_wgt;
> > >
> > > 30 unsigned int get_cond_maxprice(int wgt)
> > > 31 {
> > >
> > > The cause is the wrong way counting line numbers
> > > in symbol__parse_objdump_line().
> > > So remove wrong current code counting line number and
> > > use other method for it using functions related to addr2line
> > > instead of the output of '-l' of objdump.
> >
> > Hmm.. do you think it's a bug of objdump or it's perf failing to parse
> > the line number correctly? I'd like to see the output of `objdump -l`
> >
>
> Both are ok.
> 'objdump -l' hasn't a bug related to line number
> and perf's method parsing the line number is ok.
>
> But symbol__parse_objdump_line() wrongly count line numbers
> after parsing it as below.
>
> 1172 /* /filename:linenr ? Save line number and ignore. */
> 1173 if (regexec(&file_lineno, line, 2, match, 0) == 0) {
> 1174 *line_nr = atoi(line + match[1].rm_so);
> 1175 return 0;
> 1176 }
> ...
> 1208 dl = disasm_line__new(offset, parsed_line, privsize, *line_nr,
> arch, map);
> 1209 free(line);
> 1210 (*line_nr)++;
>
> Increasing line_nr each asm line is wrong method.
> Because 'line_nr' means actual source code line number.
Hmm.. ok. It looks like that it should reuse the old line_nr as is.
>
> Sure, I can fix only the wrong counting way.
> But the above parsing method(1172~1176) is never used because of 'grep -v'
> in command as below.
> (the grep already remove lines containing filename:linenr of output)
Right, but only if filename is same as binary name.
>
> 1435 snprintf(command, sizeof(command),
> 1436 "%s %s%s --start-address=0x%016" PRIx64
> 1437 " --stop-address=0x%016" PRIx64
> 1438 " -l -d %s %s -C %s 2>/dev/null|grep -v %s|expand",
> 1439 objdump_path ? objdump_path : "objdump",
> 1440 disassembler_style ? "-M " : "",
> 1441 disassembler_style ? disassembler_style : "",
> 1442 map__rip_2objdump(map, sym->start),
> 1443 map__rip_2objdump(map, sym->end),
> 1444 symbol_conf.annotate_asm_raw ? "" : "--no-show-raw",
> 1445 symbol_conf.annotate_src ? "-S" : "",
> 1446 symfs_filename, symfs_filename);
>
> Therefore, I think it is better to do three things
>
> 1) fix the wrong counting line number problem
> 2) remove unused the line number parsing method
> 3) In addtion, a bit reduce objdump dependency
> using functions related to addr2line of perf.
>
> What do you think about that ?
> Is it bad idea ?
I think we need to fix 1) definitely, but not sure about 2) and 3).
If objdump could do all necessary works, why not use it? :)
Thanks,
Namhyung
>
> > >
> > > However, despite the correct line numbers,
> > > we can't show proper source code view
> > > because of limitations from output of 'objdump -S'.
> > >
> > > So, next commit will resolve the limitations from 'objdump -S'
> > > with the new source code view.
> >
> > It seems not related with this commit..
> >
>
> Okey, will remove the mention.
>
> Thanks,
> Taeung
On 03/03/2017 11:40 AM, Namhyung Kim wrote:
> + Andi Kleen who wrote the code.
>
> On Thu, Mar 02, 2017 at 03:05:14PM +0900, Taeung Song wrote:
>>
>>
>> On 03/01/2017 10:17 PM, Namhyung Kim wrote:
>>> Hi Taeung,
>>>
>>> On Wed, Mar 01, 2017 at 04:59:51AM +0900, Taeung Song wrote:
>>>> Currently perf-annotate show wrong line numbers.
>>>>
>>>> For example,
>>>> Actual source code is as below
>>>>
>>>> ...
>>>> 21 };
>>>> 22
>>>> 23 unsigned int limited_wgt;
>>>> 24
>>>> 25 unsigned int get_cond_maxprice(int wgt)
>>>> 26 {
>>>> ...
>>>>
>>>> However, the output of perf-annotate is as below.
>>>>
>>>> 4 Disassembly of section .text:
>>>>
>>>> 6 0000000000400966 <get_cond_maxprice>:
>>>> 7 get_cond_maxprice():
>>>> 26 };
>>>>
>>>> 28 unsigned int limited_wgt;
>>>>
>>>> 30 unsigned int get_cond_maxprice(int wgt)
>>>> 31 {
>>>>
>>>> The cause is the wrong way counting line numbers
>>>> in symbol__parse_objdump_line().
>>>> So remove wrong current code counting line number and
>>>> use other method for it using functions related to addr2line
>>>> instead of the output of '-l' of objdump.
>>>
>>> Hmm.. do you think it's a bug of objdump or it's perf failing to parse
>>> the line number correctly? I'd like to see the output of `objdump -l`
>>>
>>
>> Both are ok.
>> 'objdump -l' hasn't a bug related to line number
>> and perf's method parsing the line number is ok.
>>
>> But symbol__parse_objdump_line() wrongly count line numbers
>> after parsing it as below.
>>
>> 1172 /* /filename:linenr ? Save line number and ignore. */
>> 1173 if (regexec(&file_lineno, line, 2, match, 0) == 0) {
>> 1174 *line_nr = atoi(line + match[1].rm_so);
>> 1175 return 0;
>> 1176 }
>> ...
>> 1208 dl = disasm_line__new(offset, parsed_line, privsize, *line_nr,
>> arch, map);
>> 1209 free(line);
>> 1210 (*line_nr)++;
>>
>> Increasing line_nr each asm line is wrong method.
>> Because 'line_nr' means actual source code line number.
>
> Hmm.. ok. It looks like that it should reuse the old line_nr as is.
>
>>
>> Sure, I can fix only the wrong counting way.
>> But the above parsing method(1172~1176) is never used because of 'grep -v'
>> in command as below.
>> (the grep already remove lines containing filename:linenr of output)
>
> Right, but only if filename is same as binary name.
>
>>
>> 1435 snprintf(command, sizeof(command),
>> 1436 "%s %s%s --start-address=0x%016" PRIx64
>> 1437 " --stop-address=0x%016" PRIx64
>> 1438 " -l -d %s %s -C %s 2>/dev/null|grep -v %s|expand",
>> 1439 objdump_path ? objdump_path : "objdump",
>> 1440 disassembler_style ? "-M " : "",
>> 1441 disassembler_style ? disassembler_style : "",
>> 1442 map__rip_2objdump(map, sym->start),
>> 1443 map__rip_2objdump(map, sym->end),
>> 1444 symbol_conf.annotate_asm_raw ? "" : "--no-show-raw",
>> 1445 symbol_conf.annotate_src ? "-S" : "",
>> 1446 symfs_filename, symfs_filename);
>>
>> Therefore, I think it is better to do three things
>>
>> 1) fix the wrong counting line number problem
>> 2) remove unused the line number parsing method
>> 3) In addtion, a bit reduce objdump dependency
>> using functions related to addr2line of perf.
>>
>> What do you think about that ?
>> Is it bad idea ?
>
> I think we need to fix 1) definitely, but not sure about 2) and 3).
> If objdump could do all necessary works, why not use it? :)
>
> Thanks,
> Namhyung
>
>
Okey! I'll concentrate on fixing 1)
,not removing objdump -l :)
Thanks,
Taeung