Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1753571AbdF2PSd (ORCPT ); Thu, 29 Jun 2017 11:18:33 -0400 Received: from mail-pg0-f66.google.com ([74.125.83.66]:35237 "EHLO mail-pg0-f66.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753229AbdF2PS0 (ORCPT ); Thu, 29 Jun 2017 11:18:26 -0400 MIME-Version: 1.0 In-Reply-To: References: <1498619646-719-1-git-send-email-treeze.taeung@gmail.com> From: Namhyung Kim Date: Fri, 30 Jun 2017 00:18:24 +0900 X-Google-Sender-Auth: PtDJTpfFHnLhrKtRtyka-gHbqAs Message-ID: Subject: Re: [PATCH/RFC 1/4] perf annotate: Add --source-only option To: Taeung Song Cc: Arnaldo Carvalho de Melo , LKML , Milian Wolff , Jiri Olsa , Masami Hiramatsu , Adrian Hunter , Wang Nan , Jin Yao , Andi Kleen , Kim Phillips , David Ahern Content-Type: text/plain; charset="UTF-8" Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 20405 Lines: 563 Resend with a proper mail address.. On Fri, Jun 30, 2017 at 12:14 AM, Namhyung Kim wrote: > Hi Taeung, > > Thanks for doing this work! > > On Wed, Jun 28, 2017 at 12:14 PM, Taeung Song wrote: >> The option can show the result of performance analysis >> based on full source code per symbol(function). >> This view can be a precheck before assembly code level >> and be the additional useful view point. >> >> For example, if target symbol is 'hex2u64' of util/util.c, >> the output is like below. >> >> $ perf annotate --source-only --stdio -s hex2u64 >> Percent | Source code of util.c for cycles:ppp (42 samples) >> ----------------------------------------------------------------- >> 0.00 : 354 * While we find nice hex chars, build a long_val. >> 0.00 : 355 * Return number of chars processed. >> 0.00 : 356 */ >> 0.00 : 357 int hex2u64(const char *ptr, u64 *long_val) >> 2.38 : 358 { >> 2.38 : 359 const char *p = ptr; >> 0.00 : 360 *long_val = 0; >> 0.00 : 361 >> 30.95 : 362 while (*p) { >> 23.81 : 363 const int hex_val = hex(*p); >> 0.00 : 364 >> 14.29 : 365 if (hex_val < 0) >> 0.00 : 366 break; >> 0.00 : 367 >> 26.19 : 368 *long_val = (*long_val << 4) | hex_val; >> 0.00 : 369 p++; >> 0.00 : 370 } >> 0.00 : 371 >> 0.00 : 372 return p - ptr; >> 0.00 : 373 } >> >> Suggested-by: Namhyung Kim >> Cc: Milian Wolff >> Cc: Jiri Olsa >> Cc: Masami Hiramatsu >> Cc: Adrian Hunter >> Cc: Wang Nan >> Cc: Jin Yao >> Cc: Andi Kleen >> Cc: Kim Phillips >> Cc: David Ahern >> Signed-off-by: Taeung Song >> --- >> tools/perf/builtin-annotate.c | 2 + >> tools/perf/ui/browsers/annotate.c | 5 - >> tools/perf/util/annotate.c | 303 +++++++++++++++++++++++++++++++++++++- >> tools/perf/util/annotate.h | 22 +++ >> tools/perf/util/symbol.c | 1 + >> tools/perf/util/symbol.h | 1 + >> 6 files changed, 325 insertions(+), 9 deletions(-) >> >> diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c >> index 7a5dc7e..054c74f 100644 >> --- a/tools/perf/builtin-annotate.c >> +++ b/tools/perf/builtin-annotate.c >> @@ -436,6 +436,8 @@ int cmd_annotate(int argc, const char **argv) >> symbol__config_symfs), >> OPT_BOOLEAN(0, "source", &symbol_conf.annotate_src, >> "Interleave source code with assembly code (default)"), >> + OPT_BOOLEAN(0, "source-only", &symbol_conf.annotate_src_only, >> + "Display source code for each symbol"), >> OPT_BOOLEAN(0, "asm-raw", &symbol_conf.annotate_asm_raw, >> "Display raw encoding of assembly instructions (default)"), >> OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style", >> diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c >> index 27f41f2..b075a32 100644 >> --- a/tools/perf/ui/browsers/annotate.c >> +++ b/tools/perf/ui/browsers/annotate.c >> @@ -14,11 +14,6 @@ >> #include >> #include >> >> -struct disasm_line_samples { >> - double percent; >> - u64 nr; >> -}; >> - >> #define IPC_WIDTH 6 >> #define CYCLES_WIDTH 6 >> >> diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c >> index be1caab..4108520 100644 >> --- a/tools/perf/util/annotate.c >> +++ b/tools/perf/util/annotate.c >> @@ -1379,6 +1379,277 @@ static const char *annotate__norm_arch(const char *arch_name) >> return normalize_arch((char *)arch_name); >> } >> >> +static int parse_srcline(char *srcline, char **path, int *line_nr) >> +{ >> + char *sep; >> + >> + if (srcline == NULL || !strcmp(srcline, SRCLINE_UNKNOWN)) >> + return -1; >> + >> + sep = strchr(srcline, ':'); >> + if (sep) { >> + *sep = '\0'; >> + if (path) >> + *path = srcline; >> + *line_nr = strtoul(++sep, NULL, 0); >> + } else >> + return -1; >> + >> + return 0; >> +} >> + >> +static void code_line__free(struct code_line *cl) >> +{ >> + zfree(&cl->line); >> + zfree(&cl->matched_dl); >> + zfree(&cl->samples_sum); >> + free(cl); >> +} >> + >> +static void code_lines__free(struct list_head *code_lines) >> +{ >> + struct code_line *pos, *tmp; >> + >> + if (list_empty(code_lines)) >> + return; >> + >> + list_for_each_entry_safe(pos, tmp, code_lines, node) { >> + list_del_init(&pos->node); >> + code_line__free(pos); >> + } >> +} >> + >> +static int symbol__free_source_code(struct symbol *sym) >> +{ >> + struct annotation *notes = symbol__annotation(sym); >> + struct source_code *code = notes->src->code; >> + >> + if (code == NULL) >> + return -1; >> + >> + code_lines__free(&code->lines); >> + zfree(&code->path); >> + zfree(¬es->src->code); >> + return 0; >> +} >> + >> +static void code_line__sum_samples(struct code_line *cl, struct disasm_line *dl, >> + struct annotation *notes, struct perf_evsel *evsel) >> +{ >> + int i; >> + u64 nr_samples; >> + struct sym_hist *h; >> + struct source_code *code = notes->src->code; >> + >> + for (i = 0; i < code->nr_events; i++) { >> + double percent = 0.0; >> + >> + h = annotation__histogram(notes, evsel->idx + i); >> + nr_samples = h->addr[dl->offset]; >> + if (h->sum) >> + percent = 100.0 * nr_samples / h->sum; >> + >> + cl->samples_sum[i].percent += percent; >> + cl->samples_sum[i].nr += nr_samples; >> + } >> +} >> + >> +static void source_code__print(struct code_line *cl, int nr_events, >> + struct annotation *notes, struct perf_evsel *evsel) >> +{ >> + int i; >> + const char *color; >> + double percent, max_percent = 0.0; >> + >> + for (i = 0; i < cl->nr_matched_dl; i++) >> + code_line__sum_samples(cl, cl->matched_dl[i], notes, evsel); >> + >> + for (i = 0; i < nr_events; i++) { >> + percent = cl->samples_sum[i].percent; >> + color = get_percent_color(percent); >> + if (max_percent < percent) >> + max_percent = percent; >> + >> + if (symbol_conf.show_total_period) >> + color_fprintf(stdout, color, " %7" PRIu64, >> + cl->samples_sum[i].nr); >> + else >> + color_fprintf(stdout, color, " %7.2f", percent); >> + } >> + color = get_percent_color(max_percent); >> + color_fprintf(stdout, color, " : %d %s\n", >> + cl->line_nr, cl->line); >> +} >> + >> +static int code_line__match_with_dl(struct symbol *sym, struct map *map, >> + struct code_line *cl) >> +{ >> + int nr_dl = 0; >> + size_t allocated = sizeof(struct disasm_line **); > > I don't like the name 'allocated'. It's a size of a pointer to > disasm_line - so should be single "*" not "**" IMHO. What about just > 'sizeof_dl'? > > >> + u64 start = map__rip_2objdump(map, sym->start); >> + struct annotation *notes = symbol__annotation(sym); >> + struct disasm_line *pos, **matched_dl = zalloc(allocated); > > You can simply initialize the 'matched_dl' to NULL ... > >> + >> + if (!matched_dl) >> + return -1; >> + >> + list_for_each_entry(pos, ¬es->src->source, node) { >> + int line_nr; >> + u64 offset; >> + char *srcline; >> + >> + if (pos->offset == -1) >> + continue; >> + >> + offset = pos->offset + start; >> + srcline = get_srcline(map->dso, offset, NULL, false, true); >> + >> + if (parse_srcline(srcline, NULL, &line_nr) < 0) >> + continue; >> + >> + if (line_nr == cl->line_nr) { >> + if (nr_dl > 0) { >> + struct disasm_line **tmp = >> + realloc(matched_dl, allocated * (nr_dl + 1)); > > ... since realloc() can handle NULL. So the if statement is not needed. > > Also I think the srcline might be from a different source file. You'd > be better checking the filename as well. > >> + >> + if (!tmp) { >> + free(matched_dl); >> + return -1; >> + } >> + matched_dl = tmp; >> + } >> + matched_dl[nr_dl++] = pos; >> + } >> + >> + if (nr_dl && (line_nr != cl->line_nr)) >> + break; > > Hmm.. you seem to assume that the disasm lines having a same srcline > is contiguous. But it would not be true for an optimized code.. > > >> + } >> + >> + cl->matched_dl = matched_dl; >> + cl->nr_matched_dl = nr_dl; >> + return 0; >> +} >> + >> +static struct code_line *code_line__new(char *line, int linenr, int nr_events) >> +{ >> + struct code_line *cl = zalloc(sizeof(*cl)); >> + >> + if (!cl) >> + return NULL; >> + >> + cl->line_nr = linenr; >> + cl->line = strdup(line); > > This also can fail. > >> + cl->samples_sum = >> + zalloc(sizeof(struct disasm_line_samples) * nr_events); >> + if (!cl->samples_sum) >> + zfree(&cl); >> + >> + return cl; >> +} >> + >> +static int source_code__collect(struct symbol *sym, struct map *map, >> + struct source_code *code, char *path, >> + int start_linenr, int end_linenr) >> +{ >> + int ret = -1, linenr = 0; >> + char *line = NULL, *parsed_line; >> + size_t len; >> + FILE *file; >> + struct code_line *cl; >> + >> + file = fopen(path, "r"); >> + if (!file) >> + return -1; >> + >> + if (srcline_full_filename) >> + code->path = strdup(path); >> + else >> + code->path = strdup(basename(path)); >> + >> + INIT_LIST_HEAD(&code->lines); >> + while (!feof(file)) { >> + if (getline(&line, &len, file) < 0) >> + goto out_err; >> + >> + if (++linenr < start_linenr) >> + continue; >> + >> + parsed_line = rtrim(line); >> + cl = code_line__new(parsed_line, linenr, code->nr_events); >> + if (!cl) >> + goto out_err; >> + >> + if (code_line__match_with_dl(sym, map, cl) < 0) >> + goto out_err; >> + >> + list_add_tail(&cl->node, &code->lines); >> + if (linenr == end_linenr) { >> + ret = 0; >> + goto out; >> + } >> + } >> +out_err: >> + code_lines__free(&code->lines); >> +out: >> + free(line); >> + fclose(file); >> + return ret; >> +} >> + >> +static int symbol__get_source_code(struct symbol *sym, struct map *map, >> + struct perf_evsel *evsel) >> +{ >> + int start_linenr, end_linenr, ret = -1; >> + char *path, *start_srcline = NULL, *end_srcline = NULL; >> + u64 start = map__rip_2objdump(map, sym->start); >> + u64 end = map__rip_2objdump(map, sym->end - 1); >> + struct annotation *notes = symbol__annotation(sym); >> + struct source_code *code; >> + >> + if (map->dso->symsrc_filename) { >> + bool tmp = srcline_full_filename; >> + >> + srcline_full_filename = true; >> + start_srcline = get_srcline(map->dso, start, NULL, false, true); >> + end_srcline = get_srcline(map->dso, end, NULL, false, true); > > I'm not sure it's guaranteed that the last instruction is always at > the last line of the source code. > > Thanks, > Namhyung > > >> + srcline_full_filename = tmp; >> + } else >> + return -1; >> + >> + if (parse_srcline(start_srcline, &path, &start_linenr) < 0) >> + goto out; >> + if (parse_srcline(end_srcline, &path, &end_linenr) < 0) >> + goto out; >> + >> + code = zalloc(sizeof(struct source_code)); >> + if (code == NULL) >> + goto out; >> + >> + if (perf_evsel__is_group_event(evsel)) >> + code->nr_events = evsel->nr_members; >> + else >> + code->nr_events = 1; >> + >> + /* To roughly read a function header */ >> + if (start_linenr > 4) >> + start_linenr -= 4; >> + else >> + start_linenr = 1; >> + >> + if (source_code__collect(sym, map, code, path, start_linenr, >> + end_linenr) < 0) { >> + zfree(&code); >> + goto out; >> + } >> + >> + ret = 0; >> + notes->src->code = code; >> +out: >> + free_srcline(start_srcline); >> + free_srcline(end_srcline); >> + return ret; >> +} >> + >> int symbol__disassemble(struct symbol *sym, struct map *map, >> const char *arch_name, size_t privsize, >> struct arch **parch) >> @@ -1509,7 +1780,6 @@ int symbol__disassemble(struct symbol *sym, struct map *map, >> >> if (nline == 0) >> pr_err("No output from %s\n", command); >> - >> /* >> * kallsyms does not have symbol sizes so there may a nop at the end. >> * Remove it. >> @@ -1768,6 +2038,7 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, >> struct sym_hist *h = annotation__histogram(notes, evsel->idx); >> struct disasm_line *pos, *queue = NULL; >> u64 start = map__rip_2objdump(map, sym->start); >> + bool src_code_only = false; >> int printed = 2, queue_len = 0; >> int more = 0; >> u64 len; >> @@ -1788,8 +2059,14 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, >> if (perf_evsel__is_group_event(evsel)) >> width *= evsel->nr_members; >> >> - graph_dotted_len = printf(" %-*.*s| Source code & Disassembly of %s for %s (%" PRIu64 " samples)\n", >> - width, width, "Percent", d_filename, evsel_name, h->sum); >> + if (symbol_conf.annotate_src_only && notes->src->has_src_code) >> + src_code_only = true; >> + >> + graph_dotted_len = printf(" %-*.*s| %s of %s for %s (%" PRIu64 " samples)\n", >> + width, width, "Percent", >> + src_code_only ? "Source code" : "Source code & Disassembly", >> + src_code_only ? notes->src->code->path : d_filename, >> + evsel_name, h->sum); >> >> printf("%-*.*s----\n", >> graph_dotted_len, graph_dotted_len, graph_dotted_line); >> @@ -1797,6 +2074,16 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, >> if (verbose > 0) >> symbol__annotate_hits(sym, evsel); >> >> + if (src_code_only) { >> + struct source_code *code = notes->src->code; >> + struct code_line *cl; >> + >> + list_for_each_entry(cl, &code->lines, node) >> + source_code__print(cl, code->nr_events, notes, evsel); >> + >> + goto out; >> + } >> + >> list_for_each_entry(pos, ¬es->src->source, node) { >> if (context && queue == NULL) { >> queue = pos; >> @@ -1833,7 +2120,8 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, >> break; >> } >> } >> - >> +out: >> + printf("\n"); >> free(filename); >> >> return more; >> @@ -1903,6 +2191,7 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map, >> bool full_paths, int min_pcnt, int max_lines) >> { >> struct dso *dso = map->dso; >> + struct annotation *notes = symbol__annotation(sym); >> struct rb_root source_line = RB_ROOT; >> u64 len; >> >> @@ -1918,11 +2207,17 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map, >> print_summary(&source_line, dso->long_name); >> } >> >> + if (symbol_conf.annotate_src_only && >> + symbol__get_source_code(sym, map, evsel) == 0) >> + notes->src->has_src_code = true; >> + >> symbol__annotate_printf(sym, map, evsel, full_paths, >> min_pcnt, max_lines, 0); >> if (print_lines) >> symbol__free_source_line(sym, len); >> >> + if (notes->src->has_src_code) >> + symbol__free_source_code(sym); >> disasm__purge(&symbol__annotation(sym)->src->source); >> >> return 0; >> diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h >> index 2105503..3513807 100644 >> --- a/tools/perf/util/annotate.h >> +++ b/tools/perf/util/annotate.h >> @@ -56,6 +56,11 @@ int ins__scnprintf(struct ins *ins, char *bf, size_t size, struct ins_operands * >> >> struct annotation; >> >> +struct disasm_line_samples { >> + double percent; >> + u64 nr; >> +}; >> + >> struct disasm_line { >> struct list_head node; >> s64 offset; >> @@ -95,6 +100,21 @@ struct cyc_hist { >> u16 reset; >> }; >> >> +struct code_line { >> + struct list_head node; >> + int line_nr; >> + char *line; >> + int nr_matched_dl; >> + struct disasm_line **matched_dl; >> + struct disasm_line_samples *samples_sum; >> +}; >> + >> +struct source_code { >> + char *path; >> + int nr_events; >> + struct list_head lines; >> +}; >> + >> struct source_line_samples { >> double percent; >> double percent_sum; >> @@ -123,7 +143,9 @@ struct source_line { >> */ >> struct annotated_source { >> struct list_head source; >> + struct source_code *code; >> struct source_line *lines; >> + bool has_src_code; >> int nr_histograms; >> size_t sizeof_sym_hist; >> struct cyc_hist *cycles_hist; >> diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c >> index e7a98db..ca8dc6e 100644 >> --- a/tools/perf/util/symbol.c >> +++ b/tools/perf/util/symbol.c >> @@ -38,6 +38,7 @@ struct symbol_conf symbol_conf = { >> .use_modules = true, >> .try_vmlinux_path = true, >> .annotate_src = true, >> + .annotate_src_only = false, >> .demangle = true, >> .demangle_kernel = false, >> .cumulate_callchain = true, >> diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h >> index 41ebba9..a7ba20b 100644 >> --- a/tools/perf/util/symbol.h >> +++ b/tools/perf/util/symbol.h >> @@ -108,6 +108,7 @@ struct symbol_conf { >> kptr_restrict, >> annotate_asm_raw, >> annotate_src, >> + annotate_src_only, >> event_group, >> demangle, >> demangle_kernel, >> -- >> 2.7.4 >>