Hi,
These patches are a proposal to fulfill the following point of perf's TODO list
(https://perf.wiki.kernel.org/index.php/Todo):
* What I want is that if I am on bar*(), it annotates bar*(), no samples just
the call site (obtained from the callchain) disassembly. This is useful
because in many cases there may be multiple call sites within a function and
there may be inlines in between. Hard to track down if you cannot figure out
the surrounding addresses of the call site. (Request made by Stephane
Eranian).
These patches are still at an early stage:
* Per-callchain annotation is only available in perf-report;
* Tests were not performed on real-world applications but on small basic ones.
Alexis.
Alexis Berlemont (3):
perf annotate: implement per-callchain annotation histogram
perf hists browser: add callchain-specific annotation
perf report: fill per-callchain symbol annotation histograms
tools/perf/builtin-report.c | 8 ++
tools/perf/ui/browsers/hists.c | 188 ++++++++++++++++++++++++-
tools/perf/util/annotate.c | 308 ++++++++++++++++++++++++++++++++++++++++-
tools/perf/util/annotate.h | 19 +++
4 files changed, 517 insertions(+), 6 deletions(-)
--
2.12.2
A symbol can be called from various points and, depending on its calling
context, it can produce different results.
This patch creates one histogram for every different callchain recorded
and accumulates profiling samples into the appropriate one. Samples
recorded with the same callchain are not mixed with other ones, so the
noise should be reduced.
Signed-off-by: Alexis Berlemont <[email protected]>
---
tools/perf/util/annotate.c | 308 ++++++++++++++++++++++++++++++++++++++++++++-
tools/perf/util/annotate.h | 19 +++
2 files changed, 324 insertions(+), 3 deletions(-)
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 30498a2d4a6f..fa7691a0f205 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -18,6 +18,7 @@
#include "annotate.h"
#include "evsel.h"
#include "block-range.h"
+#include "callchain.h"
#include "arch/common.h"
#include <regex.h>
#include <pthread.h>
@@ -554,7 +555,7 @@ int symbol__alloc_hist(struct symbol *sym)
{
struct annotation *notes = symbol__annotation(sym);
const size_t size = symbol__size(sym);
- size_t sizeof_sym_hist;
+ size_t sizeof_sym_hist, nr_sym_hist;
/* Check for overflow when calculating sizeof_sym_hist */
if (size > (SIZE_MAX - sizeof(struct sym_hist)) / sizeof(u64))
@@ -567,12 +568,17 @@ int symbol__alloc_hist(struct symbol *sym)
/ symbol_conf.nr_events)
return -1;
- notes->src = zalloc(sizeof(*notes->src) + symbol_conf.nr_events * sizeof_sym_hist);
+ /* Allocate 1 histogram per event and 1 more for context annotation */
+ nr_sym_hist = symbol_conf.nr_events + 1;
+
+ notes->src = zalloc(sizeof(*notes->src) +
+ nr_sym_hist * sizeof_sym_hist);
if (notes->src == NULL)
return -1;
notes->src->sizeof_sym_hist = sizeof_sym_hist;
- notes->src->nr_histograms = symbol_conf.nr_events;
+ notes->src->nr_histograms = nr_sym_hist;
INIT_LIST_HEAD(&notes->src->source);
+ INIT_LIST_HEAD(&notes->src->context_hists);
return 0;
}
@@ -591,11 +597,18 @@ static int symbol__alloc_hist_cycles(struct symbol *sym)
void symbol__annotate_zero_histograms(struct symbol *sym)
{
struct annotation *notes = symbol__annotation(sym);
+ struct cxt_hist_entry *hist_entry;
pthread_mutex_lock(&notes->lock);
if (notes->src != NULL) {
memset(notes->src->histograms, 0,
notes->src->nr_histograms * notes->src->sizeof_sym_hist);
+
+		/*
+		 * Drop every per-callchain histogram. Entries are popped from
+		 * the head of the list: list_for_each_entry() must not be used
+		 * here because it reads the "next" pointer of the entry that
+		 * has just been freed.
+		 */
+		while (!list_empty(&notes->src->context_hists)) {
+			struct call_list_entry *call, *next_call;
+
+			hist_entry = list_first_entry(&notes->src->context_hists,
+						      struct cxt_hist_entry,
+						      list);
+
+			/* The recorded callchain is owned by the entry */
+			list_for_each_entry_safe(call, next_call,
+						 &hist_entry->callchain, list) {
+				list_del(&call->list);
+				free(call);
+			}
+
+			list_del(&hist_entry->list);
+			free(hist_entry);
+		}
if (notes->src->cycles_hist)
memset(notes->src->cycles_hist, 0,
symbol__size(sym) * sizeof(struct cyc_hist));
@@ -681,6 +694,7 @@ static struct annotation *symbol__get_annotation(struct symbol *sym, bool cycles
if (symbol__alloc_hist_cycles(sym) < 0)
return NULL;
}
+
return notes;
}
@@ -697,6 +711,281 @@ static int symbol__inc_addr_samples(struct symbol *sym, struct map *map,
return __symbol__inc_addr_samples(sym, map, notes, evidx, addr);
}
+/*
+ * Tell whether the callchain stored in @hist_entry is the one currently held
+ * by @cursor.
+ *
+ * The stored callchain holds every cursor node except the one matching the
+ * sampled address (see __symbol_cxt__copy_call_cursor), so both chains are
+ * equal only if the stored list has exactly cursor->nr - 1 entries and the ip
+ * of each entry equals the ip of the corresponding cursor node.
+ *
+ * The cursor is rewound with callchain_cursor_commit() and is left wherever
+ * the comparison stopped.
+ *
+ * Return: true when both callchains are identical, false otherwise.
+ */
+static bool __symbol_cxt__cmp_call_cursor(struct cxt_hist_entry *hist_entry,
+					  struct callchain_cursor *cursor)
+{
+	struct callchain_cursor_node *node;
+	struct call_list_entry *entry;
+	u64 nr_callers, nr_compared = 0;
+
+	callchain_cursor_commit(cursor);
+
+	/* Without even a sampled address there is nothing to compare */
+	if (cursor->nr == 0)
+		return false;
+
+	/*
+	 * One element of the callchain cursor cannot be matched: the one which
+	 * corresponds to the real sampled address. It comes first in callee
+	 * order (skip it now) and last in caller order (never reached because
+	 * at most nr_callers nodes are compared).
+	 */
+	nr_callers = cursor->nr - 1;
+	if (callchain_param.order == ORDER_CALLEE)
+		callchain_cursor_advance(cursor);
+
+	list_for_each_entry(entry, &hist_entry->callchain, list) {
+
+		/* The stored callchain is longer than the cursor's one */
+		if (nr_compared == nr_callers)
+			return false;
+
+		node = callchain_cursor_current(cursor);
+		if (node == NULL || entry->ip != node->ip)
+			return false;
+
+		callchain_cursor_advance(cursor);
+		nr_compared++;
+	}
+
+	/* Equal only if the stored callchain was not the shorter one */
+	return nr_compared == nr_callers;
+}
+
+/*
+ * Record into hist_entry->callchain the callchain currently held by @cursor,
+ * leaving out the node which corresponds to the sampled address.
+ *
+ * Return: 0 on success, -ENOMEM on allocation failure (in which case every
+ * entry of hist_entry->callchain is released).
+ */
+static int __symbol_cxt__copy_call_cursor(struct cxt_hist_entry *hist_entry,
+					  struct callchain_cursor *cursor)
+{
+	struct callchain_cursor_node *walk;
+	struct call_list_entry *item, *next;
+
+	callchain_cursor_commit(cursor);
+
+	for (walk = callchain_cursor_current(cursor); walk != NULL;
+	     walk = callchain_cursor_current(cursor)) {
+
+		item = zalloc(sizeof(*item));
+		if (item == NULL)
+			goto out_release;
+
+		/* The text address is the key identifying the callchain... */
+		item->ip = walk->ip;
+
+		/*
+		 * ...while the symbol start is kept because the hist_entry's
+		 * sorted callchain does not provide all the addresses; the
+		 * address itself is used when no symbol is known.
+		 */
+		if (walk->sym != NULL)
+			item->sym_start = walk->sym->start;
+		else
+			item->sym_start = walk->ip;
+
+		list_add_tail(&item->list, &hist_entry->callchain);
+		callchain_cursor_advance(cursor);
+	}
+
+	if (list_empty(&hist_entry->callchain))
+		return 0;
+
+	/*
+	 * The element matching the real sampled address must not be part of
+	 * the copy: it is the first one in callee order and the last one
+	 * otherwise, so let's delete it from the linked list.
+	 */
+	if (callchain_param.order == ORDER_CALLEE)
+		item = list_first_entry(&hist_entry->callchain,
+					struct call_list_entry, list);
+	else
+		item = list_last_entry(&hist_entry->callchain,
+				       struct call_list_entry, list);
+
+	list_del(&item->list);
+	free(item);
+
+	return 0;
+
+out_release:
+
+	list_for_each_entry_safe(item, next, &hist_entry->callchain, list) {
+		list_del(&item->list);
+		free(item);
+	}
+
+	return -ENOMEM;
+}
+
+/*
+ * Return the histogram of event @evidx dedicated to the callchain currently
+ * held by @cursor.
+ *
+ * The per-callchain entries are kept in notes->src->context_hists; when no
+ * entry matches the cursor, a new zeroed one (with room for
+ * notes->src->nr_histograms histograms) is allocated, given a copy of the
+ * callchain and added to that list.
+ *
+ * Return: the histogram, or NULL if memory could not be allocated.
+ */
+static struct sym_hist *__symbol_cxt__get_hists(struct annotation *notes,
+						int evidx,
+						struct callchain_cursor *cursor)
+{
+	struct cxt_hist_entry *hist_entry;
+	size_t sizeof_hists;
+
+	/*
+	 * Try to find a contextual histogram (an instance with the same
+	 * callchain)...
+	 */
+	list_for_each_entry(hist_entry, &notes->src->context_hists, list) {
+		if (__symbol_cxt__cmp_call_cursor(hist_entry, cursor))
+			goto return_selection;
+	}
+
+	/* ...if none was found, let's create a new one... */
+	sizeof_hists = notes->src->sizeof_sym_hist * notes->src->nr_histograms;
+	hist_entry = zalloc(sizeof(*hist_entry) + sizeof_hists);
+	if (hist_entry == NULL)
+		return NULL;
+
+	/* ...and copy the callchain from the callchain cursor */
+	INIT_LIST_HEAD(&hist_entry->callchain);
+	if (__symbol_cxt__copy_call_cursor(hist_entry, cursor) < 0) {
+		/* The copy helper already released its partial list */
+		free(hist_entry);
+		return NULL;
+	}
+
+	list_add(&hist_entry->list, &notes->src->context_hists);
+
+return_selection:
+
+	/* Histograms are sizeof_sym_hist bytes apart, not sizeof(struct) */
+	return (((void *)&hist_entry->histograms) +
+		(notes->src->sizeof_sym_hist * evidx));
+}
+
+/*
+ * Account one sample at @addr in the histogram of event @evidx which belongs
+ * to the callchain currently held by @cursor.
+ *
+ * Return: 0 on success, -ERANGE if @addr lies outside @sym, -ENOMEM if the
+ * per-callchain histogram could not be allocated.
+ */
+static int __symbol_cxt__inc_addr_samples(struct symbol *sym,
+					  struct map *map,
+					  struct annotation *notes,
+					  int evidx, u64 addr,
+					  struct callchain_cursor *cursor)
+{
+	unsigned long offset;
+	struct sym_hist *h;
+
+	pr_debug3("%s: addr=%#" PRIx64 "\n",
+		  __func__, map->unmap_ip(map, addr));
+
+	/* Only a zero-sized symbol may be hit exactly at its end address */
+	if ((addr < sym->start || addr >= sym->end) &&
+	    (addr != sym->end || sym->start != sym->end)) {
+		pr_debug("%s(%d): ERANGE! sym->name=%s, start=%#" PRIx64
+			 ", addr=%#" PRIx64 ", end=%#" PRIx64 "\n",
+			 __func__, __LINE__,
+			 sym->name, sym->start, addr, sym->end);
+		return -ERANGE;
+	}
+
+	offset = addr - sym->start;
+
+	/* The lookup allocates on first use and may therefore fail */
+	h = __symbol_cxt__get_hists(notes, evidx, cursor);
+	if (h == NULL)
+		return -ENOMEM;
+
+	h->sum++;
+	h->addr[offset]++;
+
+	pr_debug3("%#" PRIx64 " %s: period++ [addr: %#" PRIx64 ", %#" PRIx64
+		  ", evidx=%d] => %" PRIu64 "\n", sym->start, sym->name,
+		  addr, addr - sym->start, evidx, h->addr[offset]);
+	return 0;
+}
+
+/*
+ * Account one sample at @addr in the per-callchain histogram of @sym.
+ *
+ * Return: 0 when @sym is NULL (nothing to account), -ENOMEM when the
+ * annotation of the symbol cannot be obtained, otherwise the result of
+ * __symbol_cxt__inc_addr_samples().
+ */
+static int symbol_cxt__inc_addr_samples(struct symbol *sym, struct map *map,
+					int evidx, u64 addr,
+					struct callchain_cursor *cursor)
+{
+	struct annotation *notes;
+
+	if (!sym)
+		return 0;
+
+	notes = symbol__get_annotation(sym, false);
+
+	return notes ? __symbol_cxt__inc_addr_samples(sym, map, notes,
+						      evidx, addr, cursor)
+		     : -ENOMEM;
+}
+
+/*
+ * Compare two callchains made of struct call_list_entry elements.
+ *
+ * Both lists are walked in lockstep. Two entries are considered equal when
+ * either their ip or their sym_start fields are equal.
+ *
+ * Return: true only if the left callchain is not empty, both callchains have
+ * the same number of entries and every pair of entries is equal.
+ */
+static bool __symbol_cxt__cmp_callchain(struct list_head *left_callchain,
+ struct list_head *right_callchain)
+{
+ struct call_list_entry *left_entry, *right_entry;
+ u64 total_count = 0, match_count = 0;
+
+ right_entry =
+ list_first_entry_or_null(right_callchain,
+ struct call_list_entry, list);
+
+ /*
+ * This function compares two callchains (of contextual histograms); so,
+ * we need to simultaneously go through both callchains and compare
+ * their entries against each other.
+ */
+ list_for_each_entry(left_entry, left_callchain, list) {
+
+ /* A previous pair did not match: no need to go further */
+ if (total_count != match_count)
+ break;
+
+ total_count++;
+
+ /*
+ * The right callchain is exhausted: this left entry stays
+ * unmatched, which makes the final comparison fail.
+ */
+ if (right_entry == NULL)
+ continue;
+
+ /*
+ * The right callchain was generated by scanning the elements in
+ * hist_entry->sorted_chain (used for results display); in case
+ * of fork, the ip address it holds is not correct; so, we have
+ * to check symbol start addresses.
+ */
+ if (left_entry->ip == right_entry->ip)
+ match_count++;
+ else if (left_entry->sym_start == right_entry->sym_start)
+ match_count++;
+
+ /* Step to the next right entry, NULL once the end is reached */
+ if (list_is_last(&right_entry->list, right_callchain))
+ right_entry = NULL;
+ else
+ right_entry = list_next_entry(right_entry, list);
+ }
+
+ /*
+ * We consider that 2 empty callchains are not equal, just because the
+ * right callchain cannot be empty; this point might/must be flawed.
+ */
+ return total_count > 0 &&
+ right_entry == NULL && match_count == total_count;
+}
+
+/*
+ * Copy the histogram of event @evidx recorded for the callchain
+ * @ref_callchain into the last histogram slot of @sym (index
+ * nr_histograms - 1), from which it can be displayed as a regular
+ * annotation.
+ *
+ * @ref_callchain is a list of struct call_list_entry; it is only read.
+ *
+ * Return: 0 on success, -ENOENT if @sym is NULL, if its annotation cannot be
+ * obtained or if no per-callchain histogram matches @ref_callchain.
+ */
+int symbol_cxt__copy_hist(struct symbol *sym, int evidx,
+			  struct list_head *ref_callchain)
+{
+	struct annotation *notes;
+	struct cxt_hist_entry *hist_entry;
+	struct sym_hist *src_hist, *dst_hist;
+	int last_hist_idx;
+
+	/* Same guard as symbol_cxt__inc_addr_samples(): no symbol, no data */
+	if (sym == NULL)
+		return -ENOENT;
+
+	/*
+	 * Each per-symbol annotation structure contains per-callchain
+	 * histograms...
+	 */
+	notes = symbol__get_annotation(sym, false);
+	if (notes == NULL)
+		return -ENOENT;
+
+	/*
+	 * ...we need to find the histogram which corresponds to the reference
+	 * callchain passed as argument...
+	 */
+	list_for_each_entry(hist_entry, &notes->src->context_hists, list) {
+
+		if (__symbol_cxt__cmp_callchain(&hist_entry->callchain,
+						ref_callchain)) {
+			goto copy_selection;
+		}
+	}
+
+	return -ENOENT;
+
+copy_selection:
+
+	last_hist_idx = notes->src->nr_histograms - 1;
+
+	/*
+	 * ...and copy it so that it will be displayed as annotation of the
+	 * disassembled symbol code.
+	 */
+	dst_hist = annotation__histogram(notes, last_hist_idx);
+	src_hist = (((void *)&hist_entry->histograms) +
+		    (notes->src->sizeof_sym_hist * evidx));
+	memcpy(dst_hist, src_hist, notes->src->sizeof_sym_hist);
+
+	return 0;
+}
+
static int symbol__account_cycles(u64 addr, u64 start,
struct symbol *sym, unsigned cycles)
{
@@ -768,6 +1057,19 @@ int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 ip)
return symbol__inc_addr_samples(he->ms.sym, he->ms.map, evidx, ip);
}
+/*
+ * Counterpart of hist_entry__inc_addr_samples() for callchain-specific
+ * annotation: instead of the single per-symbol histogram, the sample goes to
+ * the histogram kept for the callchain held by @cursor, so that annotations
+ * can differ according to the callchain context.
+ */
+int hist_entry_cxt__inc_addr_samples(struct hist_entry *he, int evidx, u64 ip,
+				     struct callchain_cursor *cursor)
+{
+	struct map_symbol *ms = &he->ms;
+
+	return symbol_cxt__inc_addr_samples(ms->sym, ms->map,
+					    evidx, ip, cursor);
+}
+
static void disasm_line__init_ins(struct disasm_line *dl, struct arch *arch, struct map *map)
{
dl->ins.ops = ins__find(arch, dl->ins.name);
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index 948aa8e6fd39..1abad7e32ce3 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -84,6 +84,18 @@ struct sym_hist {
u64 addr[0];
};
+/* One frame of a callchain recorded for per-callchain annotation. */
+struct call_list_entry {
+ /* text address of the frame */
+ u64 ip;
+ /* start of the symbol holding ip, or ip itself when no symbol is known */
+ u64 sym_start;
+ /* link in the callchain list this frame belongs to */
+ struct list_head list;
+};
+
+/* Set of histograms accumulated for one specific callchain of a symbol. */
+struct cxt_hist_entry {
+ /* link in annotated_source.context_hists */
+ struct list_head list;
+ /* list of struct call_list_entry identifying the callchain */
+ struct list_head callchain;
+ /*
+ * Trailing storage allocated together with the entry; elements are
+ * annotated_source.sizeof_sym_hist bytes apart, so they must be
+ * addressed by byte offset rather than by array index.
+ */
+ struct sym_hist histograms[0];
+};
+
struct cyc_hist {
u64 start;
u64 cycles;
@@ -127,6 +139,7 @@ struct annotated_source {
int nr_histograms;
size_t sizeof_sym_hist;
struct cyc_hist *cycles_hist;
+ struct list_head context_hists;
struct sym_hist histograms[0];
};
@@ -155,9 +168,15 @@ int addr_map_symbol__account_cycles(struct addr_map_symbol *ams,
int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 addr);
+int hist_entry_cxt__inc_addr_samples(struct hist_entry *he, int evidx, u64 ip,
+ struct callchain_cursor *cursor);
+
int symbol__alloc_hist(struct symbol *sym);
void symbol__annotate_zero_histograms(struct symbol *sym);
+int symbol_cxt__copy_hist(struct symbol *sym,
+ int evidx, struct list_head *callchain);
+
int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_name, size_t privsize);
enum symbol_disassemble_errno {
--
2.12.2
The per-callchain histograms must be fed at some point with the
profiling samples. A solution is to fill them right after having filled
the per-symbol ones: in the callback hist_iter__report_callback.
Signed-off-by: Alexis Berlemont <[email protected]>
---
tools/perf/builtin-report.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index c18158b83eb1..d825a599d4b4 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -136,6 +136,14 @@ static int hist_iter__report_callback(struct hist_entry_iter *iter,
if (single)
err = hist_entry__inc_addr_samples(he, evsel->idx,
al->addr);
+ if (err == 0) {
+ struct callchain_cursor *cursor = &callchain_cursor;
+
+ err = hist_entry_cxt__inc_addr_samples(he,
+ evsel->idx,
+ al->addr,
+ cursor);
+ }
} else {
err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
}
--
2.12.2
By pressing 'a' in the hists browser, the user gets an annotated view of
the code of the selected symbol.
This patch adds the case 'A'; if this key is pressed (and if the
call-graph option was enabled at record time), perf will build
the callchain from the top frame down to the selected symbol and select
the corresponding annotation histogram.
Thus, the user will get an annotated view of the symbol code specific to
the callchain displayed in the hists browser.
Signed-off-by: Alexis Berlemont <[email protected]>
---
tools/perf/ui/browsers/hists.c | 188 ++++++++++++++++++++++++++++++++++++++++-
1 file changed, 185 insertions(+), 3 deletions(-)
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index da24072bb76e..9ed7fdc4dc51 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -2543,17 +2543,157 @@ struct popup_action {
struct thread *thread;
struct map_symbol ms;
int socket;
+ bool context_annotate;
int (*fn)(struct hist_browser *browser, struct popup_action *act);
};
+/*
+ * Append to @callchain a newly allocated call_list_entry describing @call:
+ * its ip and the start of its symbol (the ip itself when @call has no
+ * symbol).
+ *
+ * Return: 0 on success, -1 if the entry could not be allocated.
+ */
+static int copy_call_list_entry(struct list_head *callchain,
+				struct callchain_list *call)
+{
+	struct call_list_entry *item;
+
+	item = zalloc(sizeof(*item));
+	if (item == NULL) {
+		perror("not enough memory to scan callchains");
+		return -1;
+	}
+
+	item->ip = call->ip;
+	item->sym_start = call->ms.sym ? call->ms.sym->start : call->ip;
+
+	list_add_tail(&item->list, callchain);
+
+	return 0;
+}
+
+/*
+ * Build in @callchain the list of callchain entries which lead to @target.
+ *
+ * The callchain nodes of @root are scanned in rb-tree order; the entries of
+ * each node are appended to @callchain (as struct call_list_entry copies)
+ * until @target is met. @target itself is not appended. When a node (and,
+ * if its last scanned entry is unfolded, its children) does not contain
+ * @target, the entries appended for that node are removed again before the
+ * next node is tried.
+ *
+ * Return: 0 if @target was found, -1 if it was not found or if an entry
+ * could not be allocated.
+ *
+ * NOTE(review): when an allocation fails, the entries appended so far are
+ * left on @callchain; releasing them (as well as the list built on success)
+ * is up to the caller.
+ */
+static int __hist_browser_build_callchain(struct list_head *callchain,
+ struct rb_root *root,
+ struct callchain_list *target)
+{
+ struct callchain_node *tmp_node;
+ struct rb_node *node;
+ struct callchain_list *call;
+ struct call_list_entry *new_call, *tmp;
+
+ node = rb_first(root);
+
+ while (node) {
+ char folded_sign = ' ';
+ size_t added_count = 0;
+
+ tmp_node = rb_entry(node, struct callchain_node, rb_node);
+
+ /*
+ * If the callchain display mode is "flat", the list
+ * "parent_val" may contain the entries in common.
+ */
+
+ list_for_each_entry(call, &tmp_node->parent_val, list) {
+
+ /*
+ * If the highlighted callchain entry is reached, the
+ * callchain is complete; otherwise...
+ */
+
+ if (target == call)
+ return 0;
+
+ /*
+ * ...we need to keep the current element: the next
+ * one could be the right one and we need to build a
+ * callchain.
+ */
+
+ if (copy_call_list_entry(callchain, call) < 0)
+ return -1;
+
+ added_count++;
+ }
+
+ /*
+ * If the callchain display mode is "graph", "fractal" or even
+ * "flat", the callchain entries (the last one for "flat") are in
+ * the list "val".
+ */
+
+ list_for_each_entry(call, &tmp_node->val, list) {
+
+ /*
+ * If the highlighted callchain entry is reached, the
+ * callchain is complete; otherwise...
+ */
+
+ if (target == call)
+ return 0;
+
+ /*
+ * ...we need to keep the current element: the next
+ * one could be the right one and we need to build a
+ * callchain.
+ */
+
+ if (copy_call_list_entry(callchain, call) < 0)
+ return -1;
+
+ added_count++;
+
+ /*
+ * If we meet the folded sign '+' (and if the current
+ * element does not match), there is no need to go
+ * further, the callchain elements below cannot be the
+ * ones we are looking for.
+ */
+
+ folded_sign = callchain_list__folded(call);
+ if (folded_sign == '+')
+ break;
+ }
+
+ /*
+ * If the last scanned entry is unfolded, the callchain element
+ * we are looking for may be behind; so, let's scan its tree of
+ * callchain nodes.
+ */
+
+ if (folded_sign == '-' &&
+ __hist_browser_build_callchain(callchain,
+ &tmp_node->rb_root,
+ target) == 0)
+ return 0;
+
+ /*
+ * Nothing was found, let's remove the scanned callchain
+ * elements (the last added_count ones, starting from the tail
+ * of the list)...
+ */
+
+ list_for_each_entry_safe_reverse(new_call, tmp,
+ callchain, list) {
+
+ if (added_count == 0)
+ break;
+
+ list_del(&new_call->list);
+ free(new_call);
+ added_count--;
+ }
+
+ /* ...and go to the next one. */
+ node = rb_next(node);
+ }
+
+ return -1;
+}
+
static int
do_annotate(struct hist_browser *browser, struct popup_action *act)
{
struct perf_evsel *evsel;
struct annotation *notes;
struct hist_entry *he;
- int err;
+ struct map_symbol *ms;
+ struct callchain_list *target;
+ int err, old_idx;
if (!objdump_path && perf_env__lookup_objdump(browser->env))
return 0;
@@ -2562,9 +2702,48 @@ do_annotate(struct hist_browser *browser, struct popup_action *act)
if (!notes->src)
return 0;
- evsel = hists_to_evsel(browser->hists);
- err = map_symbol__tui_annotate(&act->ms, evsel, browser->hbt);
he = hist_browser__selected_entry(browser);
+
+ ms = browser->selection;
+ target = container_of(ms, struct callchain_list, ms);
+
+ evsel = hists_to_evsel(browser->hists);
+ if (act->context_annotate && browser->selection != &he->ms) {
+ struct list_head callchain;
+
+ /*
+ * Build the callchain which corresponds to the selected entry
+ * in the browser in order to...
+ */
+
+ INIT_LIST_HEAD(&callchain);
+ err = __hist_browser_build_callchain(&callchain,
+ &he->sorted_chain, target);
+ if (err < 0)
+ return -1;
+
+ /*
+ * ...select the histogram which callchain matches ours and copy
+ * it into a fake evsel slot (the last one); then...
+ */
+
+ err = symbol_cxt__copy_hist(act->ms.sym,
+ evsel->idx, &callchain);
+ if (err < 0)
+ return -1;
+
+ /* ...we just need to trick the current evsel index so as
+ * display the copied annotation histogram.
+ */
+
+ old_idx = evsel->idx;
+ evsel->idx = notes->src->nr_histograms - 1;
+ err = map_symbol__tui_annotate(&act->ms, evsel, browser->hbt);
+ evsel->idx = old_idx;
+
+ } else
+ err = map_symbol__tui_annotate(&act->ms, evsel, browser->hbt);
+
/*
* offer option to annotate the other branch source or target
* (if they exists) when returning from annotate
@@ -2939,6 +3118,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
"ENTER Zoom into DSO/Threads & Annotate current symbol\n" \
"ESC Zoom out\n" \
"a Annotate current symbol\n" \
+ "A Annotate current symbol (callchain-specific)\n" \
"C Collapse all callchains\n" \
"d Zoom into current DSO\n" \
"E Expand all callchains\n" \
@@ -3014,6 +3194,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
*/
goto out_free_stack;
case 'a':
+ case 'A':
if (!hists__has(hists, sym)) {
ui_browser__warning(&browser->b, delay_secs * 2,
"Annotation is only available for symbolic views, "
@@ -3028,6 +3209,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
actions->ms.map = browser->selection->map;
actions->ms.sym = browser->selection->sym;
+ actions->context_annotate = key == 'A';
do_annotate(browser, actions);
continue;
case 'P':
--
2.12.2