From: Kan Liang <[email protected]>
Current perf diff has some issues. E.g. the default sort key is
symbol, but it doesn't work well. It sorts as address. Furthermore,
the old perf diff can only work on perf.data from the same binary.
This patch set fixes the symbol issue and extends it to compare the
perf.data from different binaries and different kernels. It's useful
for debugging the regression issue.
Current perf diff can do address compare, the patch set also keep
this feature. A new sort key "symoff" is introduced. It can let the user
sort differential profile by the address. It should be useful for
debugging the scaling issue, if the user think function level diff is
too high granularity.
Here is an example.
v1_1_6perf.data is perf record result of version one tchain on 1.6 kernel.
v2_1_8perf.data is perf record result of version two tchain on 1.8 kernel.
They are from different binaries.
v1_1_8_1perf.data and v1_1_8_2perf.data are perf record result of version
one tchain on 1.8 kernel. I run perf record twice.
They are from same binary.
Old perf diff with default sort key "dso,symbol" for different binary
./perf diff -s dso,symbol v1_1_6perf.data v2_1_8perf.data
Event 'cycles'
Baseline Delta Shared Object Symbol
........ ....... ................. .........................
0.01% [kernel.kallsyms] [k] native_write_msr_safe
0.01% [kernel.kallsyms] [k] notifier_call_chain
0.01% [kernel.kallsyms] [k] perf_event_task_tick
0.01% [kernel.kallsyms] [k] run_posix_cpu_timers
0.01% [kernel.kallsyms] [k] run_timer_softirq
0.01% [kernel.kallsyms] [k] trigger_load_balance
0.01% [kernel.kallsyms] [k] update_vsyscall
[kernel.vmlinux] [k] __run_hrtimer
[kernel.vmlinux] [k] apic_timer_interrupt
[kernel.vmlinux] [k] enqueue_task
[kernel.vmlinux] [k] hrtimer_interrupt
[kernel.vmlinux] [k] native_write_msr_safe
[kernel.vmlinux] [k] trigger_load_balance
[kernel.vmlinux] [k] update_wall_time
0.05% [unknown] [.] 0x0000000000400540
0.04% [unknown] [.] 0x0000000000400541
0.03% [unknown] [.] 0x000000000040054b
0.04% [unknown] [.] 0x0000000000400552
33.55% [unknown] [.] 0x0000000000400554
1.22% [unknown] [.] 0x000000000040055a
8.00% [unknown] [.] 0x000000000040055e
0.02% [unknown] [.] 0x0000000000400562
8.41% [unknown] [.] 0x0000000000400564
48.13% [unknown] [.] 0x000000000040056b
0.16% [unknown] [.] 0x0000000000400570
0.17% [unknown] [.] 0x0000000000400571
+0.45% [unknown] [.] 0x0000000000400580
+0.29% [unknown] [.] 0x0000000000400581
0.01% [unknown] [.] 0x0000000000400583
0.01% [unknown] [.] 0x0000000000400588
+0.22% [unknown] [.] 0x000000000040058b
0.01% +13.35% [unknown] [.] 0x000000000040058d
0.06% [unknown] [.] 0x0000000000400591
+0.78% [unknown] [.] 0x0000000000400593
0.04% [unknown] [.] 0x0000000000400595
0.01% +6.18% [unknown] [.] 0x0000000000400597
+1.47% [unknown] [.] 0x000000000040059b
+6.46% [unknown] [.] 0x000000000040059d
+1.28% [unknown] [.] 0x00000000004005a1
+66.38% [unknown] [.] 0x00000000004005a5
+1.34% [unknown] [.] 0x00000000004005a7
+1.15% [unknown] [.] 0x00000000004005a8
+0.05% [unknown] [.] 0x00000000004005ba
+0.03% [unknown] [.] 0x00000000004005bf
+0.02% [unknown] [.] 0x00000000004005c4
+0.05% [unknown] [.] 0x00000000004005c9
+0.03% [unknown] [.] 0x00000000004005ce
+0.27% [unknown] [.] 0x00000000004005d2
+0.15% [unknown] [.] 0x00000000004005d6
[unknown] [.] 0x00000000004005d8
[unknown] [.] 0x00000000004005d9
New perf diff with default sort key "dso,symbol" for different binary
./perf diff -s dso,symbol v1_1_6perf.data v2_1_8perf.data
Event 'cycles'
Baseline Delta Shared Object Symbol
........ ....... ................. .........................
0.01% [kernel.kallsyms] [k] native_write_msr_safe
0.01% [kernel.kallsyms] [k] notifier_call_chain
0.01% [kernel.kallsyms] [k] perf_event_task_tick
0.01% [kernel.kallsyms] [k] run_posix_cpu_timers
0.01% [kernel.kallsyms] [k] run_timer_softirq
0.01% [kernel.kallsyms] [k] trigger_load_balance
0.01% [kernel.kallsyms] [k] update_vsyscall
[kernel.vmlinux] [k] __run_hrtimer
[kernel.vmlinux] [k] apic_timer_interrupt
[kernel.vmlinux] [k] enqueue_task
[kernel.vmlinux] [k] hrtimer_interrupt
[kernel.vmlinux] [k] native_write_msr_safe
[kernel.vmlinux] [k] trigger_load_balance
[kernel.vmlinux] [k] update_wall_time
0.14% +0.47% tchain [.] f2
99.82% -0.47% tchain [.] f3
Changes from V1:
- mmap2 as default of tool's definition
- Using se_collapse to match symbol name.
- Sort key "addr" is introduced to compare address from same binary.
Changes from V2:
- Rename sort key name to "symoff"
Changes from V3:
- Add sort__symoff_collapse for perf diff
- Refine hist_entry__symoff_snprintf
Kan Liang (3):
perf tool: Fix perf diff symble sort issue
perf tool:perf diff support for different binaries
perf tool: Add sort key symoff for perf diff
tools/perf/Documentation/perf-diff.txt | 8 +++-
tools/perf/builtin-diff.c | 3 +-
tools/perf/util/hist.c | 5 ++-
tools/perf/util/hist.h | 1 +
tools/perf/util/sort.c | 76 ++++++++++++++++++++++++++++++++++
tools/perf/util/sort.h | 2 +
6 files changed, 90 insertions(+), 5 deletions(-)
--
1.8.3.2
From: Kan Liang <[email protected]>
Currently, the perf diff only works with same binaries. That's because
it compares the symbol start address. It doesn't work if the perf.data
comes from different binaries. This patch matches the function names.
Signed-off-by: Kan Liang <[email protected]>
Acked-by: Namhyung Kim <[email protected]>
---
tools/perf/util/sort.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 95efaaf..bea2e07 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -1432,6 +1432,15 @@ int sort_dimension__add(const char *tok)
sort__has_parent = 1;
} else if (sd->entry == &sort_sym) {
sort__has_sym = 1;
+ /*
+ * perf diff displays the performance difference amongst
+ * two or more perf.data files. Those files could come
+ * from different binaries. So we should not compare
+ * their ips, but the name of symble.
+ */
+ if (sort__mode == SORT_MODE__DIFF)
+ sd->entry->se_collapse = sort__sym_sort;
+
} else if (sd->entry == &sort_dso) {
sort__has_dso = 1;
}
--
1.8.3.2
From: Kan Liang <[email protected]>
Sometime, especially debugging scaling issue, the function level diff
may be high granularity. The user may want to do deeper diff analysis
for some cache or lock issue. The "symoff" key can let the user sort
differential profile by ips. This feature should only work when the
perf.data comes from same binary.
Here is an example.
perf diff -s symoff v1_1_8_1perf.data v1_1_8_2perf.data
Event 'cycles'
Baseline Delta Symbol + Offset
........ ....... ...............................
0.00% __update_cpu_load+0xcc
_raw_spin_lock_irqsave+0x24
_raw_spin_unlock_irqrestore+0xa
0.00% apic_timer_interrupt+0x0
apic_timer_interrupt+0x68
check_preempt_curr+0x1c
0.00% f1+0x20
0.03% +0.03% f2+0x11
0.03% +0.01% f2+0x16
0.05% f2+0x1b
0.04% -0.02% f2+0x20
0.03% f2+0x25
0.17% +0.11% f2+0x29
0.15% f2+0x2d
f2+0x30
0.37% +0.02% f3+0x0
0.18% +0.03% f3+0x1
0.01% f3+0x4
0.18% +0.04% f3+0xb
12.31% +0.11% f3+0xd
0.03% f3+0x10
0.80% -0.12% f3+0x13
6.66% +0.09% f3+0x17
1.81% -0.36% f3+0x1b
6.35% +0.24% f3+0x1d
1.42% -0.22% f3+0x21
66.83% +0.22% f3+0x25
1.29% -0.12% f3+0x27
1.22% -0.04% f3+0x28
0.00% load_balance+0x96
0.00% native_apic_mem_write+0x0
0.00% native_write_msr_safe+0xa
0.00% native_write_msr_safe+0xd
0.00% rb_erase+0x381
resched_curr+0x5
restore_args+0x4
0.00% run_timer_softirq+0x29f
select_task_rq_fair+0x9
0.00% select_task_rq_fair+0x1d9
task_tick_fair+0x46
0.00% task_tick_fair+0x1ce
task_waking_fair+0x27
0.00% try_to_wake_up+0x158
update_cfs_rq_blocked_load+0x99
0.00% update_cpu_load_active+0x3b
update_group_capacity+0x150
update_wall_time+0x3c6
Signed-off-by: Kan Liang <[email protected]>
---
tools/perf/Documentation/perf-diff.txt | 8 +++-
tools/perf/builtin-diff.c | 2 +-
tools/perf/util/hist.c | 5 ++-
tools/perf/util/hist.h | 1 +
tools/perf/util/sort.c | 67 ++++++++++++++++++++++++++++++++++
tools/perf/util/sort.h | 2 +
6 files changed, 80 insertions(+), 5 deletions(-)
diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt
index e463caa..9e13911 100644
--- a/tools/perf/Documentation/perf-diff.txt
+++ b/tools/perf/Documentation/perf-diff.txt
@@ -50,8 +50,12 @@ OPTIONS
-s::
--sort=::
- Sort by key(s): pid, comm, dso, symbol, cpu, parent, srcline.
- Please see description of --sort in the perf-report man page.
+ Sort by key(s): pid, comm, dso, symbol, cpu, parent, srcline, symoff.
+
+ - symoff: exact symbol + offset address executed at the time of sample.
+ (for same binary compare)
+
+ For other keys, please see description of --sort in the perf-report man page.
-t::
--field-separator=::
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 1ce425d..03a4001 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -744,7 +744,7 @@ static const struct option options[] = {
OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
"only consider these symbols"),
OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
- "sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline, ..."
+ "sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline, symoff, ..."
" Please refer the man page for the complete list."),
OPT_STRING('t', "field-separator", &symbol_conf.field_sep, "separator",
"separator for columns, no spaces will be added between "
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 6e88b9e..3d8a71b 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -67,13 +67,14 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
symlen = h->ms.sym->namelen + 4;
if (verbose)
symlen += BITS_PER_LONG / 4 + 2 + 3;
- hists__new_col_len(hists, HISTC_SYMBOL, symlen);
} else {
symlen = unresolved_col_width + 4 + 2;
- hists__new_col_len(hists, HISTC_SYMBOL, symlen);
hists__set_unres_dso_col_len(hists, HISTC_DSO);
}
+ hists__new_col_len(hists, HISTC_SYMBOL, symlen);
+ hists__new_col_len(hists, HISTC_SYMOFF, symlen);
+
len = thread__comm_len(h->thread);
if (hists__new_col_len(hists, HISTC_COMM, len))
hists__set_col_len(hists, HISTC_THREAD, len + 6);
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index d0ef9a1..874b203 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -24,6 +24,7 @@ enum hist_filter {
enum hist_column {
HISTC_SYMBOL,
+ HISTC_SYMOFF,
HISTC_DSO,
HISTC_THREAD,
HISTC_COMM,
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index bea2e07..49ad000 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -280,6 +280,65 @@ struct sort_entry sort_sym = {
.se_width_idx = HISTC_SYMBOL,
};
+static int64_t
+sort__symoff_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ return _sort__addr_cmp(left->ip, right->ip);
+}
+
+static int64_t
+sort__symoff_collapse(struct hist_entry *left, struct hist_entry *right)
+{
+ struct symbol *sym_l = left->ms.sym;
+ struct symbol *sym_r = right->ms.sym;
+ u64 symoff_l, symoff_r;
+ int64_t ret;
+
+ if (!sym_l || !sym_r)
+ return cmp_null(sym_l, sym_r);
+
+ ret = strcmp(sym_r->name, sym_l->name);
+ if (ret)
+ return ret;
+
+
+ symoff_l = left->ip - sym_l->start;
+ symoff_r = right->ip - sym_r->start;
+
+ return (int64_t)(symoff_r - symoff_l);
+}
+
+static int hist_entry__symoff_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ struct map *map = he->ms.map;
+ struct symbol *sym = he->ms.sym;
+ size_t ret = 0;
+
+ if (sym) {
+ ret += repsep_snprintf(bf + ret, size - ret, "%s", sym->name);
+ ret += repsep_snprintf(bf + ret, size - ret, "+0x%llx",
+ he->ip - sym->start);
+
+ } else {
+ size_t len = BITS_PER_LONG / 4;
+
+ ret += repsep_snprintf(bf + ret, size - ret, "%-#.*llx", len,
+ map ? map->unmap_ip(map, he->ip) : he->ip);
+ }
+
+ ret += repsep_snprintf(bf + ret, size - ret, "%-*s",
+ width - ret, "");
+ return ret;
+}
+
+struct sort_entry sort_symoff = {
+ .se_header = "Symbol + Offset",
+ .se_cmp = sort__symoff_cmp,
+ .se_snprintf = hist_entry__symoff_snprintf,
+ .se_width_idx = HISTC_SYMOFF,
+};
+
/* --sort srcline */
static int64_t
@@ -1172,6 +1231,7 @@ static struct sort_dimension common_sort_dimensions[] = {
DIM(SORT_COMM, "comm", sort_comm),
DIM(SORT_DSO, "dso", sort_dso),
DIM(SORT_SYM, "symbol", sort_sym),
+ DIM(SORT_SYMOFF, "symoff", sort_symoff),
DIM(SORT_PARENT, "parent", sort_parent),
DIM(SORT_CPU, "cpu", sort_cpu),
DIM(SORT_SRCLINE, "srcline", sort_srcline),
@@ -1443,6 +1503,13 @@ int sort_dimension__add(const char *tok)
} else if (sd->entry == &sort_dso) {
sort__has_dso = 1;
+ } else if (sd->entry == &sort_symoff) {
+ /*
+ * For symoff sort key, not only the offset but also the
+ * symbol name should be compared.
+ */
+ if (sort__mode == SORT_MODE__DIFF)
+ sd->entry->se_collapse = sort__symoff_collapse;
}
return __sort_dimension__add(sd);
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index c03e4ff..ea0122f 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -38,6 +38,7 @@ extern enum sort_mode sort__mode;
extern struct sort_entry sort_comm;
extern struct sort_entry sort_dso;
extern struct sort_entry sort_sym;
+extern struct sort_entry sort_symoff;
extern struct sort_entry sort_parent;
extern struct sort_entry sort_dso_from;
extern struct sort_entry sort_dso_to;
@@ -161,6 +162,7 @@ enum sort_type {
SORT_COMM,
SORT_DSO,
SORT_SYM,
+ SORT_SYMOFF,
SORT_PARENT,
SORT_CPU,
SORT_SRCLINE,
--
1.8.3.2
From: Kan Liang <[email protected]>
This patch fixes a bug for perf diff.
Without mmap2, perf diff fails to find the symbol name. The default
symbol sort key doesn't work well.
Signed-off-by: Kan Liang <[email protected]>
Acked-by: Namhyung Kim <[email protected]>
---
tools/perf/builtin-diff.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 25114c9..1ce425d 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -357,6 +357,7 @@ static int diff__process_sample_event(struct perf_tool *tool __maybe_unused,
static struct perf_tool tool = {
.sample = diff__process_sample_event,
.mmap = perf_event__process_mmap,
+ .mmap2 = perf_event__process_mmap2,
.comm = perf_event__process_comm,
.exit = perf_event__process_exit,
.fork = perf_event__process_fork,
--
1.8.3.2
On Tue, 18 Nov 2014 11:38:20 -0500, kan liang wrote:
> From: Kan Liang <[email protected]>
>
> Sometime, especially debugging scaling issue, the function level diff
> may be high granularity. The user may want to do deeper diff analysis
> for some cache or lock issue. The "symoff" key can let the user sort
> differential profile by ips. This feature should only work when the
> perf.data comes from same binary.
I think the symoff sort key now works well for different (i.e. modified)
binaries too.
>
> Here is an example.
> perf diff -s symoff v1_1_8_1perf.data v1_1_8_2perf.data
>
> Event 'cycles'
>
> Baseline Delta Symbol + Offset
> ........ ....... ...............................
>
> 0.00% __update_cpu_load+0xcc
> _raw_spin_lock_irqsave+0x24
> _raw_spin_unlock_irqrestore+0xa
> 0.00% apic_timer_interrupt+0x0
> apic_timer_interrupt+0x68
> check_preempt_curr+0x1c
> 0.00% f1+0x20
> 0.03% +0.03% f2+0x11
> 0.03% +0.01% f2+0x16
> 0.05% f2+0x1b
> 0.04% -0.02% f2+0x20
> 0.03% f2+0x25
> 0.17% +0.11% f2+0x29
> 0.15% f2+0x2d
> f2+0x30
> 0.37% +0.02% f3+0x0
> 0.18% +0.03% f3+0x1
> 0.01% f3+0x4
> 0.18% +0.04% f3+0xb
> 12.31% +0.11% f3+0xd
> 0.03% f3+0x10
> 0.80% -0.12% f3+0x13
> 6.66% +0.09% f3+0x17
> 1.81% -0.36% f3+0x1b
> 6.35% +0.24% f3+0x1d
> 1.42% -0.22% f3+0x21
> 66.83% +0.22% f3+0x25
> 1.29% -0.12% f3+0x27
> 1.22% -0.04% f3+0x28
> 0.00% load_balance+0x96
> 0.00% native_apic_mem_write+0x0
> 0.00% native_write_msr_safe+0xa
> 0.00% native_write_msr_safe+0xd
> 0.00% rb_erase+0x381
> resched_curr+0x5
> restore_args+0x4
> 0.00% run_timer_softirq+0x29f
> select_task_rq_fair+0x9
> 0.00% select_task_rq_fair+0x1d9
> task_tick_fair+0x46
> 0.00% task_tick_fair+0x1ce
> task_waking_fair+0x27
> 0.00% try_to_wake_up+0x158
> update_cfs_rq_blocked_load+0x99
> 0.00% update_cpu_load_active+0x3b
> update_group_capacity+0x150
> update_wall_time+0x3c6
>
> Signed-off-by: Kan Liang <[email protected]>
> ---
> tools/perf/Documentation/perf-diff.txt | 8 +++-
> tools/perf/builtin-diff.c | 2 +-
> tools/perf/util/hist.c | 5 ++-
> tools/perf/util/hist.h | 1 +
> tools/perf/util/sort.c | 67 ++++++++++++++++++++++++++++++++++
> tools/perf/util/sort.h | 2 +
> 6 files changed, 80 insertions(+), 5 deletions(-)
>
> diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt
> index e463caa..9e13911 100644
> --- a/tools/perf/Documentation/perf-diff.txt
> +++ b/tools/perf/Documentation/perf-diff.txt
> @@ -50,8 +50,12 @@ OPTIONS
>
> -s::
> --sort=::
> - Sort by key(s): pid, comm, dso, symbol, cpu, parent, srcline.
> - Please see description of --sort in the perf-report man page.
> + Sort by key(s): pid, comm, dso, symbol, cpu, parent, srcline, symoff.
> +
> + - symoff: exact symbol + offset address executed at the time of sample.
> + (for same binary compare)
Ditto. And symoff is not only for perf diff, but it should work for
normal perf report also. So you'd better move the description to perf
report man page IMHO.
> +
> + For other keys, please see description of --sort in the perf-report man page.
>
> -t::
> --field-separator=::
> diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
> index 1ce425d..03a4001 100644
> --- a/tools/perf/builtin-diff.c
> +++ b/tools/perf/builtin-diff.c
> @@ -744,7 +744,7 @@ static const struct option options[] = {
> OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
> "only consider these symbols"),
> OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
> - "sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline, ..."
> + "sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline, symoff, ..."
> " Please refer the man page for the complete list."),
> OPT_STRING('t', "field-separator", &symbol_conf.field_sep, "separator",
> "separator for columns, no spaces will be added between "
> diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
> index 6e88b9e..3d8a71b 100644
> --- a/tools/perf/util/hist.c
> +++ b/tools/perf/util/hist.c
> @@ -67,13 +67,14 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
> symlen = h->ms.sym->namelen + 4;
> if (verbose)
> symlen += BITS_PER_LONG / 4 + 2 + 3;
> - hists__new_col_len(hists, HISTC_SYMBOL, symlen);
> } else {
> symlen = unresolved_col_width + 4 + 2;
> - hists__new_col_len(hists, HISTC_SYMBOL, symlen);
> hists__set_unres_dso_col_len(hists, HISTC_DSO);
> }
>
> + hists__new_col_len(hists, HISTC_SYMBOL, symlen);
> + hists__new_col_len(hists, HISTC_SYMOFF, symlen);
SYMOFF will need larger length at least 6 (for "+0xYYY"). Idealy 3 +
symbol size in hexdigit?
Thanks,
Namhyung
>
> On Tue, 18 Nov 2014 11:38:20 -0500, kan liang wrote:
> > From: Kan Liang <[email protected]>
> >
> > Sometime, especially debugging scaling issue, the function level diff
> > may be high granularity. The user may want to do deeper diff analysis
> > for some cache or lock issue. The "symoff" key can let the user sort
> > differential profile by ips. This feature should only work when the
> > perf.data comes from same binary.
>
> I think the symoff sort key now works well for different (i.e. modified)
> binaries too.
For different binaries, the function may be changed. So the offset may
point to different code.
What about this?
"This feature should work when the perf.data comes from
either same binary or same function of different binary."
Or just simply remove this sentence.
>
> >
> > -s::
> > --sort=::
> > - Sort by key(s): pid, comm, dso, symbol, cpu, parent, srcline.
> > - Please see description of --sort in the perf-report man page.
> > + Sort by key(s): pid, comm, dso, symbol, cpu, parent, srcline, symoff.
> > +
> > + - symoff: exact symbol + offset address executed at the time of
> sample.
> > + (for same binary compare)
>
> Ditto. And symoff is not only for perf diff, but it should work for normal
> perf report also. So you'd better move the description to perf report man
> page IMHO.
OK, I will do it, and also remove "(for same binary compare)"
>
>
> > +
> > + For other keys, please see description of --sort in the perf-report
> man page.
> >
> > -t::
> > --field-separator=::
> > diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
> > index 1ce425d..03a4001 100644
> > --- a/tools/perf/builtin-diff.c
> > +++ b/tools/perf/builtin-diff.c
> > @@ -744,7 +744,7 @@ static const struct option options[] = {
> > OPT_STRING('S', "symbols", &symbol_conf.sym_list_str,
> "symbol[,symbol...]",
> > "only consider these symbols"),
> > OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
> > - "sort by key(s): pid, comm, dso, symbol, parent, cpu,
> srcline, ..."
> > + "sort by key(s): pid, comm, dso, symbol, parent, cpu,
> srcline, symoff, ..."
> > " Please refer the man page for the complete list."),
> > OPT_STRING('t', "field-separator", &symbol_conf.field_sep,
> "separator",
> > "separator for columns, no spaces will be added between
> "
> > diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index
> > 6e88b9e..3d8a71b 100644
> > --- a/tools/perf/util/hist.c
> > +++ b/tools/perf/util/hist.c
> > @@ -67,13 +67,14 @@ void hists__calc_col_len(struct hists *hists, struct
> hist_entry *h)
> > symlen = h->ms.sym->namelen + 4;
> > if (verbose)
> > symlen += BITS_PER_LONG / 4 + 2 + 3;
> > - hists__new_col_len(hists, HISTC_SYMBOL, symlen);
> > } else {
> > symlen = unresolved_col_width + 4 + 2;
> > - hists__new_col_len(hists, HISTC_SYMBOL, symlen);
> > hists__set_unres_dso_col_len(hists, HISTC_DSO);
> > }
> >
> > + hists__new_col_len(hists, HISTC_SYMBOL, symlen);
> > + hists__new_col_len(hists, HISTC_SYMOFF, symlen);
>
> SYMOFF will need larger length at least 6 (for "+0xYYY"). Idealy 3 + symbol
> size in hexdigit?
>
We also need to handle the case which doesn't have symbol available.
What about this?
/*
+ * +6 accounts for '"+0xYYY ' symoff info
* +4 accounts for '[x] ' priv level info
* +2 accounts for 0x prefix on raw addresses
* +3 accounts for ' y ' symtab origin info
*/
if (h->ms.sym) {
symlen = h->ms.sym->namelen + 4;
if (verbose)
symlen += BITS_PER_LONG / 4 + 2 + 3;
hists__new_col_len(hists, HISTC_SYMBOL, symlen);
+ symlen = h->ms.sym->namelen + 6
+ hists__new_col_len(hists, HISTC_SYMOFF, symlen);
} else {
symlen = unresolved_col_width + 4 + 2;
hists__new_col_len(hists, HISTC_SYMBOL, symlen);
hists__set_unres_dso_col_len(hists, HISTC_DSO);
+ symlen = unresolved_col_width + 2
+ hists__new_col_len(hists, HISTC_SYMOFF, symlen);
}
Thanks,
Kan
> Thanks,
> Namhyung
Em Tue, Nov 18, 2014 at 11:38:20AM -0500, [email protected] escreveu:
> From: Kan Liang <[email protected]>
>
> Sometime, especially debugging scaling issue, the function level diff
> may be high granularity. The user may want to do deeper diff analysis
> for some cache or lock issue. The "symoff" key can let the user sort
> differential profile by ips. This feature should only work when the
> perf.data comes from same binary.
So, to avoid having people scratching heads, and since we have the
build-id for both perf.data files, hence for both binaries being
compared, can we check the build ids and either refuse to do the diff or
print a big warning about different binaries being compared?
- Arnaldo
> Here is an example.
> perf diff -s symoff v1_1_8_1perf.data v1_1_8_2perf.data
>
> Event 'cycles'
>
> Baseline Delta Symbol + Offset
> ........ ....... ...............................
>
> 0.00% __update_cpu_load+0xcc
> _raw_spin_lock_irqsave+0x24
> _raw_spin_unlock_irqrestore+0xa
> 0.00% apic_timer_interrupt+0x0
> apic_timer_interrupt+0x68
> check_preempt_curr+0x1c
> 0.00% f1+0x20
> 0.03% +0.03% f2+0x11
> 0.03% +0.01% f2+0x16
> 0.05% f2+0x1b
> 0.04% -0.02% f2+0x20
> 0.03% f2+0x25
> 0.17% +0.11% f2+0x29
> 0.15% f2+0x2d
> f2+0x30
> 0.37% +0.02% f3+0x0
> 0.18% +0.03% f3+0x1
> 0.01% f3+0x4
> 0.18% +0.04% f3+0xb
> 12.31% +0.11% f3+0xd
> 0.03% f3+0x10
> 0.80% -0.12% f3+0x13
> 6.66% +0.09% f3+0x17
> 1.81% -0.36% f3+0x1b
> 6.35% +0.24% f3+0x1d
> 1.42% -0.22% f3+0x21
> 66.83% +0.22% f3+0x25
> 1.29% -0.12% f3+0x27
> 1.22% -0.04% f3+0x28
> 0.00% load_balance+0x96
> 0.00% native_apic_mem_write+0x0
> 0.00% native_write_msr_safe+0xa
> 0.00% native_write_msr_safe+0xd
> 0.00% rb_erase+0x381
> resched_curr+0x5
> restore_args+0x4
> 0.00% run_timer_softirq+0x29f
> select_task_rq_fair+0x9
> 0.00% select_task_rq_fair+0x1d9
> task_tick_fair+0x46
> 0.00% task_tick_fair+0x1ce
> task_waking_fair+0x27
> 0.00% try_to_wake_up+0x158
> update_cfs_rq_blocked_load+0x99
> 0.00% update_cpu_load_active+0x3b
> update_group_capacity+0x150
> update_wall_time+0x3c6
>
> Signed-off-by: Kan Liang <[email protected]>
> ---
> tools/perf/Documentation/perf-diff.txt | 8 +++-
> tools/perf/builtin-diff.c | 2 +-
> tools/perf/util/hist.c | 5 ++-
> tools/perf/util/hist.h | 1 +
> tools/perf/util/sort.c | 67 ++++++++++++++++++++++++++++++++++
> tools/perf/util/sort.h | 2 +
> 6 files changed, 80 insertions(+), 5 deletions(-)
>
> diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt
> index e463caa..9e13911 100644
> --- a/tools/perf/Documentation/perf-diff.txt
> +++ b/tools/perf/Documentation/perf-diff.txt
> @@ -50,8 +50,12 @@ OPTIONS
>
> -s::
> --sort=::
> - Sort by key(s): pid, comm, dso, symbol, cpu, parent, srcline.
> - Please see description of --sort in the perf-report man page.
> + Sort by key(s): pid, comm, dso, symbol, cpu, parent, srcline, symoff.
> +
> + - symoff: exact symbol + offset address executed at the time of sample.
> + (for same binary compare)
> +
> + For other keys, please see description of --sort in the perf-report man page.
>
> -t::
> --field-separator=::
> diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
> index 1ce425d..03a4001 100644
> --- a/tools/perf/builtin-diff.c
> +++ b/tools/perf/builtin-diff.c
> @@ -744,7 +744,7 @@ static const struct option options[] = {
> OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
> "only consider these symbols"),
> OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
> - "sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline, ..."
> + "sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline, symoff, ..."
> " Please refer the man page for the complete list."),
> OPT_STRING('t', "field-separator", &symbol_conf.field_sep, "separator",
> "separator for columns, no spaces will be added between "
> diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
> index 6e88b9e..3d8a71b 100644
> --- a/tools/perf/util/hist.c
> +++ b/tools/perf/util/hist.c
> @@ -67,13 +67,14 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
> symlen = h->ms.sym->namelen + 4;
> if (verbose)
> symlen += BITS_PER_LONG / 4 + 2 + 3;
> - hists__new_col_len(hists, HISTC_SYMBOL, symlen);
> } else {
> symlen = unresolved_col_width + 4 + 2;
> - hists__new_col_len(hists, HISTC_SYMBOL, symlen);
> hists__set_unres_dso_col_len(hists, HISTC_DSO);
> }
>
> + hists__new_col_len(hists, HISTC_SYMBOL, symlen);
> + hists__new_col_len(hists, HISTC_SYMOFF, symlen);
> +
> len = thread__comm_len(h->thread);
> if (hists__new_col_len(hists, HISTC_COMM, len))
> hists__set_col_len(hists, HISTC_THREAD, len + 6);
> diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
> index d0ef9a1..874b203 100644
> --- a/tools/perf/util/hist.h
> +++ b/tools/perf/util/hist.h
> @@ -24,6 +24,7 @@ enum hist_filter {
>
> enum hist_column {
> HISTC_SYMBOL,
> + HISTC_SYMOFF,
> HISTC_DSO,
> HISTC_THREAD,
> HISTC_COMM,
> diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
> index bea2e07..49ad000 100644
> --- a/tools/perf/util/sort.c
> +++ b/tools/perf/util/sort.c
> @@ -280,6 +280,65 @@ struct sort_entry sort_sym = {
> .se_width_idx = HISTC_SYMBOL,
> };
>
> +static int64_t
> +sort__symoff_cmp(struct hist_entry *left, struct hist_entry *right)
> +{
> + return _sort__addr_cmp(left->ip, right->ip);
> +}
> +
> +static int64_t
> +sort__symoff_collapse(struct hist_entry *left, struct hist_entry *right)
> +{
> + struct symbol *sym_l = left->ms.sym;
> + struct symbol *sym_r = right->ms.sym;
> + u64 symoff_l, symoff_r;
> + int64_t ret;
> +
> + if (!sym_l || !sym_r)
> + return cmp_null(sym_l, sym_r);
> +
> + ret = strcmp(sym_r->name, sym_l->name);
> + if (ret)
> + return ret;
> +
> +
> + symoff_l = left->ip - sym_l->start;
> + symoff_r = right->ip - sym_r->start;
> +
> + return (int64_t)(symoff_r - symoff_l);
> +}
> +
> +static int hist_entry__symoff_snprintf(struct hist_entry *he, char *bf,
> + size_t size, unsigned int width)
> +{
> + struct map *map = he->ms.map;
> + struct symbol *sym = he->ms.sym;
> + size_t ret = 0;
> +
> + if (sym) {
> + ret += repsep_snprintf(bf + ret, size - ret, "%s", sym->name);
> + ret += repsep_snprintf(bf + ret, size - ret, "+0x%llx",
> + he->ip - sym->start);
> +
> + } else {
> + size_t len = BITS_PER_LONG / 4;
> +
> + ret += repsep_snprintf(bf + ret, size - ret, "%-#.*llx", len,
> + map ? map->unmap_ip(map, he->ip) : he->ip);
> + }
> +
> + ret += repsep_snprintf(bf + ret, size - ret, "%-*s",
> + width - ret, "");
> + return ret;
> +}
> +
> +struct sort_entry sort_symoff = {
> + .se_header = "Symbol + Offset",
> + .se_cmp = sort__symoff_cmp,
> + .se_snprintf = hist_entry__symoff_snprintf,
> + .se_width_idx = HISTC_SYMOFF,
> +};
> +
> /* --sort srcline */
>
> static int64_t
> @@ -1172,6 +1231,7 @@ static struct sort_dimension common_sort_dimensions[] = {
> DIM(SORT_COMM, "comm", sort_comm),
> DIM(SORT_DSO, "dso", sort_dso),
> DIM(SORT_SYM, "symbol", sort_sym),
> + DIM(SORT_SYMOFF, "symoff", sort_symoff),
> DIM(SORT_PARENT, "parent", sort_parent),
> DIM(SORT_CPU, "cpu", sort_cpu),
> DIM(SORT_SRCLINE, "srcline", sort_srcline),
> @@ -1443,6 +1503,13 @@ int sort_dimension__add(const char *tok)
>
> } else if (sd->entry == &sort_dso) {
> sort__has_dso = 1;
> + } else if (sd->entry == &sort_symoff) {
> + /*
> + * For symoff sort key, not only the offset but also the
> + * symbol name should be compared.
> + */
> + if (sort__mode == SORT_MODE__DIFF)
> + sd->entry->se_collapse = sort__symoff_collapse;
> }
>
> return __sort_dimension__add(sd);
> diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
> index c03e4ff..ea0122f 100644
> --- a/tools/perf/util/sort.h
> +++ b/tools/perf/util/sort.h
> @@ -38,6 +38,7 @@ extern enum sort_mode sort__mode;
> extern struct sort_entry sort_comm;
> extern struct sort_entry sort_dso;
> extern struct sort_entry sort_sym;
> +extern struct sort_entry sort_symoff;
> extern struct sort_entry sort_parent;
> extern struct sort_entry sort_dso_from;
> extern struct sort_entry sort_dso_to;
> @@ -161,6 +162,7 @@ enum sort_type {
> SORT_COMM,
> SORT_DSO,
> SORT_SYM,
> + SORT_SYMOFF,
> SORT_PARENT,
> SORT_CPU,
> SORT_SRCLINE,
> --
> 1.8.3.2
Em Tue, Nov 18, 2014 at 11:38:18AM -0500, [email protected] escreveu:
> From: Kan Liang <[email protected]>
>
> This patch fixes a bug for perf diff.
> Without mmap2, perf diff fails to find the symbol name. The default
> symbol sort key doesn't work well.
>
> Signed-off-by: Kan Liang <[email protected]>
> Acked-by: Namhyung Kim <[email protected]>
Applied, thanks.
Em Tue, Nov 18, 2014 at 11:38:19AM -0500, [email protected] escreveu:
> From: Kan Liang <[email protected]>
>
> Currently, the perf diff only works with same binaries. That's because
> it compares the symbol start address. It doesn't work if the perf.data
> comes from different binaries. This patch matches the function names.
>
> Signed-off-by: Kan Liang <[email protected]>
> Acked-by: Namhyung Kim <[email protected]>
Can you provide an example of this?
Also, can you add some note on the documentation? The example you use in
the changelog may also be appropriate to have in the man page.
Its been a looong while since I looked at the diff code, but yeah,
comparing by symbol address makes no sense, it had to do a diff from
file name!
The canonical example is:
vim workload.c
make workload
perf record ./workload
vim workload.c # do some changes
perf record ./workload
perf diff
And of course doing that would probably result in symbols moving around
as functions grow in size and new ones are added...
/me scratches head...
- Arnaldo
> ---
> tools/perf/util/sort.c | 9 +++++++++
> 1 file changed, 9 insertions(+)
>
> diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
> index 95efaaf..bea2e07 100644
> --- a/tools/perf/util/sort.c
> +++ b/tools/perf/util/sort.c
> @@ -1432,6 +1432,15 @@ int sort_dimension__add(const char *tok)
> sort__has_parent = 1;
> } else if (sd->entry == &sort_sym) {
> sort__has_sym = 1;
> + /*
> + * perf diff displays the performance difference amongst
> + * two or more perf.data files. Those files could come
> + * from different binaries. So we should not compare
> + * their ips, but the name of symble.
s/symble/symbol/g
> + */
> + if (sort__mode == SORT_MODE__DIFF)
> + sd->entry->se_collapse = sort__sym_sort;
> +
> } else if (sd->entry == &sort_dso) {
> sort__has_dso = 1;
> }
> --
> 1.8.3.2
>
> Em Tue, Nov 18, 2014 at 11:38:20AM -0500, [email protected] escreveu:
> > From: Kan Liang <[email protected]>
> >
> > Sometime, especially debugging scaling issue, the function level diff
> > may be high granularity. The user may want to do deeper diff analysis
> > for some cache or lock issue. The "symoff" key can let the user sort
> > differential profile by ips. This feature should only work when the
> > perf.data comes from same binary.
>
> So, to avoid having people scratching heads, and since we have the build-id
> for both perf.data files, hence for both binaries being compared, can we
> check the build ids and either refuse to do the diff or print a big warning
> about different binaries being compared?
>
Sure. We can compare the buildid on symoff sort,
and print a warning if they are from different binaries.
Currently, there is no code to compare the build-id,
so we also need a patch to enhance the dso.c
What about the code as below?
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -1087,3 +1087,27 @@ enum dso_type dso__type(struct dso *dso,
struct machine *machine)
return dso__type_fd(fd);
}
+
+int dsos__buildid_compare(struct list_head *head_s,
struct list_head *head_t)
+{
+ struct dso *dso_s;
+ struct dso *dso_t;
+ int ret;
+
+
+ for (dso_s = list_entry((head_s)->next, typeof(*dso_s), node),
+ dso_t = list_entry((head_t)->next, typeof(*dso_t), node);
+ ((&dso_s->node != (head_s)) && (&dso_t->node != (head_t)));
+ dso_s = list_entry(dso_s->node.next, typeof(*dso_s), node),
+ dso_t = list_entry(dso_t->node.next, typeof(*dso_t), node)) {
+
+ ret = memcmp(dso_s->build_id, dso_t->build_id,
sizeof(dso_s->build_id));
+ if (ret)
+ return ret;
+ }
+
+ if ((&dso_s->node != (head_s)) || (&dso_t->node != (head_t)))
+ return -1;
+
+ return 0;
+}
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -677,6 +678,29 @@ static void data__free(struct data__file *d)
}
}
+static void buildid_compare(void)
+{
+ struct list_head *base_k_dso = &data__files[0].session
->machines.host.kernel_dsos.head;
+ struct list_head *base_u_dso = &data__files[0].session
->machines.host.user_dsos.head;
+ struct list_head *tmp_k_dso;
+ struct list_head *tmp_u_dso;
+ struct data__file *d;
+ int i;
+
+ data__for_each_file_new(i, d) {
+ tmp_k_dso = &d->session->machines.host.kernel_dsos.head;
+ tmp_u_dso = &d->session->machines.host.user_dsos.head;
+
+ if (dsos__buildid_compare(base_k_dso, tmp_k_dso))
+ pr_warning("The perf.data comes from different kernel. "
+ "The kernel symbol offset may point to different code.\n");
+
+ if (dsos__buildid_compare(base_u_dso, tmp_u_dso))
+ pr_warning("The perf.data comes from different user binary. "
+ "The kernel symbol offset may point to different code.\n");
+ }
+}
+
static int __cmd_diff(void)
{
struct data__file *d;
@@ -699,6 +723,9 @@ static int __cmd_diff(void)
perf_evlist__collapse_resort(d->session->evlist);
}
+ if (sort__has_symoff)
+ buildid_compare();
+
data_process();
out_delete:
Thanks,
Kan
> - Arnaldo
>
> > Here is an example.
> > perf diff -s symoff v1_1_8_1perf.data v1_1_8_2perf.data
> >
> > Event 'cycles'
> >
> > Baseline Delta Symbol + Offset
> > ........ ....... ...............................
> >
> > 0.00% __update_cpu_load+0xcc
> > _raw_spin_lock_irqsave+0x24
> > _raw_spin_unlock_irqrestore+0xa
> > 0.00% apic_timer_interrupt+0x0
> > apic_timer_interrupt+0x68
> > check_preempt_curr+0x1c
> > 0.00% f1+0x20
> > 0.03% +0.03% f2+0x11
> > 0.03% +0.01% f2+0x16
> > 0.05% f2+0x1b
> > 0.04% -0.02% f2+0x20
> > 0.03% f2+0x25
> > 0.17% +0.11% f2+0x29
> > 0.15% f2+0x2d
> > f2+0x30
> > 0.37% +0.02% f3+0x0
> > 0.18% +0.03% f3+0x1
> > 0.01% f3+0x4
> > 0.18% +0.04% f3+0xb
> > 12.31% +0.11% f3+0xd
> > 0.03% f3+0x10
> > 0.80% -0.12% f3+0x13
> > 6.66% +0.09% f3+0x17
> > 1.81% -0.36% f3+0x1b
> > 6.35% +0.24% f3+0x1d
> > 1.42% -0.22% f3+0x21
> > 66.83% +0.22% f3+0x25
> > 1.29% -0.12% f3+0x27
> > 1.22% -0.04% f3+0x28
> > 0.00% load_balance+0x96
> > 0.00% native_apic_mem_write+0x0
> > 0.00% native_write_msr_safe+0xa
> > 0.00% native_write_msr_safe+0xd
> > 0.00% rb_erase+0x381
> > resched_curr+0x5
> > restore_args+0x4
> > 0.00% run_timer_softirq+0x29f
> > select_task_rq_fair+0x9
> > 0.00% select_task_rq_fair+0x1d9
> > task_tick_fair+0x46
> > 0.00% task_tick_fair+0x1ce
> > task_waking_fair+0x27
> > 0.00% try_to_wake_up+0x158
> > update_cfs_rq_blocked_load+0x99
> > 0.00% update_cpu_load_active+0x3b
> > update_group_capacity+0x150
> > update_wall_time+0x3c6
> >
> > Signed-off-by: Kan Liang <[email protected]>
> > ---
> > tools/perf/Documentation/perf-diff.txt | 8 +++-
> > tools/perf/builtin-diff.c | 2 +-
> > tools/perf/util/hist.c | 5 ++-
> > tools/perf/util/hist.h | 1 +
> > tools/perf/util/sort.c | 67
> ++++++++++++++++++++++++++++++++++
> > tools/perf/util/sort.h | 2 +
> > 6 files changed, 80 insertions(+), 5 deletions(-)
> >
> > diff --git a/tools/perf/Documentation/perf-diff.txt
> > b/tools/perf/Documentation/perf-diff.txt
> > index e463caa..9e13911 100644
> > --- a/tools/perf/Documentation/perf-diff.txt
> > +++ b/tools/perf/Documentation/perf-diff.txt
> > @@ -50,8 +50,12 @@ OPTIONS
> >
> > -s::
> > --sort=::
> > - Sort by key(s): pid, comm, dso, symbol, cpu, parent, srcline.
> > - Please see description of --sort in the perf-report man page.
> > + Sort by key(s): pid, comm, dso, symbol, cpu, parent, srcline, symoff.
> > +
> > + - symoff: exact symbol + offset address executed at the time of
> sample.
> > + (for same binary compare)
> > +
> > + For other keys, please see description of --sort in the perf-report
> man page.
> >
> > -t::
> > --field-separator=::
> > diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
> > index 1ce425d..03a4001 100644
> > --- a/tools/perf/builtin-diff.c
> > +++ b/tools/perf/builtin-diff.c
> > @@ -744,7 +744,7 @@ static const struct option options[] = {
> > OPT_STRING('S', "symbols", &symbol_conf.sym_list_str,
> "symbol[,symbol...]",
> > "only consider these symbols"),
> > OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
> > - "sort by key(s): pid, comm, dso, symbol, parent, cpu,
> srcline, ..."
> > + "sort by key(s): pid, comm, dso, symbol, parent, cpu,
> srcline, symoff, ..."
> > " Please refer the man page for the complete list."),
> > OPT_STRING('t', "field-separator", &symbol_conf.field_sep,
> "separator",
> > "separator for columns, no spaces will be added between
> "
> > diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index
> > 6e88b9e..3d8a71b 100644
> > --- a/tools/perf/util/hist.c
> > +++ b/tools/perf/util/hist.c
> > @@ -67,13 +67,14 @@ void hists__calc_col_len(struct hists *hists, struct
> hist_entry *h)
> > symlen = h->ms.sym->namelen + 4;
> > if (verbose)
> > symlen += BITS_PER_LONG / 4 + 2 + 3;
> > - hists__new_col_len(hists, HISTC_SYMBOL, symlen);
> > } else {
> > symlen = unresolved_col_width + 4 + 2;
> > - hists__new_col_len(hists, HISTC_SYMBOL, symlen);
> > hists__set_unres_dso_col_len(hists, HISTC_DSO);
> > }
> >
> > + hists__new_col_len(hists, HISTC_SYMBOL, symlen);
> > + hists__new_col_len(hists, HISTC_SYMOFF, symlen);
> > +
> > len = thread__comm_len(h->thread);
> > if (hists__new_col_len(hists, HISTC_COMM, len))
> > hists__set_col_len(hists, HISTC_THREAD, len + 6); diff --git
> > a/tools/perf/util/hist.h b/tools/perf/util/hist.h index
> > d0ef9a1..874b203 100644
> > --- a/tools/perf/util/hist.h
> > +++ b/tools/perf/util/hist.h
> > @@ -24,6 +24,7 @@ enum hist_filter {
> >
> > enum hist_column {
> > HISTC_SYMBOL,
> > + HISTC_SYMOFF,
> > HISTC_DSO,
> > HISTC_THREAD,
> > HISTC_COMM,
> > diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index
> > bea2e07..49ad000 100644
> > --- a/tools/perf/util/sort.c
> > +++ b/tools/perf/util/sort.c
> > @@ -280,6 +280,65 @@ struct sort_entry sort_sym = {
> > .se_width_idx = HISTC_SYMBOL,
> > };
> >
> > +static int64_t
> > +sort__symoff_cmp(struct hist_entry *left, struct hist_entry *right) {
> > + return _sort__addr_cmp(left->ip, right->ip); }
> > +
> > +static int64_t
> > +sort__symoff_collapse(struct hist_entry *left, struct hist_entry
> > +*right) {
> > + struct symbol *sym_l = left->ms.sym;
> > + struct symbol *sym_r = right->ms.sym;
> > + u64 symoff_l, symoff_r;
> > + int64_t ret;
> > +
> > + if (!sym_l || !sym_r)
> > + return cmp_null(sym_l, sym_r);
> > +
> > + ret = strcmp(sym_r->name, sym_l->name);
> > + if (ret)
> > + return ret;
> > +
> > +
> > + symoff_l = left->ip - sym_l->start;
> > + symoff_r = right->ip - sym_r->start;
> > +
> > + return (int64_t)(symoff_r - symoff_l); }
> > +
> > +static int hist_entry__symoff_snprintf(struct hist_entry *he, char *bf,
> > + size_t size, unsigned int width) {
> > + struct map *map = he->ms.map;
> > + struct symbol *sym = he->ms.sym;
> > + size_t ret = 0;
> > +
> > + if (sym) {
> > + ret += repsep_snprintf(bf + ret, size - ret, "%s", sym-
> >name);
> > + ret += repsep_snprintf(bf + ret, size - ret, "+0x%llx",
> > + he->ip - sym->start);
> > +
> > + } else {
> > + size_t len = BITS_PER_LONG / 4;
> > +
> > + ret += repsep_snprintf(bf + ret, size - ret, "%-#.*llx", len,
> > + map ? map->unmap_ip(map, he->ip) :
> he->ip);
> > + }
> > +
> > + ret += repsep_snprintf(bf + ret, size - ret, "%-*s",
> > + width - ret, "");
> > + return ret;
> > +}
> > +
> > +struct sort_entry sort_symoff = {
> > + .se_header = "Symbol + Offset",
> > + .se_cmp = sort__symoff_cmp,
> > + .se_snprintf = hist_entry__symoff_snprintf,
> > + .se_width_idx = HISTC_SYMOFF,
> > +};
> > +
> > /* --sort srcline */
> >
> > static int64_t
> > @@ -1172,6 +1231,7 @@ static struct sort_dimension
> common_sort_dimensions[] = {
> > DIM(SORT_COMM, "comm", sort_comm),
> > DIM(SORT_DSO, "dso", sort_dso),
> > DIM(SORT_SYM, "symbol", sort_sym),
> > + DIM(SORT_SYMOFF, "symoff", sort_symoff),
> > DIM(SORT_PARENT, "parent", sort_parent),
> > DIM(SORT_CPU, "cpu", sort_cpu),
> > DIM(SORT_SRCLINE, "srcline", sort_srcline), @@ -1443,6 +1503,13
> @@
> > int sort_dimension__add(const char *tok)
> >
> > } else if (sd->entry == &sort_dso) {
> > sort__has_dso = 1;
> > + } else if (sd->entry == &sort_symoff) {
> > + /*
> > + * For symoff sort key, not only the offset but also
> the
> > + * symbol name should be compared.
> > + */
> > + if (sort__mode == SORT_MODE__DIFF)
> > + sd->entry->se_collapse =
> sort__symoff_collapse;
> > }
> >
> > return __sort_dimension__add(sd);
> > diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index
> > c03e4ff..ea0122f 100644
> > --- a/tools/perf/util/sort.h
> > +++ b/tools/perf/util/sort.h
> > @@ -38,6 +38,7 @@ extern enum sort_mode sort__mode; extern struct
> > sort_entry sort_comm; extern struct sort_entry sort_dso; extern
> > struct sort_entry sort_sym;
> > +extern struct sort_entry sort_symoff;
> > extern struct sort_entry sort_parent; extern struct sort_entry
> > sort_dso_from; extern struct sort_entry sort_dso_to; @@ -161,6 +162,7
> > @@ enum sort_type {
> > SORT_COMM,
> > SORT_DSO,
> > SORT_SYM,
> > + SORT_SYMOFF,
> > SORT_PARENT,
> > SORT_CPU,
> > SORT_SRCLINE,
> > --
> > 1.8.3.2
On Tue, 18 Nov 2014 18:13:19 -0300, Arnaldo Carvalho de Melo wrote:
> Em Tue, Nov 18, 2014 at 11:38:20AM -0500, [email protected] escreveu:
>> From: Kan Liang <[email protected]>
>>
>> Sometime, especially debugging scaling issue, the function level diff
>> may be high granularity. The user may want to do deeper diff analysis
>> for some cache or lock issue. The "symoff" key can let the user sort
>> differential profile by ips. This feature should only work when the
>> perf.data comes from same binary.
>
> So, to avoid having people scratching heads, and since we have the
> build-id for both perf.data files, hence for both binaries being
> compared, can we check the build ids and either refuse to do the diff or
> print a big warning about different binaries being compared?
Why do you think so? I think it's fine to use symoff for different
binaries for same reason as the symbol sort key. Even if some functions
were changed, other DSOs and/or functions still have same offset and
thus have meaningful diff result IMHO.
Thanks,
Namhyung
Hi Kan,
On Wed, 19 Nov 2014 14:17:33 +0000, Kan Liang wrote:
>>
>> On Tue, 18 Nov 2014 11:38:20 -0500, kan liang wrote:
>> > From: Kan Liang <[email protected]>
>> >
>> > Sometime, especially debugging scaling issue, the function level diff
>> > may be high granularity. The user may want to do deeper diff analysis
>> > for some cache or lock issue. The "symoff" key can let the user sort
>> > differential profile by ips. This feature should only work when the
>> > perf.data comes from same binary.
>>
>> I think the symoff sort key now works well for different (i.e. modified)
>> binaries too.
>
> For different binaries, the function may be changed. So the offset may
> point to different code.
> What about this?
> "This feature should work when the perf.data comes from
> either same binary or same function of different binary."
> Or just simply remove this sentence.
I'd like to remove it. :)
>
>>
>> >
>> > -s::
>> > --sort=::
>> > - Sort by key(s): pid, comm, dso, symbol, cpu, parent, srcline.
>> > - Please see description of --sort in the perf-report man page.
>> > + Sort by key(s): pid, comm, dso, symbol, cpu, parent, srcline, symoff.
>> > +
>> > + - symoff: exact symbol + offset address executed at the time of
>> sample.
>> > + (for same binary compare)
>>
>> Ditto. And symoff is not only for perf diff, but it should work for normal
>> perf report also. So you'd better move the description to perf report man
>> page IMHO.
>
> OK, I will do it, and also remove "(for same binary compare)"
Thanks!
>
>>
>>
>> > +
>> > + For other keys, please see description of --sort in the perf-report
>> man page.
>> >
>> > -t::
>> > --field-separator=::
>> > diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
>> > index 1ce425d..03a4001 100644
>> > --- a/tools/perf/builtin-diff.c
>> > +++ b/tools/perf/builtin-diff.c
>> > @@ -744,7 +744,7 @@ static const struct option options[] = {
>> > OPT_STRING('S', "symbols", &symbol_conf.sym_list_str,
>> "symbol[,symbol...]",
>> > "only consider these symbols"),
>> > OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
>> > - "sort by key(s): pid, comm, dso, symbol, parent, cpu,
>> srcline, ..."
>> > + "sort by key(s): pid, comm, dso, symbol, parent, cpu,
>> srcline, symoff, ..."
>> > " Please refer the man page for the complete list."),
>> > OPT_STRING('t', "field-separator", &symbol_conf.field_sep,
>> "separator",
>> > "separator for columns, no spaces will be added between
>> "
>> > diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index
>> > 6e88b9e..3d8a71b 100644
>> > --- a/tools/perf/util/hist.c
>> > +++ b/tools/perf/util/hist.c
>> > @@ -67,13 +67,14 @@ void hists__calc_col_len(struct hists *hists, struct
>> hist_entry *h)
>> > symlen = h->ms.sym->namelen + 4;
>> > if (verbose)
>> > symlen += BITS_PER_LONG / 4 + 2 + 3;
>> > - hists__new_col_len(hists, HISTC_SYMBOL, symlen);
>> > } else {
>> > symlen = unresolved_col_width + 4 + 2;
>> > - hists__new_col_len(hists, HISTC_SYMBOL, symlen);
>> > hists__set_unres_dso_col_len(hists, HISTC_DSO);
>> > }
>> >
>> > + hists__new_col_len(hists, HISTC_SYMBOL, symlen);
>> > + hists__new_col_len(hists, HISTC_SYMOFF, symlen);
>>
>> SYMOFF will need larger length at least 6 (for "+0xYYY"). Idealy 3 + symbol
>> size in hexdigit?
>>
>
> We also need to handle the case which doesn't have symbol available.
> What about this?
I'm fine with it (but I believe you'll fix the indentation).
Thanks,
Namhyung
> /*
> + * +6 accounts for '"+0xYYY ' symoff info
> * +4 accounts for '[x] ' priv level info
> * +2 accounts for 0x prefix on raw addresses
> * +3 accounts for ' y ' symtab origin info
> */
> if (h->ms.sym) {
> symlen = h->ms.sym->namelen + 4;
> if (verbose)
> symlen += BITS_PER_LONG / 4 + 2 + 3;
> hists__new_col_len(hists, HISTC_SYMBOL, symlen);
>
> + symlen = h->ms.sym->namelen + 6
> + hists__new_col_len(hists, HISTC_SYMOFF, symlen);
> } else {
> symlen = unresolved_col_width + 4 + 2;
> hists__new_col_len(hists, HISTC_SYMBOL, symlen);
> hists__set_unres_dso_col_len(hists, HISTC_DSO);
> + symlen = unresolved_col_width + 2
> + hists__new_col_len(hists, HISTC_SYMOFF, symlen);
> }
Commit-ID: 68ca9d65b88420c2c97948a0640bad13405129e7
Gitweb: http://git.kernel.org/tip/68ca9d65b88420c2c97948a0640bad13405129e7
Author: Kan Liang <[email protected]>
AuthorDate: Tue, 18 Nov 2014 11:38:18 -0500
Committer: Arnaldo Carvalho de Melo <[email protected]>
CommitDate: Wed, 19 Nov 2014 12:33:48 -0300
perf diff: Add missing handler for PERF_RECORD_MMAP2 events
Without mmap2, perf diff fails to find the symbol name. The default
symbol sort key doesn't work well.
Signed-off-by: Kan Liang <[email protected]>
Acked-by: Namhyung Kim <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Jiri Olsa <[email protected]>
Cc: Namhyung Kim <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/builtin-diff.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 25114c9..1ce425d 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -357,6 +357,7 @@ static int diff__process_sample_event(struct perf_tool *tool __maybe_unused,
static struct perf_tool tool = {
.sample = diff__process_sample_event,
.mmap = perf_event__process_mmap,
+ .mmap2 = perf_event__process_mmap2,
.comm = perf_event__process_comm,
.exit = perf_event__process_exit,
.fork = perf_event__process_fork,
Em Wed, Nov 19, 2014 at 08:44:27PM +0000, Liang, Kan escreveu:
>
>
> >
> > Em Tue, Nov 18, 2014 at 11:38:20AM -0500, [email protected] escreveu:
> > > From: Kan Liang <[email protected]>
> > >
> > > Sometime, especially debugging scaling issue, the function level diff
> > > may be high granularity. The user may want to do deeper diff analysis
> > > for some cache or lock issue. The "symoff" key can let the user sort
> > > differential profile by ips. This feature should only work when the
> > > perf.data comes from same binary.
> >
> > So, to avoid having people scratching heads, and since we have the build-id
> > for both perf.data files, hence for both binaries being compared, can we
> > check the build ids and either refuse to do the diff or print a big warning
> > about different binaries being compared?
> >
>
> Sure. We can compare the buildid on symoff sort,
> and print a warning if they are from different binaries.
>
> Currently, there is no code to compare the build-id,
> so we also need a patch to enhance the dso.c
> What about the code as below?
But this is to compare everything, I thought that when iterating
symbols, say, you got to a (dsoA, symA) comparision to a (dsoB, symB)
you would then check if (dsoA->build_id == dsoB->build_id) and if not,
print the warning, but perhaps it would be better to do what you said,
i.e. compare all the DSO build-ids and if one of them is different,
then, yes, the "composite build_id" of perf.data.1 would be different
from that of perf.data.2 and thus comparisions are for "different
binaries", is that what you mean?
But in the code below you are assuming that the DSOs are in the exact
same order, which I don't know if is an assumption that can be
guaranteed, perhaps it would be better to do something like:
/*
* XXX We're using dso->hit here as it probably is not used at the point
* where we use this function, but to be 100% sure perhaps we need to
* add dsos->visited:1 just after dsos->hit:1.
*/
bool dsos__build_ids_equal(struct list_head *dsos_A, struct list_head *dsos_B)
{
bool ret;
struct dso *dso_A, *dso_B;
list_for_each_entry(dsos_A, dso_A, node) {
dso_B = dsos__find_by_build_id(dsos_A, dso_A->build_id);
if (dso_B == NULL)
return false;
if (memcmp(dso_A->build_id, dso_B->build_id,
sizeof(dso_A->build_id))
return false;
/* Mark that we compared this one */
dso_B->hit = 1;
}
/*
* Now check if there are dsos in dsos_B that are not in
* dsos_A.
*/
ret = true;
list_for_each_entry(dsos_B, dso_B, node) {
if (!dso_B->hit)
ret = false;
else
dso_B->hit = 0;
}
return ret;
}
- Arnaldo
> --- a/tools/perf/util/dso.c
> +++ b/tools/perf/util/dso.c
> @@ -1087,3 +1087,27 @@ enum dso_type dso__type(struct dso *dso,
> struct machine *machine)
>
> return dso__type_fd(fd);
> }
> +
> +int dsos__buildid_compare(struct list_head *head_s,
> struct list_head *head_t)
> +{
> + struct dso *dso_s;
> + struct dso *dso_t;
> + int ret;
> +
> +
> + for (dso_s = list_entry((head_s)->next, typeof(*dso_s), node),
> + dso_t = list_entry((head_t)->next, typeof(*dso_t), node);
> + ((&dso_s->node != (head_s)) && (&dso_t->node != (head_t)));
> + dso_s = list_entry(dso_s->node.next, typeof(*dso_s), node),
> + dso_t = list_entry(dso_t->node.next, typeof(*dso_t), node)) {
> +
> + ret = memcmp(dso_s->build_id, dso_t->build_id,
> sizeof(dso_s->build_id));
> + if (ret)
> + return ret;
> + }
> +
> + if ((&dso_s->node != (head_s)) || (&dso_t->node != (head_t)))
> + return -1;
> +
> + return 0;
> +}
>
> --- a/tools/perf/builtin-diff.c
> +++ b/tools/perf/builtin-diff.c
> @@ -677,6 +678,29 @@ static void data__free(struct data__file *d)
> }
> }
>
> +static void buildid_compare(void)
> +{
> + struct list_head *base_k_dso = &data__files[0].session
> ->machines.host.kernel_dsos.head;
> + struct list_head *base_u_dso = &data__files[0].session
> ->machines.host.user_dsos.head;
> + struct list_head *tmp_k_dso;
> + struct list_head *tmp_u_dso;
> + struct data__file *d;
> + int i;
> +
> + data__for_each_file_new(i, d) {
> + tmp_k_dso = &d->session->machines.host.kernel_dsos.head;
> + tmp_u_dso = &d->session->machines.host.user_dsos.head;
> +
> + if (dsos__buildid_compare(base_k_dso, tmp_k_dso))
> + pr_warning("The perf.data comes from different kernel. "
> + "The kernel symbol offset may point to different code.\n");
> +
> + if (dsos__buildid_compare(base_u_dso, tmp_u_dso))
> + pr_warning("The perf.data comes from different user binary. "
> + "The kernel symbol offset may point to different code.\n");
> + }
> +}
> +
> static int __cmd_diff(void)
> {
> struct data__file *d;
> @@ -699,6 +723,9 @@ static int __cmd_diff(void)
> perf_evlist__collapse_resort(d->session->evlist);
> }
>
> + if (sort__has_symoff)
> + buildid_compare();
> +
> data_process();
>
> out_delete:
>
>
>
> Thanks,
> Kan
>
> > - Arnaldo
> >
> > > Here is an example.
> > > perf diff -s symoff v1_1_8_1perf.data v1_1_8_2perf.data
> > >
> > > Event 'cycles'
> > >
> > > Baseline Delta Symbol + Offset
> > > ........ ....... ...............................
> > >
> > > 0.00% __update_cpu_load+0xcc
> > > _raw_spin_lock_irqsave+0x24
> > > _raw_spin_unlock_irqrestore+0xa
> > > 0.00% apic_timer_interrupt+0x0
> > > apic_timer_interrupt+0x68
> > > check_preempt_curr+0x1c
> > > 0.00% f1+0x20
> > > 0.03% +0.03% f2+0x11
> > > 0.03% +0.01% f2+0x16
> > > 0.05% f2+0x1b
> > > 0.04% -0.02% f2+0x20
> > > 0.03% f2+0x25
> > > 0.17% +0.11% f2+0x29
> > > 0.15% f2+0x2d
> > > f2+0x30
> > > 0.37% +0.02% f3+0x0
> > > 0.18% +0.03% f3+0x1
> > > 0.01% f3+0x4
> > > 0.18% +0.04% f3+0xb
> > > 12.31% +0.11% f3+0xd
> > > 0.03% f3+0x10
> > > 0.80% -0.12% f3+0x13
> > > 6.66% +0.09% f3+0x17
> > > 1.81% -0.36% f3+0x1b
> > > 6.35% +0.24% f3+0x1d
> > > 1.42% -0.22% f3+0x21
> > > 66.83% +0.22% f3+0x25
> > > 1.29% -0.12% f3+0x27
> > > 1.22% -0.04% f3+0x28
> > > 0.00% load_balance+0x96
> > > 0.00% native_apic_mem_write+0x0
> > > 0.00% native_write_msr_safe+0xa
> > > 0.00% native_write_msr_safe+0xd
> > > 0.00% rb_erase+0x381
> > > resched_curr+0x5
> > > restore_args+0x4
> > > 0.00% run_timer_softirq+0x29f
> > > select_task_rq_fair+0x9
> > > 0.00% select_task_rq_fair+0x1d9
> > > task_tick_fair+0x46
> > > 0.00% task_tick_fair+0x1ce
> > > task_waking_fair+0x27
> > > 0.00% try_to_wake_up+0x158
> > > update_cfs_rq_blocked_load+0x99
> > > 0.00% update_cpu_load_active+0x3b
> > > update_group_capacity+0x150
> > > update_wall_time+0x3c6
> > >
> > > Signed-off-by: Kan Liang <[email protected]>
> > > ---
> > > tools/perf/Documentation/perf-diff.txt | 8 +++-
> > > tools/perf/builtin-diff.c | 2 +-
> > > tools/perf/util/hist.c | 5 ++-
> > > tools/perf/util/hist.h | 1 +
> > > tools/perf/util/sort.c | 67
> > ++++++++++++++++++++++++++++++++++
> > > tools/perf/util/sort.h | 2 +
> > > 6 files changed, 80 insertions(+), 5 deletions(-)
> > >
> > > diff --git a/tools/perf/Documentation/perf-diff.txt
> > > b/tools/perf/Documentation/perf-diff.txt
> > > index e463caa..9e13911 100644
> > > --- a/tools/perf/Documentation/perf-diff.txt
> > > +++ b/tools/perf/Documentation/perf-diff.txt
> > > @@ -50,8 +50,12 @@ OPTIONS
> > >
> > > -s::
> > > --sort=::
> > > - Sort by key(s): pid, comm, dso, symbol, cpu, parent, srcline.
> > > - Please see description of --sort in the perf-report man page.
> > > + Sort by key(s): pid, comm, dso, symbol, cpu, parent, srcline, symoff.
> > > +
> > > + - symoff: exact symbol + offset address executed at the time of
> > sample.
> > > + (for same binary compare)
> > > +
> > > + For other keys, please see description of --sort in the perf-report
> > man page.
> > >
> > > -t::
> > > --field-separator=::
> > > diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
> > > index 1ce425d..03a4001 100644
> > > --- a/tools/perf/builtin-diff.c
> > > +++ b/tools/perf/builtin-diff.c
> > > @@ -744,7 +744,7 @@ static const struct option options[] = {
> > > OPT_STRING('S', "symbols", &symbol_conf.sym_list_str,
> > "symbol[,symbol...]",
> > > "only consider these symbols"),
> > > OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
> > > - "sort by key(s): pid, comm, dso, symbol, parent, cpu,
> > srcline, ..."
> > > + "sort by key(s): pid, comm, dso, symbol, parent, cpu,
> > srcline, symoff, ..."
> > > " Please refer the man page for the complete list."),
> > > OPT_STRING('t', "field-separator", &symbol_conf.field_sep,
> > "separator",
> > > "separator for columns, no spaces will be added between
> > "
> > > diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index
> > > 6e88b9e..3d8a71b 100644
> > > --- a/tools/perf/util/hist.c
> > > +++ b/tools/perf/util/hist.c
> > > @@ -67,13 +67,14 @@ void hists__calc_col_len(struct hists *hists, struct
> > hist_entry *h)
> > > symlen = h->ms.sym->namelen + 4;
> > > if (verbose)
> > > symlen += BITS_PER_LONG / 4 + 2 + 3;
> > > - hists__new_col_len(hists, HISTC_SYMBOL, symlen);
> > > } else {
> > > symlen = unresolved_col_width + 4 + 2;
> > > - hists__new_col_len(hists, HISTC_SYMBOL, symlen);
> > > hists__set_unres_dso_col_len(hists, HISTC_DSO);
> > > }
> > >
> > > + hists__new_col_len(hists, HISTC_SYMBOL, symlen);
> > > + hists__new_col_len(hists, HISTC_SYMOFF, symlen);
> > > +
> > > len = thread__comm_len(h->thread);
> > > if (hists__new_col_len(hists, HISTC_COMM, len))
> > > hists__set_col_len(hists, HISTC_THREAD, len + 6); diff --git
> > > a/tools/perf/util/hist.h b/tools/perf/util/hist.h index
> > > d0ef9a1..874b203 100644
> > > --- a/tools/perf/util/hist.h
> > > +++ b/tools/perf/util/hist.h
> > > @@ -24,6 +24,7 @@ enum hist_filter {
> > >
> > > enum hist_column {
> > > HISTC_SYMBOL,
> > > + HISTC_SYMOFF,
> > > HISTC_DSO,
> > > HISTC_THREAD,
> > > HISTC_COMM,
> > > diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index
> > > bea2e07..49ad000 100644
> > > --- a/tools/perf/util/sort.c
> > > +++ b/tools/perf/util/sort.c
> > > @@ -280,6 +280,65 @@ struct sort_entry sort_sym = {
> > > .se_width_idx = HISTC_SYMBOL,
> > > };
> > >
> > > +static int64_t
> > > +sort__symoff_cmp(struct hist_entry *left, struct hist_entry *right) {
> > > + return _sort__addr_cmp(left->ip, right->ip); }
> > > +
> > > +static int64_t
> > > +sort__symoff_collapse(struct hist_entry *left, struct hist_entry
> > > +*right) {
> > > + struct symbol *sym_l = left->ms.sym;
> > > + struct symbol *sym_r = right->ms.sym;
> > > + u64 symoff_l, symoff_r;
> > > + int64_t ret;
> > > +
> > > + if (!sym_l || !sym_r)
> > > + return cmp_null(sym_l, sym_r);
> > > +
> > > + ret = strcmp(sym_r->name, sym_l->name);
> > > + if (ret)
> > > + return ret;
> > > +
> > > +
> > > + symoff_l = left->ip - sym_l->start;
> > > + symoff_r = right->ip - sym_r->start;
> > > +
> > > + return (int64_t)(symoff_r - symoff_l); }
> > > +
> > > +static int hist_entry__symoff_snprintf(struct hist_entry *he, char *bf,
> > > + size_t size, unsigned int width) {
> > > + struct map *map = he->ms.map;
> > > + struct symbol *sym = he->ms.sym;
> > > + size_t ret = 0;
> > > +
> > > + if (sym) {
> > > + ret += repsep_snprintf(bf + ret, size - ret, "%s", sym-
> > >name);
> > > + ret += repsep_snprintf(bf + ret, size - ret, "+0x%llx",
> > > + he->ip - sym->start);
> > > +
> > > + } else {
> > > + size_t len = BITS_PER_LONG / 4;
> > > +
> > > + ret += repsep_snprintf(bf + ret, size - ret, "%-#.*llx", len,
> > > + map ? map->unmap_ip(map, he->ip) :
> > he->ip);
> > > + }
> > > +
> > > + ret += repsep_snprintf(bf + ret, size - ret, "%-*s",
> > > + width - ret, "");
> > > + return ret;
> > > +}
> > > +
> > > +struct sort_entry sort_symoff = {
> > > + .se_header = "Symbol + Offset",
> > > + .se_cmp = sort__symoff_cmp,
> > > + .se_snprintf = hist_entry__symoff_snprintf,
> > > + .se_width_idx = HISTC_SYMOFF,
> > > +};
> > > +
> > > /* --sort srcline */
> > >
> > > static int64_t
> > > @@ -1172,6 +1231,7 @@ static struct sort_dimension
> > common_sort_dimensions[] = {
> > > DIM(SORT_COMM, "comm", sort_comm),
> > > DIM(SORT_DSO, "dso", sort_dso),
> > > DIM(SORT_SYM, "symbol", sort_sym),
> > > + DIM(SORT_SYMOFF, "symoff", sort_symoff),
> > > DIM(SORT_PARENT, "parent", sort_parent),
> > > DIM(SORT_CPU, "cpu", sort_cpu),
> > > DIM(SORT_SRCLINE, "srcline", sort_srcline), @@ -1443,6 +1503,13
> > @@
> > > int sort_dimension__add(const char *tok)
> > >
> > > } else if (sd->entry == &sort_dso) {
> > > sort__has_dso = 1;
> > > + } else if (sd->entry == &sort_symoff) {
> > > + /*
> > > + * For symoff sort key, not only the offset but also
> > the
> > > + * symbol name should be compared.
> > > + */
> > > + if (sort__mode == SORT_MODE__DIFF)
> > > + sd->entry->se_collapse =
> > sort__symoff_collapse;
> > > }
> > >
> > > return __sort_dimension__add(sd);
> > > diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index
> > > c03e4ff..ea0122f 100644
> > > --- a/tools/perf/util/sort.h
> > > +++ b/tools/perf/util/sort.h
> > > @@ -38,6 +38,7 @@ extern enum sort_mode sort__mode; extern struct
> > > sort_entry sort_comm; extern struct sort_entry sort_dso; extern
> > > struct sort_entry sort_sym;
> > > +extern struct sort_entry sort_symoff;
> > > extern struct sort_entry sort_parent; extern struct sort_entry
> > > sort_dso_from; extern struct sort_entry sort_dso_to; @@ -161,6 +162,7
> > > @@ enum sort_type {
> > > SORT_COMM,
> > > SORT_DSO,
> > > SORT_SYM,
> > > + SORT_SYMOFF,
> > > SORT_PARENT,
> > > SORT_CPU,
> > > SORT_SRCLINE,
> > > --
> > > 1.8.3.2