2014-10-31 19:52:34

by Liang, Kan

[permalink] [raw]
Subject: [PATCH 1/1] perf tools: perf diff for different binaries

From: Kan Liang <[email protected]>

perf diff can display the differential profile between two perf.data
files. However, the perf.data files have to come from same binaries.

The patch introduced a key "symbol_name" for --sort option, so the user
can compare the perf.data files from different binaries, even different
kernel version. If symbol_name is set, perf diff display the
differential profile of functions.
It could help the user quickly locate the scaling regression caused by
new binaries/kernels.

Here is an example. I have two version of binaries. One is running on
kernel 3.16. The other is running on kernel 3.17.

Using perf record, I got two perf.data files, v1_1_6perf.data (version
one in 3.16) and v2_1_7perf.data (version two in 3.17).

If I run old perf diff with --sort dso,symbol, we cannot get any useful
information for the user space binary. Furthermore, it failed to compare
the kernel function. E.g. native_write_msr_safe.

perf diff -s dso,symbol --compute ratio v1_1_6perf.data v2_1_7perf.data

Event 'cycles'

Baseline Ratio Shared Object Symbol
........ .............. .................
....................................

[kernel.kallsyms] [k]
__update_entity_load_avg_contrib
[kernel.kallsyms] [k] _raw_spin_lock
[kernel.kallsyms] [k] apic_timer_interrupt
[kernel.kallsyms] [k] hrtimer_interrupt
0.01% [kernel.kallsyms] [k] native_write_msr_safe
[kernel.kallsyms] [k] native_write_msr_safe
0.01% [kernel.kallsyms] [k] notifier_call_chain
0.01% [kernel.kallsyms] [k] perf_event_task_tick
0.01% [kernel.kallsyms] [k] run_posix_cpu_timers
0.01% [kernel.kallsyms] [k] run_timer_softirq
0.01% [kernel.kallsyms] [k] trigger_load_balance
0.01% [kernel.kallsyms] [k] update_vsyscall
0.05% [unknown] [.] 0x0000000000400540
0.04% [unknown] [.] 0x0000000000400541
0.03% [unknown] [.] 0x000000000040054b
0.04% [unknown] [.] 0x0000000000400552
33.55% [unknown] [.] 0x0000000000400554
1.22% [unknown] [.] 0x000000000040055a
8.00% [unknown] [.] 0x000000000040055e
0.02% [unknown] [.] 0x0000000000400562
8.41% [unknown] [.] 0x0000000000400564
48.13% [unknown] [.] 0x000000000040056b
0.16% [unknown] [.] 0x0000000000400570
0.17% [unknown] [.] 0x0000000000400571
[unknown] [.] 0x0000000000400580
[unknown] [.] 0x0000000000400581
0.01% [unknown] [.] 0x0000000000400583
0.01% [unknown] [.] 0x0000000000400588
[unknown] [.] 0x000000000040058b
0.01% 1240.990221 [unknown] [.] 0x000000000040058d
[unknown] [.] 0x0000000000400590
0.06% [unknown] [.] 0x0000000000400591
[unknown] [.] 0x0000000000400593
0.04% [unknown] [.] 0x0000000000400595
0.01% 1240.603148 [unknown] [.] 0x0000000000400597
[unknown] [.] 0x000000000040059b
[unknown] [.] 0x000000000040059d
[unknown] [.] 0x00000000004005a1
[unknown] [.] 0x00000000004005a5
[unknown] [.] 0x00000000004005a7
[unknown] [.] 0x00000000004005a8
[unknown] [.] 0x00000000004005aa
[unknown] [.] 0x00000000004005ba
[unknown] [.] 0x00000000004005bf
[unknown] [.] 0x00000000004005c4
[unknown] [.] 0x00000000004005c9
[unknown] [.] 0x00000000004005ce
[unknown] [.] 0x00000000004005d2
[unknown] [.] 0x00000000004005d6
[unknown] [.] 0x00000000004005d8
[unknown] [.] 0x00000000004005f5

With the key "symbol_name", we can easily get the differential profile
from different binary versions and different kernel versions.

./perf diff -s dso,symbol_name --compute ratio v1_1_6perf.data v2_1_7perf.data

Event 'cycles'

Baseline Ratio Shared Object Symbol Name
........ .............. .................
....................................

[kernel.kallsyms] [k]
__update_entity_load_avg_contrib
[kernel.kallsyms] [k] _raw_spin_lock
[kernel.kallsyms] [k] apic_timer_interrupt
[kernel.kallsyms] [k] hrtimer_interrupt
0.01% 1.998522 [kernel.kallsyms] [k] native_write_msr_safe
0.01% [kernel.kallsyms] [k] notifier_call_chain
0.01% [kernel.kallsyms] [k] perf_event_task_tick
0.01% [kernel.kallsyms] [k] run_posix_cpu_timers
0.01% [kernel.kallsyms] [k] run_timer_softirq
0.01% [kernel.kallsyms] [k] trigger_load_balance
0.01% [kernel.kallsyms] [k] update_vsyscall
tchain_edit [.] f1
0.14% 3.913444 tchain_edit [.] f2
99.82% 1.005478 tchain_edit [.] f3

Signed-off-by: Kan Liang <[email protected]>
---
tools/perf/builtin-diff.c | 5 ++++-
tools/perf/util/sort.c | 26 ++++++++++++++++++++++++++
tools/perf/util/sort.h | 3 +++
3 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 25114c9..71183c1 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -743,7 +743,7 @@ static const struct option options[] = {
OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
"only consider these symbols"),
OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
- "sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline, ..."
+ "sort by key(s): pid, comm, dso, symbol, symbol_name, parent, cpu, srcline, ..."
" Please refer the man page for the complete list."),
OPT_STRING('t', "field-separator", &symbol_conf.field_sep, "separator",
"separator for columns, no spaces will be added between "
@@ -1164,6 +1164,9 @@ int cmd_diff(int argc, const char **argv, const char *prefix __maybe_unused)
if (setup_sorting() < 0)
usage_with_options(diff_usage, options);

+ if (sort__has_sym_name)
+ tool.mmap2 = perf_event__process_mmap2;
+
setup_pager();

sort__setup_elide(NULL);
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 9402885..1226e37 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -20,6 +20,7 @@ int have_ignore_callees = 0;
int sort__need_collapse = 0;
int sort__has_parent = 0;
int sort__has_sym = 0;
+int sort__has_sym_name = 0;
int sort__has_dso = 0;
enum sort_mode sort__mode = SORT_MODE__NORMAL;

@@ -280,6 +281,27 @@ struct sort_entry sort_sym = {
.se_width_idx = HISTC_SYMBOL,
};

+static int64_t
+sort__sym_name_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ const char *sym_name_l, *sym_name_r;
+
+ if (!left->ms.sym || !right->ms.sym)
+ return cmp_null(right->ms.sym, left->ms.sym);
+
+ sym_name_l = left->ms.sym->name;
+ sym_name_r = right->ms.sym->name;
+
+ return strcmp(sym_name_l, sym_name_r);
+}
+
+struct sort_entry sort_sym_name = {
+ .se_header = "Symbol Name",
+ .se_cmp = sort__sym_name_cmp,
+ .se_sort = sort__sym_sort,
+ .se_snprintf = hist_entry__sym_snprintf,
+ .se_width_idx = HISTC_SYMBOL,
+};
/* --sort srcline */

static int64_t
@@ -1170,6 +1192,7 @@ static struct sort_dimension common_sort_dimensions[] = {
DIM(SORT_COMM, "comm", sort_comm),
DIM(SORT_DSO, "dso", sort_dso),
DIM(SORT_SYM, "symbol", sort_sym),
+ DIM(SORT_SYM_NAME, "symbol_name", sort_sym_name),
DIM(SORT_PARENT, "parent", sort_parent),
DIM(SORT_CPU, "cpu", sort_cpu),
DIM(SORT_SRCLINE, "srcline", sort_srcline),
@@ -1430,6 +1453,8 @@ int sort_dimension__add(const char *tok)
sort__has_parent = 1;
} else if (sd->entry == &sort_sym) {
sort__has_sym = 1;
+ } else if (sd->entry == &sort_sym_name) {
+ sort__has_sym_name = 1;
} else if (sd->entry == &sort_dso) {
sort__has_dso = 1;
}
@@ -1809,6 +1834,7 @@ void reset_output_field(void)
sort__need_collapse = 0;
sort__has_parent = 0;
sort__has_sym = 0;
+ sort__has_sym_name = 0;
sort__has_dso = 0;

field_order = NULL;
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index c03e4ff..9b43c6c 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -34,10 +34,12 @@ extern int have_ignore_callees;
extern int sort__need_collapse;
extern int sort__has_parent;
extern int sort__has_sym;
+extern int sort__has_sym_name;
extern enum sort_mode sort__mode;
extern struct sort_entry sort_comm;
extern struct sort_entry sort_dso;
extern struct sort_entry sort_sym;
+extern struct sort_entry sort_sym_name;
extern struct sort_entry sort_parent;
extern struct sort_entry sort_dso_from;
extern struct sort_entry sort_dso_to;
@@ -161,6 +163,7 @@ enum sort_type {
SORT_COMM,
SORT_DSO,
SORT_SYM,
+ SORT_SYM_NAME,
SORT_PARENT,
SORT_CPU,
SORT_SRCLINE,
--
1.8.3.1


2014-10-31 20:03:11

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH 1/1] perf tools: perf diff for different binaries

>
> If I run old perf diff with --sort dso,symbol, we cannot get any useful
> information for the user space binary. Furthermore, it failed to compare
> the kernel function. E.g. native_write_msr_safe.
>
> perf diff -s dso,symbol --compute ratio v1_1_6perf.data v2_1_7perf.data

The new option needs to be documented in the manpage

(with a paragraph explaining that it allows comparison
between different versions of the binary)

-Andi

2014-11-03 10:40:36

by Jiri Olsa

[permalink] [raw]
Subject: Re: [PATCH 1/1] perf tools: perf diff for different binaries

On Fri, Oct 31, 2014 at 05:06:12AM -0700, [email protected] wrote:

SNIP

> 99.82% 1.005478 tchain_edit [.] f3
>
> Signed-off-by: Kan Liang <[email protected]>
> ---
> tools/perf/builtin-diff.c | 5 ++++-
> tools/perf/util/sort.c | 26 ++++++++++++++++++++++++++
> tools/perf/util/sort.h | 3 +++
> 3 files changed, 33 insertions(+), 1 deletion(-)
>
> diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
> index 25114c9..71183c1 100644
> --- a/tools/perf/builtin-diff.c
> +++ b/tools/perf/builtin-diff.c
> @@ -743,7 +743,7 @@ static const struct option options[] = {
> OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
> "only consider these symbols"),
> OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
> - "sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline, ..."
> + "sort by key(s): pid, comm, dso, symbol, symbol_name, parent, cpu, srcline, ..."
> " Please refer the man page for the complete list."),
> OPT_STRING('t', "field-separator", &symbol_conf.field_sep, "separator",
> "separator for columns, no spaces will be added between "
> @@ -1164,6 +1164,9 @@ int cmd_diff(int argc, const char **argv, const char *prefix __maybe_unused)
> if (setup_sorting() < 0)
> usage_with_options(diff_usage, options);
>
> + if (sort__has_sym_name)
> + tool.mmap2 = perf_event__process_mmap2;

why is the mmap2 callback set only for sort__has_sym_name?
Shouldn't we use/define it directly in the tool's definition?

jirka

2014-11-03 14:54:28

by Liang, Kan

[permalink] [raw]
Subject: RE: [PATCH 1/1] perf tools: perf diff for different binaries



> > @@ -1164,6 +1164,9 @@ int cmd_diff(int argc, const char **argv, const
> char *prefix __maybe_unused)
> > if (setup_sorting() < 0)
> > usage_with_options(diff_usage, options);
> >
> > + if (sort__has_sym_name)
> > + tool.mmap2 = perf_event__process_mmap2;
>
> why is the mmap2 callback set only for sort__has_sym_name?
> Shouldn't we use/define it directly in the tool's definition?
>

If we define it in tools definition, perf diff will display the differential profile
between functions (Not previous userspace IPs).
For same binary, IPs are same. So I think it's better to compare between IPs.
For different binaries, IPs change. But most of the function names are same for
most binary update case. So we'd better compare between functions.
I want to keep both IP and function compare.
So only new sort key "symble_name" set mmap2 which implies function compare.
If we only need to compare between IPs, we don't need to set mmap2.

Thanks,
Kan

> jirka

2014-11-04 05:41:43

by Namhyung Kim

[permalink] [raw]
Subject: Re: [PATCH 1/1] perf tools: perf diff for different binaries

Hi kan,

On Fri, 31 Oct 2014 05:06:12 -0700, kan liang wrote:
> From: Kan Liang <[email protected]>
>
> perf diff can display the differential profile between two perf.data
> files. However, the perf.data files have to come from same binaries.
>
> The patch introduced a key "symbol_name" for --sort option, so the user
> can compare the perf.data files from different binaries, even different
> kernel version. If symbol_name is set, perf diff display the
> differential profile of functions.
> It could help the user quickly locate the scaling regression caused by
> new binaries/kernels.
>
> Here is an example. I have two version of binaries. One is running on
> kernel 3.16. The other is running on kernel 3.17.
>
> Using perf record, I got two perf.data files, v1_1_6perf.data (version
> one in 3.16) and v2_1_7perf.data (version two in 3.17).
>
> If I run old perf diff with --sort dso,symbol, we cannot get any useful
> information for the user space binary. Furthermore, it failed to compare
> the kernel function. E.g. native_write_msr_safe.
>
> perf diff -s dso,symbol --compute ratio v1_1_6perf.data v2_1_7perf.data
>
> Event 'cycles'
>
> Baseline Ratio Shared Object Symbol
> ........ .............. .................
> ....................................
>
> [kernel.kallsyms] [k]
> __update_entity_load_avg_contrib
> [kernel.kallsyms] [k] _raw_spin_lock
> [kernel.kallsyms] [k] apic_timer_interrupt
> [kernel.kallsyms] [k] hrtimer_interrupt
> 0.01% [kernel.kallsyms] [k] native_write_msr_safe
> [kernel.kallsyms] [k] native_write_msr_safe
> 0.01% [kernel.kallsyms] [k] notifier_call_chain
> 0.01% [kernel.kallsyms] [k] perf_event_task_tick
> 0.01% [kernel.kallsyms] [k] run_posix_cpu_timers
> 0.01% [kernel.kallsyms] [k] run_timer_softirq
> 0.01% [kernel.kallsyms] [k] trigger_load_balance
> 0.01% [kernel.kallsyms] [k] update_vsyscall
> 0.05% [unknown] [.] 0x0000000000400540
> 0.04% [unknown] [.] 0x0000000000400541
> 0.03% [unknown] [.] 0x000000000040054b
> 0.04% [unknown] [.] 0x0000000000400552
> 33.55% [unknown] [.] 0x0000000000400554
> 1.22% [unknown] [.] 0x000000000040055a
> 8.00% [unknown] [.] 0x000000000040055e
> 0.02% [unknown] [.] 0x0000000000400562
> 8.41% [unknown] [.] 0x0000000000400564
> 48.13% [unknown] [.] 0x000000000040056b
> 0.16% [unknown] [.] 0x0000000000400570
> 0.17% [unknown] [.] 0x0000000000400571
> [unknown] [.] 0x0000000000400580
> [unknown] [.] 0x0000000000400581
> 0.01% [unknown] [.] 0x0000000000400583
> 0.01% [unknown] [.] 0x0000000000400588
> [unknown] [.] 0x000000000040058b
> 0.01% 1240.990221 [unknown] [.] 0x000000000040058d
> [unknown] [.] 0x0000000000400590
> 0.06% [unknown] [.] 0x0000000000400591
> [unknown] [.] 0x0000000000400593
> 0.04% [unknown] [.] 0x0000000000400595
> 0.01% 1240.603148 [unknown] [.] 0x0000000000400597
> [unknown] [.] 0x000000000040059b
> [unknown] [.] 0x000000000040059d
> [unknown] [.] 0x00000000004005a1
> [unknown] [.] 0x00000000004005a5
> [unknown] [.] 0x00000000004005a7
> [unknown] [.] 0x00000000004005a8
> [unknown] [.] 0x00000000004005aa
> [unknown] [.] 0x00000000004005ba
> [unknown] [.] 0x00000000004005bf
> [unknown] [.] 0x00000000004005c4
> [unknown] [.] 0x00000000004005c9
> [unknown] [.] 0x00000000004005ce
> [unknown] [.] 0x00000000004005d2
> [unknown] [.] 0x00000000004005d6
> [unknown] [.] 0x00000000004005d8
> [unknown] [.] 0x00000000004005f5
>
> With the key "symbol_name", we can easily get the differential profile
> from different binary versions and different kernel versions.
>
> ./perf diff -s dso,symbol_name --compute ratio v1_1_6perf.data v2_1_7perf.data
>
> Event 'cycles'
>
> Baseline Ratio Shared Object Symbol Name
> ........ .............. .................
> ....................................
>
> [kernel.kallsyms] [k]
> __update_entity_load_avg_contrib
> [kernel.kallsyms] [k] _raw_spin_lock
> [kernel.kallsyms] [k] apic_timer_interrupt
> [kernel.kallsyms] [k] hrtimer_interrupt
> 0.01% 1.998522 [kernel.kallsyms] [k] native_write_msr_safe
> 0.01% [kernel.kallsyms] [k] notifier_call_chain
> 0.01% [kernel.kallsyms] [k] perf_event_task_tick
> 0.01% [kernel.kallsyms] [k] run_posix_cpu_timers
> 0.01% [kernel.kallsyms] [k] run_timer_softirq
> 0.01% [kernel.kallsyms] [k] trigger_load_balance
> 0.01% [kernel.kallsyms] [k] update_vsyscall
> tchain_edit [.] f1
> 0.14% 3.913444 tchain_edit [.] f2
> 99.82% 1.005478 tchain_edit [.] f3

Hmm.. I think it should be a default behavior for perf diff, otherwise
-s symbol is almost meaningless IMHO. What about setting the
sort_sym.se_collapse in data_process() so that hists__match() can use
symbol names?

Thanks,
Namhyung

2014-11-04 17:08:45

by Liang, Kan

[permalink] [raw]
Subject: RE: [PATCH 1/1] perf tools: perf diff for different binaries

Hi Namhyung,

> > tchain_edit [.] f1
> > 0.14% 3.913444 tchain_edit [.] f2
> > 99.82% 1.005478 tchain_edit [.] f3
>
> Hmm.. I think it should be a default behavior for perf diff, otherwise -s
> symbol is almost meaningless IMHO.

I think we need both instruction level and function level diff.
For debugging scaling issue, I think we need to do deeper analysis for some
cache or lock issue. The function level is too high granularity.

The new option can be used to debug scaling regression issue.
If the binary/kernel is updated, it doesn't make sense to compare the
symbol address, since it should be changed. So comparing the function
should be more useful.


> What about setting the
> sort_sym.se_collapse in data_process() so that hists__match() can use
> symbol names?

Yes, we can set it if we only do function level diff. But I'd like to keep
both. So I defined two sort keys.
"symbol" means "symbol address executed at the time of sample "
"symbol_name" means "name of function executed at the time of sample"

Thanks,
Kan
>
> Thanks,
> Namhyung

2014-11-05 06:32:37

by Namhyung Kim

[permalink] [raw]
Subject: Re: [PATCH 1/1] perf tools: perf diff for different binaries

Hi Kan,

On Tue, 4 Nov 2014 17:07:43 +0000, Kan Liang wrote:
> Hi Namhyung,
>
>> > tchain_edit [.] f1
>> > 0.14% 3.913444 tchain_edit [.] f2
>> > 99.82% 1.005478 tchain_edit [.] f3
>>
>> Hmm.. I think it should be a default behavior for perf diff, otherwise -s
>> symbol is almost meaningless IMHO.
>
> I think we need both instruction level and function level diff.
> For debugging scaling issue, I think we need to do deeper analysis for some
> cache or lock issue. The function level is too high granularity.
>
> The new option can be used to debug scaling regression issue.
> If the binary/kernel is updated, it doesn't make sense to compare the
> symbol address, since it should be changed. So comparing the function
> should be more useful.
>
>
>> What about setting the
>> sort_sym.se_collapse in data_process() so that hists__match() can use
>> symbol names?
>
> Yes, we can set it if we only do function level diff. But I'd like to keep
> both. So I defined two sort keys.
> "symbol" means "symbol address executed at the time of sample "
> "symbol_name" means "name of function executed at the time of sample"

Hmm.. I don't think the symbol sort key provides the instruction level
diff that you want. If it finds a symbol it just use the start address
of the symbol, not the exact address of the sample. Am I missing
something?

Thanks,
Namhyung

2014-11-05 17:31:39

by Liang, Kan

[permalink] [raw]
Subject: RE: [PATCH 1/1] perf tools: perf diff for different binaries


> Hi Kan,
>
> On Tue, 4 Nov 2014 17:07:43 +0000, Kan Liang wrote:
> > Hi Namhyung,
> >
> >> > tchain_edit [.] f1
> >> > 0.14% 3.913444 tchain_edit [.] f2
> >> > 99.82% 1.005478 tchain_edit [.] f3
> >>
> >> Hmm.. I think it should be a default behavior for perf diff,
> >> otherwise -s symbol is almost meaningless IMHO.
> >
> > I think we need both instruction level and function level diff.
> > For debugging scaling issue, I think we need to do deeper analysis for
> > some cache or lock issue. The function level is too high granularity.
> >
> > The new option can be used to debug scaling regression issue.
> > If the binary/kernel is updated, it doesn't make sense to compare the
> > symbol address, since it should be changed. So comparing the function
> > should be more useful.
> >
> >
> >> What about setting the
> >> sort_sym.se_collapse in data_process() so that hists__match() can use
> >> symbol names?
> >
> > Yes, we can set it if we only do function level diff. But I'd like to
> > keep both. So I defined two sort keys.
> > "symbol" means "symbol address executed at the time of sample "
> > "symbol_name" means "name of function executed at the time of
> sample"
>
> Hmm.. I don't think the symbol sort key provides the instruction level diff
> that you want. If it finds a symbol it just use the start address of the
> symbol, not the exact address of the sample. Am I missing something?
>

No, the meaning of symbol in perf diff is different as in perf report. It uses
the exact address of the sample.

Here is current perf diff result and objdump fragment of the first binary.
You can see the exact address of the sample in function f3 was used, not the
start address.
33.55% [unknown] [.] 0x0000000000400554
48.13% [unknown] [.] 0x000000000040056b

[perf-test]#./perf diff -s dso,symbol --compute ratio ../../../v1_1_6perf.data
../../../v2_1_7perf.data
# Event 'cycles'
#
# Baseline Ratio Shared Object Symbol
# ........ .............. ................. .........................
#
[kernel.kallsyms] [k] __update_cpu_load
[kernel.kallsyms] [k] mod_timer
0.01% [kernel.kallsyms] [k] native_write_msr_safe
0.01% [kernel.kallsyms] [k] notifier_call_chain
0.01% [kernel.kallsyms] [k] perf_event_task_tick
[kernel.kallsyms] [k] rt_mutex_slowlock
0.01% [kernel.kallsyms] [k] run_posix_cpu_timers
0.01% [kernel.kallsyms] [k] run_timer_softirq
0.01% [kernel.kallsyms] [k] trigger_load_balance
0.01% [kernel.kallsyms] [k] update_vsyscall
[kernel.kallsyms] [k] wait_for_common
[kernel.kallsyms] [k] 0x000000000028482a
0.05% [unknown] [.] 0x0000000000400540
0.04% [unknown] [.] 0x0000000000400541
0.03% [unknown] [.] 0x000000000040054b
0.04% [unknown] [.] 0x0000000000400552
33.55% [unknown] [.] 0x0000000000400554
1.22% [unknown] [.] 0x000000000040055a
8.00% [unknown] [.] 0x000000000040055e
0.02% [unknown] [.] 0x0000000000400562
8.41% [unknown] [.] 0x0000000000400564
48.13% [unknown] [.] 0x000000000040056b
0.16% [unknown] [.] 0x0000000000400570
0.17% [unknown] [.] 0x0000000000400571
[unknown] [.] 0x0000000000400580
[unknown] [.] 0x0000000000400581
0.01% [unknown] [.] 0x0000000000400583
0.01% [unknown] [.] 0x0000000000400588
[unknown] [.] 0x000000000040058b
0.01% 1240.990221 [unknown] [.] 0x000000000040058d
[unknown] [.] 0x0000000000400590
0.06% [unknown] [.] 0x0000000000400591
[unknown] [.] 0x0000000000400593
0.04% [unknown] [.] 0x0000000000400595
0.01% 1240.603148 [unknown] [.] 0x0000000000400597
[unknown] [.] 0x000000000040059b
[unknown] [.] 0x000000000040059d
[unknown] [.] 0x00000000004005a1
[unknown] [.] 0x00000000004005a5
[unknown] [.] 0x00000000004005a7
[unknown] [.] 0x00000000004005a8
[unknown] [.] 0x00000000004005aa
[unknown] [.] 0x00000000004005ba
[unknown] [.] 0x00000000004005bf
[unknown] [.] 0x00000000004005c4
[unknown] [.] 0x00000000004005c9
[unknown] [.] 0x00000000004005ce
[unknown] [.] 0x00000000004005d2
[unknown] [.] 0x00000000004005d6
[unknown] [.] 0x00000000004005d8
[unknown] [.] 0x00000000004005f5


[perf-test]#objdump -d tchain_edit_1

0000000000400540 <f3>:
400540: 55 push %rbp
400541: 48 89 e5 mov %rsp,%rbp
400544: c7 45 f8 e8 03 00 00 movl $0x3e8,-0x8(%rbp)
40054b: c7 45 fc 00 00 00 00 movl $0x0,-0x4(%rbp)
400552: eb 14 jmp 400568 <f3+0x28>
400554: 8b 45 fc mov -0x4(%rbp),%eax
400557: 83 e0 01 and $0x1,%eax
40055a: 85 c0 test %eax,%eax
40055c: 74 06 je 400564 <f3+0x24>
40055e: 83 45 fc 01 addl $0x1,-0x4(%rbp)
400562: eb 04 jmp 400568 <f3+0x28>
400564: 83 45 fc 01 addl $0x1,-0x4(%rbp)
400568: 8b 45 fc mov -0x4(%rbp),%eax
40056b: 3b 45 f8 cmp -0x8(%rbp),%eax
40056e: 7c e4 jl 400554 <f3+0x14>
400570: 5d pop %rbp
400571: c3 retq


Thanks,
Kan

> Thanks,
> Namhyung

2014-11-06 07:32:58

by Namhyung Kim

[permalink] [raw]
Subject: Re: [PATCH 1/1] perf tools: perf diff for different binaries

Hi Kan,

On Thu, Nov 6, 2014 at 2:28 AM, Liang, Kan <[email protected]> wrote:
>
>> Hi Kan,
>>
>> On Tue, 4 Nov 2014 17:07:43 +0000, Kan Liang wrote:
>> >> What about setting the
>> >> sort_sym.se_collapse in data_process() so that hists__match() can use
>> >> symbol names?
>> >
>> > Yes, we can set it if we only do function level diff. But I'd like to
>> > keep both. So I defined two sort keys.
>> > "symbol" means "symbol address executed at the time of sample "
>> > "symbol_name" means "name of function executed at the time of
>> sample"
>>
>> Hmm.. I don't think the symbol sort key provides the instruction level diff
>> that you want. If it finds a symbol it just use the start address of the
>> symbol, not the exact address of the sample. Am I missing something?
>>
>
> No, the meaning of symbol in perf diff is different as in perf report. It uses
> the exact address of the sample.

Hmm.. are you sure? I think it uses same code..


>
> Here is current perf diff result and objdump fragment of the first binary.
> You can see the exact address of the sample in function f3 was used, not the
> start address.
> 33.55% [unknown] [.] 0x0000000000400554
> 48.13% [unknown] [.] 0x000000000040056b

But I think it's because it failed to find a symbol (f3) for some
reason. The symbol sort key will use the ip address only if it didn't
find a symbol for the address. Please see sort__sym_cmp (and
_sort__sym_cmp too).

Thanks,
Namhyung


>
> [perf-test]#./perf diff -s dso,symbol --compute ratio ../../../v1_1_6perf.data
> ../../../v2_1_7perf.data
> # Event 'cycles'
> #
> # Baseline Ratio Shared Object Symbol
> # ........ .............. ................. .........................
> #
> [kernel.kallsyms] [k] __update_cpu_load
> [kernel.kallsyms] [k] mod_timer
> 0.01% [kernel.kallsyms] [k] native_write_msr_safe
> 0.01% [kernel.kallsyms] [k] notifier_call_chain
> 0.01% [kernel.kallsyms] [k] perf_event_task_tick
> [kernel.kallsyms] [k] rt_mutex_slowlock
> 0.01% [kernel.kallsyms] [k] run_posix_cpu_timers
> 0.01% [kernel.kallsyms] [k] run_timer_softirq
> 0.01% [kernel.kallsyms] [k] trigger_load_balance
> 0.01% [kernel.kallsyms] [k] update_vsyscall
> [kernel.kallsyms] [k] wait_for_common
> [kernel.kallsyms] [k] 0x000000000028482a
> 0.05% [unknown] [.] 0x0000000000400540
> 0.04% [unknown] [.] 0x0000000000400541
> 0.03% [unknown] [.] 0x000000000040054b
> 0.04% [unknown] [.] 0x0000000000400552
> 33.55% [unknown] [.] 0x0000000000400554
> 1.22% [unknown] [.] 0x000000000040055a
> 8.00% [unknown] [.] 0x000000000040055e
> 0.02% [unknown] [.] 0x0000000000400562
> 8.41% [unknown] [.] 0x0000000000400564
> 48.13% [unknown] [.] 0x000000000040056b
> 0.16% [unknown] [.] 0x0000000000400570
> 0.17% [unknown] [.] 0x0000000000400571
> [unknown] [.] 0x0000000000400580
> [unknown] [.] 0x0000000000400581
> 0.01% [unknown] [.] 0x0000000000400583
> 0.01% [unknown] [.] 0x0000000000400588
> [unknown] [.] 0x000000000040058b
> 0.01% 1240.990221 [unknown] [.] 0x000000000040058d
> [unknown] [.] 0x0000000000400590
> 0.06% [unknown] [.] 0x0000000000400591
> [unknown] [.] 0x0000000000400593
> 0.04% [unknown] [.] 0x0000000000400595
> 0.01% 1240.603148 [unknown] [.] 0x0000000000400597
> [unknown] [.] 0x000000000040059b
> [unknown] [.] 0x000000000040059d
> [unknown] [.] 0x00000000004005a1
> [unknown] [.] 0x00000000004005a5
> [unknown] [.] 0x00000000004005a7
> [unknown] [.] 0x00000000004005a8
> [unknown] [.] 0x00000000004005aa
> [unknown] [.] 0x00000000004005ba
> [unknown] [.] 0x00000000004005bf
> [unknown] [.] 0x00000000004005c4
> [unknown] [.] 0x00000000004005c9
> [unknown] [.] 0x00000000004005ce
> [unknown] [.] 0x00000000004005d2
> [unknown] [.] 0x00000000004005d6
> [unknown] [.] 0x00000000004005d8
> [unknown] [.] 0x00000000004005f5
>
>
> [perf-test]#objdump -d tchain_edit_1
>
> 0000000000400540 <f3>:
> 400540: 55 push %rbp
> 400541: 48 89 e5 mov %rsp,%rbp
> 400544: c7 45 f8 e8 03 00 00 movl $0x3e8,-0x8(%rbp)
> 40054b: c7 45 fc 00 00 00 00 movl $0x0,-0x4(%rbp)
> 400552: eb 14 jmp 400568 <f3+0x28>
> 400554: 8b 45 fc mov -0x4(%rbp),%eax
> 400557: 83 e0 01 and $0x1,%eax
> 40055a: 85 c0 test %eax,%eax
> 40055c: 74 06 je 400564 <f3+0x24>
> 40055e: 83 45 fc 01 addl $0x1,-0x4(%rbp)
> 400562: eb 04 jmp 400568 <f3+0x28>
> 400564: 83 45 fc 01 addl $0x1,-0x4(%rbp)
> 400568: 8b 45 fc mov -0x4(%rbp),%eax
> 40056b: 3b 45 f8 cmp -0x8(%rbp),%eax
> 40056e: 7c e4 jl 400554 <f3+0x14>
> 400570: 5d pop %rbp
> 400571: c3 retq
>
>
> Thanks,
> Kan
>
>> Thanks,
>> Namhyung



--
Thanks,
Namhyung

2014-11-06 14:20:10

by Liang, Kan

[permalink] [raw]
Subject: RE: [PATCH 1/1] perf tools: perf diff for different binaries

> Hi Kan,
>
> On Thu, Nov 6, 2014 at 2:28 AM, Liang, Kan <[email protected]> wrote:
> >
> >> Hi Kan,
> >>
> >> On Tue, 4 Nov 2014 17:07:43 +0000, Kan Liang wrote:
> >> >> What about setting the
> >> >> sort_sym.se_collapse in data_process() so that hists__match() can
> >> >> use symbol names?
> >> >
> >> > Yes, we can set it if we only do function level diff. But I'd like
> >> > to keep both. So I defined two sort keys.
> >> > "symbol" means "symbol address executed at the time of sample "
> >> > "symbol_name" means "name of function executed at the time of
> >> sample"
> >>
> >> Hmm.. I don't think the symbol sort key provides the instruction
> >> level diff that you want. If it finds a symbol it just use the start
> >> address of the symbol, not the exact address of the sample. Am I
> missing something?
> >>
> >
> > No, the meaning of symbol in perf diff is different as in perf report.
> > It uses the exact address of the sample.
>
> Hmm.. are you sure? I think it uses same code..
>
Yes, they uses the same code except event_op mmap2.
>
> >
> > Here is current perf diff result and objdump fragment of the first binary.
> > You can see the exact address of the sample in function f3 was used,
> > not the start address.
> > 33.55% [unknown] [.] 0x0000000000400554
> > 48.13% [unknown] [.] 0x000000000040056b
>
> But I think it's because it failed to find a symbol (f3) for some reason. The
> symbol sort key will use the ip address only if it didn't find a symbol for the
> address. Please see sort__sym_cmp (and _sort__sym_cmp too).
>

The diff code doesn’t define event_op mmap2, so it fails to get the symbol.
You are right, it’s ip address. The meaning of symbol for diff and report should
be same.

But I still think the author intends to compare the ip address. Low granularity is
still useful for debugging the scaling issue. So we'd better keep both of them,
and give ip address sorting a proper key name.

"ip" means "IP address executed at the time of sample "
"symbol" means "name of function executed at the time of sample"

What about the attached patch?

Thanks,
Kan

diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 25114c9..3063fbd 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -357,6 +357,7 @@ static int diff__process_sample_event(struct perf_tool *tool __maybe_unused,
static struct perf_tool tool = {
.sample = diff__process_sample_event,
.mmap = perf_event__process_mmap,
+ .mmap2 = perf_event__process_mmap2,
.comm = perf_event__process_comm,
.exit = perf_event__process_exit,
.fork = perf_event__process_fork,
@@ -743,7 +744,7 @@ static const struct option options[] = {
OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
"only consider these symbols"),
OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
- "sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline, ..."
+ "sort by key(s): pid, comm, dso, symbol, ip, parent, cpu, srcline, ..."
" Please refer the man page for the complete list."),
OPT_STRING('t', "field-separator", &symbol_conf.field_sep, "separator",
"separator for columns, no spaces will be added between "
@@ -1164,6 +1165,9 @@ int cmd_diff(int argc, const char **argv, const char *prefix __maybe_unused)
if (setup_sorting() < 0)
usage_with_options(diff_usage, options);

+ if (sort__has_ip)
+ tool.mmap2 = NULL;
+
setup_pager();

sort__setup_elide(NULL);
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 9402885..a59f389 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -20,6 +20,7 @@ int have_ignore_callees = 0;
int sort__need_collapse = 0;
int sort__has_parent = 0;
int sort__has_sym = 0;
+int sort__has_ip = 0;
int sort__has_dso = 0;
enum sort_mode sort__mode = SORT_MODE__NORMAL;

@@ -272,9 +273,32 @@ static int hist_entry__sym_snprintf(struct hist_entry *he, char *bf,
he->level, bf, size, width);
}

+static int64_t
+sort__sym_name_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ const char *sym_name_l, *sym_name_r;
+
+ if (!left->ms.sym || !right->ms.sym)
+ return cmp_null(right->ms.sym, left->ms.sym);
+
+ sym_name_l = left->ms.sym->name;
+ sym_name_r = right->ms.sym->name;
+
+ return strcmp(sym_name_l, sym_name_r);
+}
+
struct sort_entry sort_sym = {
.se_header = "Symbol",
.se_cmp = sort__sym_cmp,
+ .se_collapse = sort__sym_name_cmp,
+ .se_sort = sort__sym_sort,
+ .se_snprintf = hist_entry__sym_snprintf,
+ .se_width_idx = HISTC_SYMBOL,
+};
+
+struct sort_entry sort_ip = {
+ .se_header = "IP address",
+ .se_cmp = sort__sym_cmp,
.se_sort = sort__sym_sort,
.se_snprintf = hist_entry__sym_snprintf,
.se_width_idx = HISTC_SYMBOL,
@@ -1170,6 +1194,7 @@ static struct sort_dimension common_sort_dimensions[] = {
DIM(SORT_COMM, "comm", sort_comm),
DIM(SORT_DSO, "dso", sort_dso),
DIM(SORT_SYM, "symbol", sort_sym),
+ DIM(SORT_IP, "ip", sort_ip),
DIM(SORT_PARENT, "parent", sort_parent),
DIM(SORT_CPU, "cpu", sort_cpu),
DIM(SORT_SRCLINE, "srcline", sort_srcline),
@@ -1430,6 +1455,8 @@ int sort_dimension__add(const char *tok)
sort__has_parent = 1;
} else if (sd->entry == &sort_sym) {
sort__has_sym = 1;
+ } else if (sd->entry == &sort_ip) {
+ sort__has_ip = 1;
} else if (sd->entry == &sort_dso) {
sort__has_dso = 1;
}
@@ -1809,6 +1836,7 @@ void reset_output_field(void)
sort__need_collapse = 0;
sort__has_parent = 0;
sort__has_sym = 0;
+ sort__has_ip = 0;
sort__has_dso = 0;

field_order = NULL;
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index c03e4ff..f00900a 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -34,10 +34,12 @@ extern int have_ignore_callees;
extern int sort__need_collapse;
extern int sort__has_parent;
extern int sort__has_sym;
+extern int sort__has_ip;
extern enum sort_mode sort__mode;
extern struct sort_entry sort_comm;
extern struct sort_entry sort_dso;
extern struct sort_entry sort_sym;
+extern struct sort_entry sort_ip;
extern struct sort_entry sort_parent;
extern struct sort_entry sort_dso_from;
extern struct sort_entry sort_dso_to;
@@ -161,6 +163,7 @@ enum sort_type {
SORT_COMM,
SORT_DSO,
SORT_SYM,
+ SORT_IP,
SORT_PARENT,
SORT_CPU,
SORT_SRCLINE,
????{.n?+???????+%?????ݶ??w??{.n?+????{??G?????{ay?ʇڙ?,j??f???h?????????z_??(?階?ݢj"???m??????G????????????&???~???iO???z??v?^?m???? ????????I?

2014-11-10 05:00:45

by Namhyung Kim

[permalink] [raw]
Subject: Re: [PATCH 1/1] perf tools: perf diff for different binaries

Hi Kan,

On Thu, 6 Nov 2014 14:16:23 +0000, Kan Liang wrote:
> The diff code doesn’t define event_op mmap2, so it fails to get the symbol.

Looks like a bug in perf diff code. (It's too easy to miss... :-/ )


> You are right, it’s ip address. The meaning of symbol for diff and report should
> be same.

Agreed.


>
> But I still think the author intends to compare the ip address. Low granularity is
> still useful for debugging the scaling issue. So we'd better keep both of them,
> and give ip address sorting a proper key name.
>
> "ip" means "IP address executed at the time of sample "
> "symbol" means "name of function executed at the time of sample"

I think the term 'IP address' is confusing since users might expect
network address.


>
> What about the attached patch?
>
> Thanks,
> Kan
>
> diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
> index 25114c9..3063fbd 100644
> --- a/tools/perf/builtin-diff.c
> +++ b/tools/perf/builtin-diff.c
> @@ -357,6 +357,7 @@ static int diff__process_sample_event(struct perf_tool *tool __maybe_unused,
> static struct perf_tool tool = {
> .sample = diff__process_sample_event,
> .mmap = perf_event__process_mmap,
> + .mmap2 = perf_event__process_mmap2,
> .comm = perf_event__process_comm,
> .exit = perf_event__process_exit,
> .fork = perf_event__process_fork,

Please send it as a separate fix.


> @@ -743,7 +744,7 @@ static const struct option options[] = {
> OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
> "only consider these symbols"),
> OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
> - "sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline, ..."
> + "sort by key(s): pid, comm, dso, symbol, ip, parent, cpu, srcline, ..."

With this, you also need to update the documentation.


> " Please refer the man page for the complete list."),
> OPT_STRING('t', "field-separator", &symbol_conf.field_sep, "separator",
> "separator for columns, no spaces will be added between "
> @@ -1164,6 +1165,9 @@ int cmd_diff(int argc, const char **argv, const char *prefix __maybe_unused)
> if (setup_sorting() < 0)
> usage_with_options(diff_usage, options);
>
> + if (sort__has_ip)
> + tool.mmap2 = NULL;
> +

I don't think this is the right thing to do. And I guess you need to
identify symbols anyway.


> setup_pager();
>
> sort__setup_elide(NULL);
> diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
> index 9402885..a59f389 100644
> --- a/tools/perf/util/sort.c
> +++ b/tools/perf/util/sort.c
> @@ -20,6 +20,7 @@ int have_ignore_callees = 0;
> int sort__need_collapse = 0;
> int sort__has_parent = 0;
> int sort__has_sym = 0;
> +int sort__has_ip = 0;

Why is this needed?


> int sort__has_dso = 0;
> enum sort_mode sort__mode = SORT_MODE__NORMAL;
>
> @@ -272,9 +273,32 @@ static int hist_entry__sym_snprintf(struct hist_entry *he, char *bf,
> he->level, bf, size, width);
> }
>
> +static int64_t
> +sort__sym_name_cmp(struct hist_entry *left, struct hist_entry *right)
> +{
> + const char *sym_name_l, *sym_name_r;
> +
> + if (!left->ms.sym || !right->ms.sym)
> + return cmp_null(right->ms.sym, left->ms.sym);
> +
> + sym_name_l = left->ms.sym->name;
> + sym_name_r = right->ms.sym->name;
> +
> + return strcmp(sym_name_l, sym_name_r);
> +}

Looks like doing same thing as sort__sym_sort().


> +
> struct sort_entry sort_sym = {
> .se_header = "Symbol",
> .se_cmp = sort__sym_cmp,
> + .se_collapse = sort__sym_name_cmp,

This will change the behavior of perf report somewhat.


> + .se_sort = sort__sym_sort,
> + .se_snprintf = hist_entry__sym_snprintf,
> + .se_width_idx = HISTC_SYMBOL,
> +};
> +
> +struct sort_entry sort_ip = {
> + .se_header = "IP address",
> + .se_cmp = sort__sym_cmp,

I think what you need is "symbol+offset" comparison so the .se_cmp
should compare hist_entry->ip (ie. mapped addr) instead of sym->start.

But I'm still not sure how it's meaningful since a bit of change will
result in changing offsets so that it cannot find a match.

Thanks,
Namhyung


> .se_sort = sort__sym_sort,
> .se_snprintf = hist_entry__sym_snprintf,
> .se_width_idx = HISTC_SYMBOL,
> @@ -1170,6 +1194,7 @@ static struct sort_dimension common_sort_dimensions[] = {
> DIM(SORT_COMM, "comm", sort_comm),
> DIM(SORT_DSO, "dso", sort_dso),
> DIM(SORT_SYM, "symbol", sort_sym),
> + DIM(SORT_IP, "ip", sort_ip),
> DIM(SORT_PARENT, "parent", sort_parent),
> DIM(SORT_CPU, "cpu", sort_cpu),
> DIM(SORT_SRCLINE, "srcline", sort_srcline),
> @@ -1430,6 +1455,8 @@ int sort_dimension__add(const char *tok)
> sort__has_parent = 1;
> } else if (sd->entry == &sort_sym) {
> sort__has_sym = 1;
> + } else if (sd->entry == &sort_ip) {
> + sort__has_ip = 1;
> } else if (sd->entry == &sort_dso) {
> sort__has_dso = 1;
> }
> @@ -1809,6 +1836,7 @@ void reset_output_field(void)
> sort__need_collapse = 0;
> sort__has_parent = 0;
> sort__has_sym = 0;
> + sort__has_ip = 0;
> sort__has_dso = 0;
>
> field_order = NULL;
> diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
> index c03e4ff..f00900a 100644
> --- a/tools/perf/util/sort.h
> +++ b/tools/perf/util/sort.h
> @@ -34,10 +34,12 @@ extern int have_ignore_callees;
> extern int sort__need_collapse;
> extern int sort__has_parent;
> extern int sort__has_sym;
> +extern int sort__has_ip;
> extern enum sort_mode sort__mode;
> extern struct sort_entry sort_comm;
> extern struct sort_entry sort_dso;
> extern struct sort_entry sort_sym;
> +extern struct sort_entry sort_ip;
> extern struct sort_entry sort_parent;
> extern struct sort_entry sort_dso_from;
> extern struct sort_entry sort_dso_to;
> @@ -161,6 +163,7 @@ enum sort_type {
> SORT_COMM,
> SORT_DSO,
> SORT_SYM,
> + SORT_IP,
> SORT_PARENT,
> SORT_CPU,
> SORT_SRCLINE,