2015-08-11 17:46:04

by Liang, Kan

Subject: [PATCH RFC V10 1/4] perf,tools: per-event callgraph support

From: Kan Liang <[email protected]>

This patchkit adds the ability to set callgraph mode (fp, dwarf, lbr) per
event. This in turn can reduce sampling overhead and the size of perf.data.

Here is an example.

perf record -e 'cpu/cpu-cycles,period=1000,call-graph=fp,time=1/,cpu/instructions,call-graph=lbr/' sleep 1

perf evlist -v
cpu/cpu-cycles,period=1000,call-graph=fp,time=1/: type: 4, size: 112,
config: 0x3c, { sample_period, sample_freq }: 1000, sample_type:
IP|TID|TIME|CALLCHAIN|PERIOD|IDENTIFIER, read_format: ID, disabled: 1,
inherit: 1, mmap: 1, comm: 1, enable_on_exec: 1, task: 1, sample_id_all:
1, exclude_guest: 1, mmap2: 1, comm_exec: 1
cpu/instructions,call-graph=lbr/: type: 4, size: 112, config: 0xc0, {
sample_period, sample_freq }: 4000, sample_type:
IP|TID|TIME|CALLCHAIN|PERIOD|BRANCH_STACK|IDENTIFIER, read_format: ID,
disabled: 1, inherit: 1, freq: 1, enable_on_exec: 1, sample_id_all: 1,
exclude_guest: 1
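
For illustration, below is a minimal, self-contained C sketch of the per-event
term handling this patch adds in apply_config_terms(). The names here
(config_term, apply_terms, round_up_u64) and the fallback dump size are
hypothetical stand-ins rather than the real perf tools API; the sketch only
models how a per-event call-graph string and stack-size value are collected
from the term list and then dispatched.

#include <stdio.h>
#include <string.h>

enum term_type { TERM_CALLGRAPH, TERM_STACK_SIZE };

struct config_term {
        enum term_type type;
        const char *str;        /* used by TERM_CALLGRAPH */
        unsigned long num;      /* used by TERM_STACK_SIZE */
};

/* Round a requested dump size up to a multiple of 8 bytes (sizeof(u64)). */
static unsigned long round_up_u64(unsigned long v)
{
        return (v + 7) & ~7UL;
}

static void apply_terms(const struct config_term *terms, int nr)
{
        const char *callgraph = NULL;
        unsigned long dump_size = 0;
        int i;

        /* First pass: remember the last call-graph and stack-size terms. */
        for (i = 0; i < nr; i++) {
                switch (terms[i].type) {
                case TERM_CALLGRAPH:
                        callgraph = terms[i].str;
                        break;
                case TERM_STACK_SIZE:
                        dump_size = terms[i].num;
                        break;
                }
        }

        if (!callgraph && !dump_size)
                return;         /* nothing per-event, keep the global setting */

        if (callgraph && !strcmp(callgraph, "fp"))
                printf("fp: sample CALLCHAIN\n");
        else if (callgraph && !strcmp(callgraph, "dwarf"))
                printf("dwarf: sample CALLCHAIN + REGS_USER + STACK_USER, dump %lu bytes\n",
                       dump_size ? round_up_u64(dump_size) : 8192UL);
        else if (callgraph && !strcmp(callgraph, "lbr"))
                printf("lbr: sample CALLCHAIN + BRANCH_STACK (call stack)\n");
        else if (callgraph)
                printf("'%s' not recognized, fall back to the global setting\n",
                       callgraph);
}

int main(void)
{
        struct config_term cycles[] = {
                { .type = TERM_CALLGRAPH, .str = "fp" },
        };
        struct config_term insns[] = {
                { .type = TERM_CALLGRAPH, .str = "dwarf" },
                { .type = TERM_STACK_SIZE, .num = 4000 },
        };

        apply_terms(cycles, 1);
        apply_terms(insns, 2);
        return 0;
}

Built with a plain "cc", it prints the sample bits each event would end up
requesting, which is all the real code does with more machinery around it.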

Signed-off-by: Kan Liang <[email protected]>
---

Changes since V9:
- move "call-graph=no" to a separate patch

tools/perf/Documentation/perf-record.txt | 3 ++
tools/perf/util/evsel.c | 62 ++++++++++++++++++++++++++++++--
tools/perf/util/evsel.h | 4 +++
tools/perf/util/parse-events.c | 12 +++++++
tools/perf/util/parse-events.h | 2 ++
tools/perf/util/parse-events.l | 2 ++
tools/perf/util/pmu.c | 4 ++-
7 files changed, 86 insertions(+), 3 deletions(-)

diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index afbe45e..7f82dec 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -53,6 +53,9 @@ OPTIONS
- 'time': Disable/enable time stamping. Acceptable values are 1 for
enabling time stamping. 0 for disabling time stamping.
The default is 1.
+ - 'call-graph': Disable/enable callgraph. Acceptable str are "fp" for
+ FP mode, "dwarf" for DWARF mode, "lbr" for LBR mode.
+ - 'stack-size': user stack size for dwarf mode
Note: If user explicitly sets options which conflict with the params,
the value set by the params will be overridden.

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 04fdddd..6647925 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -588,11 +588,36 @@ perf_evsel__config_callgraph(struct perf_evsel *evsel,
}
}

-static void apply_config_terms(struct perf_evsel *evsel)
+static void
+perf_evsel__reset_callgraph(struct perf_evsel *evsel,
+ struct callchain_param *param)
+{
+ struct perf_event_attr *attr = &evsel->attr;
+
+ perf_evsel__reset_sample_bit(evsel, CALLCHAIN);
+ if (param->record_mode == CALLCHAIN_LBR) {
+ perf_evsel__reset_sample_bit(evsel, BRANCH_STACK);
+ attr->branch_sample_type &= ~(PERF_SAMPLE_BRANCH_USER |
+ PERF_SAMPLE_BRANCH_CALL_STACK);
+ }
+ if (param->record_mode == CALLCHAIN_DWARF) {
+ perf_evsel__reset_sample_bit(evsel, REGS_USER);
+ perf_evsel__reset_sample_bit(evsel, STACK_USER);
+ }
+}
+
+static void apply_config_terms(struct perf_evsel *evsel,
+ struct record_opts *opts)
{
struct perf_evsel_config_term *term;
struct list_head *config_terms = &evsel->config_terms;
struct perf_event_attr *attr = &evsel->attr;
+ struct callchain_param param;
+ u32 dump_size = 0;
+ char *callgraph_buf = NULL;
+
+ /* callgraph default */
+ param.record_mode = callchain_param.record_mode;

list_for_each_entry(term, config_terms, list) {
switch (term->type) {
@@ -610,10 +635,43 @@ static void apply_config_terms(struct perf_evsel *evsel)
else
perf_evsel__reset_sample_bit(evsel, TIME);
break;
+ case PERF_EVSEL__CONFIG_TERM_CALLGRAPH:
+ callgraph_buf = term->val.callgraph;
+ break;
+ case PERF_EVSEL__CONFIG_TERM_STACK_USER:
+ dump_size = term->val.stack_user;
+ break;
default:
break;
}
}
+
+ /* User explicitly set per-event callgraph, clear the old setting and reset. */
+ if ((callgraph_buf != NULL) || (dump_size > 0)) {
+
+ /* parse callgraph parameters */
+ if (callgraph_buf != NULL) {
+ param.enabled = true;
+ if (parse_callchain_record(callgraph_buf, &param)) {
+ pr_err("per-event callgraph setting for %s failed. "
+ "Apply callgraph global setting for it\n",
+ evsel->name);
+ return;
+ }
+ }
+ if (dump_size > 0) {
+ dump_size = round_up(dump_size, sizeof(u64));
+ param.dump_size = dump_size;
+ }
+
+ /* If global callgraph set, clear it */
+ if (callchain_param.enabled)
+ perf_evsel__reset_callgraph(evsel, &callchain_param);
+
+ /* set perf-event callgraph */
+ if (param.enabled)
+ perf_evsel__config_callgraph(evsel, opts, &param);
+ }
}

/*
@@ -812,7 +870,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts)
* Apply event specific term settings,
* it overloads any global configuration.
*/
- apply_config_terms(evsel);
+ apply_config_terms(evsel, opts);
}

static int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index fdf2674..93ac6b1 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -41,6 +41,8 @@ enum {
PERF_EVSEL__CONFIG_TERM_PERIOD,
PERF_EVSEL__CONFIG_TERM_FREQ,
PERF_EVSEL__CONFIG_TERM_TIME,
+ PERF_EVSEL__CONFIG_TERM_CALLGRAPH,
+ PERF_EVSEL__CONFIG_TERM_STACK_USER,
PERF_EVSEL__CONFIG_TERM_MAX,
};

@@ -51,6 +53,8 @@ struct perf_evsel_config_term {
u64 period;
u64 freq;
bool time;
+ char *callgraph;
+ u64 stack_user;
} val;
};

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index dbf315d..d826e6f 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -614,6 +614,12 @@ do { \
return -EINVAL;
}
break;
+ case PARSE_EVENTS__TERM_TYPE_CALLGRAPH:
+ CHECK_TYPE_VAL(STR);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_STACKSIZE:
+ CHECK_TYPE_VAL(NUM);
+ break;
case PARSE_EVENTS__TERM_TYPE_NAME:
CHECK_TYPE_VAL(STR);
break;
@@ -668,6 +674,12 @@ do { \
case PARSE_EVENTS__TERM_TYPE_TIME:
ADD_CONFIG_TERM(TIME, time, term->val.num);
break;
+ case PARSE_EVENTS__TERM_TYPE_CALLGRAPH:
+ ADD_CONFIG_TERM(CALLGRAPH, callgraph, term->val.str);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_STACKSIZE:
+ ADD_CONFIG_TERM(STACK_USER, stack_user, term->val.num);
+ break;
default:
break;
}
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index ce2d13a..a09b0e2 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -65,6 +65,8 @@ enum {
PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ,
PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE,
PARSE_EVENTS__TERM_TYPE_TIME,
+ PARSE_EVENTS__TERM_TYPE_CALLGRAPH,
+ PARSE_EVENTS__TERM_TYPE_STACKSIZE,
};

struct parse_events_term {
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 4306f5a..936d566 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -185,6 +185,8 @@ period { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD); }
freq { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ); }
branch_type { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE); }
time { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_TIME); }
+call-graph { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CALLGRAPH); }
+stack-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_STACKSIZE); }
, { return ','; }
"/" { BEGIN(INITIAL); return '/'; }
{name_minus} { return str(yyscanner, PE_NAME); }
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index d85f11b..84cad05 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -634,7 +634,9 @@ static char *formats_error_string(struct list_head *formats)
{
struct perf_pmu_format *format;
char *err, *str;
- static const char *static_terms = "config,config1,config2,name,period,freq,branch_type,time\n";
+ static const char *static_terms = "config,config1,config2,name,"
+ "period,freq,branch_type,time,"
+ "call-graph,stack-size\n";
unsigned i = 0;

if (!asprintf(&str, "valid terms:"))
--
1.8.3.1


2015-08-11 17:46:39

by Liang, Kan

Subject: [PATCH RFC V10 2/4] perf,tools: disable per-event callgraph support

From: Kan Liang <[email protected]>

This patch introduces "call-graph=no" to disable the callgraph per event.

Here is an example.

perf record -e 'cpu/cpu-cycles,call-graph=fp/,cpu/instructions,call-graph=no/' sleep 1

perf report --stdio

# To display the perf.data header info, please use
--header/--header-only options.
#
#
# Total Lost Samples: 0
#
# Samples: 6 of event 'cpu/cpu-cycles,call-graph=fp/'
# Event count (approx.): 774218
#
# Children Self Command Shared Object Symbol
# ........ ........ ....... ................ ........................................
#
61.94% 0.00% sleep [kernel.vmlinux] [k] entry_SYSCALL_64_fastpath
|
---entry_SYSCALL_64_fastpath
|
|--97.30%-- __brk
|
--2.70%-- mmap64
_dl_check_map_versions
_dl_check_all_versions

61.94% 0.00% sleep [kernel.vmlinux] [k] perf_event_mmap
|
---perf_event_mmap
|
|--97.30%-- do_brk
| sys_brk
| entry_SYSCALL_64_fastpath
| __brk
|
--2.70%-- mmap_region
do_mmap_pgoff
vm_mmap_pgoff
sys_mmap_pgoff
sys_mmap
entry_SYSCALL_64_fastpath
mmap64
_dl_check_map_versions
_dl_check_all_versions
......

# Samples: 6 of event 'cpu/instructions,call-graph=no/'
# Event count (approx.): 359692
#
# Children Self Command Shared Object Symbol
# ........ ........ ....... ................ .................................
#
89.03% 0.00% sleep [unknown] [.] 0xffff6598ffff6598

89.03% 0.00% sleep ld-2.17.so [.] _dl_resolve_conflicts

89.03% 0.00% sleep [kernel.vmlinux] [k] page_fault
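
As a rough model of the new "no" handling (the real logic lives in the
evsel.c hunk below; the enum and function names in this sketch are made up),
the special case simply disables the callchain instead of handing the string
to parse_callchain_record():

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

enum record_mode { MODE_NONE, MODE_FP, MODE_DWARF, MODE_LBR };

struct cg_param { bool enabled; enum record_mode mode; };

/* Mirrors the "no" special case: disable outright, don't parse a mode. */
static int parse_callgraph_term(const char *buf, struct cg_param *p)
{
        if (!strcmp(buf, "no")) {
                p->enabled = false;
                p->mode = MODE_NONE;
                return 0;
        }
        p->enabled = true;
        if (!strcmp(buf, "fp"))
                p->mode = MODE_FP;
        else if (!strcmp(buf, "dwarf"))
                p->mode = MODE_DWARF;
        else if (!strcmp(buf, "lbr"))
                p->mode = MODE_LBR;
        else
                return -1;      /* fall back to the global setting */
        return 0;
}

int main(void)
{
        const char *terms[] = { "fp", "no", "bogus" };
        struct cg_param p;
        unsigned int i;

        for (i = 0; i < sizeof(terms) / sizeof(terms[0]); i++) {
                if (parse_callgraph_term(terms[i], &p))
                        printf("%s: parse failed, keep global callgraph setting\n",
                               terms[i]);
                else
                        printf("%s: enabled=%d mode=%d\n", terms[i],
                               p.enabled, p.mode);
        }
        return 0;
}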

Signed-off-by: Kan Liang <[email protected]>
---

Changes since V9:
- Really disable the callgraph for "call-graph=no" events

tools/perf/Documentation/perf-record.txt | 3 ++-
tools/perf/builtin-annotate.c | 2 ++
tools/perf/builtin-diff.c | 3 +++
tools/perf/tests/hists_cumulate.c | 4 ++++
tools/perf/util/evsel.c | 17 +++++++++++------
tools/perf/util/hist.c | 9 ++++++---
6 files changed, 28 insertions(+), 10 deletions(-)

diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 7f82dec..347a273 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -54,7 +54,8 @@ OPTIONS
enabling time stamping. 0 for disabling time stamping.
The default is 1.
- 'call-graph': Disable/enable callgraph. Acceptable str are "fp" for
- FP mode, "dwarf" for DWARF mode, "lbr" for LBR mode.
+ FP mode, "dwarf" for DWARF mode, "lbr" for LBR mode and
+ "no" for disable callgraph.
- 'stack-size': user stack size for dwarf mode
Note: If user explicitly sets options which conflict with the params,
the value set by the params will be overridden.
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 467a23b..a32a64e 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -239,6 +239,8 @@ static int __cmd_annotate(struct perf_annotate *ann)
if (nr_samples > 0) {
total_nr_samples += nr_samples;
hists__collapse_resort(hists, NULL);
+ /* Don't sort callchain */
+ perf_evsel__reset_sample_bit(pos, CALLCHAIN);
hists__output_resort(hists, NULL);

if (symbol_conf.event_group &&
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index daaa7dc..0b180a8 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -722,6 +722,9 @@ static void data_process(void)
if (verbose || data__files_cnt > 2)
data__fprintf();

+ /* Don't sort callchain for perf diff */
+ perf_evsel__reset_sample_bit(evsel_base, CALLCHAIN);
+
hists__process(hists_base);
}
}
diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c
index 7d82c8b..7ed7370 100644
--- a/tools/perf/tests/hists_cumulate.c
+++ b/tools/perf/tests/hists_cumulate.c
@@ -279,6 +279,7 @@ static int test1(struct perf_evsel *evsel, struct machine *machine)

symbol_conf.use_callchain = false;
symbol_conf.cumulate_callchain = false;
+ perf_evsel__reset_sample_bit(evsel, CALLCHAIN);

setup_sorting();
callchain_register_param(&callchain_param);
@@ -425,6 +426,7 @@ static int test2(struct perf_evsel *evsel, struct machine *machine)

symbol_conf.use_callchain = true;
symbol_conf.cumulate_callchain = false;
+ perf_evsel__set_sample_bit(evsel, CALLCHAIN);

setup_sorting();
callchain_register_param(&callchain_param);
@@ -482,6 +484,7 @@ static int test3(struct perf_evsel *evsel, struct machine *machine)

symbol_conf.use_callchain = false;
symbol_conf.cumulate_callchain = true;
+ perf_evsel__reset_sample_bit(evsel, CALLCHAIN);

setup_sorting();
callchain_register_param(&callchain_param);
@@ -665,6 +668,7 @@ static int test4(struct perf_evsel *evsel, struct machine *machine)

symbol_conf.use_callchain = true;
symbol_conf.cumulate_callchain = true;
+ perf_evsel__set_sample_bit(evsel, CALLCHAIN);

setup_sorting();
callchain_register_param(&callchain_param);
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 6647925..b096ef7 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -651,12 +651,17 @@ static void apply_config_terms(struct perf_evsel *evsel,

/* parse callgraph parameters */
if (callgraph_buf != NULL) {
- param.enabled = true;
- if (parse_callchain_record(callgraph_buf, &param)) {
- pr_err("per-event callgraph setting for %s failed. "
- "Apply callgraph global setting for it\n",
- evsel->name);
- return;
+ if (!strcmp(callgraph_buf, "no")) {
+ param.enabled = false;
+ param.record_mode = CALLCHAIN_NONE;
+ } else {
+ param.enabled = true;
+ if (parse_callchain_record(callgraph_buf, &param)) {
+ pr_err("per-event callgraph setting for %s failed. "
+ "Apply callgraph global setting for it\n",
+ evsel->name);
+ return;
+ }
}
}
if (dump_size > 0) {
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 6bccfae..1cd785b 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -1109,13 +1109,14 @@ void hists__inc_stats(struct hists *hists, struct hist_entry *h)

static void __hists__insert_output_entry(struct rb_root *entries,
struct hist_entry *he,
- u64 min_callchain_hits)
+ u64 min_callchain_hits,
+ bool use_callchain)
{
struct rb_node **p = &entries->rb_node;
struct rb_node *parent = NULL;
struct hist_entry *iter;

- if (symbol_conf.use_callchain)
+ if (use_callchain)
callchain_param.sort(&he->sorted_chain, he->callchain,
min_callchain_hits, &callchain_param);

@@ -1139,6 +1140,8 @@ void hists__output_resort(struct hists *hists, struct ui_progress *prog)
struct rb_node *next;
struct hist_entry *n;
u64 min_callchain_hits;
+ struct perf_evsel *evsel = hists_to_evsel(hists);
+ bool use_callchain = evsel ? (evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) : symbol_conf.use_callchain;

min_callchain_hits = hists->stats.total_period * (callchain_param.min_percent / 100);

@@ -1157,7 +1160,7 @@ void hists__output_resort(struct hists *hists, struct ui_progress *prog)
n = rb_entry(next, struct hist_entry, rb_node_in);
next = rb_next(&n->rb_node_in);

- __hists__insert_output_entry(&hists->entries, n, min_callchain_hits);
+ __hists__insert_output_entry(&hists->entries, n, min_callchain_hits, use_callchain);
hists__inc_stats(hists, n);

if (!n->filtered)
--
1.8.3.1

2015-08-11 17:46:10

by Liang, Kan

Subject: [PATCH RFC V10 3/4] perf,tools: show call graph from reference events

From: Kan Liang <[email protected]>

Introduce --show-ref-call-graph for perf report to print the reference
callgraph for events that were recorded without callgraphs.

Here is an example.

perf report --show-ref-call-graph --stdio

# To display the perf.data header info, please use
--header/--header-only options.
#
#
# Total Lost Samples: 0
#
# Samples: 5 of event 'cpu/cpu-cycles,call-graph=fp/'
# Event count (approx.): 144985
#
# Children Self Command Shared Object Symbol
# ........ ........ ....... ................ ........................................
#
72.30% 0.00% sleep [kernel.vmlinux] [k] entry_SYSCALL_64_fastpath
|
---entry_SYSCALL_64_fastpath
|
|--22.62%-- __GI___libc_nanosleep
--77.38%-- [...]

......

# Samples: 6 of event 'cpu/instructions,call-graph=no/', show reference callgraph
# Event count (approx.): 172780
#
# Children Self Command Shared Object Symbol
# ........ ........ ....... ................ ........................................
#
73.16% 0.00% sleep [kernel.vmlinux] [k] entry_SYSCALL_64_fastpath
|
---entry_SYSCALL_64_fastpath
|
|--31.44%-- __GI___libc_nanosleep
--68.56%-- [...]
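
A minimal sketch of the decision this patch changes in hist.c, with
hypothetical stand-in types (fake_evsel, and an assumed bit value for
PERF_SAMPLE_CALLCHAIN): per event, callchains are sorted only if that event
sampled them itself, unless --show-ref-call-graph asks to fall back to the
global setting so that "call-graph=no" events reuse the reference chains.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SAMPLE_CALLCHAIN (1ULL << 5)    /* stand-in for PERF_SAMPLE_CALLCHAIN */

struct fake_evsel { uint64_t sample_type; };

/* Models the hists__output_resort() choice between the per-event bit
 * and the global symbol_conf.use_callchain setting. */
static bool use_callchain(const struct fake_evsel *evsel,
                          bool show_ref_callgraph, bool global_use_callchain)
{
        if (evsel && !show_ref_callgraph)
                return evsel->sample_type & SAMPLE_CALLCHAIN;
        return global_use_callchain;
}

int main(void)
{
        struct fake_evsel cycles = { .sample_type = SAMPLE_CALLCHAIN };
        struct fake_evsel insns  = { .sample_type = 0 };  /* call-graph=no */

        printf("without --show-ref-call-graph: cycles=%d instructions=%d\n",
               use_callchain(&cycles, false, true),
               use_callchain(&insns, false, true));
        printf("with    --show-ref-call-graph: cycles=%d instructions=%d\n",
               use_callchain(&cycles, true, true),
               use_callchain(&insns, true, true));
        return 0;
}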

Signed-off-by: Kan Liang <[email protected]>
---

Changes since V9:
- new patch to show reference callgraph

tools/perf/Documentation/perf-report.txt | 11 +++++++++++
tools/perf/builtin-report.c | 7 +++++++
tools/perf/ui/browsers/hists.c | 9 +++++++--
tools/perf/util/hist.c | 7 ++++++-
tools/perf/util/symbol.h | 3 ++-
5 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 7b07d19..a18ba75 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -359,6 +359,17 @@ OPTIONS
--full-source-path::
Show the full path for source files for srcline output.

+--show-ref-call-graph::
+ When multiple events are sampled, it may not be needed to collect
+ callgraphs for all of them. The sample sites are usually nearby,
+ and it's enough to collect the callgraphs on a reference event.
+ So user can use "call-graph=no" event modifier to disable callgraph
+ for other events to reduce the overhead.
+ However, perf report cannot show callgraphs for the event which
+ disable the callgraph.
+ This option extends the perf report to show reference callgraphs,
+ which collected by reference event, in no callgraph event.
+
include::callchain-overhead-calculation.txt[]

SEE ALSO
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index f301e86..62b285e 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -316,6 +316,11 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report
if (evname != NULL)
ret += fprintf(fp, " of event '%s'", evname);

+ if (symbol_conf.show_ref_callgraph &&
+ strstr(evname, "call-graph=no")) {
+ ret += fprintf(fp, ", show reference callgraph");
+ }
+
if (rep->mem_mode) {
ret += fprintf(fp, "\n# Total weight : %" PRIu64, nr_events);
ret += fprintf(fp, "\n# Sort order : %s", sort_order ? : default_mem_sort_order);
@@ -740,6 +745,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
itrace_parse_synth_opts),
OPT_BOOLEAN(0, "full-source-path", &srcline_full_filename,
"Show full source file name path for source lines"),
+ OPT_BOOLEAN(0, "show-ref-call-graph", &symbol_conf.show_ref_callgraph,
+ "Show callgraph from reference event"),
OPT_END()
};
struct perf_data_file file = {
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index fa67613..03f09de 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -1267,6 +1267,8 @@ static int hists__browser_title(struct hists *hists,
const char *ev_name = perf_evsel__name(evsel);
char buf[512];
size_t buflen = sizeof(buf);
+ char ref[30] = " show reference callgraph, ";
+ bool enable_ref = false;

if (symbol_conf.filter_relative) {
nr_samples = hists->stats.nr_non_filtered_samples;
@@ -1292,10 +1294,13 @@ static int hists__browser_title(struct hists *hists,
}
}

+ if (symbol_conf.show_ref_callgraph &&
+ strstr(ev_name, "call-graph=no"))
+ enable_ref = true;
nr_samples = convert_unit(nr_samples, &unit);
printed = scnprintf(bf, size,
- "Samples: %lu%c of event '%s', Event count (approx.): %" PRIu64,
- nr_samples, unit, ev_name, nr_events);
+ "Samples: %lu%c of event '%s',%sEvent count (approx.): %" PRIu64,
+ nr_samples, unit, ev_name, enable_ref ? ref : " ", nr_events);


if (hists->uid_filter_str)
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 1cd785b..08b6cd9 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -1141,7 +1141,12 @@ void hists__output_resort(struct hists *hists, struct ui_progress *prog)
struct hist_entry *n;
u64 min_callchain_hits;
struct perf_evsel *evsel = hists_to_evsel(hists);
- bool use_callchain = evsel ? (evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) : symbol_conf.use_callchain;
+ bool use_callchain;
+
+ if (evsel && !symbol_conf.show_ref_callgraph)
+ use_callchain = evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN;
+ else
+ use_callchain = symbol_conf.use_callchain;

min_callchain_hits = hists->stats.total_period * (callchain_param.min_percent / 100);

diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index b98ce51..a4cde92 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -106,7 +106,8 @@ struct symbol_conf {
filter_relative,
show_hist_headers,
branch_callstack,
- has_filter;
+ has_filter,
+ show_ref_callgraph;
const char *vmlinux_name,
*kallsyms_name,
*source_prefix,
--
1.8.3.1

2015-08-11 17:46:08

by Liang, Kan

Subject: [PATCH RFC V10 4/4] perf,tests: Add tests to callgraph and time parse

From: Kan Liang <[email protected]>

Add tests in tests/parse-events.c to check the call-graph and time options.
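
As a simplified illustration of what the new test asserts (TEST_ASSERT_VAL
here is a stand-in macro and the sample_type bit values are assumptions, not
the real tests/parse-events.c infrastructure): at parse time the per-event
terms are only recorded, so the attr should not yet have the callchain or
time bits set.

#include <stdint.h>
#include <stdio.h>

#define SAMPLE_TIME      (1ULL << 2)    /* assumed PERF_SAMPLE_TIME bit */
#define SAMPLE_CALLCHAIN (1ULL << 5)    /* assumed PERF_SAMPLE_CALLCHAIN bit */

/* Minimal stand-in for perf's TEST_ASSERT_VAL macro. */
#define TEST_ASSERT_VAL(text, cond)                             \
        do {                                                    \
                if (!(cond)) {                                  \
                        fprintf(stderr, "FAILED: %s\n", text);  \
                        return -1;                              \
                }                                               \
        } while (0)

struct fake_attr { uint64_t sample_type; uint64_t sample_period; };

static int check_parsed_event(const struct fake_attr *attr)
{
        /* The terms are applied later in perf_evlist__config(), not here. */
        TEST_ASSERT_VAL("wrong period", attr->sample_period == 0);
        TEST_ASSERT_VAL("wrong callgraph",
                        !(attr->sample_type & SAMPLE_CALLCHAIN));
        TEST_ASSERT_VAL("wrong time", !(attr->sample_type & SAMPLE_TIME));
        return 0;
}

int main(void)
{
        struct fake_attr attr = { .sample_type = 0, .sample_period = 0 };

        return check_parsed_event(&attr) ? 1 : 0;
}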

Signed-off-by: Kan Liang <[email protected]>
---
tools/perf/tests/parse-events.c | 38 ++++++++++++++++++++++++++++++++++++++
1 file changed, 38 insertions(+)

diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index f65bb89..9b6b2b63 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -479,6 +479,39 @@ static int test__checkevent_pmu_name(struct perf_evlist *evlist)
return 0;
}

+static int test__checkevent_pmu_partial_time_callgraph(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+ /* cpu/config=1,call-graph=fp,time,period=100000/ */
+ TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config", 1 == evsel->attr.config);
+ /*
+ * The period, time and callgraph value gets configured
+ * within perf_evlist__config,
+ * while this test executes only parse events method.
+ */
+ TEST_ASSERT_VAL("wrong period", 0 == evsel->attr.sample_period);
+ TEST_ASSERT_VAL("wrong callgraph", !(PERF_SAMPLE_CALLCHAIN & evsel->attr.sample_type));
+ TEST_ASSERT_VAL("wrong time", !(PERF_SAMPLE_TIME & evsel->attr.sample_type));
+
+ /* cpu/config=2,call-graph=no,time=0,period=2000/ */
+ evsel = perf_evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config", 2 == evsel->attr.config);
+ /*
+ * The period, time and callgraph value gets configured
+ * within perf_evlist__config,
+ * while this test executes only parse events method.
+ */
+ TEST_ASSERT_VAL("wrong period", 0 == evsel->attr.sample_period);
+ TEST_ASSERT_VAL("wrong callgraph", !(PERF_SAMPLE_CALLCHAIN & evsel->attr.sample_type));
+ TEST_ASSERT_VAL("wrong time", !(PERF_SAMPLE_TIME & evsel->attr.sample_type));
+
+ return 0;
+}
+
static int test__checkevent_pmu_events(struct perf_evlist *evlist)
{
struct perf_evsel *evsel = perf_evlist__first(evlist);
@@ -1555,6 +1588,11 @@ static struct evlist_test test__events_pmu[] = {
.check = test__checkevent_pmu_name,
.id = 1,
},
+ {
+ .name = "cpu/config=1,call-graph=fp,time,period=100000/,cpu/config=2,call-graph=no,time=0,period=2000/",
+ .check = test__checkevent_pmu_partial_time_callgraph,
+ .id = 2,
+ },
};

struct terms_test {
--
1.8.3.1

Subject: [tip:perf/core] perf callchain: Per-event type selection support

Commit-ID: d457c96392bb418bd998f3ccf93e0e4c958fcd0f
Gitweb: http://git.kernel.org/tip/d457c96392bb418bd998f3ccf93e0e4c958fcd0f
Author: Kan Liang <[email protected]>
AuthorDate: Tue, 11 Aug 2015 06:30:47 -0400
Committer: Arnaldo Carvalho de Melo <[email protected]>
CommitDate: Wed, 12 Aug 2015 13:20:27 -0300

perf callchain: Per-event type selection support

This patchkit adds the ability to set callgraph mode (fp, dwarf, lbr) per
event. This in turn can reduce sampling overhead and the size of perf.data.

Here is an example.

perf record -e 'cpu/cpu-cycles,period=1000,call-graph=fp,time=1/,cpu/instructions,call-graph=lbr/' sleep 1

perf evlist -v
cpu/cpu-cycles,period=1000,call-graph=fp,time=1/: type: 4, size: 112,
config: 0x3c, { sample_period, sample_freq }: 1000, sample_type:
IP|TID|TIME|CALLCHAIN|PERIOD|IDENTIFIER, read_format: ID, disabled: 1,
inherit: 1, mmap: 1, comm: 1, enable_on_exec: 1, task: 1, sample_id_all:
1, exclude_guest: 1, mmap2: 1, comm_exec: 1
cpu/instructions,call-graph=lbr/: type: 4, size: 112, config: 0xc0, {
sample_period, sample_freq }: 4000, sample_type:
IP|TID|TIME|CALLCHAIN|PERIOD|BRANCH_STACK|IDENTIFIER, read_format: ID,
disabled: 1, inherit: 1, freq: 1, enable_on_exec: 1, sample_id_all: 1,
exclude_guest: 1

Signed-off-by: Kan Liang <[email protected]>
Tested-by: Arnaldo Carvalho de Melo <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Jiri Olsa <[email protected]>
Cc: Namhyung Kim <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/Documentation/perf-record.txt | 3 ++
tools/perf/util/evsel.c | 62 ++++++++++++++++++++++++++++++--
tools/perf/util/evsel.h | 4 +++
tools/perf/util/parse-events.c | 12 +++++++
tools/perf/util/parse-events.h | 2 ++
tools/perf/util/parse-events.l | 2 ++
tools/perf/util/pmu.c | 4 ++-
7 files changed, 86 insertions(+), 3 deletions(-)

diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index afbe45e..7f82dec 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -53,6 +53,9 @@ OPTIONS
- 'time': Disable/enable time stamping. Acceptable values are 1 for
enabling time stamping. 0 for disabling time stamping.
The default is 1.
+ - 'call-graph': Disable/enable callgraph. Acceptable str are "fp" for
+ FP mode, "dwarf" for DWARF mode, "lbr" for LBR mode.
+ - 'stack-size': user stack size for dwarf mode
Note: If user explicitly sets options which conflict with the params,
the value set by the params will be overridden.

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 04fdddd..6647925 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -588,11 +588,36 @@ perf_evsel__config_callgraph(struct perf_evsel *evsel,
}
}

-static void apply_config_terms(struct perf_evsel *evsel)
+static void
+perf_evsel__reset_callgraph(struct perf_evsel *evsel,
+ struct callchain_param *param)
+{
+ struct perf_event_attr *attr = &evsel->attr;
+
+ perf_evsel__reset_sample_bit(evsel, CALLCHAIN);
+ if (param->record_mode == CALLCHAIN_LBR) {
+ perf_evsel__reset_sample_bit(evsel, BRANCH_STACK);
+ attr->branch_sample_type &= ~(PERF_SAMPLE_BRANCH_USER |
+ PERF_SAMPLE_BRANCH_CALL_STACK);
+ }
+ if (param->record_mode == CALLCHAIN_DWARF) {
+ perf_evsel__reset_sample_bit(evsel, REGS_USER);
+ perf_evsel__reset_sample_bit(evsel, STACK_USER);
+ }
+}
+
+static void apply_config_terms(struct perf_evsel *evsel,
+ struct record_opts *opts)
{
struct perf_evsel_config_term *term;
struct list_head *config_terms = &evsel->config_terms;
struct perf_event_attr *attr = &evsel->attr;
+ struct callchain_param param;
+ u32 dump_size = 0;
+ char *callgraph_buf = NULL;
+
+ /* callgraph default */
+ param.record_mode = callchain_param.record_mode;

list_for_each_entry(term, config_terms, list) {
switch (term->type) {
@@ -610,10 +635,43 @@ static void apply_config_terms(struct perf_evsel *evsel)
else
perf_evsel__reset_sample_bit(evsel, TIME);
break;
+ case PERF_EVSEL__CONFIG_TERM_CALLGRAPH:
+ callgraph_buf = term->val.callgraph;
+ break;
+ case PERF_EVSEL__CONFIG_TERM_STACK_USER:
+ dump_size = term->val.stack_user;
+ break;
default:
break;
}
}
+
+ /* User explicitly set per-event callgraph, clear the old setting and reset. */
+ if ((callgraph_buf != NULL) || (dump_size > 0)) {
+
+ /* parse callgraph parameters */
+ if (callgraph_buf != NULL) {
+ param.enabled = true;
+ if (parse_callchain_record(callgraph_buf, &param)) {
+ pr_err("per-event callgraph setting for %s failed. "
+ "Apply callgraph global setting for it\n",
+ evsel->name);
+ return;
+ }
+ }
+ if (dump_size > 0) {
+ dump_size = round_up(dump_size, sizeof(u64));
+ param.dump_size = dump_size;
+ }
+
+ /* If global callgraph set, clear it */
+ if (callchain_param.enabled)
+ perf_evsel__reset_callgraph(evsel, &callchain_param);
+
+ /* set perf-event callgraph */
+ if (param.enabled)
+ perf_evsel__config_callgraph(evsel, opts, &param);
+ }
}

/*
@@ -812,7 +870,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts)
* Apply event specific term settings,
* it overloads any global configuration.
*/
- apply_config_terms(evsel);
+ apply_config_terms(evsel, opts);
}

static int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index fdf2674..93ac6b1 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -41,6 +41,8 @@ enum {
PERF_EVSEL__CONFIG_TERM_PERIOD,
PERF_EVSEL__CONFIG_TERM_FREQ,
PERF_EVSEL__CONFIG_TERM_TIME,
+ PERF_EVSEL__CONFIG_TERM_CALLGRAPH,
+ PERF_EVSEL__CONFIG_TERM_STACK_USER,
PERF_EVSEL__CONFIG_TERM_MAX,
};

@@ -51,6 +53,8 @@ struct perf_evsel_config_term {
u64 period;
u64 freq;
bool time;
+ char *callgraph;
+ u64 stack_user;
} val;
};

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index dbf315d..d826e6f 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -614,6 +614,12 @@ do { \
return -EINVAL;
}
break;
+ case PARSE_EVENTS__TERM_TYPE_CALLGRAPH:
+ CHECK_TYPE_VAL(STR);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_STACKSIZE:
+ CHECK_TYPE_VAL(NUM);
+ break;
case PARSE_EVENTS__TERM_TYPE_NAME:
CHECK_TYPE_VAL(STR);
break;
@@ -668,6 +674,12 @@ do { \
case PARSE_EVENTS__TERM_TYPE_TIME:
ADD_CONFIG_TERM(TIME, time, term->val.num);
break;
+ case PARSE_EVENTS__TERM_TYPE_CALLGRAPH:
+ ADD_CONFIG_TERM(CALLGRAPH, callgraph, term->val.str);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_STACKSIZE:
+ ADD_CONFIG_TERM(STACK_USER, stack_user, term->val.num);
+ break;
default:
break;
}
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index ce2d13a..a09b0e2 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -65,6 +65,8 @@ enum {
PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ,
PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE,
PARSE_EVENTS__TERM_TYPE_TIME,
+ PARSE_EVENTS__TERM_TYPE_CALLGRAPH,
+ PARSE_EVENTS__TERM_TYPE_STACKSIZE,
};

struct parse_events_term {
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 4306f5a..936d566 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -185,6 +185,8 @@ period { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD); }
freq { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ); }
branch_type { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE); }
time { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_TIME); }
+call-graph { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CALLGRAPH); }
+stack-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_STACKSIZE); }
, { return ','; }
"/" { BEGIN(INITIAL); return '/'; }
{name_minus} { return str(yyscanner, PE_NAME); }
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index d85f11b..84cad05 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -634,7 +634,9 @@ static char *formats_error_string(struct list_head *formats)
{
struct perf_pmu_format *format;
char *err, *str;
- static const char *static_terms = "config,config1,config2,name,period,freq,branch_type,time\n";
+ static const char *static_terms = "config,config1,config2,name,"
+ "period,freq,branch_type,time,"
+ "call-graph,stack-size\n";
unsigned i = 0;

if (!asprintf(&str, "valid terms:"))

Subject: [tip:perf/core] perf callchain: Allow disabling call graphs per event

Commit-ID: f9db0d0f1b2cf030083c83d3ed3a4bbae6bdc8b7
Gitweb: http://git.kernel.org/tip/f9db0d0f1b2cf030083c83d3ed3a4bbae6bdc8b7
Author: Kan Liang <[email protected]>
AuthorDate: Tue, 11 Aug 2015 06:30:48 -0400
Committer: Arnaldo Carvalho de Melo <[email protected]>
CommitDate: Wed, 12 Aug 2015 13:20:28 -0300

perf callchain: Allow disabling call graphs per event

This patch introduces "call-graph=no" to disable the callgraph per event.

Here is an example.

perf record -e 'cpu/cpu-cycles,call-graph=fp/,cpu/instructions,call-graph=no/' sleep 1

perf report --stdio

# To display the perf.data header info, please use
--header/--header-only options.
#
#
# Total Lost Samples: 0
#
# Samples: 6 of event 'cpu/cpu-cycles,call-graph=fp/'
# Event count (approx.): 774218
#
# Children Self Command Shared Object Symbol
# ........ ........ ....... ................ ........................................
#
61.94% 0.00% sleep [kernel.vmlinux] [k] entry_SYSCALL_64_fastpath
|
---entry_SYSCALL_64_fastpath
|
|--97.30%-- __brk
|
--2.70%-- mmap64
_dl_check_map_versions
_dl_check_all_versions

61.94% 0.00% sleep [kernel.vmlinux] [k] perf_event_mmap
|
---perf_event_mmap
|
|--97.30%-- do_brk
| sys_brk
| entry_SYSCALL_64_fastpath
| __brk
|
--2.70%-- mmap_region
do_mmap_pgoff
vm_mmap_pgoff
sys_mmap_pgoff
sys_mmap
entry_SYSCALL_64_fastpath
mmap64
_dl_check_map_versions
_dl_check_all_versions
......

# Samples: 6 of event 'cpu/instructions,call-graph=no/'
# Event count (approx.): 359692
#
# Children Self Command Shared Object Symbol
# ........ ........ ....... ................ .................................
#
89.03% 0.00% sleep [unknown] [.] 0xffff6598ffff6598
89.03% 0.00% sleep ld-2.17.so [.] _dl_resolve_conflicts
89.03% 0.00% sleep [kernel.vmlinux] [k] page_fault

Signed-off-by: Kan Liang <[email protected]>
Tested-by: Arnaldo Carvalho de Melo <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Jiri Olsa <[email protected]>
Cc: Namhyung Kim <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/Documentation/perf-record.txt | 3 ++-
tools/perf/builtin-annotate.c | 2 ++
tools/perf/builtin-diff.c | 3 +++
tools/perf/tests/hists_cumulate.c | 4 ++++
tools/perf/util/evsel.c | 17 +++++++++++------
tools/perf/util/hist.c | 9 ++++++---
6 files changed, 28 insertions(+), 10 deletions(-)

diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 7f82dec..347a273 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -54,7 +54,8 @@ OPTIONS
enabling time stamping. 0 for disabling time stamping.
The default is 1.
- 'call-graph': Disable/enable callgraph. Acceptable str are "fp" for
- FP mode, "dwarf" for DWARF mode, "lbr" for LBR mode.
+ FP mode, "dwarf" for DWARF mode, "lbr" for LBR mode and
+ "no" for disable callgraph.
- 'stack-size': user stack size for dwarf mode
Note: If user explicitly sets options which conflict with the params,
the value set by the params will be overridden.
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 467a23b..a32a64e 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -239,6 +239,8 @@ static int __cmd_annotate(struct perf_annotate *ann)
if (nr_samples > 0) {
total_nr_samples += nr_samples;
hists__collapse_resort(hists, NULL);
+ /* Don't sort callchain */
+ perf_evsel__reset_sample_bit(pos, CALLCHAIN);
hists__output_resort(hists, NULL);

if (symbol_conf.event_group &&
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index daaa7dc..0b180a8 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -722,6 +722,9 @@ static void data_process(void)
if (verbose || data__files_cnt > 2)
data__fprintf();

+ /* Don't sort callchain for perf diff */
+ perf_evsel__reset_sample_bit(evsel_base, CALLCHAIN);
+
hists__process(hists_base);
}
}
diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c
index 7d82c8b..7ed7370 100644
--- a/tools/perf/tests/hists_cumulate.c
+++ b/tools/perf/tests/hists_cumulate.c
@@ -279,6 +279,7 @@ static int test1(struct perf_evsel *evsel, struct machine *machine)

symbol_conf.use_callchain = false;
symbol_conf.cumulate_callchain = false;
+ perf_evsel__reset_sample_bit(evsel, CALLCHAIN);

setup_sorting();
callchain_register_param(&callchain_param);
@@ -425,6 +426,7 @@ static int test2(struct perf_evsel *evsel, struct machine *machine)

symbol_conf.use_callchain = true;
symbol_conf.cumulate_callchain = false;
+ perf_evsel__set_sample_bit(evsel, CALLCHAIN);

setup_sorting();
callchain_register_param(&callchain_param);
@@ -482,6 +484,7 @@ static int test3(struct perf_evsel *evsel, struct machine *machine)

symbol_conf.use_callchain = false;
symbol_conf.cumulate_callchain = true;
+ perf_evsel__reset_sample_bit(evsel, CALLCHAIN);

setup_sorting();
callchain_register_param(&callchain_param);
@@ -665,6 +668,7 @@ static int test4(struct perf_evsel *evsel, struct machine *machine)

symbol_conf.use_callchain = true;
symbol_conf.cumulate_callchain = true;
+ perf_evsel__set_sample_bit(evsel, CALLCHAIN);

setup_sorting();
callchain_register_param(&callchain_param);
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 6647925..b096ef7 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -651,12 +651,17 @@ static void apply_config_terms(struct perf_evsel *evsel,

/* parse callgraph parameters */
if (callgraph_buf != NULL) {
- param.enabled = true;
- if (parse_callchain_record(callgraph_buf, &param)) {
- pr_err("per-event callgraph setting for %s failed. "
- "Apply callgraph global setting for it\n",
- evsel->name);
- return;
+ if (!strcmp(callgraph_buf, "no")) {
+ param.enabled = false;
+ param.record_mode = CALLCHAIN_NONE;
+ } else {
+ param.enabled = true;
+ if (parse_callchain_record(callgraph_buf, &param)) {
+ pr_err("per-event callgraph setting for %s failed. "
+ "Apply callgraph global setting for it\n",
+ evsel->name);
+ return;
+ }
}
}
if (dump_size > 0) {
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 6bccfae..1cd785b 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -1109,13 +1109,14 @@ void hists__inc_stats(struct hists *hists, struct hist_entry *h)

static void __hists__insert_output_entry(struct rb_root *entries,
struct hist_entry *he,
- u64 min_callchain_hits)
+ u64 min_callchain_hits,
+ bool use_callchain)
{
struct rb_node **p = &entries->rb_node;
struct rb_node *parent = NULL;
struct hist_entry *iter;

- if (symbol_conf.use_callchain)
+ if (use_callchain)
callchain_param.sort(&he->sorted_chain, he->callchain,
min_callchain_hits, &callchain_param);

@@ -1139,6 +1140,8 @@ void hists__output_resort(struct hists *hists, struct ui_progress *prog)
struct rb_node *next;
struct hist_entry *n;
u64 min_callchain_hits;
+ struct perf_evsel *evsel = hists_to_evsel(hists);
+ bool use_callchain = evsel ? (evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) : symbol_conf.use_callchain;

min_callchain_hits = hists->stats.total_period * (callchain_param.min_percent / 100);

@@ -1157,7 +1160,7 @@ void hists__output_resort(struct hists *hists, struct ui_progress *prog)
n = rb_entry(next, struct hist_entry, rb_node_in);
next = rb_next(&n->rb_node_in);

- __hists__insert_output_entry(&hists->entries, n, min_callchain_hits);
+ __hists__insert_output_entry(&hists->entries, n, min_callchain_hits, use_callchain);
hists__inc_stats(hists, n);

if (!n->filtered)

Subject: [tip:perf/core] perf report: Show call graph from reference events

Commit-ID: 9e207ddfa20781e56465ce9a537f0a377c9d34fb
Gitweb: http://git.kernel.org/tip/9e207ddfa20781e56465ce9a537f0a377c9d34fb
Author: Kan Liang <[email protected]>
AuthorDate: Tue, 11 Aug 2015 06:30:49 -0400
Committer: Arnaldo Carvalho de Melo <[email protected]>
CommitDate: Wed, 12 Aug 2015 13:20:28 -0300

perf report: Show call graph from reference events

Introduce --show-ref-call-graph for perf report to print the reference
callgraph for events that were recorded without callgraphs.

Here is an example.

perf report --show-ref-call-graph --stdio

# To display the perf.data header info, please use
--header/--header-only options.
#
#
# Total Lost Samples: 0
#
# Samples: 5 of event 'cpu/cpu-cycles,call-graph=fp/'
# Event count (approx.): 144985
#
# Children Self Command Shared Object Symbol
# ........ ........ ....... ................ ........................................
#
72.30% 0.00% sleep [kernel.vmlinux] [k] entry_SYSCALL_64_fastpath
|
---entry_SYSCALL_64_fastpath
|
|--22.62%-- __GI___libc_nanosleep
--77.38%-- [...]

......

# Samples: 6 of event 'cpu/instructions,call-graph=no/', show reference callgraph
# Event count (approx.): 172780
#
# Children Self Command Shared Object Symbol
# ........ ........ ....... ................ ........................................
#
73.16% 0.00% sleep [kernel.vmlinux] [k] entry_SYSCALL_64_fastpath
|
---entry_SYSCALL_64_fastpath
|
|--31.44%-- __GI___libc_nanosleep
--68.56%-- [...]

Signed-off-by: Kan Liang <[email protected]>
Tested-by: Arnaldo Carvalho de Melo <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Jiri Olsa <[email protected]>
Cc: Namhyung Kim <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/Documentation/perf-report.txt | 11 +++++++++++
tools/perf/builtin-report.c | 7 +++++++
tools/perf/ui/browsers/hists.c | 9 +++++++--
tools/perf/util/hist.c | 7 ++++++-
tools/perf/util/symbol.h | 3 ++-
5 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 7b07d19..a18ba75 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -359,6 +359,17 @@ OPTIONS
--full-source-path::
Show the full path for source files for srcline output.

+--show-ref-call-graph::
+ When multiple events are sampled, it may not be needed to collect
+ callgraphs for all of them. The sample sites are usually nearby,
+ and it's enough to collect the callgraphs on a reference event.
+ So user can use "call-graph=no" event modifier to disable callgraph
+ for other events to reduce the overhead.
+ However, perf report cannot show callgraphs for the event which
+ disable the callgraph.
+ This option extends the perf report to show reference callgraphs,
+ which collected by reference event, in no callgraph event.
+
include::callchain-overhead-calculation.txt[]

SEE ALSO
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index f301e86..62b285e 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -316,6 +316,11 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report
if (evname != NULL)
ret += fprintf(fp, " of event '%s'", evname);

+ if (symbol_conf.show_ref_callgraph &&
+ strstr(evname, "call-graph=no")) {
+ ret += fprintf(fp, ", show reference callgraph");
+ }
+
if (rep->mem_mode) {
ret += fprintf(fp, "\n# Total weight : %" PRIu64, nr_events);
ret += fprintf(fp, "\n# Sort order : %s", sort_order ? : default_mem_sort_order);
@@ -740,6 +745,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
itrace_parse_synth_opts),
OPT_BOOLEAN(0, "full-source-path", &srcline_full_filename,
"Show full source file name path for source lines"),
+ OPT_BOOLEAN(0, "show-ref-call-graph", &symbol_conf.show_ref_callgraph,
+ "Show callgraph from reference event"),
OPT_END()
};
struct perf_data_file file = {
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index f296b73..10c7ec0 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -1267,6 +1267,8 @@ static int hists__browser_title(struct hists *hists,
const char *ev_name = perf_evsel__name(evsel);
char buf[512];
size_t buflen = sizeof(buf);
+ char ref[30] = " show reference callgraph, ";
+ bool enable_ref = false;

if (symbol_conf.filter_relative) {
nr_samples = hists->stats.nr_non_filtered_samples;
@@ -1292,10 +1294,13 @@ static int hists__browser_title(struct hists *hists,
}
}

+ if (symbol_conf.show_ref_callgraph &&
+ strstr(ev_name, "call-graph=no"))
+ enable_ref = true;
nr_samples = convert_unit(nr_samples, &unit);
printed = scnprintf(bf, size,
- "Samples: %lu%c of event '%s', Event count (approx.): %" PRIu64,
- nr_samples, unit, ev_name, nr_events);
+ "Samples: %lu%c of event '%s',%sEvent count (approx.): %" PRIu64,
+ nr_samples, unit, ev_name, enable_ref ? ref : " ", nr_events);


if (hists->uid_filter_str)
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 1cd785b..08b6cd9 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -1141,7 +1141,12 @@ void hists__output_resort(struct hists *hists, struct ui_progress *prog)
struct hist_entry *n;
u64 min_callchain_hits;
struct perf_evsel *evsel = hists_to_evsel(hists);
- bool use_callchain = evsel ? (evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) : symbol_conf.use_callchain;
+ bool use_callchain;
+
+ if (evsel && !symbol_conf.show_ref_callgraph)
+ use_callchain = evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN;
+ else
+ use_callchain = symbol_conf.use_callchain;

min_callchain_hits = hists->stats.total_period * (callchain_param.min_percent / 100);

diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index b98ce51..a4cde92 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -106,7 +106,8 @@ struct symbol_conf {
filter_relative,
show_hist_headers,
branch_callstack,
- has_filter;
+ has_filter,
+ show_ref_callgraph;
const char *vmlinux_name,
*kallsyms_name,
*source_prefix,

Subject: [tip:perf/core] perf tests: Add tests to callgraph and time parse

Commit-ID: 71ef150ee06df29c5b427307dc0bacfe06a8baea
Gitweb: http://git.kernel.org/tip/71ef150ee06df29c5b427307dc0bacfe06a8baea
Author: Kan Liang <[email protected]>
AuthorDate: Tue, 11 Aug 2015 06:30:50 -0400
Committer: Arnaldo Carvalho de Melo <[email protected]>
CommitDate: Wed, 12 Aug 2015 13:20:29 -0300

perf tests: Add tests to callgraph and time parse

Add tests in tests/parse-events.c to check the call-graph and time options.

Signed-off-by: Kan Liang <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Jiri Olsa <[email protected]>
Cc: Namhyung Kim <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/tests/parse-events.c | 38 ++++++++++++++++++++++++++++++++++++++
1 file changed, 38 insertions(+)

diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index f65bb89..9b6b2b63 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -479,6 +479,39 @@ static int test__checkevent_pmu_name(struct perf_evlist *evlist)
return 0;
}

+static int test__checkevent_pmu_partial_time_callgraph(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+ /* cpu/config=1,call-graph=fp,time,period=100000/ */
+ TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config", 1 == evsel->attr.config);
+ /*
+ * The period, time and callgraph value gets configured
+ * within perf_evlist__config,
+ * while this test executes only parse events method.
+ */
+ TEST_ASSERT_VAL("wrong period", 0 == evsel->attr.sample_period);
+ TEST_ASSERT_VAL("wrong callgraph", !(PERF_SAMPLE_CALLCHAIN & evsel->attr.sample_type));
+ TEST_ASSERT_VAL("wrong time", !(PERF_SAMPLE_TIME & evsel->attr.sample_type));
+
+ /* cpu/config=2,call-graph=no,time=0,period=2000/ */
+ evsel = perf_evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config", 2 == evsel->attr.config);
+ /*
+ * The period, time and callgraph value gets configured
+ * within perf_evlist__config,
+ * while this test executes only parse events method.
+ */
+ TEST_ASSERT_VAL("wrong period", 0 == evsel->attr.sample_period);
+ TEST_ASSERT_VAL("wrong callgraph", !(PERF_SAMPLE_CALLCHAIN & evsel->attr.sample_type));
+ TEST_ASSERT_VAL("wrong time", !(PERF_SAMPLE_TIME & evsel->attr.sample_type));
+
+ return 0;
+}
+
static int test__checkevent_pmu_events(struct perf_evlist *evlist)
{
struct perf_evsel *evsel = perf_evlist__first(evlist);
@@ -1555,6 +1588,11 @@ static struct evlist_test test__events_pmu[] = {
.check = test__checkevent_pmu_name,
.id = 1,
},
+ {
+ .name = "cpu/config=1,call-graph=fp,time,period=100000/,cpu/config=2,call-graph=no,time=0,period=2000/",
+ .check = test__checkevent_pmu_partial_time_callgraph,
+ .id = 2,
+ },
};

struct terms_test {