This patchkit adds the ability to turn off callgraphs and time stamps
per event. This in turn can reduce sampling overhead and the size of
the perf.data.
Changes since V1:
- Break up V1 patches into three patches(parse option changes,
partial time support and partial callgraph support).
- Use strings 'fp,dwarf,lbr,no' to identify callchains
- Add test case in parse-events.c
Changes since V2:
- Rebase on 60cd37eb10
Changes since V3:
- Replace OPT_CALLBACK_SET by current existing callback mechanism.
- Using perf_evsel__set_sample_bit if possible
- Change the expression "partial" to "per event"
- Using global variable to indicate if 'time' is set per event.
If 'time' is not set, enable it by default for perf record.
Changes since V4:
- Fix issue of setting callgraph_set
Changes since V5:
- per-event settings over global settings in general
- support for event post configuration structure
Changes since V6:
- Add error info for per-event time
- rename the option to call-graph and stack-size
- refine parse/config callchain functions,
and reuse them for per-event callgraph setting
- Modify perf test
Changes since V7:
- Rebase on 6ea3269c58
- Enclose callgraph reset codes.
Kan Liang (4):
perf,tools: per-event time support
perf,tools: refine parse/config callchain functions
perf,tools: per-event callgraph support
perf,tests: Add tests to callgraph and time parse
tools/perf/Documentation/perf-record.txt | 8 ++-
tools/perf/builtin-record.c | 2 +-
tools/perf/tests/parse-events.c | 38 ++++++++++++++
tools/perf/util/callchain.c | 14 +++---
tools/perf/util/callchain.h | 2 +-
tools/perf/util/evsel.c | 86 +++++++++++++++++++++++++++++---
tools/perf/util/evsel.h | 6 +++
tools/perf/util/parse-events.c | 24 +++++++++
tools/perf/util/parse-events.h | 3 ++
tools/perf/util/parse-events.l | 3 ++
tools/perf/util/pmu.c | 3 +-
11 files changed, 170 insertions(+), 19 deletions(-)
--
1.8.3.1
From: Kan Liang <[email protected]>
This patchkit adds the ability to turn off time stamps per event.
One usable case of partial time is to work with per-event callgraph to
enable "PEBS threshold > 1" (https://lkml.org/lkml/2015/5/10/196), which
can significantly reduce the sampling overhead.
The event samples with time stamps off will not be ordered.
Signed-off-by: Kan Liang <[email protected]>
---
tools/perf/Documentation/perf-record.txt | 4 +++-
tools/perf/util/evsel.c | 14 +++++++++++---
tools/perf/util/evsel.h | 2 ++
tools/perf/util/parse-events.c | 12 ++++++++++++
tools/perf/util/parse-events.h | 1 +
tools/perf/util/parse-events.l | 1 +
tools/perf/util/pmu.c | 2 +-
7 files changed, 31 insertions(+), 5 deletions(-)
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index ac41350..0d852d1 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -49,7 +49,9 @@ OPTIONS
These params can be used to overload default config values per event.
Here is a list of the params.
- 'period': Set event sampling period
-
+ - 'time': Disable/enable time stamping. Acceptable values are 1 for
+ enabling time stamping. 0 for disabling time stamping.
+ The default is 1.
Note: If user explicitly sets options which conflict with the params,
the value set by the params will be overridden.
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 7d3acba..7febfe2 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -587,15 +587,23 @@ perf_evsel__config_callgraph(struct perf_evsel *evsel,
}
}
-static void apply_config_terms(struct perf_event_attr *attr __maybe_unused,
- struct list_head *config_terms)
+static void apply_config_terms(struct perf_evsel *evsel)
{
struct perf_evsel_config_term *term;
+ struct list_head *config_terms = &evsel->config_terms;
+ struct perf_event_attr *attr = &evsel->attr;
list_for_each_entry(term, config_terms, list) {
switch (term->type) {
case PERF_EVSEL__CONFIG_TERM_PERIOD:
attr->sample_period = term->val.period;
+ break;
+ case PERF_EVSEL__CONFIG_TERM_TIME:
+ if (term->val.time)
+ perf_evsel__set_sample_bit(evsel, TIME);
+ else
+ perf_evsel__reset_sample_bit(evsel, TIME);
+ break;
default:
break;
}
@@ -798,7 +806,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts)
* Apply event specific term settings,
* it overloads any global configuration.
*/
- apply_config_terms(attr, &evsel->config_terms);
+ apply_config_terms(evsel);
}
static int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index a7d2175..6a12908 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -39,6 +39,7 @@ struct cgroup_sel;
*/
enum {
PERF_EVSEL__CONFIG_TERM_PERIOD,
+ PERF_EVSEL__CONFIG_TERM_TIME,
PERF_EVSEL__CONFIG_TERM_MAX,
};
@@ -47,6 +48,7 @@ struct perf_evsel_config_term {
int type;
union {
u64 period;
+ bool time;
} val;
};
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 09bee93..a6cb9af 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -603,6 +603,14 @@ do { \
* attr->branch_sample_type = term->val.num;
*/
break;
+ case PARSE_EVENTS__TERM_TYPE_TIME:
+ CHECK_TYPE_VAL(NUM);
+ if (term->val.num > 1) {
+ err->str = strdup("expected 0 or 1");
+ err->idx = term->err_val;
+ return -EINVAL;
+ }
+ break;
case PARSE_EVENTS__TERM_TYPE_NAME:
CHECK_TYPE_VAL(STR);
break;
@@ -650,6 +658,10 @@ do { \
switch (term->type_term) {
case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD:
ADD_CONFIG_TERM(PERIOD, period, term->val.num);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_TIME:
+ ADD_CONFIG_TERM(TIME, time, term->val.num);
+ break;
default:
break;
}
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 2063048..e6f9aacc 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -63,6 +63,7 @@ enum {
PARSE_EVENTS__TERM_TYPE_NAME,
PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD,
PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE,
+ PARSE_EVENTS__TERM_TYPE_TIME,
};
struct parse_events_term {
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 13cef3c..f542750 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -183,6 +183,7 @@ config2 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG2); }
name { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NAME); }
period { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD); }
branch_type { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE); }
+time { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_TIME); }
, { return ','; }
"/" { BEGIN(INITIAL); return '/'; }
{name_minus} { return str(yyscanner, PE_NAME); }
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 7bcb8c3..b615cdf 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -607,7 +607,7 @@ static char *formats_error_string(struct list_head *formats)
{
struct perf_pmu_format *format;
char *err, *str;
- static const char *static_terms = "config,config1,config2,name,period,branch_type\n";
+ static const char *static_terms = "config,config1,config2,name,period,branch_type,time\n";
unsigned i = 0;
if (!asprintf(&str, "valid terms:"))
--
1.8.3.1
From: Kan Liang <[email protected]>
Pass global callchain_param into parse_callchain_record_opt and
perf_evsel__config_callgraph as parameter. So we can reuse these
functions to parse/config local param for callchain.
Signed-off-by: Kan Liang <[email protected]>
---
tools/perf/builtin-record.c | 2 +-
tools/perf/util/callchain.c | 14 +++++++-------
tools/perf/util/callchain.h | 2 +-
tools/perf/util/evsel.c | 11 ++++++-----
4 files changed, 15 insertions(+), 14 deletions(-)
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index f51131b..25cf6b4 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -779,7 +779,7 @@ int record_parse_callchain_opt(const struct option *opt,
return 0;
}
- ret = parse_callchain_record_opt(arg);
+ ret = parse_callchain_record_opt(arg, &callchain_param);
if (!ret)
callchain_debug();
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 9f643ee..931cca8 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -53,7 +53,7 @@ static int get_stack_size(const char *str, unsigned long *_size)
}
#endif /* HAVE_DWARF_UNWIND_SUPPORT */
-int parse_callchain_record_opt(const char *arg)
+int parse_callchain_record_opt(const char *arg, struct callchain_param *param)
{
char *tok, *name, *saveptr = NULL;
char *buf;
@@ -73,7 +73,7 @@ int parse_callchain_record_opt(const char *arg)
/* Framepointer style */
if (!strncmp(name, "fp", sizeof("fp"))) {
if (!strtok_r(NULL, ",", &saveptr)) {
- callchain_param.record_mode = CALLCHAIN_FP;
+ param->record_mode = CALLCHAIN_FP;
ret = 0;
} else
pr_err("callchain: No more arguments "
@@ -86,20 +86,20 @@ int parse_callchain_record_opt(const char *arg)
const unsigned long default_stack_dump_size = 8192;
ret = 0;
- callchain_param.record_mode = CALLCHAIN_DWARF;
- callchain_param.dump_size = default_stack_dump_size;
+ param->record_mode = CALLCHAIN_DWARF;
+ param->dump_size = default_stack_dump_size;
tok = strtok_r(NULL, ",", &saveptr);
if (tok) {
unsigned long size = 0;
ret = get_stack_size(tok, &size);
- callchain_param.dump_size = size;
+ param->dump_size = size;
}
#endif /* HAVE_DWARF_UNWIND_SUPPORT */
} else if (!strncmp(name, "lbr", sizeof("lbr"))) {
if (!strtok_r(NULL, ",", &saveptr)) {
- callchain_param.record_mode = CALLCHAIN_LBR;
+ param->record_mode = CALLCHAIN_LBR;
ret = 0;
} else
pr_err("callchain: No more arguments "
@@ -219,7 +219,7 @@ int perf_callchain_config(const char *var, const char *value)
var += sizeof("call-graph.") - 1;
if (!strcmp(var, "record-mode"))
- return parse_callchain_record_opt(value);
+ return parse_callchain_record_opt(value, &callchain_param);
#ifdef HAVE_DWARF_UNWIND_SUPPORT
if (!strcmp(var, "dump-size")) {
unsigned long size = 0;
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 679c2c6..68a32c2 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -177,7 +177,7 @@ int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *
bool hide_unresolved);
extern const char record_callchain_help[];
-int parse_callchain_record_opt(const char *arg);
+int parse_callchain_record_opt(const char *arg, struct callchain_param *param);
int parse_callchain_report_opt(const char *arg);
int perf_callchain_config(const char *var, const char *value);
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 7febfe2..f572f46 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -545,14 +545,15 @@ int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size)
static void
perf_evsel__config_callgraph(struct perf_evsel *evsel,
- struct record_opts *opts)
+ struct record_opts *opts,
+ struct callchain_param *param)
{
bool function = perf_evsel__is_function_event(evsel);
struct perf_event_attr *attr = &evsel->attr;
perf_evsel__set_sample_bit(evsel, CALLCHAIN);
- if (callchain_param.record_mode == CALLCHAIN_LBR) {
+ if (param->record_mode == CALLCHAIN_LBR) {
if (!opts->branch_stack) {
if (attr->exclude_user) {
pr_warning("LBR callstack option is only available "
@@ -568,12 +569,12 @@ perf_evsel__config_callgraph(struct perf_evsel *evsel,
"Falling back to framepointers.\n");
}
- if (callchain_param.record_mode == CALLCHAIN_DWARF) {
+ if (param->record_mode == CALLCHAIN_DWARF) {
if (!function) {
perf_evsel__set_sample_bit(evsel, REGS_USER);
perf_evsel__set_sample_bit(evsel, STACK_USER);
attr->sample_regs_user = PERF_REGS_MASK;
- attr->sample_stack_user = callchain_param.dump_size;
+ attr->sample_stack_user = param->dump_size;
attr->exclude_callchain_user = 1;
} else {
pr_info("Cannot use DWARF unwind for function trace event,"
@@ -714,7 +715,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts)
evsel->attr.exclude_callchain_user = 1;
if (callchain_param.enabled && !evsel->no_aux_samples)
- perf_evsel__config_callgraph(evsel, opts);
+ perf_evsel__config_callgraph(evsel, opts, &callchain_param);
if (opts->sample_intr_regs) {
attr->sample_regs_intr = PERF_REGS_MASK;
--
1.8.3.1
From: Kan Liang <[email protected]>
When multiple events are sampled it may not be needed to collect
callgraphs for all of them. The sample sites are usually nearby, and
it's enough to collect the callgraphs on a reference event (such as
precise cycles or precise instructions).
This patchkit adds the ability to turn off callgraphs and time stamps
per event. This in turn can reduce sampling overhead and the size of the
perf.data. Furthermore, it makes collecting backtraces and time stamps
possible when PEBS threshold > 1, which significantly reduces the
sampling overhead, especially for frequently occurring events
(https://lkml.org/lkml/2015/5/10/196). For example, a slower event with
a larger period collects backtraces/time stamps, while the other, more
frequent events run fast with multi-PEBS. The time stamps from the slower
events can be used to order the faster events, and their backtraces can
give the user enough of a hint to find the right spot.
Here are some examples and test results.
1. Comparing the elapsed time and perf.data size from "kernbench -M -H".
The test command for FULL callgraph and time support.
"perf record -e
'{cpu/cpu-cycles,period=100000/,cpu/instructions,period=20000/p}'
--call-graph fp --time"
The test command for PARTIAL callgraph and time support.
"perf record -e
'{cpu/cpu-cycles,call-graph=fp,time,period=100000/,
cpu/instructions,call-graph=no,time=0,period=20000/p}'"
The elapsed time for FULL is 24.3 Sec, while for PARTIAL is 16.9 Sec.
The perf.data size for FULL is 22.1 Gb, while for PARTIAL is 12.4 Gb.
2. Comparing the perf.data size and callgraph results.
The test command for FULL callgraph and time support.
"perf record -e
'{cpu/cpu-cycles,period=100000/pp,cpu/instructions,period=20000/p}'
--call-graph fp -- ./tchain_edit"
The test command for PARTIAL callgraph and time support.
"perf record -e
'{cpu/cpu-cycles,call-graph=fp,time,period=100000/pp,
cpu/instructions,call-graph=no,time=0,period=20000/p}'
-- ./tchain_edit"
The perf.data size for FULL is 43.2 MB, while for PARTIAL is 21.1 MB.
The callgraph is roughly the same.
The callgraph from FULL
# Samples: 87K of event
'cpu/cpu-cycles,call-graph=fp,time,period=100000/pp'
# Event count (approx.): 8760000000
#
# Children Self Command Shared Object Symbol
# ........ ........ ........... ..................
..........................................
#
99.98% 0.00% tchain_edit libc-2.15.so [.]
__libc_start_main
|
---__libc_start_main
99.97% 0.00% tchain_edit tchain_edit [.] main
|
---main
__libc_start_main
99.97% 0.00% tchain_edit tchain_edit [.] f1
|
---f1
main
__libc_start_main
99.85% 87.01% tchain_edit tchain_edit [.] f3
|
---f3
|
|--99.74%-- f2
| f1
| main
| __libc_start_main
--0.26%-- [...]
99.71% 0.12% tchain_edit tchain_edit [.] f2
|
---f2
f1
main
__libc_start_main
The callgraph from PARTIAL
# Samples: 417K of event
'cpu/instructions,call-graph=no,time=0,period=20000/p'
# Event count (approx.): 8346980000
#
# Children Self Command Shared Object Symbol
# ........ ........ ........... ................
..........................................
#
98.82% 0.00% tchain_edit libc-2.15.so [.]
__libc_start_main
|
---__libc_start_main
98.82% 0.00% tchain_edit tchain_edit [.] main
|
---main
__libc_start_main
98.82% 0.00% tchain_edit tchain_edit [.] f1
|
---f1
main
__libc_start_main
98.82% 98.28% tchain_edit tchain_edit [.] f3
|
---f3
|
|--0.53%-- f2
| f1
| main
| __libc_start_main
|
|--0.01%-- f1
| main
| __libc_start_main
--99.46%-- [...]
97.63% 0.03% tchain_edit tchain_edit [.] f2
|
---f2
f1
main
__libc_start_main
7.13% 0.03% tchain_edit [kernel.vmlinux] [k] do_nmi
|
---do_nmi
end_repeat_nmi
f3
f2
f1
main
__libc_start_main
Signed-off-by: Kan Liang <[email protected]>
---
tools/perf/Documentation/perf-record.txt | 4 ++
tools/perf/util/evsel.c | 65 +++++++++++++++++++++++++++++++-
tools/perf/util/evsel.h | 4 ++
tools/perf/util/parse-events.c | 12 ++++++
tools/perf/util/parse-events.h | 2 +
tools/perf/util/parse-events.l | 2 +
tools/perf/util/pmu.c | 3 +-
7 files changed, 89 insertions(+), 3 deletions(-)
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 0d852d1..e633711 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -52,6 +52,10 @@ OPTIONS
- 'time': Disable/enable time stamping. Acceptable values are 1 for
enabling time stamping. 0 for disabling time stamping.
The default is 1.
+ - 'call-graph': Disable/enable callgraph. Acceptable str are "fp" for
+ FP mode, "dwarf" for DWARF mode, "lbr" for LBR mode and
+ "no" for disable callgraph.
+ - 'stack-size': user stack size for dwarf mode
Note: If user explicitly sets options which conflict with the params,
the value set by the params will be overridden.
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index f572f46..106cd20 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -588,11 +588,36 @@ perf_evsel__config_callgraph(struct perf_evsel *evsel,
}
}
-static void apply_config_terms(struct perf_evsel *evsel)
+static void
+perf_evsel__reset_callgraph(struct perf_evsel *evsel,
+ struct callchain_param *param)
+{
+ struct perf_event_attr *attr = &evsel->attr;
+
+ perf_evsel__reset_sample_bit(evsel, CALLCHAIN);
+ if (param->record_mode == CALLCHAIN_LBR) {
+ perf_evsel__reset_sample_bit(evsel, BRANCH_STACK);
+ attr->branch_sample_type &= ~(PERF_SAMPLE_BRANCH_USER |
+ PERF_SAMPLE_BRANCH_CALL_STACK);
+ }
+ if (param->record_mode == CALLCHAIN_DWARF) {
+ perf_evsel__reset_sample_bit(evsel, REGS_USER);
+ perf_evsel__reset_sample_bit(evsel, STACK_USER);
+ }
+}
+
+static void apply_config_terms(struct perf_evsel *evsel,
+ struct record_opts *opts)
{
struct perf_evsel_config_term *term;
struct list_head *config_terms = &evsel->config_terms;
struct perf_event_attr *attr = &evsel->attr;
+ struct callchain_param param;
+ u32 dump_size = 0;
+ char *callgraph_buf = NULL;
+
+ /* callgraph default */
+ param.record_mode = callchain_param.record_mode;
list_for_each_entry(term, config_terms, list) {
switch (term->type) {
@@ -605,10 +630,46 @@ static void apply_config_terms(struct perf_evsel *evsel)
else
perf_evsel__reset_sample_bit(evsel, TIME);
break;
+ case PERF_EVSEL__CONFIG_TERM_CALLGRAPH:
+ callgraph_buf = term->val.callgraph;
+ break;
+ case PERF_EVSEL__CONFIG_TERM_STACK_USER:
+ dump_size = term->val.stack_user;
+ break;
default:
break;
}
}
+
+ /* User explicitly set per-event callgraph, clear the old setting and reset. */
+ if ((callgraph_buf != NULL) || (dump_size > 0)) {
+
+ /* parse callgraph parameters */
+ if (callgraph_buf != NULL) {
+ if (!strcmp(callgraph_buf, "no")) {
+ param.enabled = false;
+ param.record_mode = CALLCHAIN_NONE;
+ } else {
+ param.enabled = true;
+ if (parse_callchain_record_opt(callgraph_buf, &param)) {
+ pr_err("per-event callgraph setting for %s failed. "
+ "Apply callgraph global setting for it\n",
+ evsel->name);
+ return;
+ }
+ }
+ }
+ if (dump_size > 0)
+ param.dump_size = dump_size;
+
+ /* If global callgraph set, clear it */
+ if (callchain_param.enabled)
+ perf_evsel__reset_callgraph(evsel, &callchain_param);
+
+ /* set perf-event callgraph */
+ if (param.enabled)
+ perf_evsel__config_callgraph(evsel, opts, &param);
+ }
}
/*
@@ -807,7 +868,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts)
* Apply event specific term settings,
* it overloads any global configuration.
*/
- apply_config_terms(evsel);
+ apply_config_terms(evsel, opts);
}
static int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 6a12908..09a3022 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -40,6 +40,8 @@ struct cgroup_sel;
enum {
PERF_EVSEL__CONFIG_TERM_PERIOD,
PERF_EVSEL__CONFIG_TERM_TIME,
+ PERF_EVSEL__CONFIG_TERM_CALLGRAPH,
+ PERF_EVSEL__CONFIG_TERM_STACK_USER,
PERF_EVSEL__CONFIG_TERM_MAX,
};
@@ -49,6 +51,8 @@ struct perf_evsel_config_term {
union {
u64 period;
bool time;
+ char *callgraph;
+ u64 stack_user;
} val;
};
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index a6cb9af..716f1fc 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -611,6 +611,12 @@ do { \
return -EINVAL;
}
break;
+ case PARSE_EVENTS__TERM_TYPE_CALLGRAPH:
+ CHECK_TYPE_VAL(STR);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_STACKSIZE:
+ CHECK_TYPE_VAL(NUM);
+ break;
case PARSE_EVENTS__TERM_TYPE_NAME:
CHECK_TYPE_VAL(STR);
break;
@@ -662,6 +668,12 @@ do { \
case PARSE_EVENTS__TERM_TYPE_TIME:
ADD_CONFIG_TERM(TIME, time, term->val.num);
break;
+ case PARSE_EVENTS__TERM_TYPE_CALLGRAPH:
+ ADD_CONFIG_TERM(CALLGRAPH, callgraph, term->val.str);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_STACKSIZE:
+ ADD_CONFIG_TERM(STACK_USER, stack_user, term->val.num);
+ break;
default:
break;
}
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index e6f9aacc..87dc9f6 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -64,6 +64,8 @@ enum {
PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD,
PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE,
PARSE_EVENTS__TERM_TYPE_TIME,
+ PARSE_EVENTS__TERM_TYPE_CALLGRAPH,
+ PARSE_EVENTS__TERM_TYPE_STACKSIZE,
};
struct parse_events_term {
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index f542750..1665497 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -184,6 +184,8 @@ name { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NAME); }
period { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD); }
branch_type { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE); }
time { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_TIME); }
+call-graph { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CALLGRAPH); }
+stack-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_STACKSIZE); }
, { return ','; }
"/" { BEGIN(INITIAL); return '/'; }
{name_minus} { return str(yyscanner, PE_NAME); }
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index b615cdf..948f896 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -607,7 +607,8 @@ static char *formats_error_string(struct list_head *formats)
{
struct perf_pmu_format *format;
char *err, *str;
- static const char *static_terms = "config,config1,config2,name,period,branch_type,time\n";
+ static const char *static_terms = "config,config1,config2,name,period,"
+ "branch_type,time,call-graph,stack-size\n";
unsigned i = 0;
if (!asprintf(&str, "valid terms:"))
--
1.8.3.1
From: Kan Liang <[email protected]>
Add tests in tests/parse-events.c to check the call-graph and time options.
Signed-off-by: Kan Liang <[email protected]>
---
tools/perf/tests/parse-events.c | 38 ++++++++++++++++++++++++++++++++++++++
1 file changed, 38 insertions(+)
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index f65bb89..9b6b2b63 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -479,6 +479,39 @@ static int test__checkevent_pmu_name(struct perf_evlist *evlist)
return 0;
}
+static int test__checkevent_pmu_partial_time_callgraph(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+ /* cpu/config=1,call-graph=fp,time,period=100000/ */
+ TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config", 1 == evsel->attr.config);
+ /*
+ * The period, time and callgraph value gets configured
+ * within perf_evlist__config,
+ * while this test executes only parse events method.
+ */
+ TEST_ASSERT_VAL("wrong period", 0 == evsel->attr.sample_period);
+ TEST_ASSERT_VAL("wrong callgraph", !(PERF_SAMPLE_CALLCHAIN & evsel->attr.sample_type));
+ TEST_ASSERT_VAL("wrong time", !(PERF_SAMPLE_TIME & evsel->attr.sample_type));
+
+ /* cpu/config=2,call-graph=no,time=0,period=2000/ */
+ evsel = perf_evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config", 2 == evsel->attr.config);
+ /*
+ * The period, time and callgraph value gets configured
+ * within perf_evlist__config,
+ * while this test executes only parse events method.
+ */
+ TEST_ASSERT_VAL("wrong period", 0 == evsel->attr.sample_period);
+ TEST_ASSERT_VAL("wrong callgraph", !(PERF_SAMPLE_CALLCHAIN & evsel->attr.sample_type));
+ TEST_ASSERT_VAL("wrong time", !(PERF_SAMPLE_TIME & evsel->attr.sample_type));
+
+ return 0;
+}
+
static int test__checkevent_pmu_events(struct perf_evlist *evlist)
{
struct perf_evsel *evsel = perf_evlist__first(evlist);
@@ -1555,6 +1588,11 @@ static struct evlist_test test__events_pmu[] = {
.check = test__checkevent_pmu_name,
.id = 1,
},
+ {
+ .name = "cpu/config=1,call-graph=fp,time,period=100000/,cpu/config=2,call-graph=no,time=0,period=2000/",
+ .check = test__checkevent_pmu_partial_time_callgraph,
+ .id = 2,
+ },
};
struct terms_test {
--
1.8.3.1
On Tue, Aug 04, 2015 at 04:30:19AM -0400, Kan Liang wrote:
> From: Kan Liang <[email protected]>
>
> This patchkit adds the ability to turn off time stamps per event.
> One usable case of partial time is to work with per-event callgraph to
> enable "PEBS threshold > 1" (https://lkml.org/lkml/2015/5/10/196), which
> can significantly reduce the sampling overhead.
> The event samples with time stamps off will not be ordered.
>
> Signed-off-by: Kan Liang <[email protected]>
Acked-by: Jiri Olsa <[email protected]>
thanks,
jirka
> ---
> tools/perf/Documentation/perf-record.txt | 4 +++-
> tools/perf/util/evsel.c | 14 +++++++++++---
> tools/perf/util/evsel.h | 2 ++
> tools/perf/util/parse-events.c | 12 ++++++++++++
> tools/perf/util/parse-events.h | 1 +
> tools/perf/util/parse-events.l | 1 +
> tools/perf/util/pmu.c | 2 +-
> 7 files changed, 31 insertions(+), 5 deletions(-)
>
> diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
> index ac41350..0d852d1 100644
> --- a/tools/perf/Documentation/perf-record.txt
> +++ b/tools/perf/Documentation/perf-record.txt
> @@ -49,7 +49,9 @@ OPTIONS
> These params can be used to overload default config values per event.
> Here is a list of the params.
> - 'period': Set event sampling period
> -
> + - 'time': Disable/enable time stamping. Acceptable values are 1 for
> + enabling time stamping. 0 for disabling time stamping.
> + The default is 1.
> Note: If user explicitly sets options which conflict with the params,
> the value set by the params will be overridden.
>
> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> index 7d3acba..7febfe2 100644
> --- a/tools/perf/util/evsel.c
> +++ b/tools/perf/util/evsel.c
> @@ -587,15 +587,23 @@ perf_evsel__config_callgraph(struct perf_evsel *evsel,
> }
> }
>
> -static void apply_config_terms(struct perf_event_attr *attr __maybe_unused,
> - struct list_head *config_terms)
> +static void apply_config_terms(struct perf_evsel *evsel)
> {
> struct perf_evsel_config_term *term;
> + struct list_head *config_terms = &evsel->config_terms;
> + struct perf_event_attr *attr = &evsel->attr;
>
> list_for_each_entry(term, config_terms, list) {
> switch (term->type) {
> case PERF_EVSEL__CONFIG_TERM_PERIOD:
> attr->sample_period = term->val.period;
> + break;
> + case PERF_EVSEL__CONFIG_TERM_TIME:
> + if (term->val.time)
> + perf_evsel__set_sample_bit(evsel, TIME);
> + else
> + perf_evsel__reset_sample_bit(evsel, TIME);
> + break;
> default:
> break;
> }
> @@ -798,7 +806,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts)
> * Apply event specific term settings,
> * it overloads any global configuration.
> */
> - apply_config_terms(attr, &evsel->config_terms);
> + apply_config_terms(evsel);
> }
>
> static int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
> diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
> index a7d2175..6a12908 100644
> --- a/tools/perf/util/evsel.h
> +++ b/tools/perf/util/evsel.h
> @@ -39,6 +39,7 @@ struct cgroup_sel;
> */
> enum {
> PERF_EVSEL__CONFIG_TERM_PERIOD,
> + PERF_EVSEL__CONFIG_TERM_TIME,
> PERF_EVSEL__CONFIG_TERM_MAX,
> };
>
> @@ -47,6 +48,7 @@ struct perf_evsel_config_term {
> int type;
> union {
> u64 period;
> + bool time;
> } val;
> };
>
> diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
> index 09bee93..a6cb9af 100644
> --- a/tools/perf/util/parse-events.c
> +++ b/tools/perf/util/parse-events.c
> @@ -603,6 +603,14 @@ do { \
> * attr->branch_sample_type = term->val.num;
> */
> break;
> + case PARSE_EVENTS__TERM_TYPE_TIME:
> + CHECK_TYPE_VAL(NUM);
> + if (term->val.num > 1) {
> + err->str = strdup("expected 0 or 1");
> + err->idx = term->err_val;
> + return -EINVAL;
> + }
> + break;
> case PARSE_EVENTS__TERM_TYPE_NAME:
> CHECK_TYPE_VAL(STR);
> break;
> @@ -650,6 +658,10 @@ do { \
> switch (term->type_term) {
> case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD:
> ADD_CONFIG_TERM(PERIOD, period, term->val.num);
> + break;
> + case PARSE_EVENTS__TERM_TYPE_TIME:
> + ADD_CONFIG_TERM(TIME, time, term->val.num);
> + break;
> default:
> break;
> }
> diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
> index 2063048..e6f9aacc 100644
> --- a/tools/perf/util/parse-events.h
> +++ b/tools/perf/util/parse-events.h
> @@ -63,6 +63,7 @@ enum {
> PARSE_EVENTS__TERM_TYPE_NAME,
> PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD,
> PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE,
> + PARSE_EVENTS__TERM_TYPE_TIME,
> };
>
> struct parse_events_term {
> diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
> index 13cef3c..f542750 100644
> --- a/tools/perf/util/parse-events.l
> +++ b/tools/perf/util/parse-events.l
> @@ -183,6 +183,7 @@ config2 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG2); }
> name { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NAME); }
> period { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD); }
> branch_type { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE); }
> +time { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_TIME); }
> , { return ','; }
> "/" { BEGIN(INITIAL); return '/'; }
> {name_minus} { return str(yyscanner, PE_NAME); }
> diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
> index 7bcb8c3..b615cdf 100644
> --- a/tools/perf/util/pmu.c
> +++ b/tools/perf/util/pmu.c
> @@ -607,7 +607,7 @@ static char *formats_error_string(struct list_head *formats)
> {
> struct perf_pmu_format *format;
> char *err, *str;
> - static const char *static_terms = "config,config1,config2,name,period,branch_type\n";
> + static const char *static_terms = "config,config1,config2,name,period,branch_type,time\n";
> unsigned i = 0;
>
> if (!asprintf(&str, "valid terms:"))
> --
> 1.8.3.1
>
On Tue, Aug 04, 2015 at 04:30:20AM -0400, Kan Liang wrote:
> From: Kan Liang <[email protected]>
>
> Pass global callchain_param into parse_callchain_record_opt and
> perf_evsel__config_callgraph as parameter. So we can reuse these
> functions to parse/config local param for callchain.
Acked-by: Jiri Olsa <[email protected]>
thanks,
jirka
>
> Signed-off-by: Kan Liang <[email protected]>
> ---
> tools/perf/builtin-record.c | 2 +-
> tools/perf/util/callchain.c | 14 +++++++-------
> tools/perf/util/callchain.h | 2 +-
> tools/perf/util/evsel.c | 11 ++++++-----
> 4 files changed, 15 insertions(+), 14 deletions(-)
>
> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> index f51131b..25cf6b4 100644
> --- a/tools/perf/builtin-record.c
> +++ b/tools/perf/builtin-record.c
> @@ -779,7 +779,7 @@ int record_parse_callchain_opt(const struct option *opt,
> return 0;
> }
>
> - ret = parse_callchain_record_opt(arg);
> + ret = parse_callchain_record_opt(arg, &callchain_param);
> if (!ret)
> callchain_debug();
>
> diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
> index 9f643ee..931cca8 100644
> --- a/tools/perf/util/callchain.c
> +++ b/tools/perf/util/callchain.c
> @@ -53,7 +53,7 @@ static int get_stack_size(const char *str, unsigned long *_size)
> }
> #endif /* HAVE_DWARF_UNWIND_SUPPORT */
>
> -int parse_callchain_record_opt(const char *arg)
> +int parse_callchain_record_opt(const char *arg, struct callchain_param *param)
> {
> char *tok, *name, *saveptr = NULL;
> char *buf;
> @@ -73,7 +73,7 @@ int parse_callchain_record_opt(const char *arg)
> /* Framepointer style */
> if (!strncmp(name, "fp", sizeof("fp"))) {
> if (!strtok_r(NULL, ",", &saveptr)) {
> - callchain_param.record_mode = CALLCHAIN_FP;
> + param->record_mode = CALLCHAIN_FP;
> ret = 0;
> } else
> pr_err("callchain: No more arguments "
> @@ -86,20 +86,20 @@ int parse_callchain_record_opt(const char *arg)
> const unsigned long default_stack_dump_size = 8192;
>
> ret = 0;
> - callchain_param.record_mode = CALLCHAIN_DWARF;
> - callchain_param.dump_size = default_stack_dump_size;
> + param->record_mode = CALLCHAIN_DWARF;
> + param->dump_size = default_stack_dump_size;
>
> tok = strtok_r(NULL, ",", &saveptr);
> if (tok) {
> unsigned long size = 0;
>
> ret = get_stack_size(tok, &size);
> - callchain_param.dump_size = size;
> + param->dump_size = size;
> }
> #endif /* HAVE_DWARF_UNWIND_SUPPORT */
> } else if (!strncmp(name, "lbr", sizeof("lbr"))) {
> if (!strtok_r(NULL, ",", &saveptr)) {
> - callchain_param.record_mode = CALLCHAIN_LBR;
> + param->record_mode = CALLCHAIN_LBR;
> ret = 0;
> } else
> pr_err("callchain: No more arguments "
> @@ -219,7 +219,7 @@ int perf_callchain_config(const char *var, const char *value)
> var += sizeof("call-graph.") - 1;
>
> if (!strcmp(var, "record-mode"))
> - return parse_callchain_record_opt(value);
> + return parse_callchain_record_opt(value, &callchain_param);
> #ifdef HAVE_DWARF_UNWIND_SUPPORT
> if (!strcmp(var, "dump-size")) {
> unsigned long size = 0;
> diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
> index 679c2c6..68a32c2 100644
> --- a/tools/perf/util/callchain.h
> +++ b/tools/perf/util/callchain.h
> @@ -177,7 +177,7 @@ int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *
> bool hide_unresolved);
>
> extern const char record_callchain_help[];
> -int parse_callchain_record_opt(const char *arg);
> +int parse_callchain_record_opt(const char *arg, struct callchain_param *param);
> int parse_callchain_report_opt(const char *arg);
> int perf_callchain_config(const char *var, const char *value);
>
> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> index 7febfe2..f572f46 100644
> --- a/tools/perf/util/evsel.c
> +++ b/tools/perf/util/evsel.c
> @@ -545,14 +545,15 @@ int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size)
>
> static void
> perf_evsel__config_callgraph(struct perf_evsel *evsel,
> - struct record_opts *opts)
> + struct record_opts *opts,
> + struct callchain_param *param)
> {
> bool function = perf_evsel__is_function_event(evsel);
> struct perf_event_attr *attr = &evsel->attr;
>
> perf_evsel__set_sample_bit(evsel, CALLCHAIN);
>
> - if (callchain_param.record_mode == CALLCHAIN_LBR) {
> + if (param->record_mode == CALLCHAIN_LBR) {
> if (!opts->branch_stack) {
> if (attr->exclude_user) {
> pr_warning("LBR callstack option is only available "
> @@ -568,12 +569,12 @@ perf_evsel__config_callgraph(struct perf_evsel *evsel,
> "Falling back to framepointers.\n");
> }
>
> - if (callchain_param.record_mode == CALLCHAIN_DWARF) {
> + if (param->record_mode == CALLCHAIN_DWARF) {
> if (!function) {
> perf_evsel__set_sample_bit(evsel, REGS_USER);
> perf_evsel__set_sample_bit(evsel, STACK_USER);
> attr->sample_regs_user = PERF_REGS_MASK;
> - attr->sample_stack_user = callchain_param.dump_size;
> + attr->sample_stack_user = param->dump_size;
> attr->exclude_callchain_user = 1;
> } else {
> pr_info("Cannot use DWARF unwind for function trace event,"
> @@ -714,7 +715,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts)
> evsel->attr.exclude_callchain_user = 1;
>
> if (callchain_param.enabled && !evsel->no_aux_samples)
> - perf_evsel__config_callgraph(evsel, opts);
> + perf_evsel__config_callgraph(evsel, opts, &callchain_param);
>
> if (opts->sample_intr_regs) {
> attr->sample_regs_intr = PERF_REGS_MASK;
> --
> 1.8.3.1
>
On Tue, Aug 04, 2015 at 04:30:21AM -0400, Kan Liang wrote:
SNIP
> +
> + /* User explicitly set per-event callgraph, clear the old setting and reset. */
> + if ((callgraph_buf != NULL) || (dump_size > 0)) {
> +
> + /* parse callgraph parameters */
> + if (callgraph_buf != NULL) {
> + if (!strcmp(callgraph_buf, "no")) {
> + param.enabled = false;
> + param.record_mode = CALLCHAIN_NONE;
> + } else {
> + param.enabled = true;
> + if (parse_callchain_record_opt(callgraph_buf, &param)) {
> + pr_err("per-event callgraph setting for %s failed. "
> + "Apply callgraph global setting for it\n",
> + evsel->name);
> + return;
> + }
> + }
> + }
> + if (dump_size > 0)
> + param.dump_size = dump_size;
parse_callchain_record_opt does some rounding on the stack size value
via the get_stack_size function, so a command line like the following does not fail:
$ perf record --call-graph='dwarf,819' ls
while the term syntax fails:
$ perf record -e 'cpu/cpu-cycles,call-graph=dwarf,stack-size=819/
Error:
The sys_perf_event_open() syscall returned with 22 (Invalid argument) for event (cpu/cpu-cycles,time=0,call-graph=dwarf,stack-size=819/).
/bin/dmesg may provide additional information.
No CONFIG_PERF_EVENTS=y kernel support configured?
jirka
On Tue, Aug 04, 2015 at 04:30:21AM -0400, Kan Liang wrote:
> From: Kan Liang <[email protected]>
>
> When multiple events are sampled it may not be needed to collect
> callgraphs for all of them. The sample sites are usually nearby, and
> it's enough to collect the callgraphs on a reference event (such as
> precise cycles or precise instructions).
> This patchkit adds the ability to turn off callgraphs and time stamp
> per event. This in turn can reduce sampling overhead and the size of the
> perf.data. Furthermore, it makes collecting back traces and timestamps
> possible when PEBS threshold > 1, which significantly reduces the
> sampling overhead especially for frequently occurring events
> (https://lkml.org/lkml/2015/5/10/196). For example, A slower event with
> a larger period collects back traces/timestamps. Other more events run
> fast with multi-pebs. The time stamps from the slower events can be used
> to order the faster events. Their backtraces can give the user enough
> hint to find the right spot.
>
> Here are some examples and test results.
>
> 1. Comparing the elapsed time and perf.data size from "kernbench -M -H".
>
> The test command for FULL callgraph and time support.
> "perf record -e
> '{cpu/cpu-cycles,period=100000/,cpu/instructions,period=20000/p}'
> --call-graph fp --time"
>
> The test command for PARTIAL callgraph and time support.
> "perf record -e
> '{cpu/cpu-cycles,call-graph=fp,time,period=100000/,
> cpu/instructions,call-graph=no,time=0,period=20000/p}'"
>
> The elapsed time for FULL is 24.3 Sec, while for PARTIAL is 16.9 Sec.
> The perf.data size for FULL is 22.1 Gb, while for PARTIAL is 12.4 Gb.
>
> 2. Comparing the perf.data size and callgraph results.
>
> The test command for FULL callgraph and time support.
> "perf record -e
> '{cpu/cpu-cycles,period=100000/pp,cpu/instructions,period=20000/p}'
> --call-graph fp -- ./tchain_edit"
>
> The test command for PARTIAL callgraph and time support.
> "perf record -e
> '{cpu/cpu-cycles,call-graph=fp,time,period=100000/pp,
> cpu/instructions,call-graph=no,time=0,period=20000/p}'
> -- ./tchain_edit"
>
> The perf.data size for FULL is 43.2 MB, while for PARTIAL is 21.1 MB.
> The callgraph is roughly the same.
>
> The callgraph from FULL
> # Samples: 87K of event
> 'cpu/cpu-cycles,call-graph=fp,time,period=100000/pp'
> # Event count (approx.): 8760000000
> #
> # Children Self Command Shared Object Symbol
> # ........ ........ ........... ..................
> ..........................................
> #
> 99.98% 0.00% tchain_edit libc-2.15.so [.]
> __libc_start_main
> |
> ---__libc_start_main
>
> 99.97% 0.00% tchain_edit tchain_edit [.] main
> |
> ---main
> __libc_start_main
>
> 99.97% 0.00% tchain_edit tchain_edit [.] f1
> |
> ---f1
> main
> __libc_start_main
>
> 99.85% 87.01% tchain_edit tchain_edit [.] f3
> |
> ---f3
> |
> |--99.74%-- f2
> | f1
> | main
> | __libc_start_main
> --0.26%-- [...]
> 99.71% 0.12% tchain_edit tchain_edit [.] f2
> |
> ---f2
> f1
> main
> __libc_start_main
>
> The callgraph from PARTIAL
> # Samples: 417K of event
> 'cpu/instructions,call-graph=no,time=0,period=20000/p'
> # Event count (approx.): 8346980000
> #
> # Children Self Command Shared Object Symbol
> # ........ ........ ........... ................
> ..........................................
> #
> 98.82% 0.00% tchain_edit libc-2.15.so [.]
> __libc_start_main
> |
> ---__libc_start_main
>
> 98.82% 0.00% tchain_edit tchain_edit [.] main
> |
> ---main
> __libc_start_main
>
> 98.82% 0.00% tchain_edit tchain_edit [.] f1
> |
> ---f1
> main
> __libc_start_main
>
> 98.82% 98.28% tchain_edit tchain_edit [.] f3
> |
> ---f3
> |
> |--0.53%-- f2
> | f1
> | main
> | __libc_start_main
> |
> |--0.01%-- f1
> | main
> | __libc_start_main
> --99.46%-- [...]
> 97.63% 0.03% tchain_edit tchain_edit [.] f2
> |
> ---f2
> f1
> main
> __libc_start_main
>
> 7.13% 0.03% tchain_edit [kernel.vmlinux] [k] do_nmi
> |
> ---do_nmi
> end_repeat_nmi
> f3
> f2
> f1
> main
> __libc_start_main
>
> Signed-off-by: Kan Liang <[email protected]>
> ---
> tools/perf/Documentation/perf-record.txt | 4 ++
> tools/perf/util/evsel.c | 65 +++++++++++++++++++++++++++++++-
> tools/perf/util/evsel.h | 4 ++
> tools/perf/util/parse-events.c | 12 ++++++
> tools/perf/util/parse-events.h | 2 +
> tools/perf/util/parse-events.l | 2 +
> tools/perf/util/pmu.c | 3 +-
> 7 files changed, 89 insertions(+), 3 deletions(-)
>
> diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
> index 0d852d1..e633711 100644
> --- a/tools/perf/Documentation/perf-record.txt
> +++ b/tools/perf/Documentation/perf-record.txt
> @@ -52,6 +52,10 @@ OPTIONS
> - 'time': Disable/enable time stamping. Acceptable values are 1 for
> enabling time stamping. 0 for disabling time stamping.
> The default is 1.
> + - 'call-graph': Disable/enable callgraph. Acceptable str are "fp" for
> + FP mode, "dwarf" for DWARF mode, "lbr" for LBR mode and
> + "no" for disable callgraph.
> + - 'stack-size': user stack size for dwarf mode
> Note: If user explicitly sets options which conflict with the params,
> the value set by the params will be overridden.
>
> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> index f572f46..106cd20 100644
> --- a/tools/perf/util/evsel.c
> +++ b/tools/perf/util/evsel.c
> @@ -588,11 +588,36 @@ perf_evsel__config_callgraph(struct perf_evsel *evsel,
> }
> }
>
> -static void apply_config_terms(struct perf_evsel *evsel)
> +static void
> +perf_evsel__reset_callgraph(struct perf_evsel *evsel,
> + struct callchain_param *param)
> +{
> + struct perf_event_attr *attr = &evsel->attr;
> +
> + perf_evsel__reset_sample_bit(evsel, CALLCHAIN);
> + if (param->record_mode == CALLCHAIN_LBR) {
> + perf_evsel__reset_sample_bit(evsel, BRANCH_STACK);
> + attr->branch_sample_type &= ~(PERF_SAMPLE_BRANCH_USER |
> + PERF_SAMPLE_BRANCH_CALL_STACK);
> + }
> + if (param->record_mode == CALLCHAIN_DWARF) {
> + perf_evsel__reset_sample_bit(evsel, REGS_USER);
> + perf_evsel__reset_sample_bit(evsel, STACK_USER);
> + }
> +}
> +
> +static void apply_config_terms(struct perf_evsel *evsel,
> + struct record_opts *opts)
> {
> struct perf_evsel_config_term *term;
> struct list_head *config_terms = &evsel->config_terms;
> struct perf_event_attr *attr = &evsel->attr;
> + struct callchain_param param;
> + u32 dump_size = 0;
> + char *callgraph_buf = NULL;
> +
> + /* callgraph default */
> + param.record_mode = callchain_param.record_mode;
>
> list_for_each_entry(term, config_terms, list) {
> switch (term->type) {
> @@ -605,10 +630,46 @@ static void apply_config_terms(struct perf_evsel *evsel)
> else
> perf_evsel__reset_sample_bit(evsel, TIME);
> break;
> + case PERF_EVSEL__CONFIG_TERM_CALLGRAPH:
> + callgraph_buf = term->val.callgraph;
> + break;
> + case PERF_EVSEL__CONFIG_TERM_STACK_USER:
> + dump_size = term->val.stack_user;
> + break;
> default:
> break;
> }
> }
> +
> + /* User explicitly set per-event callgraph, clear the old setting and reset. */
> + if ((callgraph_buf != NULL) || (dump_size > 0)) {
> +
> + /* parse callgraph parameters */
> + if (callgraph_buf != NULL) {
> + if (!strcmp(callgraph_buf, "no")) {
> + param.enabled = false;
> + param.record_mode = CALLCHAIN_NONE;
> + } else {
> + param.enabled = true;
> + if (parse_callchain_record_opt(callgraph_buf, &param)) {
> + pr_err("per-event callgraph setting for %s failed. "
> + "Apply callgraph global setting for it\n",
> + evsel->name);
> + return;
hum, calling parse_callchain_record_opt from evsel hurts the python code:
17: Try 'import perf' in python, checking link problems :
--- start ---
test child forked, pid 25751
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
ImportError: python/perf.so: undefined symbol: parse_callchain_record_opt
test child finished with -1
---- end ----
Try 'import perf' in python, checking link problems: FAILED!
not sure if we can call it from some place else (I guess not),
then we'd need to either put the util/callchain.c under python
objects, or somehow refine needed parsing code..
jirka
> + }
> + }
> + }
> + if (dump_size > 0)
> + param.dump_size = dump_size;
> +
> + /* If global callgraph set, clear it */
> + if (callchain_param.enabled)
> + perf_evsel__reset_callgraph(evsel, &callchain_param);
> +
> + /* set perf-event callgraph */
> + if (param.enabled)
> + perf_evsel__config_callgraph(evsel, opts, &param);
> + }
> }
>
> /*
> @@ -807,7 +868,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts)
> * Apply event specific term settings,
> * it overloads any global configuration.
> */
> - apply_config_terms(evsel);
> + apply_config_terms(evsel, opts);
> }
>
> static int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
> diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
> index 6a12908..09a3022 100644
> --- a/tools/perf/util/evsel.h
> +++ b/tools/perf/util/evsel.h
> @@ -40,6 +40,8 @@ struct cgroup_sel;
> enum {
> PERF_EVSEL__CONFIG_TERM_PERIOD,
> PERF_EVSEL__CONFIG_TERM_TIME,
> + PERF_EVSEL__CONFIG_TERM_CALLGRAPH,
> + PERF_EVSEL__CONFIG_TERM_STACK_USER,
> PERF_EVSEL__CONFIG_TERM_MAX,
> };
>
> @@ -49,6 +51,8 @@ struct perf_evsel_config_term {
> union {
> u64 period;
> bool time;
> + char *callgraph;
> + u64 stack_user;
> } val;
> };
>
> diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
> index a6cb9af..716f1fc 100644
> --- a/tools/perf/util/parse-events.c
> +++ b/tools/perf/util/parse-events.c
> @@ -611,6 +611,12 @@ do { \
> return -EINVAL;
> }
> break;
> + case PARSE_EVENTS__TERM_TYPE_CALLGRAPH:
> + CHECK_TYPE_VAL(STR);
> + break;
> + case PARSE_EVENTS__TERM_TYPE_STACKSIZE:
> + CHECK_TYPE_VAL(NUM);
> + break;
> case PARSE_EVENTS__TERM_TYPE_NAME:
> CHECK_TYPE_VAL(STR);
> break;
> @@ -662,6 +668,12 @@ do { \
> case PARSE_EVENTS__TERM_TYPE_TIME:
> ADD_CONFIG_TERM(TIME, time, term->val.num);
> break;
> + case PARSE_EVENTS__TERM_TYPE_CALLGRAPH:
> + ADD_CONFIG_TERM(CALLGRAPH, callgraph, term->val.str);
> + break;
> + case PARSE_EVENTS__TERM_TYPE_STACKSIZE:
> + ADD_CONFIG_TERM(STACK_USER, stack_user, term->val.num);
> + break;
> default:
> break;
> }
> diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
> index e6f9aacc..87dc9f6 100644
> --- a/tools/perf/util/parse-events.h
> +++ b/tools/perf/util/parse-events.h
> @@ -64,6 +64,8 @@ enum {
> PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD,
> PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE,
> PARSE_EVENTS__TERM_TYPE_TIME,
> + PARSE_EVENTS__TERM_TYPE_CALLGRAPH,
> + PARSE_EVENTS__TERM_TYPE_STACKSIZE,
> };
>
> struct parse_events_term {
> diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
> index f542750..1665497 100644
> --- a/tools/perf/util/parse-events.l
> +++ b/tools/perf/util/parse-events.l
> @@ -184,6 +184,8 @@ name { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NAME); }
> period { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD); }
> branch_type { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE); }
> time { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_TIME); }
> +call-graph { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CALLGRAPH); }
> +stack-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_STACKSIZE); }
> , { return ','; }
> "/" { BEGIN(INITIAL); return '/'; }
> {name_minus} { return str(yyscanner, PE_NAME); }
> diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
> index b615cdf..948f896 100644
> --- a/tools/perf/util/pmu.c
> +++ b/tools/perf/util/pmu.c
> @@ -607,7 +607,8 @@ static char *formats_error_string(struct list_head *formats)
> {
> struct perf_pmu_format *format;
> char *err, *str;
> - static const char *static_terms = "config,config1,config2,name,period,branch_type,time\n";
> + static const char *static_terms = "config,config1,config2,name,period,"
> + "branch_type,time,call-graph,stack-size\n";
> unsigned i = 0;
>
> if (!asprintf(&str, "valid terms:"))
> --
> 1.8.3.1
>
> > + /* User explicitly set per-event callgraph, clear the old setting and
> reset. */
> > + if ((callgraph_buf != NULL) || (dump_size > 0)) {
> > +
> > + /* parse callgraph parameters */
> > + if (callgraph_buf != NULL) {
> > + if (!strcmp(callgraph_buf, "no")) {
> > + param.enabled = false;
> > + param.record_mode = CALLCHAIN_NONE;
> > + } else {
> > + param.enabled = true;
> > + if
> (parse_callchain_record_opt(callgraph_buf, &param)) {
> > + pr_err("per-event callgraph setting
> for %s failed. "
> > + "Apply callgraph global setting
> for it\n",
> > + evsel->name);
> > + return;
>
> hum, calling parse_callchain_record_opt from evsel hurts the python code:
>
> 17: Try 'import perf' in python, checking link problems :
> --- start ---
> test child forked, pid 25751
> Traceback (most recent call last):
> File "<stdin>", line 1, in <module>
> ImportError: python/perf.so: undefined symbol:
> parse_callchain_record_opt test child finished with -1
> ---- end ----
> Try 'import perf' in python, checking link problems: FAILED!
>
>
> not sure if we can call it from some place else (I guess not), then we'd need
> to either put the util/callchain.c under python objects,
We cannot just put util/callchain.c under the python objects, since
callchain.c has too many dependencies.
> or somehow refine
> needed parsing code..
Could we just move the related code to util.c as below?
---
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 931cca8..773fe13 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -25,96 +25,9 @@
__thread struct callchain_cursor callchain_cursor;
-#ifdef HAVE_DWARF_UNWIND_SUPPORT
-static int get_stack_size(const char *str, unsigned long *_size)
-{
- char *endptr;
- unsigned long size;
- unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));
-
- size = strtoul(str, &endptr, 0);
-
- do {
- if (*endptr)
- break;
-
- size = round_up(size, sizeof(u64));
- if (!size || size > max_size)
- break;
-
- *_size = size;
- return 0;
-
- } while (0);
-
- pr_err("callchain: Incorrect stack dump size (max %ld): %s\n",
- max_size, str);
- return -1;
-}
-#endif /* HAVE_DWARF_UNWIND_SUPPORT */
-
int parse_callchain_record_opt(const char *arg, struct callchain_param *param)
{
- char *tok, *name, *saveptr = NULL;
- char *buf;
- int ret = -1;
-
- /* We need buffer that we know we can write to. */
- buf = malloc(strlen(arg) + 1);
- if (!buf)
- return -ENOMEM;
-
- strcpy(buf, arg);
-
- tok = strtok_r((char *)buf, ",", &saveptr);
- name = tok ? : (char *)buf;
-
- do {
- /* Framepointer style */
- if (!strncmp(name, "fp", sizeof("fp"))) {
- if (!strtok_r(NULL, ",", &saveptr)) {
- param->record_mode = CALLCHAIN_FP;
- ret = 0;
- } else
- pr_err("callchain: No more arguments "
- "needed for --call-graph fp\n");
- break;
-
-#ifdef HAVE_DWARF_UNWIND_SUPPORT
- /* Dwarf style */
- } else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
- const unsigned long default_stack_dump_size = 8192;
-
- ret = 0;
- param->record_mode = CALLCHAIN_DWARF;
- param->dump_size = default_stack_dump_size;
-
- tok = strtok_r(NULL, ",", &saveptr);
- if (tok) {
- unsigned long size = 0;
-
- ret = get_stack_size(tok, &size);
- param->dump_size = size;
- }
-#endif /* HAVE_DWARF_UNWIND_SUPPORT */
- } else if (!strncmp(name, "lbr", sizeof("lbr"))) {
- if (!strtok_r(NULL, ",", &saveptr)) {
- param->record_mode = CALLCHAIN_LBR;
- ret = 0;
- } else
- pr_err("callchain: No more arguments "
- "needed for --call-graph lbr\n");
- break;
- } else {
- pr_err("callchain: Unknown --call-graph option "
- "value: %s\n", arg);
- break;
- }
-
- } while (0);
-
- free(buf);
- return ret;
+ return parse_callchain_record(arg, param);
}
static int parse_callchain_mode(const char *value)
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 68a32c2..acee2b3 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -177,6 +177,7 @@ int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *
bool hide_unresolved);
extern const char record_callchain_help[];
+extern int parse_callchain_record(const char *arg, struct callchain_param *param);
int parse_callchain_record_opt(const char *arg, struct callchain_param *param);
int parse_callchain_report_opt(const char *arg);
int perf_callchain_config(const char *var, const char *value);
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 106cd20..675d237 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -651,7 +651,7 @@ static void apply_config_terms(struct perf_evsel *evsel,
param.record_mode = CALLCHAIN_NONE;
} else {
param.enabled = true;
- if (parse_callchain_record_opt(callgraph_buf, &param)) {
+ if (parse_callchain_record(callgraph_buf, &param)) {
pr_err("per-event callgraph setting for %s failed. "
"Apply callgraph global setting for it\n",
evsel->name);
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index edc2d63..f7adf12 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -566,6 +566,96 @@ unsigned long parse_tag_value(const char *str, struct parse_tag *tags)
return (unsigned long) -1;
}
+int get_stack_size(const char *str, unsigned long *_size)
+{
+ char *endptr;
+ unsigned long size;
+ unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));
+
+ size = strtoul(str, &endptr, 0);
+
+ do {
+ if (*endptr)
+ break;
+
+ size = round_up(size, sizeof(u64));
+ if (!size || size > max_size)
+ break;
+
+ *_size = size;
+ return 0;
+
+ } while (0);
+
+ pr_err("callchain: Incorrect stack dump size (max %ld): %s\n",
+ max_size, str);
+ return -1;
+}
+
+int parse_callchain_record(const char *arg, struct callchain_param *param)
+{
+ char *tok, *name, *saveptr = NULL;
+ char *buf;
+ int ret = -1;
+
+ /* We need buffer that we know we can write to. */
+ buf = malloc(strlen(arg) + 1);
+ if (!buf)
+ return -ENOMEM;
+
+ strcpy(buf, arg);
+
+ tok = strtok_r((char *)buf, ",", &saveptr);
+ name = tok ? : (char *)buf;
+
+ do {
+ /* Framepointer style */
+ if (!strncmp(name, "fp", sizeof("fp"))) {
+ if (!strtok_r(NULL, ",", &saveptr)) {
+ param->record_mode = CALLCHAIN_FP;
+ ret = 0;
+ } else
+ pr_err("callchain: No more arguments "
+ "needed for --call-graph fp\n");
+ break;
+
+#ifdef HAVE_DWARF_UNWIND_SUPPORT
+ /* Dwarf style */
+ } else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
+ const unsigned long default_stack_dump_size = 8192;
+
+ ret = 0;
+ param->record_mode = CALLCHAIN_DWARF;
+ param->dump_size = default_stack_dump_size;
+
+ tok = strtok_r(NULL, ",", &saveptr);
+ if (tok) {
+ unsigned long size = 0;
+
+ ret = get_stack_size(tok, &size);
+ param->dump_size = size;
+ }
+#endif /* HAVE_DWARF_UNWIND_SUPPORT */
+ } else if (!strncmp(name, "lbr", sizeof("lbr"))) {
+ if (!strtok_r(NULL, ",", &saveptr)) {
+ param->record_mode = CALLCHAIN_LBR;
+ ret = 0;
+ } else
+ pr_err("callchain: No more arguments "
+ "needed for --call-graph lbr\n");
+ break;
+ } else {
+ pr_err("callchain: Unknown --call-graph option "
+ "value: %s\n", arg);
+ break;
+ }
+
+ } while (0);
+
+ free(buf);
+ return ret;
+}
+
int filename__read_str(const char *filename, char **buf, size_t *sizep)
{
size_t size = 0, alloc_size = 0;
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 20d625a..8148703 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -351,4 +351,6 @@ static inline char *asprintf_expr_not_in_ints(const char *var, size_t nints, int
return asprintf_expr_inout_ints(var, false, nints, ints);
}
+int get_stack_size(const char *str, unsigned long *_size);
+
#endif /* GIT_COMPAT_UTIL_H */
--
Thanks,
Kan
Commit-ID: 320677123905fd1dd122895cd5fb870ee9e1380b
Gitweb: http://git.kernel.org/tip/320677123905fd1dd122895cd5fb870ee9e1380b
Author: Kan Liang <[email protected]>
AuthorDate: Tue, 4 Aug 2015 04:30:19 -0400
Committer: Arnaldo Carvalho de Melo <[email protected]>
CommitDate: Wed, 5 Aug 2015 12:50:52 -0300
perf tools: Per-event time support
This patchkit adds the ability to turn off time stamps per event.
One useful case for partial time is to work with per-event callgraph to
enable "PEBS threshold > 1" (https://lkml.org/lkml/2015/5/10/196), which
can significantly reduce the sampling overhead.
The event samples with time stamps off will not be ordered.
Signed-off-by: Kan Liang <[email protected]>
Acked-by: Jiri Olsa <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Namhyung Kim <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/Documentation/perf-record.txt | 4 +++-
tools/perf/util/evsel.c | 14 +++++++++++---
tools/perf/util/evsel.h | 2 ++
tools/perf/util/parse-events.c | 12 ++++++++++++
tools/perf/util/parse-events.h | 1 +
tools/perf/util/parse-events.l | 1 +
tools/perf/util/pmu.c | 2 +-
7 files changed, 31 insertions(+), 5 deletions(-)
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index ac41350..0d852d1 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -49,7 +49,9 @@ OPTIONS
These params can be used to overload default config values per event.
Here is a list of the params.
- 'period': Set event sampling period
-
+ - 'time': Disable/enable time stamping. Acceptable values are 1 for
+ enabling time stamping. 0 for disabling time stamping.
+ The default is 1.
Note: If user explicitly sets options which conflict with the params,
the value set by the params will be overridden.
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 7d3acba..7febfe2 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -587,15 +587,23 @@ perf_evsel__config_callgraph(struct perf_evsel *evsel,
}
}
-static void apply_config_terms(struct perf_event_attr *attr __maybe_unused,
- struct list_head *config_terms)
+static void apply_config_terms(struct perf_evsel *evsel)
{
struct perf_evsel_config_term *term;
+ struct list_head *config_terms = &evsel->config_terms;
+ struct perf_event_attr *attr = &evsel->attr;
list_for_each_entry(term, config_terms, list) {
switch (term->type) {
case PERF_EVSEL__CONFIG_TERM_PERIOD:
attr->sample_period = term->val.period;
+ break;
+ case PERF_EVSEL__CONFIG_TERM_TIME:
+ if (term->val.time)
+ perf_evsel__set_sample_bit(evsel, TIME);
+ else
+ perf_evsel__reset_sample_bit(evsel, TIME);
+ break;
default:
break;
}
@@ -798,7 +806,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts)
* Apply event specific term settings,
* it overloads any global configuration.
*/
- apply_config_terms(attr, &evsel->config_terms);
+ apply_config_terms(evsel);
}
static int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index a7d2175..6a12908 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -39,6 +39,7 @@ struct cgroup_sel;
*/
enum {
PERF_EVSEL__CONFIG_TERM_PERIOD,
+ PERF_EVSEL__CONFIG_TERM_TIME,
PERF_EVSEL__CONFIG_TERM_MAX,
};
@@ -47,6 +48,7 @@ struct perf_evsel_config_term {
int type;
union {
u64 period;
+ bool time;
} val;
};
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 09bee93..a6cb9af 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -603,6 +603,14 @@ do { \
* attr->branch_sample_type = term->val.num;
*/
break;
+ case PARSE_EVENTS__TERM_TYPE_TIME:
+ CHECK_TYPE_VAL(NUM);
+ if (term->val.num > 1) {
+ err->str = strdup("expected 0 or 1");
+ err->idx = term->err_val;
+ return -EINVAL;
+ }
+ break;
case PARSE_EVENTS__TERM_TYPE_NAME:
CHECK_TYPE_VAL(STR);
break;
@@ -650,6 +658,10 @@ do { \
switch (term->type_term) {
case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD:
ADD_CONFIG_TERM(PERIOD, period, term->val.num);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_TIME:
+ ADD_CONFIG_TERM(TIME, time, term->val.num);
+ break;
default:
break;
}
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 2063048..e6f9aacc 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -63,6 +63,7 @@ enum {
PARSE_EVENTS__TERM_TYPE_NAME,
PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD,
PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE,
+ PARSE_EVENTS__TERM_TYPE_TIME,
};
struct parse_events_term {
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 13cef3c..f542750 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -183,6 +183,7 @@ config2 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG2); }
name { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NAME); }
period { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD); }
branch_type { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE); }
+time { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_TIME); }
, { return ','; }
"/" { BEGIN(INITIAL); return '/'; }
{name_minus} { return str(yyscanner, PE_NAME); }
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 7bcb8c3..b615cdf 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -607,7 +607,7 @@ static char *formats_error_string(struct list_head *formats)
{
struct perf_pmu_format *format;
char *err, *str;
- static const char *static_terms = "config,config1,config2,name,period,branch_type\n";
+ static const char *static_terms = "config,config1,config2,name,period,branch_type,time\n";
unsigned i = 0;
if (!asprintf(&str, "valid terms:"))
Commit-ID: c3a6a8c40538f609923acf9473250266283269a5
Gitweb: http://git.kernel.org/tip/c3a6a8c40538f609923acf9473250266283269a5
Author: Kan Liang <[email protected]>
AuthorDate: Tue, 4 Aug 2015 04:30:20 -0400
Committer: Arnaldo Carvalho de Melo <[email protected]>
CommitDate: Wed, 5 Aug 2015 16:42:11 -0300
perf tools: Refine parse/config callchain functions
Pass global callchain_param into parse_callchain_record_opt and
perf_evsel__config_callgraph as parameter. So we can reuse these
functions to parse/config local param for callchain.
Signed-off-by: Kan Liang <[email protected]>
Acked-by: Jiri Olsa <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Namhyung Kim <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/builtin-record.c | 2 +-
tools/perf/util/callchain.c | 14 +++++++-------
tools/perf/util/callchain.h | 2 +-
tools/perf/util/evsel.c | 11 ++++++-----
4 files changed, 15 insertions(+), 14 deletions(-)
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index f51131b..25cf6b4 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -779,7 +779,7 @@ int record_parse_callchain_opt(const struct option *opt,
return 0;
}
- ret = parse_callchain_record_opt(arg);
+ ret = parse_callchain_record_opt(arg, &callchain_param);
if (!ret)
callchain_debug();
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 9f643ee..931cca8 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -53,7 +53,7 @@ static int get_stack_size(const char *str, unsigned long *_size)
}
#endif /* HAVE_DWARF_UNWIND_SUPPORT */
-int parse_callchain_record_opt(const char *arg)
+int parse_callchain_record_opt(const char *arg, struct callchain_param *param)
{
char *tok, *name, *saveptr = NULL;
char *buf;
@@ -73,7 +73,7 @@ int parse_callchain_record_opt(const char *arg)
/* Framepointer style */
if (!strncmp(name, "fp", sizeof("fp"))) {
if (!strtok_r(NULL, ",", &saveptr)) {
- callchain_param.record_mode = CALLCHAIN_FP;
+ param->record_mode = CALLCHAIN_FP;
ret = 0;
} else
pr_err("callchain: No more arguments "
@@ -86,20 +86,20 @@ int parse_callchain_record_opt(const char *arg)
const unsigned long default_stack_dump_size = 8192;
ret = 0;
- callchain_param.record_mode = CALLCHAIN_DWARF;
- callchain_param.dump_size = default_stack_dump_size;
+ param->record_mode = CALLCHAIN_DWARF;
+ param->dump_size = default_stack_dump_size;
tok = strtok_r(NULL, ",", &saveptr);
if (tok) {
unsigned long size = 0;
ret = get_stack_size(tok, &size);
- callchain_param.dump_size = size;
+ param->dump_size = size;
}
#endif /* HAVE_DWARF_UNWIND_SUPPORT */
} else if (!strncmp(name, "lbr", sizeof("lbr"))) {
if (!strtok_r(NULL, ",", &saveptr)) {
- callchain_param.record_mode = CALLCHAIN_LBR;
+ param->record_mode = CALLCHAIN_LBR;
ret = 0;
} else
pr_err("callchain: No more arguments "
@@ -219,7 +219,7 @@ int perf_callchain_config(const char *var, const char *value)
var += sizeof("call-graph.") - 1;
if (!strcmp(var, "record-mode"))
- return parse_callchain_record_opt(value);
+ return parse_callchain_record_opt(value, &callchain_param);
#ifdef HAVE_DWARF_UNWIND_SUPPORT
if (!strcmp(var, "dump-size")) {
unsigned long size = 0;
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 679c2c6..68a32c2 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -177,7 +177,7 @@ int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *
bool hide_unresolved);
extern const char record_callchain_help[];
-int parse_callchain_record_opt(const char *arg);
+int parse_callchain_record_opt(const char *arg, struct callchain_param *param);
int parse_callchain_report_opt(const char *arg);
int perf_callchain_config(const char *var, const char *value);
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 7febfe2..f572f46 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -545,14 +545,15 @@ int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size)
static void
perf_evsel__config_callgraph(struct perf_evsel *evsel,
- struct record_opts *opts)
+ struct record_opts *opts,
+ struct callchain_param *param)
{
bool function = perf_evsel__is_function_event(evsel);
struct perf_event_attr *attr = &evsel->attr;
perf_evsel__set_sample_bit(evsel, CALLCHAIN);
- if (callchain_param.record_mode == CALLCHAIN_LBR) {
+ if (param->record_mode == CALLCHAIN_LBR) {
if (!opts->branch_stack) {
if (attr->exclude_user) {
pr_warning("LBR callstack option is only available "
@@ -568,12 +569,12 @@ perf_evsel__config_callgraph(struct perf_evsel *evsel,
"Falling back to framepointers.\n");
}
- if (callchain_param.record_mode == CALLCHAIN_DWARF) {
+ if (param->record_mode == CALLCHAIN_DWARF) {
if (!function) {
perf_evsel__set_sample_bit(evsel, REGS_USER);
perf_evsel__set_sample_bit(evsel, STACK_USER);
attr->sample_regs_user = PERF_REGS_MASK;
- attr->sample_stack_user = callchain_param.dump_size;
+ attr->sample_stack_user = param->dump_size;
attr->exclude_callchain_user = 1;
} else {
pr_info("Cannot use DWARF unwind for function trace event,"
@@ -714,7 +715,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts)
evsel->attr.exclude_callchain_user = 1;
if (callchain_param.enabled && !evsel->no_aux_samples)
- perf_evsel__config_callgraph(evsel, opts);
+ perf_evsel__config_callgraph(evsel, opts, &callchain_param);
if (opts->sample_intr_regs) {
attr->sample_regs_intr = PERF_REGS_MASK;
Em Wed, Aug 05, 2015 at 03:45:27PM +0000, Liang, Kan escreveu:
> > hum, calling parse_callchain_record_opt from evsel hurts the python code:
> >
> > 17: Try 'import perf' in python, checking link problems :
> > --- start ---
> > test child forked, pid 25751
> > Traceback (most recent call last):
> > File "<stdin>", line 1, in <module>
> > ImportError: python/perf.so: undefined symbol: parse_callchain_record_opt
> > test child finished with -1
> > ---- end ----
> > Try 'import perf' in python, checking link problems: FAILED!
> >
> >
> > not sure if we can call it from some place else (I guess not), then we'd need
> > to either put the util/callchain.c under python objects,
>
> We cannot simply put util/callchain.c under the python objects, since
> callchain.c has too many dependencies.
>
> > or somehow refine
> > needed parsing code..
>
> Could we just move the related code to util.c as below?
Looks sensible, can you resend this as a separate patch, plus the two
remaining ones? I.e. the patch below is the first in this new series, then
this per-event callchain selection one, then the 'perf test' patch.
The first two already are in tip.git
Thanks,
- Arnaldo
> ---
> diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
> index 931cca8..773fe13 100644
> --- a/tools/perf/util/callchain.c
> +++ b/tools/perf/util/callchain.c
> @@ -25,96 +25,9 @@
>
> __thread struct callchain_cursor callchain_cursor;
>
> -#ifdef HAVE_DWARF_UNWIND_SUPPORT
> -static int get_stack_size(const char *str, unsigned long *_size)
> -{
> - char *endptr;
> - unsigned long size;
> - unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));
> -
> - size = strtoul(str, &endptr, 0);
> -
> - do {
> - if (*endptr)
> - break;
> -
> - size = round_up(size, sizeof(u64));
> - if (!size || size > max_size)
> - break;
> -
> - *_size = size;
> - return 0;
> -
> - } while (0);
> -
> - pr_err("callchain: Incorrect stack dump size (max %ld): %s\n",
> - max_size, str);
> - return -1;
> -}
> -#endif /* HAVE_DWARF_UNWIND_SUPPORT */
> -
> int parse_callchain_record_opt(const char *arg, struct callchain_param *param)
> {
> - char *tok, *name, *saveptr = NULL;
> - char *buf;
> - int ret = -1;
> -
> - /* We need buffer that we know we can write to. */
> - buf = malloc(strlen(arg) + 1);
> - if (!buf)
> - return -ENOMEM;
> -
> - strcpy(buf, arg);
> -
> - tok = strtok_r((char *)buf, ",", &saveptr);
> - name = tok ? : (char *)buf;
> -
> - do {
> - /* Framepointer style */
> - if (!strncmp(name, "fp", sizeof("fp"))) {
> - if (!strtok_r(NULL, ",", &saveptr)) {
> - param->record_mode = CALLCHAIN_FP;
> - ret = 0;
> - } else
> - pr_err("callchain: No more arguments "
> - "needed for --call-graph fp\n");
> - break;
> -
> -#ifdef HAVE_DWARF_UNWIND_SUPPORT
> - /* Dwarf style */
> - } else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
> - const unsigned long default_stack_dump_size = 8192;
> -
> - ret = 0;
> - param->record_mode = CALLCHAIN_DWARF;
> - param->dump_size = default_stack_dump_size;
> -
> - tok = strtok_r(NULL, ",", &saveptr);
> - if (tok) {
> - unsigned long size = 0;
> -
> - ret = get_stack_size(tok, &size);
> - param->dump_size = size;
> - }
> -#endif /* HAVE_DWARF_UNWIND_SUPPORT */
> - } else if (!strncmp(name, "lbr", sizeof("lbr"))) {
> - if (!strtok_r(NULL, ",", &saveptr)) {
> - param->record_mode = CALLCHAIN_LBR;
> - ret = 0;
> - } else
> - pr_err("callchain: No more arguments "
> - "needed for --call-graph lbr\n");
> - break;
> - } else {
> - pr_err("callchain: Unknown --call-graph option "
> - "value: %s\n", arg);
> - break;
> - }
> -
> - } while (0);
> -
> - free(buf);
> - return ret;
> + return parse_callchain_record(arg, param);
> }
>
> static int parse_callchain_mode(const char *value)
> diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
> index 68a32c2..acee2b3 100644
> --- a/tools/perf/util/callchain.h
> +++ b/tools/perf/util/callchain.h
> @@ -177,6 +177,7 @@ int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *
> bool hide_unresolved);
>
> extern const char record_callchain_help[];
> +extern int parse_callchain_record(const char *arg, struct callchain_param *param);
> int parse_callchain_record_opt(const char *arg, struct callchain_param *param);
> int parse_callchain_report_opt(const char *arg);
> int perf_callchain_config(const char *var, const char *value);
> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> index 106cd20..675d237 100644
> --- a/tools/perf/util/evsel.c
> +++ b/tools/perf/util/evsel.c
> @@ -651,7 +651,7 @@ static void apply_config_terms(struct perf_evsel *evsel,
> param.record_mode = CALLCHAIN_NONE;
> } else {
> param.enabled = true;
> - if (parse_callchain_record_opt(callgraph_buf, &param)) {
> + if (parse_callchain_record(callgraph_buf, &param)) {
> pr_err("per-event callgraph setting for %s failed. "
> "Apply callgraph global setting for it\n",
> evsel->name);
> diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
> index edc2d63..f7adf12 100644
> --- a/tools/perf/util/util.c
> +++ b/tools/perf/util/util.c
> @@ -566,6 +566,96 @@ unsigned long parse_tag_value(const char *str, struct parse_tag *tags)
> return (unsigned long) -1;
> }
>
> +int get_stack_size(const char *str, unsigned long *_size)
> +{
> + char *endptr;
> + unsigned long size;
> + unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));
> +
> + size = strtoul(str, &endptr, 0);
> +
> + do {
> + if (*endptr)
> + break;
> +
> + size = round_up(size, sizeof(u64));
> + if (!size || size > max_size)
> + break;
> +
> + *_size = size;
> + return 0;
> +
> + } while (0);
> +
> + pr_err("callchain: Incorrect stack dump size (max %ld): %s\n",
> + max_size, str);
> + return -1;
> +}
> +
> +int parse_callchain_record(const char *arg, struct callchain_param *param)
> +{
> + char *tok, *name, *saveptr = NULL;
> + char *buf;
> + int ret = -1;
> +
> + /* We need buffer that we know we can write to. */
> + buf = malloc(strlen(arg) + 1);
> + if (!buf)
> + return -ENOMEM;
> +
> + strcpy(buf, arg);
> +
> + tok = strtok_r((char *)buf, ",", &saveptr);
> + name = tok ? : (char *)buf;
> +
> + do {
> + /* Framepointer style */
> + if (!strncmp(name, "fp", sizeof("fp"))) {
> + if (!strtok_r(NULL, ",", &saveptr)) {
> + param->record_mode = CALLCHAIN_FP;
> + ret = 0;
> + } else
> + pr_err("callchain: No more arguments "
> + "needed for --call-graph fp\n");
> + break;
> +
> +#ifdef HAVE_DWARF_UNWIND_SUPPORT
> + /* Dwarf style */
> + } else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
> + const unsigned long default_stack_dump_size = 8192;
> +
> + ret = 0;
> + param->record_mode = CALLCHAIN_DWARF;
> + param->dump_size = default_stack_dump_size;
> +
> + tok = strtok_r(NULL, ",", &saveptr);
> + if (tok) {
> + unsigned long size = 0;
> +
> + ret = get_stack_size(tok, &size);
> + param->dump_size = size;
> + }
> +#endif /* HAVE_DWARF_UNWIND_SUPPORT */
> + } else if (!strncmp(name, "lbr", sizeof("lbr"))) {
> + if (!strtok_r(NULL, ",", &saveptr)) {
> + param->record_mode = CALLCHAIN_LBR;
> + ret = 0;
> + } else
> + pr_err("callchain: No more arguments "
> + "needed for --call-graph lbr\n");
> + break;
> + } else {
> + pr_err("callchain: Unknown --call-graph option "
> + "value: %s\n", arg);
> + break;
> + }
> +
> + } while (0);
> +
> + free(buf);
> + return ret;
> +}
> +
> int filename__read_str(const char *filename, char **buf, size_t *sizep)
> {
> size_t size = 0, alloc_size = 0;
> diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
> index 20d625a..8148703 100644
> --- a/tools/perf/util/util.h
> +++ b/tools/perf/util/util.h
> @@ -351,4 +351,6 @@ static inline char *asprintf_expr_not_in_ints(const char *var, size_t nints, int
> return asprintf_expr_inout_ints(var, false, nints, ints);
> }
>
> +int get_stack_size(const char *str, unsigned long *_size);
> +
> #endif /* GIT_COMPAT_UTIL_H */
> --
>
> Thanks,
> Kan
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/