2015-07-29 16:57:38

by Liang, Kan

[permalink] [raw]
Subject: [PATCH RFC V7 0/7] per event callgraph and time support

This patchkit adds the ability to turn off callgraphs and time stamps
per event. This in term can reduce sampling overhead and the size of
the perf.data.

Changes since V1:
- Break up V1 patches into three patches(parse option changes,
partial time support and partial callgraph support).
- Use strings 'fp,dwarf,lbr,no' to identify callchains
- Add test case in parse-events.c

Changes since V2:
- Rebase on 60cd37eb10

Changes since V3:
- Replace OPT_CALLBACK_SET by current existing callback mechanism.
- Using perf_evsel__set_sample_bit if possible
- Change the expression "partial" to "per event"
- Using global variable to indicate if 'time' is set per event.
If 'time' is not set, enable it by default for perf record.

Changes since V4:
- Fix issue of setting callgraph_set

Changes since V5:
- per-event settings over global settings in general
- support for event post configuration structure

Changes since V6:
- Add error infor for per-event time
- rename the option to call-graph and stacl-size
- refine parse/config callchain functions,
and reuse them for per-event callgraph setting
- Modify perf test

Jiri Olsa (2):
perf tools: Add support for event post configuration
perf tools: Force period term to overload global settings

Kan Liang (5):
perf,tools: introduce callgraph_set for callgraph option
perf,tools: per-event time support
perf,tools: refine parse/config callchain functions
perf,tools: per-event callgraph support
perf,tests: Add tests to callgraph and time parse

tools/perf/Documentation/perf-record.txt | 10 ++-
tools/perf/builtin-record.c | 11 +++-
tools/perf/perf.h | 1 +
tools/perf/tests/parse-events.c | 50 ++++++++++++++-
tools/perf/util/callchain.c | 14 ++---
tools/perf/util/callchain.h | 2 +-
tools/perf/util/evsel.c | 105 +++++++++++++++++++++++++++++--
tools/perf/util/evsel.h | 26 ++++++++
tools/perf/util/parse-events.c | 94 +++++++++++++++++++++++----
tools/perf/util/parse-events.h | 3 +
tools/perf/util/parse-events.l | 3 +
tools/perf/util/pmu.c | 3 +-
12 files changed, 289 insertions(+), 33 deletions(-)

--
1.8.3.1


2015-07-29 16:59:19

by Liang, Kan

[permalink] [raw]
Subject: [PATCH RFC V7 1/7] perf tools: Add support for event post configuration

From: Jiri Olsa <[email protected]>

Add support to overload any global settings for event
and force user specified term value. It will be useful
for new time and backtrace terms.

Signed-off-by: Jiri Olsa <[email protected]>
Signed-off-by: Kan Liang <[email protected]>
---
tools/perf/util/evsel.c | 31 +++++++++++++++++++
tools/perf/util/evsel.h | 19 ++++++++++++
tools/perf/util/parse-events.c | 67 +++++++++++++++++++++++++++++++++++-------
3 files changed, 106 insertions(+), 11 deletions(-)

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 71f6905..048d61d 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -207,6 +207,7 @@ void perf_evsel__init(struct perf_evsel *evsel,
evsel->unit = "";
evsel->scale = 1.0;
INIT_LIST_HEAD(&evsel->node);
+ INIT_LIST_HEAD(&evsel->config_terms);
perf_evsel__object.init(evsel);
evsel->sample_size = __perf_evsel__sample_size(attr->sample_type);
perf_evsel__calc_id_pos(evsel);
@@ -586,6 +587,19 @@ perf_evsel__config_callgraph(struct perf_evsel *evsel,
}
}

+static void apply_config_terms(struct perf_event_attr *attr __maybe_unused,
+ struct list_head *config_terms)
+{
+ struct perf_evsel_config_term *term;
+
+ list_for_each_entry(term, config_terms, list) {
+ switch (term->type) {
+ default:
+ break;
+ }
+ }
+}
+
/*
* The enable_on_exec/disabled value strategy:
*
@@ -777,6 +791,12 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts)
attr->use_clockid = 1;
attr->clockid = opts->clockid;
}
+
+ /*
+ * Apply event specific term settings,
+ * it overloads any global configuration.
+ */
+ apply_config_terms(attr, &evsel->config_terms);
}

static int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
@@ -900,6 +920,16 @@ static void perf_evsel__free_id(struct perf_evsel *evsel)
zfree(&evsel->id);
}

+static void perf_evsel__free_config_terms(struct perf_evsel *evsel)
+{
+ struct perf_evsel_config_term *term, *h;
+
+ list_for_each_entry_safe(term, h, &evsel->config_terms, list) {
+ list_del(&term->list);
+ free(term);
+ }
+}
+
void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
{
int cpu, thread;
@@ -919,6 +949,7 @@ void perf_evsel__exit(struct perf_evsel *evsel)
assert(list_empty(&evsel->node));
perf_evsel__free_fd(evsel);
perf_evsel__free_id(evsel);
+ perf_evsel__free_config_terms(evsel);
close_cgroup(evsel->cgrp);
cpu_map__put(evsel->cpus);
thread_map__put(evsel->threads);
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 1fc263a..0339819 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -31,6 +31,24 @@ struct perf_sample_id {

struct cgroup_sel;

+/*
+ * The 'struct perf_evsel_config_term' is used to pass event
+ * specific configuration data to perf_evsel__config routine.
+ * It is allocated within event parsing and attached to
+ * perf_evsel::config_terms list head.
+*/
+enum {
+ PERF_EVSEL__CONFIG_TERM_MAX,
+};
+
+struct perf_evsel_config_term {
+ struct list_head list;
+ int type;
+ union {
+ u64 period;
+ } val;
+};
+
/** struct perf_evsel - event selector
*
* @name - Can be set to retain the original event name passed by the user,
@@ -87,6 +105,7 @@ struct perf_evsel {
struct perf_evsel *leader;
char *group_name;
bool cmdline_group_boundary;
+ struct list_head config_terms;
};

union u64_swap {
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 4f807fc..3271d13 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -276,7 +276,8 @@ const char *event_type(int type)
static struct perf_evsel *
__add_event(struct list_head *list, int *idx,
struct perf_event_attr *attr,
- char *name, struct cpu_map *cpus)
+ char *name, struct cpu_map *cpus,
+ struct list_head *config_terms)
{
struct perf_evsel *evsel;

@@ -291,14 +292,19 @@ __add_event(struct list_head *list, int *idx,

if (name)
evsel->name = strdup(name);
+
+ if (config_terms)
+ list_splice(config_terms, &evsel->config_terms);
+
list_add_tail(&evsel->node, list);
return evsel;
}

static int add_event(struct list_head *list, int *idx,
- struct perf_event_attr *attr, char *name)
+ struct perf_event_attr *attr, char *name,
+ struct list_head *config_terms)
{
- return __add_event(list, idx, attr, name, NULL) ? 0 : -ENOMEM;
+ return __add_event(list, idx, attr, name, NULL, config_terms) ? 0 : -ENOMEM;
}

static int parse_aliases(char *str, const char *names[][PERF_EVSEL__MAX_ALIASES], int size)
@@ -377,7 +383,7 @@ int parse_events_add_cache(struct list_head *list, int *idx,
memset(&attr, 0, sizeof(attr));
attr.config = cache_type | (cache_op << 8) | (cache_result << 16);
attr.type = PERF_TYPE_HW_CACHE;
- return add_event(list, idx, &attr, name);
+ return add_event(list, idx, &attr, name, NULL);
}

static int add_tracepoint(struct list_head *list, int *idx,
@@ -539,7 +545,7 @@ int parse_events_add_breakpoint(struct list_head *list, int *idx,
attr.type = PERF_TYPE_BREAKPOINT;
attr.sample_period = 1;

- return add_event(list, idx, &attr, NULL);
+ return add_event(list, idx, &attr, NULL, NULL);
}

static int check_type_val(struct parse_events_term *term,
@@ -622,22 +628,56 @@ static int config_attr(struct perf_event_attr *attr,
return 0;
}

+static int get_config_terms(struct list_head *head_config,
+ struct list_head *head_terms __maybe_unused)
+{
+#define ADD_CONFIG_TERM(__type, __name, __val) \
+do { \
+ struct perf_evsel_config_term *__t; \
+ \
+ __t = zalloc(sizeof(*__t)); \
+ if (!__t) \
+ return -ENOMEM; \
+ \
+ INIT_LIST_HEAD(&__t->list); \
+ __t->type = PERF_EVSEL__CONFIG_TERM_ ## __type; \
+ __t->val.__name = __val; \
+ list_add_tail(&__t->list, head_terms); \
+} while (0)
+
+ struct parse_events_term *term;
+
+ list_for_each_entry(term, head_config, list) {
+ switch (term->type_term) {
+ default:
+ break;
+ }
+ }
+#undef ADD_EVSEL_CONFIG
+ return 0;
+}
+
int parse_events_add_numeric(struct parse_events_evlist *data,
struct list_head *list,
u32 type, u64 config,
struct list_head *head_config)
{
struct perf_event_attr attr;
+ LIST_HEAD(config_terms);

memset(&attr, 0, sizeof(attr));
attr.type = type;
attr.config = config;

- if (head_config &&
- config_attr(&attr, head_config, data->error))
- return -EINVAL;
+ if (head_config) {
+ if (config_attr(&attr, head_config, data->error))
+ return -EINVAL;
+
+ if (get_config_terms(head_config, &config_terms))
+ return -ENOMEM;
+ }

- return add_event(list, &data->idx, &attr, NULL);
+ return add_event(list, &data->idx, &attr, NULL, &config_terms);
}

static int parse_events__is_name_term(struct parse_events_term *term)
@@ -664,6 +704,7 @@ int parse_events_add_pmu(struct parse_events_evlist *data,
struct perf_pmu_info info;
struct perf_pmu *pmu;
struct perf_evsel *evsel;
+ LIST_HEAD(config_terms);

pmu = perf_pmu__find(name);
if (!pmu)
@@ -678,7 +719,7 @@ int parse_events_add_pmu(struct parse_events_evlist *data,

if (!head_config) {
attr.type = pmu->type;
- evsel = __add_event(list, &data->idx, &attr, NULL, pmu->cpus);
+ evsel = __add_event(list, &data->idx, &attr, NULL, pmu->cpus, NULL);
return evsel ? 0 : -ENOMEM;
}

@@ -692,11 +733,15 @@ int parse_events_add_pmu(struct parse_events_evlist *data,
if (config_attr(&attr, head_config, data->error))
return -EINVAL;

+ if (get_config_terms(head_config, &config_terms))
+ return -ENOMEM;
+
if (perf_pmu__config(pmu, &attr, head_config, data->error))
return -EINVAL;

evsel = __add_event(list, &data->idx, &attr,
- pmu_event_name(head_config), pmu->cpus);
+ pmu_event_name(head_config), pmu->cpus,
+ &config_terms);
if (evsel) {
evsel->unit = info.unit;
evsel->scale = info.scale;
--
1.8.3.1

2015-07-29 16:58:59

by Liang, Kan

[permalink] [raw]
Subject: [PATCH RFC V7 2/7] perf tools: Force period term to overload global settings

From: Jiri Olsa <[email protected]>

Currently the command line option settings beats the
per event period settings:

With no global settings, we get per-event configuration:

$ perf record -e 'cpu/instructions,period=20000/' sleep 1
$ perf evlist -v
... { sample_period, sample_freq }: 20000 ...

With 'c' option period setup, we get 'c' option value:
$ perf record -e 'cpu/instructions,period=20000/' -c 1000 sleep 1
$ perf evlist -v
... { sample_period, sample_freq }: 1000 ...

This patch makes the per-event settings overload the
global 'c' option setup:

$ perf record -e 'cpu/instructions,period=20000/' -c 1000 sleep 1
$ perf evlist -v
... { sample_period, sample_freq }: 20000 ...

I think the making the per-event settings to overload any other
config makes more sense than current state. However it breaks
the current 'period' term handling, which might cause some
noise.. so let's see ;-).

Also fixing parse event tests with the new behaviour.

Signed-off-by: Jiri Olsa <[email protected]>
Signed-off-by: Kan Liang <[email protected]>
---
tools/perf/Documentation/perf-record.txt | 2 +-
tools/perf/tests/parse-events.c | 12 ++++++++++--
tools/perf/util/evsel.c | 2 ++
tools/perf/util/evsel.h | 1 +
tools/perf/util/parse-events.c | 3 ++-
5 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 63ee040..ac41350 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -46,7 +46,7 @@ OPTIONS
/sys/bus/event_sources/devices/<pmu>/format/*

There are also some params which are not defined in .../<pmu>/format/*.
- These params can be used to set event defaults.
+ These params can be used to overload default config values per event.
Here is a list of the params.
- 'period': Set event sampling period

diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index d76963f..f65bb89 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -82,8 +82,12 @@ static int test__checkevent_symbolic_name_config(struct perf_evlist *evlist)
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config);
+ /*
+ * The period value gets configured within perf_evlist__config,
+ * while this test executes only parse events method.
+ */
TEST_ASSERT_VAL("wrong period",
- 100000 == evsel->attr.sample_period);
+ 0 == evsel->attr.sample_period);
TEST_ASSERT_VAL("wrong config1",
0 == evsel->attr.config1);
TEST_ASSERT_VAL("wrong config2",
@@ -406,7 +410,11 @@ static int test__checkevent_pmu(struct perf_evlist *evlist)
TEST_ASSERT_VAL("wrong config", 10 == evsel->attr.config);
TEST_ASSERT_VAL("wrong config1", 1 == evsel->attr.config1);
TEST_ASSERT_VAL("wrong config2", 3 == evsel->attr.config2);
- TEST_ASSERT_VAL("wrong period", 1000 == evsel->attr.sample_period);
+ /*
+ * The period value gets configured within perf_evlist__config,
+ * while this test executes only parse events method.
+ */
+ TEST_ASSERT_VAL("wrong period", 0 == evsel->attr.sample_period);

return 0;
}
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 048d61d..7d3acba 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -594,6 +594,8 @@ static void apply_config_terms(struct perf_event_attr *attr __maybe_unused,

list_for_each_entry(term, config_terms, list) {
switch (term->type) {
+ case PERF_EVSEL__CONFIG_TERM_PERIOD:
+ attr->sample_period = term->val.period;
default:
break;
}
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 0339819..a7d2175 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -38,6 +38,7 @@ struct cgroup_sel;
* perf_evsel::config_terms list head.
*/
enum {
+ PERF_EVSEL__CONFIG_TERM_PERIOD,
PERF_EVSEL__CONFIG_TERM_MAX,
};

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 3271d13..09bee93 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -596,7 +596,6 @@ do { \
break;
case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD:
CHECK_TYPE_VAL(NUM);
- attr->sample_period = term->val.num;
break;
case PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE:
/*
@@ -649,6 +648,8 @@ do { \

list_for_each_entry(term, head_config, list) {
switch (term->type_term) {
+ case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD:
+ ADD_CONFIG_TERM(PERIOD, period, term->val.num);
default:
break;
}
--
1.8.3.1

2015-07-29 16:57:42

by Liang, Kan

[permalink] [raw]
Subject: [PATCH RFC V7 3/7] perf,tools: introduce callgraph_set for callgraph option

From: Kan Liang <[email protected]>

Introduce callgraph_set to indicate whether the callgraph option was set
by user.

Signed-off-by: Kan Liang <[email protected]>
---
tools/perf/builtin-record.c | 9 +++++++--
tools/perf/perf.h | 1 +
2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 445a64d..f51131b 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -762,12 +762,14 @@ static void callchain_debug(void)
callchain_param.dump_size);
}

-int record_parse_callchain_opt(const struct option *opt __maybe_unused,
+int record_parse_callchain_opt(const struct option *opt,
const char *arg,
int unset)
{
int ret;
+ struct record_opts *record = (struct record_opts *)opt->value;

+ record->callgraph_set = true;
callchain_param.enabled = !unset;

/* --no-call-graph */
@@ -784,10 +786,13 @@ int record_parse_callchain_opt(const struct option *opt __maybe_unused,
return ret;
}

-int record_callchain_opt(const struct option *opt __maybe_unused,
+int record_callchain_opt(const struct option *opt,
const char *arg __maybe_unused,
int unset __maybe_unused)
{
+ struct record_opts *record = (struct record_opts *)opt->value;
+
+ record->callgraph_set = true;
callchain_param.enabled = true;

if (callchain_param.record_mode == CALLCHAIN_NONE)
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index cf459f8..cccb4cf 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -52,6 +52,7 @@ struct record_opts {
bool sample_weight;
bool sample_time;
bool sample_time_set;
+ bool callgraph_set;
bool period;
bool sample_intr_regs;
bool running_time;
--
1.8.3.1

2015-07-29 16:57:40

by Liang, Kan

[permalink] [raw]
Subject: [PATCH RFC V7 4/7] perf,tools: per-event time support

From: Kan Liang <[email protected]>

This patchkit adds the ability to turn off time stamps per event.
One usable case of partial time is to work with per-event callgraph to
enable "PEBS threshold > 1" (https://lkml.org/lkml/2015/5/10/196), which
can significantly reduce the sampling overhead.
The event samples with time stamps off will not be ordered.

Signed-off-by: Kan Liang <[email protected]>
---
tools/perf/Documentation/perf-record.txt | 4 +++-
tools/perf/util/evsel.c | 14 +++++++++++---
tools/perf/util/evsel.h | 2 ++
tools/perf/util/parse-events.c | 12 ++++++++++++
tools/perf/util/parse-events.h | 1 +
tools/perf/util/parse-events.l | 1 +
tools/perf/util/pmu.c | 2 +-
7 files changed, 31 insertions(+), 5 deletions(-)

diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index ac41350..0d852d1 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -49,7 +49,9 @@ OPTIONS
These params can be used to overload default config values per event.
Here is a list of the params.
- 'period': Set event sampling period
-
+ - 'time': Disable/enable time stamping. Acceptable values are 1 for
+ enabling time stamping. 0 for disabling time stamping.
+ The default is 1.
Note: If user explicitly sets options which conflict with the params,
the value set by the params will be overridden.

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 7d3acba..7febfe2 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -587,15 +587,23 @@ perf_evsel__config_callgraph(struct perf_evsel *evsel,
}
}

-static void apply_config_terms(struct perf_event_attr *attr __maybe_unused,
- struct list_head *config_terms)
+static void apply_config_terms(struct perf_evsel *evsel)
{
struct perf_evsel_config_term *term;
+ struct list_head *config_terms = &evsel->config_terms;
+ struct perf_event_attr *attr = &evsel->attr;

list_for_each_entry(term, config_terms, list) {
switch (term->type) {
case PERF_EVSEL__CONFIG_TERM_PERIOD:
attr->sample_period = term->val.period;
+ break;
+ case PERF_EVSEL__CONFIG_TERM_TIME:
+ if (term->val.time)
+ perf_evsel__set_sample_bit(evsel, TIME);
+ else
+ perf_evsel__reset_sample_bit(evsel, TIME);
+ break;
default:
break;
}
@@ -798,7 +806,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts)
* Apply event specific term settings,
* it overloads any global configuration.
*/
- apply_config_terms(attr, &evsel->config_terms);
+ apply_config_terms(evsel);
}

static int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index a7d2175..6a12908 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -39,6 +39,7 @@ struct cgroup_sel;
*/
enum {
PERF_EVSEL__CONFIG_TERM_PERIOD,
+ PERF_EVSEL__CONFIG_TERM_TIME,
PERF_EVSEL__CONFIG_TERM_MAX,
};

@@ -47,6 +48,7 @@ struct perf_evsel_config_term {
int type;
union {
u64 period;
+ bool time;
} val;
};

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 09bee93..a6cb9af 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -603,6 +603,14 @@ do { \
* attr->branch_sample_type = term->val.num;
*/
break;
+ case PARSE_EVENTS__TERM_TYPE_TIME:
+ CHECK_TYPE_VAL(NUM);
+ if (term->val.num > 1) {
+ err->str = strdup("expected 0 or 1");
+ err->idx = term->err_val;
+ return -EINVAL;
+ }
+ break;
case PARSE_EVENTS__TERM_TYPE_NAME:
CHECK_TYPE_VAL(STR);
break;
@@ -650,6 +658,10 @@ do { \
switch (term->type_term) {
case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD:
ADD_CONFIG_TERM(PERIOD, period, term->val.num);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_TIME:
+ ADD_CONFIG_TERM(TIME, time, term->val.num);
+ break;
default:
break;
}
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 2063048..e6f9aacc 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -63,6 +63,7 @@ enum {
PARSE_EVENTS__TERM_TYPE_NAME,
PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD,
PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE,
+ PARSE_EVENTS__TERM_TYPE_TIME,
};

struct parse_events_term {
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 13cef3c..f542750 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -183,6 +183,7 @@ config2 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG2); }
name { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NAME); }
period { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD); }
branch_type { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE); }
+time { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_TIME); }
, { return ','; }
"/" { BEGIN(INITIAL); return '/'; }
{name_minus} { return str(yyscanner, PE_NAME); }
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 7bcb8c3..b615cdf 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -607,7 +607,7 @@ static char *formats_error_string(struct list_head *formats)
{
struct perf_pmu_format *format;
char *err, *str;
- static const char *static_terms = "config,config1,config2,name,period,branch_type\n";
+ static const char *static_terms = "config,config1,config2,name,period,branch_type,time\n";
unsigned i = 0;

if (!asprintf(&str, "valid terms:"))
--
1.8.3.1

2015-07-29 16:57:43

by Liang, Kan

[permalink] [raw]
Subject: [PATCH RFC V7 5/7] perf,tools: refine parse/config callchain functions

From: Kan Liang <[email protected]>

Pass global callchain_param into parse_callchain_record_opt and
perf_evsel__config_callgraph as parameter. So we can reuse these
functions to parse/config local param for callchain.

Signed-off-by: Kan Liang <[email protected]>
---
tools/perf/builtin-record.c | 2 +-
tools/perf/util/callchain.c | 14 +++++++-------
tools/perf/util/callchain.h | 2 +-
tools/perf/util/evsel.c | 11 ++++++-----
4 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index f51131b..25cf6b4 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -779,7 +779,7 @@ int record_parse_callchain_opt(const struct option *opt,
return 0;
}

- ret = parse_callchain_record_opt(arg);
+ ret = parse_callchain_record_opt(arg, &callchain_param);
if (!ret)
callchain_debug();

diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 9f643ee..931cca8 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -53,7 +53,7 @@ static int get_stack_size(const char *str, unsigned long *_size)
}
#endif /* HAVE_DWARF_UNWIND_SUPPORT */

-int parse_callchain_record_opt(const char *arg)
+int parse_callchain_record_opt(const char *arg, struct callchain_param *param)
{
char *tok, *name, *saveptr = NULL;
char *buf;
@@ -73,7 +73,7 @@ int parse_callchain_record_opt(const char *arg)
/* Framepointer style */
if (!strncmp(name, "fp", sizeof("fp"))) {
if (!strtok_r(NULL, ",", &saveptr)) {
- callchain_param.record_mode = CALLCHAIN_FP;
+ param->record_mode = CALLCHAIN_FP;
ret = 0;
} else
pr_err("callchain: No more arguments "
@@ -86,20 +86,20 @@ int parse_callchain_record_opt(const char *arg)
const unsigned long default_stack_dump_size = 8192;

ret = 0;
- callchain_param.record_mode = CALLCHAIN_DWARF;
- callchain_param.dump_size = default_stack_dump_size;
+ param->record_mode = CALLCHAIN_DWARF;
+ param->dump_size = default_stack_dump_size;

tok = strtok_r(NULL, ",", &saveptr);
if (tok) {
unsigned long size = 0;

ret = get_stack_size(tok, &size);
- callchain_param.dump_size = size;
+ param->dump_size = size;
}
#endif /* HAVE_DWARF_UNWIND_SUPPORT */
} else if (!strncmp(name, "lbr", sizeof("lbr"))) {
if (!strtok_r(NULL, ",", &saveptr)) {
- callchain_param.record_mode = CALLCHAIN_LBR;
+ param->record_mode = CALLCHAIN_LBR;
ret = 0;
} else
pr_err("callchain: No more arguments "
@@ -219,7 +219,7 @@ int perf_callchain_config(const char *var, const char *value)
var += sizeof("call-graph.") - 1;

if (!strcmp(var, "record-mode"))
- return parse_callchain_record_opt(value);
+ return parse_callchain_record_opt(value, &callchain_param);
#ifdef HAVE_DWARF_UNWIND_SUPPORT
if (!strcmp(var, "dump-size")) {
unsigned long size = 0;
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 679c2c6..68a32c2 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -177,7 +177,7 @@ int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *
bool hide_unresolved);

extern const char record_callchain_help[];
-int parse_callchain_record_opt(const char *arg);
+int parse_callchain_record_opt(const char *arg, struct callchain_param *param);
int parse_callchain_report_opt(const char *arg);
int perf_callchain_config(const char *var, const char *value);

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 7febfe2..f572f46 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -545,14 +545,15 @@ int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size)

static void
perf_evsel__config_callgraph(struct perf_evsel *evsel,
- struct record_opts *opts)
+ struct record_opts *opts,
+ struct callchain_param *param)
{
bool function = perf_evsel__is_function_event(evsel);
struct perf_event_attr *attr = &evsel->attr;

perf_evsel__set_sample_bit(evsel, CALLCHAIN);

- if (callchain_param.record_mode == CALLCHAIN_LBR) {
+ if (param->record_mode == CALLCHAIN_LBR) {
if (!opts->branch_stack) {
if (attr->exclude_user) {
pr_warning("LBR callstack option is only available "
@@ -568,12 +569,12 @@ perf_evsel__config_callgraph(struct perf_evsel *evsel,
"Falling back to framepointers.\n");
}

- if (callchain_param.record_mode == CALLCHAIN_DWARF) {
+ if (param->record_mode == CALLCHAIN_DWARF) {
if (!function) {
perf_evsel__set_sample_bit(evsel, REGS_USER);
perf_evsel__set_sample_bit(evsel, STACK_USER);
attr->sample_regs_user = PERF_REGS_MASK;
- attr->sample_stack_user = callchain_param.dump_size;
+ attr->sample_stack_user = param->dump_size;
attr->exclude_callchain_user = 1;
} else {
pr_info("Cannot use DWARF unwind for function trace event,"
@@ -714,7 +715,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts)
evsel->attr.exclude_callchain_user = 1;

if (callchain_param.enabled && !evsel->no_aux_samples)
- perf_evsel__config_callgraph(evsel, opts);
+ perf_evsel__config_callgraph(evsel, opts, &callchain_param);

if (opts->sample_intr_regs) {
attr->sample_regs_intr = PERF_REGS_MASK;
--
1.8.3.1

2015-07-29 16:57:44

by Liang, Kan

[permalink] [raw]
Subject: [PATCH RFC V7 6/7] perf,tools: per-event callgraph support

From: Kan Liang <[email protected]>

When multiple events are sampled it may not be needed to collect
callgraphs for all of them. The sample sites are usually nearby, and
it's enough to collect the callgraphs on a reference event (such as
precise cycles or precise instructions).
This patchkit adds the ability to turn off callgraphs and time stamp
per event. This in term can reduce sampling overhead and the size of the
perf.data. Furthermore, it makes collecting back traces and timestamps
possible when PEBS threshold > 1, which significantly reducing the
sampling overhead especially for frequently occurring events
(https://lkml.org/lkml/2015/5/10/196). For example, A slower event with
a larger period collects back traces/timestamps. Other more events run
fast with multi-pebs. The time stamps from the slower events can be used
to order the faster events. Their backtraces can give the user enough
hint to find the right spot.

Here are some examples and test results.

1. Comparing the elapsed time and perf.data size from "kernbench -M -H".

The test command for FULL callgraph and time support.
"perf record -e
'{cpu/cpu-cycles,period=100000/,cpu/instructions,period=20000/p}'
--call-graph fp --time"

The test command for PARTIAL callgraph and time support.
"perf record -e
'{cpu/cpu-cycles,call-graph=fp,time,period=100000/,
cpu/instructions,call-graph=no,time=0,period=20000/p}'"

The elapsed time for FULL is 24.3 Sec, while for PARTIAL is 16.9 Sec.
The perf.data size for FULL is 22.1 Gb, while for PARTIAL is 12.4 Gb.

2. Comparing the perf.data size and callgraph results.

The test command for FULL callgraph and time support.
"perf record -e
'{cpu/cpu-cycles,period=100000/pp,cpu/instructions,period=20000/p}'
--call-graph fp -- ./tchain_edit"

The test command for PARTIAL callgraph and time support.
"perf record -e
'{cpu/cpu-cycles,call-graph=fp,time,period=100000/pp,
cpu/instructions,call-graph=no,time=0,period=20000/p}'
-- ./tchain_edit"

The perf.data size for FULL is 43.2 MB, while for PARTIAL is 21.1 MB.
The callgraph is roughly the same.

The callgraph from FULL
# Samples: 87K of event
'cpu/cpu-cycles,call-graph=fp,time,period=100000/pp'
# Event count (approx.): 8760000000
#
# Children Self Command Shared Object Symbol
# ........ ........ ........... ..................
..........................................
#
99.98% 0.00% tchain_edit libc-2.15.so [.]
__libc_start_main
|
---__libc_start_main

99.97% 0.00% tchain_edit tchain_edit [.] main
|
---main
__libc_start_main

99.97% 0.00% tchain_edit tchain_edit [.] f1
|
---f1
main
__libc_start_main

99.85% 87.01% tchain_edit tchain_edit [.] f3
|
---f3
|
|--99.74%-- f2
| f1
| main
| __libc_start_main
--0.26%-- [...]
99.71% 0.12% tchain_edit tchain_edit [.] f2
|
---f2
f1
main
__libc_start_main

The callgraph from PARTIAL
# Samples: 417K of event
'cpu/instructions,call-graph=no,time=0,period=20000/p'
# Event count (approx.): 8346980000
#
# Children Self Command Shared Object Symbol
# ........ ........ ........... ................
..........................................
#
98.82% 0.00% tchain_edit libc-2.15.so [.]
__libc_start_main
|
---__libc_start_main

98.82% 0.00% tchain_edit tchain_edit [.] main
|
---main
__libc_start_main

98.82% 0.00% tchain_edit tchain_edit [.] f1
|
---f1
main
__libc_start_main

98.82% 98.28% tchain_edit tchain_edit [.] f3
|
---f3
|
|--0.53%-- f2
| f1
| main
| __libc_start_main
|
|--0.01%-- f1
| main
| __libc_start_main
--99.46%-- [...]
97.63% 0.03% tchain_edit tchain_edit [.] f2
|
---f2
f1
main
__libc_start_main

7.13% 0.03% tchain_edit [kernel.vmlinux] [k] do_nmi
|
---do_nmi
end_repeat_nmi
f3
f2
f1
main
__libc_start_main

Signed-off-by: Kan Liang <[email protected]>
---
tools/perf/Documentation/perf-record.txt | 4 +++
tools/perf/util/evsel.c | 57 ++++++++++++++++++++++++++++++--
tools/perf/util/evsel.h | 4 +++
tools/perf/util/parse-events.c | 12 +++++++
tools/perf/util/parse-events.h | 2 ++
tools/perf/util/parse-events.l | 2 ++
tools/perf/util/pmu.c | 3 +-
7 files changed, 81 insertions(+), 3 deletions(-)

diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 0d852d1..e633711 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -52,6 +52,10 @@ OPTIONS
- 'time': Disable/enable time stamping. Acceptable values are 1 for
enabling time stamping. 0 for disabling time stamping.
The default is 1.
+ - 'call-graph': Disable/enable callgraph. Acceptable str are "fp" for
+ FP mode, "dwarf" for DWARF mode, "lbr" for LBR mode and
+ "no" for disable callgraph.
+ - 'stack-size': user stack size for dwarf mode
Note: If user explicitly sets options which conflict with the params,
the value set by the params will be overridden.

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index f572f46..0d11c5e 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -588,11 +588,18 @@ perf_evsel__config_callgraph(struct perf_evsel *evsel,
}
}

-static void apply_config_terms(struct perf_evsel *evsel)
+static void apply_config_terms(struct perf_evsel *evsel,
+ struct record_opts *opts)
{
struct perf_evsel_config_term *term;
struct list_head *config_terms = &evsel->config_terms;
struct perf_event_attr *attr = &evsel->attr;
+ struct callchain_param param;
+ u32 dump_size = 0;
+ char *callgraph_buf = NULL;
+
+ /* callgraph default */
+ param.record_mode = callchain_param.record_mode;

list_for_each_entry(term, config_terms, list) {
switch (term->type) {
@@ -605,10 +612,56 @@ static void apply_config_terms(struct perf_evsel *evsel)
else
perf_evsel__reset_sample_bit(evsel, TIME);
break;
+ case PERF_EVSEL__CONFIG_TERM_CALLGRAPH:
+ callgraph_buf = term->val.callgraph;
+ break;
+ case PERF_EVSEL__CONFIG_TERM_STACK_USER:
+ dump_size = term->val.stack_user;
+ break;
default:
break;
}
}
+
+ /* User explicitly set per-event callgraph, clear the old setting and reset. */
+ if ((callgraph_buf != NULL) || (dump_size > 0)) {
+
+ /* parse callgraph parameters */
+ if (callgraph_buf != NULL) {
+ if (!strcmp(callgraph_buf, "no")) {
+ param.enabled = false;
+ param.record_mode = CALLCHAIN_NONE;
+ } else {
+ param.enabled = true;
+ if (parse_callchain_record_opt(callgraph_buf, &param)) {
+ pr_err("per-event callgraph setting for %s failed. "
+ "Apply callgraph global setting for it\n",
+ evsel->name);
+ return;
+ }
+ }
+ }
+ if (dump_size > 0)
+ param.dump_size = dump_size;
+
+ /* If global callgraph set, clear it */
+ if (callchain_param.enabled) {
+ perf_evsel__reset_sample_bit(evsel, CALLCHAIN);
+ if (callchain_param.record_mode == CALLCHAIN_LBR) {
+ perf_evsel__reset_sample_bit(evsel, BRANCH_STACK);
+ attr->branch_sample_type &= ~(PERF_SAMPLE_BRANCH_USER |
+ PERF_SAMPLE_BRANCH_CALL_STACK);
+ }
+ if (callchain_param.record_mode == CALLCHAIN_DWARF) {
+ perf_evsel__reset_sample_bit(evsel, REGS_USER);
+ perf_evsel__reset_sample_bit(evsel, STACK_USER);
+ }
+ }
+
+ /* set perf-event callgraph */
+ if (param.enabled)
+ perf_evsel__config_callgraph(evsel, opts, &param);
+ }
}

/*
@@ -807,7 +860,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts)
* Apply event specific term settings,
* it overloads any global configuration.
*/
- apply_config_terms(evsel);
+ apply_config_terms(evsel, opts);
}

static int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 6a12908..09a3022 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -40,6 +40,8 @@ struct cgroup_sel;
enum {
PERF_EVSEL__CONFIG_TERM_PERIOD,
PERF_EVSEL__CONFIG_TERM_TIME,
+ PERF_EVSEL__CONFIG_TERM_CALLGRAPH,
+ PERF_EVSEL__CONFIG_TERM_STACK_USER,
PERF_EVSEL__CONFIG_TERM_MAX,
};

@@ -49,6 +51,8 @@ struct perf_evsel_config_term {
union {
u64 period;
bool time;
+ char *callgraph;
+ u64 stack_user;
} val;
};

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index a6cb9af..716f1fc 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -611,6 +611,12 @@ do { \
return -EINVAL;
}
break;
+ case PARSE_EVENTS__TERM_TYPE_CALLGRAPH:
+ CHECK_TYPE_VAL(STR);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_STACKSIZE:
+ CHECK_TYPE_VAL(NUM);
+ break;
case PARSE_EVENTS__TERM_TYPE_NAME:
CHECK_TYPE_VAL(STR);
break;
@@ -662,6 +668,12 @@ do { \
case PARSE_EVENTS__TERM_TYPE_TIME:
ADD_CONFIG_TERM(TIME, time, term->val.num);
break;
+ case PARSE_EVENTS__TERM_TYPE_CALLGRAPH:
+ ADD_CONFIG_TERM(CALLGRAPH, callgraph, term->val.str);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_STACKSIZE:
+ ADD_CONFIG_TERM(STACK_USER, stack_user, term->val.num);
+ break;
default:
break;
}
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index e6f9aacc..87dc9f6 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -64,6 +64,8 @@ enum {
PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD,
PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE,
PARSE_EVENTS__TERM_TYPE_TIME,
+ PARSE_EVENTS__TERM_TYPE_CALLGRAPH,
+ PARSE_EVENTS__TERM_TYPE_STACKSIZE,
};

struct parse_events_term {
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index f542750..1665497 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -184,6 +184,8 @@ name { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NAME); }
period { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD); }
branch_type { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE); }
time { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_TIME); }
+call-graph { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CALLGRAPH); }
+stack-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_STACKSIZE); }
, { return ','; }
"/" { BEGIN(INITIAL); return '/'; }
{name_minus} { return str(yyscanner, PE_NAME); }
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index b615cdf..948f896 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -607,7 +607,8 @@ static char *formats_error_string(struct list_head *formats)
{
struct perf_pmu_format *format;
char *err, *str;
- static const char *static_terms = "config,config1,config2,name,period,branch_type,time\n";
+ static const char *static_terms = "config,config1,config2,name,period,"
+ "branch_type,time,call-graph,stack-size\n";
unsigned i = 0;

if (!asprintf(&str, "valid terms:"))
--
1.8.3.1

2015-07-29 16:57:46

by Liang, Kan

[permalink] [raw]
Subject: [PATCH RFC V7 7/7] perf,tests: Add tests to callgraph and time parse

From: Kan Liang <[email protected]>

Add tests in tests/parse-events.c to check call-graph and time option

Signed-off-by: Kan Liang <[email protected]>
---
tools/perf/tests/parse-events.c | 38 ++++++++++++++++++++++++++++++++++++++
1 file changed, 38 insertions(+)

diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index f65bb89..9b6b2b63 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -479,6 +479,39 @@ static int test__checkevent_pmu_name(struct perf_evlist *evlist)
return 0;
}

+static int test__checkevent_pmu_partial_time_callgraph(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+ /* cpu/config=1,call-graph=fp,time,period=100000/ */
+ TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config", 1 == evsel->attr.config);
+ /*
+ * The period, time and callgraph value gets configured
+ * within perf_evlist__config,
+ * while this test executes only parse events method.
+ */
+ TEST_ASSERT_VAL("wrong period", 0 == evsel->attr.sample_period);
+ TEST_ASSERT_VAL("wrong callgraph", !(PERF_SAMPLE_CALLCHAIN & evsel->attr.sample_type));
+ TEST_ASSERT_VAL("wrong time", !(PERF_SAMPLE_TIME & evsel->attr.sample_type));
+
+ /* cpu/config=2,call-graph=no,time=0,period=2000/ */
+ evsel = perf_evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config", 2 == evsel->attr.config);
+ /*
+ * The period, time and callgraph value gets configured
+ * within perf_evlist__config,
+ * while this test executes only parse events method.
+ */
+ TEST_ASSERT_VAL("wrong period", 0 == evsel->attr.sample_period);
+ TEST_ASSERT_VAL("wrong callgraph", !(PERF_SAMPLE_CALLCHAIN & evsel->attr.sample_type));
+ TEST_ASSERT_VAL("wrong time", !(PERF_SAMPLE_TIME & evsel->attr.sample_type));
+
+ return 0;
+}
+
static int test__checkevent_pmu_events(struct perf_evlist *evlist)
{
struct perf_evsel *evsel = perf_evlist__first(evlist);
@@ -1555,6 +1588,11 @@ static struct evlist_test test__events_pmu[] = {
.check = test__checkevent_pmu_name,
.id = 1,
},
+ {
+ .name = "cpu/config=1,call-graph=fp,time,period=100000/,cpu/config=2,call-graph=no,time=0,period=2000/",
+ .check = test__checkevent_pmu_partial_time_callgraph,
+ .id = 2,
+ },
};

struct terms_test {
--
1.8.3.1

2015-07-29 19:20:48

by Arnaldo Carvalho de Melo

[permalink] [raw]
Subject: Re: [PATCH RFC V7 0/7] per event callgraph and time support

Em Wed, Jul 29, 2015 at 05:42:09AM -0400, Kan Liang escreveu:
> This patchkit adds the ability to turn off callgraphs and time stamps
> per event. This in term can reduce sampling overhead and the size of
> the perf.data.

Applied the first three patches, will wait for Jiri comments on the
remaining.

- Arnaldo

> Changes since V1:
> - Break up V1 patches into three patches(parse option changes,
> partial time support and partial callgraph support).
> - Use strings 'fp,dwarf,lbr,no' to identify callchains
> - Add test case in parse-events.c
>
> Changes since V2:
> - Rebase on 60cd37eb10
>
> Changes since V3:
> - Replace OPT_CALLBACK_SET by current existing callback mechanism.
> - Using perf_evsel__set_sample_bit if possible
> - Change the expression "partial" to "per event"
> - Using global variable to indicate if 'time' is set per event.
> If 'time' is not set, enable it by default for perf record.
>
> Changes since V4:
> - Fix issue of setting callgraph_set
>
> Changes since V5:
> - per-event settings over global settings in general
> - support for event post configuration structure
>
> Changes since V6:
> - Add error infor for per-event time
> - rename the option to call-graph and stacl-size
> - refine parse/config callchain functions,
> and reuse them for per-event callgraph setting
> - Modify perf test
>
> Jiri Olsa (2):
> perf tools: Add support for event post configuration
> perf tools: Force period term to overload global settings
>
> Kan Liang (5):
> perf,tools: introduce callgraph_set for callgraph option
> perf,tools: per-event time support
> perf,tools: refine parse/config callchain functions
> perf,tools: per-event callgraph support
> perf,tests: Add tests to callgraph and time parse
>
> tools/perf/Documentation/perf-record.txt | 10 ++-
> tools/perf/builtin-record.c | 11 +++-
> tools/perf/perf.h | 1 +
> tools/perf/tests/parse-events.c | 50 ++++++++++++++-
> tools/perf/util/callchain.c | 14 ++---
> tools/perf/util/callchain.h | 2 +-
> tools/perf/util/evsel.c | 105 +++++++++++++++++++++++++++++--
> tools/perf/util/evsel.h | 26 ++++++++
> tools/perf/util/parse-events.c | 94 +++++++++++++++++++++++----
> tools/perf/util/parse-events.h | 3 +
> tools/perf/util/parse-events.l | 3 +
> tools/perf/util/pmu.c | 3 +-
> 12 files changed, 289 insertions(+), 33 deletions(-)
>
> --
> 1.8.3.1

2015-07-30 12:15:28

by Jiri Olsa

[permalink] [raw]
Subject: Re: [PATCH RFC V7 6/7] perf,tools: per-event callgraph support

On Wed, Jul 29, 2015 at 05:42:15AM -0400, Kan Liang wrote:

SNIP

> +
> + /* User explicitly set per-event callgraph, clear the old setting and reset. */
> + if ((callgraph_buf != NULL) || (dump_size > 0)) {
> +
> + /* parse callgraph parameters */
> + if (callgraph_buf != NULL) {
> + if (!strcmp(callgraph_buf, "no")) {
> + param.enabled = false;
> + param.record_mode = CALLCHAIN_NONE;
> + } else {
> + param.enabled = true;
> + if (parse_callchain_record_opt(callgraph_buf, &param)) {
> + pr_err("per-event callgraph setting for %s failed. "
> + "Apply callgraph global setting for it\n",
> + evsel->name);
> + return;
> + }
> + }
> + }
> + if (dump_size > 0)
> + param.dump_size = dump_size;
> +
> + /* If global callgraph set, clear it */
> + if (callchain_param.enabled) {
> + perf_evsel__reset_sample_bit(evsel, CALLCHAIN);
> + if (callchain_param.record_mode == CALLCHAIN_LBR) {
> + perf_evsel__reset_sample_bit(evsel, BRANCH_STACK);
> + attr->branch_sample_type &= ~(PERF_SAMPLE_BRANCH_USER |
> + PERF_SAMPLE_BRANCH_CALL_STACK);
> + }
> + if (callchain_param.record_mode == CALLCHAIN_DWARF) {
> + perf_evsel__reset_sample_bit(evsel, REGS_USER);
> + perf_evsel__reset_sample_bit(evsel, STACK_USER);
> + }
> + }

please put code above into perf_evsel__reset_callgraph
ans place it close to perf_evsel__config_callgraph ;-)

other than that the change looks ok to me now

thanks,
jirka

Subject: [tip:perf/core] perf tools: Add support for event post configuration

Commit-ID: 930a2e29758f865e3a7b34b8b3b37c08d40f0254
Gitweb: http://git.kernel.org/tip/930a2e29758f865e3a7b34b8b3b37c08d40f0254
Author: Jiri Olsa <[email protected]>
AuthorDate: Wed, 29 Jul 2015 05:42:10 -0400
Committer: Arnaldo Carvalho de Melo <[email protected]>
CommitDate: Wed, 29 Jul 2015 16:15:57 -0300

perf tools: Add support for event post configuration

Add support to overload any global settings for event and force user
specified term value. It will be useful for new time and backtrace
terms.

Signed-off-by: Jiri Olsa <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Kan Liang <[email protected]>
Cc: Namhyung Kim <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Kan Liang <[email protected]>
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/util/evsel.c | 31 +++++++++++++++++++
tools/perf/util/evsel.h | 19 ++++++++++++
tools/perf/util/parse-events.c | 67 +++++++++++++++++++++++++++++++++++-------
3 files changed, 106 insertions(+), 11 deletions(-)

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 71f6905..048d61d 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -207,6 +207,7 @@ void perf_evsel__init(struct perf_evsel *evsel,
evsel->unit = "";
evsel->scale = 1.0;
INIT_LIST_HEAD(&evsel->node);
+ INIT_LIST_HEAD(&evsel->config_terms);
perf_evsel__object.init(evsel);
evsel->sample_size = __perf_evsel__sample_size(attr->sample_type);
perf_evsel__calc_id_pos(evsel);
@@ -586,6 +587,19 @@ perf_evsel__config_callgraph(struct perf_evsel *evsel,
}
}

+static void apply_config_terms(struct perf_event_attr *attr __maybe_unused,
+ struct list_head *config_terms)
+{
+ struct perf_evsel_config_term *term;
+
+ list_for_each_entry(term, config_terms, list) {
+ switch (term->type) {
+ default:
+ break;
+ }
+ }
+}
+
/*
* The enable_on_exec/disabled value strategy:
*
@@ -777,6 +791,12 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts)
attr->use_clockid = 1;
attr->clockid = opts->clockid;
}
+
+ /*
+ * Apply event specific term settings,
+ * it overloads any global configuration.
+ */
+ apply_config_terms(attr, &evsel->config_terms);
}

static int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
@@ -900,6 +920,16 @@ static void perf_evsel__free_id(struct perf_evsel *evsel)
zfree(&evsel->id);
}

+static void perf_evsel__free_config_terms(struct perf_evsel *evsel)
+{
+ struct perf_evsel_config_term *term, *h;
+
+ list_for_each_entry_safe(term, h, &evsel->config_terms, list) {
+ list_del(&term->list);
+ free(term);
+ }
+}
+
void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
{
int cpu, thread;
@@ -919,6 +949,7 @@ void perf_evsel__exit(struct perf_evsel *evsel)
assert(list_empty(&evsel->node));
perf_evsel__free_fd(evsel);
perf_evsel__free_id(evsel);
+ perf_evsel__free_config_terms(evsel);
close_cgroup(evsel->cgrp);
cpu_map__put(evsel->cpus);
thread_map__put(evsel->threads);
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 1fc263a..0339819 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -31,6 +31,24 @@ struct perf_sample_id {

struct cgroup_sel;

+/*
+ * The 'struct perf_evsel_config_term' is used to pass event
+ * specific configuration data to perf_evsel__config routine.
+ * It is allocated within event parsing and attached to
+ * perf_evsel::config_terms list head.
+*/
+enum {
+ PERF_EVSEL__CONFIG_TERM_MAX,
+};
+
+struct perf_evsel_config_term {
+ struct list_head list;
+ int type;
+ union {
+ u64 period;
+ } val;
+};
+
/** struct perf_evsel - event selector
*
* @name - Can be set to retain the original event name passed by the user,
@@ -87,6 +105,7 @@ struct perf_evsel {
struct perf_evsel *leader;
char *group_name;
bool cmdline_group_boundary;
+ struct list_head config_terms;
};

union u64_swap {
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 4f807fc..3271d13 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -276,7 +276,8 @@ const char *event_type(int type)
static struct perf_evsel *
__add_event(struct list_head *list, int *idx,
struct perf_event_attr *attr,
- char *name, struct cpu_map *cpus)
+ char *name, struct cpu_map *cpus,
+ struct list_head *config_terms)
{
struct perf_evsel *evsel;

@@ -291,14 +292,19 @@ __add_event(struct list_head *list, int *idx,

if (name)
evsel->name = strdup(name);
+
+ if (config_terms)
+ list_splice(config_terms, &evsel->config_terms);
+
list_add_tail(&evsel->node, list);
return evsel;
}

static int add_event(struct list_head *list, int *idx,
- struct perf_event_attr *attr, char *name)
+ struct perf_event_attr *attr, char *name,
+ struct list_head *config_terms)
{
- return __add_event(list, idx, attr, name, NULL) ? 0 : -ENOMEM;
+ return __add_event(list, idx, attr, name, NULL, config_terms) ? 0 : -ENOMEM;
}

static int parse_aliases(char *str, const char *names[][PERF_EVSEL__MAX_ALIASES], int size)
@@ -377,7 +383,7 @@ int parse_events_add_cache(struct list_head *list, int *idx,
memset(&attr, 0, sizeof(attr));
attr.config = cache_type | (cache_op << 8) | (cache_result << 16);
attr.type = PERF_TYPE_HW_CACHE;
- return add_event(list, idx, &attr, name);
+ return add_event(list, idx, &attr, name, NULL);
}

static int add_tracepoint(struct list_head *list, int *idx,
@@ -539,7 +545,7 @@ int parse_events_add_breakpoint(struct list_head *list, int *idx,
attr.type = PERF_TYPE_BREAKPOINT;
attr.sample_period = 1;

- return add_event(list, idx, &attr, NULL);
+ return add_event(list, idx, &attr, NULL, NULL);
}

static int check_type_val(struct parse_events_term *term,
@@ -622,22 +628,56 @@ static int config_attr(struct perf_event_attr *attr,
return 0;
}

+static int get_config_terms(struct list_head *head_config,
+ struct list_head *head_terms __maybe_unused)
+{
+#define ADD_CONFIG_TERM(__type, __name, __val) \
+do { \
+ struct perf_evsel_config_term *__t; \
+ \
+ __t = zalloc(sizeof(*__t)); \
+ if (!__t) \
+ return -ENOMEM; \
+ \
+ INIT_LIST_HEAD(&__t->list); \
+ __t->type = PERF_EVSEL__CONFIG_TERM_ ## __type; \
+ __t->val.__name = __val; \
+ list_add_tail(&__t->list, head_terms); \
+} while (0)
+
+ struct parse_events_term *term;
+
+ list_for_each_entry(term, head_config, list) {
+ switch (term->type_term) {
+ default:
+ break;
+ }
+ }
+#undef ADD_EVSEL_CONFIG
+ return 0;
+}
+
int parse_events_add_numeric(struct parse_events_evlist *data,
struct list_head *list,
u32 type, u64 config,
struct list_head *head_config)
{
struct perf_event_attr attr;
+ LIST_HEAD(config_terms);

memset(&attr, 0, sizeof(attr));
attr.type = type;
attr.config = config;

- if (head_config &&
- config_attr(&attr, head_config, data->error))
- return -EINVAL;
+ if (head_config) {
+ if (config_attr(&attr, head_config, data->error))
+ return -EINVAL;
+
+ if (get_config_terms(head_config, &config_terms))
+ return -ENOMEM;
+ }

- return add_event(list, &data->idx, &attr, NULL);
+ return add_event(list, &data->idx, &attr, NULL, &config_terms);
}

static int parse_events__is_name_term(struct parse_events_term *term)
@@ -664,6 +704,7 @@ int parse_events_add_pmu(struct parse_events_evlist *data,
struct perf_pmu_info info;
struct perf_pmu *pmu;
struct perf_evsel *evsel;
+ LIST_HEAD(config_terms);

pmu = perf_pmu__find(name);
if (!pmu)
@@ -678,7 +719,7 @@ int parse_events_add_pmu(struct parse_events_evlist *data,

if (!head_config) {
attr.type = pmu->type;
- evsel = __add_event(list, &data->idx, &attr, NULL, pmu->cpus);
+ evsel = __add_event(list, &data->idx, &attr, NULL, pmu->cpus, NULL);
return evsel ? 0 : -ENOMEM;
}

@@ -692,11 +733,15 @@ int parse_events_add_pmu(struct parse_events_evlist *data,
if (config_attr(&attr, head_config, data->error))
return -EINVAL;

+ if (get_config_terms(head_config, &config_terms))
+ return -ENOMEM;
+
if (perf_pmu__config(pmu, &attr, head_config, data->error))
return -EINVAL;

evsel = __add_event(list, &data->idx, &attr,
- pmu_event_name(head_config), pmu->cpus);
+ pmu_event_name(head_config), pmu->cpus,
+ &config_terms);
if (evsel) {
evsel->unit = info.unit;
evsel->scale = info.scale;

Subject: [tip:perf/core] perf tools: Force period term to overload global settings

Commit-ID: ee4c75887d12bcd6ecd897291797d969256f39ca
Gitweb: http://git.kernel.org/tip/ee4c75887d12bcd6ecd897291797d969256f39ca
Author: Jiri Olsa <[email protected]>
AuthorDate: Wed, 29 Jul 2015 05:42:11 -0400
Committer: Arnaldo Carvalho de Melo <[email protected]>
CommitDate: Wed, 29 Jul 2015 16:18:21 -0300

perf tools: Force period term to overload global settings

Currently the command line option settings beats the per event period
settings:

With no global settings, we get per-event configuration:

$ perf record -e 'cpu/instructions,period=20000/' sleep 1
$ perf evlist -v
... { sample_period, sample_freq }: 20000 ...

With 'c' option period setup, we get 'c' option value:
$ perf record -e 'cpu/instructions,period=20000/' -c 1000 sleep 1
$ perf evlist -v
... { sample_period, sample_freq }: 1000 ...

This patch makes the per-event settings overload the global 'c' option
setup:

$ perf record -e 'cpu/instructions,period=20000/' -c 1000 sleep 1
$ perf evlist -v
... { sample_period, sample_freq }: 20000 ...

I think the making the per-event settings to overload any other config
makes more sense than current state. However it breaks the current
'period' term handling, which might cause some noise.. so let's see ;-).

Also fixing parse event tests with the new behaviour.

Signed-off-by: Jiri Olsa <[email protected]>
Tested-by: Arnaldo Carvalho de Melo <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Kan Liang <[email protected]>
Cc: Namhyung Kim <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Kan Liang <[email protected]>
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/Documentation/perf-record.txt | 2 +-
tools/perf/tests/parse-events.c | 12 ++++++++++--
tools/perf/util/evsel.c | 2 ++
tools/perf/util/evsel.h | 1 +
tools/perf/util/parse-events.c | 3 ++-
5 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 63ee040..ac41350 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -46,7 +46,7 @@ OPTIONS
/sys/bus/event_sources/devices/<pmu>/format/*

There are also some params which are not defined in .../<pmu>/format/*.
- These params can be used to set event defaults.
+ These params can be used to overload default config values per event.
Here is a list of the params.
- 'period': Set event sampling period

diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index d76963f..f65bb89 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -82,8 +82,12 @@ static int test__checkevent_symbolic_name_config(struct perf_evlist *evlist)
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config);
+ /*
+ * The period value gets configured within perf_evlist__config,
+ * while this test executes only parse events method.
+ */
TEST_ASSERT_VAL("wrong period",
- 100000 == evsel->attr.sample_period);
+ 0 == evsel->attr.sample_period);
TEST_ASSERT_VAL("wrong config1",
0 == evsel->attr.config1);
TEST_ASSERT_VAL("wrong config2",
@@ -406,7 +410,11 @@ static int test__checkevent_pmu(struct perf_evlist *evlist)
TEST_ASSERT_VAL("wrong config", 10 == evsel->attr.config);
TEST_ASSERT_VAL("wrong config1", 1 == evsel->attr.config1);
TEST_ASSERT_VAL("wrong config2", 3 == evsel->attr.config2);
- TEST_ASSERT_VAL("wrong period", 1000 == evsel->attr.sample_period);
+ /*
+ * The period value gets configured within perf_evlist__config,
+ * while this test executes only parse events method.
+ */
+ TEST_ASSERT_VAL("wrong period", 0 == evsel->attr.sample_period);

return 0;
}
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 048d61d..7d3acba 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -594,6 +594,8 @@ static void apply_config_terms(struct perf_event_attr *attr __maybe_unused,

list_for_each_entry(term, config_terms, list) {
switch (term->type) {
+ case PERF_EVSEL__CONFIG_TERM_PERIOD:
+ attr->sample_period = term->val.period;
default:
break;
}
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 0339819..a7d2175 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -38,6 +38,7 @@ struct cgroup_sel;
* perf_evsel::config_terms list head.
*/
enum {
+ PERF_EVSEL__CONFIG_TERM_PERIOD,
PERF_EVSEL__CONFIG_TERM_MAX,
};

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 3271d13..09bee93 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -596,7 +596,6 @@ do { \
break;
case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD:
CHECK_TYPE_VAL(NUM);
- attr->sample_period = term->val.num;
break;
case PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE:
/*
@@ -649,6 +648,8 @@ do { \

list_for_each_entry(term, head_config, list) {
switch (term->type_term) {
+ case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD:
+ ADD_CONFIG_TERM(PERIOD, period, term->val.num);
default:
break;
}

Subject: [tip:perf/core] perf tools: Introduce callgraph_set for callgraph option

Commit-ID: c421e80b1073e2ed58842ee6ee493386ace2d9eb
Gitweb: http://git.kernel.org/tip/c421e80b1073e2ed58842ee6ee493386ace2d9eb
Author: Kan Liang <[email protected]>
AuthorDate: Wed, 29 Jul 2015 05:42:12 -0400
Committer: Arnaldo Carvalho de Melo <[email protected]>
CommitDate: Wed, 29 Jul 2015 16:18:45 -0300

perf tools: Introduce callgraph_set for callgraph option

Introduce callgraph_set to indicate whether the callgraph option was set
by user.

Signed-off-by: Kan Liang <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Jiri Olsa <[email protected]>
Cc: Namhyung Kim <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/builtin-record.c | 9 +++++++--
tools/perf/perf.h | 1 +
2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 445a64d..f51131b 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -762,12 +762,14 @@ static void callchain_debug(void)
callchain_param.dump_size);
}

-int record_parse_callchain_opt(const struct option *opt __maybe_unused,
+int record_parse_callchain_opt(const struct option *opt,
const char *arg,
int unset)
{
int ret;
+ struct record_opts *record = (struct record_opts *)opt->value;

+ record->callgraph_set = true;
callchain_param.enabled = !unset;

/* --no-call-graph */
@@ -784,10 +786,13 @@ int record_parse_callchain_opt(const struct option *opt __maybe_unused,
return ret;
}

-int record_callchain_opt(const struct option *opt __maybe_unused,
+int record_callchain_opt(const struct option *opt,
const char *arg __maybe_unused,
int unset __maybe_unused)
{
+ struct record_opts *record = (struct record_opts *)opt->value;
+
+ record->callgraph_set = true;
callchain_param.enabled = true;

if (callchain_param.record_mode == CALLCHAIN_NONE)
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index cf459f8..cccb4cf 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -52,6 +52,7 @@ struct record_opts {
bool sample_weight;
bool sample_time;
bool sample_time_set;
+ bool callgraph_set;
bool period;
bool sample_intr_regs;
bool running_time;