From: Robert Richter <[email protected]>
This patch set contains userland changes necessary for out-of-the-box
support of persistent events. These patches are follow on patches of
the kernel patches I sent out today:
[PATCH 00/16] perf, persistent: Kernel updates for perf tool integration
Persistent events are always enabled kernel events. Buffers are mapped
readonly and multiple users are allowed. The persistent event flag of
the event attribute must be set to specify such an event.
The following changes to perf tools are necessary to support
persistent events. A way is needed to specify sysfs entries to set
event flags. For this a new syntax 'attr<num>' was added to the event
parser, see patch #3. We also need to change perf tools to mmap
persistent event buffers readonly.
All patches can be found here:
git://git.kernel.org/pub/scm/linux/kernel/git/rric/oprofile.git persistent
-Robert
Robert Richter (4):
perf tools: Rename flex conditions to avoid name conflicts
perf tools: Modify event parser to update event attribute by index
perf tools: Add attr<num> syntax to event parser
perf tools: Retry mapping buffers readonly on EACCES
tools/perf/builtin-record.c | 7 ++++-
tools/perf/builtin-top.c | 8 ++++--
tools/perf/perf.h | 1 +
tools/perf/tests/parse-events.c | 12 ++++++---
tools/perf/util/parse-events.c | 59 +++++++++++++++++++----------------------
tools/perf/util/parse-events.h | 12 ++++-----
tools/perf/util/parse-events.l | 56 +++++++++++++++++++++++---------------
tools/perf/util/parse-events.y | 24 ++++++++++-------
tools/perf/util/pmu.c | 32 +++++-----------------
tools/perf/util/pmu.h | 9 ++-----
tools/perf/util/pmu.l | 1 +
tools/perf/util/pmu.y | 18 ++++++++++---
12 files changed, 127 insertions(+), 112 deletions(-)
--
1.8.1.1
From: Robert Richter <[email protected]>
These defines may cause conflicts with other definitions:
#define INITIAL 0
#define mem 1
#define config 2
#define event 3
Prefix them with cond_* to avoid this.
Signed-off-by: Robert Richter <[email protected]>
---
tools/perf/util/parse-events.l | 24 ++++++++++++------------
1 file changed, 12 insertions(+), 12 deletions(-)
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index e9d1134..03ead35 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -69,9 +69,9 @@ static int term(yyscan_t scanner, int type)
%}
-%x mem
-%s config
-%x event
+%x cond_mem
+%s cond_config
+%x cond_event
group [^,{}/]*[{][^}]*[}][^,{}/]*
event_pmu [^,{}/]+[/][^/]*[/][^,{}/]*
@@ -94,9 +94,9 @@ modifier_bp [rwx]{1,3}
start_token = parse_events_get_extra(yyscanner);
if (start_token == PE_START_TERMS)
- BEGIN(config);
+ BEGIN(cond_config);
else if (start_token == PE_START_EVENTS)
- BEGIN(event);
+ BEGIN(cond_event);
if (start_token) {
parse_events_set_extra(NULL, yyscanner);
@@ -105,7 +105,7 @@ modifier_bp [rwx]{1,3}
}
%}
-<event>{
+<cond_event>{
{group} {
BEGIN(INITIAL); yyless(0);
@@ -160,7 +160,7 @@ speculative-read|speculative-load |
refs|Reference|ops|access |
misses|miss { return str(yyscanner, PE_NAME_CACHE_OP_RESULT); }
-<config>{
+<cond_config>{
config { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG); }
config1 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG1); }
config2 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG2); }
@@ -172,23 +172,23 @@ branch_type { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE
{name_minus} { return str(yyscanner, PE_NAME); }
}
-mem: { BEGIN(mem); return PE_PREFIX_MEM; }
+mem: { BEGIN(cond_mem); return PE_PREFIX_MEM; }
r{num_raw_hex} { return raw(yyscanner); }
{num_dec} { return value(yyscanner, 10); }
{num_hex} { return value(yyscanner, 16); }
{modifier_event} { return str(yyscanner, PE_MODIFIER_EVENT); }
{name} { return str(yyscanner, PE_NAME); }
-"/" { BEGIN(config); return '/'; }
+"/" { BEGIN(cond_config); return '/'; }
- { return '-'; }
-, { BEGIN(event); return ','; }
+, { BEGIN(cond_event); return ','; }
: { return ':'; }
-"{" { BEGIN(event); return '{'; }
+"{" { BEGIN(cond_event); return '{'; }
"}" { return '}'; }
= { return '='; }
\n { }
-<mem>{
+<cond_mem>{
{modifier_bp} { return str(yyscanner, PE_MODIFIER_BP); }
: { return ':'; }
{num_dec} { return value(yyscanner, 10); }
--
1.8.1.1
From: Robert Richter <[email protected]>
In a later patch we want to introduce a syntax that allows updating
attribute fields by an index pointing to a certain u64 entry of struct
perf_event_attr. We need this to expose any event via sysfs that is
available in the system where especially flag fields need to be set.
Rework the event parser to use an attribute index. This is done by
introducing type PARSE_EVENTS__TERM_TYPE_ATTR for all numeric values
to be added to the event attribute fields. We use an index to specify
the corresponding u64 attr value.
Signed-off-by: Robert Richter <[email protected]>
---
tools/perf/tests/parse-events.c | 12 ++++++---
tools/perf/util/parse-events.c | 59 +++++++++++++++++++----------------------
tools/perf/util/parse-events.h | 12 ++++-----
tools/perf/util/parse-events.l | 18 ++++++-------
tools/perf/util/parse-events.y | 24 ++++++++++-------
5 files changed, 65 insertions(+), 60 deletions(-)
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index 0275bab..916a41a 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -471,7 +471,9 @@ static int test__checkterms_simple(struct list_head *terms)
/* config=10 */
term = list_entry(terms->next, struct parse_events_term, list);
TEST_ASSERT_VAL("wrong type term",
- term->type_term == PARSE_EVENTS__TERM_TYPE_CONFIG);
+ term->type_term == PARSE_EVENTS__TERM_TYPE_ATTR);
+ TEST_ASSERT_VAL("wrong type idx",
+ term->idx == 1);
TEST_ASSERT_VAL("wrong type val",
term->type_val == PARSE_EVENTS__TERM_TYPE_NUM);
TEST_ASSERT_VAL("wrong val", term->val.num == 10);
@@ -480,7 +482,9 @@ static int test__checkterms_simple(struct list_head *terms)
/* config1 */
term = list_entry(term->list.next, struct parse_events_term, list);
TEST_ASSERT_VAL("wrong type term",
- term->type_term == PARSE_EVENTS__TERM_TYPE_CONFIG1);
+ term->type_term == PARSE_EVENTS__TERM_TYPE_ATTR);
+ TEST_ASSERT_VAL("wrong type idx",
+ term->idx == 7);
TEST_ASSERT_VAL("wrong type val",
term->type_val == PARSE_EVENTS__TERM_TYPE_NUM);
TEST_ASSERT_VAL("wrong val", term->val.num == 1);
@@ -489,7 +493,9 @@ static int test__checkterms_simple(struct list_head *terms)
/* config2=3 */
term = list_entry(term->list.next, struct parse_events_term, list);
TEST_ASSERT_VAL("wrong type term",
- term->type_term == PARSE_EVENTS__TERM_TYPE_CONFIG2);
+ term->type_term == PARSE_EVENTS__TERM_TYPE_ATTR);
+ TEST_ASSERT_VAL("wrong type idx",
+ term->idx == 8);
TEST_ASSERT_VAL("wrong type val",
term->type_val == PARSE_EVENTS__TERM_TYPE_NUM);
TEST_ASSERT_VAL("wrong val", term->val.num == 3);
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 6c8bb0f..3ba1450 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -533,6 +533,19 @@ int parse_events_add_breakpoint(struct list_head **list, int *idx,
return add_event(list, idx, &attr, NULL);
}
+int parse_events__set_attr(struct perf_event_attr *__attr, u64 idx, u64 val)
+{
+ __u64 *attr = (__u64 *)__attr;
+
+ if (idx * sizeof(*attr) >= sizeof(*__attr))
+ return -EINVAL;
+
+ attr += idx;
+ *attr |= val;
+
+ return 0;
+}
+
static int config_term(struct perf_event_attr *attr,
struct parse_events_term *term)
{
@@ -543,36 +556,17 @@ do { \
} while (0)
switch (term->type_term) {
- case PARSE_EVENTS__TERM_TYPE_CONFIG:
+ case PARSE_EVENTS__TERM_TYPE_ATTR:
CHECK_TYPE_VAL(NUM);
- attr->config = term->val.num;
- break;
- case PARSE_EVENTS__TERM_TYPE_CONFIG1:
- CHECK_TYPE_VAL(NUM);
- attr->config1 = term->val.num;
- break;
- case PARSE_EVENTS__TERM_TYPE_CONFIG2:
- CHECK_TYPE_VAL(NUM);
- attr->config2 = term->val.num;
- break;
- case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD:
- CHECK_TYPE_VAL(NUM);
- attr->sample_period = term->val.num;
- break;
- case PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE:
- /*
- * TODO uncomment when the field is available
- * attr->branch_sample_type = term->val.num;
- */
- break;
+ return parse_events__set_attr(attr, term->idx, term->val.num);
case PARSE_EVENTS__TERM_TYPE_NAME:
CHECK_TYPE_VAL(STR);
- break;
+ return 0;
default:
- return -EINVAL;
+ break;
}
- return 0;
+ return -EINVAL;
#undef CHECK_TYPE_VAL
}
@@ -1162,7 +1156,7 @@ int parse_events__is_hardcoded_term(struct parse_events_term *term)
static int new_term(struct parse_events_term **_term, int type_val,
int type_term, char *config,
- char *str, u64 num)
+ char *str, u64 num, u64 idx)
{
struct parse_events_term *term;
@@ -1173,7 +1167,8 @@ static int new_term(struct parse_events_term **_term, int type_val,
INIT_LIST_HEAD(&term->list);
term->type_val = type_val;
term->type_term = type_term;
- term->config = config;
+ term->config = config;
+ term->idx = idx;
switch (type_val) {
case PARSE_EVENTS__TERM_TYPE_NUM:
@@ -1191,17 +1186,17 @@ static int new_term(struct parse_events_term **_term, int type_val,
}
int parse_events_term__num(struct parse_events_term **term,
- int type_term, char *config, u64 num)
+ int type_term, char *config, u64 num, u64 idx)
{
return new_term(term, PARSE_EVENTS__TERM_TYPE_NUM, type_term,
- config, NULL, num);
+ config, NULL, num, idx);
}
int parse_events_term__str(struct parse_events_term **term,
int type_term, char *config, char *str)
{
return new_term(term, PARSE_EVENTS__TERM_TYPE_STR, type_term,
- config, str, 0);
+ config, str, 0, 0);
}
int parse_events_term__sym_hw(struct parse_events_term **term,
@@ -1215,18 +1210,18 @@ int parse_events_term__sym_hw(struct parse_events_term **term,
if (config)
return new_term(term, PARSE_EVENTS__TERM_TYPE_STR,
PARSE_EVENTS__TERM_TYPE_USER, config,
- (char *) sym->symbol, 0);
+ (char *) sym->symbol, 0, 0);
else
return new_term(term, PARSE_EVENTS__TERM_TYPE_STR,
PARSE_EVENTS__TERM_TYPE_USER,
- (char *) "event", (char *) sym->symbol, 0);
+ (char *) "event", (char *) sym->symbol, 0, 0);
}
int parse_events_term__clone(struct parse_events_term **new,
struct parse_events_term *term)
{
return new_term(new, term->type_val, term->type_term, term->config,
- term->val.str, term->val.num);
+ term->val.str, term->val.num, term->idx);
}
void parse_events__free_terms(struct list_head *terms)
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 8a48593..8bd5708 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -34,6 +34,8 @@ extern int parse_events_terms(struct list_head *terms, const char *str);
extern int parse_filter(const struct option *opt, const char *str, int unset);
#define EVENTS_HELP_MAX (128*1024)
+#define PERF_ATTR_IDX(MEMBER) \
+ (offsetof(struct perf_event_attr, MEMBER) / sizeof(__u64))
enum {
PARSE_EVENTS__TERM_TYPE_NUM,
@@ -42,12 +44,8 @@ enum {
enum {
PARSE_EVENTS__TERM_TYPE_USER,
- PARSE_EVENTS__TERM_TYPE_CONFIG,
- PARSE_EVENTS__TERM_TYPE_CONFIG1,
- PARSE_EVENTS__TERM_TYPE_CONFIG2,
+ PARSE_EVENTS__TERM_TYPE_ATTR,
PARSE_EVENTS__TERM_TYPE_NAME,
- PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD,
- PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE,
};
struct parse_events_term {
@@ -58,6 +56,7 @@ struct parse_events_term {
} val;
int type_val;
int type_term;
+ u64 idx;
struct list_head list;
};
@@ -73,7 +72,7 @@ struct parse_events_terms {
int parse_events__is_hardcoded_term(struct parse_events_term *term);
int parse_events_term__num(struct parse_events_term **_term,
- int type_term, char *config, u64 num);
+ int type_term, char *config, u64 num, u64 idx);
int parse_events_term__str(struct parse_events_term **_term,
int type_term, char *config, char *str);
int parse_events_term__sym_hw(struct parse_events_term **term,
@@ -96,6 +95,7 @@ int parse_events_add_breakpoint(struct list_head **list, int *idx,
int parse_events_add_pmu(struct list_head **list, int *idx,
char *pmu , struct list_head *head_config);
void parse_events__set_leader(char *name, struct list_head *list);
+int parse_events__set_attr(struct perf_event_attr *__attr, u64 idx, u64 val);
void parse_events_update_lists(struct list_head *list_event,
struct list_head *list_all);
void parse_events_error(void *data, void *scanner, char const *msg);
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 03ead35..f9397cc 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -59,12 +59,12 @@ static int sym(yyscan_t scanner, int type, int config)
return type == PERF_TYPE_HARDWARE ? PE_VALUE_SYM_HW : PE_VALUE_SYM_SW;
}
-static int term(yyscan_t scanner, int type)
+static int attr(yyscan_t scanner, u64 idx)
{
YYSTYPE *yylval = parse_events_get_lval(scanner);
- yylval->num = type;
- return PE_TERM;
+ yylval->num = idx;
+ return PE_TERM_ATTR;
}
%}
@@ -161,12 +161,12 @@ refs|Reference|ops|access |
misses|miss { return str(yyscanner, PE_NAME_CACHE_OP_RESULT); }
<cond_config>{
-config { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG); }
-config1 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG1); }
-config2 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG2); }
-name { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NAME); }
-period { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD); }
-branch_type { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE); }
+config { return attr(yyscanner, PERF_ATTR_IDX(config)); }
+config1 { return attr(yyscanner, PERF_ATTR_IDX(config1)); }
+config2 { return attr(yyscanner, PERF_ATTR_IDX(config2)); }
+period { return attr(yyscanner, PERF_ATTR_IDX(sample_period)); }
+branch_type { return attr(yyscanner, PERF_ATTR_IDX(branch_sample_type)); }
+name { return PE_TERM_NAME; }
, { return ','; }
"/" { BEGIN(INITIAL); return '/'; }
{name_minus} { return str(yyscanner, PE_NAME); }
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index afc44c1..c4a119b 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -33,7 +33,8 @@ static inc_group_count(struct list_head *list,
%}
%token PE_START_EVENTS PE_START_TERMS
-%token PE_VALUE PE_VALUE_SYM_HW PE_VALUE_SYM_SW PE_RAW PE_TERM
+%token PE_VALUE PE_VALUE_SYM_HW PE_VALUE_SYM_SW PE_RAW
+%token PE_TERM_NAME PE_TERM_ATTR
%token PE_EVENT_NAME
%token PE_NAME
%token PE_MODIFIER_EVENT PE_MODIFIER_BP
@@ -44,7 +45,7 @@ static inc_group_count(struct list_head *list,
%type <num> PE_VALUE_SYM_HW
%type <num> PE_VALUE_SYM_SW
%type <num> PE_RAW
-%type <num> PE_TERM
+%type <num> PE_TERM_ATTR
%type <str> PE_NAME
%type <str> PE_NAME_CACHE_TYPE
%type <str> PE_NAME_CACHE_OP_RESULT
@@ -357,7 +358,7 @@ PE_NAME '=' PE_VALUE
struct parse_events_term *term;
ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
- $1, $3));
+ $1, $3, 0));
$$ = term;
}
|
@@ -375,7 +376,7 @@ PE_NAME
struct parse_events_term *term;
ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
- $1, 1));
+ $1, 1, 0));
$$ = term;
}
|
@@ -388,27 +389,30 @@ PE_VALUE_SYM_HW
$$ = term;
}
|
-PE_TERM '=' PE_NAME
+PE_TERM_NAME '=' PE_NAME
{
struct parse_events_term *term;
- ABORT_ON(parse_events_term__str(&term, (int)$1, NULL, $3));
+ ABORT_ON(parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_NAME,
+ NULL, $3));
$$ = term;
}
|
-PE_TERM '=' PE_VALUE
+PE_TERM_ATTR '=' PE_VALUE
{
struct parse_events_term *term;
- ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, $3));
+ ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_ATTR,
+ NULL, $3, $1));
$$ = term;
}
|
-PE_TERM
+PE_TERM_ATTR
{
struct parse_events_term *term;
- ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, 1));
+ ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_ATTR,
+ NULL, 1, $1));
$$ = term;
}
--
1.8.1.1
From: Robert Richter <[email protected]>
Persistent event buffers may only be mmapped readonly. Thus, retry
mapping it readonly if mmap returns EACCES after trying to mmap
writable.
Signed-off-by: Robert Richter <[email protected]>
---
tools/perf/builtin-record.c | 7 ++++++-
tools/perf/builtin-top.c | 8 ++++++--
tools/perf/perf.h | 1 +
3 files changed, 13 insertions(+), 3 deletions(-)
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index cdf58ec..916776d 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -255,7 +255,12 @@ try_again:
goto out;
}
- if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
+try_again2:
+ if (perf_evlist__mmap(evlist, opts->mmap_pages, opts->mmap_ro) < 0) {
+ if (!opts->mmap_ro && errno == EACCES) {
+ opts->mmap_ro = true;
+ goto try_again2;
+ }
if (errno == EPERM) {
pr_err("Permission error mapping pages.\n"
"Consider increasing "
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 67bdb9f..2a5757d 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -899,8 +899,12 @@ try_again:
goto out_err;
}
}
-
- if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
+try_again2:
+ if (perf_evlist__mmap(evlist, opts->mmap_pages, opts->mmap_ro) < 0) {
+ if (!opts->mmap_ro && errno == EACCES) {
+ opts->mmap_ro = true;
+ goto try_again2;
+ }
ui__error("Failed to mmap with %d (%s)\n",
errno, strerror(errno));
goto out_err;
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 32bd102..41acea3 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -221,6 +221,7 @@ struct perf_record_opts {
bool sample_weight;
bool sample_time;
bool period;
+ bool mmap_ro;
unsigned int freq;
unsigned int mmap_pages;
unsigned int user_freq;
--
1.8.1.1
From: Robert Richter <[email protected]>
The event parser is limited to update only a subset of all fields in
struct perf_event_attr (config*, period, branch_type). We are not able
to set other attr fields, esp. flags.
Introducing a new syntax to set any field of the event attribute by
using an index to the u64 value to be used within struct
perf_event_attr. The new syntax attr<num> is similar to config<num>,
but <num> specifies the index to be used. E.g. attr5:23 sets bit 23 of
the flag field of attr.
The persistent event implementation is a use case of the above. In
this case sysfs provides:
/sys/bus/event_source/devices/persistent/events/mce_record:persistent,config=106
/sys/bus/event_source/devices/persistent/format/persistent:attr5:23
Persistent events are exposed via sysfs and need to set the persistent
flag (bit 23 of the flag field). With the sysfs entry above we are
able to define the persistent flag format and then may setup a
mce_record event with that flag set.
In general we are now flexible to describe with sysfs any event to be
setup by perf tools.
Signed-off-by: Robert Richter <[email protected]>
---
tools/perf/util/parse-events.l | 14 ++++++++++++++
tools/perf/util/pmu.c | 32 ++++++--------------------------
tools/perf/util/pmu.h | 9 ++-------
tools/perf/util/pmu.l | 1 +
tools/perf/util/pmu.y | 18 ++++++++++++++----
5 files changed, 37 insertions(+), 37 deletions(-)
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index f9397cc..b71356e 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -67,6 +67,19 @@ static int attr(yyscan_t scanner, u64 idx)
return PE_TERM_ATTR;
}
+static int attr_parse(yyscan_t scanner)
+{
+ YYSTYPE *yylval = parse_events_get_lval(scanner);
+ char *text = parse_events_get_text(scanner);
+
+ errno = 0;
+ yylval->num = strtoull(text + 4, NULL, 10);
+ if (errno)
+ return PE_ERROR;
+
+ return PE_TERM_ATTR;
+}
+
%}
%x cond_mem
@@ -161,6 +174,7 @@ refs|Reference|ops|access |
misses|miss { return str(yyscanner, PE_NAME_CACHE_OP_RESULT); }
<cond_config>{
+attr[0-9]* { return attr_parse(yyscanner); }
config { return attr(yyscanner, PERF_ATTR_IDX(config)); }
config1 { return attr(yyscanner, PERF_ATTR_IDX(config1)); }
config2 { return attr(yyscanner, PERF_ATTR_IDX(config2)); }
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 4c6f9c4..b2eb9fe 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -17,8 +17,8 @@ struct perf_pmu_alias {
};
struct perf_pmu_format {
- char *name;
- int value;
+ char *name;
+ u64 idx;
DECLARE_BITMAP(bits, PERF_PMU_FORMAT_BITS);
struct list_head list;
};
@@ -418,7 +418,6 @@ static int pmu_config_term(struct list_head *formats,
struct parse_events_term *term)
{
struct perf_pmu_format *format;
- __u64 *vp;
/*
* Support only for hardcoded and numnerial terms.
@@ -435,27 +434,8 @@ static int pmu_config_term(struct list_head *formats,
if (!format)
return -EINVAL;
- switch (format->value) {
- case PERF_PMU_FORMAT_VALUE_CONFIG:
- vp = &attr->config;
- break;
- case PERF_PMU_FORMAT_VALUE_CONFIG1:
- vp = &attr->config1;
- break;
- case PERF_PMU_FORMAT_VALUE_CONFIG2:
- vp = &attr->config2;
- break;
- default:
- return -EINVAL;
- }
-
- /*
- * XXX If we ever decide to go with string values for
- * non-hardcoded terms, here's the place to translate
- * them into value.
- */
- *vp |= pmu_format_value(format->bits, term->val.num);
- return 0;
+ return parse_events__set_attr(attr, format->idx,
+ pmu_format_value(format->bits, term->val.num));
}
int perf_pmu__config_terms(struct list_head *formats,
@@ -537,7 +517,7 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms)
}
int perf_pmu__new_format(struct list_head *list, char *name,
- int config, unsigned long *bits)
+ __u64 idx, unsigned long *bits)
{
struct perf_pmu_format *format;
@@ -546,7 +526,7 @@ int perf_pmu__new_format(struct list_head *list, char *name,
return -ENOMEM;
format->name = strdup(name);
- format->value = config;
+ format->idx = idx;
memcpy(format->bits, bits, sizeof(format->bits));
list_add_tail(&format->list, list);
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index 32fe55b..9c4fac1 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -3,12 +3,7 @@
#include <linux/bitops.h>
#include <linux/perf_event.h>
-
-enum {
- PERF_PMU_FORMAT_VALUE_CONFIG,
- PERF_PMU_FORMAT_VALUE_CONFIG1,
- PERF_PMU_FORMAT_VALUE_CONFIG2,
-};
+#include "parse-events.h"
#define PERF_PMU_FORMAT_BITS 64
@@ -34,7 +29,7 @@ int perf_pmu_wrap(void);
void perf_pmu_error(struct list_head *list, char *name, char const *msg);
int perf_pmu__new_format(struct list_head *list, char *name,
- int config, unsigned long *bits);
+ __u64 idx, unsigned long *bits);
void perf_pmu__set_format(unsigned long *bits, long from, long to);
int perf_pmu__format_parse(char *dir, struct list_head *head);
diff --git a/tools/perf/util/pmu.l b/tools/perf/util/pmu.l
index a15d9fb..9d5aa62 100644
--- a/tools/perf/util/pmu.l
+++ b/tools/perf/util/pmu.l
@@ -26,6 +26,7 @@ num_dec [0-9]+
%%
{num_dec} { return value(10); }
+attr { return PP_ATTR; }
config { return PP_CONFIG; }
config1 { return PP_CONFIG1; }
config2 { return PP_CONFIG2; }
diff --git a/tools/perf/util/pmu.y b/tools/perf/util/pmu.y
index bfd7e85..fb86df8 100644
--- a/tools/perf/util/pmu.y
+++ b/tools/perf/util/pmu.y
@@ -20,7 +20,7 @@ do { \
%}
-%token PP_CONFIG PP_CONFIG1 PP_CONFIG2
+%token PP_ATTR PP_CONFIG PP_CONFIG1 PP_CONFIG2
%token PP_VALUE PP_ERROR
%type <num> PP_VALUE
%type <bits> bit_term
@@ -40,24 +40,34 @@ format format_term
format_term
format_term:
+PP_ATTR ':' bits
+{
+ ABORT_ON(perf_pmu__new_format(format, name, 0, $3));
+}
+|
+PP_ATTR PP_VALUE ':' bits
+{
+ ABORT_ON(perf_pmu__new_format(format, name, $2, $4));
+}
+|
PP_CONFIG ':' bits
{
ABORT_ON(perf_pmu__new_format(format, name,
- PERF_PMU_FORMAT_VALUE_CONFIG,
+ PERF_ATTR_IDX(config),
$3));
}
|
PP_CONFIG1 ':' bits
{
ABORT_ON(perf_pmu__new_format(format, name,
- PERF_PMU_FORMAT_VALUE_CONFIG1,
+ PERF_ATTR_IDX(config1),
$3));
}
|
PP_CONFIG2 ':' bits
{
ABORT_ON(perf_pmu__new_format(format, name,
- PERF_PMU_FORMAT_VALUE_CONFIG2,
+ PERF_ATTR_IDX(config2),
$3));
}
--
1.8.1.1
* Robert Richter <[email protected]> wrote:
> From: Robert Richter <[email protected]>
>
> This patch set contains userland changes necessary for out-of-the-box
> support of persistent events. These patches are follow on patches of
> the kernel patches I sent out today:
>
> [PATCH 00/16] perf, persistent: Kernel updates for perf tool integration
>
> Persistent events are always enabled kernel events. Buffers are mapped
> readonly and multiple users are allowed. The persistent event flag of
> the event attribute must be set to specify such an event.
>
> The following changes to perf tools are necessary to support
> persistent events. A way is needed to specify sysfs entries to set
> event flags. For this a new syntax 'attr<num>' was added to the event
> parser, see patch #3. We also need to change perf tools to mmap
> persistent event buffers readonly.
Nice progress - one fundamental thing I'm missing from this series is
actual everyday utility: it would be nice if it was easy to just create a
persistent event of any sort and then use it.
For example I might want to instrument a global aspect of the system:
fork()s performed (-e sched:sched_process_fork).
For that I'd like to create a persistent event that just keeps running,
and to which I can occasionally attach to read-only to see what's going on
and maybe attach to it read-write to drain the trace entries. I.e.
basically a global trace buffer. How do I achieve that with this new
tooling?
Thanks,
Ingo
On Fri, May 31, 2013 at 02:07:20PM +0200, Ingo Molnar wrote:
> For that I'd like to create a persistent event that just keeps
> running, and to which I can occasionally attach to read-only to see
> what's going on and maybe attach to it read-write to drain the trace
> entries. I.e. basically a global trace buffer. How do I achieve that
> with this new tooling?
I have a patch which adds persistent_events= kernel command line param
and uses the same syntax as trace_event=. Rostedt has seen it already, I
need to incorporate his comments and send it out next week.
Thanks.
* Borislav Petkov <[email protected]> wrote:
> On Fri, May 31, 2013 at 02:07:20PM +0200, Ingo Molnar wrote:
> > For that I'd like to create a persistent event that just keeps
> > running, and to which I can occasionally attach to read-only to see
> > what's going on and maybe attach to it read-write to drain the trace
> > entries. I.e. basically a global trace buffer. How do I achieve that
> > with this new tooling?
>
> I have a patch which adds persistent_events= kernel command line param
> and uses the same syntax as trace_event=. Rostedt has seen it already, I
> need to incorporate his comments and send it out next week.
A kernel command line does not make it very practical - this needs to be
accessible from tooling ...
Thanks,
Ingo
> > On Fri, May 31, 2013 at 02:07:20PM +0200, Ingo Molnar wrote:
> > > For that I'd like to create a persistent event that just keeps
> > > running, and to which I can occasionally attach to read-only to see
> > > what's going on and maybe attach to it read-write to drain the trace
> > > entries. I.e. basically a global trace buffer. How do I achieve that
> > > with this new tooling?
Actually every system-wide event that is opened with readonly buffers
could be shared between processes, which would be the same as
connecting to a persistent event. I didn't want to implement this in a
first step, but I think this could be implemented without too much
effort.
The main problem is that at least one file descriptor needs to be open
at any time. Otherwise the event would be removed. So there is no
concept (except enabling an in-kernel event) yet to keep the event
running without any process having an event file descriptor open. It
will be even harder to release such an event, since there is no
distinction between detaching the event from the process and
permanently removing the event.
So the easiest would be to just open a system-wide event and then put
the process to sleep until we want to remove the event. In the meantime,
other processes (maybe duplicate on fork?) could attach to the same
buffer. Only when no process is attached to the event anymore will the
event be removed. Would that fit your purpose?
-Robert
On Fri, May 31, 2013 at 02:48:05PM +0200, Ingo Molnar wrote:
> A kernel command line does not make it very practical - this needs to
> be accessible from tooling ...
It will be accessible from tooling since the event is always enabled and
with the persistent pmu you specify that you want to consume already
created buffers, i.e. the persistent event.
However, you need to be able to enable such an event as early as possible
for tasks like boot tracing. I don't care if it is a kernel command line
param or something else - at the end of the day we need a mechanism
to say "enable this event the earliest moment possible, after you've
enabled the perf subsystem."
Hmm.
On Fri, May 31, 2013 at 11:16:24AM +0200, Robert Richter wrote:
> From: Robert Richter <[email protected]>
>
> The event parser is limited to update only a subset of all fields in
> struct perf_event_attr (config*, period, branch_type). We are not able
> to set other attr fields, esp. flags.
>
> Introducing a new syntax to set any field of the event attribute by
> using an index to the u64 value to be used within struct
> perf_event_attr. The new syntax attr<num> is similar to config<num>,
> but <num> specifies the index to be used. E.g. attr5:23 sets bit 23 of
> the flag field of attr.
>
> The persistent event implementation is a use case of the above. In
> this case sysfs provides:
>
> /sys/bus/event_source/devices/persistent/events/mce_record:persistent,config=106
> /sys/bus/event_source/devices/persistent/format/persistent:attr5:23
good idea, you probably need to update:
Documentation/ABI/testing/sysfs-bus-event_source-devices-format
also.. there's so far only mce_record event AFAICS, and this seems
to be initialized at the time when sysfs's not ready so I dont get
the sysfs entries for it.. and since there's no other event yet,
the sysfs is not updated/populated later.. I think ;)
I'll probably tweak it somehow later, but if there was anything
simple I could do or I missed something please let me know, that
would speed up my testing
thanks,
jirka
On 03.06.13 15:54:22, Jiri Olsa wrote:
> On Fri, May 31, 2013 at 11:16:24AM +0200, Robert Richter wrote:
> > From: Robert Richter <[email protected]>
> >
> > The event parser is limited to update only a subset of all fields in
> > struct perf_event_attr (config*, period, branch_type). We are not able
> > to set other attr fields, esp. flags.
> >
> > Introducing a new syntax to set any field of the event attribute by
> > using an index to the u64 value to be used within struct
> > perf_event_attr. The new syntax attr<num> is similar to config<num>,
> > but <num> specifies the index to be used. E.g. attr5:23 sets bit 23 of
> > the flag field of attr.
> >
> > The persistent event implementation is a use case of the above. In
> > this case sysfs provides:
> >
> > /sys/bus/event_source/devices/persistent/events/mce_record:persistent,config=106
> > /sys/bus/event_source/devices/persistent/format/persistent:attr5:23
>
> good idea, you probably need to update:
> Documentation/ABI/testing/sysfs-bus-event_source-devices-format
I will add something there.
> also.. there's so far only mce_record event AFAICS, and this seems
> to be initialized at the time when sysfs's not ready so I dont get
> the sysfs entries for it.. and since there's no other event yet,
> the sysfs is not updated/populated later.. I think ;)
The code adds entries dynamically. If something was added to the
persistent events list, the sysfs entry is updated too. You should
actually see something in sysfs. The code that registers it is here
for Intel or AMD:
arch/x86/kernel/cpu/mcheck/mce.c:mcheck_init_tp()
> I'll probably tweak it somehow later, but if there was anything
> simple I could do or I missed something please let me know, that
> would speed up my testing
CONFIG_FTRACE should be enabled (which enables tracepoints), but this
is probably enabled by default. Other than that, the persistent pmu should
be visible in sysfs. What does dmesg show on your system?
-Robert
Hi Robert,
On Fri, 31 May 2013 11:16:25 +0200, Robert Richter wrote:
> From: Robert Richter <[email protected]>
>
> Persistent event buffers may only be mmapped readonly. Thus, retry
> mapping it readonly if mmap returns EACCES after trying to mmap
> writable.
>
> Signed-off-by: Robert Richter <[email protected]>
> ---
> tools/perf/builtin-record.c | 7 ++++++-
> tools/perf/builtin-top.c | 8 ++++++--
> tools/perf/perf.h | 1 +
> 3 files changed, 13 insertions(+), 3 deletions(-)
>
> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> index cdf58ec..916776d 100644
> --- a/tools/perf/builtin-record.c
> +++ b/tools/perf/builtin-record.c
> @@ -255,7 +255,12 @@ try_again:
> goto out;
> }
>
> - if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
> +try_again2:
> + if (perf_evlist__mmap(evlist, opts->mmap_pages, opts->mmap_ro) < 0) {
> + if (!opts->mmap_ro && errno == EACCES) {
> + opts->mmap_ro = true;
> + goto try_again2;
> + }
> if (errno == EPERM) {
> pr_err("Permission error mapping pages.\n"
> "Consider increasing "
> diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
> index 67bdb9f..2a5757d 100644
> --- a/tools/perf/builtin-top.c
> +++ b/tools/perf/builtin-top.c
> @@ -899,8 +899,12 @@ try_again:
> goto out_err;
> }
> }
> -
> - if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
> +try_again2:
> + if (perf_evlist__mmap(evlist, opts->mmap_pages, opts->mmap_ro) < 0) {
> + if (!opts->mmap_ro && errno == EACCES) {
> + opts->mmap_ro = true;
> + goto try_again2;
> + }
> ui__error("Failed to mmap with %d (%s)\n",
> errno, strerror(errno));
> goto out_err;
You will need this also:
@@ -161,7 +161,8 @@ static int perf_record__mmap_read(struct perf_record *rec,
}
md->prev = old;
- perf_mmap__write_tail(md, old);
+ if (!rec->opts.mmap_ro)
+ perf_mmap__write_tail(md, old);
out:
return rc;
--
Thanks,
Namhyung
On 14.06.13 11:08:40, Namhyung Kim wrote:
> > - if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
> > +try_again2:
> > + if (perf_evlist__mmap(evlist, opts->mmap_pages, opts->mmap_ro) < 0) {
> > + if (!opts->mmap_ro && errno == EACCES) {
> > + opts->mmap_ro = true;
> > + goto try_again2;
> > + }
> > ui__error("Failed to mmap with %d (%s)\n",
> > errno, strerror(errno));
> > goto out_err;
>
>
> You will need this also:
>
>
> @@ -161,7 +161,8 @@ static int perf_record__mmap_read(struct perf_record *rec,
> }
>
> md->prev = old;
> - perf_mmap__write_tail(md, old);
> + if (!rec->opts.mmap_ro)
> + perf_mmap__write_tail(md, old);
>
> out:
> return rc;
Yes, indeed. Will add your change
-Robert