2014-07-15 00:31:49

by Stephane Eranian

Subject: [PATCH v2 0/5] perf: add ability to sample interrupted machine state

This short patch series adds the ability to sample the interrupted
machine state for each hardware sample. This is useful for analyzing
the state after certain events, for instance for function value
profiling after a call instruction.

The series extends the interface with a new PERF_SAMPLE_REGS_INTR
sample_type flag. The registers to sample are selected per event
via the sample_regs_intr bitmask. The name and bit position of
each register are architecture dependent and provided, just like
for PERF_SAMPLE_REGS_USER, by asm/perf_regs.h.

The support is similar to PERF_SAMPLE_REGS_USER.
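
For illustration only, here is a minimal user-space sketch of such a
setup (the x86 register names are just an example of what an arch's
asm/perf_regs.h provides, the helper name is made up, and error
handling is elided):

  #include <string.h>
  #include <unistd.h>
  #include <sys/syscall.h>
  #include <linux/perf_event.h>
  #include <asm/perf_regs.h>

  static int open_cycles_with_intr_regs(void)
  {
          struct perf_event_attr attr;

          memset(&attr, 0, sizeof(attr));
          attr.size          = sizeof(attr);
          attr.type          = PERF_TYPE_HARDWARE;
          attr.config        = PERF_COUNT_HW_CPU_CYCLES;
          attr.sample_period = 100000;
          attr.sample_type   = PERF_SAMPLE_IP | PERF_SAMPLE_REGS_INTR;
          /* register bit positions come from the arch's asm/perf_regs.h */
          attr.sample_regs_intr = (1ULL << PERF_REG_X86_IP) |
                                  (1ULL << PERF_REG_X86_SP) |
                                  (1ULL << PERF_REG_X86_AX);

          /* current task, any CPU, no group, no flags */
          return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
  }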

On Intel x86, the series includes support for capturing the
PEBS state as well. When precise sampling is used, the interrupted
state is collected, at least partially, from the PEBS record.
The PEBS machine state is a subset of the full machine state.

The series provides access to this new feature in perf record
with the -I option. It is possible to display the sampled
register values using perf report -D.

This patch series is the foundation for a future series adding
function value profiling.

In v2, we addressed the issues raised during review:
- added a sample parsing test
- shortened the perf record option to --intr-regs
- added a man page entry for the perf record -I/--intr-regs option
- refactored the register printf code shared between user and intr regs
- rebased to v3.16-rc3

Stephane Eranian (5):
perf: add ability to sample machine state on interrupt
perf/x86: add support for sampling PEBS machine state registers
perf tools: add core support for sampling intr machine state regs
perf/tests: add interrupted state sample parsing test
perf record: add new -I option to sample interrupted machine state

arch/x86/kernel/cpu/perf_event_intel_ds.c | 19 ++++++++++
include/linux/perf_event.h | 7 ++--
include/uapi/linux/perf_event.h | 14 +++++++-
kernel/events/core.c | 44 +++++++++++++++++++++--
tools/perf/Documentation/perf-record.txt | 6 ++++
tools/perf/builtin-record.c | 2 ++
tools/perf/perf.h | 1 +
tools/perf/tests/sample-parsing.c | 55 +++++++++++++++++++++--------
tools/perf/util/event.h | 1 +
tools/perf/util/evsel.c | 46 +++++++++++++++++++++++-
tools/perf/util/session.c | 44 ++++++++++++++++++++---
11 files changed, 213 insertions(+), 26 deletions(-)

--
1.7.9.5


2014-07-15 00:32:03

by Stephane Eranian

Subject: [PATCH v2 1/5] perf: add ability to sample machine state on interrupt

Enable capture of the interrupted machine state for each sample.

Registers to sample are passed per event in the sample_regs_intr
bitmask.

To sample the interrupted machine state, PERF_SAMPLE_REGS_INTR
must be set in sample_type.

The list of available registers is arch dependent and provided
by asm/perf_regs.h.

Registers are laid out as u64 in the order of the bits set in
sample_regs_intr.
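
As a sketch of how a consumer might walk this part of the sample
payload (the helper below only illustrates the layout described
above; its name is made up):

  #include <stdio.h>
  #include <linux/perf_event.h>

  /*
   * p points at the abi word of the PERF_SAMPLE_REGS_INTR block,
   * mask is the event's attr.sample_regs_intr.
   */
  static const __u64 *walk_regs_intr(const __u64 *p, __u64 mask)
  {
          __u64 abi = *p++;   /* PERF_SAMPLE_REGS_ABI_{NONE,32,64} */
          int bit;

          if (abi == PERF_SAMPLE_REGS_ABI_NONE)
                  return p;   /* no register values were dumped */

          for (bit = 0; bit < 64; bit++) {
                  if (mask & (1ULL << bit))   /* ascending bit order */
                          printf("reg %2d = 0x%016llx\n", bit,
                                 (unsigned long long)*p++);
          }
          return p;
  }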

Reviewed-by: Andi Kleen <[email protected]>
Reviewed-by: Jiri Olsa <[email protected]>
Signed-off-by: Stephane Eranian <[email protected]>
---
include/linux/perf_event.h | 7 +++++--
include/uapi/linux/perf_event.h | 14 ++++++++++++-
kernel/events/core.c | 44 +++++++++++++++++++++++++++++++++++++--
3 files changed, 60 insertions(+), 5 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 707617a..4970c1d 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -78,7 +78,7 @@ struct perf_branch_stack {
struct perf_branch_entry entries[0];
};

-struct perf_regs_user {
+struct perf_regs {
__u64 abi;
struct pt_regs *regs;
};
@@ -595,7 +595,8 @@ struct perf_sample_data {
struct perf_callchain_entry *callchain;
struct perf_raw_record *raw;
struct perf_branch_stack *br_stack;
- struct perf_regs_user regs_user;
+ struct perf_regs regs_user;
+ struct perf_regs regs_intr;
u64 stack_user_size;
u64 weight;
/*
@@ -618,6 +619,8 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
data->weight = 0;
data->data_src.val = 0;
data->txn = 0;
+ data->regs_intr.abi = PERF_SAMPLE_REGS_ABI_NONE;
+ data->regs_intr.regs = NULL;
}

extern void perf_output_sample(struct perf_output_handle *handle,
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 9269de2..543cc19 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -137,8 +137,9 @@ enum perf_event_sample_format {
PERF_SAMPLE_DATA_SRC = 1U << 15,
PERF_SAMPLE_IDENTIFIER = 1U << 16,
PERF_SAMPLE_TRANSACTION = 1U << 17,
+ PERF_SAMPLE_REGS_INTR = 1U << 18,

- PERF_SAMPLE_MAX = 1U << 18, /* non-ABI */
+ PERF_SAMPLE_MAX = 1U << 19, /* non-ABI */
};

/*
@@ -334,6 +335,15 @@ struct perf_event_attr {

/* Align to u64. */
__u32 __reserved_2;
+ /*
+ * Define set of regs to dump on samples.
+ * State is captured on:
+ * - precise = 0: PMU interrupt
+ * - precise > 0: sampled instruction
+ *
+ * See asm/perf_regs.h for details.
+ */
+ __u64 sample_regs_intr;
};

#define perf_flags(attr) (*(&(attr)->read_format + 1))
@@ -686,6 +696,8 @@ enum perf_event_type {
* { u64 weight; } && PERF_SAMPLE_WEIGHT
* { u64 data_src; } && PERF_SAMPLE_DATA_SRC
* { u64 transaction; } && PERF_SAMPLE_TRANSACTION
+ * { u64 abi; # enum perf_sample_regs_abi
+ * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR
* };
*/
PERF_RECORD_SAMPLE = 9,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 57e9190..018939c 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4296,7 +4296,7 @@ perf_output_sample_regs(struct perf_output_handle *handle,
}
}

-static void perf_sample_regs_user(struct perf_regs_user *regs_user,
+static void perf_sample_regs_user(struct perf_regs *regs_user,
struct pt_regs *regs)
{
if (!user_mode(regs)) {
@@ -4312,6 +4312,14 @@ static void perf_sample_regs_user(struct perf_regs_user *regs_user,
}
}

+static void perf_sample_regs_intr(struct perf_regs *regs_intr,
+ struct pt_regs *regs)
+{
+ regs_intr->regs = regs;
+ regs_intr->abi = perf_reg_abi(current);
+}
+
+
/*
* Get remaining task size from user stack pointer.
*
@@ -4693,6 +4701,22 @@ void perf_output_sample(struct perf_output_handle *handle,
if (sample_type & PERF_SAMPLE_TRANSACTION)
perf_output_put(handle, data->txn);

+ if (sample_type & PERF_SAMPLE_REGS_INTR) {
+ u64 abi = data->regs_intr.abi;
+ /*
+ * If there are no regs to dump, notice it through
+ * first u64 being zero (PERF_SAMPLE_REGS_ABI_NONE).
+ */
+ perf_output_put(handle, abi);
+
+ if (abi) {
+ u64 mask = event->attr.sample_regs_intr;
+ perf_output_sample_regs(handle,
+ data->regs_intr.regs,
+ mask);
+ }
+ }
+
if (!event->attr.watermark) {
int wakeup_events = event->attr.wakeup_events;

@@ -4779,7 +4803,7 @@ void perf_prepare_sample(struct perf_event_header *header,
* in case new sample type is added, because we could eat
* up the rest of the sample size.
*/
- struct perf_regs_user *uregs = &data->regs_user;
+ struct perf_regs *uregs = &data->regs_user;
u16 stack_size = event->attr.sample_stack_user;
u16 size = sizeof(u64);

@@ -4800,6 +4824,20 @@ void perf_prepare_sample(struct perf_event_header *header,
data->stack_user_size = stack_size;
header->size += size;
}
+
+ if (sample_type & PERF_SAMPLE_REGS_INTR) {
+ /* regs dump ABI info */
+ int size = sizeof(u64);
+
+ perf_sample_regs_intr(&data->regs_intr, regs);
+
+ if (data->regs_intr.regs) {
+ u64 mask = event->attr.sample_regs_intr;
+ size += hweight64(mask) * sizeof(u64);
+ }
+
+ header->size += size;
+ }
}

static void perf_event_output(struct perf_event *event,
@@ -6994,6 +7032,8 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
ret = -EINVAL;
}

+ if (attr->sample_type & PERF_SAMPLE_REGS_INTR)
+ ret = perf_reg_validate(attr->sample_regs_intr);
out:
return ret;

--
1.7.9.5

2014-07-15 00:32:10

by Stephane Eranian

Subject: [PATCH v2 3/5] perf tools: add core support for sampling intr machine state regs

Add the infrastructure to set up, collect and report the interrupted
machine state registers which can be captured by the kernel.
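
With this in place, perf report -D prints, for each sample carrying
interrupt registers, a header line of the form below (the mask value
is just an example; the per-register lines that follow come from the
existing regs_dump__printf() helper):

  ... intr regs: mask 0xff0fff ABI 64-bit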

Signed-off-by: Stephane Eranian <[email protected]>
---
tools/perf/perf.h | 1 +
tools/perf/util/event.h | 1 +
tools/perf/util/evsel.c | 46 ++++++++++++++++++++++++++++++++++++++++++++-
tools/perf/util/session.c | 44 ++++++++++++++++++++++++++++++++++++++-----
4 files changed, 86 insertions(+), 6 deletions(-)

diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 510c65f..309d956 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -54,6 +54,7 @@ struct record_opts {
bool sample_weight;
bool sample_time;
bool period;
+ bool sample_intr_regs;
unsigned int freq;
unsigned int mmap_pages;
unsigned int user_freq;
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index e5dd40a..4e36967 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -160,6 +160,7 @@ struct perf_sample {
struct ip_callchain *callchain;
struct branch_stack *branch_stack;
struct regs_dump user_regs;
+ struct regs_dump intr_regs;
struct stack_dump user_stack;
struct sample_read read;
};
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 8606175..2ba3403 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -626,6 +626,11 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts)
if (opts->call_graph_enabled)
perf_evsel__config_callgraph(evsel, opts);

+ if (opts->sample_intr_regs) {
+ attr->sample_regs_intr = PERF_REGS_MASK;
+ perf_evsel__set_sample_bit(evsel, REGS_INTR);
+ }
+
if (target__has_cpu(&opts->target))
perf_evsel__set_sample_bit(evsel, CPU);

@@ -979,6 +984,7 @@ static size_t perf_event_attr__fprintf(struct perf_event_attr *attr, FILE *fp)
ret += PRINT_ATTR_X64(branch_sample_type);
ret += PRINT_ATTR_X64(sample_regs_user);
ret += PRINT_ATTR_U32(sample_stack_user);
+ ret += PRINT_ATTR_X64(sample_regs_intr);

ret += fprintf(fp, "%.60s\n", graph_dotted_line);

@@ -1468,6 +1474,23 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
array++;
}

+ data->intr_regs.abi = PERF_SAMPLE_REGS_ABI_NONE;
+ if (type & PERF_SAMPLE_REGS_INTR) {
+ OVERFLOW_CHECK_u64(array);
+ data->intr_regs.abi = *array;
+ array++;
+
+ if (data->intr_regs.abi != PERF_SAMPLE_REGS_ABI_NONE) {
+ u64 mask = evsel->attr.sample_regs_intr;
+
+ sz = hweight_long(mask) * sizeof(u64);
+ OVERFLOW_CHECK(array, sz, max_size);
+ data->intr_regs.mask = mask;
+ data->intr_regs.regs = (u64 *)array;
+ array = (void *)array + sz;
+ }
+ }
+
return 0;
}

@@ -1563,6 +1586,16 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
if (type & PERF_SAMPLE_TRANSACTION)
result += sizeof(u64);

+ if (type & PERF_SAMPLE_REGS_INTR) {
+ if (sample->intr_regs.abi) {
+ result += sizeof(u64);
+ sz = hweight_long(sample->intr_regs.mask) * sizeof(u64);
+ result += sz;
+ } else {
+ result += sizeof(u64);
+ }
+ }
+
return result;
}

@@ -1741,6 +1774,17 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type,
array++;
}

+ if (type & PERF_SAMPLE_REGS_INTR) {
+ if (sample->intr_regs.abi) {
+ *array++ = sample->intr_regs.abi;
+ sz = hweight_long(sample->intr_regs.mask) * sizeof(u64);
+ memcpy(array, sample->intr_regs.regs, sz);
+ array = (void *)array + sz;
+ } else {
+ *array++ = 0;
+ }
+ }
+
return 0;
}

@@ -1870,7 +1914,7 @@ static int sample_type__fprintf(FILE *fp, bool *first, u64 value)
bit_name(READ), bit_name(CALLCHAIN), bit_name(ID), bit_name(CPU),
bit_name(PERIOD), bit_name(STREAM_ID), bit_name(RAW),
bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER),
- bit_name(IDENTIFIER),
+ bit_name(IDENTIFIER), bit_name(REGS_INTR),
{ .name = NULL, }
};
#undef bit_name
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 64a186e..9da3eda 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -702,15 +702,46 @@ static void regs_dump__printf(u64 mask, u64 *regs)
}
}

+static const char *regs_abi[] = {
+ [PERF_SAMPLE_REGS_ABI_NONE] = "none",
+ [PERF_SAMPLE_REGS_ABI_32] = "32-bit",
+ [PERF_SAMPLE_REGS_ABI_64] = "64-bit",
+};
+
+static inline const char *regs_dump_abi(struct regs_dump *d)
+{
+ if (d->abi > PERF_SAMPLE_REGS_ABI_64)
+ return "unknown";
+
+ return regs_abi[d->abi];
+}
+
+static void regs__printf(const char *type, struct regs_dump *regs)
+{
+ u64 mask = regs->mask;
+
+ printf("... %s regs: mask 0x%" PRIx64 " ABI %s\n",
+ type,
+ mask,
+ regs_dump_abi(regs));
+
+ regs_dump__printf(mask, regs->regs);
+}
+
static void regs_user__printf(struct perf_sample *sample)
{
struct regs_dump *user_regs = &sample->user_regs;

- if (user_regs->regs) {
- u64 mask = user_regs->mask;
- printf("... user regs: mask 0x%" PRIx64 "\n", mask);
- regs_dump__printf(mask, user_regs->regs);
- }
+ if (user_regs->regs)
+ regs__printf("user", user_regs);
+}
+
+static void regs_intr__printf(struct perf_sample *sample)
+{
+ struct regs_dump *intr_regs = &sample->intr_regs;
+
+ if (intr_regs->regs)
+ regs__printf("intr", intr_regs);
}

static void stack_user__printf(struct stack_dump *dump)
@@ -809,6 +840,9 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event,
if (sample_type & PERF_SAMPLE_REGS_USER)
regs_user__printf(sample);

+ if (sample_type & PERF_SAMPLE_REGS_INTR)
+ regs_intr__printf(sample);
+
if (sample_type & PERF_SAMPLE_STACK_USER)
stack_user__printf(&sample->user_stack);

--
1.7.9.5

2014-07-15 00:32:07

by Stephane Eranian

Subject: [PATCH v2 2/5] perf/x86: add support for sampling PEBS machine state registers

PEBS can capture machine state registers at retirement of the sampled
instructions. When precise sampling is enabled on an event, PEBS
is used, so substitute the interrupted state with the PEBS state.
Note that not all registers are captured by PEBS. Those missing
are replaced by their interrupt-state counterparts.
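
For illustration, a consumer that wants register values at the
sampled instruction rather than at the PMI combines the new sample
type with precise sampling; a sketch (regs_mask is a placeholder for
bits taken from asm/perf_regs.h):

  attr.sample_type     |= PERF_SAMPLE_REGS_INTR;
  attr.sample_regs_intr = regs_mask;
  attr.precise_ip       = 2;    /* precise > 0 selects PEBS on Intel x86 */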

Signed-off-by: Stephane Eranian <[email protected]>
---
arch/x86/kernel/cpu/perf_event_intel_ds.c | 19 +++++++++++++++++++
1 file changed, 19 insertions(+)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 980970c..d612bcd 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -925,6 +925,25 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
regs.bp = pebs->bp;
regs.sp = pebs->sp;

+ if (sample_type & PERF_SAMPLE_REGS_INTR) {
+ regs.ax = pebs->ax;
+ regs.bx = pebs->bx;
+ regs.cx = pebs->cx;
+ regs.si = pebs->si;
+ regs.di = pebs->di;
+
+ regs.r8 = pebs->r8;
+ regs.r9 = pebs->r9;
+ regs.r10 = pebs->r10;
+ regs.r11 = pebs->r11;
+ regs.r12 = pebs->r12;
+ regs.r13 = pebs->r13;
+ regs.r14 = pebs->r14;
+ regs.r15 = pebs->r15;
+
+ data.regs_intr.regs = &regs;
+ }
+
if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
regs.ip = pebs->real_ip;
regs.flags |= PERF_EFLAGS_EXACT;
--
1.7.9.5

2014-07-15 00:32:12

by Stephane Eranian

Subject: [PATCH v2 4/5] perf/tests: add interrupted state sample parsing test

This patch updates the sample parsing test with support
for sampling of the interrupted machine state.

The patch modifies the do_test() code to share the sample
regs bitmask between user and intr regs.

Signed-off-by: Stephane Eranian <[email protected]>
---
tools/perf/tests/sample-parsing.c | 55 +++++++++++++++++++++++++++----------
1 file changed, 40 insertions(+), 15 deletions(-)

diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c
index 7ae8d17..47367fe 100644
--- a/tools/perf/tests/sample-parsing.c
+++ b/tools/perf/tests/sample-parsing.c
@@ -125,16 +125,28 @@ static bool samples_same(const struct perf_sample *s1,
if (type & PERF_SAMPLE_TRANSACTION)
COMP(transaction);

+ if (type & PERF_SAMPLE_REGS_INTR) {
+ size_t sz = hweight_long(s1->intr_regs.mask) * sizeof(u64);
+
+ COMP(intr_regs.mask);
+ COMP(intr_regs.abi);
+ if (s1->intr_regs.abi &&
+ (!s1->intr_regs.regs || !s2->intr_regs.regs ||
+ memcmp(s1->intr_regs.regs, s2->intr_regs.regs, sz))) {
+ pr_debug("Samples differ at 'intr_regs'\n");
+ return false;
+ }
+ }
+
return true;
}

-static int do_test(u64 sample_type, u64 sample_regs_user, u64 read_format)
+static int do_test(u64 sample_type, u64 sample_regs, u64 read_format)
{
struct perf_evsel evsel = {
.needs_swap = false,
.attr = {
.sample_type = sample_type,
- .sample_regs_user = sample_regs_user,
.read_format = read_format,
},
};
@@ -153,7 +165,7 @@ static int do_test(u64 sample_type, u64 sample_regs_user, u64 read_format)
/* 1 branch_entry */
.data = {1, 211, 212, 213},
};
- u64 user_regs[64];
+ u64 regs[64];
const u64 raw_data[] = {0x123456780a0b0c0dULL, 0x1102030405060708ULL};
const u64 data[] = {0x2211443366558877ULL, 0, 0xaabbccddeeff4321ULL};
struct perf_sample sample = {
@@ -175,8 +187,8 @@ static int do_test(u64 sample_type, u64 sample_regs_user, u64 read_format)
.branch_stack = &branch_stack.branch_stack,
.user_regs = {
.abi = PERF_SAMPLE_REGS_ABI_64,
- .mask = sample_regs_user,
- .regs = user_regs,
+ .mask = sample_regs,
+ .regs = regs,
},
.user_stack = {
.size = sizeof(data),
@@ -186,14 +198,25 @@ static int do_test(u64 sample_type, u64 sample_regs_user, u64 read_format)
.time_enabled = 0x030a59d664fca7deULL,
.time_running = 0x011b6ae553eb98edULL,
},
+ .intr_regs = {
+ .abi = PERF_SAMPLE_REGS_ABI_64,
+ .mask = sample_regs,
+ .regs = regs,
+ },
};
struct sample_read_value values[] = {{1, 5}, {9, 3}, {2, 7}, {6, 4},};
struct perf_sample sample_out;
size_t i, sz, bufsz;
int err, ret = -1;

- for (i = 0; i < sizeof(user_regs); i++)
- *(i + (u8 *)user_regs) = i & 0xfe;
+ if (sample_type & PERF_SAMPLE_REGS_USER)
+ evsel.attr.sample_regs_user = sample_regs;
+
+ if (sample_type & PERF_SAMPLE_REGS_INTR)
+ evsel.attr.sample_regs_intr = sample_regs;
+
+ for (i = 0; i < sizeof(regs); i++)
+ *(i + (u8 *)regs) = i & 0xfe;

if (read_format & PERF_FORMAT_GROUP) {
sample.read.group.nr = 4;
@@ -270,7 +293,7 @@ int test__sample_parsing(void)
{
const u64 rf[] = {4, 5, 6, 7, 12, 13, 14, 15};
u64 sample_type;
- u64 sample_regs_user;
+ u64 sample_regs;
size_t i;
int err;

@@ -279,7 +302,7 @@ int test__sample_parsing(void)
* were added. Please actually update the test rather than just change
* the condition below.
*/
- if (PERF_SAMPLE_MAX > PERF_SAMPLE_TRANSACTION << 1) {
+ if (PERF_SAMPLE_MAX > PERF_SAMPLE_REGS_INTR << 1) {
pr_debug("sample format has changed, some new PERF_SAMPLE_ bit was introduced - test needs updating\n");
return -1;
}
@@ -296,22 +319,24 @@ int test__sample_parsing(void)
}
continue;
}
+ sample_regs = 0;

if (sample_type == PERF_SAMPLE_REGS_USER)
- sample_regs_user = 0x3fff;
- else
- sample_regs_user = 0;
+ sample_regs = 0x3fff;
+
+ if (sample_type == PERF_SAMPLE_REGS_INTR)
+ sample_regs = 0xff0fff;

- err = do_test(sample_type, sample_regs_user, 0);
+ err = do_test(sample_type, sample_regs, 0);
if (err)
return err;
}

/* Test all sample format bits together */
sample_type = PERF_SAMPLE_MAX - 1;
- sample_regs_user = 0x3fff;
+ sample_regs = 0x3fff; /* shared by intr and user regs */
for (i = 0; i < ARRAY_SIZE(rf); i++) {
- err = do_test(sample_type, sample_regs_user, rf[i]);
+ err = do_test(sample_type, sample_regs, rf[i]);
if (err)
return err;
}
--
1.7.9.5

2014-07-15 00:32:57

by Stephane Eranian

Subject: [PATCH v2 5/5] perf record: add new -I option to sample interrupted machine state

Add the -I/--intr-regs option to capture machine state registers at
interrupt.

Add the corresponding man page description.
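
Example usage (the workload name is a placeholder); the captured
values then show up as 'intr regs' lines in the raw dump:

  $ perf record -I -e cycles:p ./my_workload
  $ perf report -D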

Signed-off-by: Stephane Eranian <[email protected]>
---
tools/perf/Documentation/perf-record.txt | 6 ++++++
tools/perf/builtin-record.c | 2 ++
2 files changed, 8 insertions(+)

diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index d460049..1a36259 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -214,6 +214,12 @@ if combined with -a or -C options.
After starting the program, wait msecs before measuring. This is useful to
filter out the startup phase of the program, which is often very different.

+-I::
+--intr-regs::
+Capture machine state (registers) at interrupt, i.e., on counter overflows for
+each sample. The list of captured registers depends on the architecture. This
+option is off by default.
+
SEE ALSO
--------
linkperf:perf-stat[1], linkperf:perf-list[1]
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 378b85b..ed2e65b 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -876,6 +876,8 @@ const struct option record_options[] = {
"sample transaction flags (special events only)"),
OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
"use per-thread mmaps"),
+ OPT_BOOLEAN('I', "intr-regs", &record.opts.sample_intr_regs,
+ "Sample machine registers on interrupt"),
OPT_END()
};

--
1.7.9.5

2014-07-15 14:16:38

by Peter Zijlstra

Subject: Re: [PATCH v2 1/5] perf: add ability to sample machine state on interrupt

On Tue, Jul 15, 2014 at 02:31:40AM +0200, Stephane Eranian wrote:
> @@ -595,7 +595,8 @@ struct perf_sample_data {
> struct perf_callchain_entry *callchain;
> struct perf_raw_record *raw;
> struct perf_branch_stack *br_stack;
> - struct perf_regs_user regs_user;
> + struct perf_regs regs_user;
> + struct perf_regs regs_intr;
> u64 stack_user_size;
> u64 weight;
> /*
> @@ -618,6 +619,8 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
> data->weight = 0;
> data->data_src.val = 0;
> data->txn = 0;
> + data->regs_intr.abi = PERF_SAMPLE_REGS_ABI_NONE;
> + data->regs_intr.regs = NULL;
> }

I don't think we've been very careful here; does the below make sense?

AFAICT we don't need to set stack_user_size at all,
perf_prepare_sample() will set it when required, and with the change to
perf_sample_regs_user() the same is true for the regs_user thing.

This again reduces the cost of perf_sample_data_init() to touching a
single cacheline.

I'm not entirely sure the ____cacheline_aligned makes sense though, the
previous stack line is probably touched already so any next cacheline is
the one, and on avg we'd gain 0.5 cachelines worth of data.



---
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 707617a8c0f6..d27fec8118b1 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -575,34 +575,40 @@ extern u64 perf_event_read_value(struct perf_event *event,


struct perf_sample_data {
- u64 type;
+ /*
+ * Fields set by perf_sample_data_init(), group so as to
+ * minimize the cachelines touched.
+ */
+ u64 addr;
+ struct perf_raw_record *raw;
+ struct perf_branch_stack *br_stack;
+ u64 period;
+ u64 weight;
+ u64 txn;
+ union perf_mem_data_src data_src;
+

+ /*
+ * The other fields, optionally {set,used} by
+ * perf_{prepare,output}_sample().
+ */
+ u64 type;
u64 ip;
struct {
u32 pid;
u32 tid;
} tid_entry;
u64 time;
- u64 addr;
u64 id;
u64 stream_id;
struct {
u32 cpu;
u32 reserved;
} cpu_entry;
- u64 period;
- union perf_mem_data_src data_src;
struct perf_callchain_entry *callchain;
- struct perf_raw_record *raw;
- struct perf_branch_stack *br_stack;
struct perf_regs_user regs_user;
u64 stack_user_size;
- u64 weight;
- /*
- * Transaction flags for abort events:
- */
- u64 txn;
-};
+} ____cacheline_aligned;

static inline void perf_sample_data_init(struct perf_sample_data *data,
u64 addr, u64 period)
@@ -612,9 +618,6 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
data->raw = NULL;
data->br_stack = NULL;
data->period = period;
- data->regs_user.abi = PERF_SAMPLE_REGS_ABI_NONE;
- data->regs_user.regs = NULL;
- data->stack_user_size = 0;
data->weight = 0;
data->data_src.val = 0;
data->txn = 0;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index c8b53c94d41d..926cd7aafc14 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4299,6 +4299,8 @@ perf_output_sample_regs(struct perf_output_handle *handle,
static void perf_sample_regs_user(struct perf_regs_user *regs_user,
struct pt_regs *regs)
{
+ regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE;
+
if (!user_mode(regs)) {
if (current->mm)
regs = task_pt_regs(current);



2014-07-15 14:25:35

by Peter Zijlstra

Subject: Re: [PATCH v2 1/5] perf: add ability to sample machine state on interrupt

On Tue, Jul 15, 2014 at 02:31:40AM +0200, Stephane Eranian wrote:
> @@ -618,6 +619,8 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
> data->weight = 0;
> data->data_src.val = 0;
> data->txn = 0;
> + data->regs_intr.abi = PERF_SAMPLE_REGS_ABI_NONE;
> + data->regs_intr.regs = NULL;
> }

> +static void perf_sample_regs_intr(struct perf_regs *regs_intr,
> + struct pt_regs *regs)
> +{
> + regs_intr->regs = regs;
> + regs_intr->abi = perf_reg_abi(current);
> +}

> @@ -4800,6 +4824,20 @@ void perf_prepare_sample(struct perf_event_header *header,
> data->stack_user_size = stack_size;
> header->size += size;
> }
> +
> + if (sample_type & PERF_SAMPLE_REGS_INTR) {
> + /* regs dump ABI info */
> + int size = sizeof(u64);
> +
> + perf_sample_regs_intr(&data->regs_intr, regs);
> +
> + if (data->regs_intr.regs) {
> + u64 mask = event->attr.sample_regs_intr;
> + size += hweight64(mask) * sizeof(u64);
> + }
> +
> + header->size += size;
> + }

Given that the prepare_sample hunk sets both regs_intr fields, the
addition to perf_sample_data_init() is entirely superfluous, no?



2014-07-15 14:29:55

by Peter Zijlstra

Subject: Re: [PATCH v2 2/5] perf/x86: add support for sampling PEBS machine state registers

On Tue, Jul 15, 2014 at 02:31:41AM +0200, Stephane Eranian wrote:
> PEBS can capture machine state registers at retirement of the sampled
> instructions. When precise sampling is enabled on an event, PEBS
> is used, so substitute the interrupted state with the PEBS state.
> Note that not all registers are captured by PEBS. Those missing
> are replaced by their interrupt-state counterparts.
>
> Signed-off-by: Stephane Eranian <[email protected]>
> ---
> arch/x86/kernel/cpu/perf_event_intel_ds.c | 19 +++++++++++++++++++
> 1 file changed, 19 insertions(+)
>
> diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
> index 980970c..d612bcd 100644
> --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
> +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
> @@ -925,6 +925,25 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
> regs.bp = pebs->bp;
> regs.sp = pebs->sp;
>
> + if (sample_type & PERF_SAMPLE_REGS_INTR) {
> + regs.ax = pebs->ax;
> + regs.bx = pebs->bx;
> + regs.cx = pebs->cx;
> + regs.si = pebs->si;
> + regs.di = pebs->di;
> +
> + regs.r8 = pebs->r8;
> + regs.r9 = pebs->r9;
> + regs.r10 = pebs->r10;
> + regs.r11 = pebs->r11;
> + regs.r12 = pebs->r12;
> + regs.r13 = pebs->r13;
> + regs.r14 = pebs->r14;
> + regs.r15 = pebs->r15;
> +
> + data.regs_intr.regs = &regs;

This last assignment is superfluous. Note how you previous patch's
perf_sample_regs_intr() is unconditionally writing its .regs value, and
in turn, perf_sample_regs_intr() is called unconditionally from
perf_prepare_sample().




2014-07-15 23:56:47

by Stephane Eranian

Subject: Re: [PATCH v2 2/5] perf/x86: add support for sampling PEBS machine state registers

On Tue, Jul 15, 2014 at 4:29 PM, Peter Zijlstra <[email protected]> wrote:
> On Tue, Jul 15, 2014 at 02:31:41AM +0200, Stephane Eranian wrote:
>> PEBS can capture machine state registers at retirement of the sampled
>> instructions. When precise sampling is enabled on an event, PEBS
>> is used, so substitute the interrupted state with the PEBS state.
>> Note that not all registers are captured by PEBS. Those missing
>> are replaced by their interrupt-state counterparts.
>>
>> Signed-off-by: Stephane Eranian <[email protected]>
>> ---
>> arch/x86/kernel/cpu/perf_event_intel_ds.c | 19 +++++++++++++++++++
>> 1 file changed, 19 insertions(+)
>>
>> diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
>> index 980970c..d612bcd 100644
>> --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
>> +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
>> @@ -925,6 +925,25 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
>> regs.bp = pebs->bp;
>> regs.sp = pebs->sp;
>>
>> + if (sample_type & PERF_SAMPLE_REGS_INTR) {
>> + regs.ax = pebs->ax;
>> + regs.bx = pebs->bx;
>> + regs.cx = pebs->cx;
>> + regs.si = pebs->si;
>> + regs.di = pebs->di;
>> +
>> + regs.r8 = pebs->r8;
>> + regs.r9 = pebs->r9;
>> + regs.r10 = pebs->r10;
>> + regs.r11 = pebs->r11;
>> + regs.r12 = pebs->r12;
>> + regs.r13 = pebs->r13;
>> + regs.r14 = pebs->r14;
>> + regs.r15 = pebs->r15;
>> +
>> + data.regs_intr.regs = &regs;
>
> This last assignment is superfluous. Note how you previous patch's
> perf_sample_regs_intr() is unconditionally writing its .regs value, and
> in turn, perf_sample_regs_intr() is called unconditionally from
> perf_prepare_sample().
>
Ok, I will remove it.

2014-07-15 23:59:05

by Stephane Eranian

Subject: Re: [PATCH v2 1/5] perf: add ability to sample machine state on interrupt

On Tue, Jul 15, 2014 at 4:25 PM, Peter Zijlstra <[email protected]> wrote:
> On Tue, Jul 15, 2014 at 02:31:40AM +0200, Stephane Eranian wrote:
>> @@ -618,6 +619,8 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
>> data->weight = 0;
>> data->data_src.val = 0;
>> data->txn = 0;
>> + data->regs_intr.abi = PERF_SAMPLE_REGS_ABI_NONE;
>> + data->regs_intr.regs = NULL;
>> }
>
>> +static void perf_sample_regs_intr(struct perf_regs *regs_intr,
>> + struct pt_regs *regs)
>> +{
>> + regs_intr->regs = regs;
>> + regs_intr->abi = perf_reg_abi(current);
>> +}
>
>> @@ -4800,6 +4824,20 @@ void perf_prepare_sample(struct perf_event_header *header,
>> data->stack_user_size = stack_size;
>> header->size += size;
>> }
>> +
>> + if (sample_type & PERF_SAMPLE_REGS_INTR) {
>> + /* regs dump ABI info */
>> + int size = sizeof(u64);
>> +
>> + perf_sample_regs_intr(&data->regs_intr, regs);
>> +
>> + if (data->regs_intr.regs) {
>> + u64 mask = event->attr.sample_regs_intr;
>> + size += hweight64(mask) * sizeof(u64);
>> + }
>> +
>> + header->size += size;
>> + }
>
> Given that the prepare_sample hunk sets both regs_intr fields, the
> addition to perf_sample_data_init() is entirely superfluous, no?

Yes, looks like it, though having an initialization there prevents
any random values for the two fields, in case code tries to check
without first testing the sample_format bitmask. So yes, it is redundant
but may prevent future errors.

2014-07-16 00:10:40

by Stephane Eranian

Subject: Re: [PATCH v2 1/5] perf: add ability to sample machine state on interrupt

On Tue, Jul 15, 2014 at 4:16 PM, Peter Zijlstra <[email protected]> wrote:
> On Tue, Jul 15, 2014 at 02:31:40AM +0200, Stephane Eranian wrote:
>> @@ -595,7 +595,8 @@ struct perf_sample_data {
>> struct perf_callchain_entry *callchain;
>> struct perf_raw_record *raw;
>> struct perf_branch_stack *br_stack;
>> - struct perf_regs_user regs_user;
>> + struct perf_regs regs_user;
>> + struct perf_regs regs_intr;
>> u64 stack_user_size;
>> u64 weight;
>> /*
>> @@ -618,6 +619,8 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
>> data->weight = 0;
>> data->data_src.val = 0;
>> data->txn = 0;
>> + data->regs_intr.abi = PERF_SAMPLE_REGS_ABI_NONE;
>> + data->regs_intr.regs = NULL;
>> }
>
> I don't think we've been very careful here; does the below make sense?
>
> AFAICT we don't need to set stack_user_size at all,
> perf_prepare_sample() will set it when required, and with the change to
> perf_sample_regs_user() the same is true for the regs_user thing.
>
> This again reduces the cost of perf_sample_data_init() to touching a
> single cacheline.
>
> I'm not entirely sure the ____cacheline_aligned makes sense though, the
> previous stack line is probably touched already so any next cacheline is
> the one, and on avg we'd gain 0.5 cachelines worth of data.
>
>
I am okay with the changes in this patch.

>
> ---
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index 707617a8c0f6..d27fec8118b1 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -575,34 +575,40 @@ extern u64 perf_event_read_value(struct perf_event *event,
>
>
> struct perf_sample_data {
> - u64 type;
> + /*
> + * Fields set by perf_sample_data_init(), group so as to
> + * minimize the cachelines touched.
> + */
> + u64 addr;
> + struct perf_raw_record *raw;
> + struct perf_branch_stack *br_stack;
> + u64 period;
> + u64 weight;
> + u64 txn;
> + union perf_mem_data_src data_src;
> +
>
> + /*
> + * The other fields, optionally {set,used} by
> + * perf_{prepare,output}_sample().
> + */
> + u64 type;
> u64 ip;
> struct {
> u32 pid;
> u32 tid;
> } tid_entry;
> u64 time;
> - u64 addr;
> u64 id;
> u64 stream_id;
> struct {
> u32 cpu;
> u32 reserved;
> } cpu_entry;
> - u64 period;
> - union perf_mem_data_src data_src;
> struct perf_callchain_entry *callchain;
> - struct perf_raw_record *raw;
> - struct perf_branch_stack *br_stack;
> struct perf_regs_user regs_user;
> u64 stack_user_size;
> - u64 weight;
> - /*
> - * Transaction flags for abort events:
> - */
> - u64 txn;
> -};
> +} ____cacheline_aligned;
>
> static inline void perf_sample_data_init(struct perf_sample_data *data,
> u64 addr, u64 period)
> @@ -612,9 +618,6 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
> data->raw = NULL;
> data->br_stack = NULL;
> data->period = period;
> - data->regs_user.abi = PERF_SAMPLE_REGS_ABI_NONE;
> - data->regs_user.regs = NULL;
> - data->stack_user_size = 0;
> data->weight = 0;
> data->data_src.val = 0;
> data->txn = 0;
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index c8b53c94d41d..926cd7aafc14 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -4299,6 +4299,8 @@ perf_output_sample_regs(struct perf_output_handle *handle,
> static void perf_sample_regs_user(struct perf_regs_user *regs_user,
> struct pt_regs *regs)
> {
> + regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE;
> +
> if (!user_mode(regs)) {
> if (current->mm)
> regs = task_pt_regs(current);