This short patch series adds the ability to sample the interrupted
machine state for each hardware sample. This is useful to analyze
the state after certain events, for instance for function value
profiling after a call instruction.
The patch extends the interface with a new PERF_SAMPLE_REGS_INTR
sample_type flag. The registers to sample can be named in the
sample_regs_intr bitmask for each event. The name and bit
position for each register is architecture dependent and
provided, just like for PERF_SAMPLE_REGS_USER by asm/perf_regs.h.
The support is similar to PERF_SAMPLE_REGS_USER.
On Intel x86, the series includes support for capturing the
PEBS state as well. When precise sampling is used, the interrupted
state is collected from the PEBS records, at least partially.
The PEBS machine state is a subset of the machine state.
The series provides access to this new feature in perf record
with the -I option. It is possible to display the sampled
register values using perf report -D.
This patch series is the foundation for a future series adding
function value profiling.
Stephane Eranian (4):
perf: add ability to sample machine state on interrupt
perf/x86: add support for sampling PEBS machine state registers
perf tools: add core support for sampling intr machine state regs
perf record: add new -I option to sample interrupted machine state
arch/x86/kernel/cpu/perf_event_intel_ds.c | 19 +++++++++++++
include/linux/perf_event.h | 7 +++--
include/uapi/linux/perf_event.h | 14 ++++++++-
kernel/events/core.c | 44 +++++++++++++++++++++++++++--
tools/perf/builtin-record.c | 2 ++
tools/perf/perf.h | 1 +
tools/perf/util/event.h | 7 +++++
tools/perf/util/evsel.c | 23 ++++++++++++++-
tools/perf/util/session.c | 26 +++++++++++++++++
9 files changed, 137 insertions(+), 6 deletions(-)
--
1.7.9.5
Enable capture of interrupted machine state in each
sample.
Registers to sample are passed per event in the
sample_regs_intr bitmask.
To sample interrupt machine state, the
PERF_SAMPLE_REGS_INTR flag must be passed in
sample_type.
The list of available registers is arch
dependent and provided by asm/perf_regs.h
Registers are laid out as u64 values in the
bit order of sample_regs_intr.
Signed-off-by: Stephane Eranian <[email protected]>
---
include/linux/perf_event.h | 7 +++++--
include/uapi/linux/perf_event.h | 14 ++++++++++++-
kernel/events/core.c | 44 +++++++++++++++++++++++++++++++++++++++--
3 files changed, 60 insertions(+), 5 deletions(-)
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 707617a..4970c1d 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -78,7 +78,7 @@ struct perf_branch_stack {
struct perf_branch_entry entries[0];
};
-struct perf_regs_user {
+struct perf_regs {
__u64 abi;
struct pt_regs *regs;
};
@@ -595,7 +595,8 @@ struct perf_sample_data {
struct perf_callchain_entry *callchain;
struct perf_raw_record *raw;
struct perf_branch_stack *br_stack;
- struct perf_regs_user regs_user;
+ struct perf_regs regs_user;
+ struct perf_regs regs_intr;
u64 stack_user_size;
u64 weight;
/*
@@ -618,6 +619,8 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
data->weight = 0;
data->data_src.val = 0;
data->txn = 0;
+ data->regs_intr.abi = PERF_SAMPLE_REGS_ABI_NONE;
+ data->regs_intr.regs = NULL;
}
extern void perf_output_sample(struct perf_output_handle *handle,
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 9269de2..543cc19 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -137,8 +137,9 @@ enum perf_event_sample_format {
PERF_SAMPLE_DATA_SRC = 1U << 15,
PERF_SAMPLE_IDENTIFIER = 1U << 16,
PERF_SAMPLE_TRANSACTION = 1U << 17,
+ PERF_SAMPLE_REGS_INTR = 1U << 18,
- PERF_SAMPLE_MAX = 1U << 18, /* non-ABI */
+ PERF_SAMPLE_MAX = 1U << 19, /* non-ABI */
};
/*
@@ -334,6 +335,15 @@ struct perf_event_attr {
/* Align to u64. */
__u32 __reserved_2;
+ /*
+ * Defines set of interrupt regs to dump on samples.
+ * state capture on:
+ * - precise = 0: PMU interrupt
+ * - precise > 0: sampled instruction
+ *
+ * See asm/perf_regs.h for details.
+ */
+ __u64 sample_regs_intr;
};
#define perf_flags(attr) (*(&(attr)->read_format + 1))
@@ -686,6 +696,8 @@ enum perf_event_type {
* { u64 weight; } && PERF_SAMPLE_WEIGHT
* { u64 data_src; } && PERF_SAMPLE_DATA_SRC
* { u64 transaction; } && PERF_SAMPLE_TRANSACTION
+ * { u64 abi; # enum perf_sample_regs_abi
+ * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR
* };
*/
PERF_RECORD_SAMPLE = 9,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 57e9190..018939c 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4296,7 +4296,7 @@ perf_output_sample_regs(struct perf_output_handle *handle,
}
}
-static void perf_sample_regs_user(struct perf_regs_user *regs_user,
+static void perf_sample_regs_user(struct perf_regs *regs_user,
struct pt_regs *regs)
{
if (!user_mode(regs)) {
@@ -4312,6 +4312,14 @@ static void perf_sample_regs_user(struct perf_regs_user *regs_user,
}
}
+static void perf_sample_regs_intr(struct perf_regs *regs_user,
+ struct pt_regs *regs)
+{
+ regs_user->regs = regs;
+ regs_user->abi = perf_reg_abi(current);
+}
+
+
/*
* Get remaining task size from user stack pointer.
*
@@ -4693,6 +4701,22 @@ void perf_output_sample(struct perf_output_handle *handle,
if (sample_type & PERF_SAMPLE_TRANSACTION)
perf_output_put(handle, data->txn);
+ if (sample_type & PERF_SAMPLE_REGS_INTR) {
+ u64 abi = data->regs_intr.abi;
+ /*
+ * If there are no regs to dump, notice it through
+ * first u64 being zero (PERF_SAMPLE_REGS_ABI_NONE).
+ */
+ perf_output_put(handle, abi);
+
+ if (abi) {
+ u64 mask = event->attr.sample_regs_intr;
+ perf_output_sample_regs(handle,
+ data->regs_intr.regs,
+ mask);
+ }
+ }
+
if (!event->attr.watermark) {
int wakeup_events = event->attr.wakeup_events;
@@ -4779,7 +4803,7 @@ void perf_prepare_sample(struct perf_event_header *header,
* in case new sample type is added, because we could eat
* up the rest of the sample size.
*/
- struct perf_regs_user *uregs = &data->regs_user;
+ struct perf_regs *uregs = &data->regs_user;
u16 stack_size = event->attr.sample_stack_user;
u16 size = sizeof(u64);
@@ -4800,6 +4824,20 @@ void perf_prepare_sample(struct perf_event_header *header,
data->stack_user_size = stack_size;
header->size += size;
}
+
+ if (sample_type & PERF_SAMPLE_REGS_INTR) {
+ /* regs dump ABI info */
+ int size = sizeof(u64);
+
+ perf_sample_regs_intr(&data->regs_intr, regs);
+
+ if (data->regs_intr.regs) {
+ u64 mask = event->attr.sample_regs_intr;
+ size += hweight64(mask) * sizeof(u64);
+ }
+
+ header->size += size;
+ }
}
static void perf_event_output(struct perf_event *event,
@@ -6994,6 +7032,8 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
ret = -EINVAL;
}
+ if (attr->sample_type & PERF_SAMPLE_REGS_INTR)
+ ret = perf_reg_validate(attr->sample_regs_intr);
out:
return ret;
--
1.8.3.2
Add the infrastructure to set up, collect and report the interrupt
machine state regs which can be captured by the kernel.
Signed-off-by: Stephane Eranian <[email protected]>
---
tools/perf/perf.h | 1 +
tools/perf/util/event.h | 7 +++++++
tools/perf/util/evsel.c | 25 ++++++++++++++++++++++++-
tools/perf/util/session.c | 26 ++++++++++++++++++++++++++
4 files changed, 58 insertions(+), 1 deletion(-)
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 510c65f..309d956 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -54,6 +54,7 @@ struct record_opts {
bool sample_weight;
bool sample_time;
bool period;
+ bool sample_intr_regs;
unsigned int freq;
unsigned int mmap_pages;
unsigned int user_freq;
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index e5dd40a..4ce2e99 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -160,6 +160,7 @@ struct perf_sample {
struct ip_callchain *callchain;
struct branch_stack *branch_stack;
struct regs_dump user_regs;
+ struct regs_dump intr_regs;
struct stack_dump user_stack;
struct sample_read read;
};
@@ -290,6 +291,12 @@ int perf_event__preprocess_sample(const union perf_event *event,
const char *perf_event__name(unsigned int id);
+int perf_event__parse_sample(const union perf_event *event, u64 type,
+ int sample_size, bool sample_id_all,
+ u64 sample_regs_user, u64 sample_stack_user,
+ u64 sample_regs_intr,
+ struct perf_sample *data, bool swapped);
+
size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
u64 read_format);
int perf_event__synthesize_sample(union perf_event *event, u64 type,
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 8606175..00e45d1 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -626,6 +626,11 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts)
if (opts->call_graph_enabled)
perf_evsel__config_callgraph(evsel, opts);
+ if (opts->sample_intr_regs) {
+ attr->sample_regs_intr = PERF_REGS_MASK;
+ perf_evsel__set_sample_bit(evsel, REGS_INTR);
+ }
+
if (target__has_cpu(&opts->target))
perf_evsel__set_sample_bit(evsel, CPU);
@@ -979,6 +984,7 @@ static size_t perf_event_attr__fprintf(struct perf_event_attr *attr, FILE *fp)
ret += PRINT_ATTR_X64(branch_sample_type);
ret += PRINT_ATTR_X64(sample_regs_user);
ret += PRINT_ATTR_U32(sample_stack_user);
+ ret += PRINT_ATTR_X64(sample_regs_intr);
ret += fprintf(fp, "%.60s\n", graph_dotted_line);
@@ -1468,6 +1474,23 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
array++;
}
+ data->intr_regs.abi = PERF_SAMPLE_REGS_ABI_NONE;
+ if (type & PERF_SAMPLE_REGS_INTR) {
+ OVERFLOW_CHECK_u64(array);
+ data->intr_regs.abi = *array;
+ array++;
+
+ if (data->intr_regs.abi != PERF_SAMPLE_REGS_ABI_NONE) {
+ u64 mask = evsel->attr.sample_regs_intr;
+
+ sz = hweight_long(mask) * sizeof(u64);
+ OVERFLOW_CHECK(array, sz, max_size);
+ data->intr_regs.mask = mask;
+ data->intr_regs.regs = (u64 *)array;
+ array = (void *)array + sz;
+ }
+ }
+
return 0;
}
@@ -1870,7 +1893,7 @@ static int sample_type__fprintf(FILE *fp, bool *first, u64 value)
bit_name(READ), bit_name(CALLCHAIN), bit_name(ID), bit_name(CPU),
bit_name(PERIOD), bit_name(STREAM_ID), bit_name(RAW),
bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER),
- bit_name(IDENTIFIER),
+ bit_name(IDENTIFIER), bit_name(REGS_INTR),
{ .name = NULL, }
};
#undef bit_name
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 64a186e..6f112ca 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -713,6 +713,29 @@ static void regs_user__printf(struct perf_sample *sample)
}
}
+static const char *regs_abi[]= {
+ [PERF_SAMPLE_REGS_ABI_NONE] = "none",
+ [PERF_SAMPLE_REGS_ABI_32] = "32-bit",
+ [PERF_SAMPLE_REGS_ABI_64] = "64-bit",
+};
+
+static inline const char *regs_dump_abi(struct regs_dump *d)
+{
+ if (d->abi > PERF_SAMPLE_REGS_ABI_64)
+ return "???";
+ return regs_abi[d->abi];
+}
+
+static void regs_intr__printf(struct perf_sample *sample, u64 mask)
+{
+ struct regs_dump *intr_regs = &sample->intr_regs;
+
+ if (intr_regs->regs) {
+ printf("... intr regs: mask 0x%" PRIx64 " ABI: %s\n", mask, regs_dump_abi(intr_regs));
+ regs_dump__printf(mask, intr_regs->regs);
+ }
+}
+
static void stack_user__printf(struct stack_dump *dump)
{
printf("... ustack: size %" PRIu64 ", offset 0x%x\n",
@@ -809,6 +832,9 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event,
if (sample_type & PERF_SAMPLE_REGS_USER)
regs_user__printf(sample);
+ if (sample_type & PERF_SAMPLE_REGS_INTR)
+ regs_intr__printf(sample, evsel->attr.sample_regs_intr);
+
if (sample_type & PERF_SAMPLE_STACK_USER)
stack_user__printf(&sample->user_stack);
--
1.8.3.2
Add -I option to capture machine state registers at interrupt.
Use perf report -D to display the sampled register values
in each sample.
Signed-off-by: Stephane Eranian <[email protected]>
---
tools/perf/builtin-record.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 378b85b..17c45f8 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -876,6 +876,8 @@ const struct option record_options[] = {
"sample transaction flags (special events only)"),
OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
"use per-thread mmaps"),
+ OPT_BOOLEAN('I', "sample-intr-regs", &record.opts.sample_intr_regs,
+ "Sample machine register on interrupt"),
OPT_END()
};
--
1.8.3.2
PEBS can capture machine state regs at retirement of the sampled
instructions. When precise sampling is enabled on an event, PEBS
is used, so substitute the interrupted state with the PEBS state.
Note that not all registers are captured by PEBS. Those missing
are replaced by the interrupt state counterparts.
Signed-off-by: Stephane Eranian <[email protected]>
---
arch/x86/kernel/cpu/perf_event_intel_ds.c | 19 +++++++++++++++++++
1 file changed, 19 insertions(+)
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 980970c..d612bcd 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -925,6 +925,25 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
regs.bp = pebs->bp;
regs.sp = pebs->sp;
+ if (sample_type & PERF_SAMPLE_REGS_INTR) {
+ regs.ax = pebs->ax;
+ regs.bx = pebs->bx;
+ regs.cx = pebs->cx;
+ regs.si = pebs->si;
+ regs.di = pebs->di;
+
+ regs.r8 = pebs->r8;
+ regs.r9 = pebs->r9;
+ regs.r10 = pebs->r10;
+ regs.r11 = pebs->r11;
+ regs.r12 = pebs->r12;
+ regs.r13 = pebs->r13;
+ regs.r14 = pebs->r14;
+ regs.r14 = pebs->r15;
+
+ data.regs_intr.regs = &regs;
+ }
+
if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
regs.ip = pebs->real_ip;
regs.flags |= PERF_EFLAGS_EXACT;
--
1.8.3.2
Hi Stephane,
On Wed, 9 Jul 2014 00:15:55 +0200, Stephane Eranian wrote:
> This short patch series add the ability to sample the interrupted
> machine state for each hardware sample. This is useful to analyze
> the state after certain events, for instance for function value
> profiling after a call instruction.
>
> The patch extends the interface with a new PERF_SAMPLE_REGS_INTR
> smaple_type flag. The register to sample can be named in the
> sample_regs_intr bitmask for each event. The name and bit
> position for each register is architecture dependent and
> provided, just like for PERF_SAMPLE_REGS_USER by asm/perf_regs.h.
>
> The support is similar to PERF_SAMPLE_REGS_USER.
>
> On Intel x86, the series includes support for capturing the
> PEBS state as well. When precise sampling is used, the interrupted
> state is collect from the PEBS records, at least partially.
> The PEBS machine state is a subset of the machine state.
>
> The series provides access to this new feature in perf record
> with the -I option. It is possible to display the sampled
> register values using perf report -D.
>
> This patch series is the fundation for a future series adding
> function value profiling.
Probably a silly question: what is the function value profiling?
Anyway, I'll try to review it tomorrow. :)
Thanks,
Namhyung
>
> Stephane Eranian (4):
> perf: add ability to sample machine state on interrupt
> perf/x86: add support for sampling PEBS machine state registers
> perf tools: add core support for sampling intr machine state regs
> perf record: add new -I option to sample interrupted machine state
>
> arch/x86/kernel/cpu/perf_event_intel_ds.c | 19 +++++++++++++
> include/linux/perf_event.h | 7 +++--
> include/uapi/linux/perf_event.h | 14 ++++++++-
> kernel/events/core.c | 44 +++++++++++++++++++++++++++--
> tools/perf/builtin-record.c | 2 ++
> tools/perf/perf.h | 1 +
> tools/perf/util/event.h | 7 +++++
> tools/perf/util/evsel.c | 23 ++++++++++++++-
> tools/perf/util/session.c | 26 +++++++++++++++++
> 9 files changed, 137 insertions(+), 6 deletions(-)
Em Wed, Jul 09, 2014 at 05:26:20PM +0900, Namhyung Kim escreveu:
> On Wed, 9 Jul 2014 00:15:55 +0200, Stephane Eranian wrote:
> > This patch series is the fundation for a future series adding
> > function value profiling.
>
> Probably a silly question: what is the function value profiling?
I think he wants to sample the values of parameters and local variables,
etc, right?
http://lists.cs.uiuc.edu/pipermail/llvmdev/2013-June/062950.html lists
some things you can do with that.
Stephane, is that what you mean by "value profiling"?
- Arnaldo
Em Wed, Jul 09, 2014 at 12:15:56AM +0200, Stephane Eranian escreveu:
> Enable capture of interrupted machine state in each
> sample.
>
> Registers to sample are passed per event in the
> sample_regs_intr bitmask.
>
> To sample interrupt machine state, the
> PERF_SAMPLE_INTR_REGS must be passed in
> sample_type.
>
> The list of available registers is arch
> dependent and provided by asm/perf_regs.h
>
> Registers are laid out as u64 in the order
> of the bit order of sample_intr_regs.
I would have broken this into smaller pieces, with the first patch doing
the prep of renaming perf_regs_user to perf_regs, for instance.
> Signed-off-by: Stephane Eranian <[email protected]>
> ---
> include/linux/perf_event.h | 7 +++++--
> include/uapi/linux/perf_event.h | 14 ++++++++++++-
> kernel/events/core.c | 44 +++++++++++++++++++++++++++++++++++++++--
> 3 files changed, 60 insertions(+), 5 deletions(-)
>
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index 707617a..4970c1d 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -78,7 +78,7 @@ struct perf_branch_stack {
> struct perf_branch_entry entries[0];
> };
>
> -struct perf_regs_user {
> +struct perf_regs {
> __u64 abi;
> struct pt_regs *regs;
> };
> @@ -595,7 +595,8 @@ struct perf_sample_data {
> struct perf_callchain_entry *callchain;
> struct perf_raw_record *raw;
> struct perf_branch_stack *br_stack;
> - struct perf_regs_user regs_user;
> + struct perf_regs regs_user;
> + struct perf_regs regs_intr;
> u64 stack_user_size;
> u64 weight;
> /*
> @@ -618,6 +619,8 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
> data->weight = 0;
> data->data_src.val = 0;
> data->txn = 0;
> + data->regs_intr.abi = PERF_SAMPLE_REGS_ABI_NONE;
> + data->regs_intr.regs = NULL;
Nit:
This would be better right after the equivalent ones for regs_user, I
had to go to perf_sample_data_init() to check if that was done, yes, it
is right before data->weight = 0 :-)
Reading on...
On Wed, Jul 9, 2014 at 4:53 PM, Arnaldo Carvalho de Melo
<[email protected]> wrote:
> Em Wed, Jul 09, 2014 at 05:26:20PM +0900, Namhyung Kim escreveu:
>> On Wed, 9 Jul 2014 00:15:55 +0200, Stephane Eranian wrote:
>> > This patch series is the fundation for a future series adding
>> > function value profiling.
>>
>> Probably a silly question: what is the function value profiling?
>
> I think he wants to sample the values of parameters and local variables,
> etc, right?
>
> http://lists.cs.uiuc.edu/pipermail/llvmdev/2013-June/062950.html lists
> some things you can do with that.
>
> Stephane, is that what you mean by "value profiling"?
>
Value profiling means sampling the values of function arguments passed in
registers.
> +int perf_event__parse_sample(const union perf_event *event, u64 type,
> + int sample_size, bool sample_id_all,
> + u64 sample_regs_user, u64 sample_stack_user,
> + u64 sample_regs_intr,
> + struct perf_sample *data, bool swapped);
Why is this only a prototype?
-Andi
--
[email protected] -- Speaking for myself only
> Value profiling means sampling the values of function arguments passed in
> registers.
You can use it for more, e.g. it's also useful to sample loop iterations.
-Andi
--
[email protected] -- Speaking for myself only
On Wed, Jul 09, 2014 at 12:15:56AM +0200, Stephane Eranian wrote:
> Enable capture of interrupted machine state in each
> sample.
Looks good to me.
Reviewed-by: Andi Kleen <[email protected]>
-Andi
On Wed, Jul 09, 2014 at 12:15:59AM +0200, Stephane Eranian wrote:
> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> index 378b85b..17c45f8 100644
> --- a/tools/perf/builtin-record.c
> +++ b/tools/perf/builtin-record.c
> @@ -876,6 +876,8 @@ const struct option record_options[] = {
> "sample transaction flags (special events only)"),
> OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
> "use per-thread mmaps"),
> + OPT_BOOLEAN('I', "sample-intr-regs", &record.opts.sample_intr_regs,
Long option is too long. --user-regs or so?
-Andi
On Wed, Jul 9, 2014 at 5:47 PM, Andi Kleen <[email protected]> wrote:
>> Value profiling means sampling the values of function arguments passed in
>> registers.
>
> You can use it for more, e.g. it's also useful to sample loop iterations.
>
Correct.
> -Andi
>
> --
> [email protected] -- Speaking for myself only
On Wed, Jul 09, 2014 at 12:15:56AM +0200, Stephane Eranian wrote:
> Enable capture of interrupted machine state in each
> sample.
>
> Registers to sample are passed per event in the
> sample_regs_intr bitmask.
>
> To sample interrupt machine state, the
> PERF_SAMPLE_INTR_REGS must be passed in
> sample_type.
>
> The list of available registers is arch
> dependent and provided by asm/perf_regs.h
>
> Registers are laid out as u64 in the order
> of the bit order of sample_intr_regs.
SNIP
> };
>
> /*
> @@ -334,6 +335,15 @@ struct perf_event_attr {
>
> /* Align to u64. */
> __u32 __reserved_2;
> + /*
> + * Defines set of user regs to dump on samples.
^^ interrupt
jirka
On Wed, Jul 09, 2014 at 12:15:56AM +0200, Stephane Eranian wrote:
> Enable capture of interrupted machine state in each
> sample.
>
> Registers to sample are passed per event in the
> sample_regs_intr bitmask.
>
> To sample interrupt machine state, the
> PERF_SAMPLE_INTR_REGS must be passed in
> sample_type.
>
> The list of available registers is arch
> dependent and provided by asm/perf_regs.h
>
> Registers are laid out as u64 in the order
> of the bit order of sample_intr_regs.
^^^ sample_regs_intr
it just crossed my mind that we may want to use 'kernel' here and call it 'sample_regs_kernel',
since we already have sample_regs_user. Also for events other than HW, the sample
does not happen in interrupt.. anyway:
Reviewed-by: Jiri Olsa <[email protected]>
jirka
On Wed, Jul 09, 2014 at 12:15:56AM +0200, Stephane Eranian wrote:
> Enable capture of interrupted machine state in each
> sample.
>
> Registers to sample are passed per event in the
> sample_regs_intr bitmask.
>
> To sample interrupt machine state, the
> PERF_SAMPLE_INTR_REGS must be passed in
> sample_type.
SNIP
> index 57e9190..018939c 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -4296,7 +4296,7 @@ perf_output_sample_regs(struct perf_output_handle *handle,
> }
> }
>
> -static void perf_sample_regs_user(struct perf_regs_user *regs_user,
> +static void perf_sample_regs_user(struct perf_regs *regs_user,
> struct pt_regs *regs)
> {
> if (!user_mode(regs)) {
> @@ -4312,6 +4312,14 @@ static void perf_sample_regs_user(struct perf_regs_user *regs_user,
> }
> }
>
> +static void perf_sample_regs_intr(struct perf_regs *regs_user,
> + struct pt_regs *regs)
> +{
> + regs_user->regs = regs;
> + regs_user->abi = perf_reg_abi(current);
^^^ regs_intr
jirka
On Wed, Jul 09, 2014 at 12:15:57AM +0200, Stephane Eranian wrote:
> PEBS can capture machine state regs at retirement of the sampled
> instructions. When precise sampling is enabled on an event, PEBS
> is used, so substitute the interrupted state with the PEBS state.
> Note that not all registers are captured by PEBS. Those missing
> are replaced by the interrupt state counter-parts.
>
> Signed-off-by: Stephane Eranian <[email protected]>
> ---
> arch/x86/kernel/cpu/perf_event_intel_ds.c | 19 +++++++++++++++++++
> 1 file changed, 19 insertions(+)
>
> diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
> index 980970c..d612bcd 100644
> --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
> +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
> @@ -925,6 +925,25 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
> regs.bp = pebs->bp;
> regs.sp = pebs->sp;
>
> + if (sample_type & PERF_SAMPLE_REGS_INTR) {
> + regs.ax = pebs->ax;
> + regs.bx = pebs->bx;
> + regs.cx = pebs->cx;
> + regs.si = pebs->si;
> + regs.di = pebs->di;
> +
> + regs.r8 = pebs->r8;
> + regs.r9 = pebs->r9;
> + regs.r10 = pebs->r10;
> + regs.r11 = pebs->r11;
> + regs.r12 = pebs->r12;
> + regs.r13 = pebs->r13;
> + regs.r14 = pebs->r14;
> + regs.r14 = pebs->r15;
> +
> + data.regs_intr.regs = &regs;
this assignment is not necessary, because you assign this regs pointer
again in perf_prepare_sample -> perf_sample_regs_intr later
jirka
On Wed, Jul 09, 2014 at 12:15:58AM +0200, Stephane Eranian wrote:
> Add the infrastructure to setup, collect and report the interrupt
> machine state regs which can be captured by the kernel.
>
> Signed-off-by: Stephane Eranian <[email protected]>
> ---
SNIP
> +static const char *regs_abi[]= {
> + [PERF_SAMPLE_REGS_ABI_NONE] = "none",
> + [PERF_SAMPLE_REGS_ABI_32] = "32-bit",
> + [PERF_SAMPLE_REGS_ABI_64] = "64-bit",
> +};
> +
> +static inline const char *regs_dump_abi(struct regs_dump *d)
> +{
> + if (d->abi > PERF_SAMPLE_REGS_ABI_64)
> + return "???";
> + return regs_abi[d->abi];
> +}
> +
> +static void regs_intr__printf(struct perf_sample *sample, u64 mask)
> +{
> + struct regs_dump *intr_regs = &sample->intr_regs;
> +
> + if (intr_regs->regs) {
> + printf("... intr regs: mask 0x%" PRIx64 " ABI: %s\n", mask, regs_dump_abi(intr_regs));
> + regs_dump__printf(mask, intr_regs->regs);
> + }
> +}
could you please unify/share this with regs_user__printf,
so we could get same output for registers dump
jirka
On Wed, Jul 09, 2014 at 12:15:58AM +0200, Stephane Eranian wrote:
> Add the infrastructure to setup, collect and report the interrupt
> machine state regs which can be captured by the kernel.
>
SNIP
> index 8606175..00e45d1 100644
> --- a/tools/perf/util/evsel.c
> +++ b/tools/perf/util/evsel.c
> @@ -626,6 +626,11 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts)
> if (opts->call_graph_enabled)
> perf_evsel__config_callgraph(evsel, opts);
>
> + if (opts->sample_intr_regs) {
> + attr->sample_regs_intr = PERF_REGS_MASK;
> + perf_evsel__set_sample_bit(evsel, REGS_INTR);
> + }
> +
> if (target__has_cpu(&opts->target))
> perf_evsel__set_sample_bit(evsel, CPU);
>
> @@ -979,6 +984,7 @@ static size_t perf_event_attr__fprintf(struct perf_event_attr *attr, FILE *fp)
> ret += PRINT_ATTR_X64(branch_sample_type);
> ret += PRINT_ATTR_X64(sample_regs_user);
> ret += PRINT_ATTR_U32(sample_stack_user);
> + ret += PRINT_ATTR_X64(sample_regs_intr);
>
> ret += fprintf(fp, "%.60s\n", graph_dotted_line);
>
> @@ -1468,6 +1474,23 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
> array++;
> }
>
> + data->intr_regs.abi = PERF_SAMPLE_REGS_ABI_NONE;
not necessary, data is zeroed in the top
jirka
On Wed, Jul 09, 2014 at 12:15:58AM +0200, Stephane Eranian wrote:
> Add the infrastructure to setup, collect and report the interrupt
> machine state regs which can be captured by the kernel.
>
> Signed-off-by: Stephane Eranian <[email protected]>
> ---
> tools/perf/perf.h | 1 +
> tools/perf/util/event.h | 7 +++++++
> tools/perf/util/evsel.c | 25 ++++++++++++++++++++++++-
> tools/perf/util/session.c | 26 ++++++++++++++++++++++++++
> 4 files changed, 58 insertions(+), 1 deletion(-)
>
> diff --git a/tools/perf/perf.h b/tools/perf/perf.h
> index 510c65f..309d956 100644
> --- a/tools/perf/perf.h
> +++ b/tools/perf/perf.h
> @@ -54,6 +54,7 @@ struct record_opts {
> bool sample_weight;
> bool sample_time;
> bool period;
> + bool sample_intr_regs;
could you please also update tests/sample-parsing.c,
so we get some basic automated testing of this
thanks,
jirka
Hi Stephane,
On Wed, 9 Jul 2014 00:15:59 +0200, Stephane Eranian wrote:
> Add -I option to capture machine state registers at interrupt.
You need to add it in the Documentation too.
Thanks,
Namhyung
>
> Use perf report -D to display the sampled register values
> in each sample.
>
> Signed-off-by: Stephane Eranian <[email protected]>
> ---
> tools/perf/builtin-record.c | 2 ++
> 1 file changed, 2 insertions(+)
>
> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> index 378b85b..17c45f8 100644
> --- a/tools/perf/builtin-record.c
> +++ b/tools/perf/builtin-record.c
> @@ -876,6 +876,8 @@ const struct option record_options[] = {
> "sample transaction flags (special events only)"),
> OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
> "use per-thread mmaps"),
> + OPT_BOOLEAN('I', "sample-intr-regs", &record.opts.sample_intr_regs,
> + "Sample machine register on interrupt"),
> OPT_END()
> };
On Mon, Jul 14, 2014 at 10:00 AM, Namhyung Kim <[email protected]> wrote:
> Hi Stephane,
>
> On Wed, 9 Jul 2014 00:15:59 +0200, Stephane Eranian wrote:
>> Add -I option to capture machine state registers at interrupt.
>
> You need to add it in the Documentation too.
>
I added it for v2.
> Thanks,
> Namhyung
>
>
>>
>> Use perf report -D to display the sampled register values
>> in each sample.
>>
>> Signed-off-by: Stephane Eranian <[email protected]>
>> ---
>> tools/perf/builtin-record.c | 2 ++
>> 1 file changed, 2 insertions(+)
>>
>> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
>> index 378b85b..17c45f8 100644
>> --- a/tools/perf/builtin-record.c
>> +++ b/tools/perf/builtin-record.c
>> @@ -876,6 +876,8 @@ const struct option record_options[] = {
>> "sample transaction flags (special events only)"),
>> OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
>> "use per-thread mmaps"),
>> + OPT_BOOLEAN('I', "sample-intr-regs", &record.opts.sample_intr_regs,
>> + "Sample machine register on interrupt"),
>> OPT_END()
>> };
On Fri, Jul 11, 2014 at 10:25 AM, Jiri Olsa <[email protected]> wrote:
> On Wed, Jul 09, 2014 at 12:15:58AM +0200, Stephane Eranian wrote:
>> Add the infrastructure to setup, collect and report the interrupt
>> machine state regs which can be captured by the kernel.
>>
>
> SNIP
>
>> index 8606175..00e45d1 100644
>> --- a/tools/perf/util/evsel.c
>> +++ b/tools/perf/util/evsel.c
>> @@ -626,6 +626,11 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts)
>> if (opts->call_graph_enabled)
>> perf_evsel__config_callgraph(evsel, opts);
>>
>> + if (opts->sample_intr_regs) {
>> + attr->sample_regs_intr = PERF_REGS_MASK;
>> + perf_evsel__set_sample_bit(evsel, REGS_INTR);
>> + }
>> +
>> if (target__has_cpu(&opts->target))
>> perf_evsel__set_sample_bit(evsel, CPU);
>>
>> @@ -979,6 +984,7 @@ static size_t perf_event_attr__fprintf(struct perf_event_attr *attr, FILE *fp)
>> ret += PRINT_ATTR_X64(branch_sample_type);
>> ret += PRINT_ATTR_X64(sample_regs_user);
>> ret += PRINT_ATTR_U32(sample_stack_user);
>> + ret += PRINT_ATTR_X64(sample_regs_intr);
>>
>> ret += fprintf(fp, "%.60s\n", graph_dotted_line);
>>
>> @@ -1468,6 +1474,23 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
>> array++;
>> }
>>
>> + data->intr_regs.abi = PERF_SAMPLE_REGS_ABI_NONE;
>
> not necessary, data is zeroed in the top
>
Don't like that too much. You are using an enum. So why not explicitly
initialize instead of relying on ABI_NONE = 0.