Subject: [PATCH v2] perf/x86-ibs: Precise event sampling with IBS for AMD CPUs

Updated version. Level 1 and 2 are handled the same way now. Don't
drop samples in precise level 2 if rip is invalid, instead support the
PERF_EFLAGS_EXACT flag.

No changes in other patches of

[PATCH 00/12] perf/x86-ibs: Precise event sampling with IBS for AMD CPUs

-Robert



From 6d646cefdea9958c3401110caecc958b41f6e84d Mon Sep 17 00:00:00 2001
From: Robert Richter <[email protected]>
Date: Mon, 12 Mar 2012 12:54:32 +0100
Subject: [PATCH] perf/x86-ibs: Precise event sampling with IBS for AMD CPUs

This patch adds support for precise event sampling with IBS. There are
two counting modes to count either cycles or micro-ops. If the
corresponding performance counter events (hw events) are set up with
the precise flag set, the request is redirected to the ibs pmu:

perf record -a -e cpu-cycles:p ... # use ibs op counting cycle count
perf record -a -e r076:p ... # same as -e cpu-cycles:p
perf record -a -e r0C1:p ... # use ibs op counting micro-ops

Each ibs sample contains a linear address that points to the
instruction that was causing the sample to trigger. With ibs we have
skid 0. Thus, ibs supports precise levels 1 and 2. Samples are marked
with the PERF_EFLAGS_EXACT flag set. In rare cases the rip is invalid
when IBS was not able to record the rip correctly. Then the
PERF_EFLAGS_EXACT flag is cleared and the rip is taken from pt_regs.

V2:
* don't drop samples in precise level 2 if rip is invalid, instead
support the PERF_EFLAGS_EXACT flag

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/kernel/cpu/perf_event_amd.c | 7 +++-
arch/x86/kernel/cpu/perf_event_amd_ibs.c | 73 ++++++++++++++++++++++++++++-
2 files changed, 76 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 95e7fe1..4be3463 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -134,8 +134,13 @@ static u64 amd_pmu_event_map(int hw_event)

static int amd_pmu_hw_config(struct perf_event *event)
{
- int ret = x86_pmu_hw_config(event);
+ int ret;

+ /* pass precise event sampling to ibs: */
+ if (event->attr.precise_ip && get_ibs_caps())
+ return -ENOENT;
+
+ ret = x86_pmu_hw_config(event);
if (ret)
return ret;

diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
index 0321b64..117b0aa 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -145,17 +145,80 @@ static struct perf_ibs *get_ibs_pmu(int type)
return NULL;
}

+/*
+ * Use IBS for precise event sampling:
+ *
+ * perf record -a -e cpu-cycles:p ... # use ibs op counting cycle count
+ * perf record -a -e r076:p ... # same as -e cpu-cycles:p
+ * perf record -a -e r0C1:p ... # use ibs op counting micro-ops
+ *
+ * IbsOpCntCtl (bit 19) of IBS Execution Control Register (IbsOpCtl,
+ * MSRC001_1033) is used to select either cycle or micro-ops counting
+ * mode.
+ *
+ * The rip of IBS samples has skid 0. Thus, IBS supports precise
+ * levels 1 and 2 and the PERF_EFLAGS_EXACT is set. In rare cases the
+ * rip is invalid when IBS was not able to record the rip correctly.
+ * We clear PERF_EFLAGS_EXACT and take the rip from pt_regs then.
+ *
+ */
+static int perf_ibs_precise_event(struct perf_event *event, u64 *config)
+{
+ switch (event->attr.precise_ip) {
+ case 0:
+ return -ENOENT;
+ case 1:
+ case 2:
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ switch (event->attr.type) {
+ case PERF_TYPE_HARDWARE:
+ switch (event->attr.config) {
+ case PERF_COUNT_HW_CPU_CYCLES:
+ *config = 0;
+ return 0;
+ }
+ break;
+ case PERF_TYPE_RAW:
+ switch (event->attr.config) {
+ case 0x0076:
+ *config = 0;
+ return 0;
+ case 0x00C1:
+ *config = IBS_OP_CNT_CTL;
+ return 0;
+ }
+ break;
+ default:
+ return -ENOENT;
+ }
+
+ return -EOPNOTSUPP;
+}
+
static int perf_ibs_init(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
struct perf_ibs *perf_ibs;
u64 max_cnt, config;
+ int ret;

perf_ibs = get_ibs_pmu(event->attr.type);
- if (!perf_ibs)
+ if (perf_ibs) {
+ config = event->attr.config;
+ } else {
+ perf_ibs = &perf_ibs_op;
+ ret = perf_ibs_precise_event(event, &config);
+ if (ret)
+ return ret;
+ }
+
+ if (event->pmu != &perf_ibs->pmu)
return -ENOENT;

- config = event->attr.config;
if (config & ~perf_ibs->config_mask)
return -EINVAL;

@@ -437,8 +500,12 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
ibs_data.size = sizeof(u64) * size;

regs = *iregs;
- if (!check_rip || !(ibs_data.regs[2] & IBS_RIP_INVALID))
+ if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) {
+ regs.flags &= ~PERF_EFLAGS_EXACT;
+ } else {
instruction_pointer_set(&regs, ibs_data.regs[1]);
+ regs.flags |= PERF_EFLAGS_EXACT;
+ }

if (event->attr.sample_type & PERF_SAMPLE_RAW) {
raw.size = sizeof(u32) + ibs_data.size;
--
1.7.8.4



--
Advanced Micro Devices, Inc.
Operating System Research Center


2012-05-02 11:14:57

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH v2] perf/x86-ibs: Precise event sampling with IBS for AMD CPUs

On Wed, 2012-05-02 at 12:33 +0200, Robert Richter wrote:
> Updated version. Level 1 and 2 are handled the same way now. Don't
> drop samples in precise level 2 if rip is invalid, instead support the
> PERF_EFLAGS_EXACT flag.
>
> No changes in other patches of
>
> [PATCH 00/12] perf/x86-ibs: Precise event sampling with IBS for AMD CPUs

Thanks!, I managed to stomp all patches on top of -tip and shall be
trying it out on my aging opteron-1216.

2012-05-04 17:53:59

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH v2] perf/x86-ibs: Precise event sampling with IBS for AMD CPUs

On Wed, 2012-05-02 at 13:14 +0200, Peter Zijlstra wrote:
> On Wed, 2012-05-02 at 12:33 +0200, Robert Richter wrote:
> > Updated version. Level 1 and 2 are handled the same way now. Don't
> > drop samples in precise level 2 if rip is invalid, instead support the
> > PERF_EFLAGS_EXACT flag.
> >
> > No changes in other patches of
> >
> > [PATCH 00/12] perf/x86-ibs: Precise event sampling with IBS for AMD CPUs
>
> Thanks!, I managed to stomp all patches on top of -tip and shall be
> trying it out on my aging opteron-1216.

Hmm, that box isn't reporting X86_FEATURE_IBS, a quick trip to Wikipedia
tells me this is a K8 (Santa Ana), not Fam 10h. Means I don't actually
have any hardware to test this on :-(

I'll have to throw it to Ingo then, IIRC he's got an Istanbul part.

Subject: [tip:perf/core] perf/x86-ibs: Precise event sampling with IBS for AMD CPUs

Commit-ID: 450bbd493d436f9eadd1b7828158f37559f26674
Gitweb: http://git.kernel.org/tip/450bbd493d436f9eadd1b7828158f37559f26674
Author: Robert Richter <[email protected]>
AuthorDate: Mon, 12 Mar 2012 12:54:32 +0100
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 9 May 2012 15:23:14 +0200

perf/x86-ibs: Precise event sampling with IBS for AMD CPUs

This patch adds support for precise event sampling with IBS. There are
two counting modes to count either cycles or micro-ops. If the
corresponding performance counter events (hw events) are set up with
the precise flag set, the request is redirected to the ibs pmu:

perf record -a -e cpu-cycles:p ... # use ibs op counting cycle count
perf record -a -e r076:p ... # same as -e cpu-cycles:p
perf record -a -e r0C1:p ... # use ibs op counting micro-ops

Each ibs sample contains a linear address that points to the
instruction that was causing the sample to trigger. With ibs we have
skid 0. Thus, ibs supports precise levels 1 and 2. Samples are marked
with the PERF_EFLAGS_EXACT flag set. In rare cases the rip is invalid
when IBS was not able to record the rip correctly. Then the
PERF_EFLAGS_EXACT flag is cleared and the rip is taken from pt_regs.

V2:
* don't drop samples in precise level 2 if rip is invalid, instead
support the PERF_EFLAGS_EXACT flag

Signed-off-by: Robert Richter <[email protected]>
Signed-off-by: Peter Zijlstra <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/kernel/cpu/perf_event_amd.c | 7 +++-
arch/x86/kernel/cpu/perf_event_amd_ibs.c | 73 ++++++++++++++++++++++++++++-
2 files changed, 76 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 589286f..6565226 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -134,8 +134,13 @@ static u64 amd_pmu_event_map(int hw_event)

static int amd_pmu_hw_config(struct perf_event *event)
{
- int ret = x86_pmu_hw_config(event);
+ int ret;

+ /* pass precise event sampling to ibs: */
+ if (event->attr.precise_ip && get_ibs_caps())
+ return -ENOENT;
+
+ ret = x86_pmu_hw_config(event);
if (ret)
return ret;

diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
index cc1f329..34dfa85 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -145,17 +145,80 @@ static struct perf_ibs *get_ibs_pmu(int type)
return NULL;
}

+/*
+ * Use IBS for precise event sampling:
+ *
+ * perf record -a -e cpu-cycles:p ... # use ibs op counting cycle count
+ * perf record -a -e r076:p ... # same as -e cpu-cycles:p
+ * perf record -a -e r0C1:p ... # use ibs op counting micro-ops
+ *
+ * IbsOpCntCtl (bit 19) of IBS Execution Control Register (IbsOpCtl,
+ * MSRC001_1033) is used to select either cycle or micro-ops counting
+ * mode.
+ *
+ * The rip of IBS samples has skid 0. Thus, IBS supports precise
+ * levels 1 and 2 and the PERF_EFLAGS_EXACT is set. In rare cases the
+ * rip is invalid when IBS was not able to record the rip correctly.
+ * We clear PERF_EFLAGS_EXACT and take the rip from pt_regs then.
+ *
+ */
+static int perf_ibs_precise_event(struct perf_event *event, u64 *config)
+{
+ switch (event->attr.precise_ip) {
+ case 0:
+ return -ENOENT;
+ case 1:
+ case 2:
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ switch (event->attr.type) {
+ case PERF_TYPE_HARDWARE:
+ switch (event->attr.config) {
+ case PERF_COUNT_HW_CPU_CYCLES:
+ *config = 0;
+ return 0;
+ }
+ break;
+ case PERF_TYPE_RAW:
+ switch (event->attr.config) {
+ case 0x0076:
+ *config = 0;
+ return 0;
+ case 0x00C1:
+ *config = IBS_OP_CNT_CTL;
+ return 0;
+ }
+ break;
+ default:
+ return -ENOENT;
+ }
+
+ return -EOPNOTSUPP;
+}
+
static int perf_ibs_init(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
struct perf_ibs *perf_ibs;
u64 max_cnt, config;
+ int ret;

perf_ibs = get_ibs_pmu(event->attr.type);
- if (!perf_ibs)
+ if (perf_ibs) {
+ config = event->attr.config;
+ } else {
+ perf_ibs = &perf_ibs_op;
+ ret = perf_ibs_precise_event(event, &config);
+ if (ret)
+ return ret;
+ }
+
+ if (event->pmu != &perf_ibs->pmu)
return -ENOENT;

- config = event->attr.config;
if (config & ~perf_ibs->config_mask)
return -EINVAL;

@@ -437,8 +500,12 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
ibs_data.size = sizeof(u64) * size;

regs = *iregs;
- if (!check_rip || !(ibs_data.regs[2] & IBS_RIP_INVALID))
+ if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) {
+ regs.flags &= ~PERF_EFLAGS_EXACT;
+ } else {
instruction_pointer_set(&regs, ibs_data.regs[1]);
+ regs.flags |= PERF_EFLAGS_EXACT;
+ }

if (event->attr.sample_type & PERF_SAMPLE_RAW) {
raw.size = sizeof(u32) + ibs_data.size;