From: Andi Kleen <[email protected]>
Very similar to Sandy Bridge, but there is no PEBS problem.
Signed-off-by: Andi Kleen <[email protected]>
---
arch/x86/kernel/cpu/perf_event_intel.c | 9 ++++++++-
1 files changed, 8 insertions(+), 1 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 166546e..1249c56 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1698,6 +1698,7 @@ __init int intel_pmu_init(void)
union cpuid10_ebx ebx;
unsigned int unused;
int version;
+ char *name;
if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
switch (boot_cpu_data.x86) {
@@ -1839,9 +1840,15 @@ __init int intel_pmu_init(void)
pr_cont("Westmere events, ");
break;
+ case 58: /* IvyBridge */
+ name = "Ivy";
+ goto snb_ivb_common;
+
case 42: /* SandyBridge */
x86_add_quirk(intel_sandybridge_quirk);
case 45: /* SandyBridge, "Romely-EP" */
+ name = "Sandy";
+ snb_ivb_common:
memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
@@ -1861,7 +1868,7 @@ __init int intel_pmu_init(void)
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
X86_CONFIG(.event=0xb1, .umask=0x01, .inv=1, .cmask=1);
- pr_cont("SandyBridge events, ");
+ pr_cont("%sBridge events, ", name);
break;
default:
--
1.7.7.6
From: Andi Kleen <[email protected]>
Even with precise profiling, Intel CPUs have a "skid": the sample
triggers a few cycles after the instruction that caused it, so in some
cases there can be systematic errors where expensive instructions never
show up in the profile.

Sandy Bridge added a new PDIR instruction-retired event that randomizes
the sampling slightly. This corrects for the systematic error, so that
in most cases the correct instruction gets the profile hits.

Unfortunately the SandyBridge version only worked on an otherwise
quiescent CPU and was difficult to use. On IvyBridge this restriction
is gone, so the event can be used more widely.
This only works for retired instructions.
I enabled it -- somewhat arbitrarily -- for two 'p's or more.

To use it:

	perf record -e instructions:pp ...
This provides a more precise alternative to the usual cycles:pp;
however, it will not account for expensive instructions.
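For example (untested; ./workload stands in for whatever is being
measured), the PDIR-based profile can be compared against the usual
precise-cycles one:

	# instruction-level profile using the PDIR event
	perf record -e instructions:pp ./workload
	perf report

	# the conventional precise-cycles profile, for comparison
	perf record -e cycles:pp ./workload
	perf report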
Signed-off-by: Andi Kleen <[email protected]>
---
arch/x86/kernel/cpu/perf_event_intel.c | 23 +++++++++++++++++++++++
1 files changed, 23 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 1249c56..b4eb9d9 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1357,6 +1357,28 @@ static int intel_pmu_hw_config(struct perf_event *event)
return 0;
}
+static int pdir_hw_config(struct perf_event *event)
+{
+ int err = intel_pmu_hw_config(event);
+
+ if (err)
+ return err;
+
+ /*
+ * Use the PDIR instruction retired counter for two 'p's.
+ * This will randomize samples slightly and avoid some systematic
+ * measurement errors.
+ * Only works for retired instructions.
+ */
+ if (event->attr.precise_ip >= 2 &&
+ (event->hw.config & X86_RAW_EVENT_MASK) == 0xc0) {
+ u64 pdir_event = X86_CONFIG(.event=0xc0, .umask=1);
+ event->hw.config = pdir_event | (event->hw.config & ~X86_RAW_EVENT_MASK);
+ }
+
+ return 0;
+}
+
struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
{
if (x86_pmu.guest_get_msrs)
@@ -1842,6 +1864,7 @@ __init int intel_pmu_init(void)
case 58: /* IvyBridge */
name = "Ivy";
+ x86_pmu.hw_config = pdir_hw_config;
goto snb_ivb_common;
case 42: /* SandyBridge */
--
1.7.7.6
On Fri, Jun 8, 2012 at 12:29 AM, Andi Kleen <[email protected]> wrote:
> From: Andi Kleen <[email protected]>
>
> Very similar to Sandy Bridge, but there is no PEBS problem.
>
> Signed-off-by: Andi Kleen <[email protected]>
> ---
> arch/x86/kernel/cpu/perf_event_intel.c | 9 ++++++++-
> 1 files changed, 8 insertions(+), 1 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
> index 166546e..1249c56 100644
> --- a/arch/x86/kernel/cpu/perf_event_intel.c
> +++ b/arch/x86/kernel/cpu/perf_event_intel.c
> @@ -1698,6 +1698,7 @@ __init int intel_pmu_init(void)
> union cpuid10_ebx ebx;
> unsigned int unused;
> int version;
> + char *name;
>
> if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
> switch (boot_cpu_data.x86) {
> @@ -1839,9 +1840,15 @@ __init int intel_pmu_init(void)
> pr_cont("Westmere events, ");
> break;
>
> + case 58: /* IvyBridge */
> + name = "Ivy";
> + goto snb_ivb_common;
> +
> case 42: /* SandyBridge */
> x86_add_quirk(intel_sandybridge_quirk);
> case 45: /* SandyBridge, "Romely-EP" */
> + name = "Sandy";
> + snb_ivb_common:
> memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
> sizeof(hw_cache_event_ids));
>
> @@ -1861,7 +1868,7 @@ __init int intel_pmu_init(void)
> intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
> X86_CONFIG(.event=0xb1, .umask=0x01, .inv=1, .cmask=1);
>
But as far as I know, for the other generic stall event:

	/* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles */
	intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
		X86_CONFIG(.event=0xb1, .umask=0x01, .inv=1, .cmask=1);

event 0xb1, umask 0x1 does not exist on IVB; only the
UOPS_DISPATCHED.CORE version does.

Given I don't have a definition for STALLED_CYCLES_BACKEND, I don't
know if measuring across both HT threads would fit the bill. I would
advise you to keep this one undefined on IVB, otherwise it may lead to
confusion when comparing with SNB.
> - pr_cont("SandyBridge events, ");
> + pr_cont("%sBridge events, ", name);
> break;
>
> default:
> --
> 1.7.7.6
>
> But as far as I know for the other generic stall event:
> /* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles*/
> intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
> X86_CONFIG(.event=0xb1, .umask=0x01, .inv=1, .cmask=1);
Okay. I'll just undefine it.
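
Roughly like this, i.e. make the mapping SNB-only instead of setting it
in the shared snb_ivb_common block (untested sketch; a zero entry in
intel_perfmon_event_map makes the generic event report as unsupported):

	case 42: /* SandyBridge */
		x86_add_quirk(intel_sandybridge_quirk);
	case 45: /* SandyBridge, "Romely-EP" */
		name = "Sandy";
		/* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles.
		 * 0xb1/0x1 does not exist on IVB, so keep this SNB-only. */
		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
			X86_CONFIG(.event=0xb1, .umask=0x01, .inv=1, .cmask=1);
	snb_ivb_common:
		...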
-Andi
--
[email protected] -- Speaking for myself only
Also IVB needs its own pebs_constraint table as there is a new PEBS
event (0xd3):
+struct event_constraint intel_ivb_pebs_event_constraints[] = {
+ INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.ALL */
+ INTEL_EVENT_CONSTRAINT(0xc2, 0xf), /* UOPS_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS.LOCAL_DRAM */
+ EVENT_CONSTRAINT_END
+};
+
On Fri, Jun 8, 2012 at 12:43 AM, Andi Kleen <[email protected]> wrote:
>> But as far as I know for the other generic stall event:
>> /* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles*/
>> intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
>> X86_CONFIG(.event=0xb1, .umask=0x01, .inv=1, .cmask=1);
>
> Okay. I'll just undefine it.
>
> -Andi
>
>
> --
> [email protected] -- Speaking for myself only
On Fri, Jun 08, 2012 at 09:42:42AM +0200, Stephane Eranian wrote:
> Also IVB needs its own pebs_constraint table as there is a new PEBS
> event (0xd3):
Ok. Will add.
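
Presumably it just needs to be wired up in the IVB case, with the
intel_snb_pebs_event_constraints assignment kept SNB-only rather than
left in the shared block (untested sketch):

	case 58: /* IvyBridge */
		name = "Ivy";
		x86_pmu.hw_config = pdir_hw_config;
		x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints;
		goto snb_ivb_common;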
-Andi
>
> +struct event_constraint intel_ivb_pebs_event_constraints[] = {
> + INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.ALL */
> + INTEL_EVENT_CONSTRAINT(0xc2, 0xf), /* UOPS_RETIRED.* */
> + INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
> + INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */
> + INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.* */
> + INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
> + INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
> + INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
> + INTEL_EVENT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS.LOCAL_DRAM */
> + EVENT_CONSTRAINT_END
> +};
> +