2009-07-07 20:21:12

by Vince Weaver

Subject: [patch] perf_counter: Add p6 PMU


OK, so I lied, I found time to throw together a p6 PMU patch.
I've tested it on a Pentium III and it seems to be working.

I'm sure the patch still needs a lot of work. I'm not sure how to
implement p6_pmu_enable_all(). There is no support for the various
cache-related counters.

The code is based on the info in the Intel Software Developer's Manual Vol
3B, with some comments and code taken from the relevant perfmon2 patches.

There's a lot of overlap between the p6, intel, and amd methods. Not sure
if it would be wise to merge up some of the common code.

I do have Pentium Pro, PII, and PIII hardware to test this on. I also
have a Yonah-based laptop, but it might have architectural perfmon version
1 despite being a p6-related core.
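
For what it's worth, a quick way to check what the Yonah actually reports
is CPUID leaf 0x0a, which returns the architectural perfmon version ID in
EAX bits 7:0 and the number of general-purpose counters in bits 15:8. A
minimal userspace sketch (assuming GCC's <cpuid.h> helper; not part of the
patch):

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* CPUID leaf 0x0a: architectural performance monitoring */
	if (!__get_cpuid(0x0a, &eax, &ebx, &ecx, &edx)) {
		printf("no architectural perfmon leaf\n");
		return 1;
	}
	printf("arch perfmon version: %u\n", eax & 0xff);
	printf("general-purpose counters: %u\n", (eax >> 8) & 0xff);
	return 0;
}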

Let's hope this version of pine I am using doesn't do something stupid to
the whitespace in the patch.

Signed-off-by: Vince Weaver <[email protected]>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index d4cf4ce..8d972d5 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -66,6 +66,44 @@ static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = {
};

/*
+ * Not sure about some of these
+ */
+static const u64 p6_perfmon_event_map[] =
+{
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x0079,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0043,
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x0000,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
+ [PERF_COUNT_HW_BUS_CYCLES] = 0x0062,
+};
+
+static u64 p6_pmu_event_map(int event)
+{
+ return p6_perfmon_event_map[event];
+}
+
+static u64 p6_pmu_raw_event(u64 event)
+{
+#define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL
+#define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL
+#define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL
+#define CORE_EVNTSEL_INV_MASK 0x00800000ULL
+#define CORE_EVNTSEL_COUNTER_MASK 0xFF000000ULL
+
+#define CORE_EVNTSEL_MASK \
+ (CORE_EVNTSEL_EVENT_MASK | \
+ CORE_EVNTSEL_UNIT_MASK | \
+ CORE_EVNTSEL_EDGE_MASK | \
+ CORE_EVNTSEL_INV_MASK | \
+ CORE_EVNTSEL_COUNTER_MASK)
+
+ return event & CORE_EVNTSEL_MASK;
+}
+
+
+/*
* Intel PerfMon v3. Used on Core2 and later.
*/
static const u64 intel_perfmon_event_map[] =
@@ -726,6 +764,12 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
return 0;
}

+static void p6_pmu_disable_all(void)
+{
+ /* p6 only has one enable register */
+ wrmsrl(MSR_P6_EVNTSEL0, 0);
+}
+
static void intel_pmu_disable_all(void)
{
wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
@@ -767,6 +811,10 @@ void hw_perf_disable(void)
return x86_pmu.disable_all();
}

+static void p6_pmu_enable_all(void)
+{
+}
+
static void intel_pmu_enable_all(void)
{
wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
@@ -846,6 +894,12 @@ intel_pmu_disable_fixed(struct hw_perf_counter *hwc, int __idx)
}

static inline void
+p6_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
+{
+ x86_pmu_disable_counter(hwc, idx);
+}
+
+static inline void
intel_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
{
if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
@@ -943,6 +997,17 @@ intel_pmu_enable_fixed(struct hw_perf_counter *hwc, int __idx)
err = checking_wrmsrl(hwc->config_base, ctrl_val);
}

+static void p6_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
+{
+ struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+
+ if (cpuc->enabled)
+ x86_pmu_enable_counter(hwc, idx);
+ else
+ x86_pmu_disable_counter(hwc, idx);
+}
+
+
static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
{
if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
@@ -1176,6 +1241,50 @@ static void intel_pmu_reset(void)
local_irq_restore(flags);
}

+static int p6_pmu_handle_irq(struct pt_regs *regs)
+{
+ struct perf_sample_data data;
+ struct cpu_hw_counters *cpuc;
+ struct perf_counter *counter;
+ struct hw_perf_counter *hwc;
+ int cpu, idx, handled = 0;
+ u64 val;
+
+ data.regs = regs;
+ data.addr = 0;
+
+ cpu = smp_processor_id();
+ cpuc = &per_cpu(cpu_hw_counters, cpu);
+
+ for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ if (!test_bit(idx, cpuc->active_mask))
+ continue;
+
+ counter = cpuc->counters[idx];
+ hwc = &counter->hw;
+
+ val = x86_perf_counter_update(counter, hwc, idx);
+ if (val & (1ULL << (x86_pmu.counter_bits - 1)))
+ continue;
+
+ /*
+ * counter overflow
+ */
+ handled = 1;
+ data.period = counter->hw.last_period;
+
+ if (!x86_perf_counter_set_period(counter, hwc, idx))
+ continue;
+
+ if (perf_counter_overflow(counter, 1, &data))
+ p6_pmu_disable_counter(hwc, idx);
+ }
+
+ if (handled)
+ inc_irq_stat(apic_perf_irqs);
+
+ return handled;
+}

/*
* This handler is triggered by the local APIC, so the APIC IRQ handling
@@ -1353,6 +1462,33 @@ static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
.priority = 1
};

+static struct x86_pmu p6_pmu = {
+ .name = "p6",
+ .handle_irq = p6_pmu_handle_irq,
+ .disable_all = p6_pmu_disable_all,
+ .enable_all = p6_pmu_enable_all,
+ .enable = p6_pmu_enable_counter,
+ .disable = p6_pmu_disable_counter,
+ .eventsel = MSR_P6_EVNTSEL0,
+ .perfctr = MSR_P6_PERFCTR0,
+ .event_map = p6_pmu_event_map,
+ .raw_event = p6_pmu_raw_event,
+ .max_events = ARRAY_SIZE(p6_perfmon_event_map),
+ .max_period = (1ULL << 31) - 1,
+ .version = 0,
+ .num_counters = 2,
+ /*
+ * Counters have 40 bits implemented. However they are designed such
+ * that bits [32-39] are sign extensions of bit 31. As such the
+ * effective width of a counter for P6-like PMU is 31 bits only.
+ *
+ *
+ * See IA-32 Intel Architecture Software developer manual Vol 3B
+ */
+ .counter_bits = 31,
+ .counter_mask = (1ULL << 31) - 1,
+};
+
static struct x86_pmu intel_pmu = {
.name = "Intel",
.handle_irq = intel_pmu_handle_irq,
@@ -1392,6 +1528,37 @@ static struct x86_pmu amd_pmu = {
.max_period = (1ULL << 47) - 1,
};

+static int p6_pmu_init(void)
+{
+ int high, low;
+
+ switch (boot_cpu_data.x86_model) {
+ case 1: /* Pentium Pro */
+ case 3:
+ case 5: /* Pentium II Deschutes */
+ case 7 ... 11:
+ break;
+ case 13:
+ /* for Pentium M, we need to check if PMU exist */
+ rdmsr(MSR_IA32_MISC_ENABLE, low, high);
+ if (low & (1U << 7))
+ break;
+ default:
+ pr_cont("unsupported p6 CPU model %d ",
+ boot_cpu_data.x86_model);
+ return -1;
+ }
+
+ if (!cpu_has_apic) {
+ pr_info("no Local APIC, try rebooting with lapic");
+ return -1;
+ }
+
+ x86_pmu = p6_pmu;
+
+ return 0;
+}
+
static int intel_pmu_init(void)
{
union cpuid10_edx edx;
@@ -1400,6 +1567,12 @@ static int intel_pmu_init(void)
unsigned int ebx;
int version;

+ /* check for P6 processor family */
+ if (boot_cpu_data.x86 == 6) {
+ p6_pmu_init();
+ return 0;
+ }
+
if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
return -ENODEV;


2009-07-07 20:35:15

by Peter Zijlstra

Subject: Re: [patch] perf_counter: Add p6 PMU

On Tue, 2009-07-07 at 16:06 -0400, Vince Weaver wrote:
> OK, so I lied, I found time to throw together a p6 PMU patch.
> I've tested it on a Pentium III and it seems to be working.
>
> I'm sure the patch still needs a lot of work. I'm not sure how to
> implement p6_pmu_enable_all(). There is no support for the various
> cache-related counters.
>
> The code is based on the info in the Intel Software Developer's Manual Vol
> 3B, with some comments and code taken from the relevant perfmon2 patches.
>
> There's a lot of overlap between the p6, intel, and amd methods. Not sure
> if it would be wise to merge up some of the common code.
>
> I do have Pentium Pro, PII, and PIII hardware to test this on. I also
> have a Yonah-based laptop, but it might have architectural perfmon version
> 1 despite being a p6-related core.
>
> Let's hope this version of pine I am using doesn't do something stupid to
> the whitespace in the patch.

Awesome, I'll try it on my pentium-3 mobile tomorrow.

Thanks!

2009-07-07 23:24:48

by Ingo Molnar

Subject: Re: [patch] perf_counter: Add p6 PMU


* Vince Weaver <[email protected]> wrote:

> OK, so I lied, I found time to throw together a p6 PMU
> patch. I've tested it on a Pentium III and it seems to be
> working.

Cool!! I've got some P3 box around somewhere, so I can try
this in a few days.

The patch looks really clean and simple.

Ingo

2009-07-08 11:16:12

by Peter Zijlstra

Subject: Re: [patch] perf_counter: Add p6 PMU

On Tue, 2009-07-07 at 16:06 -0400, Vince Weaver wrote:
> OK, so I lied, I found time to throw together a p6 PMU patch.
> I've tested it on a Pentium III and it seems to be working.
>
> I'm sure the patch still needs a lot of work. I'm not sure how to
> implement p6_pmu_enable_all(). There is no support for the various
> cache-related counters.
>
> The code is based on the info in the Intel Software Developer's Manual Vol
> 3B, with some comments and code taken from the relevant perfmon2 patches.
>
> There's a lot of overlap between the p6, intel, and amd methods. Not sure
> if it would be wise to merge up some of the common code.
>
> I do have Pentium Pro, PII, and PIII hardware to test this on. I also
> have a Yonah-based laptop, but it might have architectural perfmon version
> 1 despite being a p6-related core.
>
> Let's hope this version of pine I am using doesn't do something stupid to
> the whitespace in the patch.
>
> Signed-off-by: Vince Weaver <[email protected]>

I did the below on top and it does indeed seem to work.. still testing.

---
Subject: perf_counter: cleanup/fix the P6 pmu support
From: Peter Zijlstra <[email protected]>
Date: Wed Jul 08 10:21:41 CEST 2009

- event 0x43:
All loads from any memory type. All stores to any memory type.
Each part of a split is counted separately. The internal logic
counts not only memory loads and stores, but also internal
retries.

doesn't sound like the right kind of event.. but then, it doesn't
have anything better either.

- s/CORE_/P6_/ for the evntsel masks

- completed p6_pmu_{enable,disable}_all()
enable_all was empty and disable_all would destroy the configuration.
Neither would touch cpuc->enabled even though p6_pmu_enable_counter()
relies on it.

- changed p6_pmu_disable_counter() to simply clear the configuration
but not touch the EN bit depending on the cpuc->enabled state.

- counters have an effective width of 32 bits: 0-31.

- fixed p6_pmu_init() error handling (my machine needs the lapic bit
so it triggered).

- remove the rmb() in userspace since ia32 machines tend to not have
lfence and will trap with SIGILL.

Signed-off-by: Peter Zijlstra <[email protected]>
LKML-Reference: <new-submission>
---
arch/x86/kernel/cpu/perf_counter.c | 93 +++++++++++++++++++++++--------------
1 file changed, 58 insertions(+), 35 deletions(-)

Index: linux-2.6/arch/x86/kernel/cpu/perf_counter.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/perf_counter.c 2009-07-08 12:56:59.000000000 +0200
+++ linux-2.6/arch/x86/kernel/cpu/perf_counter.c 2009-07-08 12:57:53.000000000 +0200
@@ -72,7 +72,7 @@ static const u64 p6_perfmon_event_map[]
{
[PERF_COUNT_HW_CPU_CYCLES] = 0x0079,
[PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
- [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0043,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0000,
[PERF_COUNT_HW_CACHE_MISSES] = 0x0000,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
[PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
@@ -86,20 +86,20 @@ static u64 p6_pmu_event_map(int event)

static u64 p6_pmu_raw_event(u64 event)
{
-#define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL
-#define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL
-#define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL
-#define CORE_EVNTSEL_INV_MASK 0x00800000ULL
-#define CORE_EVNTSEL_COUNTER_MASK 0xFF000000ULL
+#define P6_EVNTSEL_EVENT_MASK 0x000000FFULL
+#define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL
+#define P6_EVNTSEL_EDGE_MASK 0x00040000ULL
+#define P6_EVNTSEL_INV_MASK 0x00800000ULL
+#define P6_EVNTSEL_COUNTER_MASK 0xFF000000ULL
+
+#define P6_EVNTSEL_MASK \
+ (P6_EVNTSEL_EVENT_MASK | \
+ P6_EVNTSEL_UNIT_MASK | \
+ P6_EVNTSEL_EDGE_MASK | \
+ P6_EVNTSEL_INV_MASK | \
+ P6_EVNTSEL_COUNTER_MASK)

-#define CORE_EVNTSEL_MASK \
- (CORE_EVNTSEL_EVENT_MASK | \
- CORE_EVNTSEL_UNIT_MASK | \
- CORE_EVNTSEL_EDGE_MASK | \
- CORE_EVNTSEL_INV_MASK | \
- CORE_EVNTSEL_COUNTER_MASK)
-
- return event & CORE_EVNTSEL_MASK;
+ return event & P6_EVNTSEL_MASK;
}


@@ -766,8 +766,19 @@ static int __hw_perf_counter_init(struct

static void p6_pmu_disable_all(void)
{
+ struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+ unsigned long val;
+
+ if (!cpuc->enabled)
+ return;
+
+ cpuc->enabled = 0;
+ barrier();
+
/* p6 only has one enable register */
- wrmsrl(MSR_P6_EVNTSEL0, 0);
+ rdmsrl(MSR_P6_EVNTSEL0, val);
+ val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
+ wrmsrl(MSR_P6_EVNTSEL0, val);
}

static void intel_pmu_disable_all(void)
@@ -813,6 +824,19 @@ void hw_perf_disable(void)

static void p6_pmu_enable_all(void)
{
+ struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+ unsigned long val;
+
+ if (cpuc->enabled)
+ return;
+
+ cpuc->enabled = 1;
+ barrier();
+
+ /* p6 only has one enable register */
+ rdmsrl(MSR_P6_EVNTSEL0, val);
+ val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+ wrmsrl(MSR_P6_EVNTSEL0, val);
}

static void intel_pmu_enable_all(void)
@@ -867,16 +891,13 @@ static inline void intel_pmu_ack_status(

static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
{
- int err;
- err = checking_wrmsrl(hwc->config_base + idx,
+ (void)checking_wrmsrl(hwc->config_base + idx,
hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE);
}

static inline void x86_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
{
- int err;
- err = checking_wrmsrl(hwc->config_base + idx,
- hwc->config);
+ (void)checking_wrmsrl(hwc->config_base + idx, hwc->config);
}

static inline void
@@ -884,19 +905,24 @@ intel_pmu_disable_fixed(struct hw_perf_c
{
int idx = __idx - X86_PMC_IDX_FIXED;
u64 ctrl_val, mask;
- int err;

mask = 0xfULL << (idx * 4);

rdmsrl(hwc->config_base, ctrl_val);
ctrl_val &= ~mask;
- err = checking_wrmsrl(hwc->config_base, ctrl_val);
+ (void)checking_wrmsrl(hwc->config_base, ctrl_val);
}

static inline void
p6_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
{
- x86_pmu_disable_counter(hwc, idx);
+ struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+ unsigned long val = ARCH_PERFMON_EVENTSEL0_ENABLE;
+
+ if (!cpuc->enabled)
+ val = 0;
+
+ (void)checking_wrmsrl(hwc->config_base + idx, val);
}

static inline void
@@ -1247,14 +1273,13 @@ static int p6_pmu_handle_irq(struct pt_r
struct cpu_hw_counters *cpuc;
struct perf_counter *counter;
struct hw_perf_counter *hwc;
- int cpu, idx, handled = 0;
+ int idx, handled = 0;
u64 val;

data.regs = regs;
data.addr = 0;

- cpu = smp_processor_id();
- cpuc = &per_cpu(cpu_hw_counters, cpu);
+ cpuc = &__get_cpu_var(cpu_hw_counters);

for (idx = 0; idx < x86_pmu.num_counters; idx++) {
if (!test_bit(idx, cpuc->active_mask))
@@ -1294,14 +1319,13 @@ static int intel_pmu_handle_irq(struct p
{
struct perf_sample_data data;
struct cpu_hw_counters *cpuc;
- int bit, cpu, loops;
+ int bit, loops;
u64 ack, status;

data.regs = regs;
data.addr = 0;

- cpu = smp_processor_id();
- cpuc = &per_cpu(cpu_hw_counters, cpu);
+ cpuc = &__get_cpu_var(cpu_hw_counters);

perf_disable();
status = intel_pmu_get_status();
@@ -1358,14 +1382,13 @@ static int amd_pmu_handle_irq(struct pt_
struct cpu_hw_counters *cpuc;
struct perf_counter *counter;
struct hw_perf_counter *hwc;
- int cpu, idx, handled = 0;
+ int idx, handled = 0;
u64 val;

data.regs = regs;
data.addr = 0;

- cpu = smp_processor_id();
- cpuc = &per_cpu(cpu_hw_counters, cpu);
+ cpuc = &__get_cpu_var(cpu_hw_counters);

for (idx = 0; idx < x86_pmu.num_counters; idx++) {
if (!test_bit(idx, cpuc->active_mask))
@@ -1480,13 +1503,12 @@ static struct x86_pmu p6_pmu = {
/*
* Counters have 40 bits implemented. However they are designed such
* that bits [32-39] are sign extensions of bit 31. As such the
- * effective width of a counter for P6-like PMU is 31 bits only.
- *
+ * effective width of a counter for P6-like PMU is 32 bits only.
*
* See IA-32 Intel Architecture Software developer manual Vol 3B
*/
- .counter_bits = 31,
- .counter_mask = (1ULL << 31) - 1,
+ .counter_bits = 32,
+ .counter_mask = (1ULL << 32) - 1,
};

static struct x86_pmu intel_pmu = {
@@ -1541,17 +1563,17 @@ static int p6_pmu_init(void)
case 13:
/* for Pentium M, we need to check if PMU exist */
rdmsr(MSR_IA32_MISC_ENABLE, low, high);
- if (low & (1U << 7))
+ if (low & MSR_IA32_MISC_ENABLE_EMON)
break;
default:
pr_cont("unsupported p6 CPU model %d ",
boot_cpu_data.x86_model);
- return -1;
+ return -ENODEV;
}

if (!cpu_has_apic) {
pr_info("no Local APIC, try rebooting with lapic");
- return -1;
+ return -ENODEV;
}

x86_pmu = p6_pmu;
@@ -1568,10 +1590,8 @@ static int intel_pmu_init(void)
int version;

/* check for P6 processor family */
- if (boot_cpu_data.x86 == 6) {
- p6_pmu_init();
- return 0;
- }
+ if (boot_cpu_data.x86 == 6)
+ return p6_pmu_init();

if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
return -ENODEV;
Index: linux-2.6/tools/perf/perf.h
===================================================================
--- linux-2.6.orig/tools/perf/perf.h 2009-07-08 12:57:50.000000000 +0200
+++ linux-2.6/tools/perf/perf.h 2009-07-08 12:57:53.000000000 +0200
@@ -1,7 +1,13 @@
#ifndef _PERF_PERF_H
#define _PERF_PERF_H

-#if defined(__x86_64__) || defined(__i386__)
+#if defined(__i386__)
+#include "../../arch/x86/include/asm/unistd.h"
+#define rmb() asm volatile("" ::: "memory")
+#define cpu_relax() asm volatile("rep; nop" ::: "memory");
+#endif
+
+#if defined(__x86_64__)
#include "../../arch/x86/include/asm/unistd.h"
#define rmb() asm volatile("lfence" ::: "memory")
#define cpu_relax() asm volatile("rep; nop" ::: "memory");

2009-07-08 21:34:17

by Vince Weaver

Subject: Re: [patch] perf_counter: Add p6 PMU


On Wed, 8 Jul 2009, Peter Zijlstra wrote:

> doesn't sound like the right kind of event.. but then, it doesn't
> have anything better either.

Is there a way to specify "invalid event"? Just setting it to 0 doesn't
work; on the Pentium Pro, event 0 returns what looks like essentially
random numbers.

>
> - s/CORE_/P6_/ for the evntsel masks

thanks, I missed that.

> - int err;
> - err = checking_wrmsrl(hwc->config_base + idx,
> + (void)checking_wrmsrl(hwc->config_base + idx,

the patches that do the above seem to be unrelated to p6 support.
Did they get mixed in somehow?


The patch as it stands will break non-p6 intel perf counters, as Core2 and
atom are also cpu family 6. The attached patch takes the updated version
you sent out, and includes a fix to the detection logic.

Also the current patch gives the following warning:
arch/x86/kernel/cpu/perf_counter.c: In function p6_pmu_disable_counter:
arch/x86/kernel/cpu/perf_counter.c:925: warning: right shift count >= width of type

though I don't see where that actually happens, unless some deep macro
magic is going on.

Patch attached below. This is my first attempt at kernel development in
the modern era, so I have no idea how to do the Signed-off-by if multiple
people are involved. Do I just put them all together?

I've tested this on a Pentium Pro and a PIII.

Vince

Signed-off-by: Vince Weaver <[email protected]>
Signed-off-by: Peter Zijlstra <[email protected]>


diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index d4cf4ce..700c8d3 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -66,6 +66,44 @@ static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = {
};

/*
+ * Not sure about some of these
+ */
+static const u64 p6_perfmon_event_map[] =
+{
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x0079,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0000,
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x0000,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
+ [PERF_COUNT_HW_BUS_CYCLES] = 0x0062,
+};
+
+static u64 p6_pmu_event_map(int event)
+{
+ return p6_perfmon_event_map[event];
+}
+
+static u64 p6_pmu_raw_event(u64 event)
+{
+#define P6_EVNTSEL_EVENT_MASK 0x000000FFULL
+#define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL
+#define P6_EVNTSEL_EDGE_MASK 0x00040000ULL
+#define P6_EVNTSEL_INV_MASK 0x00800000ULL
+#define P6_EVNTSEL_COUNTER_MASK 0xFF000000ULL
+
+#define P6_EVNTSEL_MASK \
+ (P6_EVNTSEL_EVENT_MASK | \
+ P6_EVNTSEL_UNIT_MASK | \
+ P6_EVNTSEL_EDGE_MASK | \
+ P6_EVNTSEL_INV_MASK | \
+ P6_EVNTSEL_COUNTER_MASK)
+
+ return event & P6_EVNTSEL_MASK;
+}
+
+
+/*
* Intel PerfMon v3. Used on Core2 and later.
*/
static const u64 intel_perfmon_event_map[] =
@@ -726,6 +764,23 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
return 0;
}

+static void p6_pmu_disable_all(void)
+{
+ struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+ unsigned long val;
+
+ if (!cpuc->enabled)
+ return;
+
+ cpuc->enabled = 0;
+ barrier();
+
+ /* p6 only has one enable register */
+ rdmsrl(MSR_P6_EVNTSEL0, val);
+ val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
+ wrmsrl(MSR_P6_EVNTSEL0, val);
+}
+
static void intel_pmu_disable_all(void)
{
wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
@@ -767,6 +822,23 @@ void hw_perf_disable(void)
return x86_pmu.disable_all();
}

+static void p6_pmu_enable_all(void)
+{
+ struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+ unsigned long val;
+
+ if (cpuc->enabled)
+ return;
+
+ cpuc->enabled = 1;
+ barrier();
+
+ /* p6 only has one enable register */
+ rdmsrl(MSR_P6_EVNTSEL0, val);
+ val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+ wrmsrl(MSR_P6_EVNTSEL0, val);
+}
+
static void intel_pmu_enable_all(void)
{
wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
@@ -819,16 +891,13 @@ static inline void intel_pmu_ack_status(u64 ack)

static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
{
- int err;
- err = checking_wrmsrl(hwc->config_base + idx,
+ (void)checking_wrmsrl(hwc->config_base + idx,
hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE);
}

static inline void x86_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
{
- int err;
- err = checking_wrmsrl(hwc->config_base + idx,
- hwc->config);
+ (void)checking_wrmsrl(hwc->config_base + idx, hwc->config);
}

static inline void
@@ -836,13 +905,24 @@ intel_pmu_disable_fixed(struct hw_perf_counter *hwc, int __idx)
{
int idx = __idx - X86_PMC_IDX_FIXED;
u64 ctrl_val, mask;
- int err;

mask = 0xfULL << (idx * 4);

rdmsrl(hwc->config_base, ctrl_val);
ctrl_val &= ~mask;
- err = checking_wrmsrl(hwc->config_base, ctrl_val);
+ (void)checking_wrmsrl(hwc->config_base, ctrl_val);
+}
+
+static inline void
+p6_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
+{
+ struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+ unsigned long val = ARCH_PERFMON_EVENTSEL0_ENABLE;
+
+ if (!cpuc->enabled)
+ val = 0;
+
+ (void)checking_wrmsrl(hwc->config_base + idx, val);
}

static inline void
@@ -943,6 +1023,17 @@ intel_pmu_enable_fixed(struct hw_perf_counter *hwc, int __idx)
err = checking_wrmsrl(hwc->config_base, ctrl_val);
}

+static void p6_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
+{
+ struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+
+ if (cpuc->enabled)
+ x86_pmu_enable_counter(hwc, idx);
+ else
+ x86_pmu_disable_counter(hwc, idx);
+}
+
+
static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
{
if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
@@ -1176,6 +1267,49 @@ static void intel_pmu_reset(void)
local_irq_restore(flags);
}

+static int p6_pmu_handle_irq(struct pt_regs *regs)
+{
+ struct perf_sample_data data;
+ struct cpu_hw_counters *cpuc;
+ struct perf_counter *counter;
+ struct hw_perf_counter *hwc;
+ int idx, handled = 0;
+ u64 val;
+
+ data.regs = regs;
+ data.addr = 0;
+
+ cpuc = &__get_cpu_var(cpu_hw_counters);
+
+ for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ if (!test_bit(idx, cpuc->active_mask))
+ continue;
+
+ counter = cpuc->counters[idx];
+ hwc = &counter->hw;
+
+ val = x86_perf_counter_update(counter, hwc, idx);
+ if (val & (1ULL << (x86_pmu.counter_bits - 1)))
+ continue;
+
+ /*
+ * counter overflow
+ */
+ handled = 1;
+ data.period = counter->hw.last_period;
+
+ if (!x86_perf_counter_set_period(counter, hwc, idx))
+ continue;
+
+ if (perf_counter_overflow(counter, 1, &data))
+ p6_pmu_disable_counter(hwc, idx);
+ }
+
+ if (handled)
+ inc_irq_stat(apic_perf_irqs);
+
+ return handled;
+}

/*
* This handler is triggered by the local APIC, so the APIC IRQ handling
@@ -1185,14 +1319,13 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
{
struct perf_sample_data data;
struct cpu_hw_counters *cpuc;
- int bit, cpu, loops;
+ int bit, loops;
u64 ack, status;

data.regs = regs;
data.addr = 0;

- cpu = smp_processor_id();
- cpuc = &per_cpu(cpu_hw_counters, cpu);
+ cpuc = &__get_cpu_var(cpu_hw_counters);

perf_disable();
status = intel_pmu_get_status();
@@ -1249,14 +1382,13 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
struct cpu_hw_counters *cpuc;
struct perf_counter *counter;
struct hw_perf_counter *hwc;
- int cpu, idx, handled = 0;
+ int idx, handled = 0;
u64 val;

data.regs = regs;
data.addr = 0;

- cpu = smp_processor_id();
- cpuc = &per_cpu(cpu_hw_counters, cpu);
+ cpuc = &__get_cpu_var(cpu_hw_counters);

for (idx = 0; idx < x86_pmu.num_counters; idx++) {
if (!test_bit(idx, cpuc->active_mask))
@@ -1353,6 +1485,32 @@ static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
.priority = 1
};

+static struct x86_pmu p6_pmu = {
+ .name = "p6",
+ .handle_irq = p6_pmu_handle_irq,
+ .disable_all = p6_pmu_disable_all,
+ .enable_all = p6_pmu_enable_all,
+ .enable = p6_pmu_enable_counter,
+ .disable = p6_pmu_disable_counter,
+ .eventsel = MSR_P6_EVNTSEL0,
+ .perfctr = MSR_P6_PERFCTR0,
+ .event_map = p6_pmu_event_map,
+ .raw_event = p6_pmu_raw_event,
+ .max_events = ARRAY_SIZE(p6_perfmon_event_map),
+ .max_period = (1ULL << 31) - 1,
+ .version = 0,
+ .num_counters = 2,
+ /*
+ * Counters have 40 bits implemented. However they are designed such
+ * that bits [32-39] are sign extensions of bit 31. As such the
+ * effective width of a counter for P6-like PMU is 32 bits only.
+ *
+ * See IA-32 Intel Architecture Software developer manual Vol 3B
+ */
+ .counter_bits = 32,
+ .counter_mask = (1ULL << 32) - 1,
+};
+
static struct x86_pmu intel_pmu = {
.name = "Intel",
.handle_irq = intel_pmu_handle_irq,
@@ -1392,6 +1550,41 @@ static struct x86_pmu amd_pmu = {
.max_period = (1ULL << 47) - 1,
};

+static int p6_pmu_init(void)
+{
+ int high, low;
+
+ switch (boot_cpu_data.x86_model) {
+ case 1:
+ case 3: /* Pentium Pro */
+ case 5:
+ case 6: /* Pentium II */
+ case 7:
+ case 8:
+ case 11: /* Pentium III */
+ break;
+ case 9:
+ case 13:
+ /* for Pentium M, we need to check if PMU exist */
+ rdmsr(MSR_IA32_MISC_ENABLE, low, high);
+ if (low & MSR_IA32_MISC_ENABLE_EMON)
+ break;
+ default:
+ pr_cont("unsupported p6 CPU model %d ",
+ boot_cpu_data.x86_model);
+ return -ENODEV;
+ }
+
+ if (!cpu_has_apic) {
+ pr_info("no Local APIC, try rebooting with lapic");
+ return -ENODEV;
+ }
+
+ x86_pmu = p6_pmu;
+
+ return 0;
+}
+
static int intel_pmu_init(void)
{
union cpuid10_edx edx;
@@ -1400,8 +1593,14 @@ static int intel_pmu_init(void)
unsigned int ebx;
int version;

- if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
+ if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
+ /* check for P6 processor family */
+ if (boot_cpu_data.x86 == 6) {
+ return p6_pmu_init();
+ } else {
return -ENODEV;
+ }
+ }

/*
* Check whether the Architectural PerfMon supports
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index d3042a6..0696941 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -1,7 +1,13 @@
#ifndef _PERF_PERF_H
#define _PERF_PERF_H

-#if defined(__x86_64__) || defined(__i386__)
+#if defined(__i386__)
+#include "../../arch/x86/include/asm/unistd.h"
+#define rmb() asm volatile("" ::: "memory")
+#define cpu_relax() asm volatile("rep; nop" ::: "memory");
+#endif
+
+#if defined(__x86_64__)
#include "../../arch/x86/include/asm/unistd.h"
#define rmb() asm volatile("lfence" ::: "memory")
#define cpu_relax() asm volatile("rep; nop" ::: "memory");

2009-07-08 21:45:44

by Peter Zijlstra

Subject: Re: [patch] perf_counter: Add p6 PMU

On Wed, 2009-07-08 at 17:46 -0400, Vince Weaver wrote:
> On Wed, 8 Jul 2009, Peter Zijlstra wrote:
>
> > doesn't sound like the right kind of event.. but then, it doesn't
> > have anything better either.
>
> Is there a way to specify "invalid event"? Just setting it to 0 doesn't
> work, on the Pentium Pro event 0 returns what looks like essentially
> random numbers.

Hmm, bugger. I was assuming writing 0 to the eventsel would disable the
counter. Apparently that only works for eventsel1, which would mean we
cannot run counter1 without counter0. That means we'd need to do a
counter swap at times... :/

I think we can extend __hw_perf_counter_init() to return failure when
->event_map() returns 0 or something.

> >
> > - s/CORE_/P6_/ for the evntsel masks
>
> thanks, I missed that.
>
> > - int err;
> > - err = checking_wrmsrl(hwc->config_base + idx,
> > + (void)checking_wrmsrl(hwc->config_base + idx,
>
> the patches that do the above seem to be unrelated to p6 support.
> Did they get mixed in somehow?

Yeah, random cleanups..

> The patch as it stands will break non-p6 intel perf counters, as Core2 and
> atom are also cpu family 6. The attached patch takes the updated version
> you sent out, and includes a fix to the detection logic.

Ah, thanks!

> Also the current patch gives the following warning:
> arch/x86/kernel/cpu/perf_counter.c: In function p6_pmu_disable_counter:
> arch/x86/kernel/cpu/perf_counter.c:925: warning: right shift count >= width of type

#define checking_wrmsrl(msr, val) wrmsr_safe((msr), (u32)(val), \
(u32)((val) >> 32))

and I passed in an unsigned long, which on ia32 is, well, 32 bits :-)
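
A standalone illustration (not from the patch): compiling the below with
gcc -m32 -Wall trips the identical warning, because unsigned long is only
32 bits wide there.

/* shift.c - build with: gcc -m32 -Wall shift.c */
#include <stdio.h>

int main(void)
{
	unsigned long val = 0x12345678UL;

	/*
	 * On ia32 unsigned long is 32 bits, so the shift count equals
	 * the type width and gcc warns:
	 *   right shift count >= width of type
	 * which is what checking_wrmsrl() expands to when handed an
	 * unsigned long instead of a u64.
	 */
	printf("%lu\n", val >> 32);
	return 0;
}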

> though I don't see where that actually happens, unless some deep macro
> magic is going on.
>
> Patch attached below. This is my first attempt at kernel development in
> the modern era, so I have no idea how to do the Signed-off-by if multiple
> people are involved. Do I just put them all together?

Yeah, that usually works..

Thanks, I'll have a go at it tomorrow.

2009-07-08 22:14:42

by Peter Zijlstra

Subject: Re: [patch] perf_counter: Add p6 PMU

On Wed, 2009-07-08 at 23:45 +0200, Peter Zijlstra wrote:
> On Wed, 2009-07-08 at 17:46 -0400, Vince Weaver wrote:
> > On Wed, 8 Jul 2009, Peter Zijlstra wrote:
> >
> > > doesn't sound like the right kind of event.. but then, it doesn't
> > > have anything better either.
> >
> > Is there a way to specify "invalid event"? Just setting it to 0 doesn't
> > work, on the Pentium Pro event 0 returns what looks like essentially
> > random numbers.
>
> Hmm, bugger. I was assuming writing 0 to the eventsel would disable the
> counter. Apparently that only works for eventsel1, which would mean we
> cannot run counter1 without counter0. That means we'd need to do a
> counter swap at times... :/

Ooh, we can instead use a counter setting that's specified not to count
anything for 'disabling' the counter, like event 0x2E with unit mask 0.

2009-07-09 13:25:07

by Peter Zijlstra

Subject: Re: [patch] perf_counter: Add p6 PMU

On Thu, 2009-07-09 at 00:14 +0200, Peter Zijlstra wrote:
> On Wed, 2009-07-08 at 23:45 +0200, Peter Zijlstra wrote:
> > On Wed, 2009-07-08 at 17:46 -0400, Vince Weaver wrote:
> > > On Wed, 8 Jul 2009, Peter Zijlstra wrote:
> > >
> > > > doesn't sound like the right kind of event.. but then, it doesn't
> > > > have anything better either.
> > >
> > > Is there a way to specify "invalid event"? Just setting it to 0 doesn't
> > > work, on the Pentium Pro event 0 returns what looks like essentially
> > > random numbers.
> >
> > Hmm, bugger. I was assuming writing 0 to the eventsel would disable the
> > counter. Apparently that only works for eventsel1, which would mean we
> > cannot run counter1 without counter0. That means we'd need to do a
> > counter swap at times... :/
>
> Ooh, we can instead use a counter setting that's specified not to count
> anything for 'disabling' the counter, like event 0x2E with unit mask 0.

The below seems to work..

---
arch/x86/kernel/cpu/perf_counter.c | 28 +++++++++++++++++++++++-----
1 file changed, 23 insertions(+), 5 deletions(-)

Index: linux-2.6/arch/x86/kernel/cpu/perf_counter.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/perf_counter.c
+++ linux-2.6/arch/x86/kernel/cpu/perf_counter.c
@@ -84,6 +84,14 @@ static u64 p6_pmu_event_map(int event)
return p6_perfmon_event_map[event];
}

+/*
+ * Counter setting that is specified not to count anything.
+ * We use this to effectively disable a counter.
+ *
+ * L2_RQSTS with 0 MESI unit mask.
+ */
+#define P6_NOP_COUNTER 0x0000002EULL
+
static u64 p6_pmu_raw_event(u64 event)
{
#define P6_EVNTSEL_EVENT_MASK 0x000000FFULL
@@ -704,6 +712,7 @@ static int __hw_perf_counter_init(struct
{
struct perf_counter_attr *attr = &counter->attr;
struct hw_perf_counter *hwc = &counter->hw;
+ u64 config;
int err;

if (!x86_pmu_initialized())
@@ -756,10 +765,19 @@ static int __hw_perf_counter_init(struct

if (attr->config >= x86_pmu.max_events)
return -EINVAL;
+
/*
* The generic map:
*/
- hwc->config |= x86_pmu.event_map(attr->config);
+ config = x86_pmu.event_map(attr->config);
+
+ if (config == 0)
+ return -ENOENT;
+
+ if (config == -1LL)
+ return -EINVAL;
+
+ hwc->config |= config;

return 0;
}
@@ -767,7 +785,7 @@ static int __hw_perf_counter_init(struct
static void p6_pmu_disable_all(void)
{
struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
- unsigned long val;
+ u64 val;

if (!cpuc->enabled)
return;
@@ -917,10 +935,10 @@ static inline void
p6_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
{
struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
- unsigned long val = ARCH_PERFMON_EVENTSEL0_ENABLE;
+ u64 val = P6_NOP_COUNTER;

- if (!cpuc->enabled)
- val = 0;
+ if (cpuc->enabled)
+ val |= ARCH_PERFMON_EVENTSEL0_ENABLE;

(void)checking_wrmsrl(hwc->config_base + idx, val);
}

2009-07-10 10:41:14

by Vince Weaver

Subject: [tip:perfcounters/core] perf_counter: Add P6 PMU support

Commit-ID: 11d1578f9454159c43499d1d8fe8a7d728c176a3
Gitweb: http://git.kernel.org/tip/11d1578f9454159c43499d1d8fe8a7d728c176a3
Author: Vince Weaver <[email protected]>
AuthorDate: Wed, 8 Jul 2009 17:46:14 -0400
Committer: Ingo Molnar <[email protected]>
CommitDate: Fri, 10 Jul 2009 10:28:26 +0200

perf_counter: Add P6 PMU support

Add basic P6 PMU support. The P6 uses the EVNTSEL0 EN bit to
enable/disable both its counters. We use this for the
global enable/disable, and clear all config bits (except EN)
to disable individual counters.

Actual ia32 hardware doesn't support lfence, so use a locked
op without side-effect to implement a full barrier.

perf stat and perf record seem to function correctly.
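
On the lfence point above: lfence was introduced with SSE2, so pre-SSE2
P6 parts trap on it with #UD, hence the locked-op fallback. A minimal
userspace sketch of the runtime check (assuming GCC's <cpuid.h> helper;
illustrative only, not part of this commit):

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* CPUID leaf 1 feature flags: EDX bit 26 indicates SSE2 (and thus lfence) */
	if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
		return 1;

	if (edx & (1u << 26))
		printf("SSE2 present: lfence is usable as rmb()\n");
	else
		printf("no SSE2: fall back to a locked op, e.g. lock; addl $0,(%%esp)\n");
	return 0;
}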

[[email protected]: cleanups and complete the enable/disable code]

Signed-off-by: Vince Weaver <[email protected]>
Signed-off-by: Peter Zijlstra <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>


---
arch/x86/kernel/cpu/perf_counter.c | 227 +++++++++++++++++++++++++++++++++---
tools/perf/perf.h | 8 +-
2 files changed, 220 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 36c3dc7..1910f39 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -66,6 +66,44 @@ static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = {
};

/*
+ * Not sure about some of these
+ */
+static const u64 p6_perfmon_event_map[] =
+{
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x0079,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0000,
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x0000,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
+ [PERF_COUNT_HW_BUS_CYCLES] = 0x0062,
+};
+
+static u64 p6_pmu_event_map(int event)
+{
+ return p6_perfmon_event_map[event];
+}
+
+static u64 p6_pmu_raw_event(u64 event)
+{
+#define P6_EVNTSEL_EVENT_MASK 0x000000FFULL
+#define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL
+#define P6_EVNTSEL_EDGE_MASK 0x00040000ULL
+#define P6_EVNTSEL_INV_MASK 0x00800000ULL
+#define P6_EVNTSEL_COUNTER_MASK 0xFF000000ULL
+
+#define P6_EVNTSEL_MASK \
+ (P6_EVNTSEL_EVENT_MASK | \
+ P6_EVNTSEL_UNIT_MASK | \
+ P6_EVNTSEL_EDGE_MASK | \
+ P6_EVNTSEL_INV_MASK | \
+ P6_EVNTSEL_COUNTER_MASK)
+
+ return event & P6_EVNTSEL_MASK;
+}
+
+
+/*
* Intel PerfMon v3. Used on Core2 and later.
*/
static const u64 intel_perfmon_event_map[] =
@@ -726,6 +764,23 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
return 0;
}

+static void p6_pmu_disable_all(void)
+{
+ struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+ unsigned long val;
+
+ if (!cpuc->enabled)
+ return;
+
+ cpuc->enabled = 0;
+ barrier();
+
+ /* p6 only has one enable register */
+ rdmsrl(MSR_P6_EVNTSEL0, val);
+ val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
+ wrmsrl(MSR_P6_EVNTSEL0, val);
+}
+
static void intel_pmu_disable_all(void)
{
wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
@@ -767,6 +822,23 @@ void hw_perf_disable(void)
return x86_pmu.disable_all();
}

+static void p6_pmu_enable_all(void)
+{
+ struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+ unsigned long val;
+
+ if (cpuc->enabled)
+ return;
+
+ cpuc->enabled = 1;
+ barrier();
+
+ /* p6 only has one enable register */
+ rdmsrl(MSR_P6_EVNTSEL0, val);
+ val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+ wrmsrl(MSR_P6_EVNTSEL0, val);
+}
+
static void intel_pmu_enable_all(void)
{
wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
@@ -819,16 +891,13 @@ static inline void intel_pmu_ack_status(u64 ack)

static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
{
- int err;
- err = checking_wrmsrl(hwc->config_base + idx,
+ (void)checking_wrmsrl(hwc->config_base + idx,
hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE);
}

static inline void x86_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
{
- int err;
- err = checking_wrmsrl(hwc->config_base + idx,
- hwc->config);
+ (void)checking_wrmsrl(hwc->config_base + idx, hwc->config);
}

static inline void
@@ -836,13 +905,24 @@ intel_pmu_disable_fixed(struct hw_perf_counter *hwc, int __idx)
{
int idx = __idx - X86_PMC_IDX_FIXED;
u64 ctrl_val, mask;
- int err;

mask = 0xfULL << (idx * 4);

rdmsrl(hwc->config_base, ctrl_val);
ctrl_val &= ~mask;
- err = checking_wrmsrl(hwc->config_base, ctrl_val);
+ (void)checking_wrmsrl(hwc->config_base, ctrl_val);
+}
+
+static inline void
+p6_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
+{
+ struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+ unsigned long val = ARCH_PERFMON_EVENTSEL0_ENABLE;
+
+ if (!cpuc->enabled)
+ val = 0;
+
+ (void)checking_wrmsrl(hwc->config_base + idx, val);
}

static inline void
@@ -943,6 +1023,17 @@ intel_pmu_enable_fixed(struct hw_perf_counter *hwc, int __idx)
err = checking_wrmsrl(hwc->config_base, ctrl_val);
}

+static void p6_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
+{
+ struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+
+ if (cpuc->enabled)
+ x86_pmu_enable_counter(hwc, idx);
+ else
+ x86_pmu_disable_counter(hwc, idx);
+}
+
+
static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
{
if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
@@ -1176,6 +1267,49 @@ static void intel_pmu_reset(void)
local_irq_restore(flags);
}

+static int p6_pmu_handle_irq(struct pt_regs *regs)
+{
+ struct perf_sample_data data;
+ struct cpu_hw_counters *cpuc;
+ struct perf_counter *counter;
+ struct hw_perf_counter *hwc;
+ int idx, handled = 0;
+ u64 val;
+
+ data.regs = regs;
+ data.addr = 0;
+
+ cpuc = &__get_cpu_var(cpu_hw_counters);
+
+ for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ if (!test_bit(idx, cpuc->active_mask))
+ continue;
+
+ counter = cpuc->counters[idx];
+ hwc = &counter->hw;
+
+ val = x86_perf_counter_update(counter, hwc, idx);
+ if (val & (1ULL << (x86_pmu.counter_bits - 1)))
+ continue;
+
+ /*
+ * counter overflow
+ */
+ handled = 1;
+ data.period = counter->hw.last_period;
+
+ if (!x86_perf_counter_set_period(counter, hwc, idx))
+ continue;
+
+ if (perf_counter_overflow(counter, 1, &data))
+ p6_pmu_disable_counter(hwc, idx);
+ }
+
+ if (handled)
+ inc_irq_stat(apic_perf_irqs);
+
+ return handled;
+}

/*
* This handler is triggered by the local APIC, so the APIC IRQ handling
@@ -1185,14 +1319,13 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
{
struct perf_sample_data data;
struct cpu_hw_counters *cpuc;
- int bit, cpu, loops;
+ int bit, loops;
u64 ack, status;

data.regs = regs;
data.addr = 0;

- cpu = smp_processor_id();
- cpuc = &per_cpu(cpu_hw_counters, cpu);
+ cpuc = &__get_cpu_var(cpu_hw_counters);

perf_disable();
status = intel_pmu_get_status();
@@ -1249,14 +1382,13 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
struct cpu_hw_counters *cpuc;
struct perf_counter *counter;
struct hw_perf_counter *hwc;
- int cpu, idx, handled = 0;
+ int idx, handled = 0;
u64 val;

data.regs = regs;
data.addr = 0;

- cpu = smp_processor_id();
- cpuc = &per_cpu(cpu_hw_counters, cpu);
+ cpuc = &__get_cpu_var(cpu_hw_counters);

for (idx = 0; idx < x86_pmu.num_counters; idx++) {
if (!test_bit(idx, cpuc->active_mask))
@@ -1353,6 +1485,32 @@ static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
.priority = 1
};

+static struct x86_pmu p6_pmu = {
+ .name = "p6",
+ .handle_irq = p6_pmu_handle_irq,
+ .disable_all = p6_pmu_disable_all,
+ .enable_all = p6_pmu_enable_all,
+ .enable = p6_pmu_enable_counter,
+ .disable = p6_pmu_disable_counter,
+ .eventsel = MSR_P6_EVNTSEL0,
+ .perfctr = MSR_P6_PERFCTR0,
+ .event_map = p6_pmu_event_map,
+ .raw_event = p6_pmu_raw_event,
+ .max_events = ARRAY_SIZE(p6_perfmon_event_map),
+ .max_period = (1ULL << 31) - 1,
+ .version = 0,
+ .num_counters = 2,
+ /*
+ * Counters have 40 bits implemented. However they are designed such
+ * that bits [32-39] are sign extensions of bit 31. As such the
+ * effective width of a counter for P6-like PMU is 32 bits only.
+ *
+ * See IA-32 Intel Architecture Software developer manual Vol 3B
+ */
+ .counter_bits = 32,
+ .counter_mask = (1ULL << 32) - 1,
+};
+
static struct x86_pmu intel_pmu = {
.name = "Intel",
.handle_irq = intel_pmu_handle_irq,
@@ -1392,6 +1550,41 @@ static struct x86_pmu amd_pmu = {
.max_period = (1ULL << 47) - 1,
};

+static int p6_pmu_init(void)
+{
+ int high, low;
+
+ switch (boot_cpu_data.x86_model) {
+ case 1:
+ case 3: /* Pentium Pro */
+ case 5:
+ case 6: /* Pentium II */
+ case 7:
+ case 8:
+ case 11: /* Pentium III */
+ break;
+ case 9:
+ case 13:
+ /* for Pentium M, we need to check if PMU exist */
+ rdmsr(MSR_IA32_MISC_ENABLE, low, high);
+ if (low & MSR_IA32_MISC_ENABLE_EMON)
+ break;
+ default:
+ pr_cont("unsupported p6 CPU model %d ",
+ boot_cpu_data.x86_model);
+ return -ENODEV;
+ }
+
+ if (!cpu_has_apic) {
+ pr_info("no Local APIC, try rebooting with lapic");
+ return -ENODEV;
+ }
+
+ x86_pmu = p6_pmu;
+
+ return 0;
+}
+
static int intel_pmu_init(void)
{
union cpuid10_edx edx;
@@ -1400,8 +1593,14 @@ static int intel_pmu_init(void)
unsigned int ebx;
int version;

- if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
+ if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
+ /* check for P6 processor family */
+ if (boot_cpu_data.x86 == 6) {
+ return p6_pmu_init();
+ } else {
return -ENODEV;
+ }
+ }

/*
* Check whether the Architectural PerfMon supports
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 27887c9..53bb955 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -1,7 +1,13 @@
#ifndef _PERF_PERF_H
#define _PERF_PERF_H

-#if defined(__x86_64__) || defined(__i386__)
+#if defined(__i386__)
+#include "../../arch/x86/include/asm/unistd.h"
+#define rmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
+#define cpu_relax() asm volatile("rep; nop" ::: "memory");
+#endif
+
+#if defined(__x86_64__)
#include "../../arch/x86/include/asm/unistd.h"
#define rmb() asm volatile("lfence" ::: "memory")
#define cpu_relax() asm volatile("rep; nop" ::: "memory");