Hello
the following patchset enables overflow interrupts on Knights Corner,
the initial KNC PMU driver that was included in 3.7-rc1 did not
support this.
The first patch should be straightforward.
The second should be too, but it relates to a problem with the p6 PMU
that I brought up in a separate thread.
The third patch copies code over from the perf_event_intel.c interrupt
handler. Unfortunately KNC and x86 architectural perfmon use
different MSR numbers. The proper fix might be to make this
generic and have function vectors for the status/ack functions,
but since they are inline and probably performance critical
I took the easy way out and just duplicated the code.
Thanks,
Vince Weaver
[email protected]
--
char x,_,*O="yyyyl1f41mm65mm5OU5m5_55m5555mmGUml1ED1ooG_ol\\lDClN:eml9niiobTQO"
"m8dlYoo=HUmmaNEnCDY[l1G@7oo975l1>UImmSG3m5`@Om5<OGm5__5mm4S_l1DTiyyyy";void X(
int x){while(x>-1)printf("\033[4%dm ",x%2*7),x-=2;}int main(){while(*O){for(x=
30;x--;)_=*O-48,X(_>>6?59:(_>>x%6)&1),x*=_<64,O+=!(x%6);X(5),X(0),puts("");}}
Early versions of KNC chips have a bug where bits above 32 were
not properly set. We worked around this by only using the bottom
32 bits (out of 40 that should be available).
It turns out this workaround breaks overflow handling.
The buggy silicon will in theory never be used in production systems,
so remove this workaround so we get proper overflow support.
Signed-off-by: Vince Weaver <[email protected]>
diff -ur linux-3.7-rc1.orig/arch/x86/kernel/cpu/perf_event_knc.c linux-3.7-rc1/arch/x86/kernel/cpu/perf_event_knc.c
--- linux-3.7-rc1.orig/arch/x86/kernel/cpu/perf_event_knc.c 2012-10-14 17:41:04.000000000 -0400
+++ linux-3.7-rc1/arch/x86/kernel/cpu/perf_event_knc.c 2012-10-17 11:50:08.688394147 -0400
@@ -226,12 +226,11 @@
.event_map = knc_pmu_event_map,
.max_events = ARRAY_SIZE(knc_perfmon_event_map),
.apic = 1,
- .max_period = (1ULL << 31) - 1,
+ .max_period = (1ULL << 39) - 1,
.version = 0,
.num_counters = 2,
- /* in theory 40 bits, early silicon is buggy though */
- .cntval_bits = 32,
- .cntval_mask = (1ULL << 32) - 1,
+ .cntval_bits = 40,
+ .cntval_mask = (1ULL << 40) - 1,
.get_event_constraints = x86_get_event_constraints,
.event_constraints = knc_event_constraints,
.format_attrs = intel_knc_formats_attr,
x86_pmu.enable() is called from x86_pmu_enable() with cpuc->enabled
set to 0. This means we weren't re-enabling the counters after
a context switch.
This patch just removes the check, as it should't be necessary
(and the equivelent x86_ generic code does not have the checks).
The origin of this problem is the KNC driver being based on the P6 one.
The P6 driver also has this issue, but works anyway due to various
lucky accidents.
Signed-off-by: Vince Weaver <[email protected]>
diff -ur linux-3.7-rc1.orig/arch/x86/kernel/cpu/perf_event_knc.c linux-3.7-rc1/arch/x86/kernel/cpu/perf_event_knc.c
--- linux-3.7-rc1.orig/arch/x86/kernel/cpu/perf_event_knc.c 2012-10-14 17:41:04.000000000 -0400
+++ linux-3.7-rc1/arch/x86/kernel/cpu/perf_event_knc.c 2012-10-17 11:48:22.952394235 -0400
@@ -173,26 +173,22 @@
static inline void
knc_pmu_disable_event(struct perf_event *event)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
u64 val;
val = hwc->config;
- if (cpuc->enabled)
- val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
+ val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
(void)wrmsrl_safe(hwc->config_base + hwc->idx, val);
}
static void knc_pmu_enable_event(struct perf_event *event)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
u64 val;
val = hwc->config;
- if (cpuc->enabled)
- val |= ARCH_PERFMON_EVENTSEL_ENABLE;
+ val |= ARCH_PERFMON_EVENTSEL_ENABLE;
(void)wrmsrl_safe(hwc->config_base + hwc->idx, val);
}
Although based on the P6 design, the interrupt mechnanism
for KNC more closely resembles the intel architectural perfmon one.
We can't just re-use that code though, because KNC has different
MSR numbers for the status and ack registers.
In this case we just cut-and paste from perf_event_intel.c
with some minor changes, as it looks like it would not be
worth the trouble to change that code to be MSR-configurable.
Signed-off-by: Vince Weaver <[email protected]>
diff -ur linux-3.7-rc1.orig/arch/x86/kernel/cpu/perf_event_knc.c linux-3.7-rc1/arch/x86/kernel/cpu/perf_event_knc.c
--- linux-3.7-rc1.orig/arch/x86/kernel/cpu/perf_event_knc.c 2012-10-14 17:41:04.000000000 -0400
+++ linux-3.7-rc1/arch/x86/kernel/cpu/perf_event_knc.c 2012-10-17 12:11:48.604393067 -0400
@@ -3,6 +3,8 @@
#include <linux/perf_event.h>
#include <linux/types.h>
+#include <asm/hardirq.h>
+
#include "perf_event.h"
static const u64 knc_perfmon_event_map[] =
@@ -197,6 +199,80 @@
(void)wrmsrl_safe(hwc->config_base + hwc->idx, val);
}
+static inline u64 knc_pmu_get_status(void)
+{
+ u64 status;
+
+ rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_STATUS, status);
+
+ return status;
+}
+
+static inline void knc_pmu_ack_status(u64 ack)
+{
+ wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL, ack);
+}
+
+static int knc_pmu_handle_irq(struct pt_regs *regs)
+{
+ struct perf_sample_data data;
+ struct cpu_hw_events *cpuc;
+ int bit, loops;
+ u64 status;
+ int handled=0;
+
+ cpuc = &__get_cpu_var(cpu_hw_events);
+
+ knc_pmu_disable_all();
+
+ status = knc_pmu_get_status();
+ if (!status) {
+ knc_pmu_enable_all(0);
+ return handled;
+ }
+
+ loops = 0;
+again:
+ knc_pmu_ack_status(status);
+ if (++loops > 100) {
+ WARN_ONCE(1, "perfevents: irq loop stuck!\n");
+ perf_event_print_debug();
+ goto done;
+ }
+
+ inc_irq_stat(apic_perf_irqs);
+
+ for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
+ struct perf_event *event = cpuc->events[bit];
+
+ handled++;
+
+ if (!test_bit(bit, cpuc->active_mask))
+ continue;
+
+ if (!intel_pmu_save_and_restart(event))
+ continue;
+
+ perf_sample_data_init(&data, 0, event->hw.last_period);
+
+ if (perf_event_overflow(event, &data, regs))
+ x86_pmu_stop(event, 0);
+ }
+
+ /*
+ * Repeat if there is more work to be done:
+ */
+ status = knc_pmu_get_status();
+ if (status)
+ goto again;
+
+done:
+ knc_pmu_enable_all(0);
+
+ return handled;
+}
+
+
PMU_FORMAT_ATTR(event, "config:0-7" );
PMU_FORMAT_ATTR(umask, "config:8-15" );
PMU_FORMAT_ATTR(edge, "config:18" );
@@ -214,7 +290,7 @@
static __initconst struct x86_pmu knc_pmu = {
.name = "knc",
- .handle_irq = x86_pmu_handle_irq,
+ .handle_irq = knc_pmu_handle_irq,
.disable_all = knc_pmu_disable_all,
.enable_all = knc_pmu_enable_all,
.enable = knc_pmu_enable_event,
* Vince Weaver <[email protected]> wrote:
> Hello
>
> the following patchset enables overflow interrupts on Knights Corner,
> the initial KNC PMU driver that was included in 3.7-rc1 did not
> support this.
Nice!
> The first patch should be straightforward.
> The second should be too, but it relates to a problem with the p6 PMU
> that I brought up in a separate thread.
> The third patch copies code over from the perf_event_intel.c interrupt
> handler. Unfortunately KNC and x86 architectural perfmon use
> different MSR numbers. The proper fix might be to make this
> generic and have function vectors for the status/ack functions,
> but since they are inline and probably performance critical
> I took the easy way out and just duplicated the code.
The duplication looks pretty limited to me, so I don't think
it's a problem.
How well tested is this on real hardware and how robust is the
hardware with this? Since it's a new PMU driver for v3.7, and if
these are reasonably well tested, then we could send these to
Linus via perf/urgent, so that they don't miss and have to wait
all the way to v3.8.
Thanks,
Ingo
On Wed, 17 Oct 2012, Ingo Molnar wrote:
> How well tested is this on real hardware and how robust is the
> hardware with this? Since it's a new PMU driver for v3.7, and if
> these are reasonably well tested, then we could send these to
> Linus via perf/urgent, so that they don't miss and have to wait
> all the way to v3.8.
You probably won't like this answer.
The patches have been tested against the 2.6.34 kernel that Intel provides
for KNC (found here http://software.intel.com/en-us/forums/topic/278102 )
perf stat, perf record, and PAPI have all been tested and work.
The patches I've sent are the 2.6.34 ones forward ported to the 3.7-rc1
kernel. I haven't been able to run a 3.7-rc kernel on the KNC board,
because there are other patches to mainline needed to get KNC running and
they'd all have to be forward-ported from 2.6.34.
Since the knc pmu driver closely models the p6 one, it was straightforward
to use it as a basis for the forward port. Everything _should_ work but
I've only compile tested.
Vince
[email protected]
* Vince Weaver <[email protected]> wrote:
> On Wed, 17 Oct 2012, Ingo Molnar wrote:
>
> > How well tested is this on real hardware and how robust is the
> > hardware with this? Since it's a new PMU driver for v3.7, and if
> > these are reasonably well tested, then we could send these to
> > Linus via perf/urgent, so that they don't miss and have to wait
> > all the way to v3.8.
>
> You probably won't like this answer.
>
> The patches have been tested against the 2.6.34 kernel that
> Intel provides for KNC (found here
> http://software.intel.com/en-us/forums/topic/278102 )
>
> perf stat, perf record, and PAPI have all been tested and
> work.
>
> The patches I've sent are the 2.6.34 ones forward ported to
> the 3.7-rc1 kernel. I haven't been able to run a 3.7-rc
> kernel on the KNC board, because there are other patches to
> mainline needed to get KNC running and they'd all have to be
> forward-ported from 2.6.34.
>
> Since the knc pmu driver closely models the p6 one, it was
> straightforward to use it as a basis for the forward port.
> Everything _should_ work but I've only compile tested.
Ok. As long as it's all identical I'd still be inclined to
include it in v3.7, to reduce version skew. If the v3.7-rc1
kernel won't even boot it's not like we'll be able to further
regress it.
Thanks,
Ingo
On Wed, 17 Oct 2012, Ingo Molnar wrote:
> > Since the knc pmu driver closely models the p6 one, it was
> > straightforward to use it as a basis for the forward port.
> > Everything _should_ work but I've only compile tested.
>
> Ok. As long as it's all identical I'd still be inclined to
> include it in v3.7, to reduce version skew. If the v3.7-rc1
> kernel won't even boot it's not like we'll be able to further
> regress it.
yes, it definitely shouldn't cause any regressions, and in fact the
version in 3.7-rc1 might not work properly without patch2 in this
set applied due to the cpuc->enabled problem that it took us a while to
track down.
I looked into getting a 3.7 kernel up and going on the KNC board, but
the diff between Intel's release and stock 2.6.34.11 has 70k
lines. Much of that is kdb, but there's also a lot of low-level
changes too, some of it due to the fact that KNC is 64-bit x86 but has no
support for SSE (so you need to handle that properly or none of your
userspace will run).
Thanks,
Vince Weaver
[email protected]
* Vince Weaver <[email protected]> wrote:
> I looked into getting a 3.7 kernel up and going on the KNC
> board, but the diff between Intel's release and stock
> 2.6.34.11 has 70k lines. Much of that is kdb, but there's
> also a lot of low-level changes too, some of it due to the
> fact that KNC is 64-bit x86 but has no support for SSE (so you
> need to handle that properly or none of your userspace will
> run).
Getting that support upstream would definitely be useful, so if
you feel so inclined splitting out (or creating anew) the
required patches ...
I don't know how it's structured, but making the !SSE
distinction runtime would be strongly preferred over any
compile-time .config switchery. Otherwise, the guiding principle
is that we'll do whatever it takes to support the hardware, with
the reasonably best possible cleanliness.
Thanks,
Ingo
Commit-ID: ae5ba47a990a18c869d66916fd72fb334c45cf91
Gitweb: http://git.kernel.org/tip/ae5ba47a990a18c869d66916fd72fb334c45cf91
Author: Vince Weaver <[email protected]>
AuthorDate: Wed, 17 Oct 2012 13:03:21 -0400
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 24 Oct 2012 12:00:48 +0200
perf/x86: Make Intel KNC use full 40-bit width of counters
Early versions of Intel KNC chips have a bug where bits above 32
were not properly set. We worked around this by only using the
bottom 32 bits (out of 40 that should be available).
It turns out this workaround breaks overflow handling.
The buggy silicon will in theory never be used in production
systems, so remove this workaround so we get proper overflow
support.
Signed-off-by: Vince Weaver <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Arnaldo Carvalho de Melo <[email protected]>
Cc: [email protected]
Cc: Meadows Lawrence F <[email protected]>
Link: http://lkml.kernel.org/r/alpine.DEB.2.02.1210171302140.23243@vincent-weaver-1.um.maine.edu
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/kernel/cpu/perf_event_knc.c | 7 +++----
1 files changed, 3 insertions(+), 4 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event_knc.c b/arch/x86/kernel/cpu/perf_event_knc.c
index 7c46bfd..73bcfbd 100644
--- a/arch/x86/kernel/cpu/perf_event_knc.c
+++ b/arch/x86/kernel/cpu/perf_event_knc.c
@@ -226,12 +226,11 @@ static __initconst struct x86_pmu knc_pmu = {
.event_map = knc_pmu_event_map,
.max_events = ARRAY_SIZE(knc_perfmon_event_map),
.apic = 1,
- .max_period = (1ULL << 31) - 1,
+ .max_period = (1ULL << 39) - 1,
.version = 0,
.num_counters = 2,
- /* in theory 40 bits, early silicon is buggy though */
- .cntval_bits = 32,
- .cntval_mask = (1ULL << 32) - 1,
+ .cntval_bits = 40,
+ .cntval_mask = (1ULL << 40) - 1,
.get_event_constraints = x86_get_event_constraints,
.event_constraints = knc_event_constraints,
.format_attrs = intel_knc_formats_attr,
Commit-ID: 7d011962afbaa6e572cd8e0dbb7abf773e166e64
Gitweb: http://git.kernel.org/tip/7d011962afbaa6e572cd8e0dbb7abf773e166e64
Author: Vince Weaver <[email protected]>
AuthorDate: Wed, 17 Oct 2012 13:04:33 -0400
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 24 Oct 2012 12:00:49 +0200
perf/x86: Remove cpuc->enable check on Intl KNC event enable/disable
x86_pmu.enable() is called from x86_pmu_enable() with
cpuc->enabled set to 0. This means we weren't re-enabling the
counters after a context switch.
This patch just removes the check, as it should't be necessary
(and the equivelent x86_ generic code does not have the checks).
The origin of this problem is the KNC driver being based on the
P6 one. The P6 driver also has this issue, but works anyway
due to various lucky accidents.
Signed-off-by: Vince Weaver <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Arnaldo Carvalho de Melo <[email protected]>
Cc: [email protected]
Cc: Meadows
Cc: Lawrence F <[email protected]>
Link: http://lkml.kernel.org/r/alpine.DEB.2.02.1210171303290.23243@vincent-weaver-1.um.maine.edu
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/kernel/cpu/perf_event_knc.c | 8 ++------
1 files changed, 2 insertions(+), 6 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event_knc.c b/arch/x86/kernel/cpu/perf_event_knc.c
index 73bcfbd..f3a2af4 100644
--- a/arch/x86/kernel/cpu/perf_event_knc.c
+++ b/arch/x86/kernel/cpu/perf_event_knc.c
@@ -173,26 +173,22 @@ static void knc_pmu_enable_all(int added)
static inline void
knc_pmu_disable_event(struct perf_event *event)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
u64 val;
val = hwc->config;
- if (cpuc->enabled)
- val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
+ val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
(void)wrmsrl_safe(hwc->config_base + hwc->idx, val);
}
static void knc_pmu_enable_event(struct perf_event *event)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
u64 val;
val = hwc->config;
- if (cpuc->enabled)
- val |= ARCH_PERFMON_EVENTSEL_ENABLE;
+ val |= ARCH_PERFMON_EVENTSEL_ENABLE;
(void)wrmsrl_safe(hwc->config_base + hwc->idx, val);
}
Commit-ID: e4074b3049f99c6ad6e1a33e6d93d8ec0652e2c1
Gitweb: http://git.kernel.org/tip/e4074b3049f99c6ad6e1a33e6d93d8ec0652e2c1
Author: Vince Weaver <[email protected]>
AuthorDate: Wed, 17 Oct 2012 13:05:45 -0400
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 24 Oct 2012 12:00:49 +0200
perf/x86: Enable overflow on Intel KNC with a custom knc_pmu_handle_irq()
Although based on the Intel P6 design, the interrupt mechnanism
for KNC more closely resembles the Intel architectural
perfmon one.
We can't just re-use that code though, because KNC has different
MSR numbers for the status and ack registers.
In this case we just cut-and paste from perf_event_intel.c
with some minor changes, as it looks like it would not be
worth the trouble to change that code to be MSR-configurable.
Signed-off-by: Vince Weaver <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Arnaldo Carvalho de Melo <[email protected]>
Cc: [email protected]
Cc: Meadows Lawrence F <[email protected]>
Link: http://lkml.kernel.org/r/alpine.DEB.2.02.1210171304410.23243@vincent-weaver-1.um.maine.edu
[ Small stylistic edits. ]
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/kernel/cpu/perf_event_knc.c | 78 +++++++++++++++++++++++++++++++++-
1 files changed, 77 insertions(+), 1 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event_knc.c b/arch/x86/kernel/cpu/perf_event_knc.c
index f3a2af4..4b7731b 100644
--- a/arch/x86/kernel/cpu/perf_event_knc.c
+++ b/arch/x86/kernel/cpu/perf_event_knc.c
@@ -3,6 +3,8 @@
#include <linux/perf_event.h>
#include <linux/types.h>
+#include <asm/hardirq.h>
+
#include "perf_event.h"
static const u64 knc_perfmon_event_map[] =
@@ -193,6 +195,80 @@ static void knc_pmu_enable_event(struct perf_event *event)
(void)wrmsrl_safe(hwc->config_base + hwc->idx, val);
}
+static inline u64 knc_pmu_get_status(void)
+{
+ u64 status;
+
+ rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_STATUS, status);
+
+ return status;
+}
+
+static inline void knc_pmu_ack_status(u64 ack)
+{
+ wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL, ack);
+}
+
+static int knc_pmu_handle_irq(struct pt_regs *regs)
+{
+ struct perf_sample_data data;
+ struct cpu_hw_events *cpuc;
+ int handled = 0;
+ int bit, loops;
+ u64 status;
+
+ cpuc = &__get_cpu_var(cpu_hw_events);
+
+ knc_pmu_disable_all();
+
+ status = knc_pmu_get_status();
+ if (!status) {
+ knc_pmu_enable_all(0);
+ return handled;
+ }
+
+ loops = 0;
+again:
+ knc_pmu_ack_status(status);
+ if (++loops > 100) {
+ WARN_ONCE(1, "perf: irq loop stuck!\n");
+ perf_event_print_debug();
+ goto done;
+ }
+
+ inc_irq_stat(apic_perf_irqs);
+
+ for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
+ struct perf_event *event = cpuc->events[bit];
+
+ handled++;
+
+ if (!test_bit(bit, cpuc->active_mask))
+ continue;
+
+ if (!intel_pmu_save_and_restart(event))
+ continue;
+
+ perf_sample_data_init(&data, 0, event->hw.last_period);
+
+ if (perf_event_overflow(event, &data, regs))
+ x86_pmu_stop(event, 0);
+ }
+
+ /*
+ * Repeat if there is more work to be done:
+ */
+ status = knc_pmu_get_status();
+ if (status)
+ goto again;
+
+done:
+ knc_pmu_enable_all(0);
+
+ return handled;
+}
+
+
PMU_FORMAT_ATTR(event, "config:0-7" );
PMU_FORMAT_ATTR(umask, "config:8-15" );
PMU_FORMAT_ATTR(edge, "config:18" );
@@ -210,7 +286,7 @@ static struct attribute *intel_knc_formats_attr[] = {
static __initconst struct x86_pmu knc_pmu = {
.name = "knc",
- .handle_irq = x86_pmu_handle_irq,
+ .handle_irq = knc_pmu_handle_irq,
.disable_all = knc_pmu_disable_all,
.enable_all = knc_pmu_enable_all,
.enable = knc_pmu_enable_event,