The following patchset enables 4 additional performance counters in
AMD family 15h processors that counts northbridge events -- such as
number of DRAM accesses.
This patchset is based on previous work done by Robert Richter
<[email protected]> :
https://lkml.org/lkml/2012/6/19/324
The main differences are:
* The northbridge counters are indexed contiguously right above the
core performance counters.
* MSR address offset calculations are moved to architecture specific
files.
* Interrups are set up to be delivered only to a single core.
V3:
Addressed the following feedback/comments from Robert's review
* https://lkml.org/lkml/2012/11/16/484
* https://lkml.org/lkml/2012/11/26/162
V2:
Separate out Robert's patches, and add properly ordered certificate of
origins.
Jacob Shin (3):
perf, x86: Move MSR address offset calculation to architecture
specific files
perf, amd: Enable northbridge performance counters on AMD family 15h
perf, amd: Use proper naming scheme for AMD bit field definitions
Robert Richter (2):
perf, amd: Rework northbridge event constraints handler
perf, amd: Generalize northbridge constraints code for family 15h
arch/x86/include/asm/cpufeature.h | 2 +
arch/x86/include/asm/msr-index.h | 2 +
arch/x86/include/asm/perf_event.h | 13 +-
arch/x86/kernel/cpu/perf_event.h | 21 +--
arch/x86/kernel/cpu/perf_event_amd.c | 269 ++++++++++++++++++++++++----------
5 files changed, 211 insertions(+), 96 deletions(-)
--
1.7.9.5
Update these AMD bit field names to be consistent with naming
convention followed by the rest of the file.
Signed-off-by: Jacob Shin <[email protected]>
---
arch/x86/include/asm/perf_event.h | 4 ++--
arch/x86/kernel/cpu/perf_event_amd.c | 8 ++++----
2 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 63aba8f..57cb634 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -30,8 +30,8 @@
#define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL
#define AMD64_EVENTSEL_INT_CORE_ENABLE (1ULL << 36)
-#define AMD_PERFMON_EVENTSEL_GUESTONLY (1ULL << 40)
-#define AMD_PERFMON_EVENTSEL_HOSTONLY (1ULL << 41)
+#define AMD64_EVENTSEL_GUESTONLY (1ULL << 40)
+#define AMD64_EVENTSEL_HOSTONLY (1ULL << 41)
#define AMD64_EVENTSEL_INT_CORE_SEL_SHIFT 37
#define AMD64_EVENTSEL_INT_CORE_SEL_MASK \
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 5e3a6a3..0faa35f 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -188,9 +188,9 @@ static int amd_core_hw_config(struct perf_event *event)
event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR |
ARCH_PERFMON_EVENTSEL_OS);
else if (event->attr.exclude_host)
- event->hw.config |= AMD_PERFMON_EVENTSEL_GUESTONLY;
+ event->hw.config |= AMD64_EVENTSEL_GUESTONLY;
else if (event->attr.exclude_guest)
- event->hw.config |= AMD_PERFMON_EVENTSEL_HOSTONLY;
+ event->hw.config |= AMD64_EVENTSEL_HOSTONLY;
return 0;
}
@@ -429,7 +429,7 @@ static void amd_pmu_cpu_starting(int cpu)
struct amd_nb *nb;
int i, nb_id;
- cpuc->perf_ctr_virt_mask = AMD_PERFMON_EVENTSEL_HOSTONLY;
+ cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
if (boot_cpu_data.x86_max_cores < 2)
return;
@@ -782,7 +782,7 @@ void amd_pmu_disable_virt(void)
* SVM is disabled the Guest-only bits still gets set and the counter
* will not count anything.
*/
- cpuc->perf_ctr_virt_mask = AMD_PERFMON_EVENTSEL_HOSTONLY;
+ cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
/* Reload all events */
x86_pmu_disable_all();
--
1.7.9.5
From: Robert Richter <[email protected]>
Generalize northbridge constraints code for family 10h so that later
we can reuse the same code path with other AMD processor families that
have the same northbridge event constraints.
Signed-off-by: Robert Richter <[email protected]>
Signed-off-by: Jacob Shin <[email protected]>
---
arch/x86/kernel/cpu/perf_event_amd.c | 43 ++++++++++++++++++++--------------
1 file changed, 25 insertions(+), 18 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index d60c5c7..04ef43f 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -188,20 +188,13 @@ static inline int amd_has_nb(struct cpu_hw_events *cpuc)
return nb && nb->nb_id != -1;
}
-static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
- struct perf_event *event)
+static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc,
+ struct perf_event *event)
{
- struct hw_perf_event *hwc = &event->hw;
struct amd_nb *nb = cpuc->amd_nb;
int i;
/*
- * only care about NB events
- */
- if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc)))
- return;
-
- /*
* need to scan whole list because event may not have
* been assigned during scheduling
*
@@ -247,12 +240,13 @@ static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
*
* Given that resources are allocated (cmpxchg), they must be
* eventually freed for others to use. This is accomplished by
- * calling amd_put_event_constraints().
+ * calling __amd_put_nb_event_constraints()
*
* Non NB events are not impacted by this restriction.
*/
static struct event_constraint *
-amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
+__amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
+ struct event_constraint *c)
{
struct hw_perf_event *hwc = &event->hw;
struct amd_nb *nb = cpuc->amd_nb;
@@ -260,12 +254,6 @@ amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
int idx, new = -1;
/*
- * if not NB event or no NB, then no constraints
- */
- if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc)))
- return &unconstrained;
-
- /*
* detect if already present, if so reuse
*
* cannot merge with actual allocation
@@ -275,7 +263,7 @@ amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
* because of successive calls to x86_schedule_events() from
* hw_perf_group_sched_in() without hw_perf_enable()
*/
- for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ for_each_set_bit(idx, c->idxmsk, X86_PMC_IDX_MAX) {
if (new == -1 || hwc->idx == idx)
/* assign free slot, prefer hwc->idx */
old = cmpxchg(nb->owners + idx, NULL, event);
@@ -391,6 +379,25 @@ static void amd_pmu_cpu_dead(int cpu)
}
}
+static struct event_constraint *
+amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
+{
+ /*
+ * if not NB event or no NB, then no constraints
+ */
+ if (!(amd_has_nb(cpuc) && amd_is_nb_event(&event->hw)))
+ return &unconstrained;
+
+ return __amd_get_nb_event_constraints(cpuc, event, &unconstrained);
+}
+
+static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ if (amd_has_nb(cpuc) && amd_is_nb_event(&event->hw))
+ __amd_put_nb_event_constraints(cpuc, event);
+}
+
PMU_FORMAT_ATTR(event, "config:0-7,32-35");
PMU_FORMAT_ATTR(umask, "config:8-15" );
PMU_FORMAT_ATTR(edge, "config:18" );
--
1.7.9.5
Move counter index to MSR address offset calculation to architecture
specific files. This prepares the way for perf_event_amd to enable
counter addresses that are not contiguous -- for example AMD Family
15h processors have 6 core performance counters starting at 0xc0010200
and 4 northbridge performance counters starting at 0xc0010240.
Signed-off-by: Jacob Shin <[email protected]>
---
arch/x86/kernel/cpu/perf_event.h | 21 +++++---------------
arch/x86/kernel/cpu/perf_event_amd.c | 35 ++++++++++++++++++++++++++++++++++
2 files changed, 40 insertions(+), 16 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 271d257..aacf025 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -325,6 +325,7 @@ struct x86_pmu {
int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
unsigned eventsel;
unsigned perfctr;
+ int (*addr_offset)(int index);
u64 (*event_map)(int);
int max_events;
int num_counters;
@@ -444,28 +445,16 @@ extern u64 __read_mostly hw_cache_extra_regs
u64 x86_perf_event_update(struct perf_event *event);
-static inline int x86_pmu_addr_offset(int index)
-{
- int offset;
-
- /* offset = X86_FEATURE_PERFCTR_CORE ? index << 1 : index */
- alternative_io(ASM_NOP2,
- "shll $1, %%eax",
- X86_FEATURE_PERFCTR_CORE,
- "=a" (offset),
- "a" (index));
-
- return offset;
-}
-
static inline unsigned int x86_pmu_config_addr(int index)
{
- return x86_pmu.eventsel + x86_pmu_addr_offset(index);
+ return x86_pmu.eventsel +
+ (x86_pmu.addr_offset ? x86_pmu.addr_offset(index) : index);
}
static inline unsigned int x86_pmu_event_addr(int index)
{
- return x86_pmu.perfctr + x86_pmu_addr_offset(index);
+ return x86_pmu.perfctr +
+ (x86_pmu.addr_offset ? x86_pmu.addr_offset(index) : index);
}
int x86_setup_perfctr(struct perf_event *event);
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 04ef43f..d6e3337 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -132,6 +132,40 @@ static u64 amd_pmu_event_map(int hw_event)
return amd_perfmon_event_map[hw_event];
}
+/*
+ * Previously calculated offsets
+ */
+static unsigned int addr_offsets[X86_PMC_IDX_MAX] __read_mostly;
+
+/*
+ * Legacy CPUs:
+ * 4 counters starting at 0xc0010000 each offset by 1
+ *
+ * CPUs with core performance counter extensions:
+ * 6 counters starting at 0xc0010200 each offset by 2
+ */
+static inline int amd_pmu_addr_offset(int index)
+{
+ int offset;
+
+ if (!index)
+ return index;
+
+ offset = addr_offsets[index];
+
+ if (offset)
+ return offset;
+
+ if (!cpu_has_perfctr_core)
+ offset = index;
+ else
+ offset = index << 1;
+
+ addr_offsets[index] = offset;
+
+ return offset;
+}
+
static int amd_pmu_hw_config(struct perf_event *event)
{
int ret;
@@ -570,6 +604,7 @@ static __initconst const struct x86_pmu amd_pmu = {
.schedule_events = x86_schedule_events,
.eventsel = MSR_K7_EVNTSEL0,
.perfctr = MSR_K7_PERFCTR0,
+ .addr_offset = amd_pmu_addr_offset,
.event_map = amd_pmu_event_map,
.max_events = ARRAY_SIZE(amd_perfmon_event_map),
.num_counters = AMD64_NUM_COUNTERS,
--
1.7.9.5
From: Robert Richter <[email protected]>
Code simplification. No functional changes.
Signed-off-by: Robert Richter <[email protected]>
Signed-off-by: Jacob Shin <[email protected]>
---
arch/x86/kernel/cpu/perf_event_amd.c | 68 +++++++++++++---------------------
1 file changed, 26 insertions(+), 42 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 4528ae7..d60c5c7 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -256,9 +256,8 @@ amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
struct amd_nb *nb = cpuc->amd_nb;
- struct perf_event *old = NULL;
- int max = x86_pmu.num_counters;
- int i, j, k = -1;
+ struct perf_event *old;
+ int idx, new = -1;
/*
* if not NB event or no NB, then no constraints
@@ -276,48 +275,33 @@ amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
* because of successive calls to x86_schedule_events() from
* hw_perf_group_sched_in() without hw_perf_enable()
*/
- for (i = 0; i < max; i++) {
- /*
- * keep track of first free slot
- */
- if (k == -1 && !nb->owners[i])
- k = i;
+ for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ if (new == -1 || hwc->idx == idx)
+ /* assign free slot, prefer hwc->idx */
+ old = cmpxchg(nb->owners + idx, NULL, event);
+ else if (nb->owners[idx] == event)
+ /* event already present */
+ old = event;
+ else
+ continue;
+
+ if (old && old != event)
+ continue;
+
+ /* reassign to this slot */
+ if (new != -1)
+ cmpxchg(nb->owners + new, event, NULL);
+ new = idx;
/* already present, reuse */
- if (nb->owners[i] == event)
- goto done;
- }
- /*
- * not present, so grab a new slot
- * starting either at:
- */
- if (hwc->idx != -1) {
- /* previous assignment */
- i = hwc->idx;
- } else if (k != -1) {
- /* start from free slot found */
- i = k;
- } else {
- /*
- * event not found, no slot found in
- * first pass, try again from the
- * beginning
- */
- i = 0;
- }
- j = i;
- do {
- old = cmpxchg(nb->owners+i, NULL, event);
- if (!old)
+ if (old == event)
break;
- if (++i == max)
- i = 0;
- } while (i != j);
-done:
- if (!old)
- return &nb->event_constraints[i];
-
- return &emptyconstraint;
+ }
+
+ if (new == -1)
+ return &emptyconstraint;
+
+ return &nb->event_constraints[new];
}
static struct amd_nb *amd_alloc_nb(int cpu)
--
1.7.9.5
On AMD family 15h processors, there are 4 new performance counters
(in addition to 6 core performance counters) that can be used for
counting northbridge events (i.e. DRAM accesses). Their bit fields are
almost identical to the core performance counters. However, unlike the
core performance counters, these MSRs are shared between multiple
cores (that share the same northbridge). We will reuse the same code
path as existing family 10h northbridge event constraints handler
logic to enforce this sharing.
These new counters are indexed contiguously right above the existing
core performance counters, and their indexes correspond to RDPMC ECX
values.
Signed-off-by: Jacob Shin <[email protected]>
---
arch/x86/include/asm/cpufeature.h | 2 +
arch/x86/include/asm/msr-index.h | 2 +
arch/x86/include/asm/perf_event.h | 9 +++
arch/x86/kernel/cpu/perf_event_amd.c | 131 ++++++++++++++++++++++++++++------
4 files changed, 122 insertions(+), 22 deletions(-)
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 8c297aa..b05c722 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -167,6 +167,7 @@
#define X86_FEATURE_TBM (6*32+21) /* trailing bit manipulations */
#define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */
#define X86_FEATURE_PERFCTR_CORE (6*32+23) /* core performance counter extensions */
+#define X86_FEATURE_PERFCTR_NB (6*32+24) /* core performance counter extensions */
/*
* Auxiliary flags: Linux defined - For features scattered in various
@@ -308,6 +309,7 @@ extern const char * const x86_power_flags[32];
#define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR)
#define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ)
#define cpu_has_perfctr_core boot_cpu_has(X86_FEATURE_PERFCTR_CORE)
+#define cpu_has_perfctr_nb boot_cpu_has(X86_FEATURE_PERFCTR_NB)
#define cpu_has_cx8 boot_cpu_has(X86_FEATURE_CX8)
#define cpu_has_cx16 boot_cpu_has(X86_FEATURE_CX16)
#define cpu_has_eager_fpu boot_cpu_has(X86_FEATURE_EAGER_FPU)
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 7f0edce..e67ff1e 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -157,6 +157,8 @@
/* Fam 15h MSRs */
#define MSR_F15H_PERF_CTL 0xc0010200
#define MSR_F15H_PERF_CTR 0xc0010201
+#define MSR_F15H_NB_PERF_CTL 0xc0010240
+#define MSR_F15H_NB_PERF_CTR 0xc0010241
/* Fam 10h MSRs */
#define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 4fabcdf..63aba8f 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -29,9 +29,14 @@
#define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23)
#define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL
+#define AMD64_EVENTSEL_INT_CORE_ENABLE (1ULL << 36)
#define AMD_PERFMON_EVENTSEL_GUESTONLY (1ULL << 40)
#define AMD_PERFMON_EVENTSEL_HOSTONLY (1ULL << 41)
+#define AMD64_EVENTSEL_INT_CORE_SEL_SHIFT 37
+#define AMD64_EVENTSEL_INT_CORE_SEL_MASK \
+ (0xFULL << AMD64_EVENTSEL_INT_CORE_SEL_SHIFT)
+
#define AMD64_EVENTSEL_EVENT \
(ARCH_PERFMON_EVENTSEL_EVENT | (0x0FULL << 32))
#define INTEL_ARCH_EVENT_MASK \
@@ -46,8 +51,12 @@
#define AMD64_RAW_EVENT_MASK \
(X86_RAW_EVENT_MASK | \
AMD64_EVENTSEL_EVENT)
+#define AMD64_RAW_EVENT_MASK_NB \
+ (AMD64_EVENTSEL_EVENT | \
+ ARCH_PERFMON_EVENTSEL_UMASK)
#define AMD64_NUM_COUNTERS 4
#define AMD64_NUM_COUNTERS_CORE 6
+#define AMD64_NUM_COUNTERS_NB 4
#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c
#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8)
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index d6e3337..5e3a6a3 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -132,6 +132,9 @@ static u64 amd_pmu_event_map(int hw_event)
return amd_perfmon_event_map[hw_event];
}
+static int num_core_counters;
+static struct event_constraint *amd_nb_event_constraint;
+
/*
* Previously calculated offsets
*/
@@ -143,6 +146,10 @@ static unsigned int addr_offsets[X86_PMC_IDX_MAX] __read_mostly;
*
* CPUs with core performance counter extensions:
* 6 counters starting at 0xc0010200 each offset by 2
+ *
+ * CPUs with north bridge performance counter extensions:
+ * 4 additional counters starting at 0xc0010240 each offset by 2
+ * (indexed right above either one of the above core counters)
*/
static inline int amd_pmu_addr_offset(int index)
{
@@ -156,7 +163,11 @@ static inline int amd_pmu_addr_offset(int index)
if (offset)
return offset;
- if (!cpu_has_perfctr_core)
+ if (amd_nb_event_constraint &&
+ test_bit(index, amd_nb_event_constraint->idxmsk))
+ offset = (MSR_F15H_NB_PERF_CTL - x86_pmu.eventsel) +
+ ((index - num_core_counters) << 1);
+ else if (!cpu_has_perfctr_core)
offset = index;
else
offset = index << 1;
@@ -166,21 +177,8 @@ static inline int amd_pmu_addr_offset(int index)
return offset;
}
-static int amd_pmu_hw_config(struct perf_event *event)
+static int amd_core_hw_config(struct perf_event *event)
{
- int ret;
-
- /* pass precise event sampling to ibs: */
- if (event->attr.precise_ip && get_ibs_caps())
- return -ENOENT;
-
- ret = x86_pmu_hw_config(event);
- if (ret)
- return ret;
-
- if (has_branch_stack(event))
- return -EOPNOTSUPP;
-
if (event->attr.exclude_host && event->attr.exclude_guest)
/*
* When HO == GO == 1 the hardware treats that as GO == HO == 0
@@ -194,10 +192,39 @@ static int amd_pmu_hw_config(struct perf_event *event)
else if (event->attr.exclude_guest)
event->hw.config |= AMD_PERFMON_EVENTSEL_HOSTONLY;
- if (event->attr.type != PERF_TYPE_RAW)
- return 0;
+ return 0;
+}
+
+/*
+ * NB counters do not support the following event select bits:
+ * Host/Guest only
+ * Counter mask
+ * Invert counter mask
+ * Edge detect
+ * OS/User mode
+ */
+static int amd_nb_hw_config(struct perf_event *event)
+{
+ if (event->attr.exclude_user || event->attr.exclude_kernel ||
+ event->attr.exclude_host || event->attr.exclude_guest)
+ return -EINVAL;
+
+ event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR |
+ ARCH_PERFMON_EVENTSEL_OS);
+
+ if (event->hw.config & ~(AMD64_RAW_EVENT_MASK_NB |
+ ARCH_PERFMON_EVENTSEL_INT))
+ return -EINVAL;
- event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK;
+ /* set up interrupts to be delivered only to this core */
+ if (amd_nb_event_constraint) {
+ struct cpuinfo_x86 *c = &cpu_data(event->cpu);
+
+ event->hw.config |= AMD64_EVENTSEL_INT_CORE_ENABLE;
+ event->hw.config &= ~AMD64_EVENTSEL_INT_CORE_SEL_MASK;
+ event->hw.config |= (u64)(c->cpu_core_id) <<
+ AMD64_EVENTSEL_INT_CORE_SEL_SHIFT;
+ }
return 0;
}
@@ -215,6 +242,11 @@ static inline int amd_is_nb_event(struct hw_perf_event *hwc)
return (hwc->config & 0xe0) == 0xe0;
}
+static inline int amd_is_perfctr_nb_event(struct hw_perf_event *hwc)
+{
+ return amd_nb_event_constraint && amd_is_nb_event(hwc);
+}
+
static inline int amd_has_nb(struct cpu_hw_events *cpuc)
{
struct amd_nb *nb = cpuc->amd_nb;
@@ -222,6 +254,30 @@ static inline int amd_has_nb(struct cpu_hw_events *cpuc)
return nb && nb->nb_id != -1;
}
+static int amd_pmu_hw_config(struct perf_event *event)
+{
+ int ret;
+
+ /* pass precise event sampling to ibs: */
+ if (event->attr.precise_ip && get_ibs_caps())
+ return -ENOENT;
+
+ if (has_branch_stack(event))
+ return -EOPNOTSUPP;
+
+ ret = x86_pmu_hw_config(event);
+ if (ret)
+ return ret;
+
+ if (event->attr.type == PERF_TYPE_RAW)
+ event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK;
+
+ if (amd_is_perfctr_nb_event(&event->hw))
+ return amd_nb_hw_config(event);
+
+ return amd_core_hw_config(event);
+}
+
static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc,
struct perf_event *event)
{
@@ -287,6 +343,9 @@ __amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *ev
struct perf_event *old;
int idx, new = -1;
+ if (!c)
+ c = &unconstrained;
+
/*
* detect if already present, if so reuse
*
@@ -422,7 +481,8 @@ amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
if (!(amd_has_nb(cpuc) && amd_is_nb_event(&event->hw)))
return &unconstrained;
- return __amd_get_nb_event_constraints(cpuc, event, &unconstrained);
+ return __amd_get_nb_event_constraints(cpuc, event,
+ amd_nb_event_constraint);
}
static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
@@ -521,6 +581,9 @@ static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09,
static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
+static struct event_constraint amd_NBPMC96 = EVENT_CONSTRAINT(0, 0x3C0, 0);
+static struct event_constraint amd_NBPMC74 = EVENT_CONSTRAINT(0, 0xF0, 0);
+
static struct event_constraint *
amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event)
{
@@ -586,8 +649,11 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *ev
return &amd_f15_PMC20;
}
case AMD_EVENT_NB:
- /* not yet implemented */
- return &emptyconstraint;
+ if (cpuc->is_fake)
+ return amd_nb_event_constraint;
+
+ return __amd_get_nb_event_constraints(cpuc, event,
+ amd_nb_event_constraint);
default:
return &emptyconstraint;
}
@@ -625,7 +691,7 @@ static __initconst const struct x86_pmu amd_pmu = {
static int setup_event_constraints(void)
{
- if (boot_cpu_data.x86 >= 0x15)
+ if (boot_cpu_data.x86 == 0x15)
x86_pmu.get_event_constraints = amd_get_event_constraints_f15h;
return 0;
}
@@ -655,6 +721,23 @@ static int setup_perfctr_core(void)
return 0;
}
+static int setup_perfctr_nb(void)
+{
+ if (!cpu_has_perfctr_nb)
+ return -ENODEV;
+
+ x86_pmu.num_counters += AMD64_NUM_COUNTERS_NB;
+
+ if (cpu_has_perfctr_core)
+ amd_nb_event_constraint = &amd_NBPMC96;
+ else
+ amd_nb_event_constraint = &amd_NBPMC74;
+
+ printk(KERN_INFO "perf: AMD northbridge performance counters detected\n");
+
+ return 0;
+}
+
__init int amd_pmu_init(void)
{
/* Performance-monitoring supported from K7 and later: */
@@ -666,6 +749,10 @@ __init int amd_pmu_init(void)
setup_event_constraints();
setup_perfctr_core();
+ num_core_counters = x86_pmu.num_counters;
+
+ setup_perfctr_nb();
+
/* Events are common for all AMDs */
memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
--
1.7.9.5
One minor comment:
On 26.11.12 16:48:30, Jacob Shin wrote:
> __init int amd_pmu_init(void)
> {
> /* Performance-monitoring supported from K7 and later: */
> @@ -666,6 +749,10 @@ __init int amd_pmu_init(void)
> setup_event_constraints();
> setup_perfctr_core();
>
> + num_core_counters = x86_pmu.num_counters;
I would better move this to setup_perfctr_nb().
> +
> + setup_perfctr_nb();
> +
> /* Events are common for all AMDs */
> memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
> sizeof(hw_cache_event_ids));
Otherwise the whole patch set looks good.
Acked-by: Robert Richter <[email protected]>
Thanks Jacob.
-Robert
On Tue, Nov 27, 2012 at 01:10:51PM +0100, Robert Richter wrote:
> One minor comment:
>
> On 26.11.12 16:48:30, Jacob Shin wrote:
> > __init int amd_pmu_init(void)
> > {
> > /* Performance-monitoring supported from K7 and later: */
> > @@ -666,6 +749,10 @@ __init int amd_pmu_init(void)
> > setup_event_constraints();
> > setup_perfctr_core();
> >
> > + num_core_counters = x86_pmu.num_counters;
>
> I would better move this to setup_perfctr_nb().
Okay, see revised patch 4/5 below
>
> > +
> > + setup_perfctr_nb();
> > +
> > /* Events are common for all AMDs */
> > memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
> > sizeof(hw_cache_event_ids));
>
> Otherwise the whole patch set looks good.
>
> Acked-by: Robert Richter <[email protected]>
Great! Thanks for taking the time!
--
>From 0d813d2c29740315b5c90e4e323361111c492afe Mon Sep 17 00:00:00 2001
From: Jacob Shin <[email protected]>
Date: Mon, 26 Nov 2012 14:38:55 -0600
Subject: [PATCH 1/1] perf, amd: Enable northbridge performance counters on
AMD family 15h
On AMD family 15h processors, there are 4 new performance counters
(in addition to 6 core performance counters) that can be used for
counting northbridge events (i.e. DRAM accesses). Their bit fields are
almost identical to the core performance counters. However, unlike the
core performance counters, these MSRs are shared between multiple
cores (that share the same northbridge). We will reuse the same code
path as existing family 10h northbridge event constraints handler
logic to enforce this sharing.
These new counters are indexed contiguously right above the existing
core performance counters, and their indexes correspond to RDPMC ECX
values.
Signed-off-by: Jacob Shin <[email protected]>
---
arch/x86/include/asm/cpufeature.h | 2 +
arch/x86/include/asm/msr-index.h | 2 +
arch/x86/include/asm/perf_event.h | 9 +++
arch/x86/kernel/cpu/perf_event_amd.c | 130 ++++++++++++++++++++++++++++------
4 files changed, 121 insertions(+), 22 deletions(-)
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 8c297aa..b05c722 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -167,6 +167,7 @@
#define X86_FEATURE_TBM (6*32+21) /* trailing bit manipulations */
#define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */
#define X86_FEATURE_PERFCTR_CORE (6*32+23) /* core performance counter extensions */
+#define X86_FEATURE_PERFCTR_NB (6*32+24) /* core performance counter extensions */
/*
* Auxiliary flags: Linux defined - For features scattered in various
@@ -308,6 +309,7 @@ extern const char * const x86_power_flags[32];
#define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR)
#define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ)
#define cpu_has_perfctr_core boot_cpu_has(X86_FEATURE_PERFCTR_CORE)
+#define cpu_has_perfctr_nb boot_cpu_has(X86_FEATURE_PERFCTR_NB)
#define cpu_has_cx8 boot_cpu_has(X86_FEATURE_CX8)
#define cpu_has_cx16 boot_cpu_has(X86_FEATURE_CX16)
#define cpu_has_eager_fpu boot_cpu_has(X86_FEATURE_EAGER_FPU)
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 7f0edce..e67ff1e 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -157,6 +157,8 @@
/* Fam 15h MSRs */
#define MSR_F15H_PERF_CTL 0xc0010200
#define MSR_F15H_PERF_CTR 0xc0010201
+#define MSR_F15H_NB_PERF_CTL 0xc0010240
+#define MSR_F15H_NB_PERF_CTR 0xc0010241
/* Fam 10h MSRs */
#define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 4fabcdf..63aba8f 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -29,9 +29,14 @@
#define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23)
#define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL
+#define AMD64_EVENTSEL_INT_CORE_ENABLE (1ULL << 36)
#define AMD_PERFMON_EVENTSEL_GUESTONLY (1ULL << 40)
#define AMD_PERFMON_EVENTSEL_HOSTONLY (1ULL << 41)
+#define AMD64_EVENTSEL_INT_CORE_SEL_SHIFT 37
+#define AMD64_EVENTSEL_INT_CORE_SEL_MASK \
+ (0xFULL << AMD64_EVENTSEL_INT_CORE_SEL_SHIFT)
+
#define AMD64_EVENTSEL_EVENT \
(ARCH_PERFMON_EVENTSEL_EVENT | (0x0FULL << 32))
#define INTEL_ARCH_EVENT_MASK \
@@ -46,8 +51,12 @@
#define AMD64_RAW_EVENT_MASK \
(X86_RAW_EVENT_MASK | \
AMD64_EVENTSEL_EVENT)
+#define AMD64_RAW_EVENT_MASK_NB \
+ (AMD64_EVENTSEL_EVENT | \
+ ARCH_PERFMON_EVENTSEL_UMASK)
#define AMD64_NUM_COUNTERS 4
#define AMD64_NUM_COUNTERS_CORE 6
+#define AMD64_NUM_COUNTERS_NB 4
#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c
#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8)
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index d6e3337..8f4eac6 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -132,6 +132,9 @@ static u64 amd_pmu_event_map(int hw_event)
return amd_perfmon_event_map[hw_event];
}
+static int num_core_counters;
+static struct event_constraint *amd_nb_event_constraint;
+
/*
* Previously calculated offsets
*/
@@ -143,6 +146,10 @@ static unsigned int addr_offsets[X86_PMC_IDX_MAX] __read_mostly;
*
* CPUs with core performance counter extensions:
* 6 counters starting at 0xc0010200 each offset by 2
+ *
+ * CPUs with north bridge performance counter extensions:
+ * 4 additional counters starting at 0xc0010240 each offset by 2
+ * (indexed right above either one of the above core counters)
*/
static inline int amd_pmu_addr_offset(int index)
{
@@ -156,7 +163,11 @@ static inline int amd_pmu_addr_offset(int index)
if (offset)
return offset;
- if (!cpu_has_perfctr_core)
+ if (amd_nb_event_constraint &&
+ test_bit(index, amd_nb_event_constraint->idxmsk))
+ offset = (MSR_F15H_NB_PERF_CTL - x86_pmu.eventsel) +
+ ((index - num_core_counters) << 1);
+ else if (!cpu_has_perfctr_core)
offset = index;
else
offset = index << 1;
@@ -166,21 +177,8 @@ static inline int amd_pmu_addr_offset(int index)
return offset;
}
-static int amd_pmu_hw_config(struct perf_event *event)
+static int amd_core_hw_config(struct perf_event *event)
{
- int ret;
-
- /* pass precise event sampling to ibs: */
- if (event->attr.precise_ip && get_ibs_caps())
- return -ENOENT;
-
- ret = x86_pmu_hw_config(event);
- if (ret)
- return ret;
-
- if (has_branch_stack(event))
- return -EOPNOTSUPP;
-
if (event->attr.exclude_host && event->attr.exclude_guest)
/*
* When HO == GO == 1 the hardware treats that as GO == HO == 0
@@ -194,10 +192,39 @@ static int amd_pmu_hw_config(struct perf_event *event)
else if (event->attr.exclude_guest)
event->hw.config |= AMD_PERFMON_EVENTSEL_HOSTONLY;
- if (event->attr.type != PERF_TYPE_RAW)
- return 0;
+ return 0;
+}
+
+/*
+ * NB counters do not support the following event select bits:
+ * Host/Guest only
+ * Counter mask
+ * Invert counter mask
+ * Edge detect
+ * OS/User mode
+ */
+static int amd_nb_hw_config(struct perf_event *event)
+{
+ if (event->attr.exclude_user || event->attr.exclude_kernel ||
+ event->attr.exclude_host || event->attr.exclude_guest)
+ return -EINVAL;
+
+ event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR |
+ ARCH_PERFMON_EVENTSEL_OS);
+
+ if (event->hw.config & ~(AMD64_RAW_EVENT_MASK_NB |
+ ARCH_PERFMON_EVENTSEL_INT))
+ return -EINVAL;
- event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK;
+ /* set up interrupts to be delivered only to this core */
+ if (amd_nb_event_constraint) {
+ struct cpuinfo_x86 *c = &cpu_data(event->cpu);
+
+ event->hw.config |= AMD64_EVENTSEL_INT_CORE_ENABLE;
+ event->hw.config &= ~AMD64_EVENTSEL_INT_CORE_SEL_MASK;
+ event->hw.config |= (u64)(c->cpu_core_id) <<
+ AMD64_EVENTSEL_INT_CORE_SEL_SHIFT;
+ }
return 0;
}
@@ -215,6 +242,11 @@ static inline int amd_is_nb_event(struct hw_perf_event *hwc)
return (hwc->config & 0xe0) == 0xe0;
}
+static inline int amd_is_perfctr_nb_event(struct hw_perf_event *hwc)
+{
+ return amd_nb_event_constraint && amd_is_nb_event(hwc);
+}
+
static inline int amd_has_nb(struct cpu_hw_events *cpuc)
{
struct amd_nb *nb = cpuc->amd_nb;
@@ -222,6 +254,30 @@ static inline int amd_has_nb(struct cpu_hw_events *cpuc)
return nb && nb->nb_id != -1;
}
+static int amd_pmu_hw_config(struct perf_event *event)
+{
+ int ret;
+
+ /* pass precise event sampling to ibs: */
+ if (event->attr.precise_ip && get_ibs_caps())
+ return -ENOENT;
+
+ if (has_branch_stack(event))
+ return -EOPNOTSUPP;
+
+ ret = x86_pmu_hw_config(event);
+ if (ret)
+ return ret;
+
+ if (event->attr.type == PERF_TYPE_RAW)
+ event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK;
+
+ if (amd_is_perfctr_nb_event(&event->hw))
+ return amd_nb_hw_config(event);
+
+ return amd_core_hw_config(event);
+}
+
static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc,
struct perf_event *event)
{
@@ -287,6 +343,9 @@ __amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *ev
struct perf_event *old;
int idx, new = -1;
+ if (!c)
+ c = &unconstrained;
+
/*
* detect if already present, if so reuse
*
@@ -422,7 +481,8 @@ amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
if (!(amd_has_nb(cpuc) && amd_is_nb_event(&event->hw)))
return &unconstrained;
- return __amd_get_nb_event_constraints(cpuc, event, &unconstrained);
+ return __amd_get_nb_event_constraints(cpuc, event,
+ amd_nb_event_constraint);
}
static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
@@ -521,6 +581,9 @@ static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09,
static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
+static struct event_constraint amd_NBPMC96 = EVENT_CONSTRAINT(0, 0x3C0, 0);
+static struct event_constraint amd_NBPMC74 = EVENT_CONSTRAINT(0, 0xF0, 0);
+
static struct event_constraint *
amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event)
{
@@ -586,8 +649,11 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *ev
return &amd_f15_PMC20;
}
case AMD_EVENT_NB:
- /* not yet implemented */
- return &emptyconstraint;
+ if (cpuc->is_fake)
+ return amd_nb_event_constraint;
+
+ return __amd_get_nb_event_constraints(cpuc, event,
+ amd_nb_event_constraint);
default:
return &emptyconstraint;
}
@@ -625,7 +691,7 @@ static __initconst const struct x86_pmu amd_pmu = {
static int setup_event_constraints(void)
{
- if (boot_cpu_data.x86 >= 0x15)
+ if (boot_cpu_data.x86 == 0x15)
x86_pmu.get_event_constraints = amd_get_event_constraints_f15h;
return 0;
}
@@ -655,6 +721,25 @@ static int setup_perfctr_core(void)
return 0;
}
+static int setup_perfctr_nb(void)
+{
+ num_core_counters = x86_pmu.num_counters;
+
+ if (!cpu_has_perfctr_nb)
+ return -ENODEV;
+
+ x86_pmu.num_counters += AMD64_NUM_COUNTERS_NB;
+
+ if (cpu_has_perfctr_core)
+ amd_nb_event_constraint = &amd_NBPMC96;
+ else
+ amd_nb_event_constraint = &amd_NBPMC74;
+
+ printk(KERN_INFO "perf: AMD northbridge performance counters detected\n");
+
+ return 0;
+}
+
__init int amd_pmu_init(void)
{
/* Performance-monitoring supported from K7 and later: */
@@ -665,6 +750,7 @@ __init int amd_pmu_init(void)
setup_event_constraints();
setup_perfctr_core();
+ setup_perfctr_nb();
/* Events are common for all AMDs */
memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
--
1.7.9.5