Subject: [PATCH 0/5] perf, x86: Add support for AMD family 15h core counters

This patch set adds support for AMD family 15h core counters. Major
changes compared to family 10h counters are:

* There are now separate northbridge and core counters that reside in
different MSR ranges (core: MSRC001_02[0B:00], nb:
MSRC001_02[47:40]).

* The MSR addresses of the perfctr and evntsel registers are now
interleaved side-by-side, so we can no longer calculate an address
with (base + index).

* There are 4 northbridge counters and 6 core counters.

* There are legacy aliases to old MSR counter addresses
(MSRC001_00[03:00] -> MSRC001_02[07:00] respectively).

* Not all performance monitor events can be counted on all counters
anymore; there are now scheduling restrictions.

We need to change the MSR address handling of the x86 perf_event
implementation and also add more AMD event constraints for event
scheduling.

This patch set only adds core counters.

-Robert



Subject: [PATCH 3/5] perf, x86: Add new AMD family 15h msrs to perfctr reservation code

This patch allows the reservation of perfctrs with new msr addresses
introduced for AMD cpu family 15h (0xc0010200/0xc0010201, etc).
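
A short worked example of the resulting bit offsets (not part of the
patch; MSR_F15H_PERF_CTR is assumed to be 0xc0010201 as noted above,
and MSR_K7_PERFCTR0 is the legacy 0xc0010004):

	nmi_perfctr_msr_to_bit(0xc0010201) = (0xc0010201 - MSR_F15H_PERF_CTR) >> 1 = 0
	nmi_perfctr_msr_to_bit(0xc0010203) = (0xc0010203 - MSR_F15H_PERF_CTR) >> 1 = 1
	nmi_perfctr_msr_to_bit(0xc0010004) =  0xc0010004 - MSR_K7_PERFCTR0         = 0

So counter n maps to reservation bit n regardless of whether it is
addressed through the new family 15h MSRs or the legacy ones.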

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/kernel/cpu/perfctr-watchdog.c | 4 ++++
1 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index d5a2366..966512b 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -46,6 +46,8 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
/* returns the bit offset of the performance counter register */
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_AMD:
+ if (msr >= MSR_F15H_PERF_CTR)
+ return (msr - MSR_F15H_PERF_CTR) >> 1;
return msr - MSR_K7_PERFCTR0;
case X86_VENDOR_INTEL:
if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
@@ -70,6 +72,8 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
/* returns the bit offset of the event selection register */
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_AMD:
+ if (msr >= MSR_F15H_PERF_CTL)
+ return (msr - MSR_F15H_PERF_CTL) >> 1;
return msr - MSR_K7_EVNTSEL0;
case X86_VENDOR_INTEL:
if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
--
1.7.3.4

Subject: [PATCH 2/5] perf, x86: Calculate perfctr msr addresses in helper functions

This patch adds helper functions to calculate perfctr msr addresses.
We need this to later add support for AMD family 15h cpus. For this we
have to change the algorithms to generate the perfctr's msr addresses.

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/kernel/cpu/perf_event.c | 36 ++++++++++++++++++++-----------
arch/x86/kernel/cpu/perf_event_intel.c | 4 +-
2 files changed, 25 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 70d6d8f..ee40c1ad 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -321,6 +321,16 @@ again:
return new_raw_count;
}

+static inline unsigned int x86_pmu_config_addr(int index)
+{
+ return x86_pmu.eventsel + index;
+}
+
+static inline unsigned int x86_pmu_event_addr(int index)
+{
+ return x86_pmu.perfctr + index;
+}
+
static atomic_t active_events;
static DEFINE_MUTEX(pmc_reserve_mutex);

@@ -331,12 +341,12 @@ static bool reserve_pmc_hardware(void)
int i;

for (i = 0; i < x86_pmu.num_counters; i++) {
- if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
+ if (!reserve_perfctr_nmi(x86_pmu_event_addr(i)))
goto perfctr_fail;
}

for (i = 0; i < x86_pmu.num_counters; i++) {
- if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
+ if (!reserve_evntsel_nmi(x86_pmu_config_addr(i)))
goto eventsel_fail;
}

@@ -344,13 +354,13 @@ static bool reserve_pmc_hardware(void)

eventsel_fail:
for (i--; i >= 0; i--)
- release_evntsel_nmi(x86_pmu.eventsel + i);
+ release_evntsel_nmi(x86_pmu_config_addr(i));

i = x86_pmu.num_counters;

perfctr_fail:
for (i--; i >= 0; i--)
- release_perfctr_nmi(x86_pmu.perfctr + i);
+ release_perfctr_nmi(x86_pmu_event_addr(i));

return false;
}
@@ -360,8 +370,8 @@ static void release_pmc_hardware(void)
int i;

for (i = 0; i < x86_pmu.num_counters; i++) {
- release_perfctr_nmi(x86_pmu.perfctr + i);
- release_evntsel_nmi(x86_pmu.eventsel + i);
+ release_perfctr_nmi(x86_pmu_event_addr(i));
+ release_evntsel_nmi(x86_pmu_config_addr(i));
}
}

@@ -382,7 +392,7 @@ static bool check_hw_exists(void)
* complain and bail.
*/
for (i = 0; i < x86_pmu.num_counters; i++) {
- reg = x86_pmu.eventsel + i;
+ reg = x86_pmu_config_addr(i);
ret = rdmsrl_safe(reg, &val);
if (ret)
goto msr_fail;
@@ -407,8 +417,8 @@ static bool check_hw_exists(void)
* that don't trap on the MSR access and always return 0s.
*/
val = 0xabcdUL;
- ret = checking_wrmsrl(x86_pmu.perfctr, val);
- ret |= rdmsrl_safe(x86_pmu.perfctr, &val_new);
+ ret = checking_wrmsrl(x86_pmu_event_addr(0), val);
+ ret |= rdmsrl_safe(x86_pmu_event_addr(0), &val_new);
if (ret || val != val_new)
goto msr_fail;

@@ -617,11 +627,11 @@ static void x86_pmu_disable_all(void)

if (!test_bit(idx, cpuc->active_mask))
continue;
- rdmsrl(x86_pmu.eventsel + idx, val);
+ rdmsrl(x86_pmu_config_addr(idx), val);
if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE))
continue;
val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
- wrmsrl(x86_pmu.eventsel + idx, val);
+ wrmsrl(x86_pmu_config_addr(idx), val);
}
}

@@ -1110,8 +1120,8 @@ void perf_event_print_debug(void)
pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask);

for (idx = 0; idx < x86_pmu.num_counters; idx++) {
- rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
- rdmsrl(x86_pmu.perfctr + idx, pmc_count);
+ rdmsrl(x86_pmu_config_addr(idx), pmc_ctrl);
+ rdmsrl(x86_pmu_event_addr(idx), pmc_count);

prev_left = per_cpu(pmc_prev_left[idx], cpu);

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 008835c..084b383 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -691,8 +691,8 @@ static void intel_pmu_reset(void)
printk("clearing PMU state on CPU#%d\n", smp_processor_id());

for (idx = 0; idx < x86_pmu.num_counters; idx++) {
- checking_wrmsrl(x86_pmu.eventsel + idx, 0ull);
- checking_wrmsrl(x86_pmu.perfctr + idx, 0ull);
+ checking_wrmsrl(x86_pmu_config_addr(idx), 0ull);
+ checking_wrmsrl(x86_pmu_event_addr(idx), 0ull);
}
for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++)
checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
--
1.7.3.4

Subject: [PATCH 5/5] perf, x86: Add support for AMD family 15h core counters

This patch adds support for AMD family 15h core counters. There are
major changes compared to family 10h. First, there is a new perfctr
msr range for up to 6 counters. Northbridge counters are separate
now. This patch only adds support for core counters. Second, certain
events may only be scheduled on certain counters. For this we need to
extend the event scheduling and constraints.

This patch implements table lookups for pmc/msr mappings that can be
reused for other models. I have chosen this approach in favor of
calculating the addresses from the index because it is more flexible,
especially for the later addition of northbridge counters, which
reside in another msr range.
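
For example (values assumed from the 0xc0010200/0xc0010201 base
addresses mentioned in the previous patch), the lookup resolves
index 2 as:

	x86_pmu_config_addr(2) -> eventsel_f15h[2] = MSR_F15H_PERF_CTL + 4 = 0xc0010204
	x86_pmu_event_addr(2)  -> perfctr_f15h[2]  = MSR_F15H_PERF_CTR + 4 = 0xc0010205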

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/kernel/cpu/perf_event.c | 12 ++-
arch/x86/kernel/cpu/perf_event_amd.c | 189 +++++++++++++++++++++++++++++++++-
2 files changed, 198 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 3161943..08643da 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -202,8 +202,10 @@ struct x86_pmu {
void (*disable)(struct perf_event *);
int (*hw_config)(struct perf_event *event);
int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
- unsigned eventsel;
- unsigned perfctr;
+ unsigned int eventsel;
+ unsigned int perfctr;
+ unsigned int *eventsel_map;
+ unsigned int *perfctr_map;
u64 (*event_map)(int);
int max_events;
int num_counters;
@@ -323,11 +325,17 @@ again:

static inline unsigned int x86_pmu_config_addr(int index)
{
+ if (x86_pmu.eventsel_map)
+ return x86_pmu.eventsel_map[index];
+
return x86_pmu.eventsel + index;
}

static inline unsigned int x86_pmu_event_addr(int index)
{
+ if (x86_pmu.perfctr_map)
+ return x86_pmu.perfctr_map[index];
+
return x86_pmu.perfctr + index;
}

diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 67e2202..8c233c4 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -127,6 +127,11 @@ static int amd_pmu_hw_config(struct perf_event *event)
/*
* AMD64 events are detected based on their event codes.
*/
+static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc)
+{
+ return ((hwc->config >> 24) & 0x0f00) | (hwc->config & 0x00ff);
+}
+
static inline int amd_is_nb_event(struct hw_perf_event *hwc)
{
return (hwc->config & 0xe0) == 0xe0;
@@ -385,13 +390,195 @@ static __initconst const struct x86_pmu amd_pmu = {
.cpu_dead = amd_pmu_cpu_dead,
};

+/* AMD Family 15h */
+
+#define AMD_EVENT_TYPE_MASK 0x000000F0ULL
+
+#define AMD_EVENT_FP 0x00000000ULL ... 0x00000010ULL
+#define AMD_EVENT_LS 0x00000020ULL ... 0x00000030ULL
+#define AMD_EVENT_DC 0x00000040ULL ... 0x00000050ULL
+#define AMD_EVENT_CU 0x00000060ULL ... 0x00000070ULL
+#define AMD_EVENT_IC_DE 0x00000080ULL ... 0x00000090ULL
+#define AMD_EVENT_EX_LS 0x000000C0ULL
+#define AMD_EVENT_DE 0x000000D0ULL
+#define AMD_EVENT_NB 0x000000E0ULL ... 0x000000F0ULL
+
+/*
+ * AMD family 15h PMC/MSR address mapping:
+ */
+
+unsigned int eventsel_f15h[] = {
+ MSR_F15H_PERF_CTL,
+ MSR_F15H_PERF_CTL + 2,
+ MSR_F15H_PERF_CTL + 4,
+ MSR_F15H_PERF_CTL + 6,
+ MSR_F15H_PERF_CTL + 8,
+ MSR_F15H_PERF_CTL + 10,
+};
+
+unsigned int perfctr_f15h[] = {
+ MSR_F15H_PERF_CTR,
+ MSR_F15H_PERF_CTR + 2,
+ MSR_F15H_PERF_CTR + 4,
+ MSR_F15H_PERF_CTR + 6,
+ MSR_F15H_PERF_CTR + 8,
+ MSR_F15H_PERF_CTR + 10,
+};
+
+/*
+ * AMD family 15h event code/PMC mappings:
+ *
+ * type = event_code & 0x0F0:
+ *
+ * 0x000 FP PERF_CTL[5:3]
+ * 0x010 FP PERF_CTL[5:3]
+ * 0x020 LS PERF_CTL[5:0]
+ * 0x030 LS PERF_CTL[5:0]
+ * 0x040 DC PERF_CTL[5:0]
+ * 0x050 DC PERF_CTL[5:0]
+ * 0x060 CU PERF_CTL[2:0]
+ * 0x070 CU PERF_CTL[2:0]
+ * 0x080 IC/DE PERF_CTL[2:0]
+ * 0x090 IC/DE PERF_CTL[2:0]
+ * 0x0A0 ---
+ * 0x0B0 ---
+ * 0x0C0 EX/LS PERF_CTL[5:0]
+ * 0x0D0 DE PERF_CTL[2:0]
+ * 0x0E0 NB NB_PERF_CTL[3:0]
+ * 0x0F0 NB NB_PERF_CTL[3:0]
+ *
+ * Exceptions:
+ *
+ * 0x003 FP PERF_CTL[3]
+ * 0x00B FP PERF_CTL[3]
+ * 0x00D FP PERF_CTL[3]
+ * 0x023 DE PERF_CTL[2:0]
+ * 0x02D LS PERF_CTL[3]
+ * 0x02E LS PERF_CTL[3,0]
+ * 0x043 CU PERF_CTL[2:0]
+ * 0x045 CU PERF_CTL[2:0]
+ * 0x046 CU PERF_CTL[2:0]
+ * 0x054 CU PERF_CTL[2:0]
+ * 0x055 CU PERF_CTL[2:0]
+ * 0x08F IC PERF_CTL[0]
+ * 0x187 DE PERF_CTL[0]
+ * 0x188 DE PERF_CTL[0]
+ * 0x0DB EX PERF_CTL[5:0]
+ * 0x0DC LS PERF_CTL[5:0]
+ * 0x0DD LS PERF_CTL[5:0]
+ * 0x0DE LS PERF_CTL[5:0]
+ * 0x0DF LS PERF_CTL[5:0]
+ * 0x1D6 EX PERF_CTL[5:0]
+ * 0x1D8 EX PERF_CTL[5:0]
+ */
+
+static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0);
+static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0);
+static struct event_constraint amd_f15_PMC3 = EVENT_CONSTRAINT(0, 0x08, 0);
+static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT(0, 0x09, 0);
+static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
+static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
+
+static struct event_constraint *
+amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event)
+{
+ unsigned int event_code = amd_get_event_code(&event->hw);
+
+ switch (event_code & AMD_EVENT_TYPE_MASK) {
+ case AMD_EVENT_FP:
+ switch (event_code) {
+ case 0x003:
+ case 0x00B:
+ case 0x00D:
+ return &amd_f15_PMC3;
+ default:
+ return &amd_f15_PMC53;
+ }
+ case AMD_EVENT_LS:
+ case AMD_EVENT_DC:
+ case AMD_EVENT_EX_LS:
+ switch (event_code) {
+ case 0x023:
+ case 0x043:
+ case 0x045:
+ case 0x046:
+ case 0x054:
+ case 0x055:
+ return &amd_f15_PMC20;
+ case 0x02D:
+ return &amd_f15_PMC3;
+ case 0x02E:
+ return &amd_f15_PMC30;
+ default:
+ return &amd_f15_PMC50;
+ }
+ case AMD_EVENT_CU:
+ case AMD_EVENT_IC_DE:
+ case AMD_EVENT_DE:
+ switch (event_code) {
+ case 0x08F:
+ case 0x187:
+ case 0x188:
+ return &amd_f15_PMC0;
+ case 0x0DB ... 0x0DF:
+ case 0x1D6:
+ case 0x1D8:
+ return &amd_f15_PMC50;
+ default:
+ return &amd_f15_PMC20;
+ }
+ case AMD_EVENT_NB:
+ /* not yet implemented */
+ return &emptyconstraint;
+ default:
+ return &emptyconstraint;
+ }
+}
+
+static __initconst const struct x86_pmu amd_pmu_f15h = {
+ .name = "AMD Family 15h",
+ .handle_irq = x86_pmu_handle_irq,
+ .disable_all = x86_pmu_disable_all,
+ .enable_all = x86_pmu_enable_all,
+ .enable = x86_pmu_enable_event,
+ .disable = x86_pmu_disable_event,
+ .hw_config = amd_pmu_hw_config,
+ .schedule_events = x86_schedule_events,
+ .eventsel_map = eventsel_f15h,
+ .perfctr_map = perfctr_f15h,
+ .event_map = amd_pmu_event_map,
+ .max_events = ARRAY_SIZE(amd_perfmon_event_map),
+ .num_counters = ARRAY_SIZE(eventsel_f15h),
+ .cntval_bits = 48,
+ .cntval_mask = (1ULL << 48) - 1,
+ .apic = 1,
+ /* use highest bit to detect overflow */
+ .max_period = (1ULL << 47) - 1,
+ .get_event_constraints = amd_get_event_constraints_f15h,
+ /* northbridge counters not yet implemented: */
+#if 0
+ .put_event_constraints = amd_put_event_constraints,
+
+ .cpu_prepare = amd_pmu_cpu_prepare,
+ .cpu_starting = amd_pmu_cpu_starting,
+ .cpu_dead = amd_pmu_cpu_dead,
+#endif
+};
+
static __init int amd_pmu_init(void)
{
/* Performance-monitoring supported from K7 and later: */
if (boot_cpu_data.x86 < 6)
return -ENODEV;

- x86_pmu = amd_pmu;
+ switch (boot_cpu_data.x86) {
+ case 0x15:
+ x86_pmu = amd_pmu_f15h;
+ break;
+ default:
+ x86_pmu = amd_pmu;
+ break;
+ }

/* Events are common for all AMDs */
memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
--
1.7.3.4

Subject: [PATCH 4/5] perf, x86: Store perfctr msr addresses in config_base/event_base

Instead of storing the base addresses we can store the counter's msr
addresses directly in config_base/event_base of struct hw_perf_event.
This avoids recalculating the address with each msr access. The
addresses are configured one time. We also need this change to later
modify the address calculation.
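
In short, the flow after this patch is (a sketch distilled from the
hunks below, no new code):

	/* once, when the event is assigned to a counter: */
	hwc->config_base = x86_pmu_config_addr(hwc->idx);
	hwc->event_base  = x86_pmu_event_addr(hwc->idx);

	/* on every access, no "+ idx" arithmetic anymore: */
	rdmsrl(hwc->event_base, new_raw_count);
	wrmsrl(hwc->config_base, hwc->config | enable_mask);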

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/kernel/cpu/perf_event.c | 21 ++++++++-------------
arch/x86/kernel/cpu/perf_event_p4.c | 10 +++++-----
arch/x86/kernel/cpu/perf_event_p6.c | 4 ++--
3 files changed, 15 insertions(+), 20 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index ee40c1ad..3161943 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -298,7 +298,7 @@ x86_perf_event_update(struct perf_event *event)
*/
again:
prev_raw_count = local64_read(&hwc->prev_count);
- rdmsrl(hwc->event_base + idx, new_raw_count);
+ rdmsrl(hwc->event_base, new_raw_count);

if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
new_raw_count) != prev_raw_count)
@@ -655,7 +655,7 @@ static void x86_pmu_disable(struct pmu *pmu)
static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
u64 enable_mask)
{
- wrmsrl(hwc->config_base + hwc->idx, hwc->config | enable_mask);
+ wrmsrl(hwc->config_base, hwc->config | enable_mask);
}

static void x86_pmu_enable_all(int added)
@@ -834,15 +834,10 @@ static inline void x86_assign_hw_event(struct perf_event *event,
hwc->event_base = 0;
} else if (hwc->idx >= X86_PMC_IDX_FIXED) {
hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
- /*
- * We set it so that event_base + idx in wrmsr/rdmsr maps to
- * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
- */
- hwc->event_base =
- MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
+ hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0;
} else {
- hwc->config_base = x86_pmu.eventsel;
- hwc->event_base = x86_pmu.perfctr;
+ hwc->config_base = x86_pmu_config_addr(hwc->idx);
+ hwc->event_base = x86_pmu_event_addr(hwc->idx);
}
}

@@ -932,7 +927,7 @@ static inline void x86_pmu_disable_event(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;

- wrmsrl(hwc->config_base + hwc->idx, hwc->config);
+ wrmsrl(hwc->config_base, hwc->config);
}

static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
@@ -985,7 +980,7 @@ x86_perf_event_set_period(struct perf_event *event)
*/
local64_set(&hwc->prev_count, (u64)-left);

- wrmsrl(hwc->event_base + idx, (u64)(-left) & x86_pmu.cntval_mask);
+ wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);

/*
* Due to erratum on certan cpu we need
@@ -993,7 +988,7 @@ x86_perf_event_set_period(struct perf_event *event)
* is updated properly
*/
if (x86_pmu.perfctr_second_write) {
- wrmsrl(hwc->event_base + idx,
+ wrmsrl(hwc->event_base,
(u64)(-left) & x86_pmu.cntval_mask);
}

diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index e56b9bf..c0f1747 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -756,14 +756,14 @@ static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
u64 v;

/* an official way for overflow indication */
- rdmsrl(hwc->config_base + hwc->idx, v);
+ rdmsrl(hwc->config_base, v);
if (v & P4_CCCR_OVF) {
- wrmsrl(hwc->config_base + hwc->idx, v & ~P4_CCCR_OVF);
+ wrmsrl(hwc->config_base, v & ~P4_CCCR_OVF);
return 1;
}

/* it might be unflagged overflow */
- rdmsrl(hwc->event_base + hwc->idx, v);
+ rdmsrl(hwc->event_base, v);
if (!(v & ARCH_P4_CNTRVAL_MASK))
return 1;

@@ -802,7 +802,7 @@ static inline void p4_pmu_disable_event(struct perf_event *event)
* state we need to clear P4_CCCR_OVF, otherwise interrupt get
* asserted again and again
*/
- (void)checking_wrmsrl(hwc->config_base + hwc->idx,
+ (void)checking_wrmsrl(hwc->config_base,
(u64)(p4_config_unpack_cccr(hwc->config)) &
~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED);
}
@@ -872,7 +872,7 @@ static void p4_pmu_enable_event(struct perf_event *event)
p4_pmu_enable_pebs(hwc->config);

(void)checking_wrmsrl(escr_addr, escr_conf);
- (void)checking_wrmsrl(hwc->config_base + hwc->idx,
+ (void)checking_wrmsrl(hwc->config_base,
(cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE);
}

diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
index 34ba07b..20c097e 100644
--- a/arch/x86/kernel/cpu/perf_event_p6.c
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -68,7 +68,7 @@ p6_pmu_disable_event(struct perf_event *event)
if (cpuc->enabled)
val |= ARCH_PERFMON_EVENTSEL_ENABLE;

- (void)checking_wrmsrl(hwc->config_base + hwc->idx, val);
+ (void)checking_wrmsrl(hwc->config_base, val);
}

static void p6_pmu_enable_event(struct perf_event *event)
@@ -81,7 +81,7 @@ static void p6_pmu_enable_event(struct perf_event *event)
if (cpuc->enabled)
val |= ARCH_PERFMON_EVENTSEL_ENABLE;

- (void)checking_wrmsrl(hwc->config_base + hwc->idx, val);
+ (void)checking_wrmsrl(hwc->config_base, val);
}

static __initconst const struct x86_pmu p6_pmu = {
--
1.7.3.4

Subject: [PATCH 1/5] perf, x86: Use helper function in x86_pmu_enable_all()

Use helper function in x86_pmu_enable_all() to minimize access to
x86_pmu.eventsel in the fast path. The counter's msr address is now
calculated using struct hw_perf_event. Later we add code that
calculates the msr addresses with a table lookup which shouldn't be
done in the fast path.

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/kernel/cpu/perf_event.c | 19 ++++++++-----------
1 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 4d98789..70d6d8f 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -642,21 +642,24 @@ static void x86_pmu_disable(struct pmu *pmu)
x86_pmu.disable_all();
}

+static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
+ u64 enable_mask)
+{
+ wrmsrl(hwc->config_base + hwc->idx, hwc->config | enable_mask);
+}
+
static void x86_pmu_enable_all(int added)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
int idx;

for (idx = 0; idx < x86_pmu.num_counters; idx++) {
- struct perf_event *event = cpuc->events[idx];
- u64 val;
+ struct hw_perf_event *hwc = &cpuc->events[idx]->hw;

if (!test_bit(idx, cpuc->active_mask))
continue;

- val = event->hw.config;
- val |= ARCH_PERFMON_EVENTSEL_ENABLE;
- wrmsrl(x86_pmu.eventsel + idx, val);
+ __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
}
}

@@ -915,12 +918,6 @@ static void x86_pmu_enable(struct pmu *pmu)
x86_pmu.enable_all(added);
}

-static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
- u64 enable_mask)
-{
- wrmsrl(hwc->config_base + hwc->idx, hwc->config | enable_mask);
-}
-
static inline void x86_pmu_disable_event(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
--
1.7.3.4

2011-02-02 17:02:19

by Peter Zijlstra

Subject: Re: [PATCH 5/5] perf, x86: Add support for AMD family 15h core counters

On Wed, 2011-02-02 at 17:41 +0100, Robert Richter wrote:
> + unsigned int eventsel;
> + unsigned int perfctr;
> + unsigned int *eventsel_map;
> + unsigned int *perfctr_map;
> u64 (*event_map)(int);
> int max_events;
> int num_counters;
> @@ -323,11 +325,17 @@ again:
>
> static inline unsigned int x86_pmu_config_addr(int index)
> {
> + if (x86_pmu.eventsel_map)
> + return x86_pmu.eventsel_map[index];
> +
> return x86_pmu.eventsel + index;
> }
>
> static inline unsigned int x86_pmu_event_addr(int index)
> {
> + if (x86_pmu.perfctr_map)
> + return x86_pmu.perfctr_map[index];
> +
> return x86_pmu.perfctr + index;
> }

Why this and not something like x86_pmu.perfctr + (index << 1)?
You could even use alternatives.


Subject: Re: [PATCH 5/5] perf, x86: Add support for AMD family 15h core counters

On 02.02.11 12:03:18, Peter Zijlstra wrote:
> On Wed, 2011-02-02 at 17:41 +0100, Robert Richter wrote:
> > + unsigned int eventsel;
> > + unsigned int perfctr;
> > + unsigned int *eventsel_map;
> > + unsigned int *perfctr_map;
> > u64 (*event_map)(int);
> > int max_events;
> > int num_counters;
> > @@ -323,11 +325,17 @@ again:
> >
> > static inline unsigned int x86_pmu_config_addr(int index)
> > {
> > + if (x86_pmu.eventsel_map)
> > + return x86_pmu.eventsel_map[index];
> > +
> > return x86_pmu.eventsel + index;
> > }
> >
> > static inline unsigned int x86_pmu_event_addr(int index)
> > {
> > + if (x86_pmu.perfctr_map)
> > + return x86_pmu.perfctr_map[index];
> > +
> > return x86_pmu.perfctr + index;
> > }
>
> Why this and not something like x86_pmu.perfctr + (index << 1)?
> You could even use alternatives.

I was thinking about this. The main reason is the implementation of
northbridge counters; their range is MSRC001_02[47:40], and handling
it would add more complexity. Using a table would be something like

unsigned int eventsel_f15h[] = {
MSR_F15H_PERF_CTL,
MSR_F15H_PERF_CTL + 2,
MSR_F15H_PERF_CTL + 4,
MSR_F15H_PERF_CTL + 6,
MSR_F15H_PERF_CTL + 8,
MSR_F15H_PERF_CTL + 10,
MSR_F15H_NB_PERF_CTL,
MSR_F15H_NB_PERF_CTL + 2,
MSR_F15H_NB_PERF_CTL + 6,
MSR_F15H_NB_PERF_CTL + 8,
};

We don't need to change the address generation for this. Otherwise we
need to introduce more logic for the calculation.

Also, there could be potentially easier implementations for fixed
counters, BTS, P4, IBS, etc., but I didn't look that closely at it.

(Btw, I am not yet sure if NB counters shouldn't better start at index
16 or so to reserve space for perf counter expansion.)

-Robert

--
Advanced Micro Devices, Inc.
Operating System Research Center

2011-02-02 17:28:39

by Peter Zijlstra

Subject: Re: [PATCH 5/5] perf, x86: Add support for AMD family 15h core counters

On Wed, 2011-02-02 at 18:24 +0100, Robert Richter wrote:
> On 02.02.11 12:03:18, Peter Zijlstra wrote:
> > On Wed, 2011-02-02 at 17:41 +0100, Robert Richter wrote:
> > > + unsigned int eventsel;
> > > + unsigned int perfctr;
> > > + unsigned int *eventsel_map;
> > > + unsigned int *perfctr_map;
> > > u64 (*event_map)(int);
> > > int max_events;
> > > int num_counters;
> > > @@ -323,11 +325,17 @@ again:
> > >
> > > static inline unsigned int x86_pmu_config_addr(int index)
> > > {
> > > + if (x86_pmu.eventsel_map)
> > > + return x86_pmu.eventsel_map[index];
> > > +
> > > return x86_pmu.eventsel + index;
> > > }
> > >
> > > static inline unsigned int x86_pmu_event_addr(int index)
> > > {
> > > + if (x86_pmu.perfctr_map)
> > > + return x86_pmu.perfctr_map[index];
> > > +
> > > return x86_pmu.perfctr + index;
> > > }
> >
> > Why this and not something like x86_pmu.perfctr + (index << 1)?
> > You could even use alternatives.
>
> I was thinking about this. The main reason is the implementation of
> northbridge counters, the range is in MSRC001_02[47:40]. This would
> add more complexity then. Using a table would be something like
>
> unsigned int eventsel_f15h[] = {
> MSR_F15H_PERF_CTL,
> MSR_F15H_PERF_CTL + 2,
> MSR_F15H_PERF_CTL + 4,
> MSR_F15H_PERF_CTL + 6,
> MSR_F15H_PERF_CTL + 8,
> MSR_F15H_PERF_CTL + 10,
> MSR_F15H_NB_PERF_CTL,
> MSR_F15H_NB_PERF_CTL + 2,
> MSR_F15H_NB_PERF_CTL + 6,
> MSR_F15H_NB_PERF_CTL + 8,
> };
>
> We don't need to change the address generation for this. Otherwise we
> need to introduce more logic for the calculation.
>
> Also, were could be potential easier implementations for fixed
> counters, BTS, P4, IBS, etc. But didn't look that close at it.
>
> (Btw, I am not yet sure if NB counters shouldn't better start at index
> 16 or so to reserve space for perf counter expansion.)

Now that the NB PMU is completely separate from the core PMU, wouldn't
it make more sense to implement that as a separate entity just like the
intel uncore bits?

2011-02-02 20:38:56

by Stephane Eranian

Subject: Re: [PATCH 0/5] perf, x86: Add support for AMD family 15h core counters

On Wed, Feb 2, 2011 at 5:40 PM, Robert Richter <[email protected]> wrote:
> This patch set adds support for AMD family 15h core counters. Major
> changes compared to family 10h counters are:
>
> * Now there are separate northbridge and core counters that resides in
>  different MSR ranges (core: MSRC001_02[0B:00], nb:
>  MSRC001_02[47:40]).
>
That leads me to believe that Fam15h core and uncore PMUs should be
treated completely separately, like Ming's patch for Nehalem uncore.
The same would also be true for the Northbridge support in libpfm4.

2011-02-02 22:44:26

by Stephane Eranian

Subject: Re: [PATCH 5/5] perf, x86: Add support for AMD family 15h core counters

On Wed, Feb 2, 2011 at 6:29 PM, Peter Zijlstra <[email protected]> wrote:
> On Wed, 2011-02-02 at 18:24 +0100, Robert Richter wrote:
>> On 02.02.11 12:03:18, Peter Zijlstra wrote:
>> > On Wed, 2011-02-02 at 17:41 +0100, Robert Richter wrote:
>> > > +       unsigned int    eventsel;
>> > > +       unsigned int    perfctr;
>> > > +       unsigned int    *eventsel_map;
>> > > +       unsigned int    *perfctr_map;
>> > >         u64             (*event_map)(int);
>> > >         int             max_events;
>> > >         int             num_counters;
>> > > @@ -323,11 +325,17 @@ again:
>> > >
>> > >  static inline unsigned int x86_pmu_config_addr(int index)
>> > >  {
>> > > +       if (x86_pmu.eventsel_map)
>> > > +               return x86_pmu.eventsel_map[index];
>> > > +
>> > >         return x86_pmu.eventsel + index;
>> > >  }
>> > >
>> > >  static inline unsigned int x86_pmu_event_addr(int index)
>> > >  {
>> > > +       if (x86_pmu.perfctr_map)
>> > > +               return x86_pmu.perfctr_map[index];
>> > > +
>> > >         return x86_pmu.perfctr + index;
>> > >  }
>> >
>> > Why this and not something like x86_pmu.perfctr + (index << 1)?
>> > You could even use alternatives.
>>
>> I was thinking about this. The main reason is the implementation of
>> northbridge counters, the range is in MSRC001_02[47:40]. This would
>> add more complexity then. Using a table would be something like
>>
>> unsigned int eventsel_f15h[] = {
>>       MSR_F15H_PERF_CTL,
>>       MSR_F15H_PERF_CTL + 2,
>>       MSR_F15H_PERF_CTL + 4,
>>       MSR_F15H_PERF_CTL + 6,
>>       MSR_F15H_PERF_CTL + 8,
>>       MSR_F15H_PERF_CTL + 10,
>>       MSR_F15H_NB_PERF_CTL,
>>       MSR_F15H_NB_PERF_CTL + 2,
>>       MSR_F15H_NB_PERF_CTL + 6,
>>       MSR_F15H_NB_PERF_CTL + 8,
>> };
>>
>> We don't need to change the address generation for this. Otherwise we
>> need to introduce more logic for the calculation.
>>
>> Also, were could be potential easier implementations for fixed
>> counters, BTS, P4, IBS, etc. But didn't look that close at it.
>>
>> (Btw, I am not yet sure if NB counters shouldn't better start at index
>> 16 or so to reserve space for perf counter expansion.)
>
> Now that the NB PMU is completely separate from the core PMU, wouldn't
> it make more sense to implement that as a separate entity just like the
> intel uncore bits?

I agree on this.
>
>

Subject: Re: [PATCH 5/5] perf, x86: Add support for AMD family 15h core counters

On 02.02.11 17:44:22, Stephane Eranian wrote:
> On Wed, Feb 2, 2011 at 6:29 PM, Peter Zijlstra <[email protected]> wrote:
> > On Wed, 2011-02-02 at 18:24 +0100, Robert Richter wrote:
> >> On 02.02.11 12:03:18, Peter Zijlstra wrote:
> >> > Why this and not something like x86_pmu.perfctr + (index << 1)?
> >> > You could even use alternatives.
> >>
> >> I was thinking about this. The main reason is the implementation of
> >> northbridge counters, the range is in MSRC001_02[47:40]. This would
> >> add more complexity then. Using a table would be something like
> >>
> >> unsigned int eventsel_f15h[] = {
> >> 	MSR_F15H_PERF_CTL,
> >> 	MSR_F15H_PERF_CTL + 2,
> >> 	MSR_F15H_PERF_CTL + 4,
> >> 	MSR_F15H_PERF_CTL + 6,
> >> 	MSR_F15H_PERF_CTL + 8,
> >> 	MSR_F15H_PERF_CTL + 10,
> >> 	MSR_F15H_NB_PERF_CTL,
> >> 	MSR_F15H_NB_PERF_CTL + 2,
> >> 	MSR_F15H_NB_PERF_CTL + 6,
> >> 	MSR_F15H_NB_PERF_CTL + 8,
> >> };
> >>
> >> We don't need to change the address generation for this. Otherwise we
> >> need to introduce more logic for the calculation.
> >>
> >> Also, were could be potential easier implementations for fixed
> >> counters, BTS, P4, IBS, etc. But didn't look that close at it.
> >>
> >> (Btw, I am not yet sure if NB counters shouldn't better start at index
> >> 16 or so to reserve space for perf counter expansion.)
> >
> > Now that the NB PMU is completely separate from the core PMU, wouldn't
> > it make more sense to implement that as a separate entity just like the
> > intel uncore bits?
>
> I agree on this.

Peter,

Ok, nb events may be implemented independently of core events in a
separate struct pmu.

I still would prefer a lookup table for counter addresses. Adding a
shift parameter to struct x86_pmu to do a

addr = base + (index << shift)

seems to me a rather special solution that may not be reusable in
other implementations, while a lookup table is more generic. I also
don't see a performance or memory impact with the table.

Anyway, a shift parameter would work too. What do you think?
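
For comparison, the two variants would look roughly like this (only a
sketch; the addr_shift field below is hypothetical):

	/* a) table lookup, also covers a non-contiguous layout
	 *    such as separate core and northbridge ranges: */
	if (x86_pmu.eventsel_map)
		return x86_pmu.eventsel_map[index];
	return x86_pmu.eventsel + index;

	/* b) shift parameter (addr_shift is a hypothetical field),
	 *    enough for the interleaved core range: */
	return x86_pmu.eventsel + (index << x86_pmu.addr_shift);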

-Robert

--
Advanced Micro Devices, Inc.
Operating System Research Center

2011-02-03 09:37:09

by Peter Zijlstra

Subject: Re: [PATCH 5/5] perf, x86: Add support for AMD family 15h core counters

On Thu, 2011-02-03 at 10:00 +0100, Robert Richter wrote:
>
>
> ok, nb events may be implemented independent from core events in a
> separate struct pmu.
>
> I still would prefer a lookup table for counter addresses. Adding a
> shift parameter to struct x86_pmu to do a
>
> addr = base + (index << shift)
>
> seems to me a quite special solution that may not be reused in other
> implementations

What other implementations? I hope people will not re-arrange the MSR
layout on every new model; that'd be quite annoying.

> while a lookup table is more generic. I also don't
> see a performance or memory impact there.

Well it is an extra pointer chase and data cache hit just to get
something you can trivially compute.

> Anyway, a shift parameter would work too. What do you think?

I think the alternatives thing is probably nicest, except for having to
write the bits in asm.

Subject: Re: [PATCH 5/5] perf, x86: Add support for AMD family 15h core counters

On 03.02.11 04:38:03, Peter Zijlstra wrote:
> On Thu, 2011-02-03 at 10:00 +0100, Robert Richter wrote:
> >
> >
> > ok, nb events may be implemented independent from core events in a
> > separate struct pmu.
> >
> > I still would prefer a lookup table for counter addresses. Adding a
> > shift parameter to struct x86_pmu to do a
> >
> > addr = base + (index << shift)
> >
> > seems to me a quite special solution that may not be reused in other
> > implementations
>
> What other implementations? I hope people will not re-arrange the MSR
> layout on every new model, that'd be quite annoying.

I mean counters referred to by an index whose address cannot be
derived from the base address, like fixed counters, BTS, IBS, P4...
Often this is implemented in if/else-if paths.

> > while a lookup table is more generic. I also don't
> > see a performance or memory impact there.
>
> Well it is an extra pointer chase and data cache hit just to get
> something you can trivially compute.

Indeed, cache pollution is an argument.

> > Anyway, a shift parameter would work too. What do you think?
>
> I think the alternatives thing is probably nicest, except for having to
> write the bits in asm.

Will send an updated version.

-Robert

--
Advanced Micro Devices, Inc.
Operating System Research Center

Subject: [PATCH] perf, x86: Add support for AMD family 15h core counters

On 03.02.11 15:06:50, Robert Richter wrote:
> On 03.02.11 04:38:03, Peter Zijlstra wrote:
> > I think the alternatives thing is probably nicest, except for having to
> > write the bits in asm.
>
> Will send an updated version.

Below is an updated version of my recent patch submission of

[PATCH 5/5] perf, x86: Add support for AMD family 15h core counters

It now generates the msr offset by shifting the index. I wrote the
code so that it can easily be replaced with a faster ALTERNATIVE()
version that uses binary patching. Thus, I have introduced and used
cpu feature flags for it.

I will work on an ALTERNATIVE() version, but won't be able to do it
before mid-March because I will be travelling for the next few weeks.

-Robert


--

From 1b5a6061fceae47a8b7ddd6d24ae9a4909358fe8 Mon Sep 17 00:00:00 2001
From: Robert Richter <[email protected]>
Date: Wed, 2 Feb 2011 17:36:12 +0100
Subject: [PATCH] perf, x86: Add support for AMD family 15h core counters

This patch adds support for AMD family 15h core counters. There are
major changes compared to family 10h. First, there is a new perfctr
msr range for up to 6 counters. Northbridge counters are separate
now. This patch only adds support for core counters. Second, certain
events may only be scheduled on certain counters. For this we need to
extend the event scheduling and constraints.

We use cpu feature flags to calculate family 15h msr address offsets.
This way we later can implement a faster ALTERNATIVE() version for
this.

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/include/asm/cpufeature.h | 2 +
arch/x86/kernel/cpu/perf_event.c | 12 ++-
arch/x86/kernel/cpu/perf_event_amd.c | 175 +++++++++++++++++++++++++++++++++-
3 files changed, 186 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 220e2ea..91f3e087 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -160,6 +160,7 @@
#define X86_FEATURE_NODEID_MSR (6*32+19) /* NodeId MSR */
#define X86_FEATURE_TBM (6*32+21) /* trailing bit manipulations */
#define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */
+#define X86_FEATURE_PERFCTR_CORE (6*32+23) /* core performance counter extensions */

/*
* Auxiliary flags: Linux defined - For features scattered in various
@@ -279,6 +280,7 @@ extern const char * const x86_power_flags[32];
#define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE)
#define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR)
#define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ)
+#define cpu_has_perfctr_core boot_cpu_has(X86_FEATURE_PERFCTR_CORE)

#if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64)
# define cpu_has_invlpg 1
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 3161943..10bfe247 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -321,14 +321,22 @@ again:
return new_raw_count;
}

+/* using X86_FEATURE_PERFCTR_CORE to later implement ALTERNATIVE() here */
+static inline int x86_pmu_addr_offset(int index)
+{
+ if (boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
+ return index << 1;
+ return index;
+}
+
static inline unsigned int x86_pmu_config_addr(int index)
{
- return x86_pmu.eventsel + index;
+ return x86_pmu.eventsel + x86_pmu_addr_offset(index);
}

static inline unsigned int x86_pmu_event_addr(int index)
{
- return x86_pmu.perfctr + index;
+ return x86_pmu.perfctr + x86_pmu_addr_offset(index);
}

static atomic_t active_events;
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 67e2202..461f62b 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -127,6 +127,11 @@ static int amd_pmu_hw_config(struct perf_event *event)
/*
* AMD64 events are detected based on their event codes.
*/
+static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc)
+{
+ return ((hwc->config >> 24) & 0x0f00) | (hwc->config & 0x00ff);
+}
+
static inline int amd_is_nb_event(struct hw_perf_event *hwc)
{
return (hwc->config & 0xe0) == 0xe0;
@@ -385,13 +390,181 @@ static __initconst const struct x86_pmu amd_pmu = {
.cpu_dead = amd_pmu_cpu_dead,
};

+/* AMD Family 15h */
+
+#define AMD_EVENT_TYPE_MASK 0x000000F0ULL
+
+#define AMD_EVENT_FP 0x00000000ULL ... 0x00000010ULL
+#define AMD_EVENT_LS 0x00000020ULL ... 0x00000030ULL
+#define AMD_EVENT_DC 0x00000040ULL ... 0x00000050ULL
+#define AMD_EVENT_CU 0x00000060ULL ... 0x00000070ULL
+#define AMD_EVENT_IC_DE 0x00000080ULL ... 0x00000090ULL
+#define AMD_EVENT_EX_LS 0x000000C0ULL
+#define AMD_EVENT_DE 0x000000D0ULL
+#define AMD_EVENT_NB 0x000000E0ULL ... 0x000000F0ULL
+
+/*
+ * AMD family 15h event code/PMC mappings:
+ *
+ * type = event_code & 0x0F0:
+ *
+ * 0x000 FP PERF_CTL[5:3]
+ * 0x010 FP PERF_CTL[5:3]
+ * 0x020 LS PERF_CTL[5:0]
+ * 0x030 LS PERF_CTL[5:0]
+ * 0x040 DC PERF_CTL[5:0]
+ * 0x050 DC PERF_CTL[5:0]
+ * 0x060 CU PERF_CTL[2:0]
+ * 0x070 CU PERF_CTL[2:0]
+ * 0x080 IC/DE PERF_CTL[2:0]
+ * 0x090 IC/DE PERF_CTL[2:0]
+ * 0x0A0 ---
+ * 0x0B0 ---
+ * 0x0C0 EX/LS PERF_CTL[5:0]
+ * 0x0D0 DE PERF_CTL[2:0]
+ * 0x0E0 NB NB_PERF_CTL[3:0]
+ * 0x0F0 NB NB_PERF_CTL[3:0]
+ *
+ * Exceptions:
+ *
+ * 0x003 FP PERF_CTL[3]
+ * 0x00B FP PERF_CTL[3]
+ * 0x00D FP PERF_CTL[3]
+ * 0x023 DE PERF_CTL[2:0]
+ * 0x02D LS PERF_CTL[3]
+ * 0x02E LS PERF_CTL[3,0]
+ * 0x043 CU PERF_CTL[2:0]
+ * 0x045 CU PERF_CTL[2:0]
+ * 0x046 CU PERF_CTL[2:0]
+ * 0x054 CU PERF_CTL[2:0]
+ * 0x055 CU PERF_CTL[2:0]
+ * 0x08F IC PERF_CTL[0]
+ * 0x187 DE PERF_CTL[0]
+ * 0x188 DE PERF_CTL[0]
+ * 0x0DB EX PERF_CTL[5:0]
+ * 0x0DC LS PERF_CTL[5:0]
+ * 0x0DD LS PERF_CTL[5:0]
+ * 0x0DE LS PERF_CTL[5:0]
+ * 0x0DF LS PERF_CTL[5:0]
+ * 0x1D6 EX PERF_CTL[5:0]
+ * 0x1D8 EX PERF_CTL[5:0]
+ */
+
+static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0);
+static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0);
+static struct event_constraint amd_f15_PMC3 = EVENT_CONSTRAINT(0, 0x08, 0);
+static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT(0, 0x09, 0);
+static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
+static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
+
+static struct event_constraint *
+amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event)
+{
+ unsigned int event_code = amd_get_event_code(&event->hw);
+
+ switch (event_code & AMD_EVENT_TYPE_MASK) {
+ case AMD_EVENT_FP:
+ switch (event_code) {
+ case 0x003:
+ case 0x00B:
+ case 0x00D:
+ return &amd_f15_PMC3;
+ default:
+ return &amd_f15_PMC53;
+ }
+ case AMD_EVENT_LS:
+ case AMD_EVENT_DC:
+ case AMD_EVENT_EX_LS:
+ switch (event_code) {
+ case 0x023:
+ case 0x043:
+ case 0x045:
+ case 0x046:
+ case 0x054:
+ case 0x055:
+ return &amd_f15_PMC20;
+ case 0x02D:
+ return &amd_f15_PMC3;
+ case 0x02E:
+ return &amd_f15_PMC30;
+ default:
+ return &amd_f15_PMC50;
+ }
+ case AMD_EVENT_CU:
+ case AMD_EVENT_IC_DE:
+ case AMD_EVENT_DE:
+ switch (event_code) {
+ case 0x08F:
+ case 0x187:
+ case 0x188:
+ return &amd_f15_PMC0;
+ case 0x0DB ... 0x0DF:
+ case 0x1D6:
+ case 0x1D8:
+ return &amd_f15_PMC50;
+ default:
+ return &amd_f15_PMC20;
+ }
+ case AMD_EVENT_NB:
+ /* not yet implemented */
+ return &emptyconstraint;
+ default:
+ return &emptyconstraint;
+ }
+}
+
+static __initconst const struct x86_pmu amd_pmu_f15h = {
+ .name = "AMD Family 15h",
+ .handle_irq = x86_pmu_handle_irq,
+ .disable_all = x86_pmu_disable_all,
+ .enable_all = x86_pmu_enable_all,
+ .enable = x86_pmu_enable_event,
+ .disable = x86_pmu_disable_event,
+ .hw_config = amd_pmu_hw_config,
+ .schedule_events = x86_schedule_events,
+ .eventsel = MSR_F15H_PERF_CTL,
+ .perfctr = MSR_F15H_PERF_CTR,
+ .event_map = amd_pmu_event_map,
+ .max_events = ARRAY_SIZE(amd_perfmon_event_map),
+ .num_counters = 6,
+ .cntval_bits = 48,
+ .cntval_mask = (1ULL << 48) - 1,
+ .apic = 1,
+ /* use highest bit to detect overflow */
+ .max_period = (1ULL << 47) - 1,
+ .get_event_constraints = amd_get_event_constraints_f15h,
+ /* northbridge counters not yet implemented: */
+#if 0
+ .put_event_constraints = amd_put_event_constraints,
+
+ .cpu_prepare = amd_pmu_cpu_prepare,
+ .cpu_starting = amd_pmu_cpu_starting,
+ .cpu_dead = amd_pmu_cpu_dead,
+#endif
+};
+
static __init int amd_pmu_init(void)
{
/* Performance-monitoring supported from K7 and later: */
if (boot_cpu_data.x86 < 6)
return -ENODEV;

- x86_pmu = amd_pmu;
+ /*
+ * If core performance counter extensions exists, it must be
+ * family 15h, otherwise fail. See x86_pmu_addr_offset().
+ */
+ switch (boot_cpu_data.x86) {
+ case 0x15:
+ if (!cpu_has_perfctr_core)
+ return -ENODEV;
+ x86_pmu = amd_pmu_f15h;
+ break;
+ default:
+ if (cpu_has_perfctr_core)
+ return -ENODEV;
+ x86_pmu = amd_pmu;
+ break;
+ }

/* Events are common for all AMDs */
memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
--
1.7.3.4


--
Advanced Micro Devices, Inc.
Operating System Research Center

Subject: [tip:perf/core] perf, x86: Use helper function in x86_pmu_enable_all()

Commit-ID: d45dd923fcc620c948bd1eda16cc61426ac31646
Gitweb: http://git.kernel.org/tip/d45dd923fcc620c948bd1eda16cc61426ac31646
Author: Robert Richter <[email protected]>
AuthorDate: Wed, 2 Feb 2011 17:40:56 +0100
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 16 Feb 2011 13:30:49 +0100

perf, x86: Use helper function in x86_pmu_enable_all()

Use helper function in x86_pmu_enable_all() to minimize access to
x86_pmu.eventsel in the fast path. The counter's msr address is now
calculated using struct hw_perf_event. Later we add code that
calculates the msr addresses with a table lookup which shouldn't be
done in the fast path.

Signed-off-by: Robert Richter <[email protected]>
Signed-off-by: Peter Zijlstra <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/kernel/cpu/perf_event.c | 19 ++++++++-----------
1 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 4d98789..70d6d8f 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -642,21 +642,24 @@ static void x86_pmu_disable(struct pmu *pmu)
x86_pmu.disable_all();
}

+static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
+ u64 enable_mask)
+{
+ wrmsrl(hwc->config_base + hwc->idx, hwc->config | enable_mask);
+}
+
static void x86_pmu_enable_all(int added)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
int idx;

for (idx = 0; idx < x86_pmu.num_counters; idx++) {
- struct perf_event *event = cpuc->events[idx];
- u64 val;
+ struct hw_perf_event *hwc = &cpuc->events[idx]->hw;

if (!test_bit(idx, cpuc->active_mask))
continue;

- val = event->hw.config;
- val |= ARCH_PERFMON_EVENTSEL_ENABLE;
- wrmsrl(x86_pmu.eventsel + idx, val);
+ __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
}
}

@@ -915,12 +918,6 @@ static void x86_pmu_enable(struct pmu *pmu)
x86_pmu.enable_all(added);
}

-static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
- u64 enable_mask)
-{
- wrmsrl(hwc->config_base + hwc->idx, hwc->config | enable_mask);
-}
-
static inline void x86_pmu_disable_event(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;

Subject: [tip:perf/core] perf, x86: Calculate perfctr msr addresses in helper functions

Commit-ID: 41bf498949a263fa0b2d32524b89d696ac330e94
Gitweb: http://git.kernel.org/tip/41bf498949a263fa0b2d32524b89d696ac330e94
Author: Robert Richter <[email protected]>
AuthorDate: Wed, 2 Feb 2011 17:40:57 +0100
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 16 Feb 2011 13:30:50 +0100

perf, x86: Calculate perfctr msr addresses in helper functions

This patch adds helper functions to calculate perfctr msr addresses.
We need this to later add support for AMD family 15h cpus. For this we
have to change the algorithms to generate the perfctr's msr addresses.

Signed-off-by: Robert Richter <[email protected]>
Signed-off-by: Peter Zijlstra <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/kernel/cpu/perf_event.c | 36 ++++++++++++++++++++-----------
arch/x86/kernel/cpu/perf_event_intel.c | 4 +-
2 files changed, 25 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 70d6d8f..ee40c1ad 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -321,6 +321,16 @@ again:
return new_raw_count;
}

+static inline unsigned int x86_pmu_config_addr(int index)
+{
+ return x86_pmu.eventsel + index;
+}
+
+static inline unsigned int x86_pmu_event_addr(int index)
+{
+ return x86_pmu.perfctr + index;
+}
+
static atomic_t active_events;
static DEFINE_MUTEX(pmc_reserve_mutex);

@@ -331,12 +341,12 @@ static bool reserve_pmc_hardware(void)
int i;

for (i = 0; i < x86_pmu.num_counters; i++) {
- if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
+ if (!reserve_perfctr_nmi(x86_pmu_event_addr(i)))
goto perfctr_fail;
}

for (i = 0; i < x86_pmu.num_counters; i++) {
- if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
+ if (!reserve_evntsel_nmi(x86_pmu_config_addr(i)))
goto eventsel_fail;
}

@@ -344,13 +354,13 @@ static bool reserve_pmc_hardware(void)

eventsel_fail:
for (i--; i >= 0; i--)
- release_evntsel_nmi(x86_pmu.eventsel + i);
+ release_evntsel_nmi(x86_pmu_config_addr(i));

i = x86_pmu.num_counters;

perfctr_fail:
for (i--; i >= 0; i--)
- release_perfctr_nmi(x86_pmu.perfctr + i);
+ release_perfctr_nmi(x86_pmu_event_addr(i));

return false;
}
@@ -360,8 +370,8 @@ static void release_pmc_hardware(void)
int i;

for (i = 0; i < x86_pmu.num_counters; i++) {
- release_perfctr_nmi(x86_pmu.perfctr + i);
- release_evntsel_nmi(x86_pmu.eventsel + i);
+ release_perfctr_nmi(x86_pmu_event_addr(i));
+ release_evntsel_nmi(x86_pmu_config_addr(i));
}
}

@@ -382,7 +392,7 @@ static bool check_hw_exists(void)
* complain and bail.
*/
for (i = 0; i < x86_pmu.num_counters; i++) {
- reg = x86_pmu.eventsel + i;
+ reg = x86_pmu_config_addr(i);
ret = rdmsrl_safe(reg, &val);
if (ret)
goto msr_fail;
@@ -407,8 +417,8 @@ static bool check_hw_exists(void)
* that don't trap on the MSR access and always return 0s.
*/
val = 0xabcdUL;
- ret = checking_wrmsrl(x86_pmu.perfctr, val);
- ret |= rdmsrl_safe(x86_pmu.perfctr, &val_new);
+ ret = checking_wrmsrl(x86_pmu_event_addr(0), val);
+ ret |= rdmsrl_safe(x86_pmu_event_addr(0), &val_new);
if (ret || val != val_new)
goto msr_fail;

@@ -617,11 +627,11 @@ static void x86_pmu_disable_all(void)

if (!test_bit(idx, cpuc->active_mask))
continue;
- rdmsrl(x86_pmu.eventsel + idx, val);
+ rdmsrl(x86_pmu_config_addr(idx), val);
if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE))
continue;
val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
- wrmsrl(x86_pmu.eventsel + idx, val);
+ wrmsrl(x86_pmu_config_addr(idx), val);
}
}

@@ -1110,8 +1120,8 @@ void perf_event_print_debug(void)
pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask);

for (idx = 0; idx < x86_pmu.num_counters; idx++) {
- rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
- rdmsrl(x86_pmu.perfctr + idx, pmc_count);
+ rdmsrl(x86_pmu_config_addr(idx), pmc_ctrl);
+ rdmsrl(x86_pmu_event_addr(idx), pmc_count);

prev_left = per_cpu(pmc_prev_left[idx], cpu);

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 008835c..084b383 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -691,8 +691,8 @@ static void intel_pmu_reset(void)
printk("clearing PMU state on CPU#%d\n", smp_processor_id());

for (idx = 0; idx < x86_pmu.num_counters; idx++) {
- checking_wrmsrl(x86_pmu.eventsel + idx, 0ull);
- checking_wrmsrl(x86_pmu.perfctr + idx, 0ull);
+ checking_wrmsrl(x86_pmu_config_addr(idx), 0ull);
+ checking_wrmsrl(x86_pmu_event_addr(idx), 0ull);
}
for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++)
checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);

Subject: [tip:perf/core] perf, x86: Add new AMD family 15h msrs to perfctr reservation code

Commit-ID: 69d8e1e8ac0a7d829f1c0fd5bd07eb3022d9a1a0
Gitweb: http://git.kernel.org/tip/69d8e1e8ac0a7d829f1c0fd5bd07eb3022d9a1a0
Author: Robert Richter <[email protected]>
AuthorDate: Wed, 2 Feb 2011 17:40:58 +0100
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 16 Feb 2011 13:30:50 +0100

perf, x86: Add new AMD family 15h msrs to perfctr reservation code

This patch allows the reservation of perfctrs with new msr addresses
introduced for AMD cpu family 15h (0xc0010200/0xc0010201, etc).

Signed-off-by: Robert Richter <[email protected]>
Signed-off-by: Peter Zijlstra <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/kernel/cpu/perfctr-watchdog.c | 4 ++++
1 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index d5a2366..966512b 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -46,6 +46,8 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
/* returns the bit offset of the performance counter register */
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_AMD:
+ if (msr >= MSR_F15H_PERF_CTR)
+ return (msr - MSR_F15H_PERF_CTR) >> 1;
return msr - MSR_K7_PERFCTR0;
case X86_VENDOR_INTEL:
if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
@@ -70,6 +72,8 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
/* returns the bit offset of the event selection register */
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_AMD:
+ if (msr >= MSR_F15H_PERF_CTL)
+ return (msr - MSR_F15H_PERF_CTL) >> 1;
return msr - MSR_K7_EVNTSEL0;
case X86_VENDOR_INTEL:
if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))

Subject: [tip:perf/core] perf, x86: Store perfctr msr addresses in config_base/event_base

Commit-ID: 73d6e52206a20354738418625cedc244cbfd5023
Gitweb: http://git.kernel.org/tip/73d6e52206a20354738418625cedc244cbfd5023
Author: Robert Richter <[email protected]>
AuthorDate: Wed, 2 Feb 2011 17:40:59 +0100
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 16 Feb 2011 13:30:52 +0100

perf, x86: Store perfctr msr addresses in config_base/event_base

Instead of storing the base addresses we can store the counter's msr
addresses directly in config_base/event_base of struct hw_perf_event.
This avoids recalculating the address with each msr access. The
addresses are configured one time. We also need this change to later
modify the address calculation.

Signed-off-by: Robert Richter <[email protected]>
Signed-off-by: Peter Zijlstra <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/kernel/cpu/perf_event.c | 21 ++++++++-------------
arch/x86/kernel/cpu/perf_event_p4.c | 8 ++++----
arch/x86/kernel/cpu/perf_event_p6.c | 4 ++--
3 files changed, 14 insertions(+), 19 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index ee40c1ad..3161943 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -298,7 +298,7 @@ x86_perf_event_update(struct perf_event *event)
*/
again:
prev_raw_count = local64_read(&hwc->prev_count);
- rdmsrl(hwc->event_base + idx, new_raw_count);
+ rdmsrl(hwc->event_base, new_raw_count);

if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
new_raw_count) != prev_raw_count)
@@ -655,7 +655,7 @@ static void x86_pmu_disable(struct pmu *pmu)
static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
u64 enable_mask)
{
- wrmsrl(hwc->config_base + hwc->idx, hwc->config | enable_mask);
+ wrmsrl(hwc->config_base, hwc->config | enable_mask);
}

static void x86_pmu_enable_all(int added)
@@ -834,15 +834,10 @@ static inline void x86_assign_hw_event(struct perf_event *event,
hwc->event_base = 0;
} else if (hwc->idx >= X86_PMC_IDX_FIXED) {
hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
- /*
- * We set it so that event_base + idx in wrmsr/rdmsr maps to
- * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
- */
- hwc->event_base =
- MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
+ hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0;
} else {
- hwc->config_base = x86_pmu.eventsel;
- hwc->event_base = x86_pmu.perfctr;
+ hwc->config_base = x86_pmu_config_addr(hwc->idx);
+ hwc->event_base = x86_pmu_event_addr(hwc->idx);
}
}

@@ -932,7 +927,7 @@ static inline void x86_pmu_disable_event(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;

- wrmsrl(hwc->config_base + hwc->idx, hwc->config);
+ wrmsrl(hwc->config_base, hwc->config);
}

static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
@@ -985,7 +980,7 @@ x86_perf_event_set_period(struct perf_event *event)
*/
local64_set(&hwc->prev_count, (u64)-left);

- wrmsrl(hwc->event_base + idx, (u64)(-left) & x86_pmu.cntval_mask);
+ wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);

/*
* Due to erratum on certan cpu we need
@@ -993,7 +988,7 @@ x86_perf_event_set_period(struct perf_event *event)
* is updated properly
*/
if (x86_pmu.perfctr_second_write) {
- wrmsrl(hwc->event_base + idx,
+ wrmsrl(hwc->event_base,
(u64)(-left) & x86_pmu.cntval_mask);
}

diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index ff751a9..3769ac8 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -764,9 +764,9 @@ static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
u64 v;

/* an official way for overflow indication */
- rdmsrl(hwc->config_base + hwc->idx, v);
+ rdmsrl(hwc->config_base, v);
if (v & P4_CCCR_OVF) {
- wrmsrl(hwc->config_base + hwc->idx, v & ~P4_CCCR_OVF);
+ wrmsrl(hwc->config_base, v & ~P4_CCCR_OVF);
return 1;
}

@@ -815,7 +815,7 @@ static inline void p4_pmu_disable_event(struct perf_event *event)
* state we need to clear P4_CCCR_OVF, otherwise interrupt get
* asserted again and again
*/
- (void)checking_wrmsrl(hwc->config_base + hwc->idx,
+ (void)checking_wrmsrl(hwc->config_base,
(u64)(p4_config_unpack_cccr(hwc->config)) &
~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED);
}
@@ -885,7 +885,7 @@ static void p4_pmu_enable_event(struct perf_event *event)
p4_pmu_enable_pebs(hwc->config);

(void)checking_wrmsrl(escr_addr, escr_conf);
- (void)checking_wrmsrl(hwc->config_base + hwc->idx,
+ (void)checking_wrmsrl(hwc->config_base,
(cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE);
}

diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
index 34ba07b..20c097e 100644
--- a/arch/x86/kernel/cpu/perf_event_p6.c
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -68,7 +68,7 @@ p6_pmu_disable_event(struct perf_event *event)
if (cpuc->enabled)
val |= ARCH_PERFMON_EVENTSEL_ENABLE;

- (void)checking_wrmsrl(hwc->config_base + hwc->idx, val);
+ (void)checking_wrmsrl(hwc->config_base, val);
}

static void p6_pmu_enable_event(struct perf_event *event)
@@ -81,7 +81,7 @@ static void p6_pmu_enable_event(struct perf_event *event)
if (cpuc->enabled)
val |= ARCH_PERFMON_EVENTSEL_ENABLE;

- (void)checking_wrmsrl(hwc->config_base + hwc->idx, val);
+ (void)checking_wrmsrl(hwc->config_base, val);
}

static __initconst const struct x86_pmu p6_pmu = {

Subject: [tip:perf/core] perf, x86: Add support for AMD family 15h core counters

Commit-ID: 4979d2729af22f6ce8faa325fc60a85a2c2daa02
Gitweb: http://git.kernel.org/tip/4979d2729af22f6ce8faa325fc60a85a2c2daa02
Author: Robert Richter <[email protected]>
AuthorDate: Wed, 2 Feb 2011 17:36:12 +0100
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 16 Feb 2011 13:30:53 +0100

perf, x86: Add support for AMD family 15h core counters

This patch adds support for AMD family 15h core counters. There are
major changes compared to family 10h. First, there is a new perfctr
msr range for up to 6 counters. Northbridge counters are separate
now. This patch only adds support for core counters. Second, certain
events may only be scheduled on certain counters. To handle this, the
event scheduling code and the event constraints are extended.

We use cpu feature flags to calculate family 15h msr address offsets.
This way we can later implement a faster ALTERNATIVE() version of
this.

Signed-off-by: Robert Richter <[email protected]>
Signed-off-by: Peter Zijlstra <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/include/asm/cpufeature.h | 2 +
arch/x86/kernel/cpu/perf_event.c | 12 ++-
arch/x86/kernel/cpu/perf_event_amd.c | 175 +++++++++++++++++++++++++++++++++-
3 files changed, 186 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 220e2ea..91f3e087 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -160,6 +160,7 @@
#define X86_FEATURE_NODEID_MSR (6*32+19) /* NodeId MSR */
#define X86_FEATURE_TBM (6*32+21) /* trailing bit manipulations */
#define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */
+#define X86_FEATURE_PERFCTR_CORE (6*32+23) /* core performance counter extensions */

/*
* Auxiliary flags: Linux defined - For features scattered in various
@@ -279,6 +280,7 @@ extern const char * const x86_power_flags[32];
#define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE)
#define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR)
#define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ)
+#define cpu_has_perfctr_core boot_cpu_has(X86_FEATURE_PERFCTR_CORE)

#if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64)
# define cpu_has_invlpg 1
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 3161943..10bfe24 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -321,14 +321,22 @@ again:
return new_raw_count;
}

+/* using X86_FEATURE_PERFCTR_CORE to later implement ALTERNATIVE() here */
+static inline int x86_pmu_addr_offset(int index)
+{
+ if (boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
+ return index << 1;
+ return index;
+}
+
static inline unsigned int x86_pmu_config_addr(int index)
{
- return x86_pmu.eventsel + index;
+ return x86_pmu.eventsel + x86_pmu_addr_offset(index);
}

static inline unsigned int x86_pmu_event_addr(int index)
{
- return x86_pmu.perfctr + index;
+ return x86_pmu.perfctr + x86_pmu_addr_offset(index);
}

static atomic_t active_events;
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 67e2202..461f62b 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -127,6 +127,11 @@ static int amd_pmu_hw_config(struct perf_event *event)
/*
* AMD64 events are detected based on their event codes.
*/
+static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc)
+{
+ return ((hwc->config >> 24) & 0x0f00) | (hwc->config & 0x00ff);
+}
+
static inline int amd_is_nb_event(struct hw_perf_event *hwc)
{
return (hwc->config & 0xe0) == 0xe0;
@@ -385,13 +390,181 @@ static __initconst const struct x86_pmu amd_pmu = {
.cpu_dead = amd_pmu_cpu_dead,
};

+/* AMD Family 15h */
+
+#define AMD_EVENT_TYPE_MASK 0x000000F0ULL
+
+#define AMD_EVENT_FP 0x00000000ULL ... 0x00000010ULL
+#define AMD_EVENT_LS 0x00000020ULL ... 0x00000030ULL
+#define AMD_EVENT_DC 0x00000040ULL ... 0x00000050ULL
+#define AMD_EVENT_CU 0x00000060ULL ... 0x00000070ULL
+#define AMD_EVENT_IC_DE 0x00000080ULL ... 0x00000090ULL
+#define AMD_EVENT_EX_LS 0x000000C0ULL
+#define AMD_EVENT_DE 0x000000D0ULL
+#define AMD_EVENT_NB 0x000000E0ULL ... 0x000000F0ULL
+
+/*
+ * AMD family 15h event code/PMC mappings:
+ *
+ * type = event_code & 0x0F0:
+ *
+ * 0x000 FP PERF_CTL[5:3]
+ * 0x010 FP PERF_CTL[5:3]
+ * 0x020 LS PERF_CTL[5:0]
+ * 0x030 LS PERF_CTL[5:0]
+ * 0x040 DC PERF_CTL[5:0]
+ * 0x050 DC PERF_CTL[5:0]
+ * 0x060 CU PERF_CTL[2:0]
+ * 0x070 CU PERF_CTL[2:0]
+ * 0x080 IC/DE PERF_CTL[2:0]
+ * 0x090 IC/DE PERF_CTL[2:0]
+ * 0x0A0 ---
+ * 0x0B0 ---
+ * 0x0C0 EX/LS PERF_CTL[5:0]
+ * 0x0D0 DE PERF_CTL[2:0]
+ * 0x0E0 NB NB_PERF_CTL[3:0]
+ * 0x0F0 NB NB_PERF_CTL[3:0]
+ *
+ * Exceptions:
+ *
+ * 0x003 FP PERF_CTL[3]
+ * 0x00B FP PERF_CTL[3]
+ * 0x00D FP PERF_CTL[3]
+ * 0x023 DE PERF_CTL[2:0]
+ * 0x02D LS PERF_CTL[3]
+ * 0x02E LS PERF_CTL[3,0]
+ * 0x043 CU PERF_CTL[2:0]
+ * 0x045 CU PERF_CTL[2:0]
+ * 0x046 CU PERF_CTL[2:0]
+ * 0x054 CU PERF_CTL[2:0]
+ * 0x055 CU PERF_CTL[2:0]
+ * 0x08F IC PERF_CTL[0]
+ * 0x187 DE PERF_CTL[0]
+ * 0x188 DE PERF_CTL[0]
+ * 0x0DB EX PERF_CTL[5:0]
+ * 0x0DC LS PERF_CTL[5:0]
+ * 0x0DD LS PERF_CTL[5:0]
+ * 0x0DE LS PERF_CTL[5:0]
+ * 0x0DF LS PERF_CTL[5:0]
+ * 0x1D6 EX PERF_CTL[5:0]
+ * 0x1D8 EX PERF_CTL[5:0]
+ */
+
+static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0);
+static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0);
+static struct event_constraint amd_f15_PMC3 = EVENT_CONSTRAINT(0, 0x08, 0);
+static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT(0, 0x09, 0);
+static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
+static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
+
+static struct event_constraint *
+amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event)
+{
+ unsigned int event_code = amd_get_event_code(&event->hw);
+
+ switch (event_code & AMD_EVENT_TYPE_MASK) {
+ case AMD_EVENT_FP:
+ switch (event_code) {
+ case 0x003:
+ case 0x00B:
+ case 0x00D:
+ return &amd_f15_PMC3;
+ default:
+ return &amd_f15_PMC53;
+ }
+ case AMD_EVENT_LS:
+ case AMD_EVENT_DC:
+ case AMD_EVENT_EX_LS:
+ switch (event_code) {
+ case 0x023:
+ case 0x043:
+ case 0x045:
+ case 0x046:
+ case 0x054:
+ case 0x055:
+ return &amd_f15_PMC20;
+ case 0x02D:
+ return &amd_f15_PMC3;
+ case 0x02E:
+ return &amd_f15_PMC30;
+ default:
+ return &amd_f15_PMC50;
+ }
+ case AMD_EVENT_CU:
+ case AMD_EVENT_IC_DE:
+ case AMD_EVENT_DE:
+ switch (event_code) {
+ case 0x08F:
+ case 0x187:
+ case 0x188:
+ return &amd_f15_PMC0;
+ case 0x0DB ... 0x0DF:
+ case 0x1D6:
+ case 0x1D8:
+ return &amd_f15_PMC50;
+ default:
+ return &amd_f15_PMC20;
+ }
+ case AMD_EVENT_NB:
+ /* not yet implemented */
+ return &emptyconstraint;
+ default:
+ return &emptyconstraint;
+ }
+}
+
+static __initconst const struct x86_pmu amd_pmu_f15h = {
+ .name = "AMD Family 15h",
+ .handle_irq = x86_pmu_handle_irq,
+ .disable_all = x86_pmu_disable_all,
+ .enable_all = x86_pmu_enable_all,
+ .enable = x86_pmu_enable_event,
+ .disable = x86_pmu_disable_event,
+ .hw_config = amd_pmu_hw_config,
+ .schedule_events = x86_schedule_events,
+ .eventsel = MSR_F15H_PERF_CTL,
+ .perfctr = MSR_F15H_PERF_CTR,
+ .event_map = amd_pmu_event_map,
+ .max_events = ARRAY_SIZE(amd_perfmon_event_map),
+ .num_counters = 6,
+ .cntval_bits = 48,
+ .cntval_mask = (1ULL << 48) - 1,
+ .apic = 1,
+ /* use highest bit to detect overflow */
+ .max_period = (1ULL << 47) - 1,
+ .get_event_constraints = amd_get_event_constraints_f15h,
+ /* northbridge counters not yet implemented: */
+#if 0
+ .put_event_constraints = amd_put_event_constraints,
+
+ .cpu_prepare = amd_pmu_cpu_prepare,
+ .cpu_starting = amd_pmu_cpu_starting,
+ .cpu_dead = amd_pmu_cpu_dead,
+#endif
+};
+
static __init int amd_pmu_init(void)
{
/* Performance-monitoring supported from K7 and later: */
if (boot_cpu_data.x86 < 6)
return -ENODEV;

- x86_pmu = amd_pmu;
+ /*
+ * If core performance counter extensions exist, it must be
+ * family 15h, otherwise fail. See x86_pmu_addr_offset().
+ */
+ switch (boot_cpu_data.x86) {
+ case 0x15:
+ if (!cpu_has_perfctr_core)
+ return -ENODEV;
+ x86_pmu = amd_pmu_f15h;
+ break;
+ default:
+ if (cpu_has_perfctr_core)
+ return -ENODEV;
+ x86_pmu = amd_pmu;
+ break;
+ }

/* Events are common for all AMDs */
memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,