Per Ingo and Peter Z's request:
https://lkml.org/lkml/2013/4/10/267
This patchset removes the NB counter support from perf_event_amd.c and
creates a new PMU instead to add support for both AMD NB and L2I
counters.
Jacob Shin (2):
perf, amd: remove NB counter support from perf_event_amd.c
perf, amd: support for AMD NB and L2I "uncore" counters.
arch/x86/include/asm/cpufeature.h | 2 +
arch/x86/include/uapi/asm/msr-index.h | 4 +
arch/x86/kernel/cpu/Makefile | 2 +-
arch/x86/kernel/cpu/perf_event_amd.c | 138 +--------
arch/x86/kernel/cpu/perf_event_amd_uncore.c | 442 +++++++++++++++++++++++++++
5 files changed, 454 insertions(+), 134 deletions(-)
create mode 100644 arch/x86/kernel/cpu/perf_event_amd_uncore.c
--
1.7.9.5
Add support for AMD Family 15h [and above] northbridge performance
counters. MSRs 0xc0010240 ~ 0xc0010247 are shared across all cores
that share a common northbridge.
Add support for AMD Family 16h L2 performance counters. MSRs
0xc0010230 ~ 0xc0010237 are shared across all cores that share a
common L2 cache.
We do not enable counter overflow interrupts. Sampling mode and
per-thread events are not supported.
Signed-off-by: Jacob Shin <[email protected]>
---
arch/x86/include/asm/cpufeature.h | 2 +
arch/x86/include/uapi/asm/msr-index.h | 4 +
arch/x86/kernel/cpu/Makefile | 2 +-
arch/x86/kernel/cpu/perf_event_amd_uncore.c | 442 +++++++++++++++++++++++++++
4 files changed, 449 insertions(+), 1 deletion(-)
create mode 100644 arch/x86/kernel/cpu/perf_event_amd_uncore.c
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 93fe929..ac10df7 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -168,6 +168,7 @@
#define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */
#define X86_FEATURE_PERFCTR_CORE (6*32+23) /* core performance counter extensions */
#define X86_FEATURE_PERFCTR_NB (6*32+24) /* NB performance counter extensions */
+#define X86_FEATURE_PERFCTR_L2 (6*32+28) /* L2 performance counter extensions */
/*
* Auxiliary flags: Linux defined - For features scattered in various
@@ -311,6 +312,7 @@ extern const char * const x86_power_flags[32];
#define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ)
#define cpu_has_perfctr_core boot_cpu_has(X86_FEATURE_PERFCTR_CORE)
#define cpu_has_perfctr_nb boot_cpu_has(X86_FEATURE_PERFCTR_NB)
+#define cpu_has_perfctr_l2 boot_cpu_has(X86_FEATURE_PERFCTR_L2)
#define cpu_has_cx8 boot_cpu_has(X86_FEATURE_CX8)
#define cpu_has_cx16 boot_cpu_has(X86_FEATURE_CX16)
#define cpu_has_eager_fpu boot_cpu_has(X86_FEATURE_EAGER_FPU)
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index bf7bb68..b575788 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -196,6 +196,10 @@
#define MSR_AMD64_IBSBRTARGET 0xc001103b
#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */
+/* Fam 16h MSRs */
+#define MSR_F16H_L2I_PERF_CTL 0xc0010230
+#define MSR_F16H_L2I_PERF_CTR 0xc0010231
+
/* Fam 15h MSRs */
#define MSR_F15H_PERF_CTL 0xc0010200
#define MSR_F15H_PERF_CTR 0xc0010201
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index a0e067d..0074572 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -31,7 +31,7 @@ obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o
obj-$(CONFIG_PERF_EVENTS) += perf_event.o
ifdef CONFIG_PERF_EVENTS
-obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o
+obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o perf_event_amd_uncore.o
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore.o
diff --git a/arch/x86/kernel/cpu/perf_event_amd_uncore.c b/arch/x86/kernel/cpu/perf_event_amd_uncore.c
new file mode 100644
index 0000000..384f674
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_amd_uncore.c
@@ -0,0 +1,442 @@
+/*
+ * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ *
+ * Author: Jacob Shin <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/perf_event.h>
+#include <linux/percpu.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+
+#include "perf_event.h"
+
+#define NUM_COUNTERS_NB 4
+#define NUM_COUNTERS_L2 4
+#define MAX_COUNTERS NUM_COUNTERS_NB
+
+#define RDPMC_BASE_NB 6
+#define RDPMC_BASE_L2 10
+
+#define COUNTER_SHIFT 16
+
+struct amd_uncore {
+ int id;
+ int refcnt;
+ int num_counters;
+ int rdpmc_base;
+ u32 msr_base;
+ struct perf_event *events[MAX_COUNTERS];
+ struct amd_uncore *free_when_cpu_online;
+};
+
+static struct amd_uncore * __percpu *amd_uncore_nb;
+static struct amd_uncore * __percpu *amd_uncore_l2;
+
+static struct pmu amd_nb_pmu;
+static struct pmu amd_l2_pmu;
+
+static bool is_nb_event(struct perf_event *event)
+{
+ return event->pmu->type == amd_nb_pmu.type;
+}
+
+static bool is_l2_event(struct perf_event *event)
+{
+ return event->pmu->type == amd_l2_pmu.type;
+}
+
+static struct amd_uncore *event_to_amd_uncore(struct perf_event *event)
+{
+ if (is_nb_event(event) && amd_uncore_nb)
+ return *per_cpu_ptr(amd_uncore_nb, event->cpu);
+ else if (is_l2_event(event) && amd_uncore_l2)
+ return *per_cpu_ptr(amd_uncore_l2, event->cpu);
+
+ return NULL;
+}
+
+static void amd_uncore_read(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ u64 prev, new;
+ s64 delta;
+
+ /*
+ * since we do not enable counter overflow interrupts,
+ * we do not have to worry about prev_count changing on us
+ */
+
+ prev = local64_read(&hwc->prev_count);
+ rdpmcl(hwc->event_base_rdpmc, new);
+ local64_set(&hwc->prev_count, new);
+ delta = (new << COUNTER_SHIFT) - (prev << COUNTER_SHIFT);
+ delta >>= COUNTER_SHIFT;
+ local64_add(delta, &event->count);
+}
+
+static void amd_uncore_start(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (flags & PERF_EF_RELOAD)
+ wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count));
+
+ hwc->state = 0;
+ wrmsrl(hwc->config_base, (hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE));
+ perf_event_update_userpage(event);
+}
+
+static void amd_uncore_stop(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ wrmsrl(hwc->config_base, hwc->config);
+ hwc->state |= PERF_HES_STOPPED;
+
+ if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
+ amd_uncore_read(event);
+ hwc->state |= PERF_HES_UPTODATE;
+ }
+}
+
+static int amd_uncore_add(struct perf_event *event, int flags)
+{
+ int i;
+ struct amd_uncore *uncore = event_to_amd_uncore(event);
+ struct hw_perf_event *hwc = &event->hw;
+
+ /* are we already assigned? */
+ if (hwc->idx != -1 && uncore->events[hwc->idx] == event)
+ goto out;
+
+ for (i = 0; i < uncore->num_counters; i++) {
+ if (uncore->events[i] == event) {
+ hwc->idx = i;
+ goto out;
+ }
+ }
+
+ /* if not, take the first available counter */
+ hwc->idx = -1;
+ for (i = 0; i < uncore->num_counters; i++) {
+ if (cmpxchg(&uncore->events[i], NULL, event) == NULL) {
+ hwc->idx = i;
+ break;
+ }
+ }
+
+out:
+ if (hwc->idx == -1)
+ return -EBUSY;
+
+ hwc->config_base = uncore->msr_base + (2 * hwc->idx);
+ hwc->event_base = uncore->msr_base + 1 + (2 * hwc->idx);
+ hwc->event_base_rdpmc = uncore->rdpmc_base + hwc->idx;
+ hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+ if (flags & PERF_EF_START)
+ amd_uncore_start(event, PERF_EF_RELOAD);
+
+ return 0;
+}
+
+static void amd_uncore_del(struct perf_event *event, int flags)
+{
+ int i;
+ struct amd_uncore *uncore = event_to_amd_uncore(event);
+ struct hw_perf_event *hwc = &event->hw;
+
+ amd_uncore_stop(event, PERF_EF_UPDATE);
+
+ for (i = 0; i < uncore->num_counters; i++) {
+ if (cmpxchg(&uncore->events[i], event, NULL) == event)
+ break;
+ }
+
+ hwc->idx = -1;
+}
+
+static int amd_uncore_event_init(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ /*
+ * NB and L2 counters (MSRs) are shared across all cores that share the
+ * same NB / L2 cache. Interrupts can be directed to a single target
+ * core, however, event counts generated by processes running on other
+ * cores cannot be masked out. So we do not support sampling and
+ * per-thread events.
+ */
+ if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
+ return -EINVAL;
+
+ /* NB and L2 counters do not have usr/os/guest/host bits */
+ if (event->attr.exclude_user || event->attr.exclude_kernel ||
+ event->attr.exclude_host || event->attr.exclude_guest)
+ return -EINVAL;
+
+ if (event->cpu < 0)
+ return -EINVAL;
+
+ hwc->idx = -1;
+
+ /* and we do not enable counter overflow interrupts */
+ hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB;
+
+ return 0;
+}
+
+PMU_FORMAT_ATTR(event, "config:0-7,32-35");
+PMU_FORMAT_ATTR(umask, "config:8-15" );
+
+static struct attribute *amd_uncore_format_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ NULL,
+};
+
+static struct attribute_group amd_uncore_format_group = {
+ .name = "format",
+ .attrs = amd_uncore_format_attr,
+};
+
+static const struct attribute_group *amd_uncore_attr_groups[] = {
+ &amd_uncore_format_group,
+ NULL,
+};
+
+static struct pmu amd_nb_pmu = {
+ .attr_groups = amd_uncore_attr_groups,
+ .name = "amd_nb",
+ .event_init = amd_uncore_event_init,
+ .add = amd_uncore_add,
+ .del = amd_uncore_del,
+ .start = amd_uncore_start,
+ .stop = amd_uncore_stop,
+ .read = amd_uncore_read,
+};
+
+static struct pmu amd_l2_pmu = {
+ .attr_groups = amd_uncore_attr_groups,
+ .name = "amd_l2",
+ .event_init = amd_uncore_event_init,
+ .add = amd_uncore_add,
+ .del = amd_uncore_del,
+ .start = amd_uncore_start,
+ .stop = amd_uncore_stop,
+ .read = amd_uncore_read,
+};
+
+static struct amd_uncore * __cpuinit amd_uncore_alloc(unsigned int cpu)
+{
+ return kzalloc_node(sizeof(struct amd_uncore), GFP_KERNEL,
+ cpu_to_node(cpu));
+}
+
+static void __cpuinit amd_uncore_cpu_prepare(unsigned int cpu)
+{
+ struct amd_uncore *uncore;
+
+ if (amd_uncore_nb) {
+ uncore = amd_uncore_alloc(cpu);
+ uncore->num_counters = NUM_COUNTERS_NB;
+ uncore->rdpmc_base = RDPMC_BASE_NB;
+ uncore->msr_base = MSR_F15H_NB_PERF_CTL;
+ *per_cpu_ptr(amd_uncore_nb, cpu) = uncore;
+ }
+
+ if (amd_uncore_l2) {
+ uncore = amd_uncore_alloc(cpu);
+ uncore->num_counters = NUM_COUNTERS_L2;
+ uncore->rdpmc_base = RDPMC_BASE_L2;
+ uncore->msr_base = MSR_F16H_L2I_PERF_CTL;
+ *per_cpu_ptr(amd_uncore_l2, cpu) = uncore;
+ }
+}
+
+static struct amd_uncore *
+__cpuinit amd_uncore_find_online_sibling(struct amd_uncore *this,
+ struct amd_uncore * __percpu *uncores)
+{
+ unsigned int cpu;
+ struct amd_uncore *that;
+
+ for_each_online_cpu(cpu) {
+ that = *per_cpu_ptr(uncores, cpu);
+
+ if (!that)
+ continue;
+
+ if (this == that)
+ continue;
+
+ if (this->id == that->id) {
+ that->free_when_cpu_online = this;
+ this = that;
+ break;
+ }
+ }
+
+ this->refcnt++;
+ return this;
+}
+
+static void __cpuinit amd_uncore_cpu_starting(unsigned int cpu)
+{
+ unsigned int eax, ebx, ecx, edx;
+ struct amd_uncore *uncore;
+
+ if (amd_uncore_nb) {
+ uncore = *per_cpu_ptr(amd_uncore_nb, cpu);
+ cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
+ uncore->id = ecx & 0xff;
+
+ uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_nb);
+ *per_cpu_ptr(amd_uncore_nb, cpu) = uncore;
+ }
+
+ if (amd_uncore_l2) {
+ unsigned int apicid = cpu_data(cpu).apicid;
+ unsigned int nshared;
+
+ uncore = *per_cpu_ptr(amd_uncore_l2, cpu);
+ cpuid_count(0x8000001d, 2, &eax, &ebx, &ecx, &edx);
+ nshared = ((eax >> 14) & 0xfff) + 1;
+ uncore->id = apicid - (apicid % nshared);
+
+ uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_l2);
+ *per_cpu_ptr(amd_uncore_l2, cpu) = uncore;
+ }
+}
+
+static void __cpuinit uncore_online(unsigned int cpu,
+ struct amd_uncore * __percpu *uncores)
+{
+ struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);
+
+ if (uncore->free_when_cpu_online) {
+ kfree(uncore->free_when_cpu_online);
+ uncore->free_when_cpu_online = NULL;
+ }
+}
+
+static void __cpuinit amd_uncore_cpu_online(unsigned int cpu)
+{
+ if (amd_uncore_nb)
+ uncore_online(cpu, amd_uncore_nb);
+
+ if (amd_uncore_l2)
+ uncore_online(cpu, amd_uncore_l2);
+}
+
+static void __cpuinit uncore_dead(unsigned int cpu,
+ struct amd_uncore * __percpu *uncores)
+{
+ struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);
+ if (!--uncore->refcnt)
+ kfree(uncore);
+ *per_cpu_ptr(amd_uncore_nb, cpu) = NULL;
+}
+
+static void __cpuinit amd_uncore_cpu_dead(unsigned int cpu)
+{
+ if (amd_uncore_nb)
+ uncore_dead(cpu, amd_uncore_nb);
+
+ if (amd_uncore_l2)
+ uncore_dead(cpu, amd_uncore_l2);
+}
+
+static int __cpuinit
+amd_uncore_cpu_notifier(struct notifier_block *self, unsigned long action,
+ void *hcpu)
+{
+ unsigned int cpu = (long)hcpu;
+
+ switch (action & ~CPU_TASKS_FROZEN) {
+ case CPU_UP_PREPARE:
+ amd_uncore_cpu_prepare(cpu);
+ break;
+
+ case CPU_STARTING:
+ amd_uncore_cpu_starting(cpu);
+ break;
+
+ case CPU_ONLINE:
+ amd_uncore_cpu_online(cpu);
+ break;
+
+ case CPU_UP_CANCELED:
+ case CPU_DEAD:
+ amd_uncore_cpu_dead(cpu);
+ break;
+
+ default:
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block amd_uncore_cpu_notifier_block __cpuinitdata = {
+ .notifier_call = amd_uncore_cpu_notifier,
+};
+
+static void __init init_cpu_already_online(void *dummy)
+{
+ unsigned int cpu = smp_processor_id();
+
+ amd_uncore_cpu_prepare(cpu);
+ amd_uncore_cpu_starting(cpu);
+ amd_uncore_cpu_online(cpu);
+}
+
+static int __init amd_uncore_init(void)
+{
+ unsigned int cpu;
+ int ret = -ENODEV;
+
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
+ return -ENODEV;
+
+ if (!cpu_has_topoext)
+ return -ENODEV;
+
+ if (cpu_has_perfctr_nb) {
+ amd_uncore_nb = alloc_percpu(struct amd_uncore *);
+ perf_pmu_register(&amd_nb_pmu, amd_nb_pmu.name, -1);
+
+ printk(KERN_INFO "perf: AMD NB counters detected\n");
+ ret = 0;
+ }
+
+ if (cpu_has_perfctr_l2) {
+ amd_uncore_l2 = alloc_percpu(struct amd_uncore *);
+ perf_pmu_register(&amd_l2_pmu, amd_l2_pmu.name, -1);
+
+ printk(KERN_INFO "perf: AMD L2I counters detected\n");
+ ret = 0;
+ }
+
+ if (ret)
+ return -ENODEV;
+
+ get_online_cpus();
+ /* init cpus already online before registering for hotplug notifier */
+ for_each_online_cpu(cpu)
+ smp_call_function_single(cpu, init_cpu_already_online, NULL, 1);
+
+ register_cpu_notifier(&amd_uncore_cpu_notifier_block);
+ put_online_cpus();
+
+ return 0;
+}
+device_initcall(amd_uncore_init);
--
1.7.9.5
Support for NB counters, MSRs 0xc0010240 ~ 0xc0010247, will be moved
to perf_event_amd_uncore.c in a follow up patch. AMD Family 10h NB
events (events 0xe0 ~ 0xff, on MSRs 0xc001000 ~ 0xc001007) will still
continue to be handled by perf_event_amd.c
Signed-off-by: Jacob Shin <[email protected]>
---
arch/x86/kernel/cpu/perf_event_amd.c | 138 ++--------------------------------
1 file changed, 5 insertions(+), 133 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index dfdab42..7e28d94 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -132,14 +132,11 @@ static u64 amd_pmu_event_map(int hw_event)
return amd_perfmon_event_map[hw_event];
}
-static struct event_constraint *amd_nb_event_constraint;
-
/*
* Previously calculated offsets
*/
static unsigned int event_offsets[X86_PMC_IDX_MAX] __read_mostly;
static unsigned int count_offsets[X86_PMC_IDX_MAX] __read_mostly;
-static unsigned int rdpmc_indexes[X86_PMC_IDX_MAX] __read_mostly;
/*
* Legacy CPUs:
@@ -147,14 +144,10 @@ static unsigned int rdpmc_indexes[X86_PMC_IDX_MAX] __read_mostly;
*
* CPUs with core performance counter extensions:
* 6 counters starting at 0xc0010200 each offset by 2
- *
- * CPUs with north bridge performance counter extensions:
- * 4 additional counters starting at 0xc0010240 each offset by 2
- * (indexed right above either one of the above core counters)
*/
static inline int amd_pmu_addr_offset(int index, bool eventsel)
{
- int offset, first, base;
+ int offset;
if (!index)
return index;
@@ -167,23 +160,7 @@ static inline int amd_pmu_addr_offset(int index, bool eventsel)
if (offset)
return offset;
- if (amd_nb_event_constraint &&
- test_bit(index, amd_nb_event_constraint->idxmsk)) {
- /*
- * calculate the offset of NB counters with respect to
- * base eventsel or perfctr
- */
-
- first = find_first_bit(amd_nb_event_constraint->idxmsk,
- X86_PMC_IDX_MAX);
-
- if (eventsel)
- base = MSR_F15H_NB_PERF_CTL - x86_pmu.eventsel;
- else
- base = MSR_F15H_NB_PERF_CTR - x86_pmu.perfctr;
-
- offset = base + ((index - first) << 1);
- } else if (!cpu_has_perfctr_core)
+ if (!cpu_has_perfctr_core)
offset = index;
else
offset = index << 1;
@@ -196,36 +173,6 @@ static inline int amd_pmu_addr_offset(int index, bool eventsel)
return offset;
}
-static inline int amd_pmu_rdpmc_index(int index)
-{
- int ret, first;
-
- if (!index)
- return index;
-
- ret = rdpmc_indexes[index];
-
- if (ret)
- return ret;
-
- if (amd_nb_event_constraint &&
- test_bit(index, amd_nb_event_constraint->idxmsk)) {
- /*
- * according to the mnual, ECX value of the NB counters is
- * the index of the NB counter (0, 1, 2 or 3) plus 6
- */
-
- first = find_first_bit(amd_nb_event_constraint->idxmsk,
- X86_PMC_IDX_MAX);
- ret = index - first + 6;
- } else
- ret = index;
-
- rdpmc_indexes[index] = ret;
-
- return ret;
-}
-
static int amd_core_hw_config(struct perf_event *event)
{
if (event->attr.exclude_host && event->attr.exclude_guest)
@@ -245,34 +192,6 @@ static int amd_core_hw_config(struct perf_event *event)
}
/*
- * NB counters do not support the following event select bits:
- * Host/Guest only
- * Counter mask
- * Invert counter mask
- * Edge detect
- * OS/User mode
- */
-static int amd_nb_hw_config(struct perf_event *event)
-{
- /* for NB, we only allow system wide counting mode */
- if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
- return -EINVAL;
-
- if (event->attr.exclude_user || event->attr.exclude_kernel ||
- event->attr.exclude_host || event->attr.exclude_guest)
- return -EINVAL;
-
- event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR |
- ARCH_PERFMON_EVENTSEL_OS);
-
- if (event->hw.config & ~(AMD64_RAW_EVENT_MASK_NB |
- ARCH_PERFMON_EVENTSEL_INT))
- return -EINVAL;
-
- return 0;
-}
-
-/*
* AMD64 events are detected based on their event codes.
*/
static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc)
@@ -285,11 +204,6 @@ static inline int amd_is_nb_event(struct hw_perf_event *hwc)
return (hwc->config & 0xe0) == 0xe0;
}
-static inline int amd_is_perfctr_nb_event(struct hw_perf_event *hwc)
-{
- return amd_nb_event_constraint && amd_is_nb_event(hwc);
-}
-
static inline int amd_has_nb(struct cpu_hw_events *cpuc)
{
struct amd_nb *nb = cpuc->amd_nb;
@@ -315,9 +229,6 @@ static int amd_pmu_hw_config(struct perf_event *event)
if (event->attr.type == PERF_TYPE_RAW)
event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK;
- if (amd_is_perfctr_nb_event(&event->hw))
- return amd_nb_hw_config(event);
-
return amd_core_hw_config(event);
}
@@ -341,19 +252,6 @@ static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc,
}
}
-static void amd_nb_interrupt_hw_config(struct hw_perf_event *hwc)
-{
- int core_id = cpu_data(smp_processor_id()).cpu_core_id;
-
- /* deliver interrupts only to this core */
- if (hwc->config & ARCH_PERFMON_EVENTSEL_INT) {
- hwc->config |= AMD64_EVENTSEL_INT_CORE_ENABLE;
- hwc->config &= ~AMD64_EVENTSEL_INT_CORE_SEL_MASK;
- hwc->config |= (u64)(core_id) <<
- AMD64_EVENTSEL_INT_CORE_SEL_SHIFT;
- }
-}
-
/*
* AMD64 NorthBridge events need special treatment because
* counter access needs to be synchronized across all cores
@@ -441,9 +339,6 @@ __amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *ev
if (new == -1)
return &emptyconstraint;
- if (amd_is_perfctr_nb_event(hwc))
- amd_nb_interrupt_hw_config(hwc);
-
return &nb->event_constraints[new];
}
@@ -543,8 +438,7 @@ amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
if (!(amd_has_nb(cpuc) && amd_is_nb_event(&event->hw)))
return &unconstrained;
- return __amd_get_nb_event_constraints(cpuc, event,
- amd_nb_event_constraint);
+ return __amd_get_nb_event_constraints(cpuc, event, NULL);
}
static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
@@ -643,9 +537,6 @@ static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09,
static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
-static struct event_constraint amd_NBPMC96 = EVENT_CONSTRAINT(0, 0x3C0, 0);
-static struct event_constraint amd_NBPMC74 = EVENT_CONSTRAINT(0, 0xF0, 0);
-
static struct event_constraint *
amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event)
{
@@ -711,8 +602,8 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *ev
return &amd_f15_PMC20;
}
case AMD_EVENT_NB:
- return __amd_get_nb_event_constraints(cpuc, event,
- amd_nb_event_constraint);
+ /* moved to perf_event_amd_uncore.c */
+ return &emptyconstraint;
default:
return &emptyconstraint;
}
@@ -738,7 +629,6 @@ static __initconst const struct x86_pmu amd_pmu = {
.eventsel = MSR_K7_EVNTSEL0,
.perfctr = MSR_K7_PERFCTR0,
.addr_offset = amd_pmu_addr_offset,
- .rdpmc_index = amd_pmu_rdpmc_index,
.event_map = amd_pmu_event_map,
.max_events = ARRAY_SIZE(amd_perfmon_event_map),
.num_counters = AMD64_NUM_COUNTERS,
@@ -790,23 +680,6 @@ static int setup_perfctr_core(void)
return 0;
}
-static int setup_perfctr_nb(void)
-{
- if (!cpu_has_perfctr_nb)
- return -ENODEV;
-
- x86_pmu.num_counters += AMD64_NUM_COUNTERS_NB;
-
- if (cpu_has_perfctr_core)
- amd_nb_event_constraint = &amd_NBPMC96;
- else
- amd_nb_event_constraint = &amd_NBPMC74;
-
- printk(KERN_INFO "perf: AMD northbridge performance counters detected\n");
-
- return 0;
-}
-
__init int amd_pmu_init(void)
{
/* Performance-monitoring supported from K7 and later: */
@@ -817,7 +690,6 @@ __init int amd_pmu_init(void)
setup_event_constraints();
setup_perfctr_core();
- setup_perfctr_nb();
/* Events are common for all AMDs */
memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
--
1.7.9.5
On Mon, Apr 15, 2013 at 12:21:21PM -0500, Jacob Shin wrote:
> Per Ingo and Peter Z's request:
> https://lkml.org/lkml/2013/4/10/267
>
> This patchset removes the NB counter support from perf_event_amd.c and
> creates a new PMU instead to add support for both AMD NB and L2I
> counters.
Hi Peter Z., could you please take a look over this when you get the
chance? Each new set of counters are now their own struct pmu as you
requested.
Thank you,
-Jacob
>
> Jacob Shin (2):
> perf, amd: remove NB counter support from perf_event_amd.c
> perf, amd: support for AMD NB and L2I "uncore" counters.
>
> arch/x86/include/asm/cpufeature.h | 2 +
> arch/x86/include/uapi/asm/msr-index.h | 4 +
> arch/x86/kernel/cpu/Makefile | 2 +-
> arch/x86/kernel/cpu/perf_event_amd.c | 138 +--------
> arch/x86/kernel/cpu/perf_event_amd_uncore.c | 442 +++++++++++++++++++++++++++
> 5 files changed, 454 insertions(+), 134 deletions(-)
> create mode 100644 arch/x86/kernel/cpu/perf_event_amd_uncore.c
>
> --
> 1.7.9.5
>
On Mon, 2013-04-15 at 12:21 -0500, Jacob Shin wrote:
> Add support for AMD Family 15h [and above] northbridge performance
> counters. MSRs 0xc0010240 ~ 0xc0010247 are shared across all cores
> that share a common northbridge.
>
> Add support for AMD Family 16h L2 performance counters. MSRs
> 0xc0010230 ~ 0xc0010237 are shared across all cores that share a
> common L2 cache.
>
> We do not enable counter overflow interrupts. Sampling mode and
> per-thread events are not supported.
Nice!
There's one crucial thing missing though.. The Intel uncore driver
explicitly maps all events of cpus that are of the same uncore to a
single cpu and migrates the events to another cpu (if any is available)
when that cpu goes down.
The advantage of keeping all the events on the same cpu is that we
don't need to consider shared resources between cpus of an uncore
group.
That way things like event rotation on overcommit also trivially works
right.
I appreciate the intel uncore driver might be a tad hard to read -- its
somewhat unwieldy. But it basically sets event->cpu on
pmu::event_init() to whatever cpu is selected to represent the group,
and then calls perf_pmu_migrate_context() from a hotplug notifier.
On Thu, Apr 18, 2013 at 01:28:54PM +0200, Peter Zijlstra wrote:
> On Mon, 2013-04-15 at 12:21 -0500, Jacob Shin wrote:
> > Add support for AMD Family 15h [and above] northbridge performance
> > counters. MSRs 0xc0010240 ~ 0xc0010247 are shared across all cores
> > that share a common northbridge.
> >
> > Add support for AMD Family 16h L2 performance counters. MSRs
> > 0xc0010230 ~ 0xc0010237 are shared across all cores that share a
> > common L2 cache.
> >
> > We do not enable counter overflow interrupts. Sampling mode and
> > per-thread events are not supported.
>
>
> Nice!
>
> There's one crucial thing missing though.. The Intel uncore driver
> explicitly maps all events of cpus that are of the same uncore to a
> single cpu and migrates the events to another cpu (if any is available)
> when that cpu goes down.
>
> The advantage of keeping all the events on the same cpu is that we
> don't need to consider shared resources between cpus of an uncore
> group.
>
> That way things like event rotation on overcommit also trivially works
> right.
>
> I appreciate the intel uncore driver might be a tad hard to read -- its
> somewhat unwieldy. But it basically sets event->cpu on
> pmu::event_init() to whatever cpu is selected to represent the group,
> and then calls perf_pmu_migrate_context() from a hotplug notifier.
Okay, here is V2 which does that. Thanks again in advance for taking
the time to look it over.
>From d68225744196d8b0d60a1bc33ae9abdbffbd9bc9 Mon Sep 17 00:00:00 2001
From: Jacob Shin <[email protected]>
Date: Sun, 14 Apr 2013 04:12:42 -0500
Subject: [PATCH 2/2] perf, amd: support for AMD NB and L2I "uncore" counters.
Add support for AMD Family 15h [and above] northbridge performance
counters. MSRs 0xc0010240 ~ 0xc0010247 are shared across all cores
that share a common northbridge.
Add support for AMD Family 16h L2 performance counters. MSRs
0xc0010230 ~ 0xc0010237 are shared across all cores that share a
common L2 cache.
We do not enable counter overflow interrupts. Sampling mode and
per-thread events are not supported.
Signed-off-by: Jacob Shin <[email protected]>
---
arch/x86/include/asm/cpufeature.h | 2 +
arch/x86/include/uapi/asm/msr-index.h | 4 +
arch/x86/kernel/cpu/Makefile | 2 +-
arch/x86/kernel/cpu/perf_event_amd_uncore.c | 499 +++++++++++++++++++++++++++
4 files changed, 506 insertions(+), 1 deletion(-)
create mode 100644 arch/x86/kernel/cpu/perf_event_amd_uncore.c
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 93fe929..ac10df7 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -168,6 +168,7 @@
#define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */
#define X86_FEATURE_PERFCTR_CORE (6*32+23) /* core performance counter extensions */
#define X86_FEATURE_PERFCTR_NB (6*32+24) /* NB performance counter extensions */
+#define X86_FEATURE_PERFCTR_L2 (6*32+28) /* L2 performance counter extensions */
/*
* Auxiliary flags: Linux defined - For features scattered in various
@@ -311,6 +312,7 @@ extern const char * const x86_power_flags[32];
#define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ)
#define cpu_has_perfctr_core boot_cpu_has(X86_FEATURE_PERFCTR_CORE)
#define cpu_has_perfctr_nb boot_cpu_has(X86_FEATURE_PERFCTR_NB)
+#define cpu_has_perfctr_l2 boot_cpu_has(X86_FEATURE_PERFCTR_L2)
#define cpu_has_cx8 boot_cpu_has(X86_FEATURE_CX8)
#define cpu_has_cx16 boot_cpu_has(X86_FEATURE_CX16)
#define cpu_has_eager_fpu boot_cpu_has(X86_FEATURE_EAGER_FPU)
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index bf7bb68..b575788 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -196,6 +196,10 @@
#define MSR_AMD64_IBSBRTARGET 0xc001103b
#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */
+/* Fam 16h MSRs */
+#define MSR_F16H_L2I_PERF_CTL 0xc0010230
+#define MSR_F16H_L2I_PERF_CTR 0xc0010231
+
/* Fam 15h MSRs */
#define MSR_F15H_PERF_CTL 0xc0010200
#define MSR_F15H_PERF_CTR 0xc0010201
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index a0e067d..0074572 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -31,7 +31,7 @@ obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o
obj-$(CONFIG_PERF_EVENTS) += perf_event.o
ifdef CONFIG_PERF_EVENTS
-obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o
+obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o perf_event_amd_uncore.o
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore.o
diff --git a/arch/x86/kernel/cpu/perf_event_amd_uncore.c b/arch/x86/kernel/cpu/perf_event_amd_uncore.c
new file mode 100644
index 0000000..acaeeb1
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_amd_uncore.c
@@ -0,0 +1,499 @@
+/*
+ * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ *
+ * Author: Jacob Shin <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/perf_event.h>
+#include <linux/percpu.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+
+#include <asm/cpufeature.h>
+#include <asm/perf_event.h>
+#include <asm/msr.h>
+
+#define NUM_COUNTERS_NB 4
+#define NUM_COUNTERS_L2 4
+#define MAX_COUNTERS NUM_COUNTERS_NB
+
+#define RDPMC_BASE_NB 6
+#define RDPMC_BASE_L2 10
+
+#define COUNTER_SHIFT 16
+
+struct amd_uncore {
+ int id;
+ int refcnt;
+ int cpu;
+ int num_counters;
+ int rdpmc_base;
+ u32 msr_base;
+ struct perf_event *events[MAX_COUNTERS];
+ struct amd_uncore *free_when_cpu_online;
+};
+
+static struct amd_uncore * __percpu *amd_uncore_nb;
+static struct amd_uncore * __percpu *amd_uncore_l2;
+
+static struct pmu amd_nb_pmu;
+static struct pmu amd_l2_pmu;
+
+static bool is_nb_event(struct perf_event *event)
+{
+ return event->pmu->type == amd_nb_pmu.type;
+}
+
+static bool is_l2_event(struct perf_event *event)
+{
+ return event->pmu->type == amd_l2_pmu.type;
+}
+
+static struct amd_uncore *event_to_amd_uncore(struct perf_event *event)
+{
+ if (is_nb_event(event) && amd_uncore_nb)
+ return *per_cpu_ptr(amd_uncore_nb, event->cpu);
+ else if (is_l2_event(event) && amd_uncore_l2)
+ return *per_cpu_ptr(amd_uncore_l2, event->cpu);
+
+ return NULL;
+}
+
+static void amd_uncore_read(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ u64 prev, new;
+ s64 delta;
+
+ /*
+ * since we do not enable counter overflow interrupts,
+ * we do not have to worry about prev_count changing on us
+ */
+
+ prev = local64_read(&hwc->prev_count);
+ rdpmcl(hwc->event_base_rdpmc, new);
+ local64_set(&hwc->prev_count, new);
+ delta = (new << COUNTER_SHIFT) - (prev << COUNTER_SHIFT);
+ delta >>= COUNTER_SHIFT;
+ local64_add(delta, &event->count);
+}
+
+static void amd_uncore_start(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (flags & PERF_EF_RELOAD)
+ wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count));
+
+ hwc->state = 0;
+ wrmsrl(hwc->config_base, (hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE));
+ perf_event_update_userpage(event);
+}
+
+static void amd_uncore_stop(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ wrmsrl(hwc->config_base, hwc->config);
+ hwc->state |= PERF_HES_STOPPED;
+
+ if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
+ amd_uncore_read(event);
+ hwc->state |= PERF_HES_UPTODATE;
+ }
+}
+
+static int amd_uncore_add(struct perf_event *event, int flags)
+{
+ int i;
+ struct amd_uncore *uncore = event_to_amd_uncore(event);
+ struct hw_perf_event *hwc = &event->hw;
+
+ /* are we already assigned? */
+ if (hwc->idx != -1 && uncore->events[hwc->idx] == event)
+ goto out;
+
+ for (i = 0; i < uncore->num_counters; i++) {
+ if (uncore->events[i] == event) {
+ hwc->idx = i;
+ goto out;
+ }
+ }
+
+ /* if not, take the first available counter */
+ hwc->idx = -1;
+ for (i = 0; i < uncore->num_counters; i++) {
+ if (cmpxchg(&uncore->events[i], NULL, event) == NULL) {
+ hwc->idx = i;
+ break;
+ }
+ }
+
+out:
+ if (hwc->idx == -1)
+ return -EBUSY;
+
+ hwc->config_base = uncore->msr_base + (2 * hwc->idx);
+ hwc->event_base = uncore->msr_base + 1 + (2 * hwc->idx);
+ hwc->event_base_rdpmc = uncore->rdpmc_base + hwc->idx;
+ hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+ if (flags & PERF_EF_START)
+ amd_uncore_start(event, PERF_EF_RELOAD);
+
+ return 0;
+}
+
+static void amd_uncore_del(struct perf_event *event, int flags)
+{
+ int i;
+ struct amd_uncore *uncore = event_to_amd_uncore(event);
+ struct hw_perf_event *hwc = &event->hw;
+
+ amd_uncore_stop(event, PERF_EF_UPDATE);
+
+ for (i = 0; i < uncore->num_counters; i++) {
+ if (cmpxchg(&uncore->events[i], event, NULL) == event)
+ break;
+ }
+
+ hwc->idx = -1;
+}
+
+static int amd_uncore_event_init(struct perf_event *event)
+{
+ struct amd_uncore *uncore;
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ /*
+ * NB and L2 counters (MSRs) are shared across all cores that share the
+ * same NB / L2 cache. Interrupts can be directed to a single target
+ * core, however, event counts generated by processes running on other
+ * cores cannot be masked out. So we do not support sampling and
+ * per-thread events.
+ */
+ if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
+ return -EINVAL;
+
+ /* NB and L2 counters do not have usr/os/guest/host bits */
+ if (event->attr.exclude_user || event->attr.exclude_kernel ||
+ event->attr.exclude_host || event->attr.exclude_guest)
+ return -EINVAL;
+
+ /* and we do not enable counter overflow interrupts */
+ hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB;
+ hwc->idx = -1;
+
+ if (event->cpu < 0)
+ return -EINVAL;
+
+ uncore = event_to_amd_uncore(event);
+ if (!uncore)
+ return -ENODEV;
+
+ /*
+ * since request can come in to any of the shared cores, we will remap
+ * to a single common cpu.
+ */
+ event->cpu = uncore->cpu;
+
+ return 0;
+}
+
+PMU_FORMAT_ATTR(event, "config:0-7,32-35");
+PMU_FORMAT_ATTR(umask, "config:8-15" );
+
+static struct attribute *amd_uncore_format_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ NULL,
+};
+
+static struct attribute_group amd_uncore_format_group = {
+ .name = "format",
+ .attrs = amd_uncore_format_attr,
+};
+
+static const struct attribute_group *amd_uncore_attr_groups[] = {
+ &amd_uncore_format_group,
+ NULL,
+};
+
+static struct pmu amd_nb_pmu = {
+ .attr_groups = amd_uncore_attr_groups,
+ .name = "amd_nb",
+ .event_init = amd_uncore_event_init,
+ .add = amd_uncore_add,
+ .del = amd_uncore_del,
+ .start = amd_uncore_start,
+ .stop = amd_uncore_stop,
+ .read = amd_uncore_read,
+};
+
+static struct pmu amd_l2_pmu = {
+ .attr_groups = amd_uncore_attr_groups,
+ .name = "amd_l2",
+ .event_init = amd_uncore_event_init,
+ .add = amd_uncore_add,
+ .del = amd_uncore_del,
+ .start = amd_uncore_start,
+ .stop = amd_uncore_stop,
+ .read = amd_uncore_read,
+};
+
+static struct amd_uncore * __cpuinit amd_uncore_alloc(unsigned int cpu)
+{
+ return kzalloc_node(sizeof(struct amd_uncore), GFP_KERNEL,
+ cpu_to_node(cpu));
+}
+
+static void __cpuinit amd_uncore_cpu_up_prepare(unsigned int cpu)
+{
+ struct amd_uncore *uncore;
+
+ if (amd_uncore_nb) {
+ uncore = amd_uncore_alloc(cpu);
+ uncore->cpu = cpu;
+ uncore->num_counters = NUM_COUNTERS_NB;
+ uncore->rdpmc_base = RDPMC_BASE_NB;
+ uncore->msr_base = MSR_F15H_NB_PERF_CTL;
+ *per_cpu_ptr(amd_uncore_nb, cpu) = uncore;
+ }
+
+ if (amd_uncore_l2) {
+ uncore = amd_uncore_alloc(cpu);
+ uncore->cpu = cpu;
+ uncore->num_counters = NUM_COUNTERS_L2;
+ uncore->rdpmc_base = RDPMC_BASE_L2;
+ uncore->msr_base = MSR_F16H_L2I_PERF_CTL;
+ *per_cpu_ptr(amd_uncore_l2, cpu) = uncore;
+ }
+}
+
+static struct amd_uncore *
+__cpuinit amd_uncore_find_online_sibling(struct amd_uncore *this,
+ struct amd_uncore * __percpu *uncores)
+{
+ unsigned int cpu;
+ struct amd_uncore *that;
+
+ for_each_online_cpu(cpu) {
+ that = *per_cpu_ptr(uncores, cpu);
+
+ if (!that)
+ continue;
+
+ if (this == that)
+ continue;
+
+ if (this->id == that->id) {
+ that->free_when_cpu_online = this;
+ this = that;
+ break;
+ }
+ }
+
+ this->refcnt++;
+ return this;
+}
+
+static void __cpuinit amd_uncore_cpu_starting(unsigned int cpu)
+{
+ unsigned int eax, ebx, ecx, edx;
+ struct amd_uncore *uncore;
+
+ if (amd_uncore_nb) {
+ uncore = *per_cpu_ptr(amd_uncore_nb, cpu);
+ cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
+ uncore->id = ecx & 0xff;
+
+ uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_nb);
+ *per_cpu_ptr(amd_uncore_nb, cpu) = uncore;
+ }
+
+ if (amd_uncore_l2) {
+ unsigned int apicid = cpu_data(cpu).apicid;
+ unsigned int nshared;
+
+ uncore = *per_cpu_ptr(amd_uncore_l2, cpu);
+ cpuid_count(0x8000001d, 2, &eax, &ebx, &ecx, &edx);
+ nshared = ((eax >> 14) & 0xfff) + 1;
+ uncore->id = apicid - (apicid % nshared);
+
+ uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_l2);
+ *per_cpu_ptr(amd_uncore_l2, cpu) = uncore;
+ }
+}
+
+static void __cpuinit uncore_online(unsigned int cpu,
+ struct amd_uncore * __percpu *uncores)
+{
+ struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);
+
+ if (uncore->free_when_cpu_online) {
+ kfree(uncore->free_when_cpu_online);
+ uncore->free_when_cpu_online = NULL;
+ }
+}
+
+static void __cpuinit amd_uncore_cpu_online(unsigned int cpu)
+{
+ if (amd_uncore_nb)
+ uncore_online(cpu, amd_uncore_nb);
+
+ if (amd_uncore_l2)
+ uncore_online(cpu, amd_uncore_l2);
+}
+
+static void __cpuinit uncore_down_prepare(unsigned int cpu,
+ struct amd_uncore * __percpu *uncores,
+ struct pmu *pmu)
+{
+ unsigned int i;
+ struct amd_uncore *this = *per_cpu_ptr(uncores, cpu);
+
+ if (this->cpu != cpu)
+ return;
+
+ /* this cpu is going down, migrate to a shared sibling if possible */
+ for_each_online_cpu(i) {
+ struct amd_uncore *that = *per_cpu_ptr(uncores, i);
+
+ if (cpu == i)
+ continue;
+
+ if (this == that) {
+ that->cpu = i;
+ perf_pmu_migrate_context(pmu, cpu, i);
+ break;
+ }
+ }
+}
+
+static void __cpuinit amd_uncore_cpu_down_prepare(unsigned int cpu)
+{
+ if (amd_uncore_nb)
+ uncore_down_prepare(cpu, amd_uncore_nb, &amd_nb_pmu);
+
+ if (amd_uncore_l2)
+ uncore_down_prepare(cpu, amd_uncore_l2, &amd_l2_pmu);
+}
+
+static void __cpuinit uncore_dead(unsigned int cpu,
+ struct amd_uncore * __percpu *uncores)
+{
+ struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);
+
+ if (!--uncore->refcnt)
+ kfree(uncore);
+ *per_cpu_ptr(amd_uncore_nb, cpu) = NULL;
+}
+
+static void __cpuinit amd_uncore_cpu_dead(unsigned int cpu)
+{
+ if (amd_uncore_nb)
+ uncore_dead(cpu, amd_uncore_nb);
+
+ if (amd_uncore_l2)
+ uncore_dead(cpu, amd_uncore_l2);
+}
+
+static int __cpuinit
+amd_uncore_cpu_notifier(struct notifier_block *self, unsigned long action,
+ void *hcpu)
+{
+ unsigned int cpu = (long)hcpu;
+
+ switch (action & ~CPU_TASKS_FROZEN) {
+ case CPU_UP_PREPARE:
+ amd_uncore_cpu_up_prepare(cpu);
+ break;
+
+ case CPU_STARTING:
+ amd_uncore_cpu_starting(cpu);
+ break;
+
+ case CPU_ONLINE:
+ amd_uncore_cpu_online(cpu);
+ break;
+
+ case CPU_DOWN_PREPARE:
+ amd_uncore_cpu_down_prepare(cpu);
+ break;
+
+ case CPU_UP_CANCELED:
+ case CPU_DEAD:
+ amd_uncore_cpu_dead(cpu);
+ break;
+
+ default:
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block amd_uncore_cpu_notifier_block __cpuinitdata = {
+ .notifier_call = amd_uncore_cpu_notifier,
+ .priority = CPU_PRI_PERF + 1,
+};
+
+static void __init init_cpu_already_online(void *dummy)
+{
+ unsigned int cpu = smp_processor_id();
+
+ amd_uncore_cpu_up_prepare(cpu);
+ amd_uncore_cpu_starting(cpu);
+ amd_uncore_cpu_online(cpu);
+}
+
+static int __init amd_uncore_init(void)
+{
+ unsigned int cpu;
+ int ret = -ENODEV;
+
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
+ return -ENODEV;
+
+ if (!cpu_has_topoext)
+ return -ENODEV;
+
+ if (cpu_has_perfctr_nb) {
+ amd_uncore_nb = alloc_percpu(struct amd_uncore *);
+ perf_pmu_register(&amd_nb_pmu, amd_nb_pmu.name, -1);
+
+ printk(KERN_INFO "perf: AMD NB counters detected\n");
+ ret = 0;
+ }
+
+ if (cpu_has_perfctr_l2) {
+ amd_uncore_l2 = alloc_percpu(struct amd_uncore *);
+ perf_pmu_register(&amd_l2_pmu, amd_l2_pmu.name, -1);
+
+ printk(KERN_INFO "perf: AMD L2I counters detected\n");
+ ret = 0;
+ }
+
+ if (ret)
+ return -ENODEV;
+
+ get_online_cpus();
+ /* init cpus already online before registering for hotplug notifier */
+ for_each_online_cpu(cpu)
+ smp_call_function_single(cpu, init_cpu_already_online, NULL, 1);
+
+ register_cpu_notifier(&amd_uncore_cpu_notifier_block);
+ put_online_cpus();
+
+ return 0;
+}
+device_initcall(amd_uncore_init);
--
1.7.9.5
On Thu, 2013-04-18 at 11:33 -0500, Jacob Shin wrote:
> Okay, here is V2 which does that. Thanks again in advance for taking
> the time to look it over.
Awesome, looks good on a first read.. I'm assuming you've tested things
and they actually work too? :-)
> From d68225744196d8b0d60a1bc33ae9abdbffbd9bc9 Mon Sep 17 00:00:00 2001
> From: Jacob Shin <[email protected]>
> Date: Sun, 14 Apr 2013 04:12:42 -0500
> Subject: [PATCH 2/2] perf, amd: support for AMD NB and L2I "uncore" counters.
>
> Add support for AMD Family 15h [and above] northbridge performance
> counters. MSRs 0xc0010240 ~ 0xc0010247 are shared across all cores
> that share a common northbridge.
>
> Add support for AMD Family 16h L2 performance counters. MSRs
> 0xc0010230 ~ 0xc0010237 are shared across all cores that share a
> common L2 cache.
>
> We do not enable counter overflow interrupts. Sampling mode and
> per-thread events are not supported.
>
> Signed-off-by: Jacob Shin <[email protected]>
Acked-by: Peter Zijlstra <[email protected]>
On Fri, Apr 19, 2013 at 02:27:16PM +0200, Peter Zijlstra wrote:
> On Thu, 2013-04-18 at 11:33 -0500, Jacob Shin wrote:
>
> > Okay, here is V2 which does that. Thanks again in advance for taking
> > the time to look it over.
>
> Awesome, looks good on a first read.. I'm assuming you've tested things
> and they actually work too? :-)
Yes of course, tested on:
1. Older families without these new counters to make sure there is no
regression.
2. Family 15h, only has NB counters
3. Family 16h, has both NB and L2 counters
>
> > From d68225744196d8b0d60a1bc33ae9abdbffbd9bc9 Mon Sep 17 00:00:00 2001
> > From: Jacob Shin <[email protected]>
> > Date: Sun, 14 Apr 2013 04:12:42 -0500
> > Subject: [PATCH 2/2] perf, amd: support for AMD NB and L2I "uncore" counters.
> >
> > Add support for AMD Family 15h [and above] northbridge performance
> > counters. MSRs 0xc0010240 ~ 0xc0010247 are shared across all cores
> > that share a common northbridge.
> >
> > Add support for AMD Family 16h L2 performance counters. MSRs
> > 0xc0010230 ~ 0xc0010237 are shared across all cores that share a
> > common L2 cache.
> >
> > We do not enable counter overflow interrupts. Sampling mode and
> > per-thread events are not supported.
> >
> > Signed-off-by: Jacob Shin <[email protected]>
>
> Acked-by: Peter Zijlstra <[email protected]>
Thank you again, for taking the time.
Ingo, I hope it's not too late to make it into perf/core for 3.10.
Thanks,
-Jacob
On Fri, 2013-04-19 at 09:41 -0500, Jacob Shin wrote:
>
> Thank you again, for taking the time.
Ah something I just remembered, could you do a patch like the below
one? That makes things like perf stat -A work as expected.
---
commit 314d9f63f385096580e9e2a06eaa0745d92fe4ac
Author: Yan, Zheng <[email protected]>
Date: Mon Sep 10 15:53:49 2012 +0800
perf/x86: Add cpumask for uncore pmu
This patch adds a cpumask file to the uncore pmu sysfs directory. The
cpumask file contains one active cpu for every socket.
Signed-off-by: "Yan, Zheng" <[email protected]>
Acked-by: Peter Zijlstra <[email protected]>
Acked-by: Ingo Molnar <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Jiri Olsa <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Stephane Eranian <[email protected]>
Cc: "Yan, Zheng" <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 0a55710..62ec3e6 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -2341,6 +2341,27 @@ int uncore_pmu_event_init(struct perf_event *event)
return ret;
}
+static ssize_t uncore_get_attr_cpumask(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ int n = cpulist_scnprintf(buf, PAGE_SIZE - 2, &uncore_cpu_mask);
+
+ buf[n++] = '\n';
+ buf[n] = '\0';
+ return n;
+}
+
+static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL);
+
+static struct attribute *uncore_pmu_attrs[] = {
+ &dev_attr_cpumask.attr,
+ NULL,
+};
+
+static struct attribute_group uncore_pmu_attr_group = {
+ .attrs = uncore_pmu_attrs,
+};
+
static int __init uncore_pmu_register(struct intel_uncore_pmu *pmu)
{
int ret;
@@ -2378,8 +2399,8 @@ static void __init uncore_type_exit(struct intel_uncore_type *type)
free_percpu(type->pmus[i].box);
kfree(type->pmus);
type->pmus = NULL;
- kfree(type->attr_groups[1]);
- type->attr_groups[1] = NULL;
+ kfree(type->events_group);
+ type->events_group = NULL;
}
static void __init uncore_types_exit(struct intel_uncore_type **types)
@@ -2431,9 +2452,10 @@ static int __init uncore_type_init(struct intel_uncore_type *type)
for (j = 0; j < i; j++)
attrs[j] = &type->event_descs[j].attr.attr;
- type->attr_groups[1] = events_group;
+ type->events_group = events_group;
}
+ type->pmu_group = &uncore_pmu_attr_group;
type->pmus = pmus;
return 0;
fail:
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index 5b81c18..e68a455 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -369,10 +369,12 @@ struct intel_uncore_type {
struct intel_uncore_pmu *pmus;
struct intel_uncore_ops *ops;
struct uncore_event_desc *event_descs;
- const struct attribute_group *attr_groups[3];
+ const struct attribute_group *attr_groups[4];
};
-#define format_group attr_groups[0]
+#define pmu_group attr_groups[0]
+#define format_group attr_groups[1]
+#define events_group attr_groups[2]
struct intel_uncore_ops {
void (*init_box)(struct intel_uncore_box *);
On Fri, Apr 19, 2013 at 08:55:24PM +0200, Peter Zijlstra wrote:
> On Fri, 2013-04-19 at 09:41 -0500, Jacob Shin wrote:
> >
> > Thank you again, for taking the time.
>
> Ah something I just remembered, could you do a patch like the below
> one? That makes things like perf stat -A work as expected.
Ah, okay. Here is V3 that also creates "cpumask" sysfs file. Tested
that the "perf stat -a .." works as expected.
Thanks again,
V3:
- Added "cpumask" attribute to sysfs to get "perf stat -a .." working
correctly
V2:
- Added event migration if possible when CPU goes offline
>From fc8a5328069496187f7749a32acbe319dedbb548 Mon Sep 17 00:00:00 2001
From: Jacob Shin <[email protected]>
Date: Sun, 14 Apr 2013 04:12:42 -0500
Subject: [PATCH V3 2/2] perf, amd: support for AMD NB and L2I "uncore" counters.
Add support for AMD Family 15h [and above] northbridge performance
counters. MSRs 0xc0010240 ~ 0xc0010247 are shared across all cores
that share a common northbridge.
Add support for AMD Family 16h L2 performance counters. MSRs
0xc0010230 ~ 0xc0010237 are shared across all cores that share a
common L2 cache.
We do not enable counter overflow interrupts. Sampling mode and
per-thread events are not supported.
Signed-off-by: Jacob Shin <[email protected]>
---
arch/x86/include/asm/cpufeature.h | 2 +
arch/x86/include/uapi/asm/msr-index.h | 4 +
arch/x86/kernel/cpu/Makefile | 2 +-
arch/x86/kernel/cpu/perf_event_amd_uncore.c | 546 +++++++++++++++++++++++++++
4 files changed, 553 insertions(+), 1 deletion(-)
create mode 100644 arch/x86/kernel/cpu/perf_event_amd_uncore.c
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 93fe929..ac10df7 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -168,6 +168,7 @@
#define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */
#define X86_FEATURE_PERFCTR_CORE (6*32+23) /* core performance counter extensions */
#define X86_FEATURE_PERFCTR_NB (6*32+24) /* NB performance counter extensions */
+#define X86_FEATURE_PERFCTR_L2 (6*32+28) /* L2 performance counter extensions */
/*
* Auxiliary flags: Linux defined - For features scattered in various
@@ -311,6 +312,7 @@ extern const char * const x86_power_flags[32];
#define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ)
#define cpu_has_perfctr_core boot_cpu_has(X86_FEATURE_PERFCTR_CORE)
#define cpu_has_perfctr_nb boot_cpu_has(X86_FEATURE_PERFCTR_NB)
+#define cpu_has_perfctr_l2 boot_cpu_has(X86_FEATURE_PERFCTR_L2)
#define cpu_has_cx8 boot_cpu_has(X86_FEATURE_CX8)
#define cpu_has_cx16 boot_cpu_has(X86_FEATURE_CX16)
#define cpu_has_eager_fpu boot_cpu_has(X86_FEATURE_EAGER_FPU)
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index bf7bb68..b575788 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -196,6 +196,10 @@
#define MSR_AMD64_IBSBRTARGET 0xc001103b
#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */
+/* Fam 16h MSRs */
+#define MSR_F16H_L2I_PERF_CTL 0xc0010230
+#define MSR_F16H_L2I_PERF_CTR 0xc0010231
+
/* Fam 15h MSRs */
#define MSR_F15H_PERF_CTL 0xc0010200
#define MSR_F15H_PERF_CTR 0xc0010201
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index a0e067d..0074572 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -31,7 +31,7 @@ obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o
obj-$(CONFIG_PERF_EVENTS) += perf_event.o
ifdef CONFIG_PERF_EVENTS
-obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o
+obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o perf_event_amd_uncore.o
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore.o
diff --git a/arch/x86/kernel/cpu/perf_event_amd_uncore.c b/arch/x86/kernel/cpu/perf_event_amd_uncore.c
new file mode 100644
index 0000000..6dc6227
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_amd_uncore.c
@@ -0,0 +1,546 @@
+/*
+ * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ *
+ * Author: Jacob Shin <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/perf_event.h>
+#include <linux/percpu.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+
+#include <asm/cpufeature.h>
+#include <asm/perf_event.h>
+#include <asm/msr.h>
+
+#define NUM_COUNTERS_NB 4
+#define NUM_COUNTERS_L2 4
+#define MAX_COUNTERS NUM_COUNTERS_NB
+
+#define RDPMC_BASE_NB 6
+#define RDPMC_BASE_L2 10
+
+#define COUNTER_SHIFT 16
+
+struct amd_uncore {
+ int id;
+ int refcnt;
+ int cpu;
+ int num_counters;
+ int rdpmc_base;
+ u32 msr_base;
+ cpumask_t *active_mask;
+ struct pmu *pmu;
+ struct perf_event *events[MAX_COUNTERS];
+ struct amd_uncore *free_when_cpu_online;
+};
+
+static struct amd_uncore * __percpu *amd_uncore_nb;
+static struct amd_uncore * __percpu *amd_uncore_l2;
+
+static struct pmu amd_nb_pmu;
+static struct pmu amd_l2_pmu;
+
+static cpumask_t amd_nb_active_mask;
+static cpumask_t amd_l2_active_mask;
+
+static bool is_nb_event(struct perf_event *event)
+{
+ return event->pmu->type == amd_nb_pmu.type;
+}
+
+static bool is_l2_event(struct perf_event *event)
+{
+ return event->pmu->type == amd_l2_pmu.type;
+}
+
+static struct amd_uncore *event_to_amd_uncore(struct perf_event *event)
+{
+ if (is_nb_event(event) && amd_uncore_nb)
+ return *per_cpu_ptr(amd_uncore_nb, event->cpu);
+ else if (is_l2_event(event) && amd_uncore_l2)
+ return *per_cpu_ptr(amd_uncore_l2, event->cpu);
+
+ return NULL;
+}
+
+static void amd_uncore_read(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ u64 prev, new;
+ s64 delta;
+
+ /*
+ * since we do not enable counter overflow interrupts,
+ * we do not have to worry about prev_count changing on us
+ */
+
+ prev = local64_read(&hwc->prev_count);
+ rdpmcl(hwc->event_base_rdpmc, new);
+ local64_set(&hwc->prev_count, new);
+ delta = (new << COUNTER_SHIFT) - (prev << COUNTER_SHIFT);
+ delta >>= COUNTER_SHIFT;
+ local64_add(delta, &event->count);
+}
+
+static void amd_uncore_start(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (flags & PERF_EF_RELOAD)
+ wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count));
+
+ hwc->state = 0;
+ wrmsrl(hwc->config_base, (hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE));
+ perf_event_update_userpage(event);
+}
+
+static void amd_uncore_stop(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ wrmsrl(hwc->config_base, hwc->config);
+ hwc->state |= PERF_HES_STOPPED;
+
+ if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
+ amd_uncore_read(event);
+ hwc->state |= PERF_HES_UPTODATE;
+ }
+}
+
+static int amd_uncore_add(struct perf_event *event, int flags)
+{
+ int i;
+ struct amd_uncore *uncore = event_to_amd_uncore(event);
+ struct hw_perf_event *hwc = &event->hw;
+
+ /* are we already assigned? */
+ if (hwc->idx != -1 && uncore->events[hwc->idx] == event)
+ goto out;
+
+ for (i = 0; i < uncore->num_counters; i++) {
+ if (uncore->events[i] == event) {
+ hwc->idx = i;
+ goto out;
+ }
+ }
+
+ /* if not, take the first available counter */
+ hwc->idx = -1;
+ for (i = 0; i < uncore->num_counters; i++) {
+ if (cmpxchg(&uncore->events[i], NULL, event) == NULL) {
+ hwc->idx = i;
+ break;
+ }
+ }
+
+out:
+ if (hwc->idx == -1)
+ return -EBUSY;
+
+ hwc->config_base = uncore->msr_base + (2 * hwc->idx);
+ hwc->event_base = uncore->msr_base + 1 + (2 * hwc->idx);
+ hwc->event_base_rdpmc = uncore->rdpmc_base + hwc->idx;
+ hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+ if (flags & PERF_EF_START)
+ amd_uncore_start(event, PERF_EF_RELOAD);
+
+ return 0;
+}
+
+static void amd_uncore_del(struct perf_event *event, int flags)
+{
+ int i;
+ struct amd_uncore *uncore = event_to_amd_uncore(event);
+ struct hw_perf_event *hwc = &event->hw;
+
+ amd_uncore_stop(event, PERF_EF_UPDATE);
+
+ for (i = 0; i < uncore->num_counters; i++) {
+ if (cmpxchg(&uncore->events[i], event, NULL) == event)
+ break;
+ }
+
+ hwc->idx = -1;
+}
+
+static int amd_uncore_event_init(struct perf_event *event)
+{
+ struct amd_uncore *uncore;
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ /*
+ * NB and L2 counters (MSRs) are shared across all cores that share the
+ * same NB / L2 cache. Interrupts can be directed to a single target
+ * core, however, event counts generated by processes running on other
+ * cores cannot be masked out. So we do not support sampling and
+ * per-thread events.
+ */
+ if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
+ return -EINVAL;
+
+ /* NB and L2 counters do not have usr/os/guest/host bits */
+ if (event->attr.exclude_user || event->attr.exclude_kernel ||
+ event->attr.exclude_host || event->attr.exclude_guest)
+ return -EINVAL;
+
+ /* and we do not enable counter overflow interrupts */
+ hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB;
+ hwc->idx = -1;
+
+ if (event->cpu < 0)
+ return -EINVAL;
+
+ uncore = event_to_amd_uncore(event);
+ if (!uncore)
+ return -ENODEV;
+
+ /*
+ * since request can come in to any of the shared cores, we will remap
+ * to a single common cpu.
+ */
+ event->cpu = uncore->cpu;
+
+ return 0;
+}
+
+static ssize_t amd_uncore_attr_show_cpumask(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ int n;
+ cpumask_t *active_mask;
+ struct pmu *pmu = dev_get_drvdata(dev);
+
+ if (pmu->type == amd_nb_pmu.type)
+ active_mask = &amd_nb_active_mask;
+ else if (pmu->type == amd_l2_pmu.type)
+ active_mask = &amd_l2_active_mask;
+ else
+ return 0;
+
+ n = cpulist_scnprintf(buf, PAGE_SIZE - 2, active_mask);
+ buf[n++] = '\n';
+ buf[n] = '\0';
+ return n;
+}
+static DEVICE_ATTR(cpumask, S_IRUGO, amd_uncore_attr_show_cpumask, NULL);
+
+static struct attribute *amd_uncore_attrs[] = {
+ &dev_attr_cpumask.attr,
+ NULL,
+};
+
+static struct attribute_group amd_uncore_attr_group = {
+ .attrs = amd_uncore_attrs,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-7,32-35");
+PMU_FORMAT_ATTR(umask, "config:8-15");
+
+static struct attribute *amd_uncore_format_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ NULL,
+};
+
+static struct attribute_group amd_uncore_format_group = {
+ .name = "format",
+ .attrs = amd_uncore_format_attr,
+};
+
+static const struct attribute_group *amd_uncore_attr_groups[] = {
+ &amd_uncore_attr_group,
+ &amd_uncore_format_group,
+ NULL,
+};
+
+static struct pmu amd_nb_pmu = {
+ .attr_groups = amd_uncore_attr_groups,
+ .name = "amd_nb",
+ .event_init = amd_uncore_event_init,
+ .add = amd_uncore_add,
+ .del = amd_uncore_del,
+ .start = amd_uncore_start,
+ .stop = amd_uncore_stop,
+ .read = amd_uncore_read,
+};
+
+static struct pmu amd_l2_pmu = {
+ .attr_groups = amd_uncore_attr_groups,
+ .name = "amd_l2",
+ .event_init = amd_uncore_event_init,
+ .add = amd_uncore_add,
+ .del = amd_uncore_del,
+ .start = amd_uncore_start,
+ .stop = amd_uncore_stop,
+ .read = amd_uncore_read,
+};
+
+static struct amd_uncore * __cpuinit amd_uncore_alloc(unsigned int cpu)
+{
+ return kzalloc_node(sizeof(struct amd_uncore), GFP_KERNEL,
+ cpu_to_node(cpu));
+}
+
+static void __cpuinit amd_uncore_cpu_up_prepare(unsigned int cpu)
+{
+ struct amd_uncore *uncore;
+
+ if (amd_uncore_nb) {
+ uncore = amd_uncore_alloc(cpu);
+ uncore->cpu = cpu;
+ uncore->num_counters = NUM_COUNTERS_NB;
+ uncore->rdpmc_base = RDPMC_BASE_NB;
+ uncore->msr_base = MSR_F15H_NB_PERF_CTL;
+ uncore->active_mask = &amd_nb_active_mask;
+ uncore->pmu = &amd_nb_pmu;
+ *per_cpu_ptr(amd_uncore_nb, cpu) = uncore;
+ }
+
+ if (amd_uncore_l2) {
+ uncore = amd_uncore_alloc(cpu);
+ uncore->cpu = cpu;
+ uncore->num_counters = NUM_COUNTERS_L2;
+ uncore->rdpmc_base = RDPMC_BASE_L2;
+ uncore->msr_base = MSR_F16H_L2I_PERF_CTL;
+ uncore->active_mask = &amd_l2_active_mask;
+ uncore->pmu = &amd_l2_pmu;
+ *per_cpu_ptr(amd_uncore_l2, cpu) = uncore;
+ }
+}
+
+static struct amd_uncore *
+__cpuinit amd_uncore_find_online_sibling(struct amd_uncore *this,
+ struct amd_uncore * __percpu *uncores)
+{
+ unsigned int cpu;
+ struct amd_uncore *that;
+
+ for_each_online_cpu(cpu) {
+ that = *per_cpu_ptr(uncores, cpu);
+
+ if (!that)
+ continue;
+
+ if (this == that)
+ continue;
+
+ if (this->id == that->id) {
+ that->free_when_cpu_online = this;
+ this = that;
+ break;
+ }
+ }
+
+ this->refcnt++;
+ return this;
+}
+
+static void __cpuinit amd_uncore_cpu_starting(unsigned int cpu)
+{
+ unsigned int eax, ebx, ecx, edx;
+ struct amd_uncore *uncore;
+
+ if (amd_uncore_nb) {
+ uncore = *per_cpu_ptr(amd_uncore_nb, cpu);
+ cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
+ uncore->id = ecx & 0xff;
+
+ uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_nb);
+ *per_cpu_ptr(amd_uncore_nb, cpu) = uncore;
+ }
+
+ if (amd_uncore_l2) {
+ unsigned int apicid = cpu_data(cpu).apicid;
+ unsigned int nshared;
+
+ uncore = *per_cpu_ptr(amd_uncore_l2, cpu);
+ cpuid_count(0x8000001d, 2, &eax, &ebx, &ecx, &edx);
+ nshared = ((eax >> 14) & 0xfff) + 1;
+ uncore->id = apicid - (apicid % nshared);
+
+ uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_l2);
+ *per_cpu_ptr(amd_uncore_l2, cpu) = uncore;
+ }
+}
+
+static void __cpuinit uncore_online(unsigned int cpu,
+ struct amd_uncore * __percpu *uncores)
+{
+ struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);
+
+ kfree(uncore->free_when_cpu_online);
+ uncore->free_when_cpu_online = NULL;
+
+ if (cpu == uncore->cpu)
+ cpumask_set_cpu(cpu, uncore->active_mask);
+}
+
+static void __cpuinit amd_uncore_cpu_online(unsigned int cpu)
+{
+ if (amd_uncore_nb)
+ uncore_online(cpu, amd_uncore_nb);
+
+ if (amd_uncore_l2)
+ uncore_online(cpu, amd_uncore_l2);
+}
+
+static void __cpuinit uncore_down_prepare(unsigned int cpu,
+ struct amd_uncore * __percpu *uncores)
+{
+ unsigned int i;
+ struct amd_uncore *this = *per_cpu_ptr(uncores, cpu);
+
+ if (this->cpu != cpu)
+ return;
+
+ /* this cpu is going down, migrate to a shared sibling if possible */
+ for_each_online_cpu(i) {
+ struct amd_uncore *that = *per_cpu_ptr(uncores, i);
+
+ if (cpu == i)
+ continue;
+
+ if (this == that) {
+ perf_pmu_migrate_context(this->pmu, cpu, i);
+ cpumask_clear_cpu(cpu, that->active_mask);
+ cpumask_set_cpu(i, that->active_mask);
+ that->cpu = i;
+ break;
+ }
+ }
+}
+
+static void __cpuinit amd_uncore_cpu_down_prepare(unsigned int cpu)
+{
+ if (amd_uncore_nb)
+ uncore_down_prepare(cpu, amd_uncore_nb);
+
+ if (amd_uncore_l2)
+ uncore_down_prepare(cpu, amd_uncore_l2);
+}
+
+static void __cpuinit uncore_dead(unsigned int cpu,
+ struct amd_uncore * __percpu *uncores)
+{
+ struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);
+
+ if (cpu == uncore->cpu)
+ cpumask_clear_cpu(cpu, uncore->active_mask);
+
+ if (!--uncore->refcnt)
+ kfree(uncore);
+ *per_cpu_ptr(amd_uncore_nb, cpu) = NULL;
+}
+
+static void __cpuinit amd_uncore_cpu_dead(unsigned int cpu)
+{
+ if (amd_uncore_nb)
+ uncore_dead(cpu, amd_uncore_nb);
+
+ if (amd_uncore_l2)
+ uncore_dead(cpu, amd_uncore_l2);
+}
+
+static int __cpuinit
+amd_uncore_cpu_notifier(struct notifier_block *self, unsigned long action,
+ void *hcpu)
+{
+ unsigned int cpu = (long)hcpu;
+
+ switch (action & ~CPU_TASKS_FROZEN) {
+ case CPU_UP_PREPARE:
+ amd_uncore_cpu_up_prepare(cpu);
+ break;
+
+ case CPU_STARTING:
+ amd_uncore_cpu_starting(cpu);
+ break;
+
+ case CPU_ONLINE:
+ amd_uncore_cpu_online(cpu);
+ break;
+
+ case CPU_DOWN_PREPARE:
+ amd_uncore_cpu_down_prepare(cpu);
+ break;
+
+ case CPU_UP_CANCELED:
+ case CPU_DEAD:
+ amd_uncore_cpu_dead(cpu);
+ break;
+
+ default:
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block amd_uncore_cpu_notifier_block __cpuinitdata = {
+ .notifier_call = amd_uncore_cpu_notifier,
+ .priority = CPU_PRI_PERF + 1,
+};
+
+static void __init init_cpu_already_online(void *dummy)
+{
+ unsigned int cpu = smp_processor_id();
+
+ amd_uncore_cpu_up_prepare(cpu);
+ amd_uncore_cpu_starting(cpu);
+ amd_uncore_cpu_online(cpu);
+}
+
+static int __init amd_uncore_init(void)
+{
+ unsigned int cpu;
+ int ret = -ENODEV;
+
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
+ return -ENODEV;
+
+ if (!cpu_has_topoext)
+ return -ENODEV;
+
+ if (cpu_has_perfctr_nb) {
+ amd_uncore_nb = alloc_percpu(struct amd_uncore *);
+ perf_pmu_register(&amd_nb_pmu, amd_nb_pmu.name, -1);
+
+ printk(KERN_INFO "perf: AMD NB counters detected\n");
+ ret = 0;
+ }
+
+ if (cpu_has_perfctr_l2) {
+ amd_uncore_l2 = alloc_percpu(struct amd_uncore *);
+ perf_pmu_register(&amd_l2_pmu, amd_l2_pmu.name, -1);
+
+ printk(KERN_INFO "perf: AMD L2I counters detected\n");
+ ret = 0;
+ }
+
+ if (ret)
+ return -ENODEV;
+
+ get_online_cpus();
+ /* init cpus already online before registering for hotplug notifier */
+ for_each_online_cpu(cpu)
+ smp_call_function_single(cpu, init_cpu_already_online, NULL, 1);
+
+ register_cpu_notifier(&amd_uncore_cpu_notifier_block);
+ put_online_cpus();
+
+ return 0;
+}
+device_initcall(amd_uncore_init);
--
1.7.9.5
Commit-ID: c43ca5091a374c1f6778bd7e4a39a5a10735a917
Gitweb: http://git.kernel.org/tip/c43ca5091a374c1f6778bd7e4a39a5a10735a917
Author: Jacob Shin <[email protected]>
AuthorDate: Fri, 19 Apr 2013 16:34:28 -0500
Committer: Ingo Molnar <[email protected]>
CommitDate: Sun, 21 Apr 2013 11:01:24 +0200
perf/x86/amd: Add support for AMD NB and L2I "uncore" counters
Add support for AMD Family 15h [and above] northbridge
performance counters. MSRs 0xc0010240 ~ 0xc0010247 are shared
across all cores that share a common northbridge.
Add support for AMD Family 16h L2 performance counters. MSRs
0xc0010230 ~ 0xc0010237 are shared across all cores that share a
common L2 cache.
We do not enable counter overflow interrupts. Sampling mode and
per-thread events are not supported.
Signed-off-by: Jacob Shin <[email protected]>
Cc: Arnaldo Carvalho de Melo <[email protected]>
Cc: Stephane Eranian <[email protected]>
Cc: Jiri Olsa <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Link: http://lkml.kernel.org/r/20130419213428.GA8229@jshin-Toonie
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/include/asm/cpufeature.h | 2 +
arch/x86/include/uapi/asm/msr-index.h | 4 +
arch/x86/kernel/cpu/Makefile | 2 +-
arch/x86/kernel/cpu/perf_event_amd_uncore.c | 546 ++++++++++++++++++++++++++++
4 files changed, 553 insertions(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 93fe929..ac10df7 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -168,6 +168,7 @@
#define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */
#define X86_FEATURE_PERFCTR_CORE (6*32+23) /* core performance counter extensions */
#define X86_FEATURE_PERFCTR_NB (6*32+24) /* NB performance counter extensions */
+#define X86_FEATURE_PERFCTR_L2 (6*32+28) /* L2 performance counter extensions */
/*
* Auxiliary flags: Linux defined - For features scattered in various
@@ -311,6 +312,7 @@ extern const char * const x86_power_flags[32];
#define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ)
#define cpu_has_perfctr_core boot_cpu_has(X86_FEATURE_PERFCTR_CORE)
#define cpu_has_perfctr_nb boot_cpu_has(X86_FEATURE_PERFCTR_NB)
+#define cpu_has_perfctr_l2 boot_cpu_has(X86_FEATURE_PERFCTR_L2)
#define cpu_has_cx8 boot_cpu_has(X86_FEATURE_CX8)
#define cpu_has_cx16 boot_cpu_has(X86_FEATURE_CX16)
#define cpu_has_eager_fpu boot_cpu_has(X86_FEATURE_EAGER_FPU)
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index bf7bb68..b575788 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -196,6 +196,10 @@
#define MSR_AMD64_IBSBRTARGET 0xc001103b
#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */
+/* Fam 16h MSRs */
+#define MSR_F16H_L2I_PERF_CTL 0xc0010230
+#define MSR_F16H_L2I_PERF_CTR 0xc0010231
+
/* Fam 15h MSRs */
#define MSR_F15H_PERF_CTL 0xc0010200
#define MSR_F15H_PERF_CTR 0xc0010201
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index a0e067d..0074572 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -31,7 +31,7 @@ obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o
obj-$(CONFIG_PERF_EVENTS) += perf_event.o
ifdef CONFIG_PERF_EVENTS
-obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o
+obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o perf_event_amd_uncore.o
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore.o
diff --git a/arch/x86/kernel/cpu/perf_event_amd_uncore.c b/arch/x86/kernel/cpu/perf_event_amd_uncore.c
new file mode 100644
index 0000000..6dc6227
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_amd_uncore.c
@@ -0,0 +1,546 @@
+/*
+ * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ *
+ * Author: Jacob Shin <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/perf_event.h>
+#include <linux/percpu.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+
+#include <asm/cpufeature.h>
+#include <asm/perf_event.h>
+#include <asm/msr.h>
+
+#define NUM_COUNTERS_NB 4
+#define NUM_COUNTERS_L2 4
+#define MAX_COUNTERS NUM_COUNTERS_NB
+
+#define RDPMC_BASE_NB 6
+#define RDPMC_BASE_L2 10
+
+#define COUNTER_SHIFT 16
+
+struct amd_uncore {
+ int id;
+ int refcnt;
+ int cpu;
+ int num_counters;
+ int rdpmc_base;
+ u32 msr_base;
+ cpumask_t *active_mask;
+ struct pmu *pmu;
+ struct perf_event *events[MAX_COUNTERS];
+ struct amd_uncore *free_when_cpu_online;
+};
+
+static struct amd_uncore * __percpu *amd_uncore_nb;
+static struct amd_uncore * __percpu *amd_uncore_l2;
+
+static struct pmu amd_nb_pmu;
+static struct pmu amd_l2_pmu;
+
+static cpumask_t amd_nb_active_mask;
+static cpumask_t amd_l2_active_mask;
+
+static bool is_nb_event(struct perf_event *event)
+{
+ return event->pmu->type == amd_nb_pmu.type;
+}
+
+static bool is_l2_event(struct perf_event *event)
+{
+ return event->pmu->type == amd_l2_pmu.type;
+}
+
+static struct amd_uncore *event_to_amd_uncore(struct perf_event *event)
+{
+ if (is_nb_event(event) && amd_uncore_nb)
+ return *per_cpu_ptr(amd_uncore_nb, event->cpu);
+ else if (is_l2_event(event) && amd_uncore_l2)
+ return *per_cpu_ptr(amd_uncore_l2, event->cpu);
+
+ return NULL;
+}
+
+static void amd_uncore_read(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ u64 prev, new;
+ s64 delta;
+
+ /*
+ * since we do not enable counter overflow interrupts,
+ * we do not have to worry about prev_count changing on us
+ */
+
+ prev = local64_read(&hwc->prev_count);
+ rdpmcl(hwc->event_base_rdpmc, new);
+ local64_set(&hwc->prev_count, new);
+ delta = (new << COUNTER_SHIFT) - (prev << COUNTER_SHIFT);
+ delta >>= COUNTER_SHIFT;
+ local64_add(delta, &event->count);
+}
+
+static void amd_uncore_start(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (flags & PERF_EF_RELOAD)
+ wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count));
+
+ hwc->state = 0;
+ wrmsrl(hwc->config_base, (hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE));
+ perf_event_update_userpage(event);
+}
+
+static void amd_uncore_stop(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ wrmsrl(hwc->config_base, hwc->config);
+ hwc->state |= PERF_HES_STOPPED;
+
+ if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
+ amd_uncore_read(event);
+ hwc->state |= PERF_HES_UPTODATE;
+ }
+}
+
+static int amd_uncore_add(struct perf_event *event, int flags)
+{
+ int i;
+ struct amd_uncore *uncore = event_to_amd_uncore(event);
+ struct hw_perf_event *hwc = &event->hw;
+
+ /* are we already assigned? */
+ if (hwc->idx != -1 && uncore->events[hwc->idx] == event)
+ goto out;
+
+ for (i = 0; i < uncore->num_counters; i++) {
+ if (uncore->events[i] == event) {
+ hwc->idx = i;
+ goto out;
+ }
+ }
+
+ /* if not, take the first available counter */
+ hwc->idx = -1;
+ for (i = 0; i < uncore->num_counters; i++) {
+ if (cmpxchg(&uncore->events[i], NULL, event) == NULL) {
+ hwc->idx = i;
+ break;
+ }
+ }
+
+out:
+ if (hwc->idx == -1)
+ return -EBUSY;
+
+ hwc->config_base = uncore->msr_base + (2 * hwc->idx);
+ hwc->event_base = uncore->msr_base + 1 + (2 * hwc->idx);
+ hwc->event_base_rdpmc = uncore->rdpmc_base + hwc->idx;
+ hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+ if (flags & PERF_EF_START)
+ amd_uncore_start(event, PERF_EF_RELOAD);
+
+ return 0;
+}
+
+static void amd_uncore_del(struct perf_event *event, int flags)
+{
+ int i;
+ struct amd_uncore *uncore = event_to_amd_uncore(event);
+ struct hw_perf_event *hwc = &event->hw;
+
+ amd_uncore_stop(event, PERF_EF_UPDATE);
+
+ for (i = 0; i < uncore->num_counters; i++) {
+ if (cmpxchg(&uncore->events[i], event, NULL) == event)
+ break;
+ }
+
+ hwc->idx = -1;
+}
+
+static int amd_uncore_event_init(struct perf_event *event)
+{
+ struct amd_uncore *uncore;
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ /*
+ * NB and L2 counters (MSRs) are shared across all cores that share the
+ * same NB / L2 cache. Interrupts can be directed to a single target
+ * core, however, event counts generated by processes running on other
+ * cores cannot be masked out. So we do not support sampling and
+ * per-thread events.
+ */
+ if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
+ return -EINVAL;
+
+ /* NB and L2 counters do not have usr/os/guest/host bits */
+ if (event->attr.exclude_user || event->attr.exclude_kernel ||
+ event->attr.exclude_host || event->attr.exclude_guest)
+ return -EINVAL;
+
+ /* and we do not enable counter overflow interrupts */
+ hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB;
+ hwc->idx = -1;
+
+ if (event->cpu < 0)
+ return -EINVAL;
+
+ uncore = event_to_amd_uncore(event);
+ if (!uncore)
+ return -ENODEV;
+
+ /*
+ * since request can come in to any of the shared cores, we will remap
+ * to a single common cpu.
+ */
+ event->cpu = uncore->cpu;
+
+ return 0;
+}
+
+static ssize_t amd_uncore_attr_show_cpumask(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ int n;
+ cpumask_t *active_mask;
+ struct pmu *pmu = dev_get_drvdata(dev);
+
+ if (pmu->type == amd_nb_pmu.type)
+ active_mask = &amd_nb_active_mask;
+ else if (pmu->type == amd_l2_pmu.type)
+ active_mask = &amd_l2_active_mask;
+ else
+ return 0;
+
+ n = cpulist_scnprintf(buf, PAGE_SIZE - 2, active_mask);
+ buf[n++] = '\n';
+ buf[n] = '\0';
+ return n;
+}
+static DEVICE_ATTR(cpumask, S_IRUGO, amd_uncore_attr_show_cpumask, NULL);
+
+static struct attribute *amd_uncore_attrs[] = {
+ &dev_attr_cpumask.attr,
+ NULL,
+};
+
+static struct attribute_group amd_uncore_attr_group = {
+ .attrs = amd_uncore_attrs,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-7,32-35");
+PMU_FORMAT_ATTR(umask, "config:8-15");
+
+static struct attribute *amd_uncore_format_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ NULL,
+};
+
+static struct attribute_group amd_uncore_format_group = {
+ .name = "format",
+ .attrs = amd_uncore_format_attr,
+};
+
+static const struct attribute_group *amd_uncore_attr_groups[] = {
+ &amd_uncore_attr_group,
+ &amd_uncore_format_group,
+ NULL,
+};
+
+static struct pmu amd_nb_pmu = {
+ .attr_groups = amd_uncore_attr_groups,
+ .name = "amd_nb",
+ .event_init = amd_uncore_event_init,
+ .add = amd_uncore_add,
+ .del = amd_uncore_del,
+ .start = amd_uncore_start,
+ .stop = amd_uncore_stop,
+ .read = amd_uncore_read,
+};
+
+static struct pmu amd_l2_pmu = {
+ .attr_groups = amd_uncore_attr_groups,
+ .name = "amd_l2",
+ .event_init = amd_uncore_event_init,
+ .add = amd_uncore_add,
+ .del = amd_uncore_del,
+ .start = amd_uncore_start,
+ .stop = amd_uncore_stop,
+ .read = amd_uncore_read,
+};
+
+static struct amd_uncore * __cpuinit amd_uncore_alloc(unsigned int cpu)
+{
+ return kzalloc_node(sizeof(struct amd_uncore), GFP_KERNEL,
+ cpu_to_node(cpu));
+}
+
+static void __cpuinit amd_uncore_cpu_up_prepare(unsigned int cpu)
+{
+ struct amd_uncore *uncore;
+
+ if (amd_uncore_nb) {
+ uncore = amd_uncore_alloc(cpu);
+ uncore->cpu = cpu;
+ uncore->num_counters = NUM_COUNTERS_NB;
+ uncore->rdpmc_base = RDPMC_BASE_NB;
+ uncore->msr_base = MSR_F15H_NB_PERF_CTL;
+ uncore->active_mask = &amd_nb_active_mask;
+ uncore->pmu = &amd_nb_pmu;
+ *per_cpu_ptr(amd_uncore_nb, cpu) = uncore;
+ }
+
+ if (amd_uncore_l2) {
+ uncore = amd_uncore_alloc(cpu);
+ uncore->cpu = cpu;
+ uncore->num_counters = NUM_COUNTERS_L2;
+ uncore->rdpmc_base = RDPMC_BASE_L2;
+ uncore->msr_base = MSR_F16H_L2I_PERF_CTL;
+ uncore->active_mask = &amd_l2_active_mask;
+ uncore->pmu = &amd_l2_pmu;
+ *per_cpu_ptr(amd_uncore_l2, cpu) = uncore;
+ }
+}
+
+static struct amd_uncore *
+__cpuinit amd_uncore_find_online_sibling(struct amd_uncore *this,
+ struct amd_uncore * __percpu *uncores)
+{
+ unsigned int cpu;
+ struct amd_uncore *that;
+
+ for_each_online_cpu(cpu) {
+ that = *per_cpu_ptr(uncores, cpu);
+
+ if (!that)
+ continue;
+
+ if (this == that)
+ continue;
+
+ if (this->id == that->id) {
+ that->free_when_cpu_online = this;
+ this = that;
+ break;
+ }
+ }
+
+ this->refcnt++;
+ return this;
+}
+
+static void __cpuinit amd_uncore_cpu_starting(unsigned int cpu)
+{
+ unsigned int eax, ebx, ecx, edx;
+ struct amd_uncore *uncore;
+
+ if (amd_uncore_nb) {
+ uncore = *per_cpu_ptr(amd_uncore_nb, cpu);
+ cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
+ uncore->id = ecx & 0xff;
+
+ uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_nb);
+ *per_cpu_ptr(amd_uncore_nb, cpu) = uncore;
+ }
+
+ if (amd_uncore_l2) {
+ unsigned int apicid = cpu_data(cpu).apicid;
+ unsigned int nshared;
+
+ uncore = *per_cpu_ptr(amd_uncore_l2, cpu);
+ cpuid_count(0x8000001d, 2, &eax, &ebx, &ecx, &edx);
+ nshared = ((eax >> 14) & 0xfff) + 1;
+ uncore->id = apicid - (apicid % nshared);
+
+ uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_l2);
+ *per_cpu_ptr(amd_uncore_l2, cpu) = uncore;
+ }
+}
+
+static void __cpuinit uncore_online(unsigned int cpu,
+ struct amd_uncore * __percpu *uncores)
+{
+ struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);
+
+ kfree(uncore->free_when_cpu_online);
+ uncore->free_when_cpu_online = NULL;
+
+ if (cpu == uncore->cpu)
+ cpumask_set_cpu(cpu, uncore->active_mask);
+}
+
+static void __cpuinit amd_uncore_cpu_online(unsigned int cpu)
+{
+ if (amd_uncore_nb)
+ uncore_online(cpu, amd_uncore_nb);
+
+ if (amd_uncore_l2)
+ uncore_online(cpu, amd_uncore_l2);
+}
+
+static void __cpuinit uncore_down_prepare(unsigned int cpu,
+ struct amd_uncore * __percpu *uncores)
+{
+ unsigned int i;
+ struct amd_uncore *this = *per_cpu_ptr(uncores, cpu);
+
+ if (this->cpu != cpu)
+ return;
+
+ /* this cpu is going down, migrate to a shared sibling if possible */
+ for_each_online_cpu(i) {
+ struct amd_uncore *that = *per_cpu_ptr(uncores, i);
+
+ if (cpu == i)
+ continue;
+
+ if (this == that) {
+ perf_pmu_migrate_context(this->pmu, cpu, i);
+ cpumask_clear_cpu(cpu, that->active_mask);
+ cpumask_set_cpu(i, that->active_mask);
+ that->cpu = i;
+ break;
+ }
+ }
+}
+
+static void __cpuinit amd_uncore_cpu_down_prepare(unsigned int cpu)
+{
+ if (amd_uncore_nb)
+ uncore_down_prepare(cpu, amd_uncore_nb);
+
+ if (amd_uncore_l2)
+ uncore_down_prepare(cpu, amd_uncore_l2);
+}
+
+static void __cpuinit uncore_dead(unsigned int cpu,
+ struct amd_uncore * __percpu *uncores)
+{
+ struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);
+
+ if (cpu == uncore->cpu)
+ cpumask_clear_cpu(cpu, uncore->active_mask);
+
+ if (!--uncore->refcnt)
+ kfree(uncore);
+ *per_cpu_ptr(amd_uncore_nb, cpu) = NULL;
+}
+
+static void __cpuinit amd_uncore_cpu_dead(unsigned int cpu)
+{
+ if (amd_uncore_nb)
+ uncore_dead(cpu, amd_uncore_nb);
+
+ if (amd_uncore_l2)
+ uncore_dead(cpu, amd_uncore_l2);
+}
+
+static int __cpuinit
+amd_uncore_cpu_notifier(struct notifier_block *self, unsigned long action,
+ void *hcpu)
+{
+ unsigned int cpu = (long)hcpu;
+
+ switch (action & ~CPU_TASKS_FROZEN) {
+ case CPU_UP_PREPARE:
+ amd_uncore_cpu_up_prepare(cpu);
+ break;
+
+ case CPU_STARTING:
+ amd_uncore_cpu_starting(cpu);
+ break;
+
+ case CPU_ONLINE:
+ amd_uncore_cpu_online(cpu);
+ break;
+
+ case CPU_DOWN_PREPARE:
+ amd_uncore_cpu_down_prepare(cpu);
+ break;
+
+ case CPU_UP_CANCELED:
+ case CPU_DEAD:
+ amd_uncore_cpu_dead(cpu);
+ break;
+
+ default:
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block amd_uncore_cpu_notifier_block __cpuinitdata = {
+ .notifier_call = amd_uncore_cpu_notifier,
+ .priority = CPU_PRI_PERF + 1,
+};
+
+static void __init init_cpu_already_online(void *dummy)
+{
+ unsigned int cpu = smp_processor_id();
+
+ amd_uncore_cpu_up_prepare(cpu);
+ amd_uncore_cpu_starting(cpu);
+ amd_uncore_cpu_online(cpu);
+}
+
+static int __init amd_uncore_init(void)
+{
+ unsigned int cpu;
+ int ret = -ENODEV;
+
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
+ return -ENODEV;
+
+ if (!cpu_has_topoext)
+ return -ENODEV;
+
+ if (cpu_has_perfctr_nb) {
+ amd_uncore_nb = alloc_percpu(struct amd_uncore *);
+ perf_pmu_register(&amd_nb_pmu, amd_nb_pmu.name, -1);
+
+ printk(KERN_INFO "perf: AMD NB counters detected\n");
+ ret = 0;
+ }
+
+ if (cpu_has_perfctr_l2) {
+ amd_uncore_l2 = alloc_percpu(struct amd_uncore *);
+ perf_pmu_register(&amd_l2_pmu, amd_l2_pmu.name, -1);
+
+ printk(KERN_INFO "perf: AMD L2I counters detected\n");
+ ret = 0;
+ }
+
+ if (ret)
+ return -ENODEV;
+
+ get_online_cpus();
+ /* init cpus already online before registering for hotplug notifier */
+ for_each_online_cpu(cpu)
+ smp_call_function_single(cpu, init_cpu_already_online, NULL, 1);
+
+ register_cpu_notifier(&amd_uncore_cpu_notifier_block);
+ put_online_cpus();
+
+ return 0;
+}
+device_initcall(amd_uncore_init);
On Sun, Apr 21, 2013 at 05:48:36AM -0700, tip-bot for Jacob Shin wrote:
> Commit-ID: c43ca5091a374c1f6778bd7e4a39a5a10735a917
> Gitweb: http://git.kernel.org/tip/c43ca5091a374c1f6778bd7e4a39a5a10735a917
> Author: Jacob Shin <[email protected]>
> AuthorDate: Fri, 19 Apr 2013 16:34:28 -0500
> Committer: Ingo Molnar <[email protected]>
> CommitDate: Sun, 21 Apr 2013 11:01:24 +0200
>
> perf/x86/amd: Add support for AMD NB and L2I "uncore" counters
>
> Add support for AMD Family 15h [and above] northbridge
> performance counters. MSRs 0xc0010240 ~ 0xc0010247 are shared
> across all cores that share a common northbridge.
>
> Add support for AMD Family 16h L2 performance counters. MSRs
> 0xc0010230 ~ 0xc0010237 are shared across all cores that share a
> common L2 cache.
>
> We do not enable counter overflow interrupts. Sampling mode and
> per-thread events are not supported.
>
> Signed-off-by: Jacob Shin <[email protected]>
> Cc: Arnaldo Carvalho de Melo <[email protected]>
> Cc: Stephane Eranian <[email protected]>
> Cc: Jiri Olsa <[email protected]>
> Cc: Peter Zijlstra <[email protected]>
> Link: http://lkml.kernel.org/r/20130419213428.GA8229@jshin-Toonie
> Signed-off-by: Ingo Molnar <[email protected]>
Thanks Ingo.
This is [PATCH 2/2],
There is also [PATCH 1/2] which removes NB counter support from
perf_event_amd.c that is needed:
http://lkml.org/lkml/2013/4/15/421
(Also .mbox attached)
Thanks,
-Jacob
On Sun, Apr 21, 2013 at 05:48:36AM -0700, tip-bot for Jacob Shin wrote:
> Commit-ID: c43ca5091a374c1f6778bd7e4a39a5a10735a917
> Gitweb: http://git.kernel.org/tip/c43ca5091a374c1f6778bd7e4a39a5a10735a917
> Author: Jacob Shin <[email protected]>
> AuthorDate: Fri, 19 Apr 2013 16:34:28 -0500
> Committer: Ingo Molnar <[email protected]>
> CommitDate: Sun, 21 Apr 2013 11:01:24 +0200
>
> perf/x86/amd: Add support for AMD NB and L2I "uncore" counters
>
> Add support for AMD Family 15h [and above] northbridge
> performance counters. MSRs 0xc0010240 ~ 0xc0010247 are shared
> across all cores that share a common northbridge.
>
> Add support for AMD Family 16h L2 performance counters. MSRs
> 0xc0010230 ~ 0xc0010237 are shared across all cores that share a
> common L2 cache.
>
> We do not enable counter overflow interrupts. Sampling mode and
> per-thread events are not supported.
Something's fishy with lockdep here:
[ 1.217368] perf: AMD NB counters detected
[ 1.217438] ------------[ cut here ]------------
[ 1.217497] WARNING: at kernel/lockdep.c:2590 trace_hardirqs_on_caller+0x1b9/0x1f0()
[ 1.217572] Hardware name: To be filled by O.E.M.
[ 1.217624] Modules linked in:
[ 1.217705] Pid: 0, comm: swapper/1 Not tainted 3.9.0-rc7+ #2
[ 1.217760] Call Trace:
[ 1.217810] <IRQ> [<ffffffff8103bd2f>] warn_slowpath_common+0x7f/0xc0
[ 1.217926] [<ffffffff81130905>] ? new_slab+0x225/0x2a0
[ 1.217981] [<ffffffff8103bd8a>] warn_slowpath_null+0x1a/0x20
[ 1.218038] [<ffffffff8109aac9>] trace_hardirqs_on_caller+0x1b9/0x1f0
[ 1.218095] [<ffffffff8109ab0d>] trace_hardirqs_on+0xd/0x10
[ 1.218149] [<ffffffff81130905>] new_slab+0x225/0x2a0
[ 1.218205] [<ffffffff815959d8>] __slab_alloc.isra.57.constprop.60+0x31e/0x454
[ 1.218281] [<ffffffff8107a813>] ? local_clock+0x43/0x50
[ 1.218365] [<ffffffff810a62ee>] ? generic_smp_call_function_single_interrupt+0x7e/0x110
[ 1.218441] [<ffffffff810984e8>] ? trace_hardirqs_off_caller+0x28/0x120
[ 1.218497] [<ffffffff81131f05>] kmem_cache_alloc_trace+0xb5/0x1f0
[ 1.218552] [<ffffffff810a62ee>] ? generic_smp_call_function_single_interrupt+0x7e/0x110
[ 1.218629] [<ffffffff8158b4e1>] amd_uncore_alloc.isra.7+0x1f/0x29
[ 1.218683] [<ffffffff8158b505>] amd_uncore_cpu_up_prepare+0x1a/0xac
[ 1.218739] [<ffffffff81a9dfe8>] init_cpu_already_online+0x14/0x26
[ 1.218794] [<ffffffff8107a813>] ? local_clock+0x43/0x50
[ 1.218848] [<ffffffff810a630d>] generic_smp_call_function_single_interrupt+0x9d/0x110
[ 1.218924] [<ffffffff81027157>] smp_call_function_single_interrupt+0x27/0x40
[ 1.219000] [<ffffffff8159e56f>] call_function_single_interrupt+0x6f/0x80
[ 1.219620] <EOI> [<ffffffff8100b515>] ? default_idle+0x25/0x270
[ 1.219735] [<ffffffff8100b513>] ? default_idle+0x23/0x270
[ 1.219789] [<ffffffff8100c146>] arch_cpu_idle+0x26/0x30
[ 1.219844] [<ffffffff8108cf1e>] cpu_startup_entry+0x7e/0x3d0
[ 1.219898] [<ffffffff8158d2c4>] start_secondary+0x1b7/0x1bb
[ 1.219956] ---[ end trace abe5f11379b64afd ]---
[ 1.220059] LVT offset 0 assigned for vector 0x400
[ 1.220140] perf: AMD IBS detected (0x000000ff)
.config attached.
--
Regards/Gruss,
Boris.
Sent from a fat crate under my desk. Formatting is fine.
--
On Sun, Apr 21, 2013 at 07:02:32PM +0200, Borislav Petkov wrote:
> On Sun, Apr 21, 2013 at 05:48:36AM -0700, tip-bot for Jacob Shin wrote:
> > Commit-ID: c43ca5091a374c1f6778bd7e4a39a5a10735a917
> > Gitweb: http://git.kernel.org/tip/c43ca5091a374c1f6778bd7e4a39a5a10735a917
> > Author: Jacob Shin <[email protected]>
> > AuthorDate: Fri, 19 Apr 2013 16:34:28 -0500
> > Committer: Ingo Molnar <[email protected]>
> > CommitDate: Sun, 21 Apr 2013 11:01:24 +0200
> >
> > perf/x86/amd: Add support for AMD NB and L2I "uncore" counters
> >
> > Add support for AMD Family 15h [and above] northbridge
> > performance counters. MSRs 0xc0010240 ~ 0xc0010247 are shared
> > across all cores that share a common northbridge.
> >
> > Add support for AMD Family 16h L2 performance counters. MSRs
> > 0xc0010230 ~ 0xc0010237 are shared across all cores that share a
> > common L2 cache.
> >
> > We do not enable counter overflow interrupts. Sampling mode and
> > per-thread events are not supported.
>
> Something's fishy with lockdep here:
>
> [ 1.217368] perf: AMD NB counters detected
> [ 1.217438] ------------[ cut here ]------------
> [ 1.217497] WARNING: at kernel/lockdep.c:2590 trace_hardirqs_on_caller+0x1b9/0x1f0()
> [ 1.217572] Hardware name: To be filled by O.E.M.
> [ 1.217624] Modules linked in:
> [ 1.217705] Pid: 0, comm: swapper/1 Not tainted 3.9.0-rc7+ #2
> [ 1.217760] Call Trace:
> [ 1.217810] <IRQ> [<ffffffff8103bd2f>] warn_slowpath_common+0x7f/0xc0
> [ 1.217926] [<ffffffff81130905>] ? new_slab+0x225/0x2a0
> [ 1.217981] [<ffffffff8103bd8a>] warn_slowpath_null+0x1a/0x20
> [ 1.218038] [<ffffffff8109aac9>] trace_hardirqs_on_caller+0x1b9/0x1f0
> [ 1.218095] [<ffffffff8109ab0d>] trace_hardirqs_on+0xd/0x10
> [ 1.218149] [<ffffffff81130905>] new_slab+0x225/0x2a0
> [ 1.218205] [<ffffffff815959d8>] __slab_alloc.isra.57.constprop.60+0x31e/0x454
> [ 1.218281] [<ffffffff8107a813>] ? local_clock+0x43/0x50
> [ 1.218365] [<ffffffff810a62ee>] ? generic_smp_call_function_single_interrupt+0x7e/0x110
> [ 1.218441] [<ffffffff810984e8>] ? trace_hardirqs_off_caller+0x28/0x120
> [ 1.218497] [<ffffffff81131f05>] kmem_cache_alloc_trace+0xb5/0x1f0
> [ 1.218552] [<ffffffff810a62ee>] ? generic_smp_call_function_single_interrupt+0x7e/0x110
> [ 1.218629] [<ffffffff8158b4e1>] amd_uncore_alloc.isra.7+0x1f/0x29
> [ 1.218683] [<ffffffff8158b505>] amd_uncore_cpu_up_prepare+0x1a/0xac
> [ 1.218739] [<ffffffff81a9dfe8>] init_cpu_already_online+0x14/0x26
> [ 1.218794] [<ffffffff8107a813>] ? local_clock+0x43/0x50
> [ 1.218848] [<ffffffff810a630d>] generic_smp_call_function_single_interrupt+0x9d/0x110
> [ 1.218924] [<ffffffff81027157>] smp_call_function_single_interrupt+0x27/0x40
> [ 1.219000] [<ffffffff8159e56f>] call_function_single_interrupt+0x6f/0x80
> [ 1.219620] <EOI> [<ffffffff8100b515>] ? default_idle+0x25/0x270
> [ 1.219735] [<ffffffff8100b513>] ? default_idle+0x23/0x270
> [ 1.219789] [<ffffffff8100c146>] arch_cpu_idle+0x26/0x30
> [ 1.219844] [<ffffffff8108cf1e>] cpu_startup_entry+0x7e/0x3d0
> [ 1.219898] [<ffffffff8158d2c4>] start_secondary+0x1b7/0x1bb
> [ 1.219956] ---[ end trace abe5f11379b64afd ]---
> [ 1.220059] LVT offset 0 assigned for vector 0x400
> [ 1.220140] perf: AMD IBS detected (0x000000ff)
>
Hm .. I think maybe during _init hotplug lock is already held? Let me
investigate ASAP:
+static int __init amd_uncore_init(void)
+{
..snip..
+ get_online_cpus();
+ /* init cpus already online before registering for hotplug notifier */
+ for_each_online_cpu(cpu)
+ smp_call_function_single(cpu, init_cpu_already_online, NULL, 1);
+
+ register_cpu_notifier(&amd_uncore_cpu_notifier_block);
+ put_online_cpus();
..snip..
+device_initcall(amd_uncore_init);
On Sun, Apr 21, 2013 at 12:33:26PM -0500, Jacob Shin wrote:
> > [ 1.217368] perf: AMD NB counters detected
> > [ 1.217438] ------------[ cut here ]------------
> > [ 1.217497] WARNING: at kernel/lockdep.c:2590 trace_hardirqs_on_caller+0x1b9/0x1f0()
> > [ 1.217572] Hardware name: To be filled by O.E.M.
> > [ 1.217624] Modules linked in:
> > [ 1.217705] Pid: 0, comm: swapper/1 Not tainted 3.9.0-rc7+ #2
> > [ 1.217760] Call Trace:
> > [ 1.217810] <IRQ> [<ffffffff8103bd2f>] warn_slowpath_common+0x7f/0xc0
> > [ 1.217926] [<ffffffff81130905>] ? new_slab+0x225/0x2a0
> > [ 1.217981] [<ffffffff8103bd8a>] warn_slowpath_null+0x1a/0x20
> > [ 1.218038] [<ffffffff8109aac9>] trace_hardirqs_on_caller+0x1b9/0x1f0
> > [ 1.218095] [<ffffffff8109ab0d>] trace_hardirqs_on+0xd/0x10
> > [ 1.218149] [<ffffffff81130905>] new_slab+0x225/0x2a0
> > [ 1.218205] [<ffffffff815959d8>] __slab_alloc.isra.57.constprop.60+0x31e/0x454
> > [ 1.218281] [<ffffffff8107a813>] ? local_clock+0x43/0x50
> > [ 1.218365] [<ffffffff810a62ee>] ? generic_smp_call_function_single_interrupt+0x7e/0x110
> > [ 1.218441] [<ffffffff810984e8>] ? trace_hardirqs_off_caller+0x28/0x120
> > [ 1.218497] [<ffffffff81131f05>] kmem_cache_alloc_trace+0xb5/0x1f0
> > [ 1.218552] [<ffffffff810a62ee>] ? generic_smp_call_function_single_interrupt+0x7e/0x110
> > [ 1.218629] [<ffffffff8158b4e1>] amd_uncore_alloc.isra.7+0x1f/0x29
> > [ 1.218683] [<ffffffff8158b505>] amd_uncore_cpu_up_prepare+0x1a/0xac
> > [ 1.218739] [<ffffffff81a9dfe8>] init_cpu_already_online+0x14/0x26
> > [ 1.218794] [<ffffffff8107a813>] ? local_clock+0x43/0x50
> > [ 1.218848] [<ffffffff810a630d>] generic_smp_call_function_single_interrupt+0x9d/0x110
> > [ 1.218924] [<ffffffff81027157>] smp_call_function_single_interrupt+0x27/0x40
> > [ 1.219000] [<ffffffff8159e56f>] call_function_single_interrupt+0x6f/0x80
> > [ 1.219620] <EOI> [<ffffffff8100b515>] ? default_idle+0x25/0x270
> > [ 1.219735] [<ffffffff8100b513>] ? default_idle+0x23/0x270
> > [ 1.219789] [<ffffffff8100c146>] arch_cpu_idle+0x26/0x30
> > [ 1.219844] [<ffffffff8108cf1e>] cpu_startup_entry+0x7e/0x3d0
> > [ 1.219898] [<ffffffff8158d2c4>] start_secondary+0x1b7/0x1bb
> > [ 1.219956] ---[ end trace abe5f11379b64afd ]---
> > [ 1.220059] LVT offset 0 assigned for vector 0x400
> > [ 1.220140] perf: AMD IBS detected (0x000000ff)
> >
>
> Hm .. I think maybe during _init hotplug lock is already held? Let me
> investigate ASAP:
>
> +static int __init amd_uncore_init(void)
> +{
>
> ..snip..
>
> + get_online_cpus();
> + /* init cpus already online before registering for hotplug notifier */
> + for_each_online_cpu(cpu)
> + smp_call_function_single(cpu, init_cpu_already_online, NULL, 1);
> +
> + register_cpu_notifier(&amd_uncore_cpu_notifier_block);
> + put_online_cpus();
>
> ..snip..
>
> +device_initcall(amd_uncore_init);
Well, AFAICT, you're sending IPIs to each core to do
init_cpu_already_online() which does amd_uncore_alloc() and landing in
the slab allocator. It then, along its path, enables interrupts but
you're still executing the IPI handler so interrupts should remain
off, actually. So maybe you shouldn't be allocating memory in the IRQ
handler.
--
Regards/Gruss,
Boris.
Sent from a fat crate under my desk. Formatting is fine.
--
On Sun, Apr 21, 2013 at 07:02:32PM +0200, Borislav Petkov wrote:
> On Sun, Apr 21, 2013 at 05:48:36AM -0700, tip-bot for Jacob Shin wrote:
> > Commit-ID: c43ca5091a374c1f6778bd7e4a39a5a10735a917
> > Gitweb: http://git.kernel.org/tip/c43ca5091a374c1f6778bd7e4a39a5a10735a917
> > Author: Jacob Shin <[email protected]>
> > AuthorDate: Fri, 19 Apr 2013 16:34:28 -0500
> > Committer: Ingo Molnar <[email protected]>
> > CommitDate: Sun, 21 Apr 2013 11:01:24 +0200
> >
> > perf/x86/amd: Add support for AMD NB and L2I "uncore" counters
> >
> > Add support for AMD Family 15h [and above] northbridge
> > performance counters. MSRs 0xc0010240 ~ 0xc0010247 are shared
> > across all cores that share a common northbridge.
> >
> > Add support for AMD Family 16h L2 performance counters. MSRs
> > 0xc0010230 ~ 0xc0010237 are shared across all cores that share a
> > common L2 cache.
> >
> > We do not enable counter overflow interrupts. Sampling mode and
> > per-thread events are not supported.
>
> Something's fishy with lockdep here:
>
> [ 1.217368] perf: AMD NB counters detected
> [ 1.217438] ------------[ cut here ]------------
> [ 1.217497] WARNING: at kernel/lockdep.c:2590 trace_hardirqs_on_caller+0x1b9/0x1f0()
> [ 1.217572] Hardware name: To be filled by O.E.M.
> [ 1.217624] Modules linked in:
> [ 1.217705] Pid: 0, comm: swapper/1 Not tainted 3.9.0-rc7+ #2
> [ 1.217760] Call Trace:
> [ 1.217810] <IRQ> [<ffffffff8103bd2f>] warn_slowpath_common+0x7f/0xc0
> [ 1.217926] [<ffffffff81130905>] ? new_slab+0x225/0x2a0
> [ 1.217981] [<ffffffff8103bd8a>] warn_slowpath_null+0x1a/0x20
> [ 1.218038] [<ffffffff8109aac9>] trace_hardirqs_on_caller+0x1b9/0x1f0
> [ 1.218095] [<ffffffff8109ab0d>] trace_hardirqs_on+0xd/0x10
> [ 1.218149] [<ffffffff81130905>] new_slab+0x225/0x2a0
> [ 1.218205] [<ffffffff815959d8>] __slab_alloc.isra.57.constprop.60+0x31e/0x454
> [ 1.218281] [<ffffffff8107a813>] ? local_clock+0x43/0x50
> [ 1.218365] [<ffffffff810a62ee>] ? generic_smp_call_function_single_interrupt+0x7e/0x110
> [ 1.218441] [<ffffffff810984e8>] ? trace_hardirqs_off_caller+0x28/0x120
> [ 1.218497] [<ffffffff81131f05>] kmem_cache_alloc_trace+0xb5/0x1f0
> [ 1.218552] [<ffffffff810a62ee>] ? generic_smp_call_function_single_interrupt+0x7e/0x110
> [ 1.218629] [<ffffffff8158b4e1>] amd_uncore_alloc.isra.7+0x1f/0x29
> [ 1.218683] [<ffffffff8158b505>] amd_uncore_cpu_up_prepare+0x1a/0xac
> [ 1.218739] [<ffffffff81a9dfe8>] init_cpu_already_online+0x14/0x26
> [ 1.218794] [<ffffffff8107a813>] ? local_clock+0x43/0x50
> [ 1.218848] [<ffffffff810a630d>] generic_smp_call_function_single_interrupt+0x9d/0x110
> [ 1.218924] [<ffffffff81027157>] smp_call_function_single_interrupt+0x27/0x40
> [ 1.219000] [<ffffffff8159e56f>] call_function_single_interrupt+0x6f/0x80
> [ 1.219620] <EOI> [<ffffffff8100b515>] ? default_idle+0x25/0x270
> [ 1.219735] [<ffffffff8100b513>] ? default_idle+0x23/0x270
> [ 1.219789] [<ffffffff8100c146>] arch_cpu_idle+0x26/0x30
> [ 1.219844] [<ffffffff8108cf1e>] cpu_startup_entry+0x7e/0x3d0
> [ 1.219898] [<ffffffff8158d2c4>] start_secondary+0x1b7/0x1bb
> [ 1.219956] ---[ end trace abe5f11379b64afd ]---
> [ 1.220059] LVT offset 0 assigned for vector 0x400
> [ 1.220140] perf: AMD IBS detected (0x000000ff)
This should fix it. Sorry about that,
Thanks!!
>From 2ac60428e58f7bfd7dec4baa772f523eeedfecc6 Mon Sep 17 00:00:00 2001
From: Jacob Shin <[email protected]>
Date: Sun, 21 Apr 2013 12:58:21 -0500
Subject: [PATCH 1/1] perf, amd: fix lockdep warning, don't __GFP_FS alloc
with IRQs disabled
Fix lockdep warning by not calling kzalloc in smp_call_function_single.
Signed-off-by: Jacob Shin <[email protected]>
---
arch/x86/kernel/cpu/perf_event_amd_uncore.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event_amd_uncore.c b/arch/x86/kernel/cpu/perf_event_amd_uncore.c
index 6dc6227..c0c661a 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_uncore.c
@@ -498,7 +498,6 @@ static void __init init_cpu_already_online(void *dummy)
{
unsigned int cpu = smp_processor_id();
- amd_uncore_cpu_up_prepare(cpu);
amd_uncore_cpu_starting(cpu);
amd_uncore_cpu_online(cpu);
}
@@ -535,8 +534,10 @@ static int __init amd_uncore_init(void)
get_online_cpus();
/* init cpus already online before registering for hotplug notifier */
- for_each_online_cpu(cpu)
+ for_each_online_cpu(cpu) {
+ amd_uncore_cpu_up_prepare(cpu);
smp_call_function_single(cpu, init_cpu_already_online, NULL, 1);
+ }
register_cpu_notifier(&amd_uncore_cpu_notifier_block);
put_online_cpus();
--
1.7.9.5
On Sun, Apr 21, 2013 at 01:06:27PM -0500, Jacob Shin wrote:
> From 2ac60428e58f7bfd7dec4baa772f523eeedfecc6 Mon Sep 17 00:00:00 2001
> From: Jacob Shin <[email protected]>
> Date: Sun, 21 Apr 2013 12:58:21 -0500
> Subject: [PATCH 1/1] perf, amd: fix lockdep warning, don't __GFP_FS alloc
> with IRQs disabled
>
> Fix lockdep warning by not calling kzalloc in smp_call_function_single.
This should say something about "do not allocate memory in the
IRQ handler or so." And I think the flag is __GFP_WAIT. Look at
allocate_slab() in slub.c:
if (flags & __GFP_WAIT)
local_irq_enable();
The local_irq_enable() calls into lockdep with trace_hardirqs_on().
Other than that:
Tested-by: Borislav Petkov <[email protected]>
Thanks.
--
Regards/Gruss,
Boris.
Sent from a fat crate under my desk. Formatting is fine.
--
Commit-ID: 0cf5f4323b1b51ecca3e952f95110e03ea611882
Gitweb: http://git.kernel.org/tip/0cf5f4323b1b51ecca3e952f95110e03ea611882
Author: Jacob Shin <[email protected]>
AuthorDate: Mon, 15 Apr 2013 12:21:22 -0500
Committer: Ingo Molnar <[email protected]>
CommitDate: Sun, 21 Apr 2013 17:21:59 +0200
perf/x86/amd: Remove old-style NB counter support from perf_event_amd.c
Support for NB counters, MSRs 0xc0010240 ~ 0xc0010247, got
moved to perf_event_amd_uncore.c in the following commit:
c43ca5091a37 perf/x86/amd: Add support for AMD NB and L2I "uncore" counters
AMD Family 10h NB events (events 0xe0 ~ 0xff, on MSRs 0xc001000 ~
0xc001007) will still continue to be handled by perf_event_amd.c
Signed-off-by: Jacob Shin <[email protected]>
Acked-by: Peter Zijlstra <[email protected]>
Cc: Stephane Eranian <[email protected]>
Cc: Jiri Olsa <[email protected]>
Cc: Jacob Shin <[email protected]>
Cc: Arnaldo Carvalho de Melo <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/kernel/cpu/perf_event_amd.c | 138 ++---------------------------------
1 file changed, 5 insertions(+), 133 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index dfdab42..7e28d94 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -132,14 +132,11 @@ static u64 amd_pmu_event_map(int hw_event)
return amd_perfmon_event_map[hw_event];
}
-static struct event_constraint *amd_nb_event_constraint;
-
/*
* Previously calculated offsets
*/
static unsigned int event_offsets[X86_PMC_IDX_MAX] __read_mostly;
static unsigned int count_offsets[X86_PMC_IDX_MAX] __read_mostly;
-static unsigned int rdpmc_indexes[X86_PMC_IDX_MAX] __read_mostly;
/*
* Legacy CPUs:
@@ -147,14 +144,10 @@ static unsigned int rdpmc_indexes[X86_PMC_IDX_MAX] __read_mostly;
*
* CPUs with core performance counter extensions:
* 6 counters starting at 0xc0010200 each offset by 2
- *
- * CPUs with north bridge performance counter extensions:
- * 4 additional counters starting at 0xc0010240 each offset by 2
- * (indexed right above either one of the above core counters)
*/
static inline int amd_pmu_addr_offset(int index, bool eventsel)
{
- int offset, first, base;
+ int offset;
if (!index)
return index;
@@ -167,23 +160,7 @@ static inline int amd_pmu_addr_offset(int index, bool eventsel)
if (offset)
return offset;
- if (amd_nb_event_constraint &&
- test_bit(index, amd_nb_event_constraint->idxmsk)) {
- /*
- * calculate the offset of NB counters with respect to
- * base eventsel or perfctr
- */
-
- first = find_first_bit(amd_nb_event_constraint->idxmsk,
- X86_PMC_IDX_MAX);
-
- if (eventsel)
- base = MSR_F15H_NB_PERF_CTL - x86_pmu.eventsel;
- else
- base = MSR_F15H_NB_PERF_CTR - x86_pmu.perfctr;
-
- offset = base + ((index - first) << 1);
- } else if (!cpu_has_perfctr_core)
+ if (!cpu_has_perfctr_core)
offset = index;
else
offset = index << 1;
@@ -196,36 +173,6 @@ static inline int amd_pmu_addr_offset(int index, bool eventsel)
return offset;
}
-static inline int amd_pmu_rdpmc_index(int index)
-{
- int ret, first;
-
- if (!index)
- return index;
-
- ret = rdpmc_indexes[index];
-
- if (ret)
- return ret;
-
- if (amd_nb_event_constraint &&
- test_bit(index, amd_nb_event_constraint->idxmsk)) {
- /*
- * according to the mnual, ECX value of the NB counters is
- * the index of the NB counter (0, 1, 2 or 3) plus 6
- */
-
- first = find_first_bit(amd_nb_event_constraint->idxmsk,
- X86_PMC_IDX_MAX);
- ret = index - first + 6;
- } else
- ret = index;
-
- rdpmc_indexes[index] = ret;
-
- return ret;
-}
-
static int amd_core_hw_config(struct perf_event *event)
{
if (event->attr.exclude_host && event->attr.exclude_guest)
@@ -245,34 +192,6 @@ static int amd_core_hw_config(struct perf_event *event)
}
/*
- * NB counters do not support the following event select bits:
- * Host/Guest only
- * Counter mask
- * Invert counter mask
- * Edge detect
- * OS/User mode
- */
-static int amd_nb_hw_config(struct perf_event *event)
-{
- /* for NB, we only allow system wide counting mode */
- if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
- return -EINVAL;
-
- if (event->attr.exclude_user || event->attr.exclude_kernel ||
- event->attr.exclude_host || event->attr.exclude_guest)
- return -EINVAL;
-
- event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR |
- ARCH_PERFMON_EVENTSEL_OS);
-
- if (event->hw.config & ~(AMD64_RAW_EVENT_MASK_NB |
- ARCH_PERFMON_EVENTSEL_INT))
- return -EINVAL;
-
- return 0;
-}
-
-/*
* AMD64 events are detected based on their event codes.
*/
static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc)
@@ -285,11 +204,6 @@ static inline int amd_is_nb_event(struct hw_perf_event *hwc)
return (hwc->config & 0xe0) == 0xe0;
}
-static inline int amd_is_perfctr_nb_event(struct hw_perf_event *hwc)
-{
- return amd_nb_event_constraint && amd_is_nb_event(hwc);
-}
-
static inline int amd_has_nb(struct cpu_hw_events *cpuc)
{
struct amd_nb *nb = cpuc->amd_nb;
@@ -315,9 +229,6 @@ static int amd_pmu_hw_config(struct perf_event *event)
if (event->attr.type == PERF_TYPE_RAW)
event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK;
- if (amd_is_perfctr_nb_event(&event->hw))
- return amd_nb_hw_config(event);
-
return amd_core_hw_config(event);
}
@@ -341,19 +252,6 @@ static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc,
}
}
-static void amd_nb_interrupt_hw_config(struct hw_perf_event *hwc)
-{
- int core_id = cpu_data(smp_processor_id()).cpu_core_id;
-
- /* deliver interrupts only to this core */
- if (hwc->config & ARCH_PERFMON_EVENTSEL_INT) {
- hwc->config |= AMD64_EVENTSEL_INT_CORE_ENABLE;
- hwc->config &= ~AMD64_EVENTSEL_INT_CORE_SEL_MASK;
- hwc->config |= (u64)(core_id) <<
- AMD64_EVENTSEL_INT_CORE_SEL_SHIFT;
- }
-}
-
/*
* AMD64 NorthBridge events need special treatment because
* counter access needs to be synchronized across all cores
@@ -441,9 +339,6 @@ __amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *ev
if (new == -1)
return &emptyconstraint;
- if (amd_is_perfctr_nb_event(hwc))
- amd_nb_interrupt_hw_config(hwc);
-
return &nb->event_constraints[new];
}
@@ -543,8 +438,7 @@ amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
if (!(amd_has_nb(cpuc) && amd_is_nb_event(&event->hw)))
return &unconstrained;
- return __amd_get_nb_event_constraints(cpuc, event,
- amd_nb_event_constraint);
+ return __amd_get_nb_event_constraints(cpuc, event, NULL);
}
static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
@@ -643,9 +537,6 @@ static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09,
static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
-static struct event_constraint amd_NBPMC96 = EVENT_CONSTRAINT(0, 0x3C0, 0);
-static struct event_constraint amd_NBPMC74 = EVENT_CONSTRAINT(0, 0xF0, 0);
-
static struct event_constraint *
amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event)
{
@@ -711,8 +602,8 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *ev
return &amd_f15_PMC20;
}
case AMD_EVENT_NB:
- return __amd_get_nb_event_constraints(cpuc, event,
- amd_nb_event_constraint);
+ /* moved to perf_event_amd_uncore.c */
+ return &emptyconstraint;
default:
return &emptyconstraint;
}
@@ -738,7 +629,6 @@ static __initconst const struct x86_pmu amd_pmu = {
.eventsel = MSR_K7_EVNTSEL0,
.perfctr = MSR_K7_PERFCTR0,
.addr_offset = amd_pmu_addr_offset,
- .rdpmc_index = amd_pmu_rdpmc_index,
.event_map = amd_pmu_event_map,
.max_events = ARRAY_SIZE(amd_perfmon_event_map),
.num_counters = AMD64_NUM_COUNTERS,
@@ -790,23 +680,6 @@ static int setup_perfctr_core(void)
return 0;
}
-static int setup_perfctr_nb(void)
-{
- if (!cpu_has_perfctr_nb)
- return -ENODEV;
-
- x86_pmu.num_counters += AMD64_NUM_COUNTERS_NB;
-
- if (cpu_has_perfctr_core)
- amd_nb_event_constraint = &amd_NBPMC96;
- else
- amd_nb_event_constraint = &amd_NBPMC74;
-
- printk(KERN_INFO "perf: AMD northbridge performance counters detected\n");
-
- return 0;
-}
-
__init int amd_pmu_init(void)
{
/* Performance-monitoring supported from K7 and later: */
@@ -817,7 +690,6 @@ __init int amd_pmu_init(void)
setup_event_constraints();
setup_perfctr_core();
- setup_perfctr_nb();
/* Events are common for all AMDs */
memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
Commit-ID: 94f4db3590893c600506105b88dab581c7f6f5c8
Gitweb: http://git.kernel.org/tip/94f4db3590893c600506105b88dab581c7f6f5c8
Author: Jacob Shin <[email protected]>
AuthorDate: Sun, 21 Apr 2013 13:06:27 -0500
Committer: Ingo Molnar <[email protected]>
CommitDate: Mon, 22 Apr 2013 10:10:55 +0200
perf/x86/amd: Fix AMD NB and L2I "uncore" support
Borislav Petkov reported a lockdep splat warning about kzalloc()
done in an IPI (hardirq) handler.
This is a real bug, do not call kzalloc() in a smp_call_function_single()
handler because it can schedule and crash.
Reported-by: Borislav Petkov <[email protected]>
Signed-off-by: Jacob Shin <[email protected]>
Tested-by: Borislav Petkov <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: <[email protected]>
Cc: <[email protected]>
Cc: <[email protected]>
Cc: <[email protected]>
Link: http://lkml.kernel.org/r/20130421180627.GA21049@jshin-Toonie
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/kernel/cpu/perf_event_amd_uncore.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event_amd_uncore.c b/arch/x86/kernel/cpu/perf_event_amd_uncore.c
index 6dc6227..c0c661a 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_uncore.c
@@ -498,7 +498,6 @@ static void __init init_cpu_already_online(void *dummy)
{
unsigned int cpu = smp_processor_id();
- amd_uncore_cpu_up_prepare(cpu);
amd_uncore_cpu_starting(cpu);
amd_uncore_cpu_online(cpu);
}
@@ -535,8 +534,10 @@ static int __init amd_uncore_init(void)
get_online_cpus();
/* init cpus already online before registering for hotplug notifier */
- for_each_online_cpu(cpu)
+ for_each_online_cpu(cpu) {
+ amd_uncore_cpu_up_prepare(cpu);
smp_call_function_single(cpu, init_cpu_already_online, NULL, 1);
+ }
register_cpu_notifier(&amd_uncore_cpu_notifier_block);
put_online_cpus();