Subject: [PATCH 0/29] x86/perfcounters: x86 and AMD cpu updates

This patch series updates the perfcounters implementation mainly for
the x86 architecture.

Also, it introduces a data structure (struct pmu) describing a generic
performance monitoring unit (pmu). This structure replaces
struct hw_perf_counter_ops. Similarly, struct x86_pmu was introduced
for the x86 architecture (as a replacement for struct pmc_x86_ops).
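
For reference, a rough sketch of the two structures (fields trimmed, as
of patches 06 and 15 in this series; the enable/disable interface of
struct x86_pmu changes again in later patches):

        /* generic pmu, replaces struct hw_perf_counter_ops (patch 06) */
        struct pmu {
                int  (*enable)  (struct perf_counter *counter);
                void (*disable) (struct perf_counter *counter);
                void (*read)    (struct perf_counter *counter);
        };

        /* x86 model specific pmu, replaces struct pmc_x86_ops (patches 07-15) */
        struct x86_pmu {
                int      (*handle_irq)(struct pt_regs *, int);
                u64      (*save_disable_all)(void);
                void     (*restore_all)(u64);
                void     (*enable)(int, u64);
                void     (*disable)(int, u64);
                unsigned eventsel;
                unsigned perfctr;
                u64      (*event_map)(int);
                u64      (*raw_event)(u64);
                int      max_events;
                int      num_counters;
                int      num_counters_fixed;
                int      counter_bits;
                u64      counter_mask;
        };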

There are patches for x86 with some fixes and cleanups, a change in
the model-specific split, and a complete rework of the AMD pmu code. The
result is simplified model-specific code and more generalized and
unified code. Features that are only supported by AMD or Intel are now
implemented in vendor-specific functions.

The AMD pmu differs from Intel's: in particular, there is no global
status register and there are no fixed counters. This makes a separate
interrupt handler for AMD cpus necessary. Also, a global disable/enable
of the performance counters (e.g. to protect the modification of a list
against NMIs) is expensive on AMD cpus, requiring up to 4 MSR
reads/writes per counter. There is still some more work to do here to
avoid this.
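
As a simplified sketch, the overflow check in the new AMD interrupt
handler (taken from patch 24) works on the counter values themselves
instead of a status register:

        /* simplified from amd_pmu_handle_irq(), patch 24 */
        for (idx = 0; idx < x86_pmu.num_counters; idx++) {
                if (!test_bit(idx, cpuc->active))
                        continue;
                counter = cpuc->counters[idx];
                hwc = &counter->hw;
                x86_perf_counter_update(counter, hwc, idx);
                val = atomic64_read(&hwc->prev_count);
                if (val & (1ULL << (x86_pmu.counter_bits - 1)))
                        continue;       /* upper bit still set: no overflow */
                /* counter overflowed: reload the period and report it */
                x86_perf_counter_set_period(counter, hwc, idx);
                handled = 1;
        }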

This patch series is based on the tip/perfcounters/core branch.

I developed these patches based on 03ced43 and later rebased them to
1b88991. The latest tip/perfcounters/core branch seems to be broken: no
NMIs are delivered, only perfcounter interrupts, which yield no results
in kerneltop. I am still debugging this. However, I could successfully
test the patch series based on 03ced43 and want to release the
patches anyway.

-Robert



Subject: [PATCH 01/29] x86: perfcounter: remove X86_FEATURE_ARCH_PERFMON flag for AMD cpus

X86_FEATURE_ARCH_PERFMON is an Intel hardware feature that does not
work on AMD CPUs. The flag is now only used in Intel-specific code
(especially during initialization).

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/kernel/cpu/amd.c | 4 ----
arch/x86/kernel/cpu/perf_counter.c | 6 +++---
2 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index fd69c51..7e4a459 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -420,10 +420,6 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
if (c->x86 >= 6)
set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK);

- /* Enable Performance counter for K7 and later */
- if (c->x86 > 6 && c->x86 <= 0x11)
- set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
-
if (!c->x86_model_id[0]) {
switch (c->x86) {
case 0xf:
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 0fcbaab..7d0f81d 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -949,6 +949,9 @@ static struct pmc_x86_ops *pmc_intel_init(void)
unsigned int unused;
unsigned int ebx;

+ if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
+ return NULL;
+
/*
* Check whether the Architectural PerfMon supports
* Branch Misses Retired Event or not.
@@ -987,9 +990,6 @@ static struct pmc_x86_ops *pmc_amd_init(void)

void __init init_hw_perf_counters(void)
{
- if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
- return;
-
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_INTEL:
pmc_ops = pmc_intel_init();
--
1.6.1.3

Subject: [PATCH 02/29] perfcounter: declare perf_max_counters only for CONFIG_PERF_COUNTERS

This is only needed when CONFIG_PERF_COUNTERS is enabled.

Signed-off-by: Robert Richter <[email protected]>
---
include/linux/perf_counter.h | 3 ++-
1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 9814328..be10b3f 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -512,12 +512,13 @@ struct perf_cpu_context {
int recursion[4];
};

+#ifdef CONFIG_PERF_COUNTERS
+
/*
* Set by architecture code:
*/
extern int perf_max_counters;

-#ifdef CONFIG_PERF_COUNTERS
extern const struct hw_perf_counter_ops *
hw_perf_counter_init(struct perf_counter *counter);

--
1.6.1.3

Subject: [PATCH 14/29] x86/perfcounters: make x86_pmu data a static struct

Instead of using a pointer to reference the x86 pmu, we now have a
single data structure that is initialized at boot time. This saves the
extra pointer dereference when accessing it.

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/kernel/cpu/perf_counter.c | 50 ++++++++++++++++++------------------
1 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 7c72a94..68597d7 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -60,7 +60,7 @@ struct x86_pmu {
int max_events;
};

-static struct x86_pmu *x86_pmu __read_mostly;
+static struct x86_pmu x86_pmu __read_mostly;

static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = {
.enabled = 1,
@@ -184,12 +184,12 @@ static bool reserve_pmc_hardware(void)
disable_lapic_nmi_watchdog();

for (i = 0; i < nr_counters_generic; i++) {
- if (!reserve_perfctr_nmi(x86_pmu->perfctr + i))
+ if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
goto perfctr_fail;
}

for (i = 0; i < nr_counters_generic; i++) {
- if (!reserve_evntsel_nmi(x86_pmu->eventsel + i))
+ if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
goto eventsel_fail;
}

@@ -197,13 +197,13 @@ static bool reserve_pmc_hardware(void)

eventsel_fail:
for (i--; i >= 0; i--)
- release_evntsel_nmi(x86_pmu->eventsel + i);
+ release_evntsel_nmi(x86_pmu.eventsel + i);

i = nr_counters_generic;

perfctr_fail:
for (i--; i >= 0; i--)
- release_perfctr_nmi(x86_pmu->perfctr + i);
+ release_perfctr_nmi(x86_pmu.perfctr + i);

if (nmi_watchdog == NMI_LOCAL_APIC)
enable_lapic_nmi_watchdog();
@@ -216,8 +216,8 @@ static void release_pmc_hardware(void)
int i;

for (i = 0; i < nr_counters_generic; i++) {
- release_perfctr_nmi(x86_pmu->perfctr + i);
- release_evntsel_nmi(x86_pmu->eventsel + i);
+ release_perfctr_nmi(x86_pmu.perfctr + i);
+ release_evntsel_nmi(x86_pmu.eventsel + i);
}

if (nmi_watchdog == NMI_LOCAL_APIC)
@@ -297,14 +297,14 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
* Raw event type provide the config in the event structure
*/
if (perf_event_raw(hw_event)) {
- hwc->config |= x86_pmu->raw_event(perf_event_config(hw_event));
+ hwc->config |= x86_pmu.raw_event(perf_event_config(hw_event));
} else {
- if (perf_event_id(hw_event) >= x86_pmu->max_events)
+ if (perf_event_id(hw_event) >= x86_pmu.max_events)
return -EINVAL;
/*
* The generic map:
*/
- hwc->config |= x86_pmu->event_map(perf_event_id(hw_event));
+ hwc->config |= x86_pmu.event_map(perf_event_id(hw_event));
}

counter->destroy = hw_perf_counter_destroy;
@@ -356,7 +356,7 @@ u64 hw_perf_save_disable(void)
if (unlikely(!perf_counters_initialized))
return 0;

- return x86_pmu->save_disable_all();
+ return x86_pmu.save_disable_all();
}
/*
* Exported because of ACPI idle
@@ -396,7 +396,7 @@ void hw_perf_restore(u64 ctrl)
if (unlikely(!perf_counters_initialized))
return;

- x86_pmu->restore_all(ctrl);
+ x86_pmu.restore_all(ctrl);
}
/*
* Exported because of ACPI idle
@@ -441,7 +441,7 @@ static void hw_perf_enable(int idx, u64 config)
if (unlikely(!perf_counters_initialized))
return;

- x86_pmu->enable(idx, config);
+ x86_pmu.enable(idx, config);
}

static void intel_pmu_disable_counter(int idx, u64 config)
@@ -463,7 +463,7 @@ static void hw_perf_disable(int idx, u64 config)
if (unlikely(!perf_counters_initialized))
return;

- x86_pmu->disable(idx, config);
+ x86_pmu.disable(idx, config);
}

static inline void
@@ -580,11 +580,11 @@ fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)

event = hwc->config & ARCH_PERFMON_EVENT_MASK;

- if (unlikely(event == x86_pmu->event_map(PERF_COUNT_INSTRUCTIONS)))
+ if (unlikely(event == x86_pmu.event_map(PERF_COUNT_INSTRUCTIONS)))
return X86_PMC_IDX_FIXED_INSTRUCTIONS;
- if (unlikely(event == x86_pmu->event_map(PERF_COUNT_CPU_CYCLES)))
+ if (unlikely(event == x86_pmu.event_map(PERF_COUNT_CPU_CYCLES)))
return X86_PMC_IDX_FIXED_CPU_CYCLES;
- if (unlikely(event == x86_pmu->event_map(PERF_COUNT_BUS_CYCLES)))
+ if (unlikely(event == x86_pmu.event_map(PERF_COUNT_BUS_CYCLES)))
return X86_PMC_IDX_FIXED_BUS_CYCLES;

return -1;
@@ -628,8 +628,8 @@ try_generic:
set_bit(idx, cpuc->used);
hwc->idx = idx;
}
- hwc->config_base = x86_pmu->eventsel;
- hwc->counter_base = x86_pmu->perfctr;
+ hwc->config_base = x86_pmu.eventsel;
+ hwc->counter_base = x86_pmu.perfctr;
}

perf_counters_lapic_init(hwc->nmi);
@@ -677,8 +677,8 @@ void perf_counter_print_debug(void)
pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used);

for (idx = 0; idx < nr_counters_generic; idx++) {
- rdmsrl(x86_pmu->eventsel + idx, pmc_ctrl);
- rdmsrl(x86_pmu->perfctr + idx, pmc_count);
+ rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
+ rdmsrl(x86_pmu.perfctr + idx, pmc_count);

prev_left = per_cpu(prev_left[idx], cpu);

@@ -819,7 +819,7 @@ void smp_perf_counter_interrupt(struct pt_regs *regs)
irq_enter();
apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR);
ack_APIC_irq();
- x86_pmu->handle_irq(regs, 0);
+ x86_pmu.handle_irq(regs, 0);
irq_exit();
}

@@ -876,7 +876,7 @@ perf_counter_nmi_handler(struct notifier_block *self,
regs = args->regs;

apic_write(APIC_LVTPC, APIC_DM_NMI);
- ret = x86_pmu->handle_irq(regs, 1);
+ ret = x86_pmu.handle_irq(regs, 1);

return ret ? NOTIFY_STOP : NOTIFY_OK;
}
@@ -940,7 +940,7 @@ static int intel_pmu_init(void)
pr_info("... bit width: %d\n", eax.split.bit_width);
pr_info("... mask length: %d\n", eax.split.mask_length);

- x86_pmu = &intel_pmu;
+ x86_pmu = intel_pmu;

nr_counters_generic = eax.split.num_counters;
nr_counters_fixed = edx.split.num_counters_fixed;
@@ -951,7 +951,7 @@ static int intel_pmu_init(void)

static int amd_pmu_init(void)
{
- x86_pmu = &amd_pmu;
+ x86_pmu = amd_pmu;

nr_counters_generic = 4;
nr_counters_fixed = 0;
--
1.6.1.3

Subject: [PATCH 04/29] x86/perfcounters: rework pmc_amd_save_disable_all() and pmc_amd_restore_all()

MSR reads and writes are expensive. This patch adds checks to avoid
them where possible.

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/kernel/cpu/perf_counter.c | 24 ++++++++++++++----------
1 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index d6d6529..75a0903 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -334,11 +334,13 @@ static u64 pmc_amd_save_disable_all(void)
for (idx = 0; idx < nr_counters_generic; idx++) {
u64 val;

+ if (!test_bit(idx, cpuc->active_mask))
+ continue;
rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
- if (val & ARCH_PERFMON_EVENTSEL0_ENABLE) {
- val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
- wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
- }
+ if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE))
+ continue;
+ val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
+ wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
}

return enabled;
@@ -372,13 +374,15 @@ static void pmc_amd_restore_all(u64 ctrl)
return;

for (idx = 0; idx < nr_counters_generic; idx++) {
- if (test_bit(idx, cpuc->active_mask)) {
- u64 val;
+ u64 val;

- rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
- val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
- wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
- }
+ if (!test_bit(idx, cpuc->active_mask))
+ continue;
+ rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
+ if (val & ARCH_PERFMON_EVENTSEL0_ENABLE)
+ continue;
+ val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+ wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
}
}

--
1.6.1.3

Subject: [PATCH 09/29] x86/perfcounters: remove get_status() from struct x86_pmu

This function is Intel-only and not necessary for AMD cpus.

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/kernel/cpu/perf_counter.c | 39 ++++-------------------------------
1 files changed, 5 insertions(+), 34 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 9d90de0..d0bb029 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -51,7 +51,6 @@ struct x86_pmu {
int (*handle_irq)(struct pt_regs *, int);
u64 (*save_disable_all)(void);
void (*restore_all)(u64);
- u64 (*get_status)(u64);
void (*ack_status)(u64);
void (*enable)(int, u64);
void (*disable)(int, u64);
@@ -405,41 +404,15 @@ void hw_perf_restore(u64 ctrl)
*/
EXPORT_SYMBOL_GPL(hw_perf_restore);

-static u64 intel_pmu_get_status(u64 mask)
+static inline u64 intel_pmu_get_status(u64 mask)
{
u64 status;

- rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
-
- return status;
-}
-
-static u64 amd_pmu_get_status(u64 mask)
-{
- u64 status = 0;
- int idx;
-
- for (idx = 0; idx < nr_counters_generic; idx++) {
- s64 val;
-
- if (!(mask & (1 << idx)))
- continue;
-
- rdmsrl(MSR_K7_PERFCTR0 + idx, val);
- val <<= (64 - counter_value_bits);
- if (val >= 0)
- status |= (1 << idx);
- }
-
- return status;
-}
-
-static u64 hw_perf_get_status(u64 mask)
-{
if (unlikely(!perf_counters_initialized))
return 0;
+ rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);

- return x86_pmu->get_status(mask);
+ return status;
}

static void intel_pmu_ack_status(u64 ack)
@@ -795,7 +768,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs, int nmi)

cpuc->throttle_ctrl = hw_perf_save_disable();

- status = hw_perf_get_status(cpuc->throttle_ctrl);
+ status = intel_pmu_get_status(cpuc->throttle_ctrl);
if (!status)
goto out;

@@ -820,7 +793,7 @@ again:
/*
* Repeat if there is more work to be done:
*/
- status = hw_perf_get_status(cpuc->throttle_ctrl);
+ status = intel_pmu_get_status(cpuc->throttle_ctrl);
if (status)
goto again;
out:
@@ -931,7 +904,6 @@ static struct x86_pmu intel_pmu = {
.handle_irq = intel_pmu_handle_irq,
.save_disable_all = intel_pmu_save_disable_all,
.restore_all = intel_pmu_restore_all,
- .get_status = intel_pmu_get_status,
.ack_status = intel_pmu_ack_status,
.enable = intel_pmu_enable_counter,
.disable = intel_pmu_disable_counter,
@@ -946,7 +918,6 @@ static struct x86_pmu amd_pmu = {
.handle_irq = amd_pmu_handle_irq,
.save_disable_all = amd_pmu_save_disable_all,
.restore_all = amd_pmu_restore_all,
- .get_status = amd_pmu_get_status,
.ack_status = amd_pmu_ack_status,
.enable = amd_pmu_enable_counter,
.disable = amd_pmu_disable_counter,
--
1.6.1.3

Subject: [PATCH 13/29] x86/perfcounters: modify initialization of struct x86_pmu

This patch adds error handling and changes the initialization of struct
x86_pmu: the vendor init functions now return an error code instead of
a pointer. No functional changes. Needed for follow-on patches.

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/kernel/cpu/perf_counter.c | 27 ++++++++++++++++-----------
1 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 5a52d73..7c72a94 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -913,7 +913,7 @@ static struct x86_pmu amd_pmu = {
.max_events = ARRAY_SIZE(amd_perfmon_event_map),
};

-static struct x86_pmu *intel_pmu_init(void)
+static int intel_pmu_init(void)
{
union cpuid10_edx edx;
union cpuid10_eax eax;
@@ -921,7 +921,7 @@ static struct x86_pmu *intel_pmu_init(void)
unsigned int ebx;

if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
- return NULL;
+ return -ENODEV;

/*
* Check whether the Architectural PerfMon supports
@@ -929,49 +929,54 @@ static struct x86_pmu *intel_pmu_init(void)
*/
cpuid(10, &eax.full, &ebx, &unused, &edx.full);
if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
- return NULL;
+ return -ENODEV;

intel_perfmon_version = eax.split.version_id;
if (intel_perfmon_version < 2)
- return NULL;
+ return -ENODEV;

pr_info("Intel Performance Monitoring support detected.\n");
pr_info("... version: %d\n", intel_perfmon_version);
pr_info("... bit width: %d\n", eax.split.bit_width);
pr_info("... mask length: %d\n", eax.split.mask_length);

+ x86_pmu = &intel_pmu;
+
nr_counters_generic = eax.split.num_counters;
nr_counters_fixed = edx.split.num_counters_fixed;
counter_value_mask = (1ULL << eax.split.bit_width) - 1;

- return &intel_pmu;
+ return 0;
}

-static struct x86_pmu *amd_pmu_init(void)
+static int amd_pmu_init(void)
{
+ x86_pmu = &amd_pmu;
+
nr_counters_generic = 4;
nr_counters_fixed = 0;
counter_value_mask = 0x0000FFFFFFFFFFFFULL;
counter_value_bits = 48;

pr_info("AMD Performance Monitoring support detected.\n");
-
- return &amd_pmu;
+ return 0;
}

void __init init_hw_perf_counters(void)
{
+ int err;
+
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_INTEL:
- x86_pmu = intel_pmu_init();
+ err = intel_pmu_init();
break;
case X86_VENDOR_AMD:
- x86_pmu = amd_pmu_init();
+ err = amd_pmu_init();
break;
default:
return;
}
- if (!x86_pmu)
+ if (err != 0)
return;

pr_info("... num counters: %d\n", nr_counters_generic);
--
1.6.1.3

Subject: [PATCH 24/29] x86/perfcounters: implement the interrupt handler for AMD cpus

This patch implements the interrupt handler for AMD performance
counters. In contrast to the Intel pmu, there is no single status
register and also there are no fixed counters. This makes the handler
very different, and it is useful to make the handler vendor
specific. To check whether a counter has overflowed, the upper bit of
the counter is checked. Only counters where the active bit is set are
checked.

With this patch throttling is enabled for AMD performance counters.

This patch also reenables Linux performance counters on AMD cpus.

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/kernel/cpu/perf_counter.c | 45 +++++++++++++++++++++++++++++------
1 files changed, 37 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 2d3681b..f4d59d4 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -240,10 +240,6 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
struct hw_perf_counter *hwc = &counter->hw;
int err;

- /* disable temporarily */
- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
- return -ENOSYS;
-
if (!x86_pmu_initialized())
return -ENODEV;

@@ -773,7 +769,43 @@ out:
return ret;
}

-static int amd_pmu_handle_irq(struct pt_regs *regs, int nmi) { return 0; }
+static int amd_pmu_handle_irq(struct pt_regs *regs, int nmi)
+{
+ int cpu = smp_processor_id();
+ struct cpu_hw_counters *cpuc = &per_cpu(cpu_hw_counters, cpu);
+ u64 val;
+ int handled = 0;
+ struct perf_counter *counter;
+ struct hw_perf_counter *hwc;
+ int idx;
+
+ ++cpuc->interrupts;
+ for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ if (!test_bit(idx, cpuc->active))
+ continue;
+ counter = cpuc->counters[idx];
+ hwc = &counter->hw;
+ x86_perf_counter_update(counter, hwc, idx);
+ val = atomic64_read(&hwc->prev_count);
+ if (val & (1ULL << (x86_pmu.counter_bits - 1)))
+ continue;
+ /* counter overflow */
+ x86_perf_counter_set_period(counter, hwc, idx);
+ handled = 1;
+ inc_irq_stat(apic_perf_irqs);
+ if (perf_counter_overflow(counter, nmi, regs, 0))
+ amd_pmu_disable_counter(hwc, idx);
+ else if (cpuc->interrupts >= PERFMON_MAX_INTERRUPTS)
+ /*
+ * do not reenable when throttled, but reload
+ * the register
+ */
+ amd_pmu_disable_counter(hwc, idx);
+ else if (counter->state == PERF_COUNTER_STATE_ACTIVE)
+ amd_pmu_enable_counter(hwc, idx);
+ }
+ return handled;
+}

void perf_counter_unthrottle(void)
{
@@ -782,9 +814,6 @@ void perf_counter_unthrottle(void)
if (!x86_pmu_initialized())
return;

- if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
- return;
-
cpuc = &__get_cpu_var(cpu_hw_counters);
if (cpuc->interrupts >= PERFMON_MAX_INTERRUPTS) {
if (printk_ratelimit())
--
1.6.1.3

Subject: [PATCH 15/29] x86/perfcounters: move counter parameters to struct x86_pmu

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/kernel/cpu/perf_counter.c | 80 ++++++++++++++++-------------------
1 files changed, 37 insertions(+), 43 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 68597d7..75dbb1f 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -24,16 +24,7 @@
#include <asm/nmi.h>

static bool perf_counters_initialized __read_mostly;
-
-/*
- * Number of (generic) HW counters:
- */
-static int nr_counters_generic __read_mostly;
static u64 perf_counter_mask __read_mostly;
-static u64 counter_value_mask __read_mostly;
-static int counter_value_bits __read_mostly;
-
-static int nr_counters_fixed __read_mostly;

struct cpu_hw_counters {
struct perf_counter *counters[X86_PMC_IDX_MAX];
@@ -58,6 +49,10 @@ struct x86_pmu {
u64 (*event_map)(int);
u64 (*raw_event)(u64);
int max_events;
+ int num_counters;
+ int num_counters_fixed;
+ int counter_bits;
+ u64 counter_mask;
};

static struct x86_pmu x86_pmu __read_mostly;
@@ -183,12 +178,12 @@ static bool reserve_pmc_hardware(void)
if (nmi_watchdog == NMI_LOCAL_APIC)
disable_lapic_nmi_watchdog();

- for (i = 0; i < nr_counters_generic; i++) {
+ for (i = 0; i < x86_pmu.num_counters; i++) {
if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
goto perfctr_fail;
}

- for (i = 0; i < nr_counters_generic; i++) {
+ for (i = 0; i < x86_pmu.num_counters; i++) {
if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
goto eventsel_fail;
}
@@ -199,7 +194,7 @@ eventsel_fail:
for (i--; i >= 0; i--)
release_evntsel_nmi(x86_pmu.eventsel + i);

- i = nr_counters_generic;
+ i = x86_pmu.num_counters;

perfctr_fail:
for (i--; i >= 0; i--)
@@ -215,7 +210,7 @@ static void release_pmc_hardware(void)
{
int i;

- for (i = 0; i < nr_counters_generic; i++) {
+ for (i = 0; i < x86_pmu.num_counters; i++) {
release_perfctr_nmi(x86_pmu.perfctr + i);
release_evntsel_nmi(x86_pmu.eventsel + i);
}
@@ -336,7 +331,7 @@ static u64 amd_pmu_save_disable_all(void)
*/
barrier();

- for (idx = 0; idx < nr_counters_generic; idx++) {
+ for (idx = 0; idx < x86_pmu.num_counters; idx++) {
u64 val;

if (!test_bit(idx, cpuc->active_mask))
@@ -378,7 +373,7 @@ static void amd_pmu_restore_all(u64 ctrl)
if (!ctrl)
return;

- for (idx = 0; idx < nr_counters_generic; idx++) {
+ for (idx = 0; idx < x86_pmu.num_counters; idx++) {
u64 val;

if (!test_bit(idx, cpuc->active_mask))
@@ -527,7 +522,7 @@ x86_perf_counter_set_period(struct perf_counter *counter,
atomic64_set(&hwc->prev_count, (u64)-left);

err = checking_wrmsrl(hwc->counter_base + idx,
- (u64)(-left) & counter_value_mask);
+ (u64)(-left) & x86_pmu.counter_mask);
}

static inline void
@@ -621,8 +616,9 @@ static int x86_pmu_enable(struct perf_counter *counter)
/* Try to get the previous generic counter again */
if (test_and_set_bit(idx, cpuc->used)) {
try_generic:
- idx = find_first_zero_bit(cpuc->used, nr_counters_generic);
- if (idx == nr_counters_generic)
+ idx = find_first_zero_bit(cpuc->used,
+ x86_pmu.num_counters);
+ if (idx == x86_pmu.num_counters)
return -EAGAIN;

set_bit(idx, cpuc->used);
@@ -654,7 +650,7 @@ void perf_counter_print_debug(void)
struct cpu_hw_counters *cpuc;
int cpu, idx;

- if (!nr_counters_generic)
+ if (!x86_pmu.num_counters)
return;

local_irq_disable();
@@ -676,7 +672,7 @@ void perf_counter_print_debug(void)
}
pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used);

- for (idx = 0; idx < nr_counters_generic; idx++) {
+ for (idx = 0; idx < x86_pmu.num_counters; idx++) {
rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
rdmsrl(x86_pmu.perfctr + idx, pmc_count);

@@ -689,7 +685,7 @@ void perf_counter_print_debug(void)
pr_info("CPU#%d: gen-PMC%d left: %016llx\n",
cpu, idx, prev_left);
}
- for (idx = 0; idx < nr_counters_fixed; idx++) {
+ for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);

pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
@@ -911,6 +907,9 @@ static struct x86_pmu amd_pmu = {
.event_map = amd_pmu_event_map,
.raw_event = amd_pmu_raw_event,
.max_events = ARRAY_SIZE(amd_perfmon_event_map),
+ .num_counters = 4,
+ .counter_bits = 48,
+ .counter_mask = (1ULL << 48) - 1,
};

static int intel_pmu_init(void)
@@ -941,10 +940,10 @@ static int intel_pmu_init(void)
pr_info("... mask length: %d\n", eax.split.mask_length);

x86_pmu = intel_pmu;
-
- nr_counters_generic = eax.split.num_counters;
- nr_counters_fixed = edx.split.num_counters_fixed;
- counter_value_mask = (1ULL << eax.split.bit_width) - 1;
+ x86_pmu.num_counters = eax.split.num_counters;
+ x86_pmu.num_counters_fixed = edx.split.num_counters_fixed;
+ x86_pmu.counter_bits = eax.split.bit_width;
+ x86_pmu.counter_mask = (1ULL << eax.split.bit_width) - 1;

return 0;
}
@@ -952,12 +951,6 @@ static int intel_pmu_init(void)
static int amd_pmu_init(void)
{
x86_pmu = amd_pmu;
-
- nr_counters_generic = 4;
- nr_counters_fixed = 0;
- counter_value_mask = 0x0000FFFFFFFFFFFFULL;
- counter_value_bits = 48;
-
pr_info("AMD Performance Monitoring support detected.\n");
return 0;
}
@@ -979,25 +972,26 @@ void __init init_hw_perf_counters(void)
if (err != 0)
return;

- pr_info("... num counters: %d\n", nr_counters_generic);
- if (nr_counters_generic > X86_PMC_MAX_GENERIC) {
- nr_counters_generic = X86_PMC_MAX_GENERIC;
+ pr_info("... num counters: %d\n", x86_pmu.num_counters);
+ if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
+ x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!",
- nr_counters_generic, X86_PMC_MAX_GENERIC);
+ x86_pmu.num_counters, X86_PMC_MAX_GENERIC);
}
- perf_counter_mask = (1 << nr_counters_generic) - 1;
- perf_max_counters = nr_counters_generic;
+ perf_counter_mask = (1 << x86_pmu.num_counters) - 1;
+ perf_max_counters = x86_pmu.num_counters;

- pr_info("... value mask: %016Lx\n", counter_value_mask);
+ pr_info("... value mask: %016Lx\n", x86_pmu.counter_mask);

- if (nr_counters_fixed > X86_PMC_MAX_FIXED) {
- nr_counters_fixed = X86_PMC_MAX_FIXED;
+ if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
+ x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED;
WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!",
- nr_counters_fixed, X86_PMC_MAX_FIXED);
+ x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED);
}
- pr_info("... fixed counters: %d\n", nr_counters_fixed);
+ pr_info("... fixed counters: %d\n", x86_pmu.num_counters_fixed);

- perf_counter_mask |= ((1LL << nr_counters_fixed)-1) << X86_PMC_IDX_FIXED;
+ perf_counter_mask |=
+ ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED;

pr_info("... counter mask: %016Lx\n", perf_counter_mask);
perf_counters_initialized = true;
--
1.6.1.3

Subject: [PATCH 28/29] x86/perfcounters: remove unused function argument in intel_pmu_get_status()

The mask argument is unused and thus can be removed.

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/kernel/cpu/perf_counter.c | 6 +++---
1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index d1c8036..856b0b8 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -393,7 +393,7 @@ void hw_perf_restore(u64 ctrl)
*/
EXPORT_SYMBOL_GPL(hw_perf_restore);

-static inline u64 intel_pmu_get_status(u64 mask)
+static inline u64 intel_pmu_get_status(void)
{
u64 status;

@@ -728,7 +728,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs, int nmi)

cpuc->throttle_ctrl = intel_pmu_save_disable_all();

- status = intel_pmu_get_status(cpuc->throttle_ctrl);
+ status = intel_pmu_get_status();
if (!status)
goto out;

@@ -753,7 +753,7 @@ again:
/*
* Repeat if there is more work to be done:
*/
- status = intel_pmu_get_status(cpuc->throttle_ctrl);
+ status = intel_pmu_get_status();
if (status)
goto again;
out:
--
1.6.1.3

Subject: [PATCH 21/29] x86/perfcounters: rework counter enable functions

There is vendor-specific code in generic x86 code, and there is
vendor-specific code that could be generic. This patch introduces
x86_pmu_enable_counter() for generic x86 code. Fixed counter handling
for Intel is moved into Intel-only functions. In the end, checks and
calls via function pointers were reduced to the necessary minimum.
Also, the internal function interface changed.

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/kernel/cpu/perf_counter.c | 52 ++++++++++++++++-------------------
1 files changed, 24 insertions(+), 28 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index d8beebe..ae55933 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -44,7 +44,7 @@ struct x86_pmu {
int (*handle_irq)(struct pt_regs *, int);
u64 (*save_disable_all)(void);
void (*restore_all)(u64);
- void (*enable)(int, u64);
+ void (*enable)(struct hw_perf_counter *, int);
void (*disable)(int, u64);
unsigned eventsel;
unsigned perfctr;
@@ -414,28 +414,15 @@ static inline void intel_pmu_ack_status(u64 ack)
wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
}

-static void intel_pmu_enable_counter(int idx, u64 config)
+static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
{
- wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx,
- config | ARCH_PERFMON_EVENTSEL0_ENABLE);
-}
-
-static void amd_pmu_enable_counter(int idx, u64 config)
-{
- struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
-
- if (cpuc->enabled)
- config |= ARCH_PERFMON_EVENTSEL0_ENABLE;
-
- wrmsrl(MSR_K7_EVNTSEL0 + idx, config);
-}
+ int err;

-static void hw_perf_enable(int idx, u64 config)
-{
if (unlikely(!perf_counters_initialized))
return;

- x86_pmu.enable(idx, config);
+ err = checking_wrmsrl(hwc->config_base + idx,
+ hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE);
}

static void intel_pmu_disable_counter(int idx, u64 config)
@@ -522,8 +509,7 @@ x86_perf_counter_set_period(struct perf_counter *counter,
}

static inline void
-__pmc_fixed_enable(struct perf_counter *counter,
- struct hw_perf_counter *hwc, int __idx)
+intel_pmu_enable_fixed(struct hw_perf_counter *hwc, int __idx)
{
int idx = __idx - X86_PMC_IDX_FIXED;
u64 ctrl_val, bits, mask;
@@ -548,14 +534,24 @@ __pmc_fixed_enable(struct perf_counter *counter,
err = checking_wrmsrl(hwc->config_base, ctrl_val);
}

-static void
-__x86_pmu_enable(struct perf_counter *counter,
- struct hw_perf_counter *hwc, int idx)
+static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
{
- if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
- __pmc_fixed_enable(counter, hwc, idx);
+ if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
+ intel_pmu_enable_fixed(hwc, idx);
+ return;
+ }
+
+ x86_pmu_enable_counter(hwc, idx);
+}
+
+static void amd_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
+{
+ struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+
+ if (cpuc->enabled)
+ x86_pmu_enable_counter(hwc, idx);
else
- hw_perf_enable(idx, hwc->config);
+ amd_pmu_disable_counter(idx, hwc->config);
}

static int
@@ -632,7 +628,7 @@ try_generic:
set_bit(idx, cpuc->active);

x86_perf_counter_set_period(counter, hwc, idx);
- __x86_pmu_enable(counter, hwc, idx);
+ x86_pmu.enable(hwc, idx);

return 0;
}
@@ -728,7 +724,7 @@ static void intel_pmu_save_and_restart(struct perf_counter *counter)
x86_perf_counter_set_period(counter, hwc, idx);

if (counter->state == PERF_COUNTER_STATE_ACTIVE)
- __x86_pmu_enable(counter, hwc, idx);
+ intel_pmu_enable_counter(hwc, idx);
}

/*
--
1.6.1.3

Subject: [PATCH 03/29] x86/perfcounters: add default path to cpu detection

This quits hw counter initialization immediately if no supported cpu
vendor is detected.

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/kernel/cpu/perf_counter.c | 2 ++
1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 7d0f81d..d6d6529 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -997,6 +997,8 @@ void __init init_hw_perf_counters(void)
case X86_VENDOR_AMD:
pmc_ops = pmc_amd_init();
break;
+ default:
+ return;
}
if (!pmc_ops)
return;
--
1.6.1.3

Subject: [PATCH 07/29] x86/perfcounters: rename struct pmc_x86_ops into struct x86_pmu

This patch renames struct pmc_x86_ops to struct x86_pmu. The structure
describes an x86 model-specific pmu (performance monitoring unit) and
may contain both ops and data. The new name fits better, is shorter,
and thus easier to handle. Where appropriate, function and variable
names have been changed too.

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/kernel/cpu/perf_counter.c | 135 ++++++++++++++++++------------------
1 files changed, 68 insertions(+), 67 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 95de980..808a1a1 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -44,9 +44,9 @@ struct cpu_hw_counters {
};

/*
- * struct pmc_x86_ops - performance counter x86 ops
+ * struct x86_pmu - generic x86 pmu
*/
-struct pmc_x86_ops {
+struct x86_pmu {
u64 (*save_disable_all)(void);
void (*restore_all)(u64);
u64 (*get_status)(u64);
@@ -60,7 +60,7 @@ struct pmc_x86_ops {
int max_events;
};

-static struct pmc_x86_ops *pmc_ops __read_mostly;
+static struct x86_pmu *x86_pmu __read_mostly;

static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = {
.enabled = 1,
@@ -82,12 +82,12 @@ static const u64 intel_perfmon_event_map[] =
[PERF_COUNT_BUS_CYCLES] = 0x013c,
};

-static u64 pmc_intel_event_map(int event)
+static u64 intel_pmu_event_map(int event)
{
return intel_perfmon_event_map[event];
}

-static u64 pmc_intel_raw_event(u64 event)
+static u64 intel_pmu_raw_event(u64 event)
{
#define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL
#define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL
@@ -114,12 +114,12 @@ static const u64 amd_perfmon_event_map[] =
[PERF_COUNT_BRANCH_MISSES] = 0x00c5,
};

-static u64 pmc_amd_event_map(int event)
+static u64 amd_pmu_event_map(int event)
{
return amd_perfmon_event_map[event];
}

-static u64 pmc_amd_raw_event(u64 event)
+static u64 amd_pmu_raw_event(u64 event)
{
#define K7_EVNTSEL_EVENT_MASK 0x7000000FFULL
#define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL
@@ -184,12 +184,12 @@ static bool reserve_pmc_hardware(void)
disable_lapic_nmi_watchdog();

for (i = 0; i < nr_counters_generic; i++) {
- if (!reserve_perfctr_nmi(pmc_ops->perfctr + i))
+ if (!reserve_perfctr_nmi(x86_pmu->perfctr + i))
goto perfctr_fail;
}

for (i = 0; i < nr_counters_generic; i++) {
- if (!reserve_evntsel_nmi(pmc_ops->eventsel + i))
+ if (!reserve_evntsel_nmi(x86_pmu->eventsel + i))
goto eventsel_fail;
}

@@ -197,13 +197,13 @@ static bool reserve_pmc_hardware(void)

eventsel_fail:
for (i--; i >= 0; i--)
- release_evntsel_nmi(pmc_ops->eventsel + i);
+ release_evntsel_nmi(x86_pmu->eventsel + i);

i = nr_counters_generic;

perfctr_fail:
for (i--; i >= 0; i--)
- release_perfctr_nmi(pmc_ops->perfctr + i);
+ release_perfctr_nmi(x86_pmu->perfctr + i);

if (nmi_watchdog == NMI_LOCAL_APIC)
enable_lapic_nmi_watchdog();
@@ -216,8 +216,8 @@ static void release_pmc_hardware(void)
int i;

for (i = 0; i < nr_counters_generic; i++) {
- release_perfctr_nmi(pmc_ops->perfctr + i);
- release_evntsel_nmi(pmc_ops->eventsel + i);
+ release_perfctr_nmi(x86_pmu->perfctr + i);
+ release_evntsel_nmi(x86_pmu->eventsel + i);
}

if (nmi_watchdog == NMI_LOCAL_APIC)
@@ -293,14 +293,14 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
* Raw event type provide the config in the event structure
*/
if (perf_event_raw(hw_event)) {
- hwc->config |= pmc_ops->raw_event(perf_event_config(hw_event));
+ hwc->config |= x86_pmu->raw_event(perf_event_config(hw_event));
} else {
- if (perf_event_id(hw_event) >= pmc_ops->max_events)
+ if (perf_event_id(hw_event) >= x86_pmu->max_events)
return -EINVAL;
/*
* The generic map:
*/
- hwc->config |= pmc_ops->event_map(perf_event_id(hw_event));
+ hwc->config |= x86_pmu->event_map(perf_event_id(hw_event));
}

counter->destroy = hw_perf_counter_destroy;
@@ -308,7 +308,7 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
return 0;
}

-static u64 pmc_intel_save_disable_all(void)
+static u64 intel_pmu_save_disable_all(void)
{
u64 ctrl;

@@ -318,7 +318,7 @@ static u64 pmc_intel_save_disable_all(void)
return ctrl;
}

-static u64 pmc_amd_save_disable_all(void)
+static u64 amd_pmu_save_disable_all(void)
{
struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
int enabled, idx;
@@ -327,7 +327,8 @@ static u64 pmc_amd_save_disable_all(void)
cpuc->enabled = 0;
/*
* ensure we write the disable before we start disabling the
- * counters proper, so that pcm_amd_enable() does the right thing.
+ * counters proper, so that amd_pmu_enable_counter() does the
+ * right thing.
*/
barrier();

@@ -351,19 +352,19 @@ u64 hw_perf_save_disable(void)
if (unlikely(!perf_counters_initialized))
return 0;

- return pmc_ops->save_disable_all();
+ return x86_pmu->save_disable_all();
}
/*
* Exported because of ACPI idle
*/
EXPORT_SYMBOL_GPL(hw_perf_save_disable);

-static void pmc_intel_restore_all(u64 ctrl)
+static void intel_pmu_restore_all(u64 ctrl)
{
wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
}

-static void pmc_amd_restore_all(u64 ctrl)
+static void amd_pmu_restore_all(u64 ctrl)
{
struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
int idx;
@@ -391,14 +392,14 @@ void hw_perf_restore(u64 ctrl)
if (unlikely(!perf_counters_initialized))
return;

- pmc_ops->restore_all(ctrl);
+ x86_pmu->restore_all(ctrl);
}
/*
* Exported because of ACPI idle
*/
EXPORT_SYMBOL_GPL(hw_perf_restore);

-static u64 pmc_intel_get_status(u64 mask)
+static u64 intel_pmu_get_status(u64 mask)
{
u64 status;

@@ -407,7 +408,7 @@ static u64 pmc_intel_get_status(u64 mask)
return status;
}

-static u64 pmc_amd_get_status(u64 mask)
+static u64 amd_pmu_get_status(u64 mask)
{
u64 status = 0;
int idx;
@@ -432,15 +433,15 @@ static u64 hw_perf_get_status(u64 mask)
if (unlikely(!perf_counters_initialized))
return 0;

- return pmc_ops->get_status(mask);
+ return x86_pmu->get_status(mask);
}

-static void pmc_intel_ack_status(u64 ack)
+static void intel_pmu_ack_status(u64 ack)
{
wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
}

-static void pmc_amd_ack_status(u64 ack)
+static void amd_pmu_ack_status(u64 ack)
{
}

@@ -449,16 +450,16 @@ static void hw_perf_ack_status(u64 ack)
if (unlikely(!perf_counters_initialized))
return;

- pmc_ops->ack_status(ack);
+ x86_pmu->ack_status(ack);
}

-static void pmc_intel_enable(int idx, u64 config)
+static void intel_pmu_enable_counter(int idx, u64 config)
{
wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx,
config | ARCH_PERFMON_EVENTSEL0_ENABLE);
}

-static void pmc_amd_enable(int idx, u64 config)
+static void amd_pmu_enable_counter(int idx, u64 config)
{
struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);

@@ -474,15 +475,15 @@ static void hw_perf_enable(int idx, u64 config)
if (unlikely(!perf_counters_initialized))
return;

- pmc_ops->enable(idx, config);
+ x86_pmu->enable(idx, config);
}

-static void pmc_intel_disable(int idx, u64 config)
+static void intel_pmu_disable_counter(int idx, u64 config)
{
wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx, config);
}

-static void pmc_amd_disable(int idx, u64 config)
+static void amd_pmu_disable_counter(int idx, u64 config)
{
struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);

@@ -496,7 +497,7 @@ static void hw_perf_disable(int idx, u64 config)
if (unlikely(!perf_counters_initialized))
return;

- pmc_ops->disable(idx, config);
+ x86_pmu->disable(idx, config);
}

static inline void
@@ -613,11 +614,11 @@ fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)

event = hwc->config & ARCH_PERFMON_EVENT_MASK;

- if (unlikely(event == pmc_ops->event_map(PERF_COUNT_INSTRUCTIONS)))
+ if (unlikely(event == x86_pmu->event_map(PERF_COUNT_INSTRUCTIONS)))
return X86_PMC_IDX_FIXED_INSTRUCTIONS;
- if (unlikely(event == pmc_ops->event_map(PERF_COUNT_CPU_CYCLES)))
+ if (unlikely(event == x86_pmu->event_map(PERF_COUNT_CPU_CYCLES)))
return X86_PMC_IDX_FIXED_CPU_CYCLES;
- if (unlikely(event == pmc_ops->event_map(PERF_COUNT_BUS_CYCLES)))
+ if (unlikely(event == x86_pmu->event_map(PERF_COUNT_BUS_CYCLES)))
return X86_PMC_IDX_FIXED_BUS_CYCLES;

return -1;
@@ -661,8 +662,8 @@ try_generic:
set_bit(idx, cpuc->used);
hwc->idx = idx;
}
- hwc->config_base = pmc_ops->eventsel;
- hwc->counter_base = pmc_ops->perfctr;
+ hwc->config_base = x86_pmu->eventsel;
+ hwc->counter_base = x86_pmu->perfctr;
}

perf_counters_lapic_init(hwc->nmi);
@@ -710,8 +711,8 @@ void perf_counter_print_debug(void)
pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used);

for (idx = 0; idx < nr_counters_generic; idx++) {
- rdmsrl(pmc_ops->eventsel + idx, pmc_ctrl);
- rdmsrl(pmc_ops->perfctr + idx, pmc_count);
+ rdmsrl(x86_pmu->eventsel + idx, pmc_ctrl);
+ rdmsrl(x86_pmu->perfctr + idx, pmc_count);

prev_left = per_cpu(prev_left[idx], cpu);

@@ -918,35 +919,35 @@ static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
.priority = 1
};

-static struct pmc_x86_ops pmc_intel_ops = {
- .save_disable_all = pmc_intel_save_disable_all,
- .restore_all = pmc_intel_restore_all,
- .get_status = pmc_intel_get_status,
- .ack_status = pmc_intel_ack_status,
- .enable = pmc_intel_enable,
- .disable = pmc_intel_disable,
+static struct x86_pmu intel_pmu = {
+ .save_disable_all = intel_pmu_save_disable_all,
+ .restore_all = intel_pmu_restore_all,
+ .get_status = intel_pmu_get_status,
+ .ack_status = intel_pmu_ack_status,
+ .enable = intel_pmu_enable_counter,
+ .disable = intel_pmu_disable_counter,
.eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
.perfctr = MSR_ARCH_PERFMON_PERFCTR0,
- .event_map = pmc_intel_event_map,
- .raw_event = pmc_intel_raw_event,
+ .event_map = intel_pmu_event_map,
+ .raw_event = intel_pmu_raw_event,
.max_events = ARRAY_SIZE(intel_perfmon_event_map),
};

-static struct pmc_x86_ops pmc_amd_ops = {
- .save_disable_all = pmc_amd_save_disable_all,
- .restore_all = pmc_amd_restore_all,
- .get_status = pmc_amd_get_status,
- .ack_status = pmc_amd_ack_status,
- .enable = pmc_amd_enable,
- .disable = pmc_amd_disable,
+static struct x86_pmu amd_pmu = {
+ .save_disable_all = amd_pmu_save_disable_all,
+ .restore_all = amd_pmu_restore_all,
+ .get_status = amd_pmu_get_status,
+ .ack_status = amd_pmu_ack_status,
+ .enable = amd_pmu_enable_counter,
+ .disable = amd_pmu_disable_counter,
.eventsel = MSR_K7_EVNTSEL0,
.perfctr = MSR_K7_PERFCTR0,
- .event_map = pmc_amd_event_map,
- .raw_event = pmc_amd_raw_event,
+ .event_map = amd_pmu_event_map,
+ .raw_event = amd_pmu_raw_event,
.max_events = ARRAY_SIZE(amd_perfmon_event_map),
};

-static struct pmc_x86_ops *pmc_intel_init(void)
+static struct x86_pmu *intel_pmu_init(void)
{
union cpuid10_edx edx;
union cpuid10_eax eax;
@@ -977,10 +978,10 @@ static struct pmc_x86_ops *pmc_intel_init(void)
nr_counters_fixed = edx.split.num_counters_fixed;
counter_value_mask = (1ULL << eax.split.bit_width) - 1;

- return &pmc_intel_ops;
+ return &intel_pmu;
}

-static struct pmc_x86_ops *pmc_amd_init(void)
+static struct x86_pmu *amd_pmu_init(void)
{
nr_counters_generic = 4;
nr_counters_fixed = 0;
@@ -989,22 +990,22 @@ static struct pmc_x86_ops *pmc_amd_init(void)

pr_info("AMD Performance Monitoring support detected.\n");

- return &pmc_amd_ops;
+ return &amd_pmu;
}

void __init init_hw_perf_counters(void)
{
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_INTEL:
- pmc_ops = pmc_intel_init();
+ x86_pmu = intel_pmu_init();
break;
case X86_VENDOR_AMD:
- pmc_ops = pmc_amd_init();
+ x86_pmu = amd_pmu_init();
break;
default:
return;
}
- if (!pmc_ops)
+ if (!x86_pmu)
return;

pr_info("... num counters: %d\n", nr_counters_generic);
--
1.6.1.3

Subject: [PATCH 10/29] x86/perfcounters: remove ack_status() from struct x86_pmu

This function is Intel-only and not necessary for AMD cpus.

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/kernel/cpu/perf_counter.c | 19 ++-----------------
1 files changed, 2 insertions(+), 17 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index d0bb029..6bbdc16 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -51,7 +51,6 @@ struct x86_pmu {
int (*handle_irq)(struct pt_regs *, int);
u64 (*save_disable_all)(void);
void (*restore_all)(u64);
- void (*ack_status)(u64);
void (*enable)(int, u64);
void (*disable)(int, u64);
unsigned eventsel;
@@ -415,23 +414,11 @@ static inline u64 intel_pmu_get_status(u64 mask)
return status;
}

-static void intel_pmu_ack_status(u64 ack)
+static inline void intel_pmu_ack_status(u64 ack)
{
wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
}

-static void amd_pmu_ack_status(u64 ack)
-{
-}
-
-static void hw_perf_ack_status(u64 ack)
-{
- if (unlikely(!perf_counters_initialized))
- return;
-
- x86_pmu->ack_status(ack);
-}
-
static void intel_pmu_enable_counter(int idx, u64 config)
{
wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx,
@@ -788,7 +775,7 @@ again:
__x86_pmu_disable(counter, &counter->hw, bit);
}

- hw_perf_ack_status(ack);
+ intel_pmu_ack_status(ack);

/*
* Repeat if there is more work to be done:
@@ -904,7 +891,6 @@ static struct x86_pmu intel_pmu = {
.handle_irq = intel_pmu_handle_irq,
.save_disable_all = intel_pmu_save_disable_all,
.restore_all = intel_pmu_restore_all,
- .ack_status = intel_pmu_ack_status,
.enable = intel_pmu_enable_counter,
.disable = intel_pmu_disable_counter,
.eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
@@ -918,7 +904,6 @@ static struct x86_pmu amd_pmu = {
.handle_irq = amd_pmu_handle_irq,
.save_disable_all = amd_pmu_save_disable_all,
.restore_all = amd_pmu_restore_all,
- .ack_status = amd_pmu_ack_status,
.enable = amd_pmu_enable_counter,
.disable = amd_pmu_disable_counter,
.eventsel = MSR_K7_EVNTSEL0,
--
1.6.1.3

Subject: [PATCH 05/29] x86/perfcounters: protect per-cpu variables with compile barriers only

Per-cpu variables need not be protected with cpu barriers
(smp_wmb()). Protection is only needed against preemption on the same
cpu (rescheduling or the nmi handler). This can be done using a
compiler barrier only.

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/kernel/cpu/perf_counter.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 75a0903..ad663d5 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -673,7 +673,7 @@ try_generic:
/*
* Make it visible before enabling the hw:
*/
- smp_wmb();
+ barrier();

__hw_perf_counter_set_period(counter, hwc, idx);
__pmc_generic_enable(counter, hwc, idx);
@@ -745,7 +745,7 @@ static void pmc_generic_disable(struct perf_counter *counter)
* Make sure the cleared pointer becomes visible before we
* (potentially) free the counter:
*/
- smp_wmb();
+ barrier();

/*
* Drain the remaining delta count out of a counter
--
1.6.1.3

Subject: [PATCH 17/29] x86/perfcounters: make x86_pmu_read() static inline

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/kernel/cpu/perf_counter.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 15d2c03..3f3ae47 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -1002,7 +1002,7 @@ void __init init_hw_perf_counters(void)
register_die_notifier(&perf_counter_nmi_notifier);
}

-static void x86_pmu_read(struct perf_counter *counter)
+static inline void x86_pmu_read(struct perf_counter *counter)
{
x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
}
--
1.6.1.3

Subject: [PATCH 19/29] x86/perfcounters: generic use of cpuc->active

cpuc->active will now be used to indicate an enabled counter, which
also implies a valid pointer in cpuc->counters[]. In contrast,
cpuc->used only locks the counter; it can still be uninitialized.

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/kernel/cpu/perf_counter.c | 20 +++++++++-----------
1 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 9ec51a6..f7fd4a3 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -424,7 +424,6 @@ static void amd_pmu_enable_counter(int idx, u64 config)
{
struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);

- set_bit(idx, cpuc->active);
if (cpuc->enabled)
config |= ARCH_PERFMON_EVENTSEL0_ENABLE;

@@ -446,9 +445,6 @@ static void intel_pmu_disable_counter(int idx, u64 config)

static void amd_pmu_disable_counter(int idx, u64 config)
{
- struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
-
- clear_bit(idx, cpuc->active);
wrmsrl(MSR_K7_EVNTSEL0 + idx, config);

}
@@ -633,10 +629,7 @@ try_generic:
__x86_pmu_disable(counter, hwc, idx);

cpuc->counters[idx] = counter;
- /*
- * Make it visible before enabling the hw:
- */
- barrier();
+ set_bit(idx, cpuc->active);

x86_perf_counter_set_period(counter, hwc, idx);
__x86_pmu_enable(counter, hwc, idx);
@@ -700,10 +693,13 @@ static void x86_pmu_disable(struct perf_counter *counter)
struct hw_perf_counter *hwc = &counter->hw;
unsigned int idx = hwc->idx;

+ /*
+ * Must be done before we disable, otherwise the nmi handler
+ * could reenable again:
+ */
+ clear_bit(idx, cpuc->active);
__x86_pmu_disable(counter, hwc, idx);

- clear_bit(idx, cpuc->used);
- cpuc->counters[idx] = NULL;
/*
* Make sure the cleared pointer becomes visible before we
* (potentially) free the counter:
@@ -715,6 +711,8 @@ static void x86_pmu_disable(struct perf_counter *counter)
* that we are disabling:
*/
x86_perf_counter_update(counter, hwc, idx);
+ cpuc->counters[idx] = NULL;
+ clear_bit(idx, cpuc->used);
}

/*
@@ -763,7 +761,7 @@ again:
struct perf_counter *counter = cpuc->counters[bit];

clear_bit(bit, (unsigned long *) &status);
- if (!counter)
+ if (!test_bit(bit, cpuc->active))
continue;

intel_pmu_save_and_restart(counter);
--
1.6.1.3

Subject: [PATCH 20/29] x86/perfcounters: consistent use of type int for counter index

The counter index is sometimes declared as unsigned int. This patch
changes this to a consistent use of int.

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/kernel/cpu/perf_counter.c | 8 ++++----
include/linux/perf_counter.h | 2 +-
2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index f7fd4a3..d8beebe 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -459,7 +459,7 @@ static void hw_perf_disable(int idx, u64 config)

static inline void
__pmc_fixed_disable(struct perf_counter *counter,
- struct hw_perf_counter *hwc, unsigned int __idx)
+ struct hw_perf_counter *hwc, int __idx)
{
int idx = __idx - X86_PMC_IDX_FIXED;
u64 ctrl_val, mask;
@@ -474,7 +474,7 @@ __pmc_fixed_disable(struct perf_counter *counter,

static inline void
__x86_pmu_disable(struct perf_counter *counter,
- struct hw_perf_counter *hwc, unsigned int idx)
+ struct hw_perf_counter *hwc, int idx)
{
if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
__pmc_fixed_disable(counter, hwc, idx);
@@ -523,7 +523,7 @@ x86_perf_counter_set_period(struct perf_counter *counter,

static inline void
__pmc_fixed_enable(struct perf_counter *counter,
- struct hw_perf_counter *hwc, unsigned int __idx)
+ struct hw_perf_counter *hwc, int __idx)
{
int idx = __idx - X86_PMC_IDX_FIXED;
u64 ctrl_val, bits, mask;
@@ -691,7 +691,7 @@ static void x86_pmu_disable(struct perf_counter *counter)
{
struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
struct hw_perf_counter *hwc = &counter->hw;
- unsigned int idx = hwc->idx;
+ int idx = hwc->idx;

/*
* Must be done before we disable, otherwise the nmi handler
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index c3db52d..41aed42 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -318,7 +318,7 @@ struct hw_perf_counter {
unsigned long config_base;
unsigned long counter_base;
int nmi;
- unsigned int idx;
+ int idx;
};
union { /* software */
atomic64_t count;
--
1.6.1.3

Subject: [PATCH 06/29] perfcounters: rename struct hw_perf_counter_ops into struct pmu

This patch renames struct hw_perf_counter_ops to struct pmu. The
structure describes a cpu-specific pmu (performance monitoring unit)
and may contain both ops and data. The new name fits better, is
shorter, and thus easier to handle. Where appropriate, function and
variable names have been changed too.

Signed-off-by: Robert Richter <[email protected]>
---
arch/powerpc/kernel/perf_counter.c | 25 ++++++-------
arch/x86/kernel/cpu/perf_counter.c | 37 +++++++++----------
include/linux/perf_counter.h | 9 ++---
kernel/perf_counter.c | 68 +++++++++++++++++-------------------
4 files changed, 66 insertions(+), 73 deletions(-)

diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index bd76d0f..d9bbe5e 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -256,7 +256,7 @@ static int check_excludes(struct perf_counter **ctrs, int n_prev, int n_new)
return 0;
}

-static void power_perf_read(struct perf_counter *counter)
+static void power_pmu_read(struct perf_counter *counter)
{
long val, delta, prev;

@@ -405,7 +405,7 @@ void hw_perf_restore(u64 disable)
for (i = 0; i < cpuhw->n_counters; ++i) {
counter = cpuhw->counter[i];
if (counter->hw.idx && counter->hw.idx != hwc_index[i] + 1) {
- power_perf_read(counter);
+ power_pmu_read(counter);
write_pmc(counter->hw.idx, 0);
counter->hw.idx = 0;
}
@@ -477,7 +477,7 @@ static void counter_sched_in(struct perf_counter *counter, int cpu)
counter->oncpu = cpu;
counter->tstamp_running += counter->ctx->time - counter->tstamp_stopped;
if (is_software_counter(counter))
- counter->hw_ops->enable(counter);
+ counter->pmu->enable(counter);
}

/*
@@ -533,7 +533,7 @@ int hw_perf_group_sched_in(struct perf_counter *group_leader,
* re-enable the PMU in order to get hw_perf_restore to do the
* actual work of reconfiguring the PMU.
*/
-static int power_perf_enable(struct perf_counter *counter)
+static int power_pmu_enable(struct perf_counter *counter)
{
struct cpu_hw_counters *cpuhw;
unsigned long flags;
@@ -573,7 +573,7 @@ static int power_perf_enable(struct perf_counter *counter)
/*
* Remove a counter from the PMU.
*/
-static void power_perf_disable(struct perf_counter *counter)
+static void power_pmu_disable(struct perf_counter *counter)
{
struct cpu_hw_counters *cpuhw;
long i;
@@ -583,7 +583,7 @@ static void power_perf_disable(struct perf_counter *counter)
local_irq_save(flags);
pmudis = hw_perf_save_disable();

- power_perf_read(counter);
+ power_pmu_read(counter);

cpuhw = &__get_cpu_var(cpu_hw_counters);
for (i = 0; i < cpuhw->n_counters; ++i) {
@@ -607,10 +607,10 @@ static void power_perf_disable(struct perf_counter *counter)
local_irq_restore(flags);
}

-struct hw_perf_counter_ops power_perf_ops = {
- .enable = power_perf_enable,
- .disable = power_perf_disable,
- .read = power_perf_read
+struct pmu power_pmu = {
+ .enable = power_pmu_enable,
+ .disable = power_pmu_disable,
+ .read = power_pmu_read,
};

/* Number of perf_counters counting hardware events */
@@ -631,8 +631,7 @@ static void hw_perf_counter_destroy(struct perf_counter *counter)
}
}

-const struct hw_perf_counter_ops *
-hw_perf_counter_init(struct perf_counter *counter)
+const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
{
unsigned long ev;
struct perf_counter *ctrs[MAX_HWCOUNTERS];
@@ -705,7 +704,7 @@ hw_perf_counter_init(struct perf_counter *counter)

if (err)
return ERR_PTR(err);
- return &power_perf_ops;
+ return &power_pmu;
}

/*
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index ad663d5..95de980 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -515,8 +515,8 @@ __pmc_fixed_disable(struct perf_counter *counter,
}

static inline void
-__pmc_generic_disable(struct perf_counter *counter,
- struct hw_perf_counter *hwc, unsigned int idx)
+__x86_pmu_disable(struct perf_counter *counter,
+ struct hw_perf_counter *hwc, unsigned int idx)
{
if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
__pmc_fixed_disable(counter, hwc, idx);
@@ -591,8 +591,8 @@ __pmc_fixed_enable(struct perf_counter *counter,
}

static void
-__pmc_generic_enable(struct perf_counter *counter,
- struct hw_perf_counter *hwc, int idx)
+__x86_pmu_enable(struct perf_counter *counter,
+ struct hw_perf_counter *hwc, int idx)
{
if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
__pmc_fixed_enable(counter, hwc, idx);
@@ -626,7 +626,7 @@ fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)
/*
* Find a PMC slot for the freshly enabled / scheduled in counter:
*/
-static int pmc_generic_enable(struct perf_counter *counter)
+static int x86_pmu_enable(struct perf_counter *counter)
{
struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
struct hw_perf_counter *hwc = &counter->hw;
@@ -667,7 +667,7 @@ try_generic:

perf_counters_lapic_init(hwc->nmi);

- __pmc_generic_disable(counter, hwc, idx);
+ __x86_pmu_disable(counter, hwc, idx);

cpuc->counters[idx] = counter;
/*
@@ -676,7 +676,7 @@ try_generic:
barrier();

__hw_perf_counter_set_period(counter, hwc, idx);
- __pmc_generic_enable(counter, hwc, idx);
+ __x86_pmu_enable(counter, hwc, idx);

return 0;
}
@@ -731,13 +731,13 @@ void perf_counter_print_debug(void)
local_irq_enable();
}

-static void pmc_generic_disable(struct perf_counter *counter)
+static void x86_pmu_disable(struct perf_counter *counter)
{
struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
struct hw_perf_counter *hwc = &counter->hw;
unsigned int idx = hwc->idx;

- __pmc_generic_disable(counter, hwc, idx);
+ __x86_pmu_disable(counter, hwc, idx);

clear_bit(idx, cpuc->used);
cpuc->counters[idx] = NULL;
@@ -767,7 +767,7 @@ static void perf_save_and_restart(struct perf_counter *counter)
__hw_perf_counter_set_period(counter, hwc, idx);

if (counter->state == PERF_COUNTER_STATE_ACTIVE)
- __pmc_generic_enable(counter, hwc, idx);
+ __x86_pmu_enable(counter, hwc, idx);
}

/*
@@ -805,7 +805,7 @@ again:

perf_save_and_restart(counter);
if (perf_counter_overflow(counter, nmi, regs, 0))
- __pmc_generic_disable(counter, &counter->hw, bit);
+ __x86_pmu_disable(counter, &counter->hw, bit);
}

hw_perf_ack_status(ack);
@@ -1034,19 +1034,18 @@ void __init init_hw_perf_counters(void)
register_die_notifier(&perf_counter_nmi_notifier);
}

-static void pmc_generic_read(struct perf_counter *counter)
+static void x86_pmu_read(struct perf_counter *counter)
{
x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
}

-static const struct hw_perf_counter_ops x86_perf_counter_ops = {
- .enable = pmc_generic_enable,
- .disable = pmc_generic_disable,
- .read = pmc_generic_read,
+static const struct pmu pmu = {
+ .enable = x86_pmu_enable,
+ .disable = x86_pmu_disable,
+ .read = x86_pmu_read,
};

-const struct hw_perf_counter_ops *
-hw_perf_counter_init(struct perf_counter *counter)
+const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
{
int err;

@@ -1054,7 +1053,7 @@ hw_perf_counter_init(struct perf_counter *counter)
if (err)
return ERR_PTR(err);

- return &x86_perf_counter_ops;
+ return &pmu;
}

/*
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index be10b3f..c3db52d 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -334,9 +334,9 @@ struct hw_perf_counter {
struct perf_counter;

/**
- * struct hw_perf_counter_ops - performance counter hw ops
+ * struct pmu - generic performance monitoring unit
*/
-struct hw_perf_counter_ops {
+struct pmu {
int (*enable) (struct perf_counter *counter);
void (*disable) (struct perf_counter *counter);
void (*read) (struct perf_counter *counter);
@@ -381,7 +381,7 @@ struct perf_counter {
struct list_head sibling_list;
int nr_siblings;
struct perf_counter *group_leader;
- const struct hw_perf_counter_ops *hw_ops;
+ const struct pmu *pmu;

enum perf_counter_active_state state;
enum perf_counter_active_state prev_state;
@@ -519,8 +519,7 @@ struct perf_cpu_context {
*/
extern int perf_max_counters;

-extern const struct hw_perf_counter_ops *
-hw_perf_counter_init(struct perf_counter *counter);
+extern const struct pmu *hw_perf_counter_init(struct perf_counter *counter);

extern void perf_counter_task_sched_in(struct task_struct *task, int cpu);
extern void perf_counter_task_sched_out(struct task_struct *task, int cpu);
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 0939609..582108a 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -52,8 +52,7 @@ static DEFINE_MUTEX(perf_resource_mutex);
/*
* Architecture provided APIs - weak aliases:
*/
-extern __weak const struct hw_perf_counter_ops *
-hw_perf_counter_init(struct perf_counter *counter)
+extern __weak const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
{
return NULL;
}
@@ -124,7 +123,7 @@ counter_sched_out(struct perf_counter *counter,

counter->state = PERF_COUNTER_STATE_INACTIVE;
counter->tstamp_stopped = ctx->time;
- counter->hw_ops->disable(counter);
+ counter->pmu->disable(counter);
counter->oncpu = -1;

if (!is_software_counter(counter))
@@ -417,7 +416,7 @@ counter_sched_in(struct perf_counter *counter,
*/
smp_wmb();

- if (counter->hw_ops->enable(counter)) {
+ if (counter->pmu->enable(counter)) {
counter->state = PERF_COUNTER_STATE_INACTIVE;
counter->oncpu = -1;
return -EAGAIN;
@@ -1096,7 +1095,7 @@ static void __read(void *info)
local_irq_save(flags);
if (ctx->is_active)
update_context_time(ctx);
- counter->hw_ops->read(counter);
+ counter->pmu->read(counter);
update_counter_times(counter);
local_irq_restore(flags);
}
@@ -1922,7 +1921,7 @@ static void perf_counter_output(struct perf_counter *counter,
leader = counter->group_leader;
list_for_each_entry(sub, &leader->sibling_list, list_entry) {
if (sub != counter)
- sub->hw_ops->read(sub);
+ sub->pmu->read(sub);

group_entry.event = sub->hw_event.config;
group_entry.counter = atomic64_read(&sub->count);
@@ -2264,7 +2263,7 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
struct pt_regs *regs;

counter = container_of(hrtimer, struct perf_counter, hw.hrtimer);
- counter->hw_ops->read(counter);
+ counter->pmu->read(counter);

regs = get_irq_regs();
/*
@@ -2410,7 +2409,7 @@ static void perf_swcounter_disable(struct perf_counter *counter)
perf_swcounter_update(counter);
}

-static const struct hw_perf_counter_ops perf_ops_generic = {
+static const struct pmu perf_ops_generic = {
.enable = perf_swcounter_enable,
.disable = perf_swcounter_disable,
.read = perf_swcounter_read,
@@ -2460,7 +2459,7 @@ static void cpu_clock_perf_counter_read(struct perf_counter *counter)
cpu_clock_perf_counter_update(counter);
}

-static const struct hw_perf_counter_ops perf_ops_cpu_clock = {
+static const struct pmu perf_ops_cpu_clock = {
.enable = cpu_clock_perf_counter_enable,
.disable = cpu_clock_perf_counter_disable,
.read = cpu_clock_perf_counter_read,
@@ -2522,7 +2521,7 @@ static void task_clock_perf_counter_read(struct perf_counter *counter)
task_clock_perf_counter_update(counter, time);
}

-static const struct hw_perf_counter_ops perf_ops_task_clock = {
+static const struct pmu perf_ops_task_clock = {
.enable = task_clock_perf_counter_enable,
.disable = task_clock_perf_counter_disable,
.read = task_clock_perf_counter_read,
@@ -2574,7 +2573,7 @@ static void cpu_migrations_perf_counter_disable(struct perf_counter *counter)
cpu_migrations_perf_counter_update(counter);
}

-static const struct hw_perf_counter_ops perf_ops_cpu_migrations = {
+static const struct pmu perf_ops_cpu_migrations = {
.enable = cpu_migrations_perf_counter_enable,
.disable = cpu_migrations_perf_counter_disable,
.read = cpu_migrations_perf_counter_read,
@@ -2600,8 +2599,7 @@ static void tp_perf_counter_destroy(struct perf_counter *counter)
ftrace_profile_disable(perf_event_id(&counter->hw_event));
}

-static const struct hw_perf_counter_ops *
-tp_perf_counter_init(struct perf_counter *counter)
+static const struct pmu *tp_perf_counter_init(struct perf_counter *counter)
{
int event_id = perf_event_id(&counter->hw_event);
int ret;
@@ -2616,18 +2614,16 @@ tp_perf_counter_init(struct perf_counter *counter)
return &perf_ops_generic;
}
#else
-static const struct hw_perf_counter_ops *
-tp_perf_counter_init(struct perf_counter *counter)
+static const struct pmu *tp_perf_counter_init(struct perf_counter *counter)
{
return NULL;
}
#endif

-static const struct hw_perf_counter_ops *
-sw_perf_counter_init(struct perf_counter *counter)
+static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
{
struct perf_counter_hw_event *hw_event = &counter->hw_event;
- const struct hw_perf_counter_ops *hw_ops = NULL;
+ const struct pmu *pmu = NULL;
struct hw_perf_counter *hwc = &counter->hw;

/*
@@ -2639,7 +2635,7 @@ sw_perf_counter_init(struct perf_counter *counter)
*/
switch (perf_event_id(&counter->hw_event)) {
case PERF_COUNT_CPU_CLOCK:
- hw_ops = &perf_ops_cpu_clock;
+ pmu = &perf_ops_cpu_clock;

if (hw_event->irq_period && hw_event->irq_period < 10000)
hw_event->irq_period = 10000;
@@ -2650,9 +2646,9 @@ sw_perf_counter_init(struct perf_counter *counter)
* use the cpu_clock counter instead.
*/
if (counter->ctx->task)
- hw_ops = &perf_ops_task_clock;
+ pmu = &perf_ops_task_clock;
else
- hw_ops = &perf_ops_cpu_clock;
+ pmu = &perf_ops_cpu_clock;

if (hw_event->irq_period && hw_event->irq_period < 10000)
hw_event->irq_period = 10000;
@@ -2661,18 +2657,18 @@ sw_perf_counter_init(struct perf_counter *counter)
case PERF_COUNT_PAGE_FAULTS_MIN:
case PERF_COUNT_PAGE_FAULTS_MAJ:
case PERF_COUNT_CONTEXT_SWITCHES:
- hw_ops = &perf_ops_generic;
+ pmu = &perf_ops_generic;
break;
case PERF_COUNT_CPU_MIGRATIONS:
if (!counter->hw_event.exclude_kernel)
- hw_ops = &perf_ops_cpu_migrations;
+ pmu = &perf_ops_cpu_migrations;
break;
}

- if (hw_ops)
+ if (pmu)
hwc->irq_period = hw_event->irq_period;

- return hw_ops;
+ return pmu;
}

/*
@@ -2685,7 +2681,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
struct perf_counter *group_leader,
gfp_t gfpflags)
{
- const struct hw_perf_counter_ops *hw_ops;
+ const struct pmu *pmu;
struct perf_counter *counter;
long err;

@@ -2713,46 +2709,46 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
counter->cpu = cpu;
counter->hw_event = *hw_event;
counter->group_leader = group_leader;
- counter->hw_ops = NULL;
+ counter->pmu = NULL;
counter->ctx = ctx;

counter->state = PERF_COUNTER_STATE_INACTIVE;
if (hw_event->disabled)
counter->state = PERF_COUNTER_STATE_OFF;

- hw_ops = NULL;
+ pmu = NULL;

if (perf_event_raw(hw_event)) {
- hw_ops = hw_perf_counter_init(counter);
+ pmu = hw_perf_counter_init(counter);
goto done;
}

switch (perf_event_type(hw_event)) {
case PERF_TYPE_HARDWARE:
- hw_ops = hw_perf_counter_init(counter);
+ pmu = hw_perf_counter_init(counter);
break;

case PERF_TYPE_SOFTWARE:
- hw_ops = sw_perf_counter_init(counter);
+ pmu = sw_perf_counter_init(counter);
break;

case PERF_TYPE_TRACEPOINT:
- hw_ops = tp_perf_counter_init(counter);
+ pmu = tp_perf_counter_init(counter);
break;
}
done:
err = 0;
- if (!hw_ops)
+ if (!pmu)
err = -EINVAL;
- else if (IS_ERR(hw_ops))
- err = PTR_ERR(hw_ops);
+ else if (IS_ERR(pmu))
+ err = PTR_ERR(pmu);

if (err) {
kfree(counter);
return ERR_PTR(err);
}

- counter->hw_ops = hw_ops;
+ counter->pmu = pmu;

if (counter->hw_event.mmap)
atomic_inc(&nr_mmap_tracking);
--
1.6.1.3

Subject: [PATCH 08/29] x86/perfcounters: make interrupt handler model specific

This separates the perfcounter interrupt handler for AMD and Intel
cpus. The AMD interrupt handler will be implemented in a follow-on
patch.

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/kernel/cpu/perf_counter.c | 16 +++++++++++++---
1 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 808a1a1..9d90de0 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -4,6 +4,7 @@
* Copyright(C) 2008 Thomas Gleixner <[email protected]>
* Copyright(C) 2008 Red Hat, Inc., Ingo Molnar
* Copyright(C) 2009 Jaswinder Singh Rajput
+ * Copyright(C) 2009 Advanced Micro Devices, Inc., Robert Richter
*
* For licencing details see kernel-base/COPYING
*/
@@ -47,6 +48,7 @@ struct cpu_hw_counters {
* struct x86_pmu - generic x86 pmu
*/
struct x86_pmu {
+ int (*handle_irq)(struct pt_regs *, int);
u64 (*save_disable_all)(void);
void (*restore_all)(u64);
u64 (*get_status)(u64);
@@ -241,6 +243,10 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
struct hw_perf_counter *hwc = &counter->hw;
int err;

+ /* disable temporarily */
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+ return -ENOSYS;
+
if (unlikely(!perf_counters_initialized))
return -EINVAL;

@@ -780,7 +786,7 @@ static void perf_save_and_restart(struct perf_counter *counter)
* This handler is triggered by the local APIC, so the APIC IRQ handling
* rules apply:
*/
-static int __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi)
+static int intel_pmu_handle_irq(struct pt_regs *regs, int nmi)
{
int bit, cpu = smp_processor_id();
u64 ack, status;
@@ -827,6 +833,8 @@ out:
return ret;
}

+static int amd_pmu_handle_irq(struct pt_regs *regs, int nmi) { return 0; }
+
void perf_counter_unthrottle(void)
{
struct cpu_hw_counters *cpuc;
@@ -851,7 +859,7 @@ void smp_perf_counter_interrupt(struct pt_regs *regs)
irq_enter();
apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR);
ack_APIC_irq();
- __smp_perf_counter_interrupt(regs, 0);
+ x86_pmu->handle_irq(regs, 0);
irq_exit();
}

@@ -908,7 +916,7 @@ perf_counter_nmi_handler(struct notifier_block *self,
regs = args->regs;

apic_write(APIC_LVTPC, APIC_DM_NMI);
- ret = __smp_perf_counter_interrupt(regs, 1);
+ ret = x86_pmu->handle_irq(regs, 1);

return ret ? NOTIFY_STOP : NOTIFY_OK;
}
@@ -920,6 +928,7 @@ static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
};

static struct x86_pmu intel_pmu = {
+ .handle_irq = intel_pmu_handle_irq,
.save_disable_all = intel_pmu_save_disable_all,
.restore_all = intel_pmu_restore_all,
.get_status = intel_pmu_get_status,
@@ -934,6 +943,7 @@ static struct x86_pmu intel_pmu = {
};

static struct x86_pmu amd_pmu = {
+ .handle_irq = amd_pmu_handle_irq,
.save_disable_all = amd_pmu_save_disable_all,
.restore_all = amd_pmu_restore_all,
.get_status = amd_pmu_get_status,
--
1.6.1.3

Subject: [PATCH 29/29] perfcounters: updating kerneltop documentation

The documentation about the kerneltop build became outdated when
perfstat was implemented. This updates it.

Signed-off-by: Robert Richter <[email protected]>
---
Documentation/perf_counter/builtin-top.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/Documentation/perf_counter/builtin-top.c b/Documentation/perf_counter/builtin-top.c
index dea016f..a4d00f4 100644
--- a/Documentation/perf_counter/builtin-top.c
+++ b/Documentation/perf_counter/builtin-top.c
@@ -3,7 +3,7 @@

Build with:

- cc -O6 -Wall -c -o kerneltop.o kerneltop.c -lrt
+ make -C Documentation/perf_counter/

Sample output:

--
1.6.1.3

Subject: [PATCH 16/29] x86/perfcounters: make pmu version generic

This makes the use of the pmu version variable generic by moving it
into struct x86_pmu. Also, some debug messages have been generalized.

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/kernel/cpu/perf_counter.c | 24 +++++++++++++-----------
1 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 75dbb1f..15d2c03 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -39,6 +39,8 @@ struct cpu_hw_counters {
* struct x86_pmu - generic x86 pmu
*/
struct x86_pmu {
+ const char *name;
+ int version;
int (*handle_irq)(struct pt_regs *, int);
u64 (*save_disable_all)(void);
void (*restore_all)(u64);
@@ -61,8 +63,6 @@ static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = {
.enabled = 1,
};

-static __read_mostly int intel_perfmon_version;
-
/*
* Intel PerfMon v3. Used on Core2 and later.
*/
@@ -658,7 +658,7 @@ void perf_counter_print_debug(void)
cpu = smp_processor_id();
cpuc = &per_cpu(cpu_hw_counters, cpu);

- if (intel_perfmon_version >= 2) {
+ if (x86_pmu.version >= 2) {
rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
@@ -884,6 +884,7 @@ static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
};

static struct x86_pmu intel_pmu = {
+ .name = "Intel",
.handle_irq = intel_pmu_handle_irq,
.save_disable_all = intel_pmu_save_disable_all,
.restore_all = intel_pmu_restore_all,
@@ -897,6 +898,7 @@ static struct x86_pmu intel_pmu = {
};

static struct x86_pmu amd_pmu = {
+ .name = "AMD",
.handle_irq = amd_pmu_handle_irq,
.save_disable_all = amd_pmu_save_disable_all,
.restore_all = amd_pmu_restore_all,
@@ -918,6 +920,7 @@ static int intel_pmu_init(void)
union cpuid10_eax eax;
unsigned int unused;
unsigned int ebx;
+ int version;

if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
return -ENODEV;
@@ -930,16 +933,12 @@ static int intel_pmu_init(void)
if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
return -ENODEV;

- intel_perfmon_version = eax.split.version_id;
- if (intel_perfmon_version < 2)
+ version = eax.split.version_id;
+ if (version < 2)
return -ENODEV;

- pr_info("Intel Performance Monitoring support detected.\n");
- pr_info("... version: %d\n", intel_perfmon_version);
- pr_info("... bit width: %d\n", eax.split.bit_width);
- pr_info("... mask length: %d\n", eax.split.mask_length);
-
x86_pmu = intel_pmu;
+ x86_pmu.version = version;
x86_pmu.num_counters = eax.split.num_counters;
x86_pmu.num_counters_fixed = edx.split.num_counters_fixed;
x86_pmu.counter_bits = eax.split.bit_width;
@@ -951,7 +950,6 @@ static int intel_pmu_init(void)
static int amd_pmu_init(void)
{
x86_pmu = amd_pmu;
- pr_info("AMD Performance Monitoring support detected.\n");
return 0;
}

@@ -972,6 +970,10 @@ void __init init_hw_perf_counters(void)
if (err != 0)
return;

+ pr_info("%s Performance Monitoring support detected.\n", x86_pmu.name);
+ pr_info("... version: %d\n", x86_pmu.version);
+ pr_info("... bit width: %d\n", x86_pmu.counter_bits);
+
pr_info("... num counters: %d\n", x86_pmu.num_counters);
if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
--
1.6.1.3

Subject: [PATCH 23/29] x86/perfcounters: change and remove pmu initialization checks

Some functions are only called if the pmu was properly initialized, so
these initialization checks can be removed. The way initialization is
checked has changed too: now the pointer to the interrupt handler is
tested, and if it is set the pmu is initialized. This also removes a
static variable and uses struct x86_pmu as the only data source for
the check.

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/kernel/cpu/perf_counter.c | 34 +++++++++++++---------------------
1 files changed, 13 insertions(+), 21 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index df9012b..2d3681b 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -23,7 +23,6 @@
#include <asm/stacktrace.h>
#include <asm/nmi.h>

-static bool perf_counters_initialized __read_mostly;
static u64 perf_counter_mask __read_mostly;

struct cpu_hw_counters {
@@ -227,6 +226,11 @@ static void hw_perf_counter_destroy(struct perf_counter *counter)
}
}

+static inline int x86_pmu_initialized(void)
+{
+ return x86_pmu.handle_irq != NULL;
+}
+
/*
* Setup the hardware configuration for a given hw_event_type
*/
@@ -240,8 +244,8 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
return -ENOSYS;

- if (unlikely(!perf_counters_initialized))
- return -EINVAL;
+ if (!x86_pmu_initialized())
+ return -ENODEV;

err = 0;
if (atomic_inc_not_zero(&num_counters)) {
@@ -348,9 +352,8 @@ static u64 amd_pmu_save_disable_all(void)

u64 hw_perf_save_disable(void)
{
- if (unlikely(!perf_counters_initialized))
+ if (!x86_pmu_initialized())
return 0;
-
return x86_pmu.save_disable_all();
}
/*
@@ -388,9 +391,8 @@ static void amd_pmu_restore_all(u64 ctrl)

void hw_perf_restore(u64 ctrl)
{
- if (unlikely(!perf_counters_initialized))
+ if (!x86_pmu_initialized())
return;
-
x86_pmu.restore_all(ctrl);
}
/*
@@ -402,8 +404,6 @@ static inline u64 intel_pmu_get_status(u64 mask)
{
u64 status;

- if (unlikely(!perf_counters_initialized))
- return 0;
rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);

return status;
@@ -417,10 +417,6 @@ static inline void intel_pmu_ack_status(u64 ack)
static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
{
int err;
-
- if (unlikely(!perf_counters_initialized))
- return;
-
err = checking_wrmsrl(hwc->config_base + idx,
hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE);
}
@@ -428,10 +424,6 @@ static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
static inline void x86_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
{
int err;
-
- if (unlikely(!perf_counters_initialized))
- return;
-
err = checking_wrmsrl(hwc->config_base + idx,
hwc->config);
}
@@ -787,10 +779,10 @@ void perf_counter_unthrottle(void)
{
struct cpu_hw_counters *cpuc;

- if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
+ if (!x86_pmu_initialized())
return;

- if (unlikely(!perf_counters_initialized))
+ if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
return;

cpuc = &__get_cpu_var(cpu_hw_counters);
@@ -829,8 +821,9 @@ void perf_counters_lapic_init(int nmi)
{
u32 apic_val;

- if (!perf_counters_initialized)
+ if (!x86_pmu_initialized())
return;
+
/*
* Enable the performance counter vector in the APIC LVT:
*/
@@ -988,7 +981,6 @@ void __init init_hw_perf_counters(void)
((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED;

pr_info("... counter mask: %016Lx\n", perf_counter_mask);
- perf_counters_initialized = true;

perf_counters_lapic_init(0);
register_die_notifier(&perf_counter_nmi_notifier);
--
1.6.1.3

Subject: [PATCH 26/29] x86/perfcounters: introduce max_period variable

The maximum counter period that can be programmed differs between x86
pmus. This introduces a max_period value so that the generic
implementation can check the maximum period for all models.

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/kernel/cpu/perf_counter.c | 20 ++++++++++++--------
1 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index a8a53ab..4b8715b 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -54,6 +54,7 @@ struct x86_pmu {
int num_counters_fixed;
int counter_bits;
u64 counter_mask;
+ u64 max_period;
};

static struct x86_pmu x86_pmu __read_mostly;
@@ -279,14 +280,8 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
hwc->nmi = 1;

hwc->irq_period = hw_event->irq_period;
- /*
- * Intel PMCs cannot be accessed sanely above 32 bit width,
- * so we install an artificial 1<<31 period regardless of
- * the generic counter period:
- */
- if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
- if ((s64)hwc->irq_period <= 0 || hwc->irq_period > 0x7FFFFFFF)
- hwc->irq_period = 0x7FFFFFFF;
+ if ((s64)hwc->irq_period <= 0 || hwc->irq_period > x86_pmu.max_period)
+ hwc->irq_period = x86_pmu.max_period;

atomic64_set(&hwc->period_left, hwc->irq_period);

@@ -910,6 +905,12 @@ static struct x86_pmu intel_pmu = {
.event_map = intel_pmu_event_map,
.raw_event = intel_pmu_raw_event,
.max_events = ARRAY_SIZE(intel_perfmon_event_map),
+ /*
+ * Intel PMCs cannot be accessed sanely above 32 bit width,
+ * so we install an artificial 1<<31 period regardless of
+ * the generic counter period:
+ */
+ .max_period = (1ULL << 31) - 1,
};

static struct x86_pmu amd_pmu = {
@@ -927,6 +928,8 @@ static struct x86_pmu amd_pmu = {
.num_counters = 4,
.counter_bits = 48,
.counter_mask = (1ULL << 48) - 1,
+ /* use highest bit to detect overflow */
+ .max_period = (1ULL << 47) - 1,
};

static int intel_pmu_init(void)
@@ -999,6 +1002,7 @@ void __init init_hw_perf_counters(void)
perf_max_counters = x86_pmu.num_counters;

pr_info("... value mask: %016Lx\n", x86_pmu.counter_mask);
+ pr_info("... max period: %016Lx\n", x86_pmu.max_period);

if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED;
--
1.6.1.3

Subject: [PATCH 11/29] x86/perfcounters: rename __hw_perf_counter_set_period into x86_perf_counter_set_period

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/kernel/cpu/perf_counter.c | 6 +++---
1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 6bbdc16..fa6541d 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -498,7 +498,7 @@ static DEFINE_PER_CPU(u64, prev_left[X86_PMC_IDX_MAX]);
* To be called with the counter disabled in hw:
*/
static void
-__hw_perf_counter_set_period(struct perf_counter *counter,
+x86_perf_counter_set_period(struct perf_counter *counter,
struct hw_perf_counter *hwc, int idx)
{
s64 left = atomic64_read(&hwc->period_left);
@@ -642,7 +642,7 @@ try_generic:
*/
barrier();

- __hw_perf_counter_set_period(counter, hwc, idx);
+ x86_perf_counter_set_period(counter, hwc, idx);
__x86_pmu_enable(counter, hwc, idx);

return 0;
@@ -731,7 +731,7 @@ static void perf_save_and_restart(struct perf_counter *counter)
int idx = hwc->idx;

x86_perf_counter_update(counter, hwc, idx);
- __hw_perf_counter_set_period(counter, hwc, idx);
+ x86_perf_counter_set_period(counter, hwc, idx);

if (counter->state == PERF_COUNTER_STATE_ACTIVE)
__x86_pmu_enable(counter, hwc, idx);
--
1.6.1.3

Subject: [PATCH 25/29] x86/perfcounters: return raw count with x86_perf_counter_update()

To check whether a counter has overflowed on AMD cpus, the upper bit
of the raw counter value must be tested. This value is already
available internally in x86_perf_counter_update(). Now the value is
returned so that it can be used directly to check for overflows.
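
A stand-alone user-space sketch of why the top bit works as an
overflow flag (not part of the patch; the period is an arbitrary
example, 48 is the AMD counter width used elsewhere in this series):

    #include <stdio.h>
    #include <stdint.h>

    #define COUNTER_BITS    48
    #define COUNTER_MASK    ((1ULL << COUNTER_BITS) - 1)

    int main(void)
    {
        uint64_t period = 100000;
        /* the counter is preloaded so that it wraps after 'period' events */
        uint64_t raw = (1ULL << COUNTER_BITS) - period;

        /* before the wrap bit 47 is still set: no overflow yet */
        printf("%d\n", !!(raw & (1ULL << (COUNTER_BITS - 1))));

        /* 'period' events later the value wraps around to 0 */
        raw = (raw + period) & COUNTER_MASK;

        /* bit 47 is now clear: the counter overflowed */
        printf("%d\n", !!(raw & (1ULL << (COUNTER_BITS - 1))));

        return 0;
    }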

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/kernel/cpu/perf_counter.c | 7 ++++---
1 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index f4d59d4..a8a53ab 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -132,7 +132,7 @@ static u64 amd_pmu_raw_event(u64 event)
* Can only be executed on the CPU where the counter is active.
* Returns the delta events processed.
*/
-static void
+static u64
x86_perf_counter_update(struct perf_counter *counter,
struct hw_perf_counter *hwc, int idx)
{
@@ -165,6 +165,8 @@ again:

atomic64_add(delta, &counter->count);
atomic64_sub(delta, &hwc->period_left);
+
+ return new_raw_count;
}

static atomic_t num_counters;
@@ -785,8 +787,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs, int nmi)
continue;
counter = cpuc->counters[idx];
hwc = &counter->hw;
- x86_perf_counter_update(counter, hwc, idx);
- val = atomic64_read(&hwc->prev_count);
+ val = x86_perf_counter_update(counter, hwc, idx);
if (val & (1ULL << (x86_pmu.counter_bits - 1)))
continue;
/* counter overflow */
--
1.6.1.3

Subject: [PATCH 22/29] x86/perfcounters: rework counter disable functions

As already done for the enable functions, this patch reworks the
disable functions and introduces x86_pmu_disable_counter(). The
internal function interface in struct x86_pmu has changed too.

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/kernel/cpu/perf_counter.c | 48 +++++++++++++++++------------------
1 files changed, 23 insertions(+), 25 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index ae55933..df9012b 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -45,7 +45,7 @@ struct x86_pmu {
u64 (*save_disable_all)(void);
void (*restore_all)(u64);
void (*enable)(struct hw_perf_counter *, int);
- void (*disable)(int, u64);
+ void (*disable)(struct hw_perf_counter *, int);
unsigned eventsel;
unsigned perfctr;
u64 (*event_map)(int);
@@ -425,28 +425,19 @@ static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE);
}

-static void intel_pmu_disable_counter(int idx, u64 config)
+static inline void x86_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
{
- wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx, config);
-}
-
-static void amd_pmu_disable_counter(int idx, u64 config)
-{
- wrmsrl(MSR_K7_EVNTSEL0 + idx, config);
-
-}
+ int err;

-static void hw_perf_disable(int idx, u64 config)
-{
if (unlikely(!perf_counters_initialized))
return;

- x86_pmu.disable(idx, config);
+ err = checking_wrmsrl(hwc->config_base + idx,
+ hwc->config);
}

static inline void
-__pmc_fixed_disable(struct perf_counter *counter,
- struct hw_perf_counter *hwc, int __idx)
+intel_pmu_disable_fixed(struct hw_perf_counter *hwc, int __idx)
{
int idx = __idx - X86_PMC_IDX_FIXED;
u64 ctrl_val, mask;
@@ -460,13 +451,20 @@ __pmc_fixed_disable(struct perf_counter *counter,
}

static inline void
-__x86_pmu_disable(struct perf_counter *counter,
- struct hw_perf_counter *hwc, int idx)
+intel_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
{
- if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
- __pmc_fixed_disable(counter, hwc, idx);
- else
- hw_perf_disable(idx, hwc->config);
+ if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
+ intel_pmu_disable_fixed(hwc, idx);
+ return;
+ }
+
+ x86_pmu_disable_counter(hwc, idx);
+}
+
+static inline void
+amd_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
+{
+ x86_pmu_disable_counter(hwc, idx);
}

static DEFINE_PER_CPU(u64, prev_left[X86_PMC_IDX_MAX]);
@@ -551,7 +549,7 @@ static void amd_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
if (cpuc->enabled)
x86_pmu_enable_counter(hwc, idx);
else
- amd_pmu_disable_counter(idx, hwc->config);
+ x86_pmu_disable_counter(hwc, idx);
}

static int
@@ -622,7 +620,7 @@ try_generic:

perf_counters_lapic_init(hwc->nmi);

- __x86_pmu_disable(counter, hwc, idx);
+ x86_pmu.disable(hwc, idx);

cpuc->counters[idx] = counter;
set_bit(idx, cpuc->active);
@@ -694,7 +692,7 @@ static void x86_pmu_disable(struct perf_counter *counter)
* could reenable again:
*/
clear_bit(idx, cpuc->active);
- __x86_pmu_disable(counter, hwc, idx);
+ x86_pmu.disable(hwc, idx);

/*
* Make sure the cleared pointer becomes visible before we
@@ -762,7 +760,7 @@ again:

intel_pmu_save_and_restart(counter);
if (perf_counter_overflow(counter, nmi, regs, 0))
- __x86_pmu_disable(counter, &counter->hw, bit);
+ intel_pmu_disable_counter(&counter->hw, bit);
}

intel_pmu_ack_status(ack);
--
1.6.1.3

Subject: [PATCH 27/29] x86/perfcounters: remove vendor check in fixed_mode_idx()

The function fixed_mode_idx() is used generically. Now it checks the
num_counters_fixed value instead of the vendor to decide whether fixed
counters are present.

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/kernel/cpu/perf_counter.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 4b8715b..d1c8036 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -542,7 +542,7 @@ fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)
{
unsigned int event;

- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+ if (!x86_pmu.num_counters_fixed)
return -1;

if (unlikely(hwc->nmi))
--
1.6.1.3

Subject: [PATCH 12/29] x86/perfcounters: rename intel only functions

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/kernel/cpu/perf_counter.c | 8 ++++----
1 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index fa6541d..5a52d73 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -725,7 +725,7 @@ static void x86_pmu_disable(struct perf_counter *counter)
* Save and restart an expired counter. Called by NMI contexts,
* so it has to be careful about preempting normal counter ops:
*/
-static void perf_save_and_restart(struct perf_counter *counter)
+static void intel_pmu_save_and_restart(struct perf_counter *counter)
{
struct hw_perf_counter *hwc = &counter->hw;
int idx = hwc->idx;
@@ -753,7 +753,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs, int nmi)
struct cpu_hw_counters *cpuc = &per_cpu(cpu_hw_counters, cpu);
int ret = 0;

- cpuc->throttle_ctrl = hw_perf_save_disable();
+ cpuc->throttle_ctrl = intel_pmu_save_disable_all();

status = intel_pmu_get_status(cpuc->throttle_ctrl);
if (!status)
@@ -770,7 +770,7 @@ again:
if (!counter)
continue;

- perf_save_and_restart(counter);
+ intel_pmu_save_and_restart(counter);
if (perf_counter_overflow(counter, nmi, regs, 0))
__x86_pmu_disable(counter, &counter->hw, bit);
}
@@ -788,7 +788,7 @@ out:
* Restore - do not reenable when global enable is off or throttled:
*/
if (++cpuc->interrupts < PERFMON_MAX_INTERRUPTS)
- hw_perf_restore(cpuc->throttle_ctrl);
+ intel_pmu_restore_all(cpuc->throttle_ctrl);

return ret;
}
--
1.6.1.3

Subject: [PATCH 18/29] x86/perfcounters: rename cpuc->active_mask

This is to have a consistent naming scheme with cpuc->used.

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/kernel/cpu/perf_counter.c | 10 +++++-----
1 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 3f3ae47..9ec51a6 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -29,9 +29,9 @@ static u64 perf_counter_mask __read_mostly;
struct cpu_hw_counters {
struct perf_counter *counters[X86_PMC_IDX_MAX];
unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ unsigned long active[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
unsigned long interrupts;
u64 throttle_ctrl;
- unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
int enabled;
};

@@ -334,7 +334,7 @@ static u64 amd_pmu_save_disable_all(void)
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
u64 val;

- if (!test_bit(idx, cpuc->active_mask))
+ if (!test_bit(idx, cpuc->active))
continue;
rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE))
@@ -376,7 +376,7 @@ static void amd_pmu_restore_all(u64 ctrl)
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
u64 val;

- if (!test_bit(idx, cpuc->active_mask))
+ if (!test_bit(idx, cpuc->active))
continue;
rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
if (val & ARCH_PERFMON_EVENTSEL0_ENABLE)
@@ -424,7 +424,7 @@ static void amd_pmu_enable_counter(int idx, u64 config)
{
struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);

- set_bit(idx, cpuc->active_mask);
+ set_bit(idx, cpuc->active);
if (cpuc->enabled)
config |= ARCH_PERFMON_EVENTSEL0_ENABLE;

@@ -448,7 +448,7 @@ static void amd_pmu_disable_counter(int idx, u64 config)
{
struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);

- clear_bit(idx, cpuc->active_mask);
+ clear_bit(idx, cpuc->active);
wrmsrl(MSR_K7_EVNTSEL0 + idx, config);

}
--
1.6.1.3

2009-04-29 11:08:37

by Peter Zijlstra

Subject: Re: [PATCH 04/29] x86/perfcounters: rework pmc_amd_save_disable_all() and pmc_amd_restore_all()

On Wed, 2009-04-29 at 12:47 +0200, Robert Richter wrote:
> MSR reads and writes are expensive. This patch adds checks to avoid
> its usage where possible.

save_disable_all()
enable(1)
restore_all()

would not correctly enable counter 1 with the modification below, as
we do not write the configuration into the msr, which restore relies
on since it only toggles the _ENABLE bit.

That said, I'm not sure if that's really an issue, but it's why the
code does as it does.

A better abstraction could perhaps avoid this issue altogether.

> Signed-off-by: Robert Richter <[email protected]>
> ---
> arch/x86/kernel/cpu/perf_counter.c | 24 ++++++++++++++----------
> 1 files changed, 14 insertions(+), 10 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
> index d6d6529..75a0903 100644
> --- a/arch/x86/kernel/cpu/perf_counter.c
> +++ b/arch/x86/kernel/cpu/perf_counter.c
> @@ -334,11 +334,13 @@ static u64 pmc_amd_save_disable_all(void)
> for (idx = 0; idx < nr_counters_generic; idx++) {
> u64 val;
>
> + if (!test_bit(idx, cpuc->active_mask))
> + continue;
> rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
> - if (val & ARCH_PERFMON_EVENTSEL0_ENABLE) {
> - val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
> - wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
> - }
> + if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE))
> + continue;
> + val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
> + wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
> }
>
> return enabled;
> @@ -372,13 +374,15 @@ static void pmc_amd_restore_all(u64 ctrl)
> return;
>
> for (idx = 0; idx < nr_counters_generic; idx++) {
> - if (test_bit(idx, cpuc->active_mask)) {
> - u64 val;
> + u64 val;
>
> - rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
> - val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
> - wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
> - }
> + if (!test_bit(idx, cpuc->active_mask))
> + continue;
> + rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
> + if (val & ARCH_PERFMON_EVENTSEL0_ENABLE)
> + continue;
> + val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
> + wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
> }
> }
>

2009-04-29 11:10:11

by Ingo Molnar

Subject: Re: [PATCH 0/29] x86/perfcounters: x86 and AMD cpu updates


* Robert Richter <[email protected]> wrote:

> This patch series updates the perfcounters implementation mainly
> for the x86 architecture.

Wow, very nice series! Still havent looked through all of them, but
wanted to give some quick feedback that the splitup and direction
looks all good.

> Also, it introduces a data structure (struct pmu) describing a
> generic performance monitoring unit (pmu). This structure is a
> replacement for struct hw_perf_counter_ops. Similiar, I introduced
> struct x86_pmu for the x86 architecture (as a replacement for
> struct pmc_x86_ops).

Looks sensible. There will eventually be PMU features that dont fit
the hw-counter abstraction but which can still be expressed at the
general counter level.

> There are patches for x86 with some fixes and cleanups, a change
> in the model specific split and a complete rework of AMD pmu code.
> The result is simplified model specific code and more generalized
> and unified code. Features that are only supported by AMD or Intel
> are now implemented in vendor specific functions.

Nice!

> The AMD pmu differs to Intel, especially there is no status
> register and also there are no fixed counters. This makes a
> separate interrupt handler for AMD cpus necessary. Also, a global
> disable/enable of the performance counters (e.g. to avoid NMIs to
> protect the modification of a list) is expensive on AMD cpus
> leading to up to 4 msr reads/writes per counter. There is still
> some more work to do here to avoid this.

Yeah. The previous code was really just a first-level approximation
to show that it can be done.

> This patch series bases on the tip/percounters/core branch.
>
> I developed this patches based on 03ced43 and later rebased to
> 1b88991. The latest tip/percounters/core branch seems to be
> broken, no nmis are delivered, only perfcounter interrupts with no
> results on kerneltop. I am still debugging this. However, I could
> test successfully the patch series based on 03ced43 and want to
> release the patches anyway.

hm, it works all fine for me. This is "perf top" output from an
AMD/Barcelona box:

------------------------------------------------------------------------------
KernelTop: 139908 irqs/sec kernel: 9.5% [NMI, 100000 CPU cycles], (all, 16 CPUs)
------------------------------------------------------------------------------

events pcnt RIP kernel function
______ ______ _____ ________________ _______________

11038.00 - 22.2% - ffffffff8037a090 : clear_page_c
5842.00 - 11.7% - ffffffff804c6e02 : acpi_pm_read
2235.00 - 4.5% - ffffffff80579530 : page_fault
1518.00 - 3.0% - ffffffff8037a300 : copy_user_generic_string!
1184.00 - 2.4% - ffffffff80291598 : get_page_from_freelist
899.00 - 1.8% - ffffffff8057919a : _spin_lock
824.00 - 1.7% - ffffffff802a0c0a : unmap_vmas
739.00 - 1.5% - ffffffff8029d8a4 : __dec_zone_state
696.00 - 1.4% - ffffffff8028aafe : perf_swcounter_event
672.00 - 1.3% - ffffffff802a1b2e : handle_mm_fault

that's NMIs delivered to 16 cores. No lockups and no stuck IRQ
handling.

Would be nice to fix this...

Ingo

2009-04-29 11:12:37

by Ingo Molnar

Subject: Re: [PATCH 04/29] x86/perfcounters: rework pmc_amd_save_disable_all() and pmc_amd_restore_all()


* Peter Zijlstra <[email protected]> wrote:

> On Wed, 2009-04-29 at 12:47 +0200, Robert Richter wrote:
> > MSR reads and writes are expensive. This patch adds checks to avoid
> > its usage where possible.
>
> save_disable_all()
> enable(1)
> restore_all()
>
> would not correctly enable 1 with the below modification as we do
> not write the configuration into the msr, on which restore relies,
> as it only toggles the _ENABLE bit.
>
> That said, I'm not sure if that's really an issue, but its why the
> does does as it does.
>
> A better abstraction could perhaps avoid this issue all-together.

Could we remove the disable-all facility altogether and make the
core code NMI-safe? The current approach wont scale on CPUs that
dont have global-disable features.

disable-all was arguably a hack i introduced and which spread too
far. Can you see a conceptual need for it?

Ingo

2009-04-29 11:13:21

by Ingo Molnar

Subject: Re: [PATCH 29/29] perfcounters: updating kerneltop documentation


* Robert Richter <[email protected]> wrote:

> The documentation about the kerneltop build was outdated after
> perfstat has been implemented. This updates it.
>
> Signed-off-by: Robert Richter <[email protected]>
> ---
> Documentation/perf_counter/builtin-top.c | 2 +-
> 1 files changed, 1 insertions(+), 1 deletions(-)
>
> diff --git a/Documentation/perf_counter/builtin-top.c b/Documentation/perf_counter/builtin-top.c
> index dea016f..a4d00f4 100644
> --- a/Documentation/perf_counter/builtin-top.c
> +++ b/Documentation/perf_counter/builtin-top.c
> @@ -3,7 +3,7 @@
>
> Build with:
>
> - cc -O6 -Wall -c -o kerneltop.o kerneltop.c -lrt
> + make -C Documentation/perf_counter/

These are leftover comments - i think we should just remove these
comments (and the sample output) and move it into the
perf_counter/Documentation/ help files?

Ingo

2009-04-29 11:14:36

by Ingo Molnar

Subject: Re: [PATCH 26/29] x86/perfcounters: introduce max_period variable


* Robert Richter <[email protected]> wrote:

> In x86 pmus the allowed counter period to programm differs. This
> introduces a max_period value and allows the generic implementation
> for all models to check the max period.
>
> Signed-off-by: Robert Richter <[email protected]>
> ---
> arch/x86/kernel/cpu/perf_counter.c | 20 ++++++++++++--------
> 1 files changed, 12 insertions(+), 8 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
> index a8a53ab..4b8715b 100644
> --- a/arch/x86/kernel/cpu/perf_counter.c
> +++ b/arch/x86/kernel/cpu/perf_counter.c
> @@ -54,6 +54,7 @@ struct x86_pmu {
> int num_counters_fixed;
> int counter_bits;
> u64 counter_mask;
> + u64 max_period;
> };
>
> static struct x86_pmu x86_pmu __read_mostly;
> @@ -279,14 +280,8 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
> hwc->nmi = 1;
>
> hwc->irq_period = hw_event->irq_period;
> - /*
> - * Intel PMCs cannot be accessed sanely above 32 bit width,
> - * so we install an artificial 1<<31 period regardless of
> - * the generic counter period:
> - */
> - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
> - if ((s64)hwc->irq_period <= 0 || hwc->irq_period > 0x7FFFFFFF)
> - hwc->irq_period = 0x7FFFFFFF;
> + if ((s64)hwc->irq_period <= 0 || hwc->irq_period > x86_pmu.max_period)
> + hwc->irq_period = x86_pmu.max_period;

btw., should hwc->irq_period perhaps have an s64 type?

Ingo

2009-04-29 11:16:49

by Peter Zijlstra

Subject: Re: [PATCH 04/29] x86/perfcounters: rework pmc_amd_save_disable_all() and pmc_amd_restore_all()

On Wed, 2009-04-29 at 13:11 +0200, Ingo Molnar wrote:
> * Peter Zijlstra <[email protected]> wrote:
>
> > On Wed, 2009-04-29 at 12:47 +0200, Robert Richter wrote:
> > > MSR reads and writes are expensive. This patch adds checks to avoid
> > > its usage where possible.
> >
> > save_disable_all()
> > enable(1)
> > restore_all()
> >
> > would not correctly enable 1 with the below modification as we do
> > not write the configuration into the msr, on which restore relies,
> > as it only toggles the _ENABLE bit.
> >
> > That said, I'm not sure if that's really an issue, but its why the
> > does does as it does.
> >
> > A better abstraction could perhaps avoid this issue all-together.
>
> Could we remove the disable-all facility altogether and make the
> core code NMI-safe? The current approach wont scale on CPUs that
> dont have global-disable features.
>
> disable-all was arguably a hack i introduced and which spread too
> far. Can you see a conceptual need for it?

power suffers the same issue and simply iterates the things like amd
does now.

The thing is, with a global disable you get slightly better coupling, so
in that respect it might be nice to have.

2009-04-29 11:19:51

by Ingo Molnar

Subject: Re: [PATCH 04/29] x86/perfcounters: rework pmc_amd_save_disable_all() and pmc_amd_restore_all()


* Peter Zijlstra <[email protected]> wrote:

> On Wed, 2009-04-29 at 13:11 +0200, Ingo Molnar wrote:
> > * Peter Zijlstra <[email protected]> wrote:
> >
> > > On Wed, 2009-04-29 at 12:47 +0200, Robert Richter wrote:
> > > > MSR reads and writes are expensive. This patch adds checks to avoid
> > > > its usage where possible.
> > >
> > > save_disable_all()
> > > enable(1)
> > > restore_all()
> > >
> > > would not correctly enable 1 with the below modification as we do
> > > not write the configuration into the msr, on which restore relies,
> > > as it only toggles the _ENABLE bit.
> > >
> > > That said, I'm not sure if that's really an issue, but its why the
> > > does does as it does.
> > >
> > > A better abstraction could perhaps avoid this issue all-together.
> >
> > Could we remove the disable-all facility altogether and make the
> > core code NMI-safe? The current approach wont scale on CPUs that
> > dont have global-disable features.
> >
> > disable-all was arguably a hack i introduced and which spread too
> > far. Can you see a conceptual need for it?
>
> power suffers the same issue and simply iterates the things like
> amd does now.
>
> The thing is, with a global disable you get slightly better
> coupling, so in that respect it might be nice to have.

ok. With system-wide profiling there's no global disable/enable in
the fastpath. Do we have any of them in the per task counter
fastpath?

Ingo

2009-04-29 11:22:27

by Ingo Molnar

Subject: Re: [PATCH 01/29] x86: perfcounter: remove X86_FEATURE_ARCH_PERFMON flag for AMD cpus


* Robert Richter <[email protected]> wrote:

> X86_FEATURE_ARCH_PERFMON is an Intel hardware feature that does not
> work on AMD CPUs. The flag is now only used in Intel specific code
> (especially initialization).
>
> Signed-off-by: Robert Richter <[email protected]>
> ---
> arch/x86/kernel/cpu/amd.c | 4 ----
> arch/x86/kernel/cpu/perf_counter.c | 6 +++---
> 2 files changed, 3 insertions(+), 7 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
> index fd69c51..7e4a459 100644
> --- a/arch/x86/kernel/cpu/amd.c
> +++ b/arch/x86/kernel/cpu/amd.c
> @@ -420,10 +420,6 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
> if (c->x86 >= 6)
> set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK);
>
> - /* Enable Performance counter for K7 and later */
> - if (c->x86 > 6 && c->x86 <= 0x11)
> - set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);

might make sense to introduce a 'virtual CPU ID flag' for it, like we
do in other cases, to signal that we support perfcounters on this
CPU.
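
Purely as an illustration of what that could look like (the flag name
and bit below are made up; only the mechanism, a Linux-defined
cpufeature bit set from the cpu init path, is real):

    /*
     * cpufeature.h, among the Linux-defined (word 3) flags where
     * X86_FEATURE_ARCH_PERFMON also lives; bit number is hypothetical:
     */
    #define X86_FEATURE_PERF_COUNTERS (3*32+20)

    /* in the vendor init path, once perfcounter support is known to work: */
    set_cpu_cap(c, X86_FEATURE_PERF_COUNTERS);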

Ingo

2009-04-29 11:22:59

by Peter Zijlstra

Subject: Re: [PATCH 0/29] x86/perfcounters: x86 and AMD cpu updates

On Wed, 2009-04-29 at 13:09 +0200, Ingo Molnar wrote:
> * Robert Richter <[email protected]> wrote:
>
> > This patch series updates the perfcounters implementation mainly
> > for the x86 architecture.
>
> Wow, very nice series! Still havent looked through all of them, but
> wanted to give some quick feedback that the splitup and direction
> looks all good.

Agreed, patches look nice, thanks!

> > I developed this patches based on 03ced43 and later rebased to
> > 1b88991. The latest tip/percounters/core branch seems to be
> > broken, no nmis are delivered, only perfcounter interrupts with no
> > results on kerneltop. I am still debugging this. However, I could
> > test successfully the patch series based on 03ced43 and want to
> > release the patches anyway.
>
> hm, it works all fine for me. This is "perf top" output from an
> AMD/Barcelona box:

Works on my opteron as well:

vendor_id : AuthenticAMD
cpu family : 15
model : 67
model name : Dual-Core AMD Opteron(tm) Processor 1216
stepping : 2


2009-04-29 11:24:48

by Ingo Molnar

Subject: Re: [PATCH 18/29] x86/perfcounters: rename cpuc->active_mask


* Robert Richter <[email protected]> wrote:

> This is to have a consistent naming scheme with cpuc->used.
>
> Signed-off-by: Robert Richter <[email protected]>
> ---
> arch/x86/kernel/cpu/perf_counter.c | 10 +++++-----
> 1 files changed, 5 insertions(+), 5 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
> index 3f3ae47..9ec51a6 100644
> --- a/arch/x86/kernel/cpu/perf_counter.c
> +++ b/arch/x86/kernel/cpu/perf_counter.c
> @@ -29,9 +29,9 @@ static u64 perf_counter_mask __read_mostly;
> struct cpu_hw_counters {
> struct perf_counter *counters[X86_PMC_IDX_MAX];
> unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
> + unsigned long active[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
> unsigned long interrupts;
> u64 throttle_ctrl;
> - unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
> int enabled;

better would be to change ->used to ->used_mask. That makes sure
nobody makes this typo:

if (cpuc->used)

it's a lot harder to typo this:

if (cpuc->used_mask)

Ingo

2009-04-29 11:28:12

by Peter Zijlstra

Subject: Re: [PATCH 04/29] x86/perfcounters: rework pmc_amd_save_disable_all() and pmc_amd_restore_all()

On Wed, 2009-04-29 at 13:19 +0200, Ingo Molnar wrote:
> * Peter Zijlstra <[email protected]> wrote:

> > > disable-all was arguably a hack i introduced and which spread too
> > > far. Can you see a conceptual need for it?
> >
> > power suffers the same issue and simply iterates the things like
> > amd does now.
> >
> > The thing is, with a global disable you get slightly better
> > coupling, so in that respect it might be nice to have.
>
> ok. With system-wide profiling there's no global disable/enable in
> the fastpath. Do we have any of them in the per task counter
> fastpath?

the scheduling bits seem to be littered with
hw_perf_save_disable/restore; other than that, I think the overflow
interrupt used it.

2009-04-29 11:28:54

by Ingo Molnar

Subject: Re: [PATCH 0/29] x86/perfcounters: x86 and AMD cpu updates


* Peter Zijlstra <[email protected]> wrote:

> On Wed, 2009-04-29 at 13:09 +0200, Ingo Molnar wrote:
> > * Robert Richter <[email protected]> wrote:
> >
> > > This patch series updates the perfcounters implementation mainly
> > > for the x86 architecture.
> >
> > Wow, very nice series! Still havent looked through all of them, but
> > wanted to give some quick feedback that the splitup and direction
> > looks all good.
>
> Agreed, patches look nice, thanks!
>
> > > I developed this patches based on 03ced43 and later rebased to
> > > 1b88991. The latest tip/percounters/core branch seems to be
> > > broken, no nmis are delivered, only perfcounter interrupts with no
> > > results on kerneltop. I am still debugging this. However, I could
> > > test successfully the patch series based on 03ced43 and want to
> > > release the patches anyway.
> >
> > hm, it works all fine for me. This is "perf top" output from an
> > AMD/Barcelona box:
>
> Works on my opteron as well:
>
> vendor_id : AuthenticAMD
> cpu family : 15
> model : 67
> model name : Dual-Core AMD Opteron(tm) Processor 1216
> stepping : 2

Mine is:

processor : 15
vendor_id : AuthenticAMD
cpu family : 16
model : 2
model name : Quad-Core AMD Opteron(tm) Processor 8356
stepping : 3
cpu MHz : 2300.000
cache size : 512 KB
physical id : 3
siblings : 1
core id : 3
cpu cores : 4
apicid : 19
initial apicid : 15
fpu : yes
fpu_exception : yes
cpuid level : 5
wp : yes
flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr
pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx
mmxext fxsr_opt pdpe1gb rdtscp lm 3dnowext 3dnow constant_tsc
arch_perfmon rep_good nonstop_tsc pni monitor cx16 lahf_lm
cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse
3dnowprefetch osvw ibs
bogomips : 4621.68
TLB size : 1024 4K pages
clflush size : 64
cache_alignment : 64
address sizes : 48 bits physical, 48 bits virtual
power management: ts ttp tm stc 100mhzsteps hwpstate

Ingo

2009-04-29 12:39:15

by Paul Mackerras

Subject: Re: [PATCH 04/29] x86/perfcounters: rework pmc_amd_save_disable_all() and pmc_amd_restore_all()

Ingo Molnar wrote:

> Could we remove the disable-all facility altogether and make the
> core code NMI-safe? The current approach wont scale on CPUs that
> dont have global-disable features.
>
> disable-all was arguably a hack i introduced and which spread too
> far. Can you see a conceptual need for it?

If you're talking about hw_perf_save_disable / hw_perf_restore, please
don't get rid of those. I use them to batch up counter enable/disable
operations so I only have to compute the PMU config once for each
batch rather than every time an individual counter is enabled or
disabled.
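
Roughly this pattern (a sketch only; the code that actually adds or
removes the counters is elided):

    u64 perf_flags;

    perf_flags = hw_perf_save_disable();
    /*
     * enable or disable any number of counters here; the expensive
     * PMU reconfiguration happens only once ...
     */
    hw_perf_restore(perf_flags);    /* ... namely here */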

Paul.

2009-04-29 12:56:22

by Ingo Molnar

Subject: Re: [PATCH 04/29] x86/perfcounters: rework pmc_amd_save_disable_all() and pmc_amd_restore_all()



* Paul Mackerras <[email protected]> wrote:

> Ingo Molnar wrote:
>
> > Could we remove the disable-all facility altogether and make the
> > core code NMI-safe? The current approach wont scale on CPUs that
> > dont have global-disable features.
> >
> > disable-all was arguably a hack i introduced and which spread
> > too far. Can you see a conceptual need for it?
>
> If you're talking about hw_perf_save_disable / hw_perf_restore,
> please don't get rid of those. I use them to batch up counter
> enable/disable operations so I only have to compute the PMU config
> once for each batch rather than every time an individual counter
> is enabled or disabled.

ok, sure.

Ingo

Subject: [tip:perfcounters/core] perf_counter, x86: remove X86_FEATURE_ARCH_PERFMON flag for AMD cpus

Commit-ID: da1a776be1ac7f78bb30ececbec4c1383163b079
Gitweb: http://git.kernel.org/tip/da1a776be1ac7f78bb30ececbec4c1383163b079
Author: Robert Richter <[email protected]>
AuthorDate: Wed, 29 Apr 2009 12:46:58 +0200
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 29 Apr 2009 14:51:00 +0200

perf_counter, x86: remove X86_FEATURE_ARCH_PERFMON flag for AMD cpus

X86_FEATURE_ARCH_PERFMON is an Intel hardware feature that does not
work on AMD CPUs. The flag is now only used in Intel specific code
(especially initialization).

[ Impact: refactor code ]

Signed-off-by: Robert Richter <[email protected]>
Acked-by: Peter Zijlstra <[email protected]>
Cc: Paul Mackerras <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>


---
arch/x86/kernel/cpu/amd.c | 4 ----
arch/x86/kernel/cpu/perf_counter.c | 6 +++---
2 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index fd69c51..7e4a459 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -420,10 +420,6 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
if (c->x86 >= 6)
set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK);

- /* Enable Performance counter for K7 and later */
- if (c->x86 > 6 && c->x86 <= 0x11)
- set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
-
if (!c->x86_model_id[0]) {
switch (c->x86) {
case 0xf:
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 0fcbaab..7d0f81d 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -949,6 +949,9 @@ static struct pmc_x86_ops *pmc_intel_init(void)
unsigned int unused;
unsigned int ebx;

+ if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
+ return NULL;
+
/*
* Check whether the Architectural PerfMon supports
* Branch Misses Retired Event or not.
@@ -987,9 +990,6 @@ static struct pmc_x86_ops *pmc_amd_init(void)

void __init init_hw_perf_counters(void)
{
- if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
- return;
-
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_INTEL:
pmc_ops = pmc_intel_init();

Subject: [tip:perfcounters/core] perf_counter, x86: declare perf_max_counters only for CONFIG_PERF_COUNTERS

Commit-ID: 829b42dd395c5801f6ae87da87ecbdcfd5ef1a6c
Gitweb: http://git.kernel.org/tip/829b42dd395c5801f6ae87da87ecbdcfd5ef1a6c
Author: Robert Richter <[email protected]>
AuthorDate: Wed, 29 Apr 2009 12:46:59 +0200
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 29 Apr 2009 14:51:01 +0200

perf_counter, x86: declare perf_max_counters only for CONFIG_PERF_COUNTERS

This is only needed for CONFIG_PERF_COUNTERS enabled.

[ Impact: cleanup ]

Signed-off-by: Robert Richter <[email protected]>
Acked-by: Peter Zijlstra <[email protected]>
Cc: Paul Mackerras <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>


---
include/linux/perf_counter.h | 3 ++-
1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 9814328..be10b3f 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -512,12 +512,13 @@ struct perf_cpu_context {
int recursion[4];
};

+#ifdef CONFIG_PERF_COUNTERS
+
/*
* Set by architecture code:
*/
extern int perf_max_counters;

-#ifdef CONFIG_PERF_COUNTERS
extern const struct hw_perf_counter_ops *
hw_perf_counter_init(struct perf_counter *counter);

Subject: [tip:perfcounters/core] perf_counter, x86: add default path to cpu detection

Commit-ID: 4138960a9251a265002b5cf07e671a49f8495381
Gitweb: http://git.kernel.org/tip/4138960a9251a265002b5cf07e671a49f8495381
Author: Robert Richter <[email protected]>
AuthorDate: Wed, 29 Apr 2009 12:47:00 +0200
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 29 Apr 2009 14:51:01 +0200

perf_counter, x86: add default path to cpu detection

This quits hw counter initialization immediately if no supported cpu is
detected.

[ Impact: cleanup ]

Signed-off-by: Robert Richter <[email protected]>
Cc: Paul Mackerras <[email protected]>
Acked-by: Peter Zijlstra <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>


---
arch/x86/kernel/cpu/perf_counter.c | 2 ++
1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 7d0f81d..d6d6529 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -997,6 +997,8 @@ void __init init_hw_perf_counters(void)
case X86_VENDOR_AMD:
pmc_ops = pmc_amd_init();
break;
+ default:
+ return;
}
if (!pmc_ops)
return;

Subject: [tip:perfcounters/core] perf_counter, x86: rework pmc_amd_save_disable_all() and pmc_amd_restore_all()

Commit-ID: 4295ee62660b13ddb87d41539f49b239e6e7d56f
Gitweb: http://git.kernel.org/tip/4295ee62660b13ddb87d41539f49b239e6e7d56f
Author: Robert Richter <[email protected]>
AuthorDate: Wed, 29 Apr 2009 12:47:01 +0200
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 29 Apr 2009 14:51:02 +0200

perf_counter, x86: rework pmc_amd_save_disable_all() and pmc_amd_restore_all()

MSR reads and writes are expensive. This patch adds checks to avoid
unnecessary accesses where possible.

[ Impact: micro-optimization on AMD CPUs ]

Signed-off-by: Robert Richter <[email protected]>
Cc: Paul Mackerras <[email protected]>
Acked-by: Peter Zijlstra <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>


---
arch/x86/kernel/cpu/perf_counter.c | 24 ++++++++++++++----------
1 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index d6d6529..75a0903 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -334,11 +334,13 @@ static u64 pmc_amd_save_disable_all(void)
for (idx = 0; idx < nr_counters_generic; idx++) {
u64 val;

+ if (!test_bit(idx, cpuc->active_mask))
+ continue;
rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
- if (val & ARCH_PERFMON_EVENTSEL0_ENABLE) {
- val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
- wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
- }
+ if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE))
+ continue;
+ val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
+ wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
}

return enabled;
@@ -372,13 +374,15 @@ static void pmc_amd_restore_all(u64 ctrl)
return;

for (idx = 0; idx < nr_counters_generic; idx++) {
- if (test_bit(idx, cpuc->active_mask)) {
- u64 val;
+ u64 val;

- rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
- val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
- wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
- }
+ if (!test_bit(idx, cpuc->active_mask))
+ continue;
+ rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
+ if (val & ARCH_PERFMON_EVENTSEL0_ENABLE)
+ continue;
+ val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+ wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
}
}
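
Counting the MSR accesses in the hunks above for a hypothetical box with four
K7 counters, two of them active and currently enabled (a rough illustration,
not a measurement from the patch):

	before: save_disable_all = 4 rdmsr + 2 wrmsr
	        restore_all      = 2 rdmsr + 2 wrmsr   -> 10 MSR accesses
	after:  save_disable_all = 2 rdmsr + 2 wrmsr
	        restore_all      = 2 rdmsr + 2 wrmsr   ->  8 MSR accesses

Inactive counters are no longer touched at all, and restore_all now skips the
wrmsr whenever the eventsel already has the enable bit set.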

Subject: [tip:perfcounters/core] perfcounters: rename struct hw_perf_counter_ops into struct pmu

Commit-ID: 4aeb0b4239bb3b67ed402cb9cef3e000c892cadf
Gitweb: http://git.kernel.org/tip/4aeb0b4239bb3b67ed402cb9cef3e000c892cadf
Author: Robert Richter <[email protected]>
AuthorDate: Wed, 29 Apr 2009 12:47:03 +0200
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 29 Apr 2009 14:51:03 +0200

perfcounters: rename struct hw_perf_counter_ops into struct pmu

This patch renames struct hw_perf_counter_ops to struct pmu. It
introduces a structure to describe a cpu-specific pmu (performance
monitoring unit), which may contain ops and data. The new name of the
structure fits better, is shorter, and thus easier to handle. Where
appropriate, function and variable names have been changed too.

[ Impact: cleanup ]

Signed-off-by: Robert Richter <[email protected]>
Cc: Paul Mackerras <[email protected]>
Acked-by: Peter Zijlstra <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>


---
arch/powerpc/kernel/perf_counter.c | 25 ++++++-------
arch/x86/kernel/cpu/perf_counter.c | 37 +++++++++----------
include/linux/perf_counter.h | 9 ++---
kernel/perf_counter.c | 68 +++++++++++++++++-------------------
4 files changed, 66 insertions(+), 73 deletions(-)

diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index bd76d0f..d9bbe5e 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -256,7 +256,7 @@ static int check_excludes(struct perf_counter **ctrs, int n_prev, int n_new)
return 0;
}

-static void power_perf_read(struct perf_counter *counter)
+static void power_pmu_read(struct perf_counter *counter)
{
long val, delta, prev;

@@ -405,7 +405,7 @@ void hw_perf_restore(u64 disable)
for (i = 0; i < cpuhw->n_counters; ++i) {
counter = cpuhw->counter[i];
if (counter->hw.idx && counter->hw.idx != hwc_index[i] + 1) {
- power_perf_read(counter);
+ power_pmu_read(counter);
write_pmc(counter->hw.idx, 0);
counter->hw.idx = 0;
}
@@ -477,7 +477,7 @@ static void counter_sched_in(struct perf_counter *counter, int cpu)
counter->oncpu = cpu;
counter->tstamp_running += counter->ctx->time - counter->tstamp_stopped;
if (is_software_counter(counter))
- counter->hw_ops->enable(counter);
+ counter->pmu->enable(counter);
}

/*
@@ -533,7 +533,7 @@ int hw_perf_group_sched_in(struct perf_counter *group_leader,
* re-enable the PMU in order to get hw_perf_restore to do the
* actual work of reconfiguring the PMU.
*/
-static int power_perf_enable(struct perf_counter *counter)
+static int power_pmu_enable(struct perf_counter *counter)
{
struct cpu_hw_counters *cpuhw;
unsigned long flags;
@@ -573,7 +573,7 @@ static int power_perf_enable(struct perf_counter *counter)
/*
* Remove a counter from the PMU.
*/
-static void power_perf_disable(struct perf_counter *counter)
+static void power_pmu_disable(struct perf_counter *counter)
{
struct cpu_hw_counters *cpuhw;
long i;
@@ -583,7 +583,7 @@ static void power_perf_disable(struct perf_counter *counter)
local_irq_save(flags);
pmudis = hw_perf_save_disable();

- power_perf_read(counter);
+ power_pmu_read(counter);

cpuhw = &__get_cpu_var(cpu_hw_counters);
for (i = 0; i < cpuhw->n_counters; ++i) {
@@ -607,10 +607,10 @@ static void power_perf_disable(struct perf_counter *counter)
local_irq_restore(flags);
}

-struct hw_perf_counter_ops power_perf_ops = {
- .enable = power_perf_enable,
- .disable = power_perf_disable,
- .read = power_perf_read
+struct pmu power_pmu = {
+ .enable = power_pmu_enable,
+ .disable = power_pmu_disable,
+ .read = power_pmu_read,
};

/* Number of perf_counters counting hardware events */
@@ -631,8 +631,7 @@ static void hw_perf_counter_destroy(struct perf_counter *counter)
}
}

-const struct hw_perf_counter_ops *
-hw_perf_counter_init(struct perf_counter *counter)
+const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
{
unsigned long ev;
struct perf_counter *ctrs[MAX_HWCOUNTERS];
@@ -705,7 +704,7 @@ hw_perf_counter_init(struct perf_counter *counter)

if (err)
return ERR_PTR(err);
- return &power_perf_ops;
+ return &power_pmu;
}

/*
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index ad663d5..95de980 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -515,8 +515,8 @@ __pmc_fixed_disable(struct perf_counter *counter,
}

static inline void
-__pmc_generic_disable(struct perf_counter *counter,
- struct hw_perf_counter *hwc, unsigned int idx)
+__x86_pmu_disable(struct perf_counter *counter,
+ struct hw_perf_counter *hwc, unsigned int idx)
{
if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
__pmc_fixed_disable(counter, hwc, idx);
@@ -591,8 +591,8 @@ __pmc_fixed_enable(struct perf_counter *counter,
}

static void
-__pmc_generic_enable(struct perf_counter *counter,
- struct hw_perf_counter *hwc, int idx)
+__x86_pmu_enable(struct perf_counter *counter,
+ struct hw_perf_counter *hwc, int idx)
{
if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
__pmc_fixed_enable(counter, hwc, idx);
@@ -626,7 +626,7 @@ fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)
/*
* Find a PMC slot for the freshly enabled / scheduled in counter:
*/
-static int pmc_generic_enable(struct perf_counter *counter)
+static int x86_pmu_enable(struct perf_counter *counter)
{
struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
struct hw_perf_counter *hwc = &counter->hw;
@@ -667,7 +667,7 @@ try_generic:

perf_counters_lapic_init(hwc->nmi);

- __pmc_generic_disable(counter, hwc, idx);
+ __x86_pmu_disable(counter, hwc, idx);

cpuc->counters[idx] = counter;
/*
@@ -676,7 +676,7 @@ try_generic:
barrier();

__hw_perf_counter_set_period(counter, hwc, idx);
- __pmc_generic_enable(counter, hwc, idx);
+ __x86_pmu_enable(counter, hwc, idx);

return 0;
}
@@ -731,13 +731,13 @@ void perf_counter_print_debug(void)
local_irq_enable();
}

-static void pmc_generic_disable(struct perf_counter *counter)
+static void x86_pmu_disable(struct perf_counter *counter)
{
struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
struct hw_perf_counter *hwc = &counter->hw;
unsigned int idx = hwc->idx;

- __pmc_generic_disable(counter, hwc, idx);
+ __x86_pmu_disable(counter, hwc, idx);

clear_bit(idx, cpuc->used);
cpuc->counters[idx] = NULL;
@@ -767,7 +767,7 @@ static void perf_save_and_restart(struct perf_counter *counter)
__hw_perf_counter_set_period(counter, hwc, idx);

if (counter->state == PERF_COUNTER_STATE_ACTIVE)
- __pmc_generic_enable(counter, hwc, idx);
+ __x86_pmu_enable(counter, hwc, idx);
}

/*
@@ -805,7 +805,7 @@ again:

perf_save_and_restart(counter);
if (perf_counter_overflow(counter, nmi, regs, 0))
- __pmc_generic_disable(counter, &counter->hw, bit);
+ __x86_pmu_disable(counter, &counter->hw, bit);
}

hw_perf_ack_status(ack);
@@ -1034,19 +1034,18 @@ void __init init_hw_perf_counters(void)
register_die_notifier(&perf_counter_nmi_notifier);
}

-static void pmc_generic_read(struct perf_counter *counter)
+static void x86_pmu_read(struct perf_counter *counter)
{
x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
}

-static const struct hw_perf_counter_ops x86_perf_counter_ops = {
- .enable = pmc_generic_enable,
- .disable = pmc_generic_disable,
- .read = pmc_generic_read,
+static const struct pmu pmu = {
+ .enable = x86_pmu_enable,
+ .disable = x86_pmu_disable,
+ .read = x86_pmu_read,
};

-const struct hw_perf_counter_ops *
-hw_perf_counter_init(struct perf_counter *counter)
+const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
{
int err;

@@ -1054,7 +1053,7 @@ hw_perf_counter_init(struct perf_counter *counter)
if (err)
return ERR_PTR(err);

- return &x86_perf_counter_ops;
+ return &pmu;
}

/*
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index be10b3f..c3db52d 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -334,9 +334,9 @@ struct hw_perf_counter {
struct perf_counter;

/**
- * struct hw_perf_counter_ops - performance counter hw ops
+ * struct pmu - generic performance monitoring unit
*/
-struct hw_perf_counter_ops {
+struct pmu {
int (*enable) (struct perf_counter *counter);
void (*disable) (struct perf_counter *counter);
void (*read) (struct perf_counter *counter);
@@ -381,7 +381,7 @@ struct perf_counter {
struct list_head sibling_list;
int nr_siblings;
struct perf_counter *group_leader;
- const struct hw_perf_counter_ops *hw_ops;
+ const struct pmu *pmu;

enum perf_counter_active_state state;
enum perf_counter_active_state prev_state;
@@ -519,8 +519,7 @@ struct perf_cpu_context {
*/
extern int perf_max_counters;

-extern const struct hw_perf_counter_ops *
-hw_perf_counter_init(struct perf_counter *counter);
+extern const struct pmu *hw_perf_counter_init(struct perf_counter *counter);

extern void perf_counter_task_sched_in(struct task_struct *task, int cpu);
extern void perf_counter_task_sched_out(struct task_struct *task, int cpu);
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 0939609..582108a 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -52,8 +52,7 @@ static DEFINE_MUTEX(perf_resource_mutex);
/*
* Architecture provided APIs - weak aliases:
*/
-extern __weak const struct hw_perf_counter_ops *
-hw_perf_counter_init(struct perf_counter *counter)
+extern __weak const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
{
return NULL;
}
@@ -124,7 +123,7 @@ counter_sched_out(struct perf_counter *counter,

counter->state = PERF_COUNTER_STATE_INACTIVE;
counter->tstamp_stopped = ctx->time;
- counter->hw_ops->disable(counter);
+ counter->pmu->disable(counter);
counter->oncpu = -1;

if (!is_software_counter(counter))
@@ -417,7 +416,7 @@ counter_sched_in(struct perf_counter *counter,
*/
smp_wmb();

- if (counter->hw_ops->enable(counter)) {
+ if (counter->pmu->enable(counter)) {
counter->state = PERF_COUNTER_STATE_INACTIVE;
counter->oncpu = -1;
return -EAGAIN;
@@ -1096,7 +1095,7 @@ static void __read(void *info)
local_irq_save(flags);
if (ctx->is_active)
update_context_time(ctx);
- counter->hw_ops->read(counter);
+ counter->pmu->read(counter);
update_counter_times(counter);
local_irq_restore(flags);
}
@@ -1922,7 +1921,7 @@ static void perf_counter_output(struct perf_counter *counter,
leader = counter->group_leader;
list_for_each_entry(sub, &leader->sibling_list, list_entry) {
if (sub != counter)
- sub->hw_ops->read(sub);
+ sub->pmu->read(sub);

group_entry.event = sub->hw_event.config;
group_entry.counter = atomic64_read(&sub->count);
@@ -2264,7 +2263,7 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
struct pt_regs *regs;

counter = container_of(hrtimer, struct perf_counter, hw.hrtimer);
- counter->hw_ops->read(counter);
+ counter->pmu->read(counter);

regs = get_irq_regs();
/*
@@ -2410,7 +2409,7 @@ static void perf_swcounter_disable(struct perf_counter *counter)
perf_swcounter_update(counter);
}

-static const struct hw_perf_counter_ops perf_ops_generic = {
+static const struct pmu perf_ops_generic = {
.enable = perf_swcounter_enable,
.disable = perf_swcounter_disable,
.read = perf_swcounter_read,
@@ -2460,7 +2459,7 @@ static void cpu_clock_perf_counter_read(struct perf_counter *counter)
cpu_clock_perf_counter_update(counter);
}

-static const struct hw_perf_counter_ops perf_ops_cpu_clock = {
+static const struct pmu perf_ops_cpu_clock = {
.enable = cpu_clock_perf_counter_enable,
.disable = cpu_clock_perf_counter_disable,
.read = cpu_clock_perf_counter_read,
@@ -2522,7 +2521,7 @@ static void task_clock_perf_counter_read(struct perf_counter *counter)
task_clock_perf_counter_update(counter, time);
}

-static const struct hw_perf_counter_ops perf_ops_task_clock = {
+static const struct pmu perf_ops_task_clock = {
.enable = task_clock_perf_counter_enable,
.disable = task_clock_perf_counter_disable,
.read = task_clock_perf_counter_read,
@@ -2574,7 +2573,7 @@ static void cpu_migrations_perf_counter_disable(struct perf_counter *counter)
cpu_migrations_perf_counter_update(counter);
}

-static const struct hw_perf_counter_ops perf_ops_cpu_migrations = {
+static const struct pmu perf_ops_cpu_migrations = {
.enable = cpu_migrations_perf_counter_enable,
.disable = cpu_migrations_perf_counter_disable,
.read = cpu_migrations_perf_counter_read,
@@ -2600,8 +2599,7 @@ static void tp_perf_counter_destroy(struct perf_counter *counter)
ftrace_profile_disable(perf_event_id(&counter->hw_event));
}

-static const struct hw_perf_counter_ops *
-tp_perf_counter_init(struct perf_counter *counter)
+static const struct pmu *tp_perf_counter_init(struct perf_counter *counter)
{
int event_id = perf_event_id(&counter->hw_event);
int ret;
@@ -2616,18 +2614,16 @@ tp_perf_counter_init(struct perf_counter *counter)
return &perf_ops_generic;
}
#else
-static const struct hw_perf_counter_ops *
-tp_perf_counter_init(struct perf_counter *counter)
+static const struct pmu *tp_perf_counter_init(struct perf_counter *counter)
{
return NULL;
}
#endif

-static const struct hw_perf_counter_ops *
-sw_perf_counter_init(struct perf_counter *counter)
+static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
{
struct perf_counter_hw_event *hw_event = &counter->hw_event;
- const struct hw_perf_counter_ops *hw_ops = NULL;
+ const struct pmu *pmu = NULL;
struct hw_perf_counter *hwc = &counter->hw;

/*
@@ -2639,7 +2635,7 @@ sw_perf_counter_init(struct perf_counter *counter)
*/
switch (perf_event_id(&counter->hw_event)) {
case PERF_COUNT_CPU_CLOCK:
- hw_ops = &perf_ops_cpu_clock;
+ pmu = &perf_ops_cpu_clock;

if (hw_event->irq_period && hw_event->irq_period < 10000)
hw_event->irq_period = 10000;
@@ -2650,9 +2646,9 @@ sw_perf_counter_init(struct perf_counter *counter)
* use the cpu_clock counter instead.
*/
if (counter->ctx->task)
- hw_ops = &perf_ops_task_clock;
+ pmu = &perf_ops_task_clock;
else
- hw_ops = &perf_ops_cpu_clock;
+ pmu = &perf_ops_cpu_clock;

if (hw_event->irq_period && hw_event->irq_period < 10000)
hw_event->irq_period = 10000;
@@ -2661,18 +2657,18 @@ sw_perf_counter_init(struct perf_counter *counter)
case PERF_COUNT_PAGE_FAULTS_MIN:
case PERF_COUNT_PAGE_FAULTS_MAJ:
case PERF_COUNT_CONTEXT_SWITCHES:
- hw_ops = &perf_ops_generic;
+ pmu = &perf_ops_generic;
break;
case PERF_COUNT_CPU_MIGRATIONS:
if (!counter->hw_event.exclude_kernel)
- hw_ops = &perf_ops_cpu_migrations;
+ pmu = &perf_ops_cpu_migrations;
break;
}

- if (hw_ops)
+ if (pmu)
hwc->irq_period = hw_event->irq_period;

- return hw_ops;
+ return pmu;
}

/*
@@ -2685,7 +2681,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
struct perf_counter *group_leader,
gfp_t gfpflags)
{
- const struct hw_perf_counter_ops *hw_ops;
+ const struct pmu *pmu;
struct perf_counter *counter;
long err;

@@ -2713,46 +2709,46 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
counter->cpu = cpu;
counter->hw_event = *hw_event;
counter->group_leader = group_leader;
- counter->hw_ops = NULL;
+ counter->pmu = NULL;
counter->ctx = ctx;

counter->state = PERF_COUNTER_STATE_INACTIVE;
if (hw_event->disabled)
counter->state = PERF_COUNTER_STATE_OFF;

- hw_ops = NULL;
+ pmu = NULL;

if (perf_event_raw(hw_event)) {
- hw_ops = hw_perf_counter_init(counter);
+ pmu = hw_perf_counter_init(counter);
goto done;
}

switch (perf_event_type(hw_event)) {
case PERF_TYPE_HARDWARE:
- hw_ops = hw_perf_counter_init(counter);
+ pmu = hw_perf_counter_init(counter);
break;

case PERF_TYPE_SOFTWARE:
- hw_ops = sw_perf_counter_init(counter);
+ pmu = sw_perf_counter_init(counter);
break;

case PERF_TYPE_TRACEPOINT:
- hw_ops = tp_perf_counter_init(counter);
+ pmu = tp_perf_counter_init(counter);
break;
}
done:
err = 0;
- if (!hw_ops)
+ if (!pmu)
err = -EINVAL;
- else if (IS_ERR(hw_ops))
- err = PTR_ERR(hw_ops);
+ else if (IS_ERR(pmu))
+ err = PTR_ERR(pmu);

if (err) {
kfree(counter);
return ERR_PTR(err);
}

- counter->hw_ops = hw_ops;
+ counter->pmu = pmu;

if (counter->hw_event.mmap)
atomic_inc(&nr_mmap_tracking);
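
To make the new contract explicit, a hedged sketch of what a backend now has
to provide (dummy_pmu and its callbacks are hypothetical and not part of the
patch; the three fields are exactly those of struct pmu in the hunk above):

	static int dummy_pmu_enable(struct perf_counter *counter)
	{
		/* schedule the counter onto hardware; nonzero means "no room" */
		return 0;
	}

	static void dummy_pmu_disable(struct perf_counter *counter)
	{
		/* take the counter off the hardware, folding in its final count */
	}

	static void dummy_pmu_read(struct perf_counter *counter)
	{
		/* refresh the counter value while it keeps running */
	}

	static const struct pmu dummy_pmu = {
		.enable		= dummy_pmu_enable,
		.disable	= dummy_pmu_disable,
		.read		= dummy_pmu_read,
	};

The core then only ever calls counter->pmu->enable/disable/read, regardless of
whether the backend is a hardware pmu or one of the software counters.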

Subject: [tip:perfcounters/core] perf_counter, x86: protect per-cpu variables with compile barriers only

Commit-ID: 527e26af3741a2168986d8b82653ffe173891324
Gitweb: http://git.kernel.org/tip/527e26af3741a2168986d8b82653ffe173891324
Author: Robert Richter <[email protected]>
AuthorDate: Wed, 29 Apr 2009 12:47:02 +0200
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 29 Apr 2009 14:51:02 +0200

perf_counter, x86: protect per-cpu variables with compile barriers only

Per-cpu variables need not be protected with cpu barriers (smp_wmb()).
Protection is only needed against preemption on the same cpu
(rescheduling or the nmi handler), and for that a compiler barrier is
sufficient.

[ Impact: micro-optimization ]

Signed-off-by: Robert Richter <[email protected]>
Cc: Paul Mackerras <[email protected]>
Acked-by: Peter Zijlstra <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>


---
arch/x86/kernel/cpu/perf_counter.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 75a0903..ad663d5 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -673,7 +673,7 @@ try_generic:
/*
* Make it visible before enabling the hw:
*/
- smp_wmb();
+ barrier();

__hw_perf_counter_set_period(counter, hwc, idx);
__pmc_generic_enable(counter, hwc, idx);
@@ -745,7 +745,7 @@ static void pmc_generic_disable(struct perf_counter *counter)
* Make sure the cleared pointer becomes visible before we
* (potentially) free the counter:
*/
- smp_wmb();
+ barrier();

/*
* Drain the remaining delta count out of a counter
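
For reference, barrier() is a compiler barrier only and emits no instruction
(definition paraphrased from the kernel headers of that time, not part of this
patch):

	#define barrier() __asm__ __volatile__("" : : : "memory")

That is all that is needed here: the compiler must not move the per-cpu store
past this point, and the only other observers are an NMI or a reschedule on
the same cpu, so no cross-cpu ordering instruction is required.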

Subject: [tip:perfcounters/core] perf_counter, x86: rename struct pmc_x86_ops into struct x86_pmu

Commit-ID: 5f4ec28ffe77c840354cce1820a3436106e9e0f1
Gitweb: http://git.kernel.org/tip/5f4ec28ffe77c840354cce1820a3436106e9e0f1
Author: Robert Richter <[email protected]>
AuthorDate: Wed, 29 Apr 2009 12:47:04 +0200
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 29 Apr 2009 14:51:04 +0200

perf_counter, x86: rename struct pmc_x86_ops into struct x86_pmu

This patch renames struct pmc_x86_ops to struct x86_pmu. It
introduces a structure to describe an x86 model specific pmu
(performance monitoring unit), which may contain ops and data. The new
name of the structure fits better, is shorter, and thus easier to
handle. Where appropriate, function and variable names have been
changed too.

[ Impact: cleanup ]

Signed-off-by: Robert Richter <[email protected]>
Cc: Paul Mackerras <[email protected]>
Acked-by: Peter Zijlstra <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>


---
arch/x86/kernel/cpu/perf_counter.c | 135 ++++++++++++++++++------------------
1 files changed, 68 insertions(+), 67 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 95de980..808a1a1 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -44,9 +44,9 @@ struct cpu_hw_counters {
};

/*
- * struct pmc_x86_ops - performance counter x86 ops
+ * struct x86_pmu - generic x86 pmu
*/
-struct pmc_x86_ops {
+struct x86_pmu {
u64 (*save_disable_all)(void);
void (*restore_all)(u64);
u64 (*get_status)(u64);
@@ -60,7 +60,7 @@ struct pmc_x86_ops {
int max_events;
};

-static struct pmc_x86_ops *pmc_ops __read_mostly;
+static struct x86_pmu *x86_pmu __read_mostly;

static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = {
.enabled = 1,
@@ -82,12 +82,12 @@ static const u64 intel_perfmon_event_map[] =
[PERF_COUNT_BUS_CYCLES] = 0x013c,
};

-static u64 pmc_intel_event_map(int event)
+static u64 intel_pmu_event_map(int event)
{
return intel_perfmon_event_map[event];
}

-static u64 pmc_intel_raw_event(u64 event)
+static u64 intel_pmu_raw_event(u64 event)
{
#define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL
#define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL
@@ -114,12 +114,12 @@ static const u64 amd_perfmon_event_map[] =
[PERF_COUNT_BRANCH_MISSES] = 0x00c5,
};

-static u64 pmc_amd_event_map(int event)
+static u64 amd_pmu_event_map(int event)
{
return amd_perfmon_event_map[event];
}

-static u64 pmc_amd_raw_event(u64 event)
+static u64 amd_pmu_raw_event(u64 event)
{
#define K7_EVNTSEL_EVENT_MASK 0x7000000FFULL
#define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL
@@ -184,12 +184,12 @@ static bool reserve_pmc_hardware(void)
disable_lapic_nmi_watchdog();

for (i = 0; i < nr_counters_generic; i++) {
- if (!reserve_perfctr_nmi(pmc_ops->perfctr + i))
+ if (!reserve_perfctr_nmi(x86_pmu->perfctr + i))
goto perfctr_fail;
}

for (i = 0; i < nr_counters_generic; i++) {
- if (!reserve_evntsel_nmi(pmc_ops->eventsel + i))
+ if (!reserve_evntsel_nmi(x86_pmu->eventsel + i))
goto eventsel_fail;
}

@@ -197,13 +197,13 @@ static bool reserve_pmc_hardware(void)

eventsel_fail:
for (i--; i >= 0; i--)
- release_evntsel_nmi(pmc_ops->eventsel + i);
+ release_evntsel_nmi(x86_pmu->eventsel + i);

i = nr_counters_generic;

perfctr_fail:
for (i--; i >= 0; i--)
- release_perfctr_nmi(pmc_ops->perfctr + i);
+ release_perfctr_nmi(x86_pmu->perfctr + i);

if (nmi_watchdog == NMI_LOCAL_APIC)
enable_lapic_nmi_watchdog();
@@ -216,8 +216,8 @@ static void release_pmc_hardware(void)
int i;

for (i = 0; i < nr_counters_generic; i++) {
- release_perfctr_nmi(pmc_ops->perfctr + i);
- release_evntsel_nmi(pmc_ops->eventsel + i);
+ release_perfctr_nmi(x86_pmu->perfctr + i);
+ release_evntsel_nmi(x86_pmu->eventsel + i);
}

if (nmi_watchdog == NMI_LOCAL_APIC)
@@ -293,14 +293,14 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
* Raw event type provide the config in the event structure
*/
if (perf_event_raw(hw_event)) {
- hwc->config |= pmc_ops->raw_event(perf_event_config(hw_event));
+ hwc->config |= x86_pmu->raw_event(perf_event_config(hw_event));
} else {
- if (perf_event_id(hw_event) >= pmc_ops->max_events)
+ if (perf_event_id(hw_event) >= x86_pmu->max_events)
return -EINVAL;
/*
* The generic map:
*/
- hwc->config |= pmc_ops->event_map(perf_event_id(hw_event));
+ hwc->config |= x86_pmu->event_map(perf_event_id(hw_event));
}

counter->destroy = hw_perf_counter_destroy;
@@ -308,7 +308,7 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
return 0;
}

-static u64 pmc_intel_save_disable_all(void)
+static u64 intel_pmu_save_disable_all(void)
{
u64 ctrl;

@@ -318,7 +318,7 @@ static u64 pmc_intel_save_disable_all(void)
return ctrl;
}

-static u64 pmc_amd_save_disable_all(void)
+static u64 amd_pmu_save_disable_all(void)
{
struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
int enabled, idx;
@@ -327,7 +327,8 @@ static u64 pmc_amd_save_disable_all(void)
cpuc->enabled = 0;
/*
* ensure we write the disable before we start disabling the
- * counters proper, so that pcm_amd_enable() does the right thing.
+ * counters proper, so that amd_pmu_enable_counter() does the
+ * right thing.
*/
barrier();

@@ -351,19 +352,19 @@ u64 hw_perf_save_disable(void)
if (unlikely(!perf_counters_initialized))
return 0;

- return pmc_ops->save_disable_all();
+ return x86_pmu->save_disable_all();
}
/*
* Exported because of ACPI idle
*/
EXPORT_SYMBOL_GPL(hw_perf_save_disable);

-static void pmc_intel_restore_all(u64 ctrl)
+static void intel_pmu_restore_all(u64 ctrl)
{
wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
}

-static void pmc_amd_restore_all(u64 ctrl)
+static void amd_pmu_restore_all(u64 ctrl)
{
struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
int idx;
@@ -391,14 +392,14 @@ void hw_perf_restore(u64 ctrl)
if (unlikely(!perf_counters_initialized))
return;

- pmc_ops->restore_all(ctrl);
+ x86_pmu->restore_all(ctrl);
}
/*
* Exported because of ACPI idle
*/
EXPORT_SYMBOL_GPL(hw_perf_restore);

-static u64 pmc_intel_get_status(u64 mask)
+static u64 intel_pmu_get_status(u64 mask)
{
u64 status;

@@ -407,7 +408,7 @@ static u64 pmc_intel_get_status(u64 mask)
return status;
}

-static u64 pmc_amd_get_status(u64 mask)
+static u64 amd_pmu_get_status(u64 mask)
{
u64 status = 0;
int idx;
@@ -432,15 +433,15 @@ static u64 hw_perf_get_status(u64 mask)
if (unlikely(!perf_counters_initialized))
return 0;

- return pmc_ops->get_status(mask);
+ return x86_pmu->get_status(mask);
}

-static void pmc_intel_ack_status(u64 ack)
+static void intel_pmu_ack_status(u64 ack)
{
wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
}

-static void pmc_amd_ack_status(u64 ack)
+static void amd_pmu_ack_status(u64 ack)
{
}

@@ -449,16 +450,16 @@ static void hw_perf_ack_status(u64 ack)
if (unlikely(!perf_counters_initialized))
return;

- pmc_ops->ack_status(ack);
+ x86_pmu->ack_status(ack);
}

-static void pmc_intel_enable(int idx, u64 config)
+static void intel_pmu_enable_counter(int idx, u64 config)
{
wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx,
config | ARCH_PERFMON_EVENTSEL0_ENABLE);
}

-static void pmc_amd_enable(int idx, u64 config)
+static void amd_pmu_enable_counter(int idx, u64 config)
{
struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);

@@ -474,15 +475,15 @@ static void hw_perf_enable(int idx, u64 config)
if (unlikely(!perf_counters_initialized))
return;

- pmc_ops->enable(idx, config);
+ x86_pmu->enable(idx, config);
}

-static void pmc_intel_disable(int idx, u64 config)
+static void intel_pmu_disable_counter(int idx, u64 config)
{
wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx, config);
}

-static void pmc_amd_disable(int idx, u64 config)
+static void amd_pmu_disable_counter(int idx, u64 config)
{
struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);

@@ -496,7 +497,7 @@ static void hw_perf_disable(int idx, u64 config)
if (unlikely(!perf_counters_initialized))
return;

- pmc_ops->disable(idx, config);
+ x86_pmu->disable(idx, config);
}

static inline void
@@ -613,11 +614,11 @@ fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)

event = hwc->config & ARCH_PERFMON_EVENT_MASK;

- if (unlikely(event == pmc_ops->event_map(PERF_COUNT_INSTRUCTIONS)))
+ if (unlikely(event == x86_pmu->event_map(PERF_COUNT_INSTRUCTIONS)))
return X86_PMC_IDX_FIXED_INSTRUCTIONS;
- if (unlikely(event == pmc_ops->event_map(PERF_COUNT_CPU_CYCLES)))
+ if (unlikely(event == x86_pmu->event_map(PERF_COUNT_CPU_CYCLES)))
return X86_PMC_IDX_FIXED_CPU_CYCLES;
- if (unlikely(event == pmc_ops->event_map(PERF_COUNT_BUS_CYCLES)))
+ if (unlikely(event == x86_pmu->event_map(PERF_COUNT_BUS_CYCLES)))
return X86_PMC_IDX_FIXED_BUS_CYCLES;

return -1;
@@ -661,8 +662,8 @@ try_generic:
set_bit(idx, cpuc->used);
hwc->idx = idx;
}
- hwc->config_base = pmc_ops->eventsel;
- hwc->counter_base = pmc_ops->perfctr;
+ hwc->config_base = x86_pmu->eventsel;
+ hwc->counter_base = x86_pmu->perfctr;
}

perf_counters_lapic_init(hwc->nmi);
@@ -710,8 +711,8 @@ void perf_counter_print_debug(void)
pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used);

for (idx = 0; idx < nr_counters_generic; idx++) {
- rdmsrl(pmc_ops->eventsel + idx, pmc_ctrl);
- rdmsrl(pmc_ops->perfctr + idx, pmc_count);
+ rdmsrl(x86_pmu->eventsel + idx, pmc_ctrl);
+ rdmsrl(x86_pmu->perfctr + idx, pmc_count);

prev_left = per_cpu(prev_left[idx], cpu);

@@ -918,35 +919,35 @@ static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
.priority = 1
};

-static struct pmc_x86_ops pmc_intel_ops = {
- .save_disable_all = pmc_intel_save_disable_all,
- .restore_all = pmc_intel_restore_all,
- .get_status = pmc_intel_get_status,
- .ack_status = pmc_intel_ack_status,
- .enable = pmc_intel_enable,
- .disable = pmc_intel_disable,
+static struct x86_pmu intel_pmu = {
+ .save_disable_all = intel_pmu_save_disable_all,
+ .restore_all = intel_pmu_restore_all,
+ .get_status = intel_pmu_get_status,
+ .ack_status = intel_pmu_ack_status,
+ .enable = intel_pmu_enable_counter,
+ .disable = intel_pmu_disable_counter,
.eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
.perfctr = MSR_ARCH_PERFMON_PERFCTR0,
- .event_map = pmc_intel_event_map,
- .raw_event = pmc_intel_raw_event,
+ .event_map = intel_pmu_event_map,
+ .raw_event = intel_pmu_raw_event,
.max_events = ARRAY_SIZE(intel_perfmon_event_map),
};

-static struct pmc_x86_ops pmc_amd_ops = {
- .save_disable_all = pmc_amd_save_disable_all,
- .restore_all = pmc_amd_restore_all,
- .get_status = pmc_amd_get_status,
- .ack_status = pmc_amd_ack_status,
- .enable = pmc_amd_enable,
- .disable = pmc_amd_disable,
+static struct x86_pmu amd_pmu = {
+ .save_disable_all = amd_pmu_save_disable_all,
+ .restore_all = amd_pmu_restore_all,
+ .get_status = amd_pmu_get_status,
+ .ack_status = amd_pmu_ack_status,
+ .enable = amd_pmu_enable_counter,
+ .disable = amd_pmu_disable_counter,
.eventsel = MSR_K7_EVNTSEL0,
.perfctr = MSR_K7_PERFCTR0,
- .event_map = pmc_amd_event_map,
- .raw_event = pmc_amd_raw_event,
+ .event_map = amd_pmu_event_map,
+ .raw_event = amd_pmu_raw_event,
.max_events = ARRAY_SIZE(amd_perfmon_event_map),
};

-static struct pmc_x86_ops *pmc_intel_init(void)
+static struct x86_pmu *intel_pmu_init(void)
{
union cpuid10_edx edx;
union cpuid10_eax eax;
@@ -977,10 +978,10 @@ static struct pmc_x86_ops *pmc_intel_init(void)
nr_counters_fixed = edx.split.num_counters_fixed;
counter_value_mask = (1ULL << eax.split.bit_width) - 1;

- return &pmc_intel_ops;
+ return &intel_pmu;
}

-static struct pmc_x86_ops *pmc_amd_init(void)
+static struct x86_pmu *amd_pmu_init(void)
{
nr_counters_generic = 4;
nr_counters_fixed = 0;
@@ -989,22 +990,22 @@ static struct pmc_x86_ops *pmc_amd_init(void)

pr_info("AMD Performance Monitoring support detected.\n");

- return &pmc_amd_ops;
+ return &amd_pmu;
}

void __init init_hw_perf_counters(void)
{
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_INTEL:
- pmc_ops = pmc_intel_init();
+ x86_pmu = intel_pmu_init();
break;
case X86_VENDOR_AMD:
- pmc_ops = pmc_amd_init();
+ x86_pmu = amd_pmu_init();
break;
default:
return;
}
- if (!pmc_ops)
+ if (!x86_pmu)
return;

pr_info("... num counters: %d\n", nr_counters_generic);

Subject: [tip:perfcounters/core] perf_counter, x86: make interrupt handler model specific

Commit-ID: 39d81eab2374d71b2d9c82f66258a1a4f57ddd2e
Gitweb: http://git.kernel.org/tip/39d81eab2374d71b2d9c82f66258a1a4f57ddd2e
Author: Robert Richter <[email protected]>
AuthorDate: Wed, 29 Apr 2009 12:47:05 +0200
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 29 Apr 2009 14:51:04 +0200

perf_counter, x86: make interrupt handler model specific

This separates the perfcounter interrupt handler for AMD and Intel
cpus. The AMD interrupt handler implementation follows in a later patch.

[ Impact: refactor and clean up code ]

Signed-off-by: Robert Richter <[email protected]>
Cc: Paul Mackerras <[email protected]>
Acked-by: Peter Zijlstra <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>


---
arch/x86/kernel/cpu/perf_counter.c | 16 +++++++++++++---
1 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 808a1a1..9d90de0 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -4,6 +4,7 @@
* Copyright(C) 2008 Thomas Gleixner <[email protected]>
* Copyright(C) 2008 Red Hat, Inc., Ingo Molnar
* Copyright(C) 2009 Jaswinder Singh Rajput
+ * Copyright(C) 2009 Advanced Micro Devices, Inc., Robert Richter
*
* For licencing details see kernel-base/COPYING
*/
@@ -47,6 +48,7 @@ struct cpu_hw_counters {
* struct x86_pmu - generic x86 pmu
*/
struct x86_pmu {
+ int (*handle_irq)(struct pt_regs *, int);
u64 (*save_disable_all)(void);
void (*restore_all)(u64);
u64 (*get_status)(u64);
@@ -241,6 +243,10 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
struct hw_perf_counter *hwc = &counter->hw;
int err;

+ /* disable temporarily */
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+ return -ENOSYS;
+
if (unlikely(!perf_counters_initialized))
return -EINVAL;

@@ -780,7 +786,7 @@ static void perf_save_and_restart(struct perf_counter *counter)
* This handler is triggered by the local APIC, so the APIC IRQ handling
* rules apply:
*/
-static int __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi)
+static int intel_pmu_handle_irq(struct pt_regs *regs, int nmi)
{
int bit, cpu = smp_processor_id();
u64 ack, status;
@@ -827,6 +833,8 @@ out:
return ret;
}

+static int amd_pmu_handle_irq(struct pt_regs *regs, int nmi) { return 0; }
+
void perf_counter_unthrottle(void)
{
struct cpu_hw_counters *cpuc;
@@ -851,7 +859,7 @@ void smp_perf_counter_interrupt(struct pt_regs *regs)
irq_enter();
apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR);
ack_APIC_irq();
- __smp_perf_counter_interrupt(regs, 0);
+ x86_pmu->handle_irq(regs, 0);
irq_exit();
}

@@ -908,7 +916,7 @@ perf_counter_nmi_handler(struct notifier_block *self,
regs = args->regs;

apic_write(APIC_LVTPC, APIC_DM_NMI);
- ret = __smp_perf_counter_interrupt(regs, 1);
+ ret = x86_pmu->handle_irq(regs, 1);

return ret ? NOTIFY_STOP : NOTIFY_OK;
}
@@ -920,6 +928,7 @@ static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
};

static struct x86_pmu intel_pmu = {
+ .handle_irq = intel_pmu_handle_irq,
.save_disable_all = intel_pmu_save_disable_all,
.restore_all = intel_pmu_restore_all,
.get_status = intel_pmu_get_status,
@@ -934,6 +943,7 @@ static struct x86_pmu intel_pmu = {
};

static struct x86_pmu amd_pmu = {
+ .handle_irq = amd_pmu_handle_irq,
.save_disable_all = amd_pmu_save_disable_all,
.restore_all = amd_pmu_restore_all,
.get_status = amd_pmu_get_status,

Subject: [tip:perfcounters/core] perf_counter, x86: remove get_status() from struct x86_pmu

Commit-ID: b7f8859a8ed1937e2139c17b84878f1d413fa659
Gitweb: http://git.kernel.org/tip/b7f8859a8ed1937e2139c17b84878f1d413fa659
Author: Robert Richter <[email protected]>
AuthorDate: Wed, 29 Apr 2009 12:47:06 +0200
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 29 Apr 2009 14:51:05 +0200

perf_counter, x86: remove get_status() from struct x86_pmu

This function is Intel-only and not necessary for AMD cpus.

[ Impact: simplify code ]

Signed-off-by: Robert Richter <[email protected]>
Cc: Paul Mackerras <[email protected]>
Acked-by: Peter Zijlstra <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>


---
arch/x86/kernel/cpu/perf_counter.c | 39 ++++-------------------------------
1 files changed, 5 insertions(+), 34 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 9d90de0..d0bb029 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -51,7 +51,6 @@ struct x86_pmu {
int (*handle_irq)(struct pt_regs *, int);
u64 (*save_disable_all)(void);
void (*restore_all)(u64);
- u64 (*get_status)(u64);
void (*ack_status)(u64);
void (*enable)(int, u64);
void (*disable)(int, u64);
@@ -405,41 +404,15 @@ void hw_perf_restore(u64 ctrl)
*/
EXPORT_SYMBOL_GPL(hw_perf_restore);

-static u64 intel_pmu_get_status(u64 mask)
+static inline u64 intel_pmu_get_status(u64 mask)
{
u64 status;

- rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
-
- return status;
-}
-
-static u64 amd_pmu_get_status(u64 mask)
-{
- u64 status = 0;
- int idx;
-
- for (idx = 0; idx < nr_counters_generic; idx++) {
- s64 val;
-
- if (!(mask & (1 << idx)))
- continue;
-
- rdmsrl(MSR_K7_PERFCTR0 + idx, val);
- val <<= (64 - counter_value_bits);
- if (val >= 0)
- status |= (1 << idx);
- }
-
- return status;
-}
-
-static u64 hw_perf_get_status(u64 mask)
-{
if (unlikely(!perf_counters_initialized))
return 0;
+ rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);

- return x86_pmu->get_status(mask);
+ return status;
}

static void intel_pmu_ack_status(u64 ack)
@@ -795,7 +768,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs, int nmi)

cpuc->throttle_ctrl = hw_perf_save_disable();

- status = hw_perf_get_status(cpuc->throttle_ctrl);
+ status = intel_pmu_get_status(cpuc->throttle_ctrl);
if (!status)
goto out;

@@ -820,7 +793,7 @@ again:
/*
* Repeat if there is more work to be done:
*/
- status = hw_perf_get_status(cpuc->throttle_ctrl);
+ status = intel_pmu_get_status(cpuc->throttle_ctrl);
if (status)
goto again;
out:
@@ -931,7 +904,6 @@ static struct x86_pmu intel_pmu = {
.handle_irq = intel_pmu_handle_irq,
.save_disable_all = intel_pmu_save_disable_all,
.restore_all = intel_pmu_restore_all,
- .get_status = intel_pmu_get_status,
.ack_status = intel_pmu_ack_status,
.enable = intel_pmu_enable_counter,
.disable = intel_pmu_disable_counter,
@@ -946,7 +918,6 @@ static struct x86_pmu amd_pmu = {
.handle_irq = amd_pmu_handle_irq,
.save_disable_all = amd_pmu_save_disable_all,
.restore_all = amd_pmu_restore_all,
- .get_status = amd_pmu_get_status,
.ack_status = amd_pmu_ack_status,
.enable = amd_pmu_enable_counter,
.disable = amd_pmu_disable_counter,

Subject: [tip:perfcounters/core] perf_counter, x86: remove ack_status() from struct x86_pmu

Commit-ID: dee5d9067ca78b317538fd67930be4e09a83dbc5
Gitweb: http://git.kernel.org/tip/dee5d9067ca78b317538fd67930be4e09a83dbc5
Author: Robert Richter <[email protected]>
AuthorDate: Wed, 29 Apr 2009 12:47:07 +0200
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 29 Apr 2009 14:51:05 +0200

perf_counter, x86: remove ack_status() from struct x86_pmu

This function is Intel-only and not necessary for AMD cpus.

[ Impact: simplify code ]

Signed-off-by: Robert Richter <[email protected]>
Cc: Paul Mackerras <[email protected]>
Acked-by: Peter Zijlstra <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>


---
arch/x86/kernel/cpu/perf_counter.c | 19 ++-----------------
1 files changed, 2 insertions(+), 17 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index d0bb029..6bbdc16 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -51,7 +51,6 @@ struct x86_pmu {
int (*handle_irq)(struct pt_regs *, int);
u64 (*save_disable_all)(void);
void (*restore_all)(u64);
- void (*ack_status)(u64);
void (*enable)(int, u64);
void (*disable)(int, u64);
unsigned eventsel;
@@ -415,23 +414,11 @@ static inline u64 intel_pmu_get_status(u64 mask)
return status;
}

-static void intel_pmu_ack_status(u64 ack)
+static inline void intel_pmu_ack_status(u64 ack)
{
wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
}

-static void amd_pmu_ack_status(u64 ack)
-{
-}
-
-static void hw_perf_ack_status(u64 ack)
-{
- if (unlikely(!perf_counters_initialized))
- return;
-
- x86_pmu->ack_status(ack);
-}
-
static void intel_pmu_enable_counter(int idx, u64 config)
{
wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx,
@@ -788,7 +775,7 @@ again:
__x86_pmu_disable(counter, &counter->hw, bit);
}

- hw_perf_ack_status(ack);
+ intel_pmu_ack_status(ack);

/*
* Repeat if there is more work to be done:
@@ -904,7 +891,6 @@ static struct x86_pmu intel_pmu = {
.handle_irq = intel_pmu_handle_irq,
.save_disable_all = intel_pmu_save_disable_all,
.restore_all = intel_pmu_restore_all,
- .ack_status = intel_pmu_ack_status,
.enable = intel_pmu_enable_counter,
.disable = intel_pmu_disable_counter,
.eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
@@ -918,7 +904,6 @@ static struct x86_pmu amd_pmu = {
.handle_irq = amd_pmu_handle_irq,
.save_disable_all = amd_pmu_save_disable_all,
.restore_all = amd_pmu_restore_all,
- .ack_status = amd_pmu_ack_status,
.enable = amd_pmu_enable_counter,
.disable = amd_pmu_disable_counter,
.eventsel = MSR_K7_EVNTSEL0,
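
For orientation, the protocol served by the two Intel-only helpers that this
and the previous patch take out of struct x86_pmu, distilled from
intel_pmu_handle_irq() as shown in the earlier hunks (a sketch, not new code):

	status = intel_pmu_get_status(mask);	/* rdmsr GLOBAL_STATUS   */
	while (status) {
		/* handle each counter whose bit is set in status */
		intel_pmu_ack_status(status);	/* wrmsr GLOBAL_OVF_CTRL */
		status = intel_pmu_get_status(mask);
	}

AMD cpus have neither of these global MSRs, which is why the callbacks can
simply be dropped and the helpers called directly from the Intel handler.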

Subject: [tip:perfcounters/core] perf_counter, x86: rename __hw_perf_counter_set_period into x86_perf_counter_set_period

Commit-ID: 26816c287e13eedc67bc4ed0cd40c138314b7c7d
Gitweb: http://git.kernel.org/tip/26816c287e13eedc67bc4ed0cd40c138314b7c7d
Author: Robert Richter <[email protected]>
AuthorDate: Wed, 29 Apr 2009 12:47:08 +0200
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 29 Apr 2009 14:51:06 +0200

perf_counter, x86: rename __hw_perf_counter_set_period into x86_perf_counter_set_period

[ Impact: cleanup ]

Signed-off-by: Robert Richter <[email protected]>
Cc: Paul Mackerras <[email protected]>
Acked-by: Peter Zijlstra <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>


---
arch/x86/kernel/cpu/perf_counter.c | 6 +++---
1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 6bbdc16..fa6541d 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -498,7 +498,7 @@ static DEFINE_PER_CPU(u64, prev_left[X86_PMC_IDX_MAX]);
* To be called with the counter disabled in hw:
*/
static void
-__hw_perf_counter_set_period(struct perf_counter *counter,
+x86_perf_counter_set_period(struct perf_counter *counter,
struct hw_perf_counter *hwc, int idx)
{
s64 left = atomic64_read(&hwc->period_left);
@@ -642,7 +642,7 @@ try_generic:
*/
barrier();

- __hw_perf_counter_set_period(counter, hwc, idx);
+ x86_perf_counter_set_period(counter, hwc, idx);
__x86_pmu_enable(counter, hwc, idx);

return 0;
@@ -731,7 +731,7 @@ static void perf_save_and_restart(struct perf_counter *counter)
int idx = hwc->idx;

x86_perf_counter_update(counter, hwc, idx);
- __hw_perf_counter_set_period(counter, hwc, idx);
+ x86_perf_counter_set_period(counter, hwc, idx);

if (counter->state == PERF_COUNTER_STATE_ACTIVE)
__x86_pmu_enable(counter, hwc, idx);

Subject: [tip:perfcounters/core] perf_counter, x86: modify initialization of struct x86_pmu

Commit-ID: 72eae04d3a3075c26d39e1e685acfc8e8c29db64
Gitweb: http://git.kernel.org/tip/72eae04d3a3075c26d39e1e685acfc8e8c29db64
Author: Robert Richter <[email protected]>
AuthorDate: Wed, 29 Apr 2009 12:47:10 +0200
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 29 Apr 2009 14:51:07 +0200

perf_counter, x86: modify initialization of struct x86_pmu

This patch adds error handling and changes the initialization of
struct x86_pmu. No functional changes. Needed for follow-on patches.

[ Impact: cleanup ]

Signed-off-by: Robert Richter <[email protected]>
Cc: Paul Mackerras <[email protected]>
Acked-by: Peter Zijlstra <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>


---
arch/x86/kernel/cpu/perf_counter.c | 27 ++++++++++++++++-----------
1 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 5a52d73..7c72a94 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -913,7 +913,7 @@ static struct x86_pmu amd_pmu = {
.max_events = ARRAY_SIZE(amd_perfmon_event_map),
};

-static struct x86_pmu *intel_pmu_init(void)
+static int intel_pmu_init(void)
{
union cpuid10_edx edx;
union cpuid10_eax eax;
@@ -921,7 +921,7 @@ static struct x86_pmu *intel_pmu_init(void)
unsigned int ebx;

if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
- return NULL;
+ return -ENODEV;

/*
* Check whether the Architectural PerfMon supports
@@ -929,49 +929,54 @@ static struct x86_pmu *intel_pmu_init(void)
*/
cpuid(10, &eax.full, &ebx, &unused, &edx.full);
if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
- return NULL;
+ return -ENODEV;

intel_perfmon_version = eax.split.version_id;
if (intel_perfmon_version < 2)
- return NULL;
+ return -ENODEV;

pr_info("Intel Performance Monitoring support detected.\n");
pr_info("... version: %d\n", intel_perfmon_version);
pr_info("... bit width: %d\n", eax.split.bit_width);
pr_info("... mask length: %d\n", eax.split.mask_length);

+ x86_pmu = &intel_pmu;
+
nr_counters_generic = eax.split.num_counters;
nr_counters_fixed = edx.split.num_counters_fixed;
counter_value_mask = (1ULL << eax.split.bit_width) - 1;

- return &intel_pmu;
+ return 0;
}

-static struct x86_pmu *amd_pmu_init(void)
+static int amd_pmu_init(void)
{
+ x86_pmu = &amd_pmu;
+
nr_counters_generic = 4;
nr_counters_fixed = 0;
counter_value_mask = 0x0000FFFFFFFFFFFFULL;
counter_value_bits = 48;

pr_info("AMD Performance Monitoring support detected.\n");
-
- return &amd_pmu;
+ return 0;
}

void __init init_hw_perf_counters(void)
{
+ int err;
+
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_INTEL:
- x86_pmu = intel_pmu_init();
+ err = intel_pmu_init();
break;
case X86_VENDOR_AMD:
- x86_pmu = amd_pmu_init();
+ err = amd_pmu_init();
break;
default:
return;
}
- if (!x86_pmu)
+ if (err != 0)
return;

pr_info("... num counters: %d\n", nr_counters_generic);

Subject: [tip:perfcounters/core] perf_counter, x86: rename intel only functions

Commit-ID: 55de0f2e57994b525324bf0d04d242d9358a2417
Gitweb: http://git.kernel.org/tip/55de0f2e57994b525324bf0d04d242d9358a2417
Author: Robert Richter <[email protected]>
AuthorDate: Wed, 29 Apr 2009 12:47:09 +0200
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 29 Apr 2009 14:51:06 +0200

perf_counter, x86: rename intel only functions

[ Impact: cleanup ]

Signed-off-by: Robert Richter <[email protected]>
Cc: Paul Mackerras <[email protected]>
Acked-by: Peter Zijlstra <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>


---
arch/x86/kernel/cpu/perf_counter.c | 8 ++++----
1 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index fa6541d..5a52d73 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -725,7 +725,7 @@ static void x86_pmu_disable(struct perf_counter *counter)
* Save and restart an expired counter. Called by NMI contexts,
* so it has to be careful about preempting normal counter ops:
*/
-static void perf_save_and_restart(struct perf_counter *counter)
+static void intel_pmu_save_and_restart(struct perf_counter *counter)
{
struct hw_perf_counter *hwc = &counter->hw;
int idx = hwc->idx;
@@ -753,7 +753,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs, int nmi)
struct cpu_hw_counters *cpuc = &per_cpu(cpu_hw_counters, cpu);
int ret = 0;

- cpuc->throttle_ctrl = hw_perf_save_disable();
+ cpuc->throttle_ctrl = intel_pmu_save_disable_all();

status = intel_pmu_get_status(cpuc->throttle_ctrl);
if (!status)
@@ -770,7 +770,7 @@ again:
if (!counter)
continue;

- perf_save_and_restart(counter);
+ intel_pmu_save_and_restart(counter);
if (perf_counter_overflow(counter, nmi, regs, 0))
__x86_pmu_disable(counter, &counter->hw, bit);
}
@@ -788,7 +788,7 @@ out:
* Restore - do not reenable when global enable is off or throttled:
*/
if (++cpuc->interrupts < PERFMON_MAX_INTERRUPTS)
- hw_perf_restore(cpuc->throttle_ctrl);
+ intel_pmu_restore_all(cpuc->throttle_ctrl);

return ret;
}

Subject: [tip:perfcounters/core] perf_counter, x86: make x86_pmu data a static struct

Commit-ID: 4a06bd8508f65ad1dd5cd2046b85694813fa36a2
Gitweb: http://git.kernel.org/tip/4a06bd8508f65ad1dd5cd2046b85694813fa36a2
Author: Robert Richter <[email protected]>
AuthorDate: Wed, 29 Apr 2009 12:47:11 +0200
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 29 Apr 2009 14:51:07 +0200

perf_counter, x86: make x86_pmu data a static struct

Instead of using a pointer to reference the x86 pmu, we now have a
single data structure that is initialized at the beginning. This saves
a pointer dereference on each access to this memory.

[ Impact: micro-optimization ]

Signed-off-by: Robert Richter <[email protected]>
Cc: Paul Mackerras <[email protected]>
Acked-by: Peter Zijlstra <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>


---
arch/x86/kernel/cpu/perf_counter.c | 50 ++++++++++++++++++------------------
1 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 7c72a94..68597d7 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -60,7 +60,7 @@ struct x86_pmu {
int max_events;
};

-static struct x86_pmu *x86_pmu __read_mostly;
+static struct x86_pmu x86_pmu __read_mostly;

static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = {
.enabled = 1,
@@ -184,12 +184,12 @@ static bool reserve_pmc_hardware(void)
disable_lapic_nmi_watchdog();

for (i = 0; i < nr_counters_generic; i++) {
- if (!reserve_perfctr_nmi(x86_pmu->perfctr + i))
+ if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
goto perfctr_fail;
}

for (i = 0; i < nr_counters_generic; i++) {
- if (!reserve_evntsel_nmi(x86_pmu->eventsel + i))
+ if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
goto eventsel_fail;
}

@@ -197,13 +197,13 @@ static bool reserve_pmc_hardware(void)

eventsel_fail:
for (i--; i >= 0; i--)
- release_evntsel_nmi(x86_pmu->eventsel + i);
+ release_evntsel_nmi(x86_pmu.eventsel + i);

i = nr_counters_generic;

perfctr_fail:
for (i--; i >= 0; i--)
- release_perfctr_nmi(x86_pmu->perfctr + i);
+ release_perfctr_nmi(x86_pmu.perfctr + i);

if (nmi_watchdog == NMI_LOCAL_APIC)
enable_lapic_nmi_watchdog();
@@ -216,8 +216,8 @@ static void release_pmc_hardware(void)
int i;

for (i = 0; i < nr_counters_generic; i++) {
- release_perfctr_nmi(x86_pmu->perfctr + i);
- release_evntsel_nmi(x86_pmu->eventsel + i);
+ release_perfctr_nmi(x86_pmu.perfctr + i);
+ release_evntsel_nmi(x86_pmu.eventsel + i);
}

if (nmi_watchdog == NMI_LOCAL_APIC)
@@ -297,14 +297,14 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
* Raw event type provide the config in the event structure
*/
if (perf_event_raw(hw_event)) {
- hwc->config |= x86_pmu->raw_event(perf_event_config(hw_event));
+ hwc->config |= x86_pmu.raw_event(perf_event_config(hw_event));
} else {
- if (perf_event_id(hw_event) >= x86_pmu->max_events)
+ if (perf_event_id(hw_event) >= x86_pmu.max_events)
return -EINVAL;
/*
* The generic map:
*/
- hwc->config |= x86_pmu->event_map(perf_event_id(hw_event));
+ hwc->config |= x86_pmu.event_map(perf_event_id(hw_event));
}

counter->destroy = hw_perf_counter_destroy;
@@ -356,7 +356,7 @@ u64 hw_perf_save_disable(void)
if (unlikely(!perf_counters_initialized))
return 0;

- return x86_pmu->save_disable_all();
+ return x86_pmu.save_disable_all();
}
/*
* Exported because of ACPI idle
@@ -396,7 +396,7 @@ void hw_perf_restore(u64 ctrl)
if (unlikely(!perf_counters_initialized))
return;

- x86_pmu->restore_all(ctrl);
+ x86_pmu.restore_all(ctrl);
}
/*
* Exported because of ACPI idle
@@ -441,7 +441,7 @@ static void hw_perf_enable(int idx, u64 config)
if (unlikely(!perf_counters_initialized))
return;

- x86_pmu->enable(idx, config);
+ x86_pmu.enable(idx, config);
}

static void intel_pmu_disable_counter(int idx, u64 config)
@@ -463,7 +463,7 @@ static void hw_perf_disable(int idx, u64 config)
if (unlikely(!perf_counters_initialized))
return;

- x86_pmu->disable(idx, config);
+ x86_pmu.disable(idx, config);
}

static inline void
@@ -580,11 +580,11 @@ fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)

event = hwc->config & ARCH_PERFMON_EVENT_MASK;

- if (unlikely(event == x86_pmu->event_map(PERF_COUNT_INSTRUCTIONS)))
+ if (unlikely(event == x86_pmu.event_map(PERF_COUNT_INSTRUCTIONS)))
return X86_PMC_IDX_FIXED_INSTRUCTIONS;
- if (unlikely(event == x86_pmu->event_map(PERF_COUNT_CPU_CYCLES)))
+ if (unlikely(event == x86_pmu.event_map(PERF_COUNT_CPU_CYCLES)))
return X86_PMC_IDX_FIXED_CPU_CYCLES;
- if (unlikely(event == x86_pmu->event_map(PERF_COUNT_BUS_CYCLES)))
+ if (unlikely(event == x86_pmu.event_map(PERF_COUNT_BUS_CYCLES)))
return X86_PMC_IDX_FIXED_BUS_CYCLES;

return -1;
@@ -628,8 +628,8 @@ try_generic:
set_bit(idx, cpuc->used);
hwc->idx = idx;
}
- hwc->config_base = x86_pmu->eventsel;
- hwc->counter_base = x86_pmu->perfctr;
+ hwc->config_base = x86_pmu.eventsel;
+ hwc->counter_base = x86_pmu.perfctr;
}

perf_counters_lapic_init(hwc->nmi);
@@ -677,8 +677,8 @@ void perf_counter_print_debug(void)
pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used);

for (idx = 0; idx < nr_counters_generic; idx++) {
- rdmsrl(x86_pmu->eventsel + idx, pmc_ctrl);
- rdmsrl(x86_pmu->perfctr + idx, pmc_count);
+ rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
+ rdmsrl(x86_pmu.perfctr + idx, pmc_count);

prev_left = per_cpu(prev_left[idx], cpu);

@@ -819,7 +819,7 @@ void smp_perf_counter_interrupt(struct pt_regs *regs)
irq_enter();
apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR);
ack_APIC_irq();
- x86_pmu->handle_irq(regs, 0);
+ x86_pmu.handle_irq(regs, 0);
irq_exit();
}

@@ -876,7 +876,7 @@ perf_counter_nmi_handler(struct notifier_block *self,
regs = args->regs;

apic_write(APIC_LVTPC, APIC_DM_NMI);
- ret = x86_pmu->handle_irq(regs, 1);
+ ret = x86_pmu.handle_irq(regs, 1);

return ret ? NOTIFY_STOP : NOTIFY_OK;
}
@@ -940,7 +940,7 @@ static int intel_pmu_init(void)
pr_info("... bit width: %d\n", eax.split.bit_width);
pr_info("... mask length: %d\n", eax.split.mask_length);

- x86_pmu = &intel_pmu;
+ x86_pmu = intel_pmu;

nr_counters_generic = eax.split.num_counters;
nr_counters_fixed = edx.split.num_counters_fixed;
@@ -951,7 +951,7 @@ static int intel_pmu_init(void)

static int amd_pmu_init(void)
{
- x86_pmu = &amd_pmu;
+ x86_pmu = amd_pmu;

nr_counters_generic = 4;
nr_counters_fixed = 0;
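
A minimal user-space sketch of the pointer-to-struct change above (not
kernel code; the scaffolding and the example counter count are made up).
Copying the vendor template into one writable x86_pmu instance is what
lets the follow-up patches fill in model-specific fields after the copy:

#include <stdio.h>

struct pmu_model {
        const char *name;
        int num_counters;               /* filled in per model after the copy */
};

static const struct pmu_model intel_template = { .name = "Intel" };
static struct pmu_model x86_pmu;        /* the one writable instance */

int main(void)
{
        x86_pmu = intel_template;       /* mirrors "x86_pmu = intel_pmu" */
        x86_pmu.num_counters = 2;       /* would come from CPUID in the kernel */
        printf("%s pmu, %d counters\n", x86_pmu.name, x86_pmu.num_counters);
        return 0;
}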

Subject: [tip:perfcounters/core] perf_counter, x86: make pmu version generic

Commit-ID: faa28ae018ed004a22aa4a7704e04ccdde4a941e
Gitweb: http://git.kernel.org/tip/faa28ae018ed004a22aa4a7704e04ccdde4a941e
Author: Robert Richter <[email protected]>
AuthorDate: Wed, 29 Apr 2009 12:47:13 +0200
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 29 Apr 2009 14:51:08 +0200

perf_counter, x86: make pmu version generic

This makes the use of the version variable generic. Also, some debug
messages have been generalized.

[ Impact: refactor and generalize code ]

Signed-off-by: Robert Richter <[email protected]>
Cc: Paul Mackerras <[email protected]>
Acked-by: Peter Zijlstra <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>


---
arch/x86/kernel/cpu/perf_counter.c | 24 +++++++++++++-----------
1 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 75dbb1f..15d2c03 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -39,6 +39,8 @@ struct cpu_hw_counters {
* struct x86_pmu - generic x86 pmu
*/
struct x86_pmu {
+ const char *name;
+ int version;
int (*handle_irq)(struct pt_regs *, int);
u64 (*save_disable_all)(void);
void (*restore_all)(u64);
@@ -61,8 +63,6 @@ static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = {
.enabled = 1,
};

-static __read_mostly int intel_perfmon_version;
-
/*
* Intel PerfMon v3. Used on Core2 and later.
*/
@@ -658,7 +658,7 @@ void perf_counter_print_debug(void)
cpu = smp_processor_id();
cpuc = &per_cpu(cpu_hw_counters, cpu);

- if (intel_perfmon_version >= 2) {
+ if (x86_pmu.version >= 2) {
rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
@@ -884,6 +884,7 @@ static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
};

static struct x86_pmu intel_pmu = {
+ .name = "Intel",
.handle_irq = intel_pmu_handle_irq,
.save_disable_all = intel_pmu_save_disable_all,
.restore_all = intel_pmu_restore_all,
@@ -897,6 +898,7 @@ static struct x86_pmu intel_pmu = {
};

static struct x86_pmu amd_pmu = {
+ .name = "AMD",
.handle_irq = amd_pmu_handle_irq,
.save_disable_all = amd_pmu_save_disable_all,
.restore_all = amd_pmu_restore_all,
@@ -918,6 +920,7 @@ static int intel_pmu_init(void)
union cpuid10_eax eax;
unsigned int unused;
unsigned int ebx;
+ int version;

if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
return -ENODEV;
@@ -930,16 +933,12 @@ static int intel_pmu_init(void)
if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
return -ENODEV;

- intel_perfmon_version = eax.split.version_id;
- if (intel_perfmon_version < 2)
+ version = eax.split.version_id;
+ if (version < 2)
return -ENODEV;

- pr_info("Intel Performance Monitoring support detected.\n");
- pr_info("... version: %d\n", intel_perfmon_version);
- pr_info("... bit width: %d\n", eax.split.bit_width);
- pr_info("... mask length: %d\n", eax.split.mask_length);
-
x86_pmu = intel_pmu;
+ x86_pmu.version = version;
x86_pmu.num_counters = eax.split.num_counters;
x86_pmu.num_counters_fixed = edx.split.num_counters_fixed;
x86_pmu.counter_bits = eax.split.bit_width;
@@ -951,7 +950,6 @@ static int intel_pmu_init(void)
static int amd_pmu_init(void)
{
x86_pmu = amd_pmu;
- pr_info("AMD Performance Monitoring support detected.\n");
return 0;
}

@@ -972,6 +970,10 @@ void __init init_hw_perf_counters(void)
if (err != 0)
return;

+ pr_info("%s Performance Monitoring support detected.\n", x86_pmu.name);
+ pr_info("... version: %d\n", x86_pmu.version);
+ pr_info("... bit width: %d\n", x86_pmu.counter_bits);
+
pr_info("... num counters: %d\n", x86_pmu.num_counters);
if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
x86_pmu.num_counters = X86_PMC_MAX_GENERIC;

Subject: [tip:perfcounters/core] perf_counter, x86: move counter parameters to struct x86_pmu

Commit-ID: 0933e5c6a680ba8d8d786a6f7fa377b7ec0d1e49
Gitweb: http://git.kernel.org/tip/0933e5c6a680ba8d8d786a6f7fa377b7ec0d1e49
Author: Robert Richter <[email protected]>
AuthorDate: Wed, 29 Apr 2009 12:47:12 +0200
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 29 Apr 2009 14:51:08 +0200

perf_counter, x86: move counter parameters to struct x86_pmu

[ Impact: refactor and generalize code ]

Signed-off-by: Robert Richter <[email protected]>
Cc: Paul Mackerras <[email protected]>
Acked-by: Peter Zijlstra <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>


---
arch/x86/kernel/cpu/perf_counter.c | 80 ++++++++++++++++-------------------
1 files changed, 37 insertions(+), 43 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 68597d7..75dbb1f 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -24,16 +24,7 @@
#include <asm/nmi.h>

static bool perf_counters_initialized __read_mostly;
-
-/*
- * Number of (generic) HW counters:
- */
-static int nr_counters_generic __read_mostly;
static u64 perf_counter_mask __read_mostly;
-static u64 counter_value_mask __read_mostly;
-static int counter_value_bits __read_mostly;
-
-static int nr_counters_fixed __read_mostly;

struct cpu_hw_counters {
struct perf_counter *counters[X86_PMC_IDX_MAX];
@@ -58,6 +49,10 @@ struct x86_pmu {
u64 (*event_map)(int);
u64 (*raw_event)(u64);
int max_events;
+ int num_counters;
+ int num_counters_fixed;
+ int counter_bits;
+ u64 counter_mask;
};

static struct x86_pmu x86_pmu __read_mostly;
@@ -183,12 +178,12 @@ static bool reserve_pmc_hardware(void)
if (nmi_watchdog == NMI_LOCAL_APIC)
disable_lapic_nmi_watchdog();

- for (i = 0; i < nr_counters_generic; i++) {
+ for (i = 0; i < x86_pmu.num_counters; i++) {
if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
goto perfctr_fail;
}

- for (i = 0; i < nr_counters_generic; i++) {
+ for (i = 0; i < x86_pmu.num_counters; i++) {
if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
goto eventsel_fail;
}
@@ -199,7 +194,7 @@ eventsel_fail:
for (i--; i >= 0; i--)
release_evntsel_nmi(x86_pmu.eventsel + i);

- i = nr_counters_generic;
+ i = x86_pmu.num_counters;

perfctr_fail:
for (i--; i >= 0; i--)
@@ -215,7 +210,7 @@ static void release_pmc_hardware(void)
{
int i;

- for (i = 0; i < nr_counters_generic; i++) {
+ for (i = 0; i < x86_pmu.num_counters; i++) {
release_perfctr_nmi(x86_pmu.perfctr + i);
release_evntsel_nmi(x86_pmu.eventsel + i);
}
@@ -336,7 +331,7 @@ static u64 amd_pmu_save_disable_all(void)
*/
barrier();

- for (idx = 0; idx < nr_counters_generic; idx++) {
+ for (idx = 0; idx < x86_pmu.num_counters; idx++) {
u64 val;

if (!test_bit(idx, cpuc->active_mask))
@@ -378,7 +373,7 @@ static void amd_pmu_restore_all(u64 ctrl)
if (!ctrl)
return;

- for (idx = 0; idx < nr_counters_generic; idx++) {
+ for (idx = 0; idx < x86_pmu.num_counters; idx++) {
u64 val;

if (!test_bit(idx, cpuc->active_mask))
@@ -527,7 +522,7 @@ x86_perf_counter_set_period(struct perf_counter *counter,
atomic64_set(&hwc->prev_count, (u64)-left);

err = checking_wrmsrl(hwc->counter_base + idx,
- (u64)(-left) & counter_value_mask);
+ (u64)(-left) & x86_pmu.counter_mask);
}

static inline void
@@ -621,8 +616,9 @@ static int x86_pmu_enable(struct perf_counter *counter)
/* Try to get the previous generic counter again */
if (test_and_set_bit(idx, cpuc->used)) {
try_generic:
- idx = find_first_zero_bit(cpuc->used, nr_counters_generic);
- if (idx == nr_counters_generic)
+ idx = find_first_zero_bit(cpuc->used,
+ x86_pmu.num_counters);
+ if (idx == x86_pmu.num_counters)
return -EAGAIN;

set_bit(idx, cpuc->used);
@@ -654,7 +650,7 @@ void perf_counter_print_debug(void)
struct cpu_hw_counters *cpuc;
int cpu, idx;

- if (!nr_counters_generic)
+ if (!x86_pmu.num_counters)
return;

local_irq_disable();
@@ -676,7 +672,7 @@ void perf_counter_print_debug(void)
}
pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used);

- for (idx = 0; idx < nr_counters_generic; idx++) {
+ for (idx = 0; idx < x86_pmu.num_counters; idx++) {
rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
rdmsrl(x86_pmu.perfctr + idx, pmc_count);

@@ -689,7 +685,7 @@ void perf_counter_print_debug(void)
pr_info("CPU#%d: gen-PMC%d left: %016llx\n",
cpu, idx, prev_left);
}
- for (idx = 0; idx < nr_counters_fixed; idx++) {
+ for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);

pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
@@ -911,6 +907,9 @@ static struct x86_pmu amd_pmu = {
.event_map = amd_pmu_event_map,
.raw_event = amd_pmu_raw_event,
.max_events = ARRAY_SIZE(amd_perfmon_event_map),
+ .num_counters = 4,
+ .counter_bits = 48,
+ .counter_mask = (1ULL << 48) - 1,
};

static int intel_pmu_init(void)
@@ -941,10 +940,10 @@ static int intel_pmu_init(void)
pr_info("... mask length: %d\n", eax.split.mask_length);

x86_pmu = intel_pmu;
-
- nr_counters_generic = eax.split.num_counters;
- nr_counters_fixed = edx.split.num_counters_fixed;
- counter_value_mask = (1ULL << eax.split.bit_width) - 1;
+ x86_pmu.num_counters = eax.split.num_counters;
+ x86_pmu.num_counters_fixed = edx.split.num_counters_fixed;
+ x86_pmu.counter_bits = eax.split.bit_width;
+ x86_pmu.counter_mask = (1ULL << eax.split.bit_width) - 1;

return 0;
}
@@ -952,12 +951,6 @@ static int intel_pmu_init(void)
static int amd_pmu_init(void)
{
x86_pmu = amd_pmu;
-
- nr_counters_generic = 4;
- nr_counters_fixed = 0;
- counter_value_mask = 0x0000FFFFFFFFFFFFULL;
- counter_value_bits = 48;
-
pr_info("AMD Performance Monitoring support detected.\n");
return 0;
}
@@ -979,25 +972,26 @@ void __init init_hw_perf_counters(void)
if (err != 0)
return;

- pr_info("... num counters: %d\n", nr_counters_generic);
- if (nr_counters_generic > X86_PMC_MAX_GENERIC) {
- nr_counters_generic = X86_PMC_MAX_GENERIC;
+ pr_info("... num counters: %d\n", x86_pmu.num_counters);
+ if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
+ x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!",
- nr_counters_generic, X86_PMC_MAX_GENERIC);
+ x86_pmu.num_counters, X86_PMC_MAX_GENERIC);
}
- perf_counter_mask = (1 << nr_counters_generic) - 1;
- perf_max_counters = nr_counters_generic;
+ perf_counter_mask = (1 << x86_pmu.num_counters) - 1;
+ perf_max_counters = x86_pmu.num_counters;

- pr_info("... value mask: %016Lx\n", counter_value_mask);
+ pr_info("... value mask: %016Lx\n", x86_pmu.counter_mask);

- if (nr_counters_fixed > X86_PMC_MAX_FIXED) {
- nr_counters_fixed = X86_PMC_MAX_FIXED;
+ if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
+ x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED;
WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!",
- nr_counters_fixed, X86_PMC_MAX_FIXED);
+ x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED);
}
- pr_info("... fixed counters: %d\n", nr_counters_fixed);
+ pr_info("... fixed counters: %d\n", x86_pmu.num_counters_fixed);

- perf_counter_mask |= ((1LL << nr_counters_fixed)-1) << X86_PMC_IDX_FIXED;
+ perf_counter_mask |=
+ ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED;

pr_info("... counter mask: %016Lx\n", perf_counter_mask);
perf_counters_initialized = true;
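
As a quick sanity check on the mask arithmetic that moves into struct
x86_pmu above, a small user-space program (the 40-bit Intel width is
only an illustrative value; the real width comes from CPUID) showing
that (1ULL << 48) - 1 equals the previously hard-coded AMD mask:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        /* AMD: 48-bit counters; the patch replaces the literal
         * 0x0000FFFFFFFFFFFFULL with the equivalent (1ULL << 48) - 1. */
        uint64_t amd_mask = (1ULL << 48) - 1;
        assert(amd_mask == 0x0000FFFFFFFFFFFFULL);

        /* Intel: the bit width is read from CPUID; 40 is just an example. */
        int bit_width = 40;
        uint64_t intel_mask = (1ULL << bit_width) - 1;

        printf("amd   mask: %016llx\n", (unsigned long long)amd_mask);
        printf("intel mask: %016llx\n", (unsigned long long)intel_mask);
        return 0;
}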

Subject: [tip:perfcounters/core] perf_counter, x86: make x86_pmu_read() static inline

Commit-ID: bb775fc2d1dcd1aa6eafde37a8289ba2d80783aa
Gitweb: http://git.kernel.org/tip/bb775fc2d1dcd1aa6eafde37a8289ba2d80783aa
Author: Robert Richter <[email protected]>
AuthorDate: Wed, 29 Apr 2009 12:47:14 +0200
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 29 Apr 2009 14:51:09 +0200

perf_counter, x86: make x86_pmu_read() static inline

[ Impact: micro-optimization ]

Signed-off-by: Robert Richter <[email protected]>
Cc: Paul Mackerras <[email protected]>
Acked-by: Peter Zijlstra <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>


---
arch/x86/kernel/cpu/perf_counter.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 15d2c03..3f3ae47 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -1002,7 +1002,7 @@ void __init init_hw_perf_counters(void)
register_die_notifier(&perf_counter_nmi_notifier);
}

-static void x86_pmu_read(struct perf_counter *counter)
+static inline void x86_pmu_read(struct perf_counter *counter)
{
x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
}

Subject: [tip:perfcounters/core] perf_counter, x86: rename cpuc->active_mask

Commit-ID: 93904966934193204ad08e951f806d5631c29eb3
Gitweb: http://git.kernel.org/tip/93904966934193204ad08e951f806d5631c29eb3
Author: Robert Richter <[email protected]>
AuthorDate: Wed, 29 Apr 2009 12:47:15 +0200
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 29 Apr 2009 14:51:09 +0200

perf_counter, x86: rename cpuc->active_mask

This is to have a consistent naming scheme with cpuc->used.

[ Impact: cleanup ]

Signed-off-by: Robert Richter <[email protected]>
Cc: Paul Mackerras <[email protected]>
Acked-by: Peter Zijlstra <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>


---
arch/x86/kernel/cpu/perf_counter.c | 10 +++++-----
1 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 3f3ae47..9ec51a6 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -29,9 +29,9 @@ static u64 perf_counter_mask __read_mostly;
struct cpu_hw_counters {
struct perf_counter *counters[X86_PMC_IDX_MAX];
unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ unsigned long active[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
unsigned long interrupts;
u64 throttle_ctrl;
- unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
int enabled;
};

@@ -334,7 +334,7 @@ static u64 amd_pmu_save_disable_all(void)
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
u64 val;

- if (!test_bit(idx, cpuc->active_mask))
+ if (!test_bit(idx, cpuc->active))
continue;
rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE))
@@ -376,7 +376,7 @@ static void amd_pmu_restore_all(u64 ctrl)
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
u64 val;

- if (!test_bit(idx, cpuc->active_mask))
+ if (!test_bit(idx, cpuc->active))
continue;
rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
if (val & ARCH_PERFMON_EVENTSEL0_ENABLE)
@@ -424,7 +424,7 @@ static void amd_pmu_enable_counter(int idx, u64 config)
{
struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);

- set_bit(idx, cpuc->active_mask);
+ set_bit(idx, cpuc->active);
if (cpuc->enabled)
config |= ARCH_PERFMON_EVENTSEL0_ENABLE;

@@ -448,7 +448,7 @@ static void amd_pmu_disable_counter(int idx, u64 config)
{
struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);

- clear_bit(idx, cpuc->active_mask);
+ clear_bit(idx, cpuc->active);
wrmsrl(MSR_K7_EVNTSEL0 + idx, config);

}

Subject: [tip:perfcounters/core] perf_counter, x86: generic use of cpuc->active

Commit-ID: 095342389e2ed8deed07b3076f990260ce3c7c9f
Gitweb: http://git.kernel.org/tip/095342389e2ed8deed07b3076f990260ce3c7c9f
Author: Robert Richter <[email protected]>
AuthorDate: Wed, 29 Apr 2009 12:47:16 +0200
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 29 Apr 2009 14:51:10 +0200

perf_counter, x86: generic use of cpuc->active

cpuc->active now indicates an enabled counter, which also implies a
valid pointer in cpuc->counters[]. In contrast, cpuc->used only locks
the counter, but it may still be uninitialized.

[ Impact: refactor and generalize code ]

Signed-off-by: Robert Richter <[email protected]>
Cc: Paul Mackerras <[email protected]>
Acked-by: Peter Zijlstra <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>


---
arch/x86/kernel/cpu/perf_counter.c | 20 +++++++++-----------
1 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 9ec51a6..f7fd4a3 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -424,7 +424,6 @@ static void amd_pmu_enable_counter(int idx, u64 config)
{
struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);

- set_bit(idx, cpuc->active);
if (cpuc->enabled)
config |= ARCH_PERFMON_EVENTSEL0_ENABLE;

@@ -446,9 +445,6 @@ static void intel_pmu_disable_counter(int idx, u64 config)

static void amd_pmu_disable_counter(int idx, u64 config)
{
- struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
-
- clear_bit(idx, cpuc->active);
wrmsrl(MSR_K7_EVNTSEL0 + idx, config);

}
@@ -633,10 +629,7 @@ try_generic:
__x86_pmu_disable(counter, hwc, idx);

cpuc->counters[idx] = counter;
- /*
- * Make it visible before enabling the hw:
- */
- barrier();
+ set_bit(idx, cpuc->active);

x86_perf_counter_set_period(counter, hwc, idx);
__x86_pmu_enable(counter, hwc, idx);
@@ -700,10 +693,13 @@ static void x86_pmu_disable(struct perf_counter *counter)
struct hw_perf_counter *hwc = &counter->hw;
unsigned int idx = hwc->idx;

+ /*
+ * Must be done before we disable, otherwise the nmi handler
+ * could reenable again:
+ */
+ clear_bit(idx, cpuc->active);
__x86_pmu_disable(counter, hwc, idx);

- clear_bit(idx, cpuc->used);
- cpuc->counters[idx] = NULL;
/*
* Make sure the cleared pointer becomes visible before we
* (potentially) free the counter:
@@ -715,6 +711,8 @@ static void x86_pmu_disable(struct perf_counter *counter)
* that we are disabling:
*/
x86_perf_counter_update(counter, hwc, idx);
+ cpuc->counters[idx] = NULL;
+ clear_bit(idx, cpuc->used);
}

/*
@@ -763,7 +761,7 @@ again:
struct perf_counter *counter = cpuc->counters[bit];

clear_bit(bit, (unsigned long *) &status);
- if (!counter)
+ if (!test_bit(bit, cpuc->active))
continue;

intel_pmu_save_and_restart(counter);
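
A toy user-space model of the distinction described in the changelog
(plain integers instead of the kernel's bitmap helpers; all names are
stand-ins): "used" only reserves a slot, while "active" additionally
guarantees that counters[idx] is valid, which is what the interrupt
handlers test:

#include <stdio.h>

#define NUM_COUNTERS 4

static unsigned long used, active;
static void *counters[NUM_COUNTERS];

static void reserve(int idx)           { used |= 1UL << idx; }
static void activate(int idx, void *c) { counters[idx] = c; active |= 1UL << idx; }
static void deactivate(int idx)        { active &= ~(1UL << idx); /* before hw disable */ }
static void release(int idx)           { counters[idx] = NULL; used &= ~(1UL << idx); }

int main(void)
{
        int dummy;

        reserve(2);             /* slot locked, pointer not yet valid */
        activate(2, &dummy);    /* from here an irq handler may use counters[2] */
        deactivate(2);          /* cleared first so the handler cannot re-enable */
        release(2);

        printf("used=%lx active=%lx\n", used, active);
        return 0;
}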

Subject: [tip:perfcounters/core] perf_counter, x86: consistent use of type int for counter index

Commit-ID: 6f00cada07bb5da7f751929d3173494dcc5446cc
Gitweb: http://git.kernel.org/tip/6f00cada07bb5da7f751929d3173494dcc5446cc
Author: Robert Richter <[email protected]>
AuthorDate: Wed, 29 Apr 2009 12:47:17 +0200
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 29 Apr 2009 14:51:10 +0200

perf_counter, x86: consistent use of type int for counter index

The counter index is sometimes declared as unsigned int. This patch
changes it to use int consistently.

[ Impact: cleanup ]

Signed-off-by: Robert Richter <[email protected]>
Cc: Paul Mackerras <[email protected]>
Acked-by: Peter Zijlstra <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>


---
arch/x86/kernel/cpu/perf_counter.c | 8 ++++----
include/linux/perf_counter.h | 2 +-
2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index f7fd4a3..d8beebe 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -459,7 +459,7 @@ static void hw_perf_disable(int idx, u64 config)

static inline void
__pmc_fixed_disable(struct perf_counter *counter,
- struct hw_perf_counter *hwc, unsigned int __idx)
+ struct hw_perf_counter *hwc, int __idx)
{
int idx = __idx - X86_PMC_IDX_FIXED;
u64 ctrl_val, mask;
@@ -474,7 +474,7 @@ __pmc_fixed_disable(struct perf_counter *counter,

static inline void
__x86_pmu_disable(struct perf_counter *counter,
- struct hw_perf_counter *hwc, unsigned int idx)
+ struct hw_perf_counter *hwc, int idx)
{
if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
__pmc_fixed_disable(counter, hwc, idx);
@@ -523,7 +523,7 @@ x86_perf_counter_set_period(struct perf_counter *counter,

static inline void
__pmc_fixed_enable(struct perf_counter *counter,
- struct hw_perf_counter *hwc, unsigned int __idx)
+ struct hw_perf_counter *hwc, int __idx)
{
int idx = __idx - X86_PMC_IDX_FIXED;
u64 ctrl_val, bits, mask;
@@ -691,7 +691,7 @@ static void x86_pmu_disable(struct perf_counter *counter)
{
struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
struct hw_perf_counter *hwc = &counter->hw;
- unsigned int idx = hwc->idx;
+ int idx = hwc->idx;

/*
* Must be done before we disable, otherwise the nmi handler
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index c3db52d..41aed42 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -318,7 +318,7 @@ struct hw_perf_counter {
unsigned long config_base;
unsigned long counter_base;
int nmi;
- unsigned int idx;
+ int idx;
};
union { /* software */
atomic64_t count;

Subject: [tip:perfcounters/core] perf_counter, x86: rework counter enable functions

Commit-ID: 7c90cc45f89af4dd4617f97d452740ad95b800d5
Gitweb: http://git.kernel.org/tip/7c90cc45f89af4dd4617f97d452740ad95b800d5
Author: Robert Richter <[email protected]>
AuthorDate: Wed, 29 Apr 2009 12:47:18 +0200
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 29 Apr 2009 14:51:11 +0200

perf_counter, x86: rework counter enable functions

There is vendor-specific code in generic x86 code, and there is
vendor-specific code that could be generic. This patch introduces
x86_pmu_enable_counter() for generic x86 code. Fixed-counter code for
Intel is moved into Intel-only functions. In the end, checks and calls
via function pointers are reduced to the necessary minimum. The
internal function interface changed as well.

[ Impact: refactor and generalize code ]

Signed-off-by: Robert Richter <[email protected]>
Cc: Paul Mackerras <[email protected]>
Acked-by: Peter Zijlstra <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>


---
arch/x86/kernel/cpu/perf_counter.c | 52 ++++++++++++++++-------------------
1 files changed, 24 insertions(+), 28 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index d8beebe..ae55933 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -44,7 +44,7 @@ struct x86_pmu {
int (*handle_irq)(struct pt_regs *, int);
u64 (*save_disable_all)(void);
void (*restore_all)(u64);
- void (*enable)(int, u64);
+ void (*enable)(struct hw_perf_counter *, int);
void (*disable)(int, u64);
unsigned eventsel;
unsigned perfctr;
@@ -414,28 +414,15 @@ static inline void intel_pmu_ack_status(u64 ack)
wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
}

-static void intel_pmu_enable_counter(int idx, u64 config)
+static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
{
- wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx,
- config | ARCH_PERFMON_EVENTSEL0_ENABLE);
-}
-
-static void amd_pmu_enable_counter(int idx, u64 config)
-{
- struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
-
- if (cpuc->enabled)
- config |= ARCH_PERFMON_EVENTSEL0_ENABLE;
-
- wrmsrl(MSR_K7_EVNTSEL0 + idx, config);
-}
+ int err;

-static void hw_perf_enable(int idx, u64 config)
-{
if (unlikely(!perf_counters_initialized))
return;

- x86_pmu.enable(idx, config);
+ err = checking_wrmsrl(hwc->config_base + idx,
+ hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE);
}

static void intel_pmu_disable_counter(int idx, u64 config)
@@ -522,8 +509,7 @@ x86_perf_counter_set_period(struct perf_counter *counter,
}

static inline void
-__pmc_fixed_enable(struct perf_counter *counter,
- struct hw_perf_counter *hwc, int __idx)
+intel_pmu_enable_fixed(struct hw_perf_counter *hwc, int __idx)
{
int idx = __idx - X86_PMC_IDX_FIXED;
u64 ctrl_val, bits, mask;
@@ -548,14 +534,24 @@ __pmc_fixed_enable(struct perf_counter *counter,
err = checking_wrmsrl(hwc->config_base, ctrl_val);
}

-static void
-__x86_pmu_enable(struct perf_counter *counter,
- struct hw_perf_counter *hwc, int idx)
+static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
{
- if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
- __pmc_fixed_enable(counter, hwc, idx);
+ if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
+ intel_pmu_enable_fixed(hwc, idx);
+ return;
+ }
+
+ x86_pmu_enable_counter(hwc, idx);
+}
+
+static void amd_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
+{
+ struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+
+ if (cpuc->enabled)
+ x86_pmu_enable_counter(hwc, idx);
else
- hw_perf_enable(idx, hwc->config);
+ amd_pmu_disable_counter(idx, hwc->config);
}

static int
@@ -632,7 +628,7 @@ try_generic:
set_bit(idx, cpuc->active);

x86_perf_counter_set_period(counter, hwc, idx);
- __x86_pmu_enable(counter, hwc, idx);
+ x86_pmu.enable(hwc, idx);

return 0;
}
@@ -728,7 +724,7 @@ static void intel_pmu_save_and_restart(struct perf_counter *counter)
x86_perf_counter_set_period(counter, hwc, idx);

if (counter->state == PERF_COUNTER_STATE_ACTIVE)
- __x86_pmu_enable(counter, hwc, idx);
+ intel_pmu_enable_counter(hwc, idx);
}

/*
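
A toy model of the AMD enable path introduced above (ENABLE_BIT and the
msr array are stand-ins for ARCH_PERFMON_EVENTSEL0_ENABLE and wrmsrl()):
since AMD has no global-enable MSR, a globally disabled state is
emulated by writing the eventsel without the enable bit, and the real
enable happens only once cpuc->enabled is set again:

#include <stdio.h>

#define ENABLE_BIT (1ULL << 22)                 /* models ARCH_PERFMON_EVENTSEL0_ENABLE */

static unsigned long long eventsel_msr[4];      /* models the K7 eventsel MSRs */
static int globally_enabled;                    /* models cpuc->enabled */

static void amd_enable_counter(int idx, unsigned long long config)
{
        if (globally_enabled)
                eventsel_msr[idx] = config | ENABLE_BIT;
        else
                eventsel_msr[idx] = config;     /* deferred: not counting yet */
}

int main(void)
{
        amd_enable_counter(0, 0x76);            /* 0x76: AMD cpu-cycles event */
        printf("globally disabled: msr0=%llx\n", eventsel_msr[0]);

        globally_enabled = 1;
        amd_enable_counter(0, 0x76);
        printf("globally enabled:  msr0=%llx\n", eventsel_msr[0]);
        return 0;
}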

Subject: [tip:perfcounters/core] perf_counter, x86: change and remove pmu initialization checks

Commit-ID: 85cf9dba92152bb4edec118b2f4f0be1ae7fdcab
Gitweb: http://git.kernel.org/tip/85cf9dba92152bb4edec118b2f4f0be1ae7fdcab
Author: Robert Richter <[email protected]>
AuthorDate: Wed, 29 Apr 2009 12:47:20 +0200
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 29 Apr 2009 14:51:12 +0200

perf_counter, x86: change and remove pmu initialization checks

Some functions are only called if the pmu was properly initialized, so
the initialization checks in them can be removed. The way initialization
is checked changed too: now the pointer to the interrupt handler is
checked, and if it is set, the pmu is initialized. This also removes a
static variable and uses struct x86_pmu as the only data source for the
check.

[ Impact: simplify code ]

Signed-off-by: Robert Richter <[email protected]>
Cc: Paul Mackerras <[email protected]>
Acked-by: Peter Zijlstra <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>


---
arch/x86/kernel/cpu/perf_counter.c | 34 +++++++++++++---------------------
1 files changed, 13 insertions(+), 21 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index df9012b..2d3681b 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -23,7 +23,6 @@
#include <asm/stacktrace.h>
#include <asm/nmi.h>

-static bool perf_counters_initialized __read_mostly;
static u64 perf_counter_mask __read_mostly;

struct cpu_hw_counters {
@@ -227,6 +226,11 @@ static void hw_perf_counter_destroy(struct perf_counter *counter)
}
}

+static inline int x86_pmu_initialized(void)
+{
+ return x86_pmu.handle_irq != NULL;
+}
+
/*
* Setup the hardware configuration for a given hw_event_type
*/
@@ -240,8 +244,8 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
return -ENOSYS;

- if (unlikely(!perf_counters_initialized))
- return -EINVAL;
+ if (!x86_pmu_initialized())
+ return -ENODEV;

err = 0;
if (atomic_inc_not_zero(&num_counters)) {
@@ -348,9 +352,8 @@ static u64 amd_pmu_save_disable_all(void)

u64 hw_perf_save_disable(void)
{
- if (unlikely(!perf_counters_initialized))
+ if (!x86_pmu_initialized())
return 0;
-
return x86_pmu.save_disable_all();
}
/*
@@ -388,9 +391,8 @@ static void amd_pmu_restore_all(u64 ctrl)

void hw_perf_restore(u64 ctrl)
{
- if (unlikely(!perf_counters_initialized))
+ if (!x86_pmu_initialized())
return;
-
x86_pmu.restore_all(ctrl);
}
/*
@@ -402,8 +404,6 @@ static inline u64 intel_pmu_get_status(u64 mask)
{
u64 status;

- if (unlikely(!perf_counters_initialized))
- return 0;
rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);

return status;
@@ -417,10 +417,6 @@ static inline void intel_pmu_ack_status(u64 ack)
static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
{
int err;
-
- if (unlikely(!perf_counters_initialized))
- return;
-
err = checking_wrmsrl(hwc->config_base + idx,
hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE);
}
@@ -428,10 +424,6 @@ static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
static inline void x86_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
{
int err;
-
- if (unlikely(!perf_counters_initialized))
- return;
-
err = checking_wrmsrl(hwc->config_base + idx,
hwc->config);
}
@@ -787,10 +779,10 @@ void perf_counter_unthrottle(void)
{
struct cpu_hw_counters *cpuc;

- if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
+ if (!x86_pmu_initialized())
return;

- if (unlikely(!perf_counters_initialized))
+ if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
return;

cpuc = &__get_cpu_var(cpu_hw_counters);
@@ -829,8 +821,9 @@ void perf_counters_lapic_init(int nmi)
{
u32 apic_val;

- if (!perf_counters_initialized)
+ if (!x86_pmu_initialized())
return;
+
/*
* Enable the performance counter vector in the APIC LVT:
*/
@@ -988,7 +981,6 @@ void __init init_hw_perf_counters(void)
((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED;

pr_info("... counter mask: %016Lx\n", perf_counter_mask);
- perf_counters_initialized = true;

perf_counters_lapic_init(0);
register_die_notifier(&perf_counter_nmi_notifier);
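
A minimal sketch of the check pattern used above (struct and handler are
trimmed-down stand-ins): the presence of a vendor interrupt handler in
the single x86_pmu instance replaces the separate
perf_counters_initialized flag:

#include <stddef.h>
#include <stdio.h>

struct pmu_stub {
        int (*handle_irq)(void);
};

static struct pmu_stub x86_pmu;                 /* zero-initialized: handle_irq == NULL */

static int x86_pmu_initialized(void)
{
        return x86_pmu.handle_irq != NULL;
}

static int vendor_handler(void) { return 1; }

int main(void)
{
        printf("before init: %d\n", x86_pmu_initialized());     /* 0 */
        x86_pmu.handle_irq = vendor_handler;                    /* vendor init sets it */
        printf("after init:  %d\n", x86_pmu_initialized());     /* 1 */
        return 0;
}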

Subject: [tip:perfcounters/core] perf_counter, x86: return raw count with x86_perf_counter_update()

Commit-ID: 4b7bfd0d276da3a006d37e85d3cf900d7a14ae2a
Gitweb: http://git.kernel.org/tip/4b7bfd0d276da3a006d37e85d3cf900d7a14ae2a
Author: Robert Richter <[email protected]>
AuthorDate: Wed, 29 Apr 2009 12:47:22 +0200
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 29 Apr 2009 14:51:13 +0200

perf_counter, x86: return raw count with x86_perf_counter_update()

To check whether a counter overflowed on AMD cpus, the upper bit of the
raw counter value must be examined. This value is already available
internally in x86_perf_counter_update(). Now the value is returned so
that it can be used directly to check for overflows.

[ Impact: micro-optimization ]

Signed-off-by: Robert Richter <[email protected]>
Cc: Paul Mackerras <[email protected]>
Acked-by: Peter Zijlstra <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>


---
arch/x86/kernel/cpu/perf_counter.c | 7 ++++---
1 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index f4d59d4..a8a53ab 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -132,7 +132,7 @@ static u64 amd_pmu_raw_event(u64 event)
* Can only be executed on the CPU where the counter is active.
* Returns the delta events processed.
*/
-static void
+static u64
x86_perf_counter_update(struct perf_counter *counter,
struct hw_perf_counter *hwc, int idx)
{
@@ -165,6 +165,8 @@ again:

atomic64_add(delta, &counter->count);
atomic64_sub(delta, &hwc->period_left);
+
+ return new_raw_count;
}

static atomic_t num_counters;
@@ -785,8 +787,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs, int nmi)
continue;
counter = cpuc->counters[idx];
hwc = &counter->hw;
- x86_perf_counter_update(counter, hwc, idx);
- val = atomic64_read(&hwc->prev_count);
+ val = x86_perf_counter_update(counter, hwc, idx);
if (val & (1ULL << (x86_pmu.counter_bits - 1)))
continue;
/* counter overflow */

Subject: [tip:perfcounters/core] perf_counter, x86: rework counter disable functions

Commit-ID: d43698918bd46c71d494555fb92195fbea1fcb6c
Gitweb: http://git.kernel.org/tip/d43698918bd46c71d494555fb92195fbea1fcb6c
Author: Robert Richter <[email protected]>
AuthorDate: Wed, 29 Apr 2009 12:47:19 +0200
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 29 Apr 2009 14:51:11 +0200

perf_counter, x86: rework counter disable functions

As with the enable functions, this patch reworks the disable functions
and introduces x86_pmu_disable_counter(). The internal function
interface in struct x86_pmu changed too.

[ Impact: refactor and generalize code ]

Signed-off-by: Robert Richter <[email protected]>
Cc: Paul Mackerras <[email protected]>
Acked-by: Peter Zijlstra <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>


---
arch/x86/kernel/cpu/perf_counter.c | 48 +++++++++++++++++------------------
1 files changed, 23 insertions(+), 25 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index ae55933..df9012b 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -45,7 +45,7 @@ struct x86_pmu {
u64 (*save_disable_all)(void);
void (*restore_all)(u64);
void (*enable)(struct hw_perf_counter *, int);
- void (*disable)(int, u64);
+ void (*disable)(struct hw_perf_counter *, int);
unsigned eventsel;
unsigned perfctr;
u64 (*event_map)(int);
@@ -425,28 +425,19 @@ static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE);
}

-static void intel_pmu_disable_counter(int idx, u64 config)
+static inline void x86_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
{
- wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx, config);
-}
-
-static void amd_pmu_disable_counter(int idx, u64 config)
-{
- wrmsrl(MSR_K7_EVNTSEL0 + idx, config);
-
-}
+ int err;

-static void hw_perf_disable(int idx, u64 config)
-{
if (unlikely(!perf_counters_initialized))
return;

- x86_pmu.disable(idx, config);
+ err = checking_wrmsrl(hwc->config_base + idx,
+ hwc->config);
}

static inline void
-__pmc_fixed_disable(struct perf_counter *counter,
- struct hw_perf_counter *hwc, int __idx)
+intel_pmu_disable_fixed(struct hw_perf_counter *hwc, int __idx)
{
int idx = __idx - X86_PMC_IDX_FIXED;
u64 ctrl_val, mask;
@@ -460,13 +451,20 @@ __pmc_fixed_disable(struct perf_counter *counter,
}

static inline void
-__x86_pmu_disable(struct perf_counter *counter,
- struct hw_perf_counter *hwc, int idx)
+intel_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
{
- if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
- __pmc_fixed_disable(counter, hwc, idx);
- else
- hw_perf_disable(idx, hwc->config);
+ if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
+ intel_pmu_disable_fixed(hwc, idx);
+ return;
+ }
+
+ x86_pmu_disable_counter(hwc, idx);
+}
+
+static inline void
+amd_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
+{
+ x86_pmu_disable_counter(hwc, idx);
}

static DEFINE_PER_CPU(u64, prev_left[X86_PMC_IDX_MAX]);
@@ -551,7 +549,7 @@ static void amd_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
if (cpuc->enabled)
x86_pmu_enable_counter(hwc, idx);
else
- amd_pmu_disable_counter(idx, hwc->config);
+ x86_pmu_disable_counter(hwc, idx);
}

static int
@@ -622,7 +620,7 @@ try_generic:

perf_counters_lapic_init(hwc->nmi);

- __x86_pmu_disable(counter, hwc, idx);
+ x86_pmu.disable(hwc, idx);

cpuc->counters[idx] = counter;
set_bit(idx, cpuc->active);
@@ -694,7 +692,7 @@ static void x86_pmu_disable(struct perf_counter *counter)
* could reenable again:
*/
clear_bit(idx, cpuc->active);
- __x86_pmu_disable(counter, hwc, idx);
+ x86_pmu.disable(hwc, idx);

/*
* Make sure the cleared pointer becomes visible before we
@@ -762,7 +760,7 @@ again:

intel_pmu_save_and_restart(counter);
if (perf_counter_overflow(counter, nmi, regs, 0))
- __x86_pmu_disable(counter, &counter->hw, bit);
+ intel_pmu_disable_counter(&counter->hw, bit);
}

intel_pmu_ack_status(ack);

Subject: [tip:perfcounters/core] perf_counter, x86: implement the interrupt handler for AMD cpus

Commit-ID: a29aa8a7ff93e4196d558036928597e68337dd8d
Gitweb: http://git.kernel.org/tip/a29aa8a7ff93e4196d558036928597e68337dd8d
Author: Robert Richter <[email protected]>
AuthorDate: Wed, 29 Apr 2009 12:47:21 +0200
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 29 Apr 2009 14:51:12 +0200

perf_counter, x86: implement the interrupt handler for AMD cpus

This patch implements the interrupt handler for AMD performance
counters. Unlike the Intel pmu, there is no single status register and
there are no fixed counters. This makes the handler very different, so
it is useful to keep it vendor specific. To check whether a counter
overflowed, the upper bit of the counter value is checked. Only counters
with the active bit set are examined.

With this patch throttling is enabled for AMD performance counters.

This patch also reenables Linux performance counters on AMD cpus.

[ Impact: re-enable perfcounters on AMD CPUs ]

Signed-off-by: Robert Richter <[email protected]>
Cc: Paul Mackerras <[email protected]>
Acked-by: Peter Zijlstra <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>


---
arch/x86/kernel/cpu/perf_counter.c | 45 +++++++++++++++++++++++++++++------
1 files changed, 37 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 2d3681b..f4d59d4 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -240,10 +240,6 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
struct hw_perf_counter *hwc = &counter->hw;
int err;

- /* disable temporarily */
- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
- return -ENOSYS;
-
if (!x86_pmu_initialized())
return -ENODEV;

@@ -773,7 +769,43 @@ out:
return ret;
}

-static int amd_pmu_handle_irq(struct pt_regs *regs, int nmi) { return 0; }
+static int amd_pmu_handle_irq(struct pt_regs *regs, int nmi)
+{
+ int cpu = smp_processor_id();
+ struct cpu_hw_counters *cpuc = &per_cpu(cpu_hw_counters, cpu);
+ u64 val;
+ int handled = 0;
+ struct perf_counter *counter;
+ struct hw_perf_counter *hwc;
+ int idx;
+
+ ++cpuc->interrupts;
+ for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ if (!test_bit(idx, cpuc->active))
+ continue;
+ counter = cpuc->counters[idx];
+ hwc = &counter->hw;
+ x86_perf_counter_update(counter, hwc, idx);
+ val = atomic64_read(&hwc->prev_count);
+ if (val & (1ULL << (x86_pmu.counter_bits - 1)))
+ continue;
+ /* counter overflow */
+ x86_perf_counter_set_period(counter, hwc, idx);
+ handled = 1;
+ inc_irq_stat(apic_perf_irqs);
+ if (perf_counter_overflow(counter, nmi, regs, 0))
+ amd_pmu_disable_counter(hwc, idx);
+ else if (cpuc->interrupts >= PERFMON_MAX_INTERRUPTS)
+ /*
+ * do not reenable when throttled, but reload
+ * the register
+ */
+ amd_pmu_disable_counter(hwc, idx);
+ else if (counter->state == PERF_COUNTER_STATE_ACTIVE)
+ amd_pmu_enable_counter(hwc, idx);
+ }
+ return handled;
+}

void perf_counter_unthrottle(void)
{
@@ -782,9 +814,6 @@ void perf_counter_unthrottle(void)
if (!x86_pmu_initialized())
return;

- if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
- return;
-
cpuc = &__get_cpu_var(cpu_hw_counters);
if (cpuc->interrupts >= PERFMON_MAX_INTERRUPTS) {
if (printk_ratelimit())
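
A user-space demonstration of the overflow test used in the handler
above (the period value is made up): the counter is preset to -period,
so its top bit stays set while it counts up; once it wraps past 2^48 the
value lands in the lower half of the range and the cleared top bit
signals the overflow:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        const int counter_bits = 48;                    /* AMD counter width */
        const uint64_t mask = (1ULL << counter_bits) - 1;
        const uint64_t top  = 1ULL << (counter_bits - 1);
        const uint64_t period = 100000;                 /* example period */

        uint64_t val = (0 - period) & mask;             /* value programmed into the counter */
        printf("start:    %012llx top=%d\n", (unsigned long long)val, !!(val & top));

        val = (val + period - 1) & mask;                /* one event before overflow */
        printf("pre-ovf:  %012llx top=%d\n", (unsigned long long)val, !!(val & top));

        val = (val + 1) & mask;                         /* the overflowing event */
        printf("overflow: %012llx top=%d\n", (unsigned long long)val, !!(val & top));
        return 0;
}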

Subject: [tip:perfcounters/core] perf_counter, x86: introduce max_period variable

Commit-ID: c619b8ffb1cec6a431687a35695dc6fd292a79e6
Gitweb: http://git.kernel.org/tip/c619b8ffb1cec6a431687a35695dc6fd292a79e6
Author: Robert Richter <[email protected]>
AuthorDate: Wed, 29 Apr 2009 12:47:23 +0200
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 29 Apr 2009 14:51:13 +0200

perf_counter, x86: introduce max_period variable

The counter period that can be programmed differs between x86 pmus.
This introduces a max_period value and allows the generic
implementation for all models to check against the maximum period.

[ Impact: generalize code ]

Signed-off-by: Robert Richter <[email protected]>
Cc: Paul Mackerras <[email protected]>
Acked-by: Peter Zijlstra <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>


---
arch/x86/kernel/cpu/perf_counter.c | 20 ++++++++++++--------
1 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index a8a53ab..4b8715b 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -54,6 +54,7 @@ struct x86_pmu {
int num_counters_fixed;
int counter_bits;
u64 counter_mask;
+ u64 max_period;
};

static struct x86_pmu x86_pmu __read_mostly;
@@ -279,14 +280,8 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
hwc->nmi = 1;

hwc->irq_period = hw_event->irq_period;
- /*
- * Intel PMCs cannot be accessed sanely above 32 bit width,
- * so we install an artificial 1<<31 period regardless of
- * the generic counter period:
- */
- if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
- if ((s64)hwc->irq_period <= 0 || hwc->irq_period > 0x7FFFFFFF)
- hwc->irq_period = 0x7FFFFFFF;
+ if ((s64)hwc->irq_period <= 0 || hwc->irq_period > x86_pmu.max_period)
+ hwc->irq_period = x86_pmu.max_period;

atomic64_set(&hwc->period_left, hwc->irq_period);

@@ -910,6 +905,12 @@ static struct x86_pmu intel_pmu = {
.event_map = intel_pmu_event_map,
.raw_event = intel_pmu_raw_event,
.max_events = ARRAY_SIZE(intel_perfmon_event_map),
+ /*
+ * Intel PMCs cannot be accessed sanely above 32 bit width,
+ * so we install an artificial 1<<31 period regardless of
+ * the generic counter period:
+ */
+ .max_period = (1ULL << 31) - 1,
};

static struct x86_pmu amd_pmu = {
@@ -927,6 +928,8 @@ static struct x86_pmu amd_pmu = {
.num_counters = 4,
.counter_bits = 48,
.counter_mask = (1ULL << 48) - 1,
+ /* use highest bit to detect overflow */
+ .max_period = (1ULL << 47) - 1,
};

static int intel_pmu_init(void)
@@ -999,6 +1002,7 @@ void __init init_hw_perf_counters(void)
perf_max_counters = x86_pmu.num_counters;

pr_info("... value mask: %016Lx\n", x86_pmu.counter_mask);
+ pr_info("... max period: %016Lx\n", x86_pmu.max_period);

if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED;
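
A small sketch of the generic clamp that replaces the Intel-only check
in __hw_perf_counter_init(); the helper name is invented, the two limits
mirror the patch (Intel writes are only sane up to 32 bits, AMD keeps
bit 47 free so the sign-bit overflow test keeps working):

#include <stdint.h>
#include <stdio.h>

static uint64_t clamp_period(int64_t requested, uint64_t max_period)
{
        if (requested <= 0 || (uint64_t)requested > max_period)
                return max_period;
        return (uint64_t)requested;
}

int main(void)
{
        const uint64_t intel_max = (1ULL << 31) - 1;
        const uint64_t amd_max   = (1ULL << 47) - 1;

        printf("%llu\n", (unsigned long long)clamp_period(-1, intel_max));        /* clamped */
        printf("%llu\n", (unsigned long long)clamp_period(1 << 20, amd_max));     /* kept */
        printf("%llu\n", (unsigned long long)clamp_period(1LL << 60, amd_max));   /* clamped */
        return 0;
}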

Subject: [tip:perfcounters/core] perf_counter, x86: remove vendor check in fixed_mode_idx()

Commit-ID: ef7b3e09ffdcd5200aea9523f6b56d331d1c4fc0
Gitweb: http://git.kernel.org/tip/ef7b3e09ffdcd5200aea9523f6b56d331d1c4fc0
Author: Robert Richter <[email protected]>
AuthorDate: Wed, 29 Apr 2009 12:47:24 +0200
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 29 Apr 2009 14:51:14 +0200

perf_counter, x86: remove vendor check in fixed_mode_idx()

The function fixed_mode_idx() is used generically. It now checks the
num_counters_fixed value instead of the vendor to decide whether fixed
counters are present.

[ Impact: generalize code ]

Signed-off-by: Robert Richter <[email protected]>
Cc: Paul Mackerras <[email protected]>
Acked-by: Peter Zijlstra <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>


---
arch/x86/kernel/cpu/perf_counter.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 4b8715b..d1c8036 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -542,7 +542,7 @@ fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)
{
unsigned int event;

- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+ if (!x86_pmu.num_counters_fixed)
return -1;

if (unlikely(hwc->nmi))

Subject: [tip:perfcounters/core] perf_counter, x86: remove unused function argument in intel_pmu_get_status()

Commit-ID: 19d84dab55a383d75c885b5c1a618f5ead96f2f6
Gitweb: http://git.kernel.org/tip/19d84dab55a383d75c885b5c1a618f5ead96f2f6
Author: Robert Richter <[email protected]>
AuthorDate: Wed, 29 Apr 2009 12:47:25 +0200
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 29 Apr 2009 14:51:14 +0200

perf_counter, x86: remove unused function argument in intel_pmu_get_status()

The mask argument is unused and thus can be removed.

[ Impact: cleanup ]

Signed-off-by: Robert Richter <[email protected]>
Cc: Paul Mackerras <[email protected]>
Acked-by: Peter Zijlstra <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>


---
arch/x86/kernel/cpu/perf_counter.c | 6 +++---
1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index d1c8036..856b0b8 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -393,7 +393,7 @@ void hw_perf_restore(u64 ctrl)
*/
EXPORT_SYMBOL_GPL(hw_perf_restore);

-static inline u64 intel_pmu_get_status(u64 mask)
+static inline u64 intel_pmu_get_status(void)
{
u64 status;

@@ -728,7 +728,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs, int nmi)

cpuc->throttle_ctrl = intel_pmu_save_disable_all();

- status = intel_pmu_get_status(cpuc->throttle_ctrl);
+ status = intel_pmu_get_status();
if (!status)
goto out;

@@ -753,7 +753,7 @@ again:
/*
* Repeat if there is more work to be done:
*/
- status = intel_pmu_get_status(cpuc->throttle_ctrl);
+ status = intel_pmu_get_status();
if (status)
goto again;
out:

Subject: [tip:perfcounters/core] perf_counter: update 'perf top' documentation

Commit-ID: 38105f0234d4795c77c7c6845916caf3a395c451
Gitweb: http://git.kernel.org/tip/38105f0234d4795c77c7c6845916caf3a395c451
Author: Robert Richter <[email protected]>
AuthorDate: Wed, 29 Apr 2009 12:47:26 +0200
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 29 Apr 2009 14:51:15 +0200

perf_counter: update 'perf top' documentation

The documentation about the perf-top build was outdated after perfstat
was implemented. This updates it.

[ Impact: update documentation ]

Signed-off-by: Robert Richter <[email protected]>
Cc: Paul Mackerras <[email protected]>
Acked-by: Peter Zijlstra <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>


---
Documentation/perf_counter/builtin-top.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/Documentation/perf_counter/builtin-top.c b/Documentation/perf_counter/builtin-top.c
index 6a276d2..8d28864 100644
--- a/Documentation/perf_counter/builtin-top.c
+++ b/Documentation/perf_counter/builtin-top.c
@@ -3,7 +3,7 @@

Build with:

- cc -O6 -Wall -c -o kerneltop.o kerneltop.c -lrt
+ make -C Documentation/perf_counter/

Sample output:

Subject: Re: [tip:perfcounters/core] perf_counter, x86: rename cpuc->active_mask


* tip-bot for Robert Richter <[email protected]> wrote:

> Commit-ID: 93904966934193204ad08e951f806d5631c29eb3
> Gitweb: http://git.kernel.org/tip/93904966934193204ad08e951f806d5631c29eb3
> Author: Robert Richter <[email protected]>
> AuthorDate: Wed, 29 Apr 2009 12:47:15 +0200
> Committer: Ingo Molnar <[email protected]>
> CommitDate: Wed, 29 Apr 2009 14:51:09 +0200
>
> perf_counter, x86: rename cpuc->active_mask
>
> This is to have a consistent naming scheme with cpuc->used.
>
> [ Impact: cleanup ]
>
> Signed-off-by: Robert Richter <[email protected]>
> Cc: Paul Mackerras <[email protected]>
> Acked-by: Peter Zijlstra <[email protected]>
> LKML-Reference: <[email protected]>
> Signed-off-by: Ingo Molnar <[email protected]>
>
>
> ---
> arch/x86/kernel/cpu/perf_counter.c | 10 +++++-----
> 1 files changed, 5 insertions(+), 5 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
> index 3f3ae47..9ec51a6 100644
> --- a/arch/x86/kernel/cpu/perf_counter.c
> +++ b/arch/x86/kernel/cpu/perf_counter.c
> @@ -29,9 +29,9 @@ static u64 perf_counter_mask __read_mostly;
> struct cpu_hw_counters {
> struct perf_counter *counters[X86_PMC_IDX_MAX];
> unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
> + unsigned long active[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
> unsigned long interrupts;
> u64 throttle_ctrl;
> - unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
> int enabled;
> };

Note, i applied this because later patches had dependencies - but
could you please send a followup patch that renames both ->used and
->active to ->used_mask and ->active_mask?

Thanks,

Ingo

Subject: [PATCH] perf_counter, x86: rename bitmasks to ->used_mask and ->active_mask

This reverts commit 9390496 (perf_counter, x86: rename
cpuc->active_mask). Instead, cpuc->used is renamed to
cpuc->used_mask. See Ingo's comment:

On 29.04.09 13:24:23, Ingo Molnar wrote:
> better would be to change ->used to ->used_mask. That makes it sure
> nobody does this typo:
>
> if (cpuc->used)
>
> it's a lot harder to typo this:
>
> if (cpuc->used_mask)
>

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/kernel/cpu/perf_counter.c | 28 ++++++++++++++--------------
1 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 47e563b..fc06f4d 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -28,8 +28,8 @@ static u64 perf_counter_mask __read_mostly;

struct cpu_hw_counters {
struct perf_counter *counters[X86_PMC_IDX_MAX];
- unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
- unsigned long active[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
unsigned long interrupts;
u64 throttle_ctrl;
int enabled;
@@ -332,7 +332,7 @@ static u64 amd_pmu_save_disable_all(void)
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
u64 val;

- if (!test_bit(idx, cpuc->active))
+ if (!test_bit(idx, cpuc->active_mask))
continue;
rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE))
@@ -373,7 +373,7 @@ static void amd_pmu_restore_all(u64 ctrl)
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
u64 val;

- if (!test_bit(idx, cpuc->active))
+ if (!test_bit(idx, cpuc->active_mask))
continue;
rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
if (val & ARCH_PERFMON_EVENTSEL0_ENABLE)
@@ -576,7 +576,7 @@ static int x86_pmu_enable(struct perf_counter *counter)
* Try to get the fixed counter, if that is already taken
* then try to get a generic counter:
*/
- if (test_and_set_bit(idx, cpuc->used))
+ if (test_and_set_bit(idx, cpuc->used_mask))
goto try_generic;

hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
@@ -590,14 +590,14 @@ static int x86_pmu_enable(struct perf_counter *counter)
} else {
idx = hwc->idx;
/* Try to get the previous generic counter again */
- if (test_and_set_bit(idx, cpuc->used)) {
+ if (test_and_set_bit(idx, cpuc->used_mask)) {
try_generic:
- idx = find_first_zero_bit(cpuc->used,
+ idx = find_first_zero_bit(cpuc->used_mask,
x86_pmu.num_counters);
if (idx == x86_pmu.num_counters)
return -EAGAIN;

- set_bit(idx, cpuc->used);
+ set_bit(idx, cpuc->used_mask);
hwc->idx = idx;
}
hwc->config_base = x86_pmu.eventsel;
@@ -609,7 +609,7 @@ try_generic:
x86_pmu.disable(hwc, idx);

cpuc->counters[idx] = counter;
- set_bit(idx, cpuc->active);
+ set_bit(idx, cpuc->active_mask);

x86_perf_counter_set_period(counter, hwc, idx);
x86_pmu.enable(hwc, idx);
@@ -643,7 +643,7 @@ void perf_counter_print_debug(void)
pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow);
pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed);
}
- pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used);
+ pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used_mask);

for (idx = 0; idx < x86_pmu.num_counters; idx++) {
rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
@@ -677,7 +677,7 @@ static void x86_pmu_disable(struct perf_counter *counter)
* Must be done before we disable, otherwise the nmi handler
* could reenable again:
*/
- clear_bit(idx, cpuc->active);
+ clear_bit(idx, cpuc->active_mask);
x86_pmu.disable(hwc, idx);

/*
@@ -692,7 +692,7 @@ static void x86_pmu_disable(struct perf_counter *counter)
*/
x86_perf_counter_update(counter, hwc, idx);
cpuc->counters[idx] = NULL;
- clear_bit(idx, cpuc->used);
+ clear_bit(idx, cpuc->used_mask);
}

/*
@@ -741,7 +741,7 @@ again:
struct perf_counter *counter = cpuc->counters[bit];

clear_bit(bit, (unsigned long *) &status);
- if (!test_bit(bit, cpuc->active))
+ if (!test_bit(bit, cpuc->active_mask))
continue;

intel_pmu_save_and_restart(counter);
@@ -779,7 +779,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs, int nmi)

++cpuc->interrupts;
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
- if (!test_bit(idx, cpuc->active))
+ if (!test_bit(idx, cpuc->active_mask))
continue;
counter = cpuc->counters[idx];
hwc = &counter->hw;
--
1.6.1.3

Subject: [tip:perfcounters/core] perf_counter, x86: rename bitmasks to ->used_mask and ->active_mask

Commit-ID: 43f6201a22dbf1c5abe1cab96b49bd56fa9df8f4
Gitweb: http://git.kernel.org/tip/43f6201a22dbf1c5abe1cab96b49bd56fa9df8f4
Author: Robert Richter <[email protected]>
AuthorDate: Wed, 29 Apr 2009 16:55:56 +0200
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 29 Apr 2009 22:19:36 +0200

perf_counter, x86: rename bitmasks to ->used_mask and ->active_mask

Standardize on explicitly mentioning '_mask' in fields that
are not plain flags but masks. This avoids typos like:

if (cpuc->used)

(which could easily slip through review unnoticed), while if a
typo looks like this:

if (cpuc->used_mask)

it might get noticed during review.

[ Impact: cleanup ]

Signed-off-by: Robert Richter <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Paul Mackerras <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>


---
arch/x86/kernel/cpu/perf_counter.c | 28 ++++++++++++++--------------
1 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 47e563b..fc06f4d 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -28,8 +28,8 @@ static u64 perf_counter_mask __read_mostly;

struct cpu_hw_counters {
struct perf_counter *counters[X86_PMC_IDX_MAX];
- unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
- unsigned long active[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
unsigned long interrupts;
u64 throttle_ctrl;
int enabled;
@@ -332,7 +332,7 @@ static u64 amd_pmu_save_disable_all(void)
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
u64 val;

- if (!test_bit(idx, cpuc->active))
+ if (!test_bit(idx, cpuc->active_mask))
continue;
rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE))
@@ -373,7 +373,7 @@ static void amd_pmu_restore_all(u64 ctrl)
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
u64 val;

- if (!test_bit(idx, cpuc->active))
+ if (!test_bit(idx, cpuc->active_mask))
continue;
rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
if (val & ARCH_PERFMON_EVENTSEL0_ENABLE)
@@ -576,7 +576,7 @@ static int x86_pmu_enable(struct perf_counter *counter)
* Try to get the fixed counter, if that is already taken
* then try to get a generic counter:
*/
- if (test_and_set_bit(idx, cpuc->used))
+ if (test_and_set_bit(idx, cpuc->used_mask))
goto try_generic;

hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
@@ -590,14 +590,14 @@ static int x86_pmu_enable(struct perf_counter *counter)
} else {
idx = hwc->idx;
/* Try to get the previous generic counter again */
- if (test_and_set_bit(idx, cpuc->used)) {
+ if (test_and_set_bit(idx, cpuc->used_mask)) {
try_generic:
- idx = find_first_zero_bit(cpuc->used,
+ idx = find_first_zero_bit(cpuc->used_mask,
x86_pmu.num_counters);
if (idx == x86_pmu.num_counters)
return -EAGAIN;

- set_bit(idx, cpuc->used);
+ set_bit(idx, cpuc->used_mask);
hwc->idx = idx;
}
hwc->config_base = x86_pmu.eventsel;
@@ -609,7 +609,7 @@ try_generic:
x86_pmu.disable(hwc, idx);

cpuc->counters[idx] = counter;
- set_bit(idx, cpuc->active);
+ set_bit(idx, cpuc->active_mask);

x86_perf_counter_set_period(counter, hwc, idx);
x86_pmu.enable(hwc, idx);
@@ -643,7 +643,7 @@ void perf_counter_print_debug(void)
pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow);
pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed);
}
- pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used);
+ pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used_mask);

for (idx = 0; idx < x86_pmu.num_counters; idx++) {
rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
@@ -677,7 +677,7 @@ static void x86_pmu_disable(struct perf_counter *counter)
* Must be done before we disable, otherwise the nmi handler
* could reenable again:
*/
- clear_bit(idx, cpuc->active);
+ clear_bit(idx, cpuc->active_mask);
x86_pmu.disable(hwc, idx);

/*
@@ -692,7 +692,7 @@ static void x86_pmu_disable(struct perf_counter *counter)
*/
x86_perf_counter_update(counter, hwc, idx);
cpuc->counters[idx] = NULL;
- clear_bit(idx, cpuc->used);
+ clear_bit(idx, cpuc->used_mask);
}

/*
@@ -741,7 +741,7 @@ again:
struct perf_counter *counter = cpuc->counters[bit];

clear_bit(bit, (unsigned long *) &status);
- if (!test_bit(bit, cpuc->active))
+ if (!test_bit(bit, cpuc->active_mask))
continue;

intel_pmu_save_and_restart(counter);
@@ -779,7 +779,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs, int nmi)

++cpuc->interrupts;
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
- if (!test_bit(idx, cpuc->active))
+ if (!test_bit(idx, cpuc->active_mask))
continue;
counter = cpuc->counters[idx];
hwc = &counter->hw;