Subject: [PATCH 00/12] perf/core: x86 and AMD pmu updates

I know there is some ongoing work by Stephane in this code too, but I
want to submit the patches anyway for review. If necessary I will
rebase the patch set on top of Stephane's patches. The patch set
applies to tip/perf/core and can also be pulled from here:

git://git.kernel.org/pub/scm/linux/kernel/git/rric/oprofile.git perf

The patches contain changes, updates and code reworks I made while
reviewing the code and implementing IBS for perf_events. I will submit
the IBS patches in a later series; they will include an implementation
that extends the ABI to support model specific hardware pmu features.

In this patch set I made some model specific pmu code generic and vice
versa. The event constraint handler now works for the AMD pmu too.
Fixed counter code that is only needed on Intel cpus is now model
specific. The MSR bit masks were also unified where the hardware
implementation is the same for all cpus.
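
To illustrate what the generic constraint handler enables (this table is
hypothetical and not part of the series; the event code and counter mask
below are placeholders only), an AMD model could now register its own
constraints like this:

static struct event_constraint amd_event_constraints[] =
{
	EVENT_CONSTRAINT(0x76, 0x3),	/* placeholder: counters 0-1 only */
	EVENT_CONSTRAINT_END
};

static __init void amd_pmu_setup_constraints(void)
{
	/* honoured by the generic allocator, see patches 11 and 12 */
	x86_pmu.event_constraints = amd_event_constraints;
}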

-Robert


Subject: [PATCH 04/12] perf/core, x86: remove cpu_hw_events.interrupts

This struct member is no longer used and can be removed.

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/kernel/cpu/perf_event.c | 1 -
1 files changed, 0 insertions(+), 1 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 4291f86..c49221b 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -70,7 +70,6 @@ struct cpu_hw_events {
struct perf_event *events[X86_PMC_IDX_MAX];
unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
- unsigned long interrupts;
int enabled;
struct debug_store *ds;
};
--
1.6.6

Subject: [PATCH 02/12] perf/core, x86: undo some *_counter* -> *_event* renames

The big rename

cdd6c48 perf: Do the big rename: Performance Counters -> Performance Events

accidentally renamed some members of structs that were named after
registers in the specifications. To avoid confusion this patch reverts
those changes. The related specs are the MSR descriptions in AMD's
BKDGs and the ARCHITECTURAL PERFORMANCE MONITORING section in the Intel
64 and IA-32 Architectures Software Developer's Manuals.
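
For reference, a minimal sketch (assuming the usual cpuid() helper) of
how the restored names line up with CPUID leaf 0xa, mirroring their use
in intel_pmu_init():

	union cpuid10_eax eax;
	union cpuid10_edx edx;
	unsigned int ebx, unused;

	/* CPUID leaf 0xa describes the architectural PMU */
	cpuid(10, &eax.full, &ebx, &unused, &edx.full);

	pr_info("counters: %d, width: %d bits, fixed: %d\n",
		eax.split.num_counters, eax.split.bit_width,
		edx.split.num_counters_fixed);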

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/include/asm/perf_event.h | 4 +-
arch/x86/kernel/cpu/perf_event.c | 96 ++++++++++++++++++------------------
arch/x86/oprofile/op_model_ppro.c | 4 +-
3 files changed, 52 insertions(+), 52 deletions(-)

diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 8d9f854..1f50cfc 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -54,7 +54,7 @@
union cpuid10_eax {
struct {
unsigned int version_id:8;
- unsigned int num_events:8;
+ unsigned int num_counters:8;
unsigned int bit_width:8;
unsigned int mask_length:8;
} split;
@@ -63,7 +63,7 @@ union cpuid10_eax {

union cpuid10_edx {
struct {
- unsigned int num_events_fixed:4;
+ unsigned int num_counters_fixed:4;
unsigned int reserved:28;
} split;
unsigned int full;
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index b1bb8c5..c3f920a 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -105,10 +105,10 @@ struct x86_pmu {
u64 (*event_map)(int);
u64 (*raw_event)(u64);
int max_events;
- int num_events;
- int num_events_fixed;
- int event_bits;
- u64 event_mask;
+ int num_counters;
+ int num_counters_fixed;
+ int cntval_bits;
+ u64 cntval_mask;
int apic;
u64 max_period;
u64 intel_ctrl;
@@ -672,7 +672,7 @@ static u64
x86_perf_event_update(struct perf_event *event,
struct hw_perf_event *hwc, int idx)
{
- int shift = 64 - x86_pmu.event_bits;
+ int shift = 64 - x86_pmu.cntval_bits;
u64 prev_raw_count, new_raw_count;
s64 delta;

@@ -722,12 +722,12 @@ static bool reserve_pmc_hardware(void)
if (nmi_watchdog == NMI_LOCAL_APIC)
disable_lapic_nmi_watchdog();

- for (i = 0; i < x86_pmu.num_events; i++) {
+ for (i = 0; i < x86_pmu.num_counters; i++) {
if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
goto perfctr_fail;
}

- for (i = 0; i < x86_pmu.num_events; i++) {
+ for (i = 0; i < x86_pmu.num_counters; i++) {
if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
goto eventsel_fail;
}
@@ -740,7 +740,7 @@ eventsel_fail:
for (i--; i >= 0; i--)
release_evntsel_nmi(x86_pmu.eventsel + i);

- i = x86_pmu.num_events;
+ i = x86_pmu.num_counters;

perfctr_fail:
for (i--; i >= 0; i--)
@@ -758,7 +758,7 @@ static void release_pmc_hardware(void)
#ifdef CONFIG_X86_LOCAL_APIC
int i;

- for (i = 0; i < x86_pmu.num_events; i++) {
+ for (i = 0; i < x86_pmu.num_counters; i++) {
release_perfctr_nmi(x86_pmu.perfctr + i);
release_evntsel_nmi(x86_pmu.eventsel + i);
}
@@ -1105,7 +1105,7 @@ static void amd_pmu_disable_all(void)
*/
barrier();

- for (idx = 0; idx < x86_pmu.num_events; idx++) {
+ for (idx = 0; idx < x86_pmu.num_counters; idx++) {
u64 val;

if (!test_bit(idx, cpuc->active_mask))
@@ -1176,7 +1176,7 @@ static void amd_pmu_enable_all(void)
cpuc->enabled = 1;
barrier();

- for (idx = 0; idx < x86_pmu.num_events; idx++) {
+ for (idx = 0; idx < x86_pmu.num_counters; idx++) {
struct perf_event *event = cpuc->events[idx];
u64 val;

@@ -1319,7 +1319,7 @@ x86_perf_event_set_period(struct perf_event *event,
atomic64_set(&hwc->prev_count, (u64)-left);

err = checking_wrmsrl(hwc->event_base + idx,
- (u64)(-left) & x86_pmu.event_mask);
+ (u64)(-left) & x86_pmu.cntval_mask);

perf_event_update_userpage(event);

@@ -1402,7 +1402,7 @@ static int fixed_mode_idx(struct hw_perf_event *hwc)
(hwc->sample_period == 1)))
return X86_PMC_IDX_FIXED_BTS;

- if (!x86_pmu.num_events_fixed)
+ if (!x86_pmu.num_counters_fixed)
return -1;

/*
@@ -1429,8 +1429,8 @@ gen_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
{
int idx;

- idx = find_first_zero_bit(cpuc->used_mask, x86_pmu.num_events);
- return idx == x86_pmu.num_events ? -1 : idx;
+ idx = find_first_zero_bit(cpuc->used_mask, x86_pmu.num_counters);
+ return idx == x86_pmu.num_counters ? -1 : idx;
}

/*
@@ -1556,7 +1556,7 @@ void perf_event_print_debug(void)
unsigned long flags;
int cpu, idx;

- if (!x86_pmu.num_events)
+ if (!x86_pmu.num_counters)
return;

local_irq_save(flags);
@@ -1578,7 +1578,7 @@ void perf_event_print_debug(void)
}
pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used_mask);

- for (idx = 0; idx < x86_pmu.num_events; idx++) {
+ for (idx = 0; idx < x86_pmu.num_counters; idx++) {
rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
rdmsrl(x86_pmu.perfctr + idx, pmc_count);

@@ -1591,7 +1591,7 @@ void perf_event_print_debug(void)
pr_info("CPU#%d: gen-PMC%d left: %016llx\n",
cpu, idx, prev_left);
}
- for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) {
+ for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);

pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
@@ -1720,18 +1720,18 @@ static void intel_pmu_reset(void)
unsigned long flags;
int idx;

- if (!x86_pmu.num_events)
+ if (!x86_pmu.num_counters)
return;

local_irq_save(flags);

printk("clearing PMU state on CPU#%d\n", smp_processor_id());

- for (idx = 0; idx < x86_pmu.num_events; idx++) {
+ for (idx = 0; idx < x86_pmu.num_counters; idx++) {
checking_wrmsrl(x86_pmu.eventsel + idx, 0ull);
checking_wrmsrl(x86_pmu.perfctr + idx, 0ull);
}
- for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) {
+ for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
}
if (ds)
@@ -1754,7 +1754,7 @@ static int p6_pmu_handle_irq(struct pt_regs *regs)

cpuc = &__get_cpu_var(cpu_hw_events);

- for (idx = 0; idx < x86_pmu.num_events; idx++) {
+ for (idx = 0; idx < x86_pmu.num_counters; idx++) {
if (!test_bit(idx, cpuc->active_mask))
continue;

@@ -1762,7 +1762,7 @@ static int p6_pmu_handle_irq(struct pt_regs *regs)
hwc = &event->hw;

val = x86_perf_event_update(event, hwc, idx);
- if (val & (1ULL << (x86_pmu.event_bits - 1)))
+ if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
continue;

/*
@@ -1864,7 +1864,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)

cpuc = &__get_cpu_var(cpu_hw_events);

- for (idx = 0; idx < x86_pmu.num_events; idx++) {
+ for (idx = 0; idx < x86_pmu.num_counters; idx++) {
if (!test_bit(idx, cpuc->active_mask))
continue;

@@ -1872,7 +1872,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
hwc = &event->hw;

val = x86_perf_event_update(event, hwc, idx);
- if (val & (1ULL << (x86_pmu.event_bits - 1)))
+ if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
continue;

/*
@@ -1983,7 +1983,7 @@ static __initconst struct x86_pmu p6_pmu = {
.apic = 1,
.max_period = (1ULL << 31) - 1,
.version = 0,
- .num_events = 2,
+ .num_counters = 2,
/*
* Events have 40 bits implemented. However they are designed such
* that bits [32-39] are sign extensions of bit 31. As such the
@@ -1991,8 +1991,8 @@ static __initconst struct x86_pmu p6_pmu = {
*
* See IA-32 Intel Architecture Software developer manual Vol 3B
*/
- .event_bits = 32,
- .event_mask = (1ULL << 32) - 1,
+ .cntval_bits = 32,
+ .cntval_mask = (1ULL << 32) - 1,
.get_event_idx = intel_get_event_idx,
};

@@ -2032,9 +2032,9 @@ static __initconst struct x86_pmu amd_pmu = {
.event_map = amd_pmu_event_map,
.raw_event = amd_pmu_raw_event,
.max_events = ARRAY_SIZE(amd_perfmon_event_map),
- .num_events = 4,
- .event_bits = 48,
- .event_mask = (1ULL << 48) - 1,
+ .num_counters = 4,
+ .cntval_bits = 48,
+ .cntval_mask = (1ULL << 48) - 1,
.apic = 1,
/* use highest bit to detect overflow */
.max_period = (1ULL << 47) - 1,
@@ -2100,15 +2100,15 @@ static __init int intel_pmu_init(void)

x86_pmu = intel_pmu;
x86_pmu.version = version;
- x86_pmu.num_events = eax.split.num_events;
- x86_pmu.event_bits = eax.split.bit_width;
- x86_pmu.event_mask = (1ULL << eax.split.bit_width) - 1;
+ x86_pmu.num_counters = eax.split.num_counters;
+ x86_pmu.cntval_bits = eax.split.bit_width;
+ x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1;

/*
* Quirk: v2 perfmon does not report fixed-purpose events, so
* assume at least 3 events:
*/
- x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3);
+ x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3);

/*
* Install the hw-cache-events table:
@@ -2192,33 +2192,33 @@ void __init init_hw_perf_events(void)

pr_cont("%s PMU driver.\n", x86_pmu.name);

- if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) {
+ if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
- x86_pmu.num_events, X86_PMC_MAX_GENERIC);
- x86_pmu.num_events = X86_PMC_MAX_GENERIC;
+ x86_pmu.num_counters, X86_PMC_MAX_GENERIC);
+ x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
}
- perf_event_mask = (1 << x86_pmu.num_events) - 1;
- perf_max_events = x86_pmu.num_events;
+ perf_event_mask = (1 << x86_pmu.num_counters) - 1;
+ perf_max_events = x86_pmu.num_counters;

- if (x86_pmu.num_events_fixed > X86_PMC_MAX_FIXED) {
+ if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
- x86_pmu.num_events_fixed, X86_PMC_MAX_FIXED);
- x86_pmu.num_events_fixed = X86_PMC_MAX_FIXED;
+ x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED);
+ x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED;
}

perf_event_mask |=
- ((1LL << x86_pmu.num_events_fixed)-1) << X86_PMC_IDX_FIXED;
+ ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED;
x86_pmu.intel_ctrl = perf_event_mask;

perf_events_lapic_init();
register_die_notifier(&perf_event_nmi_notifier);

pr_info("... version: %d\n", x86_pmu.version);
- pr_info("... bit width: %d\n", x86_pmu.event_bits);
- pr_info("... generic registers: %d\n", x86_pmu.num_events);
- pr_info("... value mask: %016Lx\n", x86_pmu.event_mask);
+ pr_info("... bit width: %d\n", x86_pmu.cntval_bits);
+ pr_info("... generic registers: %d\n", x86_pmu.num_counters);
+ pr_info("... value mask: %016Lx\n", x86_pmu.cntval_mask);
pr_info("... max period: %016Lx\n", x86_pmu.max_period);
- pr_info("... fixed-purpose events: %d\n", x86_pmu.num_events_fixed);
+ pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed);
pr_info("... event mask: %016Lx\n", perf_event_mask);
}

diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 8eb0587..4899215 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -234,11 +234,11 @@ static void arch_perfmon_setup_counters(void)
if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 &&
current_cpu_data.x86_model == 15) {
eax.split.version_id = 2;
- eax.split.num_events = 2;
+ eax.split.num_counters = 2;
eax.split.bit_width = 40;
}

- num_counters = eax.split.num_events;
+ num_counters = eax.split.num_counters;

op_arch_perfmon_spec.num_counters = num_counters;
op_arch_perfmon_spec.num_controls = num_counters;
--
1.6.6

Subject: [PATCH 03/12] perf/core, x86: remove duplicate perf_event_mask variable

The same information is also stored in x86_pmu.intel_ctrl. This patch
removes perf_event_mask and uses x86_pmu.intel_ctrl directly instead.

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/kernel/cpu/perf_event.c | 9 +++------
1 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index c3f920a..4291f86 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -27,8 +27,6 @@
#include <asm/stacktrace.h>
#include <asm/nmi.h>

-static u64 perf_event_mask __read_mostly;
-
/* The maximal number of PEBS events: */
#define MAX_PEBS_EVENTS 4

@@ -2197,7 +2195,7 @@ void __init init_hw_perf_events(void)
x86_pmu.num_counters, X86_PMC_MAX_GENERIC);
x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
}
- perf_event_mask = (1 << x86_pmu.num_counters) - 1;
+ x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
perf_max_events = x86_pmu.num_counters;

if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
@@ -2206,9 +2204,8 @@ void __init init_hw_perf_events(void)
x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED;
}

- perf_event_mask |=
+ x86_pmu.intel_ctrl |=
((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED;
- x86_pmu.intel_ctrl = perf_event_mask;

perf_events_lapic_init();
register_die_notifier(&perf_event_nmi_notifier);
@@ -2219,7 +2216,7 @@ void __init init_hw_perf_events(void)
pr_info("... value mask: %016Lx\n", x86_pmu.cntval_mask);
pr_info("... max period: %016Lx\n", x86_pmu.max_period);
pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed);
- pr_info("... event mask: %016Lx\n", perf_event_mask);
+ pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl);
}

static inline void x86_pmu_read(struct perf_event *event)
--
1.6.6

Subject: [PATCH 12/12] perf/core, x86: make event constraint handler generic

This patch makes the event constraint handler that was implemented for
Intel pmus generic for all models. An event constraint table can now be
implemented and used for AMD models as well.
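
As a reading aid (restating the semantics the handler below enforces):
EVENT_CONSTRAINT(code, idxmsk) limits the event <code> to the counters
whose bits are set in <idxmsk>, for example:

	/* entries borrowed from the Intel tables elsewhere in this series */
	static struct event_constraint example_constraints[] =
	{
		EVENT_CONSTRAINT(0xc1, 0x1),	/* FLOPS: counter 0 only       */
		EVENT_CONSTRAINT(0x41, 0x3),	/* L1D_CACHE_ST: counters 0, 1 */
		EVENT_CONSTRAINT_END		/* terminates the table        */
	};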

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/kernel/cpu/perf_event.c | 43 +++++++++++++++-----------------------
1 files changed, 17 insertions(+), 26 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index b893ee7..ac2d3a7 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1387,47 +1387,38 @@ static int fixed_mode_idx(struct hw_perf_event *hwc)
}

/*
- * generic counter allocator: get next free counter
+ * generic counter allocator
*/
static int
gen_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
{
- int idx;
-
-again:
- idx = find_first_zero_bit(cpuc->used_mask, x86_pmu.num_counters);
- if (idx == x86_pmu.num_counters)
- return -1;
- if (test_and_set_bit(idx, cpuc->used_mask))
- goto again;
- return idx;
-}
-
-/*
- * intel-specific counter allocator: check event constraints
- */
-static inline int
-__intel_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
-{
const struct event_constraint *event_constraint;
- int i, code;
+ int idx, code;

if (!x86_pmu.event_constraints)
- goto skip;
+ goto get_next_free;

code = hwc->config & ARCH_PERFMON_EVENTSEL_EVENT;

+ /* check event constraints */
for_each_event_constraint(event_constraint, x86_pmu.event_constraints) {
if (code == event_constraint->code) {
- for_each_bit(i, event_constraint->idxmsk, X86_PMC_IDX_MAX) {
- if (!test_and_set_bit(i, cpuc->used_mask))
- return i;
+ for_each_bit(idx, event_constraint->idxmsk, X86_PMC_IDX_MAX) {
+ if (!test_and_set_bit(idx, cpuc->used_mask))
+ return idx;
}
return -1;
}
}
-skip:
- return gen_get_event_idx(cpuc, hwc);
+
+get_next_free:
+ /* get next free counter */
+ idx = find_first_zero_bit(cpuc->used_mask, x86_pmu.num_counters);
+ if (idx == x86_pmu.num_counters)
+ return -1;
+ if (test_and_set_bit(idx, cpuc->used_mask))
+ goto get_next_free;
+ return idx;
}

static int
@@ -1465,7 +1456,7 @@ intel_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
/* Try to get the previous generic event again */
if (idx == -1 || test_and_set_bit(idx, cpuc->used_mask)) {
try_generic:
- idx = __intel_get_event_idx(cpuc, hwc);
+ idx = gen_get_event_idx(cpuc, hwc);
if (idx == -1)
return -EAGAIN;

--
1.6.6

Subject: [PATCH 05/12] perf/core, x86: reduce number of CONFIG_X86_LOCAL_APIC macros

The functions reserve_pmc_hardware() and release_pmc_hardware() were
hard to read. This patch improves the readability of the code by
removing most of the CONFIG_X86_LOCAL_APIC ifdefs.

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/kernel/cpu/perf_event.c | 15 +++++++++------
1 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index c49221b..5c6244b 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -711,9 +711,10 @@ again:
static atomic_t active_events;
static DEFINE_MUTEX(pmc_reserve_mutex);

+#ifdef CONFIG_X86_LOCAL_APIC
+
static bool reserve_pmc_hardware(void)
{
-#ifdef CONFIG_X86_LOCAL_APIC
int i;

if (nmi_watchdog == NMI_LOCAL_APIC)
@@ -728,11 +729,9 @@ static bool reserve_pmc_hardware(void)
if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
goto eventsel_fail;
}
-#endif

return true;

-#ifdef CONFIG_X86_LOCAL_APIC
eventsel_fail:
for (i--; i >= 0; i--)
release_evntsel_nmi(x86_pmu.eventsel + i);
@@ -747,12 +746,10 @@ perfctr_fail:
enable_lapic_nmi_watchdog();

return false;
-#endif
}

static void release_pmc_hardware(void)
{
-#ifdef CONFIG_X86_LOCAL_APIC
int i;

for (i = 0; i < x86_pmu.num_counters; i++) {
@@ -762,9 +759,15 @@ static void release_pmc_hardware(void)

if (nmi_watchdog == NMI_LOCAL_APIC)
enable_lapic_nmi_watchdog();
-#endif
}

+#else
+
+static bool reserve_pmc_hardware(void) { return true; }
+static void release_pmc_hardware(void) {}
+
+#endif
+
static inline bool bts_available(void)
{
return x86_pmu.enable_bts != NULL;
--
1.6.6

Subject: [PATCH 07/12] perf/core, x86: rename macro to ARCH_PERFMON_EVENTSEL_ENABLE

For consistency, this patch renames ARCH_PERFMON_EVENTSEL0_ENABLE to
ARCH_PERFMON_EVENTSEL_ENABLE; the enable bit exists in every event
select register, not only in EVENTSEL0.

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/include/asm/perf_event.h | 2 +-
arch/x86/kernel/cpu/perf_event.c | 16 ++++++++--------
arch/x86/kernel/cpu/perfctr-watchdog.c | 2 +-
arch/x86/oprofile/op_model_amd.c | 4 ++--
arch/x86/oprofile/op_model_ppro.c | 4 ++--
5 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 1f50cfc..c7f723a 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -18,7 +18,7 @@
#define MSR_ARCH_PERFMON_EVENTSEL0 0x186
#define MSR_ARCH_PERFMON_EVENTSEL1 0x187

-#define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22)
+#define ARCH_PERFMON_EVENTSEL_ENABLE (1 << 22)
#define ARCH_PERFMON_EVENTSEL_INT (1 << 20)
#define ARCH_PERFMON_EVENTSEL_OS (1 << 17)
#define ARCH_PERFMON_EVENTSEL_USR (1 << 16)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 1863cda..96cbfe6 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1051,7 +1051,7 @@ static void p6_pmu_disable_all(void)

/* p6 only has one enable register */
rdmsrl(MSR_P6_EVNTSEL0, val);
- val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
+ val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
wrmsrl(MSR_P6_EVNTSEL0, val);
}

@@ -1093,9 +1093,9 @@ static void amd_pmu_disable_all(void)
if (!test_bit(idx, cpuc->active_mask))
continue;
rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
- if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE))
+ if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE))
continue;
- val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
+ val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
}
}
@@ -1120,7 +1120,7 @@ static void p6_pmu_enable_all(void)

/* p6 only has one enable register */
rdmsrl(MSR_P6_EVNTSEL0, val);
- val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+ val |= ARCH_PERFMON_EVENTSEL_ENABLE;
wrmsrl(MSR_P6_EVNTSEL0, val);
}

@@ -1166,7 +1166,7 @@ static void amd_pmu_enable_all(void)
continue;

val = event->hw.config;
- val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+ val |= ARCH_PERFMON_EVENTSEL_ENABLE;
wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
}
}
@@ -1195,7 +1195,7 @@ static inline void intel_pmu_ack_status(u64 ack)
static inline void x86_pmu_enable_event(struct hw_perf_event *hwc, int idx)
{
(void)checking_wrmsrl(hwc->config_base + idx,
- hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE);
+ hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE);
}

static inline void x86_pmu_disable_event(struct hw_perf_event *hwc, int idx)
@@ -1223,7 +1223,7 @@ p6_pmu_disable_event(struct hw_perf_event *hwc, int idx)
u64 val = P6_NOP_EVENT;

if (cpuc->enabled)
- val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+ val |= ARCH_PERFMON_EVENTSEL_ENABLE;

(void)checking_wrmsrl(hwc->config_base + idx, val);
}
@@ -1341,7 +1341,7 @@ static void p6_pmu_enable_event(struct hw_perf_event *hwc, int idx)

val = hwc->config;
if (cpuc->enabled)
- val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+ val |= ARCH_PERFMON_EVENTSEL_ENABLE;

(void)checking_wrmsrl(hwc->config_base + idx, val);
}
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 74f4e85..fb329e9 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -680,7 +680,7 @@ static int setup_intel_arch_watchdog(unsigned nmi_hz)
cpu_nmi_set_wd_enabled();

apic_write(APIC_LVTPC, APIC_DM_NMI);
- evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+ evntsel |= ARCH_PERFMON_EVENTSEL_ENABLE;
wrmsr(evntsel_msr, evntsel, 0);
intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1);
return 1;
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 39686c2..729e361 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -314,7 +314,7 @@ static void op_amd_start(struct op_msrs const * const msrs)
if (!reset_value[op_x86_phys_to_virt(i)])
continue;
rdmsrl(msrs->controls[i].addr, val);
- val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+ val |= ARCH_PERFMON_EVENTSEL_ENABLE;
wrmsrl(msrs->controls[i].addr, val);
}

@@ -334,7 +334,7 @@ static void op_amd_stop(struct op_msrs const * const msrs)
if (!reset_value[op_x86_phys_to_virt(i)])
continue;
rdmsrl(msrs->controls[i].addr, val);
- val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
+ val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
wrmsrl(msrs->controls[i].addr, val);
}

diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 4899215..be7f787 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -161,7 +161,7 @@ static void ppro_start(struct op_msrs const * const msrs)
for (i = 0; i < num_counters; ++i) {
if (reset_value[i]) {
rdmsrl(msrs->controls[i].addr, val);
- val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+ val |= ARCH_PERFMON_EVENTSEL_ENABLE;
wrmsrl(msrs->controls[i].addr, val);
}
}
@@ -179,7 +179,7 @@ static void ppro_stop(struct op_msrs const * const msrs)
if (!reset_value[i])
continue;
rdmsrl(msrs->controls[i].addr, val);
- val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
+ val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
wrmsrl(msrs->controls[i].addr, val);
}
}
--
1.6.6

Subject: [PATCH 10/12] perf/core, x86: removing fixed counter handling for AMD pmu

The AMD pmu does not support fixed counters, so fixed counters must not
be considered in scheduling decisions. This patch implements an AMD
specific event scheduler without the fixed counter calculation, which
also improves performance in the fast path.

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/kernel/cpu/perf_event.c | 36 +++++++++++++++++++++++++++++++-----
1 files changed, 31 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 3f81f91..3e0fc29 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1407,8 +1407,8 @@ again:
/*
* intel-specific counter allocator: check event constraints
*/
-static int
-intel_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
+static inline int
+__intel_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
{
const struct event_constraint *event_constraint;
int i, code;
@@ -1432,7 +1432,7 @@ skip:
}

static int
-x86_schedule_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
+intel_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
{
int idx;

@@ -1466,7 +1466,7 @@ x86_schedule_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
/* Try to get the previous generic event again */
if (idx == -1 || test_and_set_bit(idx, cpuc->used_mask)) {
try_generic:
- idx = x86_pmu.get_event_idx(cpuc, hwc);
+ idx = __intel_get_event_idx(cpuc, hwc);
if (idx == -1)
return -EAGAIN;

@@ -1479,6 +1479,32 @@ try_generic:
return idx;
}

+static int
+amd_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
+{
+ int idx;
+
+ idx = hwc->idx;
+ /* Try to get the previous generic event again */
+ if (idx == -1 || test_and_set_bit(idx, cpuc->used_mask)) {
+ idx = gen_get_event_idx(cpuc, hwc);
+ if (idx == -1)
+ return -EAGAIN;
+
+ hwc->idx = idx;
+ }
+ hwc->config_base = x86_pmu.eventsel;
+ hwc->event_base = x86_pmu.perfctr;
+
+ return idx;
+}
+
+static int
+x86_schedule_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
+{
+ return x86_pmu.get_event_idx(cpuc, hwc);
+}
+
/*
* Find a PMC slot for the freshly enabled / scheduled in event:
*/
@@ -2008,7 +2034,7 @@ static __initconst struct x86_pmu amd_pmu = {
.apic = 1,
/* use highest bit to detect overflow */
.max_period = (1ULL << 47) - 1,
- .get_event_idx = gen_get_event_idx,
+ .get_event_idx = amd_get_event_idx,
};

static __init int p6_pmu_init(void)
--
1.6.6

Subject: [PATCH 11/12] perf/core, x86: make event_constraints a member of struct x86_pmu

The event constraint tables describe the x86 pmu and thus are better
referenced from struct x86_pmu than through a separate static
variable. This is also necessary to generalize model specific functions
such as intel_get_event_idx().

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/kernel/cpu/perf_event.c | 24 ++++++++++--------------
1 files changed, 10 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 3e0fc29..b893ee7 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -123,6 +123,7 @@ struct x86_pmu {
void (*disable_bts)(void);
int (*get_event_idx)(struct cpu_hw_events *cpuc,
struct hw_perf_event *hwc);
+ struct event_constraint *event_constraints;
};

static struct x86_pmu x86_pmu __read_mostly;
@@ -131,8 +132,6 @@ static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
.enabled = 1,
};

-static const struct event_constraint *event_constraints;
-
/*
* Not sure about some of these
*/
@@ -160,7 +159,7 @@ static u64 p6_pmu_event_map(int hw_event)
*/
#define P6_NOP_EVENT 0x0000002EULL

-static const struct event_constraint intel_p6_event_constraints[] =
+static struct event_constraint intel_p6_event_constraints[] =
{
EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */
EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
@@ -185,7 +184,7 @@ static const u64 intel_perfmon_event_map[] =
[PERF_COUNT_HW_BUS_CYCLES] = 0x013c,
};

-static const struct event_constraint intel_core_event_constraints[] =
+static struct event_constraint intel_core_event_constraints[] =
{
EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
@@ -199,7 +198,7 @@ static const struct event_constraint intel_core_event_constraints[] =
EVENT_CONSTRAINT_END
};

-static const struct event_constraint intel_nehalem_event_constraints[] =
+static struct event_constraint intel_nehalem_event_constraints[] =
{
EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
@@ -1413,12 +1412,12 @@ __intel_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
const struct event_constraint *event_constraint;
int i, code;

- if (!event_constraints)
+ if (!x86_pmu.event_constraints)
goto skip;

code = hwc->config & ARCH_PERFMON_EVENTSEL_EVENT;

- for_each_event_constraint(event_constraint, event_constraints) {
+ for_each_event_constraint(event_constraint, x86_pmu.event_constraints) {
if (code == event_constraint->code) {
for_each_bit(i, event_constraint->idxmsk, X86_PMC_IDX_MAX) {
if (!test_and_set_bit(i, cpuc->used_mask))
@@ -1990,6 +1989,7 @@ static __initconst struct x86_pmu p6_pmu = {
.cntval_bits = 32,
.cntval_mask = (1ULL << 32) - 1,
.get_event_idx = intel_get_event_idx,
+ .event_constraints = intel_p6_event_constraints,
};

static __initconst struct x86_pmu intel_pmu = {
@@ -2047,12 +2047,8 @@ static __init int p6_pmu_init(void)
case 7:
case 8:
case 11: /* Pentium III */
- event_constraints = intel_p6_event_constraints;
- break;
case 9:
- case 13:
- /* Pentium M */
- event_constraints = intel_p6_event_constraints;
+ case 13: /* Pentium M */
break;
default:
pr_cont("unsupported p6 CPU model %d ",
@@ -2118,14 +2114,14 @@ static __init int intel_pmu_init(void)
sizeof(hw_cache_event_ids));

pr_cont("Core2 events, ");
- event_constraints = intel_core_event_constraints;
+ x86_pmu.event_constraints = intel_core_event_constraints;
break;
default:
case 26:
memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
sizeof(hw_cache_event_ids));

- event_constraints = intel_nehalem_event_constraints;
+ x86_pmu.event_constraints = intel_nehalem_event_constraints;
pr_cont("Nehalem/Corei7 events, ");
break;
case 28:
--
1.6.6

Subject: [PATCH 09/12] perf/core, x86: use test_and_set_bit() when grabbing a counter

Using test_and_set_bit() in the generic counter allocator makes
grabbing a counter atomic. Otherwise a counter could be allocated
twice.
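
In other words (a restated sketch of the allocator below, with the race
spelled out in comments):

static int claim_counter(struct cpu_hw_events *cpuc)
{
	int idx;

again:
	/* look for a counter that appears free ... */
	idx = find_first_zero_bit(cpuc->used_mask, x86_pmu.num_counters);
	if (idx == x86_pmu.num_counters)
		return -1;			/* no free counter left */
	/*
	 * ... and claim it atomically: if the bit was set between the
	 * scan and the claim, rescan instead of handing out the same
	 * index twice.
	 */
	if (test_and_set_bit(idx, cpuc->used_mask))
		goto again;
	return idx;
}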

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/kernel/cpu/perf_event.c | 8 ++++++--
1 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 4eb0355..3f81f91 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1395,8 +1395,13 @@ gen_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
{
int idx;

+again:
idx = find_first_zero_bit(cpuc->used_mask, x86_pmu.num_counters);
- return idx == x86_pmu.num_counters ? -1 : idx;
+ if (idx == x86_pmu.num_counters)
+ return -1;
+ if (test_and_set_bit(idx, cpuc->used_mask))
+ goto again;
+ return idx;
}

/*
@@ -1465,7 +1470,6 @@ try_generic:
if (idx == -1)
return -EAGAIN;

- set_bit(idx, cpuc->used_mask);
hwc->idx = idx;
}
hwc->config_base = x86_pmu.eventsel;
--
1.6.6

Subject: [PATCH 01/12] perf/core: correct files in MAINTAINERS entry

This corrects the file entries for perf_events. The following files
are now matched:

arch/frv/include/asm/perf_event.h
arch/frv/lib/perf_event.c
arch/parisc/include/asm/perf_event.h
arch/powerpc/include/asm/perf_event.h
arch/powerpc/kernel/perf_callchain.c
arch/powerpc/kernel/perf_event.c
arch/s390/include/asm/perf_event.h
arch/sh/include/asm/perf_event.h
arch/sh/kernel/cpu/sh4a/perf_event.c
arch/sh/kernel/cpu/sh4/perf_event.c
arch/sh/kernel/perf_callchain.c
arch/sh/kernel/perf_event.c
arch/sparc/include/asm/perf_event.h
arch/sparc/kernel/perf_event.c
arch/x86/include/asm/perf_event.h
arch/x86/kernel/cpu/perf_event.c
include/linux/perf_event.h
kernel/perf_event.c
tools/perf/

Signed-off-by: Robert Richter <[email protected]>
---
MAINTAINERS | 4 +++-
1 files changed, 3 insertions(+), 1 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index c8f47bf..350efc3 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4224,7 +4224,9 @@ M: Ingo Molnar <[email protected]>
S: Supported
F: kernel/perf_event.c
F: include/linux/perf_event.h
-F: arch/*/*/kernel/perf_event.c
+F: arch/*/kernel/perf_event.c
+F: arch/*/kernel/*/perf_event.c
+F: arch/*/kernel/*/*/perf_event.c
F: arch/*/include/asm/perf_event.h
F: arch/*/lib/perf_event.c
F: arch/*/kernel/perf_callchain.c
--
1.6.6

Subject: [PATCH 08/12] perf/core, x86: implement ARCH_PERFMON_EVENTSEL bit masks

ARCH_PERFMON_EVENTSEL bit masks are often used in the kernel. This
patch adds macros for the bit masks and removes the local defines.
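
A minimal sketch of how the new masks compose an event select value
(event 0x76 is the AMD cycles event from amd_perfmon_event_map;
programming the MSR directly like this is for illustration only):

	u64 config;

	/* count event 0x76 in user and kernel mode, interrupt on overflow */
	config  = 0x76 & ARCH_PERFMON_EVENTSEL_EVENT;
	config |= ARCH_PERFMON_EVENTSEL_USR |
		  ARCH_PERFMON_EVENTSEL_OS  |
		  ARCH_PERFMON_EVENTSEL_INT |
		  ARCH_PERFMON_EVENTSEL_ENABLE;

	wrmsrl(MSR_K7_EVNTSEL0, config);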

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/include/asm/perf_event.h | 16 ++++++++++---
arch/x86/kernel/cpu/perf_event.c | 42 +++++++++++-------------------------
2 files changed, 25 insertions(+), 33 deletions(-)

diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index c7f723a..448bcf5 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -18,10 +18,18 @@
#define MSR_ARCH_PERFMON_EVENTSEL0 0x186
#define MSR_ARCH_PERFMON_EVENTSEL1 0x187

-#define ARCH_PERFMON_EVENTSEL_ENABLE (1 << 22)
-#define ARCH_PERFMON_EVENTSEL_INT (1 << 20)
-#define ARCH_PERFMON_EVENTSEL_OS (1 << 17)
-#define ARCH_PERFMON_EVENTSEL_USR (1 << 16)
+#define ARCH_PERFMON_EVENTSEL_EVENT 0x000000FFULL
+#define ARCH_PERFMON_EVENTSEL_UMASK 0x0000FF00ULL
+#define ARCH_PERFMON_EVENTSEL_USR (1ULL << 16)
+#define ARCH_PERFMON_EVENTSEL_OS (1ULL << 17)
+#define ARCH_PERFMON_EVENTSEL_EDGE (1ULL << 18)
+#define ARCH_PERFMON_EVENTSEL_INT (1ULL << 20)
+#define ARCH_PERFMON_EVENTSEL_ENABLE (1ULL << 22)
+#define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23)
+#define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL
+
+#define AMD64_EVENTSEL_EVENT \
+ (ARCH_PERFMON_EVENTSEL_EVENT | 0x0FULL << 32)

/*
* Includes eventsel and unit mask as well:
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 96cbfe6..4eb0355 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -27,6 +27,16 @@
#include <asm/stacktrace.h>
#include <asm/nmi.h>

+#define X86_RAW_EVENT_MASK \
+ (ARCH_PERFMON_EVENTSEL_EVENT | \
+ ARCH_PERFMON_EVENTSEL_UMASK | \
+ ARCH_PERFMON_EVENTSEL_EDGE | \
+ ARCH_PERFMON_EVENTSEL_INV | \
+ ARCH_PERFMON_EVENTSEL_CMASK)
+#define AMD64_RAW_EVENT_MASK \
+ (X86_RAW_EVENT_MASK | \
+ AMD64_EVENTSEL_EVENT)
+
/* The maximal number of PEBS events: */
#define MAX_PEBS_EVENTS 4

@@ -499,20 +509,7 @@ static __initconst u64 atom_hw_cache_event_ids

static u64 intel_pmu_raw_event(u64 hw_event)
{
-#define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL
-#define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL
-#define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL
-#define CORE_EVNTSEL_INV_MASK 0x00800000ULL
-#define CORE_EVNTSEL_REG_MASK 0xFF000000ULL
-
-#define CORE_EVNTSEL_MASK \
- (CORE_EVNTSEL_EVENT_MASK | \
- CORE_EVNTSEL_UNIT_MASK | \
- CORE_EVNTSEL_EDGE_MASK | \
- CORE_EVNTSEL_INV_MASK | \
- CORE_EVNTSEL_REG_MASK)
-
- return hw_event & CORE_EVNTSEL_MASK;
+ return hw_event & X86_RAW_EVENT_MASK;
}

static __initconst u64 amd_hw_cache_event_ids
@@ -626,20 +623,7 @@ static u64 amd_pmu_event_map(int hw_event)

static u64 amd_pmu_raw_event(u64 hw_event)
{
-#define K7_EVNTSEL_EVENT_MASK 0x7000000FFULL
-#define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL
-#define K7_EVNTSEL_EDGE_MASK 0x000040000ULL
-#define K7_EVNTSEL_INV_MASK 0x000800000ULL
-#define K7_EVNTSEL_REG_MASK 0x0FF000000ULL
-
-#define K7_EVNTSEL_MASK \
- (K7_EVNTSEL_EVENT_MASK | \
- K7_EVNTSEL_UNIT_MASK | \
- K7_EVNTSEL_EDGE_MASK | \
- K7_EVNTSEL_INV_MASK | \
- K7_EVNTSEL_REG_MASK)
-
- return hw_event & K7_EVNTSEL_MASK;
+ return hw_event & AMD64_RAW_EVENT_MASK;
}

/*
@@ -1427,7 +1411,7 @@ intel_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
if (!event_constraints)
goto skip;

- code = hwc->config & CORE_EVNTSEL_EVENT_MASK;
+ code = hwc->config & ARCH_PERFMON_EVENTSEL_EVENT;

for_each_event_constraint(event_constraint, event_constraints) {
if (code == event_constraint->code) {
--
1.6.6

Subject: [PATCH 06/12] perf/core, x86: removing p6_pmu_raw_event()

The function is the same as intel_pmu_raw_event(). This patch removes
the duplicate code.

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/kernel/cpu/perf_event.c | 20 +-------------------
1 files changed, 1 insertions(+), 19 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 5c6244b..1863cda 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -150,24 +150,6 @@ static u64 p6_pmu_event_map(int hw_event)
*/
#define P6_NOP_EVENT 0x0000002EULL

-static u64 p6_pmu_raw_event(u64 hw_event)
-{
-#define P6_EVNTSEL_EVENT_MASK 0x000000FFULL
-#define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL
-#define P6_EVNTSEL_EDGE_MASK 0x00040000ULL
-#define P6_EVNTSEL_INV_MASK 0x00800000ULL
-#define P6_EVNTSEL_REG_MASK 0xFF000000ULL
-
-#define P6_EVNTSEL_MASK \
- (P6_EVNTSEL_EVENT_MASK | \
- P6_EVNTSEL_UNIT_MASK | \
- P6_EVNTSEL_EDGE_MASK | \
- P6_EVNTSEL_INV_MASK | \
- P6_EVNTSEL_REG_MASK)
-
- return hw_event & P6_EVNTSEL_MASK;
-}
-
static const struct event_constraint intel_p6_event_constraints[] =
{
EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */
@@ -1978,7 +1960,7 @@ static __initconst struct x86_pmu p6_pmu = {
.eventsel = MSR_P6_EVNTSEL0,
.perfctr = MSR_P6_PERFCTR0,
.event_map = p6_pmu_event_map,
- .raw_event = p6_pmu_raw_event,
+ .raw_event = intel_pmu_raw_event,
.max_events = ARRAY_SIZE(p6_perfmon_event_map),
.apic = 1,
.max_period = (1ULL << 31) - 1,
--
1.6.6