2010-01-12 13:16:21

by Stephane Eranian

Subject: [PATCH] perf_events: improve x86 event scheduling

This patch improves event scheduling by maximizing the use
of PMU registers regardless of the order in which events are
created in a group.

The algorithm takes into account the list of counter constraints
for each event. It assigns events to counters from the most
constrained, i.e., works on only one counter, to the least
constrained, i.e., works on any counter.

Intel Fixed counter events and the BTS special event are also
handled via this algorithm which is designed to be fairly generic.
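
In simplified form, the core idea is the following (illustrative sketch
only; the real implementation is x86_schedule_events() below, which in
addition handles fixed counters, BTS and reuse of previous assignments):

static int sched_by_weight(unsigned long *idxmsk, int n, int ncounters,
                           int *assign)
{
        unsigned long used = 0;
        int w, i, j, left = n;

        /* pass 1 places events usable on one counter, pass 2 on two, ... */
        for (w = 1; left && w <= ncounters; w++) {
                for (i = 0; i < n; i++) {
                        if (__builtin_popcountl(idxmsk[i]) != w)
                                continue;
                        /* first free counter this event accepts */
                        for (j = 0; j < ncounters; j++) {
                                if ((idxmsk[i] & (1UL << j)) && !(used & (1UL << j)))
                                        break;
                        }
                        if (j == ncounters)
                                return -1;      /* group cannot be scheduled */
                        used |= 1UL << j;
                        assign[i] = j;
                        left--;
                }
        }
        return left ? -1 : 0;
}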

The patch also updates the validation of an event to use the
scheduling algorithm. This will cause early failure in
perf_event_open().
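
For example, a group that asks twice for an event restricted to a single
counter is now rejected at creation time. A user-space sketch (hypothetical
raw event code; 0x14, CYCLES_DIV_BUSY, is restricted to counter 0 in the
Core 2 constraint table below):

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <string.h>
#include <stdio.h>
#include <unistd.h>

static int sys_perf_event_open(struct perf_event_attr *attr, pid_t pid,
                               int cpu, int group_fd, unsigned long flags)
{
        return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
        struct perf_event_attr attr;
        int leader, fd;

        memset(&attr, 0, sizeof(attr));
        attr.size   = sizeof(attr);
        attr.type   = PERF_TYPE_RAW;
        attr.config = 0x14;     /* counter-0-only event, requested twice */

        leader = sys_perf_event_open(&attr, 0, -1, -1, 0);

        /* second copy can never be co-scheduled: fails right here now */
        fd = sys_perf_event_open(&attr, 0, -1, leader, 0);
        if (fd < 0)
                perror("perf_event_open");

        return 0;
}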

This is the 2nd version of this patch. It follows the model used
by PPC, by running the scheduling algorithm and the actual
assignment separately. Actual assignment takes place in
hw_perf_enable() whereas scheduling is implemented in
hw_perf_group_sched_in() and x86_pmu_enable().

Signed-off-by: Stephane Eranian <[email protected]>
--
include/asm/perf_event.h | 17 +
kernel/cpu/perf_event.c | 694 ++++++++++++++++++++++++++++++++---------------
2 files changed, 494 insertions(+), 217 deletions(-)

diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 8d9f854..16beb34 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -19,6 +19,7 @@
#define MSR_ARCH_PERFMON_EVENTSEL1 0x187

#define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22)
+#define ARCH_PERFMON_EVENTSEL_ANY (1 << 21)
#define ARCH_PERFMON_EVENTSEL_INT (1 << 20)
#define ARCH_PERFMON_EVENTSEL_OS (1 << 17)
#define ARCH_PERFMON_EVENTSEL_USR (1 << 16)
@@ -26,7 +27,14 @@
/*
* Includes eventsel and unit mask as well:
*/
-#define ARCH_PERFMON_EVENT_MASK 0xffff
+
+
+#define INTEL_ARCH_EVTSEL_MASK 0x000000FFULL
+#define INTEL_ARCH_UNIT_MASK 0x0000FF00ULL
+#define INTEL_ARCH_EDGE_MASK 0x00040000ULL
+#define INTEL_ARCH_INV_MASK 0x00800000ULL
+#define INTEL_ARCH_CNT_MASK 0xFF000000ULL
+#define INTEL_ARCH_EVENT_MASK (INTEL_ARCH_UNIT_MASK|INTEL_ARCH_EVTSEL_MASK)

/*
* filter mask to validate fixed counter events.
@@ -37,7 +45,12 @@
* The other filters are supported by fixed counters.
* The any-thread option is supported starting with v3.
*/
-#define ARCH_PERFMON_EVENT_FILTER_MASK 0xff840000
+#define INTEL_ARCH_FIXED_MASK \
+ (INTEL_ARCH_CNT_MASK| \
+ INTEL_ARCH_INV_MASK| \
+ INTEL_ARCH_EDGE_MASK|\
+ INTEL_ARCH_UNIT_MASK|\
+ INTEL_ARCH_EVENT_MASK)

#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c
#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index d616c06..d68c3e5 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -7,6 +7,7 @@
* Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
* Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <[email protected]>
* Copyright (C) 2009 Intel Corporation, <[email protected]>
+ * Copyright (C) 2009 Google Inc., Stephane Eranian
*
* For licencing details see kernel-base/COPYING
*/
@@ -68,26 +69,37 @@ struct debug_store {
u64 pebs_event_reset[MAX_PEBS_EVENTS];
};

+#define BITS_TO_U64(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u64))
+
+struct event_constraint {
+ u64 idxmsk[BITS_TO_U64(X86_PMC_IDX_MAX)];
+ int code;
+ int cmask;
+};
+
struct cpu_hw_events {
- struct perf_event *events[X86_PMC_IDX_MAX];
- unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */
unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
unsigned long interrupts;
int enabled;
struct debug_store *ds;
-};

-struct event_constraint {
- unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
- int code;
+ int n_events;
+ int n_added;
+ int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
+ struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
};

-#define EVENT_CONSTRAINT(c, m) { .code = (c), .idxmsk[0] = (m) }
-#define EVENT_CONSTRAINT_END { .code = 0, .idxmsk[0] = 0 }
+#define EVENT_CONSTRAINT(c, n, m) { \
+ .code = (c), \
+ .cmask = (m), \
+ .idxmsk[0] = (n) }

-#define for_each_event_constraint(e, c) \
- for ((e) = (c); (e)->idxmsk[0]; (e)++)
+#define EVENT_CONSTRAINT_END \
+ { .code = 0, .cmask = 0, .idxmsk[0] = 0 }

+#define for_each_event_constraint(e, c) \
+ for ((e) = (c); (e)->cmask; (e)++)

/*
* struct x86_pmu - generic x86 pmu
@@ -114,8 +126,8 @@ struct x86_pmu {
u64 intel_ctrl;
void (*enable_bts)(u64 config);
void (*disable_bts)(void);
- int (*get_event_idx)(struct cpu_hw_events *cpuc,
- struct hw_perf_event *hwc);
+ const struct event_constraint *(*get_event_constraints)(struct perf_event *event);
+ const struct event_constraint *event_constraints;
};

static struct x86_pmu x86_pmu __read_mostly;
@@ -124,7 +136,8 @@ static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
.enabled = 1,
};

-static const struct event_constraint *event_constraints;
+static int x86_perf_event_set_period(struct perf_event *event,
+ struct hw_perf_event *hwc, int idx);

/*
* Not sure about some of these
@@ -171,14 +184,14 @@ static u64 p6_pmu_raw_event(u64 hw_event)
return hw_event & P6_EVNTSEL_MASK;
}

-static const struct event_constraint intel_p6_event_constraints[] =
+static struct event_constraint intel_p6_event_constraints[] =
{
- EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */
- EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
- EVENT_CONSTRAINT(0x11, 0x1), /* FP_ASSIST */
- EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
- EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
- EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
+ EVENT_CONSTRAINT(0xc1, 0x1, INTEL_ARCH_EVENT_MASK), /* FLOPS */
+ EVENT_CONSTRAINT(0x10, 0x1, INTEL_ARCH_EVENT_MASK), /* FP_COMP_OPS_EXE */
+ EVENT_CONSTRAINT(0x11, 0x1, INTEL_ARCH_EVENT_MASK), /* FP_ASSIST */
+ EVENT_CONSTRAINT(0x12, 0x2, INTEL_ARCH_EVENT_MASK), /* MUL */
+ EVENT_CONSTRAINT(0x13, 0x2, INTEL_ARCH_EVENT_MASK), /* DIV */
+ EVENT_CONSTRAINT(0x14, 0x1, INTEL_ARCH_EVENT_MASK), /* CYCLES_DIV_BUSY */
EVENT_CONSTRAINT_END
};

@@ -196,35 +209,43 @@ static const u64 intel_perfmon_event_map[] =
[PERF_COUNT_HW_BUS_CYCLES] = 0x013c,
};

-static const struct event_constraint intel_core_event_constraints[] =
-{
- EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
- EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
- EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
- EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
- EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
- EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */
- EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
- EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */
- EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */
+static struct event_constraint intel_core_event_constraints[] =
+{
+ EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32)), INTEL_ARCH_FIXED_MASK), /* INSTRUCTIONS_RETIRED */
+ EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33)), INTEL_ARCH_FIXED_MASK), /* UNHALTED_CORE_CYCLES */
+ EVENT_CONSTRAINT(0x10, 0x1, INTEL_ARCH_EVENT_MASK), /* FP_COMP_OPS_EXE */
+ EVENT_CONSTRAINT(0x11, 0x2, INTEL_ARCH_EVENT_MASK), /* FP_ASSIST */
+ EVENT_CONSTRAINT(0x12, 0x2, INTEL_ARCH_EVENT_MASK), /* MUL */
+ EVENT_CONSTRAINT(0x13, 0x2, INTEL_ARCH_EVENT_MASK), /* DIV */
+ EVENT_CONSTRAINT(0x14, 0x1, INTEL_ARCH_EVENT_MASK), /* CYCLES_DIV_BUSY */
+ EVENT_CONSTRAINT(0x18, 0x1, INTEL_ARCH_EVENT_MASK), /* IDLE_DURING_DIV */
+ EVENT_CONSTRAINT(0x19, 0x2, INTEL_ARCH_EVENT_MASK), /* DELAYED_BYPASS */
+ EVENT_CONSTRAINT(0xa1, 0x1, INTEL_ARCH_EVENT_MASK), /* RS_UOPS_DISPATCH_CYCLES */
+ EVENT_CONSTRAINT(0xcb, 0x1, INTEL_ARCH_EVENT_MASK), /* MEM_LOAD_RETIRED */
EVENT_CONSTRAINT_END
};

-static const struct event_constraint intel_nehalem_event_constraints[] =
-{
- EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
- EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
- EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
- EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */
- EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */
- EVENT_CONSTRAINT(0x4c, 0x3), /* LOAD_HIT_PRE */
- EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
- EVENT_CONSTRAINT(0x52, 0x3), /* L1D_CACHE_PREFETCH_LOCK_FB_HIT */
- EVENT_CONSTRAINT(0x53, 0x3), /* L1D_CACHE_LOCK_FB_HIT */
- EVENT_CONSTRAINT(0xc5, 0x3), /* CACHE_LOCK_CYCLES */
+static struct event_constraint intel_nehalem_event_constraints[] = {
+ EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32)), INTEL_ARCH_FIXED_MASK), /* INSTRUCTIONS_RETIRED */
+ EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33)), INTEL_ARCH_FIXED_MASK), /* UNHALTED_CORE_CYCLES */
+ EVENT_CONSTRAINT(0x40, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_CACHE_LD */
+ EVENT_CONSTRAINT(0x41, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_CACHE_ST */
+ EVENT_CONSTRAINT(0x42, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_CACHE_LOCK */
+ EVENT_CONSTRAINT(0x43, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_ALL_REF */
+ EVENT_CONSTRAINT(0x4e, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_PREFETCH */
+ EVENT_CONSTRAINT(0x4c, 0x3, INTEL_ARCH_EVENT_MASK), /* LOAD_HIT_PRE */
+ EVENT_CONSTRAINT(0x51, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D */
+ EVENT_CONSTRAINT(0x52, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_CACHE_PREFETCH_LOCK_FB_HIT */
+ EVENT_CONSTRAINT(0x53, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_CACHE_LOCK_FB_HIT */
+ EVENT_CONSTRAINT(0xc5, 0x3, INTEL_ARCH_EVENT_MASK), /* CACHE_LOCK_CYCLES */
EVENT_CONSTRAINT_END
};

+static struct event_constraint intel_gen_event_constraints[] = {
+ EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32)), INTEL_ARCH_FIXED_MASK), /* INSTRUCTIONS_RETIRED */
+ EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33)), INTEL_ARCH_FIXED_MASK), /* UNHALTED_CORE_CYCLES */
+};
+
static u64 intel_pmu_event_map(int hw_event)
{
return intel_perfmon_event_map[hw_event];
@@ -527,11 +548,11 @@ static u64 intel_pmu_raw_event(u64 hw_event)
#define CORE_EVNTSEL_REG_MASK 0xFF000000ULL

#define CORE_EVNTSEL_MASK \
- (CORE_EVNTSEL_EVENT_MASK | \
- CORE_EVNTSEL_UNIT_MASK | \
- CORE_EVNTSEL_EDGE_MASK | \
- CORE_EVNTSEL_INV_MASK | \
- CORE_EVNTSEL_REG_MASK)
+ (INTEL_ARCH_EVTSEL_MASK | \
+ INTEL_ARCH_UNIT_MASK | \
+ INTEL_ARCH_EDGE_MASK | \
+ INTEL_ARCH_INV_MASK | \
+ INTEL_ARCH_CNT_MASK)

return hw_event & CORE_EVNTSEL_MASK;
}
@@ -1120,9 +1141,15 @@ static void amd_pmu_disable_all(void)

void hw_perf_disable(void)
{
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
if (!x86_pmu_initialized())
return;
- return x86_pmu.disable_all();
+
+ if (cpuc->enabled)
+ cpuc->n_added = 0;
+
+ x86_pmu.disable_all();
}

static void p6_pmu_enable_all(void)
@@ -1189,10 +1216,219 @@ static void amd_pmu_enable_all(void)
}
}

+static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
+{
+ int i, j, w, num, lim;
+ int weight, wmax;
+ const struct event_constraint *c;
+ const struct event_constraint *constraints[X86_PMC_IDX_MAX];
+ unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ struct hw_perf_event *hwc;
+
+ bitmap_zero(used_mask, X86_PMC_IDX_MAX);
+
+ for(i=0; i < n; i++)
+ constraints[i] = x86_pmu.get_event_constraints(cpuc->event_list[i]);
+
+ /*
+ * weight = number of possible counters
+ *
+ * 1 = most constrained, only works on one counter
+ * wmax = least constrained, works on 1 fixed counter
+ * or any generic counter
+ *
+ * assign events to counters starting with most
+ * constrained events.
+ */
+ wmax = 1 + x86_pmu.num_events;
+ num = n;
+ for(w=1; num && w <= wmax; w++) {
+
+ /* for each event */
+ for(i=0; i < n; i++) {
+ c = constraints[i];
+ hwc = &cpuc->event_list[i]->hw;
+
+ lim = X86_PMC_IDX_MAX;
+
+ weight = c ? bitmap_weight((unsigned long *)c->idxmsk, lim)
+ : x86_pmu.num_events;
+
+ if (weight != w)
+ continue;
+
+ /*
+ * try to reuse previous assignment
+ *
+ * This is possible despite the fact that
+ * events or events order may have changed.
+ *
+ * What matters is the level of constraints
+ * of an event and this is constant for now.
+ *
+ * This is possible also because we always
+ * scan from most to least constrained. Thus,
+ * if a counter can be reused, it means no
+ * more constrained event needed it. And
+ * next events will either compete for it
+ * (which cannot be solved anyway) or they
+ * have fewer constraints, and they can use
+ * another counter.
+ */
+ j = hwc->idx;
+ if (j != -1 && !test_bit(j, used_mask))
+ goto skip;
+
+ if (c) {
+ for_each_bit(j, (unsigned long *)c->idxmsk, lim)
+ if (!test_bit(j, used_mask))
+ break;
+
+ } else {
+ lim = x86_pmu.num_events;
+ /*
+ * fixed counter events have necessarily a
+ * constraint thus we come here only for
+ * generic counters and thus we limit the
+ * scan to those
+ */
+ j = find_first_zero_bit(used_mask, lim);
+ }
+ if (j == lim)
+ return -EAGAIN;
+skip:
+ set_bit(j, used_mask);
+
+ if (assign)
+ assign[i] = j;
+ num--;
+ }
+ }
+ if (num)
+ return -ENOSPC;
+
+ return 0;
+}
+
+/*
+ * dogrp: true if must collect siblings events (group)
+ * returns total number of events and error code
+ */
+static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp)
+{
+ struct perf_event *event;
+ int n, max_count;
+
+ max_count = x86_pmu.num_events + x86_pmu.num_events_fixed;
+
+ /* current number of events already accepted */
+ n = cpuc->n_events;
+
+ if (!is_software_event(leader)) {
+ if (n >= max_count)
+ return -ENOSPC;
+ cpuc->event_list[n] = leader;
+ n++;
+ }
+ if (!dogrp)
+ return n;
+
+ list_for_each_entry(event, &leader->sibling_list, group_entry) {
+ if (is_software_event(event) ||
+ event->state == PERF_EVENT_STATE_OFF)
+ continue;
+
+ if (n >= max_count)
+ return -ENOSPC;
+
+ cpuc->event_list[n] = event;
+ n++;
+ }
+ return n;
+}
+
+
+static inline void x86_assign_hw_event(struct perf_event *event,
+ struct hw_perf_event *hwc, int idx)
+{
+ hwc->idx = idx;
+
+ if (hwc->idx == X86_PMC_IDX_FIXED_BTS) {
+ hwc->config_base = 0;
+ hwc->event_base = 0;
+ } else if (hwc->idx >= X86_PMC_IDX_FIXED) {
+ hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
+ /*
+ * We set it so that event_base + idx in wrmsr/rdmsr maps to
+ * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
+ */
+ hwc->event_base =
+ MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
+ } else {
+ hwc->config_base = x86_pmu.eventsel;
+ hwc->event_base = x86_pmu.perfctr;
+ }
+}
+
void hw_perf_enable(void)
{
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct perf_event *event;
+ struct hw_perf_event *hwc;
+ int i;
+
if (!x86_pmu_initialized())
return;
+ if (cpuc->n_added) {
+ /*
+ * apply assignment obtained either from
+ * hw_perf_group_sched_in() or x86_pmu_enable()
+ *
+ * step1: save events moving to new counters
+ * step2: reprogram moved events into new counters
+ */
+ for(i=0; i < cpuc->n_events; i++) {
+
+ event = cpuc->event_list[i];
+ hwc = &event->hw;
+
+ if (hwc->idx == -1 || hwc->idx == cpuc->assign[i])
+ continue;
+
+ x86_pmu.disable(hwc, hwc->idx);
+
+ clear_bit(hwc->idx, cpuc->active_mask);
+ barrier();
+ cpuc->events[hwc->idx] = NULL;
+
+ x86_perf_event_update(event, hwc, hwc->idx);
+
+ hwc->idx = -1;
+ }
+
+ for(i=0; i < cpuc->n_events; i++) {
+
+ event = cpuc->event_list[i];
+ hwc = &event->hw;
+
+ if (hwc->idx == -1) {
+ x86_assign_hw_event(event, hwc, cpuc->assign[i]);
+ x86_perf_event_set_period(event, hwc, hwc->idx);
+ }
+ /*
+ * need to mark as active because x86_pmu_disable()
+ * clear active_mask and events[] yet it preserves
+ * idx
+ */
+ set_bit(hwc->idx, cpuc->active_mask);
+ cpuc->events[hwc->idx] = event;
+
+ x86_pmu.enable(hwc, hwc->idx);
+ perf_event_update_userpage(event);
+ }
+ cpuc->n_added = 0;
+ perf_events_lapic_init();
+ }
x86_pmu.enable_all();
}

@@ -1343,6 +1579,13 @@ intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx)
bits |= 0x2;
if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
bits |= 0x1;
+
+ /*
+ * ANY bit is supported in v3 and up
+ */
+ if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY)
+ bits |= 0x4;
+
bits <<= (idx * 4);
mask = 0xfULL << (idx * 4);

@@ -1391,148 +1634,43 @@ static void amd_pmu_enable_event(struct hw_perf_event *hwc, int idx)
x86_pmu_enable_event(hwc, idx);
}

-static int fixed_mode_idx(struct hw_perf_event *hwc)
-{
- unsigned int hw_event;
-
- hw_event = hwc->config & ARCH_PERFMON_EVENT_MASK;
-
- if (unlikely((hw_event ==
- x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) &&
- (hwc->sample_period == 1)))
- return X86_PMC_IDX_FIXED_BTS;
-
- if (!x86_pmu.num_events_fixed)
- return -1;
-
- /*
- * fixed counters do not take all possible filters
- */
- if (hwc->config & ARCH_PERFMON_EVENT_FILTER_MASK)
- return -1;
-
- if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS)))
- return X86_PMC_IDX_FIXED_INSTRUCTIONS;
- if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES)))
- return X86_PMC_IDX_FIXED_CPU_CYCLES;
- if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES)))
- return X86_PMC_IDX_FIXED_BUS_CYCLES;
-
- return -1;
-}
-
-/*
- * generic counter allocator: get next free counter
- */
-static int
-gen_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
-{
- int idx;
-
- idx = find_first_zero_bit(cpuc->used_mask, x86_pmu.num_events);
- return idx == x86_pmu.num_events ? -1 : idx;
-}
-
-/*
- * intel-specific counter allocator: check event constraints
- */
-static int
-intel_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
-{
- const struct event_constraint *event_constraint;
- int i, code;
-
- if (!event_constraints)
- goto skip;
-
- code = hwc->config & CORE_EVNTSEL_EVENT_MASK;
-
- for_each_event_constraint(event_constraint, event_constraints) {
- if (code == event_constraint->code) {
- for_each_bit(i, event_constraint->idxmsk, X86_PMC_IDX_MAX) {
- if (!test_and_set_bit(i, cpuc->used_mask))
- return i;
- }
- return -1;
- }
- }
-skip:
- return gen_get_event_idx(cpuc, hwc);
-}
-
-static int
-x86_schedule_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
-{
- int idx;
-
- idx = fixed_mode_idx(hwc);
- if (idx == X86_PMC_IDX_FIXED_BTS) {
- /* BTS is already occupied. */
- if (test_and_set_bit(idx, cpuc->used_mask))
- return -EAGAIN;
-
- hwc->config_base = 0;
- hwc->event_base = 0;
- hwc->idx = idx;
- } else if (idx >= 0) {
- /*
- * Try to get the fixed event, if that is already taken
- * then try to get a generic event:
- */
- if (test_and_set_bit(idx, cpuc->used_mask))
- goto try_generic;
-
- hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
- /*
- * We set it so that event_base + idx in wrmsr/rdmsr maps to
- * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
- */
- hwc->event_base =
- MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
- hwc->idx = idx;
- } else {
- idx = hwc->idx;
- /* Try to get the previous generic event again */
- if (idx == -1 || test_and_set_bit(idx, cpuc->used_mask)) {
-try_generic:
- idx = x86_pmu.get_event_idx(cpuc, hwc);
- if (idx == -1)
- return -EAGAIN;
-
- set_bit(idx, cpuc->used_mask);
- hwc->idx = idx;
- }
- hwc->config_base = x86_pmu.eventsel;
- hwc->event_base = x86_pmu.perfctr;
- }
-
- return idx;
-}
-
/*
- * Find a PMC slot for the freshly enabled / scheduled in event:
+ * activate a single event
+ *
+ * The event is added to the group of enabled events
+ * but only if it can be scheduled with existing events.
+ *
+ * Called with PMU disabled. If successful and return value 1,
+ * then guaranteed to call perf_enable() and hw_perf_enable()
*/
static int x86_pmu_enable(struct perf_event *event)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
- struct hw_perf_event *hwc = &event->hw;
- int idx;
+ struct hw_perf_event *hwc;
+ int assign[X86_PMC_IDX_MAX];
+ int n, n0, ret;

- idx = x86_schedule_event(cpuc, hwc);
- if (idx < 0)
- return idx;
+ hwc = &event->hw;

- perf_events_lapic_init();
+ n0 = cpuc->n_events;
+ n = collect_events(cpuc, event, false);
+ if (n < 0)
+ return n;

- x86_pmu.disable(hwc, idx);
+ ret = x86_schedule_events(cpuc, n, assign);
+ if (ret)
+ return ret;
+ /*
+ * copy new assignment, now we know it is possible
+ * will be used by hw_perf_enable()
+ */
+ memcpy(cpuc->assign, assign, n*sizeof(int));

- cpuc->events[idx] = event;
- set_bit(idx, cpuc->active_mask);
+ cpuc->n_events = n;
+ cpuc->n_added = n - n0;

- x86_perf_event_set_period(event, hwc, idx);
- x86_pmu.enable(hwc, idx);
-
- perf_event_update_userpage(event);
+ if (hwc->idx != -1)
+ x86_perf_event_set_period(event, hwc, hwc->idx);

return 0;
}
@@ -1576,7 +1714,7 @@ void perf_event_print_debug(void)
pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow);
pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed);
}
- pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used_mask);
+ pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask);

for (idx = 0; idx < x86_pmu.num_events; idx++) {
rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
@@ -1664,7 +1802,7 @@ static void x86_pmu_disable(struct perf_event *event)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
- int idx = hwc->idx;
+ int i, idx = hwc->idx;

/*
* Must be done before we disable, otherwise the nmi handler
@@ -1690,8 +1828,14 @@ static void x86_pmu_disable(struct perf_event *event)
intel_pmu_drain_bts_buffer(cpuc);

cpuc->events[idx] = NULL;
- clear_bit(idx, cpuc->used_mask);

+ for(i=0; i < cpuc->n_events; i++) {
+ if (event == cpuc->event_list[i]) {
+ while (++i < cpuc->n_events)
+ cpuc->event_list[i-1] = cpuc->event_list[i];
+ --cpuc->n_events;
+ }
+ }
perf_event_update_userpage(event);
}

@@ -1962,6 +2106,115 @@ perf_event_nmi_handler(struct notifier_block *self,
return NOTIFY_STOP;
}

+static struct event_constraint bts_constraint={
+ .code = 0,
+ .cmask = 0,
+ .idxmsk[0] = 1ULL << X86_PMC_IDX_FIXED_BTS
+};
+
+static struct event_constraint *intel_special_constraints(struct perf_event *event)
+{
+ unsigned int hw_event;
+
+ hw_event = event->hw.config & INTEL_ARCH_EVENT_MASK;
+
+ if (unlikely((hw_event ==
+ x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) &&
+ (event->hw.sample_period == 1)))
+ return &bts_constraint;
+
+ return NULL;
+}
+
+static const struct event_constraint *intel_get_event_constraints(struct perf_event *event)
+{
+ const struct event_constraint *c;
+
+ c = intel_special_constraints(event);
+ if (c)
+ return c;
+
+ if (x86_pmu.event_constraints)
+ for_each_event_constraint(c, x86_pmu.event_constraints) {
+ if ((event->hw.config & c->cmask) == c->code)
+ return c;
+ }
+
+ return NULL;
+}
+
+static const struct event_constraint *amd_get_event_constraints(struct perf_event *event)
+{
+ return NULL;
+}
+
+
+static void event_sched_in(struct perf_event *event, int cpu)
+{
+ event->state = PERF_EVENT_STATE_ACTIVE;
+ event->oncpu = cpu;
+ event->tstamp_running += event->ctx->time - event->tstamp_stopped;
+ if (is_software_event(event))
+ event->pmu->enable(event);
+}
+
+/*
+ * Called to enable a whole group of events.
+ * Returns 1 if the group was enabled, or -EAGAIN if it could not be.
+ * Assumes the caller has disabled interrupts and has
+ * frozen the PMU with hw_perf_save_disable.
+ *
+ * called with PMU disabled. If successful and return value 1,
+ * then guaranteed to call perf_enable() and hw_perf_enable()
+ */
+int hw_perf_group_sched_in(struct perf_event *leader,
+ struct perf_cpu_context *cpuctx,
+ struct perf_event_context *ctx, int cpu)
+{
+ struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+ struct perf_event *sub;
+ int assign[X86_PMC_IDX_MAX];
+ int n, n0, ret;
+
+ n0 = cpuc->n_events;
+
+ n = collect_events(cpuc, leader, true);
+ if (n < 0)
+ return n;
+
+ ret = x86_schedule_events(cpuc, n, assign);
+ if (ret)
+ return ret;
+
+ /*
+ * copy new assignment, now we know it is possible
+ * will be used by hw_perf_enable()
+ */
+ memcpy(cpuc->assign, assign, n*sizeof(int));
+
+ cpuc->n_events = n;
+ cpuc->n_added = n - n0;
+
+ n = 1;
+ event_sched_in(leader, cpu);
+ list_for_each_entry(sub, &leader->sibling_list, group_entry) {
+ if (sub->state != PERF_EVENT_STATE_OFF) {
+ event_sched_in(sub, cpu);
+ ++n;
+ }
+ }
+ ctx->nr_active += n;
+
+ /*
+ * 1 means successful and events are active
+ * This is not quite true because we defer
+ * actual activation until hw_perf_enable() but
+ * this way we* ensure caller won't try to enable
+ * individual events
+ */
+ return 1;
+}
+
static __read_mostly struct notifier_block perf_event_nmi_notifier = {
.notifier_call = perf_event_nmi_handler,
.next = NULL,
@@ -1993,7 +2246,8 @@ static __initconst struct x86_pmu p6_pmu = {
*/
.event_bits = 32,
.event_mask = (1ULL << 32) - 1,
- .get_event_idx = intel_get_event_idx,
+ .get_event_constraints = intel_get_event_constraints,
+ .event_constraints = intel_p6_event_constraints
};

static __initconst struct x86_pmu intel_pmu = {
@@ -2017,7 +2271,7 @@ static __initconst struct x86_pmu intel_pmu = {
.max_period = (1ULL << 31) - 1,
.enable_bts = intel_pmu_enable_bts,
.disable_bts = intel_pmu_disable_bts,
- .get_event_idx = intel_get_event_idx,
+ .get_event_constraints = intel_get_event_constraints
};

static __initconst struct x86_pmu amd_pmu = {
@@ -2038,7 +2292,7 @@ static __initconst struct x86_pmu amd_pmu = {
.apic = 1,
/* use highest bit to detect overflow */
.max_period = (1ULL << 47) - 1,
- .get_event_idx = gen_get_event_idx,
+ .get_event_constraints = amd_get_event_constraints
};

static __init int p6_pmu_init(void)
@@ -2051,12 +2305,9 @@ static __init int p6_pmu_init(void)
case 7:
case 8:
case 11: /* Pentium III */
- event_constraints = intel_p6_event_constraints;
- break;
case 9:
case 13:
/* Pentium M */
- event_constraints = intel_p6_event_constraints;
break;
default:
pr_cont("unsupported p6 CPU model %d ",
@@ -2121,23 +2372,29 @@ static __init int intel_pmu_init(void)
memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
sizeof(hw_cache_event_ids));

+ x86_pmu.event_constraints = intel_core_event_constraints;
pr_cont("Core2 events, ");
- event_constraints = intel_core_event_constraints;
break;
- default:
case 26:
memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
sizeof(hw_cache_event_ids));

- event_constraints = intel_nehalem_event_constraints;
+ x86_pmu.event_constraints = intel_nehalem_event_constraints;
pr_cont("Nehalem/Corei7 events, ");
break;
case 28:
memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
sizeof(hw_cache_event_ids));

+ x86_pmu.event_constraints = intel_gen_event_constraints;
pr_cont("Atom events, ");
break;
+ default:
+ /*
+ * default constraints for v2 and up
+ */
+ x86_pmu.event_constraints = intel_gen_event_constraints;
+ pr_cont("generic architected perfmon, ");
}
return 0;
}
@@ -2234,36 +2491,43 @@ static const struct pmu pmu = {
.unthrottle = x86_pmu_unthrottle,
};

-static int
-validate_event(struct cpu_hw_events *cpuc, struct perf_event *event)
-{
- struct hw_perf_event fake_event = event->hw;
-
- if (event->pmu && event->pmu != &pmu)
- return 0;
-
- return x86_schedule_event(cpuc, &fake_event) >= 0;
-}
-
+/*
+ * validate a single event group
+ *
+ * validation includes:
+ * - check events are compatible with each other
+ * - events do not compete for the same counter
+ * - number of events <= number of counters
+ *
+ * validation ensures the group can be loaded onto the
+ * PMU if it was the only group available.
+ */
static int validate_group(struct perf_event *event)
{
- struct perf_event *sibling, *leader = event->group_leader;
- struct cpu_hw_events fake_pmu;
+ struct perf_event *leader = event->group_leader;
+ struct cpu_hw_events fake_cpuc;
+ int n;

- memset(&fake_pmu, 0, sizeof(fake_pmu));
+ memset(&fake_cpuc, 0, sizeof(fake_cpuc));

- if (!validate_event(&fake_pmu, leader))
+ /*
+ * the event is not yet connected with its
+ * siblings therefore we must first collect
+ * existing siblings, then add the new event
+ * before we can simulate the scheduling
+ */
+ n = collect_events(&fake_cpuc, leader, true);
+ if (n < 0)
return -ENOSPC;

- list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
- if (!validate_event(&fake_pmu, sibling))
- return -ENOSPC;
- }
-
- if (!validate_event(&fake_pmu, event))
+ fake_cpuc.n_events = n;
+ n = collect_events(&fake_cpuc, event, false);
+ if (n < 0)
return -ENOSPC;

- return 0;
+ fake_cpuc.n_events = n;
+
+ return x86_schedule_events(&fake_cpuc, n, NULL);
}

const struct pmu *hw_perf_event_init(struct perf_event *event)


2010-01-12 16:10:34

by Peter Zijlstra

Subject: Re: [PATCH] perf_events: improve x86 event scheduling

On Tue, 2010-01-12 at 12:50 +0200, Stephane Eranian wrote:
> This patch improves event scheduling by maximizing the use
> of PMU registers regardless of the order in which events are
> created in a group.
>
> The algorithm takes into account the list of counter constraints
> for each event. It assigns events to counters from the most
> constrained, i.e., works on only one counter, to the least
> constrained, i.e., works on any counter.
>
> Intel Fixed counter events and the BTS special event are also
> handled via this algorithm which is designed to be fairly generic.
>
> The patch also updates the validation of an event to use the
> scheduling algorithm. This will cause early failure in
> perf_event_open().
>
> This is the 2nd version of this patch. It follows the model used
> by PPC, by running the scheduling algorithm and the actual
> assignment separately. Actual assignment takes place in
> hw_perf_enable() whereas scheduling is implemented in
> hw_perf_group_sched_in() and x86_pmu_enable().

Looks very good; I will have to reread it in the morning to look for
more details, but from an initial reading it's fine.

One concern I do have is the loss of error checking on
event_sched_in()'s event->pmu->enable(), that could be another
'hardware' PMU like breakpoints, in which case it could fail.

Not sure what to do with that.. maybe we should not allow mixing
different hardware PMU events, but only 1 hardware + software events, in
which case the below patch should retain some of the
is_software_event()s.
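
Something along these lines could express that restriction (purely
hypothetical sketch of the policy, not part of the patch below):

static bool group_mixes_hw_pmus(struct perf_event *leader)
{
        const struct pmu *hw_pmu = NULL;
        struct perf_event *sub;

        if (!is_software_event(leader))
                hw_pmu = leader->pmu;

        list_for_each_entry(sub, &leader->sibling_list, group_entry) {
                if (is_software_event(sub))
                        continue;
                if (!hw_pmu)
                        hw_pmu = sub->pmu;
                else if (sub->pmu != hw_pmu)
                        return true;    /* two different hardware PMUs */
        }
        return false;
}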

Frederic, Paul?



> Signed-off-by: Stephane Eranian <[email protected]>
> --

> diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
> index 8d9f854..16beb34 100644
> --- a/arch/x86/include/asm/perf_event.h
> +++ b/arch/x86/include/asm/perf_event.h
> @@ -19,6 +19,7 @@
> #define MSR_ARCH_PERFMON_EVENTSEL1 0x187
>
> #define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22)
> +#define ARCH_PERFMON_EVENTSEL_ANY (1 << 21)
> #define ARCH_PERFMON_EVENTSEL_INT (1 << 20)
> #define ARCH_PERFMON_EVENTSEL_OS (1 << 17)
> #define ARCH_PERFMON_EVENTSEL_USR (1 << 16)


> diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
> index d616c06..d68c3e5 100644
> --- a/arch/x86/kernel/cpu/perf_event.c
> +++ b/arch/x86/kernel/cpu/perf_event.c
> @@ -1343,6 +1579,13 @@ intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx)
> bits |= 0x2;
> if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
> bits |= 0x1;
> +
> + /*
> + * ANY bit is supported in v3 and up
> + */
> + if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY)
> + bits |= 0x4;
> +
> bits <<= (idx * 4);
> mask = 0xfULL << (idx * 4);
>

This looks like a separate patch all by itself.

Also, the below fixes a few style nits and that is_software_event()
usage:

---
Index: linux-2.6/arch/x86/kernel/cpu/perf_event.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/perf_event.c
+++ linux-2.6/arch/x86/kernel/cpu/perf_event.c
@@ -225,7 +225,8 @@ static struct event_constraint intel_cor
EVENT_CONSTRAINT_END
};

-static struct event_constraint intel_nehalem_event_constraints[] = {
+static struct event_constraint intel_nehalem_event_constraints[] =
+{
EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32)), INTEL_ARCH_FIXED_MASK), /* INSTRUCTIONS_RETIRED */
EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33)), INTEL_ARCH_FIXED_MASK), /* UNHALTED_CORE_CYCLES */
EVENT_CONSTRAINT(0x40, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_CACHE_LD */
@@ -241,7 +242,8 @@ static struct event_constraint intel_neh
EVENT_CONSTRAINT_END
};

-static struct event_constraint intel_gen_event_constraints[] = {
+static struct event_constraint intel_gen_event_constraints[] =
+{
EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32)), INTEL_ARCH_FIXED_MASK), /* INSTRUCTIONS_RETIRED */
EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33)), INTEL_ARCH_FIXED_MASK), /* UNHALTED_CORE_CYCLES */
};
@@ -1310,6 +1312,13 @@ skip:
return 0;
}

+static const struct pmu pmu;
+
+static inline bool is_x86_event(struct perf_event *event)
+{
+ return event->pmu == &pmu;
+}
+
/*
* dogrp: true if must collect siblings events (group)
* returns total number of events and error code
@@ -1324,7 +1333,7 @@ static int collect_events(struct cpu_hw_
/* current number of events already accepted */
n = cpuc->n_events;

- if (!is_software_event(leader)) {
+ if (is_x86_event(leader)) {
if (n >= max_count)
return -ENOSPC;
cpuc->event_list[n] = leader;
@@ -1334,7 +1343,7 @@ static int collect_events(struct cpu_hw_
return n;

list_for_each_entry(event, &leader->sibling_list, group_entry) {
- if (is_software_event(event) ||
+ if (!is_x86_event(event) ||
event->state == PERF_EVENT_STATE_OFF)
continue;

@@ -2154,7 +2163,7 @@ static void event_sched_in(struct perf_e
event->state = PERF_EVENT_STATE_ACTIVE;
event->oncpu = cpu;
event->tstamp_running += event->ctx->time - event->tstamp_stopped;
- if (is_software_event(event))
+ if (!is_x86_event(event))
event->pmu->enable(event);
}

@@ -2209,7 +2218,7 @@ int hw_perf_group_sched_in(struct perf_e
* 1 means successful and events are active
* This is not quite true because we defer
* actual activation until hw_perf_enable() but
- * this way we* ensure caller won't try to enable
+ * this way we ensure caller won't try to enable
* individual events
*/
return 1;


2010-01-13 09:54:50

by Stephane Eranian

Subject: Re: [PATCH] perf_events: improve x86 event scheduling

On Tue, Jan 12, 2010 at 5:10 PM, Peter Zijlstra <[email protected]> wrote:
> On Tue, 2010-01-12 at 12:50 +0200, Stephane Eranian wrote:
>>       This patch improves event scheduling by maximizing the use
>>       of PMU registers regardless of the order in which events are
>>       created in a group.
>>
>>       The algorithm takes into account the list of counter constraints
>>       for each event. It assigns events to counters from the most
>>       constrained, i.e., works on only one counter, to the least
>>       constrained, i.e., works on any counter.
>>
>>       Intel Fixed counter events and the BTS special event are also
>>       handled via this algorithm which is designed to be fairly generic.
>>
>>       The patch also updates the validation of an event to use the
>>       scheduling algorithm. This will cause early failure in
>>       perf_event_open().
>>
>>       This is the 2nd version of this patch. It follows the model used
>>       by PPC, by running the scheduling algorithm and the actual
>>       assignment separately. Actual assignment takes place in
>>       hw_perf_enable() whereas scheduling is implemented in
>>       hw_perf_group_sched_in() and x86_pmu_enable().
>
> Looks very good, will have to reread it again in the morning to look for
> more details but from an initial reading its fine.
>
> One concern I do have is the loss of error checking on
> event_sched_in()'s event->pmu->enable(), that could be another
> 'hardware' PMU like breakpoints, in which case it could fail.
>
Well, x86_pmu_enable() does return an error code, so it is up
to the upper layer to handle the error gracefully and in particular
in perf_ctx_adjust_freq().

>>       Signed-off-by: Stephane Eranian <[email protected]>
>> --
>
>> diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
>> index 8d9f854..16beb34 100644
>> --- a/arch/x86/include/asm/perf_event.h
>> +++ b/arch/x86/include/asm/perf_event.h
>> @@ -19,6 +19,7 @@
>>  #define MSR_ARCH_PERFMON_EVENTSEL1                        0x187
>>
>>  #define ARCH_PERFMON_EVENTSEL0_ENABLE                          (1 << 22)
>> +#define ARCH_PERFMON_EVENTSEL_ANY                      (1 << 21)
>>  #define ARCH_PERFMON_EVENTSEL_INT                      (1 << 20)
>>  #define ARCH_PERFMON_EVENTSEL_OS                       (1 << 17)
>>  #define ARCH_PERFMON_EVENTSEL_USR                      (1 << 16)
>
>
>> diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
>> index d616c06..d68c3e5 100644
>> --- a/arch/x86/kernel/cpu/perf_event.c
>> +++ b/arch/x86/kernel/cpu/perf_event.c
>> @@ -1343,6 +1579,13 @@ intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx)
>>               bits |= 0x2;
>>       if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
>>               bits |= 0x1;
>> +
>> +     /*
>> +      * ANY bit is supported in v3 and up
>> +      */
>> +     if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY)
>> +             bits |= 0x4;
>> +
>>       bits <<= (idx * 4);
>>       mask = 0xfULL << (idx * 4);
>>
>
> This looks like a separate patch all by itself.
>
> Also, the below fixes a few style nits and that is_software_event()
> usage:
>
> ---
> Index: linux-2.6/arch/x86/kernel/cpu/perf_event.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/kernel/cpu/perf_event.c
> +++ linux-2.6/arch/x86/kernel/cpu/perf_event.c
> @@ -225,7 +225,8 @@ static struct event_constraint intel_cor
>        EVENT_CONSTRAINT_END
>  };
>
> -static struct event_constraint intel_nehalem_event_constraints[] = {
> +static struct event_constraint intel_nehalem_event_constraints[] =
> +{
>        EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32)), INTEL_ARCH_FIXED_MASK), /* INSTRUCTIONS_RETIRED */
>        EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33)), INTEL_ARCH_FIXED_MASK), /* UNHALTED_CORE_CYCLES */
>        EVENT_CONSTRAINT(0x40, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_CACHE_LD */
> @@ -241,7 +242,8 @@ static struct event_constraint intel_neh
>        EVENT_CONSTRAINT_END
>  };
>
> -static struct event_constraint intel_gen_event_constraints[] = {
> +static struct event_constraint intel_gen_event_constraints[] =
> +{
>        EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32)), INTEL_ARCH_FIXED_MASK), /* INSTRUCTIONS_RETIRED */
>        EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33)), INTEL_ARCH_FIXED_MASK), /* UNHALTED_CORE_CYCLES */
>  };
> @@ -1310,6 +1312,13 @@ skip:
>        return 0;
>  }
>
> +static const struct pmu pmu;
> +
> +static inline bool is_x86_event(struct perf_event *event)
> +{
> +       return event->pmu == &pmu;
> +}
> +
>  /*
>  * dogrp: true if must collect siblings events (group)
>  * returns total number of events and error code
> @@ -1324,7 +1333,7 @@ static int collect_events(struct cpu_hw_
>        /* current number of events already accepted */
>        n = cpuc->n_events;
>
> -       if (!is_software_event(leader)) {
> +       if (is_x86_event(leader)) {
>                if (n >= max_count)
>                        return -ENOSPC;
>                cpuc->event_list[n] = leader;
> @@ -1334,7 +1343,7 @@ static int collect_events(struct cpu_hw_
>                return n;
>
>        list_for_each_entry(event, &leader->sibling_list, group_entry) {
> -               if (is_software_event(event) ||
> +               if (!is_x86_event(event) ||
>                    event->state == PERF_EVENT_STATE_OFF)
>                        continue;
>
> @@ -2154,7 +2163,7 @@ static void event_sched_in(struct perf_e
>        event->state = PERF_EVENT_STATE_ACTIVE;
>        event->oncpu = cpu;
>        event->tstamp_running += event->ctx->time - event->tstamp_stopped;
> -       if (is_software_event(event))
> +       if (!is_x86_event(event))
>                event->pmu->enable(event);
>  }
>
> @@ -2209,7 +2218,7 @@ int hw_perf_group_sched_in(struct perf_e
>         * 1 means successful and events are active
>         * This is not quite true because we defer
>         * actual activation until hw_perf_enable() but
> -        * this way we* ensure caller won't try to enable
> +        * this way we ensure caller won't try to enable
>         * individual events
>         */
>        return 1;
>
>
>
>



--
Stephane Eranian | EMEA Software Engineering
Google France | 38 avenue de l'Opéra | 75002 Paris
Tel : +33 (0) 1 42 68 53 00

2010-01-13 16:30:14

by Peter Zijlstra

Subject: Re: [PATCH] perf_events: improve x86 event scheduling

On Wed, 2010-01-13 at 10:54 +0100, Stephane Eranian wrote:

> > One concern I do have is the loss of error checking on
> > event_sched_in()'s event->pmu->enable(), that could be another
> > 'hardware' PMU like breakpoints, in which case it could fail.
> >
> Well, x86_pmu_enable() does return an error code, so it is up
> to the upper layer to handle the error gracefully and in particular
> in perf_ctx_adjust_freq().

> +static void event_sched_in(struct perf_event *event, int cpu)
> +{
> + event->state = PERF_EVENT_STATE_ACTIVE;
> + event->oncpu = cpu;
> + event->tstamp_running += event->ctx->time - event->tstamp_stopped;
> + if (is_software_event(event))
> + event->pmu->enable(event);
> +}
> +
> +/*
> + * Called to enable a whole group of events.
> + * Returns 1 if the group was enabled, or -EAGAIN if it could not be.
> + * Assumes the caller has disabled interrupts and has
> + * frozen the PMU with hw_perf_save_disable.
> + *
> + * called with PMU disabled. If successful and return value 1,
> + * then guaranteed to call perf_enable() and hw_perf_enable()
> + */
> +int hw_perf_group_sched_in(struct perf_event *leader,
> + struct perf_cpu_context *cpuctx,
> + struct perf_event_context *ctx, int cpu)
> +{
> + struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
> + struct perf_event *sub;
> + int assign[X86_PMC_IDX_MAX];
> + int n, n0, ret;
> +
> + n0 = cpuc->n_events;
> +
> + n = collect_events(cpuc, leader, true);
> + if (n < 0)
> + return n;
> +
> + ret = x86_schedule_events(cpuc, n, assign);
> + if (ret)
> + return ret;
> +
> + /*
> + * copy new assignment, now we know it is possible
> + * will be used by hw_perf_enable()
> + */
> + memcpy(cpuc->assign, assign, n*sizeof(int));
> +
> + cpuc->n_events = n;
> + cpuc->n_added = n - n0;
> +
> + n = 1;
> + event_sched_in(leader, cpu);
> + list_for_each_entry(sub, &leader->sibling_list, group_entry) {
> + if (sub->state != PERF_EVENT_STATE_OFF) {
> + event_sched_in(sub, cpu);
> + ++n;
> + }
> + }
> + ctx->nr_active += n;
> +
> + /*
> + * 1 means successful and events are active
> + * This is not quite true because we defer
> + * actual activation until hw_perf_enable() but
> + * this way we* ensure caller won't try to enable
> + * individual events
> + */
> + return 1;
> +}

That most certainly loses error codes for all !is_x86_event() events in
the group.

So you either need to deal with that event_sched_in() failing, or
guarantee that it always succeeds (forcing software events only for
example).

2010-01-13 16:52:36

by Stephane Eranian

Subject: Re: [PATCH] perf_events: improve x86 event scheduling

On Wed, Jan 13, 2010 at 5:29 PM, Peter Zijlstra <[email protected]> wrote:
> On Wed, 2010-01-13 at 10:54 +0100, Stephane Eranian wrote:
>
>> > One concern I do have is the loss of error checking on
>> > event_sched_in()'s event->pmu->enable(), that could be another
>> > 'hardware' PMU like breakpoints, in which case it could fail.
>> >
>> Well, x86_pmu_enable() does return an error code, so it is up
>> to the upper layer to handle the error gracefully and in particular
>> in perf_ctx_adjust_freq().
>
>> +static void event_sched_in(struct perf_event *event, int cpu)
>> +{
>> +       event->state = PERF_EVENT_STATE_ACTIVE;
>> +       event->oncpu = cpu;
>> +       event->tstamp_running += event->ctx->time - event->tstamp_stopped;
>> +       if (is_software_event(event))
>> +               event->pmu->enable(event);
>> +}
>> +
>> +/*
>> + * Called to enable a whole group of events.
>> + * Returns 1 if the group was enabled, or -EAGAIN if it could not be.
>> + * Assumes the caller has disabled interrupts and has
>> + * frozen the PMU with hw_perf_save_disable.
>> + *
>> + * called with PMU disabled. If successful and return value 1,
>> + * then guaranteed to call perf_enable() and hw_perf_enable()
>> + */
>> +int hw_perf_group_sched_in(struct perf_event *leader,
>> +              struct perf_cpu_context *cpuctx,
>> +              struct perf_event_context *ctx, int cpu)
>> +{
>> +       struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
>> +       struct perf_event *sub;
>> +       int assign[X86_PMC_IDX_MAX];
>> +       int n, n0, ret;
>> +
>> +       n0 = cpuc->n_events;
>> +
>> +       n = collect_events(cpuc, leader, true);
>> +       if (n < 0)
>> +               return n;
>> +
>> +       ret = x86_schedule_events(cpuc, n, assign);
>> +       if (ret)
>> +               return ret;
>> +
>> +       /*
>> +        * copy new assignment, now we know it is possible
>> +        * will be used by hw_perf_enable()
>> +        */
>> +       memcpy(cpuc->assign, assign, n*sizeof(int));
>> +
>> +       cpuc->n_events = n;
>> +       cpuc->n_added  = n - n0;
>> +
>> +       n = 1;
>> +       event_sched_in(leader, cpu);
>> +       list_for_each_entry(sub, &leader->sibling_list, group_entry) {
>> +               if (sub->state != PERF_EVENT_STATE_OFF) {
>> +                       event_sched_in(sub, cpu);
>> +                       ++n;
>> +               }
>> +       }
>> +       ctx->nr_active += n;
>> +
>> +       /*
>> +        * 1 means successful and events are active
>> +        * This is not quite true because we defer
>> +        * actual activation until hw_perf_enable() but
>> +        * this way we* ensure caller won't try to enable
>> +        * individual events
>> +        */
>> +       return 1;
>> +}
>
> That most certainly loses error codes for all !is_x86_event() events in
> the group.
>
> So you either need to deal with that event_sched_in() failing, or
> guarantee that it always succeeds (forcing software events only for
> example).
>
True, but that one can be fixed because it is only called from
hw_perf_group_sched_in() which returns an error code.

The same code would have to be fixed on PPC as well.

>



--
Stephane Eranian | EMEA Software Engineering
Google France | 38 avenue de l'Opéra | 75002 Paris
Tel : +33 (0) 1 42 68 53 00

2010-01-13 17:23:06

by Stephane Eranian

Subject: Re: [PATCH] perf_events: improve x86 event scheduling

Ok,

Something like that should probably do it:

static void event_sched_out(struct perf_event *event, int cpu)
{
        event->state = PERF_EVENT_STATE_INACTIVE;
        event->oncpu = -1;
}

hw_perf_group_sched_in()
{
        ....
        n = 1;
        list_for_each_entry(sub, &leader->sibling_list, group_entry) {
                if (sub->state > PERF_EVENT_STATE_OFF) {
                        ret = event_sched_in(sub, cpu);
                        if (ret)
                                goto undo;
                        ++n;
                }
        }
        /*
         * copy new assignment, now we know it is possible
         * will be used by hw_perf_enable()
         */
        memcpy(cpuc->assign, assign, n*sizeof(int));

        cpuc->n_events = n;
        cpuc->n_added  = n - n0;
        ctx->nr_active += n;
        /*
         * 1 means successful and events are active
         * This is not quite true because we defer
         * actual activation until hw_perf_enable() but
         * this way we ensure caller won't try to enable
         * individual events
         */
        return 1;
undo:
        event_sched_out(leader, cpu);
        n0 = n;
        n  = 1;
        list_for_each_entry(sub, &leader->sibling_list, group_entry) {
                if (sub->state == PERF_EVENT_STATE_ACTIVE) {
                        event_sched_out(sub, cpu);
                        if (++n == n0)
                                break;
                }
        }
        return ret;
}

Looking at the generic and PPC code, there are a few points which
are still unclear to me:

1. I believe that the check on attr.exclusive, as done in
perf_event.c:event_sched_in(), is missing from my patch and from the
PPC equivalent code:

        if (event->attr.exclusive)
                cpuctx->exclusive = 1;

So is the smp_wmb() and the accounting in cpuctx->active_oncpu.


2. the management of event->tstamp_stopped and event->tstamp_running

It looks like we may have to undo the update to tstamp_running, or defer
the update until we know for sure that event_sched_in() succeeded for all
events, including software events.
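
For point 2, the event_sched_out() sketch above could, for instance, also
revert the tstamp_running update (assuming ctx->time cannot advance between
the failed event_sched_in() and the undo, since we run with the PMU disabled
and interrupts off):

static void event_sched_out(struct perf_event *event, int cpu)
{
        event->state = PERF_EVENT_STATE_INACTIVE;
        event->oncpu = -1;
        /* revert the update done in event_sched_in() */
        event->tstamp_running -= event->ctx->time - event->tstamp_stopped;
}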


On Wed, Jan 13, 2010 at 5:52 PM, Stephane Eranian <[email protected]> wrote:
> On Wed, Jan 13, 2010 at 5:29 PM, Peter Zijlstra <[email protected]> wrote:
>> On Wed, 2010-01-13 at 10:54 +0100, Stephane Eranian wrote:
>>
>>> > One concern I do have is the loss of error checking on
>>> > event_sched_in()'s event->pmu->enable(), that could be another
>>> > 'hardware' PMU like breakpoints, in which case it could fail.
>>> >
>>> Well, x86_pmu_enable() does return an error code, so it is up
>>> to the upper layer to handle the error gracefully and in particular
>>> in perf_ctx_adjust_freq().
>>
>>> +static void event_sched_in(struct perf_event *event, int cpu)
>>> +{
>>> +       event->state = PERF_EVENT_STATE_ACTIVE;
>>> +       event->oncpu = cpu;
>>> +       event->tstamp_running += event->ctx->time - event->tstamp_stopped;
>>> +       if (is_software_event(event))
>>> +               event->pmu->enable(event);
>>> +}
>>> +
>>> +/*
>>> + * Called to enable a whole group of events.
>>> + * Returns 1 if the group was enabled, or -EAGAIN if it could not be.
>>> + * Assumes the caller has disabled interrupts and has
>>> + * frozen the PMU with hw_perf_save_disable.
>>> + *
>>> + * called with PMU disabled. If successful and return value 1,
>>> + * then guaranteed to call perf_enable() and hw_perf_enable()
>>> + */
>>> +int hw_perf_group_sched_in(struct perf_event *leader,
>>> +              struct perf_cpu_context *cpuctx,
>>> +              struct perf_event_context *ctx, int cpu)
>>> +{
>>> +       struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
>>> +       struct perf_event *sub;
>>> +       int assign[X86_PMC_IDX_MAX];
>>> +       int n, n0, ret;
>>> +
>>> +       n0 = cpuc->n_events;
>>> +
>>> +       n = collect_events(cpuc, leader, true);
>>> +       if (n < 0)
>>> +               return n;
>>> +
>>> +       ret = x86_schedule_events(cpuc, n, assign);
>>> +       if (ret)
>>> +               return ret;
>>> +
>>> +       /*
>>> +        * copy new assignment, now we know it is possible
>>> +        * will be used by hw_perf_enable()
>>> +        */
>>> +       memcpy(cpuc->assign, assign, n*sizeof(int));
>>> +
>>> +       cpuc->n_events = n;
>>> +       cpuc->n_added  = n - n0;
>>> +
>>> +       n = 1;
>>> +       event_sched_in(leader, cpu);
>>> +       list_for_each_entry(sub, &leader->sibling_list, group_entry) {
>>> +               if (sub->state != PERF_EVENT_STATE_OFF) {
>>> +                       event_sched_in(sub, cpu);
>>> +                       ++n;
>>> +               }
>>> +       }
>>> +       ctx->nr_active += n;
>>> +
>>> +       /*
>>> +        * 1 means successful and events are active
>>> +        * This is not quite true because we defer
>>> +        * actual activation until hw_perf_enable() but
>>> +        * this way we* ensure caller won't try to enable
>>> +        * individual events
>>> +        */
>>> +       return 1;
>>> +}
>>
>> That most certainly loses error codes for all !is_x86_event() events in
>> the group.
>>
>> So you either need to deal with that event_sched_in() failing, or
>> guarantee that it always succeeds (forcing software events only for
>> example).
>>
> True, but that one can be fixed because it is only called from
> hw_perf_group_sched_in() which returns an error code.
>
> The same code would have to be fixed on PPC as well.
>
>>
>
>
>
> --
> Stephane Eranian  | EMEA Software Engineering
> Google France | 38 avenue de l'Opéra | 75002 Paris
> Tel : +33 (0) 1 42 68 53 00
>



--
Stephane Eranian | EMEA Software Engineering
Google France | 38 avenue de l'Opéra | 75002 Paris
Tel : +33 (0) 1 42 68 53 00

2010-01-17 14:12:42

by Frederic Weisbecker

Subject: Re: [PATCH] perf_events: improve x86 event scheduling

On Wed, Jan 13, 2010 at 06:22:54PM +0100, Stephane Eranian wrote:
> Ok,
>
> Something like that should probably do it:
>
> static void event_sched_out(struct perf_event *event, int cpu)
> {
> event->state = PERF_EVENT_STATE_INACTIVE;
> event->oncpu = -1;
> }



You need to also call pmu->disable() if it is a software event,
because a breakpoint needs to be unregistered in hardware level
too.

And disable it in x86 level if it is an x86 event?



> hw_perf_group_sched_in()
> {
> ....
> n = 1;
> list_for_each_entry(sub, &leader->sibling_list, group_entry) {
> if (sub->state > PERF_EVENT_STATE_OFF) {
> ret = event_sched_in(sub, cpu);
> if (ret)
> goto undo;



Yeah we indeed really need to check that.

Thanks.

2010-01-17 14:42:24

by Stephane Eranian

Subject: Re: [PATCH] perf_events: improve x86 event scheduling

Frederic,


Here is what I have now in the x86 code.

As for your comment on disabling the x86 event, we don't
need to do this because it is not actually activated yet when
we return from hw_perf_group_sched_in(). Activation occurs
really in hw_perf_enable().


static int x86_event_sched_in(struct perf_event *event,
                              struct perf_cpu_context *cpuctx, int cpu)
{
        int ret = 0;

        event->state = PERF_EVENT_STATE_ACTIVE;
        event->oncpu = cpu;
        event->tstamp_running += event->ctx->time - event->tstamp_stopped;

        if (is_software_event(event))
                ret = event->pmu->enable(event);

        if (!ret && !is_software_event(event))
                cpuctx->active_oncpu++;

        if (!ret && event->attr.exclusive)
                cpuctx->exclusive = 1;

        return ret;
}

static void x86_event_sched_out(struct perf_event *event,
                                struct perf_cpu_context *cpuctx, int cpu)
{
        event->state = PERF_EVENT_STATE_INACTIVE;
        event->oncpu = -1;

        event->tstamp_running -= event->ctx->time - event->tstamp_stopped;

        if (is_software_event(event))
                event->pmu->disable(event);

        if (!is_software_event(event))
                cpuctx->active_oncpu--;

        if (event->attr.exclusive || !cpuctx->active_oncpu)
                cpuctx->exclusive = 0;
}


On Sun, Jan 17, 2010 at 3:12 PM, Frederic Weisbecker <[email protected]> wrote:
> On Wed, Jan 13, 2010 at 06:22:54PM +0100, Stephane Eranian wrote:
>> Ok,
>>
>> Something like that should probably do it:
>>
>> static void event_sched_out(struct perf_event *event, int cpu)
>> {
>>         event->state = PERF_EVENT_STATE_INACTIVE;
>>         event->oncpu = -1;
>> }
>
>
>
> You need to also call pmu->disable() if it is a software event,
> because a breakpoint needs to be unregistered in hardware level
> too.
>
> And disable it in x86 level if it is an x86 event?
>
>
>
>> hw_perf_group_sched_in()
>> {
>>        ....
>>        n = 1;
>>         list_for_each_entry(sub, &leader->sibling_list, group_entry) {
>>                 if (sub->state > PERF_EVENT_STATE_OFF) {
>>                         ret = event_sched_in(sub, cpu);
>>                         if (ret)
>>                                 goto undo;
>
>
>
> Yeah we indeed really need to check that.
>
> Thanks.
>
>



--
Stephane Eranian | EMEA Software Engineering
Google France | 38 avenue de l'Opéra | 75002 Paris
Tel : +33 (0) 1 42 68 53 00

2010-01-17 16:19:17

by Frederic Weisbecker

Subject: Re: [PATCH] perf_events: improve x86 event scheduling

On Sun, Jan 17, 2010 at 03:42:16PM +0100, Stephane Eranian wrote:
> Frederic,
>
>
> Here is what I have now in the x86 code.
>
> As for your comment on disabling the x86 event, we don't
> need to do this because it is not actually activated yet when
> we return from hw_perf_group_sched_in(). Activation occurs
> really in hw_perf_enable().


Ah, indeed.


>
>
> static int x86_event_sched_in(struct perf_event *event,
>                               struct perf_cpu_context *cpuctx, int cpu)
> {
>         int ret = 0;
>
>         event->state = PERF_EVENT_STATE_ACTIVE;
>         event->oncpu = cpu;
>         event->tstamp_running += event->ctx->time - event->tstamp_stopped;
>
>         if (is_software_event(event))
>                 ret = event->pmu->enable(event);
>
>         if (!ret && !is_software_event(event))
>                 cpuctx->active_oncpu++;
>
>         if (!ret && event->attr.exclusive)
>                 cpuctx->exclusive = 1;
>
>         return ret;
> }
>
> static void x86_event_sched_out(struct perf_event *event,
>                                 struct perf_cpu_context *cpuctx, int cpu)
> {
>         event->state = PERF_EVENT_STATE_INACTIVE;
>         event->oncpu = -1;
>
>         event->tstamp_running -= event->ctx->time - event->tstamp_stopped;
>
>         if (is_software_event(event))
>                 event->pmu->disable(event);
>
>         if (!is_software_event(event))
>                 cpuctx->active_oncpu--;
>
>         if (event->attr.exclusive || !cpuctx->active_oncpu)
>                 cpuctx->exclusive = 0;
> }



Yeah looks good.

Thanks.

2010-01-17 21:53:53

by Stephane Eranian

[permalink] [raw]
Subject: Re: [PATCH] perf_events: improve x86 event scheduling

hi,

Will repost a new version of the patch with these changes.

On Sun, Jan 17, 2010 at 5:19 PM, Frederic Weisbecker <[email protected]> wrote:
> On Sun, Jan 17, 2010 at 03:42:16PM +0100, Stephane Eranian wrote:
>> Frederic,
>>
>>
>> Here is what I have now in the x86 code.
>>
>> As for your comment on disabling the x86 event, we don't
>> need to do this because it is not actually activated yet when
>> we return from hw_perf_group_sched_in(). Activation occurs
>> really in hw_perf_enable().
>
>
> Ah, indeed.
>
>
>>
>>
>> static int x86_event_sched_in(struct perf_event *event,
>>                           struct perf_cpu_context *cpuctx, int cpu)
>> {
>>         int ret = 0;
>>
>>         event->state = PERF_EVENT_STATE_ACTIVE;
>>         event->oncpu = cpu;
>>         event->tstamp_running += event->ctx->time - event->tstamp_stopped;
>>
>>         if (is_software_event(event))
>>                 ret = event->pmu->enable(event);
>>
>>         if (!ret && !is_software_event(event))
>>                 cpuctx->active_oncpu++;
>>
>>         if (!ret && event->attr.exclusive)
>>                 cpuctx->exclusive = 1;
>>
>>         return ret;
>> }
>>
>> static void x86_event_sched_out(struct perf_event *event,
>>                             struct perf_cpu_context *cpuctx, int cpu)
>> {
>>         event->state = PERF_EVENT_STATE_INACTIVE;
>>         event->oncpu = -1;
>>
>>         event->tstamp_running -= event->ctx->time - event->tstamp_stopped;
>>
>>         if (is_software_event(event))
>>                 event->pmu->disable(event);
>>
>>         if (!is_software_event(event))
>>                 cpuctx->active_oncpu--;
>>
>>         if (event->attr.exclusive || !cpuctx->active_oncpu)
>>                 cpuctx->exclusive = 0;
>> }
>
>
>
> Yeah looks good.
>
> Thanks.
>
>



--
Stephane Eranian | EMEA Software Engineering
Google France | 38 avenue de l'Opéra | 75002 Paris
Tel : +33 (0) 1 42 68 53 00

2010-01-18 11:13:40

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH] perf_events: improve x86 event scheduling

On Sun, 2010-01-17 at 15:12 +0100, Frederic Weisbecker wrote:

> You need to also call pmu->disable() if it is a software event,
> because a breakpoint needs to be unregistered at the hardware level
> too.

breakpoint isn't a software pmu. But yeah, enable and disable need to
match.

2010-01-18 11:54:01

by Peter Zijlstra

[permalink] [raw]
Subject: [PATCH] perf: fix the is_software_event() definition

On Mon, 2010-01-18 at 12:13 +0100, Peter Zijlstra wrote:
> On Sun, 2010-01-17 at 15:12 +0100, Frederic Weisbecker wrote:
>
> > You need to also call pmu->disable() if it is a software event,
> > because a breakpoint needs to be unregistered at the hardware level
> > too.
>
> breakpoint isn't a software pmu. But yeah, enable and disable need to
> match.

That is, it shouldn't be a software pmu, because we assume software
events can always be scheduled, whereas that's definitely not so for the
breakpoint one.

Which seems to suggest the following

---
Subject: perf: fix the is_software_event() definition

When adding the breakpoint pmu Frederic forgot to exclude it from being
a software event. While we're at it, make it an inclusive expression.

Signed-off-by: Peter Zijlstra <[email protected]>
---
include/linux/perf_event.h | 10 +++++++---
1 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index c66b34f..835ba26 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -814,9 +814,13 @@ extern int perf_event_overflow(struct perf_event *event, int nmi,
*/
static inline int is_software_event(struct perf_event *event)
{
- return (event->attr.type != PERF_TYPE_RAW) &&
- (event->attr.type != PERF_TYPE_HARDWARE) &&
- (event->attr.type != PERF_TYPE_HW_CACHE);
+ switch (event->attr.type) {
+ case PERF_TYPE_SOFTWARE:
+ case PERF_TYPE_TRACEPOINT:
+ case PERF_TYPE_HW_CACHE:
+ return 1;
+ }
+ return 0;
}

extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];

2010-01-18 12:07:55

by Frederic Weisbecker

[permalink] [raw]
Subject: Re: [PATCH] perf: fix the is_software_event() definition

On Mon, Jan 18, 2010 at 12:53:36PM +0100, Peter Zijlstra wrote:
> On Mon, 2010-01-18 at 12:13 +0100, Peter Zijlstra wrote:
> > On Sun, 2010-01-17 at 15:12 +0100, Frederic Weisbecker wrote:
> >
> > > You need to also call pmu->disable() if it is a software event,
> > > because a breakpoint needs to be unregistered at the hardware level
> > > too.
> >
> > breakpoint isn't a software pmu. But yeah, enable and disable need to
> > match.
>
> That is, it shouldn't be a software pmu, because we assume software
> events can always be scheduled, whereas that's definitely not so for the
> breakpoint one.
>
> Which seems to suggest the following
>
> ---
> Subject: perf: fix the is_software_event() definition
>
> When adding the breakpoint pmu Frederic forgot to exclude it from being
> a software event. While we're at it, make it an inclusive expression.
>
> Signed-off-by: Peter Zijlstra <[email protected]>



Agreed.

But then Stephane will need to update his patch and use
something other than is_software_event() to tell whether an event
needs its pmu->enable/disable to be called.

A kind of helper that can tell: I am not handled by
hw_perf_group_sched_in()

But I suck too much in naming to propose something sane :)

2010-01-18 12:19:40

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH] perf: fix the is_software_event() definition

On Mon, 2010-01-18 at 13:07 +0100, Frederic Weisbecker wrote:

> Agreed.
>
> But then Stephane will need to update his patch and use
> something other than is_software_event() to tell whether an event
> needs its pmu->enable/disable to be called.

Yes, that's what I told him before and even sent a patch for; the name I
chose was is_x86_event().

http://lkml.org/lkml/2010/1/12/154
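
For reference, that helper is presumably along these lines (a sketch only,
assuming the file-local struct pmu descriptor that the x86 code hands back
from hw_perf_event_init()):

static inline int is_x86_event(struct perf_event *event)
{
        /* events owned by the x86 code all carry this descriptor */
        return event->pmu == &pmu;
}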


2010-01-18 12:53:45

by Stephane Eranian

[permalink] [raw]
Subject: Re: [perfmon2] [PATCH] perf: fix the is_software_event() definition

On Mon, Jan 18, 2010 at 12:53 PM, Peter Zijlstra <[email protected]> wrote:
> On Mon, 2010-01-18 at 12:13 +0100, Peter Zijlstra wrote:
>> On Sun, 2010-01-17 at 15:12 +0100, Frederic Weisbecker wrote:
>>
>> > You need to also call pmu->disable() if it is a software event,
>> > because a breakpoint needs to be unregistered at the hardware level
>> > too.
>>
>> breakpoint isn't a software pmu. But yeah, enable and disable need to
>> match.
>
> That is, it shouldn't be a software pmu, because we assume software
> events can always be scheduled, whereas that's definitely not so for the
> breakpoint one.
>
> Which seems to suggest the following
>
> ---
> Subject: perf: fix the is_software_event() definition
>
> When adding the breakpoint pmu Frederic forgot to exclude it from being
> a software event. While we're at it, make it an inclusive expression.
>
> Signed-off-by: Peter Zijlstra <[email protected]>
> ---
>  include/linux/perf_event.h |   10 +++++++---
>  1 files changed, 7 insertions(+), 3 deletions(-)
>
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index c66b34f..835ba26 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -814,9 +814,13 @@ extern int perf_event_overflow(struct perf_event *event, int nmi,
>  */
>  static inline int is_software_event(struct perf_event *event)
>  {
> -       return (event->attr.type != PERF_TYPE_RAW) &&
> -               (event->attr.type != PERF_TYPE_HARDWARE) &&
> -               (event->attr.type != PERF_TYPE_HW_CACHE);
> +       switch (event->attr.type) {
> +       case PERF_TYPE_SOFTWARE:
> +       case PERF_TYPE_TRACEPOINT:
> +       case PERF_TYPE_HW_CACHE:
> +               return 1;
> +       }
> +       return 0;
>  }
>
>  extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
>
PERF_TYPE_HW_CACHE is a hardware PMU event.



2010-01-18 12:57:36

by Stephane Eranian

[permalink] [raw]
Subject: Re: [perfmon2] [PATCH] perf: fix the is_software_event() definition

On Mon, Jan 18, 2010 at 1:07 PM, Frederic Weisbecker <[email protected]> wrote:
> On Mon, Jan 18, 2010 at 12:53:36PM +0100, Peter Zijlstra wrote:
>> On Mon, 2010-01-18 at 12:13 +0100, Peter Zijlstra wrote:
>> > On Sun, 2010-01-17 at 15:12 +0100, Frederic Weisbecker wrote:
>> >
>> > > You need to also call pmu->disable() if it is a software event,
>> > > because a breakpoint needs to be unregistered at the hardware level
>> > > too.
>> >
>> > breakpoint isn't a software pmu. But yeah, enable and disable need to
>> > match.
>>
>> That is, it shouldn't be a software pmu, because we assume software
>> events can always be scheduled, whereas that's definitely not so for the
>> breakpoint one.
>>
>> Which seems to suggest the following
>>
>> ---
>> Subject: perf: fix the is_software_event() definition
>>
>> When adding the breakpoint pmu Frederic forgot to exclude it from being
>> a software event. While we're at it, make it an inclusive expression.
>>
>> Signed-off-by: Peter Zijlstra <[email protected]>
>
>
>
> Agreed.
>
> But then Stephane will need to update his patch and use
> something other than is_software_event() to tell whether an event
> needs its pmu->enable/disable to be called.
>
> A kind of helper that can tell: I am not handled by
> hw_perf_group_sched_in()
>
Then, we should use something that checks if the event
is handled by the X86 PMU layer:

int is_x86_hw_event(struct perf_event *event)
{
        return event->pmu == x86_pmu;
}
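
Rewiring the snippets earlier in the thread on top of such a helper would
then be something like the hunks below (sketch only; in practice the
comparison would be against the address of the x86 code's struct pmu
descriptor, and the active_oncpu/exclusive accounting is left untouched
here):

In x86_event_sched_in():

-        if (is_software_event(event))
+        if (!is_x86_hw_event(event))
                 ret = event->pmu->enable(event);

In x86_event_sched_out():

-        if (is_software_event(event))
+        if (!is_x86_hw_event(event))
                 event->pmu->disable(event);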

2010-01-18 13:01:17

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [perfmon2] [PATCH] perf: fix the is_software_event() definition

On Mon, 2010-01-18 at 13:53 +0100, stephane eranian wrote:

> PERF_TYPE_HW_CACHE is a hardware PMU event.

D'0h!
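
Presumably the fix is then simply to drop the HW_CACHE case, i.e. something
like:

static inline int is_software_event(struct perf_event *event)
{
        switch (event->attr.type) {
        case PERF_TYPE_SOFTWARE:
        case PERF_TYPE_TRACEPOINT:
                return 1;
        }
        return 0;
}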

2010-01-18 13:06:34

by Frederic Weisbecker

[permalink] [raw]
Subject: Re: [perfmon2] [PATCH] perf: fix the is_software_event() definition

2010/1/18 stephane eranian <[email protected]>:
> On Mon, Jan 18, 2010 at 1:07 PM, Frederic Weisbecker <[email protected]> wrote:
>> On Mon, Jan 18, 2010 at 12:53:36PM +0100, Peter Zijlstra wrote:
>>> On Mon, 2010-01-18 at 12:13 +0100, Peter Zijlstra wrote:
>>> > On Sun, 2010-01-17 at 15:12 +0100, Frederic Weisbecker wrote:
>>> >
>>> > > You need to also call pmu->disable() if it is a software event,
>>> > > because a breakpoint needs to be unregistered at the hardware level
>>> > > too.
>>> >
>>> > breakpoint isn't a software pmu. But yeah, enable and disable need to
>>> > match.
>>>
>>> That is, it shouldn't be a software pmu, because we assume software
>>> events can always be scheduled, whereas that's definitely not so for the
>>> breakpoint one.
>>>
>>> Which seems to suggest the following
>>>
>>> ---
>>> Subject: perf: fix the is_software_event() definition
>>>
>>> When adding the breakpoint pmu Frederic forgot to exclude it from being
>>> a software event. While we're at it, make it an inclusive expression.
>>>
>>> Signed-off-by: Peter Zijlstra <[email protected]>
>>
>>
>>
>> Agreed.
>>
>> But then Stephane will need to update his patch and use
>> something other than is_software_event() to tell whether an event
>> needs its pmu->enable/disable to be called.
>>
>> A kind of helper that can tell: I am not handled by
>> hw_perf_group_sched_in()
>>
> Then, we should use something that checks if the event
> is handled by the X86 PMU layer:
>
> int is_x86_hw_event(struct perf_event *event)
> {
>         return event->pmu == x86_pmu;
> }
>

Yeah. I missed this patch from Peter in his answer. Looks good.

2010-01-18 13:46:25

by Stephane Eranian

[permalink] [raw]
Subject: Re: [perfmon2] [PATCH] perf: fix the is_software_event() definition

On Mon, Jan 18, 2010 at 1:19 PM, Peter Zijlstra <[email protected]> wrote:
> On Mon, 2010-01-18 at 13:07 +0100, Frederic Weisbecker wrote:
>
>> Agreed.
>>
>> But then Stephane will need to update his patch and use
>> something other than is_software_event() to tell whether an event
>> needs its pmu->enable/disable to be called.
>
> Yes, that's what I told him before and even send a patch for, the name I
> chose was is_x86_event().
>
Yes, I remember that. I will modify the patch to use a function like that.
I think the PPC code would need to be updated similarly.