2012-06-27 07:09:52

by Yan, Zheng

Subject: [PATCH 1/2] perf/x86: Use 0xff as pseudo code for fixed uncore event

From: "Yan, Zheng" <[email protected]>

Stephane Eranian suggested using 0xff as the pseudo event code for the
fixed uncore event, with the umask value determining which of the
fixed events we want to map to. So far there is at most one fixed
counter per uncore PMU, so just change the definition of
UNCORE_FIXED_EVENT to 0xff.
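
A minimal sketch of the suggested scheme (illustration only; today's
uncore PMUs have at most one fixed counter, so the umask-based
selection below is hypothetical):

	/*
	 * Hypothetical helper: 0xff is a pseudo event code that matches no
	 * real hardware event; the umask field (config:8-15) would select
	 * among fixed counters if a PMU ever grows more than one.
	 */
	static int uncore_fixed_event_idx(u64 config)
	{
		if ((config & 0xff) != UNCORE_FIXED_EVENT)
			return -1;		/* not the fixed pseudo event */
		return (config >> 8) & 0xff;	/* umask selects the counter */
	}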

Signed-off-by: Yan, Zheng <[email protected]>
---
arch/x86/kernel/cpu/perf_event_intel_uncore.c | 4 ++--
arch/x86/kernel/cpu/perf_event_intel_uncore.h | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 6f43f95..8ca0f8f 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -179,7 +179,7 @@ static struct attribute *snbep_uncore_pcu_formats_attr[] = {
};

static struct uncore_event_desc snbep_uncore_imc_events[] = {
- INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0xff"),
+ INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff"),
INTEL_UNCORE_EVENT_DESC(cas_count_read, "event=0x04,umask=0x03"),
INTEL_UNCORE_EVENT_DESC(cas_count_write, "event=0x04,umask=0x0c"),
{ /* end: all zeroes */ },
@@ -616,7 +616,7 @@ static struct attribute_group nhm_uncore_format_group = {
};

static struct uncore_event_desc nhm_uncore_events[] = {
- INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0xff"),
+ INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff"),
INTEL_UNCORE_EVENT_DESC(qmc_writes_full_any, "event=0x2f,umask=0x0f"),
INTEL_UNCORE_EVENT_DESC(qmc_normal_reads_any, "event=0x2c,umask=0x0f"),
INTEL_UNCORE_EVENT_DESC(qhl_request_ioh_reads, "event=0x20,umask=0x01"),
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index 4d52db0..88498c7 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -9,7 +9,7 @@

#define UNCORE_PMU_HRTIMER_INTERVAL (60 * NSEC_PER_SEC)

-#define UNCORE_FIXED_EVENT 0xffff
+#define UNCORE_FIXED_EVENT 0xff
#define UNCORE_PMC_IDX_MAX_GENERIC 8
#define UNCORE_PMC_IDX_FIXED UNCORE_PMC_IDX_MAX_GENERIC
#define UNCORE_PMC_IDX_MAX (UNCORE_PMC_IDX_FIXED + 1)
--
1.7.10.2


2012-06-27 07:09:55

by Yan, Zheng

Subject: [PATCH 2/2] perf/x86: Uncore Filter support for SandyBridge-EP

From: "Yan, Zheng" <[email protected]>

This patch adds C-Box and PCU filter support for the SandyBridge-EP
uncore: C-Box events can be filtered by thread/core ID, and PCU
events by frequency/voltage.
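
For illustration, a hedged user-space sketch of programming a C-Box
filter through the new config1 fields (the bit positions follow the
format attributes added below; the event/umask values and the helper
name are made up for the example):

	#include <string.h>
	#include <linux/perf_event.h>

	/* sketch only: a hypothetical C-Box event, filtered by thread ID */
	static void setup_cbox_attr(struct perf_event_attr *attr,
				    int pmu_type, int tid)
	{
		memset(attr, 0, sizeof(*attr));
		attr->type = pmu_type;	/* e.g. /sys/devices/uncore_cbox_0/type */
		attr->size = sizeof(*attr);
		/* event in config:0-7, umask in config:8-15, tid_en is bit 19 */
		attr->config = 0x34 | (0x03 << 8) | (1 << 19);
		/* filter_tid occupies config1:0-4 */
		attr->config1 = tid & 0x1f;
	}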

Signed-off-by: Yan, Zheng <[email protected]>
---
arch/x86/kernel/cpu/perf_event_intel_uncore.c | 233 ++++++++++++++++++++-----
arch/x86/kernel/cpu/perf_event_intel_uncore.h | 24 ++-
2 files changed, 211 insertions(+), 46 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 8ca0f8f..70a27ac 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -14,10 +14,13 @@ static cpumask_t uncore_cpu_mask;
/* constraint for the fixed counter */
static struct event_constraint constraint_fixed =
EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL);
+static struct event_constraint constraint_empty =
+ EVENT_CONSTRAINT(0, 0, 0);

DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
+DEFINE_UNCORE_FORMAT_ATTR(tid_en, tid_en, "config:19");
DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23");
DEFINE_UNCORE_FORMAT_ATTR(cmask5, cmask, "config:24-28");
DEFINE_UNCORE_FORMAT_ATTR(cmask8, cmask, "config:24-31");
@@ -26,8 +29,19 @@ DEFINE_UNCORE_FORMAT_ATTR(thresh5, thresh, "config:24-28");
DEFINE_UNCORE_FORMAT_ATTR(occ_sel, occ_sel, "config:14-15");
DEFINE_UNCORE_FORMAT_ATTR(occ_invert, occ_invert, "config:30");
DEFINE_UNCORE_FORMAT_ATTR(occ_edge, occ_edge, "config:14-51");
+DEFINE_UNCORE_FORMAT_ATTR(filter_tid, filter_tid, "config1:0-4");
+DEFINE_UNCORE_FORMAT_ATTR(filter_nid, filter_nid, "config1:10-17");
+DEFINE_UNCORE_FORMAT_ATTR(filter_state, filter_state, "config1:18-22");
+DEFINE_UNCORE_FORMAT_ATTR(filter_opc, filter_opc, "config1:23-31");
+DEFINE_UNCORE_FORMAT_ATTR(filter_brand0, filter_brand0, "config1:0-7");
+DEFINE_UNCORE_FORMAT_ATTR(filter_brand1, filter_brand1, "config1:8-15");
+DEFINE_UNCORE_FORMAT_ATTR(filter_brand2, filter_brand2, "config1:16-23");
+DEFINE_UNCORE_FORMAT_ATTR(filter_brand3, filter_brand3, "config1:24-31");

/* Sandy Bridge-EP uncore support */
+static struct intel_uncore_type snbep_uncore_cbox;
+static struct intel_uncore_type snbep_uncore_pcu;
+
static void snbep_uncore_pci_disable_box(struct intel_uncore_box *box)
{
struct pci_dev *pdev = box->pci_dev;
@@ -120,6 +134,10 @@ static void snbep_uncore_msr_enable_event(struct intel_uncore_box *box,
struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+
+ if (reg1->idx != EXTRA_REG_NONE)
+ wrmsrl(reg1->reg, reg1->config);

wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
}
@@ -149,6 +167,71 @@ static void snbep_uncore_msr_init_box(struct intel_uncore_box *box)
wrmsrl(msr, SNBEP_PMON_BOX_CTL_INT);
}

+static struct event_constraint *
+snbep_uncore_get_constraint(struct intel_uncore_box *box,
+ struct perf_event *event)
+{
+ struct intel_uncore_extra_reg *er;
+ struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+ unsigned long flags;
+ bool ok = false;
+
+ if (reg1->idx == EXTRA_REG_NONE || (box->phys_id >= 0 && reg1->alloc))
+ return NULL;
+
+ er = &box->shared_regs[reg1->idx];
+ raw_spin_lock_irqsave(&er->lock, flags);
+ if (!atomic_read(&er->ref) || er->config1 == reg1->config) {
+ atomic_inc(&er->ref);
+ er->config1 = reg1->config;
+ ok = true;
+ }
+ raw_spin_unlock_irqrestore(&er->lock, flags);
+
+ if (ok) {
+ if (box->phys_id >= 0)
+ reg1->alloc = 1;
+ return NULL;
+ }
+ return &constraint_empty;
+}
+
+static void snbep_uncore_put_constraint(struct intel_uncore_box *box,
+ struct perf_event *event)
+{
+ struct intel_uncore_extra_reg *er;
+ struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+
+ if (box->phys_id < 0 || !reg1->alloc)
+ return;
+
+ er = &box->shared_regs[reg1->idx];
+ atomic_sub(1, &er->ref);
+ reg1->alloc = 0;
+}
+
+static int snbep_uncore_hw_config(struct intel_uncore_box *box,
+ struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+
+ if (box->pmu->type == &snbep_uncore_cbox) {
+ reg1->reg = SNBEP_C0_MSR_PMON_BOX_FILTER +
+ SNBEP_CBO_MSR_OFFSET * box->pmu->pmu_idx;
+ reg1->config = event->attr.config1 &
+ SNBEP_CB0_MSR_PMON_BOX_FILTER_MASK;
+ } else if (box->pmu->type == &snbep_uncore_pcu) {
+ reg1->reg = SNBEP_PCU_MSR_PMON_BOX_FILTER;
+ reg1->config = event->attr.config1 &
+ SNBEP_PCU_MSR_PMON_BOX_FILTER_MASK;
+ } else {
+ return 0;
+ }
+ reg1->idx = 0;
+ return 0;
+}
+
static struct attribute *snbep_uncore_formats_attr[] = {
&format_attr_event.attr,
&format_attr_umask.attr,
@@ -167,6 +250,20 @@ static struct attribute *snbep_uncore_ubox_formats_attr[] = {
NULL,
};

+static struct attribute *snbep_uncore_cbox_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_tid_en.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh8.attr,
+ &format_attr_filter_tid.attr,
+ &format_attr_filter_nid.attr,
+ &format_attr_filter_state.attr,
+ &format_attr_filter_opc.attr,
+ NULL,
+};
+
static struct attribute *snbep_uncore_pcu_formats_attr[] = {
&format_attr_event.attr,
&format_attr_occ_sel.attr,
@@ -175,6 +272,10 @@ static struct attribute *snbep_uncore_pcu_formats_attr[] = {
&format_attr_thresh5.attr,
&format_attr_occ_invert.attr,
&format_attr_occ_edge.attr,
+ &format_attr_filter_brand0.attr,
+ &format_attr_filter_brand1.attr,
+ &format_attr_filter_brand2.attr,
+ &format_attr_filter_brand3.attr,
NULL,
};

@@ -203,6 +304,11 @@ static struct attribute_group snbep_uncore_ubox_format_group = {
.attrs = snbep_uncore_ubox_formats_attr,
};

+static struct attribute_group snbep_uncore_cbox_format_group = {
+ .name = "format",
+ .attrs = snbep_uncore_cbox_formats_attr,
+};
+
static struct attribute_group snbep_uncore_pcu_format_group = {
.name = "format",
.attrs = snbep_uncore_pcu_formats_attr,
@@ -215,6 +321,9 @@ static struct intel_uncore_ops snbep_uncore_msr_ops = {
.disable_event = snbep_uncore_msr_disable_event,
.enable_event = snbep_uncore_msr_enable_event,
.read_counter = snbep_uncore_msr_read_counter,
+ .get_constraint = snbep_uncore_get_constraint,
+ .put_constraint = snbep_uncore_put_constraint,
+ .hw_config = snbep_uncore_hw_config,
};

static struct intel_uncore_ops snbep_uncore_pci_ops = {
@@ -307,31 +416,33 @@ static struct intel_uncore_type snbep_uncore_ubox = {
};

static struct intel_uncore_type snbep_uncore_cbox = {
- .name = "cbox",
- .num_counters = 4,
- .num_boxes = 8,
- .perf_ctr_bits = 44,
- .event_ctl = SNBEP_C0_MSR_PMON_CTL0,
- .perf_ctr = SNBEP_C0_MSR_PMON_CTR0,
- .event_mask = SNBEP_PMON_RAW_EVENT_MASK,
- .box_ctl = SNBEP_C0_MSR_PMON_BOX_CTL,
- .msr_offset = SNBEP_CBO_MSR_OFFSET,
- .constraints = snbep_uncore_cbox_constraints,
- .ops = &snbep_uncore_msr_ops,
- .format_group = &snbep_uncore_format_group,
+ .name = "cbox",
+ .num_counters = 4,
+ .num_boxes = 8,
+ .perf_ctr_bits = 44,
+ .event_ctl = SNBEP_C0_MSR_PMON_CTL0,
+ .perf_ctr = SNBEP_C0_MSR_PMON_CTR0,
+ .event_mask = SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK,
+ .box_ctl = SNBEP_C0_MSR_PMON_BOX_CTL,
+ .msr_offset = SNBEP_CBO_MSR_OFFSET,
+ .num_shared_regs = 1,
+ .constraints = snbep_uncore_cbox_constraints,
+ .ops = &snbep_uncore_msr_ops,
+ .format_group = &snbep_uncore_cbox_format_group,
};

static struct intel_uncore_type snbep_uncore_pcu = {
- .name = "pcu",
- .num_counters = 4,
- .num_boxes = 1,
- .perf_ctr_bits = 48,
- .perf_ctr = SNBEP_PCU_MSR_PMON_CTR0,
- .event_ctl = SNBEP_PCU_MSR_PMON_CTL0,
- .event_mask = SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK,
- .box_ctl = SNBEP_PCU_MSR_PMON_BOX_CTL,
- .ops = &snbep_uncore_msr_ops,
- .format_group = &snbep_uncore_pcu_format_group,
+ .name = "pcu",
+ .num_counters = 4,
+ .num_boxes = 1,
+ .perf_ctr_bits = 48,
+ .perf_ctr = SNBEP_PCU_MSR_PMON_CTR0,
+ .event_ctl = SNBEP_PCU_MSR_PMON_CTL0,
+ .event_mask = SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK,
+ .box_ctl = SNBEP_PCU_MSR_PMON_BOX_CTL,
+ .num_shared_regs = 1,
+ .ops = &snbep_uncore_msr_ops,
+ .format_group = &snbep_uncore_pcu_format_group,
};

static struct intel_uncore_type *snbep_msr_uncores[] = {
@@ -747,15 +858,27 @@ static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
box->hrtimer.function = uncore_pmu_hrtimer;
}

-struct intel_uncore_box *uncore_alloc_box(int cpu)
+struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
+ int cpu)
{
struct intel_uncore_box *box;
+ int size = sizeof(*box);
+
+ if (type->num_shared_regs)
+ size += type->num_shared_regs *
+ sizeof(struct intel_uncore_extra_reg);

- box = kmalloc_node(sizeof(*box), GFP_KERNEL | __GFP_ZERO,
- cpu_to_node(cpu));
+ box = kmalloc_node(size, GFP_KERNEL | __GFP_ZERO, cpu_to_node(cpu));
if (!box)
return NULL;

+ if (type->num_shared_regs) {
+ int i;
+ box->shared_regs = (struct intel_uncore_extra_reg *)(box + 1);
+ for (i = 0; i < type->num_shared_regs; i++)
+ raw_spin_lock_init(&box->shared_regs[i].lock);
+ }
+
uncore_pmu_init_hrtimer(box);
atomic_set(&box->refcnt, 1);
box->cpu = -1;
@@ -834,11 +957,18 @@ static int uncore_collect_events(struct intel_uncore_box *box,
}

static struct event_constraint *
-uncore_event_constraint(struct intel_uncore_type *type,
- struct perf_event *event)
+uncore_get_event_constraint(struct intel_uncore_box *box,
+ struct perf_event *event)
{
+ struct intel_uncore_type *type = box->pmu->type;
struct event_constraint *c;

+ if (type->ops->get_constraint) {
+ c = type->ops->get_constraint(box, event);
+ if (c)
+ return c;
+ }
+
if (event->hw.config == ~0ULL)
return &constraint_fixed;

@@ -852,19 +982,25 @@ uncore_event_constraint(struct intel_uncore_type *type,
return &type->unconstrainted;
}

+static void uncore_put_event_constraint(struct intel_uncore_box *box,
+ struct perf_event *event)
+{
+ if (box->pmu->type->ops->put_constraint)
+ box->pmu->type->ops->put_constraint(box, event);
+}
+
static int uncore_assign_events(struct intel_uncore_box *box,
int assign[], int n)
{
unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
struct event_constraint *c, *constraints[UNCORE_PMC_IDX_MAX];
- int i, ret, wmin, wmax;
+ int i, wmin, wmax, ret = 0;
struct hw_perf_event *hwc;

bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);

for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
- c = uncore_event_constraint(box->pmu->type,
- box->event_list[i]);
+ c = uncore_get_event_constraint(box, box->event_list[i]);
constraints[i] = c;
wmin = min(wmin, c->weight);
wmax = max(wmax, c->weight);
@@ -888,13 +1024,17 @@ static int uncore_assign_events(struct intel_uncore_box *box,
break;

__set_bit(hwc->idx, used_mask);
- assign[i] = hwc->idx;
+ if (assign)
+ assign[i] = hwc->idx;
}
- if (i == n)
- return 0;
-
/* slow path */
- ret = perf_assign_events(constraints, n, wmin, wmax, assign);
+ if (i != n)
+ ret = perf_assign_events(constraints, n, wmin, wmax, assign);
+
+ if (!assign || ret) {
+ for (i = 0; i < n; i++)
+ uncore_put_event_constraint(box, box->event_list[i]);
+ }
return ret ? -EINVAL : 0;
}

@@ -1021,6 +1161,8 @@ static void uncore_pmu_event_del(struct perf_event *event, int flags)

for (i = 0; i < box->n_events; i++) {
if (event == box->event_list[i]) {
+ uncore_put_event_constraint(box, event);
+
while (++i < box->n_events)
box->event_list[i - 1] = box->event_list[i];

@@ -1048,10 +1190,9 @@ static int uncore_validate_group(struct intel_uncore_pmu *pmu,
{
struct perf_event *leader = event->group_leader;
struct intel_uncore_box *fake_box;
- int assign[UNCORE_PMC_IDX_MAX];
int ret = -EINVAL, n;

- fake_box = uncore_alloc_box(smp_processor_id());
+ fake_box = uncore_alloc_box(pmu->type, smp_processor_id());
if (!fake_box)
return -ENOMEM;

@@ -1073,7 +1214,7 @@ static int uncore_validate_group(struct intel_uncore_pmu *pmu,

fake_box->n_events = n;

- ret = uncore_assign_events(fake_box, assign, n);
+ ret = uncore_assign_events(fake_box, NULL, n);
out:
kfree(fake_box);
return ret;
@@ -1117,6 +1258,10 @@ int uncore_pmu_event_init(struct perf_event *event)
return -EINVAL;
event->cpu = box->cpu;

+ event->hw.idx = -1;
+ event->hw.last_tag = ~0ULL;
+ event->hw.extra_reg.idx = EXTRA_REG_NONE;
+
if (event->attr.config == UNCORE_FIXED_EVENT) {
/* no fixed counter */
if (!pmu->type->fixed_ctl)
@@ -1130,11 +1275,13 @@ int uncore_pmu_event_init(struct perf_event *event)
hwc->config = ~0ULL;
} else {
hwc->config = event->attr.config & pmu->type->event_mask;
+ if (pmu->type->ops->hw_config) {
+ ret = pmu->type->ops->hw_config(box, event);
+ if (ret)
+ return ret;
+ }
}

- event->hw.idx = -1;
- event->hw.last_tag = ~0ULL;
-
if (event->group_leader != event)
ret = uncore_validate_group(pmu, event);
else
@@ -1276,7 +1423,7 @@ static int __devinit uncore_pci_add(struct intel_uncore_type *type,
if (phys_id < 0)
return -ENODEV;

- box = uncore_alloc_box(0);
+ box = uncore_alloc_box(type, 0);
if (!box)
return -ENOMEM;

@@ -1458,7 +1605,7 @@ static int __cpuinit uncore_cpu_prepare(int cpu, int phys_id)
if (pmu->func_id < 0)
pmu->func_id = j;

- box = uncore_alloc_box(cpu);
+ box = uncore_alloc_box(type, cpu);
if (!box)
return -ENOMEM;

diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index 88498c7..534585a 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -97,6 +97,10 @@
SNBEP_PMON_CTL_INVERT | \
SNBEP_U_MSR_PMON_CTL_TRESH_MASK)

+#define SNBEP_CBO_PMON_CTL_TID_EN (1 << 19)
+#define SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK (SNBEP_PMON_RAW_EVENT_MASK | \
+ SNBEP_CBO_PMON_CTL_TID_EN)
+
/* SNB-EP PCU event control */
#define SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK 0x0000c000
#define SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK 0x1f000000
@@ -140,15 +144,17 @@
/* SNB-EP Cbo register */
#define SNBEP_C0_MSR_PMON_CTR0 0xd16
#define SNBEP_C0_MSR_PMON_CTL0 0xd10
-#define SNBEP_C0_MSR_PMON_BOX_FILTER 0xd14
#define SNBEP_C0_MSR_PMON_BOX_CTL 0xd04
+#define SNBEP_C0_MSR_PMON_BOX_FILTER 0xd14
+#define SNBEP_CB0_MSR_PMON_BOX_FILTER_MASK 0xfffffc1f
#define SNBEP_CBO_MSR_OFFSET 0x20

/* SNB-EP PCU register */
#define SNBEP_PCU_MSR_PMON_CTR0 0xc36
#define SNBEP_PCU_MSR_PMON_CTL0 0xc30
-#define SNBEP_PCU_MSR_PMON_BOX_FILTER 0xc34
#define SNBEP_PCU_MSR_PMON_BOX_CTL 0xc24
+#define SNBEP_PCU_MSR_PMON_BOX_FILTER 0xc34
+#define SNBEP_PCU_MSR_PMON_BOX_FILTER_MASK 0xffffffff
#define SNBEP_PCU_MSR_CORE_C3_CTR 0x3fc
#define SNBEP_PCU_MSR_CORE_C6_CTR 0x3fd

@@ -163,7 +169,6 @@ struct intel_uncore_type {
int num_boxes;
int perf_ctr_bits;
int fixed_ctr_bits;
- int single_fixed;
unsigned perf_ctr;
unsigned event_ctl;
unsigned event_mask;
@@ -171,6 +176,8 @@ struct intel_uncore_type {
unsigned fixed_ctl;
unsigned box_ctl;
unsigned msr_offset;
+ unsigned num_shared_regs:8;
+ unsigned single_fixed:1;
struct event_constraint unconstrainted;
struct event_constraint *constraints;
struct intel_uncore_pmu *pmus;
@@ -188,6 +195,10 @@ struct intel_uncore_ops {
void (*disable_event)(struct intel_uncore_box *, struct perf_event *);
void (*enable_event)(struct intel_uncore_box *, struct perf_event *);
u64 (*read_counter)(struct intel_uncore_box *, struct perf_event *);
+ int (*hw_config)(struct intel_uncore_box *, struct perf_event *);
+ struct event_constraint *(*get_constraint)(struct intel_uncore_box *,
+ struct perf_event *);
+ void (*put_constraint)(struct intel_uncore_box *, struct perf_event *);
};

struct intel_uncore_pmu {
@@ -200,6 +211,12 @@ struct intel_uncore_pmu {
struct list_head box_list;
};

+struct intel_uncore_extra_reg {
+ raw_spinlock_t lock;
+ u64 config1;
+ atomic_t ref;
+};
+
struct intel_uncore_box {
int phys_id;
int n_active; /* number of active events */
@@ -211,6 +228,7 @@ struct intel_uncore_box {
struct perf_event *event_list[UNCORE_PMC_IDX_MAX];
unsigned long active_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
u64 tags[UNCORE_PMC_IDX_MAX];
+ struct intel_uncore_extra_reg *shared_regs;
struct pci_dev *pci_dev;
struct intel_uncore_pmu *pmu;
struct hrtimer hrtimer;
--
1.7.10.2

2012-06-27 09:42:59

by Peter Zijlstra

Subject: Re: [PATCH 2/2] perf/x86: Uncore Filter support for SandyBridge-EP

On Wed, 2012-06-27 at 15:09 +0800, Yan, Zheng wrote:
> +static void snbep_uncore_put_constraint(struct intel_uncore_box *box,
> + struct perf_event *event)
> +{
> + struct intel_uncore_extra_reg *er;
> + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
> +
> + if (box->phys_id < 0 || !reg1->alloc)
> + return;
> +
> + er = &box->shared_regs[reg1->idx];
> + atomic_sub(1, &er->ref);

We have atomic_dec() for this ;-)

> + reg1->alloc = 0;
> +}

2012-06-27 10:01:37

by Peter Zijlstra

Subject: Re: [PATCH 2/2] perf/x86: Uncore Filter support for SandyBridge-EP

On Wed, 2012-06-27 at 15:09 +0800, Yan, Zheng wrote:
> +struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
> + int cpu)
> {
> struct intel_uncore_box *box;
> + int size = sizeof(*box);
> +
> + if (type->num_shared_regs)
> + size += type->num_shared_regs *
> + sizeof(struct intel_uncore_extra_reg);
>
> - box = kmalloc_node(sizeof(*box), GFP_KERNEL | __GFP_ZERO,
> - cpu_to_node(cpu));
> + box = kmalloc_node(size, GFP_KERNEL | __GFP_ZERO, cpu_to_node(cpu));
> if (!box)
> return NULL;
>
> + if (type->num_shared_regs) {
> + int i;
> + box->shared_regs = (struct intel_uncore_extra_reg *)(box + 1);
> + for (i = 0; i < type->num_shared_regs; i++)
> + raw_spin_lock_init(&box->shared_regs[i].lock);
> + }
> +
> uncore_pmu_init_hrtimer(box);
> atomic_set(&box->refcnt, 1);
> box->cpu = -1;

Yuck.. that's vile. How about something like this:


---
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -206,7 +206,7 @@ static void snbep_uncore_put_constraint(
return;

er = &box->shared_regs[reg1->idx];
- atomic_sub(1, &er->ref);
+ atomic_dec(&er->ref);
reg1->alloc = 0;
}

@@ -862,22 +862,17 @@ struct intel_uncore_box *uncore_alloc_bo
int cpu)
{
struct intel_uncore_box *box;
- int size = sizeof(*box);
-
- if (type->num_shared_regs)
- size += type->num_shared_regs *
- sizeof(struct intel_uncore_extra_reg);
+ int i, size;
+
+ size = sizeof(*box) + type->num_shared_regs *
+ sizeof(struct intel_uncore_extra_reg);

box = kmalloc_node(size, GFP_KERNEL | __GFP_ZERO, cpu_to_node(cpu));
if (!box)
return NULL;

- if (type->num_shared_regs) {
- int i;
- box->shared_regs = (struct intel_uncore_extra_reg *)(box + 1);
- for (i = 0; i < type->num_shared_regs; i++)
- raw_spin_lock_init(&box->shared_regs[i].lock);
- }
+ for (i = 0; i < type->num_shared_regs; i++)
+ raw_spin_lock_init(&box->shared_regs[i].lock);

uncore_pmu_init_hrtimer(box);
atomic_set(&box->refcnt, 1);
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -228,11 +228,11 @@ struct intel_uncore_box {
struct perf_event *event_list[UNCORE_PMC_IDX_MAX];
unsigned long active_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
u64 tags[UNCORE_PMC_IDX_MAX];
- struct intel_uncore_extra_reg *shared_regs;
struct pci_dev *pci_dev;
struct intel_uncore_pmu *pmu;
struct hrtimer hrtimer;
struct list_head list;
+ struct intel_uncore_extra_reg shared_regs[0];
};

#define UNCORE_BOX_FLAG_INITIATED 0

2012-06-27 10:06:55

by Peter Zijlstra

Subject: Re: [PATCH 2/2] perf/x86: Uncore Filter support for SandyBridge-EP

On Wed, 2012-06-27 at 15:09 +0800, Yan, Zheng wrote:
> @@ -1048,10 +1190,9 @@ static int uncore_validate_group(struct intel_uncore_pmu *pmu,
> {
> struct perf_event *leader = event->group_leader;
> struct intel_uncore_box *fake_box;
> - int assign[UNCORE_PMC_IDX_MAX];
> int ret = -EINVAL, n;
>
> - fake_box = uncore_alloc_box(smp_processor_id());
> + fake_box = uncore_alloc_box(pmu->type, smp_processor_id());
> if (!fake_box)
> return -ENOMEM;
>
> @@ -1073,7 +1214,7 @@ static int uncore_validate_group(struct intel_uncore_pmu *pmu,
>
> fake_box->n_events = n;
>
> - ret = uncore_assign_events(fake_box, assign, n);
> + ret = uncore_assign_events(fake_box, NULL, n);
> out:
> kfree(fake_box);
> return ret;

Isn't the uncore now suffering the same problem you found for the
regular extra stuff?

---
commit 5a425294ee7d4ab5a374248e85838dfd450caf75
Author: Peter Zijlstra <[email protected]>
Date: Tue Jun 5 15:30:31 2012 +0200

perf/x86: Fix Intel shared extra MSR allocation

Zheng Yan reported that event group validation can wreck event state
when Intel extra_reg allocation changes event state.

Validation shouldn't change any persistent state. Cloning events in
validate_{event,group}() isn't really pretty either, so add a few
special cases to avoid modifying the event state.

The code is restructured to minimize the special case impact.

Reported-by: Zheng Yan <[email protected]>
Acked-by: Stephane Eranian <[email protected]>
Signed-off-by: Peter Zijlstra <[email protected]>
Link: http://lkml.kernel.org/r/1338903031.28282.175.camel@twins
Signed-off-by: Ingo Molnar <[email protected]>

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index e049d6d..cb60838 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1496,6 +1496,7 @@ static struct cpu_hw_events *allocate_fake_cpuc(void)
if (!cpuc->shared_regs)
goto error;
}
+ cpuc->is_fake = 1;
return cpuc;
error:
free_fake_cpuc(cpuc);
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 6638aaf..83794d8 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -117,6 +117,7 @@ struct cpu_hw_events {
struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */

unsigned int group_flag;
+ int is_fake;

/*
* Intel DebugStore bits
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 166546e..965baa2 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1119,27 +1119,33 @@ intel_bts_constraints(struct perf_event *event)
return NULL;
}

-static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
+static int intel_alt_er(int idx)
{
if (!(x86_pmu.er_flags & ERF_HAS_RSP_1))
- return false;
+ return idx;

- if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) {
- event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
- event->hw.config |= 0x01bb;
- event->hw.extra_reg.idx = EXTRA_REG_RSP_1;
- event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
- } else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) {
+ if (idx == EXTRA_REG_RSP_0)
+ return EXTRA_REG_RSP_1;
+
+ if (idx == EXTRA_REG_RSP_1)
+ return EXTRA_REG_RSP_0;
+
+ return idx;
+}
+
+static void intel_fixup_er(struct perf_event *event, int idx)
+{
+ event->hw.extra_reg.idx = idx;
+
+ if (idx == EXTRA_REG_RSP_0) {
event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
event->hw.config |= 0x01b7;
- event->hw.extra_reg.idx = EXTRA_REG_RSP_0;
event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
+ } else if (idx == EXTRA_REG_RSP_1) {
+ event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
+ event->hw.config |= 0x01bb;
+ event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
}
-
- if (event->hw.extra_reg.idx == orig_idx)
- return false;
-
- return true;
}

/*
@@ -1157,14 +1163,18 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
struct event_constraint *c = &emptyconstraint;
struct er_account *era;
unsigned long flags;
- int orig_idx = reg->idx;
+ int idx = reg->idx;

- /* already allocated shared msr */
- if (reg->alloc)
+ /*
+ * reg->alloc can be set due to existing state, so for fake cpuc we
+ * need to ignore this, otherwise we might fail to allocate proper fake
+ * state for this extra reg constraint. Also see the comment below.
+ */
+ if (reg->alloc && !cpuc->is_fake)
return NULL; /* call x86_get_event_constraint() */

again:
- era = &cpuc->shared_regs->regs[reg->idx];
+ era = &cpuc->shared_regs->regs[idx];
/*
* we use spin_lock_irqsave() to avoid lockdep issues when
* passing a fake cpuc
@@ -1173,6 +1183,29 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,

if (!atomic_read(&era->ref) || era->config == reg->config) {

+ /*
+ * If its a fake cpuc -- as per validate_{group,event}() we
+ * shouldn't touch event state and we can avoid doing so
+ * since both will only call get_event_constraints() once
+ * on each event, this avoids the need for reg->alloc.
+ *
+ * Not doing the ER fixup will only result in era->reg being
+ * wrong, but since we won't actually try and program hardware
+ * this isn't a problem either.
+ */
+ if (!cpuc->is_fake) {
+ if (idx != reg->idx)
+ intel_fixup_er(event, idx);
+
+ /*
+ * x86_schedule_events() can call get_event_constraints()
+ * multiple times on events in the case of incremental
+ * scheduling(). reg->alloc ensures we only do the ER
+ * allocation once.
+ */
+ reg->alloc = 1;
+ }
+
/* lock in msr value */
era->config = reg->config;
era->reg = reg->reg;
@@ -1180,17 +1213,17 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
/* one more user */
atomic_inc(&era->ref);

- /* no need to reallocate during incremental event scheduling */
- reg->alloc = 1;
-
/*
* need to call x86_get_event_constraint()
* to check if associated event has constraints
*/
c = NULL;
- } else if (intel_try_alt_er(event, orig_idx)) {
- raw_spin_unlock_irqrestore(&era->lock, flags);
- goto again;
+ } else {
+ idx = intel_alt_er(idx);
+ if (idx != reg->idx) {
+ raw_spin_unlock_irqrestore(&era->lock, flags);
+ goto again;
+ }
}
raw_spin_unlock_irqrestore(&era->lock, flags);

@@ -1204,11 +1237,14 @@ __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
struct er_account *era;

/*
- * only put constraint if extra reg was actually
- * allocated. Also takes care of event which do
- * not use an extra shared reg
+ * Only put constraint if extra reg was actually allocated. Also takes
+ * care of event which do not use an extra shared reg.
+ *
+ * Also, if this is a fake cpuc we shouldn't touch any event state
+ * (reg->alloc) and we don't care about leaving inconsistent cpuc state
+ * either since it'll be thrown out.
*/
- if (!reg->alloc)
+ if (!reg->alloc || cpuc->is_fake)
return;

era = &cpuc->shared_regs->regs[reg->idx];

2012-06-27 13:06:58

by Stephane Eranian

Subject: Re: [PATCH 1/2] perf/x86: Use 0xff as pseudo code for fixed uncore event

On Wed, Jun 27, 2012 at 9:09 AM, Yan, Zheng <[email protected]> wrote:
> From: "Yan, Zheng" <[email protected]>
>
> Stephane Eranian suggested using 0xff as the pseudo event code for the
> fixed uncore event, with the umask value determining which of the
> fixed events we want to map to. So far there is at most one fixed
> counter per uncore PMU, so just change the definition of
> UNCORE_FIXED_EVENT to 0xff.
>
I would still do: event=0xff,umask=0x00
to reinforce the fact that the first fixed counter is index 0x00.
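
i.e. the clockticks descriptor would then read (a sketch of the
suggested follow-up, using the macro from the patch):

	INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"),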

> Signed-off-by: Yan, Zheng <[email protected]>
> ---
>  arch/x86/kernel/cpu/perf_event_intel_uncore.c |    4 ++--
>  arch/x86/kernel/cpu/perf_event_intel_uncore.h |    2 +-
>  2 files changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
> index 6f43f95..8ca0f8f 100644
> --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
> +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
> @@ -179,7 +179,7 @@ static struct attribute *snbep_uncore_pcu_formats_attr[] = {
>  };
>
>  static struct uncore_event_desc snbep_uncore_imc_events[] = {
> -       INTEL_UNCORE_EVENT_DESC(clockticks,      "event=0xff,umask=0xff"),
> +       INTEL_UNCORE_EVENT_DESC(clockticks,      "event=0xff"),
>        INTEL_UNCORE_EVENT_DESC(cas_count_read,  "event=0x04,umask=0x03"),
>        INTEL_UNCORE_EVENT_DESC(cas_count_write, "event=0x04,umask=0x0c"),
>        { /* end: all zeroes */ },
> @@ -616,7 +616,7 @@ static struct attribute_group nhm_uncore_format_group = {
>  };
>
>  static struct uncore_event_desc nhm_uncore_events[] = {
> -       INTEL_UNCORE_EVENT_DESC(clockticks,                "event=0xff,umask=0xff"),
> +       INTEL_UNCORE_EVENT_DESC(clockticks,                "event=0xff"),
>        INTEL_UNCORE_EVENT_DESC(qmc_writes_full_any,       "event=0x2f,umask=0x0f"),
>        INTEL_UNCORE_EVENT_DESC(qmc_normal_reads_any,      "event=0x2c,umask=0x0f"),
>        INTEL_UNCORE_EVENT_DESC(qhl_request_ioh_reads,     "event=0x20,umask=0x01"),
> diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
> index 4d52db0..88498c7 100644
> --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
> +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
> @@ -9,7 +9,7 @@
>
>  #define UNCORE_PMU_HRTIMER_INTERVAL    (60 * NSEC_PER_SEC)
>
> -#define UNCORE_FIXED_EVENT             0xffff
> +#define UNCORE_FIXED_EVENT             0xff
>  #define UNCORE_PMC_IDX_MAX_GENERIC     8
>  #define UNCORE_PMC_IDX_FIXED           UNCORE_PMC_IDX_MAX_GENERIC
>  #define UNCORE_PMC_IDX_MAX             (UNCORE_PMC_IDX_FIXED + 1)
> --
> 1.7.10.2
>

2012-06-27 13:11:00

by Peter Zijlstra

Subject: Re: [PATCH 1/2] perf/x86: Use 0xff as pseudo code for fixed uncore event

On Wed, 2012-06-27 at 15:06 +0200, Stephane Eranian wrote:
> I would still do: event=0xff,umask=0x00
> to reinforce the fact that first fixed counter is index 0x00.

Ok, fixed up the patch..

2012-06-28 01:30:20

by Yan, Zheng

Subject: Re: [PATCH 2/2] perf/x86: Uncore Filter support for SandyBridge-EP

On 06/27/2012 06:06 PM, Peter Zijlstra wrote:
> On Wed, 2012-06-27 at 15:09 +0800, Yan, Zheng wrote:
>> > @@ -1048,10 +1190,9 @@ static int uncore_validate_group(struct intel_uncore_pmu *pmu,
>> > {
>> > struct perf_event *leader = event->group_leader;
>> > struct intel_uncore_box *fake_box;
>> > - int assign[UNCORE_PMC_IDX_MAX];
>> > int ret = -EINVAL, n;
>> >
>> > - fake_box = uncore_alloc_box(smp_processor_id());
>> > + fake_box = uncore_alloc_box(pmu->type, smp_processor_id());
>> > if (!fake_box)
>> > return -ENOMEM;
>> >
>> > @@ -1073,7 +1214,7 @@ static int uncore_validate_group(struct intel_uncore_pmu *pmu,
>> >
>> > fake_box->n_events = n;
>> >
>> > - ret = uncore_assign_events(fake_box, assign, n);
>> > + ret = uncore_assign_events(fake_box, NULL, n);
>> > out:
>> > kfree(fake_box);
>> > return ret;
> Isn't the uncore now suffering the same problem you found for the
> regular extra stuff?

snbep_uncore_get/put_constraint() already check for that. The uncore
case is simpler because we don't need to try swapping RSP_0/RSP_1.
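
For reference, the guards in question from the patch (assuming
uncore_alloc_box() leaves phys_id at -1, so the fake box created by
uncore_validate_group() never reads or writes reg1->alloc):

	/* snbep_uncore_get_constraint(): skip the alloc shortcut on fake boxes */
	if (reg1->idx == EXTRA_REG_NONE || (box->phys_id >= 0 && reg1->alloc))
		return NULL;

	/* snbep_uncore_put_constraint(): leave event state alone on fake boxes */
	if (box->phys_id < 0 || !reg1->alloc)
		return;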

Regards
Yan, Zheng

2012-06-28 10:10:58

by Peter Zijlstra

Subject: Re: [PATCH 2/2] perf/x86: Uncore Filter support for SandyBridge-EP

On Thu, 2012-06-28 at 09:30 +0800, Yan, Zheng wrote:
> > Isn't the uncore now suffering the same problem you found for the
> > regular extra stuff?
>
> snbep_uncore_get/put_constraint() already check for that. The uncore
> case is simpler because we don't need to try swapping RSP_0/RSP_1.

The fact that I missed that is a big hint..