2017-04-03 16:48:39

by Hoan Tran

[permalink] [raw]
Subject: [PATCH v2 0/3] perf: xgene: Add support for SoC PMU version 3

This patch set adds support for SoC-wide (AKA uncore) Performance Monitoring
Unit version 3.

It can support up to
- 2 IOB PMU instances
- 8 L3C PMU instances
- 2 MCB PMU instances
- 8 MCU PMU instances

v2:
* Split into separate patches
* Use the function pointers for the PMU leaf functions
* Parse PMU subnode by the match table
* Dont allow user change agent id by config1 for SoC PMU v3

Hoan Tran (3):
perf: xgene: Parse PMU subnode from the match table
perf: xgene: Move PMU leaf functions into function pointer structure
perf: xgene: Add support for SoC PMU version 3

drivers/perf/xgene_pmu.c | 695 +++++++++++++++++++++++++++++++++++++++++------
1 file changed, 619 insertions(+), 76 deletions(-)

--
1.9.1


2017-04-03 16:48:42

by Hoan Tran

[permalink] [raw]
Subject: [PATCH v2 2/3] perf: xgene: Move PMU leaf functions into function pointer structure

This patch moves PMU leaf functions into a function pointer structure.
It helps code maintain and expasion easier.

Signed-off-by: Hoan Tran <[email protected]>
---
drivers/perf/xgene_pmu.c | 85 +++++++++++++++++++++++++++++++++++++-----------
1 file changed, 66 insertions(+), 19 deletions(-)

diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c
index 5ffd580..f34fc78 100644
--- a/drivers/perf/xgene_pmu.c
+++ b/drivers/perf/xgene_pmu.c
@@ -96,6 +96,23 @@ struct xgene_pmu_dev {
struct perf_event *pmu_counter_event[PMU_MAX_COUNTERS];
};

+struct xgene_pmu_ops {
+ void (*mask_int)(struct xgene_pmu *pmu);
+ void (*unmask_int)(struct xgene_pmu *pmu);
+ u64 (*read_counter)(struct xgene_pmu_dev *pmu, int idx);
+ void (*write_counter)(struct xgene_pmu_dev *pmu, int idx, u64 val);
+ void (*write_evttype)(struct xgene_pmu_dev *pmu_dev, int idx, u32 val);
+ void (*write_agentmsk)(struct xgene_pmu_dev *pmu_dev, u32 val);
+ void (*write_agent1msk)(struct xgene_pmu_dev *pmu_dev, u32 val);
+ void (*enable_counter)(struct xgene_pmu_dev *pmu_dev, int idx);
+ void (*disable_counter)(struct xgene_pmu_dev *pmu_dev, int idx);
+ void (*enable_counter_int)(struct xgene_pmu_dev *pmu_dev, int idx);
+ void (*disable_counter_int)(struct xgene_pmu_dev *pmu_dev, int idx);
+ void (*reset_counters)(struct xgene_pmu_dev *pmu_dev);
+ void (*start_counters)(struct xgene_pmu_dev *pmu_dev);
+ void (*stop_counters)(struct xgene_pmu_dev *pmu_dev);
+};
+
struct xgene_pmu {
struct device *dev;
int version;
@@ -104,6 +121,7 @@ struct xgene_pmu {
u32 mc_active_mask;
cpumask_t cpu;
raw_spinlock_t lock;
+ const struct xgene_pmu_ops *ops;
struct list_head l3cpmus;
struct list_head iobpmus;
struct list_head mcbpmus;
@@ -392,13 +410,14 @@ static inline void xgene_pmu_unmask_int(struct xgene_pmu *xgene_pmu)
writel(PCPPMU_INTCLRMASK, xgene_pmu->pcppmu_csr + PCPPMU_INTMASK_REG);
}

-static inline u32 xgene_pmu_read_counter(struct xgene_pmu_dev *pmu_dev, int idx)
+static inline u64 xgene_pmu_read_counter32(struct xgene_pmu_dev *pmu_dev,
+ int idx)
{
- return readl(pmu_dev->inf->csr + PMU_PMEVCNTR0 + (4 * idx));
+ return (u64)readl(pmu_dev->inf->csr + PMU_PMEVCNTR0 + (4 * idx));
}

static inline void
-xgene_pmu_write_counter(struct xgene_pmu_dev *pmu_dev, int idx, u32 val)
+xgene_pmu_write_counter32(struct xgene_pmu_dev *pmu_dev, int idx, u64 val)
{
writel(val, pmu_dev->inf->csr + PMU_PMEVCNTR0 + (4 * idx));
}
@@ -491,20 +510,22 @@ static inline void xgene_pmu_stop_counters(struct xgene_pmu_dev *pmu_dev)
static void xgene_perf_pmu_enable(struct pmu *pmu)
{
struct xgene_pmu_dev *pmu_dev = to_pmu_dev(pmu);
+ struct xgene_pmu *xgene_pmu = pmu_dev->parent;
int enabled = bitmap_weight(pmu_dev->cntr_assign_mask,
pmu_dev->max_counters);

if (!enabled)
return;

- xgene_pmu_start_counters(pmu_dev);
+ xgene_pmu->ops->start_counters(pmu_dev);
}

static void xgene_perf_pmu_disable(struct pmu *pmu)
{
struct xgene_pmu_dev *pmu_dev = to_pmu_dev(pmu);
+ struct xgene_pmu *xgene_pmu = pmu_dev->parent;

- xgene_pmu_stop_counters(pmu_dev);
+ xgene_pmu->ops->stop_counters(pmu_dev);
}

static int xgene_perf_event_init(struct perf_event *event)
@@ -572,27 +593,32 @@ static int xgene_perf_event_init(struct perf_event *event)
static void xgene_perf_enable_event(struct perf_event *event)
{
struct xgene_pmu_dev *pmu_dev = to_pmu_dev(event->pmu);
+ struct xgene_pmu *xgene_pmu = pmu_dev->parent;

- xgene_pmu_write_evttype(pmu_dev, GET_CNTR(event), GET_EVENTID(event));
- xgene_pmu_write_agentmsk(pmu_dev, ~((u32)GET_AGENTID(event)));
+ xgene_pmu->ops->write_evttype(pmu_dev, GET_CNTR(event),
+ GET_EVENTID(event));
+ xgene_pmu->ops->write_agentmsk(pmu_dev, ~((u32)GET_AGENTID(event)));
if (pmu_dev->inf->type == PMU_TYPE_IOB)
- xgene_pmu_write_agent1msk(pmu_dev, ~((u32)GET_AGENT1ID(event)));
+ xgene_pmu->ops->write_agent1msk(pmu_dev,
+ ~((u32)GET_AGENT1ID(event)));

- xgene_pmu_enable_counter(pmu_dev, GET_CNTR(event));
- xgene_pmu_enable_counter_int(pmu_dev, GET_CNTR(event));
+ xgene_pmu->ops->enable_counter(pmu_dev, GET_CNTR(event));
+ xgene_pmu->ops->enable_counter_int(pmu_dev, GET_CNTR(event));
}

static void xgene_perf_disable_event(struct perf_event *event)
{
struct xgene_pmu_dev *pmu_dev = to_pmu_dev(event->pmu);
+ struct xgene_pmu *xgene_pmu = pmu_dev->parent;

- xgene_pmu_disable_counter(pmu_dev, GET_CNTR(event));
- xgene_pmu_disable_counter_int(pmu_dev, GET_CNTR(event));
+ xgene_pmu->ops->disable_counter(pmu_dev, GET_CNTR(event));
+ xgene_pmu->ops->disable_counter_int(pmu_dev, GET_CNTR(event));
}

static void xgene_perf_event_set_period(struct perf_event *event)
{
struct xgene_pmu_dev *pmu_dev = to_pmu_dev(event->pmu);
+ struct xgene_pmu *xgene_pmu = pmu_dev->parent;
struct hw_perf_event *hw = &event->hw;
/*
* The X-Gene PMU counters have a period of 2^32. To account for the
@@ -603,18 +629,19 @@ static void xgene_perf_event_set_period(struct perf_event *event)
u64 val = 1ULL << 31;

local64_set(&hw->prev_count, val);
- xgene_pmu_write_counter(pmu_dev, hw->idx, (u32) val);
+ xgene_pmu->ops->write_counter(pmu_dev, hw->idx, val);
}

static void xgene_perf_event_update(struct perf_event *event)
{
struct xgene_pmu_dev *pmu_dev = to_pmu_dev(event->pmu);
+ struct xgene_pmu *xgene_pmu = pmu_dev->parent;
struct hw_perf_event *hw = &event->hw;
u64 delta, prev_raw_count, new_raw_count;

again:
prev_raw_count = local64_read(&hw->prev_count);
- new_raw_count = xgene_pmu_read_counter(pmu_dev, GET_CNTR(event));
+ new_raw_count = xgene_pmu->ops->read_counter(pmu_dev, GET_CNTR(event));

if (local64_cmpxchg(&hw->prev_count, prev_raw_count,
new_raw_count) != prev_raw_count)
@@ -633,6 +660,7 @@ static void xgene_perf_read(struct perf_event *event)
static void xgene_perf_start(struct perf_event *event, int flags)
{
struct xgene_pmu_dev *pmu_dev = to_pmu_dev(event->pmu);
+ struct xgene_pmu *xgene_pmu = pmu_dev->parent;
struct hw_perf_event *hw = &event->hw;

if (WARN_ON_ONCE(!(hw->state & PERF_HES_STOPPED)))
@@ -646,8 +674,8 @@ static void xgene_perf_start(struct perf_event *event, int flags)
if (flags & PERF_EF_RELOAD) {
u64 prev_raw_count = local64_read(&hw->prev_count);

- xgene_pmu_write_counter(pmu_dev, GET_CNTR(event),
- (u32) prev_raw_count);
+ xgene_pmu->ops->write_counter(pmu_dev, GET_CNTR(event),
+ prev_raw_count);
}

xgene_perf_enable_event(event);
@@ -736,8 +764,8 @@ static int xgene_init_perf(struct xgene_pmu_dev *pmu_dev, char *name)
};

/* Hardware counter init */
- xgene_pmu_stop_counters(pmu_dev);
- xgene_pmu_reset_counters(pmu_dev);
+ xgene_pmu->ops->stop_counters(pmu_dev);
+ xgene_pmu->ops->reset_counters(pmu_dev);

return perf_pmu_register(&pmu_dev->pmu, name, -1);
}
@@ -1255,6 +1283,23 @@ static int xgene_pmu_probe_pmu_dev(struct xgene_pmu *xgene_pmu,
.id = PCP_PMU_V2,
};

+const struct xgene_pmu_ops xgene_pmu_ops = {
+ .mask_int = xgene_pmu_mask_int,
+ .unmask_int = xgene_pmu_unmask_int,
+ .read_counter = xgene_pmu_read_counter32,
+ .write_counter = xgene_pmu_write_counter32,
+ .write_evttype = xgene_pmu_write_evttype,
+ .write_agentmsk = xgene_pmu_write_agentmsk,
+ .write_agent1msk = xgene_pmu_write_agent1msk,
+ .enable_counter = xgene_pmu_enable_counter,
+ .disable_counter = xgene_pmu_disable_counter,
+ .enable_counter_int = xgene_pmu_enable_counter_int,
+ .disable_counter_int = xgene_pmu_disable_counter_int,
+ .reset_counters = xgene_pmu_reset_counters,
+ .start_counters = xgene_pmu_start_counters,
+ .stop_counters = xgene_pmu_stop_counters,
+};
+
static const struct of_device_id xgene_pmu_of_match[] = {
{ .compatible = "apm,xgene-pmu", .data = &xgene_pmu_data },
{ .compatible = "apm,xgene-pmu-v2", .data = &xgene_pmu_v2_data },
@@ -1304,6 +1349,8 @@ static int xgene_pmu_probe(struct platform_device *pdev)
if (version < 0)
return -ENODEV;

+ xgene_pmu->ops = &xgene_pmu_ops;
+
INIT_LIST_HEAD(&xgene_pmu->l3cpmus);
INIT_LIST_HEAD(&xgene_pmu->iobpmus);
INIT_LIST_HEAD(&xgene_pmu->mcbpmus);
@@ -1362,7 +1409,7 @@ static int xgene_pmu_probe(struct platform_device *pdev)
}

/* Enable interrupt */
- xgene_pmu_unmask_int(xgene_pmu);
+ xgene_pmu->ops->unmask_int(xgene_pmu);

return 0;

--
1.9.1

2017-04-03 16:48:56

by Hoan Tran

[permalink] [raw]
Subject: [PATCH v2 3/3] perf: xgene: Add support for SoC PMU version 3

This patch adds support for SoC-wide (AKA uncore) Performance Monitoring
Unit version 3.

It can support up to
- 2 IOB PMU instances
- 8 L3C PMU instances
- 2 MCB PMU instances
- 8 MCU PMU instances

Signed-off-by: Hoan Tran <[email protected]>
---
drivers/perf/xgene_pmu.c | 572 +++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 524 insertions(+), 48 deletions(-)

diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c
index f34fc78..a72814d 100644
--- a/drivers/perf/xgene_pmu.c
+++ b/drivers/perf/xgene_pmu.c
@@ -37,6 +37,8 @@

#define CSW_CSWCR 0x0000
#define CSW_CSWCR_DUALMCB_MASK BIT(0)
+#define CSW_CSWCR_MCB0_ROUTING(x) (((x) & 0x0C) >> 2)
+#define CSW_CSWCR_MCB1_ROUTING(x) (((x) & 0x30) >> 4)
#define MCBADDRMR 0x0000
#define MCBADDRMR_DUALMCU_MODE_MASK BIT(2)

@@ -50,8 +52,17 @@
#define PCPPMU_INT_L3C BIT(2)
#define PCPPMU_INT_IOB BIT(3)

+#define PCPPMU_V3_INTMASK 0x00FF33FF
+#define PCPPMU_V3_INTENMASK 0xFFFFFFFF
+#define PCPPMU_V3_INTCLRMASK 0xFF00CC00
+#define PCPPMU_V3_INT_MCU 0x000000FF
+#define PCPPMU_V3_INT_MCB 0x00000300
+#define PCPPMU_V3_INT_L3C 0x00FF0000
+#define PCPPMU_V3_INT_IOB 0x00003000
+
#define PMU_MAX_COUNTERS 4
-#define PMU_CNT_MAX_PERIOD 0x100000000ULL
+#define PMU_CNT_MAX_PERIOD 0xFFFFFFFFULL
+#define PMU_V3_CNT_MAX_PERIOD 0xFFFFFFFFFFFFFFFFULL
#define PMU_OVERFLOW_MASK 0xF
#define PMU_PMCR_E BIT(0)
#define PMU_PMCR_P BIT(1)
@@ -73,6 +84,10 @@
#define PMU_PMOVSR 0xC80
#define PMU_PMCR 0xE04

+/* PMU registers for V3 */
+#define PMU_PMOVSCLR 0xC80
+#define PMU_PMOVSSET 0xCC0
+
#define to_pmu_dev(p) container_of(p, struct xgene_pmu_dev, pmu)
#define GET_CNTR(ev) (ev->hw.idx)
#define GET_EVENTID(ev) (ev->hw.config & 0xFFULL)
@@ -119,6 +134,7 @@ struct xgene_pmu {
void __iomem *pcppmu_csr;
u32 mcb_active_mask;
u32 mc_active_mask;
+ u32 l3c_active_mask;
cpumask_t cpu;
raw_spinlock_t lock;
const struct xgene_pmu_ops *ops;
@@ -143,11 +159,13 @@ struct xgene_pmu_data {
enum xgene_pmu_version {
PCP_PMU_V1 = 1,
PCP_PMU_V2,
+ PCP_PMU_V3,
};

enum xgene_pmu_dev_type {
PMU_TYPE_L3C = 0,
PMU_TYPE_IOB,
+ PMU_TYPE_IOB_SLOW,
PMU_TYPE_MCB,
PMU_TYPE_MC,
};
@@ -213,6 +231,56 @@ static ssize_t xgene_pmu_format_show(struct device *dev,
.attrs = mc_pmu_format_attrs,
};

+static struct attribute *l3c_pmu_v3_format_attrs[] = {
+ XGENE_PMU_FORMAT_ATTR(l3c_eventid, "config:0-39"),
+ NULL,
+};
+
+static struct attribute *iob_pmu_v3_format_attrs[] = {
+ XGENE_PMU_FORMAT_ATTR(iob_eventid, "config:0-47"),
+ NULL,
+};
+
+static struct attribute *iob_slow_pmu_v3_format_attrs[] = {
+ XGENE_PMU_FORMAT_ATTR(iob_slow_eventid, "config:0-16"),
+ NULL,
+};
+
+static struct attribute *mcb_pmu_v3_format_attrs[] = {
+ XGENE_PMU_FORMAT_ATTR(mcb_eventid, "config:0-35"),
+ NULL,
+};
+
+static struct attribute *mc_pmu_v3_format_attrs[] = {
+ XGENE_PMU_FORMAT_ATTR(mc_eventid, "config:0-44"),
+ NULL,
+};
+
+static const struct attribute_group l3c_pmu_v3_format_attr_group = {
+ .name = "format",
+ .attrs = l3c_pmu_v3_format_attrs,
+};
+
+static const struct attribute_group iob_pmu_v3_format_attr_group = {
+ .name = "format",
+ .attrs = iob_pmu_v3_format_attrs,
+};
+
+static const struct attribute_group iob_slow_pmu_v3_format_attr_group = {
+ .name = "format",
+ .attrs = iob_slow_pmu_v3_format_attrs,
+};
+
+static const struct attribute_group mcb_pmu_v3_format_attr_group = {
+ .name = "format",
+ .attrs = mcb_pmu_v3_format_attrs,
+};
+
+static const struct attribute_group mc_pmu_v3_format_attr_group = {
+ .name = "format",
+ .attrs = mc_pmu_v3_format_attrs,
+};
+
/*
* sysfs event attributes
*/
@@ -329,6 +397,219 @@ static ssize_t xgene_pmu_event_show(struct device *dev,
.attrs = mc_pmu_events_attrs,
};

+static struct attribute *l3c_pmu_v3_events_attrs[] = {
+ XGENE_PMU_EVENT_ATTR(cycle-count, 0x00),
+ XGENE_PMU_EVENT_ATTR(read-hit, 0x01),
+ XGENE_PMU_EVENT_ATTR(read-miss, 0x02),
+ XGENE_PMU_EVENT_ATTR(index-flush-eviction, 0x03),
+ XGENE_PMU_EVENT_ATTR(write-caused-replacement, 0x04),
+ XGENE_PMU_EVENT_ATTR(write-not-caused-replacement, 0x05),
+ XGENE_PMU_EVENT_ATTR(clean-eviction, 0x06),
+ XGENE_PMU_EVENT_ATTR(dirty-eviction, 0x07),
+ XGENE_PMU_EVENT_ATTR(reads, 0x08),
+ XGENE_PMU_EVENT_ATTR(writes, 0x09),
+ XGENE_PMU_EVENT_ATTR(requests, 0x0a),
+ XGENE_PMU_EVENT_ATTR(tq-bank-conflict-issue-stall, 0x0b),
+ XGENE_PMU_EVENT_ATTR(tq-full, 0x0c),
+ XGENE_PMU_EVENT_ATTR(ackq-full, 0x0d),
+ XGENE_PMU_EVENT_ATTR(wdb-full, 0x0e),
+ XGENE_PMU_EVENT_ATTR(odb-full, 0x10),
+ XGENE_PMU_EVENT_ATTR(wbq-full, 0x11),
+ XGENE_PMU_EVENT_ATTR(input-req-async-fifo-stall, 0x12),
+ XGENE_PMU_EVENT_ATTR(output-req-async-fifo-stall, 0x13),
+ XGENE_PMU_EVENT_ATTR(output-data-async-fifo-stall, 0x14),
+ XGENE_PMU_EVENT_ATTR(total-insertions, 0x15),
+ XGENE_PMU_EVENT_ATTR(sip-insertions-r-set, 0x16),
+ XGENE_PMU_EVENT_ATTR(sip-insertions-r-clear, 0x17),
+ XGENE_PMU_EVENT_ATTR(dip-insertions-r-set, 0x18),
+ XGENE_PMU_EVENT_ATTR(dip-insertions-r-clear, 0x19),
+ XGENE_PMU_EVENT_ATTR(dip-insertions-force-r-set, 0x1a),
+ XGENE_PMU_EVENT_ATTR(egressions, 0x1b),
+ XGENE_PMU_EVENT_ATTR(replacements, 0x1c),
+ XGENE_PMU_EVENT_ATTR(old-replacements, 0x1d),
+ XGENE_PMU_EVENT_ATTR(young-replacements, 0x1e),
+ XGENE_PMU_EVENT_ATTR(r-set-replacements, 0x1f),
+ XGENE_PMU_EVENT_ATTR(r-clear-replacements, 0x20),
+ XGENE_PMU_EVENT_ATTR(old-r-replacements, 0x21),
+ XGENE_PMU_EVENT_ATTR(old-nr-replacements, 0x22),
+ XGENE_PMU_EVENT_ATTR(young-r-replacements, 0x23),
+ XGENE_PMU_EVENT_ATTR(young-nr-replacements, 0x24),
+ XGENE_PMU_EVENT_ATTR(bloomfilter-clearings, 0x25),
+ XGENE_PMU_EVENT_ATTR(generation-flips, 0x26),
+ XGENE_PMU_EVENT_ATTR(vcc-droop-detected, 0x27),
+ NULL,
+};
+
+static struct attribute *iob_fast_pmu_v3_events_attrs[] = {
+ XGENE_PMU_EVENT_ATTR(cycle-count, 0x00),
+ XGENE_PMU_EVENT_ATTR(PA-req-buf-alloc-all, 0x01),
+ XGENE_PMU_EVENT_ATTR(PA-req-buf-alloc-rd, 0x02),
+ XGENE_PMU_EVENT_ATTR(PA-req-buf-alloc-wr, 0x03),
+ XGENE_PMU_EVENT_ATTR(PA-all-CP-req, 0x04),
+ XGENE_PMU_EVENT_ATTR(PA-CP-blk-req, 0x05),
+ XGENE_PMU_EVENT_ATTR(PA-CP-ptl-req, 0x06),
+ XGENE_PMU_EVENT_ATTR(PA-CP-rd-req, 0x07),
+ XGENE_PMU_EVENT_ATTR(PA-CP-wr-req, 0x08),
+ XGENE_PMU_EVENT_ATTR(BA-all-req, 0x09),
+ XGENE_PMU_EVENT_ATTR(BA-rd-req, 0x0a),
+ XGENE_PMU_EVENT_ATTR(BA-wr-req, 0x0b),
+ XGENE_PMU_EVENT_ATTR(PA-rd-shared-req-issued, 0x10),
+ XGENE_PMU_EVENT_ATTR(PA-rd-exclusive-req-issued, 0x11),
+ XGENE_PMU_EVENT_ATTR(PA-wr-invalidate-req-issued-stashable, 0x12),
+ XGENE_PMU_EVENT_ATTR(PA-wr-invalidate-req-issued-nonstashable, 0x13),
+ XGENE_PMU_EVENT_ATTR(PA-wr-back-req-issued-stashable, 0x14),
+ XGENE_PMU_EVENT_ATTR(PA-wr-back-req-issued-nonstashable, 0x15),
+ XGENE_PMU_EVENT_ATTR(PA-ptl-wr-req, 0x16),
+ XGENE_PMU_EVENT_ATTR(PA-ptl-rd-req, 0x17),
+ XGENE_PMU_EVENT_ATTR(PA-wr-back-clean-data, 0x18),
+ XGENE_PMU_EVENT_ATTR(PA-wr-back-cancelled-on-SS, 0x1b),
+ XGENE_PMU_EVENT_ATTR(PA-barrier-occurrence, 0x1c),
+ XGENE_PMU_EVENT_ATTR(PA-barrier-cycles, 0x1d),
+ XGENE_PMU_EVENT_ATTR(PA-total-CP-snoops, 0x20),
+ XGENE_PMU_EVENT_ATTR(PA-rd-shared-snoop, 0x21),
+ XGENE_PMU_EVENT_ATTR(PA-rd-shared-snoop-hit, 0x22),
+ XGENE_PMU_EVENT_ATTR(PA-rd-exclusive-snoop, 0x23),
+ XGENE_PMU_EVENT_ATTR(PA-rd-exclusive-snoop-hit, 0x24),
+ XGENE_PMU_EVENT_ATTR(PA-rd-wr-invalid-snoop, 0x25),
+ XGENE_PMU_EVENT_ATTR(PA-rd-wr-invalid-snoop-hit, 0x26),
+ XGENE_PMU_EVENT_ATTR(PA-req-buffer-full, 0x28),
+ XGENE_PMU_EVENT_ATTR(Cswlf-outbound-req-fifo-full, 0x29),
+ XGENE_PMU_EVENT_ATTR(Cswlf-inbound-snoop-fifo-backpressure, 0x2a),
+ XGENE_PMU_EVENT_ATTR(Cswlf-outbound-lack-fifo-full, 0x2b),
+ XGENE_PMU_EVENT_ATTR(Cswlf-inbound-gack-fifo-backpressure, 0x2c),
+ XGENE_PMU_EVENT_ATTR(Cswlf-outbound-data-fifo-full, 0x2d),
+ XGENE_PMU_EVENT_ATTR(Cswlf-inbound-data-fifo-backpressure, 0x2e),
+ XGENE_PMU_EVENT_ATTR(Cswlf-inbound-req-backpressure, 0x2f),
+ NULL,
+};
+
+static struct attribute *iob_slow_pmu_v3_events_attrs[] = {
+ XGENE_PMU_EVENT_ATTR(cycle-count, 0x00),
+ XGENE_PMU_EVENT_ATTR(PA-AXI0-rd-req, 0x01),
+ XGENE_PMU_EVENT_ATTR(PA-AXI0-wr-req, 0x02),
+ XGENE_PMU_EVENT_ATTR(PA-AXI1-rd-req, 0x03),
+ XGENE_PMU_EVENT_ATTR(PA-AXI1-wr-req, 0x04),
+ XGENE_PMU_EVENT_ATTR(BA-all-AXI-req, 0x07),
+ XGENE_PMU_EVENT_ATTR(BA-AXI-rd-req, 0x08),
+ XGENE_PMU_EVENT_ATTR(BA-AXI-wr-req, 0x09),
+ XGENE_PMU_EVENT_ATTR(BA-free-list-empty, 0x10),
+ NULL,
+};
+
+static struct attribute *mcb_pmu_v3_events_attrs[] = {
+ XGENE_PMU_EVENT_ATTR(cycle-count, 0x00),
+ XGENE_PMU_EVENT_ATTR(req-receive, 0x01),
+ XGENE_PMU_EVENT_ATTR(rd-req-recv, 0x02),
+ XGENE_PMU_EVENT_ATTR(rd-req-recv-2, 0x03),
+ XGENE_PMU_EVENT_ATTR(wr-req-recv, 0x04),
+ XGENE_PMU_EVENT_ATTR(wr-req-recv-2, 0x05),
+ XGENE_PMU_EVENT_ATTR(rd-req-sent-to-mcu, 0x06),
+ XGENE_PMU_EVENT_ATTR(rd-req-sent-to-mcu-2, 0x07),
+ XGENE_PMU_EVENT_ATTR(rd-req-sent-to-spec-mcu, 0x08),
+ XGENE_PMU_EVENT_ATTR(rd-req-sent-to-spec-mcu-2, 0x09),
+ XGENE_PMU_EVENT_ATTR(glbl-ack-recv-for-rd-sent-to-spec-mcu, 0x0a),
+ XGENE_PMU_EVENT_ATTR(glbl-ack-go-recv-for-rd-sent-to-spec-mcu, 0x0b),
+ XGENE_PMU_EVENT_ATTR(glbl-ack-nogo-recv-for-rd-sent-to-spec-mcu, 0x0c),
+ XGENE_PMU_EVENT_ATTR(glbl-ack-go-recv-any-rd-req, 0x0d),
+ XGENE_PMU_EVENT_ATTR(glbl-ack-go-recv-any-rd-req-2, 0x0e),
+ XGENE_PMU_EVENT_ATTR(wr-req-sent-to-mcu, 0x0f),
+ XGENE_PMU_EVENT_ATTR(gack-recv, 0x10),
+ XGENE_PMU_EVENT_ATTR(rd-gack-recv, 0x11),
+ XGENE_PMU_EVENT_ATTR(wr-gack-recv, 0x12),
+ XGENE_PMU_EVENT_ATTR(cancel-rd-gack, 0x13),
+ XGENE_PMU_EVENT_ATTR(cancel-wr-gack, 0x14),
+ XGENE_PMU_EVENT_ATTR(mcb-csw-req-stall, 0x15),
+ XGENE_PMU_EVENT_ATTR(mcu-req-intf-blocked, 0x16),
+ XGENE_PMU_EVENT_ATTR(mcb-mcu-rd-intf-stall, 0x17),
+ XGENE_PMU_EVENT_ATTR(csw-rd-intf-blocked, 0x18),
+ XGENE_PMU_EVENT_ATTR(csw-local-ack-intf-blocked, 0x19),
+ XGENE_PMU_EVENT_ATTR(mcu-req-table-full, 0x1a),
+ XGENE_PMU_EVENT_ATTR(mcu-stat-table-full, 0x1b),
+ XGENE_PMU_EVENT_ATTR(mcu-wr-table-full, 0x1c),
+ XGENE_PMU_EVENT_ATTR(mcu-rdreceipt-resp, 0x1d),
+ XGENE_PMU_EVENT_ATTR(mcu-wrcomplete-resp, 0x1e),
+ XGENE_PMU_EVENT_ATTR(mcu-retryack-resp, 0x1f),
+ XGENE_PMU_EVENT_ATTR(mcu-pcrdgrant-resp, 0x20),
+ XGENE_PMU_EVENT_ATTR(mcu-req-from-lastload, 0x21),
+ XGENE_PMU_EVENT_ATTR(mcu-req-from-bypass, 0x22),
+ XGENE_PMU_EVENT_ATTR(volt-droop-detect, 0x23),
+ NULL,
+};
+
+static struct attribute *mc_pmu_v3_events_attrs[] = {
+ XGENE_PMU_EVENT_ATTR(cycle-count, 0x00),
+ XGENE_PMU_EVENT_ATTR(pmu-act-sent, 0x01),
+ XGENE_PMU_EVENT_ATTR(pmu-pre-sent, 0x02),
+ XGENE_PMU_EVENT_ATTR(pmu-rd-sent, 0x03),
+ XGENE_PMU_EVENT_ATTR(pmu-rda-sent, 0x04),
+ XGENE_PMU_EVENT_ATTR(pmu-wr-sent, 0x05),
+ XGENE_PMU_EVENT_ATTR(pmu-wra-sent, 0x06),
+ XGENE_PMU_EVENT_ATTR(pmu-pd-entry-vld, 0x07),
+ XGENE_PMU_EVENT_ATTR(pmu-sref-entry-vld, 0x08),
+ XGENE_PMU_EVENT_ATTR(pmu-prea-sent, 0x09),
+ XGENE_PMU_EVENT_ATTR(pmu-ref-sent, 0x0a),
+ XGENE_PMU_EVENT_ATTR(pmu-rd-rda-sent, 0x0b),
+ XGENE_PMU_EVENT_ATTR(pmu-wr-wra-sent, 0x0c),
+ XGENE_PMU_EVENT_ATTR(pmu-raw-hazard, 0x0d),
+ XGENE_PMU_EVENT_ATTR(pmu-war-hazard, 0x0e),
+ XGENE_PMU_EVENT_ATTR(pmu-waw-hazard, 0x0f),
+ XGENE_PMU_EVENT_ATTR(pmu-rar-hazard, 0x10),
+ XGENE_PMU_EVENT_ATTR(pmu-raw-war-waw-hazard, 0x11),
+ XGENE_PMU_EVENT_ATTR(pmu-hprd-lprd-wr-req-vld, 0x12),
+ XGENE_PMU_EVENT_ATTR(pmu-lprd-req-vld, 0x13),
+ XGENE_PMU_EVENT_ATTR(pmu-hprd-req-vld, 0x14),
+ XGENE_PMU_EVENT_ATTR(pmu-hprd-lprd-req-vld, 0x15),
+ XGENE_PMU_EVENT_ATTR(pmu-wr-req-vld, 0x16),
+ XGENE_PMU_EVENT_ATTR(pmu-partial-wr-req-vld, 0x17),
+ XGENE_PMU_EVENT_ATTR(pmu-rd-retry, 0x18),
+ XGENE_PMU_EVENT_ATTR(pmu-wr-retry, 0x19),
+ XGENE_PMU_EVENT_ATTR(pmu-retry-gnt, 0x1a),
+ XGENE_PMU_EVENT_ATTR(pmu-rank-change, 0x1b),
+ XGENE_PMU_EVENT_ATTR(pmu-dir-change, 0x1c),
+ XGENE_PMU_EVENT_ATTR(pmu-rank-dir-change, 0x1d),
+ XGENE_PMU_EVENT_ATTR(pmu-rank-active, 0x1e),
+ XGENE_PMU_EVENT_ATTR(pmu-rank-idle, 0x1f),
+ XGENE_PMU_EVENT_ATTR(pmu-rank-pd, 0x20),
+ XGENE_PMU_EVENT_ATTR(pmu-rank-sref, 0x21),
+ XGENE_PMU_EVENT_ATTR(pmu-queue-fill-gt-thresh, 0x22),
+ XGENE_PMU_EVENT_ATTR(pmu-queue-rds-gt-thresh, 0x23),
+ XGENE_PMU_EVENT_ATTR(pmu-queue-wrs-gt-thresh, 0x24),
+ XGENE_PMU_EVENT_ATTR(pmu-phy-updt-complt, 0x25),
+ XGENE_PMU_EVENT_ATTR(pmu-tz-fail, 0x26),
+ XGENE_PMU_EVENT_ATTR(pmu-dram-errc, 0x27),
+ XGENE_PMU_EVENT_ATTR(pmu-dram-errd, 0x28),
+ XGENE_PMU_EVENT_ATTR(pmu-rd-enq, 0x29),
+ XGENE_PMU_EVENT_ATTR(pmu-wr-enq, 0x2a),
+ XGENE_PMU_EVENT_ATTR(pmu-tmac-limit-reached, 0x2b),
+ XGENE_PMU_EVENT_ATTR(pmu-tmaw-tracker-full, 0x2c),
+ NULL,
+};
+
+static const struct attribute_group l3c_pmu_v3_events_attr_group = {
+ .name = "events",
+ .attrs = l3c_pmu_v3_events_attrs,
+};
+
+static const struct attribute_group iob_fast_pmu_v3_events_attr_group = {
+ .name = "events",
+ .attrs = iob_fast_pmu_v3_events_attrs,
+};
+
+static const struct attribute_group iob_slow_pmu_v3_events_attr_group = {
+ .name = "events",
+ .attrs = iob_slow_pmu_v3_events_attrs,
+};
+
+static const struct attribute_group mcb_pmu_v3_events_attr_group = {
+ .name = "events",
+ .attrs = mcb_pmu_v3_events_attrs,
+};
+
+static const struct attribute_group mc_pmu_v3_events_attr_group = {
+ .name = "events",
+ .attrs = mc_pmu_v3_events_attrs,
+};
+
/*
* sysfs cpumask attributes
*/
@@ -352,7 +633,7 @@ static ssize_t xgene_pmu_cpumask_show(struct device *dev,
};

/*
- * Per PMU device attribute groups
+ * Per PMU device attribute groups of PMU v1 and v2
*/
static const struct attribute_group *l3c_pmu_attr_groups[] = {
&l3c_pmu_format_attr_group,
@@ -382,6 +663,44 @@ static ssize_t xgene_pmu_cpumask_show(struct device *dev,
NULL
};

+/*
+ * Per PMU device attribute groups of PMU v3
+ */
+static const struct attribute_group *l3c_pmu_v3_attr_groups[] = {
+ &l3c_pmu_v3_format_attr_group,
+ &pmu_cpumask_attr_group,
+ &l3c_pmu_v3_events_attr_group,
+ NULL
+};
+
+static const struct attribute_group *iob_fast_pmu_v3_attr_groups[] = {
+ &iob_pmu_v3_format_attr_group,
+ &pmu_cpumask_attr_group,
+ &iob_fast_pmu_v3_events_attr_group,
+ NULL
+};
+
+static const struct attribute_group *iob_slow_pmu_v3_attr_groups[] = {
+ &iob_slow_pmu_v3_format_attr_group,
+ &pmu_cpumask_attr_group,
+ &iob_slow_pmu_v3_events_attr_group,
+ NULL
+};
+
+static const struct attribute_group *mcb_pmu_v3_attr_groups[] = {
+ &mcb_pmu_v3_format_attr_group,
+ &pmu_cpumask_attr_group,
+ &mcb_pmu_v3_events_attr_group,
+ NULL
+};
+
+static const struct attribute_group *mc_pmu_v3_attr_groups[] = {
+ &mc_pmu_v3_format_attr_group,
+ &pmu_cpumask_attr_group,
+ &mc_pmu_v3_events_attr_group,
+ NULL
+};
+
static int get_next_avail_cntr(struct xgene_pmu_dev *pmu_dev)
{
int cntr;
@@ -405,17 +724,47 @@ static inline void xgene_pmu_mask_int(struct xgene_pmu *xgene_pmu)
writel(PCPPMU_INTENMASK, xgene_pmu->pcppmu_csr + PCPPMU_INTMASK_REG);
}

+static inline void xgene_pmu_v3_mask_int(struct xgene_pmu *xgene_pmu)
+{
+ writel(PCPPMU_V3_INTENMASK, xgene_pmu->pcppmu_csr + PCPPMU_INTMASK_REG);
+}
+
static inline void xgene_pmu_unmask_int(struct xgene_pmu *xgene_pmu)
{
writel(PCPPMU_INTCLRMASK, xgene_pmu->pcppmu_csr + PCPPMU_INTMASK_REG);
}

+static inline void xgene_pmu_v3_unmask_int(struct xgene_pmu *xgene_pmu)
+{
+ writel(PCPPMU_V3_INTCLRMASK,
+ xgene_pmu->pcppmu_csr + PCPPMU_INTMASK_REG);
+}
+
static inline u64 xgene_pmu_read_counter32(struct xgene_pmu_dev *pmu_dev,
int idx)
{
return (u64)readl(pmu_dev->inf->csr + PMU_PMEVCNTR0 + (4 * idx));
}

+static inline u64 xgene_pmu_read_counter64(struct xgene_pmu_dev *pmu_dev,
+ int idx)
+{
+ u32 lo, hi;
+
+ /*
+ * v3 has 64-bit counter registers composed by 2 32-bit registers
+ * This can be a problem if the counter increases and carries
+ * out of bit [31] between 2 reads. The extra reads would help
+ * to prevent this issue.
+ */
+ do {
+ hi = xgene_pmu_read_counter32(pmu_dev, 2 * idx + 1);
+ lo = xgene_pmu_read_counter32(pmu_dev, 2 * idx);
+ } while (hi != xgene_pmu_read_counter32(pmu_dev, 2 * idx + 1));
+
+ return (((u64)hi << 32) | lo);
+}
+
static inline void
xgene_pmu_write_counter32(struct xgene_pmu_dev *pmu_dev, int idx, u64 val)
{
@@ -423,6 +772,19 @@ static inline u64 xgene_pmu_read_counter32(struct xgene_pmu_dev *pmu_dev,
}

static inline void
+xgene_pmu_write_counter64(struct xgene_pmu_dev *pmu_dev, int idx, u64 val)
+{
+ u32 cnt_lo, cnt_hi;
+
+ cnt_lo = val & 0xFFFFFFFF;
+ cnt_hi = val >> 32;
+
+ /* v3 has 64-bit counter registers composed by 2 32-bit registers */
+ xgene_pmu_write_counter32(pmu_dev, 2 * idx, val);
+ xgene_pmu_write_counter32(pmu_dev, 2 * idx + 1, val >> 32);
+}
+
+static inline void
xgene_pmu_write_evttype(struct xgene_pmu_dev *pmu_dev, int idx, u32 val)
{
writel(val, pmu_dev->inf->csr + PMU_PMEVTYPER0 + (4 * idx));
@@ -435,12 +797,18 @@ static inline u64 xgene_pmu_read_counter32(struct xgene_pmu_dev *pmu_dev,
}

static inline void
+xgene_pmu_v3_write_agentmsk(struct xgene_pmu_dev *pmu_dev, u32 val) { }
+
+static inline void
xgene_pmu_write_agent1msk(struct xgene_pmu_dev *pmu_dev, u32 val)
{
writel(val, pmu_dev->inf->csr + PMU_PMAMR1);
}

static inline void
+xgene_pmu_v3_write_agent1msk(struct xgene_pmu_dev *pmu_dev, u32 val) { }
+
+static inline void
xgene_pmu_enable_counter(struct xgene_pmu_dev *pmu_dev, int idx)
{
u32 val;
@@ -621,7 +989,7 @@ static void xgene_perf_event_set_period(struct perf_event *event)
struct xgene_pmu *xgene_pmu = pmu_dev->parent;
struct hw_perf_event *hw = &event->hw;
/*
- * The X-Gene PMU counters have a period of 2^32. To account for the
+ * The X-Gene PMU counters have a period of 2^32 or more. To account for the
* possiblity of extreme interrupt latency we program for a period of
* half that. Hopefully we can handle the interrupt before another 2^31
* events occur and the counter overtakes its previous value.
@@ -741,7 +1109,10 @@ static int xgene_init_perf(struct xgene_pmu_dev *pmu_dev, char *name)
{
struct xgene_pmu *xgene_pmu;

- pmu_dev->max_period = PMU_CNT_MAX_PERIOD - 1;
+ if (pmu_dev->parent->version == PCP_PMU_V3)
+ pmu_dev->max_period = PMU_V3_CNT_MAX_PERIOD;
+ else
+ pmu_dev->max_period = PMU_CNT_MAX_PERIOD;
/* First version PMU supports only single event counter */
xgene_pmu = pmu_dev->parent;
if (xgene_pmu->version == PCP_PMU_V1)
@@ -786,20 +1157,38 @@ static int xgene_init_perf(struct xgene_pmu_dev *pmu_dev, char *name)

switch (pmu->inf->type) {
case PMU_TYPE_L3C:
- pmu->attr_groups = l3c_pmu_attr_groups;
+ if (!(xgene_pmu->l3c_active_mask & pmu->inf->enable_mask))
+ goto dev_err;
+ if (xgene_pmu->version == PCP_PMU_V3)
+ pmu->attr_groups = l3c_pmu_v3_attr_groups;
+ else
+ pmu->attr_groups = l3c_pmu_attr_groups;
break;
case PMU_TYPE_IOB:
- pmu->attr_groups = iob_pmu_attr_groups;
+ if (xgene_pmu->version == PCP_PMU_V3)
+ pmu->attr_groups = iob_fast_pmu_v3_attr_groups;
+ else
+ pmu->attr_groups = iob_pmu_attr_groups;
+ break;
+ case PMU_TYPE_IOB_SLOW:
+ if (xgene_pmu->version == PCP_PMU_V3)
+ pmu->attr_groups = iob_slow_pmu_v3_attr_groups;
break;
case PMU_TYPE_MCB:
if (!(xgene_pmu->mcb_active_mask & pmu->inf->enable_mask))
goto dev_err;
- pmu->attr_groups = mcb_pmu_attr_groups;
+ if (xgene_pmu->version == PCP_PMU_V3)
+ pmu->attr_groups = mcb_pmu_v3_attr_groups;
+ else
+ pmu->attr_groups = mcb_pmu_attr_groups;
break;
case PMU_TYPE_MC:
if (!(xgene_pmu->mc_active_mask & pmu->inf->enable_mask))
goto dev_err;
- pmu->attr_groups = mc_pmu_attr_groups;
+ if (xgene_pmu->version == PCP_PMU_V3)
+ pmu->attr_groups = mc_pmu_v3_attr_groups;
+ else
+ pmu->attr_groups = mc_pmu_attr_groups;
break;
default:
return -EINVAL;
@@ -823,18 +1212,25 @@ static int xgene_init_perf(struct xgene_pmu_dev *pmu_dev, char *name)
static void _xgene_pmu_isr(int irq, struct xgene_pmu_dev *pmu_dev)
{
struct xgene_pmu *xgene_pmu = pmu_dev->parent;
+ void __iomem *csr = pmu_dev->inf->csr;
u32 pmovsr;
int idx;

- pmovsr = readl(pmu_dev->inf->csr + PMU_PMOVSR) & PMU_OVERFLOW_MASK;
+ if (xgene_pmu->version == PCP_PMU_V3)
+ pmovsr = readl(csr + PMU_PMOVSSET) & PMU_OVERFLOW_MASK;
+ else
+ pmovsr = readl(csr + PMU_PMOVSR) & PMU_OVERFLOW_MASK;
+
if (!pmovsr)
return;

/* Clear interrupt flag */
if (xgene_pmu->version == PCP_PMU_V1)
- writel(0x0, pmu_dev->inf->csr + PMU_PMOVSR);
+ writel(0x0, csr + PMU_PMOVSR);
+ else if (xgene_pmu->version == PCP_PMU_V2)
+ writel(pmovsr, csr + PMU_PMOVSR);
else
- writel(pmovsr, pmu_dev->inf->csr + PMU_PMOVSR);
+ writel(pmovsr, csr + PMU_PMOVSCLR);

for (idx = 0; idx < PMU_MAX_COUNTERS; idx++) {
struct perf_event *event = pmu_dev->pmu_counter_event[idx];
@@ -850,6 +1246,7 @@ static void _xgene_pmu_isr(int irq, struct xgene_pmu_dev *pmu_dev)

static irqreturn_t xgene_pmu_isr(int irq, void *dev_id)
{
+ u32 intr_mcu, intr_mcb, intr_l3c, intr_iob;
struct xgene_pmu_dev_ctx *ctx;
struct xgene_pmu *xgene_pmu = dev_id;
unsigned long flags;
@@ -859,22 +1256,33 @@ static irqreturn_t xgene_pmu_isr(int irq, void *dev_id)

/* Get Interrupt PMU source */
val = readl(xgene_pmu->pcppmu_csr + PCPPMU_INTSTATUS_REG);
- if (val & PCPPMU_INT_MCU) {
+ if (xgene_pmu->version == PCP_PMU_V3) {
+ intr_mcu = PCPPMU_V3_INT_MCU;
+ intr_mcb = PCPPMU_V3_INT_MCB;
+ intr_l3c = PCPPMU_V3_INT_L3C;
+ intr_iob = PCPPMU_V3_INT_IOB;
+ } else {
+ intr_mcu = PCPPMU_INT_MCU;
+ intr_mcb = PCPPMU_INT_MCB;
+ intr_l3c = PCPPMU_INT_L3C;
+ intr_iob = PCPPMU_INT_IOB;
+ }
+ if (val & intr_mcu) {
list_for_each_entry(ctx, &xgene_pmu->mcpmus, next) {
_xgene_pmu_isr(irq, ctx->pmu_dev);
}
}
- if (val & PCPPMU_INT_MCB) {
+ if (val & intr_mcb) {
list_for_each_entry(ctx, &xgene_pmu->mcbpmus, next) {
_xgene_pmu_isr(irq, ctx->pmu_dev);
}
}
- if (val & PCPPMU_INT_L3C) {
+ if (val & intr_l3c) {
list_for_each_entry(ctx, &xgene_pmu->l3cpmus, next) {
_xgene_pmu_isr(irq, ctx->pmu_dev);
}
}
- if (val & PCPPMU_INT_IOB) {
+ if (val & intr_iob) {
list_for_each_entry(ctx, &xgene_pmu->iobpmus, next) {
_xgene_pmu_isr(irq, ctx->pmu_dev);
}
@@ -885,12 +1293,14 @@ static irqreturn_t xgene_pmu_isr(int irq, void *dev_id)
return IRQ_HANDLED;
}

-static int acpi_pmu_probe_active_mcb_mcu(struct xgene_pmu *xgene_pmu,
+static int acpi_pmu_probe_active_mcb_mcu_l3c(struct xgene_pmu *xgene_pmu,
struct platform_device *pdev)
{
void __iomem *csw_csr, *mcba_csr, *mcbb_csr;
struct resource *res;
unsigned int reg;
+ u32 mcb0routing;
+ u32 mcb1routing;

res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
csw_csr = devm_ioremap_resource(&pdev->dev, res);
@@ -899,41 +1309,72 @@ static int acpi_pmu_probe_active_mcb_mcu(struct xgene_pmu *xgene_pmu,
return PTR_ERR(csw_csr);
}

- res = platform_get_resource(pdev, IORESOURCE_MEM, 2);
- mcba_csr = devm_ioremap_resource(&pdev->dev, res);
- if (IS_ERR(mcba_csr)) {
- dev_err(&pdev->dev, "ioremap failed for MCBA CSR resource\n");
- return PTR_ERR(mcba_csr);
- }
-
- res = platform_get_resource(pdev, IORESOURCE_MEM, 3);
- mcbb_csr = devm_ioremap_resource(&pdev->dev, res);
- if (IS_ERR(mcbb_csr)) {
- dev_err(&pdev->dev, "ioremap failed for MCBB CSR resource\n");
- return PTR_ERR(mcbb_csr);
- }
-
reg = readl(csw_csr + CSW_CSWCR);
- if (reg & CSW_CSWCR_DUALMCB_MASK) {
- /* Dual MCB active */
- xgene_pmu->mcb_active_mask = 0x3;
- /* Probe all active MC(s) */
- reg = readl(mcbb_csr + CSW_CSWCR);
- xgene_pmu->mc_active_mask =
- (reg & MCBADDRMR_DUALMCU_MODE_MASK) ? 0xF : 0x5;
+ if (xgene_pmu->version == PCP_PMU_V3) {
+ mcb0routing = CSW_CSWCR_MCB0_ROUTING(reg);
+ mcb1routing = CSW_CSWCR_MCB0_ROUTING(reg);
+ if (reg & CSW_CSWCR_DUALMCB_MASK) {
+ /* Dual MCB active */
+ xgene_pmu->mcb_active_mask = 0x3;
+ /* Probe all active L3C(s), maximum is 8 */
+ xgene_pmu->l3c_active_mask = 0xFF;
+ /* Probe all active MC(s), maximum is 8 */
+ if ((mcb0routing == 0x2) && (mcb1routing == 0x2))
+ xgene_pmu->mc_active_mask = 0xFF;
+ else if ((mcb0routing == 0x1) && (mcb1routing == 0x1))
+ xgene_pmu->mc_active_mask = 0x33;
+ else
+ xgene_pmu->mc_active_mask = 0x11;
+
+ } else {
+ /* Single MCB active */
+ xgene_pmu->mcb_active_mask = 0x1;
+ /* Probe all active L3C(s), maximum is 4 */
+ xgene_pmu->l3c_active_mask = 0x0F;
+ /* Probe all active MC(s), maximum is 4 */
+ if ((mcb0routing == 0x2) && (mcb1routing == 0x0))
+ xgene_pmu->mc_active_mask = 0x0F;
+ else if ((mcb0routing == 0x1) && (mcb1routing == 0x0))
+ xgene_pmu->mc_active_mask = 0x03;
+ else
+ xgene_pmu->mc_active_mask = 0x01;
+ }
} else {
- /* Single MCB active */
- xgene_pmu->mcb_active_mask = 0x1;
- /* Probe all active MC(s) */
- reg = readl(mcba_csr + CSW_CSWCR);
- xgene_pmu->mc_active_mask =
- (reg & MCBADDRMR_DUALMCU_MODE_MASK) ? 0x3 : 0x1;
- }
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 2);
+ mcba_csr = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(mcba_csr)) {
+ dev_err(&pdev->dev, "ioremap failed for MCBA CSR resource\n");
+ return PTR_ERR(mcba_csr);
+ }
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 3);
+ mcbb_csr = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(mcbb_csr)) {
+ dev_err(&pdev->dev, "ioremap failed for MCBB CSR resource\n");
+ return PTR_ERR(mcbb_csr);
+ }

+ xgene_pmu->l3c_active_mask = 0x1;
+ if (reg & CSW_CSWCR_DUALMCB_MASK) {
+ /* Dual MCB active */
+ xgene_pmu->mcb_active_mask = 0x3;
+ /* Probe all active MC(s) */
+ reg = readl(mcbb_csr + CSW_CSWCR);
+ xgene_pmu->mc_active_mask =
+ (reg & MCBADDRMR_DUALMCU_MODE_MASK) ? 0xF : 0x5;
+ } else {
+ /* Single MCB active */
+ xgene_pmu->mcb_active_mask = 0x1;
+ /* Probe all active MC(s) */
+ reg = readl(mcba_csr + CSW_CSWCR);
+ xgene_pmu->mc_active_mask =
+ (reg & MCBADDRMR_DUALMCU_MODE_MASK) ? 0x3 : 0x1;
+ }
+ }
return 0;
}

-static int fdt_pmu_probe_active_mcb_mcu(struct xgene_pmu *xgene_pmu,
+static int fdt_pmu_probe_active_mcb_mcu_l3c(struct xgene_pmu *xgene_pmu,
struct platform_device *pdev)
{
struct regmap *csw_map, *mcba_map, *mcbb_map;
@@ -958,6 +1399,7 @@ static int fdt_pmu_probe_active_mcb_mcu(struct xgene_pmu *xgene_pmu,
return PTR_ERR(mcbb_map);
}

+ xgene_pmu->l3c_active_mask = 0x1;
if (regmap_read(csw_map, CSW_CSWCR, &reg))
return -EINVAL;

@@ -986,8 +1428,8 @@ static int xgene_pmu_probe_active_mcb_mcu(struct xgene_pmu *xgene_pmu,
struct platform_device *pdev)
{
if (has_acpi_companion(&pdev->dev))
- return acpi_pmu_probe_active_mcb_mcu(xgene_pmu, pdev);
- return fdt_pmu_probe_active_mcb_mcu(xgene_pmu, pdev);
+ return acpi_pmu_probe_active_mcb_mcu_l3c(xgene_pmu, pdev);
+ return fdt_pmu_probe_active_mcb_mcu_l3c(xgene_pmu, pdev);
}

static char *xgene_pmu_dev_name(struct device *dev, u32 type, int id)
@@ -997,6 +1439,8 @@ static char *xgene_pmu_dev_name(struct device *dev, u32 type, int id)
return devm_kasprintf(dev, GFP_KERNEL, "l3c%d", id);
case PMU_TYPE_IOB:
return devm_kasprintf(dev, GFP_KERNEL, "iob%d", id);
+ case PMU_TYPE_IOB_SLOW:
+ return devm_kasprintf(dev, GFP_KERNEL, "iob-slow%d", id);
case PMU_TYPE_MCB:
return devm_kasprintf(dev, GFP_KERNEL, "mcb%d", id);
case PMU_TYPE_MC:
@@ -1080,6 +1524,11 @@ xgene_pmu_dev_ctx *acpi_get_pmu_hw_inf(struct xgene_pmu *xgene_pmu,
{"APMC0D5E", PMU_TYPE_IOB},
{"APMC0D5F", PMU_TYPE_MCB},
{"APMC0D60", PMU_TYPE_MC},
+ {"APMC0D84", PMU_TYPE_L3C},
+ {"APMC0D85", PMU_TYPE_IOB},
+ {"APMC0D86", PMU_TYPE_IOB_SLOW},
+ {"APMC0D87", PMU_TYPE_MCB},
+ {"APMC0D88", PMU_TYPE_MC},
{},
};

@@ -1134,6 +1583,9 @@ static acpi_status acpi_pmu_dev_add(acpi_handle handle, u32 level,
case PMU_TYPE_IOB:
list_add(&ctx->next, &xgene_pmu->iobpmus);
break;
+ case PMU_TYPE_IOB_SLOW:
+ list_add(&ctx->next, &xgene_pmu->iobpmus);
+ break;
case PMU_TYPE_MCB:
list_add(&ctx->next, &xgene_pmu->mcbpmus);
break;
@@ -1255,6 +1707,9 @@ static int fdt_pmu_probe_pmu_dev(struct xgene_pmu *xgene_pmu,
case PMU_TYPE_IOB:
list_add(&ctx->next, &xgene_pmu->iobpmus);
break;
+ case PMU_TYPE_IOB_SLOW:
+ list_add(&ctx->next, &xgene_pmu->iobpmus);
+ break;
case PMU_TYPE_MCB:
list_add(&ctx->next, &xgene_pmu->mcbpmus);
break;
@@ -1300,6 +1755,23 @@ static int xgene_pmu_probe_pmu_dev(struct xgene_pmu *xgene_pmu,
.stop_counters = xgene_pmu_stop_counters,
};

+const struct xgene_pmu_ops xgene_pmu_v3_ops = {
+ .mask_int = xgene_pmu_v3_mask_int,
+ .unmask_int = xgene_pmu_v3_unmask_int,
+ .read_counter = xgene_pmu_read_counter64,
+ .write_counter = xgene_pmu_write_counter64,
+ .write_evttype = xgene_pmu_write_evttype,
+ .write_agentmsk = xgene_pmu_v3_write_agentmsk,
+ .write_agent1msk = xgene_pmu_v3_write_agent1msk,
+ .enable_counter = xgene_pmu_enable_counter,
+ .disable_counter = xgene_pmu_disable_counter,
+ .enable_counter_int = xgene_pmu_enable_counter_int,
+ .disable_counter_int = xgene_pmu_disable_counter_int,
+ .reset_counters = xgene_pmu_reset_counters,
+ .start_counters = xgene_pmu_start_counters,
+ .stop_counters = xgene_pmu_stop_counters,
+};
+
static const struct of_device_id xgene_pmu_of_match[] = {
{ .compatible = "apm,xgene-pmu", .data = &xgene_pmu_data },
{ .compatible = "apm,xgene-pmu-v2", .data = &xgene_pmu_v2_data },
@@ -1310,6 +1782,7 @@ static int xgene_pmu_probe_pmu_dev(struct xgene_pmu *xgene_pmu,
static const struct acpi_device_id xgene_pmu_acpi_match[] = {
{"APMC0D5B", PCP_PMU_V1},
{"APMC0D5C", PCP_PMU_V2},
+ {"APMC0D83", PCP_PMU_V3},
{},
};
MODULE_DEVICE_TABLE(acpi, xgene_pmu_acpi_match);
@@ -1349,7 +1822,10 @@ static int xgene_pmu_probe(struct platform_device *pdev)
if (version < 0)
return -ENODEV;

- xgene_pmu->ops = &xgene_pmu_ops;
+ if (version != PCP_PMU_V3)
+ xgene_pmu->ops = &xgene_pmu_ops;
+ else
+ xgene_pmu->ops = &xgene_pmu_v3_ops;

INIT_LIST_HEAD(&xgene_pmu->l3cpmus);
INIT_LIST_HEAD(&xgene_pmu->iobpmus);
--
1.9.1

2017-04-03 16:49:18

by Hoan Tran

[permalink] [raw]
Subject: [PATCH v2 1/3] perf: xgene: Parse PMU subnode from the match table

This patch parses PMU Subnode from a match table.

Signed-off-by: Hoan Tran <[email protected]>
---
drivers/perf/xgene_pmu.c | 40 ++++++++++++++++++++++++++++++----------
1 file changed, 30 insertions(+), 10 deletions(-)

diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c
index 35b5289..5ffd580 100644
--- a/drivers/perf/xgene_pmu.c
+++ b/drivers/perf/xgene_pmu.c
@@ -1047,9 +1047,35 @@ xgene_pmu_dev_ctx *acpi_get_pmu_hw_inf(struct xgene_pmu *xgene_pmu,
return NULL;
}

+static const struct acpi_device_id xgene_pmu_acpi_type_match[] = {
+ {"APMC0D5D", PMU_TYPE_L3C},
+ {"APMC0D5E", PMU_TYPE_IOB},
+ {"APMC0D5F", PMU_TYPE_MCB},
+ {"APMC0D60", PMU_TYPE_MC},
+ {},
+};
+
+static const struct acpi_device_id *xgene_pmu_acpi_match_type(
+ const struct acpi_device_id *ids,
+ struct acpi_device *adev)
+{
+ const struct acpi_device_id *match_id = NULL;
+ const struct acpi_device_id *id;
+
+ for (id = ids; id->id[0] || id->cls; id++) {
+ if (!acpi_match_device_ids(adev, id))
+ match_id = id;
+ else if (match_id)
+ break;
+ }
+
+ return match_id;
+}
+
static acpi_status acpi_pmu_dev_add(acpi_handle handle, u32 level,
void *data, void **return_value)
{
+ const struct acpi_device_id *acpi_id;
struct xgene_pmu *xgene_pmu = data;
struct xgene_pmu_dev_ctx *ctx;
struct acpi_device *adev;
@@ -1059,17 +1085,11 @@ static acpi_status acpi_pmu_dev_add(acpi_handle handle, u32 level,
if (acpi_bus_get_status(adev) || !adev->status.present)
return AE_OK;

- if (!strcmp(acpi_device_hid(adev), "APMC0D5D"))
- ctx = acpi_get_pmu_hw_inf(xgene_pmu, adev, PMU_TYPE_L3C);
- else if (!strcmp(acpi_device_hid(adev), "APMC0D5E"))
- ctx = acpi_get_pmu_hw_inf(xgene_pmu, adev, PMU_TYPE_IOB);
- else if (!strcmp(acpi_device_hid(adev), "APMC0D5F"))
- ctx = acpi_get_pmu_hw_inf(xgene_pmu, adev, PMU_TYPE_MCB);
- else if (!strcmp(acpi_device_hid(adev), "APMC0D60"))
- ctx = acpi_get_pmu_hw_inf(xgene_pmu, adev, PMU_TYPE_MC);
- else
- ctx = NULL;
+ acpi_id = xgene_pmu_acpi_match_type(xgene_pmu_acpi_type_match, adev);
+ if (!acpi_id)
+ return AE_OK;

+ ctx = acpi_get_pmu_hw_inf(xgene_pmu, adev, (u32)acpi_id->driver_data);
if (!ctx)
return AE_OK;

--
1.9.1

2017-04-17 16:36:11

by Hoan Tran

[permalink] [raw]
Subject: Re: [PATCH v2 0/3] perf: xgene: Add support for SoC PMU version 3

Hi All,

Do you have any comments on this patch set?

Thanks

On Mon, Apr 3, 2017 at 9:47 AM, Hoan Tran <[email protected]> wrote:
> This patch set adds support for SoC-wide (AKA uncore) Performance Monitoring
> Unit version 3.
>
> It can support up to
> - 2 IOB PMU instances
> - 8 L3C PMU instances
> - 2 MCB PMU instances
> - 8 MCU PMU instances
>
> v2:
> * Split into separate patches
> * Use the function pointers for the PMU leaf functions
> * Parse PMU subnode by the match table
> * Dont allow user change agent id by config1 for SoC PMU v3
>
> Hoan Tran (3):
> perf: xgene: Parse PMU subnode from the match table
> perf: xgene: Move PMU leaf functions into function pointer structure
> perf: xgene: Add support for SoC PMU version 3
>
> drivers/perf/xgene_pmu.c | 695 +++++++++++++++++++++++++++++++++++++++++------
> 1 file changed, 619 insertions(+), 76 deletions(-)
>
> --
> 1.9.1
>

2017-06-02 15:00:18

by Mark Rutland

[permalink] [raw]
Subject: Re: [PATCH v2 1/3] perf: xgene: Parse PMU subnode from the match table

Hi Hoan,

Apologies for the last reply.

On Mon, Apr 03, 2017 at 09:47:55AM -0700, Hoan Tran wrote:
> +static const struct acpi_device_id xgene_pmu_acpi_type_match[] = {
> + {"APMC0D5D", PMU_TYPE_L3C},
> + {"APMC0D5E", PMU_TYPE_IOB},
> + {"APMC0D5F", PMU_TYPE_MCB},
> + {"APMC0D60", PMU_TYPE_MC},
> + {},
> +};
> +
> +static const struct acpi_device_id *xgene_pmu_acpi_match_type(
> + const struct acpi_device_id *ids,
> + struct acpi_device *adev)
> +{
> + const struct acpi_device_id *match_id = NULL;
> + const struct acpi_device_id *id;
> +
> + for (id = ids; id->id[0] || id->cls; id++) {
> + if (!acpi_match_device_ids(adev, id))
> + match_id = id;
> + else if (match_id)
> + break;
> + }
> +
> + return match_id;
> +}

I don't believe this look is necessary. AFAICT, acpi_match_device_ids()
already iterates over the id table it is given.

> +
> static acpi_status acpi_pmu_dev_add(acpi_handle handle, u32 level,
> void *data, void **return_value)
> {
> + const struct acpi_device_id *acpi_id;
> struct xgene_pmu *xgene_pmu = data;
> struct xgene_pmu_dev_ctx *ctx;
> struct acpi_device *adev;
> @@ -1059,17 +1085,11 @@ static acpi_status acpi_pmu_dev_add(acpi_handle handle, u32 level,
> if (acpi_bus_get_status(adev) || !adev->status.present)
> return AE_OK;
>
> - if (!strcmp(acpi_device_hid(adev), "APMC0D5D"))
> - ctx = acpi_get_pmu_hw_inf(xgene_pmu, adev, PMU_TYPE_L3C);
> - else if (!strcmp(acpi_device_hid(adev), "APMC0D5E"))
> - ctx = acpi_get_pmu_hw_inf(xgene_pmu, adev, PMU_TYPE_IOB);
> - else if (!strcmp(acpi_device_hid(adev), "APMC0D5F"))
> - ctx = acpi_get_pmu_hw_inf(xgene_pmu, adev, PMU_TYPE_MCB);
> - else if (!strcmp(acpi_device_hid(adev), "APMC0D60"))
> - ctx = acpi_get_pmu_hw_inf(xgene_pmu, adev, PMU_TYPE_MC);
> - else
> - ctx = NULL;
> + acpi_id = xgene_pmu_acpi_match_type(xgene_pmu_acpi_type_match, adev);
> + if (!acpi_id)
> + return AE_OK;

As above, and as I covered in my reply to v1, I think the above should
be:

acpi_id = acpi_match_device_ids(adev, xgene_pmu_acpi_type_match);
if (!acpi_id)
return AE_OK;

... or am I missing something?

With that change:

Acked-by: Mark Rutland <[email protected]>

Thanks,
Mark.

>
> + ctx = acpi_get_pmu_hw_inf(xgene_pmu, adev, (u32)acpi_id->driver_data);
> if (!ctx)
> return AE_OK;
>
> --
> 1.9.1
>

2017-06-02 16:05:36

by Mark Rutland

[permalink] [raw]
Subject: Re: [PATCH v2 3/3] perf: xgene: Add support for SoC PMU version 3

Hi Hoan,

Apologies for the delay in getting to this.

On Mon, Apr 03, 2017 at 09:47:57AM -0700, Hoan Tran wrote:
> This patch adds support for SoC-wide (AKA uncore) Performance Monitoring
> Unit version 3.
>
> It can support up to
> - 2 IOB PMU instances
> - 8 L3C PMU instances
> - 2 MCB PMU instances
> - 8 MCU PMU instances

Please elaborate on the differences compared with prior versions.

I see that counters are 64-bit. Are there any other important details to
be aware of?

[...]

> +static struct attribute *l3c_pmu_v3_events_attrs[] = {

> + XGENE_PMU_EVENT_ATTR(read-hit, 0x01),
> + XGENE_PMU_EVENT_ATTR(read-miss, 0x02),

> + XGENE_PMU_EVENT_ATTR(reads, 0x08),
> + XGENE_PMU_EVENT_ATTR(writes, 0x09),

> +};

Some of these are singular (e.g. "read-hit", "read-miss"), while others
are plural (e.g. "reads", "writes").

The existing driver use the singular form. Please remain consistent with
that. e.g. turn "reads" into "read".

> + XGENE_PMU_EVENT_ATTR(PA-req-buf-alloc-all, 0x01),

Likewise, please consistently use lower case.

> + XGENE_PMU_EVENT_ATTR(pmu-act-sent, 0x01),

Surely we don't need "pmu" in the event names?

[...]

> static inline u64 xgene_pmu_read_counter32(struct xgene_pmu_dev *pmu_dev,
> int idx)
> {
> return (u64)readl(pmu_dev->inf->csr + PMU_PMEVCNTR0 + (4 * idx));
> }

I don't think the cast is necessary. Please remove it.

> static inline void
> +xgene_pmu_write_counter64(struct xgene_pmu_dev *pmu_dev, int idx, u64 val)
> +{
> + u32 cnt_lo, cnt_hi;
> +
> + cnt_lo = val & 0xFFFFFFFF;
> + cnt_hi = val >> 32;

cnt_hi = upper_32_bits(val);
cnt_lo = lower_32_bits(val);

(both are in <linux/kernel.h>)

> +
> + /* v3 has 64-bit counter registers composed by 2 32-bit registers */
> + xgene_pmu_write_counter32(pmu_dev, 2 * idx, val);
> + xgene_pmu_write_counter32(pmu_dev, 2 * idx + 1, val >> 32);

Please use cnt_hi and cnt_lo for the writes.

[...]

> @@ -621,7 +989,7 @@ static void xgene_perf_event_set_period(struct perf_event *event)
> struct xgene_pmu *xgene_pmu = pmu_dev->parent;
> struct hw_perf_event *hw = &event->hw;
> /*
> - * The X-Gene PMU counters have a period of 2^32. To account for the
> + * The X-Gene PMU counters have a period of 2^32 or more. To account for the
> * possiblity of extreme interrupt latency we program for a period of
> * half that. Hopefully we can handle the interrupt before another 2^31
> * events occur and the counter overtakes its previous value.

This comment is now a little out-of-date, as we don't start 64-bit
counters at half their period.

I guess we don't expect a 64-bit counter to overflow, so can we state
that in the comment?

[...]

> -static int acpi_pmu_probe_active_mcb_mcu(struct xgene_pmu *xgene_pmu,
> +static int acpi_pmu_probe_active_mcb_mcu_l3c(struct xgene_pmu *xgene_pmu,
> struct platform_device *pdev)
> {
> void __iomem *csw_csr, *mcba_csr, *mcbb_csr;
> struct resource *res;
> unsigned int reg;
> + u32 mcb0routing;
> + u32 mcb1routing;
>
> res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
> csw_csr = devm_ioremap_resource(&pdev->dev, res);
> @@ -899,41 +1309,72 @@ static int acpi_pmu_probe_active_mcb_mcu(struct xgene_pmu *xgene_pmu,
> return PTR_ERR(csw_csr);
> }
>
> - res = platform_get_resource(pdev, IORESOURCE_MEM, 2);
> - mcba_csr = devm_ioremap_resource(&pdev->dev, res);
> - if (IS_ERR(mcba_csr)) {
> - dev_err(&pdev->dev, "ioremap failed for MCBA CSR resource\n");
> - return PTR_ERR(mcba_csr);
> - }
> -
> - res = platform_get_resource(pdev, IORESOURCE_MEM, 3);
> - mcbb_csr = devm_ioremap_resource(&pdev->dev, res);
> - if (IS_ERR(mcbb_csr)) {
> - dev_err(&pdev->dev, "ioremap failed for MCBB CSR resource\n");
> - return PTR_ERR(mcbb_csr);
> - }
> -
> reg = readl(csw_csr + CSW_CSWCR);
> - if (reg & CSW_CSWCR_DUALMCB_MASK) {
> - /* Dual MCB active */
> - xgene_pmu->mcb_active_mask = 0x3;
> - /* Probe all active MC(s) */
> - reg = readl(mcbb_csr + CSW_CSWCR);
> - xgene_pmu->mc_active_mask =
> - (reg & MCBADDRMR_DUALMCU_MODE_MASK) ? 0xF : 0x5;
> + if (xgene_pmu->version == PCP_PMU_V3) {
> + mcb0routing = CSW_CSWCR_MCB0_ROUTING(reg);
> + mcb1routing = CSW_CSWCR_MCB0_ROUTING(reg);
> + if (reg & CSW_CSWCR_DUALMCB_MASK) {
> + /* Dual MCB active */
> + xgene_pmu->mcb_active_mask = 0x3;
> + /* Probe all active L3C(s), maximum is 8 */
> + xgene_pmu->l3c_active_mask = 0xFF;
> + /* Probe all active MC(s), maximum is 8 */
> + if ((mcb0routing == 0x2) && (mcb1routing == 0x2))
> + xgene_pmu->mc_active_mask = 0xFF;
> + else if ((mcb0routing == 0x1) && (mcb1routing == 0x1))
> + xgene_pmu->mc_active_mask = 0x33;
> + else
> + xgene_pmu->mc_active_mask = 0x11;
> +
> + } else {
> + /* Single MCB active */
> + xgene_pmu->mcb_active_mask = 0x1;
> + /* Probe all active L3C(s), maximum is 4 */
> + xgene_pmu->l3c_active_mask = 0x0F;
> + /* Probe all active MC(s), maximum is 4 */
> + if ((mcb0routing == 0x2) && (mcb1routing == 0x0))
> + xgene_pmu->mc_active_mask = 0x0F;
> + else if ((mcb0routing == 0x1) && (mcb1routing == 0x0))
> + xgene_pmu->mc_active_mask = 0x03;
> + else
> + xgene_pmu->mc_active_mask = 0x01;
> + }
> } else {
> - /* Single MCB active */
> - xgene_pmu->mcb_active_mask = 0x1;
> - /* Probe all active MC(s) */
> - reg = readl(mcba_csr + CSW_CSWCR);
> - xgene_pmu->mc_active_mask =
> - (reg & MCBADDRMR_DUALMCU_MODE_MASK) ? 0x3 : 0x1;
> - }
> + res = platform_get_resource(pdev, IORESOURCE_MEM, 2);
> + mcba_csr = devm_ioremap_resource(&pdev->dev, res);
> + if (IS_ERR(mcba_csr)) {
> + dev_err(&pdev->dev, "ioremap failed for MCBA CSR resource\n");
> + return PTR_ERR(mcba_csr);
> + }
> +
> + res = platform_get_resource(pdev, IORESOURCE_MEM, 3);
> + mcbb_csr = devm_ioremap_resource(&pdev->dev, res);
> + if (IS_ERR(mcbb_csr)) {
> + dev_err(&pdev->dev, "ioremap failed for MCBB CSR resource\n");
> + return PTR_ERR(mcbb_csr);
> + }
>
> + xgene_pmu->l3c_active_mask = 0x1;
> + if (reg & CSW_CSWCR_DUALMCB_MASK) {
> + /* Dual MCB active */
> + xgene_pmu->mcb_active_mask = 0x3;
> + /* Probe all active MC(s) */
> + reg = readl(mcbb_csr + CSW_CSWCR);
> + xgene_pmu->mc_active_mask =
> + (reg & MCBADDRMR_DUALMCU_MODE_MASK) ? 0xF : 0x5;
> + } else {
> + /* Single MCB active */
> + xgene_pmu->mcb_active_mask = 0x1;
> + /* Probe all active MC(s) */
> + reg = readl(mcba_csr + CSW_CSWCR);
> + xgene_pmu->mc_active_mask =
> + (reg & MCBADDRMR_DUALMCU_MODE_MASK) ? 0x3 : 0x1;
> + }
> + }
> return 0;
> }

Please split these into separate functions. e.g. have
acpi_pmu_probe_active_mcb_mcu_v1_2() and
acpi_pmu_probe_active_mcb_mcu_v3().

> static char *xgene_pmu_dev_name(struct device *dev, u32 type, int id)
> @@ -997,6 +1439,8 @@ static char *xgene_pmu_dev_name(struct device *dev, u32 type, int id)
> return devm_kasprintf(dev, GFP_KERNEL, "l3c%d", id);
> case PMU_TYPE_IOB:
> return devm_kasprintf(dev, GFP_KERNEL, "iob%d", id);
> + case PMU_TYPE_IOB_SLOW:
> + return devm_kasprintf(dev, GFP_KERNEL, "iob-slow%d", id);

What is IOB slow? How does it relate to the existing IOB?

Thanks,
Mark.

2017-06-02 16:54:35

by Hoan Tran

[permalink] [raw]
Subject: Re: [PATCH v2 1/3] perf: xgene: Parse PMU subnode from the match table

Hi Mark

On Fri, Jun 2, 2017 at 7:59 AM, Mark Rutland <[email protected]> wrote:
> Hi Hoan,
>
> Apologies for the last reply.
>
> On Mon, Apr 03, 2017 at 09:47:55AM -0700, Hoan Tran wrote:
>> +static const struct acpi_device_id xgene_pmu_acpi_type_match[] = {
>> + {"APMC0D5D", PMU_TYPE_L3C},
>> + {"APMC0D5E", PMU_TYPE_IOB},
>> + {"APMC0D5F", PMU_TYPE_MCB},
>> + {"APMC0D60", PMU_TYPE_MC},
>> + {},
>> +};
>> +
>> +static const struct acpi_device_id *xgene_pmu_acpi_match_type(
>> + const struct acpi_device_id *ids,
>> + struct acpi_device *adev)
>> +{
>> + const struct acpi_device_id *match_id = NULL;
>> + const struct acpi_device_id *id;
>> +
>> + for (id = ids; id->id[0] || id->cls; id++) {
>> + if (!acpi_match_device_ids(adev, id))
>> + match_id = id;
>> + else if (match_id)
>> + break;
>> + }
>> +
>> + return match_id;
>> +}
>
> I don't believe this look is necessary. AFAICT, acpi_match_device_ids()
> already iterates over the id table it is given.

The acpi_match_device_ids() function just returns if a device ID is
available on the given list. It does not return the first matching ID.
That's the reason I created this function to find the first matching ID.

>
>> +
>> static acpi_status acpi_pmu_dev_add(acpi_handle handle, u32 level,
>> void *data, void **return_value)
>> {
>> + const struct acpi_device_id *acpi_id;
>> struct xgene_pmu *xgene_pmu = data;
>> struct xgene_pmu_dev_ctx *ctx;
>> struct acpi_device *adev;
>> @@ -1059,17 +1085,11 @@ static acpi_status acpi_pmu_dev_add(acpi_handle handle, u32 level,
>> if (acpi_bus_get_status(adev) || !adev->status.present)
>> return AE_OK;
>>
>> - if (!strcmp(acpi_device_hid(adev), "APMC0D5D"))
>> - ctx = acpi_get_pmu_hw_inf(xgene_pmu, adev, PMU_TYPE_L3C);
>> - else if (!strcmp(acpi_device_hid(adev), "APMC0D5E"))
>> - ctx = acpi_get_pmu_hw_inf(xgene_pmu, adev, PMU_TYPE_IOB);
>> - else if (!strcmp(acpi_device_hid(adev), "APMC0D5F"))
>> - ctx = acpi_get_pmu_hw_inf(xgene_pmu, adev, PMU_TYPE_MCB);
>> - else if (!strcmp(acpi_device_hid(adev), "APMC0D60"))
>> - ctx = acpi_get_pmu_hw_inf(xgene_pmu, adev, PMU_TYPE_MC);
>> - else
>> - ctx = NULL;
>> + acpi_id = xgene_pmu_acpi_match_type(xgene_pmu_acpi_type_match, adev);
>> + if (!acpi_id)
>> + return AE_OK;
>
> As above, and as I covered in my reply to v1, I think the above should
> be:
>
> acpi_id = acpi_match_device_ids(adev, xgene_pmu_acpi_type_match);
> if (!acpi_id)
> return AE_OK;
>
> ... or am I missing something?

The same above.

Thanks
Hoan

>
> With that change:
>
> Acked-by: Mark Rutland <[email protected]>
>
> Thanks,
> Mark.
>
>>
>> + ctx = acpi_get_pmu_hw_inf(xgene_pmu, adev, (u32)acpi_id->driver_data);
>> if (!ctx)
>> return AE_OK;
>>
>> --
>> 1.9.1
>>

2017-06-02 17:24:40

by Mark Rutland

[permalink] [raw]
Subject: Re: [PATCH v2 1/3] perf: xgene: Parse PMU subnode from the match table

On Fri, Jun 02, 2017 at 09:54:32AM -0700, Hoan Tran wrote:
> On Fri, Jun 2, 2017 at 7:59 AM, Mark Rutland <[email protected]> wrote:
> > On Mon, Apr 03, 2017 at 09:47:55AM -0700, Hoan Tran wrote:
> >> +static const struct acpi_device_id *xgene_pmu_acpi_match_type(
> >> + const struct acpi_device_id *ids,
> >> + struct acpi_device *adev)
> >> +{
> >> + const struct acpi_device_id *match_id = NULL;
> >> + const struct acpi_device_id *id;
> >> +
> >> + for (id = ids; id->id[0] || id->cls; id++) {
> >> + if (!acpi_match_device_ids(adev, id))
> >> + match_id = id;
> >> + else if (match_id)
> >> + break;
> >> + }
> >> +
> >> + return match_id;
> >> +}
> >
> > I don't believe this look is necessary. AFAICT, acpi_match_device_ids()
> > already iterates over the id table it is given.
>
> The acpi_match_device_ids() function just returns if a device ID is
> available on the given list. It does not return the first matching ID.
> That's the reason I created this function to find the first matching ID.

Ah, I see. Thanks for correcting me!

Can we use acpi_match_device(ids, &adev->dev), or is that the wrong dev?

Thanks,
Mark.

2017-06-02 20:41:27

by Hoan Tran

[permalink] [raw]
Subject: Re: [PATCH v2 1/3] perf: xgene: Parse PMU subnode from the match table

Hi Mark,

On Fri, Jun 2, 2017 at 10:23 AM, Mark Rutland <[email protected]> wrote:
> On Fri, Jun 02, 2017 at 09:54:32AM -0700, Hoan Tran wrote:
>> On Fri, Jun 2, 2017 at 7:59 AM, Mark Rutland <[email protected]> wrote:
>> > On Mon, Apr 03, 2017 at 09:47:55AM -0700, Hoan Tran wrote:
>> >> +static const struct acpi_device_id *xgene_pmu_acpi_match_type(
>> >> + const struct acpi_device_id *ids,
>> >> + struct acpi_device *adev)
>> >> +{
>> >> + const struct acpi_device_id *match_id = NULL;
>> >> + const struct acpi_device_id *id;
>> >> +
>> >> + for (id = ids; id->id[0] || id->cls; id++) {
>> >> + if (!acpi_match_device_ids(adev, id))
>> >> + match_id = id;
>> >> + else if (match_id)
>> >> + break;
>> >> + }
>> >> +
>> >> + return match_id;
>> >> +}
>> >
>> > I don't believe this look is necessary. AFAICT, acpi_match_device_ids()
>> > already iterates over the id table it is given.
>>
>> The acpi_match_device_ids() function just returns if a device ID is
>> available on the given list. It does not return the first matching ID.
>> That's the reason I created this function to find the first matching ID.
>
> Ah, I see. Thanks for correcting me!
>
> Can we use acpi_match_device(ids, &adev->dev), or is that the wrong dev?

They are subnode device, so they don't have full dev. Because of that,
acpi_match_device doesn't work.

Thanks
Hoan

>
> Thanks,
> Mark.

2017-06-02 21:03:01

by Hoan Tran

[permalink] [raw]
Subject: Re: [PATCH v2 3/3] perf: xgene: Add support for SoC PMU version 3

Hi Mark,

On Fri, Jun 2, 2017 at 9:04 AM, Mark Rutland <[email protected]> wrote:
> Hi Hoan,
>
> Apologies for the delay in getting to this.
>
> On Mon, Apr 03, 2017 at 09:47:57AM -0700, Hoan Tran wrote:
>> This patch adds support for SoC-wide (AKA uncore) Performance Monitoring
>> Unit version 3.
>>
>> It can support up to
>> - 2 IOB PMU instances
>> - 8 L3C PMU instances
>> - 2 MCB PMU instances
>> - 8 MCU PMU instances
>
> Please elaborate on the differences compared with prior versions.
>
> I see that counters are 64-bit. Are there any other important details to
> be aware of?

Yes, let me add 64 bit counter into the patch description. Beside of
that, I don't think anything else besides more PMU instances compared
with the previous version.

>
> [...]
>
>> +static struct attribute *l3c_pmu_v3_events_attrs[] = {
>
>> + XGENE_PMU_EVENT_ATTR(read-hit, 0x01),
>> + XGENE_PMU_EVENT_ATTR(read-miss, 0x02),
>
>> + XGENE_PMU_EVENT_ATTR(reads, 0x08),
>> + XGENE_PMU_EVENT_ATTR(writes, 0x09),
>
>> +};
>
> Some of these are singular (e.g. "read-hit", "read-miss"), while others
> are plural (e.g. "reads", "writes").
>
> The existing driver use the singular form. Please remain consistent with
> that. e.g. turn "reads" into "read".

Will fix it.

>
>> + XGENE_PMU_EVENT_ATTR(PA-req-buf-alloc-all, 0x01),
>
> Likewise, please consistently use lower case.

Yes

>
>> + XGENE_PMU_EVENT_ATTR(pmu-act-sent, 0x01),
>
> Surely we don't need "pmu" in the event names?

Yes, will remove "pmu".

>
> [...]
>
>> static inline u64 xgene_pmu_read_counter32(struct xgene_pmu_dev *pmu_dev,
>> int idx)
>> {
>> return (u64)readl(pmu_dev->inf->csr + PMU_PMEVCNTR0 + (4 * idx));
>> }
>
> I don't think the cast is necessary. Please remove it.
>
>> static inline void
>> +xgene_pmu_write_counter64(struct xgene_pmu_dev *pmu_dev, int idx, u64 val)
>> +{
>> + u32 cnt_lo, cnt_hi;
>> +
>> + cnt_lo = val & 0xFFFFFFFF;
>> + cnt_hi = val >> 32;
>
> cnt_hi = upper_32_bits(val);
> cnt_lo = lower_32_bits(val);
>
> (both are in <linux/kernel.h>)
>
>> +
>> + /* v3 has 64-bit counter registers composed by 2 32-bit registers */
>> + xgene_pmu_write_counter32(pmu_dev, 2 * idx, val);
>> + xgene_pmu_write_counter32(pmu_dev, 2 * idx + 1, val >> 32);
>
> Please use cnt_hi and cnt_lo for the writes.
>
> [...]
>
>> @@ -621,7 +989,7 @@ static void xgene_perf_event_set_period(struct perf_event *event)
>> struct xgene_pmu *xgene_pmu = pmu_dev->parent;
>> struct hw_perf_event *hw = &event->hw;
>> /*
>> - * The X-Gene PMU counters have a period of 2^32. To account for the
>> + * The X-Gene PMU counters have a period of 2^32 or more. To account for the
>> * possiblity of extreme interrupt latency we program for a period of
>> * half that. Hopefully we can handle the interrupt before another 2^31
>> * events occur and the counter overtakes its previous value.
>
> This comment is now a little out-of-date, as we don't start 64-bit
> counters at half their period.
>
> I guess we don't expect a 64-bit counter to overflow, so can we state
> that in the comment?

How about this one.

/*
* For 32 bit counter, it has a period of 2^32. To account for the
* possibility of extreme interrupt latency we program for a period of
* half that. Hopefully, we can handle the interrupt before another 2^31
* events occur and the counter overtakes its previous value.
* For 64 bit counter, we don't expect it overflow.
*/

>
> [...]
>
>> -static int acpi_pmu_probe_active_mcb_mcu(struct xgene_pmu *xgene_pmu,
>> +static int acpi_pmu_probe_active_mcb_mcu_l3c(struct xgene_pmu *xgene_pmu,
>> struct platform_device *pdev)
>> {
>> void __iomem *csw_csr, *mcba_csr, *mcbb_csr;
>> struct resource *res;
>> unsigned int reg;
>> + u32 mcb0routing;
>> + u32 mcb1routing;
>>
>> res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
>> csw_csr = devm_ioremap_resource(&pdev->dev, res);
>> @@ -899,41 +1309,72 @@ static int acpi_pmu_probe_active_mcb_mcu(struct xgene_pmu *xgene_pmu,
>> return PTR_ERR(csw_csr);
>> }
>>
>> - res = platform_get_resource(pdev, IORESOURCE_MEM, 2);
>> - mcba_csr = devm_ioremap_resource(&pdev->dev, res);
>> - if (IS_ERR(mcba_csr)) {
>> - dev_err(&pdev->dev, "ioremap failed for MCBA CSR resource\n");
>> - return PTR_ERR(mcba_csr);
>> - }
>> -
>> - res = platform_get_resource(pdev, IORESOURCE_MEM, 3);
>> - mcbb_csr = devm_ioremap_resource(&pdev->dev, res);
>> - if (IS_ERR(mcbb_csr)) {
>> - dev_err(&pdev->dev, "ioremap failed for MCBB CSR resource\n");
>> - return PTR_ERR(mcbb_csr);
>> - }
>> -
>> reg = readl(csw_csr + CSW_CSWCR);
>> - if (reg & CSW_CSWCR_DUALMCB_MASK) {
>> - /* Dual MCB active */
>> - xgene_pmu->mcb_active_mask = 0x3;
>> - /* Probe all active MC(s) */
>> - reg = readl(mcbb_csr + CSW_CSWCR);
>> - xgene_pmu->mc_active_mask =
>> - (reg & MCBADDRMR_DUALMCU_MODE_MASK) ? 0xF : 0x5;
>> + if (xgene_pmu->version == PCP_PMU_V3) {
>> + mcb0routing = CSW_CSWCR_MCB0_ROUTING(reg);
>> + mcb1routing = CSW_CSWCR_MCB0_ROUTING(reg);
>> + if (reg & CSW_CSWCR_DUALMCB_MASK) {
>> + /* Dual MCB active */
>> + xgene_pmu->mcb_active_mask = 0x3;
>> + /* Probe all active L3C(s), maximum is 8 */
>> + xgene_pmu->l3c_active_mask = 0xFF;
>> + /* Probe all active MC(s), maximum is 8 */
>> + if ((mcb0routing == 0x2) && (mcb1routing == 0x2))
>> + xgene_pmu->mc_active_mask = 0xFF;
>> + else if ((mcb0routing == 0x1) && (mcb1routing == 0x1))
>> + xgene_pmu->mc_active_mask = 0x33;
>> + else
>> + xgene_pmu->mc_active_mask = 0x11;
>> +
>> + } else {
>> + /* Single MCB active */
>> + xgene_pmu->mcb_active_mask = 0x1;
>> + /* Probe all active L3C(s), maximum is 4 */
>> + xgene_pmu->l3c_active_mask = 0x0F;
>> + /* Probe all active MC(s), maximum is 4 */
>> + if ((mcb0routing == 0x2) && (mcb1routing == 0x0))
>> + xgene_pmu->mc_active_mask = 0x0F;
>> + else if ((mcb0routing == 0x1) && (mcb1routing == 0x0))
>> + xgene_pmu->mc_active_mask = 0x03;
>> + else
>> + xgene_pmu->mc_active_mask = 0x01;
>> + }
>> } else {
>> - /* Single MCB active */
>> - xgene_pmu->mcb_active_mask = 0x1;
>> - /* Probe all active MC(s) */
>> - reg = readl(mcba_csr + CSW_CSWCR);
>> - xgene_pmu->mc_active_mask =
>> - (reg & MCBADDRMR_DUALMCU_MODE_MASK) ? 0x3 : 0x1;
>> - }
>> + res = platform_get_resource(pdev, IORESOURCE_MEM, 2);
>> + mcba_csr = devm_ioremap_resource(&pdev->dev, res);
>> + if (IS_ERR(mcba_csr)) {
>> + dev_err(&pdev->dev, "ioremap failed for MCBA CSR resource\n");
>> + return PTR_ERR(mcba_csr);
>> + }
>> +
>> + res = platform_get_resource(pdev, IORESOURCE_MEM, 3);
>> + mcbb_csr = devm_ioremap_resource(&pdev->dev, res);
>> + if (IS_ERR(mcbb_csr)) {
>> + dev_err(&pdev->dev, "ioremap failed for MCBB CSR resource\n");
>> + return PTR_ERR(mcbb_csr);
>> + }
>>
>> + xgene_pmu->l3c_active_mask = 0x1;
>> + if (reg & CSW_CSWCR_DUALMCB_MASK) {
>> + /* Dual MCB active */
>> + xgene_pmu->mcb_active_mask = 0x3;
>> + /* Probe all active MC(s) */
>> + reg = readl(mcbb_csr + CSW_CSWCR);
>> + xgene_pmu->mc_active_mask =
>> + (reg & MCBADDRMR_DUALMCU_MODE_MASK) ? 0xF : 0x5;
>> + } else {
>> + /* Single MCB active */
>> + xgene_pmu->mcb_active_mask = 0x1;
>> + /* Probe all active MC(s) */
>> + reg = readl(mcba_csr + CSW_CSWCR);
>> + xgene_pmu->mc_active_mask =
>> + (reg & MCBADDRMR_DUALMCU_MODE_MASK) ? 0x3 : 0x1;
>> + }
>> + }
>> return 0;
>> }
>
> Please split these into separate functions. e.g. have
> acpi_pmu_probe_active_mcb_mcu_v1_2() and
> acpi_pmu_probe_active_mcb_mcu_v3().
>

Yes, I can do it.

>> static char *xgene_pmu_dev_name(struct device *dev, u32 type, int id)
>> @@ -997,6 +1439,8 @@ static char *xgene_pmu_dev_name(struct device *dev, u32 type, int id)
>> return devm_kasprintf(dev, GFP_KERNEL, "l3c%d", id);
>> case PMU_TYPE_IOB:
>> return devm_kasprintf(dev, GFP_KERNEL, "iob%d", id);
>> + case PMU_TYPE_IOB_SLOW:
>> + return devm_kasprintf(dev, GFP_KERNEL, "iob-slow%d", id);
>
> What is IOB slow? How does it relate to the existing IOB?

IOB SLOW PMU is another PMU, it's not related to IOB PMU. They are
sitting in 2 different clock domains.

Thanks
Hoan

>
> Thanks,
> Mark.