2017-07-03 09:40:03

by Anju T Sudhakar

Subject: [PATCH v12 05/10] powerpc/perf: IMC pmu cpumask and cpuhotplug support

Add a cpumask attribute to be used by each IMC PMU. For nest PMUs, only
one CPU (any online CPU) from each chip is designated to read the counters.

On CPU hotplug, the dying CPU is checked to see whether it is one of the
designated CPUs; if so, the next online CPU from the same chip (for nest
units) is designated as the new CPU to read counters. For this purpose, we
introduce a new state: CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE.
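
For illustration, a minimal userspace sketch of how a tool might consume
the exported cpumask before opening a nest IMC event. This is not part of
the patch; the PMU name "nest_mcs01", the sysfs paths and the event config
value are illustrative assumptions only.

/*
 * Hypothetical usage sketch: read the PMU type and designated CPU from
 * sysfs, then open a system-wide counting event on that CPU.
 */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

static int perf_event_open(struct perf_event_attr *attr, pid_t pid,
			   int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr attr;
	char buf[64];
	uint64_t count;
	int type, cpu, fd;
	FILE *f;

	/* PMU type id registered by the kernel (path is an assumption) */
	f = fopen("/sys/bus/event_source/devices/nest_mcs01/type", "r");
	if (!f || fscanf(f, "%d", &type) != 1)
		return 1;
	fclose(f);

	/* Designated CPU (one per chip) exported by this patch */
	f = fopen("/sys/bus/event_source/devices/nest_mcs01/cpumask", "r");
	if (!f || !fgets(buf, sizeof(buf), f))
		return 1;
	fclose(f);
	cpu = atoi(buf);		/* first CPU listed in the mask */

	memset(&attr, 0, sizeof(attr));
	attr.type = type;
	attr.size = sizeof(attr);
	attr.config = 0x118;		/* illustrative event offset */
	attr.disabled = 1;

	/* Nest IMC counters are chip-wide: count system-wide on that CPU */
	fd = perf_event_open(&attr, -1, cpu, -1, 0);
	if (fd < 0)
		return 1;

	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	sleep(1);
	if (read(fd, &count, sizeof(count)) != sizeof(count))
		return 1;
	printf("count: %llu\n", (unsigned long long)count);
	return 0;
}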

Signed-off-by: Anju T Sudhakar <[email protected]>
Signed-off-by: Hemant Kumar <[email protected]>
Signed-off-by: Madhavan Srinivasan <[email protected]>
---
arch/powerpc/include/asm/imc-pmu.h | 11 +
arch/powerpc/include/asm/opal-api.h | 10 +-
arch/powerpc/include/asm/opal.h | 4 +
arch/powerpc/perf/imc-pmu.c | 280 ++++++++++++++++++++++++-
arch/powerpc/platforms/powernv/opal-imc.c | 21 +-
arch/powerpc/platforms/powernv/opal-wrappers.S | 3 +
include/linux/cpuhotplug.h | 1 +
7 files changed, 324 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/imc-pmu.h b/arch/powerpc/include/asm/imc-pmu.h
index 25d0c57d14fe..aeed903b2a79 100644
--- a/arch/powerpc/include/asm/imc-pmu.h
+++ b/arch/powerpc/include/asm/imc-pmu.h
@@ -24,6 +24,7 @@
* For static allocation of some of the structures.
*/
#define IMC_MAX_PMUS 32
+#define IMC_MAX_CHIPS 32

/*
* This macro is used for memory buffer allocation of
@@ -94,6 +95,16 @@ struct imc_pmu {
const struct attribute_group *attr_groups[4];
};

+/*
+ * Structure to hold id, lock and reference count for the imc events which
+ * are inited.
+ */
+struct imc_pmu_ref {
+ unsigned int id;
+ struct mutex lock;
+ int refc;
+};
+
/* In-Memory Collection Counters Type */
enum {
IMC_COUNTER_PER_CHIP = 0x10,
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index cb3e6242a78c..fdacb030cd77 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -190,7 +190,10 @@
#define OPAL_NPU_INIT_CONTEXT 146
#define OPAL_NPU_DESTROY_CONTEXT 147
#define OPAL_NPU_MAP_LPAR 148
-#define OPAL_LAST 148
+#define OPAL_IMC_COUNTERS_INIT 149
+#define OPAL_IMC_COUNTERS_START 150
+#define OPAL_IMC_COUNTERS_STOP 151
+#define OPAL_LAST 151

/* Device tree flags */

@@ -1003,6 +1006,11 @@ enum {
XIVE_DUMP_EMU_STATE = 5,
};

+/* Argument to OPAL_IMC_COUNTERS_* */
+enum {
+ OPAL_IMC_COUNTERS_NEST = 1,
+};
+
#endif /* __ASSEMBLY__ */

#endif /* __OPAL_API_H */
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 588fb1c23af9..48842d2d465c 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -268,6 +268,10 @@ int64_t opal_xive_free_irq(uint32_t girq);
int64_t opal_xive_sync(uint32_t type, uint32_t id);
int64_t opal_xive_dump(uint32_t type, uint32_t id);

+int64_t opal_imc_counters_init(uint32_t type, uint64_t address, uint64_t cpu);
+int64_t opal_imc_counters_start(uint32_t type, uint64_t cpu_pir);
+int64_t opal_imc_counters_stop(uint32_t type, uint64_t cpu_pir);
+
/* Internal functions */
extern int early_init_dt_scan_opal(unsigned long node, const char *uname,
int depth, void *data);
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 4e2f837b8bb7..ca9662bea7d6 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -20,6 +20,16 @@

/* Needed for sanity check */
struct imc_pmu *per_nest_pmu_arr[IMC_MAX_PMUS];
+static cpumask_t nest_imc_cpumask;
+static int nest_imc_cpumask_initialized;
+static int nest_pmus;
+/*
+ * Used to avoid races in counting the nest-pmu units during hotplug
+ * register and unregister
+ */
+static DEFINE_MUTEX(imc_nest_inited_reserve);
+
+struct imc_pmu_ref *nest_imc_refc;

struct imc_pmu *imc_event_to_pmu(struct perf_event *event)
{
@@ -43,12 +53,183 @@ static struct attribute_group imc_format_group = {
.attrs = nest_imc_format_attrs,
};

+/* Get the cpumask printed to a buffer "buf" */
+static ssize_t imc_pmu_cpumask_get_attr(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ cpumask_t *active_mask;
+
+ active_mask = &nest_imc_cpumask;
+ return cpumap_print_to_pagebuf(true, buf, active_mask);
+}
+
+static DEVICE_ATTR(cpumask, S_IRUGO, imc_pmu_cpumask_get_attr, NULL);
+
+static struct attribute *imc_pmu_cpumask_attrs[] = {
+ &dev_attr_cpumask.attr,
+ NULL,
+};
+
+static struct attribute_group imc_pmu_cpumask_attr_group = {
+ .attrs = imc_pmu_cpumask_attrs,
+};
+
+static void nest_change_cpu_context(int old_cpu, int new_cpu)
+{
+ struct imc_pmu **pn = per_nest_pmu_arr;
+ int i;
+
+ if (old_cpu < 0 || new_cpu < 0)
+ return;
+
+ for (i = 0; i < IMC_MAX_PMUS && *pn; i++, pn++)
+ perf_pmu_migrate_context(&(*pn)->pmu, old_cpu, new_cpu);
+}
+
+/* get_nest_pmu_ref: Return the imc_pmu_ref struct for the given node */
+static struct imc_pmu_ref *get_nest_pmu_ref(unsigned int node_id)
+{
+ int nid, i = 0;
+
+ if (!nest_imc_refc)
+ return NULL;
+
+ for_each_online_node(nid) {
+ if (nest_imc_refc[i].id == node_id)
+ return &nest_imc_refc[i];
+ i++;
+ }
+ return NULL;
+}
+
+static int ppc_nest_imc_cpu_offline(unsigned int cpu)
+{
+ int nid, target = -1;
+ const struct cpumask *l_cpumask;
+ struct imc_pmu_ref *ref;
+
+ /*
+ * Check in the designated list for this cpu. Don't bother
+ * if it is not one of them.
+ */
+ if (!cpumask_test_and_clear_cpu(cpu, &nest_imc_cpumask))
+ return 0;
+
+ /*
+ * Now that this cpu is one of the designated cpus,
+ * find the next cpu: a) which is online and b) in the same chip.
+ */
+ nid = cpu_to_node(cpu);
+ l_cpumask = cpumask_of_node(nid);
+ target = cpumask_any_but(l_cpumask, cpu);
+
+ /*
+ * Update the cpumask with the target cpu and
+ * migrate the context if needed
+ */
+ if (target >= 0 && target < nr_cpu_ids) {
+ cpumask_set_cpu(target, &nest_imc_cpumask);
+ nest_change_cpu_context(cpu, target);
+ } else {
+ opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST,
+ get_hard_smp_processor_id(cpu));
+ /*
+ * If this is the last cpu in this chip, then skip the lock and
+ * make the reference count on this chip zero.
+ */
+ ref = get_nest_pmu_ref(nid);
+ if (!ref)
+ return -EINVAL;
+
+ ref->refc = 0;
+ }
+ return 0;
+}
+
+static int ppc_nest_imc_cpu_online(unsigned int cpu)
+{
+ const struct cpumask *l_cpumask;
+ static struct cpumask tmp_mask;
+ int res;
+
+ /* Get the cpumask of this node */
+ l_cpumask = cpumask_of_node(cpu_to_node(cpu));
+
+ /*
+ * If this is not the first online CPU on this node, then
+ * just return.
+ */
+ if (cpumask_and(&tmp_mask, l_cpumask, &nest_imc_cpumask))
+ return 0;
+
+ /*
+ * If this is the first online cpu on this node
+ * disable the nest counters by making an OPAL call.
+ */
+ res = opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST,
+ get_hard_smp_processor_id(cpu));
+ if (res)
+ return res;
+
+ /* Make this CPU the designated target for counter collection */
+ cpumask_set_cpu(cpu, &nest_imc_cpumask);
+ return 0;
+}
+
+static int nest_pmu_cpumask_init(void)
+{
+ return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE,
+ "perf/powerpc/imc:online",
+ ppc_nest_imc_cpu_online,
+ ppc_nest_imc_cpu_offline);
+}
+
+static void nest_imc_counters_release(struct perf_event *event)
+{
+ int rc, node_id;
+ struct imc_pmu_ref *ref;
+
+ if (event->cpu < 0)
+ return;
+
+ node_id = cpu_to_node(event->cpu);
+
+ /*
+ * See if we need to disable the nest PMU.
+ * If no events are currently in use, then we have to take a
+ * mutex to ensure that we don't race with another task enabling
+ * or disabling the nest counters.
+ */
+ ref = get_nest_pmu_ref(node_id);
+ if (!ref)
+ return;
+
+ /* Take the mutex lock for this node and then decrement the reference count */
+ mutex_lock(&ref->lock);
+ ref->refc--;
+ if (ref->refc == 0) {
+ rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST,
+ get_hard_smp_processor_id(event->cpu));
+ if (rc) {
+ mutex_unlock(&ref->lock);
+ pr_err("IMC: Unable to stop the counters for node %d\n", node_id);
+ return;
+ }
+ } else if (ref->refc < 0) {
+ WARN(1, "nest-imc: Invalid event reference count\n");
+ ref->refc = 0;
+ }
+ mutex_unlock(&ref->lock);
+}
+
static int nest_imc_event_init(struct perf_event *event)
{
- int chip_id;
+ int chip_id, rc, node_id;
u32 l_config, config = event->attr.config;
struct imc_mem_info *pcni;
struct imc_pmu *pmu;
+ struct imc_pmu_ref *ref;
bool flag = false;

if (event->attr.type != event->pmu->type)
@@ -102,6 +283,31 @@ static int nest_imc_event_init(struct perf_event *event)
l_config = config & IMC_EVENT_OFFSET_MASK;
event->hw.event_base = (u64)pcni->vbase[l_config/PAGE_SIZE] +
(l_config & ~PAGE_MASK);
+ node_id = cpu_to_node(event->cpu);
+
+ /*
+ * Get the imc_pmu_ref struct for this node.
+ * Take the mutex lock and then increment the count of nest pmu events
+ * inited.
+ */
+ ref = get_nest_pmu_ref(node_id);
+ if (!ref)
+ return -EINVAL;
+
+ mutex_lock(&ref->lock);
+ if (ref->refc == 0) {
+ rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_NEST,
+ get_hard_smp_processor_id(event->cpu));
+ if (rc) {
+ mutex_unlock(&ref->lock);
+ pr_err("IMC: Unable to start the counters for node %d\n", node_id);
+ return rc;
+ }
+ }
+ ++ref->refc;
+ mutex_unlock(&ref->lock);
+
+ event->destroy = nest_imc_counters_release;
return 0;
}

@@ -179,6 +385,7 @@ static int update_pmu_ops(struct imc_pmu *pmu)
pmu->pmu.start = imc_event_start;
pmu->pmu.stop = imc_event_stop;
pmu->pmu.read = imc_perf_event_update;
+ pmu->attr_groups[IMC_CPUMASK_ATTR] = &imc_pmu_cpumask_attr_group;
pmu->attr_groups[IMC_FORMAT_ATTR] = &imc_format_group;
pmu->pmu.attr_groups = pmu->attr_groups;

@@ -242,18 +449,71 @@ static int update_events_in_group(struct imc_events *events,
return 0;
}

+/* init_nest_pmu_ref: Initialize the imc_pmu_ref struct for all the nodes */
+static int init_nest_pmu_ref(void)
+{
+ int nid, i = 0;
+
+ nest_imc_refc = kzalloc((sizeof(struct imc_pmu_ref) *
+ IMC_MAX_CHIPS), GFP_KERNEL);
+
+ if (!nest_imc_refc)
+ return -ENOMEM;
+
+ for_each_online_node(nid) {
+ nest_imc_refc[i].id = nid;
+ /*
+ * Mutex lock to avoid races while tracking the number of
+ * sessions using the chip's nest pmu units.
+ */
+ mutex_init(&nest_imc_refc[i].lock);
+ i++;
+ }
+ return 0;
+}
+
/*
* init_imc_pmu : Setup and register the IMC pmu device.
*
* @events: events memory for this pmu.
* @idx: number of event entries created.
* @pmu_ptr: memory allocated for this pmu.
+ *
+ * init_imc_pmu() setup the cpu mask information for these pmus and setup
+ * the state machine hotplug notifiers as well.
*/
int init_imc_pmu(struct imc_events *events, int idx,
struct imc_pmu *pmu_ptr)
{
int ret;

+ /*
+ * Register for cpu hotplug notification.
+ *
+ * Nest imc pmu need only one cpu per chip, we initialize the cpumask
+ * for the first nest imc pmu and use the same for the rest.
+ * To handle the cpuhotplug callback unregister, we track the number of
+ * nest pmus in "nest_pmus".
+ * "nest_imc_cpumask_initialized" is set to zero during cpuhotplug
+ * callback unregister.
+ */
+ mutex_lock(&imc_nest_inited_reserve);
+ if (nest_pmus == 0) {
+ ret = init_nest_pmu_ref();
+ if (ret) {
+ mutex_unlock(&imc_nest_inited_reserve);
+ goto err_free;
+ }
+ ret = nest_pmu_cpumask_init();
+ if (ret) {
+ mutex_unlock(&imc_nest_inited_reserve);
+ goto err_free;
+ }
+ nest_imc_cpumask_initialized = 1;
+ }
+ nest_pmus++;
+ mutex_unlock(&imc_nest_inited_reserve);
+
ret = update_events_in_group(events, idx, pmu_ptr);
if (ret)
goto err_free;
@@ -278,6 +538,22 @@ int init_imc_pmu(struct imc_events *events, int idx,
kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs);
kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]);
}
-
+ if (pmu_ptr->domain == IMC_DOMAIN_NEST) {
+ /*
+ * If no nest pmu units are registered, then obtain the mutex
+ * lock and unregister the hotplug callback.
+ */
+ mutex_lock(&imc_nest_inited_reserve);
+ --nest_pmus;
+ if (nest_pmus <= 0) {
+ if (nest_imc_cpumask_initialized == 1) {
+ cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE);
+ nest_imc_cpumask_initialized = 0;
+ }
+ kfree(nest_imc_refc);
+ nest_pmus = 0;
+ }
+ mutex_unlock(&imc_nest_inited_reserve);
+ }
return ret;
}
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c
index a68d66d1ddb1..406f7c10850a 100644
--- a/arch/powerpc/platforms/powernv/opal-imc.c
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -467,6 +467,19 @@ static int imc_pmu_create(struct device_node *parent, int pmu_index, int domain)
return ret;
}

+static void disable_nest_pmu_counters(void)
+{
+ int nid, cpu;
+ struct cpumask *l_cpumask;
+
+ for_each_online_node(nid) {
+ l_cpumask = cpumask_of_node(nid);
+ cpu = cpumask_first(l_cpumask);
+ opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST,
+ get_hard_smp_processor_id(cpu));
+ }
+}
+
static int opal_imc_counters_probe(struct platform_device *pdev)
{
struct device_node *imc_dev = NULL;
@@ -477,11 +490,13 @@ static int opal_imc_counters_probe(struct platform_device *pdev)
return -ENODEV;

/*
- * Check whether this is kdump kernel. If yes, just return.
+ * Check whether this is kdump kernel. If yes, force the engines to
+ * stop and return.
*/
- if (is_kdump_kernel())
+ if (is_kdump_kernel()) {
+ disable_nest_pmu_counters();
return -ENODEV;
-
+ }
imc_dev = pdev->dev.of_node;
if (!imc_dev)
return -ENODEV;
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index f620572f891f..1828b24fbb53 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -310,3 +310,6 @@ OPAL_CALL(opal_xive_dump, OPAL_XIVE_DUMP);
OPAL_CALL(opal_npu_init_context, OPAL_NPU_INIT_CONTEXT);
OPAL_CALL(opal_npu_destroy_context, OPAL_NPU_DESTROY_CONTEXT);
OPAL_CALL(opal_npu_map_lpar, OPAL_NPU_MAP_LPAR);
+OPAL_CALL(opal_imc_counters_init, OPAL_IMC_COUNTERS_INIT);
+OPAL_CALL(opal_imc_counters_start, OPAL_IMC_COUNTERS_START);
+OPAL_CALL(opal_imc_counters_stop, OPAL_IMC_COUNTERS_STOP);
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 0f2a80377520..dca7f2b07f93 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -139,6 +139,7 @@ enum cpuhp_state {
CPUHP_AP_PERF_ARM_L2X0_ONLINE,
CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE,
CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE,
+ CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE,
CPUHP_AP_WORKQUEUE_ONLINE,
CPUHP_AP_RCUTREE_ONLINE,
CPUHP_AP_ONLINE_DYN,
--
2.11.0


2017-07-03 09:40:38

by Anju T Sudhakar

Subject: [PATCH v12 07/10] powerpc/perf: PMU functions for Core IMC and hotplugging

From: Madhavan Srinivasan <[email protected]>

Add a PMU function to initialize a core IMC event, along with a cpumask
initialization function for the core IMC PMU. For initialization, memory
is allocated per core, where the data for the core IMC counters will be
accumulated. The base address of this page is sent to OPAL via an OPAL
call, which initializes various SCOMs related to core IMC initialization.
On any error, the pages are freed and the core IMC counters are disabled
using the same OPAL call.

For CPU hotplug, a cpumask is initialized which contains one online CPU
from each core. If a CPU goes offline, we check whether it belongs to the
core IMC cpumask; if so, we migrate the PMU context to any other online
CPU (if available) in that core. If a CPU comes back online, it is added
to the core IMC cpumask only if no other CPU from that core was already
in the cpumask.

To register the hotplug functions for core_imc, a new state
CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE is added to the list of existing
states.

The patch also adds an OPAL device shutdown callback, which is needed to
disable the IMC core engine to handle kexec.
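
In essence, the core engine is started on first use and stopped on last
release, guarded by a per-core mutex and reference count. A condensed
sketch of that pattern follows; the helper names core_engine_get()/
core_engine_put() are illustrative only, the real code lives in
core_imc_event_init() and core_imc_counters_release() in the diff below.

/* Illustrative condensation of the per-core enable/disable pattern. */
static int core_engine_get(struct imc_pmu_ref *ref, int cpu)
{
	int rc = 0;

	mutex_lock(&ref->lock);
	if (ref->refc == 0)		/* first event on this core */
		rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_CORE,
					     get_hard_smp_processor_id(cpu));
	if (!rc)
		ref->refc++;
	mutex_unlock(&ref->lock);
	return rc;
}

static void core_engine_put(struct imc_pmu_ref *ref, int cpu)
{
	mutex_lock(&ref->lock);
	if (--ref->refc == 0)		/* last event on this core */
		opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
				       get_hard_smp_processor_id(cpu));
	mutex_unlock(&ref->lock);
}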

Signed-off-by: Hemant Kumar <[email protected]>
Signed-off-by: Anju T Sudhakar <[email protected]>
Signed-off-by: Madhavan Srinivasan <[email protected]>
---
arch/powerpc/include/asm/opal-api.h | 1 +
arch/powerpc/perf/imc-pmu.c | 371 +++++++++++++++++++++++++++---
arch/powerpc/platforms/powernv/opal-imc.c | 25 ++
include/linux/cpuhotplug.h | 1 +
4 files changed, 371 insertions(+), 27 deletions(-)

diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index fdacb030cd77..0d83427b7467 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -1009,6 +1009,7 @@ enum {
/* Argument to OPAL_IMC_COUNTERS_* */
enum {
OPAL_IMC_COUNTERS_NEST = 1,
+ OPAL_IMC_COUNTERS_CORE = 2,
};

#endif /* __ASSEMBLY__ */
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 041d3097d42a..c1a275ed2510 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -1,5 +1,5 @@
/*
- * Nest Performance Monitor counter support.
+ * IMC Performance Monitor counter support.
*
* Copyright (C) 2017 Madhavan Srinivasan, IBM Corporation.
* (C) 2017 Anju T Sudhakar, IBM Corporation.
@@ -21,6 +21,7 @@
/* Needed for sanity check */
struct imc_pmu *per_nest_pmu_arr[IMC_MAX_PMUS];
static cpumask_t nest_imc_cpumask;
+static cpumask_t core_imc_cpumask;
static int nest_imc_cpumask_initialized;
static int nest_pmus;
/*
@@ -30,7 +31,7 @@ static int nest_pmus;
static DEFINE_MUTEX(imc_nest_inited_reserve);

struct imc_pmu_ref *nest_imc_refc;
-
+struct imc_pmu_ref *core_imc_refc;
struct imc_pmu *core_imc_pmu;

struct imc_pmu *imc_event_to_pmu(struct perf_event *event)
@@ -55,14 +56,32 @@ static struct attribute_group imc_format_group = {
.attrs = nest_imc_format_attrs,
};

+static struct attribute *core_imc_format_attrs[] = {
+ &format_attr_event.attr,
+ &format_attr_offset.attr,
+ &format_attr_rvalue.attr,
+ NULL,
+};
+
+static struct attribute_group core_imc_format_group = {
+ .name = "format",
+ .attrs = core_imc_format_attrs,
+};
+
/* Get the cpumask printed to a buffer "buf" */
static ssize_t imc_pmu_cpumask_get_attr(struct device *dev,
struct device_attribute *attr,
char *buf)
{
+ struct pmu *pmu = dev_get_drvdata(dev);
cpumask_t *active_mask;

- active_mask = &nest_imc_cpumask;
+ if (!strncmp(pmu->name, "nest_", strlen("nest_")))
+ active_mask = &nest_imc_cpumask;
+ else if (!strncmp(pmu->name, "core_", strlen("core_")))
+ active_mask = &core_imc_cpumask;
+ else
+ return 0;
return cpumap_print_to_pagebuf(true, buf, active_mask);
}

@@ -313,6 +332,242 @@ static int nest_imc_event_init(struct perf_event *event)
return 0;
}

+/*
+ * core_imc_mem_init : Initializes memory for the current core.
+ *
+ * Uses alloc_pages_node() and uses the returned address as an argument to
+ * an opal call to configure the pdbar. The address sent as an argument is
+ * converted to physical address before the opal call is made. This is the
+ * base address at which the core imc counters are populated.
+ */
+static int core_imc_mem_init(int cpu, int size)
+{
+ int phys_id, rc = 0, core_id = (cpu / threads_per_core);
+ struct imc_mem_info *mem_info;
+
+ /*
+ * alloc_pages_node() will allocate memory for core in the
+ * local node only.
+ */
+ phys_id = topology_physical_package_id(cpu);
+ mem_info = &core_imc_pmu->mem_info[core_id];
+ mem_info->id = core_id;
+
+ /* We need only vbase[0] for core counters */
+ mem_info->vbase[0] = page_address(alloc_pages_node(phys_id,
+ GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE,
+ get_order(size)));
+ if (!mem_info->vbase[0])
+ return -ENOMEM;
+
+ /* Init the mutex */
+ core_imc_refc[core_id].id = core_id;
+ mutex_init(&core_imc_refc[core_id].lock);
+
+ rc = opal_imc_counters_init(OPAL_IMC_COUNTERS_CORE,
+ (u64)virt_to_phys((void *)mem_info->vbase[0]),
+ get_hard_smp_processor_id(cpu));
+ if (rc) {
+ free_pages((u64)mem_info->vbase[0], get_order(size));
+ mem_info->vbase[0] = NULL;
+ }
+
+ return rc;
+}
+
+static bool is_core_imc_mem_inited(int cpu)
+{
+ struct imc_mem_info *mem_info;
+ int core_id = (cpu / threads_per_core);
+
+ mem_info = &core_imc_pmu->mem_info[core_id];
+ if ((mem_info->id == core_id) && (mem_info->vbase[0] != NULL))
+ return true;
+
+ return false;
+}
+
+static int ppc_core_imc_cpu_online(unsigned int cpu)
+{
+ const struct cpumask *l_cpumask;
+ static struct cpumask tmp_mask;
+ int ret = 0;
+
+ /* Get the cpumask for this core */
+ l_cpumask = cpu_sibling_mask(cpu);
+
+ /* If a cpu for this core is already set, then, don't do anything */
+ if (cpumask_and(&tmp_mask, l_cpumask, &core_imc_cpumask))
+ return 0;
+
+ if (!is_core_imc_mem_inited(cpu)) {
+ ret = core_imc_mem_init(cpu, core_imc_pmu->counter_mem_size);
+ if (ret) {
+ pr_info("core_imc memory allocation for cpu %d failed\n", cpu);
+ return ret;
+ }
+ } else {
+ opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
+ get_hard_smp_processor_id(cpu));
+ }
+
+ /* set the cpu in the mask, and change the context */
+ cpumask_set_cpu(cpu, &core_imc_cpumask);
+ return 0;
+}
+
+static int ppc_core_imc_cpu_offline(unsigned int cpu)
+{
+ unsigned int ncpu, core_id;
+ struct imc_pmu_ref *ref;
+
+ /*
+ * clear this cpu out of the mask, if not present in the mask,
+ * don't bother doing anything.
+ */
+ if (!cpumask_test_and_clear_cpu(cpu, &core_imc_cpumask))
+ return 0;
+
+ /* Find any online cpu in that core except the current "cpu" */
+ ncpu = cpumask_any_but(cpu_sibling_mask(cpu), cpu);
+
+ if (ncpu >= 0 && ncpu < nr_cpu_ids) {
+ cpumask_set_cpu(ncpu, &core_imc_cpumask);
+ perf_pmu_migrate_context(&core_imc_pmu->pmu, cpu, ncpu);
+ } else {
+ /*
+ * If this is the last cpu in this core, then skip the lock and
+ * make the reference count for this core zero.
+ */
+ opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
+ get_hard_smp_processor_id(cpu));
+ core_id = cpu / threads_per_core;
+ ref = &core_imc_refc[core_id];
+ if (!ref)
+ return -EINVAL;
+
+ ref->refc = 0;
+ }
+ return 0;
+}
+
+static int core_imc_pmu_cpumask_init(void)
+{
+ return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE,
+ "perf/powerpc/imc_core:online",
+ ppc_core_imc_cpu_online,
+ ppc_core_imc_cpu_offline);
+}
+
+static void core_imc_counters_release(struct perf_event *event)
+{
+ int rc, core_id;
+ struct imc_pmu_ref *ref;
+
+ if (event->cpu < 0)
+ return;
+ /*
+ * See if we need to disable the IMC PMU.
+ * If no events are currently in use, then we have to take a
+ * mutex to ensure that we don't race with another task enabling
+ * or disabling the core counters.
+ */
+ core_id = event->cpu / threads_per_core;
+
+ /* Take the mutex lock and decrement the reference count for this core */
+ ref = &core_imc_refc[core_id];
+ if (!ref)
+ return;
+
+ mutex_lock(&ref->lock);
+ ref->refc--;
+ if (ref->refc == 0) {
+ rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
+ get_hard_smp_processor_id(event->cpu));
+ if (rc) {
+ mutex_unlock(&ref->lock);
+ pr_err("IMC: Unable to stop the counters for core %d\n", core_id);
+ return;
+ }
+ } else if (ref->refc < 0) {
+ WARN(1, "core-imc: Invalid event reference count\n");
+ ref->refc = 0;
+ }
+ mutex_unlock(&ref->lock);
+}
+
+static int core_imc_event_init(struct perf_event *event)
+{
+ int core_id, rc;
+ u64 config = event->attr.config;
+ struct imc_mem_info *pcmi;
+ struct imc_pmu *pmu;
+ struct imc_pmu_ref *ref;
+
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ /* Sampling not supported */
+ if (event->hw.sample_period)
+ return -EINVAL;
+
+ /* unsupported modes and filters */
+ if (event->attr.exclude_user ||
+ event->attr.exclude_kernel ||
+ event->attr.exclude_hv ||
+ event->attr.exclude_idle ||
+ event->attr.exclude_host ||
+ event->attr.exclude_guest)
+ return -EINVAL;
+
+ if (event->cpu < 0)
+ return -EINVAL;
+
+ event->hw.idx = -1;
+ pmu = imc_event_to_pmu(event);
+
+ /* Sanity check for config (event offset and rvalue) */
+ if (((config & IMC_EVENT_OFFSET_MASK) > pmu->counter_mem_size) ||
+ ((config & IMC_EVENT_RVALUE_MASK) != 0))
+ return -EINVAL;
+
+ if (!is_core_imc_mem_inited(event->cpu))
+ return -ENODEV;
+
+ core_id = event->cpu / threads_per_core;
+ pcmi = &pmu->mem_info[core_id];
+ if ((pcmi->id != core_id) || (!pcmi->vbase[0]))
+ return -ENODEV;
+
+ event->hw.event_base = (u64)pcmi->vbase[0] + (config & IMC_EVENT_OFFSET_MASK);
+
+ /*
+ * Core pmu units are enabled only when it is used.
+ * See if this is triggered for the first time.
+ * If yes, take the mutex lock and enable the core counters.
+ * If not, just increment the count in core_imc_refc struct.
+ */
+ ref = &core_imc_refc[core_id];
+ if (!ref)
+ return -EINVAL;
+
+ mutex_lock(&ref->lock);
+ if (ref->refc == 0) {
+ rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_CORE,
+ get_hard_smp_processor_id(event->cpu));
+ if (rc) {
+ mutex_unlock(&ref->lock);
+ pr_err("IMC: Unable to start the counters for core %d\n", core_id);
+ return rc;
+ }
+ }
+ ++ref->refc;
+ mutex_unlock(&ref->lock);
+
+ event->destroy = core_imc_counters_release;
+ return 0;
+}
+
static void imc_read_counter(struct perf_event *event)
{
u64 *addr, data;
@@ -381,14 +636,19 @@ static int update_pmu_ops(struct imc_pmu *pmu)
return -EINVAL;

pmu->pmu.task_ctx_nr = perf_invalid_context;
- pmu->pmu.event_init = nest_imc_event_init;
+ if (pmu->domain == IMC_DOMAIN_NEST) {
+ pmu->pmu.event_init = nest_imc_event_init;
+ pmu->attr_groups[IMC_FORMAT_ATTR] = &imc_format_group;
+ } else if (pmu->domain == IMC_DOMAIN_CORE) {
+ pmu->pmu.event_init = core_imc_event_init;
+ pmu->attr_groups[IMC_FORMAT_ATTR] = &core_imc_format_group;
+ }
pmu->pmu.add = imc_event_add;
pmu->pmu.del = imc_event_stop;
pmu->pmu.start = imc_event_start;
pmu->pmu.stop = imc_event_stop;
pmu->pmu.read = imc_perf_event_update;
pmu->attr_groups[IMC_CPUMASK_ATTR] = &imc_pmu_cpumask_attr_group;
- pmu->attr_groups[IMC_FORMAT_ATTR] = &imc_format_group;
pmu->pmu.attr_groups = pmu->attr_groups;

return 0;
@@ -474,6 +734,42 @@ static int init_nest_pmu_ref(void)
return 0;
}

+static void cleanup_all_core_imc_memory(struct imc_pmu *pmu_ptr)
+{
+ int i, nr_cores = num_present_cpus() / threads_per_core;
+ struct imc_mem_info *ptr = pmu_ptr->mem_info;
+
+ for (i = 0; i < nr_cores; i++) {
+ if (ptr[i].vbase[0])
+ free_pages((u64)ptr[i].vbase[0], get_order(pmu_ptr->counter_mem_size));
+ }
+ kfree(pmu_ptr->mem_info);
+ kfree(core_imc_refc);
+}
+
+/*
+ * imc_mem_init : Function to support memory allocation for core imc.
+ */
+static int imc_mem_init(struct imc_pmu *pmu_ptr)
+{
+ int nr_cores;
+
+ if (pmu_ptr->imc_counter_mmaped)
+ return 0;
+
+ nr_cores = num_present_cpus() / threads_per_core;
+ pmu_ptr->mem_info = kzalloc((sizeof(struct imc_mem_info) * nr_cores), GFP_KERNEL);
+ if (!pmu_ptr->mem_info)
+ return -ENOMEM;
+
+ core_imc_refc = kzalloc((sizeof(struct imc_pmu_ref) * nr_cores),
+ GFP_KERNEL);
+ if (!core_imc_refc)
+ return -ENOMEM;
+
+ return 0;
+}
+
/*
* init_imc_pmu : Setup and register the IMC pmu device.
*
@@ -489,32 +785,48 @@ int init_imc_pmu(struct imc_events *events, int idx,
{
int ret;

- /*
- * Register for cpu hotplug notification.
- *
- * Nest imc pmu need only one cpu per chip, we initialize the cpumask
- * for the first nest imc pmu and use the same for the rest.
- * To handle the cpuhotplug callback unregister, we track the number of
- * nest pmus in "nest_pmus".
- * "nest_imc_cpumask_initialized" is set to zero during cpuhotplug
- * callback unregister.
- */
- mutex_lock(&imc_nest_inited_reserve);
- if (nest_pmus == 0) {
- ret = init_nest_pmu_ref();
- if (ret) {
- mutex_unlock(&imc_nest_inited_reserve);
- goto err_free;
+ ret = imc_mem_init(pmu_ptr);
+ if (ret)
+ goto err_free;
+
+ /* Register for cpu hotplug notification. */
+ switch (pmu_ptr->domain) {
+ case IMC_DOMAIN_NEST:
+ /*
+ * Nest imc pmu need only one cpu per chip, we initialize the
+ * cpumask for the first nest imc pmu and use the same for the
+ * rest. To handle the cpuhotplug callback unregister, we track
+ * the number of nest pmus in "nest_pmus".
+ * "nest_imc_cpumask_initialized" is set to zero during cpuhotplug
+ * callback unregister.
+ */
+ mutex_lock(&imc_nest_inited_reserve);
+ if (nest_pmus == 0) {
+ ret = init_nest_pmu_ref();
+ if (ret) {
+ mutex_unlock(&imc_nest_inited_reserve);
+ goto err_free;
+ }
+ ret = nest_pmu_cpumask_init();
+ if (ret) {
+ mutex_unlock(&imc_nest_inited_reserve);
+ goto err_free;
+ }
+ nest_imc_cpumask_initialized = 1;
}
- ret = nest_pmu_cpumask_init();
+ nest_pmus++;
+ mutex_unlock(&imc_nest_inited_reserve);
+ break;
+ case IMC_DOMAIN_CORE:
+ ret = core_imc_pmu_cpumask_init();
if (ret) {
- mutex_unlock(&imc_nest_inited_reserve);
- goto err_free;
+ cleanup_all_core_imc_memory(pmu_ptr);
+ return ret;
}
- nest_imc_cpumask_initialized = 1;
+ break;
+ default:
+ return -1; /* Unknown domain */
}
- nest_pmus++;
- mutex_unlock(&imc_nest_inited_reserve);

ret = update_events_in_group(events, idx, pmu_ptr);
if (ret)
@@ -557,5 +869,10 @@ int init_imc_pmu(struct imc_events *events, int idx,
}
mutex_unlock(&imc_nest_inited_reserve);
}
+ /* For core_imc, we have allocated memory, we need to free it */
+ if (pmu_ptr->domain == IMC_DOMAIN_CORE) {
+ cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE);
+ cleanup_all_core_imc_memory(pmu_ptr);
+ }
return ret;
}
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c
index aeef59b66420..91b8dd8d7619 100644
--- a/arch/powerpc/platforms/powernv/opal-imc.c
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -33,6 +33,7 @@
#include <asm/uaccess.h>
#include <asm/cputable.h>
#include <asm/imc-pmu.h>
+#include <asm/cputhreads.h>

static int imc_event_prop_update(char *name, struct imc_events *events)
{
@@ -486,6 +487,22 @@ static void disable_nest_pmu_counters(void)
}
}

+static void disable_core_pmu_counters(void)
+{
+ cpumask_t cores_map;
+ int cpu, rc;
+
+ /* Disable the IMC Core functions */
+ cores_map = cpu_online_cores_map();
+ for_each_cpu(cpu, &cores_map) {
+ rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
+ get_hard_smp_processor_id(cpu));
+ if (rc)
+ pr_err("%s: Failed to stop Core (cpu = %d)\n",
+ __FUNCTION__, cpu);
+ }
+}
+
static int opal_imc_counters_probe(struct platform_device *pdev)
{
struct device_node *imc_dev = NULL;
@@ -501,6 +518,7 @@ static int opal_imc_counters_probe(struct platform_device *pdev)
*/
if (is_kdump_kernel()) {
disable_nest_pmu_counters();
+ disable_core_pmu_counters();
return -ENODEV;
}
imc_dev = pdev->dev.of_node;
@@ -521,6 +539,12 @@ static int opal_imc_counters_probe(struct platform_device *pdev)
return 0;
}

+static void opal_imc_counters_shutdown(struct platform_device *pdev)
+{
+ /* Disable the IMC Core functions */
+ disable_core_pmu_counters();
+}
+
static const struct of_device_id opal_imc_match[] = {
{ .compatible = IMC_DTB_COMPAT },
{},
@@ -532,6 +556,7 @@ static struct platform_driver opal_imc_driver = {
.of_match_table = opal_imc_match,
},
.probe = opal_imc_counters_probe,
+ .shutdown = opal_imc_counters_shutdown,
};

MODULE_DEVICE_TABLE(of, opal_imc_match);
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index dca7f2b07f93..e145fffec093 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -140,6 +140,7 @@ enum cpuhp_state {
CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE,
CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE,
CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE,
+ CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE,
CPUHP_AP_WORKQUEUE_ONLINE,
CPUHP_AP_RCUTREE_ONLINE,
CPUHP_AP_ONLINE_DYN,
--
2.11.0

2017-07-03 09:40:12

by Anju T Sudhakar

Subject: [PATCH v12 10/10] powerpc/perf: Thread IMC PMU functions

Add support for thread IMC on CPU hotplug. When a CPU goes offline, the
LDBAR for that CPU is disabled; when it comes back online, the previous
LDBAR value is written back to the LDBAR for that CPU.

To register the hotplug functions for thread_imc, a new state
CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE is added to the list of existing
states.
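
For reference, the LDBAR programming performed by the hotplug handlers
boils down to the following condensed sketch. The helper names are
illustrative; THREAD_IMC_LDBAR_MASK, THREAD_IMC_ENABLE and thread_imc_mem
are the macros and per-cpu buffer introduced earlier in this series.

/* Condensed view of the per-thread LDBAR programming done on hotplug. */
static void thread_imc_ldbar_enable(int cpu)
{
	u64 ldbar_value;

	/* Point LDBAR at this cpu's buffer and set the enable bit */
	ldbar_value = ((u64)per_cpu(thread_imc_mem, cpu) &
		       (u64)THREAD_IMC_LDBAR_MASK) | (u64)THREAD_IMC_ENABLE;
	mtspr(SPRN_LDBAR, ldbar_value);
}

static void thread_imc_ldbar_disable(void)
{
	/* Clearing LDBAR stops thread IMC updates on this cpu */
	mtspr(SPRN_LDBAR, 0);
}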

Signed-off-by: Anju T Sudhakar <[email protected]>
Signed-off-by: Madhavan Srinivasan <[email protected]>
---
arch/powerpc/perf/imc-pmu.c | 38 +++++++++++++++++++++++++++++++++++++-
include/linux/cpuhotplug.h | 1 +
2 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index bea4dafc2aad..700e3cb0c89f 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -931,6 +931,37 @@ static int thread_imc_mem_alloc(int cpu_id, int size)
return 0;
}

+static int ppc_thread_imc_cpu_online(unsigned int cpu)
+{
+ int rc = 0;
+ u64 ldbar_value;
+
+ if (per_cpu(thread_imc_mem, cpu) == NULL)
+ rc = thread_imc_mem_alloc(cpu, thread_imc_mem_size);
+
+ if (rc)
+ mtspr(SPRN_LDBAR, 0);
+
+ ldbar_value = ((u64)per_cpu(thread_imc_mem, cpu) & (u64)THREAD_IMC_LDBAR_MASK) |
+ (u64)THREAD_IMC_ENABLE;
+ mtspr(SPRN_LDBAR, ldbar_value);
+ return 0;
+}
+
+static int ppc_thread_imc_cpu_offline(unsigned int cpu)
+{
+ mtspr(SPRN_LDBAR, 0);
+ return 0;
+}
+
+void thread_imc_cpu_init(void)
+{
+ cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE,
+ "perf/powerpc/imc_thread:online",
+ ppc_thread_imc_cpu_online,
+ ppc_thread_imc_cpu_offline);
+}
+
/*
* imc_mem_init : Function to support memory allocation for core imc.
*/
@@ -1044,6 +1075,9 @@ int init_imc_pmu(struct imc_events *events, int idx,
return ret;
}
break;
+ case IMC_DOMAIN_THREAD:
+ thread_imc_cpu_init();
+ break;
default:
return -1; /* Unknown domain */
}
@@ -1095,7 +1129,9 @@ int init_imc_pmu(struct imc_events *events, int idx,
cleanup_all_core_imc_memory(pmu_ptr);
}
/* For thread_imc, we have allocated memory, we need to free it */
- if (pmu_ptr->domain == IMC_DOMAIN_THREAD)
+ if (pmu_ptr->domain == IMC_DOMAIN_THREAD) {
+ cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE);
cleanup_all_thread_imc_memory();
+ }
return ret;
}
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index e145fffec093..937d1ec8c3e9 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -141,6 +141,7 @@ enum cpuhp_state {
CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE,
CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE,
CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE,
+ CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE,
CPUHP_AP_WORKQUEUE_ONLINE,
CPUHP_AP_RCUTREE_ONLINE,
CPUHP_AP_ONLINE_DYN,
--
2.11.0