Subject: Re: [PATCH v13 4/5] powerpc/perf: Add core imc pmu support
To: mpe@ellerman.id.au
Cc: linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, Anju T Sudhakar, Hemant Kumar
References: <1500413796-18340-1-git-send-email-maddy@linux.vnet.ibm.com> <1500413796-18340-5-git-send-email-maddy@linux.vnet.ibm.com>
From: Madhavan Srinivasan
Date: Sat, 22 Jul 2017 12:42:31 +0530
In-Reply-To: <1500413796-18340-5-git-send-email-maddy@linux.vnet.ibm.com>

My bad, I missed changing the authorship of this patch.

From: Anju T Sudhakar

On Wednesday 19 July 2017 03:06 AM, Madhavan Srinivasan wrote:
> Add support to register Core In-Memory Collection pmu counters.
> Patch adds core imc specific data structures, along with memory
> init functions and cpuhotplug support.
>
> Signed-off-by: Anju T Sudhakar
> Signed-off-by: Hemant Kumar
> Signed-off-by: Madhavan Srinivasan
> ---
>  arch/powerpc/perf/imc-pmu.c | 303 +++++++++++++++++++++++++++++++++++++++++++-
>  include/linux/cpuhotplug.h |   1 +
>  2 files changed, 299 insertions(+), 5 deletions(-)
>
> diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
> index 6e00ea7358a2..cd8ba3b98d29 100644
> --- a/arch/powerpc/perf/imc-pmu.c
> +++ b/arch/powerpc/perf/imc-pmu.c
> @@ -18,7 +18,6 @@
>  #include
>  #include
>
> -//Nest imc data structures and variable
>  /*
>   * Used to avoid races in counting the nest-pmu units during hotplug
>   * register and unregister
> @@ -30,6 +29,11 @@ static cpumask_t nest_imc_cpumask;
>  struct imc_pmu_ref *nest_imc_refc;
>  static int nest_pmus;
>
> +//Core imc data structs and variables
> +static cpumask_t core_imc_cpumask;
> +struct imc_pmu_ref *core_imc_refc;
> +static struct imc_pmu *core_imc_pmu;
> +
>  struct imc_pmu *imc_event_to_pmu(struct perf_event *event)
>  {
>  	return container_of(event->pmu, struct imc_pmu, pmu);
> @@ -61,11 +65,13 @@ static ssize_t imc_pmu_cpumask_get_attr(struct device *dev,
>  	struct imc_pmu *imc_pmu = container_of(pmu, struct imc_pmu, pmu);
>  	cpumask_t *active_mask;
>
> -	/* Subsequenct patch will add more pmu types here */
>  	switch(imc_pmu->domain){
>  	case IMC_DOMAIN_NEST:
>  		active_mask = &nest_imc_cpumask;
>  		break;
> +	case IMC_DOMAIN_CORE:
> +		active_mask = &core_imc_cpumask;
> +		break;
>  	default:
>  		return 0;
>  	}
> @@ -485,6 +491,240 @@ static int nest_imc_event_init(struct perf_event *event)
>  	return 0;
>  }
>
> +/*
> + * core_imc_mem_init : Initializes memory for the current core.
> + *
> + * Uses alloc_pages_node() and uses the returned address as an argument to
> + * an opal call to configure the pdbar. The address sent as an argument is
> + * converted to physical address before the opal call is made. This is the
> + * base address at which the core imc counters are populated.
> + */
> +static int core_imc_mem_init(int cpu, int size)
> +{
> +	int phys_id, rc = 0, core_id = (cpu / threads_per_core);
> +	struct imc_mem_info *mem_info;
> +
> +	/*
> +	 * alloc_pages_node() will allocate memory for core in the
> +	 * local node only.
> +	 */
> +	phys_id = topology_physical_package_id(cpu);
> +	mem_info = &core_imc_pmu->mem_info[core_id];
> +	mem_info->id = core_id;
> +
> +	/* We need only vbase for core counters */
> +	mem_info->vbase = page_address(alloc_pages_node(phys_id,
> +				GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE,
> +				get_order(size)));
> +	if (!mem_info->vbase)
> +		return -ENOMEM;
> +
> +	/* Init the mutex */
> +	core_imc_refc[core_id].id = core_id;
> +	mutex_init(&core_imc_refc[core_id].lock);
> +
> +	rc = opal_imc_counters_init(OPAL_IMC_COUNTERS_CORE,
> +				__pa((void *)mem_info->vbase),
> +				get_hard_smp_processor_id(cpu));
> +	if (rc) {
> +		free_pages((u64)mem_info->vbase, get_order(size));
> +		mem_info->vbase = NULL;
> +	}
> +
> +	return rc;
> +}
> +
> +static bool is_core_imc_mem_inited(int cpu)
> +{
> +	struct imc_mem_info *mem_info;
> +	int core_id = (cpu / threads_per_core);
> +
> +	mem_info = &core_imc_pmu->mem_info[core_id];
> +	if (!mem_info->vbase)
> +		return false;
> +
> +	return true;
> +}
> +
> +static int ppc_core_imc_cpu_online(unsigned int cpu)
> +{
> +	const struct cpumask *l_cpumask;
> +	static struct cpumask tmp_mask;
> +	int ret = 0;
> +
> +	/* Get the cpumask for this core */
> +	l_cpumask = cpu_sibling_mask(cpu);
> +
> +	/* If a cpu for this core is already set, then, don't do anything */
> +	if (cpumask_and(&tmp_mask, l_cpumask, &core_imc_cpumask))
> +		return 0;
> +
> +	if (!is_core_imc_mem_inited(cpu)) {
> +		ret = core_imc_mem_init(cpu, core_imc_pmu->counter_mem_size);
> +		if (ret) {
> +			pr_info("core_imc memory allocation for cpu %d failed\n", cpu);
> +			return ret;
> +		}
> +	}
> +
> +	/* set the cpu in the mask */
> +	cpumask_set_cpu(cpu, &core_imc_cpumask);
> +	return 0;
> +}
> +
> +static int ppc_core_imc_cpu_offline(unsigned int cpu)
> +{
> +	unsigned int ncpu, core_id;
> +	struct imc_pmu_ref *ref;
> +
> +	/*
> +	 * clear this cpu out of the mask, if not present in the mask,
> +	 * don't bother doing anything.
> +	 */
> +	if (!cpumask_test_and_clear_cpu(cpu, &core_imc_cpumask))
> +		return 0;
> +
> +	/* Find any online cpu in that core except the current "cpu" */
> +	ncpu = cpumask_any_but(cpu_sibling_mask(cpu), cpu);
> +
> +	if (ncpu >= 0 && ncpu < nr_cpu_ids) {
> +		cpumask_set_cpu(ncpu, &core_imc_cpumask);
> +		perf_pmu_migrate_context(&core_imc_pmu->pmu, cpu, ncpu);
> +	} else {
> +		/*
> +		 * If this is the last cpu in this core then, skip taking reference
> +		 * count mutex lock for this core and directly zero "refc" for
> +		 * this core.
> +		 */
> +		opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
> +				       get_hard_smp_processor_id(cpu));
> +		core_id = cpu / threads_per_core;
> +		ref = &core_imc_refc[core_id];
> +		if (!ref)
> +			return -EINVAL;
> +
> +		ref->refc = 0;
> +	}
> +	return 0;
> +}
> +
> +static int core_imc_pmu_cpumask_init(void)
> +{
> +	return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE,
> +				 "perf/powerpc/imc_core:online",
> +				 ppc_core_imc_cpu_online,
> +				 ppc_core_imc_cpu_offline);
> +}
> +
> +static void core_imc_counters_release(struct perf_event *event)
> +{
> +	int rc, core_id;
> +	struct imc_pmu_ref *ref;
> +
> +	if (event->cpu < 0)
> +		return;
> +	/*
> +	 * See if we need to disable the IMC PMU.
> +	 * If no events are currently in use, then we have to take a
> +	 * mutex to ensure that we don't race with another task doing
> +	 * enable or disable the core counters.
> +	 */
> +	core_id = event->cpu / threads_per_core;
> +
> +	/* Take the mutex lock and decrement the reference count for this core */
> +	ref = &core_imc_refc[core_id];
> +	if (!ref)
> +		return;
> +
> +	mutex_lock(&ref->lock);
> +	ref->refc--;
> +	if (ref->refc == 0) {
> +		rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
> +				get_hard_smp_processor_id(event->cpu));
> +		if (rc) {
> +			mutex_unlock(&ref->lock);
> +			pr_err("IMC: Unable to stop the counters for core %d\n", core_id);
> +			return;
> +		}
> +	} else if (ref->refc < 0) {
> +		WARN(1, "core-imc: Invalid event reference count\n");
> +		ref->refc = 0;
> +	}
> +	mutex_unlock(&ref->lock);
> +}
> +
> +static int core_imc_event_init(struct perf_event *event)
> +{
> +	int core_id, rc;
> +	u64 config = event->attr.config;
> +	struct imc_mem_info *pcmi;
> +	struct imc_pmu *pmu;
> +	struct imc_pmu_ref *ref;
> +
> +	if (event->attr.type != event->pmu->type)
> +		return -ENOENT;
> +
> +	/* Sampling not supported */
> +	if (event->hw.sample_period)
> +		return -EINVAL;
> +
> +	/* unsupported modes and filters */
> +	if (event->attr.exclude_user ||
> +	    event->attr.exclude_kernel ||
> +	    event->attr.exclude_hv ||
> +	    event->attr.exclude_idle ||
> +	    event->attr.exclude_host ||
> +	    event->attr.exclude_guest)
> +		return -EINVAL;
> +
> +	if (event->cpu < 0)
> +		return -EINVAL;
> +
> +	event->hw.idx = -1;
> +	pmu = imc_event_to_pmu(event);
> +
> +	/* Sanity check for config (event offset) */
> +	if (((config & IMC_EVENT_OFFSET_MASK) > pmu->counter_mem_size))
> +		return -EINVAL;
> +
> +	if (!is_core_imc_mem_inited(event->cpu))
> +		return -ENODEV;
> +
> +	core_id = event->cpu / threads_per_core;
> +	pcmi = &core_imc_pmu->mem_info[core_id];
> +	if ((!pcmi->vbase))
> +		return -ENODEV;
> +
> +	/* Get the core_imc mutex for this core */
> +	ref = &core_imc_refc[core_id];
> +	if (!ref)
> +		return -EINVAL;
> +
> +	/*
> +	 * Core pmu units are enabled only when it is used.
> +	 * See if this is triggered for the first time.
> +	 * If yes, take the mutex lock and enable the core counters.
> +	 * If not, just increment the count in core_imc_refc struct.
> +	 */
> +	mutex_lock(&ref->lock);
> +	if (ref->refc == 0) {
> +		rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_CORE,
> +				get_hard_smp_processor_id(event->cpu));
> +		if (rc) {
> +			mutex_unlock(&ref->lock);
> +			pr_err("core-imc: Unable to start the counters for core %d\n",
> +									core_id);
> +			return rc;
> +		}
> +	}
> +	++ref->refc;
> +	mutex_unlock(&ref->lock);
> +
> +	event->hw.event_base = (u64)pcmi->vbase + (config & IMC_EVENT_OFFSET_MASK);
> +	event->destroy = core_imc_counters_release;
> +	return 0;
> +}
> +
>  static u64 * get_event_base_addr(struct perf_event *event)
>  {
>  	/*
> @@ -563,12 +803,15 @@ static int update_pmu_ops(struct imc_pmu *pmu)
>  	pmu->pmu.attr_groups = pmu->attr_groups;
>  	pmu->attr_groups[IMC_FORMAT_ATTR] = &imc_format_group;
>
> -	/* Subsequenct patch will add more pmu types here */
>  	switch (pmu->domain) {
>  	case IMC_DOMAIN_NEST:
>  		pmu->pmu.event_init = nest_imc_event_init;
>  		pmu->attr_groups[IMC_CPUMASK_ATTR] = &imc_pmu_cpumask_attr_group;
>  		break;
> +	case IMC_DOMAIN_CORE:
> +		pmu->pmu.event_init = core_imc_event_init;
> +		pmu->attr_groups[IMC_CPUMASK_ATTR] = &imc_pmu_cpumask_attr_group;
> +		break;
>  	default:
>  		break;
>  	}
> @@ -620,6 +863,22 @@ static int init_nest_pmu_ref(void)
>  	return 0;
>  }
>
> +static void cleanup_all_core_imc_memory(void)
> +{
> +	int i, nr_cores = num_present_cpus() / threads_per_core;
> +	struct imc_mem_info *ptr = core_imc_pmu->mem_info;
> +	int size = core_imc_pmu->counter_mem_size;
> +
> +	/* mem_info will never be NULL */
> +	for (i = 0; i < nr_cores; i++) {
> +		if (ptr[i].vbase)
> +			free_pages((u64)ptr->vbase, get_order(size));
> +	}
> +
> +	kfree(ptr);
> +	kfree(core_imc_refc);
> +}
> +
>  /*
>   * Common function to unregister cpu hotplug callback and
>   * free the memory.
> @@ -640,6 +899,12 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
>  		mutex_unlock(&nest_init_lock);
>  	}
>
> +	/* Free core_imc memory */
> +	if (pmu_ptr->domain == IMC_DOMAIN_CORE) {
> +		cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE);
> +		cleanup_all_core_imc_memory();
> +	}
> +
>  	/* Only free the attr_groups which are dynamically allocated */
>  	kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs);
>  	kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]);
> @@ -655,11 +920,11 @@ static int imc_mem_init(struct imc_pmu *pmu_ptr, struct device_node *parent,
>  				int pmu_index)
>  {
>  	const char *s;
> +	int nr_cores;
>
>  	if (of_property_read_string(parent, "name", &s))
>  		return -ENODEV;
>
> -	/* Subsequenct patch will add more pmu types here */
>  	switch (pmu_ptr->domain) {
>  	case IMC_DOMAIN_NEST:
>  		/* Update the pmu name */
> @@ -670,6 +935,27 @@ static int imc_mem_init(struct imc_pmu *pmu_ptr, struct device_node *parent,
>  		/* Needed for hotplug/migration */
>  		per_nest_pmu_arr[pmu_index] = pmu_ptr;
>  		break;
> +	case IMC_DOMAIN_CORE:
> +		/* Update the pmu name */
> +		pmu_ptr->pmu.name = kasprintf(GFP_KERNEL, "%s%s", s, "_imc");
> +		if (!pmu_ptr->pmu.name)
> +			return -ENOMEM;
> +
> +		nr_cores = num_present_cpus() / threads_per_core;
> +		pmu_ptr->mem_info = kcalloc(nr_cores, sizeof(struct imc_mem_info),
> +								GFP_KERNEL);
> +
> +		if (!pmu_ptr->mem_info)
> +			return -ENOMEM;
> +
> +		core_imc_refc = kcalloc(nr_cores, sizeof(struct imc_pmu_ref),
> +								GFP_KERNEL);
> +
> +		if (!core_imc_refc)
> +			return -ENOMEM;
> +
> +		core_imc_pmu = pmu_ptr;
> +		break;
>  	default:
>  		return -EINVAL;
>  	}
> @@ -695,7 +981,6 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id
>  	if (ret)
>  		goto err_free;
>
> -	/* Subsequenct patch will add more pmu types here */
>  	switch (pmu_ptr->domain) {
>  	case IMC_DOMAIN_NEST:
>  		/*
> @@ -721,6 +1006,14 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id
>  		nest_pmus++;
>  		mutex_unlock(&nest_init_lock);
>  		break;
> +	case IMC_DOMAIN_CORE:
> +		ret = core_imc_pmu_cpumask_init();
> +		if (ret) {
> +			cleanup_all_core_imc_memory();
> +			return ret;
> +		}
> +
> +		break;
>  	default:
>  		return -1;	/* Unknown domain */
>  	}
> diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
> index 0853a14b1fa1..1be505db0090 100644
> --- a/include/linux/cpuhotplug.h
> +++ b/include/linux/cpuhotplug.h
> @@ -140,6 +140,7 @@ enum cpuhp_state {
>  	CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE,
>  	CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE,
>  	CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE,
> +	CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE,
>  	CPUHP_AP_WORKQUEUE_ONLINE,
>  	CPUHP_AP_RCUTREE_ONLINE,
>  	CPUHP_AP_ONLINE_DYN,
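
As an aside, and not part of the patch: once a core IMC PMU registered by this code shows up under /sys/bus/event_source/devices/, its counters can be read from userspace through the normal perf_event_open() interface. The sketch below is purely illustrative; the attr.type and attr.config values are placeholders that would be read from the PMU's sysfs "type" file and exported event attributes, and opening a cpu-wide event needs sufficient privileges.

/*
 * Illustrative only -- not part of this patch. Opens one core IMC
 * counter on cpu 0, waits a second, and prints the count. The
 * attr.type and attr.config values are placeholders; real values
 * come from /sys/bus/event_source/devices/<pmu_name>/.
 */
#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
			    int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr attr;
	long long count;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = 42;		/* placeholder: contents of the PMU's sysfs "type" file */
	attr.config = 0x20;	/* placeholder: event offset exported by the PMU */

	/* core IMC events are cpu-wide, not per-task: pid = -1, cpu = 0 */
	fd = perf_event_open(&attr, -1, 0, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	sleep(1);
	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("count: %lld\n", count);

	close(fd);
	return 0;
}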