Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1751492AbdFGF0w (ORCPT ); Wed, 7 Jun 2017 01:26:52 -0400 Received: from mx0a-001b2d01.pphosted.com ([148.163.156.1]:60352 "EHLO mx0a-001b2d01.pphosted.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1750789AbdFGF0u (ORCPT ); Wed, 7 Jun 2017 01:26:50 -0400 Subject: Re: [PATCH v9 05/10] powerpc/perf: IMC pmu cpumask and cpuhotplug support To: Thomas Gleixner Cc: mpe@ellerman.id.au, linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, ego@linux.vnet.ibm.com, bsingharora@gmail.com, anton@samba.org, sukadev@linux.vnet.ibm.com, mikey@neuling.org, stewart@linux.vnet.ibm.com, dja@axtens.net, eranian@google.com, hemant@linux.vnet.ibm.com, maddy@linux.vnet.ibm.com References: <1496665922-702-1-git-send-email-anju@linux.vnet.ibm.com> From: Anju T Sudhakar Date: Wed, 7 Jun 2017 10:56:35 +0530 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Thunderbird/52.1.1 MIME-Version: 1.0 In-Reply-To: Content-Type: text/plain; charset=utf-8; format=flowed Content-Transfer-Encoding: 7bit Content-Language: en-US X-TM-AS-GCONF: 00 x-cbid: 17060705-0008-0000-0000-000007F6A2BF X-IBM-SpamModules-Scores: X-IBM-SpamModules-Versions: BY=3.00007187; HX=3.00000241; KW=3.00000007; PH=3.00000004; SC=3.00000212; SDB=6.00871272; UDB=6.00433327; IPR=6.00651264; BA=6.00005403; NDR=6.00000001; ZLA=6.00000005; ZF=6.00000009; ZB=6.00000000; ZP=6.00000000; ZH=6.00000000; ZU=6.00000002; MB=3.00015727; XFM=3.00000015; UTC=2017-06-07 05:26:48 X-IBM-AV-DETECTION: SAVI=unused REMOTE=unused XFE=unused x-cbparentid: 17060705-0009-0000-0000-000042801C8A Message-Id: <557d97b2-df0f-7eb3-2df3-31ac2fc5a47c@linux.vnet.ibm.com> X-Proofpoint-Virus-Version: vendor=fsecure engine=2.50.10432:,, definitions=2017-06-07_03:,, signatures=0 X-Proofpoint-Spam-Details: rule=outbound_notspam policy=outbound score=0 spamscore=0 suspectscore=2 malwarescore=0 phishscore=0 adultscore=0 bulkscore=0 classifier=spam adjust=0 reason=mlx scancount=1 engine=8.0.1-1703280000 definitions=main-1706070101 Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 6843 Lines: 246 Hi Thomas, Thank you for reviewing the patch. On Tuesday 06 June 2017 02:09 PM, Thomas Gleixner wrote: > On Mon, 5 Jun 2017, Anju T Sudhakar wrote: >> +/* >> + * nest_imc_stop : Does OPAL call to stop nest engine. >> + */ >> +static void nest_imc_stop(int *cpu_opal_rc) >> +{ >> + int rc; >> + >> + rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST); >> + if (rc) >> + cpu_opal_rc[smp_processor_id()] = 1; > What's wrong with just assigning the result directly? yes. We can assign it directly. Will do this. > >> +/* nest_imc_start: Does the OPAL call to enable nest counters. */ >> +static void nest_imc_start(int *cpu_opal_rc) >> +{ >> + int rc; >> + >> + rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_NEST); >> + if (rc) >> + cpu_opal_rc[smp_processor_id()] = 1; >> +} >> + >> +/* >> + * nest_imc_control: Function to start and stop nest imc engine. >> + * The function is primarily called from event init >> + * and event destroy. > As I don't see other call sites, what's the secondary usage? This function is called from event init and event destroy only. Will fix the comment here. > >> + */ >> +static int nest_imc_control(int operation) >> +{ >> + int *cpus_opal_rc, cpu; >> + >> + /* >> + * Memory for OPAL call return value. >> + */ >> + cpus_opal_rc = kzalloc((sizeof(int) * nr_cpu_ids), GFP_KERNEL); >> + if (!cpus_opal_rc) >> + return -ENOMEM; >> + switch (operation) { >> + case IMC_COUNTER_ENABLE: >> + /* Initialize Nest PMUs in each node using designated cpus */ >> + on_each_cpu_mask(&nest_imc_cpumask, (smp_call_func_t)nest_imc_start, >> + cpus_opal_rc, 1); > That's one indentation level too deep. My bad. Will fix this. >> + break; >> + case IMC_COUNTER_DISABLE: >> + /* Disable the counters */ >> + on_each_cpu_mask(&nest_imc_cpumask, (smp_call_func_t)nest_imc_stop, >> + cpus_opal_rc, 1); >> + break; >> + default: >> + kfree(cpus_opal_rc); >> + return -EINVAL; >> + >> + } >> + >> + /* Check return value array for any OPAL call failure */ >> + for_each_cpu(cpu, &nest_imc_cpumask) { >> + if (cpus_opal_rc[cpu]) { >> + kfree(cpus_opal_rc); >> + return -ENODEV; >> + } >> + } >> + kfree(cpus_opal_rc); > So you have THREE places where you do kfree(cpus_opal_rc). Sigh. > > You can spare that hole alloc/free dance by simply having > > static struct cpumask nest_imc_result; > > mutex_lock(&imc_nest_reserve); > memset(&nest_imc_result, 0, sizeof(nest_imc_result)); > > switch (op) { > case IMC_COUNTER_ENABLE: > on_each_cpu_mask(&nest_imc_cpumask, nest_imc_start, NULL, 1); > break; > .... > } > > res = cpumask_empty(&nest_imc_result) ? 0 : -ENODEV; > mutex_unlock(&imc_nest_reserve); > return res; > > And in the start/stop functions: > > if (opal_imc_counters_xxx(OPAL_IMC_COUNTERS_NEST)) > cpumask_set_cpu(smp_processor_id(), &nest_imc_result); Ok. >> +static void nest_change_cpu_context(int old_cpu, int new_cpu) >> +{ >> + struct imc_pmu **pn = per_nest_pmu_arr; >> + int i; >> + >> + if (old_cpu < 0 || new_cpu < 0) >> + return; >> + >> + for (i = 0; *pn && i < IMC_MAX_PMUS; i++, pn++) >> + perf_pmu_migrate_context(&(*pn)->pmu, old_cpu, new_cpu); >> + >> +} >> + >> +static int ppc_nest_imc_cpu_offline(unsigned int cpu) >> +{ >> + int nid, target = -1; >> + const struct cpumask *l_cpumask; >> + >> + /* >> + * Check in the designated list for this cpu. Dont bother >> + * if not one of them. >> + */ >> + if (!cpumask_test_and_clear_cpu(cpu, &nest_imc_cpumask)) >> + return 0; >> + >> + /* >> + * Now that this cpu is one of the designated, >> + * find a next cpu a) which is online and b) in same chip. >> + */ >> + nid = cpu_to_node(cpu); >> + l_cpumask = cpumask_of_node(nid); >> + target = cpumask_next(cpu, l_cpumask); > So if the outgoing CPU is the highest numbered CPU on the node, then > cpumask_next() will not find a CPU, while there still might be other lower > numbered CPUs online in the node. > > target = cpumask_any_but(l_cpumask, cpu); > > is what you want. In the previous revisions we designated the smallest cpu in the mask. And then we re wrote the code to pick next available cpu. But this is a nice catch. :-) Thanks! I will fix this. >> + >> + /* >> + * Update the cpumask with the target cpu and >> + * migrate the context if needed >> + */ >> + if (target >= 0 && target < nr_cpu_ids) { >> + cpumask_set_cpu(target, &nest_imc_cpumask); >> + nest_change_cpu_context(cpu, target); >> + } else { >> + target = -1; >> + opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST); >> + nest_change_cpu_context(cpu, target); > Why are you calling nest_change_cpu_context()? You already know that it > will return immediately due to target < 0. Yes right. Will remove the function call here. >> + } >> + return 0; >> +} >> + >> +static int ppc_nest_imc_cpu_online(unsigned int cpu) >> +{ >> + const struct cpumask *l_cpumask; >> + static struct cpumask tmp_mask; >> + int res; >> + >> + /* Get the cpumask of this node */ >> + l_cpumask = cpumask_of_node(cpu_to_node(cpu)); >> + >> + /* >> + * If this is not the first online CPU on this node, then >> + * just return. >> + */ >> + if (cpumask_and(&tmp_mask, l_cpumask, &nest_imc_cpumask)) >> + return 0; >> + >> + /* >> + * If this is the first online cpu on this node >> + * disable the nest counters by making an OPAL call. >> + */ >> + res = opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST); >> + if (res) >> + return res; >> + >> + /* Make this CPU the designated target for counter collection */ >> + cpumask_set_cpu(cpu, &nest_imc_cpumask); >> + nest_change_cpu_context(-1, cpu); > Ditto > >> static int nest_imc_event_init(struct perf_event *event) >> { >> - int chip_id; >> + int chip_id, rc; >> u32 config = event->attr.config; >> struct imc_mem_info *pcni; >> struct imc_pmu *pmu; >> @@ -80,6 +277,20 @@ static int nest_imc_event_init(struct perf_event *event) >> * "chip_id" and add "config" to it". >> */ >> event->hw.event_base = pcni->vbase[config/PAGE_SIZE] + (config & ~PAGE_MASK); >> + /* >> + * Nest pmu units are enabled only when it is used. >> + * See if this is triggered for the first time. >> + * If yes, take the mutex lock and enable the nest counters. >> + * If not, just increment the count in nest_events. >> + */ >> + if (atomic_inc_return(&nest_events) == 1) { >> + mutex_lock(&imc_nest_reserve); >> + rc = nest_imc_control(IMC_COUNTER_ENABLE); >> + mutex_unlock(&imc_nest_reserve); >> + if (rc) >> + pr_err("IMC: Unable to start the counters\n"); > So if that fails, nest_events will stay incremented. Is that on purpose? Nice catch. Will return here and decrement the count. Thanks and Regards, Anju >> + } >> + event->destroy = nest_imc_counters_release; >> return 0; > And this happily returns success even if the enable call failed .... > > Thanks, > > tglx >