Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1756134AbbDOPL3 (ORCPT ); Wed, 15 Apr 2015 11:11:29 -0400 Received: from mga01.intel.com ([192.55.52.88]:64224 "EHLO mga01.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1755108AbbDOPLW (ORCPT ); Wed, 15 Apr 2015 11:11:22 -0400 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.11,582,1422950400"; d="scan'208";a="680547632" Date: Wed, 15 Apr 2015 08:08:07 -0700 From: Jacob Pan To: Stephane Eranian Cc: LKML , Peter Zijlstra , "H. Peter Anvin" , Paul Mackerras , Andi Kleen , Thomas Gleixner , Arnaldo Carvalho de Melo , Vince Weaver Subject: Re: [PATCH v3] perf/rapl: support per domain energy unit Message-ID: <20150415080807.52fb0336@icelake> In-Reply-To: References: <1427405325-780-1-git-send-email-jacob.jun.pan@linux.intel.com> <20150406082147.5d183773@icelake> Organization: OTC X-Mailer: Claws Mail 3.9.3 (GTK+ 2.24.23; x86_64-pc-linux-gnu) MIME-Version: 1.0 Content-Type: text/plain; charset=US-ASCII Content-Transfer-Encoding: 7bit Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 10246 Lines: 295 On Mon, 6 Apr 2015 10:30:33 -0700 Stephane Eranian wrote: > On Mon, Apr 6, 2015 at 8:21 AM, Jacob Pan > wrote: > > > > On Thu, 26 Mar 2015 14:28:45 -0700 > > Jacob Pan wrote: > > > > > RAPL energy hardware unit can vary within a single CPU package, > > > e.g. HSW server DRAM has a fixed energy unit of 15.3 uJ (2^-16) > > > whereas the unit on other domains can be enumerated from power > > > unit MSR. There might be other variations in the future, this > > > patch adds per cpu model quirk to allow special handling of > > > certain cpus. > > > > > > hw_unit is also removed from per cpu data since it is not per cpu > > > and the sampling rate for energy counter is typically not high. > > > > > > Without this patch, DRAM domain on HSW servers will be counted > > > 4x higher than the real energy counter. > > > > > > > are there any more comments about this patch? > > > I think the patch is fine. > Reviewed-by: Stephane Eranian > Hi Peter, Any more comments? This is really a bug fix. Thanks, Jacob > > > > Thanks, > > > > Jacob > > > Signed-off-by: Jacob Pan > > > --- > > > arch/x86/kernel/cpu/perf_event_intel_rapl.c | 94 > > > ++++++++++++++++++++++------- 1 file changed, 73 insertions(+), 21 > > > deletions(-) > > > > > > diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c > > > b/arch/x86/kernel/cpu/perf_event_intel_rapl.c index > > > c4bb8b8..999289b9 100644 --- > > > a/arch/x86/kernel/cpu/perf_event_intel_rapl.c +++ > > > b/arch/x86/kernel/cpu/perf_event_intel_rapl.c @@ -62,6 +62,14 @@ > > > #define RAPL_IDX_PP1_NRG_STAT 3 /* gpu */ > > > #define INTEL_RAPL_PP1 0x4 /* pseudo-encoding > > > */ > > > +#define NR_RAPL_DOMAINS 0x4 > > > +static const char *rapl_domain_names[NR_RAPL_DOMAINS] > > > __initconst = { > > > + "pp0-core", > > > + "package", > > > + "dram", > > > + "pp1-gpu", > > > +}; > > > + > > > /* Clients have PP0, PKG */ > > > #define RAPL_IDX_CLN (1< > > 1< > > @@ -112,7 +120,6 @@ static struct perf_pmu_events_attr > > > event_attr_##v = { \ > > > struct rapl_pmu { > > > spinlock_t lock; > > > - int hw_unit; /* 1/2^hw_unit Joule */ > > > int n_active; /* number of active events */ > > > struct list_head active_list; > > > struct pmu *pmu; /* pointer to rapl_pmu_class */ > > > @@ -120,6 +127,7 @@ struct rapl_pmu { > > > struct hrtimer hrtimer; > > > }; > > > > > > +static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly; /* > > > 1/2^hw_unit Joule */ static struct pmu rapl_pmu_class; > > > static cpumask_t rapl_cpu_mask; > > > static int rapl_cntr_mask; > > > @@ -127,6 +135,7 @@ static int rapl_cntr_mask; > > > static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu); > > > static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu_to_free); > > > > > > +static struct x86_pmu_quirk *rapl_quirks; > > > static inline u64 rapl_read_counter(struct perf_event *event) > > > { > > > u64 raw; > > > @@ -134,15 +143,28 @@ static inline u64 rapl_read_counter(struct > > > perf_event *event) return raw; > > > } > > > > > > -static inline u64 rapl_scale(u64 v) > > > +#define > > > rapl_add_quirk(func_) > > > \ +do > > > { > > > \ > > > + static struct x86_pmu_quirk __quirk __initdata = > > > { \ > > > + .func = > > > func_, \ > > > + }; > > > \ > > > + __quirk.next = > > > rapl_quirks; \ > > > + rapl_quirks = > > > &__quirk; \ +} while > > > (0) + > > > +static inline u64 rapl_scale(u64 v, int cfg) > > > { > > > + if (cfg > NR_RAPL_DOMAINS) { > > > + pr_warn("invalid domain %d, failed to scale data\n", > > > cfg); > > > + return v; > > > + } > > > /* > > > * scale delta to smallest unit (1/2^32) > > > * users must then scale back: count * 1/(1e9*2^32) to get > > > Joules > > > * or use ldexp(count, -32). > > > * Watts = Joules/Time delta > > > */ > > > - return v << (32 - __this_cpu_read(rapl_pmu)->hw_unit); > > > + return v << (32 - rapl_hw_unit[cfg - 1]); > > > } > > > > > > static u64 rapl_event_update(struct perf_event *event) > > > @@ -173,7 +195,7 @@ again: > > > delta = (new_raw_count << shift) - (prev_raw_count << > > > shift); delta >>= shift; > > > > > > - sdelta = rapl_scale(delta); > > > + sdelta = rapl_scale(delta, event->hw.config); > > > > > > local64_add(sdelta, &event->count); > > > > > > @@ -546,12 +568,22 @@ static void rapl_cpu_init(int cpu) > > > cpumask_set_cpu(cpu, &rapl_cpu_mask); > > > } > > > > > > +static __init void rapl_hsw_server_quirk(void) > > > +{ > > > + /* > > > + * DRAM domain on HSW server has fixed energy unit which can > > > be > > > + * different than the unit from power unit MSR. > > > + * "Intel Xeon Processor E5-1600 and E5-2600 v3 Product > > > Families, V2 > > > + * of 2. Datasheet, September 2014, Reference Number: > > > 330784-001 " > > > + */ > > > + rapl_hw_unit[RAPL_IDX_RAM_NRG_STAT] = 16; > > > +} > > > + > > > static int rapl_cpu_prepare(int cpu) > > > { > > > struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu); > > > int phys_id = topology_physical_package_id(cpu); > > > u64 ms; > > > - u64 msr_rapl_power_unit_bits; > > > > > > if (pmu) > > > return 0; > > > @@ -559,24 +591,13 @@ static int rapl_cpu_prepare(int cpu) > > > if (phys_id < 0) > > > return -1; > > > > > > - /* protect rdmsrl() to handle virtualization */ > > > - if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, > > > &msr_rapl_power_unit_bits)) > > > - return -1; > > > - > > > pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, > > > cpu_to_node(cpu)); if (!pmu) > > > return -1; > > > - > > > spin_lock_init(&pmu->lock); > > > > > > INIT_LIST_HEAD(&pmu->active_list); > > > > > > - /* > > > - * grab power unit as: 1/2^unit Joules > > > - * > > > - * we cache in local PMU instance > > > - */ > > > - pmu->hw_unit = (msr_rapl_power_unit_bits >> 8) & 0x1FULL; > > > pmu->pmu = &rapl_pmu_class; > > > > > > /* > > > @@ -586,8 +607,8 @@ static int rapl_cpu_prepare(int cpu) > > > * divide interval by 2 to avoid lockstep (2 * 100) > > > * if hw unit is 32, then we use 2 ms 1/200/2 > > > */ > > > - if (pmu->hw_unit < 32) > > > - ms = (1000 / (2 * 100)) * (1ULL << (32 - > > > pmu->hw_unit - 1)); > > > + if (rapl_hw_unit[0] < 32) > > > + ms = (1000 / (2 * 100)) * (1ULL << (32 - > > > rapl_hw_unit[0] - 1)); else > > > ms = 2; > > > > > > @@ -655,6 +676,20 @@ static int rapl_cpu_notifier(struct > > > notifier_block *self, return NOTIFY_OK; > > > } > > > > > > +static int rapl_check_hw_unit(void) > > > +{ > > > + u64 msr_rapl_power_unit_bits; > > > + int i; > > > + > > > + /* protect rdmsrl() to handle virtualization */ > > > + if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, > > > &msr_rapl_power_unit_bits)) > > > + return -1; > > > + for (i = 0; i < NR_RAPL_DOMAINS; i++) > > > + rapl_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & > > > 0x1FULL; + > > > + return 0; > > > +} > > > + > > > static const struct x86_cpu_id rapl_cpu_match[] = { > > > [0] = { .vendor = X86_VENDOR_INTEL, .family = 6 }, > > > [1] = {}, > > > @@ -664,6 +699,8 @@ static int __init rapl_pmu_init(void) > > > { > > > struct rapl_pmu *pmu; > > > int cpu, ret; > > > + struct x86_pmu_quirk *quirk; > > > + int i; > > > > > > /* > > > * check for Intel processor family 6 > > > @@ -678,6 +715,11 @@ static int __init rapl_pmu_init(void) > > > rapl_cntr_mask = RAPL_IDX_CLN; > > > rapl_pmu_events_group.attrs = rapl_events_cln_attr; > > > break; > > > + case 63: /* Haswell-Server */ > > > + rapl_add_quirk(rapl_hsw_server_quirk); > > > + rapl_cntr_mask = RAPL_IDX_SRV; > > > + rapl_pmu_events_group.attrs = rapl_events_srv_attr; > > > + break; > > > case 60: /* Haswell */ > > > case 69: /* Haswell-Celeron */ > > > rapl_cntr_mask = RAPL_IDX_HSW; > > > @@ -693,7 +735,13 @@ static int __init rapl_pmu_init(void) > > > /* unsupported */ > > > return 0; > > > } > > > + ret = rapl_check_hw_unit(); > > > + if (ret) > > > + return ret; > > > > > > + /* run cpu model quirks */ > > > + for (quirk = rapl_quirks; quirk; quirk = quirk->next) > > > + quirk->func(); > > > cpu_notifier_register_begin(); > > > > > > for_each_online_cpu(cpu) { > > > @@ -714,14 +762,18 @@ static int __init rapl_pmu_init(void) > > > > > > pmu = __this_cpu_read(rapl_pmu); > > > > > > - pr_info("RAPL PMU detected, hw unit 2^-%d Joules," > > > + pr_info("RAPL PMU detected," > > > " API unit is 2^-32 Joules," > > > " %d fixed counters" > > > " %llu ms ovfl timer\n", > > > - pmu->hw_unit, > > > hweight32(rapl_cntr_mask), > > > ktime_to_ms(pmu->timer_interval)); > > > - > > > + for (i = 0; i < NR_RAPL_DOMAINS; i++) { > > > + if (rapl_cntr_mask & (1 << i)) { > > > + pr_info("hw unit of domain %s 2^-%d > > > Joules\n", > > > + rapl_domain_names[i], > > > rapl_hw_unit[i]); > > > + } > > > + } > > > out: > > > cpu_notifier_register_done(); > > > > > > > [Jacob Pan] [Jacob Pan] -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/