Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752884AbbDFRag (ORCPT ); Mon, 6 Apr 2015 13:30:36 -0400 Received: from mail-ob0-f180.google.com ([209.85.214.180]:33247 "EHLO mail-ob0-f180.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752673AbbDFRae (ORCPT ); Mon, 6 Apr 2015 13:30:34 -0400 MIME-Version: 1.0 In-Reply-To: <20150406082147.5d183773@icelake> References: <1427405325-780-1-git-send-email-jacob.jun.pan@linux.intel.com> <20150406082147.5d183773@icelake> Date: Mon, 6 Apr 2015 10:30:33 -0700 Message-ID: Subject: Re: [PATCH v3] perf/rapl: support per domain energy unit From: Stephane Eranian To: Jacob Pan Cc: LKML , Peter Zijlstra , "H. Peter Anvin" , Paul Mackerras , Andi Kleen , Thomas Gleixner , Arnaldo Carvalho de Melo , Vince Weaver Content-Type: text/plain; charset=UTF-8 Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 9525 Lines: 278 On Mon, Apr 6, 2015 at 8:21 AM, Jacob Pan wrote: > > On Thu, 26 Mar 2015 14:28:45 -0700 > Jacob Pan wrote: > > > RAPL energy hardware unit can vary within a single CPU package, e.g. > > HSW server DRAM has a fixed energy unit of 15.3 uJ (2^-16) whereas > > the unit on other domains can be enumerated from power unit MSR. > > There might be other variations in the future, this patch adds > > per cpu model quirk to allow special handling of certain cpus. > > > > hw_unit is also removed from per cpu data since it is not per cpu > > and the sampling rate for energy counter is typically not high. > > > > Without this patch, DRAM domain on HSW servers will be counted > > 4x higher than the real energy counter. > > > > are there any more comments about this patch? > I think the patch is fine. Reviewed-by: Stephane Eranian > > Thanks, > > Jacob > > Signed-off-by: Jacob Pan > > --- > > arch/x86/kernel/cpu/perf_event_intel_rapl.c | 94 > > ++++++++++++++++++++++------- 1 file changed, 73 insertions(+), 21 > > deletions(-) > > > > diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c > > b/arch/x86/kernel/cpu/perf_event_intel_rapl.c index c4bb8b8..999289b9 > > 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c > > +++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c > > @@ -62,6 +62,14 @@ > > #define RAPL_IDX_PP1_NRG_STAT 3 /* gpu */ > > #define INTEL_RAPL_PP1 0x4 /* pseudo-encoding > > */ > > +#define NR_RAPL_DOMAINS 0x4 > > +static const char *rapl_domain_names[NR_RAPL_DOMAINS] __initconst = { > > + "pp0-core", > > + "package", > > + "dram", > > + "pp1-gpu", > > +}; > > + > > /* Clients have PP0, PKG */ > > #define RAPL_IDX_CLN (1< > 1< > @@ -112,7 +120,6 @@ static struct perf_pmu_events_attr event_attr_##v > > = { \ > > struct rapl_pmu { > > spinlock_t lock; > > - int hw_unit; /* 1/2^hw_unit Joule */ > > int n_active; /* number of active events */ > > struct list_head active_list; > > struct pmu *pmu; /* pointer to rapl_pmu_class */ > > @@ -120,6 +127,7 @@ struct rapl_pmu { > > struct hrtimer hrtimer; > > }; > > > > +static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly; /* > > 1/2^hw_unit Joule */ static struct pmu rapl_pmu_class; > > static cpumask_t rapl_cpu_mask; > > static int rapl_cntr_mask; > > @@ -127,6 +135,7 @@ static int rapl_cntr_mask; > > static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu); > > static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu_to_free); > > > > +static struct x86_pmu_quirk *rapl_quirks; > > static inline u64 rapl_read_counter(struct perf_event *event) > > { > > u64 raw; > > @@ -134,15 +143,28 @@ static inline u64 rapl_read_counter(struct > > perf_event *event) return raw; > > } > > > > -static inline u64 rapl_scale(u64 v) > > +#define > > rapl_add_quirk(func_) > > \ +do > > { > > \ > > + static struct x86_pmu_quirk __quirk __initdata = > > { \ > > + .func = > > func_, \ > > + }; > > \ > > + __quirk.next = > > rapl_quirks; \ > > + rapl_quirks = > > &__quirk; \ +} while > > (0) + > > +static inline u64 rapl_scale(u64 v, int cfg) > > { > > + if (cfg > NR_RAPL_DOMAINS) { > > + pr_warn("invalid domain %d, failed to scale data\n", > > cfg); > > + return v; > > + } > > /* > > * scale delta to smallest unit (1/2^32) > > * users must then scale back: count * 1/(1e9*2^32) to get > > Joules > > * or use ldexp(count, -32). > > * Watts = Joules/Time delta > > */ > > - return v << (32 - __this_cpu_read(rapl_pmu)->hw_unit); > > + return v << (32 - rapl_hw_unit[cfg - 1]); > > } > > > > static u64 rapl_event_update(struct perf_event *event) > > @@ -173,7 +195,7 @@ again: > > delta = (new_raw_count << shift) - (prev_raw_count << shift); > > delta >>= shift; > > > > - sdelta = rapl_scale(delta); > > + sdelta = rapl_scale(delta, event->hw.config); > > > > local64_add(sdelta, &event->count); > > > > @@ -546,12 +568,22 @@ static void rapl_cpu_init(int cpu) > > cpumask_set_cpu(cpu, &rapl_cpu_mask); > > } > > > > +static __init void rapl_hsw_server_quirk(void) > > +{ > > + /* > > + * DRAM domain on HSW server has fixed energy unit which can > > be > > + * different than the unit from power unit MSR. > > + * "Intel Xeon Processor E5-1600 and E5-2600 v3 Product > > Families, V2 > > + * of 2. Datasheet, September 2014, Reference Number: > > 330784-001 " > > + */ > > + rapl_hw_unit[RAPL_IDX_RAM_NRG_STAT] = 16; > > +} > > + > > static int rapl_cpu_prepare(int cpu) > > { > > struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu); > > int phys_id = topology_physical_package_id(cpu); > > u64 ms; > > - u64 msr_rapl_power_unit_bits; > > > > if (pmu) > > return 0; > > @@ -559,24 +591,13 @@ static int rapl_cpu_prepare(int cpu) > > if (phys_id < 0) > > return -1; > > > > - /* protect rdmsrl() to handle virtualization */ > > - if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, > > &msr_rapl_power_unit_bits)) > > - return -1; > > - > > pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, > > cpu_to_node(cpu)); if (!pmu) > > return -1; > > - > > spin_lock_init(&pmu->lock); > > > > INIT_LIST_HEAD(&pmu->active_list); > > > > - /* > > - * grab power unit as: 1/2^unit Joules > > - * > > - * we cache in local PMU instance > > - */ > > - pmu->hw_unit = (msr_rapl_power_unit_bits >> 8) & 0x1FULL; > > pmu->pmu = &rapl_pmu_class; > > > > /* > > @@ -586,8 +607,8 @@ static int rapl_cpu_prepare(int cpu) > > * divide interval by 2 to avoid lockstep (2 * 100) > > * if hw unit is 32, then we use 2 ms 1/200/2 > > */ > > - if (pmu->hw_unit < 32) > > - ms = (1000 / (2 * 100)) * (1ULL << (32 - > > pmu->hw_unit - 1)); > > + if (rapl_hw_unit[0] < 32) > > + ms = (1000 / (2 * 100)) * (1ULL << (32 - > > rapl_hw_unit[0] - 1)); else > > ms = 2; > > > > @@ -655,6 +676,20 @@ static int rapl_cpu_notifier(struct > > notifier_block *self, return NOTIFY_OK; > > } > > > > +static int rapl_check_hw_unit(void) > > +{ > > + u64 msr_rapl_power_unit_bits; > > + int i; > > + > > + /* protect rdmsrl() to handle virtualization */ > > + if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, > > &msr_rapl_power_unit_bits)) > > + return -1; > > + for (i = 0; i < NR_RAPL_DOMAINS; i++) > > + rapl_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & > > 0x1FULL; + > > + return 0; > > +} > > + > > static const struct x86_cpu_id rapl_cpu_match[] = { > > [0] = { .vendor = X86_VENDOR_INTEL, .family = 6 }, > > [1] = {}, > > @@ -664,6 +699,8 @@ static int __init rapl_pmu_init(void) > > { > > struct rapl_pmu *pmu; > > int cpu, ret; > > + struct x86_pmu_quirk *quirk; > > + int i; > > > > /* > > * check for Intel processor family 6 > > @@ -678,6 +715,11 @@ static int __init rapl_pmu_init(void) > > rapl_cntr_mask = RAPL_IDX_CLN; > > rapl_pmu_events_group.attrs = rapl_events_cln_attr; > > break; > > + case 63: /* Haswell-Server */ > > + rapl_add_quirk(rapl_hsw_server_quirk); > > + rapl_cntr_mask = RAPL_IDX_SRV; > > + rapl_pmu_events_group.attrs = rapl_events_srv_attr; > > + break; > > case 60: /* Haswell */ > > case 69: /* Haswell-Celeron */ > > rapl_cntr_mask = RAPL_IDX_HSW; > > @@ -693,7 +735,13 @@ static int __init rapl_pmu_init(void) > > /* unsupported */ > > return 0; > > } > > + ret = rapl_check_hw_unit(); > > + if (ret) > > + return ret; > > > > + /* run cpu model quirks */ > > + for (quirk = rapl_quirks; quirk; quirk = quirk->next) > > + quirk->func(); > > cpu_notifier_register_begin(); > > > > for_each_online_cpu(cpu) { > > @@ -714,14 +762,18 @@ static int __init rapl_pmu_init(void) > > > > pmu = __this_cpu_read(rapl_pmu); > > > > - pr_info("RAPL PMU detected, hw unit 2^-%d Joules," > > + pr_info("RAPL PMU detected," > > " API unit is 2^-32 Joules," > > " %d fixed counters" > > " %llu ms ovfl timer\n", > > - pmu->hw_unit, > > hweight32(rapl_cntr_mask), > > ktime_to_ms(pmu->timer_interval)); > > - > > + for (i = 0; i < NR_RAPL_DOMAINS; i++) { > > + if (rapl_cntr_mask & (1 << i)) { > > + pr_info("hw unit of domain %s 2^-%d > > Joules\n", > > + rapl_domain_names[i], > > rapl_hw_unit[i]); > > + } > > + } > > out: > > cpu_notifier_register_done(); > > > > [Jacob Pan] -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/