From: Vikas Shivappa <vikas.shivappa@linux.intel.com>
To: vikas.shivappa@intel.com, vikas.shivappa@linux.intel.com
Cc: linux-kernel@vger.kernel.org, x86@kernel.org, hpa@zytor.com,
	tglx@linutronix.de, mingo@kernel.org, peterz@infradead.org,
	ravi.v.shankar@intel.com, tony.luck@intel.com, fenghua.yu@intel.com,
	h.peter.anvin@intel.com
Subject: [PATCH 4/6] x86/mbm: Memory bandwidth monitoring event management
Date: Thu, 10 Mar 2016 15:32:10 -0800
Message-Id: <1457652732-4499-5-git-send-email-vikas.shivappa@linux.intel.com>
X-Mailer: git-send-email 1.9.1
In-Reply-To: <1457652732-4499-1-git-send-email-vikas.shivappa@linux.intel.com>
References: <1457652732-4499-1-git-send-email-vikas.shivappa@linux.intel.com>

From: Tony Luck <tony.luck@intel.com>

Add the core infrastructure to measure total_bytes and bandwidth.

There are per-socket counters for both total system-wide L3 external
bytes and local socket memory-controller bytes. The OS writes the event
ID and RMID to MSR_IA32_QM_EVTSEL and reads the counter value from
MSR_IA32_QM_CTR, and uses the IA32_PQR_ASSOC MSR to associate the RMID
with the running task. Tasks use a common RMID for CQM (Cache Quality
of Service Monitoring) and MBM, so most of the scheduling code is
reused from CQM.

Reviewed-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Vikas Shivappa <vikas.shivappa@linux.intel.com>
---
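[Editor's illustration, not part of the patch: update_sample() in the diff
below folds each 24-bit MSR_IA32_QM_CTR reading into a running 64-bit byte
count, tolerating one wrap of the counter between two reads. The stand-alone
user-space sketch below shows only that delta arithmetic. The function name
mbm_delta_bytes() and the sample values are invented for the demo, and the
plain subtraction in the no-wrap branch is equivalent to the shift-based form
the patch uses for values that fit in MBM_CNTR_WIDTH bits.]

#include <stdio.h>
#include <stdint.h>

#define MBM_CNTR_WIDTH	24
#define MBM_CNTR_MAX	((1U << MBM_CNTR_WIDTH) - 1)

/* Bytes elapsed between two raw counter reads, allowing for one wrap. */
static uint64_t mbm_delta_bytes(uint64_t prev_msr, uint64_t cur_msr,
				uint64_t cqm_l3_scale)
{
	uint64_t counts;

	if (cur_msr < prev_msr)
		/* counter wrapped past MBM_CNTR_MAX */
		counts = MBM_CNTR_MAX - prev_msr + cur_msr + 1;
	else
		counts = cur_msr - prev_msr;

	/* the driver scales counts to bytes with cqm_l3_scale */
	return counts * cqm_l3_scale;
}

int main(void)
{
	/* no wrap: 0x80 counts * 64 bytes = 8192 */
	printf("%llu\n", (unsigned long long)mbm_delta_bytes(0x000100, 0x000180, 64));
	/* wrap:   32 counts * 64 bytes = 2048 */
	printf("%llu\n", (unsigned long long)mbm_delta_bytes(0xfffff0, 0x000010, 64));
	return 0;
}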
 arch/x86/kernel/cpu/perf_event_intel_cqm.c | 130 +++++++++++++++++++++++++++--
 1 file changed, 125 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
index 0496a56..7f1e6b3 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
@@ -13,6 +13,13 @@
 #define MSR_IA32_QM_CTR		0x0c8e
 #define MSR_IA32_QM_EVTSEL	0x0c8d
 
+/*
+ * MBM Counter is 24bits wide. MBM_CNTR_MAX defines max counter
+ * value
+ */
+#define MBM_CNTR_WIDTH		24
+#define MBM_CNTR_MAX		((1U << MBM_CNTR_WIDTH) - 1)
+
 static u32 cqm_max_rmid = -1;
 static unsigned int cqm_l3_scale; /* supposedly cacheline size */
 static bool cqm_enabled, mbm_enabled;
@@ -62,6 +69,16 @@ static struct sample *mbm_total;
  */
 static struct sample *mbm_local;
 
+#define pkg_id	topology_physical_package_id(smp_processor_id())
+/*
+ * rmid_2_index returns the index for the rmid in mbm_local/mbm_total array.
+ * mbm_total[] and mbm_local[] are linearly indexed by socket# * max number of
+ * rmids per socket, an example is given below
+ * RMID1 of Socket0:  vrmid = 1
+ * RMID1 of Socket1:  vrmid = 1 * (cqm_max_rmid + 1) + 1
+ * RMID1 of Socket2:  vrmid = 2 * (cqm_max_rmid + 1) + 1
+ */
+#define rmid_2_index(rmid)	((pkg_id * (cqm_max_rmid + 1)) + rmid)
 /*
  * Protects cache_cgroups and cqm_rmid_free_lru and cqm_rmid_limbo_lru.
  * Also protects event->hw.cqm_rmid
@@ -85,8 +102,12 @@ static cpumask_t cqm_cpumask;
 #define RMID_VAL_ERROR		(1ULL << 63)
 #define RMID_VAL_UNAVAIL	(1ULL << 62)
 
 #define QOS_L3_OCCUP_EVENT_ID	(1 << 0)
-
-#define QOS_EVENT_MASK	QOS_L3_OCCUP_EVENT_ID
+/*
+ * MBM Event IDs as defined in SDM section 17.15.5
+ * Event IDs are used to program EVTSEL MSRs before reading mbm event counters
+ */
+#define QOS_MBM_TOTAL_EVENT_ID	0x02
+#define QOS_MBM_LOCAL_EVENT_ID	0x03
 
 /*
  * This is central to the rotation algorithm in __intel_cqm_rmid_rotate().
@@ -429,9 +450,16 @@ static bool __conflict_event(struct perf_event *a, struct perf_event *b)
 struct rmid_read {
 	u32 rmid;
 	atomic64_t value;
+	u32 evt_type;
 };
 
 static void __intel_cqm_event_count(void *info);
+static void init_mbm_sample(u32 rmid, u32 evt_type);
+
+static bool is_mbm_event(int e)
+{
+	return (e >= QOS_MBM_TOTAL_EVENT_ID && e <= QOS_MBM_LOCAL_EVENT_ID);
+}
 
 /*
  * Exchange the RMID of a group of events.
@@ -873,6 +901,73 @@ static void intel_cqm_rmid_rotate(struct work_struct *work)
 	schedule_delayed_work(&intel_cqm_rmid_work, delay);
 }
 
+static u64 update_sample(unsigned int rmid,
+			 u32 evt_type, int first)
+{
+	struct sample *mbm_current;
+	u32 vrmid = rmid_2_index(rmid);
+	u64 val, bytes, shift;
+	u32 eventid;
+
+	if (evt_type == QOS_MBM_LOCAL_EVENT_ID) {
+		mbm_current = &mbm_local[vrmid];
+		eventid = QOS_MBM_LOCAL_EVENT_ID;
+	} else {
+		mbm_current = &mbm_total[vrmid];
+		eventid = QOS_MBM_TOTAL_EVENT_ID;
+	}
+
+	wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid);
+	rdmsrl(MSR_IA32_QM_CTR, val);
+	if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
+		return mbm_current->total_bytes;
+
+	if (first) {
+		mbm_current->prev_msr = val;
+		mbm_current->total_bytes = 0;
+		return mbm_current->total_bytes;
+	}
+
+	if (val < mbm_current->prev_msr) {
+		bytes = MBM_CNTR_MAX - mbm_current->prev_msr + val + 1;
+	} else {
+		shift = 64 - MBM_CNTR_WIDTH;
+		bytes = (val << shift) - (mbm_current->prev_msr << shift);
+		bytes >>= shift;
+	}
+
+	bytes *= cqm_l3_scale;
+
+	mbm_current->total_bytes += bytes;
+	mbm_current->prev_msr = val;
+
+	return mbm_current->total_bytes;
+}
+
+static u64 rmid_read_mbm(unsigned int rmid, u32 evt_type)
+{
+	return update_sample(rmid, evt_type, 0);
+}
+
+static void __intel_mbm_event_init(void *info)
+{
+	struct rmid_read *rr = info;
+
+	update_sample(rr->rmid, rr->evt_type, 1);
+}
+
+static void init_mbm_sample(u32 rmid, u32 evt_type)
+{
+	struct rmid_read rr = {
+		.value = ATOMIC64_INIT(0),
+		.rmid = rmid,
+		.evt_type = evt_type,
+	};
+
+	/* on each socket, init sample */
+	on_each_cpu_mask(&cqm_cpumask, __intel_mbm_event_init, &rr, 1);
+}
+
 /*
  * Find a group and setup RMID.
  *
@@ -893,6 +988,8 @@ static void intel_cqm_setup_event(struct perf_event *event,
 		/* All tasks in a group share an RMID */
 		event->hw.cqm_rmid = rmid;
 		*group = iter;
+		if (is_mbm_event(event->attr.config))
+			init_mbm_sample(rmid, event->attr.config);
 		return;
 	}
 
@@ -909,6 +1006,9 @@ static void intel_cqm_setup_event(struct perf_event *event,
 	else
 		rmid = __get_rmid();
 
+	if (is_mbm_event(event->attr.config))
+		init_mbm_sample(rmid, event->attr.config);
+
 	event->hw.cqm_rmid = rmid;
 }
 
@@ -930,7 +1030,10 @@ static void intel_cqm_event_read(struct perf_event *event)
 	if (!__rmid_valid(rmid))
 		goto out;
 
-	val = __rmid_read(rmid);
+	if (is_mbm_event(event->attr.config))
+		val = rmid_read_mbm(rmid, event->attr.config);
+	else
+		val = __rmid_read(rmid);
 
 	/*
 	 * Ignore this reading on error states and do not update the value.
@@ -961,6 +1064,17 @@ static inline bool cqm_group_leader(struct perf_event *event)
 	return !list_empty(&event->hw.cqm_groups_entry);
 }
 
+static void __intel_mbm_event_count(void *info)
+{
+	struct rmid_read *rr = info;
+	u64 val;
+
+	val = rmid_read_mbm(rr->rmid, rr->evt_type);
+	if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
+		return;
+	atomic64_add(val, &rr->value);
+}
+
 static u64 intel_cqm_event_count(struct perf_event *event)
 {
 	unsigned long flags;
@@ -1014,7 +1128,12 @@ static u64 intel_cqm_event_count(struct perf_event *event)
 	if (!__rmid_valid(rr.rmid))
 		goto out;
 
-	on_each_cpu_mask(&cqm_cpumask, __intel_cqm_event_count, &rr, 1);
+	if (is_mbm_event(event->attr.config)) {
+		rr.evt_type = event->attr.config;
+		on_each_cpu_mask(&cqm_cpumask, __intel_mbm_event_count, &rr, 1);
+	} else {
+		on_each_cpu_mask(&cqm_cpumask, __intel_cqm_event_count, &rr, 1);
+	}
 
 	raw_spin_lock_irqsave(&cache_lock, flags);
 	if (event->hw.cqm_rmid == rr.rmid)
@@ -1129,7 +1248,8 @@ static int intel_cqm_event_init(struct perf_event *event)
 	if (event->attr.type != intel_cqm_pmu.type)
 		return -ENOENT;
 
-	if (event->attr.config & ~QOS_EVENT_MASK)
+	if ((event->attr.config < QOS_L3_OCCUP_EVENT_ID) ||
+	    (event->attr.config > QOS_MBM_LOCAL_EVENT_ID))
 		return -EINVAL;
 
 	/* unsupported modes and filters */
-- 
1.9.1
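[Editor's illustration, not part of the patch: rmid_2_index() in the diff
above maps an RMID on a given socket to a slot in the flat
mbm_local[]/mbm_total[] arrays, which hold (number of sockets) *
(cqm_max_rmid + 1) entries. A minimal stand-alone sketch of that indexing
follows; the explicit pkg_id parameter stands in for the "package of the
current CPU" that the kernel macro resolves implicitly, and cqm_max_rmid =
143 is just an example value.]

#include <stdio.h>

static unsigned int cqm_max_rmid = 143;	/* example: 144 RMIDs per socket */

/* Flat index into mbm_local[]/mbm_total[] for a (socket, RMID) pair. */
static unsigned int rmid_2_index(unsigned int pkg_id, unsigned int rmid)
{
	return pkg_id * (cqm_max_rmid + 1) + rmid;
}

int main(void)
{
	/* Reproduces the example from the comment above rmid_2_index(): */
	printf("RMID1 of Socket0: vrmid = %u\n", rmid_2_index(0, 1));	/* 1 */
	printf("RMID1 of Socket1: vrmid = %u\n", rmid_2_index(1, 1));	/* 145 */
	printf("RMID1 of Socket2: vrmid = %u\n", rmid_2_index(2, 1));	/* 289 */
	return 0;
}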