From: David Carrillo-Cisneros
To: Peter Zijlstra, Alexander Shishkin, Arnaldo Carvalho de Melo, Ingo Molnar
Cc: Vikas Shivappa, Matt Fleming, Tony Luck, Stephane Eranian, Paul Turner,
    David Carrillo-Cisneros, x86@kernel.org, linux-kernel@vger.kernel.org
Subject: [PATCH 04/32] perf/x86/intel/cqm: make read of RMIDs per package (Temporal)
Date: Thu, 28 Apr 2016 21:43:10 -0700
Message-Id: <1461905018-86355-5-git-send-email-davidcc@google.com>
X-Mailer: git-send-email 2.8.0.rc3.226.g39d4020
In-Reply-To: <1461905018-86355-1-git-send-email-davidcc@google.com>
References: <1461905018-86355-1-git-send-email-davidcc@google.com>

The previous version of Intel's CQM introduced pmu::count as a
replacement for reading CQM events. This was done to avoid using an
IPI to read the CQM occupancy event when reading events attached to a
thread.

Using pmu::count in place of pmu::read is inconsistent with the usage
by other PMUs and introduces several problems, such as:
  1) pmu::read for thread events returns bogus values when called from
     contexts with interrupts disabled.
  2) The behavior of perf_event_count() depends on whether interrupts
     are enabled or not.
  3) perf_event_count() will always read a fresh value from the PMU,
     which is inconsistent with the behavior of other events.
  4) perf_event_count() will perform slow MSR reads and writes and
     send IPIs.

This patch removes pmu::count from CQM and makes pmu::read always read
from the local socket (package). Future patches will add a mechanism
to add in the event count from other packages.

This patch also removes the now-unused field rmid_usecnt from
intel_pqr_state.

Reviewed-by: Stephane Eranian
Signed-off-by: David Carrillo-Cisneros
---
 arch/x86/events/intel/cqm.c | 125 ++++++--------------------------------------
 1 file changed, 16 insertions(+), 109 deletions(-)

diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c
index 3c1e247..afd60dd 100644
--- a/arch/x86/events/intel/cqm.c
+++ b/arch/x86/events/intel/cqm.c
@@ -20,7 +20,6 @@ static unsigned int cqm_l3_scale; /* supposedly cacheline size */
  * struct intel_pqr_state - State cache for the PQR MSR
  * @rmid:        The cached Resource Monitoring ID
  * @closid:      The cached Class Of Service ID
- * @rmid_usecnt: The usage counter for rmid
  *
  * The upper 32 bits of MSR_IA32_PQR_ASSOC contain closid and the
  * lower 10 bits rmid. The update to MSR_IA32_PQR_ASSOC always
@@ -32,7 +31,6 @@ static unsigned int cqm_l3_scale; /* supposedly cacheline size */
 struct intel_pqr_state {
        u32                     rmid;
        u32                     closid;
-       int                     rmid_usecnt;
 };
 
 /*
@@ -44,6 +42,19 @@ struct intel_pqr_state {
 static DEFINE_PER_CPU(struct intel_pqr_state, pqr_state);
 
 /*
+ * Updates caller cpu's cache.
+ */
+static inline void __update_pqr_rmid(u32 rmid)
+{
+       struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
+
+       if (state->rmid == rmid)
+               return;
+       state->rmid = rmid;
+       wrmsr(MSR_IA32_PQR_ASSOC, rmid, state->closid);
+}
+
+/*
  * Protects cache_cgroups and cqm_rmid_free_lru and cqm_rmid_limbo_lru.
  * Also protects event->hw.cqm_rmid
  *
@@ -309,7 +320,7 @@ struct rmid_read {
        atomic64_t value;
 };
 
-static void __intel_cqm_event_count(void *info);
+static void intel_cqm_event_read(struct perf_event *event);
 
 /*
  * If we fail to assign a new RMID for intel_cqm_rotation_rmid because
@@ -376,12 +387,6 @@ static void intel_cqm_event_read(struct perf_event *event)
        u32 rmid;
        u64 val;
 
-       /*
-        * Task events are handled by intel_cqm_event_count().
-        */
-       if (event->cpu == -1)
-               return;
-
        raw_spin_lock_irqsave(&cache_lock, flags);
        rmid = event->hw.cqm_rmid;
 
@@ -401,123 +406,28 @@ out:
        raw_spin_unlock_irqrestore(&cache_lock, flags);
 }
 
-static void __intel_cqm_event_count(void *info)
-{
-       struct rmid_read *rr = info;
-       u64 val;
-
-       val = __rmid_read(rr->rmid);
-
-       if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
-               return;
-
-       atomic64_add(val, &rr->value);
-}
-
 static inline bool cqm_group_leader(struct perf_event *event)
 {
        return !list_empty(&event->hw.cqm_groups_entry);
 }
 
-static u64 intel_cqm_event_count(struct perf_event *event)
-{
-       unsigned long flags;
-       struct rmid_read rr = {
-               .value = ATOMIC64_INIT(0),
-       };
-
-       /*
-        * We only need to worry about task events. System-wide events
-        * are handled like usual, i.e. entirely with
-        * intel_cqm_event_read().
-        */
-       if (event->cpu != -1)
-               return __perf_event_count(event);
-
-       /*
-        * Only the group leader gets to report values. This stops us
-        * reporting duplicate values to userspace, and gives us a clear
-        * rule for which task gets to report the values.
-        *
-        * Note that it is impossible to attribute these values to
-        * specific packages - we forfeit that ability when we create
-        * task events.
-        */
-       if (!cqm_group_leader(event))
-               return 0;
-
-       /*
-        * Getting up-to-date values requires an SMP IPI which is not
-        * possible if we're being called in interrupt context. Return
-        * the cached values instead.
-        */
-       if (unlikely(in_interrupt()))
-               goto out;
-
-       /*
-        * Notice that we don't perform the reading of an RMID
-        * atomically, because we can't hold a spin lock across the
-        * IPIs.
-        *
-        * Speculatively perform the read, since @event might be
-        * assigned a different (possibly invalid) RMID while we're
-        * busy performing the IPI calls. It's therefore necessary to
-        * check @event's RMID afterwards, and if it has changed,
-        * discard the result of the read.
-        */
-       rr.rmid = ACCESS_ONCE(event->hw.cqm_rmid);
-
-       if (!__rmid_valid(rr.rmid))
-               goto out;
-
-       on_each_cpu_mask(&cqm_cpumask, __intel_cqm_event_count, &rr, 1);
-
-       raw_spin_lock_irqsave(&cache_lock, flags);
-       if (event->hw.cqm_rmid == rr.rmid)
-               local64_set(&event->count, atomic64_read(&rr.value));
-       raw_spin_unlock_irqrestore(&cache_lock, flags);
-out:
-       return __perf_event_count(event);
-}
-
 static void intel_cqm_event_start(struct perf_event *event, int mode)
 {
-       struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
-       u32 rmid = event->hw.cqm_rmid;
-
        if (!(event->hw.cqm_state & PERF_HES_STOPPED))
                return;
 
        event->hw.cqm_state &= ~PERF_HES_STOPPED;
-
-       if (state->rmid_usecnt++) {
-               if (!WARN_ON_ONCE(state->rmid != rmid))
-                       return;
-       } else {
-               WARN_ON_ONCE(state->rmid);
-       }
-
-       state->rmid = rmid;
-       wrmsr(MSR_IA32_PQR_ASSOC, rmid, state->closid);
+       __update_pqr_rmid(event->hw.cqm_rmid);
 }
 
 static void intel_cqm_event_stop(struct perf_event *event, int mode)
 {
-       struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
-
        if (event->hw.cqm_state & PERF_HES_STOPPED)
                return;
 
        event->hw.cqm_state |= PERF_HES_STOPPED;
-       intel_cqm_event_read(event);
-
-       if (!--state->rmid_usecnt) {
-               state->rmid = 0;
-               wrmsr(MSR_IA32_PQR_ASSOC, 0, state->closid);
-       } else {
-               WARN_ON_ONCE(!state->rmid);
-       }
+       __update_pqr_rmid(0);
 }
 
 static int intel_cqm_event_add(struct perf_event *event, int mode)
@@ -534,7 +444,6 @@ static int intel_cqm_event_add(struct perf_event *event, int mode)
        intel_cqm_event_start(event, mode);
        raw_spin_unlock_irqrestore(&cache_lock, flags);
-
        return 0;
 }
 
@@ -720,7 +629,6 @@ static struct pmu intel_cqm_pmu = {
        .start               = intel_cqm_event_start,
        .stop                = intel_cqm_event_stop,
        .read                = intel_cqm_event_read,
-       .count               = intel_cqm_event_count,
 };
 
 static inline void cqm_pick_event_reader(int cpu)
@@ -743,7 +651,6 @@ static void intel_cqm_cpu_starting(unsigned int cpu)
 
        state->rmid = 0;
        state->closid = 0;
-       state->rmid_usecnt = 0;
 
        WARN_ON(c->x86_cache_max_rmid != cqm_max_rmid);
        WARN_ON(c->x86_cache_occ_scale != cqm_l3_scale);
-- 
2.8.0.rc3.226.g39d4020
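
For reference, below is a rough sketch, not part of this patch, of how the
cross-package aggregation mentioned in the changelog could be layered on top
of the existing per-package reader mask (cqm_cpumask, one CPU per package)
and __rmid_read(). The struct pkg_rmid_read and the functions
__read_rmid_on_cpu() and cqm_read_all_packages() are hypothetical names used
only for illustration; locking against concurrent RMID reassignment is
omitted, and the code is assumed to live inside cqm.c next to the symbols it
references.

/*
 * Illustrative sketch only: sum the occupancy reported for @rmid by
 * every package, issuing one IPI per package. __rmid_read(),
 * RMID_VAL_ERROR and RMID_VAL_UNAVAIL are existing CQM symbols;
 * everything else below is hypothetical.
 */
struct pkg_rmid_read {
       u32             rmid;
       atomic64_t      value;
};

/* Runs on one CPU of a package and reads that package's counter. */
static void __read_rmid_on_cpu(void *info)
{
       struct pkg_rmid_read *prr = info;
       u64 val = __rmid_read(prr->rmid);

       if (!(val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL)))
               atomic64_add(val, &prr->value);
}

/*
 * Aggregate occupancy for @rmid across all packages. Must be called
 * with interrupts enabled, since it waits for the IPIs to complete.
 */
static u64 cqm_read_all_packages(u32 rmid)
{
       struct pkg_rmid_read prr = {
               .rmid   = rmid,
               .value  = ATOMIC64_INIT(0),
       };
       int cpu;

       for_each_cpu(cpu, &cqm_cpumask)
               smp_call_function_single(cpu, __read_rmid_on_cpu, &prr, 1);

       return atomic64_read(&prr.value);
}

A lazier variant would cache the per-package values on pmu::read and only sum
them when userspace reads the count; the sketch above just shows the simplest
IPI-based form of the idea.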