Date: Tue, 26 May 2015 02:37:52 -0700
From: Stephane Eranian
To: Peter Zijlstra
Cc: Ingo Molnar, Vince Weaver, Jiri Olsa, "Liang, Kan", LKML
Subject: Re: [PATCH v2 02/11] perf/x86: Improve HT workaround GP counter constraint

On Fri, May 22, 2015 at 6:29 AM, Peter Zijlstra wrote:
> The (SNB/IVB/HSW) HT bug only affects events that can be programmed
> onto GP counters, therefore we should only limit the number of GP
> counters that can be used per cpu -- iow we should not constrain the
> FP counters.
>
> Furthermore, we should only enforce such a limit when there are in fact
> exclusive events being scheduled on either sibling.
>
> Reported-by: Vince Weaver
> Signed-off-by: Peter Zijlstra (Intel)
> ---
>  arch/x86/kernel/cpu/perf_event.c              | 36 +++++++++++++++++++++-----
>  arch/x86/kernel/cpu/perf_event.h              | 11 +++++--
>  arch/x86/kernel/cpu/perf_event_intel.c        | 30 +++++++--------------
>  arch/x86/kernel/cpu/perf_event_intel_uncore.c |  2 -
>  4 files changed, 49 insertions(+), 30 deletions(-)
>
> --- a/arch/x86/kernel/cpu/perf_event.c
> +++ b/arch/x86/kernel/cpu/perf_event.c
> @@ -611,6 +611,7 @@ struct sched_state {
>  	int	event;		/* event index */
>  	int	counter;	/* counter index */
>  	int	unassigned;	/* number of events to be assigned left */
> +	int	nr_gp;		/* number of GP counters used */
>  	unsigned long	used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
>  };
>
> @@ -620,9 +621,10 @@ struct sched_state {
>  struct perf_sched {
>  	int			max_weight;
>  	int			max_events;
> +	int			max_gp;
> +	int			saved_states;
>  	struct event_constraint	**constraints;
>  	struct sched_state	state;
> -	int			saved_states;
>  	struct sched_state	saved[SCHED_STATES_MAX];
>  };
>
> @@ -630,13 +632,14 @@ struct perf_sched {
>   * Initialize interator that runs through all events and counters.
>   */
>  static void perf_sched_init(struct perf_sched *sched, struct event_constraint **constraints,
> -			    int num, int wmin, int wmax)
> +			    int num, int wmin, int wmax, int gpmax)
>  {
>  	int idx;
>
>  	memset(sched, 0, sizeof(*sched));
>  	sched->max_events	= num;
>  	sched->max_weight	= wmax;
> +	sched->max_gp		= gpmax;
>  	sched->constraints	= constraints;
>
>  	for (idx = 0; idx < num; idx++) {
> @@ -696,11 +699,16 @@ static bool __perf_sched_find_counter(st
>  			goto done;
>  		}
>  	}
> +
>  	/* Grab the first unused counter starting with idx */
>  	idx = sched->state.counter;
>  	for_each_set_bit_from(idx, c->idxmsk, INTEL_PMC_IDX_FIXED) {
> -		if (!__test_and_set_bit(idx, sched->state.used))
> +		if (!__test_and_set_bit(idx, sched->state.used)) {
> +			if (sched->state.nr_gp++ >= sched->max_gp)
> +				return false;
> +
>  			goto done;
> +		}
>  	}
>
>  	return false;
> @@ -757,11 +765,11 @@ static bool perf_sched_next_event(struct
>  /*
>   * Assign a counter for each event.
>   */
>  int perf_assign_events(struct event_constraint **constraints, int n,
> -			int wmin, int wmax, int *assign)
> +			int wmin, int wmax, int gpmax, int *assign)
>  {
>  	struct perf_sched sched;
>
> -	perf_sched_init(&sched, constraints, n, wmin, wmax);
> +	perf_sched_init(&sched, constraints, n, wmin, wmax, gpmax);
>
>  	do {
>  		if (!perf_sched_find_counter(&sched))
> @@ -822,8 +830,24 @@ int x86_schedule_events(struct cpu_hw_ev
>
>  	/* slow path */
>  	if (i != n) {
> +		int gpmax = x86_pmu.num_counters;
> +
> +		/*
> +		 * Do not allow scheduling of more than half the available
> +		 * generic counters.
> +		 *
> +		 * This helps avoid counter starvation of sibling thread by
> +		 * ensuring at most half the counters cannot be in exclusive
> +		 * mode. There is no designated counters for the limits. Any
> +		 * N/2 counters can be used. This helps with events with
> +		 * specific counter constraints.
> +		 */
> +		if (is_ht_workaround_enabled() && !cpuc->is_fake &&
> +		    READ_ONCE(cpuc->excl_cntrs->exclusive_present))
> +			gpmax /= 2;
> +

What I don't like about this part is that it is a hack to work around a
bug on a limited set of Intel CPUs, yet it sits in the middle of generic
x86 code. I understand it is inoperative on the AMD PMU and is not used
by Intel uncore, and on KNC or P6 you will not have
is_ht_workaround_enabled() either. Could this be made an x86_pmu
callback, e.g. x86_pmu.counter_limit()?
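
Something along these lines, perhaps (a completely untested sketch; the
counter_limit member and the intel_counter_limit() helper are made-up
names, just to illustrate the idea):

	/* in struct x86_pmu (perf_event.h): */
	int		(*counter_limit)(struct cpu_hw_events *cpuc);

	/* Intel-only implementation, installed only on affected PMUs: */
	static int intel_counter_limit(struct cpu_hw_events *cpuc)
	{
		/* halve the GP counters only while a sibling has exclusive events */
		if (is_ht_workaround_enabled() && !cpuc->is_fake &&
		    READ_ONCE(cpuc->excl_cntrs->exclusive_present))
			return x86_pmu.num_counters / 2;

		return x86_pmu.num_counters;
	}

	/* x86_schedule_events() then stays PMU-agnostic: */
	int gpmax = x86_pmu.counter_limit ? x86_pmu.counter_limit(cpuc)
					  : x86_pmu.num_counters;

That way AMD, KNC, P6, uncore, etc. never see the workaround at all.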
>  		unsched = perf_assign_events(cpuc->event_constraint, n, wmin,
> -					     wmax, assign);
> +					     wmax, gpmax, assign);
>  	}
>
>  	/*
> --- a/arch/x86/kernel/cpu/perf_event.h
> +++ b/arch/x86/kernel/cpu/perf_event.h
> @@ -74,6 +74,7 @@ struct event_constraint {
>  #define PERF_X86_EVENT_EXCL		0x0040 /* HT exclusivity on counter */
>  #define PERF_X86_EVENT_DYNAMIC		0x0080 /* dynamic alloc'd constraint */
>  #define PERF_X86_EVENT_RDPMC_ALLOWED	0x0100 /* grant rdpmc permission */
> +#define PERF_X86_EVENT_EXCL_ACCT	0x0200 /* accounted EXCL event */
>
>
>  struct amd_nb {
> @@ -134,8 +135,6 @@ enum intel_excl_state_type {
>  struct intel_excl_states {
>  	enum intel_excl_state_type init_state[X86_PMC_IDX_MAX];
>  	enum intel_excl_state_type state[X86_PMC_IDX_MAX];
> -	int  num_alloc_cntrs;/* #counters allocated */
> -	int  max_alloc_cntrs;/* max #counters allowed */
>  	bool sched_started; /* true if scheduling has started */
>  };
>
> @@ -144,6 +143,11 @@ struct intel_excl_cntrs {
>
>  	struct intel_excl_states states[2];
>
> +	union {
> +		u16	has_exclusive[2];
> +		u32	exclusive_present;
> +	};
> +
>  	int		refcnt;		/* per-core: #HT threads */
>  	unsigned	core_id;	/* per-core: core id */
>  };
> @@ -176,6 +180,7 @@ struct cpu_hw_events {
>  	struct perf_event	*event_list[X86_PMC_IDX_MAX]; /* in enabled order */
>  	struct event_constraint	*event_constraint[X86_PMC_IDX_MAX];
>
> +	int			n_excl; /* the number of exclusive events */
>
>  	unsigned int		group_flag;
>  	int			is_fake;
> @@ -719,7 +724,7 @@ static inline void __x86_pmu_enable_even
>  void x86_pmu_enable_all(int added);
>
>  int perf_assign_events(struct event_constraint **constraints, int n,
> -			int wmin, int wmax, int *assign);
> +			int wmin, int wmax, int gpmax, int *assign);
>  int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign);
>
>  void x86_pmu_stop(struct perf_event *event, int flags);
> --- a/arch/x86/kernel/cpu/perf_event_intel.c
> +++ b/arch/x86/kernel/cpu/perf_event_intel.c
> @@ -1923,7 +1923,6 @@ intel_start_scheduling(struct cpu_hw_eve
>  	xl = &excl_cntrs->states[tid];
>
>  	xl->sched_started = true;
> -	xl->num_alloc_cntrs = 0;
>  	/*
>  	 * lock shared state until we are done scheduling
>  	 * in stop_event_scheduling()
> @@ -2000,6 +1999,11 @@ intel_get_excl_constraints(struct cpu_hw
>  	 * across HT threads
>  	 */
>  	is_excl = c->flags & PERF_X86_EVENT_EXCL;
> +	if (is_excl && !(event->hw.flags & PERF_X86_EVENT_EXCL_ACCT)) {
> +		event->hw.flags |= PERF_X86_EVENT_EXCL_ACCT;
> +		if (!cpuc->n_excl++)
> +			WRITE_ONCE(excl_cntrs->has_exclusive[tid], 1);
> +	}
>
>  	/*
>  	 * xl = state of current HT
> @@ -2008,18 +2012,6 @@ intel_get_excl_constraints(struct cpu_hw
>  	xl = &excl_cntrs->states[tid];
>  	xlo = &excl_cntrs->states[o_tid];
>
> -	/*
> -	 * do not allow scheduling of more than max_alloc_cntrs
> -	 * which is set to half the available generic counters.
> -	 * this helps avoid counter starvation of sibling thread
> -	 * by ensuring at most half the counters cannot be in
> -	 * exclusive mode. There is not designated counters for the
> -	 * limits. Any N/2 counters can be used. This helps with
> -	 * events with specifix counter constraints
> -	 */
> -	if (xl->num_alloc_cntrs++ == xl->max_alloc_cntrs)
> -		return &emptyconstraint;
> -
>  	cx = c;
>
>  	/*
> @@ -2150,6 +2142,11 @@ static void intel_put_excl_constraints(s
>
>  	xl = &excl_cntrs->states[tid];
>  	xlo = &excl_cntrs->states[o_tid];
> +	if (hwc->flags & PERF_X86_EVENT_EXCL_ACCT) {
> +		hwc->flags &= ~PERF_X86_EVENT_EXCL_ACCT;
> +		if (!--cpuc->n_excl)
> +			WRITE_ONCE(excl_cntrs->has_exclusive[tid], 0);
> +	}
>
>  	/*
>  	 * put_constraint may be called from x86_schedule_events()
> @@ -2632,8 +2629,6 @@ static void intel_pmu_cpu_starting(int c
>  		cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR];
>
>  	if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
> -		int h = x86_pmu.num_counters >> 1;
> -
>  		for_each_cpu(i, topology_thread_cpumask(cpu)) {
>  			struct intel_excl_cntrs *c;
>
> @@ -2647,11 +2642,6 @@ static void intel_pmu_cpu_starting(int c
>  		}
>  		cpuc->excl_cntrs->core_id = core_id;
>  		cpuc->excl_cntrs->refcnt++;
> -		/*
> -		 * set hard limit to half the number of generic counters
> -		 */
> -		cpuc->excl_cntrs->states[0].max_alloc_cntrs = h;
> -		cpuc->excl_cntrs->states[1].max_alloc_cntrs = h;
>  	}
>  }
>
> --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
> +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
> @@ -394,7 +394,7 @@ static int uncore_assign_events(struct i
>  	/* slow path */
>  	if (i != n)
>  		ret = perf_assign_events(box->event_constraint, n,
> -					 wmin, wmax, assign);
> +					 wmin, wmax, n, assign);
>
>  	if (!assign || ret) {
>  		for (i = 0; i < n; i++)
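
One more question, just to check my reading of the new union in
intel_excl_cntrs: since excl_cntrs is shared by the two siblings of a
core, the two u16 has_exclusive[] entries overlay the single u32
exclusive_present, so when either thread sets its has_exclusive[tid]
with WRITE_ONCE(), the READ_ONCE(exclusive_present) in
x86_schedule_events() becomes non-zero for both threads, correct? If
so, a comment might help the next reader, something like (suggested
comment, not in the patch):

	union {
		u16	has_exclusive[2];	/* one word per HT sibling */
		u32	exclusive_present;	/* != 0 iff either sibling has EXCL events */
	};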