From: Stephane Eranian
To: "Yan, Zheng"
Cc: LKML, Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo, Andi Kleen
Subject: Re: [PATCH 03/14] perf, x86: use context switch callback to flush LBR stack
Date: Wed, 5 Feb 2014 17:34:08 +0100
In-Reply-To: <1388728091-18564-4-git-send-email-zheng.z.yan@intel.com>
References: <1388728091-18564-1-git-send-email-zheng.z.yan@intel.com>
 <1388728091-18564-4-git-send-email-zheng.z.yan@intel.com>

On Fri, Jan 3, 2014 at 6:48 AM, Yan, Zheng wrote:
> Enable the pmu context switch callback when LBR is used. Use the
> callback to flush LBR stack when task is scheduled in. This allows
> us to move code that flushes LBR stack from perf core to perf x86.
>
You forgot to remove perf_event_context.nr_branch_stack, which does not
appear to be needed anymore; see the suggested cleanup hunk after the
quoted patch below.

> Signed-off-by: Yan, Zheng
> ---
>  arch/x86/kernel/cpu/perf_event.c           |  7 ---
>  arch/x86/kernel/cpu/perf_event.h           |  2 -
>  arch/x86/kernel/cpu/perf_event_intel.c     | 14 +----
>  arch/x86/kernel/cpu/perf_event_intel_lbr.c | 32 ++++++++-----
>  include/linux/perf_event.h                 |  5 ---
>  kernel/events/core.c                       | 72 ------------------------
>  6 files changed, 21 insertions(+), 111 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
> index 6703d17..69e2095 100644
> --- a/arch/x86/kernel/cpu/perf_event.c
> +++ b/arch/x86/kernel/cpu/perf_event.c
> @@ -1852,12 +1852,6 @@ static void x86_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
>                  x86_pmu.sched_task(ctx, sched_in);
>  }
>
> -static void x86_pmu_flush_branch_stack(void)
> -{
> -        if (x86_pmu.flush_branch_stack)
> -                x86_pmu.flush_branch_stack();
> -}
> -
>  void perf_check_microcode(void)
>  {
>          if (x86_pmu.check_microcode)
> @@ -1884,7 +1878,6 @@ static struct pmu pmu = {
>          .commit_txn             = x86_pmu_commit_txn,
>
>          .event_idx              = x86_pmu_event_idx,
> -        .flush_branch_stack     = x86_pmu_flush_branch_stack,
>          .sched_task             = x86_pmu_sched_task,
>  };
>
> diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
> index 3fdb751..80b8e83 100644
> --- a/arch/x86/kernel/cpu/perf_event.h
> +++ b/arch/x86/kernel/cpu/perf_event.h
> @@ -150,7 +150,6 @@ struct cpu_hw_events {
>           * Intel LBR bits
>           */
>          int                             lbr_users;
> -        void                            *lbr_context;
>          struct perf_branch_stack        lbr_stack;
>          struct perf_branch_entry        lbr_entries[MAX_LBR_ENTRIES];
>          struct er_account               *lbr_sel;
> @@ -416,7 +415,6 @@ struct x86_pmu {
>          void            (*cpu_dead)(int cpu);
>
>          void            (*check_microcode)(void);
> -        void            (*flush_branch_stack)(void);
>          void            (*sched_task)(struct perf_event_context *ctx,
>                                        bool sched_in);
>
> diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
> index 0fa4f24..4325bae 100644
> --- a/arch/x86/kernel/cpu/perf_event_intel.c
> +++ b/arch/x86/kernel/cpu/perf_event_intel.c
> @@ -2038,18 +2038,6 @@ static void intel_pmu_cpu_dying(int cpu)
>          fini_debug_store_on_cpu(cpu);
>  }
>
> -static void intel_pmu_flush_branch_stack(void)
> -{
> -        /*
> -         * Intel LBR does not tag entries with the
> -         * PID of the current task, then we need to
> -         * flush it on ctxsw
> -         * For now, we simply reset it
> -         */
> -        if (x86_pmu.lbr_nr)
> -                intel_pmu_lbr_reset();
> -}
> -
>  PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
>
>  PMU_FORMAT_ATTR(ldlat, "config1:0-15");
> @@ -2101,7 +2089,7 @@ static __initconst const struct x86_pmu intel_pmu = {
>          .cpu_starting           = intel_pmu_cpu_starting,
>          .cpu_dying              = intel_pmu_cpu_dying,
>          .guest_get_msrs         = intel_guest_get_msrs,
> -        .flush_branch_stack     = intel_pmu_flush_branch_stack,
> +        .sched_task             = intel_pmu_lbr_sched_task,
>  };
>
>  static __init void intel_clovertown_quirk(void)
> diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
> index 1ae2ec5..7ff2a99 100644
> --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
> +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
> @@ -177,24 +177,32 @@ void intel_pmu_lbr_reset(void)
>                  intel_pmu_lbr_reset_64();
>  }
>
> -void intel_pmu_lbr_enable(struct perf_event *event)
> +void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
>  {
> -        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
> -
>          if (!x86_pmu.lbr_nr)
>                  return;
>
>          /*
> -         * Reset the LBR stack if we changed task context to
> -         * avoid data leaks.
> +         * It is necessary to flush the stack on context switch. This happens
> +         * when the branch stack does not tag its entries with the pid of the
> +         * current task.
>           */
> -        if (event->ctx->task && cpuc->lbr_context != event->ctx) {
> +        if (sched_in)
>                  intel_pmu_lbr_reset();
> -                cpuc->lbr_context = event->ctx;
> -        }
> +}
> +
> +void intel_pmu_lbr_enable(struct perf_event *event)
> +{
> +        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
> +
> +        if (!x86_pmu.lbr_nr)
> +                return;
> +
>          cpuc->br_sel = event->hw.branch_reg.reg;
>
>          cpuc->lbr_users++;
> +        if (cpuc->lbr_users == 1)
> +                perf_sched_cb_enable(event->ctx->pmu);
>  }
>
>  void intel_pmu_lbr_disable(struct perf_event *event)
> @@ -207,10 +215,10 @@ void intel_pmu_lbr_disable(struct perf_event *event)
>          cpuc->lbr_users--;
>          WARN_ON_ONCE(cpuc->lbr_users < 0);
>
> -        if (cpuc->enabled && !cpuc->lbr_users) {
> -                __intel_pmu_lbr_disable();
> -                /* avoid stale pointer */
> -                cpuc->lbr_context = NULL;
> +        if (!cpuc->lbr_users) {
> +                perf_sched_cb_disable(event->ctx->pmu);
> +                if (cpuc->enabled)
> +                        __intel_pmu_lbr_disable();
>          }
>  }
>
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index 6a3e603..96cb88b 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -248,11 +248,6 @@ struct pmu {
>          int (*event_idx)                (struct perf_event *event); /*optional */
>
>          /*
> -         * flush branch stack on context-switches (needed in cpu-wide mode)
> -         */
> -        void (*flush_branch_stack)      (void);
> -
> -        /*
>           * PMU callback for context-switches. optional
>           */
>          void (*sched_task)              (struct perf_event_context *ctx,
>                                           bool sched_in);
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index d110a23..aba4d6d 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -140,7 +140,6 @@ enum event_type_t {
>   */
>  struct static_key_deferred perf_sched_events __read_mostly;
>  static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
> -static DEFINE_PER_CPU(atomic_t, perf_branch_stack_events);
>  static DEFINE_PER_CPU(int, perf_sched_cb_usages);
>
>  static atomic_t nr_mmap_events __read_mostly;
> @@ -2566,65 +2565,6 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
>          perf_pmu_rotate_start(ctx->pmu);
>  }
>
> -/*
> - * When sampling the branck stack in system-wide, it may be necessary
> - * to flush the stack on context switch. This happens when the branch
> - * stack does not tag its entries with the pid of the current task.
> - * Otherwise it becomes impossible to associate a branch entry with a
> - * task. This ambiguity is more likely to appear when the branch stack
> - * supports priv level filtering and the user sets it to monitor only
> - * at the user level (which could be a useful measurement in system-wide
> - * mode). In that case, the risk is high of having a branch stack with
> - * branch from multiple tasks. Flushing may mean dropping the existing
> - * entries or stashing them somewhere in the PMU specific code layer.
> - *
> - * This function provides the context switch callback to the lower code
> - * layer. It is invoked ONLY when there is at least one system-wide context
> - * with at least one active event using taken branch sampling.
> - */
> -static void perf_branch_stack_sched_in(struct task_struct *prev,
> -                                       struct task_struct *task)
> -{
> -        struct perf_cpu_context *cpuctx;
> -        struct pmu *pmu;
> -        unsigned long flags;
> -
> -        /* no need to flush branch stack if not changing task */
> -        if (prev == task)
> -                return;
> -
> -        local_irq_save(flags);
> -
> -        rcu_read_lock();
> -
> -        list_for_each_entry_rcu(pmu, &pmus, entry) {
> -                cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
> -
> -                /*
> -                 * check if the context has at least one
> -                 * event using PERF_SAMPLE_BRANCH_STACK
> -                 */
> -                if (cpuctx->ctx.nr_branch_stack > 0
> -                    && pmu->flush_branch_stack) {
> -
> -                        pmu = cpuctx->ctx.pmu;
> -
> -                        perf_ctx_lock(cpuctx, cpuctx->task_ctx);
> -
> -                        perf_pmu_disable(pmu);
> -
> -                        pmu->flush_branch_stack();
> -
> -                        perf_pmu_enable(pmu);
> -
> -                        perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
> -                }
> -        }
> -
> -        rcu_read_unlock();
> -
> -        local_irq_restore(flags);
> -}
>
>  /*
>   * Called from scheduler to add the events of the current task
> @@ -2658,10 +2598,6 @@ void __perf_event_task_sched_in(struct task_struct *prev,
>          if (atomic_read(&__get_cpu_var(perf_cgroup_events)))
>                  perf_cgroup_sched_in(prev, task);
>
> -        /* check for system-wide branch_stack events */
> -        if (atomic_read(&__get_cpu_var(perf_branch_stack_events)))
> -                perf_branch_stack_sched_in(prev, task);
> -
>          if (__get_cpu_var(perf_sched_cb_usages))
>                  perf_pmu_sched_task(prev, task, true);
>  }
> @@ -3226,10 +3162,6 @@ static void unaccount_event_cpu(struct perf_event *event, int cpu)
>          if (event->parent)
>                  return;
>
> -        if (has_branch_stack(event)) {
> -                if (!(event->attach_state & PERF_ATTACH_TASK))
> -                        atomic_dec(&per_cpu(perf_branch_stack_events, cpu));
> -        }
>          if (is_cgroup_event(event))
>                  atomic_dec(&per_cpu(perf_cgroup_events, cpu));
>  }
> @@ -6655,10 +6587,6 @@ static void account_event_cpu(struct perf_event *event, int cpu)
>          if (event->parent)
>                  return;
>
> -        if (has_branch_stack(event)) {
> -                if (!(event->attach_state & PERF_ATTACH_TASK))
> -                        atomic_inc(&per_cpu(perf_branch_stack_events, cpu));
> -        }
>          if (is_cgroup_event(event))
>                  atomic_inc(&per_cpu(perf_cgroup_events, cpu));
>  }
> --
> 1.8.4.2
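
For the nr_branch_stack cleanup, something along these lines on top of
this patch should be enough. This is an untested sketch from memory:
the hunk locations are approximate, and it assumes list_add_event() and
list_del_event() are the only remaining users of the field once this
series is applied.

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ struct perf_event_context {
         int                             nr_cgroups;      /* cgroup evts */
-        int                             nr_branch_stack; /* branch_stack evt count */
diff --git a/kernel/events/core.c b/kernel/events/core.c
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ static void list_add_event(struct perf_event *event, struct perf_event_context *ctx)
-        if (has_branch_stack(event))
-                ctx->nr_branch_stack++;
@@ static void list_del_event(struct perf_event *event, struct perf_event_context *ctx)
-        if (has_branch_stack(event))
-                ctx->nr_branch_stack--;

With that, nothing in the core counts branch stack events per context
anymore, which is the point of moving the flush down into the x86 code.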