From: Alexander Shishkin
To: Alexey Budankov, Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo
Cc: Andi Kleen, Kan Liang, Dmitri Prokhorov, Valery Cherepennikov,
	Mark Rutland, Stephane Eranian, David Carrillo-Cisneros, linux-kernel
Subject: Re: [PATCH v7 2/2] perf/core: add mux switch to skip to the current CPU's events list on mux interrupt
Date: Wed, 23 Aug 2017 14:54:27 +0300

Alexey Budankov writes:

> This patch implements mux switch that triggers skipping to the
> current CPU's events list at mulitplexing hrtimer interrupt
> handler as well as adoption of the switch in the existing
> implementation.
>
> perf_event_groups_iterate_cpu() API is introduced to implement
> iteration thru the certain CPU groups list skipping groups

"through"

> allocated for the other CPUs.
>
> Signed-off-by: Alexey Budankov
> ---
>  kernel/events/core.c | 193 ++++++++++++++++++++++++++++++++++++---------------
>  1 file changed, 137 insertions(+), 56 deletions(-)
>
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index 08ccfb2..aeb0f81 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -556,11 +556,11 @@ void perf_sample_event_took(u64 sample_len_ns)
>  static atomic64_t perf_event_id;
>
>  static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx,
> -			      enum event_type_t event_type);
> +			      enum event_type_t event_type, int mux);
>
>  static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
>  			     enum event_type_t event_type,
> -			     struct task_struct *task);
> +			     struct task_struct *task, int mux);
>
>  static void update_context_time(struct perf_event_context *ctx);
>  static u64 perf_event_time(struct perf_event *event);
> @@ -702,6 +702,7 @@ static void perf_cgroup_switch(struct task_struct *task, int mode)
>  	struct perf_cpu_context *cpuctx;
>  	struct list_head *list;
>  	unsigned long flags;
> +	int mux = 0;
>
>  	/*
>  	 * Disable interrupts and preemption to avoid this CPU's
> @@ -717,7 +718,7 @@ static void perf_cgroup_switch(struct task_struct *task, int mode)
>  		perf_pmu_disable(cpuctx->ctx.pmu);
>
>  		if (mode & PERF_CGROUP_SWOUT) {
> -			cpu_ctx_sched_out(cpuctx, EVENT_ALL);
> +			cpu_ctx_sched_out(cpuctx, EVENT_ALL, mux);
>  			/*
>  			 * must not be done before ctxswout due
>  			 * to event_filter_match() in event_sched_out()
> @@ -736,7 +737,7 @@ static void perf_cgroup_switch(struct task_struct *task, int mode)
>  			 */
>  			cpuctx->cgrp = perf_cgroup_from_task(task,
>  							     &cpuctx->ctx);
> -			cpu_ctx_sched_in(cpuctx, EVENT_ALL, task);
> +			cpu_ctx_sched_in(cpuctx, EVENT_ALL, task, mux);

'mux' is always zero in this function, isn't it?
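
If so, the new argument buys nothing on this path. Purely as a sketch
(untested), the two calls above could pass the literal and the local
'mux' could be dropped:

		cpu_ctx_sched_out(cpuctx, EVENT_ALL, 0);
		/* ... */
		cpu_ctx_sched_in(cpuctx, EVENT_ALL, task, 0);

The same goes for the other callers below that only ever pass mux == 0.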
>  		}
>  		perf_pmu_enable(cpuctx->ctx.pmu);
>  		perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
> @@ -1613,8 +1614,16 @@ perf_event_groups_rotate(struct rb_root *groups, int cpu)
>   */
>  #define perf_event_groups_for_each(event, iter, tree, node, list, link) \
>  	for (iter = rb_first(tree); iter; iter = rb_next(iter))	\
> -		list_for_each_entry(event, &(rb_entry(iter,		\
> -				typeof(*event), node)->list), link)
> +		list_for_each_entry(event, &(rb_entry(iter,		\
> +				typeof(*event), node)->list), link)

Is this an indentation change? What is it doing here?

> +
> +/*
> + * Iterate event groups related to specific cpu.
> + */
> +#define perf_event_groups_for_each_cpu(event, cpu, tree, list, link)	\
> +	list = perf_event_groups_get_list(tree, cpu);			\
> +	if (list)							\
> +		list_for_each_entry(event, list, link)

..or not, if there's no list. (One possible rework is sketched a bit
further down.)

>
>  /*
>   * Add a event from the lists for its context.
> @@ -2397,36 +2406,38 @@ static void add_event_to_ctx(struct perf_event *event,
>
>  static void ctx_sched_out(struct perf_event_context *ctx,
>  			  struct perf_cpu_context *cpuctx,
> -			  enum event_type_t event_type);
> +			  enum event_type_t event_type, int mux);
>  static void
>  ctx_sched_in(struct perf_event_context *ctx,
>  	     struct perf_cpu_context *cpuctx,
>  	     enum event_type_t event_type,
> -	     struct task_struct *task);
> +	     struct task_struct *task, int mux);
>
>  static void task_ctx_sched_out(struct perf_cpu_context *cpuctx,
>  			       struct perf_event_context *ctx,
>  			       enum event_type_t event_type)
>  {
> +	int mux = 0;
> +
>  	if (!cpuctx->task_ctx)
>  		return;
>
>  	if (WARN_ON_ONCE(ctx != cpuctx->task_ctx))
>  		return;
>
> -	ctx_sched_out(ctx, cpuctx, event_type);
> +	ctx_sched_out(ctx, cpuctx, event_type, mux);

Just use 0.

>  }
>
>  static void perf_event_sched_in(struct perf_cpu_context *cpuctx,
>  				struct perf_event_context *ctx,
> -				struct task_struct *task)
> +				struct task_struct *task, int mux)
>  {
> -	cpu_ctx_sched_in(cpuctx, EVENT_PINNED, task);
> +	cpu_ctx_sched_in(cpuctx, EVENT_PINNED, task, mux);
>  	if (ctx)
> -		ctx_sched_in(ctx, cpuctx, EVENT_PINNED, task);
> -	cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE, task);
> +		ctx_sched_in(ctx, cpuctx, EVENT_PINNED, task, mux);
> +	cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE, task, mux);
>  	if (ctx)
> -		ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE, task);
> +		ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE, task, mux);
>  }
>
>  /*
> @@ -2450,6 +2461,7 @@ static void ctx_resched(struct perf_cpu_context *cpuctx,
>  {
>  	enum event_type_t ctx_event_type = event_type & EVENT_ALL;
>  	bool cpu_event = !!(event_type & EVENT_CPU);
> +	int mux = 0;
>
>  	/*
>  	 * If pinned groups are involved, flexible groups also need to be
> @@ -2470,11 +2482,11 @@ static void ctx_resched(struct perf_cpu_context *cpuctx,
>  	 * - otherwise, do nothing more.
>  	 */
>  	if (cpu_event)
> -		cpu_ctx_sched_out(cpuctx, ctx_event_type);
> +		cpu_ctx_sched_out(cpuctx, ctx_event_type, mux);
>  	else if (ctx_event_type & EVENT_PINNED)
> -		cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
> +		cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE, mux);
>
> -	perf_event_sched_in(cpuctx, task_ctx, current);
> +	perf_event_sched_in(cpuctx, task_ctx, current, mux);

Also mux==0 in all cases in this function.
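
On the perf_event_groups_for_each_cpu() rework mentioned above: as
posted the macro expands to two statements, an assignment followed by a
bare if, so it can't be used everywhere a single statement is expected,
and a caller's else would bind to the macro's if. One possible rework,
only a sketch and untested, keeps the "silently do nothing when there is
no list" behaviour but stays a single for statement:

/*
 * Iterate event groups related to specific cpu,
 * or do nothing if that cpu has no groups list.
 */
#define perf_event_groups_for_each_cpu(event, cpu, tree, list, link)	\
	for (list = perf_event_groups_get_list(tree, cpu);		\
	     list; list = NULL)						\
		list_for_each_entry(event, list, link)

The outer for runs its body at most once (list is reset to NULL after
the first pass), so the whole thing expands to one statement and still
skips cleanly when perf_event_groups_get_list() returns NULL.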
>  	perf_pmu_enable(cpuctx->ctx.pmu);
>  }
>
> @@ -2491,7 +2503,7 @@ static int __perf_install_in_context(void *info)
>  	struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
>  	struct perf_event_context *task_ctx = cpuctx->task_ctx;
>  	bool reprogram = true;
> -	int ret = 0;
> +	int ret = 0, mux =0;
>
>  	raw_spin_lock(&cpuctx->ctx.lock);
>  	if (ctx->task) {
> @@ -2518,7 +2530,7 @@ static int __perf_install_in_context(void *info)
>  	}
>
>  	if (reprogram) {
> -		ctx_sched_out(ctx, cpuctx, EVENT_TIME);
> +		ctx_sched_out(ctx, cpuctx, EVENT_TIME, mux);
>  		add_event_to_ctx(event, ctx);
>  		ctx_resched(cpuctx, task_ctx, get_event_type(event));
>  	} else {
> @@ -2655,13 +2667,14 @@ static void __perf_event_enable(struct perf_event *event,
>  {
>  	struct perf_event *leader = event->group_leader;
>  	struct perf_event_context *task_ctx;
> +	int mux = 0;
>
>  	if (event->state >= PERF_EVENT_STATE_INACTIVE ||
>  	    event->state <= PERF_EVENT_STATE_ERROR)
>  		return;
>
>  	if (ctx->is_active)
> -		ctx_sched_out(ctx, cpuctx, EVENT_TIME);
> +		ctx_sched_out(ctx, cpuctx, EVENT_TIME, mux);
>
>  	__perf_event_mark_enabled(event);
>
> @@ -2671,7 +2684,7 @@ static void __perf_event_enable(struct perf_event *event,
>  	if (!event_filter_match(event)) {
>  		if (is_cgroup_event(event))
>  			perf_cgroup_defer_enabled(event);
> -		ctx_sched_in(ctx, cpuctx, EVENT_TIME, current);
> +		ctx_sched_in(ctx, cpuctx, EVENT_TIME, current, mux);
>  		return;
>  	}
>
> @@ -2680,7 +2693,7 @@ static void __perf_event_enable(struct perf_event *event,
>  	 * then don't put it on unless the group is on.
>  	 */
>  	if (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE) {
> -		ctx_sched_in(ctx, cpuctx, EVENT_TIME, current);
> +		ctx_sched_in(ctx, cpuctx, EVENT_TIME, current, mux);

And here.

>  		return;
>  	}
>
> @@ -2876,11 +2889,13 @@ EXPORT_SYMBOL_GPL(perf_event_refresh);
>
>  static void ctx_sched_out(struct perf_event_context *ctx,
>  			  struct perf_cpu_context *cpuctx,
> -			  enum event_type_t event_type)
> +			  enum event_type_t event_type, int mux)
>  {
>  	int is_active = ctx->is_active;
> +	struct list_head *group_list;
>  	struct perf_event *event;
>  	struct rb_node *node;
> +	int sw = -1, cpu = smp_processor_id();

Same thing seems to be happening with 'sw'.

>  	lockdep_assert_held(&ctx->lock);
>
>  	if (likely(!ctx->nr_events)) {
> @@ -2926,17 +2941,47 @@ static void ctx_sched_out(struct perf_event_context *ctx,
>
>  	perf_pmu_disable(ctx->pmu);
>
> -	if (is_active & EVENT_PINNED)
> -		perf_event_groups_for_each(event, node,
> -				&ctx->pinned_groups, group_node,
> -				group_list, group_entry)
> -			group_sched_out(event, cpuctx, ctx);
> +	if (is_active & EVENT_PINNED) {
> +		if (mux) {

So it's 'rotate', really.
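
If it is only ever true on the rotation path, naming it that (and making
it a bool) would keep the call sites self-documenting. A sketch of the
prototypes, untested and assuming nothing else changes:

static void ctx_sched_out(struct perf_event_context *ctx,
			  struct perf_cpu_context *cpuctx,
			  enum event_type_t event_type, bool rotate);

static void
ctx_sched_in(struct perf_event_context *ctx,
	     struct perf_cpu_context *cpuctx,
	     enum event_type_t event_type,
	     struct task_struct *task, bool rotate);

Then everything outside perf_rotate_context() passes false and all the
'int mux = 0' temporaries go away.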
> +			perf_event_groups_for_each_cpu(event, cpu,
> +					&ctx->pinned_groups,
> +					group_list, group_entry) {
> +				group_sched_out(event, cpuctx, ctx);
> +			}
> +			perf_event_groups_for_each_cpu(event, sw,
> +					&ctx->pinned_groups,
> +					group_list, group_entry) {
> +				group_sched_out(event, cpuctx, ctx);
> +			}
> +		} else {
> +			perf_event_groups_for_each(event, node,
> +					&ctx->pinned_groups, group_node,
> +					group_list, group_entry) {
> +				group_sched_out(event, cpuctx, ctx);
> +			}
> +		}
> +	}
>
> -	if (is_active & EVENT_FLEXIBLE)
> -		perf_event_groups_for_each(event, node,
> -				&ctx->flexible_groups, group_node,
> -				group_list, group_entry)
> -			group_sched_out(event, cpuctx, ctx);
> +	if (is_active & EVENT_FLEXIBLE) {
> +		if (mux) {
> +			perf_event_groups_for_each_cpu(event, cpu,
> +					&ctx->flexible_groups,
> +					group_list, group_entry) {
> +				group_sched_out(event, cpuctx, ctx);
> +			}
> +			perf_event_groups_for_each_cpu(event, sw,
> +					&ctx->flexible_groups,
> +					group_list, group_entry) {
> +				group_sched_out(event, cpuctx, ctx);
> +			}
> +		} else {
> +			perf_event_groups_for_each(event, node,
> +					&ctx->flexible_groups, group_node,
> +					group_list, group_entry) {
> +				group_sched_out(event, cpuctx, ctx);
> +			}
> +		}
> +	}
>
>  	perf_pmu_enable(ctx->pmu);
>  }
> @@ -3225,9 +3270,9 @@ void __perf_event_task_sched_out(struct task_struct *task,
>   * Called with IRQs disabled
>   */
>  static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx,
> -			      enum event_type_t event_type)
> +			      enum event_type_t event_type, int mux)
>  {
> -	ctx_sched_out(&cpuctx->ctx, cpuctx, event_type);
> +	ctx_sched_out(&cpuctx->ctx, cpuctx, event_type, mux);
>  }
>
>  static void
> @@ -3287,11 +3332,13 @@ static void
>  ctx_sched_in(struct perf_event_context *ctx,
>  	     struct perf_cpu_context *cpuctx,
>  	     enum event_type_t event_type,
> -	     struct task_struct *task)
> +	     struct task_struct *task, int mux)
>  {
>  	int is_active = ctx->is_active;
> +	struct list_head *group_list;
>  	struct perf_event *event;
>  	struct rb_node *node;
> +	int sw = -1, cpu = smp_processor_id();
>
>  	lockdep_assert_held(&ctx->lock);
>
> @@ -3319,35 +3366,69 @@ ctx_sched_in(struct perf_event_context *ctx,
>  	 * First go through the list and put on any pinned groups
>  	 * in order to give them the best chance of going on.
>  	 */
> -	if (is_active & EVENT_PINNED)
> -		perf_event_groups_for_each(event, node,
> -				&ctx->pinned_groups, group_node,
> -				group_list, group_entry)
> -			ctx_pinned_sched_in(event, cpuctx, ctx);
> +	if (is_active & EVENT_PINNED) {
> +		if (mux) {
> +			perf_event_groups_for_each_cpu(event, sw,
> +					&ctx->pinned_groups,
> +					group_list, group_entry) {
> +				ctx_pinned_sched_in(event, cpuctx, ctx);
> +			}
> +			perf_event_groups_for_each_cpu(event, cpu,
> +					&ctx->pinned_groups,
> +					group_list, group_entry) {
> +				ctx_pinned_sched_in(event, cpuctx, ctx);
> +			}
> +		} else {
> +			perf_event_groups_for_each(event, node,
> +					&ctx->pinned_groups, group_node,
> +					group_list, group_entry) {
> +				ctx_pinned_sched_in(event, cpuctx, ctx);
> +			}
> +		}
> +	}
>
>  	/* Then walk through the lower prio flexible groups */
>  	if (is_active & EVENT_FLEXIBLE) {
>  		int can_add_hw = 1;
> -		perf_event_groups_for_each(event, node,
> -				&ctx->flexible_groups, group_node,
> -				group_list, group_entry)
> -			ctx_flexible_sched_in(event, cpuctx, ctx, &can_add_hw);
> +		if (mux) {
> +			perf_event_groups_for_each_cpu(event, sw,
> +					&ctx->flexible_groups,
> +					group_list, group_entry) {
> +				ctx_flexible_sched_in(event, cpuctx,
> +						ctx, &can_add_hw);
> +			}
> +			can_add_hw = 1;
> +			perf_event_groups_for_each_cpu(event, cpu,
> +					&ctx->flexible_groups,
> +					group_list, group_entry) {
> +				ctx_flexible_sched_in(event, cpuctx,
> +						ctx, &can_add_hw);
> +			}
> +		} else {
> +			perf_event_groups_for_each(event, node,
> +					&ctx->flexible_groups, group_node,
> +					group_list, group_entry) {
> +				ctx_flexible_sched_in(event, cpuctx,
> +						ctx, &can_add_hw);
> +			}
> +		}
>  	}
>  }
>
>  static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
>  			     enum event_type_t event_type,
> -			     struct task_struct *task)
> +			     struct task_struct *task, int mux)
>  {
>  	struct perf_event_context *ctx = &cpuctx->ctx;
>
> -	ctx_sched_in(ctx, cpuctx, event_type, task);
> +	ctx_sched_in(ctx, cpuctx, event_type, task, mux);
>  }
>
>  static void perf_event_context_sched_in(struct perf_event_context *ctx,
>  					struct task_struct *task)
>  {
>  	struct perf_cpu_context *cpuctx;
> +	int mux = 0;
>
>  	cpuctx = __get_cpu_context(ctx);
>  	if (cpuctx->task_ctx == ctx)
> @@ -3371,8 +3452,8 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
>  	 * events, no need to flip the cpuctx's events around.
>  	 */
>  	if (!RB_EMPTY_ROOT(&ctx->pinned_groups))
> -		cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
> -	perf_event_sched_in(cpuctx, ctx, task);
> +		cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE, mux);
> +	perf_event_sched_in(cpuctx, ctx, task, mux);
>  	perf_pmu_enable(ctx->pmu);
>
>  unlock:
> @@ -3618,7 +3699,7 @@ static void rotate_ctx(struct perf_event_context *ctx)
>  static int perf_rotate_context(struct perf_cpu_context *cpuctx)
>  {
>  	struct perf_event_context *ctx = NULL;
> -	int rotate = 0;
> +	int rotate = 0, mux = 1;
>
>  	if (cpuctx->ctx.nr_events) {
>  		if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active)
> @@ -3637,15 +3718,15 @@ static int perf_rotate_context(struct perf_cpu_context *cpuctx)
>  	perf_ctx_lock(cpuctx, cpuctx->task_ctx);
>  	perf_pmu_disable(cpuctx->ctx.pmu);
>
> -	cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
> +	cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE, mux);

It's '1'.
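
More generally, the mux branches in ctx_sched_out()/ctx_sched_in()
repeat the same walk twice, once for the current CPU's list and once for
the software (-1) list. A hypothetical helper (name and shape made up
here, untested) could fold that pattern for the sched-out side:

static void ctx_cpu_groups_sched_out(struct perf_event_context *ctx,
				     struct perf_cpu_context *cpuctx,
				     struct rb_root *groups, int cpu)
{
	/* Only walk the groups list of @cpu; no list, nothing to do. */
	struct list_head *list = perf_event_groups_get_list(groups, cpu);
	struct perf_event *event;

	if (!list)
		return;

	list_for_each_entry(event, list, group_entry)
		group_sched_out(event, cpuctx, ctx);
}

With that, the rotation path becomes two calls per tree (cpu and -1) and
the else branch keeps the full-tree iteration; the sched-in side would
need a similar helper, modulo the can_add_hw bookkeeping for flexible
groups.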