Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752335AbdHRFWx (ORCPT ); Fri, 18 Aug 2017 01:22:53 -0400 Received: from mga09.intel.com ([134.134.136.24]:46553 "EHLO mga09.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751075AbdHRFWw (ORCPT ); Fri, 18 Aug 2017 01:22:52 -0400 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.41,391,1498546800"; d="scan'208";a="1207063048" Subject: [PATCH v7 2/2] perf/core: add mux switch to skip to the current CPU's events list on mux interrupt From: Alexey Budankov To: Peter Zijlstra , Ingo Molnar , Arnaldo Carvalho de Melo , Alexander Shishkin Cc: Andi Kleen , Kan Liang , Dmitri Prokhorov , Valery Cherepennikov , Mark Rutland , Stephane Eranian , David Carrillo-Cisneros , linux-kernel References: <96c7776f-1f17-a39e-23e9-658596216d6b@linux.intel.com> Organization: Intel Corp. Message-ID: Date: Fri, 18 Aug 2017 08:22:47 +0300 User-Agent: Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.0) Gecko/20100101 Thunderbird/52.2.1 MIME-Version: 1.0 In-Reply-To: Content-Type: text/plain; charset=utf-8 Content-Language: en-US Content-Transfer-Encoding: 7bit Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 14550 Lines: 446 This patch implements mux switch that triggers skipping to the current CPU's events list at mulitplexing hrtimer interrupt handler as well as adoption of the switch in the existing implementation. perf_event_groups_iterate_cpu() API is introduced to implement iteration thru the certain CPU groups list skipping groups allocated for the other CPUs. Signed-off-by: Alexey Budankov --- kernel/events/core.c | 193 ++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 137 insertions(+), 56 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 08ccfb2..aeb0f81 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -556,11 +556,11 @@ void perf_sample_event_took(u64 sample_len_ns) static atomic64_t perf_event_id; static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx, - enum event_type_t event_type); + enum event_type_t event_type, int mux); static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, enum event_type_t event_type, - struct task_struct *task); + struct task_struct *task, int mux); static void update_context_time(struct perf_event_context *ctx); static u64 perf_event_time(struct perf_event *event); @@ -702,6 +702,7 @@ static void perf_cgroup_switch(struct task_struct *task, int mode) struct perf_cpu_context *cpuctx; struct list_head *list; unsigned long flags; + int mux = 0; /* * Disable interrupts and preemption to avoid this CPU's @@ -717,7 +718,7 @@ static void perf_cgroup_switch(struct task_struct *task, int mode) perf_pmu_disable(cpuctx->ctx.pmu); if (mode & PERF_CGROUP_SWOUT) { - cpu_ctx_sched_out(cpuctx, EVENT_ALL); + cpu_ctx_sched_out(cpuctx, EVENT_ALL, mux); /* * must not be done before ctxswout due * to event_filter_match() in event_sched_out() @@ -736,7 +737,7 @@ static void perf_cgroup_switch(struct task_struct *task, int mode) */ cpuctx->cgrp = perf_cgroup_from_task(task, &cpuctx->ctx); - cpu_ctx_sched_in(cpuctx, EVENT_ALL, task); + cpu_ctx_sched_in(cpuctx, EVENT_ALL, task, mux); } perf_pmu_enable(cpuctx->ctx.pmu); perf_ctx_unlock(cpuctx, cpuctx->task_ctx); @@ -1613,8 +1614,16 @@ perf_event_groups_rotate(struct rb_root *groups, int cpu) */ #define perf_event_groups_for_each(event, iter, tree, node, list, link) \ for (iter = rb_first(tree); iter; iter = rb_next(iter)) \ - list_for_each_entry(event, &(rb_entry(iter, \ - typeof(*event), node)->list), link) + list_for_each_entry(event, &(rb_entry(iter, \ + typeof(*event), node)->list), link) + +/* + * Iterate event groups related to specific cpu. + */ +#define perf_event_groups_for_each_cpu(event, cpu, tree, list, link) \ + list = perf_event_groups_get_list(tree, cpu); \ + if (list) \ + list_for_each_entry(event, list, link) /* * Add a event from the lists for its context. @@ -2397,36 +2406,38 @@ static void add_event_to_ctx(struct perf_event *event, static void ctx_sched_out(struct perf_event_context *ctx, struct perf_cpu_context *cpuctx, - enum event_type_t event_type); + enum event_type_t event_type, int mux); static void ctx_sched_in(struct perf_event_context *ctx, struct perf_cpu_context *cpuctx, enum event_type_t event_type, - struct task_struct *task); + struct task_struct *task, int mux); static void task_ctx_sched_out(struct perf_cpu_context *cpuctx, struct perf_event_context *ctx, enum event_type_t event_type) { + int mux = 0; + if (!cpuctx->task_ctx) return; if (WARN_ON_ONCE(ctx != cpuctx->task_ctx)) return; - ctx_sched_out(ctx, cpuctx, event_type); + ctx_sched_out(ctx, cpuctx, event_type, mux); } static void perf_event_sched_in(struct perf_cpu_context *cpuctx, struct perf_event_context *ctx, - struct task_struct *task) + struct task_struct *task, int mux) { - cpu_ctx_sched_in(cpuctx, EVENT_PINNED, task); + cpu_ctx_sched_in(cpuctx, EVENT_PINNED, task, mux); if (ctx) - ctx_sched_in(ctx, cpuctx, EVENT_PINNED, task); - cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE, task); + ctx_sched_in(ctx, cpuctx, EVENT_PINNED, task, mux); + cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE, task, mux); if (ctx) - ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE, task); + ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE, task, mux); } /* @@ -2450,6 +2461,7 @@ static void ctx_resched(struct perf_cpu_context *cpuctx, { enum event_type_t ctx_event_type = event_type & EVENT_ALL; bool cpu_event = !!(event_type & EVENT_CPU); + int mux = 0; /* * If pinned groups are involved, flexible groups also need to be @@ -2470,11 +2482,11 @@ static void ctx_resched(struct perf_cpu_context *cpuctx, * - otherwise, do nothing more. */ if (cpu_event) - cpu_ctx_sched_out(cpuctx, ctx_event_type); + cpu_ctx_sched_out(cpuctx, ctx_event_type, mux); else if (ctx_event_type & EVENT_PINNED) - cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); + cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE, mux); - perf_event_sched_in(cpuctx, task_ctx, current); + perf_event_sched_in(cpuctx, task_ctx, current, mux); perf_pmu_enable(cpuctx->ctx.pmu); } @@ -2491,7 +2503,7 @@ static int __perf_install_in_context(void *info) struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); struct perf_event_context *task_ctx = cpuctx->task_ctx; bool reprogram = true; - int ret = 0; + int ret = 0, mux =0; raw_spin_lock(&cpuctx->ctx.lock); if (ctx->task) { @@ -2518,7 +2530,7 @@ static int __perf_install_in_context(void *info) } if (reprogram) { - ctx_sched_out(ctx, cpuctx, EVENT_TIME); + ctx_sched_out(ctx, cpuctx, EVENT_TIME, mux); add_event_to_ctx(event, ctx); ctx_resched(cpuctx, task_ctx, get_event_type(event)); } else { @@ -2655,13 +2667,14 @@ static void __perf_event_enable(struct perf_event *event, { struct perf_event *leader = event->group_leader; struct perf_event_context *task_ctx; + int mux = 0; if (event->state >= PERF_EVENT_STATE_INACTIVE || event->state <= PERF_EVENT_STATE_ERROR) return; if (ctx->is_active) - ctx_sched_out(ctx, cpuctx, EVENT_TIME); + ctx_sched_out(ctx, cpuctx, EVENT_TIME, mux); __perf_event_mark_enabled(event); @@ -2671,7 +2684,7 @@ static void __perf_event_enable(struct perf_event *event, if (!event_filter_match(event)) { if (is_cgroup_event(event)) perf_cgroup_defer_enabled(event); - ctx_sched_in(ctx, cpuctx, EVENT_TIME, current); + ctx_sched_in(ctx, cpuctx, EVENT_TIME, current, mux); return; } @@ -2680,7 +2693,7 @@ static void __perf_event_enable(struct perf_event *event, * then don't put it on unless the group is on. */ if (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE) { - ctx_sched_in(ctx, cpuctx, EVENT_TIME, current); + ctx_sched_in(ctx, cpuctx, EVENT_TIME, current, mux); return; } @@ -2876,11 +2889,13 @@ EXPORT_SYMBOL_GPL(perf_event_refresh); static void ctx_sched_out(struct perf_event_context *ctx, struct perf_cpu_context *cpuctx, - enum event_type_t event_type) + enum event_type_t event_type, int mux) { int is_active = ctx->is_active; + struct list_head *group_list; struct perf_event *event; struct rb_node *node; + int sw = -1, cpu = smp_processor_id(); lockdep_assert_held(&ctx->lock); if (likely(!ctx->nr_events)) { @@ -2926,17 +2941,47 @@ static void ctx_sched_out(struct perf_event_context *ctx, perf_pmu_disable(ctx->pmu); - if (is_active & EVENT_PINNED) - perf_event_groups_for_each(event, node, - &ctx->pinned_groups, group_node, - group_list, group_entry) - group_sched_out(event, cpuctx, ctx); + if (is_active & EVENT_PINNED) { + if (mux) { + perf_event_groups_for_each_cpu(event, cpu, + &ctx->pinned_groups, + group_list, group_entry) { + group_sched_out(event, cpuctx, ctx); + } + perf_event_groups_for_each_cpu(event, sw, + &ctx->pinned_groups, + group_list, group_entry) { + group_sched_out(event, cpuctx, ctx); + } + } else { + perf_event_groups_for_each(event, node, + &ctx->pinned_groups, group_node, + group_list, group_entry) { + group_sched_out(event, cpuctx, ctx); + } + } + } - if (is_active & EVENT_FLEXIBLE) - perf_event_groups_for_each(event, node, - &ctx->flexible_groups, group_node, - group_list, group_entry) - group_sched_out(event, cpuctx, ctx); + if (is_active & EVENT_FLEXIBLE) { + if (mux) { + perf_event_groups_for_each_cpu(event, cpu, + &ctx->flexible_groups, + group_list, group_entry) { + group_sched_out(event, cpuctx, ctx); + } + perf_event_groups_for_each_cpu(event, sw, + &ctx->flexible_groups, + group_list, group_entry) { + group_sched_out(event, cpuctx, ctx); + } + } else { + perf_event_groups_for_each(event, node, + &ctx->flexible_groups, group_node, + group_list, group_entry) { + group_sched_out(event, cpuctx, ctx); + } + } + } perf_pmu_enable(ctx->pmu); } @@ -3225,9 +3270,9 @@ void __perf_event_task_sched_out(struct task_struct *task, * Called with IRQs disabled */ static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx, - enum event_type_t event_type) + enum event_type_t event_type, int mux) { - ctx_sched_out(&cpuctx->ctx, cpuctx, event_type); + ctx_sched_out(&cpuctx->ctx, cpuctx, event_type, mux); } static void @@ -3287,11 +3332,13 @@ static void ctx_sched_in(struct perf_event_context *ctx, struct perf_cpu_context *cpuctx, enum event_type_t event_type, - struct task_struct *task) + struct task_struct *task, int mux) { int is_active = ctx->is_active; + struct list_head *group_list; struct perf_event *event; struct rb_node *node; + int sw = -1, cpu = smp_processor_id(); lockdep_assert_held(&ctx->lock); @@ -3319,35 +3366,69 @@ ctx_sched_in(struct perf_event_context *ctx, * First go through the list and put on any pinned groups * in order to give them the best chance of going on. */ - if (is_active & EVENT_PINNED) - perf_event_groups_for_each(event, node, - &ctx->pinned_groups, group_node, - group_list, group_entry) - ctx_pinned_sched_in(event, cpuctx, ctx); + if (is_active & EVENT_PINNED) { + if (mux) { + perf_event_groups_for_each_cpu(event, sw, + &ctx->pinned_groups, + group_list, group_entry) { + ctx_pinned_sched_in(event, cpuctx, ctx); + } + perf_event_groups_for_each_cpu(event, cpu, + &ctx->pinned_groups, + group_list, group_entry) { + ctx_pinned_sched_in(event, cpuctx, ctx); + } + } else { + perf_event_groups_for_each(event, node, + &ctx->pinned_groups, group_node, + group_list, group_entry) { + ctx_pinned_sched_in(event, cpuctx, ctx); + } + } + } /* Then walk through the lower prio flexible groups */ if (is_active & EVENT_FLEXIBLE) { int can_add_hw = 1; - perf_event_groups_for_each(event, node, - &ctx->flexible_groups, group_node, - group_list, group_entry) - ctx_flexible_sched_in(event, cpuctx, ctx, &can_add_hw); + if (mux) { + perf_event_groups_for_each_cpu(event, sw, + &ctx->flexible_groups, + group_list, group_entry) { + ctx_flexible_sched_in(event, cpuctx, + ctx, &can_add_hw); + } + can_add_hw = 1; + perf_event_groups_for_each_cpu(event, cpu, + &ctx->flexible_groups, + group_list, group_entry) { + ctx_flexible_sched_in(event, cpuctx, + ctx, &can_add_hw); + } + } else { + perf_event_groups_for_each(event, node, + &ctx->flexible_groups, group_node, + group_list, group_entry) { + ctx_flexible_sched_in(event, cpuctx, + ctx, &can_add_hw); + } + } } } static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, enum event_type_t event_type, - struct task_struct *task) + struct task_struct *task, int mux) { struct perf_event_context *ctx = &cpuctx->ctx; - ctx_sched_in(ctx, cpuctx, event_type, task); + ctx_sched_in(ctx, cpuctx, event_type, task, mux); } static void perf_event_context_sched_in(struct perf_event_context *ctx, struct task_struct *task) { struct perf_cpu_context *cpuctx; + int mux = 0; cpuctx = __get_cpu_context(ctx); if (cpuctx->task_ctx == ctx) @@ -3371,8 +3452,8 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx, * events, no need to flip the cpuctx's events around. */ if (!RB_EMPTY_ROOT(&ctx->pinned_groups)) - cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); - perf_event_sched_in(cpuctx, ctx, task); + cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE, mux); + perf_event_sched_in(cpuctx, ctx, task, mux); perf_pmu_enable(ctx->pmu); unlock: @@ -3618,7 +3699,7 @@ static void rotate_ctx(struct perf_event_context *ctx) static int perf_rotate_context(struct perf_cpu_context *cpuctx) { struct perf_event_context *ctx = NULL; - int rotate = 0; + int rotate = 0, mux = 1; if (cpuctx->ctx.nr_events) { if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active) @@ -3637,15 +3718,15 @@ static int perf_rotate_context(struct perf_cpu_context *cpuctx) perf_ctx_lock(cpuctx, cpuctx->task_ctx); perf_pmu_disable(cpuctx->ctx.pmu); - cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); + cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE, mux); if (ctx) - ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE); + ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE, mux); rotate_ctx(&cpuctx->ctx); if (ctx) rotate_ctx(ctx); - perf_event_sched_in(cpuctx, ctx, current); + perf_event_sched_in(cpuctx, ctx, current, mux); perf_pmu_enable(cpuctx->ctx.pmu); perf_ctx_unlock(cpuctx, cpuctx->task_ctx); @@ -3696,7 +3777,7 @@ static void perf_event_enable_on_exec(int ctxn) struct perf_cpu_context *cpuctx; struct perf_event *event; unsigned long flags; - int enabled = 0; + int enabled = 0, mux = 0; local_irq_save(flags); ctx = current->perf_event_ctxp[ctxn]; @@ -3705,7 +3786,7 @@ static void perf_event_enable_on_exec(int ctxn) cpuctx = __get_cpu_context(ctx); perf_ctx_lock(cpuctx, ctx); - ctx_sched_out(ctx, cpuctx, EVENT_TIME); + ctx_sched_out(ctx, cpuctx, EVENT_TIME, mux); list_for_each_entry(event, &ctx->event_list, event_entry) { enabled |= event_enable_on_exec(event, ctx); event_type |= get_event_type(event); @@ -3718,7 +3799,7 @@ static void perf_event_enable_on_exec(int ctxn) clone_ctx = unclone_ctx(ctx); ctx_resched(cpuctx, ctx, event_type); } else { - ctx_sched_in(ctx, cpuctx, EVENT_TIME, current); + ctx_sched_in(ctx, cpuctx, EVENT_TIME, current, mux); } perf_ctx_unlock(cpuctx, ctx);