Return-Path:
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
    id S1751715AbdGRKpW (ORCPT ); Tue, 18 Jul 2017 06:45:22 -0400
Received: from terminus.zytor.com ([65.50.211.136]:58319 "EHLO terminus.zytor.com"
    rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751404AbdGRKpS (ORCPT );
    Tue, 18 Jul 2017 06:45:18 -0400
Date: Tue, 18 Jul 2017 03:39:49 -0700
From: tip-bot for Ingo Molnar
Message-ID:
Cc: acme@redhat.com, alexander.shishkin@linux.intel.com, peterz@infradead.org,
    eranian@gmail.com, mingo@kernel.org, linux-kernel@vger.kernel.org,
    jolsa@redhat.com, torvalds@linux-foundation.org, acme@infradead.org,
    hpa@zytor.com, tglx@linutronix.de, vincent.weaver@maine.edu
Reply-To: acme@infradead.org, tglx@linutronix.de, vincent.weaver@maine.edu,
    hpa@zytor.com, peterz@infradead.org, eranian@gmail.com, acme@redhat.com,
    alexander.shishkin@linux.intel.com, jolsa@redhat.com, mingo@kernel.org,
    linux-kernel@vger.kernel.org, torvalds@linux-foundation.org
In-Reply-To: <20170715110049.36jvxnidy2flh6ll@gmail.com>
References: <20170715110049.36jvxnidy2flh6ll@gmail.com>
To: linux-tip-commits@vger.kernel.org
Subject: [tip:perf/urgent] Revert "perf/core: Optimize event rescheduling on active contexts"
Git-Commit-ID: 770f8eb8a990a8904bfd8a6849be147b40b6e1aa
X-Mailer: tip-git-log-daemon
Robot-ID:
Robot-Unsubscribe: Contact to get blacklisted from these emails
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
Content-Type: text/plain; charset=UTF-8
Content-Disposition: inline
Sender: linux-kernel-owner@vger.kernel.org
List-ID:
X-Mailing-List: linux-kernel@vger.kernel.org
Content-Length: 8074
Lines: 234

Commit-ID:  770f8eb8a990a8904bfd8a6849be147b40b6e1aa
Gitweb:     http://git.kernel.org/tip/770f8eb8a990a8904bfd8a6849be147b40b6e1aa
Author:     Ingo Molnar
AuthorDate: Sat, 15 Jul 2017 13:00:49 +0200
Committer:  Ingo Molnar
CommitDate: Tue, 18 Jul 2017 10:44:47 +0200

Revert "perf/core: Optimize event rescheduling on active contexts"

This reverts commit 487f05e18aa4efacee6357480f293a5afe6593b5.

Vince Weaver reported that it breaks a testcase for pinned events:

| I've bisected one of them, this report is about:
|
|     tests/overflow/simul_oneshot_group_overflow
|
| This test creates an event group containing two sampling events, set
| to overflow to a signal handler (which disables and then refreshes the
| event).
|
| On a good kernel you get the following:
|     Event perf::instructions with period 1000000
|     Event perf::instructions with period 2000000
|     fd 3 overflows: 946 (perf::instructions/1000000)
|     fd 4 overflows: 473 (perf::instructions/2000000)
|     Ending counts:
|         Count 0: 946379875
|         Count 1: 946365218
|
| With the broken kernels you get:
|     Event perf::instructions with period 1000000
|     Event perf::instructions with period 2000000
|     fd 3 overflows: 938 (perf::instructions/1000000)
|     fd 4 overflows: 318 (perf::instructions/2000000)
|     Ending counts:
|         Count 0: 946373080
|         Count 1: 653373058
...
| additional relevant detail:
| in the failing case, the group leader of the event set has
| .pinned=1
| If I change that to .pinned=0 then the test passes.

As it's an optimization we can revert it for now until the root cause is
found.
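For illustration, the scenario described above boils down to roughly the
following userspace sketch: a pinned, sampling group leader plus one sampling
sibling, both counting perf::instructions, with overflows routed to a SIGIO
handler that disables and then re-arms the overflowed event. This is only a
rough sketch of the kind of setup the testcase uses, not the PAPI test
itself; the periods, the fcntl/ioctl sequence and the omitted error handling
are assumptions made for the example.

/*
 * Rough sketch of the reported scenario (not the actual PAPI test):
 * a pinned group leader and one sibling, both sampling instructions,
 * overflowing to a SIGIO handler that disables and then re-arms the
 * overflowed event via PERF_EVENT_IOC_REFRESH.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

static int perf_event_open(struct perf_event_attr *attr, pid_t pid,
                           int cpu, int group_fd, unsigned long flags)
{
        return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

static void overflow_handler(int sig, siginfo_t *info, void *uc)
{
        (void)sig; (void)uc;
        /* "disables and then refreshes the event" */
        ioctl(info->si_fd, PERF_EVENT_IOC_DISABLE, 0);
        ioctl(info->si_fd, PERF_EVENT_IOC_REFRESH, 1);
}

int main(void)
{
        struct sigaction sa = { .sa_sigaction = overflow_handler,
                                .sa_flags = SA_SIGINFO };
        struct perf_event_attr attr;
        long long count;
        int fds[2];

        sigaction(SIGIO, &sa, NULL);

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_HARDWARE;
        attr.config = PERF_COUNT_HW_INSTRUCTIONS;
        attr.sample_period = 1000000;
        attr.pinned = 1;                /* the detail that makes the test fail */
        attr.disabled = 1;
        fds[0] = perf_event_open(&attr, 0, -1, -1, 0);          /* group leader */

        attr.sample_period = 2000000;
        attr.pinned = 0;
        attr.disabled = 0;
        fds[1] = perf_event_open(&attr, 0, -1, fds[0], 0);      /* sibling */

        for (int i = 0; i < 2; i++) {
                /* Route overflow notifications to SIGIO and arm one overflow. */
                fcntl(fds[i], F_SETFL, fcntl(fds[i], F_GETFL) | O_ASYNC);
                fcntl(fds[i], F_SETSIG, SIGIO);
                fcntl(fds[i], F_SETOWN, getpid());
                ioctl(fds[i], PERF_EVENT_IOC_REFRESH, 1);
        }

        /* ... run the workload being measured here ... */

        for (int i = 0; i < 2; i++) {
                read(fds[i], &count, sizeof(count));
                printf("Count %d: %lld\n", i, count);
        }
        return 0;
}

On a good kernel the two ending counts come out nearly identical (as in the
"good" output quoted above); with the broken optimization the sibling of the
pinned leader is scheduled out part of the time and undercounts.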
Adrian Hunter
Reported-by: Vince Weaver
Cc: Alexander Shishkin
Cc: Arnaldo Carvalho de Melo
Cc: Arnaldo Carvalho de Melo
Cc: Jiri Olsa
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Stephane Eranian
Cc: Thomas Gleixner
Link: http://lkml.kernel.org/r/20170715110049.36jvxnidy2flh6ll@gmail.com
Signed-off-by: Ingo Molnar
---
 kernel/events/core.c | 80 ++++++++--------------------------------------------
 1 file changed, 11 insertions(+), 69 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 9747e42..778aa25 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -359,8 +359,6 @@ enum event_type_t {
         EVENT_FLEXIBLE = 0x1,
         EVENT_PINNED = 0x2,
         EVENT_TIME = 0x4,
-        /* see ctx_resched() for details */
-        EVENT_CPU = 0x8,
         EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED,
 };
 
@@ -1445,20 +1443,6 @@ static void update_group_times(struct perf_event *leader)
                 update_event_times(event);
 }
 
-static enum event_type_t get_event_type(struct perf_event *event)
-{
-        struct perf_event_context *ctx = event->ctx;
-        enum event_type_t event_type;
-
-        lockdep_assert_held(&ctx->lock);
-
-        event_type = event->attr.pinned ? EVENT_PINNED : EVENT_FLEXIBLE;
-        if (!ctx->task)
-                event_type |= EVENT_CPU;
-
-        return event_type;
-}
-
 static struct list_head *
 ctx_group_list(struct perf_event *event, struct perf_event_context *ctx)
 {
@@ -2232,8 +2216,7 @@ ctx_sched_in(struct perf_event_context *ctx,
              struct task_struct *task);
 
 static void task_ctx_sched_out(struct perf_cpu_context *cpuctx,
-                               struct perf_event_context *ctx,
-                               enum event_type_t event_type)
+                               struct perf_event_context *ctx)
 {
         if (!cpuctx->task_ctx)
                 return;
@@ -2241,7 +2224,7 @@ static void task_ctx_sched_out(struct perf_cpu_context *cpuctx,
         if (WARN_ON_ONCE(ctx != cpuctx->task_ctx))
                 return;
 
-        ctx_sched_out(ctx, cpuctx, event_type);
+        ctx_sched_out(ctx, cpuctx, EVENT_ALL);
 }
 
 static void perf_event_sched_in(struct perf_cpu_context *cpuctx,
@@ -2256,51 +2239,13 @@ static void perf_event_sched_in(struct perf_cpu_context *cpuctx,
                 ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE, task);
 }
 
-/*
- * We want to maintain the following priority of scheduling:
- *  - CPU pinned (EVENT_CPU | EVENT_PINNED)
- *  - task pinned (EVENT_PINNED)
- *  - CPU flexible (EVENT_CPU | EVENT_FLEXIBLE)
- *  - task flexible (EVENT_FLEXIBLE).
- *
- * In order to avoid unscheduling and scheduling back in everything every
- * time an event is added, only do it for the groups of equal priority and
- * below.
- *
- * This can be called after a batch operation on task events, in which case
- * event_type is a bit mask of the types of events involved. For CPU events,
- * event_type is only either EVENT_PINNED or EVENT_FLEXIBLE.
- */
 static void ctx_resched(struct perf_cpu_context *cpuctx,
-                        struct perf_event_context *task_ctx,
-                        enum event_type_t event_type)
+                        struct perf_event_context *task_ctx)
 {
-        enum event_type_t ctx_event_type = event_type & EVENT_ALL;
-        bool cpu_event = !!(event_type & EVENT_CPU);
-
-        /*
-         * If pinned groups are involved, flexible groups also need to be
-         * scheduled out.
-         */
-        if (event_type & EVENT_PINNED)
-                event_type |= EVENT_FLEXIBLE;
-
         perf_pmu_disable(cpuctx->ctx.pmu);
         if (task_ctx)
-                task_ctx_sched_out(cpuctx, task_ctx, event_type);
-
-        /*
-         * Decide which cpu ctx groups to schedule out based on the types
-         * of events that caused rescheduling:
-         *  - EVENT_CPU: schedule out corresponding groups;
-         *  - EVENT_PINNED task events: schedule out EVENT_FLEXIBLE groups;
-         *  - otherwise, do nothing more.
-         */
-        if (cpu_event)
-                cpu_ctx_sched_out(cpuctx, ctx_event_type);
-        else if (ctx_event_type & EVENT_PINNED)
-                cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
-
+                task_ctx_sched_out(cpuctx, task_ctx);
+        cpu_ctx_sched_out(cpuctx, EVENT_ALL);
         perf_event_sched_in(cpuctx, task_ctx, current);
         perf_pmu_enable(cpuctx->ctx.pmu);
 }
@@ -2347,7 +2292,7 @@ static int __perf_install_in_context(void *info)
         if (reprogram) {
                 ctx_sched_out(ctx, cpuctx, EVENT_TIME);
                 add_event_to_ctx(event, ctx);
-                ctx_resched(cpuctx, task_ctx, get_event_type(event));
+                ctx_resched(cpuctx, task_ctx);
         } else {
                 add_event_to_ctx(event, ctx);
         }
@@ -2514,7 +2459,7 @@ static void __perf_event_enable(struct perf_event *event,
         if (ctx->task)
                 WARN_ON_ONCE(task_ctx != ctx);
 
-        ctx_resched(cpuctx, task_ctx, get_event_type(event));
+        ctx_resched(cpuctx, task_ctx);
 }
 
 /*
@@ -2941,7 +2886,7 @@ unlock:
 
         if (do_switch) {
                 raw_spin_lock(&ctx->lock);
-                task_ctx_sched_out(cpuctx, ctx, EVENT_ALL);
+                task_ctx_sched_out(cpuctx, ctx);
                 raw_spin_unlock(&ctx->lock);
         }
 }
@@ -3498,7 +3443,6 @@ static int event_enable_on_exec(struct perf_event *event,
 static void perf_event_enable_on_exec(int ctxn)
 {
         struct perf_event_context *ctx, *clone_ctx = NULL;
-        enum event_type_t event_type = 0;
         struct perf_cpu_context *cpuctx;
         struct perf_event *event;
         unsigned long flags;
@@ -3512,17 +3456,15 @@ static void perf_event_enable_on_exec(int ctxn)
         cpuctx = __get_cpu_context(ctx);
         perf_ctx_lock(cpuctx, ctx);
         ctx_sched_out(ctx, cpuctx, EVENT_TIME);
-        list_for_each_entry(event, &ctx->event_list, event_entry) {
+        list_for_each_entry(event, &ctx->event_list, event_entry)
                 enabled |= event_enable_on_exec(event, ctx);
-                event_type |= get_event_type(event);
-        }
 
         /*
          * Unclone and reschedule this context if we enabled any event.
          */
         if (enabled) {
                 clone_ctx = unclone_ctx(ctx);
-                ctx_resched(cpuctx, ctx, event_type);
+                ctx_resched(cpuctx, ctx);
         } else {
                 ctx_sched_in(ctx, cpuctx, EVENT_TIME, current);
         }
@@ -10466,7 +10408,7 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
          * in.
          */
         raw_spin_lock_irq(&child_ctx->lock);
-        task_ctx_sched_out(__get_cpu_context(child_ctx), child_ctx, EVENT_ALL);
+        task_ctx_sched_out(__get_cpu_context(child_ctx), child_ctx);
 
         /*
          * Now that the context is inactive, destroy the task <-> ctx relation
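The revert itself simply makes ctx_resched() schedule out and back in the
entire task and CPU contexts (EVENT_ALL) on every resched, rather than only
the groups at or below the priority of the triggering event, as the removed
comment block above described.

As a footnote to the counts quoted in the changelog (Count 1 ending well below
Count 0 on the broken kernels): userspace can spot this kind of undercounting
by opening the group leader with read_format = PERF_FORMAT_GROUP |
PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING and comparing
time_running against time_enabled after the run. The helper below is a
hypothetical illustration, not part of this patch, and assumes the leader was
opened with exactly those read_format bits and a single sibling:

/*
 * Hypothetical helper, not part of this patch: read a two-event group whose
 * leader was opened with read_format = PERF_FORMAT_GROUP |
 * PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING, print the
 * per-event counts and report whether the group was descheduled for part of
 * the run (time_running < time_enabled).
 */
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

struct group_read {
        uint64_t nr;              /* number of events in the group (2 here) */
        uint64_t time_enabled;
        uint64_t time_running;
        struct { uint64_t value; } ctr[2];
};

static int check_group_counts(int leader_fd)
{
        struct group_read gr;
        ssize_t n = read(leader_fd, &gr, sizeof(gr));

        if (n != (ssize_t)sizeof(gr) || gr.nr != 2)
                return -1;

        for (unsigned int i = 0; i < 2; i++)
                printf("Count %u: %llu\n", i,
                       (unsigned long long)gr.ctr[i].value);

        if (gr.time_running < gr.time_enabled)
                printf("group ran for only %llu of %llu ns enabled\n",
                       (unsigned long long)gr.time_running,
                       (unsigned long long)gr.time_enabled);
        return 0;
}

Called on the leader fd once the measured workload has finished, this prints
the counts in the same form as the test output above and flags any interval
during which the whole group was not scheduled.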