From: Vikram Mulukutla <markivx@codeaurora.org>
To: linux-kernel@vger.kernel.org
Cc: Peter Zijlstra, Ingo Molnar, Srivatsa Vaddagiri, Steve Muckle,
    Olav Haugan, Syed Rameez Mustafa, Joonwoo Park, Pavankumar Kondeti,
    Saravana Kannan, Bryan Huntsman, Juri Lelli, Morten Rasmussen,
    Dietmar Eggemann, Chris Redpath, Robin Randhawa, Patrick Bellasi,
    Todd Kjos, Srinath Sridharan, Andres Oportus, Leo Yan,
    Vincent Guittot, Vikram Mulukutla
Subject: [RFC PATCH 3/3] sched: Introduce WALT hooks into core and scheduling classes
Date: Fri, 28 Oct 2016 00:10:42 -0700
Message-Id: <1477638642-17428-4-git-send-email-markivx@codeaurora.org>
X-Mailer: git-send-email 1.9.1
In-Reply-To: <1477638642-17428-1-git-send-email-markivx@codeaurora.org>
References: <1477638642-17428-1-git-send-email-markivx@codeaurora.org>

From: Srivatsa Vaddagiri

Add the necessary hooks to the core scheduler and to the individual
scheduling classes so that WALT can track CPU utilization and account
for task migration between CPUs.

With CONFIG_SCHED_WALT enabled, schedutil will use WALT's CPU
utilization metric by default. This can be switched to PELT's util_avg
at runtime with the following command:

  echo 0 > /proc/sys/kernel/sched_use_walt_metrics

Signed-off-by: Srivatsa Vaddagiri
Signed-off-by: Vikram Mulukutla
---
 kernel/sched/core.c     | 29 ++++++++++++++++++++++++++++-
 kernel/sched/deadline.c |  7 +++++++
 kernel/sched/debug.c    |  9 +++++++++
 kernel/sched/fair.c     |  9 +++++++--
 kernel/sched/rt.c       |  6 ++++++
 5 files changed, 57 insertions(+), 3 deletions(-)
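A note on the schedutil change in fair.c below: cpu_walt_util() itself
is introduced in patch 2/3 of this series; this patch only feeds its
return value to cpufreq_update_util() in place of cfs_rq->avg.util_avg.
As a rough sketch of the shape that patch implies (the
sysctl_sched_use_walt_metrics and walt_ravg_window names here are
assumptions for illustration, not the actual 2/3 code; the rq fields
are the ones exported via debug.c below):

	/* Sketch only: cpu_walt_util() really lands in patch 2/3.
	 * The two externs are assumed names. div64_u64() is from
	 * <linux/math64.h>. */
	extern unsigned int sysctl_sched_use_walt_metrics;
	extern u64 walt_ravg_window;	/* WALT window size, ns */

	static inline unsigned long cpu_walt_util(struct rq *rq)
	{
		/* runtime opt-out: fall back to PELT's util_avg */
		if (!sysctl_sched_use_walt_metrics)
			return rq->cfs.avg.util_avg;

		/* scale the previous window's busy time (the
		 * prev_runnable_sum field exported via debug.c
		 * below) to capacity units */
		return div64_u64((u64)rq->prev_runnable_sum *
				 rq->cpu_capacity_orig, walt_ravg_window);
	}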
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 44817c6..3b7f67d 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -91,6 +91,8 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/sched.h>
 
+#include "walt.h"
+
 DEFINE_MUTEX(sched_domains_mutex);
 DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
 
@@ -991,6 +993,7 @@ static struct rq *move_queued_task(struct rq *rq, struct task_struct *p, int new_cpu)
 	p->on_rq = TASK_ON_RQ_MIGRATING;
 	dequeue_task(rq, p, 0);
+	walt_prepare_migrate(p, rq, true);
 	set_task_cpu(p, new_cpu);
 	raw_spin_unlock(&rq->lock);
 
@@ -998,6 +1001,7 @@ static struct rq *move_queued_task(struct rq *rq, struct task_struct *p, int new_cpu)
 	raw_spin_lock(&rq->lock);
 	BUG_ON(task_cpu(p) != new_cpu);
+	walt_finish_migrate(p, rq, true);
 	enqueue_task(rq, p, 0);
 	p->on_rq = TASK_ON_RQ_QUEUED;
 	check_preempt_curr(rq, p, 0);
@@ -1257,7 +1261,9 @@ static void __migrate_swap_task(struct task_struct *p, int cpu)
 		p->on_rq = TASK_ON_RQ_MIGRATING;
 		deactivate_task(src_rq, p, 0);
+		walt_prepare_migrate(p, src_rq, true);
 		set_task_cpu(p, cpu);
+		walt_finish_migrate(p, dst_rq, true);
 		activate_task(dst_rq, p, 0);
 		p->on_rq = TASK_ON_RQ_QUEUED;
 		check_preempt_curr(dst_rq, p, 0);
@@ -2072,13 +2078,19 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 	 */
 	smp_cond_load_acquire(&p->on_cpu, !VAL);
 
+	raw_spin_lock(&task_rq(p)->lock);
+	walt_update_task_ravg(p, task_rq(p), TASK_WAKE, walt_ktime_clock(), 0);
+	raw_spin_unlock(&task_rq(p)->lock);
+
 	p->sched_contributes_to_load = !!task_contributes_to_load(p);
 	p->state = TASK_WAKING;
 
 	cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags);
 	if (task_cpu(p) != cpu) {
 		wake_flags |= WF_MIGRATED;
+		walt_prepare_migrate(p, task_rq(p), false);
 		set_task_cpu(p, cpu);
+		walt_finish_migrate(p, cpu_rq(cpu), false);
 	}
 
 #endif /* CONFIG_SMP */
@@ -2129,8 +2141,10 @@ static void try_to_wake_up_local(struct task_struct *p, struct pin_cookie cookie)
 	trace_sched_waking(p);
 
-	if (!task_on_rq_queued(p))
+	if (!task_on_rq_queued(p)) {
+		walt_update_task_ravg(p, rq, TASK_WAKE, walt_ktime_clock(), 0);
 		ttwu_activate(rq, p, ENQUEUE_WAKEUP);
+	}
 
 	ttwu_do_wakeup(rq, p, 0, cookie);
 	if (schedstat_enabled())
@@ -2196,6 +2210,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 	p->se.nr_migrations		= 0;
 	p->se.vruntime			= 0;
 	INIT_LIST_HEAD(&p->se.group_node);
+	walt_init_new_task_load(p);
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	p->se.cfs_rq			= NULL;
@@ -2570,6 +2585,8 @@ void wake_up_new_task(struct task_struct *p)
 	rq = __task_rq_lock(p, &rf);
 	post_init_entity_util_avg(&p->se);
 
+	walt_mark_task_starting(p);
+
 	activate_task(rq, p, 0);
 	p->on_rq = TASK_ON_RQ_QUEUED;
 	trace_sched_wakeup_new(p);
@@ -3071,6 +3088,7 @@ void scheduler_tick(void)
 	update_rq_clock(rq);
 	curr->sched_class->task_tick(rq, curr, 0);
 	cpu_load_update_active(rq);
+	walt_update_task_ravg(rq->curr, rq, TASK_UPDATE, walt_ktime_clock(), 0);
 	calc_global_load_tick(rq);
 	raw_spin_unlock(&rq->lock);
 
@@ -3322,6 +3340,7 @@ static void __sched notrace __schedule(bool preempt)
 	struct pin_cookie cookie;
 	struct rq *rq;
 	int cpu;
+	u64 wallclock;
 
 	cpu = smp_processor_id();
 	rq = cpu_rq(cpu);
@@ -3385,6 +3404,9 @@ static void __sched notrace __schedule(bool preempt)
 		update_rq_clock(rq);
 
 	next = pick_next_task(rq, prev, cookie);
+	wallclock = walt_ktime_clock();
+	walt_update_task_ravg(prev, rq, PUT_PREV_TASK, wallclock, 0);
+	walt_update_task_ravg(next, rq, PICK_NEXT_TASK, wallclock, 0);
 	clear_tsk_need_resched(prev);
 	clear_preempt_need_resched();
 	rq->clock_skip_update = 0;
@@ -7284,6 +7306,8 @@ static void sched_rq_cpu_starting(unsigned int cpu)
 {
 	struct rq *rq = cpu_rq(cpu);
 
+	walt_set_window_start(rq);
+
 	rq->calc_load_update = calc_load_update;
 	update_max_interval();
 }
@@ -7304,6 +7328,9 @@ int sched_cpu_dying(unsigned int cpu)
 	/* Handle pending wakeups and then migrate everything off */
 	sched_ttwu_pending();
 	raw_spin_lock_irqsave(&rq->lock, flags);
+
+	walt_migrate_sync_cpu(cpu);
+
 	if (rq->rd) {
 		BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
 		set_rq_offline(rq);
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 1ce8867..0dd3c1f 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -15,6 +15,7 @@
  *                    Fabio Checconi
  */
 #include "sched.h"
+#include "walt.h"
 
 #include <linux/slab.h>
 
@@ -278,7 +279,9 @@ static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p)
 	 * By now the task is replenished and enqueued; migrate it.
 	 */
 	deactivate_task(rq, p, 0);
+	walt_prepare_migrate(p, rq, true);
 	set_task_cpu(p, later_rq->cpu);
+	walt_finish_migrate(p, later_rq, true);
 	activate_task(later_rq, p, 0);
 
 	if (!fallback)
@@ -1512,7 +1515,9 @@ retry:
 	}
 
 	deactivate_task(rq, next_task, 0);
+	walt_prepare_migrate(next_task, rq, true);
 	set_task_cpu(next_task, later_rq->cpu);
+	walt_finish_migrate(next_task, later_rq, true);
 	activate_task(later_rq, next_task, 0);
 	ret = 1;
 
@@ -1600,7 +1605,9 @@ static void pull_dl_task(struct rq *this_rq)
 			resched = true;
 
 			deactivate_task(src_rq, p, 0);
+			walt_prepare_migrate(p, src_rq, true);
 			set_task_cpu(p, this_cpu);
+			walt_finish_migrate(p, this_rq, true);
 			activate_task(this_rq, p, 0);
 			dmin = p->dl.deadline;
 
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 2a0a999..ab10031 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -607,6 +607,15 @@ do {									\
 	P(nr_switches);
 	P(nr_load_updates);
 	P(nr_uninterruptible);
+#ifdef CONFIG_SMP
+	P(cpu_capacity_orig);
+	P(cpu_capacity);
+#ifdef CONFIG_SCHED_WALT
+	P(window_start);
+	P(curr_runnable_sum);
+	P(prev_runnable_sum);
+#endif
+#endif
 	PN(next_balance);
 	SEQ_printf(m, "  .%-30s: %ld\n", "curr->pid", (long)(task_pid_nr(rq->curr)));
 	PN(clock);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 39c826d..182dcd3 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -34,6 +34,7 @@
 #include <trace/events/sched.h>
 
 #include "sched.h"
+#include "walt.h"
 
 /*
  * Targeted preemption latency for CPU-bound tasks:
@@ -2885,6 +2886,7 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq)
 	if (cpu == smp_processor_id() && &rq->cfs == cfs_rq) {
 		unsigned long max = rq->cpu_capacity_orig;
+		unsigned long util = cpu_walt_util(rq);
 
 		/*
 		 * There are a few boundary cases this might miss but it should
@@ -2902,8 +2904,8 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq)
 		 *
 		 * See cpu_util().
 		 */
-		cpufreq_update_util(rq_clock(rq),
-				    min(cfs_rq->avg.util_avg, max), max);
+
+		cpufreq_update_util(rq_clock(rq), min(util, max), max);
 	}
 }
 
@@ -6205,7 +6207,9 @@ static void detach_task(struct task_struct *p, struct lb_env *env)
 	p->on_rq = TASK_ON_RQ_MIGRATING;
 	deactivate_task(env->src_rq, p, 0);
+	walt_prepare_migrate(p, env->src_rq, true);
 	set_task_cpu(p, env->dst_cpu);
+	/* update WALT later under the dest rq's lock */
 }
 
 /*
@@ -6337,6 +6341,7 @@ static void attach_task(struct rq *rq, struct task_struct *p)
 	lockdep_assert_held(&rq->lock);
 
 	BUG_ON(task_rq(p) != rq);
+	walt_finish_migrate(p, rq, true);
 	activate_task(rq, p, 0);
 	p->on_rq = TASK_ON_RQ_QUEUED;
 	check_preempt_curr(rq, p, 0);
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index d5690b7..130040c 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -8,6 +8,8 @@
 #include <linux/slab.h>
 #include <linux/irq_work.h>
 
+#include "walt.h"
+
 int sched_rr_timeslice = RR_TIMESLICE;
 
 static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);
@@ -1843,7 +1845,9 @@ retry:
 	}
 
 	deactivate_task(rq, next_task, 0);
+	walt_prepare_migrate(next_task, rq, true);
 	set_task_cpu(next_task, lowest_rq->cpu);
+	walt_finish_migrate(next_task, lowest_rq, true);
 	activate_task(lowest_rq, next_task, 0);
 	ret = 1;
 
@@ -2097,7 +2101,9 @@ static void pull_rt_task(struct rq *this_rq)
 			resched = true;
 
 			deactivate_task(src_rq, p, 0);
+			walt_prepare_migrate(p, src_rq, true);
 			set_task_cpu(p, this_cpu);
+			walt_finish_migrate(p, this_rq, true);
 			activate_task(this_rq, p, 0);
 			/*
 			 * We continue with the search, just in
-- 
1.9.1
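A note on the hook pairing used throughout the patch: each migration
site calls walt_prepare_migrate() on the task's source rq before
set_task_cpu(), and walt_finish_migrate() on the destination rq after
it. The final argument appears to indicate whether the relevant rq
locks are already held: the try_to_wake_up() path passes false (only
p->pi_lock is held there), every other site passes true. The one split
pairing is the load-balance path, where detach_task() runs the prepare
half under the source rq lock and attach_task() later runs the finish
half under the destination rq lock. The signatures below are only what
the call sites imply; the real definitions live in walt.h/walt.c
(patch 2/3):

	/* implied by the call sites above; defined in patch 2/3 */
	void walt_prepare_migrate(struct task_struct *p, struct rq *rq,
				  bool locked);
	void walt_finish_migrate(struct task_struct *p, struct rq *rq,
				 bool locked);

	/* canonical pairing, as in pull_rt_task()/pull_dl_task() */
	deactivate_task(src_rq, p, 0);
	walt_prepare_migrate(p, src_rq, true);	/* source rq lock held */
	set_task_cpu(p, this_cpu);
	walt_finish_migrate(p, this_rq, true);	/* dest rq lock held */
	activate_task(this_rq, p, 0);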