Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1754014AbcL3LeA (ORCPT ); Fri, 30 Dec 2016 06:34:00 -0500 Received: from mail-wj0-f194.google.com ([209.85.210.194]:35383 "EHLO mail-wj0-f194.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753977AbcL3Ld6 (ORCPT ); Fri, 30 Dec 2016 06:33:58 -0500 From: Luca Abeni X-Google-Original-From: Luca Abeni To: linux-kernel@vger.kernel.org Cc: Peter Zijlstra , Ingo Molnar , Juri Lelli , Claudio Scordino , Steven Rostedt , Tommaso Cucinotta , Daniel Bristot de Oliveira , Luca Abeni Subject: [RFC v4 2/6] sched/deadline: improve the tracking of active utilization Date: Fri, 30 Dec 2016 12:33:07 +0100 Message-Id: <1483097591-3871-3-git-send-email-lucabe72@gmail.com> X-Mailer: git-send-email 2.7.4 In-Reply-To: <1483097591-3871-1-git-send-email-lucabe72@gmail.com> References: <1483097591-3871-1-git-send-email-lucabe72@gmail.com> Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 9199 Lines: 293 From: Luca Abeni This patch implements a more theoretically sound algorithm for tracking active utilization: instead of decreasing it when a task blocks, use a timer (the "inactive timer", named after the "Inactive" task state of the GRUB algorithm) to decrease the active utilization at the so called "0-lag time". Signed-off-by: Luca Abeni --- include/linux/sched.h | 18 +++++- kernel/sched/core.c | 2 + kernel/sched/deadline.c | 150 ++++++++++++++++++++++++++++++++++++++++++++---- kernel/sched/sched.h | 1 + 4 files changed, 158 insertions(+), 13 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 4d19052..f34633c2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1451,14 +1451,30 @@ struct sched_dl_entity { * * @dl_yielded tells if task gave up the cpu before consuming * all its available runtime during the last job. 
+ * + * @dl_non_contending tells if task is inactive while still + * contributing to the active utilization. In other words, it + * indicates if the inactive timer has been armed and its handler + * has not been executed yet. This flag is useful to avoid race + * conditions between the inactive timer handler and the wakeup + * code. */ - int dl_throttled, dl_boosted, dl_yielded; + int dl_throttled, dl_boosted, dl_yielded, dl_non_contending; /* * Bandwidth enforcement timer. Each -deadline task has its * own bandwidth to be enforced, thus we need one timer per task. */ struct hrtimer dl_timer; + + /* + * Inactive timer, responsible for decreasing the active utilization + * at the "0-lag time". When a -deadline task blocks, it contributes + * to GRUB's active utilization until the "0-lag time", hence a + * timer is needed to decrease the active utilization at the correct + * time. + */ + struct hrtimer inactive_timer; }; union rcu_special { diff --git a/kernel/sched/core.c b/kernel/sched/core.c index c56fb57..98f9944 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2187,6 +2187,7 @@ void __dl_clear_params(struct task_struct *p) dl_se->dl_throttled = 0; dl_se->dl_yielded = 0; + dl_se->dl_non_contending = 0; } /* @@ -2218,6 +2219,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) RB_CLEAR_NODE(&p->dl.rb_node); init_dl_task_timer(&p->dl); + init_inactive_task_timer(&p->dl); __dl_clear_params(p); INIT_LIST_HEAD(&p->rt.run_list); diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 23c840e..cdb7274 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -65,6 +65,46 @@ void sub_running_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) dl_rq->running_bw = 0; } +static void task_go_inactive(struct task_struct *p) +{ + struct sched_dl_entity *dl_se = &p->dl; + struct hrtimer *timer = &dl_se->inactive_timer; + struct dl_rq *dl_rq = dl_rq_of_se(dl_se); + struct rq *rq = rq_of_dl_rq(dl_rq); + s64 
zerolag_time; + + WARN_ON(dl_se->dl_runtime == 0); + + WARN_ON(hrtimer_active(&dl_se->inactive_timer)); + WARN_ON(dl_se->dl_non_contending); + + zerolag_time = dl_se->deadline - + div64_long((dl_se->runtime * dl_se->dl_period), + dl_se->dl_runtime); + + /* + * Using relative times instead of the absolute "0-lag time" + * simplifies the code + */ + zerolag_time -= rq_clock(rq); + + /* + * If the "0-lag time" has already passed, decrease the active + * utilization now, instead of starting a timer + */ + if (zerolag_time < 0) { + sub_running_bw(dl_se, dl_rq); + if (!dl_task(p)) + __dl_clear_params(p); + + return; + } + + dl_se->dl_non_contending = 1; + get_task_struct(p); + hrtimer_start(timer, ns_to_ktime(zerolag_time), HRTIMER_MODE_REL); +} + static inline int is_leftmost(struct task_struct *p, struct dl_rq *dl_rq) { struct sched_dl_entity *dl_se = &p->dl; @@ -610,10 +650,8 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer) * The task might have changed its scheduling policy to something * different than SCHED_DEADLINE (through switched_from_dl()). 
*/ - if (!dl_task(p)) { - __dl_clear_params(p); + if (!dl_task(p)) goto unlock; - } /* * The task might have been boosted by someone else and might be in the @@ -800,6 +838,48 @@ static void update_curr_dl(struct rq *rq) } } +static enum hrtimer_restart inactive_task_timer(struct hrtimer *timer) +{ + struct sched_dl_entity *dl_se = container_of(timer, + struct sched_dl_entity, + inactive_timer); + struct task_struct *p = dl_task_of(dl_se); + struct rq_flags rf; + struct rq *rq; + + rq = task_rq_lock(p, &rf); + + if (!dl_task(p) || p->state == TASK_DEAD) { + if (p->state == TASK_DEAD && dl_se->dl_non_contending) + sub_running_bw(&p->dl, dl_rq_of_se(&p->dl)); + + __dl_clear_params(p); + + goto unlock; + } + if (dl_se->dl_non_contending == 0) + goto unlock; + + sched_clock_tick(); + update_rq_clock(rq); + + sub_running_bw(dl_se, &rq->dl); + dl_se->dl_non_contending = 0; +unlock: + task_rq_unlock(rq, p, &rf); + put_task_struct(p); + + return HRTIMER_NORESTART; +} + +void init_inactive_task_timer(struct sched_dl_entity *dl_se) +{ + struct hrtimer *timer = &dl_se->inactive_timer; + + hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + timer->function = inactive_task_timer; +} + #ifdef CONFIG_SMP static void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline) @@ -934,7 +1014,28 @@ enqueue_dl_entity(struct sched_dl_entity *dl_se, if (flags & ENQUEUE_WAKEUP) { struct dl_rq *dl_rq = dl_rq_of_se(dl_se); - add_running_bw(dl_se, dl_rq); + if (dl_se->dl_non_contending) { + /* + * If the timer handler is currently running and the + * timer cannot be cancelled, inactive_task_timer() + * will see that dl_non_contending is not set, and + * will do nothing, so we are still safe. 
+ */ + if (hrtimer_try_to_cancel(&dl_se->inactive_timer) == 1) + put_task_struct(dl_task_of(dl_se)); + WARN_ON(dl_task_of(dl_se)->nr_cpus_allowed > 1); + dl_se->dl_non_contending = 0; + } else { + /* + * Since "dl_non_contending" is not set, the + * task's utilization has already been removed from + * active utilization (either when the task blocked, + * when the "inactive timer" fired, or when it has + * been cancelled in select_task_rq_dl()). + * So, add it back. + */ + add_running_bw(dl_se, dl_rq); + } update_dl_entity(dl_se, pi_se); } else if (flags & ENQUEUE_REPLENISH) @@ -1023,7 +1124,7 @@ static void dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags) * or "inactive") */ if (flags & DEQUEUE_SLEEP) - sub_running_bw(&p->dl, &rq->dl); + task_go_inactive(p); } /* @@ -1097,6 +1198,22 @@ select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags) } rcu_read_unlock(); + rq = task_rq(p); + raw_spin_lock(&rq->lock); + if (p->dl.dl_non_contending) { + sub_running_bw(&p->dl, &rq->dl); + p->dl.dl_non_contending = 0; + /* + * If the timer handler is currently running and the + * timer cannot be cancelled, inactive_task_timer() + * will see that dl_non_contending is not set, and + * will do nothing, so we are still safe. + */ + if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1) + put_task_struct(p); + } + raw_spin_unlock(&rq->lock); + out: return cpu; } @@ -1743,16 +1860,25 @@ void __init init_sched_dl_class(void) static void switched_from_dl(struct rq *rq, struct task_struct *p) { /* - * Start the deadline timer; if we switch back to dl before this we'll - * continue consuming our current CBS slice. If we stay outside of - * SCHED_DEADLINE until the deadline passes, the timer will reset the - * task. + * task_go_inactive() can start the "inactive timer" (if the 0-lag + * time is in the future). 
If the task switches back to dl before + * the "inactive timer" fires, it can continue to consume its current + * runtime using its current deadline. If it stays outside of + * SCHED_DEADLINE until the 0-lag time passes, inactive_task_timer() + * will reset the task parameters. */ - if (!start_dl_timer(p)) - __dl_clear_params(p); + if (task_on_rq_queued(p) && p->dl.dl_runtime) + task_go_inactive(p); - if (task_on_rq_queued(p)) + /* + * We cannot use inactive_task_timer() to invoke sub_running_bw() + * at the 0-lag time, because the task could have been migrated + * while SCHED_OTHER in the meantime. + */ + if (p->dl.dl_non_contending) { sub_running_bw(&p->dl, &rq->dl); + p->dl.dl_non_contending = 0; + } /* * Since this might be the only -deadline task on the rq, diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 0659772..e422803 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1367,6 +1367,7 @@ extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime extern struct dl_bandwidth def_dl_bandwidth; extern void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime); extern void init_dl_task_timer(struct sched_dl_entity *dl_se); +extern void init_inactive_task_timer(struct sched_dl_entity *dl_se); unsigned long to_ratio(u64 period, u64 runtime); -- 2.7.4