Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S932145AbdCWTxT (ORCPT ); Thu, 23 Mar 2017 15:53:19 -0400 Received: from mail.santannapisa.it ([193.205.80.99]:26882 "EHLO mail.santannapisa.it" rhost-flags-OK-FAIL-OK-OK) by vger.kernel.org with ESMTP id S1756244AbdCWTxM (ORCPT ); Thu, 23 Mar 2017 15:53:12 -0400 From: luca abeni To: linux-kernel@vger.kernel.org Cc: Peter Zijlstra , Ingo Molnar , Juri Lelli , Claudio Scordino , Steven Rostedt , Tommaso Cucinotta , Daniel Bristot de Oliveira , Joel Fernandes , Mathieu Poirier , Luca Abeni Subject: [RFC v5 9/9] sched/deadline: also reclaim bandwidth not used by dl tasks Date: Fri, 24 Mar 2017 04:53:02 +0100 Message-Id: <1490327582-4376-10-git-send-email-luca.abeni@santannapisa.it> X-Mailer: git-send-email 2.7.4 In-Reply-To: <1490327582-4376-1-git-send-email-luca.abeni@santannapisa.it> References: <1490327582-4376-1-git-send-email-luca.abeni@santannapisa.it> Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 6794 Lines: 233 From: Luca Abeni This commit introduces a per-runqueue "extra utilization" that can be reclaimed by deadline tasks. In this way, the maximum fraction of CPU time that can be reclaimed by deadline tasks is fixed (and configurable) and does not depend on the total deadline utilization. 
Signed-off-by: Luca Abeni Tested-by: Daniel Bristot de Oliveira --- kernel/sched/core.c | 21 ++++++++++++--------- kernel/sched/deadline.c | 26 ++++++++++++++++---------- kernel/sched/sched.h | 37 +++++++++++++++++++++++++++++++++++-- 3 files changed, 63 insertions(+), 21 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 88e108b..69895fb 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2456,7 +2456,7 @@ inline struct dl_bw *dl_bw_of(int i) return &cpu_rq(i)->rd->dl_bw; } -static inline int dl_bw_cpus(int i) +inline int dl_bw_cpus(int i) { struct root_domain *rd = cpu_rq(i)->rd; int cpus = 0; @@ -2474,7 +2474,7 @@ inline struct dl_bw *dl_bw_of(int i) return &cpu_rq(i)->dl.dl_bw; } -static inline int dl_bw_cpus(int i) +inline int dl_bw_cpus(int i) { return 1; } @@ -2512,8 +2512,8 @@ static int dl_overflow(struct task_struct *p, int policy, if (dl_policy(policy) && !task_has_dl_policy(p) && !__dl_overflow(dl_b, cpus, 0, new_bw)) { if (hrtimer_active(&p->dl.inactive_timer)) - __dl_clear(dl_b, p->dl.dl_bw); - __dl_add(dl_b, new_bw); + __dl_clear(dl_b, p->dl.dl_bw, cpus); + __dl_add(dl_b, new_bw, cpus); err = 0; } else if (dl_policy(policy) && task_has_dl_policy(p) && !__dl_overflow(dl_b, cpus, p->dl.dl_bw, new_bw)) { @@ -2524,8 +2524,8 @@ static int dl_overflow(struct task_struct *p, int policy, * But this would require to set the task's "inactive * timer" when the task is not inactive. */ - __dl_clear(dl_b, p->dl.dl_bw); - __dl_add(dl_b, new_bw); + __dl_clear(dl_b, p->dl.dl_bw, cpus); + __dl_add(dl_b, new_bw, cpus); dl_change_utilization(p, new_bw); err = 0; } else if (!dl_policy(policy) && task_has_dl_policy(p)) { @@ -5460,7 +5460,7 @@ int task_can_attach(struct task_struct *p, * We will free resources in the source root_domain * later on (see set_cpus_allowed_dl()). 
*/ - __dl_add(dl_b, p->dl.dl_bw); + __dl_add(dl_b, p->dl.dl_bw, cpus); } raw_spin_unlock_irqrestore(&dl_b->lock, flags); rcu_read_unlock_sched(); @@ -6717,12 +6717,15 @@ static void sched_dl_do_global(void) raw_spin_unlock_irqrestore(&dl_b->lock, flags); rcu_read_unlock_sched(); - if (dl_b->bw == -1) + if (dl_b->bw == -1) { cpu_rq(cpu)->dl.deadline_bw_inv = 1 << 8; - else + cpu_rq(cpu)->dl.extra_bw = 1 << 20; + } else { cpu_rq(cpu)->dl.deadline_bw_inv = to_ratio(global_rt_runtime(), global_rt_period()) >> 12; + cpu_rq(cpu)->dl.extra_bw = new_bw; + } } } diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index c393c3d..5547101 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -153,7 +153,7 @@ static void task_non_contending(struct task_struct *p) if (p->state == TASK_DEAD) sub_rq_bw(p->dl.dl_bw, &rq->dl); raw_spin_lock(&dl_b->lock); - __dl_clear(dl_b, p->dl.dl_bw); + __dl_clear(dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p))); __dl_clear_params(p); raw_spin_unlock(&dl_b->lock); } @@ -243,11 +243,15 @@ void init_dl_rq(struct dl_rq *dl_rq) #else init_dl_bw(&dl_rq->dl_bw); #endif - if (global_rt_runtime() == RUNTIME_INF) + if (global_rt_runtime() == RUNTIME_INF) { dl_rq->deadline_bw_inv = 1 << 8; - else + dl_rq->extra_bw = 1 << 20; + } else { dl_rq->deadline_bw_inv = to_ratio(global_rt_runtime(), global_rt_period()) >> 12; + dl_rq->extra_bw = + to_ratio(global_rt_period(), global_rt_runtime()); + } } #ifdef CONFIG_SMP @@ -909,12 +913,14 @@ extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq); */ u64 grub_reclaim(u64 delta, struct rq *rq, u64 u) { - u64 u_act; + s64 u_act; + s64 u_act_min; - if (rq->dl.this_bw - rq->dl.running_bw > (1 << 20) - u) - u_act = u; - else - u_act = (1 << 20) - rq->dl.this_bw + rq->dl.running_bw; + u_act = (1 << 20) - rq->dl.this_bw - rq->dl.extra_bw + + rq->dl.running_bw; + u_act_min = (u * rq->dl.deadline_bw_inv) >> 8; + if (u_act < u_act_min) + u_act = u_act_min; return (delta * u_act) >> 20; } @@ -1023,7 
+1029,7 @@ static enum hrtimer_restart inactive_task_timer(struct hrtimer *timer) } raw_spin_lock(&dl_b->lock); - __dl_clear(dl_b, p->dl.dl_bw); + __dl_clear(dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p))); raw_spin_unlock(&dl_b->lock); __dl_clear_params(p); @@ -1989,7 +1995,7 @@ static void set_cpus_allowed_dl(struct task_struct *p, * until we complete the update. */ raw_spin_lock(&src_dl_b->lock); - __dl_clear(src_dl_b, p->dl.dl_bw); + __dl_clear(src_dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p))); raw_spin_unlock(&src_dl_b->lock); } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 3818b3c..aec71f3 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -219,22 +219,27 @@ static inline int dl_bandwidth_enabled(void) } extern struct dl_bw *dl_bw_of(int i); +extern int dl_bw_cpus(int i); struct dl_bw { raw_spinlock_t lock; u64 bw, total_bw; }; +static inline void __dl_update(struct dl_bw *dl_b, s64 bw); + static inline -void __dl_clear(struct dl_bw *dl_b, u64 tsk_bw) +void __dl_clear(struct dl_bw *dl_b, u64 tsk_bw, int cpus) { dl_b->total_bw -= tsk_bw; + __dl_update(dl_b, (s32)tsk_bw / cpus); } static inline -void __dl_add(struct dl_bw *dl_b, u64 tsk_bw) +void __dl_add(struct dl_bw *dl_b, u64 tsk_bw, int cpus) { dl_b->total_bw += tsk_bw; + __dl_update(dl_b, -((s32)tsk_bw / cpus)); } static inline @@ -576,6 +581,7 @@ struct dl_rq { * runqueue (inactive utilization = this_bw - running_bw). 
*/ u64 this_bw; + u64 extra_bw; /* * Inverse of the fraction of CPU utilization that can be reclaimed @@ -1951,6 +1957,33 @@ extern void nohz_balance_exit_idle(unsigned int cpu); static inline void nohz_balance_exit_idle(unsigned int cpu) { } #endif + +#ifdef CONFIG_SMP +static inline +void __dl_update(struct dl_bw *dl_b, s64 bw) +{ + struct root_domain *rd = container_of(dl_b, struct root_domain, dl_bw); + int i; + + RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(), + "sched RCU must be held"); + for_each_cpu_and(i, rd->span, cpu_active_mask) { + struct rq *rq = cpu_rq(i); + + rq->dl.extra_bw += bw; + } +} +#else +static inline +void __dl_update(struct dl_bw *dl_b, s64 bw) +{ + struct dl_rq *dl = container_of(dl_b, struct dl_rq, dl_bw); + + dl->extra_bw += bw; +} +#endif + + #ifdef CONFIG_IRQ_TIME_ACCOUNTING struct irqtime { u64 tick_delta; -- 2.7.4