From: Kirill Tkhai
To: "linux-kernel@vger.kernel.org"
Cc: Steven Rostedt, Ingo Molnar, Peter Zijlstra
Subject: Re: [PATCH][sched] Ignore RT throttling if rq->rt tasks are the only running tasks in the rq
Date: Sat, 27 Oct 2012 16:01:30 +0400

I need a little rework of this patch. I'll send it later. Sorry for the noise.

Kirill

27.10.2012, 14:36, "Kirill Tkhai":
> The current throttling logic always skips the RT class if rq->rt is throttled.
> It doesn't handle the special case when RT tasks are the only running tasks
> in the rq, so it is possible for the CPU to pick up the idle task while RT
> tasks are available.
>
> This patch aims to avoid that situation. The modified _pick_next_task_rt()
> looks at the total number of rq->rt tasks (including all child rt_rqs) and
> compares it with the number of all running tasks on the rq. If they are
> equal, the scheduler picks the highest-priority rq->rt task (children are
> considered too).
>
> Later, the first rt_rq to be unthrottled will replace this task. The case of
> a fair task appearing is handled in check_preempt_curr().
>
> The patch changes the logic of pick_rt_task() and pick_next_highest_task_rt():
> a negative cpu argument now always makes the task "picked". There are no
> other users of this possibility, so nobody is affected by the change.
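
In other words, the new pick path boils down to roughly the following (a
simplified sketch of the logic described above; the BUG_ON()/WARN_ON() checks
and the exec_start bookkeeping are left out, the real change is in the diff
below):

        struct rt_rq *rt_rq = &rq->rt;
        struct sched_rt_entity *rt_se;

        /* Usual case: rq->rt has tasks and is not throttled. */
        if (rt_rq->rt_nr_running && !rt_rq_throttled(rt_rq)) {
                do {                            /* walk down the group hierarchy */
                        rt_se = pick_next_rt_entity(rq, rt_rq);
                        rt_rq = group_rt_rq(rt_se);
                } while (rt_rq);
                return rt_task_of(rt_se);
        }

        /*
         * rq->rt is throttled (or empty): ignore the throttle only when
         * every running task on this rq is an RT task, i.e. when the only
         * alternative is the idle task.
         */
        if (!rt_rq->rt_nr_total || rt_rq->rt_nr_total != rq->nr_running)
                return NULL;

        rq->extended_class = &rt_sched_class;     /* RT now runs "over quota" */
        return pick_next_highest_task_rt(rq, -1); /* cpu < 0: task is always "picked" */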
>
> Signed-off-by: Kirill V Tkhai
> CC: Steven Rostedt
> CC: Ingo Molnar
> CC: Peter Zijlstra
>
> ---
>  kernel/sched/core.c  |    6 +++-
>  kernel/sched/rt.c    |   97 ++++++++++++++++++++++++++++++++------------------
>  kernel/sched/sched.h |    3 +-
>  3 files changed, 69 insertions(+), 37 deletions(-)
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index bf41f82..ecc9833 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -901,7 +901,9 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
>  {
>          const struct sched_class *class;
>
> -        if (p->sched_class == rq->curr->sched_class) {
> +        if (rq->curr->sched_class == rq->extended_class) {
> +                resched_task(rq->curr);
> +        } else if (p->sched_class == rq->curr->sched_class) {
>                  rq->curr->sched_class->check_preempt_curr(rq, p, flags);
>          } else {
>                  for_each_class(class) {
> @@ -2771,6 +2773,7 @@ static void put_prev_task(struct rq *rq, struct task_struct *prev)
>          if (prev->on_rq || rq->skip_clock_update < 0)
>                  update_rq_clock(rq);
>          prev->sched_class->put_prev_task(rq, prev);
> +        rq->extended_class = NULL;
>  }
>
>  /*
> @@ -6892,6 +6895,7 @@ void __init sched_init(void)
>                  rq->calc_load_update = jiffies + LOAD_FREQ;
>                  init_cfs_rq(&rq->cfs);
>                  init_rt_rq(&rq->rt, rq);
> +                rq->extended_class = NULL;
>  #ifdef CONFIG_FAIR_GROUP_SCHED
>                  root_task_group.shares = ROOT_TASK_GROUP_LOAD;
>                  INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
> diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
> index 418feb0..6f6da20 100644
> --- a/kernel/sched/rt.c
> +++ b/kernel/sched/rt.c
> @@ -274,15 +274,8 @@ static void update_rt_migration(struct rt_rq *rt_rq)
>
>  static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
>  {
> -        struct task_struct *p;
> -
> -        if (!rt_entity_is_task(rt_se))
> -                return;
> -
> -        p = rt_task_of(rt_se);
> -        rt_rq = &rq_of_rt_rq(rt_rq)->rt;
> +        struct task_struct *p = rt_task_of(rt_se);
>
> -        rt_rq->rt_nr_total++;
>          if (p->nr_cpus_allowed > 1)
>                  rt_rq->rt_nr_migratory++;
>
> @@ -291,15 +284,8 @@ static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
>  {
> -        struct task_struct *p;
> -
> -        if (!rt_entity_is_task(rt_se))
> -                return;
> -
> -        p = rt_task_of(rt_se);
> -        rt_rq = &rq_of_rt_rq(rt_rq)->rt;
> +        struct task_struct *p = rt_task_of(rt_se);
>
> -        rt_rq->rt_nr_total--;
>          if (p->nr_cpus_allowed > 1)
>                  rt_rq->rt_nr_migratory--;
>
> @@ -467,6 +453,16 @@ static int rt_se_boosted(struct sched_rt_entity *rt_se)
>          return p->prio != p->normal_prio;
>  }
>
> +static void extended_rt_unthrottles(struct rq *rq, struct rt_rq *rt_rq)
> +{
> +        struct task_struct *curr = rq->curr;
> +
> +        if (rt_rq_of_se(&curr->rt) == rt_rq)
> +                rq->extended_class = NULL;
> +        else
> +                resched_task(curr);
> +}
> +
>  #ifdef CONFIG_SMP
>  static inline const struct cpumask *sched_rt_period_mask(void)
>  {
> @@ -826,6 +822,9 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
>                                   */
>                                  if (rt_rq->rt_nr_running && rq->curr == rq->idle)
>                                          rq->skip_clock_update = -1;
> +
> +                                if (rq->extended_class == &rt_sched_class)
> +                                        extended_rt_unthrottles(rq, rt_rq);
>                          }
>                          if (rt_rq->rt_time || rt_rq->rt_nr_running)
>                                  idle = 0;
> @@ -1071,8 +1070,14 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
>          WARN_ON(!rt_prio(prio));
>          rt_rq->rt_nr_running++;
>
> +        if (rt_entity_is_task(rt_se)) {
> +                struct rt_rq *rt = &rq_of_rt_rq(rt_rq)->rt;
> +
> +                rt->rt_nr_total++;
> +                inc_rt_migration(rt_se, rt);
> +        }
> +
>          inc_rt_prio(rt_rq, prio);
> -        inc_rt_migration(rt_se, rt_rq);
>          inc_rt_group(rt_se, rt_rq);
>  }
>
> @@ -1083,8 +1088,15 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
>          WARN_ON(!rt_rq->rt_nr_running);
>          rt_rq->rt_nr_running--;
>
> +        if (rt_entity_is_task(rt_se)) {
> +                struct rt_rq *rt = &rq_of_rt_rq(rt_rq)->rt;
> +
> +                WARN_ON(!rt->rt_nr_total);
> +                rt->rt_nr_total--;
> +                dec_rt_migration(rt_se, rt);
> +        }
> +
>          dec_rt_prio(rt_rq, rt_se_prio(rt_se));
> -        dec_rt_migration(rt_se, rt_rq);
>          dec_rt_group(rt_se, rt_rq);
>  }
>
> @@ -1362,28 +1374,41 @@ static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
>          return next;
>  }
>
> +static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu);
> +
>  static struct task_struct *_pick_next_task_rt(struct rq *rq)
>  {
> -        struct sched_rt_entity *rt_se;
> -        struct task_struct *p;
>          struct rt_rq *rt_rq;
> +        struct task_struct *p;
> +        int running, rt_total;
>
>          rt_rq = &rq->rt;
> +        running = rt_rq->rt_nr_running;
>
> -        if (!rt_rq->rt_nr_running)
> -                return NULL;
> +        /* If rq->rt is suitable to get tasks */
> +        if (running && !rt_rq_throttled(rt_rq)) {
> +                struct sched_rt_entity *rt_se;
>
> -        if (rt_rq_throttled(rt_rq))
> +                do {
> +                        rt_se = pick_next_rt_entity(rq, rt_rq);
> +                        BUG_ON(!rt_se);
> +                        rt_rq = group_rt_rq(rt_se);
> +                } while (rt_rq);
> +
> +                return rt_task_of(rt_se);
> +        }
> +
> +        rt_total = rt_rq->rt_nr_total;
> +
> +        /* If rq has no-RT tasks OR rt_rq and its children are empty */
> +        if (rt_total != rq->nr_running || !rt_total)
>                  return NULL;
>
> -        do {
> -                rt_se = pick_next_rt_entity(rq, rt_rq);
> -                BUG_ON(!rt_se);
> -                rt_rq = group_rt_rq(rt_se);
> -        } while (rt_rq);
> +        /* All running tasks are RT. Let's avoid idle wasting CPU time */
> +        p = pick_next_highest_task_rt(rq, -1);
> +        rq->extended_class = &rt_sched_class;
>
> -        p = rt_task_of(rt_se);
> -        p->se.exec_start = rq->clock_task;
> +        WARN_ON(!p || rq->cfs.h_nr_running);
>
>          return p;
>  }
> @@ -1392,9 +1417,11 @@ static struct task_struct *pick_next_task_rt(struct rq *rq)
>  {
>          struct task_struct *p = _pick_next_task_rt(rq);
>
> -        /* The running task is never eligible for pushing */
> -        if (p)
> +        if (p) {
> +                /* The running task is never eligible for pushing */
>                  dequeue_pushable_task(rq, p);
> +                p->se.exec_start = rq->clock_task;
> +        }
>
>  #ifdef CONFIG_SMP
>          /*
> @@ -1426,9 +1453,9 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
>
>  static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
>  {
> -        if (!task_running(rq, p) &&
> -            (cpu < 0 || cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) &&
> -            (p->nr_cpus_allowed > 1))
> +        if (cpu < 0 || (!task_running(rq, p)
> +                && (cpumask_test_cpu(cpu, tsk_cpus_allowed(p))
> +                && p->nr_cpus_allowed > 1)))
>                  return 1;
>          return 0;
>  }
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index 508e77e..9fdacef 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -294,6 +294,7 @@ static inline int rt_bandwidth_enabled(void)
>  struct rt_rq {
>          struct rt_prio_array active;
>          unsigned int rt_nr_running;
> +        unsigned long rt_nr_total;
>  #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
>          struct {
>                  int curr; /* highest queued rt task prio */
> @@ -304,7 +305,6 @@ struct rt_rq {
>  #endif
>  #ifdef CONFIG_SMP
>          unsigned long rt_nr_migratory;
> -        unsigned long rt_nr_total;
>          int overloaded;
>          struct plist_head pushable_tasks;
>  #endif
> @@ -396,6 +396,7 @@ struct rq {
>  #ifdef CONFIG_RT_GROUP_SCHED
>          struct list_head leaf_rt_rq_list;
>  #endif
> +        const struct sched_class *extended_class;
>
>          /*
>           * This is part of a global counter where only the total sum
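
Put together, undoing the temporary "extended" RT pick works as described in
the changelog: a waking task or an unthrottled rt_rq forces a re-pick. A
simplified restatement of the check_preempt_curr(), extended_rt_unthrottles()
and do_sched_rt_period_timer() hunks above:

        /* check_preempt_curr(): a task wakes up while RT runs "over quota". */
        if (rq->curr->sched_class == rq->extended_class)
                resched_task(rq->curr);         /* go through a full pick again */

        /* do_sched_rt_period_timer(): some rt_rq has just been unthrottled. */
        if (rq->extended_class == &rt_sched_class) {
                if (rt_rq_of_se(&rq->curr->rt) == rt_rq)
                        rq->extended_class = NULL;      /* curr is legitimate again */
                else
                        resched_task(rq->curr);         /* let the unthrottled rt_rq win */
        }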