In order to avoid having to do put/set on a whole cgroup hierarchy
when we context switch, push the put into pick_next_task() so that
both operations are in the same function. Further changes then allow
us to possibly optimize away redundant work.
Signed-off-by: Peter Zijlstra <[email protected]>
---
include/linux/sched.h | 3 ++-
kernel/sched/core.c | 13 +++++++------
kernel/sched/fair.c | 6 +++++-
kernel/sched/idle_task.c | 6 +++++-
kernel/sched/rt.c | 27 ++++++++++++++++-----------
kernel/sched/stop_task.c | 9 +++++++--
6 files changed, 42 insertions(+), 22 deletions(-)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1093,7 +1093,8 @@ struct sched_class {
void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int flags);
- struct task_struct * (*pick_next_task) (struct rq *rq);
+ struct task_struct * (*pick_next_task) (struct rq *rq,
+ struct task_struct *prev);
void (*put_prev_task) (struct rq *rq, struct task_struct *p);
#ifdef CONFIG_SMP
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3176,7 +3176,7 @@ static void put_prev_task(struct rq *rq,
* Pick up the highest-prio task:
*/
static inline struct task_struct *
-pick_next_task(struct rq *rq)
+pick_next_task(struct rq *rq, struct task_struct *prev)
{
const struct sched_class *class;
struct task_struct *p;
@@ -3186,13 +3186,13 @@ pick_next_task(struct rq *rq)
* the fair class we can call that function directly:
*/
if (likely(rq->nr_running == rq->cfs.h_nr_running)) {
- p = fair_sched_class.pick_next_task(rq);
+ p = fair_sched_class.pick_next_task(rq, prev);
if (likely(p))
return p;
}
for_each_class(class) {
- p = class->pick_next_task(rq);
+ p = class->pick_next_task(rq, prev);
if (p)
return p;
}
@@ -3253,8 +3253,9 @@ static void __sched __schedule(void)
if (unlikely(!rq->nr_running))
idle_balance(cpu, rq);
- put_prev_task(rq, prev);
- next = pick_next_task(rq);
+ if (prev->on_rq || rq->skip_clock_update < 0)
+ update_rq_clock(rq);
+ next = pick_next_task(rq, prev);
clear_tsk_need_resched(prev);
rq->skip_clock_update = 0;
@@ -5200,7 +5201,7 @@ static void migrate_tasks(unsigned int d
if (rq->nr_running == 1)
break;
- next = pick_next_task(rq);
+ next = pick_next_task(rq, NULL);
BUG_ON(!next);
next->sched_class->put_prev_task(rq, next);
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2990,7 +2990,8 @@ static void check_preempt_wakeup(struct
set_last_buddy(se);
}
-static struct task_struct *pick_next_task_fair(struct rq *rq)
+static struct task_struct *
+pick_next_task_fair(struct rq *rq, struct task_struct *prev)
{
struct task_struct *p;
struct cfs_rq *cfs_rq = &rq->cfs;
@@ -2999,6 +3000,9 @@ static struct task_struct *pick_next_tas
if (!cfs_rq->nr_running)
return NULL;
+ if (prev)
+ prev->sched_class->put_prev_task(rq, prev);
+
do {
se = pick_next_entity(cfs_rq);
set_next_entity(cfs_rq, se);
--- a/kernel/sched/idle_task.c
+++ b/kernel/sched/idle_task.c
@@ -22,8 +22,12 @@ static void check_preempt_curr_idle(stru
resched_task(rq->idle);
}
-static struct task_struct *pick_next_task_idle(struct rq *rq)
+static struct task_struct *
+pick_next_task_idle(struct rq *rq, struct task_struct *prev)
{
+ if (prev)
+ prev->sched_class->put_prev_task(rq, prev);
+
schedstat_inc(rq, sched_goidle);
calc_load_account_idle(rq);
return rq->idle;
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1346,15 +1346,7 @@ static struct task_struct *_pick_next_ta
{
struct sched_rt_entity *rt_se;
struct task_struct *p;
- struct rt_rq *rt_rq;
-
- rt_rq = &rq->rt;
-
- if (!rt_rq->rt_nr_running)
- return NULL;
-
- if (rt_rq_throttled(rt_rq))
- return NULL;
+ struct rt_rq *rt_rq = &rq->rt;
do {
rt_se = pick_next_rt_entity(rq, rt_rq);
@@ -1368,9 +1360,22 @@ static struct task_struct *_pick_next_ta
return p;
}
-static struct task_struct *pick_next_task_rt(struct rq *rq)
+static struct task_struct *
+pick_next_task_rt(struct rq *rq, struct task_struct *prev)
{
- struct task_struct *p = _pick_next_task_rt(rq);
+ struct task_struct *p;
+ struct rt_rq *rt_rq = &rq->rt;
+
+ if (!rt_rq->rt_nr_running)
+ return NULL;
+
+ if (rt_rq_throttled(rt_rq))
+ return NULL;
+
+ if (prev)
+ prev->sched_class->put_prev_task(rq, prev);
+
+ p = _pick_next_task_rt(rq);
/* The running task is never eligible for pushing */
if (p)
--- a/kernel/sched/stop_task.c
+++ b/kernel/sched/stop_task.c
@@ -23,12 +23,17 @@ check_preempt_curr_stop(struct rq *rq, s
/* we're never preempted */
}
-static struct task_struct *pick_next_task_stop(struct rq *rq)
+static struct task_struct *
+pick_next_task_stop(struct rq *rq, struct task_struct *prev)
{
struct task_struct *stop = rq->stop;
- if (stop && stop->on_rq)
+ if (stop && stop->on_rq) {
+ if (prev)
+ prev->sched_class->put_prev_task(rq, prev);
+
return stop;
+ }
return NULL;
}
On Thu, 2012-06-14 at 15:29 +0200, Peter Zijlstra wrote:
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -3176,7 +3176,7 @@ static void put_prev_task(struct rq *rq,
> * Pick up the highest-prio task:
> */
> static inline struct task_struct *
> -pick_next_task(struct rq *rq)
> +pick_next_task(struct rq *rq, struct task_struct *prev)
> {
> const struct sched_class *class;
> struct task_struct *p;
> @@ -3186,13 +3186,13 @@ pick_next_task(struct rq *rq)
> * the fair class we can call that function directly:
> */
> if (likely(rq->nr_running == rq->cfs.h_nr_running)) {
> - p = fair_sched_class.pick_next_task(rq);
> + p = fair_sched_class.pick_next_task(rq, prev);
> if (likely(p))
> return p;
> }
>
> for_each_class(class) {
> - p = class->pick_next_task(rq);
> + p = class->pick_next_task(rq, prev);
> if (p)
> return p;
> }
> @@ -3253,8 +3253,9 @@ static void __sched __schedule(void)
> if (unlikely(!rq->nr_running))
> idle_balance(cpu, rq);
>
> - put_prev_task(rq, prev);
You remove the call to put_prev_task() which looks to be the only user
of this static function. But I don't see the deletion of this function.
-- Steve
> - next = pick_next_task(rq);
> + if (prev->on_rq || rq->skip_clock_update < 0)
> + update_rq_clock(rq);
> + next = pick_next_task(rq, prev);
> clear_tsk_need_resched(prev);
> rq->skip_clock_update = 0;
>
> @@ -5200,7 +5201,7 @@ static void migrate_tasks(unsigned int d
> if (rq->nr_running == 1)
> break;
>
> - next = pick_next_task(rq);
> + next = pick_next_task(rq, NULL);
> BUG_ON(!next);
> next->sched_class->put_prev_task(rq, next);
>
On Thu, 2012-06-14 at 10:32 -0400, Steven Rostedt wrote:
>
> You remove the call to put_prev_task() which looks to be the only user
> of this static function. But I don't see the deletion of this
> function.
>
>
Right you are.. must've overlooked my compiler complaining about this.
On 06/14/2012 05:29 PM, Peter Zijlstra wrote:
> +static struct task_struct *
> +pick_next_task_rt(struct rq *rq, struct task_struct *prev)
> {
> - struct task_struct *p = _pick_next_task_rt(rq);
> + struct task_struct *p;
> + struct rt_rq *rt_rq =&rq->rt;
> +
> + if (!rt_rq->rt_nr_running)
> + return NULL;
> +
> + if (rt_rq_throttled(rt_rq))
> + return NULL;
> +
> + if (prev)
> + prev->sched_class->put_prev_task(rq, prev);
> +
it might be me, but this one sounds strange. If the responsibility of
putting the task now lays with pic_next_task, you can't return NULL
without doing this first.
On Fri, 2012-06-15 at 13:49 +0400, Glauber Costa wrote:
> On 06/14/2012 05:29 PM, Peter Zijlstra wrote:
> > +static struct task_struct *
> > +pick_next_task_rt(struct rq *rq, struct task_struct *prev)
> > {
> > - struct task_struct *p = _pick_next_task_rt(rq);
> > + struct task_struct *p;
> > + struct rt_rq *rt_rq =&rq->rt;
> > +
> > + if (!rt_rq->rt_nr_running)
> > + return NULL;
> > +
> > + if (rt_rq_throttled(rt_rq))
> > + return NULL;
> > +
> > + if (prev)
> > + prev->sched_class->put_prev_task(rq, prev);
> > +
> it might be me, but this one sounds strange. If the responsibility of
> putting the task now lays with pic_next_task, you can't return NULL
> without doing this first.
Its the responsibility of the sched_class::pick_next_task implementation
that will return the next task. Not of the first one called.
Clearly this needs a comment.. /me adds.
---
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1096,6 +1096,11 @@ struct sched_class {
void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int flags);
+ /*
+ * It is the responsibility of the pick_next_task() method that will
+ * return the next task to call put_prev_task() on the @prev task or
+ * something equivalent.
+ */
struct task_struct * (*pick_next_task) (struct rq *rq,
struct task_struct *prev);
void (*put_prev_task) (struct rq *rq, struct task_struct *p);
On 06/15/2012 02:03 PM, Peter Zijlstra wrote:
> On Fri, 2012-06-15 at 13:49 +0400, Glauber Costa wrote:
>> On 06/14/2012 05:29 PM, Peter Zijlstra wrote:
>>> +static struct task_struct *
>>> +pick_next_task_rt(struct rq *rq, struct task_struct *prev)
>>> {
>>> - struct task_struct *p = _pick_next_task_rt(rq);
>>> + struct task_struct *p;
>>> + struct rt_rq *rt_rq =&rq->rt;
>>> +
>>> + if (!rt_rq->rt_nr_running)
>>> + return NULL;
>>> +
>>> + if (rt_rq_throttled(rt_rq))
>>> + return NULL;
>>> +
>>> + if (prev)
>>> + prev->sched_class->put_prev_task(rq, prev);
>>> +
>> it might be me, but this one sounds strange. If the responsibility of
>> putting the task now lays with pic_next_task, you can't return NULL
>> without doing this first.
>
> Its the responsibility of the sched_class::pick_next_task implementation
> that will return the next task. Not of the first one called.
>
> Clearly this needs a comment.. /me adds.
hummmm, yeah, you are right. The comment does make it clearer.
> +pick_next_task_fair(struct rq *rq, struct task_struct *prev)
> {
> struct task_struct *p;
> struct cfs_rq *cfs_rq = &rq->cfs;
> @@ -2999,6 +3000,9 @@ static struct task_struct *pick_next_tas
> if (!cfs_rq->nr_running)
> return NULL;
>
> + if (prev)
> + prev->sched_class->put_prev_task(rq, prev);
> +
> do {
> se = pick_next_entity(cfs_rq);
> set_next_entity(cfs_rq, se);
> --- a/kernel/sched/idle_task.c
> +++ b/kernel/sched/idle_task.c
> @@ -22,8 +22,12 @@ static void check_preempt_curr_idle(stru
> resched_task(rq->idle);
> }
Will it cause trouble when the last task on cfs_rq was dequeued before
been putted?
The cfs_rq->nr_running will be 0 and we will miss the chance to put
prev, will we?
Regards,
Michael Wang
On Thu, 2012-06-21 at 15:35 +0800, Michael Wang wrote:
> > +pick_next_task_fair(struct rq *rq, struct task_struct *prev)
> > {
> > struct task_struct *p;
> > struct cfs_rq *cfs_rq = &rq->cfs;
> > @@ -2999,6 +3000,9 @@ static struct task_struct *pick_next_tas
> > if (!cfs_rq->nr_running)
> > return NULL;
> >
> > + if (prev)
> > + prev->sched_class->put_prev_task(rq, prev);
> > +
> > do {
> > se = pick_next_entity(cfs_rq);
> > set_next_entity(cfs_rq, se);
> > --- a/kernel/sched/idle_task.c
> > +++ b/kernel/sched/idle_task.c
> > @@ -22,8 +22,12 @@ static void check_preempt_curr_idle(stru
> > resched_task(rq->idle);
> > }
>
> Will it cause trouble when the last task on cfs_rq was dequeued before
> been putted?
>
> The cfs_rq->nr_running will be 0 and we will miss the chance to put
> prev, will we?
pick_next_task_idle() will then do the put, no?
On 06/21/2012 04:41 PM, Peter Zijlstra wrote:
> On Thu, 2012-06-21 at 15:35 +0800, Michael Wang wrote:
>>> +pick_next_task_fair(struct rq *rq, struct task_struct *prev)
>>> {
>>> struct task_struct *p;
>>> struct cfs_rq *cfs_rq = &rq->cfs;
>>> @@ -2999,6 +3000,9 @@ static struct task_struct *pick_next_tas
>>> if (!cfs_rq->nr_running)
>>> return NULL;
>>>
>>> + if (prev)
>>> + prev->sched_class->put_prev_task(rq, prev);
>>> +
>>> do {
>>> se = pick_next_entity(cfs_rq);
>>> set_next_entity(cfs_rq, se);
>>> --- a/kernel/sched/idle_task.c
>>> +++ b/kernel/sched/idle_task.c
>>> @@ -22,8 +22,12 @@ static void check_preempt_curr_idle(stru
>>> resched_task(rq->idle);
>>> }
>>
>> Will it cause trouble when the last task on cfs_rq was dequeued before
>> been putted?
>>
>> The cfs_rq->nr_running will be 0 and we will miss the chance to put
>> prev, will we?
>
> pick_next_task_idle() will then do the put, no?
Oh, that's right, I see.
>