ttwu_runnable() is used as a fast wakeup path when the wakee task
is running on a CPU or is runnable on the RQ. In both cases we can just
set its state to TASK_RUNNING to prevent a sleep.
If the wakee task is on_cpu running, we don't need to update_rq_clock()
or check_preempt_curr().
But if the wakee task is on_rq && !on_cpu (e.g. an IRQ hit before
the task got to schedule() and the task has been preempted), we should
call check_preempt_curr() to see if it can preempt the currently running task.
Reorganize ttwu_do_wakeup() and ttwu_do_activate() to make
ttwu_do_wakeup() only mark the task runnable, so it can be used
in ttwu_runnable() and try_to_wake_up() fast paths.
This also removes the class->task_woken() callback from ttwu_runnable(),
which wasn't required per the RT/DL implementations: any required push
operation would have been queued during class->set_next_task() when p
got preempted.
ttwu_runnable() also loses the update to rq->idle_stamp, as by definition
the rq cannot be idle in this scenario.
Signed-off-by: Chengming Zhou <[email protected]>
Suggested-by: Valentin Schneider <[email protected]>
Suggested-by: Peter Zijlstra <[email protected]>
Reviewed-by: Valentin Schneider <[email protected]>
---
v4:
- s/This patch reorg/Reorganize/ per Bagas Sanjaya. Thanks!
v3:
- Improve the changelog per Valentin Schneider. Thanks!
v2:
- keep check_preempt_curr() for on_rq && !on_cpu case in ttwu_runnable(),
per Valentin Schneider.
- reorg ttwu_do_wakeup() and ttwu_do_activate() code, so ttwu_do_wakeup()
can be reused in ttwu_runnable(), per Peter Zijlstra.
- reuse ttwu_do_wakeup() in try_to_wake_up() (p == current) fast path too,
so ttwu_do_wakeup() becomes the only place where we mark a task runnable.
---
kernel/sched/core.c | 73 ++++++++++++++++++++++++---------------------
1 file changed, 39 insertions(+), 34 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 314c2c0219d9..d8216485b0ad 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3623,14 +3623,39 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
}
/*
- * Mark the task runnable and perform wakeup-preemption.
+ * Mark the task runnable.
*/
-static void ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags,
- struct rq_flags *rf)
+static inline void ttwu_do_wakeup(struct task_struct *p)
{
- check_preempt_curr(rq, p, wake_flags);
WRITE_ONCE(p->__state, TASK_RUNNING);
trace_sched_wakeup(p);
+}
+
+static void
+ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
+ struct rq_flags *rf)
+{
+ int en_flags = ENQUEUE_WAKEUP | ENQUEUE_NOCLOCK;
+
+ lockdep_assert_rq_held(rq);
+
+ if (p->sched_contributes_to_load)
+ rq->nr_uninterruptible--;
+
+#ifdef CONFIG_SMP
+ if (wake_flags & WF_MIGRATED)
+ en_flags |= ENQUEUE_MIGRATED;
+ else
+#endif
+ if (p->in_iowait) {
+ delayacct_blkio_end(p);
+ atomic_dec(&task_rq(p)->nr_iowait);
+ }
+
+ activate_task(rq, p, en_flags);
+ check_preempt_curr(rq, p, wake_flags);
+
+ ttwu_do_wakeup(p);
#ifdef CONFIG_SMP
if (p->sched_class->task_woken) {
@@ -3660,31 +3685,6 @@ static void ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags,
#endif
}
-static void
-ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
- struct rq_flags *rf)
-{
- int en_flags = ENQUEUE_WAKEUP | ENQUEUE_NOCLOCK;
-
- lockdep_assert_rq_held(rq);
-
- if (p->sched_contributes_to_load)
- rq->nr_uninterruptible--;
-
-#ifdef CONFIG_SMP
- if (wake_flags & WF_MIGRATED)
- en_flags |= ENQUEUE_MIGRATED;
- else
-#endif
- if (p->in_iowait) {
- delayacct_blkio_end(p);
- atomic_dec(&task_rq(p)->nr_iowait);
- }
-
- activate_task(rq, p, en_flags);
- ttwu_do_wakeup(rq, p, wake_flags, rf);
-}
-
/*
* Consider @p being inside a wait loop:
*
@@ -3718,9 +3718,15 @@ static int ttwu_runnable(struct task_struct *p, int wake_flags)
rq = __task_rq_lock(p, &rf);
if (task_on_rq_queued(p)) {
- /* check_preempt_curr() may use rq clock */
- update_rq_clock(rq);
- ttwu_do_wakeup(rq, p, wake_flags, &rf);
+ if (!task_on_cpu(rq, p)) {
+ /*
+ * When on_rq && !on_cpu the task is preempted, see if
+ * it should preempt whatever is current there now.
+ */
+ update_rq_clock(rq);
+ check_preempt_curr(rq, p, wake_flags);
+ }
+ ttwu_do_wakeup(p);
ret = 1;
}
__task_rq_unlock(rq, &rf);
@@ -4086,8 +4092,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
goto out;
trace_sched_waking(p);
- WRITE_ONCE(p->__state, TASK_RUNNING);
- trace_sched_wakeup(p);
+ ttwu_do_wakeup(p);
goto out;
}
--
2.37.2
On 2022-12-02 at 16:06:44 +0800, Chengming Zhou wrote:
> ttwu_runnable() is used as a fast wakeup path when the wakee task
> is running on CPU or runnable on RQ, in both cases we can just
> set its state to TASK_RUNNING to prevent a sleep.
>
> If the wakee task is on_cpu running, we don't need to update_rq_clock()
> or check_preempt_curr().
>
> But if the wakee task is on_rq && !on_cpu (e.g. an IRQ hit before
> the task got to schedule() and the task been preempted), we should
> check_preempt_curr() to see if it can preempt the current running.
>
> Reorganize ttwu_do_wakeup() and ttwu_do_activate() to make
> ttwu_do_wakeup() only mark the task runnable, so it can be used
> in ttwu_runnable() and try_to_wake_up() fast paths.
>
> This also removes the class->task_woken() callback from ttwu_runnable(),
> which wasn't required per the RT/DL implementations: any required push
> operation would have been queued during class->set_next_task() when p
> got preempted.
>
> ttwu_runnable() also loses the update to rq->idle_stamp, as by definition
> the rq cannot be idle in this scenario.
>
> Signed-off-by: Chengming Zhou <[email protected]>
> Suggested-by: Valentin Schneider <[email protected]>
> Suggested-by: Peter Zijlstra <[email protected]>
> Reviewed-by: Valentin Schneider <[email protected]>
> ---
> v4:
> - s/This patch reorg/Reorganize/ per Bagas Sanjaya. Thanks!
>
> v3:
> - Improve the changelog per Valentin Schneider. Thanks!
>
> v2:
> - keep check_preempt_curr() for on_rq && !on_cpu case in ttwu_runnable(),
> per Valentin Schneider.
> - reorg ttwu_do_wakeup() and ttwu_do_activate() code, so ttwu_do_wakeup()
> can be reused in ttwu_runnable(), per Peter Zijlstra.
> - reuse ttwu_do_wakeup() in try_to_wake_up() (p == current) fast path too,
> so ttwu_do_wakeup() become the only place we mark task runnable.
> ---
> kernel/sched/core.c | 73 ++++++++++++++++++++++++---------------------
> 1 file changed, 39 insertions(+), 34 deletions(-)
>
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 314c2c0219d9..d8216485b0ad 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -3623,14 +3623,39 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
> }
>
> /*
> - * Mark the task runnable and perform wakeup-preemption.
> + * Mark the task runnable.
> */
> -static void ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags,
> - struct rq_flags *rf)
> +static inline void ttwu_do_wakeup(struct task_struct *p)
> {
> - check_preempt_curr(rq, p, wake_flags);
> WRITE_ONCE(p->__state, TASK_RUNNING);
> trace_sched_wakeup(p);
> +}
> +
> +static void
> +ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
> + struct rq_flags *rf)
> +{
> + int en_flags = ENQUEUE_WAKEUP | ENQUEUE_NOCLOCK;
> +
> + lockdep_assert_rq_held(rq);
> +
> + if (p->sched_contributes_to_load)
> + rq->nr_uninterruptible--;
> +
> +#ifdef CONFIG_SMP
> + if (wake_flags & WF_MIGRATED)
> + en_flags |= ENQUEUE_MIGRATED;
> + else
> +#endif
> + if (p->in_iowait) {
> + delayacct_blkio_end(p);
> + atomic_dec(&task_rq(p)->nr_iowait);
> + }
> +
> + activate_task(rq, p, en_flags);
> + check_preempt_curr(rq, p, wake_flags);
> +
> + ttwu_do_wakeup(p);
>
> #ifdef CONFIG_SMP
> if (p->sched_class->task_woken) {
> @@ -3660,31 +3685,6 @@ static void ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags,
> #endif
> }
>
> -static void
> -ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
> - struct rq_flags *rf)
> -{
> - int en_flags = ENQUEUE_WAKEUP | ENQUEUE_NOCLOCK;
> -
> - lockdep_assert_rq_held(rq);
> -
> - if (p->sched_contributes_to_load)
> - rq->nr_uninterruptible--;
> -
> -#ifdef CONFIG_SMP
> - if (wake_flags & WF_MIGRATED)
> - en_flags |= ENQUEUE_MIGRATED;
> - else
> -#endif
> - if (p->in_iowait) {
> - delayacct_blkio_end(p);
> - atomic_dec(&task_rq(p)->nr_iowait);
> - }
> -
> - activate_task(rq, p, en_flags);
> - ttwu_do_wakeup(rq, p, wake_flags, rf);
> -}
> -
> /*
> * Consider @p being inside a wait loop:
> *
> @@ -3718,9 +3718,15 @@ static int ttwu_runnable(struct task_struct *p, int wake_flags)
>
> rq = __task_rq_lock(p, &rf);
> if (task_on_rq_queued(p)) {
> - /* check_preempt_curr() may use rq clock */
> - update_rq_clock(rq);
> - ttwu_do_wakeup(rq, p, wake_flags, &rf);
> + if (!task_on_cpu(rq, p)) {
> + /*
> + * When on_rq && !on_cpu the task is preempted, see if
> + * it should preempt whatever is current there now.
> + */
> + update_rq_clock(rq);
> + check_preempt_curr(rq, p, wake_flags);
> + }
> + ttwu_do_wakeup(p);
> ret = 1;
> }
> __task_rq_unlock(rq, &rf);
> @@ -4086,8 +4092,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
> goto out;
>
> trace_sched_waking(p);
> - WRITE_ONCE(p->__state, TASK_RUNNING);
> - trace_sched_wakeup(p);
> + ttwu_do_wakeup(p);
> goto out;
> }
>
Just wondering whether we could split the reorganization and the optimization
into two patches, so that the code changes are a little easier to track in the future?
thanks,
Chenyu
On 2022/12/3 15:55, Chen Yu wrote:
> On 2022-12-02 at 16:06:44 +0800, Chengming Zhou wrote:
>> ttwu_runnable() is used as a fast wakeup path when the wakee task
>> is running on CPU or runnable on RQ, in both cases we can just
>> set its state to TASK_RUNNING to prevent a sleep.
>>
>> If the wakee task is on_cpu running, we don't need to update_rq_clock()
>> or check_preempt_curr().
>>
>> But if the wakee task is on_rq && !on_cpu (e.g. an IRQ hit before
>> the task got to schedule() and the task been preempted), we should
>> check_preempt_curr() to see if it can preempt the current running.
>>
>> Reorganize ttwu_do_wakeup() and ttwu_do_activate() to make
>> ttwu_do_wakeup() only mark the task runnable, so it can be used
>> in ttwu_runnable() and try_to_wake_up() fast paths.
>>
>> This also removes the class->task_woken() callback from ttwu_runnable(),
>> which wasn't required per the RT/DL implementations: any required push
>> operation would have been queued during class->set_next_task() when p
>> got preempted.
>>
>> ttwu_runnable() also loses the update to rq->idle_stamp, as by definition
>> the rq cannot be idle in this scenario.
>>
>> Signed-off-by: Chengming Zhou <[email protected]>
>> Suggested-by: Valentin Schneider <[email protected]>
>> Suggested-by: Peter Zijlstra <[email protected]>
>> Reviewed-by: Valentin Schneider <[email protected]>
>> ---
>> v4:
>> - s/This patch reorg/Reorganize/ per Bagas Sanjaya. Thanks!
>>
>> v3:
>> - Improve the changelog per Valentin Schneider. Thanks!
>>
>> v2:
>> - keep check_preempt_curr() for on_rq && !on_cpu case in ttwu_runnable(),
>> per Valentin Schneider.
>> - reorg ttwu_do_wakeup() and ttwu_do_activate() code, so ttwu_do_wakeup()
>> can be reused in ttwu_runnable(), per Peter Zijlstra.
>> - reuse ttwu_do_wakeup() in try_to_wake_up() (p == current) fast path too,
>> so ttwu_do_wakeup() become the only place we mark task runnable.
>> ---
>> kernel/sched/core.c | 73 ++++++++++++++++++++++++---------------------
>> 1 file changed, 39 insertions(+), 34 deletions(-)
>>
>> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
>> index 314c2c0219d9..d8216485b0ad 100644
>> --- a/kernel/sched/core.c
>> +++ b/kernel/sched/core.c
>> @@ -3623,14 +3623,39 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
>> }
>>
>> /*
>> - * Mark the task runnable and perform wakeup-preemption.
>> + * Mark the task runnable.
>> */
>> -static void ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags,
>> - struct rq_flags *rf)
>> +static inline void ttwu_do_wakeup(struct task_struct *p)
>> {
>> - check_preempt_curr(rq, p, wake_flags);
>> WRITE_ONCE(p->__state, TASK_RUNNING);
>> trace_sched_wakeup(p);
>> +}
>> +
>> +static void
>> +ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
>> + struct rq_flags *rf)
>> +{
>> + int en_flags = ENQUEUE_WAKEUP | ENQUEUE_NOCLOCK;
>> +
>> + lockdep_assert_rq_held(rq);
>> +
>> + if (p->sched_contributes_to_load)
>> + rq->nr_uninterruptible--;
>> +
>> +#ifdef CONFIG_SMP
>> + if (wake_flags & WF_MIGRATED)
>> + en_flags |= ENQUEUE_MIGRATED;
>> + else
>> +#endif
>> + if (p->in_iowait) {
>> + delayacct_blkio_end(p);
>> + atomic_dec(&task_rq(p)->nr_iowait);
>> + }
>> +
>> + activate_task(rq, p, en_flags);
>> + check_preempt_curr(rq, p, wake_flags);
>> +
>> + ttwu_do_wakeup(p);
>>
>> #ifdef CONFIG_SMP
>> if (p->sched_class->task_woken) {
>> @@ -3660,31 +3685,6 @@ static void ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags,
>> #endif
>> }
>>
>> -static void
>> -ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
>> - struct rq_flags *rf)
>> -{
>> - int en_flags = ENQUEUE_WAKEUP | ENQUEUE_NOCLOCK;
>> -
>> - lockdep_assert_rq_held(rq);
>> -
>> - if (p->sched_contributes_to_load)
>> - rq->nr_uninterruptible--;
>> -
>> -#ifdef CONFIG_SMP
>> - if (wake_flags & WF_MIGRATED)
>> - en_flags |= ENQUEUE_MIGRATED;
>> - else
>> -#endif
>> - if (p->in_iowait) {
>> - delayacct_blkio_end(p);
>> - atomic_dec(&task_rq(p)->nr_iowait);
>> - }
>> -
>> - activate_task(rq, p, en_flags);
>> - ttwu_do_wakeup(rq, p, wake_flags, rf);
>> -}
>> -
>> /*
>> * Consider @p being inside a wait loop:
>> *
>> @@ -3718,9 +3718,15 @@ static int ttwu_runnable(struct task_struct *p, int wake_flags)
>>
>> rq = __task_rq_lock(p, &rf);
>> if (task_on_rq_queued(p)) {
>> - /* check_preempt_curr() may use rq clock */
>> - update_rq_clock(rq);
>> - ttwu_do_wakeup(rq, p, wake_flags, &rf);
>> + if (!task_on_cpu(rq, p)) {
>> + /*
>> + * When on_rq && !on_cpu the task is preempted, see if
>> + * it should preempt whatever is current there now.
>> + */
>> + update_rq_clock(rq);
>> + check_preempt_curr(rq, p, wake_flags);
>> + }
>> + ttwu_do_wakeup(p);
>> ret = 1;
>> }
>> __task_rq_unlock(rq, &rf);
>> @@ -4086,8 +4092,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
>> goto out;
>>
>> trace_sched_waking(p);
>> - WRITE_ONCE(p->__state, TASK_RUNNING);
>> - trace_sched_wakeup(p);
>> + ttwu_do_wakeup(p);
>> goto out;
>> }
>>
> Just wonder if we could split the reorganization and optimization into two patches,
> so we can track the code change a little easier in the future?
Thanks for your suggestion.
I'll try to split it into two patches later and resend.
Thanks.
>
> thanks,
> Chenyu