From: Liang Chen <[email protected]>
Changelog:
v1: wait_task_inactive() frequently calls schedule_hrtimeout() and spends a lot of time;
    I am trying to optimize this on the Rockchip platform.
v2: Use atomic_flags (PFA) instead of a TIF flag, and add some comments.
Liang Chen (1):
sched/fair: do not preempt current task if it is going to call
schedule()
include/linux/sched.h | 5 +++++
kernel/kthread.c | 4 ++++
kernel/sched/fair.c | 13 +++++++++++++
3 files changed, 22 insertions(+)
--
2.17.1
From: Liang Chen <[email protected]>
When we create a kthread with kthread_create_on_cpu(), the child thread
entry is kthread.c:kthread(), which gets preempted by the parent after
calling complete(done) but before calling schedule(). The parent then
calls wait_task_inactive(child), but the child is still on the runqueue,
so the parent falls back to schedule_hrtimeout() for 1 jiffy. This wastes
a lot of time, especially during startup.
parent                              child
kthread_create_on_cpu()
wait_for_completion(&done)  ----->  kthread.c:kthread()
                            |-----  complete(done); --wakeup and preempted by parent
kthread_bind() <------------|        |-> schedule(); --dequeue here
wait_task_inactive(child)            |
schedule_hrtimeout(1 jiffy) ---------|
So we want the child to just wake up the parent without being preempted
by it; since the child is going to call schedule() soon anyway, the
parent will not need to call schedule_hrtimeout(1 jiffy), as the child
will already have been dequeued.
The same issue exists for kthread_park() && kthread_parkme().
This patch can save 120ms on rk312x startup with CONFIG_HZ=300.
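
For context, the parent-side cost comes from the backoff in
wait_task_inactive(), reached via kthread_bind(). Below is a simplified
sketch of the relevant kernel paths (paraphrased, with locking and some
error handling elided, so not the exact upstream code):

  /* Paraphrased from kernel/kthread.c: kthread_bind() ends up here. */
  static void __kthread_bind(struct task_struct *p, unsigned int cpu, long state)
  {
          /* Blocks until the child is off the runqueue -- see below. */
          if (!wait_task_inactive(p, state)) {
                  WARN_ON(1);
                  return;
          }
          /* The task is inactive now, so its affinity can be changed. */
          do_set_cpus_allowed(p, cpumask_of(cpu));
          p->flags |= PF_NO_SETAFFINITY;
  }

  /* Paraphrased from kernel/sched/core.c:wait_task_inactive(). */
  for (;;) {
          /* ... recheck the child's state under the runqueue lock ... */
          if (unlikely(queued)) {
                  /*
                   * The child is still on the runqueue: sleep for a
                   * full jiffy and retry.
                   */
                  ktime_t to = NSEC_PER_SEC / HZ;

                  set_current_state(TASK_UNINTERRUPTIBLE);
                  schedule_hrtimeout(&to, HRTIMER_MODE_REL);
                  continue;
          }
          break;
  }

At CONFIG_HZ=300 a jiffy is ~3.3ms, so the 120ms saving corresponds to
roughly 36 of these one-jiffy backoffs avoided during startup.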
Signed-off-by: Liang Chen <[email protected]>
---
include/linux/sched.h | 5 +++++
kernel/kthread.c | 4 ++++
kernel/sched/fair.c | 13 +++++++++++++
3 files changed, 22 insertions(+)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 04278493bf15..54bf336f5790 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1533,6 +1533,7 @@ static inline bool is_percpu_thread(void)
#define PFA_SPEC_IB_DISABLE 5 /* Indirect branch speculation restricted */
#define PFA_SPEC_IB_FORCE_DISABLE 6 /* Indirect branch speculation permanently restricted */
#define PFA_SPEC_SSB_NOEXEC 7 /* Speculative Store Bypass clear on execve() */
+#define PFA_GOING_TO_SCHED 8 /* task is going to call schedule() */
#define TASK_PFA_TEST(name, func) \
static inline bool task_##func(struct task_struct *p) \
@@ -1575,6 +1576,10 @@ TASK_PFA_CLEAR(SPEC_IB_DISABLE, spec_ib_disable)
TASK_PFA_TEST(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable)
TASK_PFA_SET(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable)
+TASK_PFA_TEST(GOING_TO_SCHED, going_to_sched)
+TASK_PFA_SET(GOING_TO_SCHED, going_to_sched)
+TASK_PFA_CLEAR(GOING_TO_SCHED, going_to_sched)
+
static inline void
current_restore_flags(unsigned long orig_flags, unsigned long flags)
{
diff --git a/kernel/kthread.c b/kernel/kthread.c
index b262f47046ca..bc96de2648f6 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -199,8 +199,10 @@ static void __kthread_parkme(struct kthread *self)
if (!test_bit(KTHREAD_SHOULD_PARK, &self->flags))
break;
+ task_set_going_to_sched(current);
complete(&self->parked);
schedule();
+ task_clear_going_to_sched(current);
}
__set_current_state(TASK_RUNNING);
}
@@ -245,8 +247,10 @@ static int kthread(void *_create)
/* OK, tell user we're spawned, wait for stop or wakeup */
__set_current_state(TASK_UNINTERRUPTIBLE);
create->result = current;
+ task_set_going_to_sched(current);
complete(done);
schedule();
+ task_clear_going_to_sched(current);
ret = -EINTR;
if (!test_bit(KTHREAD_SHOULD_STOP, &self->flags)) {
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 3c8a379c357e..78666cec794a 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4330,6 +4330,12 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
hrtimer_active(&rq_of(cfs_rq)->hrtick_timer))
return;
#endif
+ /*
+ * The current task is going to call schedule(); do not preempt it
+ * or it will cause more useless context switches.
+ */
+ if (task_going_to_sched(rq_of(cfs_rq)->curr))
+ return;
if (cfs_rq->nr_running > 1)
check_preempt_tick(cfs_rq, curr);
@@ -6634,6 +6640,13 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
if (test_tsk_need_resched(curr))
return;
+ /*
+ * The current task is going to call schedule(); do not preempt it
+ * or it will cause more useless context switches.
+ */
+ if (task_going_to_sched(curr))
+ return;
+
/* Idle tasks are by definition preempted by non-idle tasks. */
if (unlikely(task_has_idle_policy(curr)) &&
likely(!task_has_idle_policy(p)))
--
2.17.1
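
A note on the PFA machinery used above: the three TASK_PFA_* lines in the
include/linux/sched.h hunk generate atomic bit accessors on
task_struct::atomic_flags. Roughly, they expand to:

  static inline bool task_going_to_sched(struct task_struct *p)
  { return test_bit(PFA_GOING_TO_SCHED, &p->atomic_flags); }

  static inline void task_set_going_to_sched(struct task_struct *p)
  { set_bit(PFA_GOING_TO_SCHED, &p->atomic_flags); }

  static inline void task_clear_going_to_sched(struct task_struct *p)
  { clear_bit(PFA_GOING_TO_SCHED, &p->atomic_flags); }

Since atomic_flags is per-task and updated with atomic bitops, the child
can set and clear the flag without holding the runqueue lock, while
fair.c only ever reads it for the currently running task.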
On Thu, Mar 05, 2020 at 05:59:48PM +0800, [email protected] wrote:
> From: Liang Chen <[email protected]>
>
> When we create a kthread with kthread_create_on_cpu(), the child thread
> entry is kthread.c:kthread(), which gets preempted by the parent after
> calling complete(done) but before calling schedule(). The parent then
> calls wait_task_inactive(child), but the child is still on the runqueue,
> so the parent falls back to schedule_hrtimeout() for 1 jiffy. This wastes
> a lot of time, especially during startup.
>
> parent                              child
> kthread_create_on_cpu()
> wait_for_completion(&done)  ----->  kthread.c:kthread()
>                             |-----  complete(done); --wakeup and preempted by parent
> kthread_bind() <------------|        |-> schedule(); --dequeue here
> wait_task_inactive(child)            |
> schedule_hrtimeout(1 jiffy) ---------|
>
> So we want the child to just wake up the parent without being preempted
> by it; since the child is going to call schedule() soon anyway, the
> parent will not need to call schedule_hrtimeout(1 jiffy), as the child
> will already have been dequeued.
>
> The same issue exists for kthread_park() && kthread_parkme().
> This patch can save 120ms on rk312x startup with CONFIG_HZ=300.
>
> Signed-off-by: Liang Chen <[email protected]>
I'm not familiar with the subtleties of scheduler internals
(e.g. is there a race between the end of "schedule();" and calling
"task_clear_going_to_sched();" that effects the preemption test logic?),
so I'll leave that review to the others. But speaking to the PFA change,
it looks sane to me:
Reviewed-by: Kees Cook <[email protected]>
-Kees
--
Kees Cook