2021-08-16 15:46:40

by Tao Zhou

[permalink] [raw]
Subject: [PATCH] sched/core: An optimization of pick_next_task() not sure

When find a new candidate max, wipe the stale and start over.
Goto again: and use the new max to loop to pick the the task.

Here first want to get the max of the core and use this new
max to loop once to pick the task on each thread.

Not sure this is an optimization and just stop here a little
and move on..

Compiled.
---
kernel/sched/core.c | 52 +++++++++++++++++----------------------------
1 file changed, 20 insertions(+), 32 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 20ffcc044134..bddcd328df96 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5403,7 +5403,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
const struct sched_class *class;
const struct cpumask *smt_mask;
bool fi_before = false;
- int i, j, cpu, occ = 0;
+ int i, cpu, occ = 0;
bool need_sync;

if (!sched_core_enabled(rq))
@@ -5508,11 +5508,27 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
* order.
*/
for_each_class(class) {
-again:
+ struct rq *rq_i;
+ struct task_struct *p;
+
for_each_cpu_wrap(i, smt_mask, cpu) {
- struct rq *rq_i = cpu_rq(i);
- struct task_struct *p;
+ rq_i = cpu_rq(i);
+ p = pick_task(rq_i, class, max, fi_before);
+ /*
+ * If this new candidate is of higher priority than the
+ * previous; and they're incompatible; pick_task makes
+ * sure that p's priority is more than max if it doesn't
+ * match max's cookie. Update max.
+ *
+ * NOTE: this is a linear max-filter and is thus bounded
+ * in execution time.
+ */
+ if (!max || !cookie_match(max, p))
+ max = p;
+ }

+ for_each_cpu_wrap(i, smt_mask, cpu) {
+ rq_i = cpu_rq(i);
if (rq_i->core_pick)
continue;

@@ -5536,34 +5552,6 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
rq->core->core_forceidle_seq++;
}

- /*
- * If this new candidate is of higher priority than the
- * previous; and they're incompatible; we need to wipe
- * the slate and start over. pick_task makes sure that
- * p's priority is more than max if it doesn't match
- * max's cookie.
- *
- * NOTE: this is a linear max-filter and is thus bounded
- * in execution time.
- */
- if (!max || !cookie_match(max, p)) {
- struct task_struct *old_max = max;
-
- rq->core->core_cookie = p->core_cookie;
- max = p;
-
- if (old_max) {
- rq->core->core_forceidle = false;
- for_each_cpu(j, smt_mask) {
- if (j == i)
- continue;
-
- cpu_rq(j)->core_pick = NULL;
- }
- occ = 1;
- goto again;
- }
- }
}
}

--
2.31.1


2021-08-16 19:05:06

by Josh Don

[permalink] [raw]
Subject: Re: [PATCH] sched/core: An optimization of pick_next_task() not sure

On Mon, Aug 16, 2021 at 11:53 AM Peter Zijlstra <[email protected]> wrote:
>
> On Mon, Aug 16, 2021 at 11:44:01PM +0800, Tao Zhou wrote:
> > When find a new candidate max, wipe the stale and start over.
> > Goto again: and use the new max to loop to pick the the task.
> >
> > Here first want to get the max of the core and use this new
> > max to loop once to pick the task on each thread.
> >
> > Not sure this is an optimization and just stop here a little
> > and move on..
> >
>
> Did you find this retry was an issue on your workload? Or was this from
> reading the source?

I would be surprised if this made a noticeable difference on SMT2, but
I could certainly see how this would help with larger SMT
configurations where we have more opportunities to invalidate the
other siblings.

2021-08-17 16:45:41

by Tao Zhou

[permalink] [raw]
Subject: Re: [PATCH] sched/core: An optimization of pick_next_task() not sure

Hi Peter,

On Mon, Aug 16, 2021 at 08:52:39PM +0200, Peter Zijlstra wrote:

> On Mon, Aug 16, 2021 at 11:44:01PM +0800, Tao Zhou wrote:
> > When find a new candidate max, wipe the stale and start over.
> > Goto again: and use the new max to loop to pick the the task.
> >
> > Here first want to get the max of the core and use this new
> > max to loop once to pick the task on each thread.
> >
> > Not sure this is an optimization and just stop here a little
> > and move on..
> >
>
> Did you find this retry was an issue on your workload? Or was this from
> reading the source?

Thank you for your reply. Sorry for my late reply.
This was from reading the source..

>
> > ---
> > kernel/sched/core.c | 52 +++++++++++++++++----------------------------
> > 1 file changed, 20 insertions(+), 32 deletions(-)
> >
> > diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> > index 20ffcc044134..bddcd328df96 100644
> > --- a/kernel/sched/core.c
> > +++ b/kernel/sched/core.c
> > @@ -5403,7 +5403,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
> > const struct sched_class *class;
> > const struct cpumask *smt_mask;
> > bool fi_before = false;
> > - int i, j, cpu, occ = 0;
> > + int i, cpu, occ = 0;
> > bool need_sync;
> >
> > if (!sched_core_enabled(rq))
> > @@ -5508,11 +5508,27 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
> > * order.
> > */
> > for_each_class(class) {
> > -again:
> > + struct rq *rq_i;
> > + struct task_struct *p;
> > +
> > for_each_cpu_wrap(i, smt_mask, cpu) {
> > - struct rq *rq_i = cpu_rq(i);
> > - struct task_struct *p;
> > + rq_i = cpu_rq(i);
> > + p = pick_task(rq_i, class, max, fi_before);
> > + /*
> > + * If this new candidate is of higher priority than the
> > + * previous; and they're incompatible; pick_task makes
> > + * sure that p's priority is more than max if it doesn't
> > + * match max's cookie. Update max.
> > + *
> > + * NOTE: this is a linear max-filter and is thus bounded
> > + * in execution time.
> > + */
> > + if (!max || !cookie_match(max, p))
> > + max = p;
> > + }
> >
> > + for_each_cpu_wrap(i, smt_mask, cpu) {
> > + rq_i = cpu_rq(i);
> > if (rq_i->core_pick)
> > continue;
> >
>
> This now calls pick_task() twice for each CPU, which seems unfortunate;
> perhaps add q->core_temp storage to cache that result. Also, since the
> first iteration is now explicitly about the max filter, perhaps we
> shouuld move that part of pick_task() into the loop and simplify things
> further?

Here is my ugly patch below..
Not compiled..


From b3de16fb6f3e6cd2a8a9f7a579e80df74fb2d865 Mon Sep 17 00:00:00 2001
From: Tao Zhou <[email protected]>
Date: Wed, 18 Aug 2021 00:07:38 +0800
Subject: [PATCH] optimize pick_next_task()

---
kernel/sched/core.c | 71 +++++++++++++++++++++++++++++++++-----------
kernel/sched/sched.h | 1 +
2 files changed, 54 insertions(+), 18 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 20ffcc044134..c2a403bacf99 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5380,18 +5380,32 @@ pick_task(struct rq *rq, const struct sched_class *class, struct task_struct *ma
if (cookie_equals(class_pick, cookie))
return class_pick;

- cookie_pick = sched_core_find(rq, cookie);
+ return class_pick;
+}

- /*
- * If class > max && class > cookie, it is the highest priority task on
- * the core (so far) and it must be selected, otherwise we must go with
- * the cookie pick in order to satisfy the constraint.
- */
- if (prio_less(cookie_pick, class_pick, in_fi) &&
- (!max || prio_less(max, class_pick, in_fi)))
- return class_pick;
+static task_struct *
+filter_max_prio(struct rq *rq, struct task_struct *class_pick,
+ struct task_struct **cookie_pick, struct task_struct *max,
+ bool in_fi)
+{
+ unsigned long cookie = rq->core->core_cookie;

- return cookie_pick;
+ *cookie_pick = NULL;
+ if (cookie && !cookie_equals(class_pick, cookie)) {
+ *cookie_pick = sched_core_find(rq, cookie);
+ /*
+ * If class > max && class > cookie, it is the
+ * highest priority task on the core (so far)
+ * and it must be selected, otherwise we must
+ * go with the cookie pick in order to satisfy
+ * the constraint.
+ */
+ if (prio_less(cookie_pick, class_pick, in_fi) &&
+ (!max || prio_less(max, class_pick, in_fi)))
+ return class_pick;
+ }
+
+ return NULL;
}

extern void task_vruntime_update(struct rq *rq, struct task_struct *p, bool in_fi);
@@ -5508,24 +5522,44 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
* order.
*/
for_each_class(class) {
-again:
+ struct task_struct *class_pick, *cookie_pick;
+ struct rq *rq_i;
+
+ for_each_cpu_wrap(i, smt_mask, cpu) {
+ rq_i = cpu_rq(i);
+ class_pick = pick_task(rq_i, class, max, fi_before);
+ rq_i->core_temp = class_pick;
+ /*
+ * This sibling doesn't yet have a suitable task to
+ * run.
+ */
+ if (!class_pick)
+ continue;
+
+ if (filter_max_prio(rq_i, class_pick, &cookie_pick, max, fi_before))
+ max = class_pick;
+ }
+
for_each_cpu_wrap(i, smt_mask, cpu) {
- struct rq *rq_i = cpu_rq(i);
struct task_struct *p;
+ rq_i = cpu_rq(i);

if (rq_i->core_pick)
continue;

/*
- * If this sibling doesn't yet have a suitable task to
- * run; ask for the most eligible task, given the
- * highest priority task already selected for this
- * core.
+ * This sibling doesn't yet have a suitable task to
+ * run.
*/
- p = pick_task(rq_i, class, max, fi_before);
- if (!p)
+ if (!rq_i->core_temp)
continue;

+ p = class_pick = rq_i->core_temp;
+ if (!filter_max_prio(rq_i, class_pick, &cookie_pick, max, fi_before)) {
+ if (cookie_pick)
+ p = cookie_pick;
+ }
+
if (!is_task_rq_idle(p))
occ++;

@@ -9024,6 +9058,7 @@ void __init sched_init(void)
#ifdef CONFIG_SCHED_CORE
rq->core = NULL;
rq->core_pick = NULL;
+ rq->core_temp = NULL;
rq->core_enabled = 0;
rq->core_tree = RB_ROOT;
rq->core_forceidle = false;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 14a41a243f7b..2b21a3846b8e 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1089,6 +1089,7 @@ struct rq {
/* per rq */
struct rq *core;
struct task_struct *core_pick;
+ struct task_struct *core_temp;
unsigned int core_enabled;
unsigned int core_sched_seq;
struct rb_root core_tree;
--
2.31.1


Thanks,
Tao