When cpudl_find() returns any cpu among free_cpus, that cpu might not be
the closest one in terms of sched domain topology. For example:
this_cpu: 15
free_cpus: 0, 1,..., 14 (== later_mask)
best_cpu: 0
topology:
0 --+
+--+
1 --+ |
+-- ... --+
2 --+ | |
+--+ |
3 --+ |
... ...
12 --+ |
+--+ |
13 --+ | |
+-- ... -+
14 --+ |
+--+
15 --+
In this case, it would be best to select 14, since it's a free cpu and
the closest to 15 (this_cpu). However, the code currently selects 0
(best_cpu) even though that's just an arbitrary cpu among free_cpus. Fix it.
Signed-off-by: Byungchul Park <[email protected]>
---
kernel/sched/deadline.c | 29 +++++++++++++++--------------
1 file changed, 15 insertions(+), 14 deletions(-)
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index a2ce590..49c93b9 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1324,7 +1324,7 @@ static int find_later_rq(struct task_struct *task)
struct sched_domain *sd;
struct cpumask *later_mask = this_cpu_cpumask_var_ptr(local_cpu_mask_dl);
int this_cpu = smp_processor_id();
- int best_cpu, cpu = task_cpu(task);
+ int cpu = task_cpu(task);
/* Make sure the mask is initialized first */
if (unlikely(!later_mask))
@@ -1337,17 +1337,14 @@ static int find_later_rq(struct task_struct *task)
* We have to consider system topology and task affinity
* first, then we can look for a suitable cpu.
*/
- best_cpu = cpudl_find(&task_rq(task)->rd->cpudl,
- task, later_mask);
- if (best_cpu == -1)
+ if (cpudl_find(&task_rq(task)->rd->cpudl, task, later_mask) == -1)
return -1;
/*
- * If we are here, some target has been found,
- * the most suitable of which is cached in best_cpu.
- * This is, among the runqueues where the current tasks
- * have later deadlines than the task's one, the rq
- * with the latest possible one.
+ * If we are here, some targets have been found, including
+ * the most suitable which is, among the runqueues where the
+ * current tasks have later deadlines than the task's one, the
+ * rq with the latest possible one.
*
* Now we check how well this matches with task's
* affinity and system topology.
@@ -1367,6 +1364,7 @@ static int find_later_rq(struct task_struct *task)
rcu_read_lock();
for_each_domain(cpu, sd) {
if (sd->flags & SD_WAKE_AFFINE) {
+ int closest_cpu;
/*
* If possible, preempting this_cpu is
@@ -1378,14 +1376,17 @@ static int find_later_rq(struct task_struct *task)
return this_cpu;
}
+ closest_cpu = cpumask_first_and(later_mask,
+ sched_domain_span(sd));
/*
- * Last chance: if best_cpu is valid and is
- * in the mask, that becomes our choice.
+ * Last chance: if a cpu is in both later_mask
+ * and the current sd span, that becomes our
+ * choice. Of course, the latest possible cpu is
+ * already under consideration through later_mask.
*/
- if (best_cpu < nr_cpu_ids &&
- cpumask_test_cpu(best_cpu, sched_domain_span(sd))) {
+ if (closest_cpu < nr_cpu_ids) {
rcu_read_unlock();
- return best_cpu;
+ return closest_cpu;
}
}
}
--
1.9.1
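To see why walking the domains this way picks the right cpu, here is a
minimal user-space sketch of the logic above (illustrative only, not
kernel code: plain bitmasks stand in for struct cpumask, first_and()
stands in for cpumask_first_and(), and the three domain spans are made
up to mirror the 16-cpu topology in the changelog):

#include <stdio.h>

/* cpumask_first_and() modeled on plain bitmasks; -1 stands in for nr_cpu_ids */
static int first_and(unsigned long a, unsigned long b)
{
	unsigned long m = a & b;

	return m ? __builtin_ctzl(m) : -1;
}

int main(void)
{
	/* free_cpus == later_mask: cpus 0..14 are free */
	unsigned long later_mask = 0x7fffUL;
	/* domains of this_cpu == 15, smallest span first: {14,15}, {12..15}, {0..15} */
	unsigned long spans[] = { 0xc000UL, 0xf000UL, 0xffffUL };
	unsigned int level;

	for (level = 0; level < 3; level++) {
		int closest_cpu = first_and(later_mask, spans[level]);

		if (closest_cpu != -1) {
			/* prints "level 0: closest_cpu = 14" */
			printf("level %u: closest_cpu = %d\n", level, closest_cpu);
			return 0;
		}
	}
	return 1;
}

Since for_each_domain() visits the domains from the smallest span
outwards, the first match is by construction the topologically closest
free cpu, which is how 14 wins over 0 here.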
It would be better to avoid pushing tasks to another cpu within
an SD_PREFER_SIBLING domain; instead, get more chances to check other
siblings.
Signed-off-by: Byungchul Park <[email protected]>
---
kernel/sched/deadline.c | 17 +++++++++++++++++
1 file changed, 17 insertions(+)
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 49c93b9..ec15a8b 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1325,6 +1325,7 @@ static int find_later_rq(struct task_struct *task)
struct cpumask *later_mask = this_cpu_cpumask_var_ptr(local_cpu_mask_dl);
int this_cpu = smp_processor_id();
int cpu = task_cpu(task);
+ int fallback_cpu = -1;
/* Make sure the mask is initialized first */
if (unlikely(!later_mask))
@@ -1385,6 +1386,15 @@ static int find_later_rq(struct task_struct *task)
* already under consideration through later_mask.
*/
if (closest_cpu < nr_cpu_ids) {
+ /*
+ * If the current domain is SD_PREFER_SIBLING
+ * flagged, we have to get more chances to
+ * check other siblings.
+ */
+ if (sd->flags & SD_PREFER_SIBLING) {
+ fallback_cpu = closest_cpu;
+ continue;
+ }
rcu_read_unlock();
return closest_cpu;
}
@@ -1393,6 +1403,13 @@ static int find_later_rq(struct task_struct *task)
rcu_read_unlock();
/*
+ * If fallback_cpu is valid, all our guesses failed *except* for
+ * the SD_PREFER_SIBLING domain. Now we can return the fallback cpu.
+ */
+ if (fallback_cpu != -1)
+ return fallback_cpu;
+
+ /*
* At this point, all our guesses failed, we just return
* 'something', and let the caller sort the things out.
*/
--
1.9.1
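The deferral the patch introduces can be boiled down to the following
stand-alone sketch (illustrative only: struct domain and pick_cpu() are
made-up names, and each level's candidate is precomputed the way
cpumask_first_and() would compute it):

#include <stdbool.h>
#include <stdio.h>

struct domain {
	bool prefer_sibling;	/* SD_PREFER_SIBLING set on this level */
	int candidate;		/* closest free cpu in this span, or -1 */
};

/* Walk the domains inner to outer, as for_each_domain() does. */
static int pick_cpu(const struct domain *sd, int nr_levels)
{
	int fallback_cpu = -1;
	int i;

	for (i = 0; i < nr_levels; i++) {
		int cpu = sd[i].candidate;

		if (cpu == -1)
			continue;
		if (sd[i].prefer_sibling) {
			/* remember it, but give wider levels a chance */
			fallback_cpu = cpu;
			continue;
		}
		return cpu;
	}
	/* every matching level preferred its siblings; use the remembered cpu */
	return fallback_cpu;	/* may still be -1: caller sorts it out */
}

int main(void)
{
	/* inner level prefers siblings (cpu 1 free); outer level has cpu 2 free */
	struct domain sds[] = { { true, 1 }, { false, 2 } };

	printf("picked cpu %d\n", pick_cpu(sds, 2));	/* prints 2 */
	return 0;
}

The same pattern is applied to rt in the next patch.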
It would be better to avoid pushing tasks to another cpu within
an SD_PREFER_SIBLING domain; instead, get more chances to check other
siblings.
Signed-off-by: Byungchul Park <[email protected]>
---
kernel/sched/rt.c | 17 +++++++++++++++++
1 file changed, 17 insertions(+)
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 979b734..6332b2ad 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1624,6 +1624,7 @@ static int find_lowest_rq(struct task_struct *task)
struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask);
int this_cpu = smp_processor_id();
int cpu = task_cpu(task);
+ int fallback_cpu = -1;
/* Make sure the mask is initialized first */
if (unlikely(!lowest_mask))
@@ -1671,6 +1672,15 @@ static int find_lowest_rq(struct task_struct *task)
best_cpu = cpumask_first_and(lowest_mask,
sched_domain_span(sd));
if (best_cpu < nr_cpu_ids) {
+ /*
+ * If the current domain is SD_PREFER_SIBLING
+ * flagged, we have to get more chances to
+ * check other siblings.
+ */
+ if (sd->flags & SD_PREFER_SIBLING) {
+ fallback_cpu = best_cpu;
+ continue;
+ }
rcu_read_unlock();
return best_cpu;
}
@@ -1679,6 +1689,13 @@ static int find_lowest_rq(struct task_struct *task)
rcu_read_unlock();
/*
+ * If fallback_cpu is valid, all our guesses failed *except* for
+ * the SD_PREFER_SIBLING domain. Now we can return the fallback cpu.
+ */
+ if (fallback_cpu != -1)
+ return fallback_cpu;
+
+ /*
* And finally, if there were no matches within the domains
* just give the caller *something* to work with from the compatible
* locations.
--
1.9.1
On Thu, Mar 23, 2017 at 11:12:49AM +0900, Byungchul Park wrote:
> It would be better to avoid pushing tasks to another cpu within
> an SD_PREFER_SIBLING domain; instead, get more chances to check other
> siblings.
+cc [email protected]
+cc [email protected]
>
> Signed-off-by: Byungchul Park <[email protected]>
> ---
> kernel/sched/rt.c | 17 +++++++++++++++++
> 1 file changed, 17 insertions(+)
>
> diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
> index 979b734..6332b2ad 100644
> --- a/kernel/sched/rt.c
> +++ b/kernel/sched/rt.c
> @@ -1624,6 +1624,7 @@ static int find_lowest_rq(struct task_struct *task)
> struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask);
> int this_cpu = smp_processor_id();
> int cpu = task_cpu(task);
> + int fallback_cpu = -1;
>
> /* Make sure the mask is initialized first */
> if (unlikely(!lowest_mask))
> @@ -1671,6 +1672,15 @@ static int find_lowest_rq(struct task_struct *task)
> best_cpu = cpumask_first_and(lowest_mask,
> sched_domain_span(sd));
> if (best_cpu < nr_cpu_ids) {
> + /*
> + * If the current domain is SD_PREFER_SIBLING
> + * flagged, we have to get more chances to
> + * check other siblings.
> + */
> + if (sd->flags & SD_PREFER_SIBLING) {
> + fallback_cpu = best_cpu;
> + continue;
> + }
> rcu_read_unlock();
> return best_cpu;
> }
> @@ -1679,6 +1689,13 @@ static int find_lowest_rq(struct task_struct *task)
> rcu_read_unlock();
>
> /*
> + * If fallback_cpu is valid, all our guesses failed *except* for
> + * the SD_PREFER_SIBLING domain. Now we can return the fallback cpu.
> + */
> + if (fallback_cpu != -1)
> + return fallback_cpu;
> +
> + /*
> * And finally, if there were no matches within the domains
> * just give the caller *something* to work with from the compatible
> * locations.
> --
> 1.9.1
On Thu, Mar 23, 2017 at 11:12:49AM +0900, Byungchul Park wrote:
> It would be better to avoid pushing tasks to another cpu within
> an SD_PREFER_SIBLING domain; instead, get more chances to check other
> siblings.
Did you forget to post the rt equivalent to patch 1?
On Thu, Mar 23, 2017 at 10:44:45AM +0100, Peter Zijlstra wrote:
> On Thu, Mar 23, 2017 at 11:12:49AM +0900, Byungchul Park wrote:
> > It would be better to avoid pushing tasks to another cpu within
> > an SD_PREFER_SIBLING domain; instead, get more chances to check other
> > siblings.
>
> Did you forget to post the rt equivalent to patch 1?
No. Fortunately, rt currently works as patch 1 does.
On Thu, 23 Mar 2017 18:21:23 +0900
Byungchul Park <[email protected]> wrote:
> On Thu, Mar 23, 2017 at 11:12:49AM +0900, Byungchul Park wrote:
> > It would be better to avoid pushing tasks to another cpu within
> > an SD_PREFER_SIBLING domain; instead, get more chances to check other
> > siblings.
>
> +cc [email protected]
> +cc [email protected]
>
Neither of us work at those companies anymore.
-- Steve
On Thu, 23 Mar 2017 19:36:51 +0900
Byungchul Park <[email protected]> wrote:
> On Thu, Mar 23, 2017 at 10:44:45AM +0100, Peter Zijlstra wrote:
> > On Thu, Mar 23, 2017 at 11:12:49AM +0900, Byungchul Park wrote:
> > > It would be better to avoid pushing tasks to another cpu within
> > > an SD_PREFER_SIBLING domain; instead, get more chances to check
> > > other siblings.
> >
> > Did you forget to post the rt equivalent to patch 1?
>
> No. Fortunately, rt currently works as patch 1 does.
I'm thinking that the rt and deadline search for lowest rq functions
should be merged as one.
What they are doing is looking for the rq with the lowest priority.
deadline currently doesn't care if it picks an rq with an rt task
running on it, even when there's an rq with no rt tasks that the dl task
can migrate to. The same goes with rt. It could place an RT task on an
rq running a deadline task without knowing the rt task won't be able to
run on that cpu immediately.
-- Steve
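For what it's worth, the merge Steve suggests could factor the
class-specific candidate search out behind a callback so rt and dl
share one domain walk. A rough sketch of the shape it might take (every
name below is hypothetical, not an existing kernel API):

#include <stdbool.h>

struct task_struct;
struct cpumask;

/* cpupri_find() for rt, cpudl_find() for dl, behind one signature */
typedef bool (*find_cpus_fn)(struct task_struct *task, struct cpumask *mask);

/*
 * Hypothetical common helper that both find_lowest_rq() and
 * find_later_rq() could call: the class fills in the candidate mask,
 * and the SD_WAKE_AFFINE/SD_PREFER_SIBLING domain walk is shared.
 */
static int find_best_rq(struct task_struct *task, struct cpumask *mask,
			find_cpus_fn find_candidates)
{
	if (!find_candidates(task, mask))
		return -1;

	/* ... the common domain walk from find_later_rq() would go here ... */
	return -1;
}

A shared walk would also be a natural place to address the cross-class
concern above, e.g. by letting the callback rank rqs that are running
neither rt nor dl tasks ahead of the rest.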
On Thu, Mar 23, 2017 at 07:08:24AM -0700, Steven Rostedt wrote:
> On Thu, 23 Mar 2017 19:36:51 +0900
> Byungchul Park <[email protected]> wrote:
>
> > On Thu, Mar 23, 2017 at 10:44:45AM +0100, Peter Zijlstra wrote:
> > > On Thu, Mar 23, 2017 at 11:12:49AM +0900, Byungchul Park wrote:
> > > > It would be better to avoid pushing tasks to another cpu within
> > > > an SD_PREFER_SIBLING domain; instead, get more chances to check
> > > > other siblings.
> > >
> > > Did you forget to post the rt equivalent to patch 1?
> >
> > No. Fortunately, rt currently works as patch 1 does.
>
> I'm thinking that the rt and deadline search for lowest rq functions
> should be merged as one.
>
> What they are doing is looking for the rq with the lowest priority.
> deadline currently doesn't care if it picks an rq with an rt task
> running on it, even when there's an rq with no rt tasks that the dl task
> can migrate to. The same goes with rt. It could place an RT task on an
> rq running a deadline task without knowing the rt task won't be able to
> run on that cpu immediately.
I also think so. And IMHO there are more things to care about wrt rt/dl
migration. As you said, those cases should be considered and fixed. I
wonder if the rt and dl search should be merged into one, though...
Thank you,
Byungchul
Hi,
On 23/03/17 11:12, Byungchul Park wrote:
> When cpudl_find() returns any cpu among free_cpus, that cpu might not be
> the closest one in terms of sched domain topology. For example:
>
> this_cpu: 15
> free_cpus: 0, 1,..., 14 (== later_mask)
> best_cpu: 0
>
> topology:
>
> 0 --+
> +--+
> 1 --+ |
> +-- ... --+
> 2 --+ | |
> +--+ |
> 3 --+ |
>
> ... ...
>
> 12 --+ |
> +--+ |
> 13 --+ | |
> +-- ... -+
> 14 --+ |
> +--+
> 15 --+
>
> In this case, it would be best to select 14, since it's a free cpu and
> the closest to 15 (this_cpu). However, the code currently selects 0
> (best_cpu) even though that's just an arbitrary cpu among free_cpus. Fix it.
>
> Signed-off-by: Byungchul Park <[email protected]>
> ---
> kernel/sched/deadline.c | 29 +++++++++++++++--------------
> 1 file changed, 15 insertions(+), 14 deletions(-)
>
> diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
> index a2ce590..49c93b9 100644
> --- a/kernel/sched/deadline.c
> +++ b/kernel/sched/deadline.c
> @@ -1324,7 +1324,7 @@ static int find_later_rq(struct task_struct *task)
> struct sched_domain *sd;
> struct cpumask *later_mask = this_cpu_cpumask_var_ptr(local_cpu_mask_dl);
> int this_cpu = smp_processor_id();
> - int best_cpu, cpu = task_cpu(task);
> + int cpu = task_cpu(task);
>
> /* Make sure the mask is initialized first */
> if (unlikely(!later_mask))
> @@ -1337,17 +1337,14 @@ static int find_later_rq(struct task_struct *task)
> * We have to consider system topology and task affinity
> * first, then we can look for a suitable cpu.
> */
> - best_cpu = cpudl_find(&task_rq(task)->rd->cpudl,
> - task, later_mask);
> - if (best_cpu == -1)
> + if (cpudl_find(&task_rq(task)->rd->cpudl, task, later_mask) == -1)
It seems that with this we lose the last user of the current return
value of cpudl_find() (heap maximum). I guess we want to change the
return value to be (int)bool, as in rt, so that we can simplify this and
the conditions in check_preempt_equal_dl.
> return -1;
>
> /*
> - * If we are here, some target has been found,
> - * the most suitable of which is cached in best_cpu.
> - * This is, among the runqueues where the current tasks
> - * have later deadlines than the task's one, the rq
> - * with the latest possible one.
> + * If we are here, some targets have been found, including
> + * the most suitable which is, among the runqueues where the
> + * current tasks have later deadlines than the task's one, the
> + * rq with the latest possible one.
> *
> * Now we check how well this matches with task's
> * affinity and system topology.
> @@ -1367,6 +1364,7 @@ static int find_later_rq(struct task_struct *task)
> rcu_read_lock();
> for_each_domain(cpu, sd) {
> if (sd->flags & SD_WAKE_AFFINE) {
> + int closest_cpu;
Can we still call this best_cpu, so that we are aligned with rt?
Thanks,
- Juri
On Mon, Mar 27, 2017 at 03:33:43PM +0100, Juri Lelli wrote:
> > diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
> > index a2ce590..49c93b9 100644
> > --- a/kernel/sched/deadline.c
> > +++ b/kernel/sched/deadline.c
> > @@ -1324,7 +1324,7 @@ static int find_later_rq(struct task_struct *task)
> > struct sched_domain *sd;
> > struct cpumask *later_mask = this_cpu_cpumask_var_ptr(local_cpu_mask_dl);
> > int this_cpu = smp_processor_id();
> > - int best_cpu, cpu = task_cpu(task);
> > + int cpu = task_cpu(task);
> >
> > /* Make sure the mask is initialized first */
> > if (unlikely(!later_mask))
> > @@ -1337,17 +1337,14 @@ static int find_later_rq(struct task_struct *task)
> > * We have to consider system topology and task affinity
> > * first, then we can look for a suitable cpu.
> > */
> > - best_cpu = cpudl_find(&task_rq(task)->rd->cpudl,
> > - task, later_mask);
> > - if (best_cpu == -1)
> > + if (cpudl_find(&task_rq(task)->rd->cpudl, task, later_mask) == -1)
>
> It seems that with this we lose the last user of the current return
> value of cpudl_find() (heap maximum). I guess we want to change the
> return value to be (int)bool, as in rt, so that we can simplify this and
> the conditions in check_preempt_equal_dl.
Hi Juri,
Actually I changed the return value to bool, but didn't include the
patch since it didn't seem that valuable. But I will add it if you
think so too. ;)
>
> > return -1;
> >
> > /*
> > - * If we are here, some target has been found,
> > - * the most suitable of which is cached in best_cpu.
> > - * This is, among the runqueues where the current tasks
> > - * have later deadlines than the task's one, the rq
> > - * with the latest possible one.
> > + * If we are here, some targets have been found, including
> > + * the most suitable which is, among the runqueues where the
> > + * current tasks have later deadlines than the task's one, the
> > + * rq with the latest possible one.
> > *
> > * Now we check how well this matches with task's
> > * affinity and system topology.
> > @@ -1367,6 +1364,7 @@ static int find_later_rq(struct task_struct *task)
> > rcu_read_lock();
> > for_each_domain(cpu, sd) {
> > if (sd->flags & SD_WAKE_AFFINE) {
> > + int closest_cpu;
>
> Can we still call this best_cpu, so that we are aligned with rt?
OK. I will rename it to best_cpu.
Thanks,
Byungchul
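For reference, the change Byungchul describes would presumably look
something like this (a sketch of the idea only, not the actual
follow-up patch): cpudl_find() stops returning the heap maximum and
merely reports whether any suitable cpu was found, mirroring what
cpupri_find() does on the rt side:

/* sketch: return true iff a suitable cpu was found for p */
bool cpudl_find(struct cpudl *cp, struct task_struct *p,
		struct cpumask *later_mask);

so the call in find_later_rq() becomes:

	if (!cpudl_find(&task_rq(task)->rd->cpudl, task, later_mask))
		return -1;

and the == -1 / != -1 tests against cpudl_find() in
check_preempt_equal_dl() become direct boolean tests.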