2021-02-24 14:34:39

by Peter Zijlstra

[permalink] [raw]
Subject: [PATCH 2/6] sched: Simplify migration_cpu_stop()

When affine_move_task() issues a migration_cpu_stop(), the purpose of
that function is to complete that @pending, not any random other
p->migration_pending that might have gotten installed since.

This realization much simplifies migration_cpu_stop() and allows
further necessary steps to fix all this as it provides the guarantee
that @pending's stopper will complete @pending (and not some random
other @pending).

Fixes: 6d337eab041d ("sched: Fix migrate_disable() vs set_cpus_allowed_ptr()")
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
---
kernel/sched/core.c | 56 +++++++---------------------------------------------
1 file changed, 8 insertions(+), 48 deletions(-)

--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1898,8 +1898,8 @@ static struct rq *__migrate_task(struct
*/
static int migration_cpu_stop(void *data)
{
- struct set_affinity_pending *pending;
struct migration_arg *arg = data;
+ struct set_affinity_pending *pending = arg->pending;
struct task_struct *p = arg->task;
int dest_cpu = arg->dest_cpu;
struct rq *rq = this_rq();
@@ -1921,25 +1921,6 @@ static int migration_cpu_stop(void *data
raw_spin_lock(&p->pi_lock);
rq_lock(rq, &rf);

- pending = p->migration_pending;
- if (pending && !arg->pending) {
- /*
- * This happens from sched_exec() and migrate_task_to(),
- * neither of them care about pending and just want a task to
- * maybe move about.
- *
- * Even if there is a pending, we can ignore it, since
- * affine_move_task() will have it's own stop_work's in flight
- * which will manage the completion.
- *
- * Notably, pending doesn't need to match arg->pending. This can
- * happen when tripple concurrent affine_move_task() first sets
- * pending, then clears pending and eventually sets another
- * pending.
- */
- pending = NULL;
- }
-
/*
* If task_rq(p) != rq, it cannot be migrated here, because we're
* holding rq->lock, if p->on_rq == 0 it cannot get enqueued because
@@ -1950,31 +1931,20 @@ static int migration_cpu_stop(void *data
goto out;

if (pending) {
- p->migration_pending = NULL;
+ if (p->migration_pending == pending)
+ p->migration_pending = NULL;
complete = true;
}

- /* migrate_enable() -- we must not race against SCA */
- if (dest_cpu < 0) {
- /*
- * When this was migrate_enable() but we no longer
- * have a @pending, a concurrent SCA 'fixed' things
- * and we should be valid again. Nothing to do.
- */
- if (!pending) {
- WARN_ON_ONCE(!cpumask_test_cpu(task_cpu(p), &p->cpus_mask));
- goto out;
- }
-
+ if (dest_cpu < 0)
dest_cpu = cpumask_any_distribute(&p->cpus_mask);
- }

if (task_on_rq_queued(p))
rq = __migrate_task(rq, &rf, p, dest_cpu);
else
p->wake_cpu = dest_cpu;

- } else if (dest_cpu < 0 || pending) {
+ } else if (pending) {
/*
* This happens when we get migrated between migrate_enable()'s
* preempt_enable() and scheduling the stopper task. At that
@@ -1989,23 +1959,14 @@ static int migration_cpu_stop(void *data
* ->pi_lock, so the allowed mask is stable - if it got
* somewhere allowed, we're done.
*/
- if (pending && cpumask_test_cpu(task_cpu(p), p->cpus_ptr)) {
- p->migration_pending = NULL;
+ if (cpumask_test_cpu(task_cpu(p), p->cpus_ptr)) {
+ if (p->migration_pending == pending)
+ p->migration_pending = NULL;
complete = true;
goto out;
}

/*
- * When this was migrate_enable() but we no longer have an
- * @pending, a concurrent SCA 'fixed' things and we should be
- * valid again. Nothing to do.
- */
- if (!pending) {
- WARN_ON_ONCE(!cpumask_test_cpu(task_cpu(p), &p->cpus_mask));
- goto out;
- }
-
- /*
* When migrate_enable() hits a rq mis-match we can't reliably
* determine is_migration_disabled() and so have to chase after
* it.
@@ -2022,7 +1983,6 @@ static int migration_cpu_stop(void *data
complete_all(&pending->done);

/* For pending->{arg,stop_work} */
- pending = arg->pending;
if (pending && refcount_dec_and_test(&pending->refs))
wake_up_var(&pending->refs);




2021-02-25 00:59:48

by Valentin Schneider

[permalink] [raw]
Subject: Re: [PATCH 2/6] sched: Simplify migration_cpu_stop()

On 24/02/21 13:24, Peter Zijlstra wrote:
> @@ -1950,31 +1931,20 @@ static int migration_cpu_stop(void *data
> goto out;
>
> if (pending) {
> - p->migration_pending = NULL;
> + if (p->migration_pending == pending)
> + p->migration_pending = NULL;
> complete = true;
> }
>
> - /* migrate_enable() -- we must not race against SCA */
> - if (dest_cpu < 0) {
> - /*
> - * When this was migrate_enable() but we no longer
> - * have a @pending, a concurrent SCA 'fixed' things
> - * and we should be valid again. Nothing to do.
> - */
> - if (!pending) {
> - WARN_ON_ONCE(!cpumask_test_cpu(task_cpu(p), &p->cpus_mask));
> - goto out;
> - }
> -

This is fixed by 5+6, but at this patch I think you can have double
completions - I thought this was an issue, but briefly looking at
completion stuff it might not. In any case, consider:

task_cpu(p) == Y

SCA(p, X);
SCA(p, Y);


SCA(p, Y) will uninstall SCA(p, X)'s pending and complete.

migration/Y kicked by SCA(p, X) will grab arg->pending, which is still
SCA(p, X)'s pending and also complete.

> + if (dest_cpu < 0)
> dest_cpu = cpumask_any_distribute(&p->cpus_mask);
> - }
>
> if (task_on_rq_queued(p))
> rq = __migrate_task(rq, &rf, p, dest_cpu);
> else
> p->wake_cpu = dest_cpu;
>
> - } else if (dest_cpu < 0 || pending) {
> + } else if (pending) {
> /*
> * This happens when we get migrated between migrate_enable()'s
> * preempt_enable() and scheduling the stopper task. At that

2021-02-25 09:58:23

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH 2/6] sched: Simplify migration_cpu_stop()

On Wed, Feb 24, 2021 at 03:34:36PM +0000, Valentin Schneider wrote:
> On 24/02/21 13:24, Peter Zijlstra wrote:
> > @@ -1950,31 +1931,20 @@ static int migration_cpu_stop(void *data
> > goto out;
> >
> > if (pending) {
> > - p->migration_pending = NULL;
> > + if (p->migration_pending == pending)
> > + p->migration_pending = NULL;
> > complete = true;
> > }
> >
> > - /* migrate_enable() -- we must not race against SCA */
> > - if (dest_cpu < 0) {
> > - /*
> > - * When this was migrate_enable() but we no longer
> > - * have a @pending, a concurrent SCA 'fixed' things
> > - * and we should be valid again. Nothing to do.
> > - */
> > - if (!pending) {
> > - WARN_ON_ONCE(!cpumask_test_cpu(task_cpu(p), &p->cpus_mask));
> > - goto out;
> > - }
> > -
>
> This is fixed by 5+6, but at this patch I think you can have double
> completions - I thought this was an issue, but briefly looking at
> completion stuff it might not. In any case, consider:
>
> task_cpu(p) == Y
>
> SCA(p, X);
> SCA(p, Y);
>
>
> SCA(p, Y) will uninstall SCA(p, X)'s pending and complete.
>
> migration/Y kicked by SCA(p, X) will grab arg->pending, which is still
> SCA(p, X)'s pending and also complete.

Right, so I didn't really think too hard about the intermediate states,
given it's all pretty buggered until at least 5. But yeah, double
complete is harmless.

Specifically, the refcount the stopper has should avoid the stack from
getting released.

2021-02-25 11:49:02

by Valentin Schneider

[permalink] [raw]
Subject: Re: [PATCH 2/6] sched: Simplify migration_cpu_stop()

On 25/02/21 09:45, Peter Zijlstra wrote:
> On Wed, Feb 24, 2021 at 03:34:36PM +0000, Valentin Schneider wrote:
>> On 24/02/21 13:24, Peter Zijlstra wrote:
>> > @@ -1950,31 +1931,20 @@ static int migration_cpu_stop(void *data
>> > goto out;
>> >
>> > if (pending) {
>> > - p->migration_pending = NULL;
>> > + if (p->migration_pending == pending)
>> > + p->migration_pending = NULL;
>> > complete = true;
>> > }
>> >
>> > - /* migrate_enable() -- we must not race against SCA */
>> > - if (dest_cpu < 0) {
>> > - /*
>> > - * When this was migrate_enable() but we no longer
>> > - * have a @pending, a concurrent SCA 'fixed' things
>> > - * and we should be valid again. Nothing to do.
>> > - */
>> > - if (!pending) {
>> > - WARN_ON_ONCE(!cpumask_test_cpu(task_cpu(p), &p->cpus_mask));
>> > - goto out;
>> > - }
>> > -
>>
>> This is fixed by 5+6, but at this patch I think you can have double
>> completions - I thought this was an issue, but briefly looking at
>> completion stuff it might not. In any case, consider:
>>
>> task_cpu(p) == Y
>>
>> SCA(p, X);
>> SCA(p, Y);
>>
>>
>> SCA(p, Y) will uninstall SCA(p, X)'s pending and complete.
>>
>> migration/Y kicked by SCA(p, X) will grab arg->pending, which is still
>> SCA(p, X)'s pending and also complete.
>
> Right, so I didn't really think too hard about the intermediate states,
> given it's all pretty buggered until at least 5. But yeah, double
> complete is harmless.
>
> Specifically, the refcount the stopper has should avoid the stack from
> getting released.

Aye that should be fine, it really was just the double complete which I
was unsure about.

2021-03-01 10:23:11

by tip-bot2 for Haifeng Xu

[permalink] [raw]
Subject: [tip: sched/urgent] sched: Simplify migration_cpu_stop()

The following commit has been merged into the sched/urgent branch of tip:

Commit-ID: 6430eb536a97036b1d529cbf383cfe36e41a2f97
Gitweb: https://git.kernel.org/tip/6430eb536a97036b1d529cbf383cfe36e41a2f97
Author: Peter Zijlstra <[email protected]>
AuthorDate: Wed, 24 Feb 2021 11:50:39 +01:00
Committer: Peter Zijlstra <[email protected]>
CommitterDate: Mon, 01 Mar 2021 11:02:13 +01:00

sched: Simplify migration_cpu_stop()

When affine_move_task() issues a migration_cpu_stop(), the purpose of
that function is to complete that @pending, not any random other
p->migration_pending that might have gotten installed since.

This realization much simplifies migration_cpu_stop() and allows
further necessary steps to fix all this as it provides the guarantee
that @pending's stopper will complete @pending (and not some random
other @pending).

Fixes: 6d337eab041d ("sched: Fix migrate_disable() vs set_cpus_allowed_ptr()")
Cc: [email protected]
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Reviewed-by: Valentin Schneider <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
---
kernel/sched/core.c | 56 ++++++--------------------------------------
1 file changed, 8 insertions(+), 48 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 79ddba5..088e8f4 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1898,8 +1898,8 @@ static struct rq *__migrate_task(struct rq *rq, struct rq_flags *rf,
*/
static int migration_cpu_stop(void *data)
{
- struct set_affinity_pending *pending;
struct migration_arg *arg = data;
+ struct set_affinity_pending *pending = arg->pending;
struct task_struct *p = arg->task;
int dest_cpu = arg->dest_cpu;
struct rq *rq = this_rq();
@@ -1921,25 +1921,6 @@ static int migration_cpu_stop(void *data)
raw_spin_lock(&p->pi_lock);
rq_lock(rq, &rf);

- pending = p->migration_pending;
- if (pending && !arg->pending) {
- /*
- * This happens from sched_exec() and migrate_task_to(),
- * neither of them care about pending and just want a task to
- * maybe move about.
- *
- * Even if there is a pending, we can ignore it, since
- * affine_move_task() will have it's own stop_work's in flight
- * which will manage the completion.
- *
- * Notably, pending doesn't need to match arg->pending. This can
- * happen when tripple concurrent affine_move_task() first sets
- * pending, then clears pending and eventually sets another
- * pending.
- */
- pending = NULL;
- }
-
/*
* If task_rq(p) != rq, it cannot be migrated here, because we're
* holding rq->lock, if p->on_rq == 0 it cannot get enqueued because
@@ -1950,31 +1931,20 @@ static int migration_cpu_stop(void *data)
goto out;

if (pending) {
- p->migration_pending = NULL;
+ if (p->migration_pending == pending)
+ p->migration_pending = NULL;
complete = true;
}

- /* migrate_enable() -- we must not race against SCA */
- if (dest_cpu < 0) {
- /*
- * When this was migrate_enable() but we no longer
- * have a @pending, a concurrent SCA 'fixed' things
- * and we should be valid again. Nothing to do.
- */
- if (!pending) {
- WARN_ON_ONCE(!cpumask_test_cpu(task_cpu(p), &p->cpus_mask));
- goto out;
- }
-
+ if (dest_cpu < 0)
dest_cpu = cpumask_any_distribute(&p->cpus_mask);
- }

if (task_on_rq_queued(p))
rq = __migrate_task(rq, &rf, p, dest_cpu);
else
p->wake_cpu = dest_cpu;

- } else if (dest_cpu < 0 || pending) {
+ } else if (pending) {
/*
* This happens when we get migrated between migrate_enable()'s
* preempt_enable() and scheduling the stopper task. At that
@@ -1989,23 +1959,14 @@ static int migration_cpu_stop(void *data)
* ->pi_lock, so the allowed mask is stable - if it got
* somewhere allowed, we're done.
*/
- if (pending && cpumask_test_cpu(task_cpu(p), p->cpus_ptr)) {
- p->migration_pending = NULL;
+ if (cpumask_test_cpu(task_cpu(p), p->cpus_ptr)) {
+ if (p->migration_pending == pending)
+ p->migration_pending = NULL;
complete = true;
goto out;
}

/*
- * When this was migrate_enable() but we no longer have an
- * @pending, a concurrent SCA 'fixed' things and we should be
- * valid again. Nothing to do.
- */
- if (!pending) {
- WARN_ON_ONCE(!cpumask_test_cpu(task_cpu(p), &p->cpus_mask));
- goto out;
- }
-
- /*
* When migrate_enable() hits a rq mis-match we can't reliably
* determine is_migration_disabled() and so have to chase after
* it.
@@ -2022,7 +1983,6 @@ out:
complete_all(&pending->done);

/* For pending->{arg,stop_work} */
- pending = arg->pending;
if (pending && refcount_dec_and_test(&pending->refs))
wake_up_var(&pending->refs);

2021-03-06 11:47:53

by tip-bot2 for Haifeng Xu

[permalink] [raw]
Subject: [tip: sched/core] sched: Simplify migration_cpu_stop()

The following commit has been merged into the sched/core branch of tip:

Commit-ID: c20cf065d4a619d394d23290093b1002e27dff86
Gitweb: https://git.kernel.org/tip/c20cf065d4a619d394d23290093b1002e27dff86
Author: Peter Zijlstra <[email protected]>
AuthorDate: Wed, 24 Feb 2021 11:50:39 +01:00
Committer: Ingo Molnar <[email protected]>
CommitterDate: Sat, 06 Mar 2021 12:40:20 +01:00

sched: Simplify migration_cpu_stop()

When affine_move_task() issues a migration_cpu_stop(), the purpose of
that function is to complete that @pending, not any random other
p->migration_pending that might have gotten installed since.

This realization much simplifies migration_cpu_stop() and allows
further necessary steps to fix all this as it provides the guarantee
that @pending's stopper will complete @pending (and not some random
other @pending).

Fixes: 6d337eab041d ("sched: Fix migrate_disable() vs set_cpus_allowed_ptr()")
Cc: [email protected]
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
Reviewed-by: Valentin Schneider <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
---
kernel/sched/core.c | 56 ++++++--------------------------------------
1 file changed, 8 insertions(+), 48 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 79ddba5..088e8f4 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1898,8 +1898,8 @@ static struct rq *__migrate_task(struct rq *rq, struct rq_flags *rf,
*/
static int migration_cpu_stop(void *data)
{
- struct set_affinity_pending *pending;
struct migration_arg *arg = data;
+ struct set_affinity_pending *pending = arg->pending;
struct task_struct *p = arg->task;
int dest_cpu = arg->dest_cpu;
struct rq *rq = this_rq();
@@ -1921,25 +1921,6 @@ static int migration_cpu_stop(void *data)
raw_spin_lock(&p->pi_lock);
rq_lock(rq, &rf);

- pending = p->migration_pending;
- if (pending && !arg->pending) {
- /*
- * This happens from sched_exec() and migrate_task_to(),
- * neither of them care about pending and just want a task to
- * maybe move about.
- *
- * Even if there is a pending, we can ignore it, since
- * affine_move_task() will have it's own stop_work's in flight
- * which will manage the completion.
- *
- * Notably, pending doesn't need to match arg->pending. This can
- * happen when tripple concurrent affine_move_task() first sets
- * pending, then clears pending and eventually sets another
- * pending.
- */
- pending = NULL;
- }
-
/*
* If task_rq(p) != rq, it cannot be migrated here, because we're
* holding rq->lock, if p->on_rq == 0 it cannot get enqueued because
@@ -1950,31 +1931,20 @@ static int migration_cpu_stop(void *data)
goto out;

if (pending) {
- p->migration_pending = NULL;
+ if (p->migration_pending == pending)
+ p->migration_pending = NULL;
complete = true;
}

- /* migrate_enable() -- we must not race against SCA */
- if (dest_cpu < 0) {
- /*
- * When this was migrate_enable() but we no longer
- * have a @pending, a concurrent SCA 'fixed' things
- * and we should be valid again. Nothing to do.
- */
- if (!pending) {
- WARN_ON_ONCE(!cpumask_test_cpu(task_cpu(p), &p->cpus_mask));
- goto out;
- }
-
+ if (dest_cpu < 0)
dest_cpu = cpumask_any_distribute(&p->cpus_mask);
- }

if (task_on_rq_queued(p))
rq = __migrate_task(rq, &rf, p, dest_cpu);
else
p->wake_cpu = dest_cpu;

- } else if (dest_cpu < 0 || pending) {
+ } else if (pending) {
/*
* This happens when we get migrated between migrate_enable()'s
* preempt_enable() and scheduling the stopper task. At that
@@ -1989,23 +1959,14 @@ static int migration_cpu_stop(void *data)
* ->pi_lock, so the allowed mask is stable - if it got
* somewhere allowed, we're done.
*/
- if (pending && cpumask_test_cpu(task_cpu(p), p->cpus_ptr)) {
- p->migration_pending = NULL;
+ if (cpumask_test_cpu(task_cpu(p), p->cpus_ptr)) {
+ if (p->migration_pending == pending)
+ p->migration_pending = NULL;
complete = true;
goto out;
}

/*
- * When this was migrate_enable() but we no longer have an
- * @pending, a concurrent SCA 'fixed' things and we should be
- * valid again. Nothing to do.
- */
- if (!pending) {
- WARN_ON_ONCE(!cpumask_test_cpu(task_cpu(p), &p->cpus_mask));
- goto out;
- }
-
- /*
* When migrate_enable() hits a rq mis-match we can't reliably
* determine is_migration_disabled() and so have to chase after
* it.
@@ -2022,7 +1983,6 @@ out:
complete_all(&pending->done);

/* For pending->{arg,stop_work} */
- pending = arg->pending;
if (pending && refcount_dec_and_test(&pending->refs))
wake_up_var(&pending->refs);