padata_flush_queues() is broken for an async ->parallel() function
because flush_work() can't wait on it; the work item finishes as soon as
->parallel() returns, long before the async job has been serialized:
# modprobe tcrypt alg="pcrypt(cryptd(rfc4106(gcm_base(ctr(aes-generic),ghash-generic))))" type=3
# modprobe tcrypt mode=215 sec=1 &
# sleep 5; echo 7 > /sys/kernel/pcrypt/pencrypt/parallel_cpumask
kernel BUG at kernel/padata.c:473!
invalid opcode: 0000 [#1] SMP PTI
CPU: 0 PID: 282 Comm: sh Not tainted 5.3.0-rc5-padata-base+ #3
RIP: 0010:padata_flush_queues+0xa7/0xb0
Call Trace:
padata_replace+0xa1/0x110
padata_set_cpumask+0xae/0x130
store_cpumask+0x75/0xa0
padata_sysfs_store+0x20/0x30
...
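For reference, a rough sketch of the root cause (not pcrypt's actual
code; the example_* names and submit_async_crypto() are made up):

    /*
     * The work item running ->parallel() returns as soon as the async
     * request is submitted, so flush_work() has nothing left to wait on.
     * padata_do_serial() only runs later, from the async completion.
     */
    static void example_parallel(struct padata_priv *padata)
    {
            struct example_req *req = container_of(padata,
                                                   struct example_req, padata);

            submit_async_crypto(req, example_done);  /* returns immediately */
    }

    static void example_done(struct example_req *req, int err)
    {
            /* Runs from the async completion, long after flush_work(). */
            padata_do_serial(&req->padata);
    }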
Wait instead for the parallel_data (pd) object's refcount to drop to
zero.
Simplify by taking an initial reference on a pd when it is allocated.
That ref is dropped during flushing, which allows wait_for_completion()
to be called unconditionally.
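The resulting lifecycle, in brief (sketch only; padata_do_parallel()'s
existing refcount grab is unchanged):

    padata_alloc_pd():      atomic_set(&pd->refcnt, 1);  /* initial ref */
    padata_do_parallel():   atomic_inc(&pd->refcnt);     /* one per job */
    padata_serial_worker(): if (atomic_dec_return(&pd->refcnt) == 0)
                                    complete(&pd->flushing_done);
    padata_flush_queues():  drop the initial ref the same way, then
                            wait_for_completion(&pd->flushing_done);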
Without the initial ref, the only alternative I could think of is to
make complete() conditional on !PADATA_INIT or PADATA_REPLACE (in
addition to a zero pd->refcnt), and wait_for_completion() conditional on
a nonzero pd->refcnt. But then complete() could be called without a
matching wait:
    completer                     waiter
    ---------                     ------
    DEC pd->refcnt  // 0
                                  pinst->flags |= PADATA_REPLACE
    LOAD pinst->flags  // REPLACE
                                  LOAD pd->refcnt  // 0
                                  /* return without wait_for_completion() */
    complete()
With no more flushing of per-CPU work items, __padata_stop() no longer
needs to take the CPU hotplug lock.
Fixes: 2b73b07ab8a4 ("padata: Flush the padata queues actively")
Reported-by: Herbert Xu <[email protected]>
Suggested-by: Steffen Klassert <[email protected]>
Signed-off-by: Daniel Jordan <[email protected]>
Cc: Sebastian Andrzej Siewior <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: [email protected]
Cc: [email protected]
---
include/linux/padata.h | 3 +++
kernel/padata.c | 32 ++++++++++++--------------------
2 files changed, 15 insertions(+), 20 deletions(-)
diff --git a/include/linux/padata.h b/include/linux/padata.h
index 8da673861d99..1c73f9cc7b72 100644
--- a/include/linux/padata.h
+++ b/include/linux/padata.h
@@ -14,6 +14,7 @@
#include <linux/list.h>
#include <linux/notifier.h>
#include <linux/kobject.h>
+#include <linux/completion.h>
#define PADATA_CPU_SERIAL 0x01
#define PADATA_CPU_PARALLEL 0x02
@@ -104,6 +105,7 @@ struct padata_cpumask {
* @squeue: percpu padata queues used for serialuzation.
* @reorder_objects: Number of objects waiting in the reorder queues.
* @refcnt: Number of objects holding a reference on this parallel_data.
+ * @flushing_done: Wait for all objects to be sent out.
* @max_seq_nr: Maximal used sequence number.
* @cpu: Next CPU to be processed.
* @cpumask: The cpumasks in use for parallel and serial workers.
@@ -116,6 +118,7 @@ struct parallel_data {
struct padata_serial_queue __percpu *squeue;
atomic_t reorder_objects;
atomic_t refcnt;
+ struct completion flushing_done;
atomic_t seq_nr;
int cpu;
struct padata_cpumask cpumask;
diff --git a/kernel/padata.c b/kernel/padata.c
index b60cc3dcee58..958166e23123 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -301,7 +301,8 @@ static void padata_serial_worker(struct work_struct *serial_work)
list_del_init(&padata->list);
padata->serial(padata);
- atomic_dec(&pd->refcnt);
+ if (atomic_dec_return(&pd->refcnt) == 0)
+ complete(&pd->flushing_done);
}
local_bh_enable();
}
@@ -423,7 +424,9 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
padata_init_squeues(pd);
atomic_set(&pd->seq_nr, -1);
atomic_set(&pd->reorder_objects, 0);
- atomic_set(&pd->refcnt, 0);
+ /* Initial ref dropped in padata_flush_queues. */
+ atomic_set(&pd->refcnt, 1);
+ init_completion(&pd->flushing_done);
pd->pinst = pinst;
spin_lock_init(&pd->lock);
pd->cpu = cpumask_first(pd->cpumask.pcpu);
@@ -453,24 +456,15 @@ static void padata_free_pd(struct parallel_data *pd)
/* Flush all objects out of the padata queues. */
static void padata_flush_queues(struct parallel_data *pd)
{
- int cpu;
- struct padata_parallel_queue *pqueue;
- struct padata_serial_queue *squeue;
-
- for_each_cpu(cpu, pd->cpumask.pcpu) {
- pqueue = per_cpu_ptr(pd->pqueue, cpu);
- flush_work(&pqueue->work);
- }
-
- if (atomic_read(&pd->reorder_objects))
- padata_reorder(pd);
+ if (!(pd->pinst->flags & PADATA_INIT))
+ return;
- for_each_cpu(cpu, pd->cpumask.cbcpu) {
- squeue = per_cpu_ptr(pd->squeue, cpu);
- flush_work(&squeue->work);
- }
+ if (atomic_dec_return(&pd->refcnt) == 0)
+ complete(&pd->flushing_done);
- BUG_ON(atomic_read(&pd->refcnt) != 0);
+ wait_for_completion(&pd->flushing_done);
+ reinit_completion(&pd->flushing_done);
+ atomic_set(&pd->refcnt, 1);
}
static void __padata_start(struct padata_instance *pinst)
@@ -487,9 +481,7 @@ static void __padata_stop(struct padata_instance *pinst)
synchronize_rcu();
- get_online_cpus();
padata_flush_queues(pinst->pd);
- put_online_cpus();
}
/* Replace the internal control structure with a new one. */
--
2.23.0
On Wed, Aug 28, 2019 at 06:14:21PM -0400, Daniel Jordan wrote:
>
> @@ -453,24 +456,15 @@ static void padata_free_pd(struct parallel_data *pd)
> /* Flush all objects out of the padata queues. */
> static void padata_flush_queues(struct parallel_data *pd)
> {
> - int cpu;
> - struct padata_parallel_queue *pqueue;
> - struct padata_serial_queue *squeue;
> -
> - for_each_cpu(cpu, pd->cpumask.pcpu) {
> - pqueue = per_cpu_ptr(pd->pqueue, cpu);
> - flush_work(&pqueue->work);
> - }
> -
> - if (atomic_read(&pd->reorder_objects))
> - padata_reorder(pd);
> + if (!(pd->pinst->flags & PADATA_INIT))
> + return;
>
> - for_each_cpu(cpu, pd->cpumask.cbcpu) {
> - squeue = per_cpu_ptr(pd->squeue, cpu);
> - flush_work(&squeue->work);
> - }
> + if (atomic_dec_return(&pd->refcnt) == 0)
> + complete(&pd->flushing_done);
>
> - BUG_ON(atomic_read(&pd->refcnt) != 0);
> + wait_for_completion(&pd->flushing_done);
> + reinit_completion(&pd->flushing_done);
> + atomic_set(&pd->refcnt, 1);
> }
I don't think waiting is an option. In a pathological case the
hardware may not return at all. We cannot and should not hold off
CPU hotplug for an arbitrary amount of time when the event we are
waiting for isn't even occurring on that CPU.
I don't think flushing is needed at all. All we need to do is
maintain consistency before and after the CPU hotplug event.
Cheers,
--
Email: Herbert Xu <[email protected]>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
On Thu, Sep 05, 2019 at 02:17:35PM +1000, Herbert Xu wrote:
> On Wed, Aug 28, 2019 at 06:14:21PM -0400, Daniel Jordan wrote:
> >
> > @@ -453,24 +456,15 @@ static void padata_free_pd(struct parallel_data *pd)
> > /* Flush all objects out of the padata queues. */
> > static void padata_flush_queues(struct parallel_data *pd)
> > {
> > - int cpu;
> > - struct padata_parallel_queue *pqueue;
> > - struct padata_serial_queue *squeue;
> > -
> > - for_each_cpu(cpu, pd->cpumask.pcpu) {
> > - pqueue = per_cpu_ptr(pd->pqueue, cpu);
> > - flush_work(&pqueue->work);
> > - }
> > -
> > - if (atomic_read(&pd->reorder_objects))
> > - padata_reorder(pd);
> > + if (!(pd->pinst->flags & PADATA_INIT))
> > + return;
> >
> > - for_each_cpu(cpu, pd->cpumask.cbcpu) {
> > - squeue = per_cpu_ptr(pd->squeue, cpu);
> > - flush_work(&squeue->work);
> > - }
> > + if (atomic_dec_return(&pd->refcnt) == 0)
> > + complete(&pd->flushing_done);
> >
> > - BUG_ON(atomic_read(&pd->refcnt) != 0);
> > + wait_for_completion(&pd->flushing_done);
> > + reinit_completion(&pd->flushing_done);
> > + atomic_set(&pd->refcnt, 1);
> > }
>
> I don't think waiting is an option. In a pathological case the
> hardware may not return at all. We cannot and should not hold off
> CPU hotplug for an arbitrary amount of time when the event we are
> waiting for isn't even occurring on that CPU.
Ok, I hadn't considered hardware not returning.
> I don't think flushing is needed at all. All we need to do is
> maintain consistency before and after the CPU hotplug event.
I could imagine not flushing would work for replacing a pd. The old pd could
be freed by whatever drops the last reference and the new pd could be
installed, all without flushing.
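Roughly this (hypothetical sketch, not part of this patch; the
padata_put_pd() name is made up):

    static void padata_put_pd(struct parallel_data *pd)
    {
            /* Whoever drops the last reference frees the old pd. */
            if (atomic_dec_and_test(&pd->refcnt))
                    padata_free_pd(pd);
    }

padata_replace() and the serial worker would both drop their references
through this, so the old pd goes away once the last outstanding job
finishes, no flushing required.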
In the case of freeing an instance, though, padata needs to wait for all the
jobs to complete so they don't use the instance's data after it's been freed.
Holding the CPU hotplug lock isn't necessary for this, though, so I think we're
ok to wait here.
On Thu, Sep 05, 2019 at 06:37:56PM -0400, Daniel Jordan wrote:
> On Thu, Sep 05, 2019 at 02:17:35PM +1000, Herbert Xu wrote:
> > I don't think waiting is an option. In a pathological case the
> > hardware may not return at all. We cannot and should not hold off
> > CPU hotplug for an arbitrary amount of time when the event we are
> > waiting for isn't even occurring on that CPU.
>
> Ok, I hadn't considered hardware not returning.
>
> > I don't think flushing is needed at all. All we need to do is
> > maintain consistency before and after the CPU hotplug event.
>
> I could imagine not flushing would work for replacing a pd. The old pd could
> be freed by whatever drops the last reference and the new pd could be
> installed, all without flushing.
>
> In the case of freeing an instance, though, padata needs to wait for all the
> jobs to complete so they don't use the instance's data after it's been freed.
> Holding the CPU hotplug lock isn't necessary for this, though, so I think we're
> ok to wait here.
[FYI, I'm currently on leave until mid-October and will return to this series
then.]