2010-05-14 11:45:52

by Steffen Klassert

[permalink] [raw]
Subject: [PATCH 1/4] padata: Use a timer to handle remaining objects in the reorder queues

padata_get_next needs to check whether the next object that
needs serialization must be parallel processed by the local cpu.
This check was implemented incorrectly and always returned true,
so the try_again loop in padata_reorder was never taken. This
can lead to object leaks in some rare cases due to a race that
appears with the trylock in padata_reorder. The try_again loop
was not a good idea after all, because a cpu could take that
loop frequently, so we handle this with a timer instead.

This patch adds a timer to handle the race that appears with
the trylock. If cpu1 queues an object to the reorder queue while
cpu2 holds the pd->lock but has already left the while loop in
padata_reorder, cpu2 can't take care of this object and cpu1
exits because it can't get the lock. Usually the next cpu that
takes the lock takes care of this object too. We need the timer
only if this object was the last one to arrive at the reorder
queues. The timer function sends it out in this case.

Signed-off-by: Steffen Klassert <[email protected]>
---
include/linux/padata.h | 2 ++
kernel/padata.c | 25 ++++++++++++++++++-------
2 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/include/linux/padata.h b/include/linux/padata.h
index 51611da..64836a6 100644
--- a/include/linux/padata.h
+++ b/include/linux/padata.h
@@ -24,6 +24,7 @@
#include <linux/workqueue.h>
#include <linux/spinlock.h>
#include <linux/list.h>
+#include <linux/timer.h>

struct padata_priv {
struct list_head list;
@@ -60,6 +61,7 @@ struct parallel_data {
unsigned int max_seq_nr;
cpumask_var_t cpumask;
spinlock_t lock;
+ struct timer_list timer;
};

struct padata_instance {
diff --git a/kernel/padata.c b/kernel/padata.c
index 82958e0..6d7ea48 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -231,7 +231,8 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd)
goto out;
}

- if (next_nr % num_cpus == next_queue->cpu_index) {
+ queue = per_cpu_ptr(pd->queue, smp_processor_id());
+ if (queue->cpu_index == next_queue->cpu_index) {
padata = ERR_PTR(-ENODATA);
goto out;
}
@@ -247,9 +248,8 @@ static void padata_reorder(struct parallel_data *pd)
struct padata_queue *queue;
struct padata_instance *pinst = pd->pinst;

-try_again:
if (!spin_trylock_bh(&pd->lock))
- goto out;
+ return;

while (1) {
padata = padata_get_next(pd);
@@ -258,8 +258,9 @@ try_again:
break;

if (PTR_ERR(padata) == -ENODATA) {
+ del_timer(&pd->timer);
spin_unlock_bh(&pd->lock);
- goto out;
+ return;
}

queue = per_cpu_ptr(pd->queue, padata->cb_cpu);
@@ -273,13 +274,22 @@ try_again:

spin_unlock_bh(&pd->lock);

- if (atomic_read(&pd->reorder_objects))
- goto try_again;
+ if (atomic_read(&pd->reorder_objects)
+ && !(pinst->flags & PADATA_RESET))
+ mod_timer(&pd->timer, jiffies + HZ);
+ else
+ del_timer(&pd->timer);

-out:
return;
}

+static void padata_reorder_timer(unsigned long arg)
+{
+ struct parallel_data *pd = (struct parallel_data *)arg;
+
+ padata_reorder(pd);
+}
+
static void padata_serial_worker(struct work_struct *work)
{
struct padata_queue *queue;
@@ -383,6 +393,7 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
num_cpus = cpumask_weight(pd->cpumask);
pd->max_seq_nr = (MAX_SEQ_NR / num_cpus) * num_cpus - 1;

+ setup_timer(&pd->timer, padata_reorder_timer, (unsigned long)pd);
atomic_set(&pd->seq_nr, -1);
atomic_set(&pd->reorder_objects, 0);
atomic_set(&pd->refcnt, 0);
--
1.5.6.5


2010-05-14 11:46:40

by Steffen Klassert

[permalink] [raw]
Subject: [PATCH 2/4] padata: Flush the padata queues actively

yield was used to wait until all references to the internal control
structure in use are dropped before it is freed. This patch implements
padata_flush_queues, which actively flushes the padata percpu queues
in this case.

Signed-off-by: Steffen Klassert <[email protected]>
---
kernel/padata.c | 33 +++++++++++++++++++++++++--------
1 files changed, 25 insertions(+), 8 deletions(-)

diff --git a/kernel/padata.c b/kernel/padata.c
index 6d7ea48..ec6b8b7 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -417,6 +417,29 @@ static void padata_free_pd(struct parallel_data *pd)
kfree(pd);
}

+static void padata_flush_queues(struct parallel_data *pd)
+{
+ int cpu;
+ struct padata_queue *queue;
+
+ for_each_cpu(cpu, pd->cpumask) {
+ queue = per_cpu_ptr(pd->queue, cpu);
+ flush_work(&queue->pwork);
+ }
+
+ del_timer_sync(&pd->timer);
+
+ if (atomic_read(&pd->reorder_objects))
+ padata_reorder(pd);
+
+ for_each_cpu(cpu, pd->cpumask) {
+ queue = per_cpu_ptr(pd->queue, cpu);
+ flush_work(&queue->swork);
+ }
+
+ BUG_ON(atomic_read(&pd->refcnt) != 0);
+}
+
static void padata_replace(struct padata_instance *pinst,
struct parallel_data *pd_new)
{
@@ -428,11 +451,7 @@ static void padata_replace(struct padata_instance *pinst,

synchronize_rcu();

- while (atomic_read(&pd_old->refcnt) != 0)
- yield();
-
- flush_workqueue(pinst->wq);
-
+ padata_flush_queues(pd_old);
padata_free_pd(pd_old);

pinst->flags &= ~PADATA_RESET;
@@ -695,12 +714,10 @@ void padata_free(struct padata_instance *pinst)

synchronize_rcu();

- while (atomic_read(&pinst->pd->refcnt) != 0)
- yield();
-
#ifdef CONFIG_HOTPLUG_CPU
unregister_hotcpu_notifier(&pinst->cpu_notifier);
#endif
+ padata_flush_queues(pinst->pd);
padata_free_pd(pinst->pd);
free_cpumask_var(pinst->cpumask);
kfree(pinst);
--
1.5.6.5

2010-05-14 11:48:20

by Steffen Klassert

[permalink] [raw]
Subject: [PATCH 3/4] padata: Add some code comments


Signed-off-by: Steffen Klassert <[email protected]>
---
include/linux/padata.h | 53 ++++++++++++++++++++++++++++++++++++++++++++++++
kernel/padata.c | 50 +++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 101 insertions(+), 2 deletions(-)

diff --git a/include/linux/padata.h b/include/linux/padata.h
index 64836a6..e8aac0f 100644
--- a/include/linux/padata.h
+++ b/include/linux/padata.h
@@ -26,6 +26,17 @@
#include <linux/list.h>
#include <linux/timer.h>

+/*
+ * struct padata_priv - Embedded to the users data structure.
+ *
+ * @list: List entry, to attach to the padata lists.
+ * @pd: Pointer to the internal control structure.
+ * @cb_cpu: Callback cpu for serializatioon.
+ * @seq_nr: Sequence number of the parallelized data object.
+ * @info: Used to pass information from the parallel to the serial function.
+ * @parallel: Parallel execution function.
+ * @serial: Serial complete function.
+ */
struct padata_priv {
struct list_head list;
struct parallel_data *pd;
@@ -36,11 +47,29 @@ struct padata_priv {
void (*serial)(struct padata_priv *padata);
};

+/*
+ * struct padata_list
+ *
+ * @list: List head.
+ * @lock: List lock.
+ */
struct padata_list {
struct list_head list;
spinlock_t lock;
};

+/*
+ * struct padata_queue - The percpu padata queues.
+ *
+ * @parallel: List to wait for parallelization.
+ * @reorder: List to wait for reordering after parallel processing.
+ * @serial: List to wait for serialization after reordering.
+ * @pwork: work struct for parallelization.
+ * @swork: work struct for serialization.
+ * @pd: Backpointer to the internal control structure.
+ * @num_obj: Number of objects that are processed by this cpu.
+ * @cpu_index: Index of the cpu.
+ */
struct padata_queue {
struct padata_list parallel;
struct padata_list reorder;
@@ -52,6 +81,20 @@ struct padata_queue {
int cpu_index;
};

+/*
+ * struct parallel_data - Internal control structure, covers everything
+ * that depends on the cpumask in use.
+ *
+ * @pinst: padata instance.
+ * @queue: percpu padata queues.
+ * @seq_nr: The sequence number that will be attached to the next object.
+ * @reorder_objects: Number of objects waiting in the reorder queues.
+ * @refcnt: Number of objects holding a reference on this parallel_data.
+ * @max_seq_nr: Maximal used sequence number.
+ * @cpumask: cpumask in use.
+ * @lock: Reorder lock.
+ * @timer: Reorder timer.
+ */
struct parallel_data {
struct padata_instance *pinst;
struct padata_queue *queue;
@@ -64,6 +107,16 @@ struct parallel_data {
struct timer_list timer;
};

+/*
+ * struct padata_instance - The overall control structure.
+ *
+ * @cpu_notifier: cpu hotplug notifier.
+ * @wq: The workqueue in use.
+ * @pd: The internal control structure.
+ * @cpumask: User supplied cpumask.
+ * @lock: padata instance lock.
+ * @flags: padata flags.
+ */
struct padata_instance {
struct notifier_block cpu_notifier;
struct workqueue_struct *wq;
diff --git a/kernel/padata.c b/kernel/padata.c
index ec6b8b7..629bef3 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -152,6 +152,23 @@ out:
}
EXPORT_SYMBOL(padata_do_parallel);

+/*
+ * padata_get_next - Get the next object that needs serialization.
+ *
+ * Return values are:
+ *
+ * A pointer to the control struct of the next object that needs
+ * serialization, if present in one of the percpu reorder queues.
+ *
+ * NULL, if all percpu reorder queues are empty.
+ *
+ * -EINPROGRESS, if the next object that needs serialization will
+ * be parallel processed by another cpu and is not yet present in
+ * the cpu's reorder queue.
+ *
+ * -ENODATA, if this cpu has to do the parallel processing for
+ * the next object.
+ */
static struct padata_priv *padata_get_next(struct parallel_data *pd)
{
int cpu, num_cpus, empty, calc_seq_nr;
@@ -173,7 +190,7 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd)

/*
* Calculate the seq_nr of the object that should be
- * next in this queue.
+ * next in this reorder queue.
*/
overrun = 0;
calc_seq_nr = (atomic_read(&queue->num_obj) * num_cpus)
@@ -248,15 +265,36 @@ static void padata_reorder(struct parallel_data *pd)
struct padata_queue *queue;
struct padata_instance *pinst = pd->pinst;

+ /*
+ * We need to ensure that only one cpu can work on dequeueing of
+ * the reorder queue the time. Calculating in which percpu reorder
+ * queue the next object will arrive takes some time. A spinlock
+ * would be highly contended. Also it is not clear in which order
+ * the objects arrive to the reorder queues. So a cpu could wait to
+ * get the lock just to notice that there is nothing to do at the
+ * moment. Therefore we use a trylock and let the holder of the lock
+ * care for all the objects enqueued during the holdtime of the lock.
+ */
if (!spin_trylock_bh(&pd->lock))
return;

while (1) {
padata = padata_get_next(pd);

+ /*
+ * All reorder queues are empty, or the next object that needs
+ * serialization is parallel processed by another cpu and is
+ * still on it's way to the cpu's reorder queue, nothing to
+ * do for now.
+ */
if (!padata || PTR_ERR(padata) == -EINPROGRESS)
break;

+ /*
+ * This cpu has to do the parallel processing of the next
+ * object. It's waiting in the cpu's parallelization queue,
+ * so exit imediately.
+ */
if (PTR_ERR(padata) == -ENODATA) {
del_timer(&pd->timer);
spin_unlock_bh(&pd->lock);
@@ -274,6 +312,11 @@ static void padata_reorder(struct parallel_data *pd)

spin_unlock_bh(&pd->lock);

+ /*
+ * The next object that needs serialization might have arrived to
+ * the reorder queues in the meantime, we will be called again
+ * from the timer function if noone else cares for it.
+ */
if (atomic_read(&pd->reorder_objects)
&& !(pinst->flags & PADATA_RESET))
mod_timer(&pd->timer, jiffies + HZ);
@@ -348,6 +391,7 @@ void padata_do_serial(struct padata_priv *padata)
}
EXPORT_SYMBOL(padata_do_serial);

+/* Allocate and initialize the internal cpumask dependend resources. */
static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
const struct cpumask *cpumask)
{
@@ -417,6 +461,7 @@ static void padata_free_pd(struct parallel_data *pd)
kfree(pd);
}

+/* Flush all objects out of the padata queues. */
static void padata_flush_queues(struct parallel_data *pd)
{
int cpu;
@@ -440,6 +485,7 @@ static void padata_flush_queues(struct parallel_data *pd)
BUG_ON(atomic_read(&pd->refcnt) != 0);
}

+/* Replace the internal control stucture with a new one. */
static void padata_replace(struct padata_instance *pinst,
struct parallel_data *pd_new)
{
@@ -706,7 +752,7 @@ EXPORT_SYMBOL(padata_alloc);
/*
* padata_free - free a padata instance
*
- * @ padata_inst: padata instance to free
+ * @padata_inst: padata instance to free
*/
void padata_free(struct padata_instance *pinst)
{
--
1.5.6.5

2010-05-14 11:48:59

by Steffen Klassert

[permalink] [raw]
Subject: [PATCH 4/4] padata: Use get_online_cpus/put_online_cpus in padata_free

Add get_online_cpus/put_online_cpus to ensure that no cpu goes
offline during the flushing of the padata percpu queues.

Signed-off-by: Steffen Klassert <[email protected]>
---
kernel/padata.c | 3 +++
1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/kernel/padata.c b/kernel/padata.c
index 629bef3..1f40560 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -763,7 +763,10 @@ void padata_free(struct padata_instance *pinst)
#ifdef CONFIG_HOTPLUG_CPU
unregister_hotcpu_notifier(&pinst->cpu_notifier);
#endif
+ get_online_cpus();
padata_flush_queues(pinst->pd);
+ put_online_cpus();
+
padata_free_pd(pinst->pd);
free_cpumask_var(pinst->cpumask);
kfree(pinst);
--
1.5.6.5

2010-05-14 16:21:30

by Randy Dunlap

[permalink] [raw]
Subject: Re: [PATCH 3/4] padata: Add some code comments

On Fri, 14 May 2010 13:46:06 +0200 Steffen Klassert wrote:

>
> Signed-off-by: Steffen Klassert <[email protected]>
> ---
> include/linux/padata.h | 53 ++++++++++++++++++++++++++++++++++++++++++++++++
> kernel/padata.c | 50 +++++++++++++++++++++++++++++++++++++++++++-
> 2 files changed, 101 insertions(+), 2 deletions(-)

Hi Steffen,

These comments are roughly 90% of the way to being kernel-doc notation,
so how about going the rest of the way, please?


> diff --git a/include/linux/padata.h b/include/linux/padata.h
> index 64836a6..e8aac0f 100644
> --- a/include/linux/padata.h
> +++ b/include/linux/padata.h
> @@ -26,6 +26,17 @@
> #include <linux/list.h>
> #include <linux/timer.h>
>
> +/*

Use:
/**
in multiple places.

> + * struct padata_priv - Embedded to the users data structure.
> + *
> + * @list: List entry, to attach to the padata lists.
> + * @pd: Pointer to the internal control structure.
> + * @cb_cpu: Callback cpu for serializatioon.
> + * @seq_nr: Sequence number of the parallelized data object.
> + * @info: Used to pass information from the parallel to the serial function.
> + * @parallel: Parallel execution function.
> + * @serial: Serial complete function.
> + */
> struct padata_priv {
> struct list_head list;
> struct parallel_data *pd;
> @@ -36,11 +47,29 @@ struct padata_priv {
> void (*serial)(struct padata_priv *padata);
> };
>
> +/*
> + * struct padata_list
> + *
> + * @list: List head.
> + * @lock: List lock.
> + */
> struct padata_list {
> struct list_head list;
> spinlock_t lock;
> };
>
> +/*
> + * struct padata_queue - The percpu padata queues.
> + *
> + * @parallel: List to wait for parallelization.
> + * @reorder: List to wait for reordering after parallel processing.
> + * @serial: List to wait for serialization after reordering.
> + * @pwork: work struct for parallelization.
> + * @swork: work struct for serialization.
> + * @pd: Backpointer to the internal control structure.
> + * @num_obj: Number of objects that are processed by this cpu.
> + * @cpu_index: Index of the cpu.
> + */
> struct padata_queue {
> struct padata_list parallel;
> struct padata_list reorder;
> @@ -52,6 +81,20 @@ struct padata_queue {
> int cpu_index;
> };
>
> +/*
> + * struct parallel_data - Internal control structure, covers everything
> + * that depends on the cpumask in use.
> + *
> + * @pinst: padata instance.
> + * @queue: percpu padata queues.
> + * @seq_nr: The sequence number that will be attached to the next object.
> + * @reorder_objects: Number of objects waiting in the reorder queues.
> + * @refcnt: Number of objects holding a reference on this parallel_data.
> + * @max_seq_nr: Maximal used sequence number.
> + * @cpumask: cpumask in use.
> + * @lock: Reorder lock.
> + * @timer: Reorder timer.
> + */
> struct parallel_data {
> struct padata_instance *pinst;
> struct padata_queue *queue;
> @@ -64,6 +107,16 @@ struct parallel_data {
> struct timer_list timer;
> };
>
> +/*
> + * struct padata_instance - The overall control structure.
> + *
> + * @cpu_notifier: cpu hotplug notifier.
> + * @wq: The workqueue in use.
> + * @pd: The internal control structure.
> + * @cpumask: User supplied cpumask.
> + * @lock: padata instance lock.
> + * @flags: padata flags.
> + */
> struct padata_instance {
> struct notifier_block cpu_notifier;
> struct workqueue_struct *wq;
> diff --git a/kernel/padata.c b/kernel/padata.c
> index ec6b8b7..629bef3 100644
> --- a/kernel/padata.c
> +++ b/kernel/padata.c
> @@ -152,6 +152,23 @@ out:
> }
> EXPORT_SYMBOL(padata_do_parallel);
>
> +/*

/**

> + * padata_get_next - Get the next object that needs serialization.

* @pd: <parameter description>

> + *
> + * Return values are:
> + *
> + * A pointer to the control struct of the next object that needs
> + * serialization, if present in one of the percpu reorder queues.
> + *
> + * NULL, if all percpu reorder queues are empty.
> + *
> + * -EINPROGRESS, if the next object that needs serialization will
> + * be parallel processed by another cpu and is not yet present in
> + * the cpu's reorder queue.
> + *
> + * -ENODATA, if this cpu has to do the parallel processing for
> + * the next object.
> + */
> static struct padata_priv *padata_get_next(struct parallel_data *pd)
> {
> int cpu, num_cpus, empty, calc_seq_nr;
> @@ -173,7 +190,7 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd)
>
> /*
> * Calculate the seq_nr of the object that should be
> - * next in this queue.
> + * next in this reorder queue.
> */
> overrun = 0;
> calc_seq_nr = (atomic_read(&queue->num_obj) * num_cpus)

Thanks.


---
~Randy
*** Remember to use Documentation/SubmitChecklist when testing your code ***

2010-05-17 07:04:46

by Steffen Klassert

[permalink] [raw]
Subject: Re: [PATCH 3/4] padata: Add some code comments

Hi Randy,

On Fri, May 14, 2010 at 09:18:57AM -0700, Randy Dunlap wrote:
>
> Hi Steffen,
>
> These comments are roughly 90% of the way to being kernel-doc notation,
> so how about going the rest of the way, please?
>

yes of course we can. I read Documentation/kernel-doc-nano-HOWTO.txt,
so I need to use /** for all comments that I want to add to kernel-doc.
Anything else to do?

>
> Use:
> /**
> in multiple places.
>

I wondered several times why some comments start with /** while
others start with /*. Anyway, I did not pay too much attention
to this, because it was just the beginning line of a comment.
Now I know the difference :)


> /**
>
> > + * padata_get_next - Get the next object that needs serialization.
>

This one is a static function; I would not mind adding it to kernel-doc
too, but that's probably not needed.

I'll resend this one with the suggested changes,
thanks for pointing this out.

Steffen

2010-05-18 05:51:09

by Steffen Klassert

[permalink] [raw]
Subject: [PATCH 3/4 v2] padata: Add some code comments


Signed-off-by: Steffen Klassert <[email protected]>
---
include/linux/padata.h | 53 +++++++++++++++++++++++++++++++++++++
kernel/padata.c | 68 ++++++++++++++++++++++++++++++++++++++++--------
2 files changed, 110 insertions(+), 11 deletions(-)

diff --git a/include/linux/padata.h b/include/linux/padata.h
index 64836a6..8d84062 100644
--- a/include/linux/padata.h
+++ b/include/linux/padata.h
@@ -26,6 +26,17 @@
#include <linux/list.h>
#include <linux/timer.h>

+/**
+ * struct padata_priv - Embedded to the users data structure.
+ *
+ * @list: List entry, to attach to the padata lists.
+ * @pd: Pointer to the internal control structure.
+ * @cb_cpu: Callback cpu for serializatioon.
+ * @seq_nr: Sequence number of the parallelized data object.
+ * @info: Used to pass information from the parallel to the serial function.
+ * @parallel: Parallel execution function.
+ * @serial: Serial complete function.
+ */
struct padata_priv {
struct list_head list;
struct parallel_data *pd;
@@ -36,11 +47,29 @@ struct padata_priv {
void (*serial)(struct padata_priv *padata);
};

+/**
+ * struct padata_list
+ *
+ * @list: List head.
+ * @lock: List lock.
+ */
struct padata_list {
struct list_head list;
spinlock_t lock;
};

+/**
+ * struct padata_queue - The percpu padata queues.
+ *
+ * @parallel: List to wait for parallelization.
+ * @reorder: List to wait for reordering after parallel processing.
+ * @serial: List to wait for serialization after reordering.
+ * @pwork: work struct for parallelization.
+ * @swork: work struct for serialization.
+ * @pd: Backpointer to the internal control structure.
+ * @num_obj: Number of objects that are processed by this cpu.
+ * @cpu_index: Index of the cpu.
+ */
struct padata_queue {
struct padata_list parallel;
struct padata_list reorder;
@@ -52,6 +81,20 @@ struct padata_queue {
int cpu_index;
};

+/**
+ * struct parallel_data - Internal control structure, covers everything
+ * that depends on the cpumask in use.
+ *
+ * @pinst: padata instance.
+ * @queue: percpu padata queues.
+ * @seq_nr: The sequence number that will be attached to the next object.
+ * @reorder_objects: Number of objects waiting in the reorder queues.
+ * @refcnt: Number of objects holding a reference on this parallel_data.
+ * @max_seq_nr: Maximal used sequence number.
+ * @cpumask: cpumask in use.
+ * @lock: Reorder lock.
+ * @timer: Reorder timer.
+ */
struct parallel_data {
struct padata_instance *pinst;
struct padata_queue *queue;
@@ -64,6 +107,16 @@ struct parallel_data {
struct timer_list timer;
};

+/**
+ * struct padata_instance - The overall control structure.
+ *
+ * @cpu_notifier: cpu hotplug notifier.
+ * @wq: The workqueue in use.
+ * @pd: The internal control structure.
+ * @cpumask: User supplied cpumask.
+ * @lock: padata instance lock.
+ * @flags: padata flags.
+ */
struct padata_instance {
struct notifier_block cpu_notifier;
struct workqueue_struct *wq;
diff --git a/kernel/padata.c b/kernel/padata.c
index ec6b8b7..ca89dfb 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -88,7 +88,7 @@ static void padata_parallel_worker(struct work_struct *work)
local_bh_enable();
}

-/*
+/**
* padata_do_parallel - padata parallelization function
*
* @pinst: padata instance
@@ -152,6 +152,23 @@ out:
}
EXPORT_SYMBOL(padata_do_parallel);

+/*
+ * padata_get_next - Get the next object that needs serialization.
+ *
+ * Return values are:
+ *
+ * A pointer to the control struct of the next object that needs
+ * serialization, if present in one of the percpu reorder queues.
+ *
+ * NULL, if all percpu reorder queues are empty.
+ *
+ * -EINPROGRESS, if the next object that needs serialization will
+ * be parallel processed by another cpu and is not yet present in
+ * the cpu's reorder queue.
+ *
+ * -ENODATA, if this cpu has to do the parallel processing for
+ * the next object.
+ */
static struct padata_priv *padata_get_next(struct parallel_data *pd)
{
int cpu, num_cpus, empty, calc_seq_nr;
@@ -173,7 +190,7 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd)

/*
* Calculate the seq_nr of the object that should be
- * next in this queue.
+ * next in this reorder queue.
*/
overrun = 0;
calc_seq_nr = (atomic_read(&queue->num_obj) * num_cpus)
@@ -248,15 +265,36 @@ static void padata_reorder(struct parallel_data *pd)
struct padata_queue *queue;
struct padata_instance *pinst = pd->pinst;

+ /*
+ * We need to ensure that only one cpu can work on dequeueing of
+ * the reorder queue the time. Calculating in which percpu reorder
+ * queue the next object will arrive takes some time. A spinlock
+ * would be highly contended. Also it is not clear in which order
+ * the objects arrive to the reorder queues. So a cpu could wait to
+ * get the lock just to notice that there is nothing to do at the
+ * moment. Therefore we use a trylock and let the holder of the lock
+ * care for all the objects enqueued during the holdtime of the lock.
+ */
if (!spin_trylock_bh(&pd->lock))
return;

while (1) {
padata = padata_get_next(pd);

+ /*
+ * All reorder queues are empty, or the next object that needs
+ * serialization is parallel processed by another cpu and is
+ * still on it's way to the cpu's reorder queue, nothing to
+ * do for now.
+ */
if (!padata || PTR_ERR(padata) == -EINPROGRESS)
break;

+ /*
+ * This cpu has to do the parallel processing of the next
+ * object. It's waiting in the cpu's parallelization queue,
+ * so exit imediately.
+ */
if (PTR_ERR(padata) == -ENODATA) {
del_timer(&pd->timer);
spin_unlock_bh(&pd->lock);
@@ -274,6 +312,11 @@ static void padata_reorder(struct parallel_data *pd)

spin_unlock_bh(&pd->lock);

+ /*
+ * The next object that needs serialization might have arrived to
+ * the reorder queues in the meantime, we will be called again
+ * from the timer function if noone else cares for it.
+ */
if (atomic_read(&pd->reorder_objects)
&& !(pinst->flags & PADATA_RESET))
mod_timer(&pd->timer, jiffies + HZ);
@@ -318,7 +361,7 @@ static void padata_serial_worker(struct work_struct *work)
local_bh_enable();
}

-/*
+/**
* padata_do_serial - padata serialization function
*
* @padata: object to be serialized.
@@ -348,6 +391,7 @@ void padata_do_serial(struct padata_priv *padata)
}
EXPORT_SYMBOL(padata_do_serial);

+/* Allocate and initialize the internal cpumask dependend resources. */
static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
const struct cpumask *cpumask)
{
@@ -417,6 +461,7 @@ static void padata_free_pd(struct parallel_data *pd)
kfree(pd);
}

+/* Flush all objects out of the padata queues. */
static void padata_flush_queues(struct parallel_data *pd)
{
int cpu;
@@ -440,6 +485,7 @@ static void padata_flush_queues(struct parallel_data *pd)
BUG_ON(atomic_read(&pd->refcnt) != 0);
}

+/* Replace the internal control stucture with a new one. */
static void padata_replace(struct padata_instance *pinst,
struct parallel_data *pd_new)
{
@@ -457,7 +503,7 @@ static void padata_replace(struct padata_instance *pinst,
pinst->flags &= ~PADATA_RESET;
}

-/*
+/**
* padata_set_cpumask - set the cpumask that padata should use
*
* @pinst: padata instance
@@ -507,7 +553,7 @@ static int __padata_add_cpu(struct padata_instance *pinst, int cpu)
return 0;
}

-/*
+/**
* padata_add_cpu - add a cpu to the padata cpumask
*
* @pinst: padata instance
@@ -545,7 +591,7 @@ static int __padata_remove_cpu(struct padata_instance *pinst, int cpu)
return 0;
}

-/*
+/**
* padata_remove_cpu - remove a cpu from the padata cpumask
*
* @pinst: padata instance
@@ -568,7 +614,7 @@ int padata_remove_cpu(struct padata_instance *pinst, int cpu)
}
EXPORT_SYMBOL(padata_remove_cpu);

-/*
+/**
* padata_start - start the parallel processing
*
* @pinst: padata instance to start
@@ -581,7 +627,7 @@ void padata_start(struct padata_instance *pinst)
}
EXPORT_SYMBOL(padata_start);

-/*
+/**
* padata_stop - stop the parallel processing
*
* @pinst: padata instance to stop
@@ -648,7 +694,7 @@ static int padata_cpu_callback(struct notifier_block *nfb,
}
#endif

-/*
+/**
* padata_alloc - allocate and initialize a padata instance
*
* @cpumask: cpumask that padata uses for parallelization
@@ -703,10 +749,10 @@ err:
}
EXPORT_SYMBOL(padata_alloc);

-/*
+/**
* padata_free - free a padata instance
*
- * @ padata_inst: padata instance to free
+ * @padata_inst: padata instance to free
*/
void padata_free(struct padata_instance *pinst)
{
--
1.5.6.5

2010-05-19 03:46:10

by Herbert Xu

[permalink] [raw]
Subject: Re: [PATCH 3/4 v2] padata: Add some code comments

On Tue, May 18, 2010 at 07:49:12AM +0200, Steffen Klassert wrote:
>
> Signed-off-by: Steffen Klassert <[email protected]>

All four patches applied. Thanks a lot!
--
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <[email protected]>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt