Allocating memory for every CPU for a single-threaded workqueue is wasteful.
Signed-off-by: Lai Jiangshan <[email protected]>
---
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 2f44583..ecd693d 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -99,7 +99,7 @@ static
struct cpu_workqueue_struct *wq_per_cpu(struct workqueue_struct *wq, int cpu)
{
if (unlikely(is_wq_single_threaded(wq)))
- cpu = singlethread_cpu;
+ return wq->cpu_wq;
return per_cpu_ptr(wq->cpu_wq, cpu);
}
@@ -417,7 +417,7 @@ void flush_workqueue(struct workqueue_struct *wq)
lock_map_acquire(&wq->lockdep_map);
lock_map_release(&wq->lockdep_map);
for_each_cpu_mask_nr(cpu, *cpu_map)
- flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, cpu));
+ flush_cpu_workqueue(wq_per_cpu(wq, cpu));
}
EXPORT_SYMBOL_GPL(flush_workqueue);
@@ -548,7 +548,7 @@ static void wait_on_work(struct work_struct *work)
cpu_map = wq_cpu_map(wq);
for_each_cpu_mask_nr(cpu, *cpu_map)
- wait_on_cpu_work(per_cpu_ptr(wq->cpu_wq, cpu), work);
+ wait_on_cpu_work(wq_per_cpu(wq, cpu), work);
}
static int __cancel_work_timer(struct work_struct *work,
@@ -752,17 +752,13 @@ int current_is_keventd(void)
}
-static struct cpu_workqueue_struct *
-init_cpu_workqueue(struct workqueue_struct *wq, int cpu)
+static void init_cpu_workqueue(struct workqueue_struct *wq,
+ struct cpu_workqueue_struct *cwq)
{
- struct cpu_workqueue_struct *cwq = per_cpu_ptr(wq->cpu_wq, cpu);
-
cwq->wq = wq;
spin_lock_init(&cwq->lock);
INIT_LIST_HEAD(&cwq->worklist);
init_waitqueue_head(&cwq->more_work);
-
- return cwq;
}
static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
@@ -816,7 +812,10 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
if (!wq)
return NULL;
- wq->cpu_wq = alloc_percpu(struct cpu_workqueue_struct);
+ if (singlethread)
+ wq->cpu_wq = kmalloc(sizeof(*cwq), GFP_KERNEL);
+ else
+ wq->cpu_wq = alloc_percpu(struct cpu_workqueue_struct);
if (!wq->cpu_wq) {
kfree(wq);
return NULL;
@@ -830,7 +829,8 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
INIT_LIST_HEAD(&wq->list);
if (singlethread) {
- cwq = init_cpu_workqueue(wq, singlethread_cpu);
+ cwq = wq->cpu_wq;
+ init_cpu_workqueue(wq, cwq);
err = create_workqueue_thread(cwq, singlethread_cpu);
start_workqueue_thread(cwq, -1);
} else {
@@ -851,7 +851,8 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
* lock.
*/
for_each_possible_cpu(cpu) {
- cwq = init_cpu_workqueue(wq, cpu);
+ cwq = per_cpu_ptr(wq->cpu_wq, cpu);
+ init_cpu_workqueue(wq, cwq);
if (err || !cpu_online(cpu))
continue;
err = create_workqueue_thread(cwq, cpu);
@@ -906,6 +907,13 @@ void destroy_workqueue(struct workqueue_struct *wq)
const struct cpumask *cpu_map = wq_cpu_map(wq);
int cpu;
+ if (is_wq_single_threaded(wq)) {
+ cleanup_workqueue_thread(wq->cpu_wq);
+ kfree(wq->cpu_wq);
+ kfree(wq);
+ return;
+ }
+
cpu_maps_update_begin();
spin_lock(&workqueue_lock);
list_del(&wq->list);
2009/1/21 Lai Jiangshan <[email protected]>:
>
> allocating memory for every cpu for single workqueue is waste.
>
> Signed-off-by: Lai Jiangshan <[email protected]>
> ---
> diff --git a/kernel/workqueue.c b/kernel/workqueue.c
> index 2f44583..ecd693d 100644
> --- a/kernel/workqueue.c
> +++ b/kernel/workqueue.c
> @@ -99,7 +99,7 @@ static
> struct cpu_workqueue_struct *wq_per_cpu(struct workqueue_struct *wq, int cpu)
> {
> if (unlikely(is_wq_single_threaded(wq)))
> - cpu = singlethread_cpu;
> + return wq->cpu_wq;
> return per_cpu_ptr(wq->cpu_wq, cpu);
> }
>
> @@ -417,7 +417,7 @@ void flush_workqueue(struct workqueue_struct *wq)
> lock_map_acquire(&wq->lockdep_map);
> lock_map_release(&wq->lockdep_map);
> for_each_cpu_mask_nr(cpu, *cpu_map)
> - flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, cpu));
> + flush_cpu_workqueue(wq_per_cpu(wq, cpu));
> }
> EXPORT_SYMBOL_GPL(flush_workqueue);
>
> @@ -548,7 +548,7 @@ static void wait_on_work(struct work_struct *work)
> cpu_map = wq_cpu_map(wq);
>
> for_each_cpu_mask_nr(cpu, *cpu_map)
> - wait_on_cpu_work(per_cpu_ptr(wq->cpu_wq, cpu), work);
> + wait_on_cpu_work(wq_per_cpu(wq, cpu), work);
> }
>
> static int __cancel_work_timer(struct work_struct *work,
> @@ -752,17 +752,13 @@ int current_is_keventd(void)
>
> }
>
> -static struct cpu_workqueue_struct *
> -init_cpu_workqueue(struct workqueue_struct *wq, int cpu)
> +static void init_cpu_workqueue(struct workqueue_struct *wq,
> + struct cpu_workqueue_struct *cwq)
> {
> - struct cpu_workqueue_struct *cwq = per_cpu_ptr(wq->cpu_wq, cpu);
> -
> cwq->wq = wq;
> spin_lock_init(&cwq->lock);
> INIT_LIST_HEAD(&cwq->worklist);
> init_waitqueue_head(&cwq->more_work);
> -
> - return cwq;
> }
>
> static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
> @@ -816,7 +812,10 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
> if (!wq)
> return NULL;
>
> - wq->cpu_wq = alloc_percpu(struct cpu_workqueue_struct);
> + if (singlethread)
> + wq->cpu_wq = kmalloc(sizeof(*cwq), GFP_KERNEL);
> + else
> + wq->cpu_wq = alloc_percpu(struct cpu_workqueue_struct);
> if (!wq->cpu_wq) {
> kfree(wq);
> return NULL;
> @@ -830,7 +829,8 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
> INIT_LIST_HEAD(&wq->list);
>
> if (singlethread) {
> - cwq = init_cpu_workqueue(wq, singlethread_cpu);
> + cwq = wq->cpu_wq;
> + init_cpu_workqueue(wq, cwq);
> err = create_workqueue_thread(cwq, singlethread_cpu);
> start_workqueue_thread(cwq, -1);
> } else {
> @@ -851,7 +851,8 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
> * lock.
> */
> for_each_possible_cpu(cpu) {
> - cwq = init_cpu_workqueue(wq, cpu);
> + cwq = per_cpu_ptr(wq->cpu_wq, cpu);
> + init_cpu_workqueue(wq, cwq);
> if (err || !cpu_online(cpu))
> continue;
> err = create_workqueue_thread(cwq, cpu);
> @@ -906,6 +907,13 @@ void destroy_workqueue(struct workqueue_struct *wq)
> const struct cpumask *cpu_map = wq_cpu_map(wq);
> int cpu;
>
> + if (is_wq_single_threaded(wq)) {
> + cleanup_workqueue_thread(wq->cpu_wq);
> + kfree(wq->cpu_wq);
> + kfree(wq);
> + return;
> + }
> +
> cpu_maps_update_begin();
> spin_lock(&workqueue_lock);
> list_del(&wq->list);
>
Looks like a nice catch!
* Lai Jiangshan <[email protected]> wrote:
> allocating memory for every cpu for single workqueue is waste.
good idea.
One detail:
> @@ -906,6 +907,13 @@ void destroy_workqueue(struct workqueue_struct *wq)
> const struct cpumask *cpu_map = wq_cpu_map(wq);
> int cpu;
>
> + if (is_wq_single_threaded(wq)) {
> + cleanup_workqueue_thread(wq->cpu_wq);
> + kfree(wq->cpu_wq);
> + kfree(wq);
> + return;
> + }
> +
> cpu_maps_update_begin();
> spin_lock(&workqueue_lock);
> list_del(&wq->list);
Aren't we forgetting to remove the workqueue from the &workqueues list in
the new single-threaded case?
Also, see the checkpatch output from kernel/workqueue.c - the warnings
below are correct and should be cleaned up.
Ingo
WARNING: printk() should include KERN_ facility level
#268: FILE: workqueue.c:268:
+ printk("%s: recursion depth exceeded: %d\n",
ERROR: code indent should use tabs where possible
#304: FILE: workqueue.c:304:
+^I^I^I^I ^Itask_pid_nr(current));$
ERROR: "foo* bar" should be "foo *bar"
#555: FILE: workqueue.c:555:
+ struct timer_list* timer)
ERROR: code indent should use tabs where possible
#916: FILE: workqueue.c:916:
+ ^Icpu_maps_update_done();$
kernel/workqueue.c has style problems, please review. If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
On 01/21, Lai Jiangshan wrote:
>
> allocating memory for every cpu for single workqueue is waste.
Yes, perhaps this makes sense, we can save a bit of per-cpu memory
for each single-threaded wq, and the patch looks correct.
> -static struct cpu_workqueue_struct *
> -init_cpu_workqueue(struct workqueue_struct *wq, int cpu)
> +static void init_cpu_workqueue(struct workqueue_struct *wq,
> + struct cpu_workqueue_struct *cwq)
> {
> - struct cpu_workqueue_struct *cwq = per_cpu_ptr(wq->cpu_wq, cpu);
> -
> cwq->wq = wq;
> spin_lock_init(&cwq->lock);
> INIT_LIST_HEAD(&cwq->worklist);
> init_waitqueue_head(&cwq->more_work);
> -
> - return cwq;
> }
Do we really need to change the prototype of init_cpu_workqueue()
and then change __create_workqueue_key() accordingly?
Afaics, the only change in init_cpu_workqueue() we need is
- struct cpu_workqueue_struct *cwq = per_cpu_ptr(wq->cpu_wq, cpu);
+ struct cpu_workqueue_struct *cwq = wq_per_cpu(wq, cpu);
no?
> @@ -906,6 +907,13 @@ void destroy_workqueue(struct workqueue_struct *wq)
> const struct cpumask *cpu_map = wq_cpu_map(wq);
> int cpu;
>
> + if (is_wq_single_threaded(wq)) {
> + cleanup_workqueue_thread(wq->cpu_wq);
> + kfree(wq->cpu_wq);
> + kfree(wq);
> + return;
> + }
again, not sure I understand why this change is needed. Afaics we
only need to use kfree(wq->cpu_wq) instead of free_percpu() if
it is single-threaded.
Oleg.
Lai Jiangshan a écrit :
> allocating memory for every cpu for single workqueue is waste.
>
> Signed-off-by: Lai Jiangshan <[email protected]>
> ---
> diff --git a/kernel/workqueue.c b/kernel/workqueue.c
> index 2f44583..ecd693d 100644
> --- a/kernel/workqueue.c
> +++ b/kernel/workqueue.c
> @@ -99,7 +99,7 @@ static
> struct cpu_workqueue_struct *wq_per_cpu(struct workqueue_struct *wq, int cpu)
> {
> if (unlikely(is_wq_single_threaded(wq)))
> - cpu = singlethread_cpu;
> + return wq->cpu_wq;
> return per_cpu_ptr(wq->cpu_wq, cpu);
> }
>
> @@ -417,7 +417,7 @@ void flush_workqueue(struct workqueue_struct *wq)
> lock_map_acquire(&wq->lockdep_map);
> lock_map_release(&wq->lockdep_map);
> for_each_cpu_mask_nr(cpu, *cpu_map)
> - flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, cpu));
> + flush_cpu_workqueue(wq_per_cpu(wq, cpu));
> }
> EXPORT_SYMBOL_GPL(flush_workqueue);
>
> @@ -548,7 +548,7 @@ static void wait_on_work(struct work_struct *work)
> cpu_map = wq_cpu_map(wq);
>
> for_each_cpu_mask_nr(cpu, *cpu_map)
> - wait_on_cpu_work(per_cpu_ptr(wq->cpu_wq, cpu), work);
> + wait_on_cpu_work(wq_per_cpu(wq, cpu), work);
> }
>
> static int __cancel_work_timer(struct work_struct *work,
> @@ -752,17 +752,13 @@ int current_is_keventd(void)
>
> }
>
> -static struct cpu_workqueue_struct *
> -init_cpu_workqueue(struct workqueue_struct *wq, int cpu)
> +static void init_cpu_workqueue(struct workqueue_struct *wq,
> + struct cpu_workqueue_struct *cwq)
> {
> - struct cpu_workqueue_struct *cwq = per_cpu_ptr(wq->cpu_wq, cpu);
> -
> cwq->wq = wq;
> spin_lock_init(&cwq->lock);
> INIT_LIST_HEAD(&cwq->worklist);
> init_waitqueue_head(&cwq->more_work);
> -
> - return cwq;
> }
>
> static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
> @@ -816,7 +812,10 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
> if (!wq)
> return NULL;
>
> - wq->cpu_wq = alloc_percpu(struct cpu_workqueue_struct);
> + if (singlethread)
> + wq->cpu_wq = kmalloc(sizeof(*cwq), GFP_KERNEL);
kzalloc() here, since alloc_percpu() does the zeroing for you.
Or else run_depth is not initialized for example.
> + else
> + wq->cpu_wq = alloc_percpu(struct cpu_workqueue_struct);
> if (!wq->cpu_wq) {
> kfree(wq);
> return NULL;
Ingo Molnar wrote:
> * Lai Jiangshan <[email protected]> wrote:
>
>> allocating memory for every cpu for single workqueue is waste.
>
> good idea.
>
> One detail:
>
>> @@ -906,6 +907,13 @@ void destroy_workqueue(struct workqueue_struct *wq)
>> const struct cpumask *cpu_map = wq_cpu_map(wq);
>> int cpu;
>>
>> + if (is_wq_single_threaded(wq)) {
>> + cleanup_workqueue_thread(wq->cpu_wq);
>> + kfree(wq->cpu_wq);
>> + kfree(wq);
>> + return;
>> + }
>> +
>> cpu_maps_update_begin();
>> spin_lock(&workqueue_lock);
>> list_del(&wq->list);
>
> Arent we forgetting to remove the workqueue from the &workqueues list in
> the new single-thread case?
A single-threaded workqueue is not inserted into the &workqueues list.
See also the single-threaded case in __create_workqueue_key():
.....
if (singlethread) {
cwq = wq->cpu_wq;
init_cpu_workqueue(wq, cwq);
err = create_workqueue_thread(cwq, singlethread_cpu);
start_workqueue_thread(cwq, -1);
}
.....
>
> Also, see the checkpatch output from kernel/workqueue.c - the warnings
> below are correct and should be cleaned up.
>
> Ingo
>
> WARNING: printk() should include KERN_ facility level
> #268: FILE: workqueue.c:268:
> + printk("%s: recursion depth exceeded: %d\n",
>
> ERROR: code indent should use tabs where possible
> #304: FILE: workqueue.c:304:
> +^I^I^I^I ^Itask_pid_nr(current));$
>
> ERROR: "foo* bar" should be "foo *bar"
> #555: FILE: workqueue.c:555:
> + struct timer_list* timer)
>
> ERROR: code indent should use tabs where possible
> #916: FILE: workqueue.c:916:
> + ^Icpu_maps_update_done();$
>
> kernel/workqueue.c has style problems, please review. If any of these errors
> are false positives report them to the maintainer, see
> CHECKPATCH in MAINTAINERS.
>
>
>
Oleg Nesterov wrote:
> On 01/21, Lai Jiangshan wrote:
>> allocating memory for every cpu for single workqueue is waste.
>
> Yes, perhaps this makes sense, we can save a bit of per-cpu memory
> for each single-threaded wq, and the patch looks correct.
>
>> -static struct cpu_workqueue_struct *
>> -init_cpu_workqueue(struct workqueue_struct *wq, int cpu)
>> +static void init_cpu_workqueue(struct workqueue_struct *wq,
>> + struct cpu_workqueue_struct *cwq)
>> {
>> - struct cpu_workqueue_struct *cwq = per_cpu_ptr(wq->cpu_wq, cpu);
>> -
>> cwq->wq = wq;
>> spin_lock_init(&cwq->lock);
>> INIT_LIST_HEAD(&cwq->worklist);
>> init_waitqueue_head(&cwq->more_work);
>> -
>> - return cwq;
>> }
>
> Do we really need to change the prototype of init_cpu_workqueue()
> and change then change __create_workqueue_key() accordingly?
> Afaics, the only change in init_cpu_workqueue() we need is
>
> - struct cpu_workqueue_struct *cwq = per_cpu_ptr(wq->cpu_wq, cpu);
> + struct cpu_workqueue_struct *cwq = wq_per_cpu(wq, cpu);
>
> no?
Thanks, it is simpler.
>
>> @@ -906,6 +907,13 @@ void destroy_workqueue(struct workqueue_struct *wq)
>> const struct cpumask *cpu_map = wq_cpu_map(wq);
>> int cpu;
>>
>> + if (is_wq_single_threaded(wq)) {
>> + cleanup_workqueue_thread(wq->cpu_wq);
>> + kfree(wq->cpu_wq);
>> + kfree(wq);
>> + return;
>> + }
>
> again, not sure I understand why this change is needed. Afaics we
> only need to use kfree(wq->cpu_wq) instead of free_percpu() if
> it is single-threaded.
>
I think this change is needed.
In the single thread case, we don't need
1) cpu_maps_update_begin(). --> require cpu_add_remove_lock
2) remove workqueue from the list. (we did not inserted it)
It is true that no bad result occurs when we do these
things for a single-threaded workqueue. But I think the destroy path should
not do more than the create path did.
Thanks, Lai
On 01/22, Lai Jiangshan wrote:
>
> Oleg Nesterov wrote:
> > On 01/21, Lai Jiangshan wrote:
> >> @@ -906,6 +907,13 @@ void destroy_workqueue(struct workqueue_struct *wq)
> >> const struct cpumask *cpu_map = wq_cpu_map(wq);
> >> int cpu;
> >>
> >> + if (is_wq_single_threaded(wq)) {
> >> + cleanup_workqueue_thread(wq->cpu_wq);
> >> + kfree(wq->cpu_wq);
> >> + kfree(wq);
> >> + return;
> >> + }
> >
> > again, not sure I understand why this change is needed. Afaics we
> > only need to use kfree(wq->cpu_wq) instead of free_percpu() if
> > it is single-threaded.
> >
>
> I think this change is needed.
> In the single thread case, we don't need
> 1) cpu_maps_update_begin(). --> require cpu_add_remove_lock
> 2) remove workqueue from the list. (we did not inserted it)
>
> It is indeed that there is no bad result occurred when we do these
> things for single thread. But I think the destroying should not
> do things more than the creating.
I disagree.
Firstly, this path is rare and not time critical, so it is better
to save a couple of bytes of .text.
But mostly I dislike the fact that we add another special case
for the single-threaded wqs which is not strictly needed.
Following your logic we could also change flush_workqueue(), which
doesn't need for_each_cpu_mask_nr() when single-threaded.
That said, I agree this is a matter of taste, I won't persist.
Oleg.