It is not good to disable interrupts while traversing all tasks in the
system. As I see it, sys_ioprio_get() doesn't need to cli() at all,
sys_ioprio_set() does it for cfq_ioc_set_ioprio() which can do it itself.
Signed-off-by: Oleg Nesterov <[email protected]>
--- 2.6.18-rc4/block/cfq-iosched.c~5_setpr 2006-08-09 22:00:44.000000000 +0400
+++ 2.6.18-rc4/block/cfq-iosched.c 2006-08-20 21:23:25.000000000 +0400
@@ -1421,14 +1421,14 @@ static inline void changed_ioprio(struct
}
/*
- * callback from sys_ioprio_set, irqs are disabled
+ * callback from sys_ioprio_set.
*/
static int cfq_ioc_set_ioprio(struct io_context *ioc, unsigned int ioprio)
{
struct cfq_io_context *cic;
struct rb_node *n;
- spin_lock(&cfq_exit_lock);
+ spin_lock_irq(&cfq_exit_lock);
n = rb_first(&ioc->cic_root);
while (n != NULL) {
@@ -1438,7 +1438,7 @@ static int cfq_ioc_set_ioprio(struct io_
n = rb_next(n);
}
- spin_unlock(&cfq_exit_lock);
+ spin_unlock_irq(&cfq_exit_lock);
return 0;
}
--- 2.6.18-rc4/fs/ioprio.c~5_setpr 2006-08-20 20:16:39.000000000 +0400
+++ 2.6.18-rc4/fs/ioprio.c 2006-08-20 21:21:26.000000000 +0400
@@ -81,7 +81,7 @@ asmlinkage long sys_ioprio_set(int which
}
ret = -ESRCH;
- read_lock_irq(&tasklist_lock);
+ read_lock(&tasklist_lock);
switch (which) {
case IOPRIO_WHO_PROCESS:
if (!who)
@@ -124,7 +124,7 @@ free_uid:
ret = -EINVAL;
}
- read_unlock_irq(&tasklist_lock);
+ read_unlock(&tasklist_lock);
return ret;
}
@@ -170,7 +170,7 @@ asmlinkage long sys_ioprio_get(int which
int ret = -ESRCH;
int tmpio;
- read_lock_irq(&tasklist_lock);
+ read_lock(&tasklist_lock);
switch (which) {
case IOPRIO_WHO_PROCESS:
if (!who)
@@ -221,7 +221,7 @@ asmlinkage long sys_ioprio_get(int which
ret = -EINVAL;
}
- read_unlock_irq(&tasklist_lock);
+ read_unlock(&tasklist_lock);
return ret;
}
Question: why do we need to disable irqs in exit_io_context() ?
Why do we need ->alloc_lock to clear io_context->task ?
In other words, could you explain why the patch below is not correct.
Thanks,
Oleg.
--- 2.6.18-rc4/block/ll_rw_blk.c~6_exit 2006-08-20 19:30:10.000000000 +0400
+++ 2.6.18-rc4/block/ll_rw_blk.c 2006-08-20 22:34:46.000000000 +0400
@@ -3580,25 +3580,22 @@ EXPORT_SYMBOL(put_io_context);
/* Called by the exitting task */
void exit_io_context(void)
{
- unsigned long flags;
struct io_context *ioc;
struct cfq_io_context *cic;
- local_irq_save(flags);
task_lock(current);
ioc = current->io_context;
current->io_context = NULL;
- ioc->task = NULL;
task_unlock(current);
- local_irq_restore(flags);
+ ioc->task = NULL;
if (ioc->aic && ioc->aic->exit)
ioc->aic->exit(ioc->aic);
if (ioc->cic_root.rb_node != NULL) {
cic = rb_entry(rb_first(&ioc->cic_root), struct cfq_io_context, rb_node);
cic->exit(ioc);
}
-
+
put_io_context(ioc);
}
On Mon, 21 Aug 2006 00:50:34 +0400
Oleg Nesterov <[email protected]> wrote:
> Question: why do we need to disable irqs in exit_io_context() ?
iirc it was to prevent IRQ-context code from getting a hold on
current->io_context and then playing around with it while it's getting
freed.
In practice, a preempt_disable() there would probably suffice (ie: if this
CPU is running an ISR, it won't be running exit_io_context as well). But
local_irq_disable() is clearer, albeit more expensive.
> Why do we need ->alloc_lock to clear io_context->task ?
To prevent races against elv_unregister(), I guess.
> In other words, could you explain why the patch below is not correct.
>
> Thanks,
>
> Oleg.
>
> --- 2.6.18-rc4/block/ll_rw_blk.c~6_exit 2006-08-20 19:30:10.000000000 +0400
> +++ 2.6.18-rc4/block/ll_rw_blk.c 2006-08-20 22:34:46.000000000 +0400
> @@ -3580,25 +3580,22 @@ EXPORT_SYMBOL(put_io_context);
> /* Called by the exitting task */
> void exit_io_context(void)
> {
> - unsigned long flags;
> struct io_context *ioc;
> struct cfq_io_context *cic;
>
> - local_irq_save(flags);
> task_lock(current);
> ioc = current->io_context;
> current->io_context = NULL;
> - ioc->task = NULL;
> task_unlock(current);
> - local_irq_restore(flags);
>
> + ioc->task = NULL;
> if (ioc->aic && ioc->aic->exit)
> ioc->aic->exit(ioc->aic);
> if (ioc->cic_root.rb_node != NULL) {
> cic = rb_entry(rb_first(&ioc->cic_root), struct cfq_io_context, rb_node);
> cic->exit(ioc);
> }
> -
> +
> put_io_context(ioc);
> }
>
An exiting task or process which didn't do I/O yet has no io context,
elv_unregister() should check it is not NULL.
Signed-off-by: Oleg Nesterov <[email protected]>
--- 2.6.18-rc4/block/elevator.c~8_crash 2006-07-16 01:53:08.000000000 +0400
+++ 2.6.18-rc4/block/elevator.c 2006-08-22 21:13:06.000000000 +0400
@@ -765,7 +765,8 @@ void elv_unregister(struct elevator_type
read_lock(&tasklist_lock);
do_each_thread(g, p) {
task_lock(p);
- e->ops.trim(p->io_context);
+ if (p->io_context)
+ e->ops.trim(p->io_context);
task_unlock(p);
} while_each_thread(g, p);
read_unlock(&tasklist_lock);
On Tue, Aug 22 2006, Oleg Nesterov wrote:
> An exiting task or process which didn't do I/O yet has no io context,
> elv_unregister() should check it is not NULL.
>
> Signed-off-by: Oleg Nesterov <[email protected]>
>
> --- 2.6.18-rc4/block/elevator.c~8_crash 2006-07-16 01:53:08.000000000 +0400
> +++ 2.6.18-rc4/block/elevator.c 2006-08-22 21:13:06.000000000 +0400
> @@ -765,7 +765,8 @@ void elv_unregister(struct elevator_type
> read_lock(&tasklist_lock);
> do_each_thread(g, p) {
> task_lock(p);
> - e->ops.trim(p->io_context);
> + if (p->io_context)
> + e->ops.trim(p->io_context);
> task_unlock(p);
> } while_each_thread(g, p);
> read_unlock(&tasklist_lock);
Good catch, applied. Thanks! I wonder why this hasn't been seen on
switching io schedulers, which makes me a little suspicious. Did you see
it trigger?
--
Jens Axboe
On 08/21, Andrew Morton wrote:
>
> On Mon, 21 Aug 2006 00:50:34 +0400
> Oleg Nesterov <[email protected]> wrote:
>
> > Question: why do we need to disable irqs in exit_io_context() ?
>
> iirc it was to prevent IRQ-context code from getting a hold on
> current->io_context and then playing around with it while it's getting
> freed.
>
> In practice, a preempt_disable() there would probably suffice (ie: if this
> CPU is running an ISR, it won't be running exit_io_context as well). But
> local_irq_disable() is clearer, albeit more expensive.
Looks like my understanding of block I/O is even less than nothing :(
irq_disable() can't prevent IRQ-context code from playing with our io_context
on other CPUs. But this doesn't matter, we are only changing ioc->task.
What does matter is that we are clearing the pointer to it: task_struct->io_context,
and IRQ should not look at it, no?
Or... Do you mean it is possible to submit I/O from IRQ on behalf of current ?????
In that case current_io_context() will re-instantiate ->io_context after irq_enable().
What is exit_io_context() for then? It is only called from do_exit() when we know
the task won't start IO.
(please don't beat a newbie)
> > Why do we need ->alloc_lock to clear io_context->task ?
>
> To prevent races against elv_unregister(), I guess.
elv_unregister() takes task_lock(), should see ->io_context == NULL.
Oleg.