2011-04-11 14:53:33

by Steven Whitehouse

Subject: Strange block/scsi/workqueue issue

Hi,

Sometime around -rc1 (I'm not sure exactly, but it seemed to start
around then) my test box stopped booting. Lacking a serial console,
I didn't get around to looking further before I went on holiday, but
since the problem is still there in Linus' latest tree, I've been
looking again today.

My original problem was that the messages were scrolling off the screen
before I could read them. Trying to capture them with a video camera
was unsuccessful.

Today I used a serial console, and magically it seemed that the initial
boot issue had gone away - so I guess there is at least an element of
timing dependence. However, not very long after boot, I got the attached
messages. These look suspiciously similar (bearing in mind I could only
see the last few lines) to the messages I was getting with the boot
issue.

I should also say that the box was idle aside from the usual Fedora 14
start-up tasks, so there was not much I/O going on at the time.

Is the workqueue by any chance trying to wait synchronously for itself
to finish?

Steve.

INFO: task kworker/2:1:382 blocked for more than 120 seconds.
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
kworker/2:1 D ffff8800cb5d2500 4528 382 2 0x00000000
ffff8800c88d5880 0000000000000086 00000000001d2500 ffff8800c88d57c8
0000000000000086 ffff8800c88d5fd8 ffff8800c92d8b80 ffff8800c88d5fd8
ffff8800c88d4000 ffff8800c88d4000 ffff8800c88d4000 ffff8800c88d5fd8
Call Trace:
[<ffffffff8107fdea>] ? finish_task_switch+0x3a/0x110
[<ffffffff8167ebe6>] ? _raw_spin_unlock_irq+0x46/0x70
[<ffffffff8107fe1f>] ? finish_task_switch+0x6f/0x110
[<ffffffff8107fdea>] ? finish_task_switch+0x3a/0x110
[<ffffffff8167b8e5>] schedule_timeout+0x295/0x310
[<ffffffff816826b9>] ? sub_preempt_count+0xa9/0xe0
[<ffffffff8167a650>] wait_for_common+0x120/0x170
[<ffffffff810898c0>] ? try_to_wake_up+0x360/0x360
[<ffffffff8167a748>] wait_for_completion+0x18/0x20
[<ffffffff810aba4c>] wait_on_cpu_work+0xec/0x100
[<ffffffff810aa460>] ? do_work_for_cpu+0x30/0x30
[<ffffffff810abb3b>] wait_on_work+0xdb/0x150
[<ffffffff810aba60>] ? wait_on_cpu_work+0x100/0x100
[<ffffffff810abc33>] __cancel_work_timer+0x83/0x130
[<ffffffff810abced>] cancel_delayed_work_sync+0xd/0x10
[<ffffffff813b24b4>] blk_sync_queue+0x24/0x50
[<ffffffff813b24ef>] blk_cleanup_queue+0xf/0x60
[<ffffffff81479a89>] scsi_free_queue+0x9/0x10
[<ffffffff8147d30b>] scsi_device_dev_release_usercontext+0xeb/0x140
[<ffffffff810cbd5d>] ? trace_hardirqs_on_caller+0x14d/0x190
[<ffffffff8147d220>] ? scsi_device_cls_release+0x20/0x20
[<ffffffff810ac826>] execute_in_process_context+0x86/0xa0
[<ffffffff8147d1f7>] scsi_device_dev_release+0x17/0x20
[<ffffffff814609f2>] device_release+0x22/0x90
[<ffffffff8167ef98>] ? retint_restore_args+0x13/0x13
[<ffffffff813c8165>] kobject_release+0x45/0x90
[<ffffffff813c8120>] ? kobject_del+0x40/0x40
[<ffffffff813c9767>] kref_put+0x37/0x70
[<ffffffff813c8027>] kobject_put+0x27/0x60
[<ffffffff81460822>] put_device+0x12/0x20
[<ffffffff81478bd9>] scsi_request_fn+0xb9/0x4a0
[<ffffffff813aff2a>] __blk_run_queue+0x6a/0x110
[<ffffffff813b1f66>] blk_delay_work+0x26/0x40
[<ffffffff810aa9c7>] process_one_work+0x197/0x520
[<ffffffff810aa961>] ? process_one_work+0x131/0x520
[<ffffffff813b1f40>] ? blk_make_request+0x90/0x90
[<ffffffff810acfec>] worker_thread+0x15c/0x330
[<ffffffff810ace90>] ? manage_workers.clone.20+0x240/0x240
[<ffffffff810ace90>] ? manage_workers.clone.20+0x240/0x240
[<ffffffff810b1f16>] kthread+0xa6/0xb0
[<ffffffff81687064>] kernel_thread_helper+0x4/0x10
[<ffffffff8107fe1f>] ? finish_task_switch+0x6f/0x110
[<ffffffff8167ebe6>] ? _raw_spin_unlock_irq+0x46/0x70
[<ffffffff8167ef98>] ? retint_restore_args+0x13/0x13
[<ffffffff810b1e70>] ? __init_kthread_worker+0x70/0x70
[<ffffffff81687060>] ? gs_change+0x13/0x13
INFO: lockdep is turned off.


2011-04-11 17:18:12

by Tejun Heo

Subject: Re: Strange block/scsi/workqueue issue

Hello,

(cc'ing James. The original message is http://lkml.org/lkml/2011/4/11/175 )

Please read from the bottom up.

On Mon, Apr 11, 2011 at 03:56:03PM +0100, Steven Whitehouse wrote:
> [<ffffffff8167b8e5>] schedule_timeout+0x295/0x310
> [<ffffffff8167a650>] wait_for_common+0x120/0x170
> [<ffffffff8167a748>] wait_for_completion+0x18/0x20
> [<ffffffff810aba4c>] wait_on_cpu_work+0xec/0x100
> [<ffffffff810abb3b>] wait_on_work+0xdb/0x150
> [<ffffffff810abc33>] __cancel_work_timer+0x83/0x130
> [<ffffffff810abced>] cancel_delayed_work_sync+0xd/0x10

4. which in turn tries to sync cancel q->delay_work. Oops, deadlock.

> [<ffffffff813b24b4>] blk_sync_queue+0x24/0x50

3. and calls into blk_sync_queue()

> [<ffffffff813b24ef>] blk_cleanup_queue+0xf/0x60
> [<ffffffff81479a89>] scsi_free_queue+0x9/0x10
> [<ffffffff8147d30b>] scsi_device_dev_release_usercontext+0xeb/0x140
> [<ffffffff810ac826>] execute_in_process_context+0x86/0xa0

2. It triggers SCSI device release

> [<ffffffff8147d1f7>] scsi_device_dev_release+0x17/0x20
> [<ffffffff814609f2>] device_release+0x22/0x90
> [<ffffffff813c8165>] kobject_release+0x45/0x90
> [<ffffffff813c9767>] kref_put+0x37/0x70
> [<ffffffff813c8027>] kobject_put+0x27/0x60
> [<ffffffff81460822>] put_device+0x12/0x20
> [<ffffffff81478bd9>] scsi_request_fn+0xb9/0x4a0
> [<ffffffff813aff2a>] __blk_run_queue+0x6a/0x110
> [<ffffffff813b1f66>] blk_delay_work+0x26/0x40

1. Workqueue starting execution of q->delay_work and scsi_request_fn()
is run from there.

> [<ffffffff810aa9c7>] process_one_work+0x197/0x520
> [<ffffffff810acfec>] worker_thread+0x15c/0x330
> [<ffffffff810b1f16>] kthread+0xa6/0xb0
> [<ffffffff81687064>] kernel_thread_helper+0x4/0x10

So, q->delay_work ends up waiting for itself. I'd like to blame SCSI
(as it also fits my agenda to kill execute_in_process_context ;-) for
diving all the way into blk_cleanup_queue() directly from request_fn.

Does the following patch fix the problem?

Thanks.

Subject: scsi: don't use execute_in_process_context()

SCSI is the only subsystem which uses execute_in_process_context(), and
its use is racy against module unload, i.e. the reap work is not
properly flushed and could still be running after the scsi module is
unloaded.

Although execute_in_process_context() can be more efficient when the
caller already has a context, in this case, the call paths are quite
cold and the difference is practically meaningless. With commit
c8efcc25 (workqueue: allow chained queueing during destruction), the
race condition can easily be fixed by using a dedicated workqueue and
destroying it on module unload.

Create and use scsi_wq instead of execute_in_process_context().

* scsi_device->ew is replaced with release_work. scsi_target->ew is
replaced with reap_work.

* Both works are initialized with the respective release/reap function
during device/target init. scsi_target_reap_usercontext() is moved
upwards to avoid needing forward declaration.

* scsi_alloc_target() now explicitly flushes the reap_work of the
found dying target before putting it instead of depending on
flush_scheduled_work().

For more info on the issues, please read the following thread.

http://thread.gmane.org/gmane.linux.scsi/62923

Signed-off-by: Tejun Heo <[email protected]>
---
 drivers/scsi/scsi.c        |   15 +++++++++++++--
 drivers/scsi/scsi_scan.c   |   26 +++++++++++++-------------
 drivers/scsi/scsi_sysfs.c  |    8 +++++---
 include/scsi/scsi_device.h |    6 ++++--
 4 files changed, 35 insertions(+), 20 deletions(-)

Index: work/drivers/scsi/scsi_scan.c
===================================================================
--- work.orig/drivers/scsi/scsi_scan.c
+++ work/drivers/scsi/scsi_scan.c
@@ -362,6 +362,16 @@ int scsi_is_target_device(const struct d
 }
 EXPORT_SYMBOL(scsi_is_target_device);
 
+static void scsi_target_reap_usercontext(struct work_struct *work)
+{
+        struct scsi_target *starget =
+                container_of(work, struct scsi_target, reap_work);
+
+        transport_remove_device(&starget->dev);
+        device_del(&starget->dev);
+        scsi_target_destroy(starget);
+}
+
 static struct scsi_target *__scsi_find_target(struct device *parent,
                 int channel, uint id)
 {
@@ -427,6 +437,7 @@ static struct scsi_target *scsi_alloc_ta
         starget->state = STARGET_CREATED;
         starget->scsi_level = SCSI_2;
         starget->max_target_blocked = SCSI_DEFAULT_TARGET_BLOCKED;
+        INIT_WORK(&starget->reap_work, scsi_target_reap_usercontext);
 retry:
         spin_lock_irqsave(shost->host_lock, flags);
 
@@ -462,21 +473,11 @@ static struct scsi_target *scsi_alloc_ta
         }
         /* Unfortunately, we found a dying target; need to
          * wait until it's dead before we can get a new one */
+        flush_work(&found_target->reap_work);
         put_device(&found_target->dev);
-        flush_scheduled_work();
         goto retry;
 }
 
-static void scsi_target_reap_usercontext(struct work_struct *work)
-{
-        struct scsi_target *starget =
-                container_of(work, struct scsi_target, ew.work);
-
-        transport_remove_device(&starget->dev);
-        device_del(&starget->dev);
-        scsi_target_destroy(starget);
-}
-
 /**
  * scsi_target_reap - check to see if target is in use and destroy if not
  * @starget: target to be checked
@@ -507,8 +508,7 @@ void scsi_target_reap(struct scsi_target
         if (state == STARGET_CREATED)
                 scsi_target_destroy(starget);
         else
-                execute_in_process_context(scsi_target_reap_usercontext,
-                                           &starget->ew);
+                queue_work(scsi_wq, &starget->reap_work);
 }
 
 /**
Index: work/drivers/scsi/scsi_sysfs.c
===================================================================
--- work.orig/drivers/scsi/scsi_sysfs.c
+++ work/drivers/scsi/scsi_sysfs.c
@@ -300,7 +300,7 @@ static void scsi_device_dev_release_user
         struct list_head *this, *tmp;
         unsigned long flags;
 
-        sdev = container_of(work, struct scsi_device, ew.work);
+        sdev = container_of(work, struct scsi_device, release_work);
 
         parent = sdev->sdev_gendev.parent;
         starget = to_scsi_target(parent);
@@ -343,8 +343,8 @@ static void scsi_device_dev_release_user
 static void scsi_device_dev_release(struct device *dev)
 {
         struct scsi_device *sdp = to_scsi_device(dev);
-        execute_in_process_context(scsi_device_dev_release_usercontext,
-                                   &sdp->ew);
+
+        queue_work(scsi_wq, &sdp->release_work);
 }
 
 static struct class sdev_class = {
@@ -1069,6 +1069,8 @@ void scsi_sysfs_device_initialize(struct
         dev_set_name(&sdev->sdev_dev, "%d:%d:%d:%d",
                      sdev->host->host_no, sdev->channel, sdev->id, sdev->lun);
         sdev->scsi_level = starget->scsi_level;
+        INIT_WORK(&sdev->release_work, scsi_device_dev_release_usercontext);
+
         transport_setup_device(&sdev->sdev_gendev);
         spin_lock_irqsave(shost->host_lock, flags);
         list_add_tail(&sdev->same_target_siblings, &starget->devices);
Index: work/include/scsi/scsi_device.h
===================================================================
--- work.orig/include/scsi/scsi_device.h
+++ work/include/scsi/scsi_device.h
@@ -168,7 +168,7 @@ struct scsi_device {
         struct device           sdev_gendev,
                                 sdev_dev;
 
-        struct execute_work     ew; /* used to get process context on put */
+        struct work_struct      release_work; /* for process context on put */
 
         struct scsi_dh_data     *scsi_dh_data;
         enum scsi_device_state sdev_state;
@@ -259,7 +259,7 @@ struct scsi_target {
 #define SCSI_DEFAULT_TARGET_BLOCKED     3
 
         char                    scsi_level;
-        struct execute_work     ew;
+        struct work_struct      reap_work;
         enum scsi_target_state  state;
         void                    *hostdata; /* available to low-level driver */
         unsigned long           starget_data[0]; /* for the transport */
@@ -277,6 +277,8 @@ static inline struct scsi_target *scsi_t
 #define starget_printk(prefix, starget, fmt, a...)      \
         dev_printk(prefix, &(starget)->dev, fmt, ##a)
 
+extern struct workqueue_struct *scsi_wq;
+
 extern struct scsi_device *__scsi_add_device(struct Scsi_Host *,
                 uint, uint, uint, void *hostdata);
 extern int scsi_add_device(struct Scsi_Host *host, uint channel,
Index: work/drivers/scsi/scsi.c
===================================================================
--- work.orig/drivers/scsi/scsi.c
+++ work/drivers/scsi/scsi.c
@@ -70,6 +70,11 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/scsi.h>
 
+/*
+ * Utility multithreaded workqueue for SCSI.
+ */
+struct workqueue_struct *scsi_wq;
+
 static void scsi_done(struct scsi_cmnd *cmd);
 
 /*
@@ -1306,11 +1311,14 @@ MODULE_PARM_DESC(scsi_logging_level, "a
 
 static int __init init_scsi(void)
 {
-        int error;
+        int error = -ENOMEM;
 
+        scsi_wq = alloc_workqueue("scsi", 0, 0);
+        if (!scsi_wq)
+                return error;
         error = scsi_init_queue();
         if (error)
-                return error;
+                goto cleanup_wq;
         error = scsi_init_procfs();
         if (error)
                 goto cleanup_queue;
@@ -1342,6 +1350,8 @@ cleanup_procfs:
         scsi_exit_procfs();
 cleanup_queue:
         scsi_exit_queue();
+cleanup_wq:
+        destroy_workqueue(scsi_wq);
         printk(KERN_ERR "SCSI subsystem failed to initialize, error = %d\n",
                -error);
         return error;
@@ -1356,6 +1366,7 @@ static void __exit exit_scsi(void)
         scsi_exit_devinfo();
         scsi_exit_procfs();
         scsi_exit_queue();
+        destroy_workqueue(scsi_wq);
 }
 
 subsys_initcall(init_scsi);

2011-04-11 17:29:18

by Jens Axboe

Subject: Re: Strange block/scsi/workqueue issue

On 2011-04-11 19:18, Tejun Heo wrote:
> Hello,
>
> (cc'ing James. The original message is http://lkml.org/lkml/2011/4/11/175 )
>
> Please read from the bottom up.
>
> On Mon, Apr 11, 2011 at 03:56:03PM +0100, Steven Whitehouse wrote:
>> [<ffffffff8167b8e5>] schedule_timeout+0x295/0x310
>> [<ffffffff8167a650>] wait_for_common+0x120/0x170
>> [<ffffffff8167a748>] wait_for_completion+0x18/0x20
>> [<ffffffff810aba4c>] wait_on_cpu_work+0xec/0x100
>> [<ffffffff810abb3b>] wait_on_work+0xdb/0x150
>> [<ffffffff810abc33>] __cancel_work_timer+0x83/0x130
>> [<ffffffff810abced>] cancel_delayed_work_sync+0xd/0x10
>
> 4. which in turn tries to sync cancel q->delay_work. Oops, deadlock.
>
>> [<ffffffff813b24b4>] blk_sync_queue+0x24/0x50
>
> 3. and calls into blk_sync_queue()
>
>> [<ffffffff813b24ef>] blk_cleanup_queue+0xf/0x60
>> [<ffffffff81479a89>] scsi_free_queue+0x9/0x10
>> [<ffffffff8147d30b>] scsi_device_dev_release_usercontext+0xeb/0x140
>> [<ffffffff810ac826>] execute_in_process_context+0x86/0xa0
>
> 2. It triggers SCSI device release
>
>> [<ffffffff8147d1f7>] scsi_device_dev_release+0x17/0x20
>> [<ffffffff814609f2>] device_release+0x22/0x90
>> [<ffffffff813c8165>] kobject_release+0x45/0x90
>> [<ffffffff813c9767>] kref_put+0x37/0x70
>> [<ffffffff813c8027>] kobject_put+0x27/0x60
>> [<ffffffff81460822>] put_device+0x12/0x20
>> [<ffffffff81478bd9>] scsi_request_fn+0xb9/0x4a0
>> [<ffffffff813aff2a>] __blk_run_queue+0x6a/0x110
>> [<ffffffff813b1f66>] blk_delay_work+0x26/0x40
>
> 1. Workqueue starting execution of q->delay_work and scsi_request_fn()
> is run from there.
>
>> [<ffffffff810aa9c7>] process_one_work+0x197/0x520
>> [<ffffffff810acfec>] worker_thread+0x15c/0x330
>> [<ffffffff810b1f16>] kthread+0xa6/0xb0
>> [<ffffffff81687064>] kernel_thread_helper+0x4/0x10
>
> So, q->delay_work ends up waiting for itself. I'd like to blame SCSI
> (as it also fits my agenda to kill execute_in_process_context ;-) for
> diving all the way into blk_cleanup_queue() directly from request_fn.
>
> Does the following patch fix the problem?

Thanks, that looks a lot saner. This is/was a time bomb waiting to blow
up.

--
Jens Axboe

2011-04-11 17:49:24

by Steven Whitehouse

Subject: Re: Strange block/scsi/workqueue issue

Hi,

On Tue, 2011-04-12 at 02:18 +0900, Tejun Heo wrote:
> Hello,
>
> (cc'ing James. The original message is http://lkml.org/lkml/2011/4/11/175 )
>
> Please read from the bottom up.
>
> On Mon, Apr 11, 2011 at 03:56:03PM +0100, Steven Whitehouse wrote:
> > [<ffffffff8167b8e5>] schedule_timeout+0x295/0x310
> > [<ffffffff8167a650>] wait_for_common+0x120/0x170
> > [<ffffffff8167a748>] wait_for_completion+0x18/0x20
> > [<ffffffff810aba4c>] wait_on_cpu_work+0xec/0x100
> > [<ffffffff810abb3b>] wait_on_work+0xdb/0x150
> > [<ffffffff810abc33>] __cancel_work_timer+0x83/0x130
> > [<ffffffff810abced>] cancel_delayed_work_sync+0xd/0x10
>
> 4. which in turn tries to sync cancel q->delay_work. Oops, deadlock.
>
> > [<ffffffff813b24b4>] blk_sync_queue+0x24/0x50
>
> 3. and calls into blk_sync_queue()
>
> > [<ffffffff813b24ef>] blk_cleanup_queue+0xf/0x60
> > [<ffffffff81479a89>] scsi_free_queue+0x9/0x10
> > [<ffffffff8147d30b>] scsi_device_dev_release_usercontext+0xeb/0x140
> > [<ffffffff810ac826>] execute_in_process_context+0x86/0xa0
>
> 2. It triggers SCSI device release
>
> > [<ffffffff8147d1f7>] scsi_device_dev_release+0x17/0x20
> > [<ffffffff814609f2>] device_release+0x22/0x90
> > [<ffffffff813c8165>] kobject_release+0x45/0x90
> > [<ffffffff813c9767>] kref_put+0x37/0x70
> > [<ffffffff813c8027>] kobject_put+0x27/0x60
> > [<ffffffff81460822>] put_device+0x12/0x20
> > [<ffffffff81478bd9>] scsi_request_fn+0xb9/0x4a0
> > [<ffffffff813aff2a>] __blk_run_queue+0x6a/0x110
> > [<ffffffff813b1f66>] blk_delay_work+0x26/0x40
>
> 1. Workqueue starting execution of q->delay_work and scsi_request_fn()
> is run from there.
>
> > [<ffffffff810aa9c7>] process_one_work+0x197/0x520
> > [<ffffffff810acfec>] worker_thread+0x15c/0x330
> > [<ffffffff810b1f16>] kthread+0xa6/0xb0
> > [<ffffffff81687064>] kernel_thread_helper+0x4/0x10
>
> So, q->delay_work ends up waiting for itself. I'd like to blame SCSI
> (as it also fits my agenda to kill execute_in_process_context ;-) for
> diving all the way into blk_cleanup_queue() directly from request_fn.
>
> Does the following patch fix the problem?
>
Unfortunately not:

scsi 0:0:32:0: Enclosure DP BACKPLANE 1.07 PQ: 0 ANSI: 5
scsi 0:2:0:0: Direct-Access DELL PERC 6/i 1.22 PQ: 0 ANSI: 5
scsi 0:2:1:0: Direct-Access DELL PERC 6/i 1.22 PQ: 0 ANSI: 5
------------[ cut here ]------------
WARNING: at lib/kref.c:34 kref_get+0x2d/0x30()
Hardware name: PowerEdge R710
Modules linked in:
Pid: 12, comm: kworker/2:0 Not tainted 2.6.39-rc2+ #188
Call Trace:
[<ffffffff8108fa9a>] warn_slowpath_common+0x7a/0xb0
[<ffffffff8108fae5>] warn_slowpath_null+0x15/0x20
[<ffffffff813c97cd>] kref_get+0x2d/0x30
[<ffffffff813c81ca>] kobject_get+0x1a/0x30
[<ffffffff814607f4>] get_device+0x14/0x20
[<ffffffff81478b57>] scsi_request_fn+0x37/0x4a0
[<ffffffff813aff2a>] __blk_run_queue+0x6a/0x110
[<ffffffff813b1f66>] blk_delay_work+0x26/0x40
[<ffffffff810aa9c7>] process_one_work+0x197/0x520
[<ffffffff810aa961>] ? process_one_work+0x131/0x520
[<ffffffff813b1f40>] ? blk_make_request+0x90/0x90
[<ffffffff810acfec>] worker_thread+0x15c/0x330
[<ffffffff810ace90>] ? manage_workers.clone.20+0x240/0x240
[<ffffffff810ace90>] ? manage_workers.clone.20+0x240/0x240
[<ffffffff810b1f16>] kthread+0xa6/0xb0
[<ffffffff816870e4>] kernel_thread_helper+0x4/0x10
[<ffffffff8107fe1f>] ? finish_task_switch+0x6f/0x110
[<ffffffff8167ec66>] ? _raw_spin_unlock_irq+0x46/0x70
[<ffffffff8167f018>] ? retint_restore_args+0x13/0x13
[<ffffffff810b1e70>] ? __init_kthread_worker+0x70/0x70
[<ffffffff816870e0>] ? gs_change+0x13/0x13
---[ end trace 3681e9da2630a94b ]---

Steve.

2011-04-12 00:14:36

by Tejun Heo

Subject: Re: Strange block/scsi/workqueue issue

Hello,

On Mon, Apr 11, 2011 at 06:52:10PM +0100, Steven Whitehouse wrote:
> WARNING: at lib/kref.c:34 kref_get+0x2d/0x30()
> Hardware name: PowerEdge R710
> Modules linked in:
> Pid: 12, comm: kworker/2:0 Not tainted 2.6.39-rc2+ #188
> Call Trace:
> [<ffffffff8108fa9a>] warn_slowpath_common+0x7a/0xb0
> [<ffffffff8108fae5>] warn_slowpath_null+0x15/0x20
> [<ffffffff813c97cd>] kref_get+0x2d/0x30
> [<ffffffff813c81ca>] kobject_get+0x1a/0x30
> [<ffffffff814607f4>] get_device+0x14/0x20
> [<ffffffff81478b57>] scsi_request_fn+0x37/0x4a0
> [<ffffffff813aff2a>] __blk_run_queue+0x6a/0x110
> [<ffffffff813b1f66>] blk_delay_work+0x26/0x40
> [<ffffffff810aa9c7>] process_one_work+0x197/0x520
> [<ffffffff810acfec>] worker_thread+0x15c/0x330
> [<ffffffff810b1f16>] kthread+0xa6/0xb0
> [<ffffffff816870e4>] kernel_thread_helper+0x4/0x10
> ---[ end trace 3681e9da2630a94b ]---

Hmm, it could be that the root cause of the problem is a
premature/double put of the scsi_device. Without the patch, that makes
scsi_request_fn() call into the device destruction path prematurely,
triggering the deadlock; with the patch, the deadlock is gone, but the
refcount reaches zero prematurely, triggering the kref warning on the
next request.

The problem doesn't seem widespread so something about the setup is
peculiar. Steven, can you please detail the setup (and steps needed
to trigger the problem) and attach the full boot log? James, any
ideas?

Thanks.

--
tejun

2011-04-12 00:48:03

by James Bottomley

Subject: Re: Strange block/scsi/workqueue issue

On Tue, 2011-04-12 at 02:18 +0900, Tejun Heo wrote:
> Hello,
>
> (cc'ing James. The original message is http://lkml.org/lkml/2011/4/11/175 )
>
> Please read from the bottom up.
>
> On Mon, Apr 11, 2011 at 03:56:03PM +0100, Steven Whitehouse wrote:
> > [<ffffffff8167b8e5>] schedule_timeout+0x295/0x310
> > [<ffffffff8167a650>] wait_for_common+0x120/0x170
> > [<ffffffff8167a748>] wait_for_completion+0x18/0x20
> > [<ffffffff810aba4c>] wait_on_cpu_work+0xec/0x100
> > [<ffffffff810abb3b>] wait_on_work+0xdb/0x150
> > [<ffffffff810abc33>] __cancel_work_timer+0x83/0x130
> > [<ffffffff810abced>] cancel_delayed_work_sync+0xd/0x10
>
> 4. which in turn tries to sync cancel q->delay_work. Oops, deadlock.
>
> > [<ffffffff813b24b4>] blk_sync_queue+0x24/0x50
>
> 3. and calls into blk_sync_queue()
>
> > [<ffffffff813b24ef>] blk_cleanup_queue+0xf/0x60
> > [<ffffffff81479a89>] scsi_free_queue+0x9/0x10
> > [<ffffffff8147d30b>] scsi_device_dev_release_usercontext+0xeb/0x140
> > [<ffffffff810ac826>] execute_in_process_context+0x86/0xa0
>
> 2. It triggers SCSI device release
>
> > [<ffffffff8147d1f7>] scsi_device_dev_release+0x17/0x20
> > [<ffffffff814609f2>] device_release+0x22/0x90
> > [<ffffffff813c8165>] kobject_release+0x45/0x90
> > [<ffffffff813c9767>] kref_put+0x37/0x70
> > [<ffffffff813c8027>] kobject_put+0x27/0x60
> > [<ffffffff81460822>] put_device+0x12/0x20
> > [<ffffffff81478bd9>] scsi_request_fn+0xb9/0x4a0
> > [<ffffffff813aff2a>] __blk_run_queue+0x6a/0x110
> > [<ffffffff813b1f66>] blk_delay_work+0x26/0x40
>
> 1. Workqueue starting execution of q->delay_work and scsi_request_fn()
> is run from there.
>
> > [<ffffffff810aa9c7>] process_one_work+0x197/0x520
> > [<ffffffff810acfec>] worker_thread+0x15c/0x330
> > [<ffffffff810b1f16>] kthread+0xa6/0xb0
> > [<ffffffff81687064>] kernel_thread_helper+0x4/0x10
>
> So, q->delay_work ends up waiting for itself. I'd like to blame SCSI
> (as it also fits my agenda to kill execute_in_process_context ;-) for
> diving all the way into blk_cleanup_queue() directly from request_fn.

Actually, I don't think it's anything to do with the user process stuff.
The problem seems to be that the block delay function ends up being the
last user of the SCSI device, so it does the final put of the sdev when
it's finished processing. This will trigger queue destruction
(blk_cleanup_queue) and so on with your analysis.

The problem seems to be that with the new workqueue changes, the queue
itself may no longer be the last holder of a reference on the sdev
because the queue destruction is in the sdev release function and a
queue cannot now be destroyed from its own delayed work. This is a bit
contrary to the principles SCSI was using, which was that we drive queue
lifetime from the sdev, not vice versa.

The obvious fix seems to be to move queue destruction earlier, but I'm
loth to do that because it will get us back into the old situation where
we no longer have a queue to do the teardown work.

How about moving the blk_sync_queue() call out of blk_cleanup_queue()?
Since that's the direct cause of this.

James

2011-04-12 02:51:54

by Tejun Heo

[permalink] [raw]
Subject: Re: Strange block/scsi/workqueue issue

Hello, James.

On Mon, Apr 11, 2011 at 07:47:56PM -0500, James Bottomley wrote:
> Actually, I don't think it's anything to do with the user process stuff.
> The problem seems to be that the block delay function ends up being the
> last user of the SCSI device, so it does the final put of the sdev when
> it's finished processing. This will trigger queue destruction
> (blk_cleanup_queue) and so on with your analysis.

Hmm... this I can understand.

> The problem seems to be that with the new workqueue changes, the queue
> itself may no longer be the last holder of a reference on the sdev
> because the queue destruction is in the sdev release function and a
> queue cannot now be destroyed from its own delayed work. This is a bit
> contrary to the principles SCSI was using, which was that we drive queue
> lifetime from the sdev, not vice versa.

But I'm confused here. Why does it make any difference whether the
release operation is in the request_fn context or not? What makes
SCSI refcounting different from others?

Thanks.

--
tejun

2011-04-12 04:49:24

by James Bottomley

Subject: Re: Strange block/scsi/workqueue issue

On Tue, 2011-04-12 at 11:51 +0900, Tejun Heo wrote:
> Hello, James.
>
> On Mon, Apr 11, 2011 at 07:47:56PM -0500, James Bottomley wrote:
> > Actually, I don't think it's anything to do with the user process stuff.
> > The problem seems to be that the block delay function ends up being the
> > last user of the SCSI device, so it does the final put of the sdev when
> > it's finished processing. This will trigger queue destruction
> > (blk_cleanup_queue) and so on with your analysis.
>
> Hmm... this I can understand.
>
> > The problem seems to be that with the new workqueue changes, the queue
> > itself may no longer be the last holder of a reference on the sdev
> > because the queue destruction is in the sdev release function and a
> > queue cannot now be destroyed from its own delayed work. This is a bit
> > contrary to the principles SCSI was using, which was that we drive queue
> > lifetime from the sdev, not vice versa.
>
> But confused here. Why does it make any difference whether the
> release operation is in the request_fn context or not? What makes
> SCSI refcounting different from others?

I didn't say it did. SCSI refcounting is fairly standard.

The problem isn't really anything to do with SCSI ... it's the way block
queue destruction must now be called. The block queue destruction
includes a synchronous flush of the work queue. That means it can't be
called from the executing workqueue without deadlocking. The last put
of a SCSI device destroys the queue. This now means that the last put
of the SCSI device can't be in the block delay work path. However, as
the device shuts down that can very well wind up happening if
blk_delay_queue() ends up being called as the device is dying.

The entangled deadlock seems to have been introduced by commit
3cca6dc1c81e2407928dc4c6105252146fd3924f; prior to that, there was no
synchronous cancel in the destroy path.

A fix might be to shunt more stuff off to workqueues, but that's
producing a more complex system which would be prone to entanglements
that would be even harder to spot.

Perhaps a better solution is just not to use sync cancellations in
block? As long as the work in the queue holds a queue ref, they can be
done asynchronously.

James

2011-04-12 05:02:55

by James Bottomley

Subject: Re: Strange block/scsi/workqueue issue

On Mon, 2011-04-11 at 23:49 -0500, James Bottomley wrote:
> The entangled deadlock seems to have been introduced by commit
> 3cca6dc1c81e2407928dc4c6105252146fd3924f; prior to that, there was no
> synchronous cancel in the destroy path.
>
> A fix might be to shunt more stuff off to workqueues, but that's
> producing a more complex system which would be prone to entanglements
> that would be even harder to spot.
>
> Perhaps a better solution is just not to use sync cancellations in
> block? As long as the work in the queue holds a queue ref, they can be
> done asynchronously.

So this is a possible implementation, does this fix the problem?
(compile tested only).

James

---

diff --git a/block/blk-core.c b/block/blk-core.c
index 90f22cc..f600f88 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -219,6 +219,7 @@ static void blk_delay_work(struct work_struct *work)
         spin_lock_irq(q->queue_lock);
         __blk_run_queue(q, false);
         spin_unlock_irq(q->queue_lock);
+        blk_put_queue(q);
 }
 
 /**
@@ -233,7 +234,8 @@ static void blk_delay_work(struct work_struct *work)
  */
 void blk_delay_queue(struct request_queue *q, unsigned long msecs)
 {
-        schedule_delayed_work(&q->delay_work, msecs_to_jiffies(msecs));
+        if (!blk_get_queue(q))
+                schedule_delayed_work(&q->delay_work, msecs_to_jiffies(msecs));
 }
 EXPORT_SYMBOL(blk_delay_queue);
 
@@ -271,7 +273,8 @@ EXPORT_SYMBOL(blk_start_queue);
  **/
 void blk_stop_queue(struct request_queue *q)
 {
-        __cancel_delayed_work(&q->delay_work);
+        if (__cancel_delayed_work(&q->delay_work))
+                blk_put_queue(q);
         queue_flag_set(QUEUE_FLAG_STOPPED, q);
 }
 EXPORT_SYMBOL(blk_stop_queue);
@@ -297,7 +300,8 @@ EXPORT_SYMBOL(blk_stop_queue);
 void blk_sync_queue(struct request_queue *q)
 {
         del_timer_sync(&q->timeout);
-        cancel_delayed_work_sync(&q->delay_work);
+        if (__cancel_delayed_work(&q->delay_work))
+                blk_put_queue(q);
         queue_sync_plugs(q);
 }
 EXPORT_SYMBOL(blk_sync_queue);

2011-04-12 05:15:20

by Tejun Heo

Subject: Re: Strange block/scsi/workqueue issue

Hello,

On Mon, Apr 11, 2011 at 11:49:17PM -0500, James Bottomley wrote:
> > But confused here. Why does it make any difference whether the
> > release operation is in the request_fn context or not? What makes
> > SCSI refcounting different from others?
>
> I didn't say it did. SCSI refcounting is fairly standard.
>
> The problem isn't really anything to do with SCSI ... it's the way block
> queue destruction must now be called. The block queue destruction
> includes a synchronous flush of the work queue. That means it can't be
> called from the executing workqueue without deadlocking. The last put
> of a SCSI device destroys the queue. This now means that the last put
> of the SCSI device can't be in the block delay work path. However, as
> the device shuts down that can very well wind up happening if
> blk_delay_queue() ends up being called as the device is dying.

Yeah, I understood that part. I thought you were referring to the
problem caused by the proposed workqueue replacement in the patch when
you talked about workqueue-related issues in the previous message, and
saying that there's an inherent problem with using workqueue directly.

> The entangled deadlock seems to have been introduced by commit
> 3cca6dc1c81e2407928dc4c6105252146fd3924f; prior to that, there was no
> synchronous cancel in the destroy path.
>
> A fix might be to shunt more stuff off to workqueues, but that's
> producing a more complex system which would be prone to entanglements
> that would be even harder to spot.

I don't agree there. To me, the cause for entanglement seems to be
request_fn calling all the way through blocking destruction because it
detected that the final put was called with sleepable context. It's
just weird, and hard to anticipate, for request_fn to call directly
into the sleepable destruction path, whether or not it had sleepable
context. With the yet-to-be-debugged bug caused by the conversion aside,
I think simply using workqueue is the better solution.

> Perhaps a better solution is just not to use sync cancellations in
> block? As long as the work in the queue holds a queue ref, they can be
> done asynchronously.

Hmmm... maybe but at least I prefer doing explicit shutdown/draining
on destruction even if the base data structure is refcounted. Things
become much more predictable that way.

Thanks.

--
tejun

2011-04-12 08:39:28

by Steven Whitehouse

Subject: Re: Strange block/scsi/workqueue issue

Hi,

On Tue, 2011-04-12 at 00:02 -0500, James Bottomley wrote:
> On Mon, 2011-04-11 at 23:49 -0500, James Bottomley wrote:
> > The entangled deadlock seems to have been introduced by commit
> > 3cca6dc1c81e2407928dc4c6105252146fd3924f; prior to that, there was no
> > synchronous cancel in the destroy path.
> >
> > A fix might be to shunt more stuff off to workqueues, but that's
> > producing a more complex system which would be prone to entanglements
> > that would be even harder to spot.
> >
> > Perhaps a better solution is just not to use sync cancellations in
> > block? As long as the work in the queue holds a queue ref, they can be
> > done asynchronously.
>
> So this is a possible implementation; does it fix the problem?
> (Compile tested only.)
>
> James
>
> ---
>
> diff --git a/block/blk-core.c b/block/blk-core.c
> index 90f22cc..f600f88 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -219,6 +219,7 @@ static void blk_delay_work(struct work_struct *work)
> spin_lock_irq(q->queue_lock);
> __blk_run_queue(q, false);
> spin_unlock_irq(q->queue_lock);
> + blk_put_queue(q);
> }
>
> /**
> @@ -233,7 +234,8 @@ static void blk_delay_work(struct work_struct *work)
> */
> void blk_delay_queue(struct request_queue *q, unsigned long msecs)
> {
> - schedule_delayed_work(&q->delay_work, msecs_to_jiffies(msecs));
> + if (!blk_get_queue(q))
> + schedule_delayed_work(&q->delay_work, msecs_to_jiffies(msecs));
> }
> EXPORT_SYMBOL(blk_delay_queue);
>
> @@ -271,7 +273,8 @@ EXPORT_SYMBOL(blk_start_queue);
> **/
> void blk_stop_queue(struct request_queue *q)
> {
> - __cancel_delayed_work(&q->delay_work);
> + if (__cancel_delayed_work(&q->delay_work))
> + blk_put_queue(q);
> queue_flag_set(QUEUE_FLAG_STOPPED, q);
> }
> EXPORT_SYMBOL(blk_stop_queue);
> @@ -297,7 +300,8 @@ EXPORT_SYMBOL(blk_stop_queue);
> void blk_sync_queue(struct request_queue *q)
> {
> del_timer_sync(&q->timeout);
> - cancel_delayed_work_sync(&q->delay_work);
> + if (__cancel_delayed_work(&q->delay_work))
> + blk_put_queue(q);
> queue_sync_plugs(q);
> }
> EXPORT_SYMBOL(blk_sync_queue);
>
>

Just done a couple of tests, with the following results:

#1. Including Tejun's patch:

scsi 0:0:32:0: Enclosure DP BACKPLANE 1.07 PQ: 0 ANSI: 5
scsi 0:2:0:0: Direct-Access DELL PERC 6/i 1.22 PQ: 0 ANSI: 5
scsi 0:2:1:0: Direct-Access DELL PERC 6/i 1.22 PQ: 0 ANSI: 5
------------[ cut here ]------------
WARNING: at lib/kref.c:34 kref_get+0x2d/0x30()
Hardware name: PowerEdge R710
Modules linked in:
Pid: 18, comm: kworker/4:0 Not tainted 2.6.39-rc2+ #189
Call Trace:
[<ffffffff8108fa9a>] warn_slowpath_common+0x7a/0xb0
[<ffffffff8108fae5>] warn_slowpath_null+0x15/0x20
[<ffffffff813c983d>] kref_get+0x2d/0x30
[<ffffffff813c823a>] kobject_get+0x1a/0x30
[<ffffffff81460864>] get_device+0x14/0x20
[<ffffffff81478bc7>] scsi_request_fn+0x37/0x4a0
[<ffffffff813aff2a>] __blk_run_queue+0x6a/0x110
[<ffffffff813b2481>] blk_delay_work+0x31/0x60
[<ffffffff810aa9c7>] process_one_work+0x197/0x520
[<ffffffff810aa961>] ? process_one_work+0x131/0x520
[<ffffffff813b2450>] ? blk_alloc_queue+0x10/0x10
[<ffffffff810acfec>] worker_thread+0x15c/0x330
[<ffffffff810ace90>] ? manage_workers.clone.20+0x240/0x240
[<ffffffff810ace90>] ? manage_workers.clone.20+0x240/0x240
[<ffffffff810b1f16>] kthread+0xa6/0xb0
[<ffffffff81687164>] kernel_thread_helper+0x4/0x10
[<ffffffff8107fe1f>] ? finish_task_switch+0x6f/0x110
[<ffffffff8167ecd6>] ? _raw_spin_unlock_irq+0x46/0x70
[<ffffffff8167f098>] ? retint_restore_args+0x13/0x13
[<ffffffff810b1e70>] ? __init_kthread_worker+0x70/0x70
[<ffffffff81687160>] ? gs_change+0x13/0x13
---[ end trace c35781f847a41f31 ]---

#2 On its own:

scsi 0:0:32:0: Enclosure DP BACKPLANE 1.07 PQ: 0 ANSI: 5
scsi 0:2:0:0: Direct-Access DELL PERC 6/i 1.22 PQ: 0 ANSI: 5
scsi 0:2:1:0: Direct-Access DELL PERC 6/i 1.22 PQ: 0 ANSI: 5
BUG: unable to handle kernel NULL pointer dereference at 0000000000000008
IP: [<ffffffff810ab4d3>] __queue_work+0x403/0x460
PGD 0
Oops: 0000 [#1] PREEMPT SMP
last sysfs file:
CPU 0
Modules linked in:

Pid: 3, comm: ksoftirqd/0 Not tainted 2.6.39-rc2+ #190 Dell Inc. PowerEdge R710/0N047H
RIP: 0010:[<ffffffff810ab4d3>] [<ffffffff810ab4d3>] __queue_work+0x403/0x460
RSP: 0018:ffff8800c90abb20 EFLAGS: 00010046
RAX: 0000000000000000 RBX: ffff8800c56a8c58 RCX: ffff8800c56a8c60
RDX: ffff8800c56a8c60 RSI: 0000000000000000 RDI: ffff8800cb00e388
RBP: ffff8800c90abb70 R08: 0000000000000900 R09: 0000000000000000
R10: 0000000000000001 R11: 0000000000000001 R12: ffff8800cb00e340
R13: 0000000000000000 R14: ffff8800cb1d6700 R15: 0000000000000000
FS: 0000000000000000(0000) GS:ffff8800cb000000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b
CR2: 0000000000000008 CR3: 0000000001c33000 CR4: 00000000000006f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process ksoftirqd/0 (pid: 3, threadinfo ffff8800c90aa000, task ffff8800c90a80c0)
Stack:
ffff8800c90abb90 0000000000000086 0000000000000000 0000000000000086
ffff8800c90a80c0 ffff8800c92ebe00 ffff8800c56a8c58 ffff8800c88bac28
ffff8800c90abc30 ffff8800c565c080 ffff8800c90abb80 ffffffff810ab59d
Call Trace:
[<ffffffff810ab59d>] queue_work_on+0x1d/0x30
[<ffffffff810ac749>] queue_work+0x29/0x60
[<ffffffff810ac865>] queue_delayed_work+0x25/0x30
[<ffffffff813aff75>] __blk_run_queue+0xb5/0x110
[<ffffffff813b04ed>] blk_run_queue+0x2d/0x50
[<ffffffff8147715a>] scsi_run_queue+0xea/0x3f0
[<ffffffff814710cf>] ? __scsi_put_command+0x5f/0xa0
[<ffffffff814793cd>] scsi_next_command+0x3d/0x60
[<ffffffff81479602>] scsi_io_completion+0x1b2/0x630
[<ffffffff81470cd7>] scsi_finish_command+0xc7/0x130
[<ffffffff8147936f>] scsi_softirq_done+0x13f/0x160
[<ffffffff813b8252>] blk_done_softirq+0xa2/0xc0
[<ffffffff810970ac>] __do_softirq+0xdc/0x290
[<ffffffff81097395>] run_ksoftirqd+0x135/0x250
[<ffffffff81097260>] ? __do_softirq+0x290/0x290
[<ffffffff810b1f16>] kthread+0xa6/0xb0
[<ffffffff816870e4>] kernel_thread_helper+0x4/0x10
[<ffffffff8107fe1f>] ? finish_task_switch+0x6f/0x110
[<ffffffff8167ec56>] ? _raw_spin_unlock_irq+0x46/0x70
[<ffffffff8167f018>] ? retint_restore_args+0x13/0x13
[<ffffffff810b1e70>] ? __init_kthread_worker+0x70/0x70
[<ffffffff816870e0>] ? gs_change+0x13/0x13
Code: 8b 44 24 48 49 8d 7c 24 48 48 83 e8 08 31 f6 eb 27 0f 1f 80 00 00 00 00 48 8b 00 49 89 c0 41 80 e0 00 a8 04 48 89 f0 49 0f 45 c0
8b 40 08 f6 00 10 74 14 48 8d 41 f8 48 8b 48 08 48 8d 50 08
RIP [<ffffffff810ab4d3>] __queue_work+0x403/0x460
RSP <ffff8800c90abb20>
CR2: 0000000000000008
---[ end trace 52fce2e9afb9bf59 ]---
Kernel panic - not syncing: Fatal exception in interrupt

These are both on the same base kernel as yesterday (i.e. Linus tree
from this time yesterday morning) plus a few GFS2 patches, but they
only affect the GFS2 module which is not loaded.

I don't do anything special to trigger the problem - it happens of its
own accord during normal boot. The initscripts are those from f14.

Steve.


2011-04-12 08:46:37

by Steven Whitehouse

Subject: Re: Strange block/scsi/workqueue issue

Hi,

On Tue, 2011-04-12 at 09:14 +0900, Tejun Heo wrote:
> Hello,
>
> On Mon, Apr 11, 2011 at 06:52:10PM +0100, Steven Whitehouse wrote:
> > WARNING: at lib/kref.c:34 kref_get+0x2d/0x30()
> > Hardware name: PowerEdge R710
> > Modules linked in:
> > Pid: 12, comm: kworker/2:0 Not tainted 2.6.39-rc2+ #188
> > Call Trace:
> > [<ffffffff8108fa9a>] warn_slowpath_common+0x7a/0xb0
> > [<ffffffff8108fae5>] warn_slowpath_null+0x15/0x20
> > [<ffffffff813c97cd>] kref_get+0x2d/0x30
> > [<ffffffff813c81ca>] kobject_get+0x1a/0x30
> > [<ffffffff814607f4>] get_device+0x14/0x20
> > [<ffffffff81478b57>] scsi_request_fn+0x37/0x4a0
> > [<ffffffff813aff2a>] __blk_run_queue+0x6a/0x110
> > [<ffffffff813b1f66>] blk_delay_work+0x26/0x40
> > [<ffffffff810aa9c7>] process_one_work+0x197/0x520
> > [<ffffffff810acfec>] worker_thread+0x15c/0x330
> > [<ffffffff810b1f16>] kthread+0xa6/0xb0
> > [<ffffffff816870e4>] kernel_thread_helper+0x4/0x10
> > ---[ end trace 3681e9da2630a94b ]---
>
> Hmm, it could be that the root cause of the problem is
> premature/double put of scsi_device. Without the patch, it makes
> scsi_request_fn() call into the device destruction path prematurely,
> triggering deadlock, while after the patch the deadlock is gone but
> the refcount reaches zero prematurely, triggering the kref warning on
> the next request.
>
> The problem doesn't seem widespread so something about the setup is
> peculiar. Steven, can you please detail the setup (and steps needed
> to trigger the problem) and attach the full boot log? James, any
> ideas?
>
> Thanks.
>
The hardware is as follows:

Dell R710 server with two 2GHz 4-core CPUs
Two 146G SAS disks with hardware mirroring as root/OS disk
Two 300G SAS disks with hardware mirroring as GFS2 test disk (note: not
mounted during boot process)
12G RAM (boots with mem=4G since I was originally running tests with
lower memory, but I don't think the memory size affects this at all)

I've attached the boot log from testing your patch yesterday. If you
want the boot logs including James' patch, or the original boot log, I
can send those too,

Steve.


Attachments:
tejun.txt (160.62 kB)

2011-04-12 13:42:36

by James Bottomley

Subject: Re: Strange block/scsi/workqueue issue

On Tue, 2011-04-12 at 09:42 +0100, Steven Whitehouse wrote:
> #2 On its own:

Right, that's how it's supposed to be applied.

> scsi 0:0:32:0: Enclosure DP BACKPLANE 1.07 PQ: 0 ANSI: 5
> scsi 0:2:0:0: Direct-Access DELL PERC 6/i 1.22 PQ: 0 ANSI: 5
> scsi 0:2:1:0: Direct-Access DELL PERC 6/i 1.22 PQ: 0 ANSI: 5
> BUG: unable to handle kernel NULL pointer dereference at 0000000000000008
> IP: [<ffffffff810ab4d3>] __queue_work+0x403/0x460
> PGD 0
> Oops: 0000 [#1] PREEMPT SMP
> last sysfs file:
> CPU 0
> Modules linked in:
>
> Pid: 3, comm: ksoftirqd/0 Not tainted 2.6.39-rc2+ #190 Dell Inc. PowerEdge R710/0N047H
> RIP: 0010:[<ffffffff810ab4d3>] [<ffffffff810ab4d3>] __queue_work+0x403/0x460
> RSP: 0018:ffff8800c90abb20 EFLAGS: 00010046
> RAX: 0000000000000000 RBX: ffff8800c56a8c58 RCX: ffff8800c56a8c60
> RDX: ffff8800c56a8c60 RSI: 0000000000000000 RDI: ffff8800cb00e388
> RBP: ffff8800c90abb70 R08: 0000000000000900 R09: 0000000000000000
> R10: 0000000000000001 R11: 0000000000000001 R12: ffff8800cb00e340
> R13: 0000000000000000 R14: ffff8800cb1d6700 R15: 0000000000000000
> FS: 0000000000000000(0000) GS:ffff8800cb000000(0000) knlGS:0000000000000000
> CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b
> CR2: 0000000000000008 CR3: 0000000001c33000 CR4: 00000000000006f0
> DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
> Process ksoftirqd/0 (pid: 3, threadinfo ffff8800c90aa000, task ffff8800c90a80c0)
> Stack:
> ffff8800c90abb90 0000000000000086 0000000000000000 0000000000000086
> ffff8800c90a80c0 ffff8800c92ebe00 ffff8800c56a8c58 ffff8800c88bac28
> ffff8800c90abc30 ffff8800c565c080 ffff8800c90abb80 ffffffff810ab59d
> Call Trace:
> [<ffffffff810ab59d>] queue_work_on+0x1d/0x30
> [<ffffffff810ac749>] queue_work+0x29/0x60
> [<ffffffff810ac865>] queue_delayed_work+0x25/0x30
> [<ffffffff813aff75>] __blk_run_queue+0xb5/0x110

OK, this is because the patch is incomplete ... it needs an additional
reference which I missed in the run-queue path ... can you apply the
updated patch below on top?

Actually, there are probably more places I missed, sigh.

> [<ffffffff813b04ed>] blk_run_queue+0x2d/0x50
> [<ffffffff8147715a>] scsi_run_queue+0xea/0x3f0
> [<ffffffff814710cf>] ? __scsi_put_command+0x5f/0xa0
> [<ffffffff814793cd>] scsi_next_command+0x3d/0x60
> [<ffffffff81479602>] scsi_io_completion+0x1b2/0x630
> [<ffffffff81470cd7>] scsi_finish_command+0xc7/0x130
> [<ffffffff8147936f>] scsi_softirq_done+0x13f/0x160
> [<ffffffff813b8252>] blk_done_softirq+0xa2/0xc0
> [<ffffffff810970ac>] __do_softirq+0xdc/0x290
> [<ffffffff81097395>] run_ksoftirqd+0x135/0x250
> [<ffffffff81097260>] ? __do_softirq+0x290/0x290
> [<ffffffff810b1f16>] kthread+0xa6/0xb0
> [<ffffffff816870e4>] kernel_thread_helper+0x4/0x10
> [<ffffffff8107fe1f>] ? finish_task_switch+0x6f/0x110
> [<ffffffff8167ec56>] ? _raw_spin_unlock_irq+0x46/0x70
> [<ffffffff8167f018>] ? retint_restore_args+0x13/0x13
> [<ffffffff810b1e70>] ? __init_kthread_worker+0x70/0x70
> [<ffffffff816870e0>] ? gs_change+0x13/0x13
> Code: 8b 44 24 48 49 8d 7c 24 48 48 83 e8 08 31 f6 eb 27 0f 1f 80 00 00 00 00 48 8b 00 49 89 c0 41 80 e0 00 a8 04 48 89 f0 49 0f 45 c0
> 8b 40 08 f6 00 10 74 14 48 8d 41 f8 48 8b 48 08 48 8d 50 08
> RIP [<ffffffff810ab4d3>] __queue_work+0x403/0x460
> RSP <ffff8800c90abb20>
> CR2: 0000000000000008
> ---[ end trace 52fce2e9afb9bf59 ]---
> Kernel panic - not syncing: Fatal exception in interrupt
>
> These are both on the same base kernel as yesterday (i.e. Linus tree
> from this time yesterday morning) plus a few GFS2 patches, but they
> only affect the GFS2 module which is not loaded.
>
> I don't do anything special to trigger the problem - it happens of its
> own accord during normal boot. The initscripts are those from f14.

That's useful ... at least it's not hard to reproduce ... it's just I
don't see anything like this in my boot sequence.

James

---

Index: linux-2.6/block/blk-core.c
===================================================================
--- linux-2.6.orig/block/blk-core.c
+++ linux-2.6/block/blk-core.c
@@ -328,7 +328,7 @@ void __blk_run_queue(struct request_queu
if (!force_kblockd && !queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
q->request_fn(q);
queue_flag_clear(QUEUE_FLAG_REENTER, q);
- } else
+ } else if (!blk_get_queue(q))
queue_delayed_work(kblockd_workqueue, &q->delay_work, 0);
}
EXPORT_SYMBOL(__blk_run_queue);

2011-04-12 14:04:07

by Steven Whitehouse

Subject: Re: Strange block/scsi/workqueue issue

Hi,

On Tue, 2011-04-12 at 08:42 -0500, James Bottomley wrote:
> On Tue, 2011-04-12 at 09:42 +0100, Steven Whitehouse wrote:
> > #2 On its own:
>
> Right, that's how it's supposed to be applied
>
> > scsi 0:0:32:0: Enclosure DP BACKPLANE 1.07 PQ: 0 ANSI: 5
> > scsi 0:2:0:0: Direct-Access DELL PERC 6/i 1.22 PQ: 0 ANSI: 5
> > scsi 0:2:1:0: Direct-Access DELL PERC 6/i 1.22 PQ: 0 ANSI: 5
> > BUG: unable to handle kernel NULL pointer dereference at 0000000000000008
> > IP: [<ffffffff810ab4d3>] __queue_work+0x403/0x460
> > PGD 0
> > Oops: 0000 [#1] PREEMPT SMP
> > last sysfs file:
> > CPU 0
> > Modules linked in:
> >
> > Pid: 3, comm: ksoftirqd/0 Not tainted 2.6.39-rc2+ #190 Dell Inc. PowerEdge R710/0N047H
> > RIP: 0010:[<ffffffff810ab4d3>] [<ffffffff810ab4d3>] __queue_work+0x403/0x460
> > RSP: 0018:ffff8800c90abb20 EFLAGS: 00010046
> > RAX: 0000000000000000 RBX: ffff8800c56a8c58 RCX: ffff8800c56a8c60
> > RDX: ffff8800c56a8c60 RSI: 0000000000000000 RDI: ffff8800cb00e388
> > RBP: ffff8800c90abb70 R08: 0000000000000900 R09: 0000000000000000
> > R10: 0000000000000001 R11: 0000000000000001 R12: ffff8800cb00e340
> > R13: 0000000000000000 R14: ffff8800cb1d6700 R15: 0000000000000000
> > FS: 0000000000000000(0000) GS:ffff8800cb000000(0000) knlGS:0000000000000000
> > CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b
> > CR2: 0000000000000008 CR3: 0000000001c33000 CR4: 00000000000006f0
> > DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> > DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
> > Process ksoftirqd/0 (pid: 3, threadinfo ffff8800c90aa000, task ffff8800c90a80c0)
> > Stack:
> > ffff8800c90abb90 0000000000000086 0000000000000000 0000000000000086
> > ffff8800c90a80c0 ffff8800c92ebe00 ffff8800c56a8c58 ffff8800c88bac28
> > ffff8800c90abc30 ffff8800c565c080 ffff8800c90abb80 ffffffff810ab59d
> > Call Trace:
> > [<ffffffff810ab59d>] queue_work_on+0x1d/0x30
> > [<ffffffff810ac749>] queue_work+0x29/0x60
> > [<ffffffff810ac865>] queue_delayed_work+0x25/0x30
> > [<ffffffff813aff75>] __blk_run_queue+0xb5/0x110
>
> OK, this is because the patch is incomplete ... it needs an additional
> reference which I missed in the run-queue path ... can you apply the
> updated patch below on top?
>
> Actually, there are probably more places I missed, sigh.
>
> > [<ffffffff813b04ed>] blk_run_queue+0x2d/0x50
> > [<ffffffff8147715a>] scsi_run_queue+0xea/0x3f0
> > [<ffffffff814710cf>] ? __scsi_put_command+0x5f/0xa0
> > [<ffffffff814793cd>] scsi_next_command+0x3d/0x60
> > [<ffffffff81479602>] scsi_io_completion+0x1b2/0x630
> > [<ffffffff81470cd7>] scsi_finish_command+0xc7/0x130
> > [<ffffffff8147936f>] scsi_softirq_done+0x13f/0x160
> > [<ffffffff813b8252>] blk_done_softirq+0xa2/0xc0
> > [<ffffffff810970ac>] __do_softirq+0xdc/0x290
> > [<ffffffff81097395>] run_ksoftirqd+0x135/0x250
> > [<ffffffff81097260>] ? __do_softirq+0x290/0x290
> > [<ffffffff810b1f16>] kthread+0xa6/0xb0
> > [<ffffffff816870e4>] kernel_thread_helper+0x4/0x10
> > [<ffffffff8107fe1f>] ? finish_task_switch+0x6f/0x110
> > [<ffffffff8167ec56>] ? _raw_spin_unlock_irq+0x46/0x70
> > [<ffffffff8167f018>] ? retint_restore_args+0x13/0x13
> > [<ffffffff810b1e70>] ? __init_kthread_worker+0x70/0x70
> > [<ffffffff816870e0>] ? gs_change+0x13/0x13
> > Code: 8b 44 24 48 49 8d 7c 24 48 48 83 e8 08 31 f6 eb 27 0f 1f 80 00 00 00 00 48 8b 00 49 89 c0 41 80 e0 00 a8 04 48 89 f0 49 0f 45 c0
> > 8b 40 08 f6 00 10 74 14 48 8d 41 f8 48 8b 48 08 48 8d 50 08
> > RIP [<ffffffff810ab4d3>] __queue_work+0x403/0x460
> > RSP <ffff8800c90abb20>
> > CR2: 0000000000000008
> > ---[ end trace 52fce2e9afb9bf59 ]---
> > Kernel panic - not syncing: Fatal exception in interrupt
> >
> > These are both on the same base kernel as yesterday (i.e. Linus tree
> > from this time yesterday morning) plus a few GFS2 patches, but they
> > only affect the GFS2 module which is not loaded.
> >
> > I don't do anything special to trigger the problem - it happens of its
> > own accord during normal boot. The initscripts are those from f14.
>
> That's useful ... at least it's not hard to reproduce ... it's just I
> don't see anything like this in my boot sequence.
>
> James
>
> ---
>
> Index: linux-2.6/block/blk-core.c
> ===================================================================
> --- linux-2.6.orig/block/blk-core.c
> +++ linux-2.6/block/blk-core.c
> @@ -328,7 +328,7 @@ void __blk_run_queue(struct request_queu
> if (!force_kblockd && !queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
> q->request_fn(q);
> queue_flag_clear(QUEUE_FLAG_REENTER, q);
> - } else
> + } else if (!blk_get_queue(q))
> queue_delayed_work(kblockd_workqueue, &q->delay_work, 0);
> }
> EXPORT_SYMBOL(__blk_run_queue);
>
>

Messages attached - getting closer I think, but still not quite there.
It is very odd that so far (at least I've seen no other reports) I
seem to be the only one who hits this. I wonder what is different about
my kernel/hardware...

Steve.



Attachments:
james3.txt (26.45 kB)

2011-04-12 15:14:27

by James Bottomley

Subject: Re: Strange block/scsi/workqueue issue

On Tue, 2011-04-12 at 15:06 +0100, Steven Whitehouse wrote:
> Messages attached - getting closer I think, but still not quite there.
> It is very odd that so far (at least I've seen no other reports) I
> seem to be the only one who hits this. I wonder what is different about
> my kernel/hardware...


> csi: killing requests for dead queue
> scsi: killing requests for dead queue
> BUG: unable to handle kernel scsi: killing requests for dead queue
> NULL pointer dereference at 0000000000000028
> IP: [<ffffffff813b34ed>] blk_peek_request+0x1ad/0x220

Hmm, I don't see where this is ... it's a null pointer deref offset by
0x28 bytes ... but I can't see where ... can you resolve this to a line?

James

2011-04-12 15:15:22

by James Bottomley

Subject: Re: Strange block/scsi/workqueue issue

On Tue, 2011-04-12 at 14:15 +0900, Tejun Heo wrote:
> > A fix might be to shunt more stuff off to workqueues, but that's
> > producing a more complex system which would be prone to entanglements
> > that would be even harder to spot.
>
> I don't agree there. To me, the cause for entanglement seems to be
> request_fn calling all the way through blocking destruction because it
> detected that the final put was called with sleepable context. It's
> just weird, and hard to anticipate, for request_fn to call directly
> into the sleepable destruction path, whether or not it had sleepable
> context. With the yet-to-be-debugged bug caused by the conversion aside,
> I think simply using workqueue is the better solution.

So your idea is that all final puts should go through a workqueue? Like
I said, that would work, but it's not just SCSI ... any call path that
destroys a queue has to be audited.

The problem is nothing to do with sleeping context ... it's that any
work called by the block workqueue can't destroy that queue. In a
refcounted model, that's a bit nasty.

> > Perhaps a better solution is just not to use sync cancellations in
> > block? As long as the work in the queue holds a queue ref, they can be
> > done asynchronously.
>
> Hmmm... maybe but at least I prefer doing explicit shutdown/draining
> on destruction even if the base data structure is refcounted. Things
> become much more predictable that way.

It is pretty much instantaneous. Unless we're executing, we cancel the
work. If the work is already running, we just let it complete instead
of waiting for it.

Synchronous waits are dangerous because they cause entanglement.

James

2011-04-12 16:01:44

by Steven Whitehouse

Subject: Re: Strange block/scsi/workqueue issue

Hi,

On Tue, 2011-04-12 at 10:14 -0500, James Bottomley wrote:
> On Tue, 2011-04-12 at 15:06 +0100, Steven Whitehouse wrote:
> > Messages attached - getting closer I think, but still not quite there.
> > It is very odd that so far (at least I've seen no other reports) I
> > seem to be the only one who hits this. I wonder what is different about
> > my kernel/hardware...
>
>
> > csi: killing requests for dead queue
> > scsi: killing requests for dead queue
> > BUG: unable to handle kernel scsi: killing requests for dead queue
> > NULL pointer dereference at 0000000000000028
> > IP: [<ffffffff813b34ed>] blk_peek_request+0x1ad/0x220
>
> Hmm, I don't see where this is ... it's a null pointer deref offset by
> 0x28 bytes ... but I can't see where ... can you resolve this to a line?
>
> James
>
>

So assuming I've calculated this correctly, it should be just about....

/home/steve/linux-2.6/include/linux/rcupdate.h:677
6de8: a8 08 test $0x8,%al
6dea: 0f 84 65 ff ff ff je 6d55 <blk_peek_request+0xf5>
6df0: e8 00 00 00 00 callq 6df5 <blk_peek_request+0x195>
6df1: R_X86_64_PC32 preempt_schedule-0x4
6df5: e9 5b ff ff ff jmpq 6d55 <blk_peek_request+0xf5>
6dfa: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1)
__elv_next_request():
/home/steve/linux-2.6/block/blk.h:65
if (!list_empty(&q->queue_head)) {
rq = list_entry_rq(q->queue_head.next);
return rq;
}

if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
6e00: 49 8b 44 24 18 mov 0x18(%r12),%rax
6e05: 31 f6 xor %esi,%esi
6e07: 48 8b 00 mov (%rax),%rax
6e0a: 4c 89 e7 mov %r12,%rdi <---- here!
6e0d: ff 50 28 callq *0x28(%rax)
6e10: 85 c0 test %eax,%eax
6e12: 0f 85 f4 fe ff ff jne 6d0c <blk_peek_request+0xac>
6e18: 45 31 ed xor %r13d,%r13d
6e1b: e9 70 ff ff ff jmpq 6d90 <blk_peek_request+0x130>
blk_peek_request():
/home/steve/linux-2.6/block/blk-core.c:1912
* so that we don't add it again
*/

The offset I got was 6e0d, but I guess that's the start of the following instruction
by the time the fault is logged. Also I've attached my current .config just in case
that might be a help in reproducing it,

Steve.


Attachments:
test.config (85.21 kB)

2011-04-12 16:27:09

by James Bottomley

Subject: Re: Strange block/scsi/workqueue issue

On Tue, 2011-04-12 at 17:04 +0100, Steven Whitehouse wrote:
> Hi,
>
> On Tue, 2011-04-12 at 10:14 -0500, James Bottomley wrote:
> > On Tue, 2011-04-12 at 15:06 +0100, Steven Whitehouse wrote:
> > > Messages attached - getting closer I think, but still not quite there.
> > > It is very odd that so far (at least I've seen no other reports) that I
> > > seem to be the only one who hits this. I wonder what is different about
> > > my kernel/hardware...
> >
> >
> > > csi: killing requests for dead queue
> > > scsi: killing requests for dead queue
> > > BUG: unable to handle kernel scsi: killing requests for dead queue
> > > NULL pointer dereference at 0000000000000028
> > > IP: [<ffffffff813b34ed>] blk_peek_request+0x1ad/0x220
> >
> > Hmm, I don't see where this is ... it's a null pointer deref offset by
> > 0x28 bytes ... but I can't see where ... can you resolve this to a line?
> >
> > James
> >
> >
>
> So assuming I've calculated this correctly, it should be just about....
>
> /home/steve/linux-2.6/include/linux/rcupdate.h:677
> 6de8: a8 08 test $0x8,%al
> 6dea: 0f 84 65 ff ff ff je 6d55 <blk_peek_request+0xf5>
> 6df0: e8 00 00 00 00 callq 6df5 <blk_peek_request+0x195>
> 6df1: R_X86_64_PC32 preempt_schedule-0x4
> 6df5: e9 5b ff ff ff jmpq 6d55 <blk_peek_request+0xf5>
> 6dfa: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1)
> __elv_next_request():
> /home/steve/linux-2.6/block/blk.h:65
> if (!list_empty(&q->queue_head)) {
> rq = list_entry_rq(q->queue_head.next);
> return rq;
> }
>
> if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
> 6e00: 49 8b 44 24 18 mov 0x18(%r12),%rax
> 6e05: 31 f6 xor %esi,%esi
> 6e07: 48 8b 00 mov (%rax),%rax
> 6e0a: 4c 89 e7 mov %r12,%rdi <---- here!
> 6e0d: ff 50 28 callq *0x28(%rax)
> 6e10: 85 c0 test %eax,%eax
> 6e12: 0f 85 f4 fe ff ff jne 6d0c <blk_peek_request+0xac>
> 6e18: 45 31 ed xor %r13d,%r13d
> 6e1b: e9 70 ff ff ff jmpq 6d90 <blk_peek_request+0x130>
> blk_peek_request():
> /home/steve/linux-2.6/block/blk-core.c:1912
> * so that we don't add it again
> */
>
> > The offset I got was 6e0d, but I guess that's the start of the following instruction
> > by the time the fault is logged. Also I've attached my current .config just in case
> > that might be a help in reproducing it,

Good work! It's simpler if you compile with -g ... then the gdb line
command will just tell you all of this. However, it seems clear that
elevator->ops is NULL (elevator_dispatch_fn is at offset 0x28 into that
on a 64-bit system).

That's because blk_cleanup_queue called elevator_exit, so we need an out
in the peek case.

So unwrapping the onion, this is the next level

James

---

Index: linux-2.6/block/blk.h
===================================================================
--- linux-2.6.orig/block/blk.h
+++ linux-2.6/block/blk.h
@@ -62,7 +62,7 @@ static inline struct request *__elv_next
return rq;
}

- if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
+ if (!q->elevator->ops || !q->elevator->ops->elevator_dispatch_fn(q, 0))
return NULL;
}
}

2011-04-12 16:48:51

by Steven Whitehouse

Subject: Re: Strange block/scsi/workqueue issue

Hi,

On Tue, 2011-04-12 at 11:27 -0500, James Bottomley wrote:
> On Tue, 2011-04-12 at 17:04 +0100, Steven Whitehouse wrote:
> > Hi,
> >
> > On Tue, 2011-04-12 at 10:14 -0500, James Bottomley wrote:
> > > On Tue, 2011-04-12 at 15:06 +0100, Steven Whitehouse wrote:
> > > > Messages attached - getting closer I think, but still not quite there.
> > > > It is very odd that so far (at least I've seen no other reports) I
> > > > seem to be the only one who hits this. I wonder what is different about
> > > > my kernel/hardware...
> > >
> > >
> > > > csi: killing requests for dead queue
> > > > scsi: killing requests for dead queue
> > > > BUG: unable to handle kernel scsi: killing requests for dead queue
> > > > NULL pointer dereference at 0000000000000028
> > > > IP: [<ffffffff813b34ed>] blk_peek_request+0x1ad/0x220
> > >
> > > Hmm, I don't see where this is ... it's a null pointer deref offset by
> > > 0x28 bytes ... but I can't see where ... can you resolve this to a line?
> > >
> > > James
> > >
> > >
> >
> > So assuming I've calculated this correctly, it should be just about....
> >
> > /home/steve/linux-2.6/include/linux/rcupdate.h:677
> > 6de8: a8 08 test $0x8,%al
> > 6dea: 0f 84 65 ff ff ff je 6d55 <blk_peek_request+0xf5>
> > 6df0: e8 00 00 00 00 callq 6df5 <blk_peek_request+0x195>
> > 6df1: R_X86_64_PC32 preempt_schedule-0x4
> > 6df5: e9 5b ff ff ff jmpq 6d55 <blk_peek_request+0xf5>
> > 6dfa: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1)
> > __elv_next_request():
> > /home/steve/linux-2.6/block/blk.h:65
> > if (!list_empty(&q->queue_head)) {
> > rq = list_entry_rq(q->queue_head.next);
> > return rq;
> > }
> >
> > if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
> > 6e00: 49 8b 44 24 18 mov 0x18(%r12),%rax
> > 6e05: 31 f6 xor %esi,%esi
> > 6e07: 48 8b 00 mov (%rax),%rax
> > 6e0a: 4c 89 e7 mov %r12,%rdi <---- here!
> > 6e0d: ff 50 28 callq *0x28(%rax)
> > 6e10: 85 c0 test %eax,%eax
> > 6e12: 0f 85 f4 fe ff ff jne 6d0c <blk_peek_request+0xac>
> > 6e18: 45 31 ed xor %r13d,%r13d
> > 6e1b: e9 70 ff ff ff jmpq 6d90 <blk_peek_request+0x130>
> > blk_peek_request():
> > /home/steve/linux-2.6/block/blk-core.c:1912
> > * so that we don't add it again
> > */
> >
> > The offset I got was 6e0d, but I guess that's the start of the following instruction
> > by the time the fault is logged. Also I've attached my current .config just in case
> > that might be a help in reproducing it,
>
> Good work! It's simpler if you compile with -g ... then the gdb line
> command will just tell you all of this. However, it seems clear that
> elevator->ops is NULL (elevator_dispatch_fn is at offset 0x28 into that
> on a 64-bit system).
>
> That's because blk_cleanup_queue called elevator_exit, so we need an out
> in the peek case.
>
> So unwrapping the onion, this is the next level
>
> James
>
> ---
>
> Index: linux-2.6/block/blk.h
> ===================================================================
> --- linux-2.6.orig/block/blk.h
> +++ linux-2.6/block/blk.h
> @@ -62,7 +62,7 @@ static inline struct request *__elv_next
> return rq;
> }
>
> - if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
> + if (!q->elevator->ops || !q->elevator->ops->elevator_dispatch_fn(q, 0))
> return NULL;
> }
> }
>
>

Still not quite there, but looking more hopeful now,

Steve.


Attachments:
james4.txt (38.89 kB)

2011-04-12 17:41:38

by James Bottomley

Subject: Re: Strange block/scsi/workqueue issue

On Tue, 2011-04-12 at 17:51 +0100, Steven Whitehouse wrote:
> Still not quite there, but looking more hopeful now,

Not sure I share your optimism; but this one

> scsi 0:2:1:0: Direct-Access DELL PERC 6/i 1.22 PQ: 0 ANSI: 5
> scsi: killing requests for dead queue
> ------------[ cut here ]------------
> WARNING: at lib/kref.c:34 kref_get+0x2d/0x30()
> Hardware name: PowerEdge R710
> Modules linked in:
> Pid: 386, comm: kworker/6:1 Not tainted 2.6.39-rc2+ #193
> Call Trace:
> [<ffffffff8108fa9a>] warn_slowpath_common+0x7a/0xb0
> [<ffffffff8108fae5>] warn_slowpath_null+0x15/0x20
> [<ffffffff813c984d>] kref_get+0x2d/0x30
> [<ffffffff813c824a>] kobject_get+0x1a/0x30
> [<ffffffff81460874>] get_device+0x14/0x20
> [<ffffffff81478bd7>] scsi_request_fn+0x37/0x4a0

Is definitely a race between the last put of the SCSI device and the
block delayed work. The signal that mediates that race is supposed to
be the q->queuedata being null, but that doesn't get set until some time
into the release function (by which time the ref is already zero).

Closing the window completely involves setting this to NULL before we do
the final put when we know everything else is gone. So, here's the next
incremental.

James

---

Index: linux-2.6/drivers/scsi/scsi_sysfs.c
===================================================================
--- linux-2.6.orig/drivers/scsi/scsi_sysfs.c
+++ linux-2.6/drivers/scsi/scsi_sysfs.c
@@ -323,7 +323,6 @@ static void scsi_device_dev_release_user
}

if (sdev->request_queue) {
- sdev->request_queue->queuedata = NULL;
/* user context needed to free queue */
scsi_free_queue(sdev->request_queue);
/* temporary expedient, try to catch use of queue lock
@@ -937,6 +936,7 @@ void __scsi_remove_device(struct scsi_de
if (sdev->host->hostt->slave_destroy)
sdev->host->hostt->slave_destroy(sdev);
transport_destroy_device(dev);
+ sdev->request_queue->queuedata = NULL;
put_device(dev);
}


2011-04-12 18:30:43

by Steven Whitehouse

Subject: Re: Strange block/scsi/workqueue issue

Hi,

On Tue, 2011-04-12 at 12:41 -0500, James Bottomley wrote:
> On Tue, 2011-04-12 at 17:51 +0100, Steven Whitehouse wrote:
> > Still not quite there, but looking more hopeful now,
>
> Not sure I share your optimism; but this one
>
Neither do I any more :-) Looks like we are back in blk_peek_request()
again.

> > scsi 0:2:1:0: Direct-Access DELL PERC 6/i 1.22 PQ: 0 ANSI: 5
> > scsi: killing requests for dead queue
> > ------------[ cut here ]------------
> > WARNING: at lib/kref.c:34 kref_get+0x2d/0x30()
> > Hardware name: PowerEdge R710
> > Modules linked in:
> > Pid: 386, comm: kworker/6:1 Not tainted 2.6.39-rc2+ #193
> > Call Trace:
> > [<ffffffff8108fa9a>] warn_slowpath_common+0x7a/0xb0
> > [<ffffffff8108fae5>] warn_slowpath_null+0x15/0x20
> > [<ffffffff813c984d>] kref_get+0x2d/0x30
> > [<ffffffff813c824a>] kobject_get+0x1a/0x30
> > [<ffffffff81460874>] get_device+0x14/0x20
> > [<ffffffff81478bd7>] scsi_request_fn+0x37/0x4a0
>
> Is definitely a race between the last put of the SCSI device and the
> block delayed work. The signal that mediates that race is supposed to
> be the q->queuedata being null, but that doesn't get set until some time
> into the release function (by which time the ref is already zero).
>
> Closing the window completely involves setting this to NULL before we do
> the final put when we know everything else is gone. So, here's the next
> incremental.
>
> James
>
> ---
>
> Index: linux-2.6/drivers/scsi/scsi_sysfs.c
> ===================================================================
> --- linux-2.6.orig/drivers/scsi/scsi_sysfs.c
> +++ linux-2.6/drivers/scsi/scsi_sysfs.c
> @@ -323,7 +323,6 @@ static void scsi_device_dev_release_user
> }
>
> if (sdev->request_queue) {
> - sdev->request_queue->queuedata = NULL;
> /* user context needed to free queue */
> scsi_free_queue(sdev->request_queue);
> /* temporary expedient, try to catch use of queue lock
> @@ -937,6 +936,7 @@ void __scsi_remove_device(struct scsi_de
> if (sdev->host->hostt->slave_destroy)
> sdev->host->hostt->slave_destroy(sdev);
> transport_destroy_device(dev);
> + sdev->request_queue->queuedata = NULL;
> put_device(dev);
> }
>
>
>

__elv_next_request():
/home/steve/linux-2.6/block/blk.h:60
static inline struct request *__elv_next_request(struct request_queue *q)
{
struct request *rq;

while (1) {
if (!list_empty(&q->queue_head)) {
6d59: 49 39 dc cmp %rbx,%r12
6d5c: 0f 85 2e ff ff ff jne 6c90 <blk_peek_request+0x30>
/home/steve/linux-2.6/block/blk.h:65
rq = list_entry_rq(q->queue_head.next);
return rq;
}

if (!q->elevator->ops || !q->elevator->ops->elevator_dispatch_fn
(q, 0))
6d62: 49 8b 44 24 18 mov 0x18(%r12),%rax
6d67: 48 8b 00 mov (%rax),%rax
6d6a: 48 85 c0 test %rax,%rax
6d6d: 74 0c je 6d7b <blk_peek_request+0x11b>
6d6f: 31 f6 xor %esi,%esi
6d71: 4c 89 e7 mov %r12,%rdi <----- here
6d74: ff 50 28 callq *0x28(%rax)
6d77: 85 c0 test %eax,%eax
6d79: 75 da jne 6d55 <blk_peek_request+0xf5>
6d7b: 45 31 ed xor %r13d,%r13d
blk_peek_request():
/home/steve/linux-2.6/block/blk-core.c:1929
break;
}
}

return rq;
}



Boot logs attached as usual,

Steve.


Attachments:
james5.txt (24.42 kB)

2011-04-12 19:57:04

by James Bottomley

Subject: Re: Strange block/scsi/workqueue issue

On Tue, 2011-04-12 at 19:33 +0100, Steven Whitehouse wrote:
> Hi,
>
> On Tue, 2011-04-12 at 12:41 -0500, James Bottomley wrote:
> > On Tue, 2011-04-12 at 17:51 +0100, Steven Whitehouse wrote:
> > > Still not quite there, but looking more hopeful now,
> >
> > Not sure I share your optimism; but this one
> >
> Neither do I any more :-) Looks like we are back in blk_peek_request()
> again.
[...]
> if (!q->elevator->ops || !q->elevator->ops->elevator_dispatch_fn
> (q, 0))
> 6d62: 49 8b 44 24 18 mov 0x18(%r12),%rax
> 6d67: 48 8b 00 mov (%rax),%rax
> 6d6a: 48 85 c0 test %rax,%rax
> 6d6d: 74 0c je 6d7b <blk_peek_request+0x11b>
> 6d6f: 31 f6 xor %esi,%esi
> 6d71: 4c 89 e7 mov %r12,%rdi <----- here
> 6d74: ff 50 28 callq *0x28(%rax)
> 6d77: 85 c0 test %eax,%eax
> 6d79: 75 da jne 6d55 <blk_peek_request+0xf5>
> 6d7b: 45 31 ed xor %r13d,%r13d

Hmm, wrong signal for no elevator then. How about this?

James

---

Index: BUILD-2.6/block/blk.h
===================================================================
--- BUILD-2.6.orig/block/blk.h 2011-04-12 14:51:35.000000000 -0500
+++ BUILD-2.6/block/blk.h 2011-04-12 14:51:53.000000000 -0500
@@ -62,7 +62,8 @@ static inline struct request *__elv_next
return rq;
}

- if (!q->elevator->ops || !q->elevator->ops->elevator_dispatch_fn(q, 0))
+ if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags) ||
+ !q->elevator->ops->elevator_dispatch_fn(q, 0))
return NULL;
}
}

2011-04-12 20:27:37

by Steven Whitehouse

Subject: Re: Strange block/scsi/workqueue issue

Hi,

On Tue, 2011-04-12 at 14:56 -0500, James Bottomley wrote:
> On Tue, 2011-04-12 at 19:33 +0100, Steven Whitehouse wrote:
> > Hi,
> >
> > On Tue, 2011-04-12 at 12:41 -0500, James Bottomley wrote:
> > > On Tue, 2011-04-12 at 17:51 +0100, Steven Whitehouse wrote:
> > > > Still not quite there, but looking more hopeful now,
> > >
> > > Not sure I share your optimism; but this one
> > >
> > Neither do I any more :-) Looks like we are back in blk_peek_request()
> > again.
> [...]
> > if (!q->elevator->ops || !q->elevator->ops->elevator_dispatch_fn
> > (q, 0))
> > 6d62: 49 8b 44 24 18 mov 0x18(%r12),%rax
> > 6d67: 48 8b 00 mov (%rax),%rax
> > 6d6a: 48 85 c0 test %rax,%rax
> > 6d6d: 74 0c je 6d7b <blk_peek_request+0x11b>
> > 6d6f: 31 f6 xor %esi,%esi
> > 6d71: 4c 89 e7 mov %r12,%rdi <----- here
> > 6d74: ff 50 28 callq *0x28(%rax)
> > 6d77: 85 c0 test %eax,%eax
> > 6d79: 75 da jne 6d55 <blk_peek_request+0xf5>
> > 6d7b: 45 31 ed xor %r13d,%r13d
>
> Hmm, wrong signal for no elevator then. How about this?
>
> James
>
That seems to do the trick... the box has been booted for several
minutes now and no sign of anything untoward so far :-)

Below is the cumulative patch which I now have applied to the kernel.
Many thanks for all your help in debugging this, it's greatly
appreciated :-)

Steve.

diff --git a/block/blk-core.c b/block/blk-core.c
index 90f22cc..7f15eb7 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -219,6 +219,7 @@ static void blk_delay_work(struct work_struct *work)
spin_lock_irq(q->queue_lock);
__blk_run_queue(q, false);
spin_unlock_irq(q->queue_lock);
+ blk_put_queue(q);
}

/**
@@ -233,7 +234,8 @@ static void blk_delay_work(struct work_struct *work)
*/
void blk_delay_queue(struct request_queue *q, unsigned long msecs)
{
- schedule_delayed_work(&q->delay_work, msecs_to_jiffies(msecs));
+ if (!blk_get_queue(q))
+ schedule_delayed_work(&q->delay_work, msecs_to_jiffies(msecs));
}
EXPORT_SYMBOL(blk_delay_queue);

@@ -271,7 +273,8 @@ EXPORT_SYMBOL(blk_start_queue);
**/
void blk_stop_queue(struct request_queue *q)
{
- __cancel_delayed_work(&q->delay_work);
+ if (__cancel_delayed_work(&q->delay_work))
+ blk_put_queue(q);
queue_flag_set(QUEUE_FLAG_STOPPED, q);
}
EXPORT_SYMBOL(blk_stop_queue);
@@ -297,7 +300,8 @@ EXPORT_SYMBOL(blk_stop_queue);
void blk_sync_queue(struct request_queue *q)
{
del_timer_sync(&q->timeout);
- cancel_delayed_work_sync(&q->delay_work);
+ if (__cancel_delayed_work(&q->delay_work))
+ blk_put_queue(q);
queue_sync_plugs(q);
}
EXPORT_SYMBOL(blk_sync_queue);
@@ -324,7 +328,7 @@ void __blk_run_queue(struct request_queue *q, bool force_kblockd)
if (!force_kblockd && !queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
q->request_fn(q);
queue_flag_clear(QUEUE_FLAG_REENTER, q);
- } else
+ } else if (!blk_get_queue(q))
queue_delayed_work(kblockd_workqueue, &q->delay_work, 0);
}
EXPORT_SYMBOL(__blk_run_queue);
diff --git a/block/blk.h b/block/blk.h
index 6126346..4df474d 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -62,7 +62,8 @@ static inline struct request *__elv_next_request(struct request_queue *q)
return rq;
}

- if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
+ if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags) ||
+ !q->elevator->ops->elevator_dispatch_fn(q, 0))
return NULL;
}
}
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index e44ff64..2e85668 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -323,7 +323,6 @@ static void scsi_device_dev_release_usercontext(struct work_struct *work)
}

if (sdev->request_queue) {
- sdev->request_queue->queuedata = NULL;
/* user context needed to free queue */
scsi_free_queue(sdev->request_queue);
/* temporary expedient, try to catch use of queue lock
@@ -937,6 +936,7 @@ void __scsi_remove_device(struct scsi_device *sdev)
if (sdev->host->hostt->slave_destroy)
sdev->host->hostt->slave_destroy(sdev);
transport_destroy_device(dev);
+ sdev->request_queue->queuedata = NULL;
put_device(dev);
}


2011-04-12 20:44:03

by James Bottomley

Subject: Re: Strange block/scsi/workqueue issue

On Tue, 2011-04-12 at 21:30 +0100, Steven Whitehouse wrote:
> > Hmm, wrong signal for no elevator then. How about this?
> >
> > James
> >
> That seems to do the trick... the box has been booted for several
> minutes now and no sign of anything untoward so far :-)
>
> Below is the cumulative patch which I now have applied to the kernel.
> Many thanks for all your help in debugging this, it's greatly
> appreciated :-)

Heh, you're welcome, but this is for some value of debug. There's still
something strange going on on your box causing extreme delays in the way
we do stuff. I took advantage of the delays to explore some races in
block and SCSI, which is what got fixed ... I've still no idea what the
actual root cause of the delays is.

Now we need to huddle and decide what we're doing in block and SCSI. I
think Tejun's patch, with the moved signal for queue destruction, would
probably work as well.

James

2011-04-13 05:11:46

by Tejun Heo

Subject: Re: Strange block/scsi/workqueue issue

Hey, James.

On Tue, Apr 12, 2011 at 10:15:18AM -0500, James Bottomley wrote:
> So your idea is that all final puts should go through a workqueue? Like
> I said, that would work, but it's not just SCSI ... any call path that
> destroys a queue has to be audited.

Yeap.

> The problem is nothing to do with sleeping context ... it's that any
> work called by the block workqueue can't destroy that queue. In a
> refcounted model, that's a bit nasty.

I can see your point but please read on.

> > Hmmm... maybe but at least I prefer doing explicit shutdown/draining
> > on destruction even if the base data structure is refcounted. Things
> > become much more predictable that way.
>
> It is pretty much instantaneous. Unless we're executing, we cancel the
> work. If the work is already running, we just let it complete instead
> of waiting for it.
>
> Synchronous waits are dangerous because they cause entanglement.

There are two different types of danger involved. One is getting
trapped in a deadlock by recursing and ending up waiting for oneself.
The other is continuing to operate on objects which could be in a
dubious state. I guess my point is that I prefer the former by a large
margin.

The deadlocks are much more reliably reproducible. Lockdep and the soft
hang check can detect them easily, and a single stack dump will point
us right at the problem. The latter case is much trickier. The
problem is more difficult to trigger, and even when it triggers the
effect often isn't obvious. Auditing for correctness is more
difficult too - which fields are safe to access post-mortem? Is there
any chance that the ongoing operation might reach out to hardware
which is already gone or claimed by another software entity?

In this particular case, IMHO it's reasonable for the block layer to
require that the destruction function not be called directly from the
request queue path, although it definitely could have used better
documentation.

Thank you.

--
tejun

2011-04-13 05:18:53

by Tejun Heo

Subject: Re: Strange block/scsi/workqueue issue

Hello,

On Tue, Apr 12, 2011 at 03:43:56PM -0500, James Bottomley wrote:
> Heh, you're welcome, but this is for some value of debug. There's still
> something strange going on on your box causing extreme delays in the way
> we do stuff. I took advantage of the delays to explore some races in
> block and SCSI, which is what got fixed ... I've still no idea what the
> actual root cause of the delays is.

Many thanks for tracking down all the issues. Something is
definitely strange with Steven's setup, though.

> Now we need to huddle and decide what we're doing in block and SCSI. I
> think Tejun's patch, with the moved signal for queue destruction,
> would probably work as well.

Steven, can you be enticed to try the combination? I'll prep a
combined patch and post it but please beware that I'm currently in a
rather zombish state - sleep deprived, jet lagged and malfunctioning
digestion - so the likelihood of doing something stupid is pretty
high.

Thanks.

--
tejun

2011-04-13 06:06:59

by Tejun Heo

Subject: Re: Strange block/scsi/workqueue issue

On Wed, Apr 13, 2011 at 02:18:46PM +0900, Tejun Heo wrote:
> Steven, can you be enticed to try the combination? I'll prep a
> combined patch and post it but please beware that I'm currently in a
> rather zombish state - sleep deprived, jet lagged and malfunctioning
> digestion - so the likelihood of doing something stupid is pretty
> high.

Does the following completely untested version work?

diff --git a/block/blk-core.c b/block/blk-core.c
index 90f22cc..bb27804 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -359,6 +359,14 @@ void blk_put_queue(struct request_queue *q)
*/
void blk_cleanup_queue(struct request_queue *q)
{
+ /* mark @q stopped and dead */
+ mutex_lock(&q->sysfs_lock);
+ spin_lock_irq(&q->queue_lock);
+ queue_flag_set(QUEUE_FLAG_STOPPED, q);
+ queue_flag_set(QUEUE_FLAG_DEAD, q);
+ spin_unlock_irq(&q->queue_lock);
+ mutex_unlock(&q->sysfs_lock);
+
/*
* We know we have process context here, so we can be a little
* cautious and ensure that pending block actions on this device
@@ -368,9 +376,6 @@ void blk_cleanup_queue(struct request_queue *q)
blk_sync_queue(q);

del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer);
- mutex_lock(&q->sysfs_lock);
- queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q);
- mutex_unlock(&q->sysfs_lock);

if (q->elevator)
elevator_exit(q->elevator);
@@ -1456,7 +1461,7 @@ static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors)
* generic_make_request and the drivers it calls may use bi_next if this
* bio happens to be merged with someone else, and may change bi_dev and
* bi_sector for remaps as it sees fit. So the values of these fields
- * should NOT be depended on after the call to generic_make_request.
+ * should NOT be depended on after thes call to generic_make_request.
*/
static inline void __generic_make_request(struct bio *bio)
{
diff --git a/block/blk.h b/block/blk.h
index 6126346..4df474d 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -62,7 +62,8 @@ static inline struct request *__elv_next_request(struct request_queue *q)
return rq;
}

- if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
+ if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags) ||
+ !q->elevator->ops->elevator_dispatch_fn(q, 0))
return NULL;
}
}
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 2aeb2e9..12ebcbc 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -70,6 +70,11 @@
#define CREATE_TRACE_POINTS
#include <trace/events/scsi.h>

+/*
+ * Utility multithreaded workqueue for SCSI.
+ */
+struct workqueue_struct *scsi_wq;
+
static void scsi_done(struct scsi_cmnd *cmd);

/*
@@ -1306,11 +1311,14 @@ MODULE_PARM_DESC(scsi_logging_level, "a bit mask of logging levels");

static int __init init_scsi(void)
{
- int error;
+ int error = -ENOMEM;

+ scsi_wq = alloc_workqueue("scsi", 0, 0);
+ if (!scsi_wq)
+ return error;
error = scsi_init_queue();
if (error)
- return error;
+ goto cleanup_wq;
error = scsi_init_procfs();
if (error)
goto cleanup_queue;
@@ -1342,6 +1350,8 @@ cleanup_procfs:
scsi_exit_procfs();
cleanup_queue:
scsi_exit_queue();
+cleanup_wq:
+ destroy_workqueue(scsi_wq);
printk(KERN_ERR "SCSI subsystem failed to initialize, error = %d\n",
-error);
return error;
@@ -1356,6 +1366,7 @@ static void __exit exit_scsi(void)
scsi_exit_devinfo();
scsi_exit_procfs();
scsi_exit_queue();
+ destroy_workqueue(scsi_wq);
}

subsys_initcall(init_scsi);
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 087821f..4222b58 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -362,6 +362,16 @@ int scsi_is_target_device(const struct device *dev)
}
EXPORT_SYMBOL(scsi_is_target_device);

+static void scsi_target_reap_usercontext(struct work_struct *work)
+{
+ struct scsi_target *starget =
+ container_of(work, struct scsi_target, reap_work);
+
+ transport_remove_device(&starget->dev);
+ device_del(&starget->dev);
+ scsi_target_destroy(starget);
+}
+
static struct scsi_target *__scsi_find_target(struct device *parent,
int channel, uint id)
{
@@ -427,6 +437,7 @@ static struct scsi_target *scsi_alloc_target(struct device *parent,
starget->state = STARGET_CREATED;
starget->scsi_level = SCSI_2;
starget->max_target_blocked = SCSI_DEFAULT_TARGET_BLOCKED;
+ INIT_WORK(&starget->reap_work, scsi_target_reap_usercontext);
retry:
spin_lock_irqsave(shost->host_lock, flags);

@@ -462,21 +473,11 @@ static struct scsi_target *scsi_alloc_target(struct device *parent,
}
/* Unfortunately, we found a dying target; need to
* wait until it's dead before we can get a new one */
+ flush_work(&found_target->reap_work);
put_device(&found_target->dev);
- flush_scheduled_work();
goto retry;
}

-static void scsi_target_reap_usercontext(struct work_struct *work)
-{
- struct scsi_target *starget =
- container_of(work, struct scsi_target, ew.work);
-
- transport_remove_device(&starget->dev);
- device_del(&starget->dev);
- scsi_target_destroy(starget);
-}
-
/**
* scsi_target_reap - check to see if target is in use and destroy if not
* @starget: target to be checked
@@ -507,8 +508,7 @@ void scsi_target_reap(struct scsi_target *starget)
if (state == STARGET_CREATED)
scsi_target_destroy(starget);
else
- execute_in_process_context(scsi_target_reap_usercontext,
- &starget->ew);
+ queue_work(scsi_wq, &starget->reap_work);
}

/**
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index e44ff64..e030091 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -300,7 +300,7 @@ static void scsi_device_dev_release_usercontext(struct work_struct *work)
struct list_head *this, *tmp;
unsigned long flags;

- sdev = container_of(work, struct scsi_device, ew.work);
+ sdev = container_of(work, struct scsi_device, release_work);

parent = sdev->sdev_gendev.parent;
starget = to_scsi_target(parent);
@@ -323,7 +323,6 @@ static void scsi_device_dev_release_usercontext(struct work_struct *work)
}

if (sdev->request_queue) {
- sdev->request_queue->queuedata = NULL;
/* user context needed to free queue */
scsi_free_queue(sdev->request_queue);
/* temporary expedient, try to catch use of queue lock
@@ -343,8 +342,8 @@ static void scsi_device_dev_release_usercontext(struct work_struct *work)
static void scsi_device_dev_release(struct device *dev)
{
struct scsi_device *sdp = to_scsi_device(dev);
- execute_in_process_context(scsi_device_dev_release_usercontext,
- &sdp->ew);
+
+ queue_work(scsi_wq, &sdp->release_work);
}

static struct class sdev_class = {
@@ -937,6 +936,7 @@ void __scsi_remove_device(struct scsi_device *sdev)
if (sdev->host->hostt->slave_destroy)
sdev->host->hostt->slave_destroy(sdev);
transport_destroy_device(dev);
+ sdev->request_queue->queuedata = NULL;
put_device(dev);
}

@@ -1069,6 +1069,8 @@ void scsi_sysfs_device_initialize(struct scsi_device *sdev)
dev_set_name(&sdev->sdev_dev, "%d:%d:%d:%d",
sdev->host->host_no, sdev->channel, sdev->id, sdev->lun);
sdev->scsi_level = starget->scsi_level;
+ INIT_WORK(&sdev->release_work, scsi_device_dev_release_usercontext);
+
transport_setup_device(&sdev->sdev_gendev);
spin_lock_irqsave(shost->host_lock, flags);
list_add_tail(&sdev->same_target_siblings, &starget->devices);
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 2d3ec50..1d11750 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -168,7 +168,7 @@ struct scsi_device {
struct device sdev_gendev,
sdev_dev;

- struct execute_work ew; /* used to get process context on put */
+ struct work_struct release_work; /* for process context on put */

struct scsi_dh_data *scsi_dh_data;
enum scsi_device_state sdev_state;
@@ -259,7 +259,7 @@ struct scsi_target {
#define SCSI_DEFAULT_TARGET_BLOCKED 3

char scsi_level;
- struct execute_work ew;
+ struct work_struct reap_work;
enum scsi_target_state state;
void *hostdata; /* available to low-level driver */
unsigned long starget_data[0]; /* for the transport */
@@ -277,6 +277,8 @@ static inline struct scsi_target *scsi_target(struct scsi_device *sdev)
#define starget_printk(prefix, starget, fmt, a...) \
dev_printk(prefix, &(starget)->dev, fmt, ##a)

+extern struct workqueue_struct *scsi_wq;
+
extern struct scsi_device *__scsi_add_device(struct Scsi_Host *,
uint, uint, uint, void *hostdata);
extern int scsi_add_device(struct Scsi_Host *host, uint channel,

2011-04-13 09:18:07

by Steven Whitehouse

Subject: Re: Strange block/scsi/workqueue issue

Hi,

On Wed, 2011-04-13 at 15:06 +0900, Tejun Heo wrote:
> On Wed, Apr 13, 2011 at 02:18:46PM +0900, Tejun Heo wrote:
> > Steven, can you be enticed to try the combination? I'll prep a
> > combined patch and post it but please beware that I'm currently in a
> > rather zombish state - sleep deprived, jet lagged and malfunctioning
> > digestion - so the likelihood of doing something stupid is pretty
> > high.
>
> Does the following completely untested version work?
>
Almost. With one small change (q->queue_lock is already a pointer to a
spinlock, so it doesn't require an &) it seems to work for me. Let me
know if you'd like me to do any more tests. I've attached an updated
patch below,

Steve.


diff --git a/block/blk-core.c b/block/blk-core.c
index 90f22cc..90de9ac 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -359,6 +359,14 @@ void blk_put_queue(struct request_queue *q)
*/
void blk_cleanup_queue(struct request_queue *q)
{
+ /* mark @q stopped and dead */
+ mutex_lock(&q->sysfs_lock);
+ spin_lock_irq(q->queue_lock);
+ queue_flag_set(QUEUE_FLAG_STOPPED, q);
+ queue_flag_set(QUEUE_FLAG_DEAD, q);
+ spin_unlock_irq(q->queue_lock);
+ mutex_unlock(&q->sysfs_lock);
+
/*
* We know we have process context here, so we can be a little
* cautious and ensure that pending block actions on this device
@@ -368,9 +376,6 @@ void blk_cleanup_queue(struct request_queue *q)
blk_sync_queue(q);

del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer);
- mutex_lock(&q->sysfs_lock);
- queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q);
- mutex_unlock(&q->sysfs_lock);

if (q->elevator)
elevator_exit(q->elevator);
@@ -1456,7 +1461,7 @@ static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors)
* generic_make_request and the drivers it calls may use bi_next if this
* bio happens to be merged with someone else, and may change bi_dev and
* bi_sector for remaps as it sees fit. So the values of these fields
- * should NOT be depended on after the call to generic_make_request.
+ * should NOT be depended on after thes call to generic_make_request.
*/
static inline void __generic_make_request(struct bio *bio)
{
diff --git a/block/blk.h b/block/blk.h
index 6126346..4df474d 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -62,7 +62,8 @@ static inline struct request *__elv_next_request(struct request_queue *q)
return rq;
}

- if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
+ if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags) ||
+ !q->elevator->ops->elevator_dispatch_fn(q, 0))
return NULL;
}
}
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 2aeb2e9..12ebcbc 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -70,6 +70,11 @@
#define CREATE_TRACE_POINTS
#include <trace/events/scsi.h>

+/*
+ * Utility multithreaded workqueue for SCSI.
+ */
+struct workqueue_struct *scsi_wq;
+
static void scsi_done(struct scsi_cmnd *cmd);

/*
@@ -1306,11 +1311,14 @@ MODULE_PARM_DESC(scsi_logging_level, "a bit mask of logging levels");

static int __init init_scsi(void)
{
- int error;
+ int error = -ENOMEM;

+ scsi_wq = alloc_workqueue("scsi", 0, 0);
+ if (!scsi_wq)
+ return error;
error = scsi_init_queue();
if (error)
- return error;
+ goto cleanup_wq;
error = scsi_init_procfs();
if (error)
goto cleanup_queue;
@@ -1342,6 +1350,8 @@ cleanup_procfs:
scsi_exit_procfs();
cleanup_queue:
scsi_exit_queue();
+cleanup_wq:
+ destroy_workqueue(scsi_wq);
printk(KERN_ERR "SCSI subsystem failed to initialize, error = %d\n",
-error);
return error;
@@ -1356,6 +1366,7 @@ static void __exit exit_scsi(void)
scsi_exit_devinfo();
scsi_exit_procfs();
scsi_exit_queue();
+ destroy_workqueue(scsi_wq);
}

subsys_initcall(init_scsi);
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 087821f..4222b58 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -362,6 +362,16 @@ int scsi_is_target_device(const struct device *dev)
}
EXPORT_SYMBOL(scsi_is_target_device);

+static void scsi_target_reap_usercontext(struct work_struct *work)
+{
+ struct scsi_target *starget =
+ container_of(work, struct scsi_target, reap_work);
+
+ transport_remove_device(&starget->dev);
+ device_del(&starget->dev);
+ scsi_target_destroy(starget);
+}
+
static struct scsi_target *__scsi_find_target(struct device *parent,
int channel, uint id)
{
@@ -427,6 +437,7 @@ static struct scsi_target *scsi_alloc_target(struct device *parent,
starget->state = STARGET_CREATED;
starget->scsi_level = SCSI_2;
starget->max_target_blocked = SCSI_DEFAULT_TARGET_BLOCKED;
+ INIT_WORK(&starget->reap_work, scsi_target_reap_usercontext);
retry:
spin_lock_irqsave(shost->host_lock, flags);

@@ -462,21 +473,11 @@ static struct scsi_target *scsi_alloc_target(struct device *parent,
}
/* Unfortunately, we found a dying target; need to
* wait until it's dead before we can get a new one */
+ flush_work(&found_target->reap_work);
put_device(&found_target->dev);
- flush_scheduled_work();
goto retry;
}

-static void scsi_target_reap_usercontext(struct work_struct *work)
-{
- struct scsi_target *starget =
- container_of(work, struct scsi_target, ew.work);
-
- transport_remove_device(&starget->dev);
- device_del(&starget->dev);
- scsi_target_destroy(starget);
-}
-
/**
* scsi_target_reap - check to see if target is in use and destroy if not
* @starget: target to be checked
@@ -507,8 +508,7 @@ void scsi_target_reap(struct scsi_target *starget)
if (state == STARGET_CREATED)
scsi_target_destroy(starget);
else
- execute_in_process_context(scsi_target_reap_usercontext,
- &starget->ew);
+ queue_work(scsi_wq, &starget->reap_work);
}

/**
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index e44ff64..e030091 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -300,7 +300,7 @@ static void scsi_device_dev_release_usercontext(struct work_struct *work)
struct list_head *this, *tmp;
unsigned long flags;

- sdev = container_of(work, struct scsi_device, ew.work);
+ sdev = container_of(work, struct scsi_device, release_work);

parent = sdev->sdev_gendev.parent;
starget = to_scsi_target(parent);
@@ -323,7 +323,6 @@ static void scsi_device_dev_release_usercontext(struct work_struct *work)
}

if (sdev->request_queue) {
- sdev->request_queue->queuedata = NULL;
/* user context needed to free queue */
scsi_free_queue(sdev->request_queue);
/* temporary expedient, try to catch use of queue lock
@@ -343,8 +342,8 @@ static void scsi_device_dev_release_usercontext(struct work_struct *work)
static void scsi_device_dev_release(struct device *dev)
{
struct scsi_device *sdp = to_scsi_device(dev);
- execute_in_process_context(scsi_device_dev_release_usercontext,
- &sdp->ew);
+
+ queue_work(scsi_wq, &sdp->release_work);
}

static struct class sdev_class = {
@@ -937,6 +936,7 @@ void __scsi_remove_device(struct scsi_device *sdev)
if (sdev->host->hostt->slave_destroy)
sdev->host->hostt->slave_destroy(sdev);
transport_destroy_device(dev);
+ sdev->request_queue->queuedata = NULL;
put_device(dev);
}

@@ -1069,6 +1069,8 @@ void scsi_sysfs_device_initialize(struct scsi_device *sdev)
dev_set_name(&sdev->sdev_dev, "%d:%d:%d:%d",
sdev->host->host_no, sdev->channel, sdev->id, sdev->lun);
sdev->scsi_level = starget->scsi_level;
+ INIT_WORK(&sdev->release_work, scsi_device_dev_release_usercontext);
+
transport_setup_device(&sdev->sdev_gendev);
spin_lock_irqsave(shost->host_lock, flags);
list_add_tail(&sdev->same_target_siblings, &starget->devices);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index e483108..23eab47 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -545,18 +545,10 @@ static int gfs2_close(struct inode *inode, struct file *file)
/**
* gfs2_fsync - sync the dirty data for a file (across the cluster)
* @file: the file that points to the dentry (we ignore this)
- * @dentry: the dentry that points to the inode to sync
+ * @datasync: set if we can ignore timestamp changes
*
- * The VFS will flush "normal" data for us. We only need to worry
- * about metadata here. For journaled data, we just do a log flush
- * as we can't avoid it. Otherwise we can just bale out if datasync
- * is set. For stuffed inodes we must flush the log in order to
- * ensure that all data is on disk.
- *
- * The call to write_inode_now() is there to write back metadata and
- * the inode itself. It does also try and write the data, but thats
- * (hopefully) a no-op due to the VFS having already called filemap_fdatawrite()
- * for us.
+ * The VFS will flush data for us. We only need to worry
+ * about metadata here.
*
* Returns: errno
*/
@@ -565,22 +557,20 @@ static int gfs2_fsync(struct file *file, int datasync)
{
struct inode *inode = file->f_mapping->host;
int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC);
- int ret = 0;
-
- if (gfs2_is_jdata(GFS2_I(inode))) {
- gfs2_log_flush(GFS2_SB(inode), GFS2_I(inode)->i_gl);
- return 0;
- }
+ struct gfs2_inode *ip = GFS2_I(inode);
+ int ret;

- if (sync_state != 0) {
- if (!datasync)
- ret = write_inode_now(inode, 0);
+ if (datasync)
+ sync_state &= ~I_DIRTY_SYNC;

- if (gfs2_is_stuffed(GFS2_I(inode)))
- gfs2_log_flush(GFS2_SB(inode), GFS2_I(inode)->i_gl);
+ if (sync_state) {
+ ret = sync_inode_metadata(inode, 1);
+ if (ret)
+ return ret;
+ gfs2_ail_flush(ip->i_gl);
}

- return ret;
+ return 0;
}

/**
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 25eeb2b..7c1b08f 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -28,33 +28,18 @@
#include "trans.h"

/**
- * ail_empty_gl - remove all buffers for a given lock from the AIL
+ * __gfs2_ail_flush - remove all buffers for a given lock from the AIL
* @gl: the glock
*
* None of the buffers should be dirty, locked, or pinned.
*/

-static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
+static void __gfs2_ail_flush(struct gfs2_glock *gl)
{
struct gfs2_sbd *sdp = gl->gl_sbd;
struct list_head *head = &gl->gl_ail_list;
struct gfs2_bufdata *bd;
struct buffer_head *bh;
- struct gfs2_trans tr;
-
- memset(&tr, 0, sizeof(tr));
- tr.tr_revokes = atomic_read(&gl->gl_ail_count);
-
- if (!tr.tr_revokes)
- return;
-
- /* A shortened, inline version of gfs2_trans_begin() */
- tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64));
- tr.tr_ip = (unsigned long)__builtin_return_address(0);
- INIT_LIST_HEAD(&tr.tr_list_buf);
- gfs2_log_reserve(sdp, tr.tr_reserved);
- BUG_ON(current->journal_info);
- current->journal_info = &tr;

spin_lock(&sdp->sd_ail_lock);
while (!list_empty(head)) {
@@ -76,7 +61,47 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
}
gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count));
spin_unlock(&sdp->sd_ail_lock);
+}
+
+
+static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
+{
+ struct gfs2_sbd *sdp = gl->gl_sbd;
+ struct gfs2_trans tr;
+
+ memset(&tr, 0, sizeof(tr));
+ tr.tr_revokes = atomic_read(&gl->gl_ail_count);
+
+ if (!tr.tr_revokes)
+ return;
+
+ /* A shortened, inline version of gfs2_trans_begin() */
+ tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64));
+ tr.tr_ip = (unsigned long)__builtin_return_address(0);
+ INIT_LIST_HEAD(&tr.tr_list_buf);
+ gfs2_log_reserve(sdp, tr.tr_reserved);
+ BUG_ON(current->journal_info);
+ current->journal_info = &tr;
+
+ __gfs2_ail_flush(gl);
+
+ gfs2_trans_end(sdp);
+ gfs2_log_flush(sdp, NULL);
+}

+void gfs2_ail_flush(struct gfs2_glock *gl)
+{
+ struct gfs2_sbd *sdp = gl->gl_sbd;
+ unsigned int revokes = atomic_read(&gl->gl_ail_count);
+ int ret;
+
+ if (!revokes)
+ return;
+
+ ret = gfs2_trans_begin(sdp, 0, revokes);
+ if (ret)
+ return;
+ __gfs2_ail_flush(gl);
gfs2_trans_end(sdp);
gfs2_log_flush(sdp, NULL);
}
diff --git a/fs/gfs2/glops.h b/fs/gfs2/glops.h
index b3aa2e3..6fce409 100644
--- a/fs/gfs2/glops.h
+++ b/fs/gfs2/glops.h
@@ -23,4 +23,6 @@ extern const struct gfs2_glock_operations gfs2_quota_glops;
extern const struct gfs2_glock_operations gfs2_journal_glops;
extern const struct gfs2_glock_operations *gfs2_glops_list[];

+extern void gfs2_ail_flush(struct gfs2_glock *gl);
+
#endif /* __GLOPS_DOT_H__ */
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 2d3ec50..1d11750 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -168,7 +168,7 @@ struct scsi_device {
struct device sdev_gendev,
sdev_dev;

- struct execute_work ew; /* used to get process context on put */
+ struct work_struct release_work; /* for process context on put */

struct scsi_dh_data *scsi_dh_data;
enum scsi_device_state sdev_state;
@@ -259,7 +259,7 @@ struct scsi_target {
#define SCSI_DEFAULT_TARGET_BLOCKED 3

char scsi_level;
- struct execute_work ew;
+ struct work_struct reap_work;
enum scsi_target_state state;
void *hostdata; /* available to low-level driver */
unsigned long starget_data[0]; /* for the transport */
@@ -277,6 +277,8 @@ static inline struct scsi_target *scsi_target(struct scsi_device *sdev)
#define starget_printk(prefix, starget, fmt, a...) \
dev_printk(prefix, &(starget)->dev, fmt, ##a)

+extern struct workqueue_struct *scsi_wq;
+
extern struct scsi_device *__scsi_add_device(struct Scsi_Host *,
uint, uint, uint, void *hostdata);
extern int scsi_add_device(struct Scsi_Host *host, uint channel,

2011-04-13 13:57:18

by Steven Whitehouse

Subject: Re: Strange block/scsi/workqueue issue

Hi,

On Wed, 2011-04-13 at 10:20 +0100, Steven Whitehouse wrote:
> Hi,
>
> On Wed, 2011-04-13 at 15:06 +0900, Tejun Heo wrote:
> > On Wed, Apr 13, 2011 at 02:18:46PM +0900, Tejun Heo wrote:
> > > Steven, can you be enticed to try the combination? I'll prep a
> > > combined patch and post it but please beware that I'm currently in a
> > > rather zombish state - sleep deprived, jet lagged and malfunctioning
> > > digestion - so the likelihood of doing something stupid is pretty
> > > high.
> >
> > Does the following completely untested version work?
> >
> Almost. With one small change (q->queue_lock is already a pointer to a
> spinlock, so it doesn't require an &) it seems to work for me. Let me
> know if you'd like me to do any more tests. I've attached an updated
> patch below,
>
> Steve.
>
Apologies, some gfs2 bits got included by accident. Here is a cleaned up
version,

Steve.

diff --git a/block/blk-core.c b/block/blk-core.c
index 90f22cc..90de9ac 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -359,6 +359,14 @@ void blk_put_queue(struct request_queue *q)
*/
void blk_cleanup_queue(struct request_queue *q)
{
+ /* mark @q stopped and dead */
+ mutex_lock(&q->sysfs_lock);
+ spin_lock_irq(q->queue_lock);
+ queue_flag_set(QUEUE_FLAG_STOPPED, q);
+ queue_flag_set(QUEUE_FLAG_DEAD, q);
+ spin_unlock_irq(q->queue_lock);
+ mutex_unlock(&q->sysfs_lock);
+
/*
* We know we have process context here, so we can be a little
* cautious and ensure that pending block actions on this device
@@ -368,9 +376,6 @@ void blk_cleanup_queue(struct request_queue *q)
blk_sync_queue(q);

del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer);
- mutex_lock(&q->sysfs_lock);
- queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q);
- mutex_unlock(&q->sysfs_lock);

if (q->elevator)
elevator_exit(q->elevator);
@@ -1456,7 +1461,7 @@ static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors)
* generic_make_request and the drivers it calls may use bi_next if this
* bio happens to be merged with someone else, and may change bi_dev and
* bi_sector for remaps as it sees fit. So the values of these fields
- * should NOT be depended on after the call to generic_make_request.
+ * should NOT be depended on after the call to generic_make_request.
*/
static inline void __generic_make_request(struct bio *bio)
{
diff --git a/block/blk.h b/block/blk.h
index 6126346..4df474d 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -62,7 +62,8 @@ static inline struct request *__elv_next_request(struct request_queue *q)
return rq;
}

- if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
+ if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags) ||
+ !q->elevator->ops->elevator_dispatch_fn(q, 0))
return NULL;
}
}
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 2aeb2e9..12ebcbc 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -70,6 +70,11 @@
#define CREATE_TRACE_POINTS
#include <trace/events/scsi.h>

+/*
+ * Utility multithreaded workqueue for SCSI.
+ */
+struct workqueue_struct *scsi_wq;
+
static void scsi_done(struct scsi_cmnd *cmd);

/*
@@ -1306,11 +1311,14 @@ MODULE_PARM_DESC(scsi_logging_level, "a bit mask of logging levels");

static int __init init_scsi(void)
{
- int error;
+ int error = -ENOMEM;

+ scsi_wq = alloc_workqueue("scsi", 0, 0);
+ if (!scsi_wq)
+ return error;
error = scsi_init_queue();
if (error)
- return error;
+ goto cleanup_wq;
error = scsi_init_procfs();
if (error)
goto cleanup_queue;
@@ -1342,6 +1350,8 @@ cleanup_procfs:
scsi_exit_procfs();
cleanup_queue:
scsi_exit_queue();
+cleanup_wq:
+ destroy_workqueue(scsi_wq);
printk(KERN_ERR "SCSI subsystem failed to initialize, error = %d\n",
-error);
return error;
@@ -1356,6 +1366,7 @@ static void __exit exit_scsi(void)
scsi_exit_devinfo();
scsi_exit_procfs();
scsi_exit_queue();
+ destroy_workqueue(scsi_wq);
}

subsys_initcall(init_scsi);
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 087821f..4222b58 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -362,6 +362,16 @@ int scsi_is_target_device(const struct device *dev)
}
EXPORT_SYMBOL(scsi_is_target_device);

+static void scsi_target_reap_usercontext(struct work_struct *work)
+{
+ struct scsi_target *starget =
+ container_of(work, struct scsi_target, reap_work);
+
+ transport_remove_device(&starget->dev);
+ device_del(&starget->dev);
+ scsi_target_destroy(starget);
+}
+
static struct scsi_target *__scsi_find_target(struct device *parent,
int channel, uint id)
{
@@ -427,6 +437,7 @@ static struct scsi_target *scsi_alloc_target(struct device *parent,
starget->state = STARGET_CREATED;
starget->scsi_level = SCSI_2;
starget->max_target_blocked = SCSI_DEFAULT_TARGET_BLOCKED;
+ INIT_WORK(&starget->reap_work, scsi_target_reap_usercontext);
retry:
spin_lock_irqsave(shost->host_lock, flags);

@@ -462,21 +473,11 @@ static struct scsi_target *scsi_alloc_target(struct device *parent,
}
/* Unfortunately, we found a dying target; need to
* wait until it's dead before we can get a new one */
+ flush_work(&found_target->reap_work);
put_device(&found_target->dev);
- flush_scheduled_work();
goto retry;
}

-static void scsi_target_reap_usercontext(struct work_struct *work)
-{
- struct scsi_target *starget =
- container_of(work, struct scsi_target, ew.work);
-
- transport_remove_device(&starget->dev);
- device_del(&starget->dev);
- scsi_target_destroy(starget);
-}
-
/**
* scsi_target_reap - check to see if target is in use and destroy if not
* @starget: target to be checked
@@ -507,8 +508,7 @@ void scsi_target_reap(struct scsi_target *starget)
if (state == STARGET_CREATED)
scsi_target_destroy(starget);
else
- execute_in_process_context(scsi_target_reap_usercontext,
- &starget->ew);
+ queue_work(scsi_wq, &starget->reap_work);
}

/**
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index e44ff64..e030091 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -300,7 +300,7 @@ static void scsi_device_dev_release_usercontext(struct work_struct *work)
struct list_head *this, *tmp;
unsigned long flags;

- sdev = container_of(work, struct scsi_device, ew.work);
+ sdev = container_of(work, struct scsi_device, release_work);

parent = sdev->sdev_gendev.parent;
starget = to_scsi_target(parent);
@@ -323,7 +323,6 @@ static void scsi_device_dev_release_usercontext(struct work_struct *work)
}

if (sdev->request_queue) {
- sdev->request_queue->queuedata = NULL;
/* user context needed to free queue */
scsi_free_queue(sdev->request_queue);
/* temporary expedient, try to catch use of queue lock
@@ -343,8 +342,8 @@ static void scsi_device_dev_release_usercontext(struct work_struct *work)
static void scsi_device_dev_release(struct device *dev)
{
struct scsi_device *sdp = to_scsi_device(dev);
- execute_in_process_context(scsi_device_dev_release_usercontext,
- &sdp->ew);
+
+ queue_work(scsi_wq, &sdp->release_work);
}

static struct class sdev_class = {
@@ -937,6 +936,7 @@ void __scsi_remove_device(struct scsi_device *sdev)
if (sdev->host->hostt->slave_destroy)
sdev->host->hostt->slave_destroy(sdev);
transport_destroy_device(dev);
+ sdev->request_queue->queuedata = NULL;
put_device(dev);
}

@@ -1069,6 +1069,8 @@ void scsi_sysfs_device_initialize(struct scsi_device *sdev)
dev_set_name(&sdev->sdev_dev, "%d:%d:%d:%d",
sdev->host->host_no, sdev->channel, sdev->id, sdev->lun);
sdev->scsi_level = starget->scsi_level;
+ INIT_WORK(&sdev->release_work, scsi_device_dev_release_usercontext);
+
transport_setup_device(&sdev->sdev_gendev);
spin_lock_irqsave(shost->host_lock, flags);
list_add_tail(&sdev->same_target_siblings, &starget->devices);
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 2d3ec50..1d11750 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -168,7 +168,7 @@ struct scsi_device {
struct device sdev_gendev,
sdev_dev;

- struct execute_work ew; /* used to get process context on put */
+ struct work_struct release_work; /* for process context on put */

struct scsi_dh_data *scsi_dh_data;
enum scsi_device_state sdev_state;
@@ -259,7 +259,7 @@ struct scsi_target {
#define SCSI_DEFAULT_TARGET_BLOCKED 3

char scsi_level;
- struct execute_work ew;
+ struct work_struct reap_work;
enum scsi_target_state state;
void *hostdata; /* available to low-level driver */
unsigned long starget_data[0]; /* for the transport */
@@ -277,6 +277,8 @@ static inline struct scsi_target *scsi_target(struct scsi_device *sdev)
#define starget_printk(prefix, starget, fmt, a...) \
dev_printk(prefix, &(starget)->dev, fmt, ##a)

+extern struct workqueue_struct *scsi_wq;
+
extern struct scsi_device *__scsi_add_device(struct Scsi_Host *,
uint, uint, uint, void *hostdata);
extern int scsi_add_device(struct Scsi_Host *host, uint channel,

2011-04-13 14:15:28

by James Bottomley

Subject: Re: Strange block/scsi/workqueue issue

On Wed, 2011-04-13 at 14:11 +0900, Tejun Heo wrote:
> > > Hmmm... maybe but at least I prefer doing explicit shutdown/draining
> > > on destruction even if the base data structure is refcounted. Things
> > > become much more predictable that way.
> >
> > It is pretty much instantaneous. Unless we're executing, we cancel the
> > work. If the work is already running, we just let it complete instead
> > of waiting for it.
> >
> > Synchronous waits are dangerous because they cause entanglement.
>
> There are two different types of dangers involved. One is of getting
> trapped into deadlock by recursing and ending up waiting for oneself.
> The other of continuing operation on objects which could be in dubious
> state. I guess my point is that I prefer the former by a large
> margin.
>
> The deadlocks are more reliable in reproducibility. Lockdep and soft
> hang check can detect them easily and a single stack dump will point
> us right to where the problem is. The latter is much trickier.

I agree, but this is a bit of a false dichotomy. The hang check will only
detect a thread waiting on itself. Even in the flush model, we still
have to detect inappropriate object use because others may still have
references.

So, in the sync model, on blk_cleanup_queue() you flush the pending
requests and destroy the elevator. However, because the queue is
refcounted, you still have to cope with the case where one of the
reference holders submits more I/O or does anything else with the queue.
This is what I don't like about the sync model: it shuts down various
bits before the refcount goes to zero. Now we don't have a fully
functional queue, so we need state guards on all the entry points to
detect this and error out (that's what some of the QUEUE_FLAG_DEAD
checks we put in the patch do).

In the async model, you can either do as above (state guards on the
entry points) and impose a shutting down state, or you can delay
destruction until final put. The former is an identical solution to the
synchronous one, except that you don't have the flush. The latter loses
the state guard on entry points requirements (because the queue is
always fully functional until final put.
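
The state-guard pattern under discussion can be sketched in miniature as follows. This is an illustrative user-space C model, not the real block-layer API: `mini_queue`, `mini_submit` and `mini_cleanup` are made-up names standing in for a refcounted queue, its entry points, and blk_cleanup_queue().

```c
#include <assert.h>
#include <stdbool.h>

/* Minimal model of the state-guard pattern: a refcounted queue is
 * marked dead at cleanup time, and every entry point checks the flag
 * before touching state that cleanup has torn down.  All names here
 * are illustrative, not the real block API. */

enum { QUEUE_FLAG_DEAD = 1u << 0 };

struct mini_queue {
	unsigned int flags;
	int refcount;
	int pending;	/* stands in for elevator/request state */
};

/* Entry point: reject work once the queue is marked dead. */
static bool mini_submit(struct mini_queue *q)
{
	if (q->flags & QUEUE_FLAG_DEAD)
		return false;	/* state guard: error out, touch nothing */
	q->pending++;
	return true;
}

/* Cleanup runs before the last reference is dropped, so remaining
 * reference holders may still call mini_submit() afterwards; the
 * guard above is what makes that safe. */
static void mini_cleanup(struct mini_queue *q)
{
	q->flags |= QUEUE_FLAG_DEAD;
	q->pending = 0;		/* drain */
}
```

The point of the model is that cleanup and final put are decoupled: late callers are rejected rather than operating on a half-torn-down queue.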

> The
> problem is more difficult to trigger and even when it triggers the
> effect often wouldn't be obvious. Auditing for correctness is more
> difficult too - which fields are safe to access post-mortem?

But in a refcounted model you always have to expect postmortem
operations ... you just have to make sure they're properly rejected.
This is true for both sync or async if you impose a dying or dead state
on the model.

> Is there
> any chance that the ongoing operation might reach out to hardware
> which is already gone or claimed by another software entity?

Yes ... in fact SCSI fully expects this ... that's why there's a dead
queue check in the SCSI request function, using NULL ->queuedata as the
signal (another thing that wasn't working quite right).

We tear down the device in scsi_remove_device() including destroying the
queue. If we go with the sync model for block, I'd actually move
blk_cleanup_queue() into that function. There's no real point delaying
queue destruction to final put of the SCSI object, since we won't accept
I/O after scsi_remove_device() returns.

> In this particular case, IMHO it's reasonable for block layer to
> require that the destruction function not to be called directly from
> request queue path although it definitely could have used better
> documentation.

I think we've both stated our cases, so it's time for Jens to decide
what he wants to do. Given that block already has a state model which
includes some QUEUE_FLAG_DEAD state guard checks, it probably makes
sense to enhance that, rather than to delay all destruction until final
put.

James

2011-04-13 17:01:25

by James Bottomley

Subject: Re: Strange block/scsi/workqueue issue

While you still have the problematic system, can you try this patch? It
avoids changing anything in block (other than to add a missing state
guard for the elv_next_request). If it works, we can defer the sync vs
async discussion and use it for a -stable fix.

Thanks,

James

---

diff --git a/block/blk.h b/block/blk.h
index c8db371..11d0d25 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -62,7 +62,8 @@ static inline struct request *__elv_next_request(struct request_queue *q)
return rq;
}

- if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
+ if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags) ||
+ !q->elevator->ops->elevator_dispatch_fn(q, 0))
return NULL;
}
}
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index e44ff64..5aa4246 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -322,14 +322,9 @@ static void scsi_device_dev_release_usercontext(struct work_struct *work)
kfree(evt);
}

- if (sdev->request_queue) {
- sdev->request_queue->queuedata = NULL;
- /* user context needed to free queue */
- scsi_free_queue(sdev->request_queue);
- /* temporary expedient, try to catch use of queue lock
- * after free of sdev */
- sdev->request_queue = NULL;
- }
+ /* temporary expedient, try to catch use of queue lock after
+ * free of sdev */
+ sdev->request_queue = NULL;

scsi_target_reap(scsi_target(sdev));

@@ -937,6 +932,11 @@ void __scsi_remove_device(struct scsi_device *sdev)
if (sdev->host->hostt->slave_destroy)
sdev->host->hostt->slave_destroy(sdev);
transport_destroy_device(dev);
+ /* Setting this to NULL causes the request function to reject
+ * any I/O requests */
+ sdev->request_queue->queuedata = NULL;
+ /* Freeing the queue signals to block that we're done */
+ scsi_free_queue(sdev->request_queue);
put_device(dev);
}


2011-04-13 19:32:46

by Steven Whitehouse

Subject: Re: Strange block/scsi/workqueue issue

Hi,

On Wed, 2011-04-13 at 12:01 -0500, James Bottomley wrote:
> While you still have the problematic system, can you try this patch? It
> avoids changing anything in block (other than to add a missing state
> guard for the elv_next_request). If it works, we can defer the sync vs
> async discussion and use it for a -stable fix.
>
> Thanks,
>
> James
>
> ---
>
Yes, that seems to work too,

Steve.

> diff --git a/block/blk.h b/block/blk.h
> index c8db371..11d0d25 100644
> --- a/block/blk.h
> +++ b/block/blk.h
> @@ -62,7 +62,8 @@ static inline struct request *__elv_next_request(struct request_queue *q)
> return rq;
> }
>
> - if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
> + if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags) ||
> + !q->elevator->ops->elevator_dispatch_fn(q, 0))
> return NULL;
> }
> }
> diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
> index e44ff64..5aa4246 100644
> --- a/drivers/scsi/scsi_sysfs.c
> +++ b/drivers/scsi/scsi_sysfs.c
> @@ -322,14 +322,9 @@ static void scsi_device_dev_release_usercontext(struct work_struct *work)
> kfree(evt);
> }
>
> - if (sdev->request_queue) {
> - sdev->request_queue->queuedata = NULL;
> - /* user context needed to free queue */
> - scsi_free_queue(sdev->request_queue);
> - /* temporary expedient, try to catch use of queue lock
> - * after free of sdev */
> - sdev->request_queue = NULL;
> - }
> + /* temporary expedient, try to catch use of queue lock after
> + * free of sdev */
> + sdev->request_queue = NULL;
>
> scsi_target_reap(scsi_target(sdev));
>
> @@ -937,6 +932,11 @@ void __scsi_remove_device(struct scsi_device *sdev)
> if (sdev->host->hostt->slave_destroy)
> sdev->host->hostt->slave_destroy(sdev);
> transport_destroy_device(dev);
> + /* Setting this to NULL causes the request function to reject
> + * any I/O requests */
> + sdev->request_queue->queuedata = NULL;
> + /* Freeing the queue signals to block that we're done */
> + scsi_free_queue(sdev->request_queue);
> put_device(dev);
> }
>
>
>

2011-04-13 20:12:40

by Jens Axboe

Subject: Re: Strange block/scsi/workqueue issue

On 2011-04-13 19:01, James Bottomley wrote:
> While you still have the problematic system, can you try this patch? It
> avoids changing anything in block (other than to add a missing state
> guard for the elv_next_request). If it works, we can defer the sync vs
> async discussion and use it for a -stable fix.
>
> Thanks,
>
> James
>
> ---
>
> diff --git a/block/blk.h b/block/blk.h
> index c8db371..11d0d25 100644
> --- a/block/blk.h
> +++ b/block/blk.h
> @@ -62,7 +62,8 @@ static inline struct request *__elv_next_request(struct request_queue *q)
> return rq;
> }
>
> - if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
> + if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags) ||
> + !q->elevator->ops->elevator_dispatch_fn(q, 0))
> return NULL;
> }
> }
> diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
> index e44ff64..5aa4246 100644
> --- a/drivers/scsi/scsi_sysfs.c
> +++ b/drivers/scsi/scsi_sysfs.c
> @@ -322,14 +322,9 @@ static void scsi_device_dev_release_usercontext(struct work_struct *work)
> kfree(evt);
> }
>
> - if (sdev->request_queue) {
> - sdev->request_queue->queuedata = NULL;
> - /* user context needed to free queue */
> - scsi_free_queue(sdev->request_queue);
> - /* temporary expedient, try to catch use of queue lock
> - * after free of sdev */
> - sdev->request_queue = NULL;
> - }
> + /* temporary expedient, try to catch use of queue lock after
> + * free of sdev */
> + sdev->request_queue = NULL;
>
> scsi_target_reap(scsi_target(sdev));
>
> @@ -937,6 +932,11 @@ void __scsi_remove_device(struct scsi_device *sdev)
> if (sdev->host->hostt->slave_destroy)
> sdev->host->hostt->slave_destroy(sdev);
> transport_destroy_device(dev);
> + /* Setting this to NULL causes the request function to reject
> + * any I/O requests */
> + sdev->request_queue->queuedata = NULL;
> + /* Freeing the queue signals to block that we're done */
> + scsi_free_queue(sdev->request_queue);
> put_device(dev);
> }

This patch looks pretty clean. Shouldn't you serialize that ->queuedata
= NULL assignment with the queue lock, though?

--
Jens Axboe

2011-04-13 20:17:50

by James Bottomley

Subject: Re: Strange block/scsi/workqueue issue

On Wed, 2011-04-13 at 22:12 +0200, Jens Axboe wrote:
> On 2011-04-13 19:01, James Bottomley wrote:
> > [quoted patch trimmed]
>
> This patch looks pretty clean. Shouldn't you serialize that ->queuedata
> = NULL assignment with the queue lock, though?

pointer assignment is atomic, isn't it? As in on a 32 bit arch, it
definitely is, and I thought on 64 bits it also was. So a simultaneous
reader either sees previous value or NULL.
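
The discipline this argument relies on can be sketched as below. This is a plain user-space C model with made-up names (`mini_queue`, `mini_request_fn`); in kernel code the load and store would typically go through ACCESS_ONCE() to stop the compiler from re-reading the pointer, which plain C stands in for here.

```c
#include <assert.h>
#include <stddef.h>

/* The writer clears ->queuedata with a single aligned pointer store,
 * and the reader loads the pointer exactly once into a local before
 * checking it, so it acts on either the old value or NULL, never a
 * torn or re-read mix. */

struct mini_sdev { int id; };

struct mini_queue {
	void *queuedata;
};

/* Request function: snapshot the pointer, then NULL-check the copy. */
static int mini_request_fn(struct mini_queue *q)
{
	struct mini_sdev *sdev = q->queuedata;	/* single load */

	if (!sdev)
		return -1;	/* device gone: reject the I/O */
	return sdev->id;	/* safe: uses the snapshot, not q->queuedata */
}
```

Note the pattern falls apart if the reader dereferences `q->queuedata` twice: the value could change to NULL between the check and the use.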

James


2011-04-22 18:02:08

by Tejun Heo

Subject: Re: Strange block/scsi/workqueue issue

Hello, guys.

On Wed, Apr 13, 2011 at 03:17:46PM -0500, James Bottomley wrote:
> On Wed, 2011-04-13 at 22:12 +0200, Jens Axboe wrote:
> > This patch looks pretty clean. Shouldn't you serialize that ->queuedata
> > = NULL assignment with the queue lock, though?
>
> pointer assignment is atomic, isn't it? As in on a 32 bit arch, it
> definitely is, and I thought on 64 bits it also was. So a simultaneous
> reader either sees previous value or NULL.

What was the conclusion of this thread? James' fix seems minimal and
good for this cycle but I don't see it there yet. Jens, James?

Thanks.

--
tejun

2011-04-22 18:03:18

by Tejun Heo

Subject: Re: Strange block/scsi/workqueue issue

On Wed, Apr 13, 2011 at 03:00:08PM +0100, Steven Whitehouse wrote:
> Apologies, some gfs2 bits got included by accident. Here is a cleaned up
> version,

Great, thanks. I'll clean it up and try to get it included for the
next merge window.

Thank you.

--
tejun

2011-04-22 18:06:23

by James Bottomley

Subject: Re: Strange block/scsi/workqueue issue

On Fri, 2011-04-22 at 20:01 +0200, Tejun Heo wrote:
> Hello, guys.
>
> On Wed, Apr 13, 2011 at 03:17:46PM -0500, James Bottomley wrote:
> > On Wed, 2011-04-13 at 22:12 +0200, Jens Axboe wrote:
> > > This patch looks pretty clean. Shouldn't you serialize that ->queuedata
> > > = NULL assignment with the queue lock, though?
> >
> > pointer assignment is atomic, isn't it? As in on a 32 bit arch, it
> > definitely is, and I thought on 64 bits it also was. So a simultaneous
> > reader either sees previous value or NULL.
>
> What was the conclusion of this thread? James' fix seems minimal and
> good for this cycle but I don't see it there yet. Jens, James?

Sorry, forgot to cc you.

I've proposed the minimal fixes for this cycle here:

http://marc.info/?l=linux-scsi&m=130348665328233
http://marc.info/?l=linux-scsi&m=130348673628282
http://marc.info/?l=linux-scsi&m=130348680328356

And I'll merge at least the scsi one.

But there's been no long term conclusion yet.

James

2011-04-22 18:30:21

by Tejun Heo

Subject: Re: Strange block/scsi/workqueue issue

On Fri, Apr 22, 2011 at 01:06:18PM -0500, James Bottomley wrote:
> I've proposed the minimal fixes for this cycle here:
>
> http://marc.info/?l=linux-scsi&m=130348665328233
> http://marc.info/?l=linux-scsi&m=130348673628282
> http://marc.info/?l=linux-scsi&m=130348680328356
>
> And I'll merge at least the scsi one.

Great, looks good to me too.

> But there's been no long term conclusion yet.

I'll clean up the workqueue conversion patch and try again.

Thank you.

--
tejun

2011-05-31 06:16:07

by Anton V. Boyarshinov

Subject: Re: Strange block/scsi/workqueue issue

Hi

> While you still have the problematic system, can you try this patch?
It seems that the scsi_sysfs.c half of this patch is in 2.6.39, but
not the blk.h part. Now we have frequent panics caused by a NULL
q->elevator->ops in block/blk.h.

The panic is triggered by connecting a USB storage device without a
medium (card reader, Android phone). We have seen it on different
computers with different kernel builds. The panics are most frequent on
kernel builds with forced preemption, under relatively heavy load (for
example, during boot).
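
The failure mode can be modelled in miniature: once the queue is torn down its elevator pointer may be NULL, and the blk.h hunk puts a QUEUE_FLAG_DEAD test first so that `||` short-circuiting guarantees the elevator is never dereferenced. The following is an illustrative user-space C sketch, not the real block code; all names are made up.

```c
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>

enum { QUEUE_FLAG_DEAD = 1u << 0 };

struct mini_elevator { int (*dispatch)(void); };

struct mini_queue {
	unsigned int flags;
	struct mini_elevator *elevator;	/* NULL after teardown */
};

/* Stand-in dispatcher that always has a request to offer. */
static int mini_dispatch_one(void)
{
	return 1;
}

static bool mini_dead(const struct mini_queue *q)
{
	return q->flags & QUEUE_FLAG_DEAD;
}

/* Guarded dispatch: returns -1 ("no request") for a dead queue
 * without ever touching q->elevator, because || evaluates the
 * dead check first and short-circuits. */
static int mini_next_request(struct mini_queue *q)
{
	if (mini_dead(q) || !q->elevator->dispatch())
		return -1;
	return 0;
}
```

Without the first test, the dead-queue case dereferences the NULL elevator pointer, which matches the panic signature described above.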

Regards, Anton

> It avoids changing anything in block (other than to add a missing
> state guard for the elv_next_request). If it works, we can defer the
> sync vs async discussion and use it for a -stable fix.
>
> Thanks,
>
> James
>
> ---
>
> diff --git a/block/blk.h b/
> index c8db371..11d0d25 100644
> --- a/block/blk.h
> +++ b/block/blk.h
> @@ -62,7 +62,8 @@ static inline struct request
> *__elv_next_request(struct request_queue *q) return rq;
> }
>
> - if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
> + if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags) ||
> + !q->elevator->ops->elevator_dispatch_fn(q, 0))
> return NULL;
> }
> }
> diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
> index e44ff64..5aa4246 100644
> --- a/drivers/scsi/scsi_sysfs.c
> +++ b/drivers/scsi/scsi_sysfs.c
> @@ -322,14 +322,9 @@ static void
> scsi_device_dev_release_usercontext(struct work_struct *work)
> kfree(evt); }
>
> - if (sdev->request_queue) {
> - sdev->request_queue->queuedata = NULL;
> - /* user context needed to free queue */
> - scsi_free_queue(sdev->request_queue);
> - /* temporary expedient, try to catch use of queue
> lock
> - * after free of sdev */
> - sdev->request_queue = NULL;
> - }
> + /* temporary expedient, try to catch use of queue lock after
> + * free of sdev */
> + sdev->request_queue = NULL;
>
> scsi_target_reap(scsi_target(sdev));
>
> @@ -937,6 +932,11 @@ void __scsi_remove_device(struct scsi_device
> *sdev) if (sdev->host->hostt->slave_destroy)
> sdev->host->hostt->slave_destroy(sdev);
> transport_destroy_device(dev);
> + /* Setting this to NULL causes the request function to reject
> + * any I/O requests */
> + sdev->request_queue->queuedata = NULL;
> + /* Freeing the queue signals to block that we're done */
> + scsi_free_queue(sdev->request_queue);
> put_device(dev);
> }
>
>