__blk_run_queue() automatically either calls q->request_fn() directly
or schedules kblockd depending on whether the function is recursed.
blk-flush implementation needs to be able to explicitly choose
kblockd. Add @force_kblockd.
All the current users are converted to specify %false for the
parameter and this patch doesn't introduce any behavior change.
stable: This is prerequisite for fixing ide oops caused by the new
blk-flush implementation.
Signed-off-by: Tejun Heo <[email protected]>
Cc: Jan Beulich <[email protected]>
Cc: James Bottomley <[email protected]>
Cc: [email protected]
---
block/blk-core.c | 11 ++++++-----
block/blk-flush.c | 2 +-
block/cfq-iosched.c | 6 +++---
block/elevator.c | 4 ++--
drivers/scsi/scsi_lib.c | 2 +-
drivers/scsi/scsi_transport_fc.c | 2 +-
include/linux/blkdev.h | 2 +-
7 files changed, 15 insertions(+), 14 deletions(-)
Index: work/block/blk-core.c
===================================================================
--- work.orig/block/blk-core.c
+++ work/block/blk-core.c
@@ -352,7 +352,7 @@ void blk_start_queue(struct request_queu
WARN_ON(!irqs_disabled());
queue_flag_clear(QUEUE_FLAG_STOPPED, q);
- __blk_run_queue(q);
+ __blk_run_queue(q, false);
}
EXPORT_SYMBOL(blk_start_queue);
@@ -403,13 +403,14 @@ EXPORT_SYMBOL(blk_sync_queue);
/**
* __blk_run_queue - run a single device queue
* @q: The queue to run
+ * @force_kblockd: Don't run @q->request_fn directly. Use kblockd.
*
* Description:
* See @blk_run_queue. This variant must be called with the queue lock
* held and interrupts disabled.
*
*/
-void __blk_run_queue(struct request_queue *q)
+void __blk_run_queue(struct request_queue *q, bool force_kblockd)
{
blk_remove_plug(q);
@@ -423,7 +424,7 @@ void __blk_run_queue(struct request_queu
* Only recurse once to avoid overrunning the stack, let the unplug
* handling reinvoke the handler shortly if we already got there.
*/
- if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
+ if (!force_kblockd && !queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
q->request_fn(q);
queue_flag_clear(QUEUE_FLAG_REENTER, q);
} else {
@@ -446,7 +447,7 @@ void blk_run_queue(struct request_queue
unsigned long flags;
spin_lock_irqsave(q->queue_lock, flags);
- __blk_run_queue(q);
+ __blk_run_queue(q, false);
spin_unlock_irqrestore(q->queue_lock, flags);
}
EXPORT_SYMBOL(blk_run_queue);
@@ -1053,7 +1054,7 @@ void blk_insert_request(struct request_q
drive_stat_acct(rq, 1);
__elv_add_request(q, rq, where, 0);
- __blk_run_queue(q);
+ __blk_run_queue(q, false);
spin_unlock_irqrestore(q->queue_lock, flags);
}
EXPORT_SYMBOL(blk_insert_request);
Index: work/block/blk-flush.c
===================================================================
--- work.orig/block/blk-flush.c
+++ work/block/blk-flush.c
@@ -69,7 +69,7 @@ static void blk_flush_complete_seq_end_i
* queue. Kick the queue in those cases.
*/
if (was_empty && next_rq)
- __blk_run_queue(q);
+ __blk_run_queue(q, false);
}
static void pre_flush_end_io(struct request *rq, int error)
Index: work/block/cfq-iosched.c
===================================================================
--- work.orig/block/cfq-iosched.c
+++ work/block/cfq-iosched.c
@@ -3355,7 +3355,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, s
cfqd->busy_queues > 1) {
cfq_del_timer(cfqd, cfqq);
cfq_clear_cfqq_wait_request(cfqq);
- __blk_run_queue(cfqd->queue);
+ __blk_run_queue(cfqd->queue, false);
} else {
cfq_blkiocg_update_idle_time_stats(
&cfqq->cfqg->blkg);
@@ -3370,7 +3370,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, s
* this new queue is RT and the current one is BE
*/
cfq_preempt_queue(cfqd, cfqq);
- __blk_run_queue(cfqd->queue);
+ __blk_run_queue(cfqd->queue, false);
}
}
@@ -3731,7 +3731,7 @@ static void cfq_kick_queue(struct work_s
struct request_queue *q = cfqd->queue;
spin_lock_irq(q->queue_lock);
- __blk_run_queue(cfqd->queue);
+ __blk_run_queue(cfqd->queue, false);
spin_unlock_irq(q->queue_lock);
}
Index: work/block/elevator.c
===================================================================
--- work.orig/block/elevator.c
+++ work/block/elevator.c
@@ -602,7 +602,7 @@ void elv_quiesce_start(struct request_qu
*/
elv_drain_elevator(q);
while (q->rq.elvpriv) {
- __blk_run_queue(q);
+ __blk_run_queue(q, false);
spin_unlock_irq(q->queue_lock);
msleep(10);
spin_lock_irq(q->queue_lock);
@@ -651,7 +651,7 @@ void elv_insert(struct request_queue *q,
* with anything. There's no point in delaying queue
* processing.
*/
- __blk_run_queue(q);
+ __blk_run_queue(q, false);
break;
case ELEVATOR_INSERT_SORT:
Index: work/drivers/scsi/scsi_lib.c
===================================================================
--- work.orig/drivers/scsi/scsi_lib.c
+++ work/drivers/scsi/scsi_lib.c
@@ -443,7 +443,7 @@ static void scsi_run_queue(struct reques
&sdev->request_queue->queue_flags);
if (flagset)
queue_flag_set(QUEUE_FLAG_REENTER, sdev->request_queue);
- __blk_run_queue(sdev->request_queue);
+ __blk_run_queue(sdev->request_queue, false);
if (flagset)
queue_flag_clear(QUEUE_FLAG_REENTER, sdev->request_queue);
spin_unlock(sdev->request_queue->queue_lock);
Index: work/drivers/scsi/scsi_transport_fc.c
===================================================================
--- work.orig/drivers/scsi/scsi_transport_fc.c
+++ work/drivers/scsi/scsi_transport_fc.c
@@ -3829,7 +3829,7 @@ fc_bsg_goose_queue(struct fc_rport *rpor
!test_bit(QUEUE_FLAG_REENTER, &rport->rqst_q->queue_flags);
if (flagset)
queue_flag_set(QUEUE_FLAG_REENTER, rport->rqst_q);
- __blk_run_queue(rport->rqst_q);
+ __blk_run_queue(rport->rqst_q, false);
if (flagset)
queue_flag_clear(QUEUE_FLAG_REENTER, rport->rqst_q);
spin_unlock_irqrestore(rport->rqst_q->queue_lock, flags);
Index: work/include/linux/blkdev.h
===================================================================
--- work.orig/include/linux/blkdev.h
+++ work/include/linux/blkdev.h
@@ -699,7 +699,7 @@ extern void blk_start_queue(struct reque
extern void blk_stop_queue(struct request_queue *q);
extern void blk_sync_queue(struct request_queue *q);
extern void __blk_stop_queue(struct request_queue *q);
-extern void __blk_run_queue(struct request_queue *);
+extern void __blk_run_queue(struct request_queue *q, bool force_kblockd);
extern void blk_run_queue(struct request_queue *);
extern int blk_rq_map_user(struct request_queue *, struct request *,
struct rq_map_data *, void __user *, unsigned long,
blk-flush decomposes a flush into sequence of multiple requests. On
completion of a request, the next one is queued; however, block layer
must not implicitly call into q->request_fn() directly from completion
path. This makes the queue behave unexpectedly when seen from the
drivers and violates the assumption that q->request_fn() is called
with process context + queue_lock.
This patch makes the following two changes to blk-flush to make sure
q->request_fn() is not called directly from the request completion path.
- blk_flush_complete_seq_end_io() now asks __blk_run_queue() to always
use kblockd instead of calling directly into q->request_fn().
- queue_next_fseq() uses ELEVATOR_INSERT_REQUEUE instead of
ELEVATOR_INSERT_FRONT so that elv_insert() doesn't try to unplug the
request queue directly.
Reported by Jan in the following threads.
http://thread.gmane.org/gmane.linux.ide/48778
http://thread.gmane.org/gmane.linux.ide/48786
stable: applicable to v2.6.37.
Signed-off-by: Tejun Heo <[email protected]>
Reported-by: Jan Beulich <[email protected]>
Cc: "David S. Miller" <[email protected]>
Cc: [email protected]
---
Jens, this is applicable to v2.6.37 and 38. The new implementation
for 39 would need similar fix but I couldn't find where the tree is.
Which branch is it?
Thanks.
block/blk-flush.c | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
Index: work/block/blk-flush.c
===================================================================
--- work.orig/block/blk-flush.c
+++ work/block/blk-flush.c
@@ -66,10 +66,12 @@ static void blk_flush_complete_seq_end_i
/*
* Moving a request silently to empty queue_head may stall the
- * queue. Kick the queue in those cases.
+ * queue. Kick the queue in those cases. This function is called
+ * from request completion path and calling directly into
+ * request_fn may confuse the driver. Always use kblockd.
*/
if (was_empty && next_rq)
- __blk_run_queue(q, false);
+ __blk_run_queue(q, true);
}
static void pre_flush_end_io(struct request *rq, int error)
@@ -130,7 +132,7 @@ static struct request *queue_next_fseq(s
BUG();
}
- elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
+ elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE);
return rq;
}
Hi Tejun,
On Thu, Feb 17, 2011 at 6:16 AM, Tejun Heo <[email protected]> wrote:
> Jens, this is applicable to v2.6.37 and 38. The new implementation
> for 39 would need similar fix but I couldn't find where the tree is.
> Which branch is it?
for-2.6.39/core
On Thu, Feb 17, 2011 at 10:01:51AM -0500, Mike Snitzer wrote:
> Hi Tejun,
>
> On Thu, Feb 17, 2011 at 6:16 AM, Tejun Heo <[email protected]> wrote:
>
> > Jens, this is applicable to v2.6.37 and 38. The new implementation
> > for 39 would need similar fix but I couldn't find where the tree is.
> > Which branch is it?
>
> for-2.6.39/core
Right, thanks. Jens, after you apply the two fixes for 2.6.38, I can
create a merge branch for for-2.6.39/core which you can pull. Would
that work for you?
Thanks.
--
tejun
>>> On 17.02.11 at 12:15, Tejun Heo <[email protected]> wrote:
> __blk_run_queue() automatically either calls q->request_fn() directly
> or schedules kblockd depending on whether the function is recursed.
> blk-flush implementation needs to be able to explicitly choose
> kblockd. Add @force_kblockd.
>
> All the current users are converted to specify %false for the
> parameter and this patch doesn't introduce any behavior change.
>
> stable: This is prerequisite for fixing ide oops caused by the new
> blk-flush implementation.
>
> Signed-off-by: Tejun Heo <[email protected]>
> Cc: Jan Beulich <[email protected]>
> Cc: James Bottomley <[email protected]>
> Cc: [email protected]
May I ask what the disposition of this and the second patch is?
Looking at 2.6.38-rc7 I still don't see either of them, while I
had hoped that they would both also make it into 2.6.37.2...
Thanks, Jan
> ---
> block/blk-core.c | 11 ++++++-----
> block/blk-flush.c | 2 +-
> block/cfq-iosched.c | 6 +++---
> block/elevator.c | 4 ++--
> drivers/scsi/scsi_lib.c | 2 +-
> drivers/scsi/scsi_transport_fc.c | 2 +-
> include/linux/blkdev.h | 2 +-
> 7 files changed, 15 insertions(+), 14 deletions(-)
>
> Index: work/block/blk-core.c
> ===================================================================
> --- work.orig/block/blk-core.c
> +++ work/block/blk-core.c
> @@ -352,7 +352,7 @@ void blk_start_queue(struct request_queu
> WARN_ON(!irqs_disabled());
>
> queue_flag_clear(QUEUE_FLAG_STOPPED, q);
> - __blk_run_queue(q);
> + __blk_run_queue(q, false);
> }
> EXPORT_SYMBOL(blk_start_queue);
>
> @@ -403,13 +403,14 @@ EXPORT_SYMBOL(blk_sync_queue);
> /**
> * __blk_run_queue - run a single device queue
> * @q: The queue to run
> + * @force_kblockd: Don't run @q->request_fn directly. Use kblockd.
> *
> * Description:
> * See @blk_run_queue. This variant must be called with the queue lock
> * held and interrupts disabled.
> *
> */
> -void __blk_run_queue(struct request_queue *q)
> +void __blk_run_queue(struct request_queue *q, bool force_kblockd)
> {
> blk_remove_plug(q);
>
> @@ -423,7 +424,7 @@ void __blk_run_queue(struct request_queu
> * Only recurse once to avoid overrunning the stack, let the unplug
> * handling reinvoke the handler shortly if we already got there.
> */
> - if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
> + if (!force_kblockd && !queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
> q->request_fn(q);
> queue_flag_clear(QUEUE_FLAG_REENTER, q);
> } else {
> @@ -446,7 +447,7 @@ void blk_run_queue(struct request_queue
> unsigned long flags;
>
> spin_lock_irqsave(q->queue_lock, flags);
> - __blk_run_queue(q);
> + __blk_run_queue(q, false);
> spin_unlock_irqrestore(q->queue_lock, flags);
> }
> EXPORT_SYMBOL(blk_run_queue);
> @@ -1053,7 +1054,7 @@ void blk_insert_request(struct request_q
>
> drive_stat_acct(rq, 1);
> __elv_add_request(q, rq, where, 0);
> - __blk_run_queue(q);
> + __blk_run_queue(q, false);
> spin_unlock_irqrestore(q->queue_lock, flags);
> }
> EXPORT_SYMBOL(blk_insert_request);
> Index: work/block/blk-flush.c
> ===================================================================
> --- work.orig/block/blk-flush.c
> +++ work/block/blk-flush.c
> @@ -69,7 +69,7 @@ static void blk_flush_complete_seq_end_i
> * queue. Kick the queue in those cases.
> */
> if (was_empty && next_rq)
> - __blk_run_queue(q);
> + __blk_run_queue(q, false);
> }
>
> static void pre_flush_end_io(struct request *rq, int error)
> Index: work/block/cfq-iosched.c
> ===================================================================
> --- work.orig/block/cfq-iosched.c
> +++ work/block/cfq-iosched.c
> @@ -3355,7 +3355,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, s
> cfqd->busy_queues > 1) {
> cfq_del_timer(cfqd, cfqq);
> cfq_clear_cfqq_wait_request(cfqq);
> - __blk_run_queue(cfqd->queue);
> + __blk_run_queue(cfqd->queue, false);
> } else {
> cfq_blkiocg_update_idle_time_stats(
> &cfqq->cfqg->blkg);
> @@ -3370,7 +3370,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, s
> * this new queue is RT and the current one is BE
> */
> cfq_preempt_queue(cfqd, cfqq);
> - __blk_run_queue(cfqd->queue);
> + __blk_run_queue(cfqd->queue, false);
> }
> }
>
> @@ -3731,7 +3731,7 @@ static void cfq_kick_queue(struct work_s
> struct request_queue *q = cfqd->queue;
>
> spin_lock_irq(q->queue_lock);
> - __blk_run_queue(cfqd->queue);
> + __blk_run_queue(cfqd->queue, false);
> spin_unlock_irq(q->queue_lock);
> }
>
> Index: work/block/elevator.c
> ===================================================================
> --- work.orig/block/elevator.c
> +++ work/block/elevator.c
> @@ -602,7 +602,7 @@ void elv_quiesce_start(struct request_qu
> */
> elv_drain_elevator(q);
> while (q->rq.elvpriv) {
> - __blk_run_queue(q);
> + __blk_run_queue(q, false);
> spin_unlock_irq(q->queue_lock);
> msleep(10);
> spin_lock_irq(q->queue_lock);
> @@ -651,7 +651,7 @@ void elv_insert(struct request_queue *q,
> * with anything. There's no point in delaying queue
> * processing.
> */
> - __blk_run_queue(q);
> + __blk_run_queue(q, false);
> break;
>
> case ELEVATOR_INSERT_SORT:
> Index: work/drivers/scsi/scsi_lib.c
> ===================================================================
> --- work.orig/drivers/scsi/scsi_lib.c
> +++ work/drivers/scsi/scsi_lib.c
> @@ -443,7 +443,7 @@ static void scsi_run_queue(struct reques
> &sdev->request_queue->queue_flags);
> if (flagset)
> queue_flag_set(QUEUE_FLAG_REENTER, sdev->request_queue);
> - __blk_run_queue(sdev->request_queue);
> + __blk_run_queue(sdev->request_queue, false);
> if (flagset)
> queue_flag_clear(QUEUE_FLAG_REENTER, sdev->request_queue);
> spin_unlock(sdev->request_queue->queue_lock);
> Index: work/drivers/scsi/scsi_transport_fc.c
> ===================================================================
> --- work.orig/drivers/scsi/scsi_transport_fc.c
> +++ work/drivers/scsi/scsi_transport_fc.c
> @@ -3829,7 +3829,7 @@ fc_bsg_goose_queue(struct fc_rport *rpor
> !test_bit(QUEUE_FLAG_REENTER, &rport->rqst_q->queue_flags);
> if (flagset)
> queue_flag_set(QUEUE_FLAG_REENTER, rport->rqst_q);
> - __blk_run_queue(rport->rqst_q);
> + __blk_run_queue(rport->rqst_q, false);
> if (flagset)
> queue_flag_clear(QUEUE_FLAG_REENTER, rport->rqst_q);
> spin_unlock_irqrestore(rport->rqst_q->queue_lock, flags);
> Index: work/include/linux/blkdev.h
> ===================================================================
> --- work.orig/include/linux/blkdev.h
> +++ work/include/linux/blkdev.h
> @@ -699,7 +699,7 @@ extern void blk_start_queue(struct reque
> extern void blk_stop_queue(struct request_queue *q);
> extern void blk_sync_queue(struct request_queue *q);
> extern void __blk_stop_queue(struct request_queue *q);
> -extern void __blk_run_queue(struct request_queue *);
> +extern void __blk_run_queue(struct request_queue *q, bool force_kblockd);
> extern void blk_run_queue(struct request_queue *);
> extern int blk_rq_map_user(struct request_queue *, struct request *,
> struct rq_map_data *, void __user *, unsigned long,
On 2011-02-18 04:49, Tejun Heo wrote:
> On Thu, Feb 17, 2011 at 10:01:51AM -0500, Mike Snitzer wrote:
>> Hi Tejun,
>>
>> On Thu, Feb 17, 2011 at 6:16 AM, Tejun Heo <[email protected]> wrote:
>>
>>> Jens, this is applicable to v2.6.37 and 38. The new implementation
>>> for 39 would need similar fix but I couldn't find where the tree is.
>>> Which branch is it?
>>
>> for-2.6.39/core
>
> Right, thanks. Jens, after you apply the two fixes for 2.6.38, I can
> create a merge branch for for-2.6.39/core which you can pull. Would
> that work for you?
Thanks, that would be great. I'm applying them now.
--
Jens Axboe
Hello, Jens.
On Wed, Mar 02, 2011 at 08:46:46AM -0500, Jens Axboe wrote:
> > Right, thanks. Jens, after you apply the two fixes for 2.6.38, I can
> > create a merge branch for for-2.6.39/core which you can pull. Would
> > that work for you?
>
> Thanks, that would be great. I'm applying them now.
Okay, please pull from the following branch to receive the merge
between linux-2.6-block:for-linus and :for-2.6.39/core.
git://git.kernel.org/pub/scm/linux/kernel/git/tj/misc.git block-for-2.6.39-core
HEAD is e83a46bbb1d4c03defd733a64b727632a40059ad but git.korg seems a
bit slow to sync, so if you don't see the commit there, please pull
from master.korg.
ssh://master.kernel.org/pub/scm/linux/kernel/git/tj/misc.git block-for-2.6.39-core
Thanks.
--
tejun
On 2011-03-04 19:25, Tejun Heo wrote:
> Hello, Jens.
>
> On Wed, Mar 02, 2011 at 08:46:46AM -0500, Jens Axboe wrote:
>>> Right, thanks. Jens, after you apply the two fixes for 2.6.38, I can
>>> create a merge branch for for-2.6.39/core which you can pull. Would
>>> that work for you?
>>
>> Thanks, that would be great. I'm applying them now.
>
> Okay, please pull from the following branch to receive the merge
> between linux-2.6-block:for-linus and :for-2.6.39/core.
>
> git://git.kernel.org/pub/scm/linux/kernel/git/tj/misc.git block-for-2.6.39-core
>
> HEAD is e83a46bbb1d4c03defd733a64b727632a40059ad but git.korg seems a
> bit slow to sync, so if you don't see the commit there, please pull
> from master.korg.
>
> ssh://master.kernel.org/pub/scm/linux/kernel/git/tj/misc.git block-for-2.6.39-core
>
> Thanks.
Pulled, thanks Tejun.
--
Jens Axboe
Tejun Heo <[email protected]> writes:
> Hello, Jens.
>
> On Wed, Mar 02, 2011 at 08:46:46AM -0500, Jens Axboe wrote:
>> > Right, thanks. Jens, after you apply the two fixes for 2.6.38, I can
>> > create a merge branch for for-2.6.39/core which you can pull. Would
>> > that work for you?
>>
>> Thanks, that would be great. I'm applying them now.
>
> Okay, please pull from the following branch to receive the merge
> between linux-2.6-block:for-linus and :for-2.6.39/core.
>
> git://git.kernel.org/pub/scm/linux/kernel/git/tj/misc.git block-for-2.6.39-core
>
> HEAD is e83a46bbb1d4c03defd733a64b727632a40059ad but git.korg seems a
> bit slow to sync, so if you don't see the commit there, please pull
> from master.korg.
>
> ssh://master.kernel.org/pub/scm/linux/kernel/git/tj/misc.git block-for-2.6.39-core
>
> Thanks.
I know I'm coming to the party late (and maybe wrong), but I've got some
questions here.
Tejun, you introduced a commit to the ide driver that made it block in
its request function. As far as I know, that's not allowed. For scsi,
at least, it has always allowed calling back into the request function
from the completion handler, and I think this is actually the common case
(not some corner case).
So, why doesn't the ide driver see calls back into its request function
from the completion handler? It's clear that it calls blk_end_request
from ide_end_rq, which can definitely call __blk_run_queue. In other
words, why is it that the flush requests are triggering this problem
while normal I/O isn't?
I think the real issue may just be that the ide driver is blocking in
its request function. What have I missed?
Thanks!
Jeff
On 2011-03-07 20:33, Jeff Moyer wrote:
> Tejun Heo <[email protected]> writes:
>
>> Hello, Jens.
>>
>> On Wed, Mar 02, 2011 at 08:46:46AM -0500, Jens Axboe wrote:
>>>> Right, thanks. Jens, after you apply the two fixes for 2.6.38, I can
>>>> create a merge branch for for-2.6.39/core which you can pull. Would
>>>> that work for you?
>>>
>>> Thanks, that would be great. I'm applying them now.
>>
>> Okay, please pull from the following branch to receive the merge
>> between linux-2.6-block:for-linus and :for-2.6.39/core.
>>
>> git://git.kernel.org/pub/scm/linux/kernel/git/tj/misc.git block-for-2.6.39-core
>>
>> HEAD is e83a46bbb1d4c03defd733a64b727632a40059ad but git.korg seems a
>> bit slow to sync, so if you don't see the commit there, please pull
>> from master.korg.
>>
>> ssh://master.kernel.org/pub/scm/linux/kernel/git/tj/misc.git block-for-2.6.39-core
>>
>> Thanks.
>
> I know I'm coming to the party late (and maybe wrong), but I've got some
> questions here.
>
> Tejun, you introduced a commit to the ide driver that made it block in
> its request function. As far as I know, that's not allowed. For scsi,
> at least, it has always allowed calling back into the request function
> from the completion handler, and I think this is actually the common case
> (not some corner case).
>
> So, why doesn't the ide driver see calls back into its request function
> from the completion handler? It's clear that it calls blk_end_request
> from ide_end_rq, which can definitely call __blk_run_queue. In other
> words, why is it that the flush requests are triggering this problem
> while normal I/O isn't?
>
> I think the real issue may just be that the ide driver is blocking in
> its request function. What have I missed?
So the only case where the request_fn is called and you cannot block, is
if you call it from your completion function. Any other invocation
should be from process context. As long as you remember to drop the
queue lock and re-enable interrupts, it should work. It's not great
style and I would not recommend it for a performance environment, but it
should work.
--
Jens Axboe
Jens Axboe <[email protected]> writes:
> On 2011-03-07 20:33, Jeff Moyer wrote:
>> Tejun Heo <[email protected]> writes:
>>
>>> Hello, Jens.
>>>
>>> On Wed, Mar 02, 2011 at 08:46:46AM -0500, Jens Axboe wrote:
>>>>> Right, thanks. Jens, after you apply the two fixes for 2.6.38, I can
>>>>> create a merge branch for for-2.6.39/core which you can pull. Would
>>>>> that work for you?
>>>>
>>>> Thanks, that would be great. I'm applying them now.
>>>
>>> Okay, please pull from the following branch to receive the merge
>>> between linux-2.6-block:for-linus and :for-2.6.39/core.
>>>
>>> git://git.kernel.org/pub/scm/linux/kernel/git/tj/misc.git block-for-2.6.39-core
>>>
>>> HEAD is e83a46bbb1d4c03defd733a64b727632a40059ad but git.korg seems a
>>> bit slow to sync, so if you don't see the commit there, please pull
>>> from master.korg.
>>>
>>> ssh://master.kernel.org/pub/scm/linux/kernel/git/tj/misc.git block-for-2.6.39-core
>>>
>>> Thanks.
>>
>> I know I'm coming to the party late (and maybe wrong), but I've got some
>> questions here.
>>
>> Tejun, you introduced a commit to the ide driver that made it block in
>> its request function. As far as I know, that's not allowed. For scsi,
>> at least, it has always allowed calling back into the request function
>> from the completion handler, and I think this is actually the common case
>> (not some corner case).
>>
>> So, why doesn't the ide driver see calls back into its request function
>> from the completion handler? It's clear that it calls blk_end_request
>> from ide_end_rq, which can definitely call __blk_run_queue. In other
>> words, why is it that the flush requests are triggering this problem
>> while normal I/O isn't?
>>
>> I think the real issue may just be that the ide driver is blocking in
>> its request function. What have I missed?
>
> So the only case where the request_fn is called and you cannot block, is
> if you call it from your completion function. Any other invocation
> should be from process context. As long as you remember to drop the
> queue lock and re-enable interrupts, it should work. It's not great
> style and I would not recommend it for a performance environment, but it
> should work.
So are you agreeing with me or disagreeing? ;-) It sounds to me like
you're saying that the ide driver should be able to cope with being
called from softirq context.
Cheers,
Jeff
On 2011-03-07 20:39, Jeff Moyer wrote:
> Jens Axboe <[email protected]> writes:
>
>> On 2011-03-07 20:33, Jeff Moyer wrote:
>>> Tejun Heo <[email protected]> writes:
>>>
>>>> Hello, Jens.
>>>>
>>>> On Wed, Mar 02, 2011 at 08:46:46AM -0500, Jens Axboe wrote:
>>>>>> Right, thanks. Jens, after you apply the two fixes for 2.6.38, I can
>>>>>> create a merge branch for for-2.6.39/core which you can pull. Would
>>>>>> that work for you?
>>>>>
>>>>> Thanks, that would be great. I'm applying them now.
>>>>
>>>> Okay, please pull from the following branch to receive the merge
>>>> between linux-2.6-block:for-linus and :for-2.6.39/core.
>>>>
>>>> git://git.kernel.org/pub/scm/linux/kernel/git/tj/misc.git block-for-2.6.39-core
>>>>
>>>> HEAD is e83a46bbb1d4c03defd733a64b727632a40059ad but git.korg seems a
>>>> bit slow to sync, so if you don't see the commit there, please pull
>>>> from master.korg.
>>>>
>>>> ssh://master.kernel.org/pub/scm/linux/kernel/git/tj/misc.git block-for-2.6.39-core
>>>>
>>>> Thanks.
>>>
>>> I know I'm coming to the party late (and maybe wrong), but I've got some
>>> questions here.
>>>
>>> Tejun, you introduced a commit to the ide driver that made it block in
>>> its request function. As far as I know, that's not allowed. For scsi,
>>> at least, it has always allowed calling back into the request function
>>> from the completion handler, and I think this is actually the common case
>>> (not some corner case).
>>>
>>> So, why doesn't the ide driver see calls back into its request function
>>> from the completion handler? It's clear that it calls blk_end_request
>>> from ide_end_rq, which can definitely call __blk_run_queue. In other
>>> words, why is it that the flush requests are triggering this problem
>>> while normal I/O isn't?
>>>
>>> I think the real issue may just be that the ide driver is blocking in
>>> its request function. What have I missed?
>>
>> So the only case where the request_fn is called and you cannot block, is
>> if you call it from your completion function. Any other invocation
>> should be from process context. As long as you remember to drop the
>> queue lock and re-enable interrupts, it should work. It's not great
>> style and I would not recommend it for a performance environment, but it
>> should work.
>
> So are you agreeing with me or disagreeing? ;-) It sounds to me like
> you're saying that the ide driver should be able to cope with being
> called from softirq context.
I'm just stating how it should work :-)
But yes, it sounds like IDE is violating this rule and that's why it was
broken. Even with that, having explicit control of the queue running
does make sense.
--
Jens Axboe
Jens Axboe <[email protected]> writes:
> On 2011-03-07 20:39, Jeff Moyer wrote:
>>>> I think the real issue may just be that the ide driver is blocking in
>>>> its request function. What have I missed?
>>>
>>> So the only case where the request_fn is called and you cannot block, is
>>> if you call it from your completion function. Any other invocation
>>> should be from process context. As long as you remember to drop the
>>> queue lock and re-enable interrupts, it should work. It's not great
>>> style and I would not recommend it for a performance environment, but it
>>> should work.
>>
>> So are you agreeing with me or disagreeing? ;-) It sounds to me like
>> you're saying that the ide driver should be able to cope with being
>> called from softirq context.
>
> I'm just stating how it should work :-)
>
> But yes, it sounds like IDE is violating this rule and that's why it was
> broken. Even with that, having explicit control of the queue running
> does make sense.
Well, I wonder if it makes sense *in this case*. With all of the work
going into optimizing the flushing, is deferring work to kblockd really
the best idea? Tejun, do you know if it has any measurable impact?
Cheers,
Jeff
Hello,
On Mon, Mar 07, 2011 at 02:50:28PM -0500, Jeff Moyer wrote:
> > But yes, it sounds like IDE is violating this rule and that's why it was
> > broken. Even with that, having explicit control of the queue running
> > does make sense.
Yeah, IDE is the only one which actually depends on it. At the time I
thought more drivers would use the sleeping context but that didn't
happen (yet). That said, I think it's generally a good idea to
guarantee context on the issue path (the request_lock requirement
makes it quite ugly to use but that's a separate issue). It makes
things much easier - e.g. we can use mempool and friends for forward
progress guarantee instead of doing explicit retrying which also can
hide deadlocks quite effectively.
> Well, I wonder if it makes sense *in this case*. With all of the work
> going into optimizing the flushing, is deferring work to kblockd really
> the best idea? Tejun, do you know if it has any measurable impact?
I don't think it matters. First of all, the overhead itself isn't
that big to begin with. More importantly, the flush optimization is
not about squeezing out the last cpu cycles out of the existing path.
It's about consolidating similar operations and thus increasing
scalability when a storm of fsync's are issued in parallel likely with
a lot of other IOs. The kicking path won't be used at all on busy
queues (queue won't be empty).
So, I don't think there's anything to worry about here.
Thanks.
--
tejun