From: Bartlomiej Zolnierkiewicz <[email protected]>
Subject: [PATCH] ide: don't execute the next queued command from the hard-IRQ context

* Tell the block layer that we are not done handling requests by using
blk_plug_device() in ide_do_request() (request handling function)
and ide_timer_expiry() (timeout handler) if the queue is not empty.

* Remove optimization which directly calls ide_do_request() for the next
queued command from the ide_intr() (IRQ handler) and ide_timer_expiry().

* Remove no longer needed IRQ masking from ide_do_request() - in case of
IDE ports needing serialization disable_irq_nosync()/enable_irq() was
used for the (possibly shared) IRQ of the other IDE port.

* Put the misplaced comment in the right place in ide_do_request().

* Drop no longer needed 'int masked_irq' argument from ide_do_request().

* Merge ide_do_request() into do_ide_request().

* Remove no longer needed IDE_NO_IRQ define.

While at it:

* Don't use HWGROUP() macro in do_ide_request().

* Use __func__ in ide_intr().

This patch reduces IRQ handling latency for IDE and improves the system-wide
handling of shared IRQs (which should result in more timeout-resistant and
stable IDE systems). It also makes it possible to do some further changes
later (e.g. replace some busy-waiting delays with sleeping equivalents).

Signed-off-by: Bartlomiej Zolnierkiewicz <[email protected]>
---
on top of per-hwgroup locks patch and with a special dedication to people
complaining about improving IDE ;)

drivers/ide/ide-io.c | 59 ++++++++++++++++++++++-----------------------------
include/linux/ide.h | 7 ------
2 files changed, 26 insertions(+), 40 deletions(-)

Index: b/drivers/ide/ide-io.c
===================================================================
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -939,8 +939,10 @@ repeat:
* the driver. This makes the driver much more friendlier to shared IRQs
* than previous designs, while remaining 100% (?) SMP safe and capable.
*/
-static void ide_do_request (ide_hwgroup_t *hwgroup, int masked_irq)
+void do_ide_request(struct request_queue *q)
{
+ ide_drive_t *orig_drive = q->queuedata;
+ ide_hwgroup_t *hwgroup = orig_drive->hwif->hwgroup;
ide_drive_t *drive;
ide_hwif_t *hwif;
struct request *rq;
@@ -999,8 +1001,11 @@ static void ide_do_request (ide_hwgroup_
}

/* no more work for this hwgroup (for now) */
- return;
+ goto plug_device;
}
+
+ if (drive != orig_drive)
+ goto plug_device;
again:
hwif = HWIF(drive);
if (hwgroup->hwif->sharing_irq && hwif != hwgroup->hwif) {
@@ -1055,41 +1060,27 @@ static void ide_do_request (ide_hwgroup_
goto again;
/* We clear busy, there should be no pending ATA command at this point. */
hwgroup->busy = 0;
- break;
+ goto plug_device;
}

hwgroup->rq = rq;

- /*
- * Some systems have trouble with IDE IRQs arriving while
- * the driver is still setting things up. So, here we disable
- * the IRQ used by this interface while the request is being started.
- * This may look bad at first, but pretty much the same thing
- * happens anyway when any interrupt comes in, IDE or otherwise
- * -- the kernel masks the IRQ while it is being handled.
- */
- if (masked_irq != IDE_NO_IRQ && hwif->irq != masked_irq)
- disable_irq_nosync(hwif->irq);
spin_unlock(&hwgroup->lock);
+ /* allow other IRQs while we start this request */
local_irq_enable_in_hardirq();
- /* allow other IRQs while we start this request */
startstop = start_request(drive, rq);
spin_lock_irq(&hwgroup->lock);
- if (masked_irq != IDE_NO_IRQ && hwif->irq != masked_irq)
- enable_irq(hwif->irq);
- if (startstop == ide_stopped)
+
+ if (startstop == ide_stopped) {
hwgroup->busy = 0;
+ goto plug_device;
+ }
}
-}
+ return;

-/*
- * Passes the stuff to ide_do_request
- */
-void do_ide_request(struct request_queue *q)
-{
- ide_drive_t *drive = q->queuedata;
-
- ide_do_request(HWGROUP(drive), IDE_NO_IRQ);
+plug_device:
+ if (!elv_queue_empty(orig_drive->queue))
+ blk_plug_device(orig_drive->queue);
}

/*
@@ -1241,11 +1232,13 @@ void ide_timer_expiry (unsigned long dat
drive->service_time = jiffies - drive->service_start;
spin_lock_irq(&hwgroup->lock);
enable_irq(hwif->irq);
- if (startstop == ide_stopped)
+ if (startstop == ide_stopped) {
hwgroup->busy = 0;
+ if (!elv_queue_empty(drive->queue))
+ blk_plug_device(drive->queue);
+ }
}
}
- ide_do_request(hwgroup, IDE_NO_IRQ);
spin_unlock_irqrestore(&hwgroup->lock, flags);
}

@@ -1438,11 +1431,11 @@ irqreturn_t ide_intr (int irq, void *dev
if (startstop == ide_stopped) {
if (hwgroup->handler == NULL) { /* paranoia */
hwgroup->busy = 0;
- ide_do_request(hwgroup, hwif->irq);
- } else {
- printk(KERN_ERR "%s: ide_intr: huh? expected NULL handler "
- "on exit\n", drive->name);
- }
+ if (!elv_queue_empty(drive->queue))
+ blk_plug_device(drive->queue);
+ } else
+ printk(KERN_ERR "%s: %s: huh? expected NULL handler "
+ "on exit\n", __func__, drive->name);
}
out_handled:
irq_ret = IRQ_HANDLED;
Index: b/include/linux/ide.h
===================================================================
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -32,13 +32,6 @@
# define SUPPORT_VLB_SYNC 1
#endif

-/*
- * Used to indicate "no IRQ", should be a value that cannot be an IRQ
- * number.
- */
-
-#define IDE_NO_IRQ (-1)
-
typedef unsigned char byte; /* used everywhere */

/*


2008-10-12 18:03:21

by Elias Oltmanns

Subject: Re: [PATCH] ide: don't execute the next queued command from the hard-IRQ context

Bartlomiej Zolnierkiewicz <[email protected]> wrote:
> From: Bartlomiej Zolnierkiewicz <[email protected]>
> Subject: [PATCH] ide: don't execute the next queued command from the hard-IRQ context
>
> * Tell the block layer that we are not done handling requests by using
> blk_plug_device() in ide_do_request() (request handling function)
> and ide_timer_expiry() (timeout handler) if the queue is not empty.
>
> * Remove optimization which directly calls ide_do_request() for the next
> queued command from the ide_intr() (IRQ handler) and ide_timer_expiry().
>
> * Remove no longer needed IRQ masking from ide_do_request() - in case of
> IDE ports needing serialization disable_irq_nosync()/enable_irq() was
> used for the (possibly shared) IRQ of the other IDE port.
>
> * Put the misplaced comment in the right place in ide_do_request().
>
> * Drop no longer needed 'int masked_irq' argument from ide_do_request().
>
> * Merge ide_do_request() into do_ide_request().
>
> * Remove no longer needed IDE_NO_IRQ define.
>
> While at it:
>
> * Don't use HWGROUP() macro in do_ide_request().
>
> * Use __func__ in ide_intr().
>
> This patch reduces IRQ handling latency for IDE and improves the system-wide
> handling of shared IRQs (which should result in more timeout-resistant and
> stable IDE systems). It also makes it possible to do some further changes
> later (e.g. replace some busy-waiting delays with sleeping equivalents).
>
> Signed-off-by: Bartlomiej Zolnierkiewicz <[email protected]>
> ---
> on top of per-hwgroup locks patch and with a special dedication to people
> complaining about improving IDE ;)

Some comments / questions. Admittedly, I don't always know enough about
the things I'm talking about here, but I'm hoping to learn something
that way ;-).

[...]
> Index: b/drivers/ide/ide-io.c
> ===================================================================
> --- a/drivers/ide/ide-io.c
> +++ b/drivers/ide/ide-io.c
[...]
> @@ -999,8 +1001,11 @@ static void ide_do_request (ide_hwgroup_
> }
>
> /* no more work for this hwgroup (for now) */
> - return;
> + goto plug_device;
> }
> +
> + if (drive != orig_drive)
> + goto plug_device;
> again:
> hwif = HWIF(drive);

Didn't you want to get rid of HWIF() too?

> if (hwgroup->hwif->sharing_irq && hwif != hwgroup->hwif) {
> @@ -1055,41 +1060,27 @@ static void ide_do_request (ide_hwgroup_
> goto again;
> /* We clear busy, there should be no pending ATA command at this point. */
> hwgroup->busy = 0;
> - break;
> + goto plug_device;
> }
>
> hwgroup->rq = rq;
>
> - /*
> - * Some systems have trouble with IDE IRQs arriving while
> - * the driver is still setting things up. So, here we disable
> - * the IRQ used by this interface while the request is being started.
> - * This may look bad at first, but pretty much the same thing
> - * happens anyway when any interrupt comes in, IDE or otherwise
> - * -- the kernel masks the IRQ while it is being handled.
> - */
> - if (masked_irq != IDE_NO_IRQ && hwif->irq != masked_irq)
> - disable_irq_nosync(hwif->irq);
> spin_unlock(&hwgroup->lock);
> + /* allow other IRQs while we start this request */
> local_irq_enable_in_hardirq();
> - /* allow other IRQs while we start this request */

That's the part I don't understand completely. Wouldn't it be alright to
do just spin_unlock_irq() here and be done with it? SCSI does exactly
that and as far as I can see, IDE is in a similar situation now that the
->request_fn() is not called from ide_intr() and ide_timer_expiry()
anymore, i.e. the ->request_fn() will always be executed in process
context.
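
A minimal sketch of the simplification being suggested (untested, and
assuming the ->request_fn() is now only ever entered from process context):

        /* current code in do_ide_request() */
        spin_unlock(&hwgroup->lock);
        /* allow other IRQs while we start this request */
        local_irq_enable_in_hardirq();
        startstop = start_request(drive, rq);
        spin_lock_irq(&hwgroup->lock);

        /* equivalent once there are no hard-IRQ callers left */
        spin_unlock_irq(&hwgroup->lock);
        startstop = start_request(drive, rq);
        spin_lock_irq(&hwgroup->lock);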

> startstop = start_request(drive, rq);
> spin_lock_irq(&hwgroup->lock);
> - if (masked_irq != IDE_NO_IRQ && hwif->irq != masked_irq)
> - enable_irq(hwif->irq);
> - if (startstop == ide_stopped)
> +
> + if (startstop == ide_stopped) {
> hwgroup->busy = 0;
> + goto plug_device;

This goto statement is wrong. Simply set ->busy to zero and be done with
it. This way, the loop will start again and either elv_next_request()
returns NULL, in which case the queue need not be plugged, or the next
request will be processed immediately, which is exactly what we want.
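
For reference, a rough sketch of the flow described above, inside the
while (!hwgroup->busy) loop of do_ide_request():

        if (startstop == ide_stopped)
                hwgroup->busy = 0;
        /*
         * Back at the top of the loop: either elv_next_request()
         * returns NULL and we stop without plugging the queue, or
         * the next request is started right away.
         */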

[...]
> @@ -1438,11 +1431,11 @@ irqreturn_t ide_intr (int irq, void *dev
> if (startstop == ide_stopped) {
> if (hwgroup->handler == NULL) { /* paranoia */
> hwgroup->busy = 0;
> - ide_do_request(hwgroup, hwif->irq);
> - } else {
> - printk(KERN_ERR "%s: ide_intr: huh? expected NULL handler "
> - "on exit\n", drive->name);
> - }
> + if (!elv_queue_empty(drive->queue))
> + blk_plug_device(drive->queue);

This is perhaps not exactly what you really want. It basically means
that there will be a delay (q->unplug_delay) after each command which
may well hurt I/O performance. Instead, I'd suggest something like the
following:

if (!elv_queue_empty(drive->queue))
blk_schedule_queue_run(drive->queue);

and a function

void blk_schedule_queue_run(struct request_queue *q)
{
blk_plug_device(q);
kblockd_schedule_work(&q->unplug_work);
}

in blk-core.c. This can also be used as a helper function in blk-core.c
itself.

Regards,

Elias

Subject: Re: [PATCH] ide: don't execute the next queued command from the hard-IRQ context

On Sunday 12 October 2008, Elias Oltmanns wrote:
> Bartlomiej Zolnierkiewicz <[email protected]> wrote:
> > From: Bartlomiej Zolnierkiewicz <[email protected]>
> > Subject: [PATCH] ide: don't execute the next queued command from the hard-IRQ context
> >
> > * Tell the block layer that we are not done handling requests by using
> > blk_plug_device() in ide_do_request() (request handling function)
> > and ide_timer_expiry() (timeout handler) if the queue is not empty.
> >
> > * Remove optimization which directly calls ide_do_request() for the next
> > queued command from the ide_intr() (IRQ handler) and ide_timer_expiry().
> >
> > * Remove no longer needed IRQ masking from ide_do_request() - in case of
> > IDE ports needing serialization disable_irq_nosync()/enable_irq() was
> > used for the (possibly shared) IRQ of the other IDE port.
> >
> > * Put the misplaced comment in the right place in ide_do_request().
> >
> > * Drop no longer needed 'int masked_irq' argument from ide_do_request().
> >
> > * Merge ide_do_request() into do_ide_request().
> >
> > * Remove no longer needed IDE_NO_IRQ define.
> >
> > While at it:
> >
> > * Don't use HWGROUP() macro in do_ide_request().
> >
> > * Use __func__ in ide_intr().
> >
> > This patch reduces IRQ handling latency for IDE and improves the system-wide
> > handling of shared IRQs (which should result in more timeout-resistant and
> > stable IDE systems). It also makes it possible to do some further changes
> > later (e.g. replace some busy-waiting delays with sleeping equivalents).
> >
> > Signed-off-by: Bartlomiej Zolnierkiewicz <[email protected]>
> > ---
> > on top of per-hwgroup locks patch and with a special dedication to people
> > complaining about improving IDE ;)
>
> Some comments / questions. Admittedly, I don't always know enough about
> the things I'm talking about here, but I'm hoping to learn something
> that way ;-).
>
> [...]
> > Index: b/drivers/ide/ide-io.c
> > ===================================================================
> > --- a/drivers/ide/ide-io.c
> > +++ b/drivers/ide/ide-io.c
> [...]
> > @@ -999,8 +1001,11 @@ static void ide_do_request (ide_hwgroup_
> > }
> >
> > /* no more work for this hwgroup (for now) */
> > - return;
> > + goto plug_device;
> > }
> > +
> > + if (drive != orig_drive)
> > + goto plug_device;
> > again:
> > hwif = HWIF(drive);
>
> Didn't you want to get rid of HWIF() too?

Fixed.

> > if (hwgroup->hwif->sharing_irq && hwif != hwgroup->hwif) {
> > @@ -1055,41 +1060,27 @@ static void ide_do_request (ide_hwgroup_
> > goto again;
> > /* We clear busy, there should be no pending ATA command at this point. */
> > hwgroup->busy = 0;
> > - break;
> > + goto plug_device;
> > }
> >
> > hwgroup->rq = rq;
> >
> > - /*
> > - * Some systems have trouble with IDE IRQs arriving while
> > - * the driver is still setting things up. So, here we disable
> > - * the IRQ used by this interface while the request is being started.
> > - * This may look bad at first, but pretty much the same thing
> > - * happens anyway when any interrupt comes in, IDE or otherwise
> > - * -- the kernel masks the IRQ while it is being handled.
> > - */
> > - if (masked_irq != IDE_NO_IRQ && hwif->irq != masked_irq)
> > - disable_irq_nosync(hwif->irq);
> > spin_unlock(&hwgroup->lock);
> > + /* allow other IRQs while we start this request */
> > local_irq_enable_in_hardirq();
> > - /* allow other IRQs while we start this request */
>
> That's the part I don't understand completely. Wouldn't it be alright to
> do just spin_unlock_irq() here and be done with it? SCSI does exactly
> that and as far as I can see, IDE is in a similar situation now that the
> ->request_fn() is not called from ide_intr() and ide_timer_expiry()
> anymore, i.e. the ->request_fn() will always be executed in process
> context.

Fixed.

> > startstop = start_request(drive, rq);
> > spin_lock_irq(&hwgroup->lock);
> > - if (masked_irq != IDE_NO_IRQ && hwif->irq != masked_irq)
> > - enable_irq(hwif->irq);
> > - if (startstop == ide_stopped)
> > +
> > + if (startstop == ide_stopped) {
> > hwgroup->busy = 0;
> > + goto plug_device;
>
> This goto statement is wrong. Simply set ->busy to zero and be done with
> it. This way, the loop will start again and either elv_next_request()
> returns NULL, in which case the queue need not be plugged, or the next
> request will be processed immediately, which is exactly what we want.

The problem is that the next loop iteration can choose a different drive
than the current one, so we can end up in a situation where we "lose"
the blk_plug_device() call.

I fixed it by just inlining the "plug_device" code for now.
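
In other words, roughly (see the v1->v2 interdiff below for the actual change):

        if (startstop == ide_stopped) {
                hwgroup->busy = 0;
                /* plug orig_drive's queue right here instead of
                 * goto plug_device, so a later loop iteration that
                 * picks a different drive cannot skip the plug */
                if (!elv_queue_empty(orig_drive->queue))
                        blk_plug_device(orig_drive->queue);
        }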

> [...]
> > @@ -1438,11 +1431,11 @@ irqreturn_t ide_intr (int irq, void *dev
> > if (startstop == ide_stopped) {
> > if (hwgroup->handler == NULL) { /* paranoia */
> > hwgroup->busy = 0;
> > - ide_do_request(hwgroup, hwif->irq);
> > - } else {
> > - printk(KERN_ERR "%s: ide_intr: huh? expected NULL handler "
> > - "on exit\n", drive->name);
> > - }
> > + if (!elv_queue_empty(drive->queue))
> > + blk_plug_device(drive->queue);
>
> This is perhaps not exactly what you really want. It basically means
> that there will be a delay (q->unplug_delay) after each command which
> may well hurt I/O performance. Instead, I'd suggest something like the
> following:
>
> if (!elv_queue_empty(drive->queue))
> blk_schedule_queue_run(drive->queue);
>
> and a function
>
> void blk_schedule_queue_run(struct request_queue *q)
> {
> blk_plug_device(q);
> kblockd_schedule_work(&q->unplug_work);
> }
>
> in blk-core.c. This can also be used as a helper function in blk-core.c
> itself.

Care to make a patch adding such a helper to blk-core.c?

I'll update this patch to use it then; in the meantime, here is the v1->v2 interdiff:

...
v2:
Changes per review from Elias Oltmanns:
- fix wrong goto statement in 'if (startstop == ide_stopped)' block
- use spin_unlock_irq()
- don't use obsolete HWIF() macro
...

diff -u b/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
--- b/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -1006,8 +1006,9 @@

if (drive != orig_drive)
goto plug_device;
- again:
- hwif = HWIF(drive);
+again:
+ hwif = drive->hwif;
+
if (hwgroup->hwif->sharing_irq && hwif != hwgroup->hwif) {
/*
* set nIEN for previous hwif, drives in the
@@ -1065,15 +1066,14 @@

hwgroup->rq = rq;

- spin_unlock(&hwgroup->lock);
- /* allow other IRQs while we start this request */
- local_irq_enable_in_hardirq();
+ spin_unlock_irq(&hwgroup->lock);
startstop = start_request(drive, rq);
spin_lock_irq(&hwgroup->lock);

if (startstop == ide_stopped) {
hwgroup->busy = 0;
- goto plug_device;
+ if (!elv_queue_empty(orig_drive->queue))
+ blk_plug_device(orig_drive->queue);
}
}
return;

2008-10-22 15:01:44

by Elias Oltmanns

Subject: Re: [PATCH] ide: don't execute the next queued command from the hard-IRQ context

Bartlomiej Zolnierkiewicz <[email protected]> wrote:
> On Sunday 12 October 2008, Elias Oltmanns wrote:
>> Bartlomiej Zolnierkiewicz <[email protected]> wrote:
>
>> > From: Bartlomiej Zolnierkiewicz <[email protected]>
>> > Subject: [PATCH] ide: don't execute the next queued command from the hard-IRQ context
>> >
>> > * Tell the block layer that we are not done handling requests by using
>> > blk_plug_device() in ide_do_request() (request handling function)
>> > and ide_timer_expiry() (timeout handler) if the queue is not empty.
>> >
>> > * Remove optimization which directly calls ide_do_request() for the next
>> > queued command from the ide_intr() (IRQ handler) and ide_timer_expiry().
>> >
>> > * Remove no longer needed IRQ masking from ide_do_request() - in case of
>> > IDE ports needing serialization disable_irq_nosync()/enable_irq() was
>> > used for the (possibly shared) IRQ of the other IDE port.
>> >
>> > * Put the misplaced comment in the right place in ide_do_request().
>> >
>> > * Drop no longer needed 'int masked_irq' argument from ide_do_request().
>> >
>> > * Merge ide_do_request() into do_ide_request().
>> >
>> > * Remove no longer needed IDE_NO_IRQ define.
>> >
>> > While at it:
>> >
>> > * Don't use HWGROUP() macro in do_ide_request().
>> >
>> > * Use __func__ in ide_intr().
>> >
>> > This patch reduces IRQ handling latency for IDE and improves the system-wide
>> > handling of shared IRQs (which should result in more timeout-resistant and
>> > stable IDE systems). It also makes it possible to do some further changes
>> > later (e.g. replace some busy-waiting delays with sleeping equivalents).
>> >
>> > Signed-off-by: Bartlomiej Zolnierkiewicz <[email protected]>
>> > ---
[...]
>> > Index: b/drivers/ide/ide-io.c
>> > ===================================================================
>> > --- a/drivers/ide/ide-io.c
>> > +++ b/drivers/ide/ide-io.c
[...]
>> > startstop = start_request(drive, rq);
>> > spin_lock_irq(&hwgroup->lock);
>> > - if (masked_irq != IDE_NO_IRQ && hwif->irq != masked_irq)
>> > - enable_irq(hwif->irq);
>> > - if (startstop == ide_stopped)
>> > +
>> > + if (startstop == ide_stopped) {
>> > hwgroup->busy = 0;
>> > + goto plug_device;
>>
>> This goto statement is wrong. Simply set ->busy to zero and be done with
>> it. This way, the loop will start again and either elv_next_request()
>> returns NULL, in which case the queue need not be plugged, or the next
>> request will be processed immediately, which is exactly what we want.
>
> The problem is that the next loop iteration can choose a different drive
> than the current one, so we can end up in a situation where we "lose"
> the blk_plug_device() call.
>
> I fixed it by just inlining the "plug_device" code for now.

Right, I had missed that. Still, I'm not really convinced yet that this
is the right way to handle things. In fact, I believe that the role of
choose_drive() has changed since it is now called directly from the
->request_fn() and it should probably be rewritten and renamed along the
way. However, this needs further discussion and the issue below has some
bearing on this too.

>
>> [...]
>> > @@ -1438,11 +1431,11 @@ irqreturn_t ide_intr (int irq, void *dev
>> > if (startstop == ide_stopped) {
>> > if (hwgroup->handler == NULL) { /* paranoia */
>> > hwgroup->busy = 0;
>> > - ide_do_request(hwgroup, hwif->irq);
>> > - } else {
>> > - printk(KERN_ERR "%s: ide_intr: huh? expected NULL handler "
>> > - "on exit\n", drive->name);
>> > - }
>> > + if (!elv_queue_empty(drive->queue))
>> > + blk_plug_device(drive->queue);
>>
>> This is perhaps not exactly what you really want. It basically means
>> that there will be a delay (q->unplug_delay) after each command which
>> may well hurt I/O performance. Instead, I'd suggest something like the
>> following:
>>
>> if (!elv_queue_empty(drive->queue))
>> blk_schedule_queue_run(drive->queue);
>>
>> and a function
>>
>> void blk_schedule_queue_run(struct request_queue *q)
>> {
>> blk_plug_device(q);
>> kblockd_schedule_work(&q->unplug_work);
>> }
>>
>> in blk-core.c. This can also be used as a helper function in blk-core.c
>> itself.
>
> Care to make a patch adding such a helper to blk-core.c?

Thinking about this a bit more, I'd like to raise this issue with Jens
and discuss it in some more generality.

Regards,

Elias

Subject: Re: [PATCH] ide: don't execute the next queued command from the hard-IRQ context

On Wednesday 22 October 2008, Elias Oltmanns wrote:
> Bartlomiej Zolnierkiewicz <[email protected]> wrote:
> > On Sunday 12 October 2008, Elias Oltmanns wrote:
> >> Bartlomiej Zolnierkiewicz <[email protected]> wrote:
> >
> >> > From: Bartlomiej Zolnierkiewicz <[email protected]>
> >> > Subject: [PATCH] ide: don't execute the next queued command from the hard-IRQ context
> >> >
> >> > * Tell the block layer that we are not done handling requests by using
> >> > blk_plug_device() in ide_do_request() (request handling function)
> >> > and ide_timer_expiry() (timeout handler) if the queue is not empty.
> >> >
> >> > * Remove optimization which directly calls ide_do_request() for the next
> >> > queued command from the ide_intr() (IRQ handler) and ide_timer_expiry().
> >> >
> >> > * Remove no longer needed IRQ masking from ide_do_request() - in case of
> >> > IDE ports needing serialization disable_irq_nosync()/enable_irq() was
> >> > used for the (possibly shared) IRQ of the other IDE port.
> >> >
> >> > * Put the misplaced comment in the right place in ide_do_request().
> >> >
> >> > * Drop no longer needed 'int masked_irq' argument from ide_do_request().
> >> >
> >> > * Merge ide_do_request() into do_ide_request().
> >> >
> >> > * Remove no longer needed IDE_NO_IRQ define.
> >> >
> >> > While at it:
> >> >
> >> > * Don't use HWGROUP() macro in do_ide_request().
> >> >
> >> > * Use __func__ in ide_intr().
> >> >
> >> > This patch reduces IRQ handling latency for IDE and improves the system-wide
> >> > handling of shared IRQs (which should result in more timeout-resistant and
> >> > stable IDE systems). It also makes it possible to do some further changes
> >> > later (e.g. replace some busy-waiting delays with sleeping equivalents).
> >> >
> >> > Signed-off-by: Bartlomiej Zolnierkiewicz <[email protected]>
> >> > ---
> [...]
> >> > Index: b/drivers/ide/ide-io.c
> >> > ===================================================================
> >> > --- a/drivers/ide/ide-io.c
> >> > +++ b/drivers/ide/ide-io.c
> [...]
> >> > startstop = start_request(drive, rq);
> >> > spin_lock_irq(&hwgroup->lock);
> >> > - if (masked_irq != IDE_NO_IRQ && hwif->irq != masked_irq)
> >> > - enable_irq(hwif->irq);
> >> > - if (startstop == ide_stopped)
> >> > +
> >> > + if (startstop == ide_stopped) {
> >> > hwgroup->busy = 0;
> >> > + goto plug_device;
> >>
> >> This goto statement is wrong. Simply set ->busy to zero and be done with
> >> it. This way, the loop will start again and either elv_next_request()
> >> returns NULL, in which case the queue need not be plugged, or the next
> >> request will be processed immediately, which is exactly what we want.
> >
> > The problem is that the next loop iteration can choose a different drive
> > than the current one, so we can end up in a situation where we "lose"
> > the blk_plug_device() call.
> >
> > I fixed it by just inlining the "plug_device" code for now.
>
> Right, I had missed that. Still, I'm not really convinced yet that this
> is the right way to handle things. In fact, I believe that the role of
> choose_drive() has changed since it is now called directly from the
> ->request_fn() and it should probably be rewritten and renamed along the
> way. However, this needs further discussion and the issue below has some
> bearing on this too.

Well, in my patch to use per-device request queue locks I was just going
to remove choose_drive(), since this should be handled at the block layer
(probably using per-queue I/O priorities if needed).

---
current draft patch just to visualize the idea

drivers/ide/ide-io.c | 172 +++++++++++++++---------------------------------
drivers/ide/ide-probe.c | 3
include/linux/ide.h | 2
3 files changed, 55 insertions(+), 122 deletions(-)

Index: b/drivers/ide/ide-io.c
===================================================================
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -828,85 +828,10 @@ void ide_stall_queue (ide_drive_t *drive
drive->sleep = timeout + jiffies;
drive->dev_flags |= IDE_DFLAG_SLEEPING;
}
-
EXPORT_SYMBOL(ide_stall_queue);

-#define WAKEUP(drive) ((drive)->service_start + 2 * (drive)->service_time)
-
-/**
- * choose_drive - select a drive to service
- * @hwgroup: hardware group to select on
- *
- * choose_drive() selects the next drive which will be serviced.
- * This is necessary because the IDE layer can't issue commands
- * to both drives on the same cable, unlike SCSI.
- */
-
-static inline ide_drive_t *choose_drive (ide_hwgroup_t *hwgroup)
-{
- ide_drive_t *drive, *best;
-
-repeat:
- best = NULL;
- drive = hwgroup->drive;
-
- /*
- * drive is doing pre-flush, ordered write, post-flush sequence. even
- * though that is 3 requests, it must be seen as a single transaction.
- * we must not preempt this drive until that is complete
- */
- if (blk_queue_flushing(drive->queue)) {
- /*
- * small race where queue could get replugged during
- * the 3-request flush cycle, just yank the plug since
- * we want it to finish asap
- */
- blk_remove_plug(drive->queue);
- return drive;
- }
-
- do {
- u8 dev_s = !!(drive->dev_flags & IDE_DFLAG_SLEEPING);
- u8 best_s = (best && !!(best->dev_flags & IDE_DFLAG_SLEEPING));
-
- if ((dev_s == 0 || time_after_eq(jiffies, drive->sleep)) &&
- !elv_queue_empty(drive->queue)) {
- if (best == NULL ||
- (dev_s && (best_s == 0 || time_before(drive->sleep, best->sleep))) ||
- (best_s == 0 && time_before(WAKEUP(drive), WAKEUP(best)))) {
- if (!blk_queue_plugged(drive->queue))
- best = drive;
- }
- }
- } while ((drive = drive->next) != hwgroup->drive);
-
- if (best && (best->dev_flags & IDE_DFLAG_NICE1) &&
- (best->dev_flags & IDE_DFLAG_SLEEPING) == 0 &&
- best != hwgroup->drive && best->service_time > WAIT_MIN_SLEEP) {
- long t = (signed long)(WAKEUP(best) - jiffies);
- if (t >= WAIT_MIN_SLEEP) {
- /*
- * We *may* have some time to spare, but first let's see if
- * someone can potentially benefit from our nice mood today..
- */
- drive = best->next;
- do {
- if ((drive->dev_flags & IDE_DFLAG_SLEEPING) == 0
- && time_before(jiffies - best->service_time, WAKEUP(drive))
- && time_before(WAKEUP(drive), jiffies + t))
- {
- ide_stall_queue(best, min_t(long, t, 10 * WAIT_MIN_SLEEP));
- goto repeat;
- }
- } while ((drive = drive->next) != best);
- }
- }
- return best;
-}
-
/*
* Issue a new request to a drive from hwgroup
- * Caller must have already done spin_lock_irqsave(&hwgroup->lock, ..);
*
* A hwgroup is a serialized group of IDE interfaces. Usually there is
* exactly one hwif (interface) per hwgroup, but buggy controllers (eg. CMD640)
@@ -941,13 +866,15 @@ repeat:
*/
void do_ide_request(struct request_queue *q)
{
- ide_drive_t *orig_drive = q->queuedata;
- ide_hwgroup_t *hwgroup = orig_drive->hwif->hwgroup;
- ide_drive_t *drive;
+ ide_drive_t *drive = q->queuedata;
+ ide_hwgroup_t *hwgroup = drive->hwif->hwgroup;
ide_hwif_t *hwif;
struct request *rq;
ide_startstop_t startstop;

+ spin_unlock_irq(q->queue_lock);
+ spin_lock_irq(&hwgroup->lock);
+
/* for atari only: POSSIBLY BROKEN HERE(?) */
ide_get_lock(ide_intr, hwgroup);

@@ -956,21 +883,13 @@ void do_ide_request(struct request_queue

while (!hwgroup->busy) {
hwgroup->busy = 1;
- drive = choose_drive(hwgroup);
- if (drive == NULL) {
- int sleeping = 0;
- unsigned long sleep = 0; /* shut up, gcc */
+
+ if (1) {
hwgroup->rq = NULL;
- drive = hwgroup->drive;
- do {
- if ((drive->dev_flags & IDE_DFLAG_SLEEPING) &&
- (sleeping == 0 ||
- time_before(drive->sleep, sleep))) {
- sleeping = 1;
- sleep = drive->sleep;
- }
- } while ((drive = drive->next) != hwgroup->drive);
- if (sleeping) {
+
+ if (drive->dev_flags & IDE_DFLAG_SLEEPING) {
+ unsigned long sleep = drive->sleep;
+
/*
* Take a short snooze, and then wake up this hwgroup again.
* This gives other hwgroups on the same a chance to
@@ -989,23 +908,12 @@ void do_ide_request(struct request_queue
mod_timer(&hwgroup->timer, sleep);
/* we purposely leave hwgroup->busy==1
* while sleeping */
- } else {
- /* Ugly, but how can we sleep for the lock
- * otherwise? perhaps from tq_disk?
- */

- /* for atari only */
- ide_release_lock();
- hwgroup->busy = 0;
+ /* no more work for this hwgroup (for now) */
+ goto plug_device;
}
-
- /* no more work for this hwgroup (for now) */
- goto plug_device;
}

- if (drive != orig_drive)
- goto plug_device;
-
hwif = drive->hwif;

if (hwgroup->hwif->sharing_irq && hwif != hwgroup->hwif) {
@@ -1019,13 +927,17 @@ void do_ide_request(struct request_queue
hwgroup->hwif = hwif;
hwgroup->drive = drive;
drive->dev_flags &= ~(IDE_DFLAG_SLEEPING | IDE_DFLAG_PARKED);
- drive->service_start = jiffies;

+ spin_unlock_irq(&hwgroup->lock);
+ spin_lock_irq(q->queue_lock);
/*
* we know that the queue isn't empty, but this can happen
* if the q->prep_rq_fn() decides to kill a request
*/
rq = elv_next_request(drive->queue);
+ spin_unlock_irq(q->queue_lock);
+ spin_lock_irq(&hwgroup->lock);
+
if (!rq) {
hwgroup->busy = 0;
break;
@@ -1060,15 +972,21 @@ void do_ide_request(struct request_queue

if (startstop == ide_stopped) {
hwgroup->busy = 0;
- if (!elv_queue_empty(orig_drive->queue))
- blk_plug_device(orig_drive->queue);
+ /* give other devices a chance */
+ goto plug_device;
}
}
+
+ spin_unlock_irq(&hwgroup->lock);
+ spin_lock_irq(q->queue_lock);
return;

plug_device:
- if (!elv_queue_empty(orig_drive->queue))
- blk_plug_device(orig_drive->queue);
+ spin_unlock_irq(&hwgroup->lock);
+ spin_lock_irq(q->queue_lock);
+
+ if (!elv_queue_empty(q))
+ blk_plug_device(q);
}

/*
@@ -1129,6 +1047,17 @@ out:
return ret;
}

+static void ide_plug_device(ide_drive_t *drive)
+{
+ struct request_queue *q = drive->queue;
+ unsigned long flags;
+
+ spin_lock_irqsave(q->queue_lock, flags);
+ if (!elv_queue_empty(q))
+ blk_plug_device(q);
+ spin_unlock_irqrestore(q->queue_lock, flags);
+}
+
/**
* ide_timer_expiry - handle lack of an IDE interrupt
* @data: timer callback magic (hwgroup)
@@ -1146,10 +1075,12 @@ out:
void ide_timer_expiry (unsigned long data)
{
ide_hwgroup_t *hwgroup = (ide_hwgroup_t *) data;
+ ide_drive_t *uninitialized_var(drive);
ide_handler_t *handler;
ide_expiry_t *expiry;
unsigned long flags;
unsigned long wait = -1;
+ int plug_device = 0;

spin_lock_irqsave(&hwgroup->lock, flags);

@@ -1166,7 +1097,7 @@ void ide_timer_expiry (unsigned long dat
hwgroup->busy = 0;
}
} else {
- ide_drive_t *drive = hwgroup->drive;
+ drive = hwgroup->drive;
if (!drive) {
printk(KERN_ERR "ide_timer_expiry: hwgroup->drive was NULL\n");
hwgroup->handler = NULL;
@@ -1217,17 +1148,19 @@ void ide_timer_expiry (unsigned long dat
ide_error(drive, "irq timeout",
hwif->tp_ops->read_status(hwif));
}
- drive->service_time = jiffies - drive->service_start;
+
spin_lock_irq(&hwgroup->lock);
enable_irq(hwif->irq);
if (startstop == ide_stopped) {
hwgroup->busy = 0;
- if (!elv_queue_empty(drive->queue))
- blk_plug_device(drive->queue);
+ plug_device = 1;
}
}
}
spin_unlock_irqrestore(&hwgroup->lock, flags);
+
+ if (plug_device)
+ ide_plug_device(drive);
}

/**
@@ -1321,10 +1254,11 @@ irqreturn_t ide_intr (int irq, void *dev
unsigned long flags;
ide_hwgroup_t *hwgroup = (ide_hwgroup_t *)dev_id;
ide_hwif_t *hwif = hwgroup->hwif;
- ide_drive_t *drive;
+ ide_drive_t *uninitialized_var(drive);
ide_handler_t *handler;
ide_startstop_t startstop;
irqreturn_t irq_ret = IRQ_NONE;
+ int plug_device = 0;

spin_lock_irqsave(&hwgroup->lock, flags);

@@ -1415,12 +1349,10 @@ irqreturn_t ide_intr (int irq, void *dev
* same irq as is currently being serviced here, and Linux
* won't allow another of the same (on any CPU) until we return.
*/
- drive->service_time = jiffies - drive->service_start;
if (startstop == ide_stopped) {
if (hwgroup->handler == NULL) { /* paranoia */
hwgroup->busy = 0;
- if (!elv_queue_empty(drive->queue))
- blk_plug_device(drive->queue);
+ plug_device = 1;
} else
printk(KERN_ERR "%s: %s: huh? expected NULL handler "
"on exit\n", __func__, drive->name);
@@ -1429,6 +1361,10 @@ out_handled:
irq_ret = IRQ_HANDLED;
out:
spin_unlock_irqrestore(&hwgroup->lock, flags);
+
+ if (plug_device)
+ ide_plug_device(drive);
+
return irq_ret;
}

Index: b/drivers/ide/ide-probe.c
===================================================================
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -905,8 +905,7 @@ static int ide_init_queue(ide_drive_t *d
* do not.
*/

- q = blk_init_queue_node(do_ide_request, &hwif->hwgroup->lock,
- hwif_to_node(hwif));
+ q = blk_init_queue_node(do_ide_request, NULL, hwif_to_node(hwif));
if (!q)
return 1;

Index: b/include/linux/ide.h
===================================================================
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -605,8 +605,6 @@ struct ide_drive_s {
unsigned long dev_flags;

unsigned long sleep; /* sleep until this time */
- unsigned long service_start; /* time we started last request */
- unsigned long service_time; /* service time of last request */
unsigned long timeout; /* max time to wait for irq */

special_t special; /* special action flags */