2008-10-22 07:40:43

by Jens Axboe

[permalink] [raw]
Subject: [PATCH 0/2] libata tag patches

Hi,

Two tag related patches for libata.

--
Jens Axboe


2008-10-22 07:40:58

by Jens Axboe

[permalink] [raw]
Subject: [PATCH 1/2] libata: get rid of ATA_MAX_QUEUE loop in ata_qc_complete_multiple()

We very rarely (if ever) complete more than one command in the
sactive mask at the time, even for extremely high IO rates. So
looping over the entire range of possible tags is pointless,
instead use __ffs() to just find the completed tags directly.

Signed-off-by: Jens Axboe <[email protected]>
---
drivers/ata/libata-core.c | 15 +++++++++------
1 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 1ee9499..c3c53e7 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4799,9 +4799,9 @@ void ata_qc_complete(struct ata_queued_cmd *qc)
*/
int ata_qc_complete_multiple(struct ata_port *ap, u32 qc_active)
{
+ unsigned int i = 0;
int nr_done = 0;
u32 done_mask;
- int i;

done_mask = ap->qc_active ^ qc_active;

@@ -4811,16 +4811,19 @@ int ata_qc_complete_multiple(struct ata_port *ap, u32 qc_active)
return -EINVAL;
}

- for (i = 0; i < ATA_MAX_QUEUE; i++) {
+ while (done_mask) {
struct ata_queued_cmd *qc;
+ unsigned int next = __ffs(done_mask);

- if (!(done_mask & (1 << i)))
- continue;
-
- if ((qc = ata_qc_from_tag(ap, i))) {
+ qc = ata_qc_from_tag(ap, i + next);
+ if (qc) {
ata_qc_complete(qc);
nr_done++;
}
+ if (++next >= ATA_MAX_QUEUE)
+ break;
+ i += next;
+ done_mask >>= next;
}

return nr_done;
--
1.6.0.2.588.g3102

2008-10-22 07:41:22

by Jens Axboe

[permalink] [raw]
Subject: [PATCH 2/2] libata: switch to using block layer tagging support

libata currently has a pretty dumb ATA_MAX_QUEUE loop for finding
a free tag to use. Instead of fixing that up, convert libata to
using block layer tagging - gets rid of code in libata, and is also
much faster.

Signed-off-by: Jens Axboe <[email protected]>
---
drivers/ata/libata-core.c | 66 ++++----------------------------------------
drivers/ata/libata-scsi.c | 10 +++++-
drivers/ata/libata.h | 19 +++++++++++-
include/linux/libata.h | 1 -
4 files changed, 31 insertions(+), 65 deletions(-)

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index c3c53e7..3d5da01 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -1713,8 +1713,6 @@ unsigned ata_exec_internal_sg(struct ata_device *dev,
else
tag = 0;

- if (test_and_set_bit(tag, &ap->qc_allocated))
- BUG();
qc = __ata_qc_from_tag(ap, tag);

qc->tag = tag;
@@ -4553,37 +4551,6 @@ void swap_buf_le16(u16 *buf, unsigned int buf_words)
}

/**
- * ata_qc_new - Request an available ATA command, for queueing
- * @ap: Port associated with device @dev
- * @dev: Device from whom we request an available command structure
- *
- * LOCKING:
- * None.
- */
-
-static struct ata_queued_cmd *ata_qc_new(struct ata_port *ap)
-{
- struct ata_queued_cmd *qc = NULL;
- unsigned int i;
-
- /* no command while frozen */
- if (unlikely(ap->pflags & ATA_PFLAG_FROZEN))
- return NULL;
-
- /* the last tag is reserved for internal command. */
- for (i = 0; i < ATA_MAX_QUEUE - 1; i++)
- if (!test_and_set_bit(i, &ap->qc_allocated)) {
- qc = __ata_qc_from_tag(ap, i);
- break;
- }
-
- if (qc)
- qc->tag = i;
-
- return qc;
-}
-
-/**
* ata_qc_new_init - Request an available ATA command, and initialize it
* @dev: Device from whom we request an available command structure
*
@@ -4591,16 +4558,20 @@ static struct ata_queued_cmd *ata_qc_new(struct ata_port *ap)
* None.
*/

-struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev)
+struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag)
{
struct ata_port *ap = dev->link->ap;
struct ata_queued_cmd *qc;

- qc = ata_qc_new(ap);
+ if (unlikely(ap->pflags & ATA_PFLAG_FROZEN))
+ return NULL;
+
+ qc = __ata_qc_from_tag(ap, tag);
if (qc) {
qc->scsicmd = NULL;
qc->ap = ap;
qc->dev = dev;
+ qc->tag = tag;

ata_qc_reinit(qc);
}
@@ -4608,31 +4579,6 @@ struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev)
return qc;
}

-/**
- * ata_qc_free - free unused ata_queued_cmd
- * @qc: Command to complete
- *
- * Designed to free unused ata_queued_cmd object
- * in case something prevents using it.
- *
- * LOCKING:
- * spin_lock_irqsave(host lock)
- */
-void ata_qc_free(struct ata_queued_cmd *qc)
-{
- struct ata_port *ap = qc->ap;
- unsigned int tag;
-
- WARN_ON(qc == NULL); /* ata_qc_from_tag _might_ return NULL */
-
- qc->flags = 0;
- tag = qc->tag;
- if (likely(ata_tag_valid(tag))) {
- qc->tag = ATA_TAG_POISON;
- clear_bit(tag, &ap->qc_allocated);
- }
-}
-
void __ata_qc_complete(struct ata_queued_cmd *qc)
{
struct ata_port *ap = qc->ap;
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 5d312dc..d5b9b72 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -708,7 +708,7 @@ static struct ata_queued_cmd *ata_scsi_qc_new(struct ata_device *dev,
{
struct ata_queued_cmd *qc;

- qc = ata_qc_new_init(dev);
+ qc = ata_qc_new_init(dev, cmd->request->tag);
if (qc) {
qc->scsicmd = cmd;
qc->scsidone = done;
@@ -1103,7 +1103,8 @@ static int ata_scsi_dev_config(struct scsi_device *sdev,

depth = min(sdev->host->can_queue, ata_id_queue_depth(dev->id));
depth = min(ATA_MAX_QUEUE - 1, depth);
- scsi_adjust_queue_depth(sdev, MSG_SIMPLE_TAG, depth);
+ scsi_set_tag_type(sdev, MSG_SIMPLE_TAG);
+ scsi_activate_tcq(sdev, depth);
}

return 0;
@@ -1943,6 +1944,11 @@ static unsigned int ata_scsiop_inq_std(struct ata_scsi_args *args, u8 *rbuf)
hdr[1] |= (1 << 7);

memcpy(rbuf, hdr, sizeof(hdr));
+
+ /* if ncq, set tags supported */
+ if (ata_id_has_ncq(args->id))
+ rbuf[7] |= (1 << 1);
+
memcpy(&rbuf[8], "ATA ", 8);
ata_id_string(args->id, &rbuf[16], ATA_ID_PROD, 16);
ata_id_string(args->id, &rbuf[32], ATA_ID_FW_REV, 4);
diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h
index fe2839e..d3831d3 100644
--- a/drivers/ata/libata.h
+++ b/drivers/ata/libata.h
@@ -74,7 +74,7 @@ extern struct ata_link *ata_dev_phys_link(struct ata_device *dev);
extern void ata_force_cbl(struct ata_port *ap);
extern u64 ata_tf_to_lba(const struct ata_taskfile *tf);
extern u64 ata_tf_to_lba48(const struct ata_taskfile *tf);
-extern struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev);
+extern struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag);
extern int ata_build_rw_tf(struct ata_taskfile *tf, struct ata_device *dev,
u64 block, u32 n_block, unsigned int tf_flags,
unsigned int tag);
@@ -103,7 +103,6 @@ extern int ata_dev_configure(struct ata_device *dev);
extern int sata_down_spd_limit(struct ata_link *link);
extern int ata_down_xfermask_limit(struct ata_device *dev, unsigned int sel);
extern void ata_sg_clean(struct ata_queued_cmd *qc);
-extern void ata_qc_free(struct ata_queued_cmd *qc);
extern void ata_qc_issue(struct ata_queued_cmd *qc);
extern void __ata_qc_complete(struct ata_queued_cmd *qc);
extern int atapi_check_dma(struct ata_queued_cmd *qc);
@@ -119,6 +118,22 @@ extern struct ata_port *ata_port_alloc(struct ata_host *host);
extern void ata_dev_enable_pm(struct ata_device *dev, enum link_pm policy);
extern void ata_lpm_schedule(struct ata_port *ap, enum link_pm);

+/**
+ * ata_qc_free - free unused ata_queued_cmd
+ * @qc: Command to complete
+ *
+ * Designed to free unused ata_queued_cmd object
+ * in case something prevents using it.
+ *
+ * LOCKING:
+ * spin_lock_irqsave(host lock)
+ */
+static inline void ata_qc_free(struct ata_queued_cmd *qc)
+{
+ qc->flags = 0;
+ qc->tag = ATA_TAG_POISON;
+}
+
/* libata-acpi.c */
#ifdef CONFIG_ATA_ACPI
extern void ata_acpi_associate_sata_port(struct ata_port *ap);
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 947cf84..53ad28f 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -692,7 +692,6 @@ struct ata_port {
unsigned int cbl; /* cable type; ATA_CBL_xxx */

struct ata_queued_cmd qcmd[ATA_MAX_QUEUE];
- unsigned long qc_allocated;
unsigned int qc_active;
int nr_active_links; /* #links with active qcs */

--
1.6.0.2.588.g3102

2008-10-22 18:24:20

by Elias Oltmanns

[permalink] [raw]
Subject: Re: [PATCH 1/2] libata: get rid of ATA_MAX_QUEUE loop in ata_qc_complete_multiple()

Jens Axboe <[email protected]> wrote:
> We very rarely (if ever) complete more than one command in the
> sactive mask at the time, even for extremely high IO rates. So
> looping over the entire range of possible tags is pointless,
> instead use __ffs() to just find the completed tags directly.
>
> Signed-off-by: Jens Axboe <[email protected]>
> ---
> drivers/ata/libata-core.c | 15 +++++++++------
> 1 files changed, 9 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
> index 1ee9499..c3c53e7 100644
> --- a/drivers/ata/libata-core.c
> +++ b/drivers/ata/libata-core.c
> @@ -4799,9 +4799,9 @@ void ata_qc_complete(struct ata_queued_cmd *qc)
> */
> int ata_qc_complete_multiple(struct ata_port *ap, u32 qc_active)
> {
> + unsigned int i = 0;
> int nr_done = 0;
> u32 done_mask;
> - int i;
>
> done_mask = ap->qc_active ^ qc_active;
>
> @@ -4811,16 +4811,19 @@ int ata_qc_complete_multiple(struct ata_port *ap, u32 qc_active)
> return -EINVAL;
> }
>
> - for (i = 0; i < ATA_MAX_QUEUE; i++) {
> + while (done_mask) {
> struct ata_queued_cmd *qc;
> + unsigned int next = __ffs(done_mask);
>
> - if (!(done_mask & (1 << i)))
> - continue;
> -
> - if ((qc = ata_qc_from_tag(ap, i))) {
> + qc = ata_qc_from_tag(ap, i + next);
> + if (qc) {
> ata_qc_complete(qc);
> nr_done++;
> }
> + if (++next >= ATA_MAX_QUEUE)
> + break;

If you think about it, this statement is equivalent to

if (ap->qc_active ^ qc_active == (1 << (ATA_MAX_QUEUE - 1)))

To fix this, you could say

if (++next + i >= ATA_MAX_QUEUE)

but perhaps it would be even more efficient (or not much worse) to skip
this check entirely.

Regards,

Elias

2008-10-23 00:46:53

by Jeff Garzik

[permalink] [raw]
Subject: Re: [PATCH 2/2] libata: switch to using block layer tagging support

Jens Axboe wrote:
> libata currently has a pretty dumb ATA_MAX_QUEUE loop for finding
> a free tag to use. Instead of fixing that up, convert libata to
> using block layer tagging - gets rid of code in libata, and is also
> much faster.
>
> Signed-off-by: Jens Axboe <[email protected]>
> ---
> drivers/ata/libata-core.c | 66 ++++----------------------------------------
> drivers/ata/libata-scsi.c | 10 +++++-
> drivers/ata/libata.h | 19 +++++++++++-
> include/linux/libata.h | 1 -
> 4 files changed, 31 insertions(+), 65 deletions(-)

Just to be sure, I take it this patch series is for 2.6.29?

Seems nice to have, but since it wasn't ready for the merge window
opening...

Jeff


2008-10-23 03:42:06

by Tejun Heo

[permalink] [raw]
Subject: Re: [PATCH 2/2] libata: switch to using block layer tagging support

Jens Axboe wrote:
> libata currently has a pretty dumb ATA_MAX_QUEUE loop for finding
> a free tag to use. Instead of fixing that up, convert libata to
> using block layer tagging - gets rid of code in libata, and is also
> much faster.
>
> Signed-off-by: Jens Axboe <[email protected]>

Acked-by: Tejun Heo <[email protected]>

Thanks.

--
tejun

2008-10-23 04:14:50

by Tejun Heo

[permalink] [raw]
Subject: Re: [PATCH 1/2] libata: get rid of ATA_MAX_QUEUE loop in ata_qc_complete_multiple()

Jens Axboe wrote:
> @@ -4811,16 +4811,19 @@ int ata_qc_complete_multiple(struct ata_port *ap, u32 qc_active)
> return -EINVAL;
> }
>
> - for (i = 0; i < ATA_MAX_QUEUE; i++) {
> + while (done_mask) {
> struct ata_queued_cmd *qc;
> + unsigned int next = __ffs(done_mask);
>
> - if (!(done_mask & (1 << i)))
> - continue;
> -
> - if ((qc = ata_qc_from_tag(ap, i))) {
> + qc = ata_qc_from_tag(ap, i + next);
> + if (qc) {
> ata_qc_complete(qc);
> nr_done++;
> }
> + if (++next >= ATA_MAX_QUEUE)
> + break;
> + i += next;
> + done_mask >>= next;

Shouldn't this be...

i += next + 1;
if (i >= ATA_MAX_QUEUE)
break;

or better...

while (done_mask) {
struct ata_queued_cmd *qc;
unsigned int next = __ffs(done_mask);

tag += next;
if ((qc = ata_qc_from_tag(ap, tag))) {
ata_qc_complete(qc);
nr_done++;
}
next++;
tag += next;
done_mask >>= next;
}

done_mask is guaranteed to be zero at when tag reaches ATA_MAX_QUEUE.

Thanks.

--
tejun

2008-10-23 06:34:58

by Jens Axboe

[permalink] [raw]
Subject: Re: [PATCH 2/2] libata: switch to using block layer tagging support

On Wed, Oct 22 2008, Jeff Garzik wrote:
> Jens Axboe wrote:
> >libata currently has a pretty dumb ATA_MAX_QUEUE loop for finding
> >a free tag to use. Instead of fixing that up, convert libata to
> >using block layer tagging - gets rid of code in libata, and is also
> >much faster.
> >
> >Signed-off-by: Jens Axboe <[email protected]>
> >---
> > drivers/ata/libata-core.c | 66
> > ++++----------------------------------------
> > drivers/ata/libata-scsi.c | 10 +++++-
> > drivers/ata/libata.h | 19 +++++++++++-
> > include/linux/libata.h | 1 -
> > 4 files changed, 31 insertions(+), 65 deletions(-)
>
> Just to be sure, I take it this patch series is for 2.6.29?

Sure yes, it was written and tested just the other day :-)

> Seems nice to have, but since it wasn't ready for the merge window
> opening...

Indeed, but it can certainly wait for 2.6.29.

--
Jens Axboe

2008-10-23 06:38:27

by Jens Axboe

[permalink] [raw]
Subject: Re: [PATCH 1/2] libata: get rid of ATA_MAX_QUEUE loop in ata_qc_complete_multiple()

On Thu, Oct 23 2008, Tejun Heo wrote:
> Jens Axboe wrote:
> > @@ -4811,16 +4811,19 @@ int ata_qc_complete_multiple(struct ata_port *ap, u32 qc_active)
> > return -EINVAL;
> > }
> >
> > - for (i = 0; i < ATA_MAX_QUEUE; i++) {
> > + while (done_mask) {
> > struct ata_queued_cmd *qc;
> > + unsigned int next = __ffs(done_mask);
> >
> > - if (!(done_mask & (1 << i)))
> > - continue;
> > -
> > - if ((qc = ata_qc_from_tag(ap, i))) {
> > + qc = ata_qc_from_tag(ap, i + next);
> > + if (qc) {
> > ata_qc_complete(qc);
> > nr_done++;
> > }
> > + if (++next >= ATA_MAX_QUEUE)
> > + break;
> > + i += next;
> > + done_mask >>= next;
>
> Shouldn't this be...
>
> i += next + 1;
> if (i >= ATA_MAX_QUEUE)
> break;
>
> or better...
>
> while (done_mask) {
> struct ata_queued_cmd *qc;
> unsigned int next = __ffs(done_mask);
>
> tag += next;
> if ((qc = ata_qc_from_tag(ap, tag))) {
> ata_qc_complete(qc);
> nr_done++;
> }
> next++;
> tag += next;
> done_mask >>= next;
> }
>
> done_mask is guaranteed to be zero at when tag reaches ATA_MAX_QUEUE.

That does indeed look a lot cleaner. I think it was rewritten at some
point and kept some of the logic for not passing 0 into __ffs, but it's
clearly pointless now.

I'll send out a revised patch when it's tested.

--
Jens Axboe

2008-10-23 06:45:26

by Jens Axboe

[permalink] [raw]
Subject: Re: [PATCH 1/2] libata: get rid of ATA_MAX_QUEUE loop in ata_qc_complete_multiple()

On Thu, Oct 23 2008, Tejun Heo wrote:
> while (done_mask) {
> struct ata_queued_cmd *qc;
> unsigned int next = __ffs(done_mask);
>
> tag += next;
> if ((qc = ata_qc_from_tag(ap, tag))) {
> ata_qc_complete(qc);
> nr_done++;
> }
> next++;
> tag += next;
> done_mask >>= next;
> }

That doesn't work (you're adding next to tag twice), it needs a little
tweak:

while (done_mask) {
struct ata_queued_cmd *qc;
unsigned int next = __ffs(done_mask);

if ((qc = ata_qc_from_tag(ap, tag + next))) {
ata_qc_complete(qc);
nr_done++;
}
next++;
tag += next;
done_mask >>= next;
}

and I think it should work. Not tested yet :-)

--
Jens Axboe

2008-10-23 08:24:58

by Jens Axboe

[permalink] [raw]
Subject: Re: [PATCH 1/2] libata: get rid of ATA_MAX_QUEUE loop in ata_qc_complete_multiple()

On Wed, Oct 22 2008, Elias Oltmanns wrote:
> Jens Axboe <[email protected]> wrote:
> > We very rarely (if ever) complete more than one command in the
> > sactive mask at the time, even for extremely high IO rates. So
> > looping over the entire range of possible tags is pointless,
> > instead use __ffs() to just find the completed tags directly.
> >
> > Signed-off-by: Jens Axboe <[email protected]>
> > ---
> > drivers/ata/libata-core.c | 15 +++++++++------
> > 1 files changed, 9 insertions(+), 6 deletions(-)
> >
> > diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
> > index 1ee9499..c3c53e7 100644
> > --- a/drivers/ata/libata-core.c
> > +++ b/drivers/ata/libata-core.c
> > @@ -4799,9 +4799,9 @@ void ata_qc_complete(struct ata_queued_cmd *qc)
> > */
> > int ata_qc_complete_multiple(struct ata_port *ap, u32 qc_active)
> > {
> > + unsigned int i = 0;
> > int nr_done = 0;
> > u32 done_mask;
> > - int i;
> >
> > done_mask = ap->qc_active ^ qc_active;
> >
> > @@ -4811,16 +4811,19 @@ int ata_qc_complete_multiple(struct ata_port *ap, u32 qc_active)
> > return -EINVAL;
> > }
> >
> > - for (i = 0; i < ATA_MAX_QUEUE; i++) {
> > + while (done_mask) {
> > struct ata_queued_cmd *qc;
> > + unsigned int next = __ffs(done_mask);
> >
> > - if (!(done_mask & (1 << i)))
> > - continue;
> > -
> > - if ((qc = ata_qc_from_tag(ap, i))) {
> > + qc = ata_qc_from_tag(ap, i + next);
> > + if (qc) {
> > ata_qc_complete(qc);
> > nr_done++;
> > }
> > + if (++next >= ATA_MAX_QUEUE)
> > + break;
>
> If you think about it, this statement is equivalent to
>
> if (ap->qc_active ^ qc_active == (1 << (ATA_MAX_QUEUE - 1)))
>
> To fix this, you could say
>
> if (++next + i >= ATA_MAX_QUEUE)
>
> but perhaps it would be even more efficient (or not much worse) to skip
> this check entirely.

Yeah, the check should just be killed, that's the version I posted in
the reply to Tejun as well.

--
Jens Axboe

2008-10-23 13:42:43

by Jens Axboe

[permalink] [raw]
Subject: Re: [PATCH 1/2] libata: get rid of ATA_MAX_QUEUE loop in ata_qc_complete_multiple()

On Thu, Oct 23 2008, Jens Axboe wrote:
> On Thu, Oct 23 2008, Tejun Heo wrote:
> > while (done_mask) {
> > struct ata_queued_cmd *qc;
> > unsigned int next = __ffs(done_mask);
> >
> > tag += next;
> > if ((qc = ata_qc_from_tag(ap, tag))) {
> > ata_qc_complete(qc);
> > nr_done++;
> > }
> > next++;
> > tag += next;
> > done_mask >>= next;
> > }
>
> That doesn't work (you're adding next to tag twice), it needs a little
> tweak:
>
> while (done_mask) {
> struct ata_queued_cmd *qc;
> unsigned int next = __ffs(done_mask);
>
> if ((qc = ata_qc_from_tag(ap, tag + next))) {
> ata_qc_complete(qc);
> nr_done++;
> }
> next++;
> tag += next;
> done_mask >>= next;
> }
>
> and I think it should work. Not tested yet :-)

Pondered some more, and it can't work. The problem is that if we
complete tag 31, we attempt to shift done_mask down by 32 bits. On a
32-bit arch, that's not defined. So we DO need a check like the existing
one, or something similar.

So I don't think we need to make changes to this patch either, at least
unless one of you can come up with a better check that avoids a branch.

--
Jens Axboe

2008-10-23 15:19:51

by Elias Oltmanns

[permalink] [raw]
Subject: Re: [PATCH 1/2] libata: get rid of ATA_MAX_QUEUE loop in ata_qc_complete_multiple()

Jens Axboe <[email protected]> wrote:
> On Thu, Oct 23 2008, Jens Axboe wrote:
>> On Thu, Oct 23 2008, Tejun Heo wrote:
>
>> > while (done_mask) {
>> > struct ata_queued_cmd *qc;
>> > unsigned int next = __ffs(done_mask);
>> >
>> > tag += next;
>> > if ((qc = ata_qc_from_tag(ap, tag))) {
>> > ata_qc_complete(qc);
>> > nr_done++;
>> > }
>> > next++;
>> > tag += next;
>> > done_mask >>= next;
>> > }
>>
>> That doesn't work (you're adding next to tag twice), it needs a little
>> tweak:
>>
>> while (done_mask) {
>> struct ata_queued_cmd *qc;
>> unsigned int next = __ffs(done_mask);
>>
>> if ((qc = ata_qc_from_tag(ap, tag + next))) {
>> ata_qc_complete(qc);
>> nr_done++;
>> }
>> next++;
>> tag += next;
>> done_mask >>= next;
>> }
>>
>> and I think it should work. Not tested yet :-)
>
> Pondered some more, and it can't work. The problem is that if we
> complete tag 31, we attempt to shift done_mask down by 32 bits. On a
> 32-bit arch, that's not defined. So we DO need a check like the existing
> one, or something similar.
>
> So I don't think we need to make changes to this patch either, at least
> unless one of you can come up with a better check that avoids a branch.

What about a switch outside the while loop:

if (done_mask == ATA_MAX_QUEUE >> 1) {
if ((qc = ata_qc_from_tag(ap, ATA_MAX_QUEUE >> 1))) {
ata_qc_complete(qc);
nr_done = 1;
}
} else
while (done_mask)
...

Alternatively, you could just alter tag and done_mask (tag =
ATA_MAX_QUEUE >> 2, done_mask = 2) and enter the while loop
unconditionally. But then, you claimed that there will hardly ever be
more than one command to complete, so my suggestions will probably not
improve anything in real life.

Regards,

Elias

2008-10-24 07:09:45

by Paul Mundt

[permalink] [raw]
Subject: Re: [PATCH 2/2] libata: switch to using block layer tagging support

On Wed, Oct 22, 2008 at 09:40:43AM +0200, Jens Axboe wrote:
> libata currently has a pretty dumb ATA_MAX_QUEUE loop for finding
> a free tag to use. Instead of fixing that up, convert libata to
> using block layer tagging - gets rid of code in libata, and is also
> much faster.
>
> Signed-off-by: Jens Axboe <[email protected]>

Unfortunately this change breaks SATA for me, bisecting picked out this
commit especially.

Before:

sata_sil 0000:00:01.0: Applying R_ERR on DMA activate FIS errata fix
scsi0 : sata_sil
scsi1 : sata_sil
ata1: SATA max UDMA/100 mmio m512@0xfd000200 tf 0xfd000280 irq 66
ata2: SATA max UDMA/100 mmio m512@0xfd000200 tf 0xfd0002c0 irq 66
ata1: SATA link up 1.5 Gbps (SStatus 113 SControl 310)
ata1.00: ATA-7: SAMSUNG HM080JI, YC100-02, max UDMA7
ata1.00: 156368016 sectors, multi 0: LBA48 NCQ (depth 0/32)
ata1.00: configured for UDMA/100
ata2: SATA link down (SStatus 0 SControl 310)
scsi 0:0:0:0: Direct-Access ATA SAMSUNG HM080JI YC10 PQ: 0 ANSI: 5
sd 0:0:0:0: [sda] 156368016 512-byte hardware sectors: (80.0 GB/74.5 GiB)
sd 0:0:0:0: [sda] Write Protect is off
sd 0:0:0:0: [sda] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
sd 0:0:0:0: [sda] 156368016 512-byte hardware sectors: (80.0 GB/74.5 GiB)
sd 0:0:0:0: [sda] Write Protect is off
sd 0:0:0:0: [sda] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
sda: sda1 sda2
sd 0:0:0:0: [sda] Attached SCSI disk

After:

sata_sil 0000:00:01.0: Applying R_ERR on DMA activate FIS errata fix
scsi0 : sata_sil
scsi1 : sata_sil
ata1: SATA max UDMA/100 mmio m512@0xfd000200 tf 0xfd000280 irq 66
ata2: SATA max UDMA/100 mmio m512@0xfd000200 tf 0xfd0002c0 irq 66
ata1: SATA link up 1.5 Gbps (SStatus 113 SControl 310)
ata1.00: ATA-7: SAMSUNG HM080JI, YC100-02, max UDMA7
ata1.00: 156368016 sectors, multi 0: LBA48 NCQ (depth 0/32)
ata1.00: configured for UDMA/100
ata2: SATA link down (SStatus 0 SControl 310)
scsi 0:0:0:0: Direct-Access ATA SAMSUNG HM080JI YC10 PQ: 0 ANSI: 5
sd 0:0:0:0: [sda] 156368016 512-byte hardware sectors: (80.0 GB/74.5 GiB)
sd 0:0:0:0: [sda] Write Protect is off
sd 0:0:0:0: [sda] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
sd 0:0:0:0: [sda] 156368016 512-byte hardware sectors: (80.0 GB/74.5 GiB)
sd 0:0:0:0: [sda] Write Protect is off
sd 0:0:0:0: [sda] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
sda:

Where it hangs until it times out after 180 seconds.

Built with:

CONFIG_ATA=y
CONFIG_SATA_PMP=y
CONFIG_ATA_SFF=y
CONFIG_SATA_SIL=y

I have no idea where to start looking at this, so hopefully someone has some
suggestions :-)

2008-10-24 08:16:12

by Jens Axboe

[permalink] [raw]
Subject: Re: [PATCH 1/2] libata: get rid of ATA_MAX_QUEUE loop in ata_qc_complete_multiple()

On Thu, Oct 23 2008, Elias Oltmanns wrote:
> Jens Axboe <[email protected]> wrote:
> > On Thu, Oct 23 2008, Jens Axboe wrote:
> >> On Thu, Oct 23 2008, Tejun Heo wrote:
> >
> >> > while (done_mask) {
> >> > struct ata_queued_cmd *qc;
> >> > unsigned int next = __ffs(done_mask);
> >> >
> >> > tag += next;
> >> > if ((qc = ata_qc_from_tag(ap, tag))) {
> >> > ata_qc_complete(qc);
> >> > nr_done++;
> >> > }
> >> > next++;
> >> > tag += next;
> >> > done_mask >>= next;
> >> > }
> >>
> >> That doesn't work (you're adding next to tag twice), it needs a little
> >> tweak:
> >>
> >> while (done_mask) {
> >> struct ata_queued_cmd *qc;
> >> unsigned int next = __ffs(done_mask);
> >>
> >> if ((qc = ata_qc_from_tag(ap, tag + next))) {
> >> ata_qc_complete(qc);
> >> nr_done++;
> >> }
> >> next++;
> >> tag += next;
> >> done_mask >>= next;
> >> }
> >>
> >> and I think it should work. Not tested yet :-)
> >
> > Pondered some more, and it can't work. The problem is that if we
> > complete tag 31, we attempt to shift done_mask down by 32 bits. On a
> > 32-bit arch, that's not defined. So we DO need a check like the existing
> > one, or something similar.
> >
> > So I don't think we need to make changes to this patch either, at least
> > unless one of you can come up with a better check that avoids a branch.
>
> What about a switch outside the while loop:
>
> if (done_mask == ATA_MAX_QUEUE >> 1) {
> if ((qc = ata_qc_from_tag(ap, ATA_MAX_QUEUE >> 1))) {
> ata_qc_complete(qc);
> nr_done = 1;
> }
> } else
> while (done_mask)
> ...
>
> Alternatively, you could just alter tag and done_mask (tag =
> ATA_MAX_QUEUE >> 2, done_mask = 2) and enter the while loop
> unconditionally. But then, you claimed that there will hardly ever be
> more than one command to complete, so my suggestions will probably not
> improve anything in real life.

Honestly, I think the current check is a lot cleaner then.

--
Jens Axboe