2007-05-19 05:35:44

by Jeff Garzik

Subject: [PATCH] sata_sil: Greatly improve DMA support


Since Alan expressed a desire to see Large Block Transfer (LBT) support
in pata_sil680, I thought I would re-post my patch for adding LBT support
to sata_sil.

Silicon Image's Large Block Transfer (LBT) support is a vendor-specific
DMA scatter/gather engine, which enables 64-bit DMA addresses (where
supported by platform) and eliminates the annoying 64k DMA boundary
found in legacy PCI IDE BMDMA engines.
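
For reference, the legacy PRD (physical region descriptor) format that
LBT replaces looks roughly like this -- a sketch of the standard PCI IDE
programming model, with an illustrative struct name (libata's equivalent
is struct ata_prd):

struct legacy_prd {
	__le32 addr;		/* 32-bit physical buffer address */
	__le32 flags_len;	/* bits 15:0: byte count (0 == 64 KiB),
				 * bit 31: end-of-table (ATA_PRD_EOT) */
};

/*
 * A legacy entry may describe at most 64 KiB and may not cross a
 * 64 KiB boundary, so large transfers get chopped into many small
 * entries.  LBT keeps the two-word layout but honors a larger byte
 * count, which is why the patch below can program sg_len directly
 * and raise dma_boundary.
 */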

This hardware is open hardware. Docs for both PATA and SATA can be
found at http://gkernel.sourceforge.net/specs/sii/

LBT support works the same way in both pata_sil680 and sata_sil, so
maybe a motivated individual could use this patch as inspiration
for doing the same thing with pata_sil680.

NOTE: This patch works for me on x86 and x86-64. However, other
testers reported problems, which is why it is not upstream.

These changes can be found in the 'sii-lbt' branch of
git://git.kernel.org/pub/scm/linux/kernel/git/jgarzik/libata-dev.git

commit cde8d26737acdb5d8b00f6f0952a0241863ca5fa
[libata sii-lbt] sata_sil: warning fix

commit 22c63ef4beb7c0a09ecfa61b4d3a884cb4eadfa0
[libata] sata_sil sii-lbt: iomap build fix

commit b2a4d29570403f611689e786372e8a4afbdf8891
Author: Jeff Garzik <[email protected]>
Date: Fri Dec 1 22:58:31 2006 -0500

[libata sata_sil] Greatly improve DMA handling

311x hardware includes a vendor-specific scatter/gather format which
eliminates the traditional 64k boundary limit found in PCI IDE DMA.

Since implementing this feature required custom implementations of the
bmdma_xxx hooks, I took the opportunity to eliminate a few unnecessary
MMIO register reads/writes.

Signed-off-by: Jeff Garzik <[email protected]>

drivers/ata/sata_sil.c | 111 ++++++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 101 insertions(+), 10 deletions(-)

diff --git a/drivers/ata/sata_sil.c b/drivers/ata/sata_sil.c
index e8483aa..c44c398 100644
--- a/drivers/ata/sata_sil.c
+++ b/drivers/ata/sata_sil.c
@@ -46,7 +46,11 @@
#include <linux/libata.h>

#define DRV_NAME "sata_sil"
-#define DRV_VERSION "2.2"
+#define DRV_VERSION "2.2lbt"
+
+enum {
+ SIL_DMA_BOUNDARY = 0xffffffffU,
+};

enum {
SIL_MMIO_BAR = 5,
@@ -118,6 +122,11 @@ static void sil_dev_config(struct ata_device *dev);
static u32 sil_scr_read (struct ata_port *ap, unsigned int sc_reg);
static void sil_scr_write (struct ata_port *ap, unsigned int sc_reg, u32 val);
static int sil_set_mode (struct ata_port *ap, struct ata_device **r_failed);
+static void sil_qc_prep(struct ata_queued_cmd *qc);
+static void sil_bmdma_setup (struct ata_queued_cmd *qc);
+static void sil_bmdma_start (struct ata_queued_cmd *qc);
+static void sil_bmdma_stop(struct ata_queued_cmd *qc);
+static irqreturn_t sil_interrupt(int irq, void *dev_instance);
static void sil_freeze(struct ata_port *ap);
static void sil_thaw(struct ata_port *ap);

@@ -173,12 +182,12 @@ static struct scsi_host_template sil_sht = {
.queuecommand = ata_scsi_queuecmd,
.can_queue = ATA_DEF_QUEUE,
.this_id = ATA_SHT_THIS_ID,
- .sg_tablesize = LIBATA_MAX_PRD,
+ .sg_tablesize = ATA_MAX_PRD,
.cmd_per_lun = ATA_SHT_CMD_PER_LUN,
.emulated = ATA_SHT_EMULATED,
.use_clustering = ATA_SHT_USE_CLUSTERING,
.proc_name = DRV_NAME,
- .dma_boundary = ATA_DMA_BOUNDARY,
+ .dma_boundary = SIL_DMA_BOUNDARY,
.slave_configure = ata_scsi_slave_config,
.slave_destroy = ata_scsi_slave_destroy,
.bios_param = ata_std_bios_param,
@@ -193,11 +202,11 @@ static const struct ata_port_operations sil_ops = {
.exec_command = ata_exec_command,
.dev_select = ata_std_dev_select,
.set_mode = sil_set_mode,
- .bmdma_setup = ata_bmdma_setup,
- .bmdma_start = ata_bmdma_start,
- .bmdma_stop = ata_bmdma_stop,
+ .bmdma_setup = sil_bmdma_setup,
+ .bmdma_start = sil_bmdma_start,
+ .bmdma_stop = sil_bmdma_stop,
.bmdma_status = ata_bmdma_status,
- .qc_prep = ata_qc_prep,
+ .qc_prep = sil_qc_prep,
.qc_issue = ata_qc_issue_prot,
.data_xfer = ata_data_xfer,
.freeze = sil_freeze,
@@ -262,8 +271,9 @@ static const struct {
unsigned long sfis_cfg; /* SATA FIS reception config register */
} sil_port[] = {
/* port 0 ... */
- { 0x80, 0x8A, 0x00, 0x10, 0x40, 0x100, 0x148, 0xb4, 0x14c },
- { 0xC0, 0xCA, 0x08, 0x18, 0x44, 0x180, 0x1c8, 0xf4, 0x1cc },
+ /* tf ctl bmdma bmdma2 fifo scr sien mode sfis */
+ { 0x80, 0x8A, 0x0, 0x10, 0x40, 0x100, 0x148, 0xb4, 0x14c },
+ { 0xC0, 0xCA, 0x8, 0x18, 0x44, 0x180, 0x1c8, 0xf4, 0x1cc },
{ 0x280, 0x28A, 0x200, 0x210, 0x240, 0x300, 0x348, 0x2b4, 0x34c },
{ 0x2C0, 0x2CA, 0x208, 0x218, 0x244, 0x380, 0x3c8, 0x2f4, 0x3cc },
/* ... port 3 */
@@ -280,6 +290,87 @@ module_param(slow_down, int, 0444);
MODULE_PARM_DESC(slow_down, "Sledgehammer used to work around random problems, by limiting commands to 15 sectors (0=off, 1=on)");


+static void sil_bmdma_stop(struct ata_queued_cmd *qc)
+{
+ struct ata_port *ap = qc->ap;
+ void __iomem *mmio = (void __iomem *) ap->ioaddr.bmdma_addr;
+ u32 val;
+
+ /* clear start/stop bit */
+ if (ap->port_no == 2)
+ val = SIL_INTR_STEERING;
+ else
+ val = 0;
+ writeb(val, mmio + ATA_DMA_CMD);
+
+ /* one-PIO-cycle guaranteed wait, per spec, for HDMA1:0 transition */
+ ata_altstatus(ap); /* dummy read */
+}
+
+static void sil_bmdma_setup (struct ata_queued_cmd *qc)
+{
+ struct ata_port *ap = qc->ap;
+ void __iomem *mmio_base = ap->host->iomap[SIL_MMIO_BAR];
+ void __iomem *mmio;
+
+ mmio = mmio_base + sil_port[ap->port_no].bmdma;
+
+ /* load PRD table addr. */
+ mb(); /* make sure PRD table writes are visible to controller */
+ writel(ap->prd_dma, mmio + ATA_DMA_TABLE_OFS);
+
+ /* issue r/w command */
+ ata_exec_command(ap, &qc->tf);
+}
+
+static void sil_bmdma_start (struct ata_queued_cmd *qc)
+{
+ unsigned int rw = (qc->tf.flags & ATA_TFLAG_WRITE);
+ struct ata_port *ap = qc->ap;
+ void __iomem *mmio_base = ap->host->iomap[SIL_MMIO_BAR];
+ void __iomem *mmio;
+ u8 dmactl;
+
+ mmio = mmio_base + sil_port[ap->port_no].bmdma2;
+
+ /* set transfer direction, start host DMA transaction */
+ dmactl = readb(mmio + ATA_DMA_CMD);
+ dmactl &= ~ATA_DMA_WR;
+ if (!rw)
+ dmactl |= ATA_DMA_WR;
+ writeb(dmactl | ATA_DMA_START, mmio + ATA_DMA_CMD);
+}
+
+/* The way God intended PCI IDE scatter/gather lists to look and behave... */
+static inline void sil_fill_sg(struct ata_queued_cmd *qc)
+{
+ struct scatterlist *sg;
+ struct ata_port *ap = qc->ap;
+ struct ata_prd *prd;
+
+ prd = &ap->prd[0];
+ ata_for_each_sg(sg, qc) {
+ u32 addr = sg_dma_address(sg);
+ u32 sg_len = sg_dma_len(sg);
+
+ prd->addr = cpu_to_le32(addr);
+ prd->flags_len = cpu_to_le32(sg_len);
+
+ if (ata_sg_is_last(sg, qc))
+ prd->flags_len |= cpu_to_le32(ATA_PRD_EOT);
+
+ prd++;
+ }
+}
+
+static void sil_qc_prep(struct ata_queued_cmd *qc)
+{
+ if (!(qc->flags & ATA_QCFLAG_DMAMAP))
+ return;
+
+ sil_fill_sg(qc);
+}
+
static unsigned char sil_get_device_cache_line(struct pci_dev *pdev)
{
u8 cache_line = 0;
@@ -683,7 +774,7 @@ static int sil_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
ioaddr->cmd_addr = mmio_base + sil_port[i].tf;
ioaddr->altstatus_addr =
ioaddr->ctl_addr = mmio_base + sil_port[i].ctl;
- ioaddr->bmdma_addr = mmio_base + sil_port[i].bmdma;
+ ioaddr->bmdma_addr = mmio_base + sil_port[i].bmdma2;
ioaddr->scr_addr = mmio_base + sil_port[i].scr;
ata_std_ports(ioaddr);
}
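
For contrast with sil_fill_sg above, here is roughly what a generic
BMDMA fill_sg helper has to do under the legacy 64 KiB rules (a sketch,
not the exact ata_fill_sg of this era; it assumes at least one mapped
segment):

static void legacy_fill_sg_sketch(struct ata_queued_cmd *qc)
{
	struct scatterlist *sg;
	struct ata_prd *prd = &qc->ap->prd[0];

	ata_for_each_sg(sg, qc) {
		u32 addr = sg_dma_address(sg);
		u32 len = sg_dma_len(sg);

		/* split wherever the segment would cross a 64 KiB boundary */
		while (len) {
			u32 chunk = 0x10000 - (addr & 0xffff);

			if (chunk > len)
				chunk = len;

			prd->addr = cpu_to_le32(addr);
			/* 0 in the low 16 bits means 64 KiB */
			prd->flags_len = cpu_to_le32(chunk & 0xffff);
			prd++;

			addr += chunk;
			len -= chunk;
		}
	}

	/* mark the final entry */
	(prd - 1)->flags_len |= cpu_to_le32(ATA_PRD_EOT);
}

With LBT, all of this collapses to the one-PRD-per-segment loop in
sil_fill_sg.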


2007-05-19 16:20:41

by Indan Zupancic

Subject: Re: [PATCH] sata_sil: Greatly improve DMA support

On Sat, May 19, 2007 07:35, Jeff Garzik wrote:
>
> Since Alan expressed a desire to see Large Block Transfer (LBT) support
> in pata_sil680, I thought I would re-post my patch for adding LBT support
> to sata_sil.
>
> Silicon Image's Large Block Transfer (LBT) support is a vendor-specific
> DMA scatter/gather engine, which enables 64-bit DMA addresses (where
> supported by platform) and eliminates the annoying 64k DMA boundary
> found in legacy PCI IDE BMDMA engines.
>
> This hardware is open hardware. Docs for both PATA and SATA can be
> found at http://gkernel.sourceforge.net/specs/sii/
>
> LBT support works the same way in both pata_sil680 and sata_sil, so
> maybe a motivated individual could use this patch as inspiration
> for doing the same thing with pata_sil680.
>
> NOTE: This patch works for me on x86 and x86-64. However, other
> testers reported problems, which is why it is not upstream.

This patch seems to work with my SiI 3512, though I don't notice any
difference, neither a speedup nor a slowdown. Hdparm gives the same
speeds (-tT), and cp -a'ing kernel sources is abysmally slow in both
cases (need to look into that one), so I didn't really test it that well.

Greetings,

Indan


2007-05-19 17:02:18

by Robert Hancock

Subject: Re: [PATCH] sata_sil: Greatly improve DMA support

Jeff Garzik wrote:
> Since Alan expressed a desire to see Large Block Transfer (LBT) support
> in pata_sil680, I thought I would re-post my patch for adding LBT support
> to sata_sil.
>
> Silicon Image's Large Block Transfer (LBT) support is a vendor-specific
> DMA scatter/gather engine, which enables 64-bit DMA addresses (where
> supported by platform) and eliminates the annoying 64k DMA boundary
> found in legacy PCI IDE BMDMA engines.

Looks like it doesn't allow 64-bit DMA addresses, it only gets rid of
the 64K boundary limitation.

--
Robert Hancock Saskatoon, SK, Canada
To email, remove "nospam" from [email protected]
Home Page: http://www.roberthancock.com/

2007-05-19 21:27:24

by Jeff Garzik

Subject: Re: [PATCH] sata_sil: Greatly improve DMA support

Robert Hancock wrote:
> Jeff Garzik wrote:
>> Since Alan expressed a desire to see Large Block Transfer (LBT) support
>> in pata_sil680, I thought I would re-post my patch for adding LBT support
>> to sata_sil.
>>
>> Silicon Image's Large Block Transfer (LBT) support is a vendor-specific
>> DMA scatter/gather engine, which enables 64-bit DMA addresses (where
>> supported by platform) and eliminates the annoying 64k DMA boundary
>> found in legacy PCI IDE BMDMA engines.
>
> Looks like it doesn't allow 64-bit DMA addresses, it only gets rid of
> the 64K boundary limitation.

rotfl. Boy am I dumb. I -thought- my patch, written months ago,
included that bit. But obviously it does not. Let's add that...

Jeff



2007-05-19 21:31:21

by Jeff Garzik

Subject: Re: [PATCH] sata_sil: Greatly improve DMA support

Jeff Garzik wrote:
> rotfl. Boy am I dumb. I -thought- my patch, written months ago,
> included that bit. But obviously it does not. Let's add that...

Or not. It just gets rid of the 64k boundary -and- allows for larger
scatter/gather entries. LBT does not add 64-bit DMA support.

That answers that :) Thanks for poking me to take a trip down memory
lane...

Jeff


2007-05-19 22:03:56

by Jeff Garzik

Subject: Re: [PATCH] sata_sil: Greatly improve DMA support

Indan Zupancic wrote:
> This patch seems to work with my SiI 3512, though I don't notice any
> difference, neither a speedup nor a slowdown. Hdparm gives the same
> speeds (-tT), and cp -a'ing kernel sources is abysmally slow in both
> cases (need to look into that one), so I didn't really test it that well.


It won't result in much of a speedup, except in situations where an
IOMMU or something similar causes you to run into the 64k boundary --
generally only on huge transfers.

A good measure is to dd(1) to/from the block device, rather than using a
filesystem. As has been shown on LKML, the filesystem can really slow
things down in some cases.
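
For example, a raw read test could look something like this (the device
name here is just an example -- substitute your sata_sil disk):

$ dd if=/dev/sda of=/dev/null bs=1M count=2048

That reads 2 GB straight off the block device, taking the filesystem
out of the picture.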

Jeff


2007-05-19 23:22:28

by Indan Zupancic

Subject: Re: [PATCH] sata_sil: Greatly improve DMA support

On Sun, May 20, 2007 00:03, Jeff Garzik wrote:
> Indan Zupancic wrote:
>> This patch seems to work with my SiI 3512, though I don't notice any
>> difference, neither a speedup nor a slowdown. Hdparm gives the same
>> speeds (-tT), and cp -a'ing kernel sources is abysmally slow in both
>> cases (need to look into that one), so I didn't really test it that well.
>
>
> It won't result in much of a speedup, except in situations where an
> IOMMU or something similar causes you to run into the 64k boundary --
> generally only on huge transfers.
>
> A good measure is to dd(1) to/from the block device, rather than using a
> filesystem. As has been shown on LKML, the filesystem can really slow
> things down in some cases.

I didn't really expect a speedup; it's more that I have no regression to report.

I could benchmark the patch more thoroughly, but right now I'm more worried
about the crawling cp I just discovered. Talking about filesystems slowing
things down...

Test:

$ cp -a linux-2.6/ /tmp/

done on the same ext3 partition. linux-2.6 contains the source and git repo only;
I'm compiling stuff with O=../obj.

$ vmstat 10
procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu----
r b swpd free buff cache si so bi bo in cs us sy id wa
0 1 0 4168 3316 195700 0 0 739 494 530 393 15 3 66 16
0 3 4 4120 2040 198196 0 0 14677 14111 1247 435 0 17 0 83
0 1 4 3588 1444 199696 0 0 8892 9472 1362 438 0 12 0 88
1 0 4 3772 4228 196012 0 0 764 454 1161 345 0 4 0 96
0 1 4 3548 6156 193088 0 0 793 851 1158 340 0 4 0 96
0 1 4 3852 7608 189096 0 0 798 523 1160 474 1 4 0 95
1 1 4 3612 8684 186048 0 0 1244 864 1178 430 2 5 0 93
0 1 4 90660 9308 96396 0 0 853 906 1244 578 7 6 0 87
0 1 4 72280 9816 112368 0 0 830 854 1278 429 12 5 0 83
1 0 4 52488 10296 130560 0 0 935 861 1178 418 1 6 0 94
0 1 4 30500 10788 149776 0 0 977 858 1178 371 0 6 0 94
0 1 4 9792 11244 167856 0 0 918 1394 1182 350 1 5 0 94
0 1 4 4016 11216 172504 0 0 1017 858 1181 382 1 6 0 94
0 1 4 3660 11484 171484 0 0 966 861 1182 410 1 6 0 94

It never finished, as I had no patience to copy about 900 MB at this rate.

As it's a git tree, I suppose it's heavily fragmented, but this is still rather
pathetic. Should I blame cp, or is something else wrong? Any ideas on how
to figure this one out would be appreciated. Sorry for the off-topicness.

Greetings,

Indan


2007-05-20 10:20:11

by Tejun Heo

Subject: Re: [PATCH] sata_sil: Greatly improve DMA support

Indan Zupancic wrote:
> On Sun, May 20, 2007 00:03, Jeff Garzik wrote:
>> Indan Zupancic wrote:
>>> This patch seems to work with my SiI 3512, though I don't notice any
>>> difference, neither a speedup nor a slowdown. Hdparm gives the same
>>> speeds (-tT), and cp -a'ing kernel sources is abysmally slow in both
>>> cases (need to look into that one), so I didn't really test it that well.
>>
>> It won't result in much of a speedup, except in situations where an
>> IOMMU or something similar causes you to run into the 64k boundary --
>> generally only on huge transfers.
>>
>> A good measure is to dd(1) to/from the block device, rather than using a
>> filesystem. As has been shown on LKML, the filesystem can really slow
>> things down in some cases.
>
> I didn't really expect a speedup; it's more that I have no regression to report.
>
> I could benchmark the patch more thoroughly, but right now I'm more worried
> about the crawling cp I just discovered. Talking about filesystems slowing
> things down...
>
> Test:
>
> $ cp -a linux-2.6/ /tmp/
>
> done on the same ext3 partition. linux-2.6 contains the source and git repo only;
> I'm compiling stuff with O=../obj.
>
> $ vmstat 10
> procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu----
> r b swpd free buff cache si so bi bo in cs us sy id wa
> 0 1 0 4168 3316 195700 0 0 739 494 530 393 15 3 66 16
> 0 3 4 4120 2040 198196 0 0 14677 14111 1247 435 0 17 0 83
> 0 1 4 3588 1444 199696 0 0 8892 9472 1362 438 0 12 0 88
> 1 0 4 3772 4228 196012 0 0 764 454 1161 345 0 4 0 96
> 0 1 4 3548 6156 193088 0 0 793 851 1158 340 0 4 0 96
> 0 1 4 3852 7608 189096 0 0 798 523 1160 474 1 4 0 95
> 1 1 4 3612 8684 186048 0 0 1244 864 1178 430 2 5 0 93
> 0 1 4 90660 9308 96396 0 0 853 906 1244 578 7 6 0 87
> 0 1 4 72280 9816 112368 0 0 830 854 1278 429 12 5 0 83
> 1 0 4 52488 10296 130560 0 0 935 861 1178 418 1 6 0 94
> 0 1 4 30500 10788 149776 0 0 977 858 1178 371 0 6 0 94
> 0 1 4 9792 11244 167856 0 0 918 1394 1182 350 1 5 0 94
> 0 1 4 4016 11216 172504 0 0 1017 858 1181 382 1 6 0 94
> 0 1 4 3660 11484 171484 0 0 966 861 1182 410 1 6 0 94
>
> It never finished, as I had no patience to copy about 900 MB at this rate.
>
> As it's a git tree, I suppose it's heavily fragmented, but this is still rather
> pathetic. Should I blame cp, or is something else wrong? Any ideas on how
> to figure this one out would be appreciated. Sorry for the off-topicness.

Do things improve if you change the io scheduler to deadline?

# echo deadline > /sys/block/sda/queue/scheduler
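
(The currently active scheduler can be checked with

# cat /sys/block/sda/queue/scheduler

where the entry in square brackets is the one in use.)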

Also worth looking at is the following bug entry.

http://bugzilla.kernel.org/show_bug.cgi?id=7372

There seems to be a weird interaction among the scheduler / VM / IO. The
exact cause is still not verified. :-(

--
tejun

2007-05-20 13:39:04

by Indan Zupancic

Subject: Re: [PATCH] sata_sil: Greatly improve DMA support

On Sun, May 20, 2007 12:19, Tejun Heo wrote:
> Indan Zupancic wrote:
>> On Sun, May 20, 2007 00:03, Jeff Garzik wrote:
>>> Indan Zupancic wrote:
>>>> This patch seems to work with my SiI 3512, though I don't notice any
>>>> difference, neither a speedup nor a slowdown. Hdparm gives the same
>>>> speeds (-tT), and cp -a'ing kernel sources is abysmally slow in both
>>>> cases (need to look into that one), so I didn't really test it that well.
>>>
>>> It won't result in much of a speedup, except in situations where an
>>> IOMMU or something similar causes you to run into the 64k boundary --
>>> generally only on huge transfers.
>>>
>>> A good measure is to dd(1) to/from the block device, rather than using a
>>> filesystem. As has been shown on LKML, the filesystem can really slow
>>> things down in some cases.
>>
>> I didn't really expect a speedup; it's more that I have no regression to report.
>>
>> I could benchmark the patch more thoroughly, but right now I'm more worried
>> about the crawling cp I just discovered. Talking about filesystems slowing
>> things down...
>>
>> Test:
>>
>> $ cp -a linux-2.6/ /tmp/
>>
>> done on the same ext3 partition. linux-2.6 contains the source and git repo only;
>> I'm compiling stuff with O=../obj.
>>
>> $ vmstat 10
>> procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu----
>> r b swpd free buff cache si so bi bo in cs us sy id wa
>> 0 1 0 4168 3316 195700 0 0 739 494 530 393 15 3 66 16
>> 0 3 4 4120 2040 198196 0 0 14677 14111 1247 435 0 17 0 83
>> 0 1 4 3588 1444 199696 0 0 8892 9472 1362 438 0 12 0 88
>> 1 0 4 3772 4228 196012 0 0 764 454 1161 345 0 4 0 96
>> 0 1 4 3548 6156 193088 0 0 793 851 1158 340 0 4 0 96
>> 0 1 4 3852 7608 189096 0 0 798 523 1160 474 1 4 0 95
>> 1 1 4 3612 8684 186048 0 0 1244 864 1178 430 2 5 0 93
>> 0 1 4 90660 9308 96396 0 0 853 906 1244 578 7 6 0 87
>> 0 1 4 72280 9816 112368 0 0 830 854 1278 429 12 5 0 83
>> 1 0 4 52488 10296 130560 0 0 935 861 1178 418 1 6 0 94
>> 0 1 4 30500 10788 149776 0 0 977 858 1178 371 0 6 0 94
>> 0 1 4 9792 11244 167856 0 0 918 1394 1182 350 1 5 0 94
>> 0 1 4 4016 11216 172504 0 0 1017 858 1181 382 1 6 0 94
>> 0 1 4 3660 11484 171484 0 0 966 861 1182 410 1 6 0 94
>>
>> It never finished, as I had no patience to copy about 900 MB at this rate.
>>
>> As it's a git tree, I suppose it's heavily fragmented, but this is still rather
>> pathetic. Should I blame cp, or is something else wrong? Any ideas on how
>> to figure this one out would be appreciated. Sorry for the off-topicness.
>
> Do things improve if you change the io scheduler to deadline?
>
> # echo deadline > /sys/block/sda/queue/scheduler

I also tried noop, anticipatory, and now deadline, but it doesn't matter.

> Also worth looking at is the following bug entry.
>
> http://bugzilla.kernel.org/show_bug.cgi?id=7372
>
> There seems to be a weird interaction among the scheduler / VM / IO. The
> exact cause is still not verified. :-(

I know; I posted a bug report too, but for starvation with the anticipatory scheduler.

Anyway, that bug seems unrelated to my case, as they have system unresponsiveness
or other nastiness, while I only have a crawling cp, which I blame on weaknesses
within ext3, a badly designed cp program, and a very fragmented filesystem. I just
need to verify that, somehow. I'll try with older kernels later.

Greetings,

Indan