Commit 2c941a204070ab32d92d40318a3196a7fb994c00 looks incomplete. The
helper functions like prepare_sg() need to support sg chaining too.
Signed-off-by: FUJITA Tomonori <[email protected]>
---
arch/sparc64/kernel/iommu.c | 1 -
arch/sparc64/kernel/iommu_common.c | 51 +++++++++++++++++++++---------------
arch/sparc64/kernel/iommu_common.h | 1 +
arch/sparc64/kernel/pci_sun4v.c | 1 -
4 files changed, 31 insertions(+), 23 deletions(-)
diff --git a/arch/sparc64/kernel/iommu.c b/arch/sparc64/kernel/iommu.c
index db3ffcf..5d4e96d 100644
--- a/arch/sparc64/kernel/iommu.c
+++ b/arch/sparc64/kernel/iommu.c
@@ -10,7 +10,6 @@
#include <linux/device.h>
#include <linux/dma-mapping.h>
#include <linux/errno.h>
-#include <linux/scatterlist.h>
#ifdef CONFIG_PCI
#include <linux/pci.h>
diff --git a/arch/sparc64/kernel/iommu_common.c b/arch/sparc64/kernel/iommu_common.c
index 12c93a3..d7ca900 100644
--- a/arch/sparc64/kernel/iommu_common.c
+++ b/arch/sparc64/kernel/iommu_common.c
@@ -12,18 +12,22 @@
*/
#ifdef VERIFY_SG
-static int verify_lengths(struct scatterlist *sg, int nents, int npages)
+static int verify_lengths(struct scatterlist *sglist, int nents, int npages)
{
int sg_len, dma_len;
int i, pgcount;
+ struct scatterlist *sg;
sg_len = 0;
- for (i = 0; i < nents; i++)
- sg_len += sg[i].length;
+ for_each_sg(sglist, sg, nents, i)
+ sg_len += sg->length;
dma_len = 0;
- for (i = 0; i < nents && sg[i].dma_length; i++)
- dma_len += sg[i].dma_length;
+ for_each_sg(sglist, sg, nents, i) {
+ if (!sg->dma_length)
+ break;
+ dma_len += sg->dma_length;
+ }
if (sg_len != dma_len) {
printk("verify_lengths: Error, different, sg[%d] dma[%d]\n",
@@ -32,13 +36,16 @@ static int verify_lengths(struct scatterlist *sg, int nents, int npages)
}
pgcount = 0;
- for (i = 0; i < nents && sg[i].dma_length; i++) {
+ for_each_sg(sglist, sg, nents, i) {
unsigned long start, end;
- start = sg[i].dma_address;
+ if (!sg->dma_length)
+ break;
+
+ start = sg->dma_address;
start = start & IO_PAGE_MASK;
- end = sg[i].dma_address + sg[i].dma_length;
+ end = sg->dma_address + sg->dma_length;
end = (end + (IO_PAGE_SIZE - 1)) & IO_PAGE_MASK;
pgcount += ((end - start) >> IO_PAGE_SHIFT);
@@ -113,7 +120,7 @@ static int verify_one_map(struct scatterlist *dma_sg, struct scatterlist **__sg,
if (dlen > 0 && ((daddr & ~IO_PAGE_MASK) == 0))
iopte++;
- sg++;
+ sg = sg_next(sg);
if (--nents <= 0)
break;
sgaddr = (unsigned long) (page_address(sg->page) + sg->offset);
@@ -147,7 +154,7 @@ static int verify_maps(struct scatterlist *sg, int nents, iopte_t *iopte)
nents = verify_one_map(dma_sg, &sg, nents, &iopte);
if (nents <= 0)
break;
- dma_sg++;
+ dma_sg = sg_next(dma_sg);
if (dma_sg->dma_length == 0)
break;
}
@@ -169,22 +176,24 @@ static int verify_maps(struct scatterlist *sg, int nents, iopte_t *iopte)
return 0;
}
-void verify_sglist(struct scatterlist *sg, int nents, iopte_t *iopte, int npages)
+void verify_sglist(struct scatterlist *sglist, int nents, iopte_t *iopte, int npages)
{
- if (verify_lengths(sg, nents, npages) < 0 ||
- verify_maps(sg, nents, iopte) < 0) {
+ struct scatterlist *sg;
+
+ if (verify_lengths(sglist, nents, npages) < 0 ||
+ verify_maps(sglist, nents, iopte) < 0) {
int i;
printk("verify_sglist: Crap, messed up mappings, dumping, iodma at ");
- printk("%016lx.\n", sg->dma_address & IO_PAGE_MASK);
+ printk("%016lx.\n", sglist->dma_address & IO_PAGE_MASK);
- for (i = 0; i < nents; i++) {
+ for_each_sg(sglist, sg, nents, i) {
printk("sg(%d): page_addr(%p) off(%x) length(%x) "
- "dma_address[%016lx] dma_length[%016lx]\n",
+ "dma_address[%016x] dma_length[%016x]\n",
i,
- page_address(sg[i].page), sg[i].offset,
- sg[i].length,
- sg[i].dma_address, sg[i].dma_length);
+ page_address(sg->page), sg->offset,
+ sg->length,
+ sg->dma_address, sg->dma_length);
}
}
@@ -205,12 +214,12 @@ unsigned long prepare_sg(struct scatterlist *sg, int nents)
while (--nents) {
unsigned long addr;
- sg++;
+ sg = sg_next(sg);
addr = (unsigned long) (page_address(sg->page) + sg->offset);
if (! VCONTIG(prev, addr)) {
dma_sg->dma_address = dent_addr;
dma_sg->dma_length = dent_len;
- dma_sg++;
+ dma_sg = sg_next(dma_sg);
dent_addr = ((dent_addr +
dent_len +
diff --git a/arch/sparc64/kernel/iommu_common.h b/arch/sparc64/kernel/iommu_common.h
index ad79101..75b5a58 100644
--- a/arch/sparc64/kernel/iommu_common.h
+++ b/arch/sparc64/kernel/iommu_common.h
@@ -8,6 +8,7 @@
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/mm.h>
+#include <linux/scatterlist.h>
#include <asm/iommu.h>
#include <asm/scatterlist.h>
diff --git a/arch/sparc64/kernel/pci_sun4v.c b/arch/sparc64/kernel/pci_sun4v.c
index cacacfa..119f8ef 100644
--- a/arch/sparc64/kernel/pci_sun4v.c
+++ b/arch/sparc64/kernel/pci_sun4v.c
@@ -13,7 +13,6 @@
#include <linux/irq.h>
#include <linux/msi.h>
#include <linux/log2.h>
-#include <linux/scatterlist.h>
#include <asm/iommu.h>
#include <asm/irq.h>
--
1.5.2.4
On Wed, Oct 17 2007, FUJITA Tomonori wrote:
> Commit 2c941a204070ab32d92d40318a3196a7fb994c00 looks incomplete. The
> helper functions like prepare_sg() need to support sg chaining too.
Thanks Tomo, applied. I'll get this pushed out later today with any
other sg chaining fallout we may see.
--
Jens Axboe
From: Jens Axboe <[email protected]>
Date: Wed, 17 Oct 2007 09:21:49 +0200
> On Wed, Oct 17 2007, FUJITA Tomonori wrote:
> > Commit 2c941a204070ab32d92d40318a3196a7fb994c00 looks incomplete. The
> > helper functions like prepare_sg() need to support sg chaining too.
>
> Thanks Tomo, applied. I'll get this pushed out later today with any
> other sg chaining fallout we may see.
I'm still debugging crashes on sparc64 early on boot even with this
latest fix applied.
I think there are still problems in functions like fill_sg().
There is a lot of confusion involving loop termination. sg_next()
gives you a NULL after the last entry, but tests have been changed
to compare against sg_last() which is likely not what we want for
those checks.
I'll try to figure it out, just a heads up...
From: David Miller <[email protected]>
Date: Wed, 17 Oct 2007 01:33:25 -0700 (PDT)
> sg_next() gives you a NULL after the last entry, but tests have been
> changed to compare against sg_last() which is likely not what we
> want for those checks.
This of course isn't true, ignore me as I'm still learning how this
new stuff works :-)
On Wed, Oct 17 2007, David Miller wrote:
> From: David Miller <[email protected]>
> Date: Wed, 17 Oct 2007 01:33:25 -0700 (PDT)
>
> > sg_next() gives you a NULL after the last entry, but tests have been
> > changed to compare against sg_last() which is likely not what we
> > want for those checks.
>
> This of course isn't true, ignore me as I'm still learning how this
> new stuff works :-)
Righto, it's invalid to call sg_next() on the last entry! Let me know if
you need any help with debugging this, unfortunately I cannot test on
sparc64 myself...
(I can say that easily, since I know that davem will a) not rest until
this is fixed, and b) is a really good debugger and will not need my
help)
--
Jens Axboe
From: Jens Axboe <[email protected]>
Date: Wed, 17 Oct 2007 10:45:28 +0200
> Righto, it's invalid to call sg_next() on the last entry!
Unfortunately, that's what the sparc64 code wanted to do, this
transformation in the sparc64 sg chaining patch is not equilavent:
- struct scatterlist *sg_end = sg + nelems;
+ struct scatterlist *sg_end = sg_last(sg, nelems);
...
- while (sg < sg_end &&
+ while (sg != sg_end &&
No, that's not what the code was doing. The while loop
has to process the last entry in the list,
We really needed "sg_end" to be "one past the last element",
rather than "the last element".
Since you say that sg_next() on the last entry is illegal,
and that's what this code would have done to try and reach
loop termination (it doesn't actually derefrence that
"end plus one" scatterlist entry) I'll try to code this up
some other way.
Besides, sg_last() is so absurdly expensive, it has to walk the entire
chain in the chaining case. So better to implement this without it.
I would suggest that other sg_last() uses be audited for the same bug.
On Wed, Oct 17 2007, David Miller wrote:
> From: Jens Axboe <[email protected]>
> Date: Wed, 17 Oct 2007 10:45:28 +0200
>
> > Righto, it's invalid to call sg_next() on the last entry!
>
> Unfortunately, that's what the sparc64 code wanted to do, this
> transformation in the sparc64 sg chaining patch is not equilavent:
>
> - struct scatterlist *sg_end = sg + nelems;
> + struct scatterlist *sg_end = sg_last(sg, nelems);
> ...
> - while (sg < sg_end &&
> + while (sg != sg_end &&
Auch indeed. That'd probably be better as a
do {
...
} while (sg != sg_end);
> No, that's not what the code was doing. The while loop
> has to process the last entry in the list,
>
> We really needed "sg_end" to be "one past the last element",
> rather than "the last element".
>
> Since you say that sg_next() on the last entry is illegal,
> and that's what this code would have done to try and reach
> loop termination (it doesn't actually derefrence that
> "end plus one" scatterlist entry) I'll try to code this up
> some other way.
>
> Besides, sg_last() is so absurdly expensive, it has to walk the entire
> chain in the chaining case. So better to implement this without it.
It is, sg_last() should really not be used a lot since it'll leaf
through the entire sg list. People should either keep count of the
number of entries so that they know when they are dealing with the last
valid entry. Or use the for_each_sg() loop helper, if possible.
Drivers are usually very simple, the iommu code does more sg tricks and
thus is more complex to audit.
> I would suggest that other sg_last() uses be audited for the same bug.
Agree.
--
Jens Axboe
On Wed, 17 Oct 2007 11:16:29 +0200
Jens Axboe <[email protected]> wrote:
> On Wed, Oct 17 2007, David Miller wrote:
> > From: Jens Axboe <[email protected]>
> > Date: Wed, 17 Oct 2007 10:45:28 +0200
> >
> > > Righto, it's invalid to call sg_next() on the last entry!
> >
> > Unfortunately, that's what the sparc64 code wanted to do, this
> > transformation in the sparc64 sg chaining patch is not equilavent:
> >
> > - struct scatterlist *sg_end = sg + nelems;
> > + struct scatterlist *sg_end = sg_last(sg, nelems);
> > ...
> > - while (sg < sg_end &&
> > + while (sg != sg_end &&
>
> Auch indeed. That'd probably be better as a
>
> do {
> ...
> } while (sg != sg_end);
>
> > No, that's not what the code was doing. The while loop
> > has to process the last entry in the list,
> >
> > We really needed "sg_end" to be "one past the last element",
> > rather than "the last element".
> >
> > Since you say that sg_next() on the last entry is illegal,
> > and that's what this code would have done to try and reach
> > loop termination (it doesn't actually derefrence that
> > "end plus one" scatterlist entry) I'll try to code this up
> > some other way.
> >
> > Besides, sg_last() is so absurdly expensive, it has to walk the entire
> > chain in the chaining case. So better to implement this without it.
>
> It is, sg_last() should really not be used a lot since it'll leaf
> through the entire sg list. People should either keep count of the
> number of entries so that they know when they are dealing with the last
> valid entry. Or use the for_each_sg() loop helper, if possible.
>
> Drivers are usually very simple, the iommu code does more sg tricks and
> thus is more complex to audit.
Can we just remove sg_last?
> > I would suggest that other sg_last() uses be audited for the same bug.
>
> Agree.
Only libata, I think.
On Wed, Oct 17 2007, FUJITA Tomonori wrote:
> On Wed, 17 Oct 2007 11:16:29 +0200
> Jens Axboe <[email protected]> wrote:
>
> > On Wed, Oct 17 2007, David Miller wrote:
> > > From: Jens Axboe <[email protected]>
> > > Date: Wed, 17 Oct 2007 10:45:28 +0200
> > >
> > > > Righto, it's invalid to call sg_next() on the last entry!
> > >
> > > Unfortunately, that's what the sparc64 code wanted to do, this
> > > transformation in the sparc64 sg chaining patch is not equilavent:
> > >
> > > - struct scatterlist *sg_end = sg + nelems;
> > > + struct scatterlist *sg_end = sg_last(sg, nelems);
> > > ...
> > > - while (sg < sg_end &&
> > > + while (sg != sg_end &&
> >
> > Auch indeed. That'd probably be better as a
> >
> > do {
> > ...
> > } while (sg != sg_end);
> >
> > > No, that's not what the code was doing. The while loop
> > > has to process the last entry in the list,
> > >
> > > We really needed "sg_end" to be "one past the last element",
> > > rather than "the last element".
> > >
> > > Since you say that sg_next() on the last entry is illegal,
> > > and that's what this code would have done to try and reach
> > > loop termination (it doesn't actually derefrence that
> > > "end plus one" scatterlist entry) I'll try to code this up
> > > some other way.
> > >
> > > Besides, sg_last() is so absurdly expensive, it has to walk the entire
> > > chain in the chaining case. So better to implement this without it.
> >
> > It is, sg_last() should really not be used a lot since it'll leaf
> > through the entire sg list. People should either keep count of the
> > number of entries so that they know when they are dealing with the last
> > valid entry. Or use the for_each_sg() loop helper, if possible.
> >
> > Drivers are usually very simple, the iommu code does more sg tricks and
> > thus is more complex to audit.
>
> Can we just remove sg_last?
I think that would be best, we don't want to encourage use of it, it's
not really clear to people how expensive it is unless they go and look
at it. The absolute worst case would be someone doing a
for_each_sg(sgl, sg, nents) {
...
if (sg == sg_last(sgl))
...
}
which would be absolutely awful.
But lets let things settle for a few days, then kill sg_last().
--
Jens Axboe
From: FUJITA Tomonori <[email protected]>
Date: Wed, 17 Oct 2007 18:24:01 +0900
> On Wed, 17 Oct 2007 11:16:29 +0200
> Jens Axboe <[email protected]> wrote:
>
> > On Wed, Oct 17 2007, David Miller wrote:
> > > I would suggest that other sg_last() uses be audited for the same bug.
> >
> > Agree.
>
> Only libata, I think.
There are a few other spots, like ide-scsi
From: Jens Axboe <[email protected]>
Date: Wed, 17 Oct 2007 11:16:29 +0200
> On Wed, Oct 17 2007, David Miller wrote:
> > From: Jens Axboe <[email protected]>
> > Date: Wed, 17 Oct 2007 10:45:28 +0200
> >
> > > Righto, it's invalid to call sg_next() on the last entry!
> >
> > Unfortunately, that's what the sparc64 code wanted to do, this
> > transformation in the sparc64 sg chaining patch is not equilavent:
> >
> > - struct scatterlist *sg_end = sg + nelems;
> > + struct scatterlist *sg_end = sg_last(sg, nelems);
> > ...
> > - while (sg < sg_end &&
> > + while (sg != sg_end &&
>
> Auch indeed. That'd probably be better as a
>
> do {
> ...
> } while (sg != sg_end);
Ok, next bug, introduced by this change:
commit f565913ef8a8d0cfa46a1faaf8340cc357a46f3a
Author: Jens Axboe <[email protected]>
Date: Fri Sep 21 10:44:19 2007 +0200
block: convert to using sg helpers
Convert the main rq mapper (blk_rq_map_sg()) to the sg helper setup.
Signed-off-by: Jens Axboe <[email protected]>
Specifically this part:
new_segment:
- memset(&sg[nsegs],0,sizeof(struct scatterlist));
- sg[nsegs].page = bvec->bv_page;
- sg[nsegs].length = nbytes;
- sg[nsegs].offset = bvec->bv_offset;
+ sg = next_sg;
+ next_sg = sg_next(sg);
+ sg->page = bvec->bv_page;
+ sg->length = nbytes;
+ sg->offset = bvec->bv_offset;
You can't remove that memset(), it's there for a reason. The IOMMU
layers depended upon the code zero'ing out the whole scatterlist
struct, there might be more to it than page, length and offset :-)
In sparc64's case, this zero'd the dma_address and dma_length members
and the mapping algorithms use that to their advantage.
On Wed, Oct 17 2007, David Miller wrote:
> From: Jens Axboe <[email protected]>
> Date: Wed, 17 Oct 2007 11:16:29 +0200
>
> > On Wed, Oct 17 2007, David Miller wrote:
> > > From: Jens Axboe <[email protected]>
> > > Date: Wed, 17 Oct 2007 10:45:28 +0200
> > >
> > > > Righto, it's invalid to call sg_next() on the last entry!
> > >
> > > Unfortunately, that's what the sparc64 code wanted to do, this
> > > transformation in the sparc64 sg chaining patch is not equilavent:
> > >
> > > - struct scatterlist *sg_end = sg + nelems;
> > > + struct scatterlist *sg_end = sg_last(sg, nelems);
> > > ...
> > > - while (sg < sg_end &&
> > > + while (sg != sg_end &&
> >
> > Auch indeed. That'd probably be better as a
> >
> > do {
> > ...
> > } while (sg != sg_end);
>
> Ok, next bug, introduced by this change:
>
> commit f565913ef8a8d0cfa46a1faaf8340cc357a46f3a
> Author: Jens Axboe <[email protected]>
> Date: Fri Sep 21 10:44:19 2007 +0200
>
> block: convert to using sg helpers
>
> Convert the main rq mapper (blk_rq_map_sg()) to the sg helper setup.
>
> Signed-off-by: Jens Axboe <[email protected]>
>
> Specifically this part:
>
> new_segment:
> - memset(&sg[nsegs],0,sizeof(struct scatterlist));
> - sg[nsegs].page = bvec->bv_page;
> - sg[nsegs].length = nbytes;
> - sg[nsegs].offset = bvec->bv_offset;
> + sg = next_sg;
> + next_sg = sg_next(sg);
>
> + sg->page = bvec->bv_page;
> + sg->length = nbytes;
> + sg->offset = bvec->bv_offset;
>
> You can't remove that memset(), it's there for a reason. The IOMMU
> layers depended upon the code zero'ing out the whole scatterlist
> struct, there might be more to it than page, length and offset :-)
I realize that, and I was pretty worried about this specific change. But
there's only been one piece of fallout because if it until now - well
two, with the sparc64 stuff.
The problem is that you cannot zero the entire sg entry, because then
you'd potentially overwrite the chain pointer.
I'd propose just adding a
sg_dma_address(sg) = 0;
sg_dma_len(sg) = 0;
there for now, or provide an arch_clear_sg_entry() helper if we need
more killed.
--
Jens Axboe
On Wed, Oct 17 2007, Jens Axboe wrote:
> On Wed, Oct 17 2007, David Miller wrote:
> > From: Jens Axboe <[email protected]>
> > Date: Wed, 17 Oct 2007 11:16:29 +0200
> >
> > > On Wed, Oct 17 2007, David Miller wrote:
> > > > From: Jens Axboe <[email protected]>
> > > > Date: Wed, 17 Oct 2007 10:45:28 +0200
> > > >
> > > > > Righto, it's invalid to call sg_next() on the last entry!
> > > >
> > > > Unfortunately, that's what the sparc64 code wanted to do, this
> > > > transformation in the sparc64 sg chaining patch is not equilavent:
> > > >
> > > > - struct scatterlist *sg_end = sg + nelems;
> > > > + struct scatterlist *sg_end = sg_last(sg, nelems);
> > > > ...
> > > > - while (sg < sg_end &&
> > > > + while (sg != sg_end &&
> > >
> > > Auch indeed. That'd probably be better as a
> > >
> > > do {
> > > ...
> > > } while (sg != sg_end);
> >
> > Ok, next bug, introduced by this change:
> >
> > commit f565913ef8a8d0cfa46a1faaf8340cc357a46f3a
> > Author: Jens Axboe <[email protected]>
> > Date: Fri Sep 21 10:44:19 2007 +0200
> >
> > block: convert to using sg helpers
> >
> > Convert the main rq mapper (blk_rq_map_sg()) to the sg helper setup.
> >
> > Signed-off-by: Jens Axboe <[email protected]>
> >
> > Specifically this part:
> >
> > new_segment:
> > - memset(&sg[nsegs],0,sizeof(struct scatterlist));
> > - sg[nsegs].page = bvec->bv_page;
> > - sg[nsegs].length = nbytes;
> > - sg[nsegs].offset = bvec->bv_offset;
> > + sg = next_sg;
> > + next_sg = sg_next(sg);
> >
> > + sg->page = bvec->bv_page;
> > + sg->length = nbytes;
> > + sg->offset = bvec->bv_offset;
> >
> > You can't remove that memset(), it's there for a reason. The IOMMU
> > layers depended upon the code zero'ing out the whole scatterlist
> > struct, there might be more to it than page, length and offset :-)
>
> I realize that, and I was pretty worried about this specific change. But
> there's only been one piece of fallout because if it until now - well
> two, with the sparc64 stuff.
>
> The problem is that you cannot zero the entire sg entry, because then
> you'd potentially overwrite the chain pointer.
>
> I'd propose just adding a
>
> sg_dma_address(sg) = 0;
> sg_dma_len(sg) = 0;
>
> there for now, or provide an arch_clear_sg_entry() helper if we need
> more killed.
Actually, just clearing AFTER sg_next() would be fine, since we know
that is not a link entry. Duh...
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 9eabac9..1014d34 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -1352,6 +1352,7 @@ new_segment:
sg = next_sg;
next_sg = sg_next(sg);
+ memset(sg, 0, sizeof(*sg));
sg->page = bvec->bv_page;
sg->length = nbytes;
sg->offset = bvec->bv_offset;
--
Jens Axboe
From: Jens Axboe <[email protected]>
Date: Wed, 17 Oct 2007 12:58:40 +0200
> The problem is that you cannot zero the entire sg entry, because then
> you'd potentially overwrite the chain pointer.
>
> I'd propose just adding a
>
> sg_dma_address(sg) = 0;
> sg_dma_len(sg) = 0;
>
> there for now, or provide an arch_clear_sg_entry() helper if we need
> more killed.
The "chain pointer" is indicated by an sg->page with the low
bit set, but we're explicitly initializing it here to a
non-chain page pointer value.
What's the problem?
On Wed, Oct 17 2007, David Miller wrote:
> From: Jens Axboe <[email protected]>
> Date: Wed, 17 Oct 2007 12:58:40 +0200
>
> > The problem is that you cannot zero the entire sg entry, because then
> > you'd potentially overwrite the chain pointer.
> >
> > I'd propose just adding a
> >
> > sg_dma_address(sg) = 0;
> > sg_dma_len(sg) = 0;
> >
> > there for now, or provide an arch_clear_sg_entry() helper if we need
> > more killed.
>
> The "chain pointer" is indicated by an sg->page with the low
> bit set, but we're explicitly initializing it here to a
> non-chain page pointer value.
>
> What's the problem?
See next mail :-)
--
Jens Axboe
On Wed, 17 Oct 2007 12:58:40 +0200
Jens Axboe <[email protected]> wrote:
> On Wed, Oct 17 2007, David Miller wrote:
> > From: Jens Axboe <[email protected]>
> > Date: Wed, 17 Oct 2007 11:16:29 +0200
> >
> > > On Wed, Oct 17 2007, David Miller wrote:
> > > > From: Jens Axboe <[email protected]>
> > > > Date: Wed, 17 Oct 2007 10:45:28 +0200
> > > >
> > > > > Righto, it's invalid to call sg_next() on the last entry!
> > > >
> > > > Unfortunately, that's what the sparc64 code wanted to do, this
> > > > transformation in the sparc64 sg chaining patch is not equilavent:
> > > >
> > > > - struct scatterlist *sg_end = sg + nelems;
> > > > + struct scatterlist *sg_end = sg_last(sg, nelems);
> > > > ...
> > > > - while (sg < sg_end &&
> > > > + while (sg != sg_end &&
> > >
> > > Auch indeed. That'd probably be better as a
> > >
> > > do {
> > > ...
> > > } while (sg != sg_end);
> >
> > Ok, next bug, introduced by this change:
> >
> > commit f565913ef8a8d0cfa46a1faaf8340cc357a46f3a
> > Author: Jens Axboe <[email protected]>
> > Date: Fri Sep 21 10:44:19 2007 +0200
> >
> > block: convert to using sg helpers
> >
> > Convert the main rq mapper (blk_rq_map_sg()) to the sg helper setup.
> >
> > Signed-off-by: Jens Axboe <[email protected]>
> >
> > Specifically this part:
> >
> > new_segment:
> > - memset(&sg[nsegs],0,sizeof(struct scatterlist));
> > - sg[nsegs].page = bvec->bv_page;
> > - sg[nsegs].length = nbytes;
> > - sg[nsegs].offset = bvec->bv_offset;
> > + sg = next_sg;
> > + next_sg = sg_next(sg);
> >
> > + sg->page = bvec->bv_page;
> > + sg->length = nbytes;
> > + sg->offset = bvec->bv_offset;
> >
> > You can't remove that memset(), it's there for a reason. The IOMMU
> > layers depended upon the code zero'ing out the whole scatterlist
> > struct, there might be more to it than page, length and offset :-)
>
> I realize that, and I was pretty worried about this specific change. But
> there's only been one piece of fallout because if it until now - well
> two, with the sparc64 stuff.
>
> The problem is that you cannot zero the entire sg entry, because then
> you'd potentially overwrite the chain pointer.
>
> I'd propose just adding a
>
> sg_dma_address(sg) = 0;
> sg_dma_len(sg) = 0;
>
> there for now, or provide an arch_clear_sg_entry() helper if we need
> more killed.
Except for sparc64, the IOMMU implementations do something like this,
I think.
-
sparc64: zero out dma_length
zero out dma_length in the entry immediately following the last mapped
entry for unmap_sg.
Signed-off-by: FUJITA Tomonori <[email protected]>
---
arch/sparc64/kernel/iommu_common.c | 5 +++++
1 files changed, 5 insertions(+), 0 deletions(-)
diff --git a/arch/sparc64/kernel/iommu_common.c b/arch/sparc64/kernel/iommu_common.c
index d7ca900..836455d 100644
--- a/arch/sparc64/kernel/iommu_common.c
+++ b/arch/sparc64/kernel/iommu_common.c
@@ -234,6 +234,11 @@ unsigned long prepare_sg(struct scatterlist *sg, int nents)
dma_sg->dma_address = dent_addr;
dma_sg->dma_length = dent_len;
+ if (dma_sg != sg) {
+ dma_sg = next_sg(dma_sg);
+ dma_sg->dma_length = 0;
+ }
+
return ((unsigned long) dent_addr +
(unsigned long) dent_len +
(IO_PAGE_SIZE - 1UL)) >> IO_PAGE_SHIFT;
--
1.5.2.4
On Wed, 17 Oct 2007 02:45:47 -0700 (PDT)
David Miller <[email protected]> wrote:
> From: FUJITA Tomonori <[email protected]>
> Date: Wed, 17 Oct 2007 18:24:01 +0900
>
> > On Wed, 17 Oct 2007 11:16:29 +0200
> > Jens Axboe <[email protected]> wrote:
> >
> > > On Wed, Oct 17 2007, David Miller wrote:
> > > > I would suggest that other sg_last() uses be audited for the same bug.
> > >
> > > Agree.
> >
> > Only libata, I think.
>
> There are a few other spots, like ide-scsi
Oops, I missed that. But seems that we have only three, ide-scsi,
libata, sparc64 iommu.
From: Jens Axboe <[email protected]>
Date: Wed, 17 Oct 2007 13:01:42 +0200
> Actually, just clearing AFTER sg_next() would be fine, since we know
> that is not a link entry. Duh...
Yes and I'm running a kernel successfully with this fix.
Jens, please also add the following on top of Fujita-san's most recent
sparc64 patch and we should be good to go.
>From 0f6e2c3085ec57df78b249a8722323692f33a1b2 Mon Sep 17 00:00:00 2001
From: David S. Miller <[email protected]>
Date: Wed, 17 Oct 2007 04:08:48 -0700
Subject: [PATCH] [SPARC64]: Fix loop terminating conditions in fill_sg().
Signed-off-by: David S. Miller <[email protected]>
---
arch/sparc64/kernel/iommu.c | 12 +++++++-----
arch/sparc64/kernel/pci_sun4v.c | 15 ++++++++-------
2 files changed, 15 insertions(+), 12 deletions(-)
diff --git a/arch/sparc64/kernel/iommu.c b/arch/sparc64/kernel/iommu.c
index 5d4e96d..29af777 100644
--- a/arch/sparc64/kernel/iommu.c
+++ b/arch/sparc64/kernel/iommu.c
@@ -475,12 +475,11 @@ static void dma_4u_unmap_single(struct device *dev, dma_addr_t bus_addr,
#define SG_ENT_PHYS_ADDRESS(SG) \
(__pa(page_address((SG)->page)) + (SG)->offset)
-static inline void fill_sg(iopte_t *iopte, struct scatterlist *sg,
- int nused, int nelems,
- unsigned long iopte_protection)
+static void fill_sg(iopte_t *iopte, struct scatterlist *sg,
+ int nused, int nelems,
+ unsigned long iopte_protection)
{
struct scatterlist *dma_sg = sg;
- struct scatterlist *sg_end = sg_last(sg, nelems);
int i;
for (i = 0; i < nused; i++) {
@@ -516,6 +515,7 @@ static inline void fill_sg(iopte_t *iopte, struct scatterlist *sg,
break;
}
sg = sg_next(sg);
+ nelems--;
}
pteval = iopte_protection | (pteval & IOPTE_PAGE);
@@ -529,18 +529,20 @@ static inline void fill_sg(iopte_t *iopte, struct scatterlist *sg,
pteval = (pteval & IOPTE_PAGE) + len;
sg = sg_next(sg);
+ nelems--;
/* Skip over any tail mappings we've fully mapped,
* adjusting pteval along the way. Stop when we
* detect a page crossing event.
*/
- while (sg != sg_end &&
+ while (nelems &&
(pteval << (64 - IO_PAGE_SHIFT)) != 0UL &&
(pteval == SG_ENT_PHYS_ADDRESS(sg)) &&
((pteval ^
(SG_ENT_PHYS_ADDRESS(sg) + sg->length - 1UL)) >> IO_PAGE_SHIFT) == 0UL) {
pteval += sg->length;
sg = sg_next(sg);
+ nelems--;
}
if ((pteval << (64 - IO_PAGE_SHIFT)) == 0UL)
pteval = ~0UL;
diff --git a/arch/sparc64/kernel/pci_sun4v.c b/arch/sparc64/kernel/pci_sun4v.c
index 119f8ef..fe46ace 100644
--- a/arch/sparc64/kernel/pci_sun4v.c
+++ b/arch/sparc64/kernel/pci_sun4v.c
@@ -368,12 +368,11 @@ static void dma_4v_unmap_single(struct device *dev, dma_addr_t bus_addr,
#define SG_ENT_PHYS_ADDRESS(SG) \
(__pa(page_address((SG)->page)) + (SG)->offset)
-static inline long fill_sg(long entry, struct device *dev,
- struct scatterlist *sg,
- int nused, int nelems, unsigned long prot)
+static long fill_sg(long entry, struct device *dev,
+ struct scatterlist *sg,
+ int nused, int nelems, unsigned long prot)
{
struct scatterlist *dma_sg = sg;
- struct scatterlist *sg_end = sg_last(sg, nelems);
unsigned long flags;
int i;
@@ -414,6 +413,7 @@ static inline long fill_sg(long entry, struct device *dev,
break;
}
sg = sg_next(sg);
+ nelems--;
}
pteval = (pteval & IOPTE_PAGE);
@@ -432,19 +432,20 @@ static inline long fill_sg(long entry, struct device *dev,
pteval = (pteval & IOPTE_PAGE) + len;
sg = sg_next(sg);
+ nelems--;
/* Skip over any tail mappings we've fully mapped,
* adjusting pteval along the way. Stop when we
* detect a page crossing event.
*/
- while ((pteval << (64 - IO_PAGE_SHIFT)) != 0UL &&
+ while (nelems &&
+ (pteval << (64 - IO_PAGE_SHIFT)) != 0UL &&
(pteval == SG_ENT_PHYS_ADDRESS(sg)) &&
((pteval ^
(SG_ENT_PHYS_ADDRESS(sg) + sg->length - 1UL)) >> IO_PAGE_SHIFT) == 0UL) {
pteval += sg->length;
- if (sg == sg_end)
- break;
sg = sg_next(sg);
+ nelems--;
}
if ((pteval << (64 - IO_PAGE_SHIFT)) == 0UL)
pteval = ~0UL;
--
1.5.3.4
On Wed, Oct 17 2007, David Miller wrote:
> From: Jens Axboe <[email protected]>
> Date: Wed, 17 Oct 2007 13:01:42 +0200
>
> > Actually, just clearing AFTER sg_next() would be fine, since we know
> > that is not a link entry. Duh...
>
> Yes and I'm running a kernel successfully with this fix.
Great!
> Jens, please also add the following on top of Fujita-san's most recent
> sparc64 patch and we should be good to go.
Awesome, thanks. And sorry for messing up sparc64.
--
Jens Axboe
On Wed, Oct 17 2007, FUJITA Tomonori wrote:
> On Wed, 17 Oct 2007 02:45:47 -0700 (PDT)
> David Miller <[email protected]> wrote:
>
> > From: FUJITA Tomonori <[email protected]>
> > Date: Wed, 17 Oct 2007 18:24:01 +0900
> >
> > > On Wed, 17 Oct 2007 11:16:29 +0200
> > > Jens Axboe <[email protected]> wrote:
> > >
> > > > On Wed, Oct 17 2007, David Miller wrote:
> > > > > I would suggest that other sg_last() uses be audited for the same bug.
> > > >
> > > > Agree.
> > >
> > > Only libata, I think.
> >
> > There are a few other spots, like ide-scsi
>
> Oops, I missed that. But seems that we have only three, ide-scsi,
> libata, sparc64 iommu.
And now only ide-scsi and libata, Dave killed sg_last() in sparc64
already. So less than I remember, I'd say kill it NOW...
--
Jens Axboe
On Wed, Oct 17 2007, Jens Axboe wrote:
> On Wed, Oct 17 2007, FUJITA Tomonori wrote:
> > On Wed, 17 Oct 2007 02:45:47 -0700 (PDT)
> > David Miller <[email protected]> wrote:
> >
> > > From: FUJITA Tomonori <[email protected]>
> > > Date: Wed, 17 Oct 2007 18:24:01 +0900
> > >
> > > > On Wed, 17 Oct 2007 11:16:29 +0200
> > > > Jens Axboe <[email protected]> wrote:
> > > >
> > > > > On Wed, Oct 17 2007, David Miller wrote:
> > > > > > I would suggest that other sg_last() uses be audited for the same bug.
> > > > >
> > > > > Agree.
> > > >
> > > > Only libata, I think.
> > >
> > > There are a few other spots, like ide-scsi
> >
> > Oops, I missed that. But seems that we have only three, ide-scsi,
> > libata, sparc64 iommu.
>
> And now only ide-scsi and libata, Dave killed sg_last() in sparc64
> already. So less than I remember, I'd say kill it NOW...
libata remains.
diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c
index bd74f6c..fa7ba64 100644
--- a/drivers/scsi/ide-scsi.c
+++ b/drivers/scsi/ide-scsi.c
@@ -70,7 +70,7 @@ typedef struct idescsi_pc_s {
u8 *buffer; /* Data buffer */
u8 *current_position; /* Pointer into the above buffer */
struct scatterlist *sg; /* Scatter gather table */
- struct scatterlist *last_sg; /* Last sg element */
+ unsigned int sg_cnt; /* Number of entries in sg */
int b_count; /* Bytes transferred from current entry */
struct scsi_cmnd *scsi_cmd; /* SCSI command */
void (*done)(struct scsi_cmnd *); /* Scsi completion routine */
@@ -192,7 +192,7 @@ static void idescsi_input_buffers (ide_drive_t *drive, idescsi_pc_t *pc, unsigne
}
bcount -= count; pc->b_count += count;
if (pc->b_count == pc->sg->length) {
- if (pc->sg == pc->last_sg)
+ if (!--pc->sg_cnt)
break;
pc->sg = sg_next(pc->sg);
pc->b_count = 0;
@@ -229,7 +229,7 @@ static void idescsi_output_buffers (ide_drive_t *drive, idescsi_pc_t *pc, unsign
}
bcount -= count; pc->b_count += count;
if (pc->b_count == pc->sg->length) {
- if (pc->sg == pc->last_sg)
+ if (!--pc->sg_cnt)
break;
pc->sg = sg_next(pc->sg);
pc->b_count = 0;
@@ -807,7 +807,7 @@ static int idescsi_queue (struct scsi_cmnd *cmd,
memcpy (pc->c, cmd->cmnd, cmd->cmd_len);
pc->buffer = NULL;
pc->sg = scsi_sglist(cmd);
- pc->last_sg = sg_last(pc->sg, scsi_sg_count(cmd));
+ pc->sg_cnt = scsi_sg_count(cmd);
pc->b_count = 0;
pc->request_transfer = pc->buffer_size = scsi_bufflen(cmd);
pc->scsi_cmd = cmd;
--
Jens Axboe
From: Jens Axboe <[email protected]>
Date: Wed, 17 Oct 2007 13:11:46 +0200
> On Wed, Oct 17 2007, David Miller wrote:
> > Jens, please also add the following on top of Fujita-san's most recent
> > sparc64 patch and we should be good to go.
>
> Awesome, thanks. And sorry for messing up sparc64.
Don't worry, I'll keep breaking your network interfaces
just to keep things even :-)
On Wed, Oct 17 2007, David Miller wrote:
> From: Jens Axboe <[email protected]>
> Date: Wed, 17 Oct 2007 13:11:46 +0200
>
> > On Wed, Oct 17 2007, David Miller wrote:
> > > Jens, please also add the following on top of Fujita-san's most recent
> > > sparc64 patch and we should be good to go.
> >
> > Awesome, thanks. And sorry for messing up sparc64.
>
> Don't worry, I'll keep breaking your network interfaces
> just to keep things even :-)
It's a deal - I think I'm ahead so far, so I promise to smile politely
when/if my networking craps out :)
--
Jens Axboe
On Wed, 17 Oct 2007 13:01:42 +0200
Jens Axboe <[email protected]> wrote:
> On Wed, Oct 17 2007, Jens Axboe wrote:
> > On Wed, Oct 17 2007, David Miller wrote:
> > > From: Jens Axboe <[email protected]>
> > > Date: Wed, 17 Oct 2007 11:16:29 +0200
> > >
> > > > On Wed, Oct 17 2007, David Miller wrote:
> > > > > From: Jens Axboe <[email protected]>
> > > > > Date: Wed, 17 Oct 2007 10:45:28 +0200
> > > > >
> > > > > > Righto, it's invalid to call sg_next() on the last entry!
> > > > >
> > > > > Unfortunately, that's what the sparc64 code wanted to do, this
> > > > > transformation in the sparc64 sg chaining patch is not equilavent:
> > > > >
> > > > > - struct scatterlist *sg_end = sg + nelems;
> > > > > + struct scatterlist *sg_end = sg_last(sg, nelems);
> > > > > ...
> > > > > - while (sg < sg_end &&
> > > > > + while (sg != sg_end &&
> > > >
> > > > Auch indeed. That'd probably be better as a
> > > >
> > > > do {
> > > > ...
> > > > } while (sg != sg_end);
> > >
> > > Ok, next bug, introduced by this change:
> > >
> > > commit f565913ef8a8d0cfa46a1faaf8340cc357a46f3a
> > > Author: Jens Axboe <[email protected]>
> > > Date: Fri Sep 21 10:44:19 2007 +0200
> > >
> > > block: convert to using sg helpers
> > >
> > > Convert the main rq mapper (blk_rq_map_sg()) to the sg helper setup.
> > >
> > > Signed-off-by: Jens Axboe <[email protected]>
> > >
> > > Specifically this part:
> > >
> > > new_segment:
> > > - memset(&sg[nsegs],0,sizeof(struct scatterlist));
> > > - sg[nsegs].page = bvec->bv_page;
> > > - sg[nsegs].length = nbytes;
> > > - sg[nsegs].offset = bvec->bv_offset;
> > > + sg = next_sg;
> > > + next_sg = sg_next(sg);
> > >
> > > + sg->page = bvec->bv_page;
> > > + sg->length = nbytes;
> > > + sg->offset = bvec->bv_offset;
> > >
> > > You can't remove that memset(), it's there for a reason. The IOMMU
> > > layers depended upon the code zero'ing out the whole scatterlist
> > > struct, there might be more to it than page, length and offset :-)
> >
> > I realize that, and I was pretty worried about this specific change. But
> > there's only been one piece of fallout because if it until now - well
> > two, with the sparc64 stuff.
> >
> > The problem is that you cannot zero the entire sg entry, because then
> > you'd potentially overwrite the chain pointer.
> >
> > I'd propose just adding a
> >
> > sg_dma_address(sg) = 0;
> > sg_dma_len(sg) = 0;
> >
> > there for now, or provide an arch_clear_sg_entry() helper if we need
> > more killed.
>
> Actually, just clearing AFTER sg_next() would be fine, since we know
> that is not a link entry. Duh...
>
> diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
> index 9eabac9..1014d34 100644
> --- a/block/ll_rw_blk.c
> +++ b/block/ll_rw_blk.c
> @@ -1352,6 +1352,7 @@ new_segment:
> sg = next_sg;
> next_sg = sg_next(sg);
>
> + memset(sg, 0, sizeof(*sg));
> sg->page = bvec->bv_page;
> sg->length = nbytes;
> sg->offset = bvec->bv_offset;
>
> --
So now how about removing zero'ing out sglist in scsi-ml?
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index aac8a02..0c86be7 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -764,8 +764,6 @@ struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *cmd, gfp_t gfp_mask)
if (unlikely(!sgl))
goto enomem;
- memset(sgl, 0, sizeof(*sgl) * sgp->size);
-
/*
* first loop through, set initial index and return value
*/
On Wed, Oct 17 2007, FUJITA Tomonori wrote:
> On Wed, 17 Oct 2007 13:01:42 +0200
> Jens Axboe <[email protected]> wrote:
>
> > On Wed, Oct 17 2007, Jens Axboe wrote:
> > > On Wed, Oct 17 2007, David Miller wrote:
> > > > From: Jens Axboe <[email protected]>
> > > > Date: Wed, 17 Oct 2007 11:16:29 +0200
> > > >
> > > > > On Wed, Oct 17 2007, David Miller wrote:
> > > > > > From: Jens Axboe <[email protected]>
> > > > > > Date: Wed, 17 Oct 2007 10:45:28 +0200
> > > > > >
> > > > > > > Righto, it's invalid to call sg_next() on the last entry!
> > > > > >
> > > > > > Unfortunately, that's what the sparc64 code wanted to do, this
> > > > > > transformation in the sparc64 sg chaining patch is not equilavent:
> > > > > >
> > > > > > - struct scatterlist *sg_end = sg + nelems;
> > > > > > + struct scatterlist *sg_end = sg_last(sg, nelems);
> > > > > > ...
> > > > > > - while (sg < sg_end &&
> > > > > > + while (sg != sg_end &&
> > > > >
> > > > > Auch indeed. That'd probably be better as a
> > > > >
> > > > > do {
> > > > > ...
> > > > > } while (sg != sg_end);
> > > >
> > > > Ok, next bug, introduced by this change:
> > > >
> > > > commit f565913ef8a8d0cfa46a1faaf8340cc357a46f3a
> > > > Author: Jens Axboe <[email protected]>
> > > > Date: Fri Sep 21 10:44:19 2007 +0200
> > > >
> > > > block: convert to using sg helpers
> > > >
> > > > Convert the main rq mapper (blk_rq_map_sg()) to the sg helper setup.
> > > >
> > > > Signed-off-by: Jens Axboe <[email protected]>
> > > >
> > > > Specifically this part:
> > > >
> > > > new_segment:
> > > > - memset(&sg[nsegs],0,sizeof(struct scatterlist));
> > > > - sg[nsegs].page = bvec->bv_page;
> > > > - sg[nsegs].length = nbytes;
> > > > - sg[nsegs].offset = bvec->bv_offset;
> > > > + sg = next_sg;
> > > > + next_sg = sg_next(sg);
> > > >
> > > > + sg->page = bvec->bv_page;
> > > > + sg->length = nbytes;
> > > > + sg->offset = bvec->bv_offset;
> > > >
> > > > You can't remove that memset(), it's there for a reason. The IOMMU
> > > > layers depended upon the code zero'ing out the whole scatterlist
> > > > struct, there might be more to it than page, length and offset :-)
> > >
> > > I realize that, and I was pretty worried about this specific change. But
> > > there's only been one piece of fallout because if it until now - well
> > > two, with the sparc64 stuff.
> > >
> > > The problem is that you cannot zero the entire sg entry, because then
> > > you'd potentially overwrite the chain pointer.
> > >
> > > I'd propose just adding a
> > >
> > > sg_dma_address(sg) = 0;
> > > sg_dma_len(sg) = 0;
> > >
> > > there for now, or provide an arch_clear_sg_entry() helper if we need
> > > more killed.
> >
> > Actually, just clearing AFTER sg_next() would be fine, since we know
> > that is not a link entry. Duh...
> >
> > diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
> > index 9eabac9..1014d34 100644
> > --- a/block/ll_rw_blk.c
> > +++ b/block/ll_rw_blk.c
> > @@ -1352,6 +1352,7 @@ new_segment:
> > sg = next_sg;
> > next_sg = sg_next(sg);
> >
> > + memset(sg, 0, sizeof(*sg));
> > sg->page = bvec->bv_page;
> > sg->length = nbytes;
> > sg->offset = bvec->bv_offset;
> >
> > --
>
> So now how about removing zero'ing out sglist in scsi-ml?
>
>
> diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
> index aac8a02..0c86be7 100644
> --- a/drivers/scsi/scsi_lib.c
> +++ b/drivers/scsi/scsi_lib.c
> @@ -764,8 +764,6 @@ struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *cmd, gfp_t gfp_mask)
> if (unlikely(!sgl))
> goto enomem;
>
> - memset(sgl, 0, sizeof(*sgl) * sgp->size);
> -
> /*
> * first loop through, set initial index and return value
> */
Sure, that should be quite alright then. I'll add it.
--
Jens Axboe
On Wed, 17 Oct 2007 13:41:17 +0200
Jens Axboe <[email protected]> wrote:
> On Wed, Oct 17 2007, FUJITA Tomonori wrote:
> > On Wed, 17 Oct 2007 13:01:42 +0200
> > Jens Axboe <[email protected]> wrote:
> >
> > > On Wed, Oct 17 2007, Jens Axboe wrote:
> > > > On Wed, Oct 17 2007, David Miller wrote:
> > > > > From: Jens Axboe <[email protected]>
> > > > > Date: Wed, 17 Oct 2007 11:16:29 +0200
> > > > >
> > > > > > On Wed, Oct 17 2007, David Miller wrote:
> > > > > > > From: Jens Axboe <[email protected]>
> > > > > > > Date: Wed, 17 Oct 2007 10:45:28 +0200
> > > > > > >
> > > > > > > > Righto, it's invalid to call sg_next() on the last entry!
> > > > > > >
> > > > > > > Unfortunately, that's what the sparc64 code wanted to do, this
> > > > > > > transformation in the sparc64 sg chaining patch is not equilavent:
> > > > > > >
> > > > > > > - struct scatterlist *sg_end = sg + nelems;
> > > > > > > + struct scatterlist *sg_end = sg_last(sg, nelems);
> > > > > > > ...
> > > > > > > - while (sg < sg_end &&
> > > > > > > + while (sg != sg_end &&
> > > > > >
> > > > > > Auch indeed. That'd probably be better as a
> > > > > >
> > > > > > do {
> > > > > > ...
> > > > > > } while (sg != sg_end);
> > > > >
> > > > > Ok, next bug, introduced by this change:
> > > > >
> > > > > commit f565913ef8a8d0cfa46a1faaf8340cc357a46f3a
> > > > > Author: Jens Axboe <[email protected]>
> > > > > Date: Fri Sep 21 10:44:19 2007 +0200
> > > > >
> > > > > block: convert to using sg helpers
> > > > >
> > > > > Convert the main rq mapper (blk_rq_map_sg()) to the sg helper setup.
> > > > >
> > > > > Signed-off-by: Jens Axboe <[email protected]>
> > > > >
> > > > > Specifically this part:
> > > > >
> > > > > new_segment:
> > > > > - memset(&sg[nsegs],0,sizeof(struct scatterlist));
> > > > > - sg[nsegs].page = bvec->bv_page;
> > > > > - sg[nsegs].length = nbytes;
> > > > > - sg[nsegs].offset = bvec->bv_offset;
> > > > > + sg = next_sg;
> > > > > + next_sg = sg_next(sg);
> > > > >
> > > > > + sg->page = bvec->bv_page;
> > > > > + sg->length = nbytes;
> > > > > + sg->offset = bvec->bv_offset;
> > > > >
> > > > > You can't remove that memset(), it's there for a reason. The IOMMU
> > > > > layers depended upon the code zero'ing out the whole scatterlist
> > > > > struct, there might be more to it than page, length and offset :-)
> > > >
> > > > I realize that, and I was pretty worried about this specific change. But
> > > > there's only been one piece of fallout because if it until now - well
> > > > two, with the sparc64 stuff.
> > > >
> > > > The problem is that you cannot zero the entire sg entry, because then
> > > > you'd potentially overwrite the chain pointer.
> > > >
> > > > I'd propose just adding a
> > > >
> > > > sg_dma_address(sg) = 0;
> > > > sg_dma_len(sg) = 0;
> > > >
> > > > there for now, or provide an arch_clear_sg_entry() helper if we need
> > > > more killed.
> > >
> > > Actually, just clearing AFTER sg_next() would be fine, since we know
> > > that is not a link entry. Duh...
> > >
> > > diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
> > > index 9eabac9..1014d34 100644
> > > --- a/block/ll_rw_blk.c
> > > +++ b/block/ll_rw_blk.c
> > > @@ -1352,6 +1352,7 @@ new_segment:
> > > sg = next_sg;
> > > next_sg = sg_next(sg);
> > >
> > > + memset(sg, 0, sizeof(*sg));
> > > sg->page = bvec->bv_page;
> > > sg->length = nbytes;
> > > sg->offset = bvec->bv_offset;
> > >
> > > --
> >
> > So now how about removing zero'ing out sglist in scsi-ml?
> >
> >
> > diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
> > index aac8a02..0c86be7 100644
> > --- a/drivers/scsi/scsi_lib.c
> > +++ b/drivers/scsi/scsi_lib.c
> > @@ -764,8 +764,6 @@ struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *cmd, gfp_t gfp_mask)
> > if (unlikely(!sgl))
> > goto enomem;
> >
> > - memset(sgl, 0, sizeof(*sgl) * sgp->size);
> > -
> > /*
> > * first loop through, set initial index and return value
> > */
>
> Sure, that should be quite alright then. I'll add it.
Thanks, it would be. Before sg chaining, scsi-ml didn't zero out.
I think that it would be better that IOMMU code handles uninitialized
sg entries (sg list can be pretty large). Execpt for sparc64, the
IOMMU code can do, I think.
On Wed, 17 Oct 2007 20:57:17 +0900
FUJITA Tomonori <[email protected]> wrote:
> On Wed, 17 Oct 2007 13:41:17 +0200
> Jens Axboe <[email protected]> wrote:
>
> > On Wed, Oct 17 2007, FUJITA Tomonori wrote:
> > > On Wed, 17 Oct 2007 13:01:42 +0200
> > > Jens Axboe <[email protected]> wrote:
> > >
> > > > On Wed, Oct 17 2007, Jens Axboe wrote:
> > > > > On Wed, Oct 17 2007, David Miller wrote:
> > > > > > From: Jens Axboe <[email protected]>
> > > > > > Date: Wed, 17 Oct 2007 11:16:29 +0200
> > > > > >
> > > > > > > On Wed, Oct 17 2007, David Miller wrote:
> > > > > > > > From: Jens Axboe <[email protected]>
> > > > > > > > Date: Wed, 17 Oct 2007 10:45:28 +0200
> > > > > > > >
> > > > > > > > > Righto, it's invalid to call sg_next() on the last entry!
> > > > > > > >
> > > > > > > > Unfortunately, that's what the sparc64 code wanted to do, this
> > > > > > > > transformation in the sparc64 sg chaining patch is not equilavent:
> > > > > > > >
> > > > > > > > - struct scatterlist *sg_end = sg + nelems;
> > > > > > > > + struct scatterlist *sg_end = sg_last(sg, nelems);
> > > > > > > > ...
> > > > > > > > - while (sg < sg_end &&
> > > > > > > > + while (sg != sg_end &&
> > > > > > >
> > > > > > > Auch indeed. That'd probably be better as a
> > > > > > >
> > > > > > > do {
> > > > > > > ...
> > > > > > > } while (sg != sg_end);
> > > > > >
> > > > > > Ok, next bug, introduced by this change:
> > > > > >
> > > > > > commit f565913ef8a8d0cfa46a1faaf8340cc357a46f3a
> > > > > > Author: Jens Axboe <[email protected]>
> > > > > > Date: Fri Sep 21 10:44:19 2007 +0200
> > > > > >
> > > > > > block: convert to using sg helpers
> > > > > >
> > > > > > Convert the main rq mapper (blk_rq_map_sg()) to the sg helper setup.
> > > > > >
> > > > > > Signed-off-by: Jens Axboe <[email protected]>
> > > > > >
> > > > > > Specifically this part:
> > > > > >
> > > > > > new_segment:
> > > > > > - memset(&sg[nsegs],0,sizeof(struct scatterlist));
> > > > > > - sg[nsegs].page = bvec->bv_page;
> > > > > > - sg[nsegs].length = nbytes;
> > > > > > - sg[nsegs].offset = bvec->bv_offset;
> > > > > > + sg = next_sg;
> > > > > > + next_sg = sg_next(sg);
> > > > > >
> > > > > > + sg->page = bvec->bv_page;
> > > > > > + sg->length = nbytes;
> > > > > > + sg->offset = bvec->bv_offset;
> > > > > >
> > > > > > You can't remove that memset(), it's there for a reason. The IOMMU
> > > > > > layers depended upon the code zero'ing out the whole scatterlist
> > > > > > struct, there might be more to it than page, length and offset :-)
> > > > >
> > > > > I realize that, and I was pretty worried about this specific change. But
> > > > > there's only been one piece of fallout because if it until now - well
> > > > > two, with the sparc64 stuff.
> > > > >
> > > > > The problem is that you cannot zero the entire sg entry, because then
> > > > > you'd potentially overwrite the chain pointer.
> > > > >
> > > > > I'd propose just adding a
> > > > >
> > > > > sg_dma_address(sg) = 0;
> > > > > sg_dma_len(sg) = 0;
> > > > >
> > > > > there for now, or provide an arch_clear_sg_entry() helper if we need
> > > > > more killed.
> > > >
> > > > Actually, just clearing AFTER sg_next() would be fine, since we know
> > > > that is not a link entry. Duh...
> > > >
> > > > diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
> > > > index 9eabac9..1014d34 100644
> > > > --- a/block/ll_rw_blk.c
> > > > +++ b/block/ll_rw_blk.c
> > > > @@ -1352,6 +1352,7 @@ new_segment:
> > > > sg = next_sg;
> > > > next_sg = sg_next(sg);
> > > >
> > > > + memset(sg, 0, sizeof(*sg));
> > > > sg->page = bvec->bv_page;
> > > > sg->length = nbytes;
> > > > sg->offset = bvec->bv_offset;
> > > >
> > > > --
> > >
> > > So now how about removing zero'ing out sglist in scsi-ml?
> > >
> > >
> > > diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
> > > index aac8a02..0c86be7 100644
> > > --- a/drivers/scsi/scsi_lib.c
> > > +++ b/drivers/scsi/scsi_lib.c
> > > @@ -764,8 +764,6 @@ struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *cmd, gfp_t gfp_mask)
> > > if (unlikely(!sgl))
> > > goto enomem;
> > >
> > > - memset(sgl, 0, sizeof(*sgl) * sgp->size);
> > > -
> > > /*
> > > * first loop through, set initial index and return value
> > > */
> >
> > Sure, that should be quite alright then. I'll add it.
>
> Thanks, it would be. Before sg chaining, scsi-ml didn't zero out.
Oops, it should be.
> I think that it would be better that IOMMU code handles uninitialized
> sg entries (sg list can be pretty large). Execpt for sparc64, the
> IOMMU code can do, I think.
And I think that with this patch, sparc64 can handle it:
http://marc.info/?l=linux-scsi&m=119261920425120&w=2
On Wed, Oct 17 2007, FUJITA Tomonori wrote:
> > I think that it would be better that IOMMU code handles uninitialized
> > sg entries (sg list can be pretty large). Execpt for sparc64, the
> > IOMMU code can do, I think.
>
> And I think that with this patch, sparc64 can handle it:
>
> http://marc.info/?l=linux-scsi&m=119261920425120&w=2
I would tend to agree. Get davem to ack that patch, and we can move it
forward.
--
Jens Axboe
On Wed, 17 Oct 2007 20:37:58 +0900
FUJITA Tomonori <[email protected]> wrote:
> On Wed, 17 Oct 2007 13:01:42 +0200
> Jens Axboe <[email protected]> wrote:
>
> > On Wed, Oct 17 2007, Jens Axboe wrote:
> > > On Wed, Oct 17 2007, David Miller wrote:
> > > > From: Jens Axboe <[email protected]>
> > > > Date: Wed, 17 Oct 2007 11:16:29 +0200
> > > >
> > > > > On Wed, Oct 17 2007, David Miller wrote:
> > > > > > From: Jens Axboe <[email protected]>
> > > > > > Date: Wed, 17 Oct 2007 10:45:28 +0200
> > > > > >
> > > > > > > Righto, it's invalid to call sg_next() on the last entry!
> > > > > >
> > > > > > Unfortunately, that's what the sparc64 code wanted to do, this
> > > > > > transformation in the sparc64 sg chaining patch is not equilavent:
> > > > > >
> > > > > > - struct scatterlist *sg_end = sg + nelems;
> > > > > > + struct scatterlist *sg_end = sg_last(sg, nelems);
> > > > > > ...
> > > > > > - while (sg < sg_end &&
> > > > > > + while (sg != sg_end &&
> > > > >
> > > > > Auch indeed. That'd probably be better as a
> > > > >
> > > > > do {
> > > > > ...
> > > > > } while (sg != sg_end);
> > > >
> > > > Ok, next bug, introduced by this change:
> > > >
> > > > commit f565913ef8a8d0cfa46a1faaf8340cc357a46f3a
> > > > Author: Jens Axboe <[email protected]>
> > > > Date: Fri Sep 21 10:44:19 2007 +0200
> > > >
> > > > block: convert to using sg helpers
> > > >
> > > > Convert the main rq mapper (blk_rq_map_sg()) to the sg helper setup.
> > > >
> > > > Signed-off-by: Jens Axboe <[email protected]>
> > > >
> > > > Specifically this part:
> > > >
> > > > new_segment:
> > > > - memset(&sg[nsegs],0,sizeof(struct scatterlist));
> > > > - sg[nsegs].page = bvec->bv_page;
> > > > - sg[nsegs].length = nbytes;
> > > > - sg[nsegs].offset = bvec->bv_offset;
> > > > + sg = next_sg;
> > > > + next_sg = sg_next(sg);
> > > >
> > > > + sg->page = bvec->bv_page;
> > > > + sg->length = nbytes;
> > > > + sg->offset = bvec->bv_offset;
> > > >
> > > > You can't remove that memset(), it's there for a reason. The IOMMU
> > > > layers depended upon the code zero'ing out the whole scatterlist
> > > > struct, there might be more to it than page, length and offset :-)
> > >
> > > I realize that, and I was pretty worried about this specific change. But
> > > there's only been one piece of fallout because if it until now - well
> > > two, with the sparc64 stuff.
> > >
> > > The problem is that you cannot zero the entire sg entry, because then
> > > you'd potentially overwrite the chain pointer.
> > >
> > > I'd propose just adding a
> > >
> > > sg_dma_address(sg) = 0;
> > > sg_dma_len(sg) = 0;
> > >
> > > there for now, or provide an arch_clear_sg_entry() helper if we need
> > > more killed.
> >
> > Actually, just clearing AFTER sg_next() would be fine, since we know
> > that is not a link entry. Duh...
> >
> > diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
> > index 9eabac9..1014d34 100644
> > --- a/block/ll_rw_blk.c
> > +++ b/block/ll_rw_blk.c
> > @@ -1352,6 +1352,7 @@ new_segment:
> > sg = next_sg;
> > next_sg = sg_next(sg);
> >
> > + memset(sg, 0, sizeof(*sg));
> > sg->page = bvec->bv_page;
> > sg->length = nbytes;
> > sg->offset = bvec->bv_offset;
> >
> > --
>
> So now how about removing zero'ing out sglist in scsi-ml?
Really sorry, I must have been stoned.