Subject: [patch][9/9] block: remove bio walking


[patch] block: remove bio walking

IDE driver was the only user of bio walking code.

I now think that block drivers shouldn't use bios directly.

Signed-off-by: Bartlomiej Zolnierkiewicz <[email protected]>
---

linux-2.6.9-rc1-bk10-bzolnier/drivers/block/ll_rw_blk.c | 114 ----------------
linux-2.6.9-rc1-bk10-bzolnier/include/linux/blkdev.h | 36 -----
2 files changed, 5 insertions(+), 145 deletions(-)

diff -puN drivers/block/ll_rw_blk.c~bio_walking drivers/block/ll_rw_blk.c
--- linux-2.6.9-rc1-bk10/drivers/block/ll_rw_blk.c~bio_walking 2004-09-08 19:59:00.551496200 +0200
+++ linux-2.6.9-rc1-bk10-bzolnier/drivers/block/ll_rw_blk.c 2004-09-08 19:59:00.566493920 +0200
@@ -2383,9 +2383,7 @@ again:
break;

bio->bi_next = req->bio;
- req->cbio = req->bio = bio;
- req->nr_cbio_segments = bio_segments(bio);
- req->nr_cbio_sectors = bio_sectors(bio);
+ req->bio = bio;

/*
* may not be valid. if the low level driver said
@@ -2457,11 +2455,9 @@ get_rq:
req->current_nr_sectors = req->hard_cur_sectors = cur_nr_sectors;
req->nr_phys_segments = bio_phys_segments(q, bio);
req->nr_hw_segments = bio_hw_segments(q, bio);
- req->nr_cbio_segments = bio_segments(bio);
- req->nr_cbio_sectors = bio_sectors(bio);
req->buffer = bio_data(bio); /* see ->buffer comment above */
req->waiting = NULL;
- req->cbio = req->bio = req->biotail = bio;
+ req->bio = req->biotail = bio;
req->rq_disk = bio->bi_bdev->bd_disk;
req->start_time = jiffies;

@@ -2642,83 +2638,6 @@ void submit_bio(int rw, struct bio *bio)

EXPORT_SYMBOL(submit_bio);

-/**
- * blk_rq_next_segment
- * @rq: the request being processed
- *
- * Description:
- * Points to the next segment in the request if the current segment
- * is complete. Leaves things unchanged if this segment is not over
- * or if no more segments are left in this request.
- *
- * Meant to be used for bio traversal during I/O submission
- * Does not affect any I/O completions or update completion state
- * in the request, and does not modify any bio fields.
- *
- * Decrementing rq->nr_sectors, rq->current_nr_sectors and
- * rq->nr_cbio_sectors as data is transferred is the caller's
- * responsibility and should be done before calling this routine.
- **/
-void blk_rq_next_segment(struct request *rq)
-{
- if (rq->current_nr_sectors > 0)
- return;
-
- if (rq->nr_cbio_sectors > 0) {
- --rq->nr_cbio_segments;
- rq->current_nr_sectors = blk_rq_vec(rq)->bv_len >> 9;
- } else {
- if ((rq->cbio = rq->cbio->bi_next)) {
- rq->nr_cbio_segments = bio_segments(rq->cbio);
- rq->nr_cbio_sectors = bio_sectors(rq->cbio);
- rq->current_nr_sectors = bio_cur_sectors(rq->cbio);
- }
- }
-
- /* remember the size of this segment before we start I/O */
- rq->hard_cur_sectors = rq->current_nr_sectors;
-}
-
-/**
- * process_that_request_first - process partial request submission
- * @req: the request being processed
- * @nr_sectors: number of sectors I/O has been submitted on
- *
- * Description:
- * May be used for processing bio's while submitting I/O without
- * signalling completion. Fails if more data is requested than is
- * available in the request in which case it doesn't advance any
- * pointers.
- *
- * Assumes a request is correctly set up. No sanity checks.
- *
- * Return:
- * 0 - no more data left to submit (not processed)
- * 1 - data available to submit for this request (processed)
- **/
-int process_that_request_first(struct request *req, unsigned int nr_sectors)
-{
- unsigned int nsect;
-
- if (req->nr_sectors < nr_sectors)
- return 0;
-
- req->nr_sectors -= nr_sectors;
- req->sector += nr_sectors;
- while (nr_sectors) {
- nsect = min_t(unsigned, req->current_nr_sectors, nr_sectors);
- req->current_nr_sectors -= nsect;
- nr_sectors -= nsect;
- if (req->cbio) {
- req->nr_cbio_sectors -= nsect;
- blk_rq_next_segment(req);
- }
- }
- return 1;
-}
-
-EXPORT_SYMBOL(process_that_request_first);
-
void blk_recalc_rq_segments(struct request *rq)
{
struct bio *bio, *prevbio = NULL;
@@ -2754,8 +2673,7 @@ void blk_recalc_rq_sectors(struct reques
rq->hard_nr_sectors -= nsect;

/*
- * Move the I/O submission pointers ahead if required,
- * i.e. for drivers not aware of rq->cbio.
+ * Move the I/O submission pointers ahead if required.
*/
if ((rq->nr_sectors >= rq->hard_nr_sectors) &&
(rq->sector <= rq->hard_sector)) {
@@ -2763,11 +2681,7 @@ void blk_recalc_rq_sectors(struct reques
rq->nr_sectors = rq->hard_nr_sectors;
rq->hard_cur_sectors = bio_cur_sectors(rq->bio);
rq->current_nr_sectors = rq->hard_cur_sectors;
- rq->nr_cbio_segments = bio_segments(rq->bio);
- rq->nr_cbio_sectors = bio_sectors(rq->bio);
rq->buffer = bio_data(rq->bio);
-
- rq->cbio = rq->bio;
}

/*
@@ -2979,33 +2893,13 @@ void blk_rq_bio_prep(request_queue_t *q,
rq->current_nr_sectors = bio_cur_sectors(bio);
rq->hard_cur_sectors = rq->current_nr_sectors;
rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio);
- rq->nr_cbio_segments = bio_segments(bio);
- rq->nr_cbio_sectors = bio_sectors(bio);
rq->buffer = bio_data(bio);

- rq->cbio = rq->bio = rq->biotail = bio;
+ rq->bio = rq->biotail = bio;
}

EXPORT_SYMBOL(blk_rq_bio_prep);

-void blk_rq_prep_restart(struct request *rq)
-{
- struct bio *bio;
-
- bio = rq->cbio = rq->bio;
- if (bio) {
- rq->nr_cbio_segments = bio_segments(bio);
- rq->nr_cbio_sectors = bio_sectors(bio);
- rq->hard_cur_sectors = bio_cur_sectors(bio);
- rq->buffer = bio_data(bio);
- }
- rq->sector = rq->hard_sector;
- rq->nr_sectors = rq->hard_nr_sectors;
- rq->current_nr_sectors = rq->hard_cur_sectors;
-}
-
-EXPORT_SYMBOL(blk_rq_prep_restart);
-
int kblockd_schedule_work(struct work_struct *work)
{
return queue_work(kblockd_workqueue, work);
diff -puN include/linux/blkdev.h~bio_walking include/linux/blkdev.h
--- linux-2.6.9-rc1-bk10/include/linux/blkdev.h~bio_walking 2004-09-08 19:59:00.553495896 +0200
+++ linux-2.6.9-rc1-bk10-bzolnier/include/linux/blkdev.h 2004-09-08 19:59:00.567493768 +0200
@@ -107,13 +107,7 @@ struct request {
/* no. of sectors left to complete in the current segment */
unsigned int hard_cur_sectors;

- /* no. of segments left to submit in the current bio */
- unsigned short nr_cbio_segments;
- /* no. of sectors left to submit in the current bio */
- unsigned long nr_cbio_sectors;
-
- struct bio *cbio; /* next bio to submit */
- struct bio *bio; /* next unfinished bio to complete */
+ struct bio *bio;
struct bio *biotail;

void *elevator_private;
@@ -444,32 +438,6 @@ static inline void blk_clear_queue_full(
*/
#define blk_queue_headactive(q, head_active)

-/* current index into bio being processed for submission */
-#define blk_rq_idx(rq) ((rq)->cbio->bi_vcnt - (rq)->nr_cbio_segments)
-
-/* current bio vector being processed */
-#define blk_rq_vec(rq) (bio_iovec_idx((rq)->cbio, blk_rq_idx(rq)))
-
-/* current offset with respect to start of the segment being submitted */
-#define blk_rq_offset(rq) \
- (((rq)->hard_cur_sectors - (rq)->current_nr_sectors) << 9)
-
-/*
- * temporarily mapping a (possible) highmem bio (typically for PIO transfer)
- */
-
-/* Assumes rq->cbio != NULL */
-static inline char * rq_map_buffer(struct request *rq, unsigned long *flags)
-{
- return (__bio_kmap_irq(rq->cbio, blk_rq_idx(rq), flags)
- + blk_rq_offset(rq));
-}
-
-static inline void rq_unmap_buffer(char *buffer, unsigned long *flags)
-{
- __bio_kunmap_irq(buffer, flags);
-}
-
/*
* q->prep_rq_fn return values
*/
@@ -568,7 +536,6 @@ static inline void blk_run_address_space
extern int end_that_request_first(struct request *, int, int);
extern int end_that_request_chunk(struct request *, int, int);
extern void end_that_request_last(struct request *);
-extern int process_that_request_first(struct request *, unsigned int);
extern void end_request(struct request *req, int uptodate);

/*
@@ -637,7 +604,6 @@ extern void blk_queue_invalidate_tags(re
extern long blk_congestion_wait(int rw, long timeout);

extern void blk_rq_bio_prep(request_queue_t *, struct request *, struct bio *);
-extern void blk_rq_prep_restart(struct request *);
extern int blkdev_issue_flush(struct block_device *, sector_t *);

#define MAX_PHYS_SEGMENTS 128
_


2004-09-09 08:03:23

by Russell King

[permalink] [raw]
Subject: Re: [patch][9/9] block: remove bio walking

On Wed, Sep 08, 2004 at 09:27:04PM +0200, Bartlomiej Zolnierkiewicz wrote:
> [patch] block: remove bio walking
>
> IDE driver was the only user of bio walking code.

The MMC driver also uses this. Please don't remove.

--
Russell King
Linux kernel 2.6 ARM Linux - http://www.arm.linux.org.uk/
maintainer of: 2.6 PCMCIA - http://pcmcia.arm.linux.org.uk/
2.6 Serial core

Subject: Re: [patch][9/9] block: remove bio walking

On Thursday 09 September 2004 10:03, Russell King wrote:
> On Wed, Sep 08, 2004 at 09:27:04PM +0200, Bartlomiej Zolnierkiewicz wrote:
> > [patch] block: remove bio walking
> >
> > IDE driver was the only user of bio walking code.

was in -bk10 :-(

> The MMC driver also uses this. Please don't remove.

OK I'll just drop this patch but can't we also use scatterlists in MMC?

The point is that I now think bio walking was a mistake and accessing
bios directly from low-level drivers is a layering violation (thus
all the added complexity). Moreover with fixed IDE PIO and without
bio walking code it should be possible to shrink struct request by
removing all "current" entries.

Bartlomiej

2004-09-09 14:07:15

by Russell King

[permalink] [raw]
Subject: Re: [patch][9/9] block: remove bio walking

On Thu, Sep 09, 2004 at 03:53:13PM +0200, Bartlomiej Zolnierkiewicz wrote:
> On Thursday 09 September 2004 10:03, Russell King wrote:
> > On Wed, Sep 08, 2004 at 09:27:04PM +0200, Bartlomiej Zolnierkiewicz wrote:
> > > [patch] block: remove bio walking
> > >
> > > IDE driver was the only user of bio walking code.
>
> was in -bk10 :-(
>
> > The MMC driver also uses this. Please don't remove.
>
> OK I'll just drop this patch but can't we also use scatterlists in MMC?
>
> The point is that I now think bio walking was a mistake and accessing
> bios directly from low-level drivers is a layering violation (thus
> all the added complexity). Moreover with fixed IDE PIO and without
> bio walking code it should be possible to shrink struct request by
> removing all "current" entries.

I'm wondering whether it is legal to map onto SG lists and then do PIO.
Provided we don't end up using the DMA API and then using PIO to the
original pages, it should work.

I would rather Jens considered your point first before rewriting code.

However, using the SG lists does finally provide us with a nice way to
ensure that we have the right information to finally fix IDE wrt the
PIO cache issues (dirty cache lines being left in the page cache.)

--
Russell King
Linux kernel 2.6 ARM Linux - http://www.arm.linux.org.uk/
maintainer of: 2.6 PCMCIA - http://pcmcia.arm.linux.org.uk/
2.6 Serial core

Subject: Re: [patch][9/9] block: remove bio walking

On Thursday 09 September 2004 16:04, Russell King wrote:
> On Thu, Sep 09, 2004 at 03:53:13PM +0200, Bartlomiej Zolnierkiewicz wrote:
> > On Thursday 09 September 2004 10:03, Russell King wrote:
> > > On Wed, Sep 08, 2004 at 09:27:04PM +0200, Bartlomiej Zolnierkiewicz wrote:
> > > > [patch] block: remove bio walking
> > > >
> > > > IDE driver was the only user of bio walking code.
> >
> > was in -bk10 :-(
> >
> > > The MMC driver also uses this. Please don't remove.
> >
> > OK I'll just drop this patch but can't we also use scatterlists in MMC?
> >
> > The point is that I now think bio walking was a mistake and accessing
> > bios directly from low-level drivers is a layering violation (thus
> > all the added complexity). Moreover with fixed IDE PIO and without
> > bio walking code it should be possible to shrink struct request by
> > removing all "current" entries.
>
> I'm wondering whether it is legal to map onto SG lists and then do PIO.
> Provided we don't end up using the DMA API and then using PIO to the
> original pages, it should work.

Yes, it actually works fine. See the other patches from the patchkit. :-)

While at it: AFAICS libata does pci_[un]map_sg() for PIO which is wrong.

> I would rather Jens considered your point first before rewriting code.

OK

> However, using the SG lists does finally provide us with a nice way to
> ensure that we have the right information to finally fix IDE wrt the
> PIO cache issues (dirty cache lines being left in the page cache.)

Could you explain the issue a bit more?

2004-09-09 14:32:41

by Jens Axboe

[permalink] [raw]
Subject: Re: [patch][9/9] block: remove bio walking

On Thu, Sep 09 2004, Bartlomiej Zolnierkiewicz wrote:
> On Thursday 09 September 2004 10:03, Russell King wrote:
> > On Wed, Sep 08, 2004 at 09:27:04PM +0200, Bartlomiej Zolnierkiewicz wrote:
> > > [patch] block: remove bio walking
> > >
> > > IDE driver was the only user of bio walking code.
>
> was in -bk10 :-(
>
> > The MMC driver also uses this. Please don't remove.
>
> OK I'll just drop this patch but can't we also use scatterlists in MMC?
>
> The point is that I now think bio walking was a mistake and accessing
> bios directly from low-level drivers is a layering violation (thus
> all the added complexity). Moreover with fixed IDE PIO and without
> bio walking code it should be possible to shrink struct request by
> removing all "current" entries.

I agree, it's much nicer and it allows to drop the cbio fields in struct
request which is good.

--
Jens Axboe

2004-09-09 14:34:14

by Jens Axboe

[permalink] [raw]
Subject: Re: [patch][9/9] block: remove bio walking

On Thu, Sep 09 2004, Russell King wrote:
> On Thu, Sep 09, 2004 at 03:53:13PM +0200, Bartlomiej Zolnierkiewicz wrote:
> > On Thursday 09 September 2004 10:03, Russell King wrote:
> > > On Wed, Sep 08, 2004 at 09:27:04PM +0200, Bartlomiej Zolnierkiewicz wrote:
> > > > [patch] block: remove bio walking
> > > >
> > > > IDE driver was the only user of bio walking code.
> >
> > was in -bk10 :-(
> >
> > > The MMC driver also uses this. Please don't remove.
> >
> > OK I'll just drop this patch but can't we also use scatterlists in MMC?
> >
> > The point is that I now think bio walking was a mistake and accessing
> > bios directly from low-level drivers is a layering violation (thus
> > all the added complexity). Moreover with fixed IDE PIO and without
> > bio walking code it should be possible to shrink struct request by
> > removing all "current" entries.
>
> I'm wondering whether it is legal to map onto SG lists and then do PIO.
> Provided we don't end up using the DMA API and then using PIO to the
> original pages, it should work.

Will be fine.

> I would rather Jens considered your point first before rewriting code.
>
> However, using the SG lists does finally provide us with a nice way to
> ensure that we have the right information to finally fix IDE wrt the
> PIO cache issues (dirty cache lines being left in the page cache.)

You can do this directly from the bio. Probably mmc should just do a
make_request_fn hook instead and just fill in a scatter gather table
directly and submit.

--
Jens Axboe

2004-09-09 14:54:39

by Russell King

[permalink] [raw]
Subject: Re: [patch][9/9] block: remove bio walking

On Thu, Sep 09, 2004 at 04:28:25PM +0200, Bartlomiej Zolnierkiewicz wrote:
> On Thursday 09 September 2004 16:04, Russell King wrote:
> > On Thu, Sep 09, 2004 at 03:53:13PM +0200, Bartlomiej Zolnierkiewicz wrote:
> > > On Thursday 09 September 2004 10:03, Russell King wrote:
> > > > On Wed, Sep 08, 2004 at 09:27:04PM +0200, Bartlomiej Zolnierkiewicz wrote:
> > > > > [patch] block: remove bio walking
> > > > >
> > > > > IDE driver was the only user of bio walking code.
> > >
> > > was in -bk10 :-(
> > >
> > > > The MMC driver also uses this. Please don't remove.
> > >
> > > OK I'll just drop this patch but can't we also use scatterlists in MMC?
> > >
> > > The point is that I now think bio walking was a mistake and accessing
> > > bios directly from low-level drivers is a layering violation (thus
> > > all the added complexity). Moreover with fixed IDE PIO and without
> > > bio walking code it should be possible to shrink struct request by
> > > removing all "current" entries.
> >
> > I'm wondering whether it is legal to map onto SG lists and then do PIO.
> > Provided we don't end up using the DMA API and then using PIO to the
> > original pages, it should work.
>
> Yes, it actually works fine. See the other patches from the patchkit. :-)

Actually, if you've only tested x86, you don't know if it works fine.
x86 is a rather benign architecture when it comes to testing whether
various interfaces are being used correctly.

> > However, using the SG lists does finally provide us with a nice way to
> > ensure that we have the right information to finally fix IDE wrt the
> > PIO cache issues (dirty cache lines being left in the page cache.)
>
> Could you explain the issue a bit more?

Essentially, kernel PIO writes data into the page cache, and that action
may leave data in the CPU's caches. Since the kernels mappings may not
be coherent with mappings in userspace, data written to the kernel
mappings may remain in the data cache, and stale data would be visible
to user space.

There has been talk about using flush_dcache_page() to resolve
this issue, but I'm not sure what the outcome was. Certainly
flush_dcache_page() is supposed to be used before the data in the
kernels page cache is read or written.

See Documentation/cachetlb.txt for further information on this
interface.

--
Russell King
Linux kernel 2.6 ARM Linux - http://www.arm.linux.org.uk/
maintainer of: 2.6 PCMCIA - http://pcmcia.arm.linux.org.uk/
2.6 Serial core

2004-09-09 15:48:59

by Jens Axboe

[permalink] [raw]
Subject: Re: [patch][9/9] block: remove bio walking

On Thu, Sep 09 2004, Russell King wrote:
> Essentially, kernel PIO writes data into the page cache, and that action
> may leave data in the CPU's caches. Since the kernels mappings may not
> be coherent with mappings in userspace, data written to the kernel
> mappings may remain in the data cache, and stale data would be visible
> to user space.
>
> There has been talk about using flush_dcache_page() to resolve
> this issue, but I'm not sure what the outcome was. Certainly
> flush_dcache_page() is supposed to be used before the data in the
> kernels page cache is read or written.

Have you ever tested bouncing on arm? It seems to be lacking a
flush_dcache_page() indeed, how does this look?

===== mm/highmem.c 1.51 vs edited =====
--- 1.51/mm/highmem.c 2004-07-29 06:58:32 +02:00
+++ edited/mm/highmem.c 2004-09-09 17:44:14 +02:00
@@ -301,6 +301,7 @@
vfrom = page_address(fromvec->bv_page) + tovec->bv_offset;

bounce_copy_vec(tovec, vfrom);
+ flush_dcache_page(tovec->bv_page);
}
}


--
Jens Axboe

2004-09-09 15:57:30

by Jens Axboe

[permalink] [raw]
Subject: Re: [patch][9/9] block: remove bio walking

On Thu, Sep 09 2004, Jens Axboe wrote:
> On Thu, Sep 09 2004, Russell King wrote:
> > Essentially, kernel PIO writes data into the page cache, and that action
> > may leave data in the CPU's caches. Since the kernels mappings may not
> > be coherent with mappings in userspace, data written to the kernel
> > mappings may remain in the data cache, and stale data would be visible
> > to user space.
> >
> > There has been talk about using flush_dcache_page() to resolve
> > this issue, but I'm not sure what the outcome was. Certainly
> > flush_dcache_page() is supposed to be used before the data in the
> > kernels page cache is read or written.
>
> Have you ever tested bouncing on arm? It seems to be lacking a
> flush_dcache_page() indeed, how does this look?
>
> ===== mm/highmem.c 1.51 vs edited =====
> --- 1.51/mm/highmem.c 2004-07-29 06:58:32 +02:00
> +++ edited/mm/highmem.c 2004-09-09 17:44:14 +02:00
> @@ -301,6 +301,7 @@
> vfrom = page_address(fromvec->bv_page) + tovec->bv_offset;
>
> bounce_copy_vec(tovec, vfrom);
> + flush_dcache_page(tovec->bv_page);
> }
> }

Not even enough, here's a better one.

===== mm/highmem.c 1.51 vs edited =====
--- 1.51/mm/highmem.c 2004-07-29 06:58:32 +02:00
+++ edited/mm/highmem.c 2004-09-09 17:47:58 +02:00
@@ -300,6 +300,7 @@
*/
vfrom = page_address(fromvec->bv_page) + tovec->bv_offset;

+ flush_dcache_page(tovec->bv_page);
bounce_copy_vec(tovec, vfrom);
}
}
@@ -406,6 +407,7 @@
if (rw == WRITE) {
char *vto, *vfrom;

+ flush_dcache_page(from->bv_page);
vto = page_address(to->bv_page) + to->bv_offset;
vfrom = kmap(from->bv_page) + from->bv_offset;
memcpy(vto, vfrom, to->bv_len);

--
Jens Axboe

2004-09-09 16:02:06

by David Miller

[permalink] [raw]
Subject: Re: [patch][9/9] block: remove bio walking

On Thu, 9 Sep 2004 17:44:53 +0200
Jens Axboe <[email protected]> wrote:

> On Thu, Sep 09 2004, Russell King wrote:
> > Essentially, kernel PIO writes data into the page cache, and that action
> > may leave data in the CPU's caches. Since the kernels mappings may not
> > be coherent with mappings in userspace, data written to the kernel
> > mappings may remain in the data cache, and stale data would be visible
> > to user space.
> >
> > There has been talk about using flush_dcache_page() to resolve
> > this issue, but I'm not sure what the outcome was. Certainly
> > flush_dcache_page() is supposed to be used before the data in the
> > kernels page cache is read or written.
>
> Have you ever tested bouncing on arm? It seems to be lacking a
> flush_dcache_page() indeed, how does this look?

This looks like a good fix to me too.

Subject: Re: [patch][9/9] block: remove bio walking

On Thursday 09 September 2004 16:54, Russell King wrote:
> On Thu, Sep 09, 2004 at 04:28:25PM +0200, Bartlomiej Zolnierkiewicz wrote:
> > On Thursday 09 September 2004 16:04, Russell King wrote:
> > > On Thu, Sep 09, 2004 at 03:53:13PM +0200, Bartlomiej Zolnierkiewicz wrote:
> > > > On Thursday 09 September 2004 10:03, Russell King wrote:
> > > > > On Wed, Sep 08, 2004 at 09:27:04PM +0200, Bartlomiej Zolnierkiewicz wrote:
> > > > > > [patch] block: remove bio walking
> > > > > >
> > > > > > IDE driver was the only user of bio walking code.
> > > >
> > > > was in -bk10 :-(
> > > >
> > > > > The MMC driver also uses this. Please don't remove.
> > > >
> > > > OK I'll just drop this patch but can't we also use scatterlists in MMC?
> > > >
> > > > The point is that I now think bio walking was a mistake and accessing
> > > > bios directly from low-level drivers is a layering violation (thus
> > > > all the added complexity). Moreover with fixed IDE PIO and without
> > > > bio walking code it should be possible to shrink struct request by
> > > > removing all "current" entries.
> > >
> > > I'm wondering whether it is legal to map onto SG lists and then do PIO.
> > > Provided we don't end up using the DMA API and then using PIO to the
> > > original pages, it should work.
> >
> > Yes, it actually works fine. See the other patches from the patchkit. :-)
>
> Actually, if you've only tested x86, you don't know if it works fine.
> x86 is a rather benign architecture when it comes to testing whether
> various interfaces are being used correctly.

x86 only

This forced me into rechecking everything and I found one issue:
in ide_pio_sector() sg->length should be used instead of sg_dma_len(sg).

With this fixed patchkit should work on any arch
(testing and comments are welcomed of course).

> > > However, using the SG lists does finally provide us with a nice way to
> > > ensure that we have the right information to finally fix IDE wrt the
> > > PIO cache issues (dirty cache lines being left in the page cache.)
> >
> > Could you explain the issue a bit more?
>
> Essentially, kernel PIO writes data into the page cache, and that action
> may leave data in the CPU's caches. Since the kernels mappings may not
> be coherent with mappings in userspace, data written to the kernel
> mappings may remain in the data cache, and stale data would be visible
> to user space.
>
> There has been talk about using flush_dcache_page() to resolve
> this issue, but I'm not sure what the outcome was. Certainly
> flush_dcache_page() is supposed to be used before the data in the
> kernels page cache is read or written.
>
> See Documentation/cachetlb.txt for further information on this
> interface.

Thanks, now I understand the problem.