2007-12-11 22:40:50

by Kiyoshi Ueda

Subject: [PATCH 01/30] blk_end_request: add new request completion interface (take 4)

This patch adds 2 new interfaces for request completion:
o blk_end_request() : called without queue lock
o __blk_end_request() : called with queue lock held

blk_end_request takes 'error' as an argument instead of 'uptodate',
which the current end_that_request_* functions take.
The meaning of each value is given below; the value is also used
when the bio is completed.
0 : success
< 0 : error
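
For instance (a sketch; nr_bytes stands for the number of bytes the
driver has finished):

	blk_end_request(rq, 0, nr_bytes);	/* success */
	blk_end_request(rq, -EIO, nr_bytes);	/* I/O error */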

Some device drivers call the generic functions below between
end_that_request_{first/chunk}() and end_that_request_last():
o add_disk_randomness()
o blk_queue_end_tag()
o blkdev_dequeue_request()
These are now called from within the blk_end_request interfaces as part
of generic request completion, so every device driver gets them without
calling them explicitly.
To decide whether to call blkdev_dequeue_request(), blk_end_request
checks list_empty(&rq->queuelist) (a blk_queued_rq() macro is added
for this). So a driver that uses rq->queuelist for its own purposes
must re-initialize it, e.g. with INIT_LIST_HEAD(), before calling
blk_end_request.
(Currently, no driver completes a request without re-initializing the
queuelist after using it, so rq->queuelist can be used this way.)
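
For example, a hypothetical driver that threads requests onto a private
list through rq->queuelist would restore the node before completing (a
sketch; the mydrv_* name and the private list are illustrative, not part
of this patch):

	static void mydrv_complete(struct request *rq, int error, int nr_bytes)
	{
		/*
		 * rq sits on a driver-private list through rq->queuelist.
		 * list_del_init() leaves the node empty, so blk_queued_rq()
		 * reports the request as already dequeued and blk_end_request
		 * will not call blkdev_dequeue_request() on it.
		 */
		list_del_init(&rq->queuelist);

		blk_end_request(rq, error, nr_bytes);
	}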

"Normal" drivers can be converted to use blk_end_request()
in one of the standard ways shown below.

a) end_that_request_{chunk/first}
spin_lock_irqsave()
(add_disk_randomness(), blk_queue_end_tag(), blkdev_dequeue_request())
end_that_request_last()
spin_unlock_irqrestore()
=> blk_end_request()

b) spin_lock_irqsave()
end_that_request_{chunk/first}
(add_disk_randomness(), blk_queue_end_tag(), blkdev_dequeue_request())
end_that_request_last()
spin_unlock_irqrestore()
=> spin_lock_irqsave()
__blk_end_request()
spin_unlock_irqrestore()

c) spin_lock_irqsave()
(add_disk_randomness(), blk_queue_end_tag(), blkdev_dequeue_request())
end_that_request_last()
spin_unlock_irqrestore()
=> blk_end_request() or spin_lock_irqsave()
__blk_end_request()
spin_unlock_irqrestore()
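
As a concrete illustration of pattern a), a driver's completion function
might change as below (a sketch; mydrv_done() is a made-up name, and the
'uptodate' convention is replaced by the 0 / < 0 'error' convention):

	/* before */
	static void mydrv_done(struct request *rq, int uptodate, int nr_bytes)
	{
		unsigned long flags;

		if (end_that_request_chunk(rq, uptodate, nr_bytes))
			return;		/* buffers still pending */

		spin_lock_irqsave(rq->q->queue_lock, flags);
		add_disk_randomness(rq->rq_disk);
		blkdev_dequeue_request(rq);
		end_that_request_last(rq, uptodate);
		spin_unlock_irqrestore(rq->q->queue_lock, flags);
	}

	/* after: one call, no locking in the driver */
	static void mydrv_done(struct request *rq, int error, int nr_bytes)
	{
		blk_end_request(rq, error, nr_bytes);
	}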

Signed-off-by: Kiyoshi Ueda <[email protected]>
Signed-off-by: Jun'ichi Nomura <[email protected]>
---
block/ll_rw_blk.c | 96 +++++++++++++++++++++++++++++++++++++++++++++++++
include/linux/blkdev.h | 4 ++
2 files changed, 100 insertions(+)

Index: 2.6.24-rc4/block/ll_rw_blk.c
===================================================================
--- 2.6.24-rc4.orig/block/ll_rw_blk.c
+++ 2.6.24-rc4/block/ll_rw_blk.c
@@ -3769,6 +3769,102 @@ void end_request(struct request *req, in
 }
 EXPORT_SYMBOL(end_request);
 
+static void complete_request(struct request *rq, int error)
+{
+	/*
+	 * REMOVEME: This conversion is transitional and will be removed
+	 * when old end_that_request_* are unexported.
+	 */
+	int uptodate = 1;
+	if (error)
+		uptodate = (error == -EIO) ? 0 : error;
+
+	if (blk_rq_tagged(rq))
+		blk_queue_end_tag(rq->q, rq);
+
+	if (blk_queued_rq(rq))
+		blkdev_dequeue_request(rq);
+
+	end_that_request_last(rq, uptodate);
+}
+
+/**
+ * blk_end_request - Helper function for drivers to complete the request.
+ * @rq:       the request being processed
+ * @error:    0 for success, < 0 for error
+ * @nr_bytes: number of bytes to complete
+ *
+ * Description:
+ *     Ends I/O on a number of bytes attached to @rq.
+ *     If @rq has leftover, sets it up for the next range of segments.
+ *
+ * Return:
+ *     0 - we are done with this request
+ *     1 - still buffers pending for this request
+ **/
+int blk_end_request(struct request *rq, int error, int nr_bytes)
+{
+	struct request_queue *q = rq->q;
+	unsigned long flags = 0UL;
+	/*
+	 * REMOVEME: This conversion is transitional and will be removed
+	 * when old end_that_request_* are unexported.
+	 */
+	int uptodate = 1;
+	if (error)
+		uptodate = (error == -EIO) ? 0 : error;
+
+	if (blk_fs_request(rq) || blk_pc_request(rq)) {
+		if (__end_that_request_first(rq, uptodate, nr_bytes))
+			return 1;
+	}
+
+	add_disk_randomness(rq->rq_disk);
+
+	spin_lock_irqsave(q->queue_lock, flags);
+	complete_request(rq, error);
+	spin_unlock_irqrestore(q->queue_lock, flags);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(blk_end_request);
+
+/**
+ * __blk_end_request - Helper function for drivers to complete the request.
+ * @rq:       the request being processed
+ * @error:    0 for success, < 0 for error
+ * @nr_bytes: number of bytes to complete
+ *
+ * Description:
+ *     Must be called with queue lock held unlike blk_end_request().
+ *
+ * Return:
+ *     0 - we are done with this request
+ *     1 - still buffers pending for this request
+ **/
+int __blk_end_request(struct request *rq, int error, int nr_bytes)
+{
+	/*
+	 * REMOVEME: This conversion is transitional and will be removed
+	 * when old end_that_request_* are unexported.
+	 */
+	int uptodate = 1;
+	if (error)
+		uptodate = (error == -EIO) ? 0 : error;
+
+	if (blk_fs_request(rq) || blk_pc_request(rq)) {
+		if (__end_that_request_first(rq, uptodate, nr_bytes))
+			return 1;
+	}
+
+	add_disk_randomness(rq->rq_disk);
+
+	complete_request(rq, error);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(__blk_end_request);
+
 static void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
 			    struct bio *bio)
 {
Index: 2.6.24-rc4/include/linux/blkdev.h
===================================================================
--- 2.6.24-rc4.orig/include/linux/blkdev.h
+++ 2.6.24-rc4/include/linux/blkdev.h
@@ -539,6 +539,8 @@ enum {
 #define blk_fua_rq(rq)		((rq)->cmd_flags & REQ_FUA)
 #define blk_bidi_rq(rq)		((rq)->next_rq != NULL)
 #define blk_empty_barrier(rq)	(blk_barrier_rq(rq) && blk_fs_request(rq) && !(rq)->hard_nr_sectors)
+/* rq->queuelist of dequeued request must be list_empty() */
+#define blk_queued_rq(rq)	(!list_empty(&(rq)->queuelist))
 
 #define list_entry_rq(ptr)	list_entry((ptr), struct request, queuelist)
 
@@ -726,6 +728,8 @@ static inline void blk_run_address_space
  * for parts of the original function. This prevents
  * code duplication in drivers.
  */
+extern int blk_end_request(struct request *rq, int error, int nr_bytes);
+extern int __blk_end_request(struct request *rq, int error, int nr_bytes);
 extern int end_that_request_first(struct request *, int, int);
 extern int end_that_request_chunk(struct request *, int, int);
 extern void end_that_request_last(struct request *, int);
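
For completion paths that already hold the queue lock (pattern b), the
locked variant is used instead; a sketch, assuming the blk_rq_bytes()
helper added elsewhere in this patch-set and a made-up mydrv_ prefix:

	/* called with q->queue_lock held, e.g. from the request_fn */
	static void mydrv_end_whole_request(struct request *rq, int error)
	{
		/* complete every byte of rq, so no leftover is expected */
		__blk_end_request(rq, error, blk_rq_bytes(rq));
	}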


2007-12-12 12:54:00

by James Bottomley

Subject: Re: [PATCH 01/30] blk_end_request: add new request completion interface (take 4)


On Tue, 2007-12-11 at 17:40 -0500, Kiyoshi Ueda wrote:
> This patch adds 2 new interfaces for request completion:
> o blk_end_request() : called without queue lock
> o __blk_end_request() : called with queue lock held
>
> blk_end_request takes 'error' as an argument instead of 'uptodate',
> which the current end_that_request_* functions take.
> The meaning of each value is given below; the value is also used
> when the bio is completed.
> 0 : success
> < 0 : error
>
> Some device drivers call the generic functions below between
> end_that_request_{first/chunk}() and end_that_request_last():
> o add_disk_randomness()
> o blk_queue_end_tag()
> o blkdev_dequeue_request()

If we can roll the whole thing together, that would be nice. However,
the way you're doing it with this patch, we now have an asymmetrical
interface: The request routine must explicitly start the tag, but now
doesn't have to end it.

We really need symmetry. Either go back to start tag/end tag, or absorb
the whole lot into the block infrastructure.
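
Concretely, the asymmetry looks like this in a driver (a sketch; 'error'
and 'nr_bytes' stand for the driver's own values):

	/* request_fn: the tag must still be started by hand ... */
	if (blk_queue_tagged(q))
		blk_queue_start_tag(q, req);

	/* ... but completion now ends the tag implicitly inside the
	 * block layer, with no matching blk_queue_end_tag() here */
	blk_end_request(req, error, nr_bytes);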

The original reason for the explicit start/end is that there are some
requests on a tagged device that aren't able to be tagged by the block
layer (some devices reserve tag numbers for specific meanings).
However, I don't think there's any driver that actually implemented this
feature.

James

2007-12-13 01:32:56

by Kiyoshi Ueda

Subject: Re: [PATCH 01/30] blk_end_request: add new request completion interface (take 4)

Hi James, Jens,

On Wed, 12 Dec 2007 07:53:36 -0500, James Bottomley wrote:
> On Tue, 2007-12-11 at 17:40 -0500, Kiyoshi Ueda wrote:
> > This patch adds 2 new interfaces for request completion:
> > o blk_end_request() : called without queue lock
> > o __blk_end_request() : called with queue lock held
> >
> > blk_end_request takes 'error' as an argument instead of 'uptodate',
> > which the current end_that_request_* functions take.
> > The meaning of each value is given below; the value is also used
> > when the bio is completed.
> > 0 : success
> > < 0 : error
> >
> > Some device drivers call the generic functions below between
> > end_that_request_{first/chunk}() and end_that_request_last():
> > o add_disk_randomness()
> > o blk_queue_end_tag()
> > o blkdev_dequeue_request()
>
> If we can roll the whole thing together, that would be nice. However,
> the way you're doing it with this patch, we now have an asymmetrical
> interface: The request routine must explicitly start the tag, but now
> doesn't have to end it.
>
> We really need symmetry. Either go back to start tag/end tag, or absorb
> the whole lot into the block infrastructure.
>
> The original reason for the explicit start/end is that there are some
> requests on a tagged device that aren't able to be tagged by the block
> layer (some devices reserve tag numbers for specific meanings).
> However, I don't think there's any driver that actually implemented this
> feature.

As far as I have investigated in 2.6.24-rc5, only scsi uses blk_queue_tag,
and no file in drivers/scsi reserves tag_index entries in Scsi_Host->bqt.
So I would like to take the "absorb the start tag into the block layer"
approach.

The patch below is on top of the blk_end_request patch-set.
Is it acceptable?

Thanks,
Kiyoshi Ueda



Subject: [PATCH 31/31] blk_end_request: merge start_tag to block layer


This patch merges blk_queue_start_tag() into blkdev_dequeue_request().

blk_queue_start_tag() and blk_queue_end_tag() are a pair of
interfaces for starting/ending request tagging.
Since, with the new blk_end_request interfaces, blk_queue_end_tag()
is done implicitly in the block layer, blk_queue_start_tag() should
be done in the block layer too, to keep the interface symmetric.

Originally, starting/ending the tag was not done by the block layer,
so that drivers could choose tag numbers with their own specific
meanings. But no driver uses that feature.
SCSI is the only user of block layer tagging, and it uses
the generic blk_queue_start/end_tag.
So moving the start/end tag into the block layer is not an issue now.
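
With this change, a driver sees a symmetric interface again; a sketch of
the resulting flow ('req', 'error' and 'nr_bytes' stand for the driver's
own variables):

	/* request_fn, queue lock held: dequeueing starts the tag when
	 * the queue is tagged (see the new blkdev_dequeue_request()) */
	blkdev_dequeue_request(req);

	/* completion: the blk_end_request interfaces end the tag */
	blk_end_request(req, error, nr_bytes);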


Cc: James Bottomley <[email protected]>
Signed-off-by: Kiyoshi Ueda <[email protected]>
Signed-off-by: Jun'ichi Nomura <[email protected]>
---
block/ll_rw_blk.c | 2 +-
drivers/scsi/scsi_lib.c | 3 +--
include/linux/blkdev.h | 11 ++++++-----
3 files changed, 8 insertions(+), 8 deletions(-)

Index: 2.6.24-rc5/block/ll_rw_blk.c
===================================================================
--- 2.6.24-rc5.orig/block/ll_rw_blk.c
+++ 2.6.24-rc5/block/ll_rw_blk.c
@@ -1115,7 +1115,7 @@ int blk_queue_start_tag(struct request_q
 	rq->cmd_flags |= REQ_QUEUED;
 	rq->tag = tag;
 	bqt->tag_index[tag] = rq;
-	blkdev_dequeue_request(rq);
+	elv_dequeue_request(q, rq);
 	list_add(&rq->queuelist, &q->tag_busy_list);
 	bqt->busy++;
 	return 0;
Index: 2.6.24-rc5/drivers/scsi/scsi_lib.c
===================================================================
--- 2.6.24-rc5.orig/drivers/scsi/scsi_lib.c
+++ 2.6.24-rc5/drivers/scsi/scsi_lib.c
@@ -1530,8 +1530,7 @@ static void scsi_request_fn(struct reque
 		/*
 		 * Remove the request from the request list.
 		 */
-		if (!(blk_queue_tagged(q) && !blk_queue_start_tag(q, req)))
-			blkdev_dequeue_request(req);
+		blkdev_dequeue_request(req);
 		sdev->device_busy++;
 
 		spin_unlock(q->queue_lock);
Index: 2.6.24-rc5/include/linux/blkdev.h
===================================================================
--- 2.6.24-rc5.orig/include/linux/blkdev.h
+++ 2.6.24-rc5/include/linux/blkdev.h
@@ -747,11 +747,6 @@ extern void blk_complete_request(struct
 extern unsigned int blk_rq_bytes(struct request *rq);
 extern unsigned int blk_rq_cur_bytes(struct request *rq);
 
-static inline void blkdev_dequeue_request(struct request *req)
-{
-	elv_dequeue_request(req->q, req);
-}
-
 /*
  * Access functions for manipulating queue properties
  */
@@ -814,6 +809,12 @@ static inline struct request *blk_map_qu
 	return bqt->tag_index[tag];
 }
 
+static inline void blkdev_dequeue_request(struct request *req)
+{
+	if (!(blk_queue_tagged(req->q) && !blk_queue_start_tag(req->q, req)))
+		elv_dequeue_request(req->q, req);
+}
+
 extern int blkdev_issue_flush(struct block_device *, sector_t *);
 
 #define MAX_PHYS_SEGMENTS	128