This patch was posted last year and if I remember correctly, Jens said
he is OK with the patch. In function __generic_unplug_device(), the kernel
can use a cheaper function, elv_queue_empty(), instead of the more expensive
elv_next_request() to find out whether the queue is empty or not. blk_run_queue()
can also be made conditional on the queue's emptiness before calling
request_fn().
Signed-off-by: Ken Chen <[email protected]>
--- linux-2.6.11/drivers/block/ll_rw_blk.c.orig 2005-03-28 18:22:26.000000000 -0800
+++ linux-2.6.11/drivers/block/ll_rw_blk.c 2005-03-28 18:44:46.000000000 -0800
@@ -1459,7 +1459,7 @@ void __generic_unplug_device(request_que
/*
* was plugged, fire request_fn if queue has stuff to do
*/
- if (elv_next_request(q))
+ if (!elv_queue_empty(q))
q->request_fn(q);
}
EXPORT_SYMBOL(__generic_unplug_device);
@@ -1589,7 +1589,8 @@ void blk_run_queue(struct request_queue
spin_lock_irqsave(q->queue_lock, flags);
blk_remove_plug(q);
- q->request_fn(q);
+ if (!elv_queue_empty(q))
+ q->request_fn(q);
spin_unlock_irqrestore(q->queue_lock, flags);
}
EXPORT_SYMBOL(blk_run_queue);
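For context, a rough sketch of why elv_queue_empty() is the cheaper test
(reconstructed from memory of the 2.6.11-era elevator code, not a verbatim
copy): it only asks the elevator whether anything is queued, whereas
elv_next_request() does real dispatch work just to answer the same question
on the unplug path.

	/* simplified sketch, drivers/block/elevator.c circa 2.6.11 */
	int elv_queue_empty(request_queue_t *q)
	{
		elevator_t *e = q->elevator;

		/* ask the io scheduler if it provides the hook... */
		if (e->ops->elevator_queue_empty_fn)
			return e->ops->elevator_queue_empty_fn(q);

		/* ...otherwise just look at the dispatch list */
		return list_empty(&q->queue_head);
	}

	/*
	 * elv_next_request(), by contrast, walks the queue pulling the next
	 * request, running the driver's prep_rq_fn and marking requests
	 * started before it can return anything - far more work than an
	 * emptiness check needs on the unplug path.
	 */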
On Mon, Mar 28 2005, Chen, Kenneth W wrote:
> This patch was posted last year and if I remember correctly, Jens said
> he is OK with the patch. In function __generic_unplug_device(), the kernel
> can use a cheaper function, elv_queue_empty(), instead of the more expensive
> elv_next_request() to find out whether the queue is empty or not. blk_run_queue()
> can also be made conditional on the queue's emptiness before calling
> request_fn().
>
>
> Signed-off-by: Ken Chen <[email protected]>
Looks good, thanks.
Signed-off-by: Jens Axboe <[email protected]>
--
Jens Axboe
This memory barrier is not needed because the waitqueue will only get
waiters on it in the following situations:

1. rq->count has exceeded the threshold - however all manipulations of
   ->count are performed under the queue lock, so we will correctly pick
   up any waiter.

2. Memory allocation for the request fails. In this case the memory
   barrier provides no additional help. We are guaranteed to eventually
   wake up waiters because the request allocation mempool guarantees that
   if the memory allocation for a request fails, there must be some
   requests in flight, and they will wake up waiters when they are retired.
---
linux-2.6-npiggin/drivers/block/ll_rw_blk.c | 1 -
1 files changed, 1 deletion(-)
diff -puN drivers/block/ll_rw_blk.c~blk-no-mb drivers/block/ll_rw_blk.c
--- linux-2.6/drivers/block/ll_rw_blk.c~blk-no-mb 2005-03-29 18:58:19.000000000 +1000
+++ linux-2.6-npiggin/drivers/block/ll_rw_blk.c 2005-03-29 19:20:10.000000000 +1000
@@ -1828,7 +1828,6 @@ static void __freed_request(request_queu
clear_queue_congested(q, rw);
if (rl->count[rw] + 1 <= q->nr_requests) {
- smp_mb();
if (waitqueue_active(&rl->wait[rw]))
wake_up(&rl->wait[rw]);
_
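To make the ordering argument concrete, a hedged sketch of the two sides
(condensed from the surrounding code, not the exact 2.6.11 source). The
waker manipulates ->count and tests the waitqueue with q->queue_lock held;
the waiter puts itself on the waitqueue first and then takes q->queue_lock
to examine ->count, so the lock itself already orders the two: either the
waiter sees the freed slot, or the waker sees the waiter.

	/* waker side - freed_request()/__freed_request(), queue_lock held */
	rl->count[rw]--;
	if (rl->count[rw] + 1 <= q->nr_requests) {
		if (waitqueue_active(&rl->wait[rw]))
			wake_up(&rl->wait[rw]);
	}

	/* waiter side - get_request_wait() */
	prepare_to_wait_exclusive(&rl->wait[rw], &wait, TASK_UNINTERRUPTIBLE);
	rq = get_request(q, rw, GFP_NOIO);	/* checks ->count under queue_lock */
	if (!rq)
		io_schedule();
	finish_wait(&rl->wait[rw], &wait);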
---
linux-2.6-npiggin/drivers/block/ll_rw_blk.c | 63 ++++++++++------------------
1 files changed, 23 insertions(+), 40 deletions(-)
diff -puN drivers/block/ll_rw_blk.c~blk-efficient drivers/block/ll_rw_blk.c
--- linux-2.6/drivers/block/ll_rw_blk.c~blk-efficient 2005-03-29 19:00:07.000000000 +1000
+++ linux-2.6-npiggin/drivers/block/ll_rw_blk.c 2005-03-29 19:10:45.000000000 +1000
@@ -1450,7 +1450,7 @@ EXPORT_SYMBOL(blk_remove_plug);
*/
void __generic_unplug_device(request_queue_t *q)
{
- if (test_bit(QUEUE_FLAG_STOPPED, &q->queue_flags))
+ if (unlikely(test_bit(QUEUE_FLAG_STOPPED, &q->queue_flags)))
return;
if (!blk_remove_plug(q))
@@ -1955,7 +1955,6 @@ static struct request *get_request_wait(
DEFINE_WAIT(wait);
struct request *rq;
- generic_unplug_device(q);
do {
struct request_list *rl = &q->rq;
@@ -1967,6 +1966,7 @@ static struct request *get_request_wait(
if (!rq) {
struct io_context *ioc;
+ generic_unplug_device(q);
io_schedule();
/*
@@ -2557,7 +2557,7 @@ EXPORT_SYMBOL(__blk_attempt_remerge);
static int __make_request(request_queue_t *q, struct bio *bio)
{
- struct request *req, *freereq = NULL;
+ struct request *req;
int el_ret, rw, nr_sectors, cur_nr_sectors, barrier, err;
sector_t sector;
@@ -2577,19 +2577,18 @@ static int __make_request(request_queue_
spin_lock_prefetch(q->queue_lock);
barrier = bio_barrier(bio);
- if (barrier && (q->ordered == QUEUE_ORDERED_NONE)) {
+ if (unlikely(barrier) && (q->ordered == QUEUE_ORDERED_NONE)) {
err = -EOPNOTSUPP;
goto end_io;
}
-again:
spin_lock_irq(q->queue_lock);
if (elv_queue_empty(q)) {
blk_plug_device(q);
goto get_rq;
}
- if (barrier)
+ if (unlikely(barrier))
goto get_rq;
el_ret = elv_merge(q, &req, bio);
@@ -2632,40 +2631,23 @@ again:
elv_merged_request(q, req);
goto out;
- /*
- * elevator says don't/can't merge. get new request
- */
- case ELEVATOR_NO_MERGE:
- break;
-
+ /* ELV_NO_MERGE: elevator says don't/can't merge. */
default:
- printk("elevator returned crap (%d)\n", el_ret);
- BUG();
+ ;
}
+get_rq:
/*
- * Grab a free request from the freelist - if that is empty, check
- * if we are doing read ahead and abort instead of blocking for
- * a free slot.
+ * Grab a free request. This is might sleep but can not fail.
+ */
+ spin_unlock_irq(q->queue_lock);
+ req = get_request_wait(q, rw);
+ /*
+ * After dropping the lock and possibly sleeping here, our request
+ * may now be mergeable after it had proven unmergeable (above).
+ * We don't worry about that case for efficiency. It won't happen
+ * often, and the elevators are able to handle it.
*/
-get_rq:
- if (freereq) {
- req = freereq;
- freereq = NULL;
- } else {
- spin_unlock_irq(q->queue_lock);
- if ((freereq = get_request(q, rw, GFP_ATOMIC)) == NULL) {
- /*
- * READA bit set
- */
- err = -EWOULDBLOCK;
- if (bio_rw_ahead(bio))
- goto end_io;
-
- freereq = get_request_wait(q, rw);
- }
- goto again;
- }
req->flags |= REQ_CMD;
@@ -2678,7 +2660,7 @@ get_rq:
/*
* REQ_BARRIER implies no merging, but lets make it explicit
*/
- if (barrier)
+ if (unlikely(barrier))
req->flags |= (REQ_HARDBARRIER | REQ_NOMERGE);
req->errors = 0;
@@ -2693,10 +2675,11 @@ get_rq:
req->rq_disk = bio->bi_bdev->bd_disk;
req->start_time = jiffies;
+ spin_lock_irq(q->queue_lock);
+ if (elv_queue_empty(q))
+ blk_plug_device(q);
add_request(q, req);
out:
- if (freereq)
- __blk_put_request(q, freereq);
if (bio_sync(bio))
__generic_unplug_device(q);
@@ -2802,7 +2785,7 @@ static inline void block_wait_queue_runn
{
DEFINE_WAIT(wait);
- while (test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags)) {
+ while (unlikely(test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags))) {
struct request_list *rl = &q->rq;
prepare_to_wait_exclusive(&rl->drain, &wait,
@@ -2911,7 +2894,7 @@ end_io:
goto end_io;
}
- if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))
+ if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
goto end_io;
block_wait_queue_running(q);
_
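Read as straight code rather than a diff, the allocation path in
__make_request() after this patch boils down to roughly the following
(a sketch assembled from the hunks above, not the full function):

	get_rq:
		/* no GFP_ATOMIC attempt and no 'again' loop any more:
		 * just block until a request is available */
		spin_unlock_irq(q->queue_lock);
		req = get_request_wait(q, rw);	/* may sleep, cannot fail */

		/* ... fill in req from the bio ... */

		spin_lock_irq(q->queue_lock);
		if (elv_queue_empty(q))
			blk_plug_device(q);	/* plug done here, next to add_request() */
		add_request(q, req);
	out:
		if (bio_sync(bio))
			__generic_unplug_device(q);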
On Tue, Mar 29 2005, Nick Piggin wrote:
> Jens Axboe wrote:
> >On Mon, Mar 28 2005, Chen, Kenneth W wrote:
> >
> >>This patch was posted last year and if I remember correctly, Jens said
> >>he is OK with the patch. In function __generic_unplug_device(), the kernel
> >>can use a cheaper function, elv_queue_empty(), instead of the more expensive
> >>elv_next_request() to find out whether the queue is empty or not. blk_run_queue()
> >>can also be made conditional on the queue's emptiness before calling
> >>request_fn().
> >>
> >>
> >>Signed-off-by: Ken Chen <[email protected]>
> >
> >
> >Looks good, thanks.
> >
> >Signed-off-by: Jens Axboe <[email protected]>
> >
>
> Speaking of which, I've had a few ideas lying around for possible
> performance improvement in the block code.
>
> I haven't used a big disk array (or tried any simulation), but I'll
> attach the patch if you're looking into that area.
>
> It puts in a few unlikely()s, but the main changes are:
> - don't generic_unplug_device unconditionally in get_request_wait,
> - removes the relock/retry merge mechanism in __make_request if we
> aren't able to get the GFP_ATOMIC allocation. Just fall through
> and assume the chances of getting a merge will be small (is this
> a valid assumption? Should measure it I guess).
> - removes the GFP_ATOMIC allocation. That's always a good thing.
Looks good, I've been toying with something very similar for a long time
myself.
The unplug change is a no-brainer. The retry stuff in __make_request()
will make no real difference on 'typical' hardware; when it was
introduced in 2.4.x it was very useful on slower devices like dvd-rams.
The batch wakeups should take care of this.
The atomic-vs-blocking allocation should be tested. I'd really like it
to be a "don't dive into the vm very much, just wait on the mempool"
type allocation, so we are not at the mercy of long vm reclaim times
hurting the latencies here.
--
Jens Axboe
---
linux-2.6-npiggin/mm/mempool.c | 5 ++++-
1 files changed, 4 insertions(+), 1 deletion(-)
diff -puN mm/mempool.c~mempool-can-fail mm/mempool.c
--- linux-2.6/mm/mempool.c~mempool-can-fail 2005-03-29 19:45:02.000000000 +1000
+++ linux-2.6-npiggin/mm/mempool.c 2005-03-29 19:48:05.000000000 +1000
@@ -198,7 +198,10 @@ void * mempool_alloc(mempool_t *pool, in
void *element;
unsigned long flags;
DEFINE_WAIT(wait);
- int gfp_nowait = gfp_mask & ~(__GFP_WAIT | __GFP_IO);
+ int gfp_nowait;
+
+ gfp_mask |= __GFP_NORETRY; /* don't loop in __alloc_pages */
+ gfp_nowait = gfp_mask & ~(__GFP_WAIT | __GFP_IO);
might_sleep_if(gfp_mask & __GFP_WAIT);
repeat_alloc:
_
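The point of the __GFP_NORETRY hint, shown as a hedged sketch of
mempool_alloc()'s flow from that era (field and helper names from memory
of mm/mempool.c, not a verbatim copy): the page allocator now fails fast
instead of looping in reclaim, so the allocation quickly falls back to the
pool's reserved elements or sleeps on the pool's waitqueue - i.e. the
"don't dive into the vm, just wait on the mempool" behaviour wanted above.

	element = pool->alloc(gfp_nowait | __GFP_NOWARN, pool->pool_data);
	if (element)
		return element;
	/*
	 * Any further pool->alloc() attempt now carries __GFP_NORETRY, so
	 * __alloc_pages() gives up quickly rather than looping in reclaim...
	 */
	spin_lock_irqsave(&pool->lock, flags);
	if (pool->curr_nr) {
		/* ...and we fall back to the pool's reserved elements */
		element = remove_element(pool);
		spin_unlock_irqrestore(&pool->lock, flags);
		return element;
	}
	spin_unlock_irqrestore(&pool->lock, flags);
	/* reserve empty as well: sleep on pool->wait, then retry */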
---
linux-2.6-npiggin/drivers/block/ll_rw_blk.c | 31 +++++++++++++++++++---------
1 files changed, 22 insertions(+), 9 deletions(-)
diff -puN drivers/block/ll_rw_blk.c~blk-reduce-locking drivers/block/ll_rw_blk.c
--- linux-2.6/drivers/block/ll_rw_blk.c~blk-reduce-locking 2005-03-29 19:51:30.000000000 +1000
+++ linux-2.6-npiggin/drivers/block/ll_rw_blk.c 2005-03-29 20:03:12.000000000 +1000
@@ -1859,10 +1859,13 @@ static void freed_request(request_queue_
#define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist)
/*
- * Get a free request, queue_lock must not be held
+ * Get a free request, queue_lock must be held.
+ * Returns NULL on failure, with queue_lock held.
+ * Returns !NULL on success, with queue_lock *not held*.
*/
static struct request *get_request(request_queue_t *q, int rw, int gfp_mask)
{
+ int batching;
struct request *rq = NULL;
struct request_list *rl = &q->rq;
struct io_context *ioc = get_io_context(gfp_mask);
@@ -1870,7 +1873,6 @@ static struct request *get_request(reque
if (unlikely(test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags)))
goto out;
- spin_lock_irq(q->queue_lock);
if (rl->count[rw]+1 >= q->nr_requests) {
/*
* The queue will fill after this allocation, so set it as
@@ -1883,6 +1885,8 @@ static struct request *get_request(reque
blk_set_queue_full(q, rw);
}
}
+
+ batching = ioc_batching(q, ioc);
switch (elv_may_queue(q, rw)) {
case ELV_MQUEUE_NO:
@@ -1893,12 +1897,11 @@ static struct request *get_request(reque
goto get_rq;
}
- if (blk_queue_full(q, rw) && !ioc_batching(q, ioc)) {
+ if (blk_queue_full(q, rw) && !batching) {
/*
* The queue is full and the allocating process is not a
* "batcher", and not exempted by the IO scheduler
*/
- spin_unlock_irq(q->queue_lock);
goto out;
}
@@ -1932,11 +1935,10 @@ rq_starved:
if (unlikely(rl->count[rw] == 0))
rl->starved[rw] = 1;
- spin_unlock_irq(q->queue_lock);
goto out;
}
- if (ioc_batching(q, ioc))
+ if (batching)
ioc->nr_batch_requests--;
rq_init(q, rq);
@@ -1949,6 +1951,8 @@ out:
/*
* No available requests for this queue, unplug the device and wait for some
* requests to become available.
+ *
+ * Called with q->queue_lock held, and returns with it unlocked.
*/
static struct request *get_request_wait(request_queue_t *q, int rw)
{
@@ -1966,7 +1970,8 @@ static struct request *get_request_wait(
if (!rq) {
struct io_context *ioc;
- generic_unplug_device(q);
+ __generic_unplug_device(q);
+ spin_unlock_irq(q->queue_lock);
io_schedule();
/*
@@ -1978,6 +1983,8 @@ static struct request *get_request_wait(
ioc = get_io_context(GFP_NOIO);
ioc_set_batching(q, ioc);
put_io_context(ioc);
+
+ spin_lock_irq(q->queue_lock);
}
finish_wait(&rl->wait[rw], &wait);
} while (!rq);
@@ -1991,10 +1998,15 @@ struct request *blk_get_request(request_
BUG_ON(rw != READ && rw != WRITE);
+ spin_lock_irq(q->queue_lock);
if (gfp_mask & __GFP_WAIT)
rq = get_request_wait(q, rw);
- else
+ else {
rq = get_request(q, rw, gfp_mask);
+ if (!rq)
+ spin_unlock_irq(q->queue_lock);
+ }
+ /* q->queue_lock is unlocked at this point */
return rq;
}
@@ -2639,9 +2651,10 @@ static int __make_request(request_queue_
get_rq:
/*
* Grab a free request. This is might sleep but can not fail.
+ * Returns with the queue unlocked.
*/
- spin_unlock_irq(q->queue_lock);
req = get_request_wait(q, rw);
+
/*
* After dropping the lock and possibly sleeping here, our request
* may now be mergeable after it had proven unmergeable (above).
_
On Tue, 2005-03-29 at 19:19 +1000, Nick Piggin wrote:
> - removes the relock/retry merge mechanism in __make_request if we
> aren't able to get the GFP_ATOMIC allocation. Just fall through
> and assume the chances of getting a merge will be small (is this
> a valid assumption? Should measure it I guess).
this may have a potential problem; if the vm gets in trouble, you
suddenly start to generate worse IO patterns, which means IO performance
goes down right when it's most needed.....
of course we need to figure if this actually ever hits in practice,
because if it doesn't I'm all for simpler code.
On Tue, Mar 29 2005, Arjan van de Ven wrote:
> On Tue, 2005-03-29 at 19:19 +1000, Nick Piggin wrote:
> > - removes the relock/retry merge mechanism in __make_request if we
> > aren't able to get the GFP_ATOMIC allocation. Just fall through
> > and assume the chances of getting a merge will be small (is this
> > a valid assumption? Should measure it I guess).
>
> this may have a potential problem; if the vm gets in trouble, you
> suddenly start to generate worse IO patterns, which means IO performance
> goes down right when it's most needed.....
>
> of course we need to figure if this actually ever hits in practice,
> because if it doesn't I'm all for simpler code.
Precisely, see my response as well. If the noretry flag works, that
should be fine.
--
Jens Axboe
Arjan van de Ven wrote:
> On Tue, 2005-03-29 at 19:19 +1000, Nick Piggin wrote:
>
>>- removes the relock/retry merge mechanism in __make_request if we
>> aren't able to get the GFP_ATOMIC allocation. Just fall through
>> and assume the chances of getting a merge will be small (is this
>> a valid assumption? Should measure it I guess).
>
>
> this may have a potential problem; if the vm gets in trouble, you
> suddenly start to generate worse IO patterns, which means IO performance
> goes down right when it's most needed.....
>
Sorry, my wording was incorrect. It currently *always* retries the
merge if it had at first failed, and after the patch we never retry.
So it should not result in behavioural shifts when VM load is high.
It seems to be a clear source of problems for Kenneth though, because
his workload appears to have almost zero merges, so he'll always be
invoking the merge logic twice.
I agree there is potential for subtle interactions. But generally the
block layer is surprisingly well behaved in my experience.
As Jens said, the complete removal of the GFP_ATOMIC allocation probably
has the most potential for problems in this regard, although bios are not
using GFP_ATOMIC allocations, so I would be a little surprised if it made
a really noticeable difference.
On Tue, Mar 29 2005, Nick Piggin wrote:
> @@ -2577,19 +2577,18 @@ static int __make_request(request_queue_
> spin_lock_prefetch(q->queue_lock);
>
> barrier = bio_barrier(bio);
> - if (barrier && (q->ordered == QUEUE_ORDERED_NONE)) {
> + if (unlikely(barrier) && (q->ordered == QUEUE_ORDERED_NONE)) {
> err = -EOPNOTSUPP;
> goto end_io;
> }
>
> -again:
> spin_lock_irq(q->queue_lock);
>
> if (elv_queue_empty(q)) {
> blk_plug_device(q);
> goto get_rq;
> }
This should just goto get_rq, the plug should happen only at the end
where you did add it:
> @@ -2693,10 +2675,11 @@ get_rq:
> req->rq_disk = bio->bi_bdev->bd_disk;
> req->start_time = jiffies;
>
> + spin_lock_irq(q->queue_lock);
> + if (elv_queue_empty(q))
> + blk_plug_device(q);
> add_request(q, req);
> out:
> - if (freereq)
> - __blk_put_request(q, freereq);
> if (bio_sync(bio))
> __generic_unplug_device(q);
>
--
Jens Axboe
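In other words, the top of __make_request() that Jens is asking for looks
roughly like this (a hedged sketch of the suggested fix, not a tested hunk):

	spin_lock_irq(q->queue_lock);

	if (elv_queue_empty(q))
		goto get_rq;	/* no blk_plug_device() here any more; the
				 * plug now happens just before add_request()
				 * at the end */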
Nick Piggin wrote on Tuesday, March 29, 2005 1:20 AM
> Speaking of which, I've had a few ideas lying around for possible
> performance improvement in the block code.
>
> I haven't used a big disk array (or tried any simulation), but I'll
> attach the patch if you're looking into that area.
>
> linux-2.6-npiggin/drivers/block/ll_rw_blk.c | 63 ++++++++++------------------
> 1 files changed, 23 insertions(+), 40 deletions(-)
>
> diff -puN drivers/block/ll_rw_blk.c~blk-efficient drivers/block/ll_rw_blk.c
> --- linux-2.6/drivers/block/ll_rw_blk.c~blk-efficient 2005-03-29 19:00:07.000000000 +1000
> +++ linux-2.6-npiggin/drivers/block/ll_rw_blk.c 2005-03-29 19:10:45.000000000 +1000
> @@ -2557,7 +2557,7 @@ EXPORT_SYMBOL(__blk_attempt_remerge);
>
> static int __make_request(request_queue_t *q, struct bio *bio)
> {
> - struct request *req, *freereq = NULL;
> + struct request *req;
> int el_ret, rw, nr_sectors, cur_nr_sectors, barrier, err;
> sector_t sector;
>
> @@ -2577,19 +2577,18 @@ static int __make_request(request_queue_
>
> -again:
> spin_lock_irq(q->queue_lock);
>
> if (elv_queue_empty(q)) {
>
....
> +get_rq:
> /*
> - * Grab a free request from the freelist - if that is empty, check
> - * if we are doing read ahead and abort instead of blocking for
> - * a free slot.
> + * Grab a free request. This is might sleep but can not fail.
> + */
> + spin_unlock_irq(q->queue_lock);
> + req = get_request_wait(q, rw);
> + /*
> + * After dropping the lock and possibly sleeping here, our request
> + * may now be mergeable after it had proven unmergeable (above).
> + * We don't worry about that case for efficiency. It won't happen
> + * often, and the elevators are able to handle it.
> */
> -get_rq:
> - if (freereq) {
> - req = freereq;
> - freereq = NULL;
> - } else {
> - spin_unlock_irq(q->queue_lock);
> - if ((freereq = get_request(q, rw, GFP_ATOMIC)) == NULL) {
> - /*
> - * READA bit set
> - */
> - err = -EWOULDBLOCK;
> - if (bio_rw_ahead(bio))
> - goto end_io;
> -
> - freereq = get_request_wait(q, rw);
> - }
> - goto again;
> - }
>
> req->flags |= REQ_CMD;
>
....
> @@ -2693,10 +2675,11 @@ get_rq:
> req->rq_disk = bio->bi_bdev->bd_disk;
> req->start_time = jiffies;
>
> + spin_lock_irq(q->queue_lock);
> + if (elv_queue_empty(q))
> + blk_plug_device(q);
> add_request(q, req);
> out:
> - if (freereq)
> - __blk_put_request(q, freereq);
> if (bio_sync(bio))
> __generic_unplug_device(q);
This part of the change in __make_request() is very welcome for enterprise workloads.
I have an unpublished version of a patch that does something similar. I will look
through all the other changes in your patch.
Jens Axboe wrote:
> On Tue, Mar 29 2005, Nick Piggin wrote:
>
>>@@ -2577,19 +2577,18 @@ static int __make_request(request_queue_
>> spin_lock_prefetch(q->queue_lock);
>>
>> barrier = bio_barrier(bio);
>>- if (barrier && (q->ordered == QUEUE_ORDERED_NONE)) {
>>+ if (unlikely(barrier) && (q->ordered == QUEUE_ORDERED_NONE)) {
>> err = -EOPNOTSUPP;
>> goto end_io;
>> }
>>
>>-again:
>> spin_lock_irq(q->queue_lock);
>>
>> if (elv_queue_empty(q)) {
>> blk_plug_device(q);
>> goto get_rq;
>> }
>
>
> This should just goto get_rq, the plug should happen only at the end
> where you did add it:
>
Yes I see. I'll fix that up.
>
>>@@ -2693,10 +2675,11 @@ get_rq:
>> req->rq_disk = bio->bi_bdev->bd_disk;
>> req->start_time = jiffies;
>>
>>+ spin_lock_irq(q->queue_lock);
>>+ if (elv_queue_empty(q))
>>+ blk_plug_device(q);
>> add_request(q, req);
>> out:
>>- if (freereq)
>>- __blk_put_request(q, freereq);
>> if (bio_sync(bio))
>> __generic_unplug_device(q);
>>
>
>
Nick Piggin wrote:
> Jens Axboe wrote:
>
>> Looks good, I've been toying with something very similar for a long time
>> myself.
>>
>
> Here is another thing I just noticed that should further reduce the
> locking by at least 1, sometimes 2 lock/unlock pairs per request.
> At the cost of uglifying the code somewhat. Although it is pretty
> nicely contained, so Jens you might consider it acceptable as is,
> or we could investigate how to make it nicer if Kenneth reports some
> improvement.
>
> Note, this isn't runtime tested - it could easily have a bug.
>
OK - I have booted this on a 4-way SMP with SCSI disks, and done
some IO tests, and no hangs.
So Kenneth if you could look into this one as well, to see if
it is worthwhile, that would be great.
On Wed, Mar 30 2005, Nick Piggin wrote:
> Nick Piggin wrote:
> >Jens Axboe wrote:
> >
> >>Looks good, I've been toying with something very similar for a long time
> >>myself.
> >>
> >
> >Here is another thing I just noticed that should further reduce the
> >locking by at least 1, sometimes 2 lock/unlock pairs per request.
> >At the cost of uglifying the code somewhat. Although it is pretty
> >nicely contained, so Jens you might consider it acceptable as is,
> >or we could investigate how to make it nicer if Kenneth reports some
> >improvement.
> >
> >Note, this isn't runtime tested - it could easily have a bug.
> >
>
> OK - I have booted this on a 4-way SMP with SCSI disks, and done
> some IO tests, and no hangs.
>
> So Kenneth if you could look into this one as well, to see if
> it is worthwhile, that would be great.
I think it is definitely worthwhile. It might not be the prettiest, but
it isn't as bad as you originally stated :-)
--
Jens Axboe
On Wed, Mar 30 2005, Nick Piggin wrote:
> Nick Piggin wrote:
> >Jens Axboe wrote:
> >
> >>Looks good, I've been toying with something very similar for a long time
> >>myself.
> >>
> >
> >Here is another thing I just noticed that should further reduce the
> >locking by at least 1, sometimes 2 lock/unlock pairs per request.
> >At the cost of uglifying the code somewhat. Although it is pretty
> >nicely contained, so Jens you might consider it acceptable as is,
> >or we could investigate how to make it nicer if Kenneth reports some
> >improvement.
> >
> >Note, this isn't runtime tested - it could easily have a bug.
> >
>
> OK - I have booted this on a 4-way SMP with SCSI disks, and done
> some IO tests, and no hangs.
>
> So Kenneth if you could look into this one as well, to see if
> it is worthwhile, that would be great.
For that to work, you have to change the get_io_context() allocation to
be GFP_ATOMIC.
--
Jens Axboe
Jens Axboe wrote:
> On Wed, Mar 30 2005, Nick Piggin wrote:
>>So Kenneth if you could look into this one as well, to see if
>>it is worthwhile, that would be great.
>
>
> For that to work, you have to change the get_io_context() allocation to
> be GFP_ATOMIC.
>
Yes of course, thanks for picking that up.
I guess this isn't a problem, as io contexts should be allocated
comparatively rarely. It would be possible to move it out of the
lock though if we really want to.
Thanks.
--
SUSE Labs, Novell Inc.
On Fri, Apr 08 2005, Nick Piggin wrote:
> Jens Axboe wrote:
> >On Wed, Mar 30 2005, Nick Piggin wrote:
>
> >>So Kenneth if you could look into this one as well, to see if
> >>it is worthwhile, that would be great.
> >
> >
> >For that to work, you have to change the get_io_context() allocation to
> >be GFP_ATOMIC.
> >
>
> Yes of course, thanks for picking that up.
>
> I guess this isn't a problem, as io contexts should be allocated
> comparatively rarely. It would be possible to move it out of the
> lock though if we really want to.
Let's just keep it inside the lock; for the fast case it should just be
an increment of the already existing io context.
--
Jens Axboe
Jens Axboe wrote:
> On Fri, Apr 08 2005, Nick Piggin wrote:
>
>>I guess this isn't a problem, as io contexts should be allocated
>>comparatively rarely. It would be possible to move it out of the
>>lock though if we really want to.
>
>
> Let's just keep it inside the lock; for the fast case it should just be
> an increment of the already existing io context.
>
Sounds good.
--
SUSE Labs, Novell Inc.