2009-04-16 13:14:18

by Jerome Marchand

Subject: [PATCH] block: simplify I/O stat accounting


This simplifies I/O stat accounting switching code and separates it
completely from I/O scheduler switch code.

Requests are accounted according to the state of their request queue
at the time of the request allocation. There is no need anymore to
flush the request queue when switching I/O accounting state.

Regards,
Jerome

Signed-off-by: Jerome Marchand <[email protected]>
---
block/blk-core.c | 9 +++++----
block/blk-merge.c | 6 +++---
block/blk-sysfs.c | 4 ----
block/blk.h | 7 +------
include/linux/blkdev.h | 3 +++
5 files changed, 12 insertions(+), 17 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 07ab754..30203a8 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -643,7 +643,7 @@ static inline void blk_free_request(struct
request_queue *q, struct request *rq)
}

static struct request *
-blk_alloc_request(struct request_queue *q, int rw, int priv, gfp_t
gfp_mask)
+blk_alloc_request(struct request_queue *q, int flags, int priv, gfp_t
gfp_mask)
{
struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);

@@ -652,7 +652,7 @@ blk_alloc_request(struct request_queue *q, int rw,
int priv, gfp_t gfp_mask)

blk_rq_init(q, rq);

- rq->cmd_flags = rw | REQ_ALLOCED;
+ rq->cmd_flags = flags | REQ_ALLOCED;

if (priv) {
if (unlikely(elv_set_request(q, rq, gfp_mask))) {
@@ -744,7 +744,7 @@ static struct request *get_request(struct
request_queue *q, int rw_flags,
struct request_list *rl = &q->rq;
struct io_context *ioc = NULL;
const bool is_sync = rw_is_sync(rw_flags) != 0;
- int may_queue, priv;
+ int may_queue, priv, iostat;

may_queue = elv_may_queue(q, rw_flags);
if (may_queue == ELV_MQUEUE_NO)
@@ -792,9 +792,10 @@ static struct request *get_request(struct
request_queue *q, int rw_flags,
if (priv)
rl->elvpriv++;

+ iostat = blk_queue_io_stat(q) ? REQ_IO_STAT : 0;
spin_unlock_irq(q->queue_lock);

- rq = blk_alloc_request(q, rw_flags, priv, gfp_mask);
+ rq = blk_alloc_request(q, rw_flags | iostat, priv, gfp_mask);
if (unlikely(!rq)) {
/*
* Allocation failed presumably due to memory. Undo anything
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 63760ca..6a05270 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -338,9 +338,9 @@ static int ll_merge_requests_fn(struct request_queue
*q, struct request *req,
return 1;
}

-static void blk_account_io_merge(struct request *req)
+static void blk_account_io_merge(struct request *req, struct request *next)
{
- if (blk_do_io_stat(req)) {
+ if (req->rq_disk && blk_rq_io_stat(next)) {
struct hd_struct *part;
int cpu;

@@ -402,7 +402,7 @@ static int attempt_merge(struct request_queue *q,
struct request *req,

elv_merge_requests(q, req, next);

- blk_account_io_merge(req);
+ blk_account_io_merge(req, next);

req->ioprio = ioprio_best(req->ioprio, next->ioprio);
if (blk_rq_cpu_valid(next))
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 73f36be..3ff9bba 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -209,14 +209,10 @@ static ssize_t queue_iostats_store(struct
request_queue *q, const char *page,
ssize_t ret = queue_var_store(&stats, page, count);

spin_lock_irq(q->queue_lock);
- elv_quisce_start(q);
-
if (stats)
queue_flag_set(QUEUE_FLAG_IO_STAT, q);
else
queue_flag_clear(QUEUE_FLAG_IO_STAT, q);
-
- elv_quisce_end(q);
spin_unlock_irq(q->queue_lock);

return ret;
diff --git a/block/blk.h b/block/blk.h
index 24fcaee..ad6dbdf 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -114,12 +114,7 @@ static inline int blk_cpu_to_group(int cpu)

static inline int blk_do_io_stat(struct request *rq)
{
- struct gendisk *disk = rq->rq_disk;
-
- if (!disk || !disk->queue)
- return 0;
-
- return blk_queue_io_stat(disk->queue) && (rq->cmd_flags & REQ_ELVPRIV);
+ return rq->rq_disk && blk_rq_io_stat(rq);
}

#endif
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ba54c83..4629da4 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -118,6 +118,7 @@ enum rq_flag_bits {
__REQ_COPY_USER, /* contains copies of user pages */
__REQ_INTEGRITY, /* integrity metadata has been remapped */
__REQ_NOIDLE, /* Don't anticipate more IO after this one */
+ __REQ_IO_STAT, /* account I/O stat */
__REQ_NR_BITS, /* stops here */
};

@@ -145,6 +146,7 @@ enum rq_flag_bits {
#define REQ_COPY_USER (1 << __REQ_COPY_USER)
#define REQ_INTEGRITY (1 << __REQ_INTEGRITY)
#define REQ_NOIDLE (1 << __REQ_NOIDLE)
+#define REQ_IO_STAT (1 << __REQ_IO_STAT)

#define BLK_MAX_CDB 16

@@ -598,6 +600,7 @@ enum {
blk_failfast_transport(rq) || \
blk_failfast_driver(rq))
#define blk_rq_started(rq) ((rq)->cmd_flags & REQ_STARTED)
+#define blk_rq_io_stat(rq) ((rq)->flags & REQ_IO_STAT)

#define blk_account_rq(rq) (blk_rq_started(rq) && (blk_fs_request(rq)
|| blk_discard_rq(rq)))


2009-04-16 16:35:11

by Jens Axboe

Subject: Re: [PATCH] block: simplify I/O stat accounting

On Thu, Apr 16 2009, Jerome Marchand wrote:
>
> This simplifies I/O stat accounting switching code and separates it
> completely from I/O scheduler switch code.
>
> Requests are accounted according to the state of their request queue
> at the time of the request allocation. There is no need anymore to
> flush the request queue when switching I/O accounting state.

This is cleaner, I like it. I'll apply it, but I'm changing this one:

> @@ -792,9 +792,10 @@ static struct request *get_request(struct
> request_queue *q, int rw_flags,
> if (priv)
> rl->elvpriv++;
>
> + iostat = blk_queue_io_stat(q) ? REQ_IO_STAT : 0;
> spin_unlock_irq(q->queue_lock);

to a regular if, I hate these ?: constructs. An if is much more
readable, imho.
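
That is, a regular if along these lines (a sketch; the v2 patch below
ends up with exactly this form):

	if (blk_queue_io_stat(q))
		iostat = REQ_IO_STAT;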

--
Jens Axboe

2009-04-16 16:37:38

by Jens Axboe

Subject: Re: [PATCH] block: simplify I/O stat accounting

On Thu, Apr 16 2009, Jens Axboe wrote:
> On Thu, Apr 16 2009, Jerome Marchand wrote:
> >
> > This simplifies I/O stat accounting switching code and separates it
> > completely from I/O scheduler switch code.
> >
> > Requests are accounted according to the state of their request queue
> > at the time of the request allocation. There is no need anymore to
> > flush the request queue when switching I/O accounting state.
>
> This is cleaner, I like it. I'll apply it, but I'm changing this one:
>
> > @@ -792,9 +792,10 @@ static struct request *get_request(struct
> > request_queue *q, int rw_flags,
> > if (priv)
> > rl->elvpriv++;
> >
> > + iostat = blk_queue_io_stat(q) ? REQ_IO_STAT : 0;
> > spin_unlock_irq(q->queue_lock);
>
> to a regular if, I hate these ?: constructs. An if is much more
> readable, imho.

Grmbl, your patch is line wrapped. Please fix your mailer.

--
Jens Axboe

2009-04-16 16:38:42

by Jens Axboe

Subject: Re: [PATCH] block: simplify I/O stat accounting

On Thu, Apr 16 2009, Jens Axboe wrote:
> On Thu, Apr 16 2009, Jens Axboe wrote:
> > On Thu, Apr 16 2009, Jerome Marchand wrote:
> > >
> > > This simplifies I/O stat accounting switching code and separates it
> > > completely from I/O scheduler switch code.
> > >
> > > Requests are accounted according to the state of their request queue
> > > at the time of the request allocation. There is no need anymore to
> > > flush the request queue when switching I/O accounting state.
> >
> > This is cleaner, I like it. I'll apply it, but I'm changing this one:
> >
> > > @@ -792,9 +792,10 @@ static struct request *get_request(struct
> > > request_queue *q, int rw_flags,
> > > if (priv)
> > > rl->elvpriv++;
> > >
> > > + iostat = blk_queue_io_stat(q) ? REQ_IO_STAT : 0;
> > > spin_unlock_irq(q->queue_lock);
> >
> > to a regular if, I hate these ?: constructs. An if is much more
> > readable, imho.
>
> Grmbl, your patch is line wrapped. Please fix your mailer.

And it doesn't apply to current -git. Looks like a hand apply, but
please be a little more careful in the future.

--
Jens Axboe

2009-04-16 16:43:10

by Jens Axboe

Subject: Re: [PATCH] block: simplify I/O stat accounting

On Thu, Apr 16 2009, Jens Axboe wrote:
> On Thu, Apr 16 2009, Jens Axboe wrote:
> > On Thu, Apr 16 2009, Jens Axboe wrote:
> > > On Thu, Apr 16 2009, Jerome Marchand wrote:
> > > >
> > > > This simplifies I/O stat accounting switching code and separates it
> > > > completely from I/O scheduler switch code.
> > > >
> > > > Requests are accounted according to the state of their request queue
> > > > at the time of the request allocation. There is no need anymore to
> > > > flush the request queue when switching I/O accounting state.
> > >
> > > This is cleaner, I like it. I'll apply it, but I'm changing this one:
> > >
> > > > @@ -792,9 +792,10 @@ static struct request *get_request(struct
> > > > request_queue *q, int rw_flags,
> > > > if (priv)
> > > > rl->elvpriv++;
> > > >
> > > > + iostat = blk_queue_io_stat(q) ? REQ_IO_STAT : 0;
> > > > spin_unlock_irq(q->queue_lock);
> > >
> > > to a regular if, I hate these ?: constructs. An if is much more
> > > readable, imho.
> >
> > Grmbl, your patch is line wrapped. Please fix your mailer.
>
> And it doesn't apply to current -git. Looks like a hand apply, but
> please be a little more careful in the future.

OK, it doesn't even compile either:

+#define blk_rq_io_stat(rq) ((rq)->flags & REQ_IO_STAT)

that wants to be ->cmd_flags.

Please resend when you have something that at least compiles. If you
send untested stuff my way, at least tell me.

--
Jens Axboe

2009-04-17 08:04:27

by Jerome Marchand

Subject: Re: [PATCH] block: simplify I/O stat accounting

Jens Axboe wrote:
> On Thu, Apr 16 2009, Jens Axboe wrote:
>> On Thu, Apr 16 2009, Jens Axboe wrote:
>>> On Thu, Apr 16 2009, Jens Axboe wrote:
>>>> On Thu, Apr 16 2009, Jerome Marchand wrote:
>>>>> This simplifies I/O stat accounting switching code and separates it
>>>>> completely from I/O scheduler switch code.
>>>>>
>>>>> Requests are accounted according to the state of their request queue
>>>>> at the time of the request allocation. There is no need anymore to
>>>>> flush the request queue when switching I/O accounting state.
>>>> This is cleaner, I like it. I'll apply it, but I'm changing this one:
>>>>
>>>>> @@ -792,9 +792,10 @@ static struct request *get_request(struct
>>>>> request_queue *q, int rw_flags,
>>>>> if (priv)
>>>>> rl->elvpriv++;
>>>>>
>>>>> + iostat = blk_queue_io_stat(q) ? REQ_IO_STAT : 0;
>>>>> spin_unlock_irq(q->queue_lock);
>>>> to a regular if, I hate these ?: constructs. An if is much more
>>>> readable, imho.
>>> Grmbl, your patch is line wrapped. Please fix your mailer.
>> And it doesn't apply to current -git. Looks like a hand apply, but
>> please be a little more careful in the future.
>
> OK, it doesn't even compile either:
>
> +#define blk_rq_io_stat(rq) ((rq)->flags & REQ_IO_STAT)
>
> that wants to be ->cmd_flags.
>
> Please resend when you have something that at least compiles. If you
> send untested stuff my way, at least tell me.
>

Hi Jens,

I'm very sorry about this. I didn't send you a patch that does not
compile on purpose. I was working on backporting that patch to an older
version of the kernel. It looks like I hand-edited that patch by mistake
before I sent it to you.

Jérôme

2009-04-17 11:21:39

by Jerome Marchand

Subject: [PATCH v2] block: simplify I/O stat accounting


This simplifies I/O stat accounting switching code and separates it
completely from I/O scheduler switch code.

Requests are accounted according to the state of their request queue
at the time of the request allocation. There is no need anymore to
flush the request queue when switching I/O accounting state.


Signed-off-by: Jerome Marchand <[email protected]>
---
block/blk-core.c | 10 ++++++----
block/blk-merge.c | 6 +++---
block/blk-sysfs.c | 4 ----
block/blk.h | 7 +------
include/linux/blkdev.h | 3 +++
5 files changed, 13 insertions(+), 17 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 07ab754..42a646f 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -643,7 +643,7 @@ static inline void blk_free_request(struct request_queue *q, struct request *rq)
}

static struct request *
-blk_alloc_request(struct request_queue *q, int rw, int priv, gfp_t gfp_mask)
+blk_alloc_request(struct request_queue *q, int flags, int priv, gfp_t gfp_mask)
{
struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);

@@ -652,7 +652,7 @@ blk_alloc_request(struct request_queue *q, int rw, int priv, gfp_t gfp_mask)

blk_rq_init(q, rq);

- rq->cmd_flags = rw | REQ_ALLOCED;
+ rq->cmd_flags = flags | REQ_ALLOCED;

if (priv) {
if (unlikely(elv_set_request(q, rq, gfp_mask))) {
@@ -744,7 +744,7 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
struct request_list *rl = &q->rq;
struct io_context *ioc = NULL;
const bool is_sync = rw_is_sync(rw_flags) != 0;
- int may_queue, priv;
+ int may_queue, priv, iostat = 0;

may_queue = elv_may_queue(q, rw_flags);
if (may_queue == ELV_MQUEUE_NO)
@@ -792,9 +792,11 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
if (priv)
rl->elvpriv++;

+ if (blk_queue_io_stat(q))
+ iostat = REQ_IO_STAT;
spin_unlock_irq(q->queue_lock);

- rq = blk_alloc_request(q, rw_flags, priv, gfp_mask);
+ rq = blk_alloc_request(q, rw_flags | iostat, priv, gfp_mask);
if (unlikely(!rq)) {
/*
* Allocation failed presumably due to memory. Undo anything
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 63760ca..6a05270 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -338,9 +338,9 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
return 1;
}

-static void blk_account_io_merge(struct request *req)
+static void blk_account_io_merge(struct request *req, struct request *next)
{
- if (blk_do_io_stat(req)) {
+ if (req->rq_disk && blk_rq_io_stat(next)) {
struct hd_struct *part;
int cpu;

@@ -402,7 +402,7 @@ static int attempt_merge(struct request_queue *q, struct request *req,

elv_merge_requests(q, req, next);

- blk_account_io_merge(req);
+ blk_account_io_merge(req, next);

req->ioprio = ioprio_best(req->ioprio, next->ioprio);
if (blk_rq_cpu_valid(next))
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index cac4e9f..3ff9bba 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -209,14 +209,10 @@ static ssize_t queue_iostats_store(struct request_queue *q, const char *page,
ssize_t ret = queue_var_store(&stats, page, count);

spin_lock_irq(q->queue_lock);
- elv_quiesce_start(q);
-
if (stats)
queue_flag_set(QUEUE_FLAG_IO_STAT, q);
else
queue_flag_clear(QUEUE_FLAG_IO_STAT, q);
-
- elv_quiesce_end(q);
spin_unlock_irq(q->queue_lock);

return ret;
diff --git a/block/blk.h b/block/blk.h
index 5dfc412..79c85f7 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -114,12 +114,7 @@ static inline int blk_cpu_to_group(int cpu)

static inline int blk_do_io_stat(struct request *rq)
{
- struct gendisk *disk = rq->rq_disk;
-
- if (!disk || !disk->queue)
- return 0;
-
- return blk_queue_io_stat(disk->queue) && (rq->cmd_flags & REQ_ELVPRIV);
+ return rq->rq_disk && blk_rq_io_stat(rq);
}

#endif
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ba54c83..2755d5c 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -118,6 +118,7 @@ enum rq_flag_bits {
__REQ_COPY_USER, /* contains copies of user pages */
__REQ_INTEGRITY, /* integrity metadata has been remapped */
__REQ_NOIDLE, /* Don't anticipate more IO after this one */
+ __REQ_IO_STAT, /* account I/O stat */
__REQ_NR_BITS, /* stops here */
};

@@ -145,6 +146,7 @@ enum rq_flag_bits {
#define REQ_COPY_USER (1 << __REQ_COPY_USER)
#define REQ_INTEGRITY (1 << __REQ_INTEGRITY)
#define REQ_NOIDLE (1 << __REQ_NOIDLE)
+#define REQ_IO_STAT (1 << __REQ_IO_STAT)

#define BLK_MAX_CDB 16

@@ -598,6 +600,7 @@ enum {
blk_failfast_transport(rq) || \
blk_failfast_driver(rq))
#define blk_rq_started(rq) ((rq)->cmd_flags & REQ_STARTED)
+#define blk_rq_io_stat(rq) ((rq)->cmd_flags & REQ_IO_STAT)

#define blk_account_rq(rq) (blk_rq_started(rq) && (blk_fs_request(rq) || blk_discard_rq(rq)))

2009-04-17 11:38:13

by Jens Axboe

Subject: Re: [PATCH v2] block: simplify I/O stat accounting

On Fri, Apr 17 2009, Jerome Marchand wrote:
>
> This simplifies I/O stat accounting switching code and separates it
> completely from I/O scheduler switch code.
>
> Requests are accounted according to the state of their request queue
> at the time of the request allocation. There is no need anymore to
> flush the request queue when switching I/O accounting state.
>
>
> Signed-off-by: Jerome Marchand <[email protected]>
> ---
> block/blk-core.c | 10 ++++++----
> block/blk-merge.c | 6 +++---
> block/blk-sysfs.c | 4 ----
> block/blk.h | 7 +------
> include/linux/blkdev.h | 3 +++
> 5 files changed, 13 insertions(+), 17 deletions(-)
>
> diff --git a/block/blk-core.c b/block/blk-core.c
> index 07ab754..42a646f 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -643,7 +643,7 @@ static inline void blk_free_request(struct request_queue *q, struct request *rq)
> }
>
> static struct request *
> -blk_alloc_request(struct request_queue *q, int rw, int priv, gfp_t gfp_mask)
> +blk_alloc_request(struct request_queue *q, int flags, int priv, gfp_t gfp_mask)
> {
> struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
>
> @@ -652,7 +652,7 @@ blk_alloc_request(struct request_queue *q, int rw, int priv, gfp_t gfp_mask)
>
> blk_rq_init(q, rq);
>
> - rq->cmd_flags = rw | REQ_ALLOCED;
> + rq->cmd_flags = flags | REQ_ALLOCED;
>
> if (priv) {
> if (unlikely(elv_set_request(q, rq, gfp_mask))) {
> @@ -744,7 +744,7 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
> struct request_list *rl = &q->rq;
> struct io_context *ioc = NULL;
> const bool is_sync = rw_is_sync(rw_flags) != 0;
> - int may_queue, priv;
> + int may_queue, priv, iostat = 0;
>
> may_queue = elv_may_queue(q, rw_flags);
> if (may_queue == ELV_MQUEUE_NO)
> @@ -792,9 +792,11 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
> if (priv)
> rl->elvpriv++;
>
> + if (blk_queue_io_stat(q))
> + iostat = REQ_IO_STAT;

On second thought, not sure why you add 'iostat' for this. It would be
OK to just do

if (blk_queue_io_stat(q))
rw_flags |= REQ_IO_STAT;

since it's just used for the allocation call, and the trace call (which
does & 1 on it anyway).
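
Spelled out, the suggested shape looks roughly like this (a sketch only,
assuming the surrounding get_request() body and the existing
trace_block_getrq() hook, whose last argument keeps only the
data-direction bit):

	if (blk_queue_io_stat(q))
		rw_flags |= REQ_IO_STAT;
	spin_unlock_irq(q->queue_lock);

	/* REQ_IO_STAT just rides along in rw_flags into the request */
	rq = blk_alloc_request(q, rw_flags, priv, gfp_mask);

	/* ... and the trace hook masks off all but the direction bit */
	trace_block_getrq(q, bio, rw_flags & 1);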

>
> - rq = blk_alloc_request(q, rw_flags, priv, gfp_mask);
> + rq = blk_alloc_request(q, rw_flags | iostat, priv, gfp_mask);
> if (unlikely(!rq)) {
> /*
> * Allocation failed presumably due to memory. Undo anything
> diff --git a/block/blk-merge.c b/block/blk-merge.c
> index 63760ca..6a05270 100644
> --- a/block/blk-merge.c
> +++ b/block/blk-merge.c
> @@ -338,9 +338,9 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
> return 1;
> }
>
> -static void blk_account_io_merge(struct request *req)
> +static void blk_account_io_merge(struct request *req, struct request *next)
> {
> - if (blk_do_io_stat(req)) {
> + if (req->rq_disk && blk_rq_io_stat(next)) {

This at least needs a comment, it's not at all directly clear why we are
checking 'next' for io stat and ->rq_disk in 'req'. Since it's just
called from that one spot, it would be cleaner to do:

/*
* 'next' is going away, so update stats accordingly
*/
if (blk_rq_io_stat(next))
blk_account_io_merge(req->rq_disk, req->sector);

and have blk_account_io_merge() be more ala:

static void blk_account_io_merge(struct request *req)
{
struct hd_struct *part;
int cpu;

cpu = part_stat_lock();
part = disk_map_sector_rcu(disk, sector);
...
}
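
Filled in along those lines it might look like the following (a sketch
only, assuming a (disk, sector) signature as in the call site above and
the part-accounting helpers of that era):

static void blk_account_io_merge(struct gendisk *disk, sector_t sector)
{
	struct hd_struct *part;
	int cpu;

	cpu = part_stat_lock();
	part = disk_map_sector_rcu(disk, sector);

	/* 'next' is being retired, so close out its in-flight count here */
	part_round_stats(cpu, part);
	part_dec_in_flight(part);

	part_stat_unlock();
}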

> struct hd_struct *part;
> int cpu;
>
> @@ -402,7 +402,7 @@ static int attempt_merge(struct request_queue *q, struct request *req,
>
> elv_merge_requests(q, req, next);
>
> - blk_account_io_merge(req);
> + blk_account_io_merge(req, next);
>
> req->ioprio = ioprio_best(req->ioprio, next->ioprio);
> if (blk_rq_cpu_valid(next))
> diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
> index cac4e9f..3ff9bba 100644
> --- a/block/blk-sysfs.c
> +++ b/block/blk-sysfs.c
> @@ -209,14 +209,10 @@ static ssize_t queue_iostats_store(struct request_queue *q, const char *page,
> ssize_t ret = queue_var_store(&stats, page, count);
>
> spin_lock_irq(q->queue_lock);
> - elv_quiesce_start(q);
> -
> if (stats)
> queue_flag_set(QUEUE_FLAG_IO_STAT, q);
> else
> queue_flag_clear(QUEUE_FLAG_IO_STAT, q);
> -
> - elv_quiesce_end(q);
> spin_unlock_irq(q->queue_lock);
>
> return ret;
> diff --git a/block/blk.h b/block/blk.h
> index 5dfc412..79c85f7 100644
> --- a/block/blk.h
> +++ b/block/blk.h
> @@ -114,12 +114,7 @@ static inline int blk_cpu_to_group(int cpu)
>
> static inline int blk_do_io_stat(struct request *rq)
> {
> - struct gendisk *disk = rq->rq_disk;
> -
> - if (!disk || !disk->queue)
> - return 0;
> -
> - return blk_queue_io_stat(disk->queue) && (rq->cmd_flags & REQ_ELVPRIV);
> + return rq->rq_disk && blk_rq_io_stat(rq);
> }
>
> #endif
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index ba54c83..2755d5c 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -118,6 +118,7 @@ enum rq_flag_bits {
> __REQ_COPY_USER, /* contains copies of user pages */
> __REQ_INTEGRITY, /* integrity metadata has been remapped */
> __REQ_NOIDLE, /* Don't anticipate more IO after this one */
> + __REQ_IO_STAT, /* account I/O stat */
> __REQ_NR_BITS, /* stops here */
> };
>
> @@ -145,6 +146,7 @@ enum rq_flag_bits {
> #define REQ_COPY_USER (1 << __REQ_COPY_USER)
> #define REQ_INTEGRITY (1 << __REQ_INTEGRITY)
> #define REQ_NOIDLE (1 << __REQ_NOIDLE)
> +#define REQ_IO_STAT (1 << __REQ_IO_STAT)
>
> #define BLK_MAX_CDB 16
>
> @@ -598,6 +600,7 @@ enum {
> blk_failfast_transport(rq) || \
> blk_failfast_driver(rq))
> #define blk_rq_started(rq) ((rq)->cmd_flags & REQ_STARTED)
> +#define blk_rq_io_stat(rq) ((rq)->cmd_flags & REQ_IO_STAT)
>
> #define blk_account_rq(rq) (blk_rq_started(rq) && (blk_fs_request(rq) || blk_discard_rq(rq)))
>

--
Jens Axboe

2009-04-17 11:54:59

by Jens Axboe

Subject: Re: [PATCH v2] block: simplify I/O stat accounting

On Fri, Apr 17 2009, Jens Axboe wrote:
> On Fri, Apr 17 2009, Jerome Marchand wrote:
> >
> > This simplifies I/O stat accounting switching code and separates it
> > completely from I/O scheduler switch code.
> >
> > Requests are accounted according to the state of their request queue
> > at the time of the request allocation. There is no need anymore to
> > flush the request queue when switching I/O accounting state.
> >
> >
> > Signed-off-by: Jerome Marchand <[email protected]>
> > ---
> > block/blk-core.c | 10 ++++++----
> > block/blk-merge.c | 6 +++---
> > block/blk-sysfs.c | 4 ----
> > block/blk.h | 7 +------
> > include/linux/blkdev.h | 3 +++
> > 5 files changed, 13 insertions(+), 17 deletions(-)
> >
> > diff --git a/block/blk-core.c b/block/blk-core.c
> > index 07ab754..42a646f 100644
> > --- a/block/blk-core.c
> > +++ b/block/blk-core.c
> > @@ -643,7 +643,7 @@ static inline void blk_free_request(struct request_queue *q, struct request *rq)
> > }
> >
> > static struct request *
> > -blk_alloc_request(struct request_queue *q, int rw, int priv, gfp_t gfp_mask)
> > +blk_alloc_request(struct request_queue *q, int flags, int priv, gfp_t gfp_mask)
> > {
> > struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
> >
> > @@ -652,7 +652,7 @@ blk_alloc_request(struct request_queue *q, int rw, int priv, gfp_t gfp_mask)
> >
> > blk_rq_init(q, rq);
> >
> > - rq->cmd_flags = rw | REQ_ALLOCED;
> > + rq->cmd_flags = flags | REQ_ALLOCED;
> >
> > if (priv) {
> > if (unlikely(elv_set_request(q, rq, gfp_mask))) {
> > @@ -744,7 +744,7 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
> > struct request_list *rl = &q->rq;
> > struct io_context *ioc = NULL;
> > const bool is_sync = rw_is_sync(rw_flags) != 0;
> > - int may_queue, priv;
> > + int may_queue, priv, iostat = 0;
> >
> > may_queue = elv_may_queue(q, rw_flags);
> > if (may_queue == ELV_MQUEUE_NO)
> > @@ -792,9 +792,11 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
> > if (priv)
> > rl->elvpriv++;
> >
> > + if (blk_queue_io_stat(q))
> > + iostat = REQ_IO_STAT;
>
> On second thought, not sure why you add 'iostat' for this. It would be
> OK to just do
>
> if (blk_queue_io_stat(q))
> rw_flags |= REQ_IO_STAT;
>
> since it's just used for the allocation call, and the trace call (which
> does & 1 on it anyway).
>
> >
> > - rq = blk_alloc_request(q, rw_flags, priv, gfp_mask);
> > + rq = blk_alloc_request(q, rw_flags | iostat, priv, gfp_mask);
> > if (unlikely(!rq)) {
> > /*
> > * Allocation failed presumably due to memory. Undo anything
> > diff --git a/block/blk-merge.c b/block/blk-merge.c
> > index 63760ca..6a05270 100644
> > --- a/block/blk-merge.c
> > +++ b/block/blk-merge.c
> > @@ -338,9 +338,9 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
> > return 1;
> > }
> >
> > -static void blk_account_io_merge(struct request *req)
> > +static void blk_account_io_merge(struct request *req, struct request *next)
> > {
> > - if (blk_do_io_stat(req)) {
> > + if (req->rq_disk && blk_rq_io_stat(next)) {
>
> This at least needs a comment, it's not at all directly clear why we are
> checking 'next' for io stat and ->rq_disk in 'req'. Since it's just
> called from that one spot, it would be cleaner to do:
>
> /*
> * 'next' is going away, so update stats accordingly
> */
> if (blk_rq_io_stat(next))
> blk_account_io_merge(req->rq_disk, req->sector);
>
> and have blk_account_io_merge() be more ala:
>
> static void blk_account_io_merge(struct request *req)
> {
> struct hd_struct *part;
> int cpu;
>
> cpu = part_stat_lock();
> part = disk_map_sector_rcu(disk, sector);
> ...
> }

BTW, it seems there's a current problem with this construct. If 'req'
and 'next' reside on different partitions, the accounting will be wrong.
This won't happen with normal fs activity of course, but it's definitely
possible with buffered (or O_DIRECT) IO on the full device.
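
Concretely (a sketch, assuming the dispatch-time accounting in
drive_stat_acct() of that era): the in-flight count goes up on whatever
partition 'next' maps to, while the merge-time decrement resolves the
partition from 'req':

	/* at dispatch, 'next' was counted on its own partition */
	part = disk_map_sector_rcu(next->rq_disk, next->sector);
	part_inc_in_flight(part);

	/* at merge time, the decrement is resolved through 'req' */
	part = disk_map_sector_rcu(req->rq_disk, req->sector);
	part_dec_in_flight(part);	/* wrong partition if they differ */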

--
Jens Axboe

2009-04-17 12:24:52

by Jerome Marchand

Subject: Re: [PATCH v2] block: simplify I/O stat accounting

Jens Axboe wrote:
> On Fri, Apr 17 2009, Jens Axboe wrote:
>> On second thought, not sure why you add 'iostat' for this. It would be
>> OK to just do
>>
>> if (blk_queue_io_stat(q))
>> rw_flags |= REQ_IO_STAT;
>>
>> since it's just used for the allocation call, and the trace call (which
>> does & 1 on it anyway).
>>
OK.

>>> diff --git a/block/blk-merge.c b/block/blk-merge.c
>>> index 63760ca..6a05270 100644
>>> --- a/block/blk-merge.c
>>> +++ b/block/blk-merge.c
>>> @@ -338,9 +338,9 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
>>> return 1;
>>> }
>>>
>>> -static void blk_account_io_merge(struct request *req)
>>> +static void blk_account_io_merge(struct request *req, struct request *next)
>>> {
>>> - if (blk_do_io_stat(req)) {
>>> + if (req->rq_disk && blk_rq_io_stat(next)) {
>> This at least needs a comment, it's not at all directly clear why we are
>> checking 'next' for io stat and ->rq_disk in 'req'. Since it's just
>> called from that one spot, it would be cleaner to do:
>>
>> /*
>> * 'next' is going away, so update stats accordingly
>> */
>> if (blk_rq_io_stat(next))
>> blk_account_io_merge(req->rq_disk, req->sector);
>>
>> and have blk_account_io_merge() be more ala:
>>
>> static void blk_account_io_merge(struct request *req)
>> {
>> struct hd_struct *part;
>> int cpu;
>>
>> cpu = part_stat_lock();
>> part = disk_map_sector_rcu(disk, sector);
>> ...
>> }
>
> BTW, it seems there's a current problem with this construct. If 'req'
> and 'next' reside on different partitions, the accounting will be wrong.
> This won't happen with normal fs activity of course, but it's definitely
> possible with buffered (or O_DIRECT) IO on the full device.
>

You're right. We may end up decreasing in_flight on the wrong partition.
I think keeping blk_account_io_merge() unchanged but calling it for the
next request would solve that:

- blk_account_io_merge(req)
+ blk_account_io_merge(next)

We would still have the request payload accounted to the wrong partition
(as it always was), but I don't think that small inaccuracy really matters.

Jérôme

2009-04-17 12:30:35

by Jens Axboe

Subject: Re: [PATCH v2] block: simplify I/O stat accounting

On Fri, Apr 17 2009, Jerome Marchand wrote:
> Jens Axboe wrote:
> > On Fri, Apr 17 2009, Jens Axboe wrote:
> >> On second thought, not sure why you add 'iostat' for this. It would be
> >> OK to just do
> >>
> >> if (blk_queue_io_stat(q))
> >> rw_flags |= REQ_IO_STAT;
> >>
> >> since it's just used for the allocation call, and the trace call (which
> >> does & 1 on it anyway).
> >>
> OK.
>
> >>> diff --git a/block/blk-merge.c b/block/blk-merge.c
> >>> index 63760ca..6a05270 100644
> >>> --- a/block/blk-merge.c
> >>> +++ b/block/blk-merge.c
> >>> @@ -338,9 +338,9 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
> >>> return 1;
> >>> }
> >>>
> >>> -static void blk_account_io_merge(struct request *req)
> >>> +static void blk_account_io_merge(struct request *req, struct request *next)
> >>> {
> >>> - if (blk_do_io_stat(req)) {
> >>> + if (req->rq_disk && blk_rq_io_stat(next)) {
> >> This at least needs a comment, it's not at all directly clear why we are
> >> checking 'next' for io stat and ->rq_disk in 'req'. Since it's just
> >> called from that one spot, it would be cleaner to do:
> >>
> >> /*
> >> * 'next' is going away, so update stats accordingly
> >> */
> >> if (blk_rq_io_stat(next))
> >> blk_account_io_merge(req->rq_disk, req->sector);
> >>
> >> and have blk_account_io_merge() be more ala:
> >>
> >> static void blk_account_io_merge(struct request *req)
> >> {
> >> struct hd_struct *part;
> >> int cpu;
> >>
> >> cpu = part_stat_lock();
> >> part = disk_map_sector_rcu(disk, sector);
> >> ...
> >> }
> >
> > BTW, it seems there's a current problem with this construct. If 'req'
> > and 'next' reside on different partitions, the accounting will be wrong.
> > This won't happen with normal fs activity of course, but it's definitely
> > possible with buffered (or O_DIRECT) IO on the full device.
> >
>
> You're right. We may end up decreasing in_flight on the wrong partition.
> I think keeping blk_account_io_merge() unchanged but calling it for the
> next request would solve that:
>
> - blk_account_io_merge(req)
> + blk_account_io_merge(next)
>
> We would still have the request payload accounted to the wrong partition
> (as it always was), but I don't think that small inaccuracy really matters.

Yes, just using 'next' is clearly the better approach here. It's still
not perfect, but it's probably not worth it to do anything about this. It
should be commented, though :-)

--
Jens Axboe

2009-04-21 13:32:24

by Jerome Marchand

[permalink] [raw]
Subject: [PATCH v3] block: simplify I/O stat accounting


This simplifies I/O stat accounting switching code and separates it
completely from I/O scheduler switch code.

Requests are accounted according to the state of their request queue
at the time of the request allocation. There is no need anymore to
flush the request queue when switching I/O accounting state.


Signed-off-by: Jerome Marchand <[email protected]>
---
block/blk-core.c | 6 ++++--
block/blk-merge.c | 5 ++++-
block/blk-sysfs.c | 4 ----
block/blk.h | 7 +------
include/linux/blkdev.h | 3 +++
5 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 07ab754..2998fe3 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -643,7 +643,7 @@ static inline void blk_free_request(struct request_queue *q, struct request *rq)
}

static struct request *
-blk_alloc_request(struct request_queue *q, int rw, int priv, gfp_t gfp_mask)
+blk_alloc_request(struct request_queue *q, int flags, int priv, gfp_t gfp_mask)
{
struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);

@@ -652,7 +652,7 @@ blk_alloc_request(struct request_queue *q, int rw, int priv, gfp_t gfp_mask)

blk_rq_init(q, rq);

- rq->cmd_flags = rw | REQ_ALLOCED;
+ rq->cmd_flags = flags | REQ_ALLOCED;

if (priv) {
if (unlikely(elv_set_request(q, rq, gfp_mask))) {
@@ -792,6 +792,8 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
if (priv)
rl->elvpriv++;

+ if (blk_queue_io_stat(q))
+ rw_flags |= REQ_IO_STAT;
spin_unlock_irq(q->queue_lock);

rq = blk_alloc_request(q, rw_flags, priv, gfp_mask);
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 63760ca..23d2a6f 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -402,7 +402,10 @@ static int attempt_merge(struct request_queue *q, struct request *req,

elv_merge_requests(q, req, next);

- blk_account_io_merge(req);
+ /*
+ * 'next' is going away, so update stats accordingly
+ */
+ blk_account_io_merge(next);

req->ioprio = ioprio_best(req->ioprio, next->ioprio);
if (blk_rq_cpu_valid(next))
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index cac4e9f..3ff9bba 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -209,14 +209,10 @@ static ssize_t queue_iostats_store(struct request_queue *q, const char *page,
ssize_t ret = queue_var_store(&stats, page, count);

spin_lock_irq(q->queue_lock);
- elv_quiesce_start(q);
-
if (stats)
queue_flag_set(QUEUE_FLAG_IO_STAT, q);
else
queue_flag_clear(QUEUE_FLAG_IO_STAT, q);
-
- elv_quiesce_end(q);
spin_unlock_irq(q->queue_lock);

return ret;
diff --git a/block/blk.h b/block/blk.h
index 5dfc412..79c85f7 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -114,12 +114,7 @@ static inline int blk_cpu_to_group(int cpu)

static inline int blk_do_io_stat(struct request *rq)
{
- struct gendisk *disk = rq->rq_disk;
-
- if (!disk || !disk->queue)
- return 0;
-
- return blk_queue_io_stat(disk->queue) && (rq->cmd_flags & REQ_ELVPRIV);
+ return rq->rq_disk && blk_rq_io_stat(rq);
}

#endif
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ba54c83..2755d5c 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -118,6 +118,7 @@ enum rq_flag_bits {
__REQ_COPY_USER, /* contains copies of user pages */
__REQ_INTEGRITY, /* integrity metadata has been remapped */
__REQ_NOIDLE, /* Don't anticipate more IO after this one */
+ __REQ_IO_STAT, /* account I/O stat */
__REQ_NR_BITS, /* stops here */
};

@@ -145,6 +146,7 @@ enum rq_flag_bits {
#define REQ_COPY_USER (1 << __REQ_COPY_USER)
#define REQ_INTEGRITY (1 << __REQ_INTEGRITY)
#define REQ_NOIDLE (1 << __REQ_NOIDLE)
+#define REQ_IO_STAT (1 << __REQ_IO_STAT)

#define BLK_MAX_CDB 16

@@ -598,6 +600,7 @@ enum {
blk_failfast_transport(rq) || \
blk_failfast_driver(rq))
#define blk_rq_started(rq) ((rq)->cmd_flags & REQ_STARTED)
+#define blk_rq_io_stat(rq) ((rq)->cmd_flags & REQ_IO_STAT)

#define blk_account_rq(rq) (blk_rq_started(rq) && (blk_fs_request(rq) || blk_discard_rq(rq)))

2009-04-22 12:16:51

by Jens Axboe

Subject: Re: [PATCH v3] block: simplify I/O stat accounting

On Tue, Apr 21 2009, Jerome Marchand wrote:
>
> This simplifies I/O stat accounting switching code and separates it
> completely from I/O scheduler switch code.
>
> Requests are accounted according to the state of their request queue
> at the time of the request allocation. There is no need anymore to
> flush the request queue when switching I/O accounting state.

Thanks Jerome, applied!

>
>
> Signed-off-by: Jerome Marchand <[email protected]>
> ---
> block/blk-core.c | 6 ++++--
> block/blk-merge.c | 5 ++++-
> block/blk-sysfs.c | 4 ----
> block/blk.h | 7 +------
> include/linux/blkdev.h | 3 +++
> 5 files changed, 12 insertions(+), 13 deletions(-)
>
> diff --git a/block/blk-core.c b/block/blk-core.c
> index 07ab754..2998fe3 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -643,7 +643,7 @@ static inline void blk_free_request(struct request_queue *q, struct request *rq)
> }
>
> static struct request *
> -blk_alloc_request(struct request_queue *q, int rw, int priv, gfp_t gfp_mask)
> +blk_alloc_request(struct request_queue *q, int flags, int priv, gfp_t gfp_mask)
> {
> struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
>
> @@ -652,7 +652,7 @@ blk_alloc_request(struct request_queue *q, int rw, int priv, gfp_t gfp_mask)
>
> blk_rq_init(q, rq);
>
> - rq->cmd_flags = rw | REQ_ALLOCED;
> + rq->cmd_flags = flags | REQ_ALLOCED;
>
> if (priv) {
> if (unlikely(elv_set_request(q, rq, gfp_mask))) {
> @@ -792,6 +792,8 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
> if (priv)
> rl->elvpriv++;
>
> + if (blk_queue_io_stat(q))
> + rw_flags |= REQ_IO_STAT;
> spin_unlock_irq(q->queue_lock);
>
> rq = blk_alloc_request(q, rw_flags, priv, gfp_mask);
> diff --git a/block/blk-merge.c b/block/blk-merge.c
> index 63760ca..23d2a6f 100644
> --- a/block/blk-merge.c
> +++ b/block/blk-merge.c
> @@ -402,7 +402,10 @@ static int attempt_merge(struct request_queue *q, struct request *req,
>
> elv_merge_requests(q, req, next);
>
> - blk_account_io_merge(req);
> + /*
> + * 'next' is going away, so update stats accordingly
> + */
> + blk_account_io_merge(next);
>
> req->ioprio = ioprio_best(req->ioprio, next->ioprio);
> if (blk_rq_cpu_valid(next))
> diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
> index cac4e9f..3ff9bba 100644
> --- a/block/blk-sysfs.c
> +++ b/block/blk-sysfs.c
> @@ -209,14 +209,10 @@ static ssize_t queue_iostats_store(struct request_queue *q, const char *page,
> ssize_t ret = queue_var_store(&stats, page, count);
>
> spin_lock_irq(q->queue_lock);
> - elv_quiesce_start(q);
> -
> if (stats)
> queue_flag_set(QUEUE_FLAG_IO_STAT, q);
> else
> queue_flag_clear(QUEUE_FLAG_IO_STAT, q);
> -
> - elv_quiesce_end(q);
> spin_unlock_irq(q->queue_lock);
>
> return ret;
> diff --git a/block/blk.h b/block/blk.h
> index 5dfc412..79c85f7 100644
> --- a/block/blk.h
> +++ b/block/blk.h
> @@ -114,12 +114,7 @@ static inline int blk_cpu_to_group(int cpu)
>
> static inline int blk_do_io_stat(struct request *rq)
> {
> - struct gendisk *disk = rq->rq_disk;
> -
> - if (!disk || !disk->queue)
> - return 0;
> -
> - return blk_queue_io_stat(disk->queue) && (rq->cmd_flags & REQ_ELVPRIV);
> + return rq->rq_disk && blk_rq_io_stat(rq);
> }
>
> #endif
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index ba54c83..2755d5c 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -118,6 +118,7 @@ enum rq_flag_bits {
> __REQ_COPY_USER, /* contains copies of user pages */
> __REQ_INTEGRITY, /* integrity metadata has been remapped */
> __REQ_NOIDLE, /* Don't anticipate more IO after this one */
> + __REQ_IO_STAT, /* account I/O stat */
> __REQ_NR_BITS, /* stops here */
> };
>
> @@ -145,6 +146,7 @@ enum rq_flag_bits {
> #define REQ_COPY_USER (1 << __REQ_COPY_USER)
> #define REQ_INTEGRITY (1 << __REQ_INTEGRITY)
> #define REQ_NOIDLE (1 << __REQ_NOIDLE)
> +#define REQ_IO_STAT (1 << __REQ_IO_STAT)
>
> #define BLK_MAX_CDB 16
>
> @@ -598,6 +600,7 @@ enum {
> blk_failfast_transport(rq) || \
> blk_failfast_driver(rq))
> #define blk_rq_started(rq) ((rq)->cmd_flags & REQ_STARTED)
> +#define blk_rq_io_stat(rq) ((rq)->cmd_flags & REQ_IO_STAT)
>
> #define blk_account_rq(rq) (blk_rq_started(rq) && (blk_fs_request(rq) || blk_discard_rq(rq)))
>

--
Jens Axboe