2011-03-11 21:06:34

by Justin TerAvest

[permalink] [raw]
Subject: [PATCH] Add unaccounted time to timeslice_used.

There are two kind of times that tasks are not charged for: the first
seek and the extra time slice used over the allocated timeslice. Both
of these exported as a new unaccounted_time stat.

I think it would be good to have this reported in 'time' as well, but
that is probably a separate discussion.

Signed-off-by: Justin TerAvest <[email protected]>
---
block/blk-cgroup.c | 16 +++++++++++++++-
block/blk-cgroup.h | 12 ++++++++++--
block/cfq-iosched.c | 21 +++++++++++++--------
block/cfq.h | 6 +++---
4 files changed, 41 insertions(+), 14 deletions(-)

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 455768a..77ee3c1 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -371,12 +371,14 @@ void blkiocg_update_io_remove_stats(struct blkio_group *blkg,
}
EXPORT_SYMBOL_GPL(blkiocg_update_io_remove_stats);

-void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time)
+void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time,
+ unsigned long unaccounted_time)
{
unsigned long flags;

spin_lock_irqsave(&blkg->stats_lock, flags);
blkg->stats.time += time;
+ blkg->stats.unaccounted_time += unaccounted_time;
spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used);
@@ -603,6 +605,9 @@ static uint64_t blkio_get_stat(struct blkio_group *blkg,
if (type == BLKIO_STAT_SECTORS)
return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
blkg->stats.sectors, cb, dev);
+ if (type == BLKIO_STAT_UNACCOUNTED_TIME)
+ return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
+ blkg->stats.unaccounted_time, cb, dev);
#ifdef CONFIG_DEBUG_BLK_CGROUP
if (type == BLKIO_STAT_AVG_QUEUE_SIZE) {
uint64_t sum = blkg->stats.avg_queue_size_sum;
@@ -1106,6 +1111,9 @@ static int blkiocg_file_read_map(struct cgroup *cgrp, struct cftype *cft,
case BLKIO_PROP_sectors:
return blkio_read_blkg_stats(blkcg, cft, cb,
BLKIO_STAT_SECTORS, 0);
+ case BLKIO_PROP_unaccounted_time:
+ return blkio_read_blkg_stats(blkcg, cft, cb,
+ BLKIO_STAT_UNACCOUNTED_TIME, 0);
case BLKIO_PROP_io_service_bytes:
return blkio_read_blkg_stats(blkcg, cft, cb,
BLKIO_STAT_SERVICE_BYTES, 1);
@@ -1262,6 +1270,12 @@ struct cftype blkio_files[] = {
.read_map = blkiocg_file_read_map,
},
{
+ .name = "unaccounted_time",
+ .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
+ BLKIO_PROP_unaccounted_time),
+ .read_map = blkiocg_file_read_map,
+ },
+ {
.name = "io_service_bytes",
.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
BLKIO_PROP_io_service_bytes),
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index ea4861b..d71409e 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -49,6 +49,8 @@ enum stat_type {
/* All the single valued stats go below this */
BLKIO_STAT_TIME,
BLKIO_STAT_SECTORS,
+ /* Time not charged to this cgroup */
+ BLKIO_STAT_UNACCOUNTED_TIME,
#ifdef CONFIG_DEBUG_BLK_CGROUP
BLKIO_STAT_AVG_QUEUE_SIZE,
BLKIO_STAT_IDLE_TIME,
@@ -81,6 +83,7 @@ enum blkcg_file_name_prop {
BLKIO_PROP_io_serviced,
BLKIO_PROP_time,
BLKIO_PROP_sectors,
+ BLKIO_PROP_unaccounted_time,
BLKIO_PROP_io_service_time,
BLKIO_PROP_io_wait_time,
BLKIO_PROP_io_merged,
@@ -114,6 +117,8 @@ struct blkio_group_stats {
/* total disk time and nr sectors dispatched by this group */
uint64_t time;
uint64_t sectors;
+ /* Time not charged to this cgroup */
+ uint64_t unaccounted_time;
uint64_t stat_arr[BLKIO_STAT_QUEUED + 1][BLKIO_STAT_TOTAL];
#ifdef CONFIG_DEBUG_BLK_CGROUP
/* Sum of number of IOs queued across all samples */
@@ -293,7 +298,8 @@ extern int blkiocg_del_blkio_group(struct blkio_group *blkg);
extern struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg,
void *key);
void blkiocg_update_timeslice_used(struct blkio_group *blkg,
- unsigned long time);
+ unsigned long time,
+ unsigned long unaccounted_time);
void blkiocg_update_dispatch_stats(struct blkio_group *blkg, uint64_t bytes,
bool direction, bool sync);
void blkiocg_update_completion_stats(struct blkio_group *blkg,
@@ -319,7 +325,9 @@ blkiocg_del_blkio_group(struct blkio_group *blkg) { return 0; }
static inline struct blkio_group *
blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key) { return NULL; }
static inline void blkiocg_update_timeslice_used(struct blkio_group *blkg,
- unsigned long time) {}
+ unsigned long time,
+ unsigned long unaccounted_time)
+{}
static inline void blkiocg_update_dispatch_stats(struct blkio_group *blkg,
uint64_t bytes, bool direction, bool sync) {}
static inline void blkiocg_update_completion_stats(struct blkio_group *blkg,
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 7be4c79..0ba1e5c 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -909,7 +909,8 @@ cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
cfq_blkiocg_update_dequeue_stats(&cfqg->blkg, 1);
}

-static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq)
+static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq,
+ unsigned int *unaccounted_time)
{
unsigned int slice_used;

@@ -928,8 +929,13 @@ static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq)
1);
} else {
slice_used = jiffies - cfqq->slice_start;
- if (slice_used > cfqq->allocated_slice)
+ if (slice_used > cfqq->allocated_slice) {
+ *unaccounted_time = slice_used - cfqq->allocated_slice;
slice_used = cfqq->allocated_slice;
+ }
+ if (time_after(cfqq->slice_start, cfqq->dispatch_start))
+ *unaccounted_time += cfqq->slice_start -
+ cfqq->dispatch_start;
}

return slice_used;
@@ -939,12 +945,12 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
struct cfq_queue *cfqq)
{
struct cfq_rb_root *st = &cfqd->grp_service_tree;
- unsigned int used_sl, charge;
+ unsigned int used_sl, charge, unaccounted_sl = 0;
int nr_sync = cfqg->nr_cfqq - cfqg_busy_async_queues(cfqd, cfqg)
- cfqg->service_tree_idle.count;

BUG_ON(nr_sync < 0);
- used_sl = charge = cfq_cfqq_slice_usage(cfqq);
+ used_sl = charge = cfq_cfqq_slice_usage(cfqq, &unaccounted_sl);

if (iops_mode(cfqd))
charge = cfqq->slice_dispatch;
@@ -970,7 +976,8 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
cfq_log_cfqq(cfqq->cfqd, cfqq, "sl_used=%u disp=%u charge=%u iops=%u"
" sect=%u", used_sl, cfqq->slice_dispatch, charge,
iops_mode(cfqd), cfqq->nr_sectors);
- cfq_blkiocg_update_timeslice_used(&cfqg->blkg, used_sl);
+ cfq_blkiocg_update_timeslice_used(&cfqg->blkg, used_sl,
+ unaccounted_sl);
cfq_blkiocg_set_start_empty_time(&cfqg->blkg);
}

@@ -3314,9 +3321,7 @@ static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
BUG_ON(!cfq_cfqq_on_rr(cfqq));

cfq_service_tree_add(cfqd, cfqq, 1);
-
- cfqq->slice_end = 0;
- cfq_mark_cfqq_slice_new(cfqq);
+ __cfq_set_active_queue(cfqd, cfqq);
}

/*
diff --git a/block/cfq.h b/block/cfq.h
index 54a6d90..2a15592 100644
--- a/block/cfq.h
+++ b/block/cfq.h
@@ -16,9 +16,9 @@ static inline void cfq_blkiocg_update_dequeue_stats(struct blkio_group *blkg,
}

static inline void cfq_blkiocg_update_timeslice_used(struct blkio_group *blkg,
- unsigned long time)
+ unsigned long time, unsigned long unaccounted_time)
{
- blkiocg_update_timeslice_used(blkg, time);
+ blkiocg_update_timeslice_used(blkg, time, unaccounted_time);
}

static inline void cfq_blkiocg_set_start_empty_time(struct blkio_group *blkg)
@@ -85,7 +85,7 @@ static inline void cfq_blkiocg_update_dequeue_stats(struct blkio_group *blkg,
unsigned long dequeue) {}

static inline void cfq_blkiocg_update_timeslice_used(struct blkio_group *blkg,
- unsigned long time) {}
+ unsigned long time, unsigned long unaccounted_time) {}
static inline void cfq_blkiocg_set_start_empty_time(struct blkio_group *blkg) {}
static inline void cfq_blkiocg_update_io_remove_stats(struct blkio_group *blkg,
bool direction, bool sync) {}
--
1.7.3.1


2011-03-12 04:17:32

by Steven Rostedt

[permalink] [raw]
Subject: Re: [PATCH] Add unaccounted time to timeslice_used.

On Fri, Mar 11, 2011 at 01:06:12PM -0800, Justin TerAvest wrote:
> There are two kind of times that tasks are not charged for: the first
> seek and the extra time slice used over the allocated timeslice. Both
> of these exported as a new unaccounted_time stat.
>
> I think it would be good to have this reported in 'time' as well, but
> that is probably a separate discussion.
>
> Signed-off-by: Justin TerAvest <[email protected]>
> ---
> block/blk-cgroup.c | 16 +++++++++++++++-
> block/blk-cgroup.h | 12 ++++++++++--
> block/cfq-iosched.c | 21 +++++++++++++--------
> block/cfq.h | 6 +++---
> 4 files changed, 41 insertions(+), 14 deletions(-)

Hi! Just an FYI, could you add what subsystem you are changing into the
subject. Like:

[PATCH] block: Add unaccounted time to timeslice_used

Otherwise, the subject looks like it's modifying the kernel internal time
source.

Thanks,

-- Steve

2011-03-12 05:35:30

by Justin TerAvest

[permalink] [raw]
Subject: Re: [PATCH] Add unaccounted time to timeslice_used.

On Fri, Mar 11, 2011 at 8:17 PM, Steven Rostedt <[email protected]> wrote:
> On Fri, Mar 11, 2011 at 01:06:12PM -0800, Justin TerAvest wrote:
>> There are two kind of times that tasks are not charged for: the first
>> seek and the extra time slice used over the allocated timeslice. Both
>> of these exported as a new unaccounted_time stat.
>>
>> I think it would be good to have this reported in 'time' as well, but
>> that is probably a separate discussion.
>>
>> Signed-off-by: Justin TerAvest <[email protected]>
>> ---
>> ?block/blk-cgroup.c ?| ? 16 +++++++++++++++-
>> ?block/blk-cgroup.h ?| ? 12 ++++++++++--
>> ?block/cfq-iosched.c | ? 21 +++++++++++++--------
>> ?block/cfq.h ? ? ? ? | ? ?6 +++---
>> ?4 files changed, 41 insertions(+), 14 deletions(-)
>
> Hi! Just an FYI, could you add what subsystem you are changing into the
> subject. Like:
>
> [PATCH] block: Add unaccounted time to timeslice_used
>
> Otherwise, the subject looks like it's modifying the kernel internal time
> source.

Sorry about that. I'll be more careful in the future.

Thanks,
Justin

>
> Thanks,
>
> -- Steve
>
>

2011-03-12 15:49:57

by Jens Axboe

[permalink] [raw]
Subject: Re: [PATCH] Add unaccounted time to timeslice_used.

On 2011-03-11 22:06, Justin TerAvest wrote:
> There are two kind of times that tasks are not charged for: the first
> seek and the extra time slice used over the allocated timeslice. Both
> of these exported as a new unaccounted_time stat.
>
> I think it would be good to have this reported in 'time' as well, but
> that is probably a separate discussion.

I think that is a very good idea, helps to build confidence when
testing. I'll apply this, thanks Justin.

--
Jens Axboe

2011-03-14 13:55:22

by Vivek Goyal

[permalink] [raw]
Subject: Re: [PATCH] Add unaccounted time to timeslice_used.

On Fri, Mar 11, 2011 at 01:06:12PM -0800, Justin TerAvest wrote:
> There are two kind of times that tasks are not charged for: the first
> seek and the extra time slice used over the allocated timeslice. Both
> of these exported as a new unaccounted_time stat.
>
> I think it would be good to have this reported in 'time' as well, but
> that is probably a separate discussion.
>

Justin,

I would say that for such optimization do make sure that you mention that
these are useful only if one is driving a queue depth of 1.

Otherwise previous queue might have dumped bunch of requests in device
and expired. Now new queue's first request completion time is also
impacted by the requests dumped by other queues.

There are already so many stats which I have never used so far and I have
not encountered anybody else using these either. I think primary reason
being that in general nobody forced the queue depth of 1 hence most of the
timing stats are of no use.

So personally I am not very keen on keep on increasing number of stats in
CFQ which are useful only when using queue depth 1 as that might not be
the common case. But Jens likes it, so....

Also a comment inline.

[..]
> @@ -3314,9 +3321,7 @@ static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
> BUG_ON(!cfq_cfqq_on_rr(cfqq));
>
> cfq_service_tree_add(cfqd, cfqq, 1);
> -
> - cfqq->slice_end = 0;
> - cfq_mark_cfqq_slice_new(cfqq);
> + __cfq_set_active_queue(cfqd, cfqq);

So far a new queue selection was always in select_queue(). Now this will
change it and new queue selection will also take place in
cfq_preempt_queue().

Also I think this is not right. It is not necessary that we select the
preempting queue. For example a sync queue in one group can preempt the
async in root group but it might happen that we still select again
the root group's sync queue for dispatch.

So queue selection logic should be driven by select_queue() which selects
group first then workload with-in group and then queue with-in workload
and we shoud not be setting active queue here.

Thanks
Vivek

2011-03-14 21:55:52

by Justin TerAvest

[permalink] [raw]
Subject: Re: [PATCH] Add unaccounted time to timeslice_used.

On Mon, Mar 14, 2011 at 6:55 AM, Vivek Goyal <[email protected]> wrote:
> On Fri, Mar 11, 2011 at 01:06:12PM -0800, Justin TerAvest wrote:
>> There are two kind of times that tasks are not charged for: the first
>> seek and the extra time slice used over the allocated timeslice. Both
>> of these exported as a new unaccounted_time stat.
>>
>> I think it would be good to have this reported in 'time' as well, but
>> that is probably a separate discussion.
>>
>
> Justin,
>
> I would say that for such optimization do make sure that you mention that
> these are useful only if one is driving a queue depth of 1.

Hi Vivek,

That's a good point. I should have mentioned that.

>
> Otherwise previous queue might have dumped bunch of requests in device
> and expired. Now new queue's first request completion time is also
> impacted by the requests dumped by other queues.
>
> There are already so many stats which I have never used so far and I have
> not encountered anybody else using these either. I think primary reason
> being that in general nobody forced the queue depth of 1 hence most of the
> timing stats are of no use.

We could probably put the data collected here back into "time"
eventually, but having it separate right now helps build confidence in
the accuracy of the stats.

>
> So personally I am not very keen on keep on increasing number of stats in
> CFQ which are useful only when using queue depth 1 as that might not be
> the common case. But Jens likes it, so....
>
> Also a comment inline.
>
> [..]
>> @@ -3314,9 +3321,7 @@ static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
>> ? ? ? BUG_ON(!cfq_cfqq_on_rr(cfqq));
>>
>> ? ? ? cfq_service_tree_add(cfqd, cfqq, 1);
>> -
>> - ? ? cfqq->slice_end = 0;
>> - ? ? cfq_mark_cfqq_slice_new(cfqq);
>> + ? ? __cfq_set_active_queue(cfqd, cfqq);
>
> So far a new queue selection was always in select_queue(). Now this will
> change it and new queue selection will also take place in
> cfq_preempt_queue().
>
> Also I think this is not right. It is not necessary that we select the
> preempting queue. For example a sync queue in one group can preempt the
> async in root group but it might happen that we still select again
> the root group's sync queue for dispatch.
>
> So queue selection logic should be driven by select_queue() which selects
> group first then workload with-in group and then queue with-in workload
> and we shoud not be setting active queue here.

That sounds reasonable. I will send out another version of the patch
that will only clear the stats for the cfqq.

Thanks,
Justin

>
> Thanks
> Vivek
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at ?http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at ?http://www.tux.org/lkml/
>

2011-03-14 22:11:43

by Vivek Goyal

[permalink] [raw]
Subject: Re: [PATCH] Add unaccounted time to timeslice_used.

On Mon, Mar 14, 2011 at 02:55:27PM -0700, Justin TerAvest wrote:
> On Mon, Mar 14, 2011 at 6:55 AM, Vivek Goyal <[email protected]> wrote:
> > On Fri, Mar 11, 2011 at 01:06:12PM -0800, Justin TerAvest wrote:
> >> There are two kind of times that tasks are not charged for: the first
> >> seek and the extra time slice used over the allocated timeslice. Both
> >> of these exported as a new unaccounted_time stat.
> >>
> >> I think it would be good to have this reported in 'time' as well, but
> >> that is probably a separate discussion.
> >>
> >
> > Justin,
> >
> > I would say that for such optimization do make sure that you mention that
> > these are useful only if one is driving a queue depth of 1.
>
> Hi Vivek,
>
> That's a good point. I should have mentioned that.
>
> >
> > Otherwise previous queue might have dumped bunch of requests in device
> > and expired. Now new queue's first request completion time is also
> > impacted by the requests dumped by other queues.
> >
> > There are already so many stats which I have never used so far and I have
> > not encountered anybody else using these either. I think primary reason
> > being that in general nobody forced the queue depth of 1 hence most of the
> > timing stats are of no use.
>
> We could probably put the data collected here back into "time"
> eventually, but having it separate right now helps build confidence in
> the accuracy of the stats.

Again putting it into time will make sense only if request queue depth
is 1. So you shall have to keep track whether we are driving queue
depth 1 or not and then add the data accordingly.

In fact, instead of adding a new file for unaccounted_time, I think a
better idea would be (as you said), that add it to "time" if we are
driving queue depth 1. In that case "unaccounted_time" can at max be
a debugging feature and can be moved under #CONFIG_DEBUG_BLK_CGROUP or
get rid of unaccounted_time entirely.

Jens what do you think?

I really would prefer avoiding another stat file until and unless it is really
useful.

>
> >
> > So personally I am not very keen on keep on increasing number of stats in
> > CFQ which are useful only when using queue depth 1 as that might not be
> > the common case. But Jens likes it, so....
> >
> > Also a comment inline.
> >
> > [..]
> >> @@ -3314,9 +3321,7 @@ static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
> >> ? ? ? BUG_ON(!cfq_cfqq_on_rr(cfqq));
> >>
> >> ? ? ? cfq_service_tree_add(cfqd, cfqq, 1);
> >> -
> >> - ? ? cfqq->slice_end = 0;
> >> - ? ? cfq_mark_cfqq_slice_new(cfqq);
> >> + ? ? __cfq_set_active_queue(cfqd, cfqq);
> >
> > So far a new queue selection was always in select_queue(). Now this will
> > change it and new queue selection will also take place in
> > cfq_preempt_queue().
> >
> > Also I think this is not right. It is not necessary that we select the
> > preempting queue. For example a sync queue in one group can preempt the
> > async in root group but it might happen that we still select again
> > the root group's sync queue for dispatch.
> >
> > So queue selection logic should be driven by select_queue() which selects
> > group first then workload with-in group and then queue with-in workload
> > and we shoud not be setting active queue here.
>
> That sounds reasonable. I will send out another version of the patch
> that will only clear the stats for the cfqq.

I think Jens already committed this version of patch. So please general
an incremental patch on top of his for-2.6.39/core branch.

Thanks
Vivek

2011-03-22 17:33:40

by Vivek Goyal

[permalink] [raw]
Subject: Re: [PATCH] Add unaccounted time to timeslice_used.

On Mon, Mar 14, 2011 at 02:55:27PM -0700, Justin TerAvest wrote:
> On Mon, Mar 14, 2011 at 6:55 AM, Vivek Goyal <[email protected]> wrote:
> > On Fri, Mar 11, 2011 at 01:06:12PM -0800, Justin TerAvest wrote:
> >> There are two kind of times that tasks are not charged for: the first
> >> seek and the extra time slice used over the allocated timeslice. Both
> >> of these exported as a new unaccounted_time stat.
> >>
> >> I think it would be good to have this reported in 'time' as well, but
> >> that is probably a separate discussion.
> >>
> >
> > Justin,
> >
> > I would say that for such optimization do make sure that you mention that
> > these are useful only if one is driving a queue depth of 1.
>
> Hi Vivek,
>
> That's a good point. I should have mentioned that.
>
> >
> > Otherwise previous queue might have dumped bunch of requests in device
> > and expired. Now new queue's first request completion time is also
> > impacted by the requests dumped by other queues.
> >
> > There are already so many stats which I have never used so far and I have
> > not encountered anybody else using these either. I think primary reason
> > being that in general nobody forced the queue depth of 1 hence most of the
> > timing stats are of no use.
>
> We could probably put the data collected here back into "time"
> eventually, but having it separate right now helps build confidence in
> the accuracy of the stats.
>
> >
> > So personally I am not very keen on keep on increasing number of stats in
> > CFQ which are useful only when using queue depth 1 as that might not be
> > the common case. But Jens likes it, so....
> >
> > Also a comment inline.
> >
> > [..]
> >> @@ -3314,9 +3321,7 @@ static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
> >> ? ? ? BUG_ON(!cfq_cfqq_on_rr(cfqq));
> >>
> >> ? ? ? cfq_service_tree_add(cfqd, cfqq, 1);
> >> -
> >> - ? ? cfqq->slice_end = 0;
> >> - ? ? cfq_mark_cfqq_slice_new(cfqq);
> >> + ? ? __cfq_set_active_queue(cfqd, cfqq);
> >
> > So far a new queue selection was always in select_queue(). Now this will
> > change it and new queue selection will also take place in
> > cfq_preempt_queue().
> >
> > Also I think this is not right. It is not necessary that we select the
> > preempting queue. For example a sync queue in one group can preempt the
> > async in root group but it might happen that we still select again
> > the root group's sync queue for dispatch.
> >
> > So queue selection logic should be driven by select_queue() which selects
> > group first then workload with-in group and then queue with-in workload
> > and we shoud not be setting active queue here.
>
> That sounds reasonable. I will send out another version of the patch
> that will only clear the stats for the cfqq.

Hi Justin,

Are you planning to send a fix?

- do not set active queue in preempt_queue()
- move unaccounted time under debug?

Thanks
Vivek

2011-03-22 17:48:21

by Vivek Goyal

[permalink] [raw]
Subject: Re: [PATCH] Add unaccounted time to timeslice_used.

On Tue, Mar 22, 2011 at 10:36:25AM -0700, Justin TerAvest wrote:
> On Tue, Mar 22, 2011 at 10:33 AM, Vivek Goyal <[email protected]> wrote:
>
> > On Mon, Mar 14, 2011 at 02:55:27PM -0700, Justin TerAvest wrote:
> > > On Mon, Mar 14, 2011 at 6:55 AM, Vivek Goyal <[email protected]> wrote:
> > > > On Fri, Mar 11, 2011 at 01:06:12PM -0800, Justin TerAvest wrote:
> > > >> There are two kind of times that tasks are not charged for: the first
> > > >> seek and the extra time slice used over the allocated timeslice. Both
> > > >> of these exported as a new unaccounted_time stat.
> > > >>
> > > >> I think it would be good to have this reported in 'time' as well, but
> > > >> that is probably a separate discussion.
> > > >>
> > > >
> > > > Justin,
> > > >
> > > > I would say that for such optimization do make sure that you mention
> > that
> > > > these are useful only if one is driving a queue depth of 1.
> > >
> > > Hi Vivek,
> > >
> > > That's a good point. I should have mentioned that.
> > >
> > > >
> > > > Otherwise previous queue might have dumped bunch of requests in device
> > > > and expired. Now new queue's first request completion time is also
> > > > impacted by the requests dumped by other queues.
> > > >
> > > > There are already so many stats which I have never used so far and I
> > have
> > > > not encountered anybody else using these either. I think primary reason
> > > > being that in general nobody forced the queue depth of 1 hence most of
> > the
> > > > timing stats are of no use.
> > >
> > > We could probably put the data collected here back into "time"
> > > eventually, but having it separate right now helps build confidence in
> > > the accuracy of the stats.
> > >
> > > >
> > > > So personally I am not very keen on keep on increasing number of stats
> > in
> > > > CFQ which are useful only when using queue depth 1 as that might not be
> > > > the common case. But Jens likes it, so....
> > > >
> > > > Also a comment inline.
> > > >
> > > > [..]
> > > >> @@ -3314,9 +3321,7 @@ static void cfq_preempt_queue(struct cfq_data
> > *cfqd, struct cfq_queue *cfqq)
> > > >> BUG_ON(!cfq_cfqq_on_rr(cfqq));
> > > >>
> > > >> cfq_service_tree_add(cfqd, cfqq, 1);
> > > >> -
> > > >> - cfqq->slice_end = 0;
> > > >> - cfq_mark_cfqq_slice_new(cfqq);
> > > >> + __cfq_set_active_queue(cfqd, cfqq);
> > > >
> > > > So far a new queue selection was always in select_queue(). Now this
> > will
> > > > change it and new queue selection will also take place in
> > > > cfq_preempt_queue().
> > > >
> > > > Also I think this is not right. It is not necessary that we select the
> > > > preempting queue. For example a sync queue in one group can preempt the
> > > > async in root group but it might happen that we still select again
> > > > the root group's sync queue for dispatch.
> > > >
> > > > So queue selection logic should be driven by select_queue() which
> > selects
> > > > group first then workload with-in group and then queue with-in workload
> > > > and we shoud not be setting active queue here.
> > >
> > > That sounds reasonable. I will send out another version of the patch
> > > that will only clear the stats for the cfqq.
> >
> > Hi Justin,
> >
> > Are you planning to send a fix?
> >
> > - do not set active queue in preempt_queue()
> >
>
> Yes. The reason I did not do this immediately was because I started to
> wonder if now we'll have a stale value of jiffies. :( If you want, I can
> still make that change immediately so that the active queue isn't set, at
> least for now.
>

I think fixing this is important otherwise it gets serving_group also
out of sync and that can lead to further bad effects.

unaccounted time is a debug feature which works only with queue depth
1, so even if there are little issues with jiffies, you can sort that
out in a follow up patch.

Thanks
Vivek

>
> > - move unaccounted time under debug?
> >
>
> Yes.
>
> I should be able to do this by the end of the day today.
>
>
> >
> > Thanks
> > Vivek
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> > the body of a message to [email protected]
> > More majordomo info at http://vger.kernel.org/majordomo-info.html
> > Please read the FAQ at http://www.tux.org/lkml/
> >

2011-03-22 17:50:26

by Justin TerAvest

[permalink] [raw]
Subject: Re: [PATCH] Add unaccounted time to timeslice_used.

On Tue, Mar 22, 2011 at 10:48 AM, Vivek Goyal <[email protected]> wrote:
>
> On Tue, Mar 22, 2011 at 10:36:25AM -0700, Justin TerAvest wrote:
> > On Tue, Mar 22, 2011 at 10:33 AM, Vivek Goyal <[email protected]> wrote:
> >
> > > On Mon, Mar 14, 2011 at 02:55:27PM -0700, Justin TerAvest wrote:
> > > > On Mon, Mar 14, 2011 at 6:55 AM, Vivek Goyal <[email protected]> wrote:
> > > > > On Fri, Mar 11, 2011 at 01:06:12PM -0800, Justin TerAvest wrote:
> > > > >> There are two kind of times that tasks are not charged for: the first
> > > > >> seek and the extra time slice used over the allocated timeslice. Both
> > > > >> of these exported as a new unaccounted_time stat.
> > > > >>
> > > > >> I think it would be good to have this reported in 'time' as well, but
> > > > >> that is probably a separate discussion.
> > > > >>
> > > > >
> > > > > Justin,
> > > > >
> > > > > I would say that for such optimization do make sure that you mention
> > > that
> > > > > these are useful only if one is driving a queue depth of 1.
> > > >
> > > > Hi Vivek,
> > > >
> > > > That's a good point. I should have mentioned that.
> > > >
> > > > >
> > > > > Otherwise previous queue might have dumped bunch of requests in device
> > > > > and expired. Now new queue's first request completion time is also
> > > > > impacted by the requests dumped by other queues.
> > > > >
> > > > > There are already so many stats which I have never used so far and I
> > > have
> > > > > not encountered anybody else using these either. I think primary reason
> > > > > being that in general nobody forced the queue depth of 1 hence most of
> > > the
> > > > > timing stats are of no use.
> > > >
> > > > We could probably put the data collected here back into "time"
> > > > eventually, but having it separate right now helps build confidence in
> > > > the accuracy of the stats.
> > > >
> > > > >
> > > > > So personally I am not very keen on keep on increasing number of stats
> > > in
> > > > > CFQ which are useful only when using queue depth 1 as that might not be
> > > > > the common case. But Jens likes it, so....
> > > > >
> > > > > Also a comment inline.
> > > > >
> > > > > [..]
> > > > >> @@ -3314,9 +3321,7 @@ static void cfq_preempt_queue(struct cfq_data
> > > *cfqd, struct cfq_queue *cfqq)
> > > > >> ? ? ? BUG_ON(!cfq_cfqq_on_rr(cfqq));
> > > > >>
> > > > >> ? ? ? cfq_service_tree_add(cfqd, cfqq, 1);
> > > > >> -
> > > > >> - ? ? cfqq->slice_end = 0;
> > > > >> - ? ? cfq_mark_cfqq_slice_new(cfqq);
> > > > >> + ? ? __cfq_set_active_queue(cfqd, cfqq);
> > > > >
> > > > > So far a new queue selection was always in select_queue(). Now this
> > > will
> > > > > change it and new queue selection will also take place in
> > > > > cfq_preempt_queue().
> > > > >
> > > > > Also I think this is not right. It is not necessary that we select the
> > > > > preempting queue. For example a sync queue in one group can preempt the
> > > > > async in root group but it might happen that we still select again
> > > > > the root group's sync queue for dispatch.
> > > > >
> > > > > So queue selection logic should be driven by select_queue() which
> > > selects
> > > > > group first then workload with-in group and then queue with-in workload
> > > > > and we shoud not be setting active queue here.
> > > >
> > > > That sounds reasonable. I will send out another version of the patch
> > > > that will only clear the stats for the cfqq.
> > >
> > > Hi Justin,
> > >
> > > Are you planning to send a fix?
> > >
> > > - do not set active queue in preempt_queue()
> > >
> >
> > Yes. The reason I did not do this immediately was because I started to
> > wonder if now we'll have a stale value of jiffies. :( If you want, I can
> > still make that change immediately so that the active queue isn't set, at
> > least for now.
> >
>
> I think fixing this is important otherwise it gets serving_group also
> out of sync and that can lead to further bad effects.
>
> unaccounted time is a debug feature which works only with queue depth
> 1, so even if there are little issues with jiffies, you can sort that
> out in a follow up patch.

Sounds good. I'll send out what I've got to fix the immediate issue
with setting the active group, then.

Thanks
Justin

>
> Thanks
> Vivek
>
> >
> > > - move unaccounted time under debug?
> > >
> >
> > Yes.
> >
> > I should be able to do this by the end of the day today.
> >
> >
> > >
> > > Thanks
> > > Vivek
> > > --
> > > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> > > the body of a message to [email protected]
> > > More majordomo info at ?http://vger.kernel.org/majordomo-info.html
> > > Please read the FAQ at ?http://www.tux.org/lkml/
> > >