2010-04-03 01:55:07

by Divyesh Shah

[permalink] [raw]
Subject: [PATCH 0/3][v2] blkio: IO controller stats

The following series implements some additional stats for IO controller.

These stats have helped us debug issues with earlier IO controller
versions and should be useful now as well.
We've been using these stats for monitoring and debugging problems after the
fact as these stats can be collected and stored for later use.

One might argue that most of this information can be exported using blktrace
when debugging. However, blktrace has non-trivial performance impact and
cannot be always turned on. These stats provide a way for continuous monitoring
without losing much performance on rotational disks. We've been able to look
at these stats and debug issues after problems have been reported in the wild
and understand the IO pattern of the affected workloads.
Some of these stats are also a good data source for high-level analysis and
capacity planning.

This patchset adds 4 stats and I will send out another patchset later for
stats like io_merged and some stats that can be turned on only for
debugging - idle_time (total time spent idling for this blkio_group),
wait_time (total time spent by the blkio_group waiting before any one of its
queues got a timeslice). I've tried to breakdown the stats and sent the most
basic ones here.

Changelog from v1 (most based on Vivek Goyal's comments):
o blkio.time now exports in jiffies as before
o Added stats description in patch description and
Documentation/cgroup/blkio-controller.txt
o Prefix all stats functions with blkio and make them static as applicable
o replace IO_TYPE_MAX with IO_TYPE_TOTAL
o Moved #define constant to top of blk-cgroup.c
o Pass dev_t around instead of char *
o Add note to documentation file about resetting stats
o use BLK_CGROUP_MODULE in addition to BLK_CGROUP config option in #ifdef
statements
o Avoid struct request specific knowledge in blk-cgroup. blk-cgroup.h now has
rq_direction() and rq_sync() functions which are used by CFQ and when using
io-controller at a higher level, bio_* functions can be added.
---

Divyesh Shah (3):
Increment the blkio cgroup stats for real now.
Add io controller stats like
Remove per-cfqq nr_sectors as we'll be passing that info at request dispatch


Documentation/cgroups/blkio-controller.txt | 33 ++++
block/blk-cgroup.c | 229 ++++++++++++++++++++++++++--
block/blk-cgroup.h | 74 +++++++--
block/blk-core.c | 6 -
block/cfq-iosched.c | 16 +-
include/linux/blkdev.h | 38 +++++
6 files changed, 352 insertions(+), 44 deletions(-)

--
Divyesh


2010-04-03 01:56:10

by Divyesh Shah

[permalink] [raw]
Subject: [PATCH 1/3][v2] blkio: Remove per-cfqq nr_sectors as we'll be passing

that info at request dispatch with other stats now. This patch removes the
existing support for accounting sectors for a blkio_group. This will be added
back differently in the next two patches.

Signed-off-by: Divyesh Shah<[email protected]>
---

block/blk-cgroup.c | 3 +--
block/blk-cgroup.h | 6 ++----
block/cfq-iosched.c | 10 ++--------
3 files changed, 5 insertions(+), 14 deletions(-)

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 4b686ad..5be3981 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -56,10 +56,9 @@ struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup);

void blkiocg_update_blkio_group_stats(struct blkio_group *blkg,
- unsigned long time, unsigned long sectors)
+ unsigned long time)
{
blkg->time += time;
- blkg->sectors += sectors;
}
EXPORT_SYMBOL_GPL(blkiocg_update_blkio_group_stats);

diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 8ccc204..fe44517 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -106,7 +106,7 @@ extern int blkiocg_del_blkio_group(struct blkio_group *blkg);
extern struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg,
void *key);
void blkiocg_update_blkio_group_stats(struct blkio_group *blkg,
- unsigned long time, unsigned long sectors);
+ unsigned long time);
#else
struct cgroup;
static inline struct blkio_cgroup *
@@ -123,8 +123,6 @@ blkiocg_del_blkio_group(struct blkio_group *blkg) { return 0; }
static inline struct blkio_group *
blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key) { return NULL; }
static inline void blkiocg_update_blkio_group_stats(struct blkio_group *blkg,
- unsigned long time, unsigned long sectors)
-{
-}
+ unsigned long time) {}
#endif
#endif /* _BLK_CGROUP_H */
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index ef1680b..c18e348 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -141,8 +141,6 @@ struct cfq_queue {
struct cfq_queue *new_cfqq;
struct cfq_group *cfqg;
struct cfq_group *orig_cfqg;
- /* Sectors dispatched in current dispatch round */
- unsigned long nr_sectors;
};

/*
@@ -882,8 +880,7 @@ static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq)
slice_used = cfqq->allocated_slice;
}

- cfq_log_cfqq(cfqq->cfqd, cfqq, "sl_used=%u sect=%lu", slice_used,
- cfqq->nr_sectors);
+ cfq_log_cfqq(cfqq->cfqd, cfqq, "sl_used=%u", slice_used);
return slice_used;
}

@@ -917,8 +914,7 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,

cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime,
st->min_vdisktime);
- blkiocg_update_blkio_group_stats(&cfqg->blkg, used_sl,
- cfqq->nr_sectors);
+ blkiocg_update_blkio_group_stats(&cfqg->blkg, used_sl);
}

#ifdef CONFIG_CFQ_GROUP_IOSCHED
@@ -1524,7 +1520,6 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd,
cfqq->allocated_slice = 0;
cfqq->slice_end = 0;
cfqq->slice_dispatch = 0;
- cfqq->nr_sectors = 0;

cfq_clear_cfqq_wait_request(cfqq);
cfq_clear_cfqq_must_dispatch(cfqq);
@@ -1869,7 +1864,6 @@ static void cfq_dispatch_insert(struct request_queue *q, struct request *rq)
elv_dispatch_sort(q, rq);

cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]++;
- cfqq->nr_sectors += blk_rq_sectors(rq);
}

/*

2010-04-03 01:57:09

by Divyesh Shah

[permalink] [raw]
Subject: [PATCH 2/3][v2] blkio: Add io controller stats like

- io_service_time (the actual time in ns taken by the dis to service the IO)
- io_wait_time (the time spent waiting in the IO shceduler queues before
getting serviced)
- io_serviced (number of IOs serviced from this blkio_group)
- io_service_bytes (Number of bytes served for this cgroup)

These stats are accumulated per operation type helping us to distinguish between
read and write, and sync and async IO. This patch does not increment any of
these stats.

Signed-off-by: Divyesh Shah<[email protected]>
---

Documentation/cgroups/blkio-controller.txt | 33 +++++
block/blk-cgroup.c | 179 +++++++++++++++++++++++++---
block/blk-cgroup.h | 41 +++++-
block/cfq-iosched.c | 2
4 files changed, 229 insertions(+), 26 deletions(-)

diff --git a/Documentation/cgroups/blkio-controller.txt b/Documentation/cgroups/blkio-controller.txt
index 630879c..ededdca 100644
--- a/Documentation/cgroups/blkio-controller.txt
+++ b/Documentation/cgroups/blkio-controller.txt
@@ -75,6 +75,9 @@ CONFIG_DEBUG_BLK_CGROUP

Details of cgroup files
=======================
+Writing an int to any of the stats files (which excludes weight) will result
+in all stats for that cgroup to be erased.
+
- blkio.weight
- Specifies per cgroup weight.

@@ -92,6 +95,36 @@ Details of cgroup files
third field specifies the number of sectors transferred by the
group to/from the device.

+- blkio.io_service_bytes
+ - Number of bytes transferred to/from the disk by the group. These
+ are further divided by the type of operation - read or write, sync
+ or async. First two fields specify the major and minor number of the
+ device, third field specifies the operation type and the fourth field
+ specifies the number of bytes.
+
+- blkio.io_serviced
+ - Number of IOs completed to/from the disk by the group. These
+ are further divided by the type of operation - read or write, sync
+ or async. First two fields specify the major and minor number of the
+ device, third field specifies the operation type and the fourth field
+ specifies the number of IOs.
+
+- blkio.io_service_time
+ - Total amount of time spent by the device to service the IOs for this
+ cgroup. This is in nanoseconds to make it meaningful for flash
+ devices too. This time is further divided by the type of operation -
+ read or write, sync or async. First two fields specify the major and
+ minor number of the device, third field specifies the operation type
+ and the fourth field specifies the io_service_time in ns.
+
+- blkio.io_wait_time
+ - Total amount of time the IO spent waiting in the scheduler queues for
+ service. This is in nanoseconds to make it meaningful for flash
+ devices too. This time is further divided by the type of operation -
+ read or write, sync or async. First two fields specify the major and
+ minor number of the device, third field specifies the operation type
+ and the fourth field specifies the io_wait_time in ns.
+
- blkio.dequeue
- Debugging aid only enabled if CONFIG_DEBUG_CFQ_IOSCHED=y. This
gives the statistics about how many a times a group was dequeued
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 5be3981..cac10b2 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -17,6 +17,8 @@
#include <linux/err.h>
#include "blk-cgroup.h"

+#define MAX_KEY_LEN 100
+
static DEFINE_SPINLOCK(blkio_list_lock);
static LIST_HEAD(blkio_list);

@@ -55,12 +57,15 @@ struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
}
EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup);

-void blkiocg_update_blkio_group_stats(struct blkio_group *blkg,
- unsigned long time)
+void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time)
{
- blkg->time += time;
+ unsigned long flags;
+
+ spin_lock_irqsave(&blkg->stats_lock, flags);
+ blkg->stats.time += time;
+ spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
-EXPORT_SYMBOL_GPL(blkiocg_update_blkio_group_stats);
+EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used);

void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
struct blkio_group *blkg, void *key, dev_t dev)
@@ -170,13 +175,119 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
return 0;
}

-#define SHOW_FUNCTION_PER_GROUP(__VAR) \
+static int
+blkiocg_reset_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
+{
+ struct blkio_cgroup *blkcg;
+ struct blkio_group *blkg;
+ struct hlist_node *n;
+ struct blkio_group_stats *stats;
+
+ blkcg = cgroup_to_blkio_cgroup(cgroup);
+ spin_lock_irq(&blkcg->lock);
+ hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
+ spin_lock(&blkg->stats_lock);
+ stats = &blkg->stats;
+ memset(stats, 0, sizeof(struct blkio_group_stats));
+ spin_unlock(&blkg->stats_lock);
+ }
+ spin_unlock_irq(&blkcg->lock);
+ return 0;
+}
+
+static void blkio_get_key_name(int type, dev_t dev, char *str, int chars_left)
+{
+ snprintf(str, chars_left, "%u:%u", MAJOR(dev), MINOR(dev));
+ chars_left -= strlen(str);
+ if (chars_left <= 0) {
+ printk(KERN_WARNING
+ "Possibly incorrect cgroup stat display format");
+ return;
+ }
+ switch (type) {
+ case IO_READ:
+ strlcat(str, " Read", chars_left);
+ break;
+ case IO_WRITE:
+ strlcat(str, " Write", chars_left);
+ break;
+ case IO_SYNC:
+ strlcat(str, " Sync", chars_left);
+ break;
+ case IO_ASYNC:
+ strlcat(str, " Async", chars_left);
+ break;
+ case IO_TYPE_TOTAL:
+ strlcat(str, " Total", chars_left);
+ break;
+ default:
+ strlcat(str, " Invalid", chars_left);
+ }
+}
+
+typedef uint64_t (get_var) (struct blkio_group *, int);
+
+static uint64_t blkio_get_typed_stat(struct blkio_group *blkg,
+ struct cgroup_map_cb *cb, get_var *getvar, dev_t dev)
+{
+ uint64_t disk_total;
+ char key_str[MAX_KEY_LEN];
+ int type;
+
+ for (type = 0; type < IO_TYPE_TOTAL; type++) {
+ blkio_get_key_name(type, dev, key_str, MAX_KEY_LEN);
+ cb->fill(cb, key_str, getvar(blkg, type));
+ }
+ disk_total = getvar(blkg, IO_READ) + getvar(blkg, IO_WRITE);
+ blkio_get_key_name(IO_TYPE_TOTAL, dev, key_str, MAX_KEY_LEN);
+ cb->fill(cb, key_str, disk_total);
+ return disk_total;
+}
+
+static uint64_t blkio_get_stat(struct blkio_group *blkg,
+ struct cgroup_map_cb *cb, get_var *getvar, dev_t dev)
+{
+ uint64_t var = getvar(blkg, 0);
+ char str[10];
+
+ snprintf(str, 10, "%u:%u", MAJOR(dev), MINOR(dev));
+ cb->fill(cb, str, var);
+ return var;
+}
+
+#define GET_STAT_INDEXED(__VAR) \
+uint64_t blkio_get_##__VAR##_stat(struct blkio_group *blkg, int type) \
+{ \
+ return blkg->stats.__VAR[type]; \
+} \
+
+GET_STAT_INDEXED(io_service_bytes);
+GET_STAT_INDEXED(io_serviced);
+GET_STAT_INDEXED(io_service_time);
+GET_STAT_INDEXED(io_wait_time);
+#undef GET_STAT_INDEXED
+
+#define GET_STAT(__VAR) \
+uint64_t blkio_get_##__VAR##_stat(struct blkio_group *blkg, int dummy) \
+{ \
+ return blkg->stats.__VAR; \
+}
+
+GET_STAT(time);
+GET_STAT(sectors);
+#ifdef CONFIG_DEBUG_BLK_CGROUP
+GET_STAT(dequeue);
+#endif
+#undef GET_STAT
+
+#define SHOW_FUNCTION_PER_GROUP(__VAR, get_stats, getvar, show_total) \
static int blkiocg_##__VAR##_read(struct cgroup *cgroup, \
- struct cftype *cftype, struct seq_file *m) \
+ struct cftype *cftype, struct cgroup_map_cb *cb) \
{ \
struct blkio_cgroup *blkcg; \
struct blkio_group *blkg; \
struct hlist_node *n; \
+ uint64_t cgroup_total = 0; \
\
if (!cgroup_lock_live_group(cgroup)) \
return -ENODEV; \
@@ -184,19 +295,32 @@ static int blkiocg_##__VAR##_read(struct cgroup *cgroup, \
blkcg = cgroup_to_blkio_cgroup(cgroup); \
rcu_read_lock(); \
hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) {\
- if (blkg->dev) \
- seq_printf(m, "%u:%u %lu\n", MAJOR(blkg->dev), \
- MINOR(blkg->dev), blkg->__VAR); \
+ if (blkg->dev) { \
+ spin_lock_irq(&blkg->stats_lock); \
+ cgroup_total += get_stats(blkg, cb, getvar, \
+ blkg->dev); \
+ spin_unlock_irq(&blkg->stats_lock); \
+ } \
} \
+ if (show_total) \
+ cb->fill(cb, "Total", cgroup_total); \
rcu_read_unlock(); \
cgroup_unlock(); \
return 0; \
}

-SHOW_FUNCTION_PER_GROUP(time);
-SHOW_FUNCTION_PER_GROUP(sectors);
+SHOW_FUNCTION_PER_GROUP(time, blkio_get_stat, blkio_get_time_stat, 0);
+SHOW_FUNCTION_PER_GROUP(sectors, blkio_get_stat, blkio_get_sectors_stat, 0);
+SHOW_FUNCTION_PER_GROUP(io_service_bytes, blkio_get_typed_stat,
+ blkio_get_io_service_bytes_stat, 1);
+SHOW_FUNCTION_PER_GROUP(io_serviced, blkio_get_typed_stat,
+ blkio_get_io_serviced_stat, 1);
+SHOW_FUNCTION_PER_GROUP(io_service_time, blkio_get_typed_stat,
+ blkio_get_io_service_time_stat, 1);
+SHOW_FUNCTION_PER_GROUP(io_wait_time, blkio_get_typed_stat,
+ blkio_get_io_wait_time_stat, 1);
#ifdef CONFIG_DEBUG_BLK_CGROUP
-SHOW_FUNCTION_PER_GROUP(dequeue);
+SHOW_FUNCTION_PER_GROUP(dequeue, blkio_get_stat, blkio_get_dequeue_stat, 0);
#endif
#undef SHOW_FUNCTION_PER_GROUP

@@ -204,7 +328,7 @@ SHOW_FUNCTION_PER_GROUP(dequeue);
void blkiocg_update_blkio_group_dequeue_stats(struct blkio_group *blkg,
unsigned long dequeue)
{
- blkg->dequeue += dequeue;
+ blkg->stats.dequeue += dequeue;
}
EXPORT_SYMBOL_GPL(blkiocg_update_blkio_group_dequeue_stats);
#endif
@@ -217,16 +341,39 @@ struct cftype blkio_files[] = {
},
{
.name = "time",
- .read_seq_string = blkiocg_time_read,
+ .read_map = blkiocg_time_read,
+ .write_u64 = blkiocg_reset_write,
},
{
.name = "sectors",
- .read_seq_string = blkiocg_sectors_read,
+ .read_map = blkiocg_sectors_read,
+ .write_u64 = blkiocg_reset_write,
+ },
+ {
+ .name = "io_service_bytes",
+ .read_map = blkiocg_io_service_bytes_read,
+ .write_u64 = blkiocg_reset_write,
+ },
+ {
+ .name = "io_serviced",
+ .read_map = blkiocg_io_serviced_read,
+ .write_u64 = blkiocg_reset_write,
+ },
+ {
+ .name = "io_service_time",
+ .read_map = blkiocg_io_service_time_read,
+ .write_u64 = blkiocg_reset_write,
+ },
+ {
+ .name = "io_wait_time",
+ .read_map = blkiocg_io_wait_time_read,
+ .write_u64 = blkiocg_reset_write,
},
#ifdef CONFIG_DEBUG_BLK_CGROUP
{
.name = "dequeue",
- .read_seq_string = blkiocg_dequeue_read,
+ .read_map = blkiocg_dequeue_read,
+ .write_u64 = blkiocg_reset_write,
},
#endif
};
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index fe44517..e8600b0 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -23,6 +23,14 @@ extern struct cgroup_subsys blkio_subsys;
#define blkio_subsys_id blkio_subsys.subsys_id
#endif

+enum io_type {
+ IO_READ = 0,
+ IO_WRITE,
+ IO_SYNC,
+ IO_ASYNC,
+ IO_TYPE_TOTAL
+};
+
struct blkio_cgroup {
struct cgroup_subsys_state css;
unsigned int weight;
@@ -30,6 +38,23 @@ struct blkio_cgroup {
struct hlist_head blkg_list;
};

+struct blkio_group_stats {
+ /* total disk time and nr sectors dispatched by this group */
+ uint64_t time;
+ uint64_t sectors;
+ /* Total disk time used by IOs in ns */
+ uint64_t io_service_time[IO_TYPE_TOTAL];
+ uint64_t io_service_bytes[IO_TYPE_TOTAL]; /* Total bytes transferred */
+ /* Total IOs serviced, post merge */
+ uint64_t io_serviced[IO_TYPE_TOTAL];
+ /* Total time spent waiting in scheduler queue in ns */
+ uint64_t io_wait_time[IO_TYPE_TOTAL];
+#ifdef CONFIG_DEBUG_BLK_CGROUP
+ /* How many times this group has been removed from service tree */
+ unsigned long dequeue;
+#endif
+};
+
struct blkio_group {
/* An rcu protected unique identifier for the group */
void *key;
@@ -38,15 +63,13 @@ struct blkio_group {
#ifdef CONFIG_DEBUG_BLK_CGROUP
/* Store cgroup path */
char path[128];
- /* How many times this group has been removed from service tree */
- unsigned long dequeue;
#endif
/* The device MKDEV(major, minor), this group has been created for */
- dev_t dev;
+ dev_t dev;

- /* total disk time and nr sectors dispatched by this group */
- unsigned long time;
- unsigned long sectors;
+ /* Need to serialize the stats in the case of reset/update */
+ spinlock_t stats_lock;
+ struct blkio_group_stats stats;
};

typedef void (blkio_unlink_group_fn) (void *key, struct blkio_group *blkg);
@@ -105,8 +128,8 @@ extern void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
extern int blkiocg_del_blkio_group(struct blkio_group *blkg);
extern struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg,
void *key);
-void blkiocg_update_blkio_group_stats(struct blkio_group *blkg,
- unsigned long time);
+void blkiocg_update_timeslice_used(struct blkio_group *blkg,
+ unsigned long time);
#else
struct cgroup;
static inline struct blkio_cgroup *
@@ -122,7 +145,7 @@ blkiocg_del_blkio_group(struct blkio_group *blkg) { return 0; }

static inline struct blkio_group *
blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key) { return NULL; }
-static inline void blkiocg_update_blkio_group_stats(struct blkio_group *blkg,
+static inline void blkiocg_update_timeslice_used(struct blkio_group *blkg,
unsigned long time) {}
#endif
#endif /* _BLK_CGROUP_H */
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index c18e348..d91df9f 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -914,7 +914,7 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,

cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime,
st->min_vdisktime);
- blkiocg_update_blkio_group_stats(&cfqg->blkg, used_sl);
+ blkiocg_update_timeslice_used(&cfqg->blkg, used_sl);
}

#ifdef CONFIG_CFQ_GROUP_IOSCHED

2010-04-03 01:57:54

by Divyesh Shah

[permalink] [raw]
Subject: [PATCH 3/3][v2] blkio: Increment the blkio cgroup stats for real now

We also add start_time_ns and io_start_time_ns fields to struct request
here to record the time when a request is created and when it is
dispatched to device. We use ns uints here as ms and jiffies are
not very useful for non-rotational media.

Signed-off-by: Divyesh Shah<[email protected]>
---

block/blk-cgroup.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++--
block/blk-cgroup.h | 33 +++++++++++++++++++++++++++++---
block/blk-core.c | 6 ++++--
block/cfq-iosched.c | 6 +++++-
include/linux/blkdev.h | 38 ++++++++++++++++++++++++++++++++++++-
5 files changed, 123 insertions(+), 9 deletions(-)

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index cac10b2..21c0d28 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -15,6 +15,7 @@
#include <linux/kdev_t.h>
#include <linux/module.h>
#include <linux/err.h>
+#include <linux/blkdev.h>
#include "blk-cgroup.h"

#define MAX_KEY_LEN 100
@@ -57,6 +58,16 @@ struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
}
EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup);

+/*
+ * Add to the appropriate stat variable depending on the request type.
+ * This should be called with the blkg->stats_lock held.
+ */
+void io_add_stat(uint64_t *stat, uint64_t add, int direction, int sync)
+{
+ stat[direction] += add;
+ stat[sync] += add;
+}
+
void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time)
{
unsigned long flags;
@@ -67,6 +78,40 @@ void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time)
}
EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used);

+void blkiocg_update_dispatch_stats(struct blkio_group *blkg,
+ uint64_t bytes, int direction, int sync)
+{
+ struct blkio_group_stats *stats;
+ unsigned long flags;
+
+ spin_lock_irqsave(&blkg->stats_lock, flags);
+ stats = &blkg->stats;
+ stats->sectors += bytes >> 9;
+ io_add_stat(stats->io_serviced, 1, direction, sync);
+ io_add_stat(stats->io_service_bytes, bytes, direction, sync);
+ spin_unlock_irqrestore(&blkg->stats_lock, flags);
+}
+EXPORT_SYMBOL_GPL(blkiocg_update_dispatch_stats);
+
+void blkiocg_update_completion_stats(struct blkio_group *blkg,
+ uint64_t start_time, uint64_t io_start_time, int direction, int sync)
+{
+ struct blkio_group_stats *stats;
+ unsigned long flags;
+ unsigned long long now = sched_clock();
+
+ spin_lock_irqsave(&blkg->stats_lock, flags);
+ stats = &blkg->stats;
+ if (time_after64(now, io_start_time))
+ io_add_stat(stats->io_service_time, now - io_start_time,
+ direction, sync);
+ if (time_after64(io_start_time, start_time))
+ io_add_stat(stats->io_wait_time, io_start_time - start_time,
+ direction, sync);
+ spin_unlock_irqrestore(&blkg->stats_lock, flags);
+}
+EXPORT_SYMBOL_GPL(blkiocg_update_completion_stats);
+
void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
struct blkio_group *blkg, void *key, dev_t dev)
{
@@ -325,12 +370,12 @@ SHOW_FUNCTION_PER_GROUP(dequeue, blkio_get_stat, blkio_get_dequeue_stat, 0);
#undef SHOW_FUNCTION_PER_GROUP

#ifdef CONFIG_DEBUG_BLK_CGROUP
-void blkiocg_update_blkio_group_dequeue_stats(struct blkio_group *blkg,
+void blkiocg_update_dequeue_stats(struct blkio_group *blkg,
unsigned long dequeue)
{
blkg->stats.dequeue += dequeue;
}
-EXPORT_SYMBOL_GPL(blkiocg_update_blkio_group_dequeue_stats);
+EXPORT_SYMBOL_GPL(blkiocg_update_dequeue_stats);
#endif

struct cftype blkio_files[] = {
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index e8600b0..5fc88e1 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -112,12 +112,12 @@ static inline char *blkg_path(struct blkio_group *blkg)
{
return blkg->path;
}
-void blkiocg_update_blkio_group_dequeue_stats(struct blkio_group *blkg,
+void blkiocg_update_dequeue_stats(struct blkio_group *blkg,
unsigned long dequeue);
#else
static inline char *blkg_path(struct blkio_group *blkg) { return NULL; }
-static inline void blkiocg_update_blkio_group_dequeue_stats(
- struct blkio_group *blkg, unsigned long dequeue) {}
+static inline void blkiocg_update_dequeue_stats(struct blkio_group *blkg,
+ unsigned long dequeue) {}
#endif

#if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE)
@@ -130,6 +130,20 @@ extern struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg,
void *key);
void blkiocg_update_timeslice_used(struct blkio_group *blkg,
unsigned long time);
+void blkiocg_update_dispatch_stats(struct blkio_group *blkg, uint64_t bytes,
+ int direction, int sync);
+void blkiocg_update_completion_stats(struct blkio_group *blkg,
+ uint64_t start_time, uint64_t io_start_time, int direction, int sync);
+static inline int rq_direction(struct request *rq)
+{
+ return (rq->cmd_flags & REQ_RW) ? IO_WRITE : IO_READ;
+}
+
+static inline int rq_sync(struct request *rq)
+{
+ return (!(rq->cmd_flags & REQ_RW) ||
+ rq->cmd_flags & REQ_RW_SYNC) ? IO_SYNC : IO_ASYNC;
+}
#else
struct cgroup;
static inline struct blkio_cgroup *
@@ -147,5 +161,18 @@ static inline struct blkio_group *
blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key) { return NULL; }
static inline void blkiocg_update_timeslice_used(struct blkio_group *blkg,
unsigned long time) {}
+static inline void blkiocg_update_dispatch_stats(struct blkio_group *blkg,
+ uint64_t bytes, int direction, int sync) {}
+static inline void blkiocg_update_completion_stats(struct blkio_group *blkg,
+ uint64_t start_time, uint64_t io_start_time, int direction, int sync) {}
+static inline int rq_direction(struct request *rq)
+{
+ return 0;
+}
+
+static inline int rq_sync(struct request *rq)
+{
+ return 0;
+}
#endif
#endif /* _BLK_CGROUP_H */
diff --git a/block/blk-core.c b/block/blk-core.c
index 9fe174d..1d94f15 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -127,6 +127,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
rq->tag = -1;
rq->ref_count = 1;
rq->start_time = jiffies;
+ set_start_time_ns(rq);
}
EXPORT_SYMBOL(blk_rq_init);

@@ -1855,8 +1856,10 @@ void blk_dequeue_request(struct request *rq)
* and to it is freed is accounted as io that is in progress at
* the driver side.
*/
- if (blk_account_rq(rq))
+ if (blk_account_rq(rq)) {
q->in_flight[rq_is_sync(rq)]++;
+ set_io_start_time_ns(rq);
+ }
}

/**
@@ -2517,4 +2520,3 @@ int __init blk_dev_init(void)

return 0;
}
-
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index d91df9f..59aeadf 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -854,7 +854,7 @@ cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
if (!RB_EMPTY_NODE(&cfqg->rb_node))
cfq_rb_erase(&cfqg->rb_node, st);
cfqg->saved_workload_slice = 0;
- blkiocg_update_blkio_group_dequeue_stats(&cfqg->blkg, 1);
+ blkiocg_update_dequeue_stats(&cfqg->blkg, 1);
}

static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq)
@@ -1864,6 +1864,8 @@ static void cfq_dispatch_insert(struct request_queue *q, struct request *rq)
elv_dispatch_sort(q, rq);

cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]++;
+ blkiocg_update_dispatch_stats(&cfqq->cfqg->blkg, blk_rq_bytes(rq),
+ rq_direction(rq), rq_sync(rq));
}

/*
@@ -3284,6 +3286,8 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
WARN_ON(!cfqq->dispatched);
cfqd->rq_in_driver--;
cfqq->dispatched--;
+ blkiocg_update_completion_stats(&cfqq->cfqg->blkg, rq_start_time_ns(rq),
+ rq_io_start_time_ns(rq), rq_direction(rq), rq_sync(rq));

cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--;

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ebd22db..ac1f30d 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -193,7 +193,10 @@ struct request {

struct gendisk *rq_disk;
unsigned long start_time;
-
+#if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE)
+ unsigned long long start_time_ns;
+ unsigned long long io_start_time_ns; /* when passed to hardware */
+#endif
/* Number of scatter-gather DMA addr+len pairs after
* physical address coalescing is performed.
*/
@@ -1219,6 +1222,39 @@ static inline void put_dev_sector(Sector p)
struct work_struct;
int kblockd_schedule_work(struct request_queue *q, struct work_struct *work);

+#if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE)
+static inline void set_start_time_ns(struct request *req)
+{
+ req->start_time_ns = sched_clock();
+}
+
+static inline void set_io_start_time_ns(struct request *req)
+{
+ req->io_start_time_ns = sched_clock();
+}
+
+static inline uint64_t rq_start_time_ns(struct request *req)
+{
+ return req->start_time_ns;
+}
+
+static inline uint64_t rq_io_start_time_ns(struct request *req)
+{
+ return req->io_start_time_ns;
+}
+#else
+static inline void set_start_time_ns(struct request *req) {}
+static inline void set_io_start_time_ns(struct request *req) {}
+static inline uint64_t rq_start_time_ns(struct request *req)
+{
+ return 0;
+}
+static inline uint64_t rq_io_start_time_ns(struct request *req)
+{
+ return 0;
+}
+#endif
+
#define MODULE_ALIAS_BLOCKDEV(major,minor) \
MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor))
#define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \

2010-04-03 01:59:56

by Divyesh Shah

[permalink] [raw]
Subject: Re: [PATCH 2/3][v2] blkio: Add io controller stats like

Missed the v2 tag on this patch. Resending. Apologies for the spam.

On Fri, Apr 2, 2010 at 6:56 PM, Divyesh Shah <[email protected]> wrote:
> - io_service_time (the actual time in ns taken by the dis to service the IO)
> - io_wait_time (the time spent waiting in the IO shceduler queues before
> ?getting serviced)
> - io_serviced (number of IOs serviced from this blkio_group)
> - io_service_bytes (Number of bytes served for this cgroup)
>
> These stats are accumulated per operation type helping us to distinguish between
> read and write, and sync and async IO. This patch does not increment any of
> these stats.
>
> Signed-off-by: Divyesh Shah<[email protected]>
> ---
>
> ?Documentation/cgroups/blkio-controller.txt | ? 33 +++++
> ?block/blk-cgroup.c ? ? ? ? ? ? ? ? ? ? ? ? | ?179 +++++++++++++++++++++++++---
> ?block/blk-cgroup.h ? ? ? ? ? ? ? ? ? ? ? ? | ? 41 +++++-
> ?block/cfq-iosched.c ? ? ? ? ? ? ? ? ? ? ? ?| ? ?2
> ?4 files changed, 229 insertions(+), 26 deletions(-)
>
> diff --git a/Documentation/cgroups/blkio-controller.txt b/Documentation/cgroups/blkio-controller.txt
> index 630879c..ededdca 100644
> --- a/Documentation/cgroups/blkio-controller.txt
> +++ b/Documentation/cgroups/blkio-controller.txt
> @@ -75,6 +75,9 @@ CONFIG_DEBUG_BLK_CGROUP
>
> ?Details of cgroup files
> ?=======================
> +Writing an int to any of the stats files (which excludes weight) will result
> +in all stats for that cgroup to be erased.
> +
> ?- blkio.weight
> ? ? ? ?- Specifies per cgroup weight.
>
> @@ -92,6 +95,36 @@ Details of cgroup files
> ? ? ? ? ?third field specifies the number of sectors transferred by the
> ? ? ? ? ?group to/from the device.
>
> +- blkio.io_service_bytes
> + ? ? ? - Number of bytes transferred to/from the disk by the group. These
> + ? ? ? ? are further divided by the type of operation - read or write, sync
> + ? ? ? ? or async. First two fields specify the major and minor number of the
> + ? ? ? ? device, third field specifies the operation type and the fourth field
> + ? ? ? ? specifies the number of bytes.
> +
> +- blkio.io_serviced
> + ? ? ? - Number of IOs completed to/from the disk by the group. These
> + ? ? ? ? are further divided by the type of operation - read or write, sync
> + ? ? ? ? or async. First two fields specify the major and minor number of the
> + ? ? ? ? device, third field specifies the operation type and the fourth field
> + ? ? ? ? specifies the number of IOs.
> +
> +- blkio.io_service_time
> + ? ? ? - Total amount of time spent by the device to service the IOs for this
> + ? ? ? ? cgroup. This is in nanoseconds to make it meaningful for flash
> + ? ? ? ? devices too. This time is further divided by the type of operation -
> + ? ? ? ? read or write, sync or async. First two fields specify the major and
> + ? ? ? ? minor number of the device, third field specifies the operation type
> + ? ? ? ? and the fourth field specifies the io_service_time in ns.
> +
> +- blkio.io_wait_time
> + ? ? ? - Total amount of time the IO spent waiting in the scheduler queues for
> + ? ? ? ? service. This is in nanoseconds to make it meaningful for flash
> + ? ? ? ? devices too. This time is further divided by the type of operation -
> + ? ? ? ? read or write, sync or async. First two fields specify the major and
> + ? ? ? ? minor number of the device, third field specifies the operation type
> + ? ? ? ? and the fourth field specifies the io_wait_time in ns.
> +
> ?- blkio.dequeue
> ? ? ? ?- Debugging aid only enabled if CONFIG_DEBUG_CFQ_IOSCHED=y. This
> ? ? ? ? ?gives the statistics about how many a times a group was dequeued
> diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
> index 5be3981..cac10b2 100644
> --- a/block/blk-cgroup.c
> +++ b/block/blk-cgroup.c
> @@ -17,6 +17,8 @@
> ?#include <linux/err.h>
> ?#include "blk-cgroup.h"
>
> +#define MAX_KEY_LEN 100
> +
> ?static DEFINE_SPINLOCK(blkio_list_lock);
> ?static LIST_HEAD(blkio_list);
>
> @@ -55,12 +57,15 @@ struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
> ?}
> ?EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup);
>
> -void blkiocg_update_blkio_group_stats(struct blkio_group *blkg,
> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? unsigned long time)
> +void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time)
> ?{
> - ? ? ? blkg->time += time;
> + ? ? ? unsigned long flags;
> +
> + ? ? ? spin_lock_irqsave(&blkg->stats_lock, flags);
> + ? ? ? blkg->stats.time += time;
> + ? ? ? spin_unlock_irqrestore(&blkg->stats_lock, flags);
> ?}
> -EXPORT_SYMBOL_GPL(blkiocg_update_blkio_group_stats);
> +EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used);
>
> ?void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
> ? ? ? ? ? ? ? ? ? ? ? ?struct blkio_group *blkg, void *key, dev_t dev)
> @@ -170,13 +175,119 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
> ? ? ? ?return 0;
> ?}
>
> -#define SHOW_FUNCTION_PER_GROUP(__VAR) ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
> +static int
> +blkiocg_reset_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
> +{
> + ? ? ? struct blkio_cgroup *blkcg;
> + ? ? ? struct blkio_group *blkg;
> + ? ? ? struct hlist_node *n;
> + ? ? ? struct blkio_group_stats *stats;
> +
> + ? ? ? blkcg = cgroup_to_blkio_cgroup(cgroup);
> + ? ? ? spin_lock_irq(&blkcg->lock);
> + ? ? ? hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
> + ? ? ? ? ? ? ? spin_lock(&blkg->stats_lock);
> + ? ? ? ? ? ? ? stats = &blkg->stats;
> + ? ? ? ? ? ? ? memset(stats, 0, sizeof(struct blkio_group_stats));
> + ? ? ? ? ? ? ? spin_unlock(&blkg->stats_lock);
> + ? ? ? }
> + ? ? ? spin_unlock_irq(&blkcg->lock);
> + ? ? ? return 0;
> +}
> +
> +static void blkio_get_key_name(int type, dev_t dev, char *str, int chars_left)
> +{
> + ? ? ? snprintf(str, chars_left, "%u:%u", MAJOR(dev), MINOR(dev));
> + ? ? ? chars_left -= strlen(str);
> + ? ? ? if (chars_left <= 0) {
> + ? ? ? ? ? ? ? printk(KERN_WARNING
> + ? ? ? ? ? ? ? ? ? ? ? "Possibly incorrect cgroup stat display format");
> + ? ? ? ? ? ? ? return;
> + ? ? ? }
> + ? ? ? switch (type) {
> + ? ? ? case IO_READ:
> + ? ? ? ? ? ? ? strlcat(str, " Read", chars_left);
> + ? ? ? ? ? ? ? break;
> + ? ? ? case IO_WRITE:
> + ? ? ? ? ? ? ? strlcat(str, " Write", chars_left);
> + ? ? ? ? ? ? ? break;
> + ? ? ? case IO_SYNC:
> + ? ? ? ? ? ? ? strlcat(str, " Sync", chars_left);
> + ? ? ? ? ? ? ? break;
> + ? ? ? case IO_ASYNC:
> + ? ? ? ? ? ? ? strlcat(str, " Async", chars_left);
> + ? ? ? ? ? ? ? break;
> + ? ? ? case IO_TYPE_TOTAL:
> + ? ? ? ? ? ? ? strlcat(str, " Total", chars_left);
> + ? ? ? ? ? ? ? break;
> + ? ? ? default:
> + ? ? ? ? ? ? ? strlcat(str, " Invalid", chars_left);
> + ? ? ? }
> +}
> +
> +typedef uint64_t (get_var) (struct blkio_group *, int);
> +
> +static uint64_t blkio_get_typed_stat(struct blkio_group *blkg,
> + ? ? ? ? ? ? ? struct cgroup_map_cb *cb, get_var *getvar, dev_t dev)
> +{
> + ? ? ? uint64_t disk_total;
> + ? ? ? char key_str[MAX_KEY_LEN];
> + ? ? ? int type;
> +
> + ? ? ? for (type = 0; type < IO_TYPE_TOTAL; type++) {
> + ? ? ? ? ? ? ? blkio_get_key_name(type, dev, key_str, MAX_KEY_LEN);
> + ? ? ? ? ? ? ? cb->fill(cb, key_str, getvar(blkg, type));
> + ? ? ? }
> + ? ? ? disk_total = getvar(blkg, IO_READ) + getvar(blkg, IO_WRITE);
> + ? ? ? blkio_get_key_name(IO_TYPE_TOTAL, dev, key_str, MAX_KEY_LEN);
> + ? ? ? cb->fill(cb, key_str, disk_total);
> + ? ? ? return disk_total;
> +}
> +
> +static uint64_t blkio_get_stat(struct blkio_group *blkg,
> + ? ? ? ? ? ? ? struct cgroup_map_cb *cb, get_var *getvar, dev_t dev)
> +{
> + ? ? ? uint64_t var = getvar(blkg, 0);
> + ? ? ? char str[10];
> +
> + ? ? ? snprintf(str, 10, "%u:%u", MAJOR(dev), MINOR(dev));
> + ? ? ? cb->fill(cb, str, var);
> + ? ? ? return var;
> +}
> +
> +#define GET_STAT_INDEXED(__VAR) ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
> +uint64_t blkio_get_##__VAR##_stat(struct blkio_group *blkg, int type) ?\
> +{ ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
> + ? ? ? return blkg->stats.__VAR[type]; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
> +} ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
> +
> +GET_STAT_INDEXED(io_service_bytes);
> +GET_STAT_INDEXED(io_serviced);
> +GET_STAT_INDEXED(io_service_time);
> +GET_STAT_INDEXED(io_wait_time);
> +#undef GET_STAT_INDEXED
> +
> +#define GET_STAT(__VAR) ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
> +uint64_t blkio_get_##__VAR##_stat(struct blkio_group *blkg, int dummy) \
> +{ ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
> + ? ? ? return blkg->stats.__VAR; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
> +}
> +
> +GET_STAT(time);
> +GET_STAT(sectors);
> +#ifdef CONFIG_DEBUG_BLK_CGROUP
> +GET_STAT(dequeue);
> +#endif
> +#undef GET_STAT
> +
> +#define SHOW_FUNCTION_PER_GROUP(__VAR, get_stats, getvar, show_total) ?\
> ?static int blkiocg_##__VAR##_read(struct cgroup *cgroup, ? ? ? ? ? ? ? \
> - ? ? ? ? ? ? ? ? ? ? ? struct cftype *cftype, struct seq_file *m) ? ? ?\
> + ? ? ? ? ? ? ? struct cftype *cftype, struct cgroup_map_cb *cb) ? ? ? ?\
> ?{ ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
> ? ? ? ?struct blkio_cgroup *blkcg; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
> ? ? ? ?struct blkio_group *blkg; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
> ? ? ? ?struct hlist_node *n; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
> + ? ? ? uint64_t cgroup_total = 0; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
> ? ? ? ?if (!cgroup_lock_live_group(cgroup)) ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
> ? ? ? ? ? ? ? ?return -ENODEV; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
> @@ -184,19 +295,32 @@ static int blkiocg_##__VAR##_read(struct cgroup *cgroup, ? ? ? ? ?\
> ? ? ? ?blkcg = cgroup_to_blkio_cgroup(cgroup); ? ? ? ? ? ? ? ? ? ? ? ? \
> ? ? ? ?rcu_read_lock(); ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
> ? ? ? ?hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) {\
> - ? ? ? ? ? ? ? if (blkg->dev) ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
> - ? ? ? ? ? ? ? ? ? ? ? seq_printf(m, "%u:%u %lu\n", MAJOR(blkg->dev), ?\
> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?MINOR(blkg->dev), blkg->__VAR); ? ? ? ?\
> + ? ? ? ? ? ? ? if (blkg->dev) { ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
> + ? ? ? ? ? ? ? ? ? ? ? spin_lock_irq(&blkg->stats_lock); ? ? ? ? ? ? ? \
> + ? ? ? ? ? ? ? ? ? ? ? cgroup_total += get_stats(blkg, cb, getvar, ? ? \
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? blkg->dev); ? ? ? ? ? ? \
> + ? ? ? ? ? ? ? ? ? ? ? spin_unlock_irq(&blkg->stats_lock); ? ? ? ? ? ? \
> + ? ? ? ? ? ? ? } ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
> ? ? ? ?} ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
> + ? ? ? if (show_total) ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
> + ? ? ? ? ? ? ? cb->fill(cb, "Total", cgroup_total); ? ? ? ? ? ? ? ? ? ?\
> ? ? ? ?rcu_read_unlock(); ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
> ? ? ? ?cgroup_unlock(); ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
> ? ? ? ?return 0; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
> ?}
>
> -SHOW_FUNCTION_PER_GROUP(time);
> -SHOW_FUNCTION_PER_GROUP(sectors);
> +SHOW_FUNCTION_PER_GROUP(time, blkio_get_stat, blkio_get_time_stat, 0);
> +SHOW_FUNCTION_PER_GROUP(sectors, blkio_get_stat, blkio_get_sectors_stat, 0);
> +SHOW_FUNCTION_PER_GROUP(io_service_bytes, blkio_get_typed_stat,
> + ? ? ? ? ? ? ? ? ? ? ? blkio_get_io_service_bytes_stat, 1);
> +SHOW_FUNCTION_PER_GROUP(io_serviced, blkio_get_typed_stat,
> + ? ? ? ? ? ? ? ? ? ? ? blkio_get_io_serviced_stat, 1);
> +SHOW_FUNCTION_PER_GROUP(io_service_time, blkio_get_typed_stat,
> + ? ? ? ? ? ? ? ? ? ? ? blkio_get_io_service_time_stat, 1);
> +SHOW_FUNCTION_PER_GROUP(io_wait_time, blkio_get_typed_stat,
> + ? ? ? ? ? ? ? ? ? ? ? blkio_get_io_wait_time_stat, 1);
> ?#ifdef CONFIG_DEBUG_BLK_CGROUP
> -SHOW_FUNCTION_PER_GROUP(dequeue);
> +SHOW_FUNCTION_PER_GROUP(dequeue, blkio_get_stat, blkio_get_dequeue_stat, 0);
> ?#endif
> ?#undef SHOW_FUNCTION_PER_GROUP
>
> @@ -204,7 +328,7 @@ SHOW_FUNCTION_PER_GROUP(dequeue);
> ?void blkiocg_update_blkio_group_dequeue_stats(struct blkio_group *blkg,
> ? ? ? ? ? ? ? ? ? ? ? ?unsigned long dequeue)
> ?{
> - ? ? ? blkg->dequeue += dequeue;
> + ? ? ? blkg->stats.dequeue += dequeue;
> ?}
> ?EXPORT_SYMBOL_GPL(blkiocg_update_blkio_group_dequeue_stats);
> ?#endif
> @@ -217,16 +341,39 @@ struct cftype blkio_files[] = {
> ? ? ? ?},
> ? ? ? ?{
> ? ? ? ? ? ? ? ?.name = "time",
> - ? ? ? ? ? ? ? .read_seq_string = blkiocg_time_read,
> + ? ? ? ? ? ? ? .read_map = blkiocg_time_read,
> + ? ? ? ? ? ? ? .write_u64 = blkiocg_reset_write,
> ? ? ? ?},
> ? ? ? ?{
> ? ? ? ? ? ? ? ?.name = "sectors",
> - ? ? ? ? ? ? ? .read_seq_string = blkiocg_sectors_read,
> + ? ? ? ? ? ? ? .read_map = blkiocg_sectors_read,
> + ? ? ? ? ? ? ? .write_u64 = blkiocg_reset_write,
> + ? ? ? },
> + ? ? ? {
> + ? ? ? ? ? ? ? .name = "io_service_bytes",
> + ? ? ? ? ? ? ? .read_map = blkiocg_io_service_bytes_read,
> + ? ? ? ? ? ? ? .write_u64 = blkiocg_reset_write,
> + ? ? ? },
> + ? ? ? {
> + ? ? ? ? ? ? ? .name = "io_serviced",
> + ? ? ? ? ? ? ? .read_map = blkiocg_io_serviced_read,
> + ? ? ? ? ? ? ? .write_u64 = blkiocg_reset_write,
> + ? ? ? },
> + ? ? ? {
> + ? ? ? ? ? ? ? .name = "io_service_time",
> + ? ? ? ? ? ? ? .read_map = blkiocg_io_service_time_read,
> + ? ? ? ? ? ? ? .write_u64 = blkiocg_reset_write,
> + ? ? ? },
> + ? ? ? {
> + ? ? ? ? ? ? ? .name = "io_wait_time",
> + ? ? ? ? ? ? ? .read_map = blkiocg_io_wait_time_read,
> + ? ? ? ? ? ? ? .write_u64 = blkiocg_reset_write,
> ? ? ? ?},
> ?#ifdef CONFIG_DEBUG_BLK_CGROUP
> ? ? ? ?{
> ? ? ? ? ? ? ? ?.name = "dequeue",
> - ? ? ? ? ? ? ? .read_seq_string = blkiocg_dequeue_read,
> + ? ? ? ? ? ? ? .read_map = blkiocg_dequeue_read,
> + ? ? ? ? ? ? ? .write_u64 = blkiocg_reset_write,
> ? ? ? ?},
> ?#endif
> ?};
> diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
> index fe44517..e8600b0 100644
> --- a/block/blk-cgroup.h
> +++ b/block/blk-cgroup.h
> @@ -23,6 +23,14 @@ extern struct cgroup_subsys blkio_subsys;
> ?#define blkio_subsys_id blkio_subsys.subsys_id
> ?#endif
>
> +enum io_type {
> + ? ? ? IO_READ = 0,
> + ? ? ? IO_WRITE,
> + ? ? ? IO_SYNC,
> + ? ? ? IO_ASYNC,
> + ? ? ? IO_TYPE_TOTAL
> +};
> +
> ?struct blkio_cgroup {
> ? ? ? ?struct cgroup_subsys_state css;
> ? ? ? ?unsigned int weight;
> @@ -30,6 +38,23 @@ struct blkio_cgroup {
> ? ? ? ?struct hlist_head blkg_list;
> ?};
>
> +struct blkio_group_stats {
> + ? ? ? /* total disk time and nr sectors dispatched by this group */
> + ? ? ? uint64_t time;
> + ? ? ? uint64_t sectors;
> + ? ? ? /* Total disk time used by IOs in ns */
> + ? ? ? uint64_t io_service_time[IO_TYPE_TOTAL];
> + ? ? ? uint64_t io_service_bytes[IO_TYPE_TOTAL]; /* Total bytes transferred */
> + ? ? ? /* Total IOs serviced, post merge */
> + ? ? ? uint64_t io_serviced[IO_TYPE_TOTAL];
> + ? ? ? /* Total time spent waiting in scheduler queue in ns */
> + ? ? ? uint64_t io_wait_time[IO_TYPE_TOTAL];
> +#ifdef CONFIG_DEBUG_BLK_CGROUP
> + ? ? ? /* How many times this group has been removed from service tree */
> + ? ? ? unsigned long dequeue;
> +#endif
> +};
> +
> ?struct blkio_group {
> ? ? ? ?/* An rcu protected unique identifier for the group */
> ? ? ? ?void *key;
> @@ -38,15 +63,13 @@ struct blkio_group {
> ?#ifdef CONFIG_DEBUG_BLK_CGROUP
> ? ? ? ?/* Store cgroup path */
> ? ? ? ?char path[128];
> - ? ? ? /* How many times this group has been removed from service tree */
> - ? ? ? unsigned long dequeue;
> ?#endif
> ? ? ? ?/* The device MKDEV(major, minor), this group has been created for */
> - ? ? ? dev_t ? dev;
> + ? ? ? dev_t dev;
>
> - ? ? ? /* total disk time and nr sectors dispatched by this group */
> - ? ? ? unsigned long time;
> - ? ? ? unsigned long sectors;
> + ? ? ? /* Need to serialize the stats in the case of reset/update */
> + ? ? ? spinlock_t stats_lock;
> + ? ? ? struct blkio_group_stats stats;
> ?};
>
> ?typedef void (blkio_unlink_group_fn) (void *key, struct blkio_group *blkg);
> @@ -105,8 +128,8 @@ extern void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
> ?extern int blkiocg_del_blkio_group(struct blkio_group *blkg);
> ?extern struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg,
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?void *key);
> -void blkiocg_update_blkio_group_stats(struct blkio_group *blkg,
> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? unsigned long time);
> +void blkiocg_update_timeslice_used(struct blkio_group *blkg,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? unsigned long time);
> ?#else
> ?struct cgroup;
> ?static inline struct blkio_cgroup *
> @@ -122,7 +145,7 @@ blkiocg_del_blkio_group(struct blkio_group *blkg) { return 0; }
>
> ?static inline struct blkio_group *
> ?blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key) { return NULL; }
> -static inline void blkiocg_update_blkio_group_stats(struct blkio_group *blkg,
> +static inline void blkiocg_update_timeslice_used(struct blkio_group *blkg,
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?unsigned long time) {}
> ?#endif
> ?#endif /* _BLK_CGROUP_H */
> diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
> index c18e348..d91df9f 100644
> --- a/block/cfq-iosched.c
> +++ b/block/cfq-iosched.c
> @@ -914,7 +914,7 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
>
> ? ? ? ?cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime,
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?st->min_vdisktime);
> - ? ? ? blkiocg_update_blkio_group_stats(&cfqg->blkg, used_sl);
> + ? ? ? blkiocg_update_timeslice_used(&cfqg->blkg, used_sl);
> ?}
>
> ?#ifdef CONFIG_CFQ_GROUP_IOSCHED
>
>

2010-04-05 15:48:16

by Vivek Goyal

[permalink] [raw]
Subject: Re: [PATCH 1/3][v2] blkio: Remove per-cfqq nr_sectors as we'll be passing

On Fri, Apr 02, 2010 at 06:55:48PM -0700, Divyesh Shah wrote:
> that info at request dispatch with other stats now. This patch removes the
> existing support for accounting sectors for a blkio_group. This will be added
> back differently in the next two patches.
>
> Signed-off-by: Divyesh Shah<[email protected]>
> ---
>
> block/blk-cgroup.c | 3 +--
> block/blk-cgroup.h | 6 ++----
> block/cfq-iosched.c | 10 ++--------
> 3 files changed, 5 insertions(+), 14 deletions(-)
>
> diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
> index 4b686ad..5be3981 100644
> --- a/block/blk-cgroup.c
> +++ b/block/blk-cgroup.c
> @@ -56,10 +56,9 @@ struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
> EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup);
>
> void blkiocg_update_blkio_group_stats(struct blkio_group *blkg,
> - unsigned long time, unsigned long sectors)
> + unsigned long time)
> {
> blkg->time += time;
> - blkg->sectors += sectors;
> }
> EXPORT_SYMBOL_GPL(blkiocg_update_blkio_group_stats);
>
> diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
> index 8ccc204..fe44517 100644
> --- a/block/blk-cgroup.h
> +++ b/block/blk-cgroup.h
> @@ -106,7 +106,7 @@ extern int blkiocg_del_blkio_group(struct blkio_group *blkg);
> extern struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg,
> void *key);
> void blkiocg_update_blkio_group_stats(struct blkio_group *blkg,
> - unsigned long time, unsigned long sectors);
> + unsigned long time);
> #else
> struct cgroup;
> static inline struct blkio_cgroup *
> @@ -123,8 +123,6 @@ blkiocg_del_blkio_group(struct blkio_group *blkg) { return 0; }
> static inline struct blkio_group *
> blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key) { return NULL; }
> static inline void blkiocg_update_blkio_group_stats(struct blkio_group *blkg,
> - unsigned long time, unsigned long sectors)
> -{
> -}
> + unsigned long time) {}
> #endif
> #endif /* _BLK_CGROUP_H */
> diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
> index ef1680b..c18e348 100644
> --- a/block/cfq-iosched.c
> +++ b/block/cfq-iosched.c
> @@ -141,8 +141,6 @@ struct cfq_queue {
> struct cfq_queue *new_cfqq;
> struct cfq_group *cfqg;
> struct cfq_group *orig_cfqg;
> - /* Sectors dispatched in current dispatch round */
> - unsigned long nr_sectors;
> };
>
> /*
> @@ -882,8 +880,7 @@ static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq)
> slice_used = cfqq->allocated_slice;
> }
>
> - cfq_log_cfqq(cfqq->cfqd, cfqq, "sl_used=%u sect=%lu", slice_used,
> - cfqq->nr_sectors);
> + cfq_log_cfqq(cfqq->cfqd, cfqq, "sl_used=%u", slice_used);

Hi Divyesh,

I had found above message useful when I am looking at blktraces. It helps
to know how many sectors one could transfer in a given time slice.
Especially if you are comparing two queues in two groups. I think we are
going to loose this information now?

Vivek

> return slice_used;
> }
>
> @@ -917,8 +914,7 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
>
> cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime,
> st->min_vdisktime);
> - blkiocg_update_blkio_group_stats(&cfqg->blkg, used_sl,
> - cfqq->nr_sectors);
> + blkiocg_update_blkio_group_stats(&cfqg->blkg, used_sl);
> }
>
> #ifdef CONFIG_CFQ_GROUP_IOSCHED
> @@ -1524,7 +1520,6 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd,
> cfqq->allocated_slice = 0;
> cfqq->slice_end = 0;
> cfqq->slice_dispatch = 0;
> - cfqq->nr_sectors = 0;
>
> cfq_clear_cfqq_wait_request(cfqq);
> cfq_clear_cfqq_must_dispatch(cfqq);
> @@ -1869,7 +1864,6 @@ static void cfq_dispatch_insert(struct request_queue *q, struct request *rq)
> elv_dispatch_sort(q, rq);
>
> cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]++;
> - cfqq->nr_sectors += blk_rq_sectors(rq);
> }
>
> /*

2010-04-05 16:06:26

by Vivek Goyal

[permalink] [raw]
Subject: Re: [PATCH 2/3][v2] blkio: Add io controller stats like

On Fri, Apr 02, 2010 at 06:56:35PM -0700, Divyesh Shah wrote:
> - io_service_time (the actual time in ns taken by the dis to service the IO)
> - io_wait_time (the time spent waiting in the IO shceduler queues before
> getting serviced)
> - io_serviced (number of IOs serviced from this blkio_group)
> - io_service_bytes (Number of bytes served for this cgroup)
>
> These stats are accumulated per operation type helping us to distinguish between
> read and write, and sync and async IO. This patch does not increment any of
> these stats.
>
> Signed-off-by: Divyesh Shah<[email protected]>
> ---
>
> Documentation/cgroups/blkio-controller.txt | 33 +++++
> block/blk-cgroup.c | 179 +++++++++++++++++++++++++---
> block/blk-cgroup.h | 41 +++++-
> block/cfq-iosched.c | 2
> 4 files changed, 229 insertions(+), 26 deletions(-)
>
> diff --git a/Documentation/cgroups/blkio-controller.txt b/Documentation/cgroups/blkio-controller.txt
> index 630879c..ededdca 100644
> --- a/Documentation/cgroups/blkio-controller.txt
> +++ b/Documentation/cgroups/blkio-controller.txt
> @@ -75,6 +75,9 @@ CONFIG_DEBUG_BLK_CGROUP
>
> Details of cgroup files
> =======================
> +Writing an int to any of the stats files (which excludes weight) will result
> +in all stats for that cgroup to be erased.
> +
> - blkio.weight
> - Specifies per cgroup weight.
>
> @@ -92,6 +95,36 @@ Details of cgroup files
> third field specifies the number of sectors transferred by the
> group to/from the device.
>
> +- blkio.io_service_bytes
> + - Number of bytes transferred to/from the disk by the group. These
> + are further divided by the type of operation - read or write, sync
> + or async. First two fields specify the major and minor number of the
> + device, third field specifies the operation type and the fourth field
> + specifies the number of bytes.
> +
> +- blkio.io_serviced
> + - Number of IOs completed to/from the disk by the group. These
> + are further divided by the type of operation - read or write, sync
> + or async. First two fields specify the major and minor number of the
> + device, third field specifies the operation type and the fourth field
> + specifies the number of IOs.
> +
> +- blkio.io_service_time
> + - Total amount of time spent by the device to service the IOs for this
> + cgroup. This is in nanoseconds to make it meaningful for flash
> + devices too. This time is further divided by the type of operation -
> + read or write, sync or async. First two fields specify the major and
> + minor number of the device, third field specifies the operation type
> + and the fourth field specifies the io_service_time in ns.

Hi Divyesh,

Looking the third patch where you acutally increment the stats, this is
how service time is calculated.

- Save start_time in rq, when driver actually removes the request from
request queue.
- at request completion time calculate service time.
service_time = now - start_time.

This works very well if driver/device does not have NCQ and process one
request at a time. But with NCQ, we can multiple requests in the driver
queue at the same time then we can run into issues.

- With NCQ, time becomes cumulative. So if three requests rq1, rq2 and rq3
are in disk queue, and if requests are processed in the order rq1,rq2,rq3 by
the disk (no parallel channles), then rq1 and rq2's completion time is
added in rq3. So total service time of the group can be much more than
actual time elapsed. That does not seem right. Did you face this issue in
your testing?


Same is the case with io_wait_time, Because time is cumulative, actual
io_wait_time, can be more than real time elapsed.

Is this the intention and you are finding even cumulative time useful?

> +
> +- blkio.io_wait_time
> + - Total amount of time the IO spent waiting in the scheduler queues for
> + service. This is in nanoseconds to make it meaningful for flash
> + devices too. This time is further divided by the type of operation -
> + read or write, sync or async. First two fields specify the major and
> + minor number of the device, third field specifies the operation type
> + and the fourth field specifies the io_wait_time in ns.
> +
> - blkio.dequeue
> - Debugging aid only enabled if CONFIG_DEBUG_CFQ_IOSCHED=y. This
> gives the statistics about how many a times a group was dequeued
> diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
> index 5be3981..cac10b2 100644
> --- a/block/blk-cgroup.c
> +++ b/block/blk-cgroup.c
> @@ -17,6 +17,8 @@
> #include <linux/err.h>
> #include "blk-cgroup.h"
>
> +#define MAX_KEY_LEN 100
> +
> static DEFINE_SPINLOCK(blkio_list_lock);
> static LIST_HEAD(blkio_list);
>
> @@ -55,12 +57,15 @@ struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
> }
> EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup);
>
> -void blkiocg_update_blkio_group_stats(struct blkio_group *blkg,
> - unsigned long time)
> +void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time)
> {
> - blkg->time += time;
> + unsigned long flags;
> +
> + spin_lock_irqsave(&blkg->stats_lock, flags);
> + blkg->stats.time += time;
> + spin_unlock_irqrestore(&blkg->stats_lock, flags);
> }
> -EXPORT_SYMBOL_GPL(blkiocg_update_blkio_group_stats);
> +EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used);
>
> void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
> struct blkio_group *blkg, void *key, dev_t dev)
> @@ -170,13 +175,119 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
> return 0;
> }
>
> -#define SHOW_FUNCTION_PER_GROUP(__VAR) \
> +static int
> +blkiocg_reset_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
> +{
> + struct blkio_cgroup *blkcg;
> + struct blkio_group *blkg;
> + struct hlist_node *n;
> + struct blkio_group_stats *stats;
> +
> + blkcg = cgroup_to_blkio_cgroup(cgroup);
> + spin_lock_irq(&blkcg->lock);
> + hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
> + spin_lock(&blkg->stats_lock);
> + stats = &blkg->stats;
> + memset(stats, 0, sizeof(struct blkio_group_stats));
> + spin_unlock(&blkg->stats_lock);
> + }
> + spin_unlock_irq(&blkcg->lock);
> + return 0;
> +}
> +
> +static void blkio_get_key_name(int type, dev_t dev, char *str, int chars_left)
> +{
> + snprintf(str, chars_left, "%u:%u", MAJOR(dev), MINOR(dev));
> + chars_left -= strlen(str);
> + if (chars_left <= 0) {
> + printk(KERN_WARNING
> + "Possibly incorrect cgroup stat display format");
> + return;
> + }
> + switch (type) {
> + case IO_READ:
> + strlcat(str, " Read", chars_left);
> + break;
> + case IO_WRITE:
> + strlcat(str, " Write", chars_left);
> + break;
> + case IO_SYNC:
> + strlcat(str, " Sync", chars_left);
> + break;
> + case IO_ASYNC:
> + strlcat(str, " Async", chars_left);
> + break;
> + case IO_TYPE_TOTAL:
> + strlcat(str, " Total", chars_left);
> + break;
> + default:
> + strlcat(str, " Invalid", chars_left);
> + }
> +}
> +
> +typedef uint64_t (get_var) (struct blkio_group *, int);
> +
> +static uint64_t blkio_get_typed_stat(struct blkio_group *blkg,
> + struct cgroup_map_cb *cb, get_var *getvar, dev_t dev)
> +{
> + uint64_t disk_total;
> + char key_str[MAX_KEY_LEN];
> + int type;
> +
> + for (type = 0; type < IO_TYPE_TOTAL; type++) {
> + blkio_get_key_name(type, dev, key_str, MAX_KEY_LEN);
> + cb->fill(cb, key_str, getvar(blkg, type));
> + }
> + disk_total = getvar(blkg, IO_READ) + getvar(blkg, IO_WRITE);
> + blkio_get_key_name(IO_TYPE_TOTAL, dev, key_str, MAX_KEY_LEN);
> + cb->fill(cb, key_str, disk_total);
> + return disk_total;
> +}
> +
> +static uint64_t blkio_get_stat(struct blkio_group *blkg,
> + struct cgroup_map_cb *cb, get_var *getvar, dev_t dev)
> +{
> + uint64_t var = getvar(blkg, 0);
> + char str[10];
> +
> + snprintf(str, 10, "%u:%u", MAJOR(dev), MINOR(dev));
> + cb->fill(cb, str, var);
> + return var;
> +}
> +
> +#define GET_STAT_INDEXED(__VAR) \
> +uint64_t blkio_get_##__VAR##_stat(struct blkio_group *blkg, int type) \
> +{ \
> + return blkg->stats.__VAR[type]; \
> +} \
> +
> +GET_STAT_INDEXED(io_service_bytes);
> +GET_STAT_INDEXED(io_serviced);
> +GET_STAT_INDEXED(io_service_time);
> +GET_STAT_INDEXED(io_wait_time);
> +#undef GET_STAT_INDEXED
> +
> +#define GET_STAT(__VAR) \
> +uint64_t blkio_get_##__VAR##_stat(struct blkio_group *blkg, int dummy) \
> +{ \
> + return blkg->stats.__VAR; \
> +}
> +
> +GET_STAT(time);
> +GET_STAT(sectors);
> +#ifdef CONFIG_DEBUG_BLK_CGROUP
> +GET_STAT(dequeue);
> +#endif
> +#undef GET_STAT
> +
> +#define SHOW_FUNCTION_PER_GROUP(__VAR, get_stats, getvar, show_total) \
> static int blkiocg_##__VAR##_read(struct cgroup *cgroup, \
> - struct cftype *cftype, struct seq_file *m) \
> + struct cftype *cftype, struct cgroup_map_cb *cb) \
> { \
> struct blkio_cgroup *blkcg; \
> struct blkio_group *blkg; \
> struct hlist_node *n; \
> + uint64_t cgroup_total = 0; \
> \
> if (!cgroup_lock_live_group(cgroup)) \
> return -ENODEV; \
> @@ -184,19 +295,32 @@ static int blkiocg_##__VAR##_read(struct cgroup *cgroup, \
> blkcg = cgroup_to_blkio_cgroup(cgroup); \
> rcu_read_lock(); \
> hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) {\
> - if (blkg->dev) \
> - seq_printf(m, "%u:%u %lu\n", MAJOR(blkg->dev), \
> - MINOR(blkg->dev), blkg->__VAR); \
> + if (blkg->dev) { \
> + spin_lock_irq(&blkg->stats_lock); \
> + cgroup_total += get_stats(blkg, cb, getvar, \
> + blkg->dev); \
> + spin_unlock_irq(&blkg->stats_lock); \
> + } \
> } \
> + if (show_total) \
> + cb->fill(cb, "Total", cgroup_total); \
> rcu_read_unlock(); \
> cgroup_unlock(); \
> return 0; \
> }
>
> -SHOW_FUNCTION_PER_GROUP(time);
> -SHOW_FUNCTION_PER_GROUP(sectors);
> +SHOW_FUNCTION_PER_GROUP(time, blkio_get_stat, blkio_get_time_stat, 0);
> +SHOW_FUNCTION_PER_GROUP(sectors, blkio_get_stat, blkio_get_sectors_stat, 0);
> +SHOW_FUNCTION_PER_GROUP(io_service_bytes, blkio_get_typed_stat,
> + blkio_get_io_service_bytes_stat, 1);
> +SHOW_FUNCTION_PER_GROUP(io_serviced, blkio_get_typed_stat,
> + blkio_get_io_serviced_stat, 1);
> +SHOW_FUNCTION_PER_GROUP(io_service_time, blkio_get_typed_stat,
> + blkio_get_io_service_time_stat, 1);
> +SHOW_FUNCTION_PER_GROUP(io_wait_time, blkio_get_typed_stat,
> + blkio_get_io_wait_time_stat, 1);
> #ifdef CONFIG_DEBUG_BLK_CGROUP
> -SHOW_FUNCTION_PER_GROUP(dequeue);
> +SHOW_FUNCTION_PER_GROUP(dequeue, blkio_get_stat, blkio_get_dequeue_stat, 0);
> #endif
> #undef SHOW_FUNCTION_PER_GROUP
>
> @@ -204,7 +328,7 @@ SHOW_FUNCTION_PER_GROUP(dequeue);
> void blkiocg_update_blkio_group_dequeue_stats(struct blkio_group *blkg,
> unsigned long dequeue)
> {
> - blkg->dequeue += dequeue;
> + blkg->stats.dequeue += dequeue;
> }
> EXPORT_SYMBOL_GPL(blkiocg_update_blkio_group_dequeue_stats);
> #endif
> @@ -217,16 +341,39 @@ struct cftype blkio_files[] = {
> },
> {
> .name = "time",
> - .read_seq_string = blkiocg_time_read,
> + .read_map = blkiocg_time_read,
> + .write_u64 = blkiocg_reset_write,
> },
> {
> .name = "sectors",
> - .read_seq_string = blkiocg_sectors_read,
> + .read_map = blkiocg_sectors_read,
> + .write_u64 = blkiocg_reset_write,
> + },
> + {
> + .name = "io_service_bytes",
> + .read_map = blkiocg_io_service_bytes_read,
> + .write_u64 = blkiocg_reset_write,
> + },
> + {
> + .name = "io_serviced",
> + .read_map = blkiocg_io_serviced_read,
> + .write_u64 = blkiocg_reset_write,
> + },
> + {
> + .name = "io_service_time",
> + .read_map = blkiocg_io_service_time_read,
> + .write_u64 = blkiocg_reset_write,
> + },
> + {
> + .name = "io_wait_time",
> + .read_map = blkiocg_io_wait_time_read,
> + .write_u64 = blkiocg_reset_write,
> },
> #ifdef CONFIG_DEBUG_BLK_CGROUP
> {
> .name = "dequeue",
> - .read_seq_string = blkiocg_dequeue_read,
> + .read_map = blkiocg_dequeue_read,
> + .write_u64 = blkiocg_reset_write,
> },
> #endif
> };
> diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
> index fe44517..e8600b0 100644
> --- a/block/blk-cgroup.h
> +++ b/block/blk-cgroup.h
> @@ -23,6 +23,14 @@ extern struct cgroup_subsys blkio_subsys;
> #define blkio_subsys_id blkio_subsys.subsys_id
> #endif
>
> +enum io_type {
> + IO_READ = 0,
> + IO_WRITE,
> + IO_SYNC,
> + IO_ASYNC,
> + IO_TYPE_TOTAL
> +};
> +
> struct blkio_cgroup {
> struct cgroup_subsys_state css;
> unsigned int weight;
> @@ -30,6 +38,23 @@ struct blkio_cgroup {
> struct hlist_head blkg_list;
> };
>
> +struct blkio_group_stats {
> + /* total disk time and nr sectors dispatched by this group */
> + uint64_t time;
> + uint64_t sectors;
> + /* Total disk time used by IOs in ns */
> + uint64_t io_service_time[IO_TYPE_TOTAL];
> + uint64_t io_service_bytes[IO_TYPE_TOTAL]; /* Total bytes transferred */
> + /* Total IOs serviced, post merge */
> + uint64_t io_serviced[IO_TYPE_TOTAL];
> + /* Total time spent waiting in scheduler queue in ns */
> + uint64_t io_wait_time[IO_TYPE_TOTAL];
> +#ifdef CONFIG_DEBUG_BLK_CGROUP
> + /* How many times this group has been removed from service tree */
> + unsigned long dequeue;
> +#endif
> +};
> +
> struct blkio_group {
> /* An rcu protected unique identifier for the group */
> void *key;
> @@ -38,15 +63,13 @@ struct blkio_group {
> #ifdef CONFIG_DEBUG_BLK_CGROUP
> /* Store cgroup path */
> char path[128];
> - /* How many times this group has been removed from service tree */
> - unsigned long dequeue;
> #endif
> /* The device MKDEV(major, minor), this group has been created for */
> - dev_t dev;
> + dev_t dev;
>
> - /* total disk time and nr sectors dispatched by this group */
> - unsigned long time;
> - unsigned long sectors;
> + /* Need to serialize the stats in the case of reset/update */
> + spinlock_t stats_lock;
> + struct blkio_group_stats stats;
> };
>
> typedef void (blkio_unlink_group_fn) (void *key, struct blkio_group *blkg);
> @@ -105,8 +128,8 @@ extern void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
> extern int blkiocg_del_blkio_group(struct blkio_group *blkg);
> extern struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg,
> void *key);
> -void blkiocg_update_blkio_group_stats(struct blkio_group *blkg,
> - unsigned long time);
> +void blkiocg_update_timeslice_used(struct blkio_group *blkg,
> + unsigned long time);
> #else
> struct cgroup;
> static inline struct blkio_cgroup *
> @@ -122,7 +145,7 @@ blkiocg_del_blkio_group(struct blkio_group *blkg) { return 0; }
>
> static inline struct blkio_group *
> blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key) { return NULL; }
> -static inline void blkiocg_update_blkio_group_stats(struct blkio_group *blkg,
> +static inline void blkiocg_update_timeslice_used(struct blkio_group *blkg,
> unsigned long time) {}
> #endif
> #endif /* _BLK_CGROUP_H */
> diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
> index c18e348..d91df9f 100644
> --- a/block/cfq-iosched.c
> +++ b/block/cfq-iosched.c
> @@ -914,7 +914,7 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
>
> cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime,
> st->min_vdisktime);
> - blkiocg_update_blkio_group_stats(&cfqg->blkg, used_sl);
> + blkiocg_update_timeslice_used(&cfqg->blkg, used_sl);
> }
>
> #ifdef CONFIG_CFQ_GROUP_IOSCHED

2010-04-05 16:57:36

by Divyesh Shah

[permalink] [raw]
Subject: Re: [PATCH 1/3][v2] blkio: Remove per-cfqq nr_sectors as we'll be passing

On Mon, Apr 5, 2010 at 8:47 AM, Vivek Goyal <[email protected]> wrote:
> On Fri, Apr 02, 2010 at 06:55:48PM -0700, Divyesh Shah wrote:
>> that info at request dispatch with other stats now. This patch removes the
>> existing support for accounting sectors for a blkio_group. This will be added
>> back differently in the next two patches.
>>
>> Signed-off-by: Divyesh Shah<[email protected]>
>> ---
>>
>> ?block/blk-cgroup.c ?| ? ?3 +--
>> ?block/blk-cgroup.h ?| ? ?6 ++----
>> ?block/cfq-iosched.c | ? 10 ++--------
>> ?3 files changed, 5 insertions(+), 14 deletions(-)
>>
>> diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
>> index 4b686ad..5be3981 100644
>> --- a/block/blk-cgroup.c
>> +++ b/block/blk-cgroup.c
>> @@ -56,10 +56,9 @@ struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
>> ?EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup);
>>
>> ?void blkiocg_update_blkio_group_stats(struct blkio_group *blkg,
>> - ? ? ? ? ? ? ? ? ? ? unsigned long time, unsigned long sectors)
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? unsigned long time)
>> ?{
>> ? ? ? blkg->time += time;
>> - ? ? blkg->sectors += sectors;
>> ?}
>> ?EXPORT_SYMBOL_GPL(blkiocg_update_blkio_group_stats);
>>
>> diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
>> index 8ccc204..fe44517 100644
>> --- a/block/blk-cgroup.h
>> +++ b/block/blk-cgroup.h
>> @@ -106,7 +106,7 @@ extern int blkiocg_del_blkio_group(struct blkio_group *blkg);
>> ?extern struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg,
>> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? void *key);
>> ?void blkiocg_update_blkio_group_stats(struct blkio_group *blkg,
>> - ? ? ? ? ? ? ? ? ? ? unsigned long time, unsigned long sectors);
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? unsigned long time);
>> ?#else
>> ?struct cgroup;
>> ?static inline struct blkio_cgroup *
>> @@ -123,8 +123,6 @@ blkiocg_del_blkio_group(struct blkio_group *blkg) { return 0; }
>> ?static inline struct blkio_group *
>> ?blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key) { return NULL; }
>> ?static inline void blkiocg_update_blkio_group_stats(struct blkio_group *blkg,
>> - ? ? ? ? ? ? ? ? ? ? unsigned long time, unsigned long sectors)
>> -{
>> -}
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? unsigned long time) {}
>> ?#endif
>> ?#endif /* _BLK_CGROUP_H */
>> diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
>> index ef1680b..c18e348 100644
>> --- a/block/cfq-iosched.c
>> +++ b/block/cfq-iosched.c
>> @@ -141,8 +141,6 @@ struct cfq_queue {
>> ? ? ? struct cfq_queue *new_cfqq;
>> ? ? ? struct cfq_group *cfqg;
>> ? ? ? struct cfq_group *orig_cfqg;
>> - ? ? /* Sectors dispatched in current dispatch round */
>> - ? ? unsigned long nr_sectors;
>> ?};
>>
>> ?/*
>> @@ -882,8 +880,7 @@ static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq)
>> ? ? ? ? ? ? ? ? ? ? ? slice_used = cfqq->allocated_slice;
>> ? ? ? }
>>
>> - ? ? cfq_log_cfqq(cfqq->cfqd, cfqq, "sl_used=%u sect=%lu", slice_used,
>> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? cfqq->nr_sectors);
>> + ? ? cfq_log_cfqq(cfqq->cfqd, cfqq, "sl_used=%u", slice_used);
>
> Hi Divyesh,
>
> I had found above message useful when I am looking at blktraces. It helps
> to know how many sectors one could transfer in a given time slice.
> Especially if you are comparing two queues in two groups. I think we are
> going to loose this information now?

Would printing the number of sectors for each request help? If you
think this is really important we can keep this redundant information
as well.

>
> Vivek
>
>> ? ? ? return slice_used;
>> ?}
>>
>> @@ -917,8 +914,7 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
>>
>> ? ? ? cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime,
>> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? st->min_vdisktime);
>> - ? ? blkiocg_update_blkio_group_stats(&cfqg->blkg, used_sl,
>> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? cfqq->nr_sectors);
>> + ? ? blkiocg_update_blkio_group_stats(&cfqg->blkg, used_sl);
>> ?}
>>
>> ?#ifdef CONFIG_CFQ_GROUP_IOSCHED
>> @@ -1524,7 +1520,6 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd,
>> ? ? ? ? ? ? ? cfqq->allocated_slice = 0;
>> ? ? ? ? ? ? ? cfqq->slice_end = 0;
>> ? ? ? ? ? ? ? cfqq->slice_dispatch = 0;
>> - ? ? ? ? ? ? cfqq->nr_sectors = 0;
>>
>> ? ? ? ? ? ? ? cfq_clear_cfqq_wait_request(cfqq);
>> ? ? ? ? ? ? ? cfq_clear_cfqq_must_dispatch(cfqq);
>> @@ -1869,7 +1864,6 @@ static void cfq_dispatch_insert(struct request_queue *q, struct request *rq)
>> ? ? ? elv_dispatch_sort(q, rq);
>>
>> ? ? ? cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]++;
>> - ? ? cfqq->nr_sectors += blk_rq_sectors(rq);
>> ?}
>>
>> ?/*
>

2010-04-05 17:05:58

by Vivek Goyal

[permalink] [raw]
Subject: Re: [PATCH 1/3][v2] blkio: Remove per-cfqq nr_sectors as we'll be passing

On Mon, Apr 05, 2010 at 09:57:08AM -0700, Divyesh Shah wrote:
> On Mon, Apr 5, 2010 at 8:47 AM, Vivek Goyal <[email protected]> wrote:
> > On Fri, Apr 02, 2010 at 06:55:48PM -0700, Divyesh Shah wrote:
> >> that info at request dispatch with other stats now. This patch removes the
> >> existing support for accounting sectors for a blkio_group. This will be added
> >> back differently in the next two patches.
> >>
> >> Signed-off-by: Divyesh Shah<[email protected]>
> >> ---
> >>
> >> ?block/blk-cgroup.c ?| ? ?3 +--
> >> ?block/blk-cgroup.h ?| ? ?6 ++----
> >> ?block/cfq-iosched.c | ? 10 ++--------
> >> ?3 files changed, 5 insertions(+), 14 deletions(-)
> >>
> >> diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
> >> index 4b686ad..5be3981 100644
> >> --- a/block/blk-cgroup.c
> >> +++ b/block/blk-cgroup.c
> >> @@ -56,10 +56,9 @@ struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
> >> ?EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup);
> >>
> >> ?void blkiocg_update_blkio_group_stats(struct blkio_group *blkg,
> >> - ? ? ? ? ? ? ? ? ? ? unsigned long time, unsigned long sectors)
> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? unsigned long time)
> >> ?{
> >> ? ? ? blkg->time += time;
> >> - ? ? blkg->sectors += sectors;
> >> ?}
> >> ?EXPORT_SYMBOL_GPL(blkiocg_update_blkio_group_stats);
> >>
> >> diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
> >> index 8ccc204..fe44517 100644
> >> --- a/block/blk-cgroup.h
> >> +++ b/block/blk-cgroup.h
> >> @@ -106,7 +106,7 @@ extern int blkiocg_del_blkio_group(struct blkio_group *blkg);
> >> ?extern struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg,
> >> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? void *key);
> >> ?void blkiocg_update_blkio_group_stats(struct blkio_group *blkg,
> >> - ? ? ? ? ? ? ? ? ? ? unsigned long time, unsigned long sectors);
> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? unsigned long time);
> >> ?#else
> >> ?struct cgroup;
> >> ?static inline struct blkio_cgroup *
> >> @@ -123,8 +123,6 @@ blkiocg_del_blkio_group(struct blkio_group *blkg) { return 0; }
> >> ?static inline struct blkio_group *
> >> ?blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key) { return NULL; }
> >> ?static inline void blkiocg_update_blkio_group_stats(struct blkio_group *blkg,
> >> - ? ? ? ? ? ? ? ? ? ? unsigned long time, unsigned long sectors)
> >> -{
> >> -}
> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? unsigned long time) {}
> >> ?#endif
> >> ?#endif /* _BLK_CGROUP_H */
> >> diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
> >> index ef1680b..c18e348 100644
> >> --- a/block/cfq-iosched.c
> >> +++ b/block/cfq-iosched.c
> >> @@ -141,8 +141,6 @@ struct cfq_queue {
> >> ? ? ? struct cfq_queue *new_cfqq;
> >> ? ? ? struct cfq_group *cfqg;
> >> ? ? ? struct cfq_group *orig_cfqg;
> >> - ? ? /* Sectors dispatched in current dispatch round */
> >> - ? ? unsigned long nr_sectors;
> >> ?};
> >>
> >> ?/*
> >> @@ -882,8 +880,7 @@ static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq)
> >> ? ? ? ? ? ? ? ? ? ? ? slice_used = cfqq->allocated_slice;
> >> ? ? ? }
> >>
> >> - ? ? cfq_log_cfqq(cfqq->cfqd, cfqq, "sl_used=%u sect=%lu", slice_used,
> >> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? cfqq->nr_sectors);
> >> + ? ? cfq_log_cfqq(cfqq->cfqd, cfqq, "sl_used=%u", slice_used);
> >
> > Hi Divyesh,
> >
> > I had found above message useful when I am looking at blktraces. It helps
> > to know how many sectors one could transfer in a given time slice.
> > Especially if you are comparing two queues in two groups. I think we are
> > going to loose this information now?
>

Printing it for each request does not help, because I was interested total
of once slice. How long was the slice and how many sectors we completed.

Well, for the time being, don't worry about it. If I really need it I
will put anohter patch to re-introduce it.

Vivek

> Would printing the number of sectors for each request help? If you
> think this is really important we can keep this redundant information
> as well.
>
> >
> > Vivek
> >
> >> ? ? ? return slice_used;
> >> ?}
> >>
> >> @@ -917,8 +914,7 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
> >>
> >> ? ? ? cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime,
> >> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? st->min_vdisktime);
> >> - ? ? blkiocg_update_blkio_group_stats(&cfqg->blkg, used_sl,
> >> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? cfqq->nr_sectors);
> >> + ? ? blkiocg_update_blkio_group_stats(&cfqg->blkg, used_sl);
> >> ?}
> >>
> >> ?#ifdef CONFIG_CFQ_GROUP_IOSCHED
> >> @@ -1524,7 +1520,6 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd,
> >> ? ? ? ? ? ? ? cfqq->allocated_slice = 0;
> >> ? ? ? ? ? ? ? cfqq->slice_end = 0;
> >> ? ? ? ? ? ? ? cfqq->slice_dispatch = 0;
> >> - ? ? ? ? ? ? cfqq->nr_sectors = 0;
> >>
> >> ? ? ? ? ? ? ? cfq_clear_cfqq_wait_request(cfqq);
> >> ? ? ? ? ? ? ? cfq_clear_cfqq_must_dispatch(cfqq);
> >> @@ -1869,7 +1864,6 @@ static void cfq_dispatch_insert(struct request_queue *q, struct request *rq)
> >> ? ? ? elv_dispatch_sort(q, rq);
> >>
> >> ? ? ? cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]++;
> >> - ? ? cfqq->nr_sectors += blk_rq_sectors(rq);
> >> ?}
> >>
> >> ?/*
> >

2010-04-05 18:12:15

by Vivek Goyal

[permalink] [raw]
Subject: Re: [PATCH 3/3][v2] blkio: Increment the blkio cgroup stats for real now

On Fri, Apr 02, 2010 at 06:57:27PM -0700, Divyesh Shah wrote:
> We also add start_time_ns and io_start_time_ns fields to struct request
> here to record the time when a request is created and when it is
> dispatched to device. We use ns uints here as ms and jiffies are
> not very useful for non-rotational media.
>
> Signed-off-by: Divyesh Shah<[email protected]>
> ---
>
> block/blk-cgroup.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++--
> block/blk-cgroup.h | 33 +++++++++++++++++++++++++++++---
> block/blk-core.c | 6 ++++--
> block/cfq-iosched.c | 6 +++++-
> include/linux/blkdev.h | 38 ++++++++++++++++++++++++++++++++++++-
> 5 files changed, 123 insertions(+), 9 deletions(-)
>
> diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
> index cac10b2..21c0d28 100644
> --- a/block/blk-cgroup.c
> +++ b/block/blk-cgroup.c
> @@ -15,6 +15,7 @@
> #include <linux/kdev_t.h>
> #include <linux/module.h>
> #include <linux/err.h>
> +#include <linux/blkdev.h>
> #include "blk-cgroup.h"
>
> #define MAX_KEY_LEN 100
> @@ -57,6 +58,16 @@ struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
> }
> EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup);
>
> +/*
> + * Add to the appropriate stat variable depending on the request type.
> + * This should be called with the blkg->stats_lock held.
> + */
> +void io_add_stat(uint64_t *stat, uint64_t add, int direction, int sync)
> +{
> + stat[direction] += add;
> + stat[sync] += add;
> +}
> +

static? blkio_add_stat()?

how about using "bool" for direction and sync? This is true throughout the
patch.

> void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time)
> {
> unsigned long flags;
> @@ -67,6 +78,40 @@ void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time)
> }
> EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used);
>
> +void blkiocg_update_dispatch_stats(struct blkio_group *blkg,
> + uint64_t bytes, int direction, int sync)
> +{
> + struct blkio_group_stats *stats;
> + unsigned long flags;
> +
> + spin_lock_irqsave(&blkg->stats_lock, flags);
> + stats = &blkg->stats;
> + stats->sectors += bytes >> 9;
> + io_add_stat(stats->io_serviced, 1, direction, sync);
> + io_add_stat(stats->io_service_bytes, bytes, direction, sync);
> + spin_unlock_irqrestore(&blkg->stats_lock, flags);
> +}
> +EXPORT_SYMBOL_GPL(blkiocg_update_dispatch_stats);
> +
> +void blkiocg_update_completion_stats(struct blkio_group *blkg,
> + uint64_t start_time, uint64_t io_start_time, int direction, int sync)
> +{
> + struct blkio_group_stats *stats;
> + unsigned long flags;
> + unsigned long long now = sched_clock();
> +
> + spin_lock_irqsave(&blkg->stats_lock, flags);
> + stats = &blkg->stats;
> + if (time_after64(now, io_start_time))
> + io_add_stat(stats->io_service_time, now - io_start_time,
> + direction, sync);
> + if (time_after64(io_start_time, start_time))
> + io_add_stat(stats->io_wait_time, io_start_time - start_time,
> + direction, sync);
> + spin_unlock_irqrestore(&blkg->stats_lock, flags);
> +}
> +EXPORT_SYMBOL_GPL(blkiocg_update_completion_stats);
> +
> void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
> struct blkio_group *blkg, void *key, dev_t dev)
> {
> @@ -325,12 +370,12 @@ SHOW_FUNCTION_PER_GROUP(dequeue, blkio_get_stat, blkio_get_dequeue_stat, 0);
> #undef SHOW_FUNCTION_PER_GROUP
>
> #ifdef CONFIG_DEBUG_BLK_CGROUP
> -void blkiocg_update_blkio_group_dequeue_stats(struct blkio_group *blkg,
> +void blkiocg_update_dequeue_stats(struct blkio_group *blkg,
> unsigned long dequeue)
> {
> blkg->stats.dequeue += dequeue;
> }
> -EXPORT_SYMBOL_GPL(blkiocg_update_blkio_group_dequeue_stats);
> +EXPORT_SYMBOL_GPL(blkiocg_update_dequeue_stats);
> #endif
>
> struct cftype blkio_files[] = {
> diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
> index e8600b0..5fc88e1 100644
> --- a/block/blk-cgroup.h
> +++ b/block/blk-cgroup.h
> @@ -112,12 +112,12 @@ static inline char *blkg_path(struct blkio_group *blkg)
> {
> return blkg->path;
> }
> -void blkiocg_update_blkio_group_dequeue_stats(struct blkio_group *blkg,
> +void blkiocg_update_dequeue_stats(struct blkio_group *blkg,
> unsigned long dequeue);
> #else
> static inline char *blkg_path(struct blkio_group *blkg) { return NULL; }
> -static inline void blkiocg_update_blkio_group_dequeue_stats(
> - struct blkio_group *blkg, unsigned long dequeue) {}
> +static inline void blkiocg_update_dequeue_stats(struct blkio_group *blkg,
> + unsigned long dequeue) {}
> #endif
>
> #if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE)
> @@ -130,6 +130,20 @@ extern struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg,
> void *key);
> void blkiocg_update_timeslice_used(struct blkio_group *blkg,
> unsigned long time);
> +void blkiocg_update_dispatch_stats(struct blkio_group *blkg, uint64_t bytes,
> + int direction, int sync);
> +void blkiocg_update_completion_stats(struct blkio_group *blkg,
> + uint64_t start_time, uint64_t io_start_time, int direction, int sync);
> +static inline int rq_direction(struct request *rq)
> +{
> + return (rq->cmd_flags & REQ_RW) ? IO_WRITE : IO_READ;
> +}

blkdev.h offers rq_data_dir(). Let mapping of 0 or 1 to correct enum
happen in blkio-cgroup.c. This is not rq property. This is internal to
blkio.

> +
> +static inline int rq_sync(struct request *rq)
> +{
> + return (!(rq->cmd_flags & REQ_RW) ||
> + rq->cmd_flags & REQ_RW_SYNC) ? IO_SYNC : IO_ASYNC;
> +}


blkdev.h already offers rq_is_sync(). We can use that. Mapping of 1 or 0
to corrent enum number IO_SYNC, IO_ASYNC can be done by blkio-cgroup.c


Vivek

> #else
> struct cgroup;
> static inline struct blkio_cgroup *
> @@ -147,5 +161,18 @@ static inline struct blkio_group *
> blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key) { return NULL; }
> static inline void blkiocg_update_timeslice_used(struct blkio_group *blkg,
> unsigned long time) {}
> +static inline void blkiocg_update_dispatch_stats(struct blkio_group *blkg,
> + uint64_t bytes, int direction, int sync) {}
> +static inline void blkiocg_update_completion_stats(struct blkio_group *blkg,
> + uint64_t start_time, uint64_t io_start_time, int direction, int sync) {}
> +static inline int rq_direction(struct request *rq)
> +{
> + return 0;
> +}
> +
> +static inline int rq_sync(struct request *rq)
> +{
> + return 0;
> +}
> #endif
> #endif /* _BLK_CGROUP_H */
> diff --git a/block/blk-core.c b/block/blk-core.c
> index 9fe174d..1d94f15 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -127,6 +127,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
> rq->tag = -1;
> rq->ref_count = 1;
> rq->start_time = jiffies;
> + set_start_time_ns(rq);
> }
> EXPORT_SYMBOL(blk_rq_init);
>
> @@ -1855,8 +1856,10 @@ void blk_dequeue_request(struct request *rq)
> * and to it is freed is accounted as io that is in progress at
> * the driver side.
> */
> - if (blk_account_rq(rq))
> + if (blk_account_rq(rq)) {
> q->in_flight[rq_is_sync(rq)]++;
> + set_io_start_time_ns(rq);
> + }
> }
>
> /**
> @@ -2517,4 +2520,3 @@ int __init blk_dev_init(void)
>
> return 0;
> }
> -
> diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
> index d91df9f..59aeadf 100644
> --- a/block/cfq-iosched.c
> +++ b/block/cfq-iosched.c
> @@ -854,7 +854,7 @@ cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
> if (!RB_EMPTY_NODE(&cfqg->rb_node))
> cfq_rb_erase(&cfqg->rb_node, st);
> cfqg->saved_workload_slice = 0;
> - blkiocg_update_blkio_group_dequeue_stats(&cfqg->blkg, 1);
> + blkiocg_update_dequeue_stats(&cfqg->blkg, 1);
> }
>
> static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq)
> @@ -1864,6 +1864,8 @@ static void cfq_dispatch_insert(struct request_queue *q, struct request *rq)
> elv_dispatch_sort(q, rq);
>
> cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]++;
> + blkiocg_update_dispatch_stats(&cfqq->cfqg->blkg, blk_rq_bytes(rq),
> + rq_direction(rq), rq_sync(rq));
> }
>
> /*
> @@ -3284,6 +3286,8 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
> WARN_ON(!cfqq->dispatched);
> cfqd->rq_in_driver--;
> cfqq->dispatched--;
> + blkiocg_update_completion_stats(&cfqq->cfqg->blkg, rq_start_time_ns(rq),
> + rq_io_start_time_ns(rq), rq_direction(rq), rq_sync(rq));
>
> cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--;
>
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index ebd22db..ac1f30d 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -193,7 +193,10 @@ struct request {
>
> struct gendisk *rq_disk;
> unsigned long start_time;
> -
> +#if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE)
> + unsigned long long start_time_ns;
> + unsigned long long io_start_time_ns; /* when passed to hardware */
> +#endif
> /* Number of scatter-gather DMA addr+len pairs after
> * physical address coalescing is performed.
> */
> @@ -1219,6 +1222,39 @@ static inline void put_dev_sector(Sector p)
> struct work_struct;
> int kblockd_schedule_work(struct request_queue *q, struct work_struct *work);
>
> +#if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE)
> +static inline void set_start_time_ns(struct request *req)
> +{
> + req->start_time_ns = sched_clock();
> +}
> +
> +static inline void set_io_start_time_ns(struct request *req)
> +{
> + req->io_start_time_ns = sched_clock();
> +}
> +
> +static inline uint64_t rq_start_time_ns(struct request *req)
> +{
> + return req->start_time_ns;
> +}
> +
> +static inline uint64_t rq_io_start_time_ns(struct request *req)
> +{
> + return req->io_start_time_ns;
> +}
> +#else
> +static inline void set_start_time_ns(struct request *req) {}
> +static inline void set_io_start_time_ns(struct request *req) {}
> +static inline uint64_t rq_start_time_ns(struct request *req)
> +{
> + return 0;
> +}
> +static inline uint64_t rq_io_start_time_ns(struct request *req)
> +{
> + return 0;
> +}
> +#endif
> +
> #define MODULE_ALIAS_BLOCKDEV(major,minor) \
> MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor))
> #define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \

2010-04-05 18:59:07

by Vivek Goyal

[permalink] [raw]
Subject: Re: [PATCH 2/3][v2] blkio: Add io controller stats like

On Fri, Apr 02, 2010 at 06:56:35PM -0700, Divyesh Shah wrote:

[..]
> +static uint64_t blkio_get_typed_stat(struct blkio_group *blkg,
> + struct cgroup_map_cb *cb, get_var *getvar, dev_t dev)
> +{
> + uint64_t disk_total;
> + char key_str[MAX_KEY_LEN];
> + int type;
> +
> + for (type = 0; type < IO_TYPE_TOTAL; type++) {
> + blkio_get_key_name(type, dev, key_str, MAX_KEY_LEN);
> + cb->fill(cb, key_str, getvar(blkg, type));
> + }
> + disk_total = getvar(blkg, IO_READ) + getvar(blkg, IO_WRITE);
> + blkio_get_key_name(IO_TYPE_TOTAL, dev, key_str, MAX_KEY_LEN);
> + cb->fill(cb, key_str, disk_total);
> + return disk_total;
> +}
> +
> +static uint64_t blkio_get_stat(struct blkio_group *blkg,
> + struct cgroup_map_cb *cb, get_var *getvar, dev_t dev)
> +{
> + uint64_t var = getvar(blkg, 0);
> + char str[10];
> +
> + snprintf(str, 10, "%u:%u", MAJOR(dev), MINOR(dev));

What is the significance of limiting major:minor string to 10 characters?
I see BDEVT_SIZE being used in genhd.c. But it looks like it is valid
only if we printing numbers in hex.

In genhd.c:diskstats_show(), we seem to be using %4d and %7d for major
minor numbers. In extended minor allocation scheme, we seem to be having
20 bits for minor numbers, so 7 decimal places for representing max minor
number will make sense. Not sure about major number though.

So it might be safe to just to follow diskstats_show convention and at
least keep the buffer size as 12 (4:7).

Vivek

> + cb->fill(cb, str, var);
> + return var;
> +}
> +
> +#define GET_STAT_INDEXED(__VAR) \
> +uint64_t blkio_get_##__VAR##_stat(struct blkio_group *blkg, int type) \
> +{ \
> + return blkg->stats.__VAR[type]; \
> +} \
> +
> +GET_STAT_INDEXED(io_service_bytes);
> +GET_STAT_INDEXED(io_serviced);
> +GET_STAT_INDEXED(io_service_time);
> +GET_STAT_INDEXED(io_wait_time);
> +#undef GET_STAT_INDEXED
> +
> +#define GET_STAT(__VAR) \
> +uint64_t blkio_get_##__VAR##_stat(struct blkio_group *blkg, int dummy) \
> +{ \
> + return blkg->stats.__VAR; \
> +}
> +
> +GET_STAT(time);
> +GET_STAT(sectors);
> +#ifdef CONFIG_DEBUG_BLK_CGROUP
> +GET_STAT(dequeue);
> +#endif
> +#undef GET_STAT
> +
> +#define SHOW_FUNCTION_PER_GROUP(__VAR, get_stats, getvar, show_total) \
> static int blkiocg_##__VAR##_read(struct cgroup *cgroup, \
> - struct cftype *cftype, struct seq_file *m) \
> + struct cftype *cftype, struct cgroup_map_cb *cb) \
> { \
> struct blkio_cgroup *blkcg; \
> struct blkio_group *blkg; \
> struct hlist_node *n; \
> + uint64_t cgroup_total = 0; \
> \
> if (!cgroup_lock_live_group(cgroup)) \
> return -ENODEV; \
> @@ -184,19 +295,32 @@ static int blkiocg_##__VAR##_read(struct cgroup *cgroup, \
> blkcg = cgroup_to_blkio_cgroup(cgroup); \
> rcu_read_lock(); \
> hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) {\
> - if (blkg->dev) \
> - seq_printf(m, "%u:%u %lu\n", MAJOR(blkg->dev), \
> - MINOR(blkg->dev), blkg->__VAR); \
> + if (blkg->dev) { \
> + spin_lock_irq(&blkg->stats_lock); \
> + cgroup_total += get_stats(blkg, cb, getvar, \
> + blkg->dev); \
> + spin_unlock_irq(&blkg->stats_lock); \
> + } \
> } \
> + if (show_total) \
> + cb->fill(cb, "Total", cgroup_total); \
> rcu_read_unlock(); \
> cgroup_unlock(); \
> return 0; \
> }
>
> -SHOW_FUNCTION_PER_GROUP(time);
> -SHOW_FUNCTION_PER_GROUP(sectors);
> +SHOW_FUNCTION_PER_GROUP(time, blkio_get_stat, blkio_get_time_stat, 0);
> +SHOW_FUNCTION_PER_GROUP(sectors, blkio_get_stat, blkio_get_sectors_stat, 0);
> +SHOW_FUNCTION_PER_GROUP(io_service_bytes, blkio_get_typed_stat,
> + blkio_get_io_service_bytes_stat, 1);
> +SHOW_FUNCTION_PER_GROUP(io_serviced, blkio_get_typed_stat,
> + blkio_get_io_serviced_stat, 1);
> +SHOW_FUNCTION_PER_GROUP(io_service_time, blkio_get_typed_stat,
> + blkio_get_io_service_time_stat, 1);
> +SHOW_FUNCTION_PER_GROUP(io_wait_time, blkio_get_typed_stat,
> + blkio_get_io_wait_time_stat, 1);
> #ifdef CONFIG_DEBUG_BLK_CGROUP
> -SHOW_FUNCTION_PER_GROUP(dequeue);
> +SHOW_FUNCTION_PER_GROUP(dequeue, blkio_get_stat, blkio_get_dequeue_stat, 0);
> #endif
> #undef SHOW_FUNCTION_PER_GROUP
>
> @@ -204,7 +328,7 @@ SHOW_FUNCTION_PER_GROUP(dequeue);
> void blkiocg_update_blkio_group_dequeue_stats(struct blkio_group *blkg,
> unsigned long dequeue)
> {
> - blkg->dequeue += dequeue;
> + blkg->stats.dequeue += dequeue;
> }
> EXPORT_SYMBOL_GPL(blkiocg_update_blkio_group_dequeue_stats);
> #endif
> @@ -217,16 +341,39 @@ struct cftype blkio_files[] = {
> },
> {
> .name = "time",
> - .read_seq_string = blkiocg_time_read,
> + .read_map = blkiocg_time_read,
> + .write_u64 = blkiocg_reset_write,
> },
> {
> .name = "sectors",
> - .read_seq_string = blkiocg_sectors_read,
> + .read_map = blkiocg_sectors_read,
> + .write_u64 = blkiocg_reset_write,
> + },
> + {
> + .name = "io_service_bytes",
> + .read_map = blkiocg_io_service_bytes_read,
> + .write_u64 = blkiocg_reset_write,
> + },
> + {
> + .name = "io_serviced",
> + .read_map = blkiocg_io_serviced_read,
> + .write_u64 = blkiocg_reset_write,
> + },
> + {
> + .name = "io_service_time",
> + .read_map = blkiocg_io_service_time_read,
> + .write_u64 = blkiocg_reset_write,
> + },
> + {
> + .name = "io_wait_time",
> + .read_map = blkiocg_io_wait_time_read,
> + .write_u64 = blkiocg_reset_write,
> },
> #ifdef CONFIG_DEBUG_BLK_CGROUP
> {
> .name = "dequeue",
> - .read_seq_string = blkiocg_dequeue_read,
> + .read_map = blkiocg_dequeue_read,
> + .write_u64 = blkiocg_reset_write,
> },
> #endif
> };
> diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
> index fe44517..e8600b0 100644
> --- a/block/blk-cgroup.h
> +++ b/block/blk-cgroup.h
> @@ -23,6 +23,14 @@ extern struct cgroup_subsys blkio_subsys;
> #define blkio_subsys_id blkio_subsys.subsys_id
> #endif
>
> +enum io_type {
> + IO_READ = 0,
> + IO_WRITE,
> + IO_SYNC,
> + IO_ASYNC,
> + IO_TYPE_TOTAL
> +};
> +
> struct blkio_cgroup {
> struct cgroup_subsys_state css;
> unsigned int weight;
> @@ -30,6 +38,23 @@ struct blkio_cgroup {
> struct hlist_head blkg_list;
> };
>
> +struct blkio_group_stats {
> + /* total disk time and nr sectors dispatched by this group */
> + uint64_t time;
> + uint64_t sectors;
> + /* Total disk time used by IOs in ns */
> + uint64_t io_service_time[IO_TYPE_TOTAL];
> + uint64_t io_service_bytes[IO_TYPE_TOTAL]; /* Total bytes transferred */
> + /* Total IOs serviced, post merge */
> + uint64_t io_serviced[IO_TYPE_TOTAL];
> + /* Total time spent waiting in scheduler queue in ns */
> + uint64_t io_wait_time[IO_TYPE_TOTAL];
> +#ifdef CONFIG_DEBUG_BLK_CGROUP
> + /* How many times this group has been removed from service tree */
> + unsigned long dequeue;
> +#endif
> +};
> +
> struct blkio_group {
> /* An rcu protected unique identifier for the group */
> void *key;
> @@ -38,15 +63,13 @@ struct blkio_group {
> #ifdef CONFIG_DEBUG_BLK_CGROUP
> /* Store cgroup path */
> char path[128];
> - /* How many times this group has been removed from service tree */
> - unsigned long dequeue;
> #endif
> /* The device MKDEV(major, minor), this group has been created for */
> - dev_t dev;
> + dev_t dev;
>
> - /* total disk time and nr sectors dispatched by this group */
> - unsigned long time;
> - unsigned long sectors;
> + /* Need to serialize the stats in the case of reset/update */
> + spinlock_t stats_lock;
> + struct blkio_group_stats stats;
> };
>
> typedef void (blkio_unlink_group_fn) (void *key, struct blkio_group *blkg);
> @@ -105,8 +128,8 @@ extern void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
> extern int blkiocg_del_blkio_group(struct blkio_group *blkg);
> extern struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg,
> void *key);
> -void blkiocg_update_blkio_group_stats(struct blkio_group *blkg,
> - unsigned long time);
> +void blkiocg_update_timeslice_used(struct blkio_group *blkg,
> + unsigned long time);
> #else
> struct cgroup;
> static inline struct blkio_cgroup *
> @@ -122,7 +145,7 @@ blkiocg_del_blkio_group(struct blkio_group *blkg) { return 0; }
>
> static inline struct blkio_group *
> blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key) { return NULL; }
> -static inline void blkiocg_update_blkio_group_stats(struct blkio_group *blkg,
> +static inline void blkiocg_update_timeslice_used(struct blkio_group *blkg,
> unsigned long time) {}
> #endif
> #endif /* _BLK_CGROUP_H */
> diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
> index c18e348..d91df9f 100644
> --- a/block/cfq-iosched.c
> +++ b/block/cfq-iosched.c
> @@ -914,7 +914,7 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
>
> cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime,
> st->min_vdisktime);
> - blkiocg_update_blkio_group_stats(&cfqg->blkg, used_sl);
> + blkiocg_update_timeslice_used(&cfqg->blkg, used_sl);
> }
>
> #ifdef CONFIG_CFQ_GROUP_IOSCHED

2010-04-05 22:11:16

by Divyesh Shah

[permalink] [raw]
Subject: Re: [PATCH 3/3][v2] blkio: Increment the blkio cgroup stats for real now

On Mon, Apr 5, 2010 at 11:12 AM, Vivek Goyal <[email protected]> wrote:
> On Fri, Apr 02, 2010 at 06:57:27PM -0700, Divyesh Shah wrote:
>> We also add start_time_ns and io_start_time_ns fields to struct request
>> here to record the time when a request is created and when it is
>> dispatched to device. We use ns uints here as ms and jiffies are
>> not very useful for non-rotational media.
>>
>> Signed-off-by: Divyesh Shah<[email protected]>
>> ---
>>
>> ?block/blk-cgroup.c ? ? | ? 49 ++++++++++++++++++++++++++++++++++++++++++++++--
>> ?block/blk-cgroup.h ? ? | ? 33 +++++++++++++++++++++++++++++---
>> ?block/blk-core.c ? ? ? | ? ?6 ++++--
>> ?block/cfq-iosched.c ? ?| ? ?6 +++++-
>> ?include/linux/blkdev.h | ? 38 ++++++++++++++++++++++++++++++++++++-
>> ?5 files changed, 123 insertions(+), 9 deletions(-)
>>
>> diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
>> index cac10b2..21c0d28 100644
>> --- a/block/blk-cgroup.c
>> +++ b/block/blk-cgroup.c
>> @@ -15,6 +15,7 @@
>> ?#include <linux/kdev_t.h>
>> ?#include <linux/module.h>
>> ?#include <linux/err.h>
>> +#include <linux/blkdev.h>
>> ?#include "blk-cgroup.h"
>>
>> ?#define MAX_KEY_LEN 100
>> @@ -57,6 +58,16 @@ struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
>> ?}
>> ?EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup);
>>
>> +/*
>> + * Add to the appropriate stat variable depending on the request type.
>> + * This should be called with the blkg->stats_lock held.
>> + */
>> +void io_add_stat(uint64_t *stat, uint64_t add, int direction, int sync)
>> +{
>> + ? ? stat[direction] += add;
>> + ? ? stat[sync] += add;
>> +}
>> +
>
> static? blkio_add_stat()?
Will fix in V3
>
> how about using "bool" for direction and sync? This is true throughout the
> patch.
Will do.

>
>> ?void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time)
>> ?{
>> ? ? ? unsigned long flags;
>> @@ -67,6 +78,40 @@ void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time)
>> ?}
>> ?EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used);
>>
>> +void blkiocg_update_dispatch_stats(struct blkio_group *blkg,
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? uint64_t bytes, int direction, int sync)
>> +{
>> + ? ? struct blkio_group_stats *stats;
>> + ? ? unsigned long flags;
>> +
>> + ? ? spin_lock_irqsave(&blkg->stats_lock, flags);
>> + ? ? stats = &blkg->stats;
>> + ? ? stats->sectors += bytes >> 9;
>> + ? ? io_add_stat(stats->io_serviced, 1, direction, sync);
>> + ? ? io_add_stat(stats->io_service_bytes, bytes, direction, sync);
>> + ? ? spin_unlock_irqrestore(&blkg->stats_lock, flags);
>> +}
>> +EXPORT_SYMBOL_GPL(blkiocg_update_dispatch_stats);
>> +
>> +void blkiocg_update_completion_stats(struct blkio_group *blkg,
>> + ? ? uint64_t start_time, uint64_t io_start_time, int direction, int sync)
>> +{
>> + ? ? struct blkio_group_stats *stats;
>> + ? ? unsigned long flags;
>> + ? ? unsigned long long now = sched_clock();
>> +
>> + ? ? spin_lock_irqsave(&blkg->stats_lock, flags);
>> + ? ? stats = &blkg->stats;
>> + ? ? if (time_after64(now, io_start_time))
>> + ? ? ? ? ? ? io_add_stat(stats->io_service_time, now - io_start_time,
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? direction, sync);
>> + ? ? if (time_after64(io_start_time, start_time))
>> + ? ? ? ? ? ? io_add_stat(stats->io_wait_time, io_start_time - start_time,
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? direction, sync);
>> + ? ? spin_unlock_irqrestore(&blkg->stats_lock, flags);
>> +}
>> +EXPORT_SYMBOL_GPL(blkiocg_update_completion_stats);
>> +
>> ?void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
>> ? ? ? ? ? ? ? ? ? ? ? struct blkio_group *blkg, void *key, dev_t dev)
>> ?{
>> @@ -325,12 +370,12 @@ SHOW_FUNCTION_PER_GROUP(dequeue, blkio_get_stat, blkio_get_dequeue_stat, 0);
>> ?#undef SHOW_FUNCTION_PER_GROUP
>>
>> ?#ifdef CONFIG_DEBUG_BLK_CGROUP
>> -void blkiocg_update_blkio_group_dequeue_stats(struct blkio_group *blkg,
>> +void blkiocg_update_dequeue_stats(struct blkio_group *blkg,
>> ? ? ? ? ? ? ? ? ? ? ? unsigned long dequeue)
>> ?{
>> ? ? ? blkg->stats.dequeue += dequeue;
>> ?}
>> -EXPORT_SYMBOL_GPL(blkiocg_update_blkio_group_dequeue_stats);
>> +EXPORT_SYMBOL_GPL(blkiocg_update_dequeue_stats);
>> ?#endif
>>
>> ?struct cftype blkio_files[] = {
>> diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
>> index e8600b0..5fc88e1 100644
>> --- a/block/blk-cgroup.h
>> +++ b/block/blk-cgroup.h
>> @@ -112,12 +112,12 @@ static inline char *blkg_path(struct blkio_group *blkg)
>> ?{
>> ? ? ? return blkg->path;
>> ?}
>> -void blkiocg_update_blkio_group_dequeue_stats(struct blkio_group *blkg,
>> +void blkiocg_update_dequeue_stats(struct blkio_group *blkg,
>> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? unsigned long dequeue);
>> ?#else
>> ?static inline char *blkg_path(struct blkio_group *blkg) { return NULL; }
>> -static inline void blkiocg_update_blkio_group_dequeue_stats(
>> - ? ? ? ? ? ? ? ? ? ? struct blkio_group *blkg, unsigned long dequeue) {}
>> +static inline void blkiocg_update_dequeue_stats(struct blkio_group *blkg,
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? unsigned long dequeue) {}
>> ?#endif
>>
>> ?#if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE)
>> @@ -130,6 +130,20 @@ extern struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg,
>> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? void *key);
>> ?void blkiocg_update_timeslice_used(struct blkio_group *blkg,
>> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? unsigned long time);
>> +void blkiocg_update_dispatch_stats(struct blkio_group *blkg, uint64_t bytes,
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? int direction, int sync);
>> +void blkiocg_update_completion_stats(struct blkio_group *blkg,
>> + ? ? uint64_t start_time, uint64_t io_start_time, int direction, int sync);
>> +static inline int rq_direction(struct request *rq)
>> +{
>> + ? ? return (rq->cmd_flags & REQ_RW) ? IO_WRITE : IO_READ;
>> +}
>
> blkdev.h offers rq_data_dir(). Let mapping of 0 or 1 to correct enum
> happen in blkio-cgroup.c. This is not rq property. This is internal to
> blkio.
Will fix in v3 for both direction and sync mapping.
>
>> +
>> +static inline int rq_sync(struct request *rq)
>> +{
>> + ? ? return (!(rq->cmd_flags & REQ_RW) ||
>> + ? ? ? ? ? ? rq->cmd_flags & REQ_RW_SYNC) ? IO_SYNC : IO_ASYNC;
>> +}
>
>
> blkdev.h already offers rq_is_sync(). We can use that. Mapping of 1 or 0
> to corrent enum number IO_SYNC, IO_ASYNC can be done by blkio-cgroup.c
>
>
> Vivek
>
>> ?#else
>> ?struct cgroup;
>> ?static inline struct blkio_cgroup *
>> @@ -147,5 +161,18 @@ static inline struct blkio_group *
>> ?blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key) { return NULL; }
>> ?static inline void blkiocg_update_timeslice_used(struct blkio_group *blkg,
>> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? unsigned long time) {}
>> +static inline void blkiocg_update_dispatch_stats(struct blkio_group *blkg,
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? uint64_t bytes, int direction, int sync) {}
>> +static inline void blkiocg_update_completion_stats(struct blkio_group *blkg,
>> + ? ? uint64_t start_time, uint64_t io_start_time, int direction, int sync) {}
>> +static inline int rq_direction(struct request *rq)
>> +{
>> + ? ? return 0;
>> +}
>> +
>> +static inline int rq_sync(struct request *rq)
>> +{
>> + ? ? return 0;
>> +}
>> ?#endif
>> ?#endif /* _BLK_CGROUP_H */
>> diff --git a/block/blk-core.c b/block/blk-core.c
>> index 9fe174d..1d94f15 100644
>> --- a/block/blk-core.c
>> +++ b/block/blk-core.c
>> @@ -127,6 +127,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
>> ? ? ? rq->tag = -1;
>> ? ? ? rq->ref_count = 1;
>> ? ? ? rq->start_time = jiffies;
>> + ? ? set_start_time_ns(rq);
>> ?}
>> ?EXPORT_SYMBOL(blk_rq_init);
>>
>> @@ -1855,8 +1856,10 @@ void blk_dequeue_request(struct request *rq)
>> ? ? ? ?* and to it is freed is accounted as io that is in progress at
>> ? ? ? ?* the driver side.
>> ? ? ? ?*/
>> - ? ? if (blk_account_rq(rq))
>> + ? ? if (blk_account_rq(rq)) {
>> ? ? ? ? ? ? ? q->in_flight[rq_is_sync(rq)]++;
>> + ? ? ? ? ? ? set_io_start_time_ns(rq);
>> + ? ? }
>> ?}
>>
>> ?/**
>> @@ -2517,4 +2520,3 @@ int __init blk_dev_init(void)
>>
>> ? ? ? return 0;
>> ?}
>> -
>> diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
>> index d91df9f..59aeadf 100644
>> --- a/block/cfq-iosched.c
>> +++ b/block/cfq-iosched.c
>> @@ -854,7 +854,7 @@ cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
>> ? ? ? if (!RB_EMPTY_NODE(&cfqg->rb_node))
>> ? ? ? ? ? ? ? cfq_rb_erase(&cfqg->rb_node, st);
>> ? ? ? cfqg->saved_workload_slice = 0;
>> - ? ? blkiocg_update_blkio_group_dequeue_stats(&cfqg->blkg, 1);
>> + ? ? blkiocg_update_dequeue_stats(&cfqg->blkg, 1);
>> ?}
>>
>> ?static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq)
>> @@ -1864,6 +1864,8 @@ static void cfq_dispatch_insert(struct request_queue *q, struct request *rq)
>> ? ? ? elv_dispatch_sort(q, rq);
>>
>> ? ? ? cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]++;
>> + ? ? blkiocg_update_dispatch_stats(&cfqq->cfqg->blkg, blk_rq_bytes(rq),
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? rq_direction(rq), rq_sync(rq));
>> ?}
>>
>> ?/*
>> @@ -3284,6 +3286,8 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
>> ? ? ? WARN_ON(!cfqq->dispatched);
>> ? ? ? cfqd->rq_in_driver--;
>> ? ? ? cfqq->dispatched--;
>> + ? ? blkiocg_update_completion_stats(&cfqq->cfqg->blkg, rq_start_time_ns(rq),
>> + ? ? ? ? ? ? ? ? ? ? rq_io_start_time_ns(rq), rq_direction(rq), rq_sync(rq));
>>
>> ? ? ? cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--;
>>
>> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
>> index ebd22db..ac1f30d 100644
>> --- a/include/linux/blkdev.h
>> +++ b/include/linux/blkdev.h
>> @@ -193,7 +193,10 @@ struct request {
>>
>> ? ? ? struct gendisk *rq_disk;
>> ? ? ? unsigned long start_time;
>> -
>> +#if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE)
>> + ? ? unsigned long long start_time_ns;
>> + ? ? unsigned long long io_start_time_ns; ? ?/* when passed to hardware */
>> +#endif
>> ? ? ? /* Number of scatter-gather DMA addr+len pairs after
>> ? ? ? ?* physical address coalescing is performed.
>> ? ? ? ?*/
>> @@ -1219,6 +1222,39 @@ static inline void put_dev_sector(Sector p)
>> ?struct work_struct;
>> ?int kblockd_schedule_work(struct request_queue *q, struct work_struct *work);
>>
>> +#if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE)
>> +static inline void set_start_time_ns(struct request *req)
>> +{
>> + ? ? req->start_time_ns = sched_clock();
>> +}
>> +
>> +static inline void set_io_start_time_ns(struct request *req)
>> +{
>> + ? ? req->io_start_time_ns = sched_clock();
>> +}
>> +
>> +static inline uint64_t rq_start_time_ns(struct request *req)
>> +{
>> + ? ? return req->start_time_ns;
>> +}
>> +
>> +static inline uint64_t rq_io_start_time_ns(struct request *req)
>> +{
>> + ? ? return req->io_start_time_ns;
>> +}
>> +#else
>> +static inline void set_start_time_ns(struct request *req) {}
>> +static inline void set_io_start_time_ns(struct request *req) {}
>> +static inline uint64_t rq_start_time_ns(struct request *req)
>> +{
>> + ? ? return 0;
>> +}
>> +static inline uint64_t rq_io_start_time_ns(struct request *req)
>> +{
>> + ? ? return 0;
>> +}
>> +#endif
>> +
>> ?#define MODULE_ALIAS_BLOCKDEV(major,minor) \
>> ? ? ? MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor))
>> ?#define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \
>

2010-04-05 22:18:15

by Divyesh Shah

[permalink] [raw]
Subject: Re: [PATCH 2/3][v2] blkio: Add io controller stats like

On Mon, Apr 5, 2010 at 11:58 AM, Vivek Goyal <[email protected]> wrote:
> On Fri, Apr 02, 2010 at 06:56:35PM -0700, Divyesh Shah wrote:
>
> [..]
>> +static uint64_t blkio_get_typed_stat(struct blkio_group *blkg,
>> + ? ? ? ? ? ? struct cgroup_map_cb *cb, get_var *getvar, dev_t dev)
>> +{
>> + ? ? uint64_t disk_total;
>> + ? ? char key_str[MAX_KEY_LEN];
>> + ? ? int type;
>> +
>> + ? ? for (type = 0; type < IO_TYPE_TOTAL; type++) {
>> + ? ? ? ? ? ? blkio_get_key_name(type, dev, key_str, MAX_KEY_LEN);
>> + ? ? ? ? ? ? cb->fill(cb, key_str, getvar(blkg, type));
>> + ? ? }
>> + ? ? disk_total = getvar(blkg, IO_READ) + getvar(blkg, IO_WRITE);
>> + ? ? blkio_get_key_name(IO_TYPE_TOTAL, dev, key_str, MAX_KEY_LEN);
>> + ? ? cb->fill(cb, key_str, disk_total);
>> + ? ? return disk_total;
>> +}
>> +
>> +static uint64_t blkio_get_stat(struct blkio_group *blkg,
>> + ? ? ? ? ? ? struct cgroup_map_cb *cb, get_var *getvar, dev_t dev)
>> +{
>> + ? ? uint64_t var = getvar(blkg, 0);
>> + ? ? char str[10];
>> +
>> + ? ? snprintf(str, 10, "%u:%u", MAJOR(dev), MINOR(dev));
>
> What is the significance of limiting major:minor string to 10 characters?
> I see BDEVT_SIZE being used in genhd.c. But it looks like it is valid
> only if we printing numbers in hex.
>
> In genhd.c:diskstats_show(), we seem to be using %4d and %7d for major
> minor numbers. In extended minor allocation scheme, we seem to be having
> 20 bits for minor numbers, so 7 decimal places for representing max minor
> number will make sense. Not sure about major number though.
>
> So it might be safe to just to follow diskstats_show convention and at
> least keep the buffer size as 12 (4:7).

Will do in V3

>
> Vivek
>
>> + ? ? cb->fill(cb, str, var);
>> + ? ? return var;
>> +}
>> +
>> +#define GET_STAT_INDEXED(__VAR) ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
>> +uint64_t blkio_get_##__VAR##_stat(struct blkio_group *blkg, int type) ? ? ? ?\
>> +{ ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
>> + ? ? return blkg->stats.__VAR[type]; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> +} ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
>> +
>> +GET_STAT_INDEXED(io_service_bytes);
>> +GET_STAT_INDEXED(io_serviced);
>> +GET_STAT_INDEXED(io_service_time);
>> +GET_STAT_INDEXED(io_wait_time);
>> +#undef GET_STAT_INDEXED
>> +
>> +#define GET_STAT(__VAR) ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
>> +uint64_t blkio_get_##__VAR##_stat(struct blkio_group *blkg, int dummy) ? ? ? \
>> +{ ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
>> + ? ? return blkg->stats.__VAR; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> +}
>> +
>> +GET_STAT(time);
>> +GET_STAT(sectors);
>> +#ifdef CONFIG_DEBUG_BLK_CGROUP
>> +GET_STAT(dequeue);
>> +#endif
>> +#undef GET_STAT
>> +
>> +#define SHOW_FUNCTION_PER_GROUP(__VAR, get_stats, getvar, show_total) ? ? ? ?\
>> ?static int blkiocg_##__VAR##_read(struct cgroup *cgroup, ? ? ? ? ? ? \
>> - ? ? ? ? ? ? ? ? ? ? struct cftype *cftype, struct seq_file *m) ? ? ?\
>> + ? ? ? ? ? ? struct cftype *cftype, struct cgroup_map_cb *cb) ? ? ? ?\
>> ?{ ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
>> ? ? ? struct blkio_cgroup *blkcg; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> ? ? ? struct blkio_group *blkg; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> ? ? ? struct hlist_node *n; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> + ? ? uint64_t cgroup_total = 0; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
>> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> ? ? ? if (!cgroup_lock_live_group(cgroup)) ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
>> ? ? ? ? ? ? ? return -ENODEV; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> @@ -184,19 +295,32 @@ static int blkiocg_##__VAR##_read(struct cgroup *cgroup, ? ? ? ? ? ? ? ?\
>> ? ? ? blkcg = cgroup_to_blkio_cgroup(cgroup); ? ? ? ? ? ? ? ? ? ? ? ? \
>> ? ? ? rcu_read_lock(); ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
>> ? ? ? hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) {\
>> - ? ? ? ? ? ? if (blkg->dev) ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
>> - ? ? ? ? ? ? ? ? ? ? seq_printf(m, "%u:%u %lu\n", MAJOR(blkg->dev), ?\
>> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?MINOR(blkg->dev), blkg->__VAR); ? ? ? ?\
>> + ? ? ? ? ? ? if (blkg->dev) { ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
>> + ? ? ? ? ? ? ? ? ? ? spin_lock_irq(&blkg->stats_lock); ? ? ? ? ? ? ? \
>> + ? ? ? ? ? ? ? ? ? ? cgroup_total += get_stats(blkg, cb, getvar, ? ? \
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? blkg->dev); ? ? ? ? ? ? \
>> + ? ? ? ? ? ? ? ? ? ? spin_unlock_irq(&blkg->stats_lock); ? ? ? ? ? ? \
>> + ? ? ? ? ? ? } ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> ? ? ? } ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> + ? ? if (show_total) ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> + ? ? ? ? ? ? cb->fill(cb, "Total", cgroup_total); ? ? ? ? ? ? ? ? ? ?\
>> ? ? ? rcu_read_unlock(); ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
>> ? ? ? cgroup_unlock(); ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
>> ? ? ? return 0; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> ?}
>>
>> -SHOW_FUNCTION_PER_GROUP(time);
>> -SHOW_FUNCTION_PER_GROUP(sectors);
>> +SHOW_FUNCTION_PER_GROUP(time, blkio_get_stat, blkio_get_time_stat, 0);
>> +SHOW_FUNCTION_PER_GROUP(sectors, blkio_get_stat, blkio_get_sectors_stat, 0);
>> +SHOW_FUNCTION_PER_GROUP(io_service_bytes, blkio_get_typed_stat,
>> + ? ? ? ? ? ? ? ? ? ? blkio_get_io_service_bytes_stat, 1);
>> +SHOW_FUNCTION_PER_GROUP(io_serviced, blkio_get_typed_stat,
>> + ? ? ? ? ? ? ? ? ? ? blkio_get_io_serviced_stat, 1);
>> +SHOW_FUNCTION_PER_GROUP(io_service_time, blkio_get_typed_stat,
>> + ? ? ? ? ? ? ? ? ? ? blkio_get_io_service_time_stat, 1);
>> +SHOW_FUNCTION_PER_GROUP(io_wait_time, blkio_get_typed_stat,
>> + ? ? ? ? ? ? ? ? ? ? blkio_get_io_wait_time_stat, 1);
>> ?#ifdef CONFIG_DEBUG_BLK_CGROUP
>> -SHOW_FUNCTION_PER_GROUP(dequeue);
>> +SHOW_FUNCTION_PER_GROUP(dequeue, blkio_get_stat, blkio_get_dequeue_stat, 0);
>> ?#endif
>> ?#undef SHOW_FUNCTION_PER_GROUP
>>
>> @@ -204,7 +328,7 @@ SHOW_FUNCTION_PER_GROUP(dequeue);
>> ?void blkiocg_update_blkio_group_dequeue_stats(struct blkio_group *blkg,
>> ? ? ? ? ? ? ? ? ? ? ? unsigned long dequeue)
>> ?{
>> - ? ? blkg->dequeue += dequeue;
>> + ? ? blkg->stats.dequeue += dequeue;
>> ?}
>> ?EXPORT_SYMBOL_GPL(blkiocg_update_blkio_group_dequeue_stats);
>> ?#endif
>> @@ -217,16 +341,39 @@ struct cftype blkio_files[] = {
>> ? ? ? },
>> ? ? ? {
>> ? ? ? ? ? ? ? .name = "time",
>> - ? ? ? ? ? ? .read_seq_string = blkiocg_time_read,
>> + ? ? ? ? ? ? .read_map = blkiocg_time_read,
>> + ? ? ? ? ? ? .write_u64 = blkiocg_reset_write,
>> ? ? ? },
>> ? ? ? {
>> ? ? ? ? ? ? ? .name = "sectors",
>> - ? ? ? ? ? ? .read_seq_string = blkiocg_sectors_read,
>> + ? ? ? ? ? ? .read_map = blkiocg_sectors_read,
>> + ? ? ? ? ? ? .write_u64 = blkiocg_reset_write,
>> + ? ? },
>> + ? ? {
>> + ? ? ? ? ? ? .name = "io_service_bytes",
>> + ? ? ? ? ? ? .read_map = blkiocg_io_service_bytes_read,
>> + ? ? ? ? ? ? .write_u64 = blkiocg_reset_write,
>> + ? ? },
>> + ? ? {
>> + ? ? ? ? ? ? .name = "io_serviced",
>> + ? ? ? ? ? ? .read_map = blkiocg_io_serviced_read,
>> + ? ? ? ? ? ? .write_u64 = blkiocg_reset_write,
>> + ? ? },
>> + ? ? {
>> + ? ? ? ? ? ? .name = "io_service_time",
>> + ? ? ? ? ? ? .read_map = blkiocg_io_service_time_read,
>> + ? ? ? ? ? ? .write_u64 = blkiocg_reset_write,
>> + ? ? },
>> + ? ? {
>> + ? ? ? ? ? ? .name = "io_wait_time",
>> + ? ? ? ? ? ? .read_map = blkiocg_io_wait_time_read,
>> + ? ? ? ? ? ? .write_u64 = blkiocg_reset_write,
>> ? ? ? },
>> ?#ifdef CONFIG_DEBUG_BLK_CGROUP
>> ? ? ? ? {
>> ? ? ? ? ? ? ? .name = "dequeue",
>> - ? ? ? ? ? ? .read_seq_string = blkiocg_dequeue_read,
>> + ? ? ? ? ? ? .read_map = blkiocg_dequeue_read,
>> + ? ? ? ? ? ? .write_u64 = blkiocg_reset_write,
>> ? ? ? ? },
>> ?#endif
>> ?};
>> diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
>> index fe44517..e8600b0 100644
>> --- a/block/blk-cgroup.h
>> +++ b/block/blk-cgroup.h
>> @@ -23,6 +23,14 @@ extern struct cgroup_subsys blkio_subsys;
>> ?#define blkio_subsys_id blkio_subsys.subsys_id
>> ?#endif
>>
>> +enum io_type {
>> + ? ? IO_READ = 0,
>> + ? ? IO_WRITE,
>> + ? ? IO_SYNC,
>> + ? ? IO_ASYNC,
>> + ? ? IO_TYPE_TOTAL
>> +};
>> +
>> ?struct blkio_cgroup {
>> ? ? ? struct cgroup_subsys_state css;
>> ? ? ? unsigned int weight;
>> @@ -30,6 +38,23 @@ struct blkio_cgroup {
>> ? ? ? struct hlist_head blkg_list;
>> ?};
>>
>> +struct blkio_group_stats {
>> + ? ? /* total disk time and nr sectors dispatched by this group */
>> + ? ? uint64_t time;
>> + ? ? uint64_t sectors;
>> + ? ? /* Total disk time used by IOs in ns */
>> + ? ? uint64_t io_service_time[IO_TYPE_TOTAL];
>> + ? ? uint64_t io_service_bytes[IO_TYPE_TOTAL]; /* Total bytes transferred */
>> + ? ? /* Total IOs serviced, post merge */
>> + ? ? uint64_t io_serviced[IO_TYPE_TOTAL];
>> + ? ? /* Total time spent waiting in scheduler queue in ns */
>> + ? ? uint64_t io_wait_time[IO_TYPE_TOTAL];
>> +#ifdef CONFIG_DEBUG_BLK_CGROUP
>> + ? ? /* How many times this group has been removed from service tree */
>> + ? ? unsigned long dequeue;
>> +#endif
>> +};
>> +
>> ?struct blkio_group {
>> ? ? ? /* An rcu protected unique identifier for the group */
>> ? ? ? void *key;
>> @@ -38,15 +63,13 @@ struct blkio_group {
>> ?#ifdef CONFIG_DEBUG_BLK_CGROUP
>> ? ? ? /* Store cgroup path */
>> ? ? ? char path[128];
>> - ? ? /* How many times this group has been removed from service tree */
>> - ? ? unsigned long dequeue;
>> ?#endif
>> ? ? ? /* The device MKDEV(major, minor), this group has been created for */
>> - ? ? dev_t ? dev;
>> + ? ? dev_t dev;
>>
>> - ? ? /* total disk time and nr sectors dispatched by this group */
>> - ? ? unsigned long time;
>> - ? ? unsigned long sectors;
>> + ? ? /* Need to serialize the stats in the case of reset/update */
>> + ? ? spinlock_t stats_lock;
>> + ? ? struct blkio_group_stats stats;
>> ?};
>>
>> ?typedef void (blkio_unlink_group_fn) (void *key, struct blkio_group *blkg);
>> @@ -105,8 +128,8 @@ extern void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
>> ?extern int blkiocg_del_blkio_group(struct blkio_group *blkg);
>> ?extern struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg,
>> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? void *key);
>> -void blkiocg_update_blkio_group_stats(struct blkio_group *blkg,
>> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? unsigned long time);
>> +void blkiocg_update_timeslice_used(struct blkio_group *blkg,
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? unsigned long time);
>> ?#else
>> ?struct cgroup;
>> ?static inline struct blkio_cgroup *
>> @@ -122,7 +145,7 @@ blkiocg_del_blkio_group(struct blkio_group *blkg) { return 0; }
>>
>> ?static inline struct blkio_group *
>> ?blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key) { return NULL; }
>> -static inline void blkiocg_update_blkio_group_stats(struct blkio_group *blkg,
>> +static inline void blkiocg_update_timeslice_used(struct blkio_group *blkg,
>> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? unsigned long time) {}
>> ?#endif
>> ?#endif /* _BLK_CGROUP_H */
>> diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
>> index c18e348..d91df9f 100644
>> --- a/block/cfq-iosched.c
>> +++ b/block/cfq-iosched.c
>> @@ -914,7 +914,7 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
>>
>> ? ? ? cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime,
>> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? st->min_vdisktime);
>> - ? ? blkiocg_update_blkio_group_stats(&cfqg->blkg, used_sl);
>> + ? ? blkiocg_update_timeslice_used(&cfqg->blkg, used_sl);
>> ?}
>>
>> ?#ifdef CONFIG_CFQ_GROUP_IOSCHED
>

2010-04-05 23:29:35

by Divyesh Shah

[permalink] [raw]
Subject: Re: [PATCH 2/3][v2] blkio: Add io controller stats like

On Mon, Apr 5, 2010 at 9:06 AM, Vivek Goyal <[email protected]> wrote:
> On Fri, Apr 02, 2010 at 06:56:35PM -0700, Divyesh Shah wrote:
>> - io_service_time (the actual time in ns taken by the dis to service the IO)
>> - io_wait_time (the time spent waiting in the IO shceduler queues before
>> ? getting serviced)
>> - io_serviced (number of IOs serviced from this blkio_group)
>> - io_service_bytes (Number of bytes served for this cgroup)
>>
>> These stats are accumulated per operation type helping us to distinguish between
>> read and write, and sync and async IO. This patch does not increment any of
>> these stats.
>>
>> Signed-off-by: Divyesh Shah<[email protected]>
>> ---
>>
>> ?Documentation/cgroups/blkio-controller.txt | ? 33 +++++
>> ?block/blk-cgroup.c ? ? ? ? ? ? ? ? ? ? ? ? | ?179 +++++++++++++++++++++++++---
>> ?block/blk-cgroup.h ? ? ? ? ? ? ? ? ? ? ? ? | ? 41 +++++-
>> ?block/cfq-iosched.c ? ? ? ? ? ? ? ? ? ? ? ?| ? ?2
>> ?4 files changed, 229 insertions(+), 26 deletions(-)
>>
>> diff --git a/Documentation/cgroups/blkio-controller.txt b/Documentation/cgroups/blkio-controller.txt
>> index 630879c..ededdca 100644
>> --- a/Documentation/cgroups/blkio-controller.txt
>> +++ b/Documentation/cgroups/blkio-controller.txt
>> @@ -75,6 +75,9 @@ CONFIG_DEBUG_BLK_CGROUP
>>
>> ?Details of cgroup files
>> ?=======================
>> +Writing an int to any of the stats files (which excludes weight) will result
>> +in all stats for that cgroup to be erased.
>> +
>> ?- blkio.weight
>> ? ? ? - Specifies per cgroup weight.
>>
>> @@ -92,6 +95,36 @@ Details of cgroup files
>> ? ? ? ? third field specifies the number of sectors transferred by the
>> ? ? ? ? group to/from the device.
>>
>> +- blkio.io_service_bytes
>> + ? ? - Number of bytes transferred to/from the disk by the group. These
>> + ? ? ? are further divided by the type of operation - read or write, sync
>> + ? ? ? or async. First two fields specify the major and minor number of the
>> + ? ? ? device, third field specifies the operation type and the fourth field
>> + ? ? ? specifies the number of bytes.
>> +
>> +- blkio.io_serviced
>> + ? ? - Number of IOs completed to/from the disk by the group. These
>> + ? ? ? are further divided by the type of operation - read or write, sync
>> + ? ? ? or async. First two fields specify the major and minor number of the
>> + ? ? ? device, third field specifies the operation type and the fourth field
>> + ? ? ? specifies the number of IOs.
>> +
>> +- blkio.io_service_time
>> + ? ? - Total amount of time spent by the device to service the IOs for this
>> + ? ? ? cgroup. This is in nanoseconds to make it meaningful for flash
>> + ? ? ? devices too. This time is further divided by the type of operation -
>> + ? ? ? read or write, sync or async. First two fields specify the major and
>> + ? ? ? minor number of the device, third field specifies the operation type
>> + ? ? ? and the fourth field specifies the io_service_time in ns.
>
> Hi Divyesh,
>
> Looking the third patch where you acutally increment the stats, this is
> how service time is calculated.
>
> - Save start_time in rq, when driver actually removes the request from
> ?request queue.
> - at request completion time calculate service time.
> ?service_time = now - start_time.
>
> This works very well if driver/device does not have NCQ and process one
> request at a time. But with NCQ, we can multiple requests in the driver
> queue at the same time then we can run into issues.
>
> - With NCQ, time becomes cumulative. So if three requests rq1, rq2 and rq3
> ?are in disk queue, and if requests are processed in the order rq1,rq2,rq3 by
> ?the disk (no parallel channles), then rq1 and rq2's completion time is
> ?added in rq3. So total service time of the group can be much more than
> ?actual time elapsed. That does not seem right. Did you face this issue in
> ?your testing?

You are right. With NCQ, the io_service_time numbers aren't exact.
I've only tested this without NCQ for which these stats are accurate
and very useful. With NCQ turned on I don't see a good way of getting
this information accurately since only the disk knows when it actually
starts to service a given request. With NCQ, io_service_time will have
cumulative time as you pointed out which may still be useful for some
high-level analysis if you also have the average queue depth for the
device.

io_wait_time stat makes sense with or w/o NCQ since its defined to
only include the scheduler queueing time for each IO and is cumulative
by design. I should probably reword the definition of io_service_time
to indicate time after dispatch to the driver and request completion
and also add a comment of how NCQ affects this stat. Sounds good?

-Divyesh

>
> Same is the case with io_wait_time, Because time is cumulative, actual
> io_wait_time, can be more than real time elapsed.
>
> Is this the intention and you are finding even cumulative time useful?
>
>> +
>> +- blkio.io_wait_time
>> + ? ? - Total amount of time the IO spent waiting in the scheduler queues for
>> + ? ? ? service. This is in nanoseconds to make it meaningful for flash
>> + ? ? ? devices too. This time is further divided by the type of operation -
>> + ? ? ? read or write, sync or async. First two fields specify the major and
>> + ? ? ? minor number of the device, third field specifies the operation type
>> + ? ? ? and the fourth field specifies the io_wait_time in ns.
>> +
>> ?- blkio.dequeue
>> ? ? ? - Debugging aid only enabled if CONFIG_DEBUG_CFQ_IOSCHED=y. This
>> ? ? ? ? gives the statistics about how many a times a group was dequeued
>> diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
>> index 5be3981..cac10b2 100644
>> --- a/block/blk-cgroup.c
>> +++ b/block/blk-cgroup.c
>> @@ -17,6 +17,8 @@
>> ?#include <linux/err.h>
>> ?#include "blk-cgroup.h"
>>
>> +#define MAX_KEY_LEN 100
>> +
>> ?static DEFINE_SPINLOCK(blkio_list_lock);
>> ?static LIST_HEAD(blkio_list);
>>
>> @@ -55,12 +57,15 @@ struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
>> ?}
>> ?EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup);
>>
>> -void blkiocg_update_blkio_group_stats(struct blkio_group *blkg,
>> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? unsigned long time)
>> +void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time)
>> ?{
>> - ? ? blkg->time += time;
>> + ? ? unsigned long flags;
>> +
>> + ? ? spin_lock_irqsave(&blkg->stats_lock, flags);
>> + ? ? blkg->stats.time += time;
>> + ? ? spin_unlock_irqrestore(&blkg->stats_lock, flags);
>> ?}
>> -EXPORT_SYMBOL_GPL(blkiocg_update_blkio_group_stats);
>> +EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used);
>>
>> ?void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
>> ? ? ? ? ? ? ? ? ? ? ? struct blkio_group *blkg, void *key, dev_t dev)
>> @@ -170,13 +175,119 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
>> ? ? ? return 0;
>> ?}
>>
>> -#define SHOW_FUNCTION_PER_GROUP(__VAR) ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> +static int
>> +blkiocg_reset_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
>> +{
>> + ? ? struct blkio_cgroup *blkcg;
>> + ? ? struct blkio_group *blkg;
>> + ? ? struct hlist_node *n;
>> + ? ? struct blkio_group_stats *stats;
>> +
>> + ? ? blkcg = cgroup_to_blkio_cgroup(cgroup);
>> + ? ? spin_lock_irq(&blkcg->lock);
>> + ? ? hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
>> + ? ? ? ? ? ? spin_lock(&blkg->stats_lock);
>> + ? ? ? ? ? ? stats = &blkg->stats;
>> + ? ? ? ? ? ? memset(stats, 0, sizeof(struct blkio_group_stats));
>> + ? ? ? ? ? ? spin_unlock(&blkg->stats_lock);
>> + ? ? }
>> + ? ? spin_unlock_irq(&blkcg->lock);
>> + ? ? return 0;
>> +}
>> +
>> +static void blkio_get_key_name(int type, dev_t dev, char *str, int chars_left)
>> +{
>> + ? ? snprintf(str, chars_left, "%u:%u", MAJOR(dev), MINOR(dev));
>> + ? ? chars_left -= strlen(str);
>> + ? ? if (chars_left <= 0) {
>> + ? ? ? ? ? ? printk(KERN_WARNING
>> + ? ? ? ? ? ? ? ? ? ? "Possibly incorrect cgroup stat display format");
>> + ? ? ? ? ? ? return;
>> + ? ? }
>> + ? ? switch (type) {
>> + ? ? case IO_READ:
>> + ? ? ? ? ? ? strlcat(str, " Read", chars_left);
>> + ? ? ? ? ? ? break;
>> + ? ? case IO_WRITE:
>> + ? ? ? ? ? ? strlcat(str, " Write", chars_left);
>> + ? ? ? ? ? ? break;
>> + ? ? case IO_SYNC:
>> + ? ? ? ? ? ? strlcat(str, " Sync", chars_left);
>> + ? ? ? ? ? ? break;
>> + ? ? case IO_ASYNC:
>> + ? ? ? ? ? ? strlcat(str, " Async", chars_left);
>> + ? ? ? ? ? ? break;
>> + ? ? case IO_TYPE_TOTAL:
>> + ? ? ? ? ? ? strlcat(str, " Total", chars_left);
>> + ? ? ? ? ? ? break;
>> + ? ? default:
>> + ? ? ? ? ? ? strlcat(str, " Invalid", chars_left);
>> + ? ? }
>> +}
>> +
>> +typedef uint64_t (get_var) (struct blkio_group *, int);
>> +
>> +static uint64_t blkio_get_typed_stat(struct blkio_group *blkg,
>> + ? ? ? ? ? ? struct cgroup_map_cb *cb, get_var *getvar, dev_t dev)
>> +{
>> + ? ? uint64_t disk_total;
>> + ? ? char key_str[MAX_KEY_LEN];
>> + ? ? int type;
>> +
>> + ? ? for (type = 0; type < IO_TYPE_TOTAL; type++) {
>> + ? ? ? ? ? ? blkio_get_key_name(type, dev, key_str, MAX_KEY_LEN);
>> + ? ? ? ? ? ? cb->fill(cb, key_str, getvar(blkg, type));
>> + ? ? }
>> + ? ? disk_total = getvar(blkg, IO_READ) + getvar(blkg, IO_WRITE);
>> + ? ? blkio_get_key_name(IO_TYPE_TOTAL, dev, key_str, MAX_KEY_LEN);
>> + ? ? cb->fill(cb, key_str, disk_total);
>> + ? ? return disk_total;
>> +}
>> +
>> +static uint64_t blkio_get_stat(struct blkio_group *blkg,
>> + ? ? ? ? ? ? struct cgroup_map_cb *cb, get_var *getvar, dev_t dev)
>> +{
>> + ? ? uint64_t var = getvar(blkg, 0);
>> + ? ? char str[10];
>> +
>> + ? ? snprintf(str, 10, "%u:%u", MAJOR(dev), MINOR(dev));
>> + ? ? cb->fill(cb, str, var);
>> + ? ? return var;
>> +}
>> +
>> +#define GET_STAT_INDEXED(__VAR) ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
>> +uint64_t blkio_get_##__VAR##_stat(struct blkio_group *blkg, int type) ? ? ? ?\
>> +{ ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
>> + ? ? return blkg->stats.__VAR[type]; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> +} ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
>> +
>> +GET_STAT_INDEXED(io_service_bytes);
>> +GET_STAT_INDEXED(io_serviced);
>> +GET_STAT_INDEXED(io_service_time);
>> +GET_STAT_INDEXED(io_wait_time);
>> +#undef GET_STAT_INDEXED
>> +
>> +#define GET_STAT(__VAR) ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
>> +uint64_t blkio_get_##__VAR##_stat(struct blkio_group *blkg, int dummy) ? ? ? \
>> +{ ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
>> + ? ? return blkg->stats.__VAR; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> +}
>> +
>> +GET_STAT(time);
>> +GET_STAT(sectors);
>> +#ifdef CONFIG_DEBUG_BLK_CGROUP
>> +GET_STAT(dequeue);
>> +#endif
>> +#undef GET_STAT
>> +
>> +#define SHOW_FUNCTION_PER_GROUP(__VAR, get_stats, getvar, show_total) ? ? ? ?\
>> ?static int blkiocg_##__VAR##_read(struct cgroup *cgroup, ? ? ? ? ? ? \
>> - ? ? ? ? ? ? ? ? ? ? struct cftype *cftype, struct seq_file *m) ? ? ?\
>> + ? ? ? ? ? ? struct cftype *cftype, struct cgroup_map_cb *cb) ? ? ? ?\
>> ?{ ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
>> ? ? ? struct blkio_cgroup *blkcg; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> ? ? ? struct blkio_group *blkg; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> ? ? ? struct hlist_node *n; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> + ? ? uint64_t cgroup_total = 0; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
>> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> ? ? ? if (!cgroup_lock_live_group(cgroup)) ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
>> ? ? ? ? ? ? ? return -ENODEV; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> @@ -184,19 +295,32 @@ static int blkiocg_##__VAR##_read(struct cgroup *cgroup, ? ? ? ? ? ? ? ?\
>> ? ? ? blkcg = cgroup_to_blkio_cgroup(cgroup); ? ? ? ? ? ? ? ? ? ? ? ? \
>> ? ? ? rcu_read_lock(); ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
>> ? ? ? hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) {\
>> - ? ? ? ? ? ? if (blkg->dev) ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
>> - ? ? ? ? ? ? ? ? ? ? seq_printf(m, "%u:%u %lu\n", MAJOR(blkg->dev), ?\
>> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?MINOR(blkg->dev), blkg->__VAR); ? ? ? ?\
>> + ? ? ? ? ? ? if (blkg->dev) { ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
>> + ? ? ? ? ? ? ? ? ? ? spin_lock_irq(&blkg->stats_lock); ? ? ? ? ? ? ? \
>> + ? ? ? ? ? ? ? ? ? ? cgroup_total += get_stats(blkg, cb, getvar, ? ? \
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? blkg->dev); ? ? ? ? ? ? \
>> + ? ? ? ? ? ? ? ? ? ? spin_unlock_irq(&blkg->stats_lock); ? ? ? ? ? ? \
>> + ? ? ? ? ? ? } ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> ? ? ? } ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> + ? ? if (show_total) ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> + ? ? ? ? ? ? cb->fill(cb, "Total", cgroup_total); ? ? ? ? ? ? ? ? ? ?\
>> ? ? ? rcu_read_unlock(); ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
>> ? ? ? cgroup_unlock(); ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
>> ? ? ? return 0; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> ?}
>>
>> -SHOW_FUNCTION_PER_GROUP(time);
>> -SHOW_FUNCTION_PER_GROUP(sectors);
>> +SHOW_FUNCTION_PER_GROUP(time, blkio_get_stat, blkio_get_time_stat, 0);
>> +SHOW_FUNCTION_PER_GROUP(sectors, blkio_get_stat, blkio_get_sectors_stat, 0);
>> +SHOW_FUNCTION_PER_GROUP(io_service_bytes, blkio_get_typed_stat,
>> + ? ? ? ? ? ? ? ? ? ? blkio_get_io_service_bytes_stat, 1);
>> +SHOW_FUNCTION_PER_GROUP(io_serviced, blkio_get_typed_stat,
>> + ? ? ? ? ? ? ? ? ? ? blkio_get_io_serviced_stat, 1);
>> +SHOW_FUNCTION_PER_GROUP(io_service_time, blkio_get_typed_stat,
>> + ? ? ? ? ? ? ? ? ? ? blkio_get_io_service_time_stat, 1);
>> +SHOW_FUNCTION_PER_GROUP(io_wait_time, blkio_get_typed_stat,
>> + ? ? ? ? ? ? ? ? ? ? blkio_get_io_wait_time_stat, 1);
>> ?#ifdef CONFIG_DEBUG_BLK_CGROUP
>> -SHOW_FUNCTION_PER_GROUP(dequeue);
>> +SHOW_FUNCTION_PER_GROUP(dequeue, blkio_get_stat, blkio_get_dequeue_stat, 0);
>> ?#endif
>> ?#undef SHOW_FUNCTION_PER_GROUP
>>
>> @@ -204,7 +328,7 @@ SHOW_FUNCTION_PER_GROUP(dequeue);
>> ?void blkiocg_update_blkio_group_dequeue_stats(struct blkio_group *blkg,
>> ? ? ? ? ? ? ? ? ? ? ? unsigned long dequeue)
>> ?{
>> - ? ? blkg->dequeue += dequeue;
>> + ? ? blkg->stats.dequeue += dequeue;
>> ?}
>> ?EXPORT_SYMBOL_GPL(blkiocg_update_blkio_group_dequeue_stats);
>> ?#endif
>> @@ -217,16 +341,39 @@ struct cftype blkio_files[] = {
>> ? ? ? },
>> ? ? ? {
>> ? ? ? ? ? ? ? .name = "time",
>> - ? ? ? ? ? ? .read_seq_string = blkiocg_time_read,
>> + ? ? ? ? ? ? .read_map = blkiocg_time_read,
>> + ? ? ? ? ? ? .write_u64 = blkiocg_reset_write,
>> ? ? ? },
>> ? ? ? {
>> ? ? ? ? ? ? ? .name = "sectors",
>> - ? ? ? ? ? ? .read_seq_string = blkiocg_sectors_read,
>> + ? ? ? ? ? ? .read_map = blkiocg_sectors_read,
>> + ? ? ? ? ? ? .write_u64 = blkiocg_reset_write,
>> + ? ? },
>> + ? ? {
>> + ? ? ? ? ? ? .name = "io_service_bytes",
>> + ? ? ? ? ? ? .read_map = blkiocg_io_service_bytes_read,
>> + ? ? ? ? ? ? .write_u64 = blkiocg_reset_write,
>> + ? ? },
>> + ? ? {
>> + ? ? ? ? ? ? .name = "io_serviced",
>> + ? ? ? ? ? ? .read_map = blkiocg_io_serviced_read,
>> + ? ? ? ? ? ? .write_u64 = blkiocg_reset_write,
>> + ? ? },
>> + ? ? {
>> + ? ? ? ? ? ? .name = "io_service_time",
>> + ? ? ? ? ? ? .read_map = blkiocg_io_service_time_read,
>> + ? ? ? ? ? ? .write_u64 = blkiocg_reset_write,
>> + ? ? },
>> + ? ? {
>> + ? ? ? ? ? ? .name = "io_wait_time",
>> + ? ? ? ? ? ? .read_map = blkiocg_io_wait_time_read,
>> + ? ? ? ? ? ? .write_u64 = blkiocg_reset_write,
>> ? ? ? },
>> ?#ifdef CONFIG_DEBUG_BLK_CGROUP
>> ? ? ? ? {
>> ? ? ? ? ? ? ? .name = "dequeue",
>> - ? ? ? ? ? ? .read_seq_string = blkiocg_dequeue_read,
>> + ? ? ? ? ? ? .read_map = blkiocg_dequeue_read,
>> + ? ? ? ? ? ? .write_u64 = blkiocg_reset_write,
>> ? ? ? ? },
>> ?#endif
>> ?};
>> diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
>> index fe44517..e8600b0 100644
>> --- a/block/blk-cgroup.h
>> +++ b/block/blk-cgroup.h
>> @@ -23,6 +23,14 @@ extern struct cgroup_subsys blkio_subsys;
>> ?#define blkio_subsys_id blkio_subsys.subsys_id
>> ?#endif
>>
>> +enum io_type {
>> + ? ? IO_READ = 0,
>> + ? ? IO_WRITE,
>> + ? ? IO_SYNC,
>> + ? ? IO_ASYNC,
>> + ? ? IO_TYPE_TOTAL
>> +};
>> +
>> ?struct blkio_cgroup {
>> ? ? ? struct cgroup_subsys_state css;
>> ? ? ? unsigned int weight;
>> @@ -30,6 +38,23 @@ struct blkio_cgroup {
>> ? ? ? struct hlist_head blkg_list;
>> ?};
>>
>> +struct blkio_group_stats {
>> + ? ? /* total disk time and nr sectors dispatched by this group */
>> + ? ? uint64_t time;
>> + ? ? uint64_t sectors;
>> + ? ? /* Total disk time used by IOs in ns */
>> + ? ? uint64_t io_service_time[IO_TYPE_TOTAL];
>> + ? ? uint64_t io_service_bytes[IO_TYPE_TOTAL]; /* Total bytes transferred */
>> + ? ? /* Total IOs serviced, post merge */
>> + ? ? uint64_t io_serviced[IO_TYPE_TOTAL];
>> + ? ? /* Total time spent waiting in scheduler queue in ns */
>> + ? ? uint64_t io_wait_time[IO_TYPE_TOTAL];
>> +#ifdef CONFIG_DEBUG_BLK_CGROUP
>> + ? ? /* How many times this group has been removed from service tree */
>> + ? ? unsigned long dequeue;
>> +#endif
>> +};
>> +
>> ?struct blkio_group {
>> ? ? ? /* An rcu protected unique identifier for the group */
>> ? ? ? void *key;
>> @@ -38,15 +63,13 @@ struct blkio_group {
>> ?#ifdef CONFIG_DEBUG_BLK_CGROUP
>> ? ? ? /* Store cgroup path */
>> ? ? ? char path[128];
>> - ? ? /* How many times this group has been removed from service tree */
>> - ? ? unsigned long dequeue;
>> ?#endif
>> ? ? ? /* The device MKDEV(major, minor), this group has been created for */
>> - ? ? dev_t ? dev;
>> + ? ? dev_t dev;
>>
>> - ? ? /* total disk time and nr sectors dispatched by this group */
>> - ? ? unsigned long time;
>> - ? ? unsigned long sectors;
>> + ? ? /* Need to serialize the stats in the case of reset/update */
>> + ? ? spinlock_t stats_lock;
>> + ? ? struct blkio_group_stats stats;
>> ?};
>>
>> ?typedef void (blkio_unlink_group_fn) (void *key, struct blkio_group *blkg);
>> @@ -105,8 +128,8 @@ extern void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
>> ?extern int blkiocg_del_blkio_group(struct blkio_group *blkg);
>> ?extern struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg,
>> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? void *key);
>> -void blkiocg_update_blkio_group_stats(struct blkio_group *blkg,
>> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? unsigned long time);
>> +void blkiocg_update_timeslice_used(struct blkio_group *blkg,
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? unsigned long time);
>> ?#else
>> ?struct cgroup;
>> ?static inline struct blkio_cgroup *
>> @@ -122,7 +145,7 @@ blkiocg_del_blkio_group(struct blkio_group *blkg) { return 0; }
>>
>> ?static inline struct blkio_group *
>> ?blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key) { return NULL; }
>> -static inline void blkiocg_update_blkio_group_stats(struct blkio_group *blkg,
>> +static inline void blkiocg_update_timeslice_used(struct blkio_group *blkg,
>> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? unsigned long time) {}
>> ?#endif
>> ?#endif /* _BLK_CGROUP_H */
>> diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
>> index c18e348..d91df9f 100644
>> --- a/block/cfq-iosched.c
>> +++ b/block/cfq-iosched.c
>> @@ -914,7 +914,7 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
>>
>> ? ? ? cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime,
>> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? st->min_vdisktime);
>> - ? ? blkiocg_update_blkio_group_stats(&cfqg->blkg, used_sl);
>> + ? ? blkiocg_update_timeslice_used(&cfqg->blkg, used_sl);
>> ?}
>>
>> ?#ifdef CONFIG_CFQ_GROUP_IOSCHED
>

2010-04-06 13:48:07

by Vivek Goyal

[permalink] [raw]
Subject: Re: [PATCH 2/3][v2] blkio: Add io controller stats like

On Mon, Apr 05, 2010 at 04:29:06PM -0700, Divyesh Shah wrote:
> On Mon, Apr 5, 2010 at 9:06 AM, Vivek Goyal <[email protected]> wrote:
> > On Fri, Apr 02, 2010 at 06:56:35PM -0700, Divyesh Shah wrote:
> >> - io_service_time (the actual time in ns taken by the dis to service the IO)
> >> - io_wait_time (the time spent waiting in the IO shceduler queues before
> >> ? getting serviced)
> >> - io_serviced (number of IOs serviced from this blkio_group)
> >> - io_service_bytes (Number of bytes served for this cgroup)
> >>
> >> These stats are accumulated per operation type helping us to distinguish between
> >> read and write, and sync and async IO. This patch does not increment any of
> >> these stats.
> >>
> >> Signed-off-by: Divyesh Shah<[email protected]>
> >> ---
> >>
> >> ?Documentation/cgroups/blkio-controller.txt | ? 33 +++++
> >> ?block/blk-cgroup.c ? ? ? ? ? ? ? ? ? ? ? ? | ?179 +++++++++++++++++++++++++---
> >> ?block/blk-cgroup.h ? ? ? ? ? ? ? ? ? ? ? ? | ? 41 +++++-
> >> ?block/cfq-iosched.c ? ? ? ? ? ? ? ? ? ? ? ?| ? ?2
> >> ?4 files changed, 229 insertions(+), 26 deletions(-)
> >>
> >> diff --git a/Documentation/cgroups/blkio-controller.txt b/Documentation/cgroups/blkio-controller.txt
> >> index 630879c..ededdca 100644
> >> --- a/Documentation/cgroups/blkio-controller.txt
> >> +++ b/Documentation/cgroups/blkio-controller.txt
> >> @@ -75,6 +75,9 @@ CONFIG_DEBUG_BLK_CGROUP
> >>
> >> ?Details of cgroup files
> >> ?=======================
> >> +Writing an int to any of the stats files (which excludes weight) will result
> >> +in all stats for that cgroup to be erased.
> >> +
> >> ?- blkio.weight
> >> ? ? ? - Specifies per cgroup weight.
> >>
> >> @@ -92,6 +95,36 @@ Details of cgroup files
> >> ? ? ? ? third field specifies the number of sectors transferred by the
> >> ? ? ? ? group to/from the device.
> >>
> >> +- blkio.io_service_bytes
> >> + ? ? - Number of bytes transferred to/from the disk by the group. These
> >> + ? ? ? are further divided by the type of operation - read or write, sync
> >> + ? ? ? or async. First two fields specify the major and minor number of the
> >> + ? ? ? device, third field specifies the operation type and the fourth field
> >> + ? ? ? specifies the number of bytes.
> >> +
> >> +- blkio.io_serviced
> >> + ? ? - Number of IOs completed to/from the disk by the group. These
> >> + ? ? ? are further divided by the type of operation - read or write, sync
> >> + ? ? ? or async. First two fields specify the major and minor number of the
> >> + ? ? ? device, third field specifies the operation type and the fourth field
> >> + ? ? ? specifies the number of IOs.
> >> +
> >> +- blkio.io_service_time
> >> + ? ? - Total amount of time spent by the device to service the IOs for this
> >> + ? ? ? cgroup. This is in nanoseconds to make it meaningful for flash
> >> + ? ? ? devices too. This time is further divided by the type of operation -
> >> + ? ? ? read or write, sync or async. First two fields specify the major and
> >> + ? ? ? minor number of the device, third field specifies the operation type
> >> + ? ? ? and the fourth field specifies the io_service_time in ns.
> >
> > Hi Divyesh,
> >
> > Looking the third patch where you acutally increment the stats, this is
> > how service time is calculated.
> >
> > - Save start_time in rq, when driver actually removes the request from
> > ?request queue.
> > - at request completion time calculate service time.
> > ?service_time = now - start_time.
> >
> > This works very well if driver/device does not have NCQ and process one
> > request at a time. But with NCQ, we can multiple requests in the driver
> > queue at the same time then we can run into issues.
> >
> > - With NCQ, time becomes cumulative. So if three requests rq1, rq2 and rq3
> > ?are in disk queue, and if requests are processed in the order rq1,rq2,rq3 by
> > ?the disk (no parallel channles), then rq1 and rq2's completion time is
> > ?added in rq3. So total service time of the group can be much more than
> > ?actual time elapsed. That does not seem right. Did you face this issue in
> > ?your testing?
>
> You are right. With NCQ, the io_service_time numbers aren't exact.
> I've only tested this without NCQ for which these stats are accurate
> and very useful. With NCQ turned on I don't see a good way of getting
> this information accurately since only the disk knows when it actually
> starts to service a given request. With NCQ, io_service_time will have
> cumulative time as you pointed out which may still be useful for some
> high-level analysis if you also have the average queue depth for the
> device.
>

Most of the modern disks now seem to be be NCQ. But I guess we can keep
those stats as it is useful for you on non NCQ setup.

> io_wait_time stat makes sense with or w/o NCQ since its defined to
> only include the scheduler queueing time for each IO and is cumulative
> by design. I should probably reword the definition of io_service_time
> to indicate time after dispatch to the driver and request completion
> and also add a comment of how NCQ affects this stat. Sounds good?

I am not sure if io_wait_time is cumulative by design. Think of this,
somebody says that this cgroup has waited for 1s on CFQ queue but actually
only .5 seconds have elapsed since the observation started.

So I would say that please document it very properly in blkio-controller.txt
so that it is clear what exactly these numbers represent and what to expect on
NCQ disks.

Thanks
Vivek

>
> -Divyesh
>
> >
> > Same is the case with io_wait_time, Because time is cumulative, actual
> > io_wait_time, can be more than real time elapsed.
> >
> > Is this the intention and you are finding even cumulative time useful?
> >
> >> +
> >> +- blkio.io_wait_time
> >> + ? ? - Total amount of time the IO spent waiting in the scheduler queues for
> >> + ? ? ? service. This is in nanoseconds to make it meaningful for flash
> >> + ? ? ? devices too. This time is further divided by the type of operation -
> >> + ? ? ? read or write, sync or async. First two fields specify the major and
> >> + ? ? ? minor number of the device, third field specifies the operation type
> >> + ? ? ? and the fourth field specifies the io_wait_time in ns.
> >> +
> >> ?- blkio.dequeue
> >> ? ? ? - Debugging aid only enabled if CONFIG_DEBUG_CFQ_IOSCHED=y. This
> >> ? ? ? ? gives the statistics about how many a times a group was dequeued
> >> diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
> >> index 5be3981..cac10b2 100644
> >> --- a/block/blk-cgroup.c
> >> +++ b/block/blk-cgroup.c
> >> @@ -17,6 +17,8 @@
> >> ?#include <linux/err.h>
> >> ?#include "blk-cgroup.h"
> >>
> >> +#define MAX_KEY_LEN 100
> >> +
> >> ?static DEFINE_SPINLOCK(blkio_list_lock);
> >> ?static LIST_HEAD(blkio_list);
> >>
> >> @@ -55,12 +57,15 @@ struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
> >> ?}
> >> ?EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup);
> >>
> >> -void blkiocg_update_blkio_group_stats(struct blkio_group *blkg,
> >> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? unsigned long time)
> >> +void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time)
> >> ?{
> >> - ? ? blkg->time += time;
> >> + ? ? unsigned long flags;
> >> +
> >> + ? ? spin_lock_irqsave(&blkg->stats_lock, flags);
> >> + ? ? blkg->stats.time += time;
> >> + ? ? spin_unlock_irqrestore(&blkg->stats_lock, flags);
> >> ?}
> >> -EXPORT_SYMBOL_GPL(blkiocg_update_blkio_group_stats);
> >> +EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used);
> >>
> >> ?void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
> >> ? ? ? ? ? ? ? ? ? ? ? struct blkio_group *blkg, void *key, dev_t dev)
> >> @@ -170,13 +175,119 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
> >> ? ? ? return 0;
> >> ?}
> >>
> >> -#define SHOW_FUNCTION_PER_GROUP(__VAR) ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
> >> +static int
> >> +blkiocg_reset_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
> >> +{
> >> + ? ? struct blkio_cgroup *blkcg;
> >> + ? ? struct blkio_group *blkg;
> >> + ? ? struct hlist_node *n;
> >> + ? ? struct blkio_group_stats *stats;
> >> +
> >> + ? ? blkcg = cgroup_to_blkio_cgroup(cgroup);
> >> + ? ? spin_lock_irq(&blkcg->lock);
> >> + ? ? hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
> >> + ? ? ? ? ? ? spin_lock(&blkg->stats_lock);
> >> + ? ? ? ? ? ? stats = &blkg->stats;
> >> + ? ? ? ? ? ? memset(stats, 0, sizeof(struct blkio_group_stats));
> >> + ? ? ? ? ? ? spin_unlock(&blkg->stats_lock);
> >> + ? ? }
> >> + ? ? spin_unlock_irq(&blkcg->lock);
> >> + ? ? return 0;
> >> +}
> >> +
> >> +static void blkio_get_key_name(int type, dev_t dev, char *str, int chars_left)
> >> +{
> >> + ? ? snprintf(str, chars_left, "%u:%u", MAJOR(dev), MINOR(dev));
> >> + ? ? chars_left -= strlen(str);
> >> + ? ? if (chars_left <= 0) {
> >> + ? ? ? ? ? ? printk(KERN_WARNING
> >> + ? ? ? ? ? ? ? ? ? ? "Possibly incorrect cgroup stat display format");
> >> + ? ? ? ? ? ? return;
> >> + ? ? }
> >> + ? ? switch (type) {
> >> + ? ? case IO_READ:
> >> + ? ? ? ? ? ? strlcat(str, " Read", chars_left);
> >> + ? ? ? ? ? ? break;
> >> + ? ? case IO_WRITE:
> >> + ? ? ? ? ? ? strlcat(str, " Write", chars_left);
> >> + ? ? ? ? ? ? break;
> >> + ? ? case IO_SYNC:
> >> + ? ? ? ? ? ? strlcat(str, " Sync", chars_left);
> >> + ? ? ? ? ? ? break;
> >> + ? ? case IO_ASYNC:
> >> + ? ? ? ? ? ? strlcat(str, " Async", chars_left);
> >> + ? ? ? ? ? ? break;
> >> + ? ? case IO_TYPE_TOTAL:
> >> + ? ? ? ? ? ? strlcat(str, " Total", chars_left);
> >> + ? ? ? ? ? ? break;
> >> + ? ? default:
> >> + ? ? ? ? ? ? strlcat(str, " Invalid", chars_left);
> >> + ? ? }
> >> +}
> >> +
> >> +typedef uint64_t (get_var) (struct blkio_group *, int);
> >> +
> >> +static uint64_t blkio_get_typed_stat(struct blkio_group *blkg,
> >> + ? ? ? ? ? ? struct cgroup_map_cb *cb, get_var *getvar, dev_t dev)
> >> +{
> >> + ? ? uint64_t disk_total;
> >> + ? ? char key_str[MAX_KEY_LEN];
> >> + ? ? int type;
> >> +
> >> + ? ? for (type = 0; type < IO_TYPE_TOTAL; type++) {
> >> + ? ? ? ? ? ? blkio_get_key_name(type, dev, key_str, MAX_KEY_LEN);
> >> + ? ? ? ? ? ? cb->fill(cb, key_str, getvar(blkg, type));
> >> + ? ? }
> >> + ? ? disk_total = getvar(blkg, IO_READ) + getvar(blkg, IO_WRITE);
> >> + ? ? blkio_get_key_name(IO_TYPE_TOTAL, dev, key_str, MAX_KEY_LEN);
> >> + ? ? cb->fill(cb, key_str, disk_total);
> >> + ? ? return disk_total;
> >> +}
> >> +
> >> +static uint64_t blkio_get_stat(struct blkio_group *blkg,
> >> + ? ? ? ? ? ? struct cgroup_map_cb *cb, get_var *getvar, dev_t dev)
> >> +{
> >> + ? ? uint64_t var = getvar(blkg, 0);
> >> + ? ? char str[10];
> >> +
> >> + ? ? snprintf(str, 10, "%u:%u", MAJOR(dev), MINOR(dev));
> >> + ? ? cb->fill(cb, str, var);
> >> + ? ? return var;
> >> +}
> >> +
> >> +#define GET_STAT_INDEXED(__VAR) ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
> >> +uint64_t blkio_get_##__VAR##_stat(struct blkio_group *blkg, int type) ? ? ? ?\
> >> +{ ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
> >> + ? ? return blkg->stats.__VAR[type]; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
> >> +} ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
> >> +
> >> +GET_STAT_INDEXED(io_service_bytes);
> >> +GET_STAT_INDEXED(io_serviced);
> >> +GET_STAT_INDEXED(io_service_time);
> >> +GET_STAT_INDEXED(io_wait_time);
> >> +#undef GET_STAT_INDEXED
> >> +
> >> +#define GET_STAT(__VAR) ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
> >> +uint64_t blkio_get_##__VAR##_stat(struct blkio_group *blkg, int dummy) ? ? ? \
> >> +{ ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
> >> + ? ? return blkg->stats.__VAR; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
> >> +}
> >> +
> >> +GET_STAT(time);
> >> +GET_STAT(sectors);
> >> +#ifdef CONFIG_DEBUG_BLK_CGROUP
> >> +GET_STAT(dequeue);
> >> +#endif
> >> +#undef GET_STAT
> >> +
> >> +#define SHOW_FUNCTION_PER_GROUP(__VAR, get_stats, getvar, show_total) ? ? ? ?\
> >> ?static int blkiocg_##__VAR##_read(struct cgroup *cgroup, ? ? ? ? ? ? \
> >> - ? ? ? ? ? ? ? ? ? ? struct cftype *cftype, struct seq_file *m) ? ? ?\
> >> + ? ? ? ? ? ? struct cftype *cftype, struct cgroup_map_cb *cb) ? ? ? ?\
> >> ?{ ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
> >> ? ? ? struct blkio_cgroup *blkcg; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
> >> ? ? ? struct blkio_group *blkg; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
> >> ? ? ? struct hlist_node *n; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
> >> + ? ? uint64_t cgroup_total = 0; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
> >> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
> >> ? ? ? if (!cgroup_lock_live_group(cgroup)) ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
> >> ? ? ? ? ? ? ? return -ENODEV; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
> >> @@ -184,19 +295,32 @@ static int blkiocg_##__VAR##_read(struct cgroup *cgroup, ? ? ? ? ? ? ? ?\
> >> ? ? ? blkcg = cgroup_to_blkio_cgroup(cgroup); ? ? ? ? ? ? ? ? ? ? ? ? \
> >> ? ? ? rcu_read_lock(); ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
> >> ? ? ? hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) {\
> >> - ? ? ? ? ? ? if (blkg->dev) ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
> >> - ? ? ? ? ? ? ? ? ? ? seq_printf(m, "%u:%u %lu\n", MAJOR(blkg->dev), ?\
> >> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?MINOR(blkg->dev), blkg->__VAR); ? ? ? ?\
> >> + ? ? ? ? ? ? if (blkg->dev) { ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
> >> + ? ? ? ? ? ? ? ? ? ? spin_lock_irq(&blkg->stats_lock); ? ? ? ? ? ? ? \
> >> + ? ? ? ? ? ? ? ? ? ? cgroup_total += get_stats(blkg, cb, getvar, ? ? \
> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? blkg->dev); ? ? ? ? ? ? \
> >> + ? ? ? ? ? ? ? ? ? ? spin_unlock_irq(&blkg->stats_lock); ? ? ? ? ? ? \
> >> + ? ? ? ? ? ? } ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
> >> ? ? ? } ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
> >> + ? ? if (show_total) ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
> >> + ? ? ? ? ? ? cb->fill(cb, "Total", cgroup_total); ? ? ? ? ? ? ? ? ? ?\
> >> ? ? ? rcu_read_unlock(); ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
> >> ? ? ? cgroup_unlock(); ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
> >> ? ? ? return 0; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
> >> ?}
> >>
> >> -SHOW_FUNCTION_PER_GROUP(time);
> >> -SHOW_FUNCTION_PER_GROUP(sectors);
> >> +SHOW_FUNCTION_PER_GROUP(time, blkio_get_stat, blkio_get_time_stat, 0);
> >> +SHOW_FUNCTION_PER_GROUP(sectors, blkio_get_stat, blkio_get_sectors_stat, 0);
> >> +SHOW_FUNCTION_PER_GROUP(io_service_bytes, blkio_get_typed_stat,
> >> + ? ? ? ? ? ? ? ? ? ? blkio_get_io_service_bytes_stat, 1);
> >> +SHOW_FUNCTION_PER_GROUP(io_serviced, blkio_get_typed_stat,
> >> + ? ? ? ? ? ? ? ? ? ? blkio_get_io_serviced_stat, 1);
> >> +SHOW_FUNCTION_PER_GROUP(io_service_time, blkio_get_typed_stat,
> >> + ? ? ? ? ? ? ? ? ? ? blkio_get_io_service_time_stat, 1);
> >> +SHOW_FUNCTION_PER_GROUP(io_wait_time, blkio_get_typed_stat,
> >> + ? ? ? ? ? ? ? ? ? ? blkio_get_io_wait_time_stat, 1);
> >> ?#ifdef CONFIG_DEBUG_BLK_CGROUP
> >> -SHOW_FUNCTION_PER_GROUP(dequeue);
> >> +SHOW_FUNCTION_PER_GROUP(dequeue, blkio_get_stat, blkio_get_dequeue_stat, 0);
> >> ?#endif
> >> ?#undef SHOW_FUNCTION_PER_GROUP
> >>
> >> @@ -204,7 +328,7 @@ SHOW_FUNCTION_PER_GROUP(dequeue);
> >> ?void blkiocg_update_blkio_group_dequeue_stats(struct blkio_group *blkg,
> >> ? ? ? ? ? ? ? ? ? ? ? unsigned long dequeue)
> >> ?{
> >> - ? ? blkg->dequeue += dequeue;
> >> + ? ? blkg->stats.dequeue += dequeue;
> >> ?}
> >> ?EXPORT_SYMBOL_GPL(blkiocg_update_blkio_group_dequeue_stats);
> >> ?#endif
> >> @@ -217,16 +341,39 @@ struct cftype blkio_files[] = {
> >> ? ? ? },
> >> ? ? ? {
> >> ? ? ? ? ? ? ? .name = "time",
> >> - ? ? ? ? ? ? .read_seq_string = blkiocg_time_read,
> >> + ? ? ? ? ? ? .read_map = blkiocg_time_read,
> >> + ? ? ? ? ? ? .write_u64 = blkiocg_reset_write,
> >> ? ? ? },
> >> ? ? ? {
> >> ? ? ? ? ? ? ? .name = "sectors",
> >> - ? ? ? ? ? ? .read_seq_string = blkiocg_sectors_read,
> >> + ? ? ? ? ? ? .read_map = blkiocg_sectors_read,
> >> + ? ? ? ? ? ? .write_u64 = blkiocg_reset_write,
> >> + ? ? },
> >> + ? ? {
> >> + ? ? ? ? ? ? .name = "io_service_bytes",
> >> + ? ? ? ? ? ? .read_map = blkiocg_io_service_bytes_read,
> >> + ? ? ? ? ? ? .write_u64 = blkiocg_reset_write,
> >> + ? ? },
> >> + ? ? {
> >> + ? ? ? ? ? ? .name = "io_serviced",
> >> + ? ? ? ? ? ? .read_map = blkiocg_io_serviced_read,
> >> + ? ? ? ? ? ? .write_u64 = blkiocg_reset_write,
> >> + ? ? },
> >> + ? ? {
> >> + ? ? ? ? ? ? .name = "io_service_time",
> >> + ? ? ? ? ? ? .read_map = blkiocg_io_service_time_read,
> >> + ? ? ? ? ? ? .write_u64 = blkiocg_reset_write,
> >> + ? ? },
> >> + ? ? {
> >> + ? ? ? ? ? ? .name = "io_wait_time",
> >> + ? ? ? ? ? ? .read_map = blkiocg_io_wait_time_read,
> >> + ? ? ? ? ? ? .write_u64 = blkiocg_reset_write,
> >> ? ? ? },
> >> ?#ifdef CONFIG_DEBUG_BLK_CGROUP
> >> ? ? ? ? {
> >> ? ? ? ? ? ? ? .name = "dequeue",
> >> - ? ? ? ? ? ? .read_seq_string = blkiocg_dequeue_read,
> >> + ? ? ? ? ? ? .read_map = blkiocg_dequeue_read,
> >> + ? ? ? ? ? ? .write_u64 = blkiocg_reset_write,
> >> ? ? ? ? },
> >> ?#endif
> >> ?};
> >> diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
> >> index fe44517..e8600b0 100644
> >> --- a/block/blk-cgroup.h
> >> +++ b/block/blk-cgroup.h
> >> @@ -23,6 +23,14 @@ extern struct cgroup_subsys blkio_subsys;
> >> ?#define blkio_subsys_id blkio_subsys.subsys_id
> >> ?#endif
> >>
> >> +enum io_type {
> >> + ? ? IO_READ = 0,
> >> + ? ? IO_WRITE,
> >> + ? ? IO_SYNC,
> >> + ? ? IO_ASYNC,
> >> + ? ? IO_TYPE_TOTAL
> >> +};
> >> +
> >> ?struct blkio_cgroup {
> >> ? ? ? struct cgroup_subsys_state css;
> >> ? ? ? unsigned int weight;
> >> @@ -30,6 +38,23 @@ struct blkio_cgroup {
> >> ? ? ? struct hlist_head blkg_list;
> >> ?};
> >>
> >> +struct blkio_group_stats {
> >> + ? ? /* total disk time and nr sectors dispatched by this group */
> >> + ? ? uint64_t time;
> >> + ? ? uint64_t sectors;
> >> + ? ? /* Total disk time used by IOs in ns */
> >> + ? ? uint64_t io_service_time[IO_TYPE_TOTAL];
> >> + ? ? uint64_t io_service_bytes[IO_TYPE_TOTAL]; /* Total bytes transferred */
> >> + ? ? /* Total IOs serviced, post merge */
> >> + ? ? uint64_t io_serviced[IO_TYPE_TOTAL];
> >> + ? ? /* Total time spent waiting in scheduler queue in ns */
> >> + ? ? uint64_t io_wait_time[IO_TYPE_TOTAL];
> >> +#ifdef CONFIG_DEBUG_BLK_CGROUP
> >> + ? ? /* How many times this group has been removed from service tree */
> >> + ? ? unsigned long dequeue;
> >> +#endif
> >> +};
> >> +
> >> ?struct blkio_group {
> >> ? ? ? /* An rcu protected unique identifier for the group */
> >> ? ? ? void *key;
> >> @@ -38,15 +63,13 @@ struct blkio_group {
> >> ?#ifdef CONFIG_DEBUG_BLK_CGROUP
> >> ? ? ? /* Store cgroup path */
> >> ? ? ? char path[128];
> >> - ? ? /* How many times this group has been removed from service tree */
> >> - ? ? unsigned long dequeue;
> >> ?#endif
> >> ? ? ? /* The device MKDEV(major, minor), this group has been created for */
> >> - ? ? dev_t ? dev;
> >> + ? ? dev_t dev;
> >>
> >> - ? ? /* total disk time and nr sectors dispatched by this group */
> >> - ? ? unsigned long time;
> >> - ? ? unsigned long sectors;
> >> + ? ? /* Need to serialize the stats in the case of reset/update */
> >> + ? ? spinlock_t stats_lock;
> >> + ? ? struct blkio_group_stats stats;
> >> ?};
> >>
> >> ?typedef void (blkio_unlink_group_fn) (void *key, struct blkio_group *blkg);
> >> @@ -105,8 +128,8 @@ extern void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
> >> ?extern int blkiocg_del_blkio_group(struct blkio_group *blkg);
> >> ?extern struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg,
> >> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? void *key);
> >> -void blkiocg_update_blkio_group_stats(struct blkio_group *blkg,
> >> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? unsigned long time);
> >> +void blkiocg_update_timeslice_used(struct blkio_group *blkg,
> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? unsigned long time);
> >> ?#else
> >> ?struct cgroup;
> >> ?static inline struct blkio_cgroup *
> >> @@ -122,7 +145,7 @@ blkiocg_del_blkio_group(struct blkio_group *blkg) { return 0; }
> >>
> >> ?static inline struct blkio_group *
> >> ?blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key) { return NULL; }
> >> -static inline void blkiocg_update_blkio_group_stats(struct blkio_group *blkg,
> >> +static inline void blkiocg_update_timeslice_used(struct blkio_group *blkg,
> >> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? unsigned long time) {}
> >> ?#endif
> >> ?#endif /* _BLK_CGROUP_H */
> >> diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
> >> index c18e348..d91df9f 100644
> >> --- a/block/cfq-iosched.c
> >> +++ b/block/cfq-iosched.c
> >> @@ -914,7 +914,7 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
> >>
> >> ? ? ? cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime,
> >> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? st->min_vdisktime);
> >> - ? ? blkiocg_update_blkio_group_stats(&cfqg->blkg, used_sl);
> >> + ? ? blkiocg_update_timeslice_used(&cfqg->blkg, used_sl);
> >> ?}
> >>
> >> ?#ifdef CONFIG_CFQ_GROUP_IOSCHED
> >

2010-04-06 17:00:36

by Divyesh Shah

[permalink] [raw]
Subject: Re: [PATCH 2/3][v2] blkio: Add io controller stats like

On Tue, Apr 6, 2010 at 6:47 AM, Vivek Goyal <[email protected]> wrote:
> On Mon, Apr 05, 2010 at 04:29:06PM -0700, Divyesh Shah wrote:
>> On Mon, Apr 5, 2010 at 9:06 AM, Vivek Goyal <[email protected]> wrote:
>> > On Fri, Apr 02, 2010 at 06:56:35PM -0700, Divyesh Shah wrote:
>> >> - io_service_time (the actual time in ns taken by the dis to service the IO)
>> >> - io_wait_time (the time spent waiting in the IO shceduler queues before
>> >> ? getting serviced)
>> >> - io_serviced (number of IOs serviced from this blkio_group)
>> >> - io_service_bytes (Number of bytes served for this cgroup)
>> >>
>> >> These stats are accumulated per operation type helping us to distinguish between
>> >> read and write, and sync and async IO. This patch does not increment any of
>> >> these stats.
>> >>
>> >> Signed-off-by: Divyesh Shah<[email protected]>
>> >> ---
>> >>
>> >> ?Documentation/cgroups/blkio-controller.txt | ? 33 +++++
>> >> ?block/blk-cgroup.c ? ? ? ? ? ? ? ? ? ? ? ? | ?179 +++++++++++++++++++++++++---
>> >> ?block/blk-cgroup.h ? ? ? ? ? ? ? ? ? ? ? ? | ? 41 +++++-
>> >> ?block/cfq-iosched.c ? ? ? ? ? ? ? ? ? ? ? ?| ? ?2
>> >> ?4 files changed, 229 insertions(+), 26 deletions(-)
>> >>
>> >> diff --git a/Documentation/cgroups/blkio-controller.txt b/Documentation/cgroups/blkio-controller.txt
>> >> index 630879c..ededdca 100644
>> >> --- a/Documentation/cgroups/blkio-controller.txt
>> >> +++ b/Documentation/cgroups/blkio-controller.txt
>> >> @@ -75,6 +75,9 @@ CONFIG_DEBUG_BLK_CGROUP
>> >>
>> >> ?Details of cgroup files
>> >> ?=======================
>> >> +Writing an int to any of the stats files (which excludes weight) will result
>> >> +in all stats for that cgroup to be erased.
>> >> +
>> >> ?- blkio.weight
>> >> ? ? ? - Specifies per cgroup weight.
>> >>
>> >> @@ -92,6 +95,36 @@ Details of cgroup files
>> >> ? ? ? ? third field specifies the number of sectors transferred by the
>> >> ? ? ? ? group to/from the device.
>> >>
>> >> +- blkio.io_service_bytes
>> >> + ? ? - Number of bytes transferred to/from the disk by the group. These
>> >> + ? ? ? are further divided by the type of operation - read or write, sync
>> >> + ? ? ? or async. First two fields specify the major and minor number of the
>> >> + ? ? ? device, third field specifies the operation type and the fourth field
>> >> + ? ? ? specifies the number of bytes.
>> >> +
>> >> +- blkio.io_serviced
>> >> + ? ? - Number of IOs completed to/from the disk by the group. These
>> >> + ? ? ? are further divided by the type of operation - read or write, sync
>> >> + ? ? ? or async. First two fields specify the major and minor number of the
>> >> + ? ? ? device, third field specifies the operation type and the fourth field
>> >> + ? ? ? specifies the number of IOs.
>> >> +
>> >> +- blkio.io_service_time
>> >> + ? ? - Total amount of time spent by the device to service the IOs for this
>> >> + ? ? ? cgroup. This is in nanoseconds to make it meaningful for flash
>> >> + ? ? ? devices too. This time is further divided by the type of operation -
>> >> + ? ? ? read or write, sync or async. First two fields specify the major and
>> >> + ? ? ? minor number of the device, third field specifies the operation type
>> >> + ? ? ? and the fourth field specifies the io_service_time in ns.
>> >
>> > Hi Divyesh,
>> >
>> > Looking the third patch where you acutally increment the stats, this is
>> > how service time is calculated.
>> >
>> > - Save start_time in rq, when driver actually removes the request from
>> > ?request queue.
>> > - at request completion time calculate service time.
>> > ?service_time = now - start_time.
>> >
>> > This works very well if driver/device does not have NCQ and process one
>> > request at a time. But with NCQ, we can multiple requests in the driver
>> > queue at the same time then we can run into issues.
>> >
>> > - With NCQ, time becomes cumulative. So if three requests rq1, rq2 and rq3
>> > ?are in disk queue, and if requests are processed in the order rq1,rq2,rq3 by
>> > ?the disk (no parallel channles), then rq1 and rq2's completion time is
>> > ?added in rq3. So total service time of the group can be much more than
>> > ?actual time elapsed. That does not seem right. Did you face this issue in
>> > ?your testing?
>>
>> You are right. With NCQ, the io_service_time numbers aren't exact.
>> I've only tested this without NCQ for which these stats are accurate
>> and very useful. With NCQ turned on I don't see a good way of getting
>> this information accurately since only the disk knows when it actually
>> starts to service a given request. With NCQ, io_service_time will have
>> cumulative time as you pointed out which may still be useful for some
>> high-level analysis if you also have the average queue depth for the
>> device.
>>
>
> Most of the modern disks now seem to be be NCQ. But I guess we can keep
> those stats as it is useful for you on non NCQ setup.
>
>> io_wait_time stat makes sense with or w/o NCQ since its defined to
>> only include the scheduler queueing time for each IO and is cumulative
>> by design. I should probably reword the definition of io_service_time
>> to indicate time after dispatch to the driver and request completion
>> and also add a comment of how NCQ affects this stat. Sounds good?
>
> I am not sure if io_wait_time is cumulative by design. Think of this,
> somebody says that this cgroup has waited for 1s on CFQ queue but actually
> only .5 seconds have elapsed since the observation started.
>
> So I would say that please document it very properly in blkio-controller.txt
> so that it is clear what exactly these numbers represent and what to expect on
> NCQ disks.

Ok I will try to clarify further.

>
> Thanks
> Vivek
>
>>
>> -Divyesh
>>
>> >
>> > Same is the case with io_wait_time, Because time is cumulative, actual
>> > io_wait_time, can be more than real time elapsed.
>> >
>> > Is this the intention and you are finding even cumulative time useful?
>> >
>> >> +
>> >> +- blkio.io_wait_time
>> >> + ? ? - Total amount of time the IO spent waiting in the scheduler queues for
>> >> + ? ? ? service. This is in nanoseconds to make it meaningful for flash
>> >> + ? ? ? devices too. This time is further divided by the type of operation -
>> >> + ? ? ? read or write, sync or async. First two fields specify the major and
>> >> + ? ? ? minor number of the device, third field specifies the operation type
>> >> + ? ? ? and the fourth field specifies the io_wait_time in ns.
>> >> +
>> >> ?- blkio.dequeue
>> >> ? ? ? - Debugging aid only enabled if CONFIG_DEBUG_CFQ_IOSCHED=y. This
>> >> ? ? ? ? gives the statistics about how many a times a group was dequeued
>> >> diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
>> >> index 5be3981..cac10b2 100644
>> >> --- a/block/blk-cgroup.c
>> >> +++ b/block/blk-cgroup.c
>> >> @@ -17,6 +17,8 @@
>> >> ?#include <linux/err.h>
>> >> ?#include "blk-cgroup.h"
>> >>
>> >> +#define MAX_KEY_LEN 100
>> >> +
>> >> ?static DEFINE_SPINLOCK(blkio_list_lock);
>> >> ?static LIST_HEAD(blkio_list);
>> >>
>> >> @@ -55,12 +57,15 @@ struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
>> >> ?}
>> >> ?EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup);
>> >>
>> >> -void blkiocg_update_blkio_group_stats(struct blkio_group *blkg,
>> >> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? unsigned long time)
>> >> +void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time)
>> >> ?{
>> >> - ? ? blkg->time += time;
>> >> + ? ? unsigned long flags;
>> >> +
>> >> + ? ? spin_lock_irqsave(&blkg->stats_lock, flags);
>> >> + ? ? blkg->stats.time += time;
>> >> + ? ? spin_unlock_irqrestore(&blkg->stats_lock, flags);
>> >> ?}
>> >> -EXPORT_SYMBOL_GPL(blkiocg_update_blkio_group_stats);
>> >> +EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used);
>> >>
>> >> ?void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
>> >> ? ? ? ? ? ? ? ? ? ? ? struct blkio_group *blkg, void *key, dev_t dev)
>> >> @@ -170,13 +175,119 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
>> >> ? ? ? return 0;
>> >> ?}
>> >>
>> >> -#define SHOW_FUNCTION_PER_GROUP(__VAR) ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> >> +static int
>> >> +blkiocg_reset_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
>> >> +{
>> >> + ? ? struct blkio_cgroup *blkcg;
>> >> + ? ? struct blkio_group *blkg;
>> >> + ? ? struct hlist_node *n;
>> >> + ? ? struct blkio_group_stats *stats;
>> >> +
>> >> + ? ? blkcg = cgroup_to_blkio_cgroup(cgroup);
>> >> + ? ? spin_lock_irq(&blkcg->lock);
>> >> + ? ? hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
>> >> + ? ? ? ? ? ? spin_lock(&blkg->stats_lock);
>> >> + ? ? ? ? ? ? stats = &blkg->stats;
>> >> + ? ? ? ? ? ? memset(stats, 0, sizeof(struct blkio_group_stats));
>> >> + ? ? ? ? ? ? spin_unlock(&blkg->stats_lock);
>> >> + ? ? }
>> >> + ? ? spin_unlock_irq(&blkcg->lock);
>> >> + ? ? return 0;
>> >> +}
>> >> +
>> >> +static void blkio_get_key_name(int type, dev_t dev, char *str, int chars_left)
>> >> +{
>> >> + ? ? snprintf(str, chars_left, "%u:%u", MAJOR(dev), MINOR(dev));
>> >> + ? ? chars_left -= strlen(str);
>> >> + ? ? if (chars_left <= 0) {
>> >> + ? ? ? ? ? ? printk(KERN_WARNING
>> >> + ? ? ? ? ? ? ? ? ? ? "Possibly incorrect cgroup stat display format");
>> >> + ? ? ? ? ? ? return;
>> >> + ? ? }
>> >> + ? ? switch (type) {
>> >> + ? ? case IO_READ:
>> >> + ? ? ? ? ? ? strlcat(str, " Read", chars_left);
>> >> + ? ? ? ? ? ? break;
>> >> + ? ? case IO_WRITE:
>> >> + ? ? ? ? ? ? strlcat(str, " Write", chars_left);
>> >> + ? ? ? ? ? ? break;
>> >> + ? ? case IO_SYNC:
>> >> + ? ? ? ? ? ? strlcat(str, " Sync", chars_left);
>> >> + ? ? ? ? ? ? break;
>> >> + ? ? case IO_ASYNC:
>> >> + ? ? ? ? ? ? strlcat(str, " Async", chars_left);
>> >> + ? ? ? ? ? ? break;
>> >> + ? ? case IO_TYPE_TOTAL:
>> >> + ? ? ? ? ? ? strlcat(str, " Total", chars_left);
>> >> + ? ? ? ? ? ? break;
>> >> + ? ? default:
>> >> + ? ? ? ? ? ? strlcat(str, " Invalid", chars_left);
>> >> + ? ? }
>> >> +}
>> >> +
>> >> +typedef uint64_t (get_var) (struct blkio_group *, int);
>> >> +
>> >> +static uint64_t blkio_get_typed_stat(struct blkio_group *blkg,
>> >> + ? ? ? ? ? ? struct cgroup_map_cb *cb, get_var *getvar, dev_t dev)
>> >> +{
>> >> + ? ? uint64_t disk_total;
>> >> + ? ? char key_str[MAX_KEY_LEN];
>> >> + ? ? int type;
>> >> +
>> >> + ? ? for (type = 0; type < IO_TYPE_TOTAL; type++) {
>> >> + ? ? ? ? ? ? blkio_get_key_name(type, dev, key_str, MAX_KEY_LEN);
>> >> + ? ? ? ? ? ? cb->fill(cb, key_str, getvar(blkg, type));
>> >> + ? ? }
>> >> + ? ? disk_total = getvar(blkg, IO_READ) + getvar(blkg, IO_WRITE);
>> >> + ? ? blkio_get_key_name(IO_TYPE_TOTAL, dev, key_str, MAX_KEY_LEN);
>> >> + ? ? cb->fill(cb, key_str, disk_total);
>> >> + ? ? return disk_total;
>> >> +}
>> >> +
>> >> +static uint64_t blkio_get_stat(struct blkio_group *blkg,
>> >> + ? ? ? ? ? ? struct cgroup_map_cb *cb, get_var *getvar, dev_t dev)
>> >> +{
>> >> + ? ? uint64_t var = getvar(blkg, 0);
>> >> + ? ? char str[10];
>> >> +
>> >> + ? ? snprintf(str, 10, "%u:%u", MAJOR(dev), MINOR(dev));
>> >> + ? ? cb->fill(cb, str, var);
>> >> + ? ? return var;
>> >> +}
>> >> +
>> >> +#define GET_STAT_INDEXED(__VAR) ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
>> >> +uint64_t blkio_get_##__VAR##_stat(struct blkio_group *blkg, int type) ? ? ? ?\
>> >> +{ ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
>> >> + ? ? return blkg->stats.__VAR[type]; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> >> +} ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
>> >> +
>> >> +GET_STAT_INDEXED(io_service_bytes);
>> >> +GET_STAT_INDEXED(io_serviced);
>> >> +GET_STAT_INDEXED(io_service_time);
>> >> +GET_STAT_INDEXED(io_wait_time);
>> >> +#undef GET_STAT_INDEXED
>> >> +
>> >> +#define GET_STAT(__VAR) ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
>> >> +uint64_t blkio_get_##__VAR##_stat(struct blkio_group *blkg, int dummy) ? ? ? \
>> >> +{ ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
>> >> + ? ? return blkg->stats.__VAR; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> >> +}
>> >> +
>> >> +GET_STAT(time);
>> >> +GET_STAT(sectors);
>> >> +#ifdef CONFIG_DEBUG_BLK_CGROUP
>> >> +GET_STAT(dequeue);
>> >> +#endif
>> >> +#undef GET_STAT
>> >> +
>> >> +#define SHOW_FUNCTION_PER_GROUP(__VAR, get_stats, getvar, show_total) ? ? ? ?\
>> >> ?static int blkiocg_##__VAR##_read(struct cgroup *cgroup, ? ? ? ? ? ? \
>> >> - ? ? ? ? ? ? ? ? ? ? struct cftype *cftype, struct seq_file *m) ? ? ?\
>> >> + ? ? ? ? ? ? struct cftype *cftype, struct cgroup_map_cb *cb) ? ? ? ?\
>> >> ?{ ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
>> >> ? ? ? struct blkio_cgroup *blkcg; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> >> ? ? ? struct blkio_group *blkg; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> >> ? ? ? struct hlist_node *n; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> >> + ? ? uint64_t cgroup_total = 0; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
>> >> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> >> ? ? ? if (!cgroup_lock_live_group(cgroup)) ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
>> >> ? ? ? ? ? ? ? return -ENODEV; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> >> @@ -184,19 +295,32 @@ static int blkiocg_##__VAR##_read(struct cgroup *cgroup, ? ? ? ? ? ? ? ?\
>> >> ? ? ? blkcg = cgroup_to_blkio_cgroup(cgroup); ? ? ? ? ? ? ? ? ? ? ? ? \
>> >> ? ? ? rcu_read_lock(); ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
>> >> ? ? ? hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) {\
>> >> - ? ? ? ? ? ? if (blkg->dev) ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
>> >> - ? ? ? ? ? ? ? ? ? ? seq_printf(m, "%u:%u %lu\n", MAJOR(blkg->dev), ?\
>> >> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?MINOR(blkg->dev), blkg->__VAR); ? ? ? ?\
>> >> + ? ? ? ? ? ? if (blkg->dev) { ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
>> >> + ? ? ? ? ? ? ? ? ? ? spin_lock_irq(&blkg->stats_lock); ? ? ? ? ? ? ? \
>> >> + ? ? ? ? ? ? ? ? ? ? cgroup_total += get_stats(blkg, cb, getvar, ? ? \
>> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? blkg->dev); ? ? ? ? ? ? \
>> >> + ? ? ? ? ? ? ? ? ? ? spin_unlock_irq(&blkg->stats_lock); ? ? ? ? ? ? \
>> >> + ? ? ? ? ? ? } ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> >> ? ? ? } ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> >> + ? ? if (show_total) ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> >> + ? ? ? ? ? ? cb->fill(cb, "Total", cgroup_total); ? ? ? ? ? ? ? ? ? ?\
>> >> ? ? ? rcu_read_unlock(); ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
>> >> ? ? ? cgroup_unlock(); ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
>> >> ? ? ? return 0; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> >> ?}
>> >>
>> >> -SHOW_FUNCTION_PER_GROUP(time);
>> >> -SHOW_FUNCTION_PER_GROUP(sectors);
>> >> +SHOW_FUNCTION_PER_GROUP(time, blkio_get_stat, blkio_get_time_stat, 0);
>> >> +SHOW_FUNCTION_PER_GROUP(sectors, blkio_get_stat, blkio_get_sectors_stat, 0);
>> >> +SHOW_FUNCTION_PER_GROUP(io_service_bytes, blkio_get_typed_stat,
>> >> + ? ? ? ? ? ? ? ? ? ? blkio_get_io_service_bytes_stat, 1);
>> >> +SHOW_FUNCTION_PER_GROUP(io_serviced, blkio_get_typed_stat,
>> >> + ? ? ? ? ? ? ? ? ? ? blkio_get_io_serviced_stat, 1);
>> >> +SHOW_FUNCTION_PER_GROUP(io_service_time, blkio_get_typed_stat,
>> >> + ? ? ? ? ? ? ? ? ? ? blkio_get_io_service_time_stat, 1);
>> >> +SHOW_FUNCTION_PER_GROUP(io_wait_time, blkio_get_typed_stat,
>> >> + ? ? ? ? ? ? ? ? ? ? blkio_get_io_wait_time_stat, 1);
>> >> ?#ifdef CONFIG_DEBUG_BLK_CGROUP
>> >> -SHOW_FUNCTION_PER_GROUP(dequeue);
>> >> +SHOW_FUNCTION_PER_GROUP(dequeue, blkio_get_stat, blkio_get_dequeue_stat, 0);
>> >> ?#endif
>> >> ?#undef SHOW_FUNCTION_PER_GROUP
>> >>
>> >> @@ -204,7 +328,7 @@ SHOW_FUNCTION_PER_GROUP(dequeue);
>> >> ?void blkiocg_update_blkio_group_dequeue_stats(struct blkio_group *blkg,
>> >> ? ? ? ? ? ? ? ? ? ? ? unsigned long dequeue)
>> >> ?{
>> >> - ? ? blkg->dequeue += dequeue;
>> >> + ? ? blkg->stats.dequeue += dequeue;
>> >> ?}
>> >> ?EXPORT_SYMBOL_GPL(blkiocg_update_blkio_group_dequeue_stats);
>> >> ?#endif
>> >> @@ -217,16 +341,39 @@ struct cftype blkio_files[] = {
>> >> ? ? ? },
>> >> ? ? ? {
>> >> ? ? ? ? ? ? ? .name = "time",
>> >> - ? ? ? ? ? ? .read_seq_string = blkiocg_time_read,
>> >> + ? ? ? ? ? ? .read_map = blkiocg_time_read,
>> >> + ? ? ? ? ? ? .write_u64 = blkiocg_reset_write,
>> >> ? ? ? },
>> >> ? ? ? {
>> >> ? ? ? ? ? ? ? .name = "sectors",
>> >> - ? ? ? ? ? ? .read_seq_string = blkiocg_sectors_read,
>> >> + ? ? ? ? ? ? .read_map = blkiocg_sectors_read,
>> >> + ? ? ? ? ? ? .write_u64 = blkiocg_reset_write,
>> >> + ? ? },
>> >> + ? ? {
>> >> + ? ? ? ? ? ? .name = "io_service_bytes",
>> >> + ? ? ? ? ? ? .read_map = blkiocg_io_service_bytes_read,
>> >> + ? ? ? ? ? ? .write_u64 = blkiocg_reset_write,
>> >> + ? ? },
>> >> + ? ? {
>> >> + ? ? ? ? ? ? .name = "io_serviced",
>> >> + ? ? ? ? ? ? .read_map = blkiocg_io_serviced_read,
>> >> + ? ? ? ? ? ? .write_u64 = blkiocg_reset_write,
>> >> + ? ? },
>> >> + ? ? {
>> >> + ? ? ? ? ? ? .name = "io_service_time",
>> >> + ? ? ? ? ? ? .read_map = blkiocg_io_service_time_read,
>> >> + ? ? ? ? ? ? .write_u64 = blkiocg_reset_write,
>> >> + ? ? },
>> >> + ? ? {
>> >> + ? ? ? ? ? ? .name = "io_wait_time",
>> >> + ? ? ? ? ? ? .read_map = blkiocg_io_wait_time_read,
>> >> + ? ? ? ? ? ? .write_u64 = blkiocg_reset_write,
>> >> ? ? ? },
>> >> ?#ifdef CONFIG_DEBUG_BLK_CGROUP
>> >> ? ? ? ? {
>> >> ? ? ? ? ? ? ? .name = "dequeue",
>> >> - ? ? ? ? ? ? .read_seq_string = blkiocg_dequeue_read,
>> >> + ? ? ? ? ? ? .read_map = blkiocg_dequeue_read,
>> >> + ? ? ? ? ? ? .write_u64 = blkiocg_reset_write,
>> >> ? ? ? ? },
>> >> ?#endif
>> >> ?};
>> >> diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
>> >> index fe44517..e8600b0 100644
>> >> --- a/block/blk-cgroup.h
>> >> +++ b/block/blk-cgroup.h
>> >> @@ -23,6 +23,14 @@ extern struct cgroup_subsys blkio_subsys;
>> >> ?#define blkio_subsys_id blkio_subsys.subsys_id
>> >> ?#endif
>> >>
>> >> +enum io_type {
>> >> + ? ? IO_READ = 0,
>> >> + ? ? IO_WRITE,
>> >> + ? ? IO_SYNC,
>> >> + ? ? IO_ASYNC,
>> >> + ? ? IO_TYPE_TOTAL
>> >> +};
>> >> +
>> >> ?struct blkio_cgroup {
>> >> ? ? ? struct cgroup_subsys_state css;
>> >> ? ? ? unsigned int weight;
>> >> @@ -30,6 +38,23 @@ struct blkio_cgroup {
>> >> ? ? ? struct hlist_head blkg_list;
>> >> ?};
>> >>
>> >> +struct blkio_group_stats {
>> >> + ? ? /* total disk time and nr sectors dispatched by this group */
>> >> + ? ? uint64_t time;
>> >> + ? ? uint64_t sectors;
>> >> + ? ? /* Total disk time used by IOs in ns */
>> >> + ? ? uint64_t io_service_time[IO_TYPE_TOTAL];
>> >> + ? ? uint64_t io_service_bytes[IO_TYPE_TOTAL]; /* Total bytes transferred */
>> >> + ? ? /* Total IOs serviced, post merge */
>> >> + ? ? uint64_t io_serviced[IO_TYPE_TOTAL];
>> >> + ? ? /* Total time spent waiting in scheduler queue in ns */
>> >> + ? ? uint64_t io_wait_time[IO_TYPE_TOTAL];
>> >> +#ifdef CONFIG_DEBUG_BLK_CGROUP
>> >> + ? ? /* How many times this group has been removed from service tree */
>> >> + ? ? unsigned long dequeue;
>> >> +#endif
>> >> +};
>> >> +
>> >> ?struct blkio_group {
>> >> ? ? ? /* An rcu protected unique identifier for the group */
>> >> ? ? ? void *key;
>> >> @@ -38,15 +63,13 @@ struct blkio_group {
>> >> ?#ifdef CONFIG_DEBUG_BLK_CGROUP
>> >> ? ? ? /* Store cgroup path */
>> >> ? ? ? char path[128];
>> >> - ? ? /* How many times this group has been removed from service tree */
>> >> - ? ? unsigned long dequeue;
>> >> ?#endif
>> >> ? ? ? /* The device MKDEV(major, minor), this group has been created for */
>> >> - ? ? dev_t ? dev;
>> >> + ? ? dev_t dev;
>> >>
>> >> - ? ? /* total disk time and nr sectors dispatched by this group */
>> >> - ? ? unsigned long time;
>> >> - ? ? unsigned long sectors;
>> >> + ? ? /* Need to serialize the stats in the case of reset/update */
>> >> + ? ? spinlock_t stats_lock;
>> >> + ? ? struct blkio_group_stats stats;
>> >> ?};
>> >>
>> >> ?typedef void (blkio_unlink_group_fn) (void *key, struct blkio_group *blkg);
>> >> @@ -105,8 +128,8 @@ extern void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
>> >> ?extern int blkiocg_del_blkio_group(struct blkio_group *blkg);
>> >> ?extern struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg,
>> >> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? void *key);
>> >> -void blkiocg_update_blkio_group_stats(struct blkio_group *blkg,
>> >> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? unsigned long time);
>> >> +void blkiocg_update_timeslice_used(struct blkio_group *blkg,
>> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? unsigned long time);
>> >> ?#else
>> >> ?struct cgroup;
>> >> ?static inline struct blkio_cgroup *
>> >> @@ -122,7 +145,7 @@ blkiocg_del_blkio_group(struct blkio_group *blkg) { return 0; }
>> >>
>> >> ?static inline struct blkio_group *
>> >> ?blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key) { return NULL; }
>> >> -static inline void blkiocg_update_blkio_group_stats(struct blkio_group *blkg,
>> >> +static inline void blkiocg_update_timeslice_used(struct blkio_group *blkg,
>> >> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? unsigned long time) {}
>> >> ?#endif
>> >> ?#endif /* _BLK_CGROUP_H */
>> >> diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
>> >> index c18e348..d91df9f 100644
>> >> --- a/block/cfq-iosched.c
>> >> +++ b/block/cfq-iosched.c
>> >> @@ -914,7 +914,7 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
>> >>
>> >> ? ? ? cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime,
>> >> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? st->min_vdisktime);
>> >> - ? ? blkiocg_update_blkio_group_stats(&cfqg->blkg, used_sl);
>> >> + ? ? blkiocg_update_timeslice_used(&cfqg->blkg, used_sl);
>> >> ?}
>> >>
>> >> ?#ifdef CONFIG_CFQ_GROUP_IOSCHED
>> >
>