This patch adds "use_hierarchy" in Root CGroup with out any functionality.
Signed-off-by: Gui Jianfeng <[email protected]>
---
block/blk-cgroup.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++++--
block/blk-cgroup.h | 5 +++-
block/cfq-iosched.c | 24 +++++++++++++++++
3 files changed, 97 insertions(+), 4 deletions(-)
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 455768a..9747ebb 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -25,7 +25,10 @@
static DEFINE_SPINLOCK(blkio_list_lock);
static LIST_HEAD(blkio_list);
-struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
+struct blkio_cgroup blkio_root_cgroup = {
+ .weight = 2*BLKIO_WEIGHT_DEFAULT,
+ .use_hierarchy = 1,
+ };
EXPORT_SYMBOL_GPL(blkio_root_cgroup);
static struct cgroup_subsys_state *blkiocg_create(struct cgroup_subsys *,
@@ -1385,10 +1388,73 @@ struct cftype blkio_files[] = {
#endif
};
+static u64 blkiocg_use_hierarchy_read(struct cgroup *cgroup,
+ struct cftype *cftype)
+{
+ struct blkio_cgroup *blkcg;
+
+ blkcg = cgroup_to_blkio_cgroup(cgroup);
+ return (u64)blkcg->use_hierarchy;
+}
+
+static int
+blkiocg_use_hierarchy_write(struct cgroup *cgroup,
+ struct cftype *cftype, u64 val)
+{
+ struct blkio_cgroup *blkcg;
+ struct blkio_group *blkg;
+ struct hlist_node *n;
+ struct blkio_policy_type *blkiop;
+
+ blkcg = cgroup_to_blkio_cgroup(cgroup);
+
+ if (val > 1 || !list_empty(&cgroup->children))
+ return -EINVAL;
+
+ if (blkcg->use_hierarchy == val)
+ return 0;
+
+ spin_lock(&blkio_list_lock);
+ blkcg->use_hierarchy = val;
+
+ hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
+ list_for_each_entry(blkiop, &blkio_list, list) {
+ /*
+ * If this policy does not own the blkg, do not change
+ * cfq group scheduling mode.
+ */
+ if (blkiop->plid != blkg->plid)
+ continue;
+
+ if (blkiop->ops.blkio_update_use_hierarchy_fn)
+ blkiop->ops.blkio_update_use_hierarchy_fn(blkg,
+ val);
+ }
+ }
+ spin_unlock(&blkio_list_lock);
+ return 0;
+}
+
+static struct cftype blkio_use_hierarchy = {
+ .name = "use_hierarchy",
+ .read_u64 = blkiocg_use_hierarchy_read,
+ .write_u64 = blkiocg_use_hierarchy_write,
+};
+
static int blkiocg_populate(struct cgroup_subsys *subsys, struct cgroup *cgroup)
{
- return cgroup_add_files(cgroup, subsys, blkio_files,
- ARRAY_SIZE(blkio_files));
+ int ret;
+
+ ret = cgroup_add_files(cgroup, subsys, blkio_files,
+ ARRAY_SIZE(blkio_files));
+ if (ret)
+ return ret;
+
+ /* use_hierarchy is in root cgroup only. */
+ if (!cgroup->parent)
+ ret = cgroup_add_file(cgroup, subsys, &blkio_use_hierarchy);
+
+ return ret;
}
static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup)
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index ea4861b..c8caf4e 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -105,6 +105,7 @@ enum blkcg_file_name_throtl {
struct blkio_cgroup {
struct cgroup_subsys_state css;
unsigned int weight;
+ bool use_hierarchy;
spinlock_t lock;
struct hlist_head blkg_list;
struct list_head policy_list; /* list of blkio_policy_node */
@@ -200,7 +201,8 @@ typedef void (blkio_update_group_read_iops_fn) (void *key,
struct blkio_group *blkg, unsigned int read_iops);
typedef void (blkio_update_group_write_iops_fn) (void *key,
struct blkio_group *blkg, unsigned int write_iops);
-
+typedef void (blkio_update_use_hierarchy_fn) (struct blkio_group *blkg,
+ bool val);
struct blkio_policy_ops {
blkio_unlink_group_fn *blkio_unlink_group_fn;
blkio_update_group_weight_fn *blkio_update_group_weight_fn;
@@ -208,6 +210,7 @@ struct blkio_policy_ops {
blkio_update_group_write_bps_fn *blkio_update_group_write_bps_fn;
blkio_update_group_read_iops_fn *blkio_update_group_read_iops_fn;
blkio_update_group_write_iops_fn *blkio_update_group_write_iops_fn;
+ blkio_update_use_hierarchy_fn *blkio_update_use_hierarchy_fn;
};
struct blkio_policy_type {
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index d90627e..08323f5 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -192,6 +192,9 @@ struct cfq_group {
/* cfq group sched entity */
struct cfq_entity cfqe;
+ /* parent cfq_data */
+ struct cfq_data *cfqd;
+
/* number of cfqq currently on this group */
int nr_cfqq;
@@ -235,6 +238,9 @@ struct cfq_data {
struct request_queue *queue;
struct cfq_group root_group;
+ /* cfq group schedule in flat or hierarchy manner. */
+ bool use_hierarchy;
+
/*
* The priority currently being served
*/
@@ -1091,6 +1097,15 @@ void cfq_update_blkio_group_weight(void *key, struct blkio_group *blkg,
cfqg_of_blkg(blkg)->cfqe.weight = weight;
}
+void
+cfq_update_blkio_use_hierarchy(struct blkio_group *blkg, bool val)
+{
+ struct cfq_group *cfqg;
+
+ cfqg = cfqg_of_blkg(blkg);
+ cfqg->cfqd->use_hierarchy = val;
+}
+
static void init_cfqe(struct blkio_cgroup *blkcg,
struct cfq_group *cfqg)
{
@@ -1121,6 +1136,9 @@ static void init_cfqg(struct cfq_data *cfqd, struct blkio_cgroup *blkcg,
*/
atomic_set(&cfqg->ref, 1);
+ /* Setup cfq data for cfq group */
+ cfqg->cfqd = cfqd;
+
/* Add group onto cgroup list */
sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd,
@@ -4164,6 +4182,7 @@ static void *cfq_init_queue(struct request_queue *q)
/* Init root group */
cfqg = &cfqd->root_group;
+ cfqg->cfqd = cfqd;
for_each_cfqg_st(cfqg, i, j, st)
*st = CFQ_RB_ROOT;
RB_CLEAR_NODE(&cfqg->cfqe.rb_node);
@@ -4224,6 +4243,10 @@ static void *cfq_init_queue(struct request_queue *q)
cfqd->cfq_latency = 1;
cfqd->cfq_group_isolation = 0;
cfqd->hw_tag = -1;
+
+ /* hierarchical scheduling for cfq group by default */
+ cfqd->use_hierarchy = 1;
+
/*
* we optimistically start assuming sync ops weren't delayed in last
* second, in order to have larger depth for async operations.
@@ -4386,6 +4409,7 @@ static struct blkio_policy_type blkio_policy_cfq = {
.ops = {
.blkio_unlink_group_fn = cfq_unlink_blkio_group,
.blkio_update_group_weight_fn = cfq_update_blkio_group_weight,
+ .blkio_update_use_hierarchy_fn = cfq_update_blkio_use_hierarchy,
},
.plid = BLKIO_POLICY_PROP,
};
--
1.6.5.2
On Mon, Dec 13, 2010 at 09:45:07AM +0800, Gui Jianfeng wrote:
> This patch adds "use_hierarchy" in Root CGroup with out any functionality.
>
> Signed-off-by: Gui Jianfeng <[email protected]>
> ---
> block/blk-cgroup.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++++--
> block/blk-cgroup.h | 5 +++-
> block/cfq-iosched.c | 24 +++++++++++++++++
> 3 files changed, 97 insertions(+), 4 deletions(-)
>
> diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
> index 455768a..9747ebb 100644
> --- a/block/blk-cgroup.c
> +++ b/block/blk-cgroup.c
> @@ -25,7 +25,10 @@
> static DEFINE_SPINLOCK(blkio_list_lock);
> static LIST_HEAD(blkio_list);
>
> -struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
> +struct blkio_cgroup blkio_root_cgroup = {
> + .weight = 2*BLKIO_WEIGHT_DEFAULT,
> + .use_hierarchy = 1,
Currently flat mode is the default. Lets not change the default. So lets
start with use_hierarchy = 0.
Secondly, why don't you make it per cgroup something along the lines of
memory controller where one can start the hierarchy lower in the cgroup
chain and not necessarily at the root. This way we can avoid some
accounting overhead for all the groups which are non-hierarchical.
> + };
> EXPORT_SYMBOL_GPL(blkio_root_cgroup);
>
> static struct cgroup_subsys_state *blkiocg_create(struct cgroup_subsys *,
> @@ -1385,10 +1388,73 @@ struct cftype blkio_files[] = {
> #endif
> };
>
> +static u64 blkiocg_use_hierarchy_read(struct cgroup *cgroup,
> + struct cftype *cftype)
> +{
> + struct blkio_cgroup *blkcg;
> +
> + blkcg = cgroup_to_blkio_cgroup(cgroup);
> + return (u64)blkcg->use_hierarchy;
> +}
> +
> +static int
> +blkiocg_use_hierarchy_write(struct cgroup *cgroup,
> + struct cftype *cftype, u64 val)
> +{
> + struct blkio_cgroup *blkcg;
> + struct blkio_group *blkg;
> + struct hlist_node *n;
> + struct blkio_policy_type *blkiop;
> +
> + blkcg = cgroup_to_blkio_cgroup(cgroup);
> +
> + if (val > 1 || !list_empty(&cgroup->children))
> + return -EINVAL;
> +
> + if (blkcg->use_hierarchy == val)
> + return 0;
> +
> + spin_lock(&blkio_list_lock);
> + blkcg->use_hierarchy = val;
> +
> + hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
> + list_for_each_entry(blkiop, &blkio_list, list) {
> + /*
> + * If this policy does not own the blkg, do not change
> + * cfq group scheduling mode.
> + */
> + if (blkiop->plid != blkg->plid)
> + continue;
> +
> + if (blkiop->ops.blkio_update_use_hierarchy_fn)
> + blkiop->ops.blkio_update_use_hierarchy_fn(blkg,
> + val);
Should we really allow this? I mean allow changing hierarchy of a group
when there are already children groups. I think memory controller does
not allow this. We can design along the same lines. Keep use_hierarchy
as 0 by default. Allow changing it only if there are no children cgroups.
Otherwise we shall have to send notifications to subscribing policies
and then change their structure etc. Lets keep it simple.
I was playing with a use_hierarhcy patch for throttling and parts have been
copied from memory controller. I am attaching that with the mail and see if
you can make that working.
---
block/blk-cgroup.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
block/blk-cgroup.h | 2 +
2 files changed, 60 insertions(+), 1 deletion(-)
Index: linux-2.6/block/blk-cgroup.c
===================================================================
--- linux-2.6.orig/block/blk-cgroup.c 2010-11-19 10:30:27.129704770 -0500
+++ linux-2.6/block/blk-cgroup.c 2010-11-19 10:30:29.885671705 -0500
@@ -1214,6 +1214,39 @@ static int blkio_weight_write(struct blk
return 0;
}
+static int blkio_throtl_use_hierarchy_write(struct cgroup *cgrp, u64 val)
+{
+ struct cgroup *parent = cgrp->parent;
+ struct blkio_cgroup *blkcg, *parent_blkcg;
+ int ret = 0;
+
+ if (val != 0 || val != 1)
+ return -EINVAL;
+
+ blkcg = cgroup_to_blkio_cgroup(cgrp);
+ if (parent)
+ parent_blkcg = cgroup_to_blkio_cgroup(parent);
+
+ cgroup_lock();
+ /*
+ * If parent's use_hierarchy is set, we can't make any modifications
+ * in the child subtrees. If it is unset, then the change can
+ * occur, provided the current cgroup has no children.
+ *
+ * For the root cgroup, parent_mem is NULL, we allow value to be
+ * set if there are no children.
+ */
+ if (!parent_blkcg || !parent_blkcg->throtl_use_hier) {
+ if (list_empty(&cgrp->children))
+ blkcg->throtl_use_hier = val;
+ else
+ ret = -EBUSY;
+ } else
+ ret = -EINVAL;
+ cgroup_unlock();
+ return ret;
+}
+
static u64 blkiocg_file_read_u64 (struct cgroup *cgrp, struct cftype *cft) {
struct blkio_cgroup *blkcg;
enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
@@ -1228,6 +1261,12 @@ static u64 blkiocg_file_read_u64 (struct
return (u64)blkcg->weight;
}
break;
+ case BLKIO_POLICY_THROTL:
+ switch(name) {
+ case BLKIO_THROTL_use_hierarchy:
+ return (u64)blkcg->throtl_use_hier;
+ }
+ break;
default:
BUG();
}
@@ -1250,6 +1289,12 @@ blkiocg_file_write_u64(struct cgroup *cg
return blkio_weight_write(blkcg, val);
}
break;
+ case BLKIO_POLICY_THROTL:
+ switch(name) {
+ case BLKIO_THROTL_use_hierarchy:
+ return blkio_throtl_use_hierarchy_write(cgrp, val);
+ }
+ break;
default:
BUG();
}
@@ -1373,6 +1418,13 @@ struct cftype blkio_files[] = {
BLKIO_THROTL_io_serviced),
.read_map = blkiocg_file_read_map,
},
+ {
+ .name = "throttle.use_hierarchy",
+ .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
+ BLKIO_THROTL_use_hierarchy),
+ .read_u64 = blkiocg_file_read_u64,
+ .write_u64 = blkiocg_file_write_u64,
+ },
#endif /* CONFIG_BLK_DEV_THROTTLING */
#ifdef CONFIG_DEBUG_BLK_CGROUP
@@ -1470,7 +1522,7 @@ static void blkiocg_destroy(struct cgrou
static struct cgroup_subsys_state *
blkiocg_create(struct cgroup_subsys *subsys, struct cgroup *cgroup)
{
- struct blkio_cgroup *blkcg;
+ struct blkio_cgroup *blkcg, *parent_blkcg = NULL;
struct cgroup *parent = cgroup->parent;
if (!parent) {
@@ -1483,11 +1535,16 @@ blkiocg_create(struct cgroup_subsys *sub
return ERR_PTR(-ENOMEM);
blkcg->weight = BLKIO_WEIGHT_DEFAULT;
+ parent_blkcg = cgroup_to_blkio_cgroup(parent);
done:
spin_lock_init(&blkcg->lock);
INIT_HLIST_HEAD(&blkcg->blkg_list);
INIT_LIST_HEAD(&blkcg->policy_list);
+ if (parent)
+ blkcg->throtl_use_hier = parent_blkcg->throtl_use_hier;
+ else
+ blkcg->throtl_use_hier = 0;
return &blkcg->css;
}
Index: linux-2.6/block/blk-cgroup.h
===================================================================
--- linux-2.6.orig/block/blk-cgroup.h 2010-11-19 10:15:56.321149940 -0500
+++ linux-2.6/block/blk-cgroup.h 2010-11-19 10:30:29.885671705 -0500
@@ -100,11 +100,13 @@ enum blkcg_file_name_throtl {
BLKIO_THROTL_write_iops_device,
BLKIO_THROTL_io_service_bytes,
BLKIO_THROTL_io_serviced,
+ BLKIO_THROTL_use_hierarchy,
};
struct blkio_cgroup {
struct cgroup_subsys_state css;
unsigned int weight;
+ bool throtl_use_hier;
spinlock_t lock;
struct hlist_head blkg_list;
/*
Vivek Goyal wrote:
> On Mon, Dec 13, 2010 at 09:45:07AM +0800, Gui Jianfeng wrote:
>> This patch adds "use_hierarchy" in Root CGroup with out any functionality.
>>
>> Signed-off-by: Gui Jianfeng <[email protected]>
>> ---
>> block/blk-cgroup.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++++--
>> block/blk-cgroup.h | 5 +++-
>> block/cfq-iosched.c | 24 +++++++++++++++++
>> 3 files changed, 97 insertions(+), 4 deletions(-)
>>
>> diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
>> index 455768a..9747ebb 100644
>> --- a/block/blk-cgroup.c
>> +++ b/block/blk-cgroup.c
>> @@ -25,7 +25,10 @@
>> static DEFINE_SPINLOCK(blkio_list_lock);
>> static LIST_HEAD(blkio_list);
>>
>> -struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
>> +struct blkio_cgroup blkio_root_cgroup = {
>> + .weight = 2*BLKIO_WEIGHT_DEFAULT,
>> + .use_hierarchy = 1,
>
> Currently flat mode is the default. Lets not change the default. So lets
> start with use_hierarchy = 0.
OK, will do.
>
> Secondly, why don't you make it per cgroup something along the lines of
> memory controller where one can start the hierarchy lower in the cgroup
> chain and not necessarily at the root. This way we can avoid some
> accounting overhead for all the groups which are non-hierarchical.
I'm not sure whether there's a actual use case that needs per cgroup "use_hierarchy".
So for first step, I just give a global "use_hierarchy" in root group. If there're
some actual requirements that need per cgroup "use_hierarchy". We may add the feature
later.
>
>> + };
>> EXPORT_SYMBOL_GPL(blkio_root_cgroup);
>>
>> static struct cgroup_subsys_state *blkiocg_create(struct cgroup_subsys *,
>> @@ -1385,10 +1388,73 @@ struct cftype blkio_files[] = {
>> #endif
>> };
>>
>> +static u64 blkiocg_use_hierarchy_read(struct cgroup *cgroup,
>> + struct cftype *cftype)
>> +{
>> + struct blkio_cgroup *blkcg;
>> +
>> + blkcg = cgroup_to_blkio_cgroup(cgroup);
>> + return (u64)blkcg->use_hierarchy;
>> +}
>> +
>> +static int
>> +blkiocg_use_hierarchy_write(struct cgroup *cgroup,
>> + struct cftype *cftype, u64 val)
>> +{
>> + struct blkio_cgroup *blkcg;
>> + struct blkio_group *blkg;
>> + struct hlist_node *n;
>> + struct blkio_policy_type *blkiop;
>> +
>> + blkcg = cgroup_to_blkio_cgroup(cgroup);
>> +
>> + if (val > 1 || !list_empty(&cgroup->children))
>> + return -EINVAL;
>> +
>> + if (blkcg->use_hierarchy == val)
>> + return 0;
>> +
>> + spin_lock(&blkio_list_lock);
>> + blkcg->use_hierarchy = val;
>> +
>> + hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
>> + list_for_each_entry(blkiop, &blkio_list, list) {
>> + /*
>> + * If this policy does not own the blkg, do not change
>> + * cfq group scheduling mode.
>> + */
>> + if (blkiop->plid != blkg->plid)
>> + continue;
>> +
>> + if (blkiop->ops.blkio_update_use_hierarchy_fn)
>> + blkiop->ops.blkio_update_use_hierarchy_fn(blkg,
>> + val);
>
> Should we really allow this? I mean allow changing hierarchy of a group
> when there are already children groups. I think memory controller does
> not allow this. We can design along the same lines. Keep use_hierarchy
> as 0 by default. Allow changing it only if there are no children cgroups.
> Otherwise we shall have to send notifications to subscribing policies
> and then change their structure etc. Lets keep it simple.
Yes, I really don't allow changing use_hierarchy if there are childre cgroups.
Please consider following line in my patch.
if (val > 1 || !list_empty(&cgroup->children))
return -EINVAL;
>
> I was playing with a use_hierarhcy patch for throttling and parts have been
> copied from memory controller. I am attaching that with the mail and see if
> you can make that working.
Thanks
Gui
>
> ---
> block/blk-cgroup.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
> block/blk-cgroup.h | 2 +
> 2 files changed, 60 insertions(+), 1 deletion(-)
>
> Index: linux-2.6/block/blk-cgroup.c
> ===================================================================
> --- linux-2.6.orig/block/blk-cgroup.c 2010-11-19 10:30:27.129704770 -0500
> +++ linux-2.6/block/blk-cgroup.c 2010-11-19 10:30:29.885671705 -0500
> @@ -1214,6 +1214,39 @@ static int blkio_weight_write(struct blk
> return 0;
> }
>
> +static int blkio_throtl_use_hierarchy_write(struct cgroup *cgrp, u64 val)
> +{
> + struct cgroup *parent = cgrp->parent;
> + struct blkio_cgroup *blkcg, *parent_blkcg;
> + int ret = 0;
> +
> + if (val != 0 || val != 1)
> + return -EINVAL;
> +
> + blkcg = cgroup_to_blkio_cgroup(cgrp);
> + if (parent)
> + parent_blkcg = cgroup_to_blkio_cgroup(parent);
> +
> + cgroup_lock();
> + /*
> + * If parent's use_hierarchy is set, we can't make any modifications
> + * in the child subtrees. If it is unset, then the change can
> + * occur, provided the current cgroup has no children.
> + *
> + * For the root cgroup, parent_mem is NULL, we allow value to be
> + * set if there are no children.
> + */
> + if (!parent_blkcg || !parent_blkcg->throtl_use_hier) {
> + if (list_empty(&cgrp->children))
> + blkcg->throtl_use_hier = val;
> + else
> + ret = -EBUSY;
> + } else
> + ret = -EINVAL;
> + cgroup_unlock();
> + return ret;
> +}
> +
> static u64 blkiocg_file_read_u64 (struct cgroup *cgrp, struct cftype *cft) {
> struct blkio_cgroup *blkcg;
> enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
> @@ -1228,6 +1261,12 @@ static u64 blkiocg_file_read_u64 (struct
> return (u64)blkcg->weight;
> }
> break;
> + case BLKIO_POLICY_THROTL:
> + switch(name) {
> + case BLKIO_THROTL_use_hierarchy:
> + return (u64)blkcg->throtl_use_hier;
> + }
> + break;
> default:
> BUG();
> }
> @@ -1250,6 +1289,12 @@ blkiocg_file_write_u64(struct cgroup *cg
> return blkio_weight_write(blkcg, val);
> }
> break;
> + case BLKIO_POLICY_THROTL:
> + switch(name) {
> + case BLKIO_THROTL_use_hierarchy:
> + return blkio_throtl_use_hierarchy_write(cgrp, val);
> + }
> + break;
> default:
> BUG();
> }
> @@ -1373,6 +1418,13 @@ struct cftype blkio_files[] = {
> BLKIO_THROTL_io_serviced),
> .read_map = blkiocg_file_read_map,
> },
> + {
> + .name = "throttle.use_hierarchy",
> + .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
> + BLKIO_THROTL_use_hierarchy),
> + .read_u64 = blkiocg_file_read_u64,
> + .write_u64 = blkiocg_file_write_u64,
> + },
> #endif /* CONFIG_BLK_DEV_THROTTLING */
>
> #ifdef CONFIG_DEBUG_BLK_CGROUP
> @@ -1470,7 +1522,7 @@ static void blkiocg_destroy(struct cgrou
> static struct cgroup_subsys_state *
> blkiocg_create(struct cgroup_subsys *subsys, struct cgroup *cgroup)
> {
> - struct blkio_cgroup *blkcg;
> + struct blkio_cgroup *blkcg, *parent_blkcg = NULL;
> struct cgroup *parent = cgroup->parent;
>
> if (!parent) {
> @@ -1483,11 +1535,16 @@ blkiocg_create(struct cgroup_subsys *sub
> return ERR_PTR(-ENOMEM);
>
> blkcg->weight = BLKIO_WEIGHT_DEFAULT;
> + parent_blkcg = cgroup_to_blkio_cgroup(parent);
> done:
> spin_lock_init(&blkcg->lock);
> INIT_HLIST_HEAD(&blkcg->blkg_list);
>
> INIT_LIST_HEAD(&blkcg->policy_list);
> + if (parent)
> + blkcg->throtl_use_hier = parent_blkcg->throtl_use_hier;
> + else
> + blkcg->throtl_use_hier = 0;
> return &blkcg->css;
> }
>
> Index: linux-2.6/block/blk-cgroup.h
> ===================================================================
> --- linux-2.6.orig/block/blk-cgroup.h 2010-11-19 10:15:56.321149940 -0500
> +++ linux-2.6/block/blk-cgroup.h 2010-11-19 10:30:29.885671705 -0500
> @@ -100,11 +100,13 @@ enum blkcg_file_name_throtl {
> BLKIO_THROTL_write_iops_device,
> BLKIO_THROTL_io_service_bytes,
> BLKIO_THROTL_io_serviced,
> + BLKIO_THROTL_use_hierarchy,
> };
>
> struct blkio_cgroup {
> struct cgroup_subsys_state css;
> unsigned int weight;
> + bool throtl_use_hier;
> spinlock_t lock;
> struct hlist_head blkg_list;
> /*
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
>
On Thu, Dec 16, 2010 at 10:42:42AM +0800, Gui Jianfeng wrote:
> Vivek Goyal wrote:
> > On Mon, Dec 13, 2010 at 09:45:07AM +0800, Gui Jianfeng wrote:
> >> This patch adds "use_hierarchy" in Root CGroup with out any functionality.
> >>
> >> Signed-off-by: Gui Jianfeng <[email protected]>
> >> ---
> >> block/blk-cgroup.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++++--
> >> block/blk-cgroup.h | 5 +++-
> >> block/cfq-iosched.c | 24 +++++++++++++++++
> >> 3 files changed, 97 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
> >> index 455768a..9747ebb 100644
> >> --- a/block/blk-cgroup.c
> >> +++ b/block/blk-cgroup.c
> >> @@ -25,7 +25,10 @@
> >> static DEFINE_SPINLOCK(blkio_list_lock);
> >> static LIST_HEAD(blkio_list);
> >>
> >> -struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
> >> +struct blkio_cgroup blkio_root_cgroup = {
> >> + .weight = 2*BLKIO_WEIGHT_DEFAULT,
> >> + .use_hierarchy = 1,
> >
> > Currently flat mode is the default. Lets not change the default. So lets
> > start with use_hierarchy = 0.
>
> OK, will do.
>
> >
> > Secondly, why don't you make it per cgroup something along the lines of
> > memory controller where one can start the hierarchy lower in the cgroup
> > chain and not necessarily at the root. This way we can avoid some
> > accounting overhead for all the groups which are non-hierarchical.
>
> I'm not sure whether there's a actual use case that needs per cgroup "use_hierarchy".
> So for first step, I just give a global "use_hierarchy" in root group. If there're
> some actual requirements that need per cgroup "use_hierarchy". We may add the feature
> later.
>
I think there is some use case. Currently libvirt creates its own cgroups
for each VM. Depending on what cgroup libvirtd has been placed when it
started, it starts creating cgroups from there. So depending on distro,
one might mount blkio controller at /cgroup/blkio by default and then
libcgroup will create its own cgroups from there.
Now as of today, default is flat so the packages which takes care of
mounting blkio controller, I am not expecting them to suddenly change
default to using hierarchy.
Now if libvirt goes on to create its own cgroups under root cgroup
(/cgroup/blkio), then libvirt can't switch it to hierarchical even if
it wants to as children cgroups have already been created under root
and anyway libvirt is not supposed to control the settings of
use_hierarchy of root group.
So if we allow that a hierarchy can be defined from a child node, then
libvirt can easily do it only for its sub hierarchy.
pivot
/ \
root libvirtd
/ \
vm1 vm2
Here root will have use_hierarchy=0 and libvirtd will have use_hierarchy=1
Secondly, I am beginning to believe that overhead of updating the in
all the group of hierarchy might have significant overhead (though I don't
have the data yet) but you will take blkcg->stats_lock of each cgroup
in the path for each IO completion and CFQ updates so many stats. So
there also it might make sense that let libvirtd set use_hierarchy=1
if it needs to and incur additional overhead but global default will
not be run with use_hierarchy=1. I think libvirtd mounts memory controller
as of today with use_hierarchy=0.
Also I don't think it is lot of extra code to support per cgroup
use_hierarchy. So to me it makes sense to do it right now. I am more
concerned about getting it right now because it is part of user interface.
If we introduce something now and then change it 2 releases town the line
the we are stuck with one more convetntion of use_hiearchy and support it
making life even more complicated.
So I would say that do think through it and it should not be lot of extra
code to support it.
> >
> >> + };
> >> EXPORT_SYMBOL_GPL(blkio_root_cgroup);
> >>
> >> static struct cgroup_subsys_state *blkiocg_create(struct cgroup_subsys *,
> >> @@ -1385,10 +1388,73 @@ struct cftype blkio_files[] = {
> >> #endif
> >> };
> >>
> >> +static u64 blkiocg_use_hierarchy_read(struct cgroup *cgroup,
> >> + struct cftype *cftype)
> >> +{
> >> + struct blkio_cgroup *blkcg;
> >> +
> >> + blkcg = cgroup_to_blkio_cgroup(cgroup);
> >> + return (u64)blkcg->use_hierarchy;
> >> +}
> >> +
> >> +static int
> >> +blkiocg_use_hierarchy_write(struct cgroup *cgroup,
> >> + struct cftype *cftype, u64 val)
> >> +{
> >> + struct blkio_cgroup *blkcg;
> >> + struct blkio_group *blkg;
> >> + struct hlist_node *n;
> >> + struct blkio_policy_type *blkiop;
> >> +
> >> + blkcg = cgroup_to_blkio_cgroup(cgroup);
> >> +
> >> + if (val > 1 || !list_empty(&cgroup->children))
> >> + return -EINVAL;
> >> +
> >> + if (blkcg->use_hierarchy == val)
> >> + return 0;
> >> +
> >> + spin_lock(&blkio_list_lock);
> >> + blkcg->use_hierarchy = val;
> >> +
> >> + hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
> >> + list_for_each_entry(blkiop, &blkio_list, list) {
> >> + /*
> >> + * If this policy does not own the blkg, do not change
> >> + * cfq group scheduling mode.
> >> + */
> >> + if (blkiop->plid != blkg->plid)
> >> + continue;
> >> +
> >> + if (blkiop->ops.blkio_update_use_hierarchy_fn)
> >> + blkiop->ops.blkio_update_use_hierarchy_fn(blkg,
> >> + val);
> >
> > Should we really allow this? I mean allow changing hierarchy of a group
> > when there are already children groups. I think memory controller does
> > not allow this. We can design along the same lines. Keep use_hierarchy
> > as 0 by default. Allow changing it only if there are no children cgroups.
> > Otherwise we shall have to send notifications to subscribing policies
> > and then change their structure etc. Lets keep it simple.
>
> Yes, I really don't allow changing use_hierarchy if there are childre cgroups.
> Please consider following line in my patch.
>
> if (val > 1 || !list_empty(&cgroup->children))
> return -EINVAL;
If there are no children cgroups, then there can not be any children blkg
and there is no need to send any per blkg notification to each policy?
Thanks
Vivek
Vivek Goyal wrote:
> On Thu, Dec 16, 2010 at 10:42:42AM +0800, Gui Jianfeng wrote:
>> Vivek Goyal wrote:
>>> On Mon, Dec 13, 2010 at 09:45:07AM +0800, Gui Jianfeng wrote:
>>>> This patch adds "use_hierarchy" in Root CGroup with out any functionality.
>>>>
>>>> Signed-off-by: Gui Jianfeng <[email protected]>
>>>> ---
>>>> block/blk-cgroup.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++++--
>>>> block/blk-cgroup.h | 5 +++-
>>>> block/cfq-iosched.c | 24 +++++++++++++++++
>>>> 3 files changed, 97 insertions(+), 4 deletions(-)
>>>>
>>>> diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
>>>> index 455768a..9747ebb 100644
>>>> --- a/block/blk-cgroup.c
>>>> +++ b/block/blk-cgroup.c
>>>> @@ -25,7 +25,10 @@
>>>> static DEFINE_SPINLOCK(blkio_list_lock);
>>>> static LIST_HEAD(blkio_list);
>>>>
>>>> -struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
>>>> +struct blkio_cgroup blkio_root_cgroup = {
>>>> + .weight = 2*BLKIO_WEIGHT_DEFAULT,
>>>> + .use_hierarchy = 1,
>>> Currently flat mode is the default. Lets not change the default. So lets
>>> start with use_hierarchy = 0.
>> OK, will do.
>>
>>> Secondly, why don't you make it per cgroup something along the lines of
>>> memory controller where one can start the hierarchy lower in the cgroup
>>> chain and not necessarily at the root. This way we can avoid some
>>> accounting overhead for all the groups which are non-hierarchical.
>> I'm not sure whether there's a actual use case that needs per cgroup "use_hierarchy".
>> So for first step, I just give a global "use_hierarchy" in root group. If there're
>> some actual requirements that need per cgroup "use_hierarchy". We may add the feature
>> later.
>>
>
> I think there is some use case. Currently libvirt creates its own cgroups
> for each VM. Depending on what cgroup libvirtd has been placed when it
> started, it starts creating cgroups from there. So depending on distro,
> one might mount blkio controller at /cgroup/blkio by default and then
> libcgroup will create its own cgroups from there.
>
> Now as of today, default is flat so the packages which takes care of
> mounting blkio controller, I am not expecting them to suddenly change
> default to using hierarchy.
>
> Now if libvirt goes on to create its own cgroups under root cgroup
> (/cgroup/blkio), then libvirt can't switch it to hierarchical even if
> it wants to as children cgroups have already been created under root
> and anyway libvirt is not supposed to control the settings of
> use_hierarchy of root group.
>
> So if we allow that a hierarchy can be defined from a child node, then
> libvirt can easily do it only for its sub hierarchy.
>
> pivot
> / \
> root libvirtd
> / \
> vm1 vm2
>
> Here root will have use_hierarchy=0 and libvirtd will have use_hierarchy=1
>
> Secondly, I am beginning to believe that overhead of updating the in
> all the group of hierarchy might have significant overhead (though I don't
> have the data yet) but you will take blkcg->stats_lock of each cgroup
> in the path for each IO completion and CFQ updates so many stats. So
> there also it might make sense that let libvirtd set use_hierarchy=1
> if it needs to and incur additional overhead but global default will
> not be run with use_hierarchy=1. I think libvirtd mounts memory controller
> as of today with use_hierarchy=0.
>
> Also I don't think it is lot of extra code to support per cgroup
> use_hierarchy. So to me it makes sense to do it right now. I am more
> concerned about getting it right now because it is part of user interface.
> If we introduce something now and then change it 2 releases town the line
> the we are stuck with one more convetntion of use_hiearchy and support it
> making life even more complicated.
>
> So I would say that do think through it and it should not be lot of extra
> code to support it.
>
>>>> + };
>>>> EXPORT_SYMBOL_GPL(blkio_root_cgroup);
>>>>
>>>> static struct cgroup_subsys_state *blkiocg_create(struct cgroup_subsys *,
>>>> @@ -1385,10 +1388,73 @@ struct cftype blkio_files[] = {
>>>> #endif
>>>> };
>>>>
>>>> +static u64 blkiocg_use_hierarchy_read(struct cgroup *cgroup,
>>>> + struct cftype *cftype)
>>>> +{
>>>> + struct blkio_cgroup *blkcg;
>>>> +
>>>> + blkcg = cgroup_to_blkio_cgroup(cgroup);
>>>> + return (u64)blkcg->use_hierarchy;
>>>> +}
>>>> +
>>>> +static int
>>>> +blkiocg_use_hierarchy_write(struct cgroup *cgroup,
>>>> + struct cftype *cftype, u64 val)
>>>> +{
>>>> + struct blkio_cgroup *blkcg;
>>>> + struct blkio_group *blkg;
>>>> + struct hlist_node *n;
>>>> + struct blkio_policy_type *blkiop;
>>>> +
>>>> + blkcg = cgroup_to_blkio_cgroup(cgroup);
>>>> +
>>>> + if (val > 1 || !list_empty(&cgroup->children))
>>>> + return -EINVAL;
>>>> +
>>>> + if (blkcg->use_hierarchy == val)
>>>> + return 0;
>>>> +
>>>> + spin_lock(&blkio_list_lock);
>>>> + blkcg->use_hierarchy = val;
>>>> +
>>>> + hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
>>>> + list_for_each_entry(blkiop, &blkio_list, list) {
>>>> + /*
>>>> + * If this policy does not own the blkg, do not change
>>>> + * cfq group scheduling mode.
>>>> + */
>>>> + if (blkiop->plid != blkg->plid)
>>>> + continue;
>>>> +
>>>> + if (blkiop->ops.blkio_update_use_hierarchy_fn)
>>>> + blkiop->ops.blkio_update_use_hierarchy_fn(blkg,
>>>> + val);
>>> Should we really allow this? I mean allow changing hierarchy of a group
>>> when there are already children groups. I think memory controller does
>>> not allow this. We can design along the same lines. Keep use_hierarchy
>>> as 0 by default. Allow changing it only if there are no children cgroups.
>>> Otherwise we shall have to send notifications to subscribing policies
>>> and then change their structure etc. Lets keep it simple.
>> Yes, I really don't allow changing use_hierarchy if there are childre cgroups.
>> Please consider following line in my patch.
>>
>> if (val > 1 || !list_empty(&cgroup->children))
>> return -EINVAL;
>
> If there are no children cgroups, then there can not be any children blkg
> and there is no need to send any per blkg notification to each policy?
Firsly, In my patch, per blkg notification only happens on root blkg.
Secondly, root cfqg is put onto "flat_service_tree" in flat mode,
where for hierarchical mode, it don't belongs to anybody. When switching, it
has to inform root cfqg to switch onto or switch off "flat_service_tree".
Anyway, If we're going to put root cfqg onto grp_service_tree regardless of
flat or hierarchical mode, this piece of code can be gone.
Thanks,
Gui
>
> Thanks
> Vivek
>
On Fri, Dec 17, 2010 at 11:06:46AM +0800, Gui Jianfeng wrote:
[..]
> >>>> +static int
> >>>> +blkiocg_use_hierarchy_write(struct cgroup *cgroup,
> >>>> + struct cftype *cftype, u64 val)
> >>>> +{
> >>>> + struct blkio_cgroup *blkcg;
> >>>> + struct blkio_group *blkg;
> >>>> + struct hlist_node *n;
> >>>> + struct blkio_policy_type *blkiop;
> >>>> +
> >>>> + blkcg = cgroup_to_blkio_cgroup(cgroup);
> >>>> +
> >>>> + if (val > 1 || !list_empty(&cgroup->children))
> >>>> + return -EINVAL;
> >>>> +
> >>>> + if (blkcg->use_hierarchy == val)
> >>>> + return 0;
> >>>> +
> >>>> + spin_lock(&blkio_list_lock);
> >>>> + blkcg->use_hierarchy = val;
> >>>> +
> >>>> + hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
> >>>> + list_for_each_entry(blkiop, &blkio_list, list) {
> >>>> + /*
> >>>> + * If this policy does not own the blkg, do not change
> >>>> + * cfq group scheduling mode.
> >>>> + */
> >>>> + if (blkiop->plid != blkg->plid)
> >>>> + continue;
> >>>> +
> >>>> + if (blkiop->ops.blkio_update_use_hierarchy_fn)
> >>>> + blkiop->ops.blkio_update_use_hierarchy_fn(blkg,
> >>>> + val);
> >>> Should we really allow this? I mean allow changing hierarchy of a group
> >>> when there are already children groups. I think memory controller does
> >>> not allow this. We can design along the same lines. Keep use_hierarchy
> >>> as 0 by default. Allow changing it only if there are no children cgroups.
> >>> Otherwise we shall have to send notifications to subscribing policies
> >>> and then change their structure etc. Lets keep it simple.
> >> Yes, I really don't allow changing use_hierarchy if there are childre cgroups.
> >> Please consider following line in my patch.
> >>
> >> if (val > 1 || !list_empty(&cgroup->children))
> >> return -EINVAL;
> >
> > If there are no children cgroups, then there can not be any children blkg
> > and there is no need to send any per blkg notification to each policy?
>
> Firsly, In my patch, per blkg notification only happens on root blkg.
> Secondly, root cfqg is put onto "flat_service_tree" in flat mode,
> where for hierarchical mode, it don't belongs to anybody. When switching, it
> has to inform root cfqg to switch onto or switch off "flat_service_tree".
>
> Anyway, If we're going to put root cfqg onto grp_service_tree regardless of
> flat or hierarchical mode, this piece of code can be gone.
>
Exactly. Keeping everything on grp_service_tree() both for flat and
hierarchical mode will make sure no root group moving around business
and no notifications when use_hierarchy is set.
Thanks
Vivek