2008-10-23 05:43:21

by Bharata B Rao

[permalink] [raw]
Subject: [PATCH] Add hierarchical accounting to cpu accounting controller

Add hierarchical accounting to cpu accounting controller

Currently, while charging the task's cputime to its accounting group,
the accounting group hierarchy isn't updated. This patch charges the cputime
of a task to its accounting group and all its parent accounting groups.

Reported-by: Srivatsa Vaddagiri <[email protected]>
Signed-off-by: Bharata B Rao <[email protected]>
CC: Peter Zijlstra <[email protected]>
CC: Ingo Molnar <[email protected]>
---
kernel/sched.c | 30 ++++++++++++++++++++++++------
1 file changed, 24 insertions(+), 6 deletions(-)

--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -9131,10 +9131,15 @@ struct cpuacct {
struct cgroup_subsys_state css;
/* cpuusage holds pointer to a u64-type object on every cpu */
u64 *cpuusage;
+ struct cpuacct *parent;
};

+static struct cpuacct init_cpuacct_group;
struct cgroup_subsys cpuacct_subsys;

+#define for_each_cpuacct_group(ca) \
+ for (; ca; ca = ca->parent)
+
/* return cpu accounting group corresponding to this container */
static inline struct cpuacct *cgroup_ca(struct cgroup *cgrp)
{
@@ -9153,17 +9158,28 @@ static inline struct cpuacct *task_ca(st
static struct cgroup_subsys_state *cpuacct_create(
struct cgroup_subsys *ss, struct cgroup *cgrp)
{
- struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
+ struct cpuacct *ca;

- if (!ca)
- return ERR_PTR(-ENOMEM);
+ if (!cgrp->parent) {
+ /* This is early initialization for the top cgroup */
+ ca = &init_cpuacct_group;
+ } else {
+ ca = kzalloc(sizeof(*ca), GFP_KERNEL);
+ if (!ca)
+ return ERR_PTR(-ENOMEM);
+ }

ca->cpuusage = alloc_percpu(u64);
if (!ca->cpuusage) {
- kfree(ca);
+ if (cgrp->parent)
+ kfree(ca);
return ERR_PTR(-ENOMEM);
}

+ if (cgrp->parent) {
+ ca->css.cgroup = cgrp;
+ ca->parent = cgroup_ca(cgrp->parent);
+ }
return &ca->css;
}

@@ -9243,14 +9259,16 @@ static int cpuacct_populate(struct cgrou
static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
{
struct cpuacct *ca;
+ int cpu;

if (!cpuacct_subsys.active)
return;

+ cpu = task_cpu(tsk);
ca = task_ca(tsk);
- if (ca) {
- u64 *cpuusage = percpu_ptr(ca->cpuusage, task_cpu(tsk));

+ for_each_cpuacct_group(ca) {
+ u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
*cpuusage += cputime;
}
}


2008-10-23 06:41:07

by Bharata B Rao

[permalink] [raw]
Subject: Re: [PATCH] Add hierarchical accounting to cpu accounting controller

I realized from the patch present at http://lkml.org/lkml/2008/10/5/277,
that it is not necessary to initialize css.group in cpuacct_create()
as it is done in cgroup core. Present below is the updated patch:

Regards,
Bharata.

Add hierarchical accounting to cpu accounting controller

Currently, while charging the task's cputime to its accounting group,
the accounting group hierarchy isn't updated. This patch charges the cputime
of a task to its accounting group and all its parent accounting groups.

Reported-by: Srivatsa Vaddagiri <[email protected]>
Signed-off-by: Bharata B Rao <[email protected]>
CC: Peter Zijlstra <[email protected]>
CC: Ingo Molnar <[email protected]>
---
kernel/sched.c | 29 +++++++++++++++++++++++------
1 file changed, 23 insertions(+), 6 deletions(-)

--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -9131,10 +9131,15 @@ struct cpuacct {
struct cgroup_subsys_state css;
/* cpuusage holds pointer to a u64-type object on every cpu */
u64 *cpuusage;
+ struct cpuacct *parent;
};

+static struct cpuacct init_cpuacct_group;
struct cgroup_subsys cpuacct_subsys;

+#define for_each_cpuacct_group(ca) \
+ for (; ca; ca = ca->parent)
+
/* return cpu accounting group corresponding to this container */
static inline struct cpuacct *cgroup_ca(struct cgroup *cgrp)
{
@@ -9153,17 +9158,27 @@ static inline struct cpuacct *task_ca(st
static struct cgroup_subsys_state *cpuacct_create(
struct cgroup_subsys *ss, struct cgroup *cgrp)
{
- struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
+ struct cpuacct *ca;

- if (!ca)
- return ERR_PTR(-ENOMEM);
+ if (!cgrp->parent) {
+ /* This is early initialization for the top cgroup */
+ ca = &init_cpuacct_group;
+ } else {
+ ca = kzalloc(sizeof(*ca), GFP_KERNEL);
+ if (!ca)
+ return ERR_PTR(-ENOMEM);
+ }

ca->cpuusage = alloc_percpu(u64);
if (!ca->cpuusage) {
- kfree(ca);
+ if (cgrp->parent)
+ kfree(ca);
return ERR_PTR(-ENOMEM);
}

+ if (cgrp->parent)
+ ca->parent = cgroup_ca(cgrp->parent);
+
return &ca->css;
}

@@ -9243,14 +9258,16 @@ static int cpuacct_populate(struct cgrou
static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
{
struct cpuacct *ca;
+ int cpu;

if (!cpuacct_subsys.active)
return;

+ cpu = task_cpu(tsk);
ca = task_ca(tsk);
- if (ca) {
- u64 *cpuusage = percpu_ptr(ca->cpuusage, task_cpu(tsk));

+ for_each_cpuacct_group(ca) {
+ u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
*cpuusage += cputime;
}
}

2008-10-23 15:49:47

by Paul Menage

[permalink] [raw]
Subject: Re: [PATCH] Add hierarchical accounting to cpu accounting controller

On Wed, Oct 22, 2008 at 10:43 PM, Bharata B Rao
<[email protected]> wrote:
> ---
> kernel/sched.c | 30 ++++++++++++++++++++++++------
> 1 file changed, 24 insertions(+), 6 deletions(-)
>
> --- a/kernel/sched.c
> +++ b/kernel/sched.c
> @@ -9131,10 +9131,15 @@ struct cpuacct {
> struct cgroup_subsys_state css;
> /* cpuusage holds pointer to a u64-type object on every cpu */
> u64 *cpuusage;
> + struct cpuacct *parent;
> };
>
> +static struct cpuacct init_cpuacct_group;

Why are you making the root state static?

> struct cgroup_subsys cpuacct_subsys;
>
> +#define for_each_cpuacct_group(ca) \
> + for (; ca; ca = ca->parent)
> +

This is only used once, so doesn't seem worth creating a new macro for.

>
> + if (cgrp->parent) {
> + ca->css.cgroup = cgrp;

The cgroup pointer in the css is maintained by cgroups - you don't
need to set it.

Paul

2008-10-24 05:08:32

by Bharata B Rao

[permalink] [raw]
Subject: Re: [PATCH] Add hierarchical accounting to cpu accounting controller

On Thu, Oct 23, 2008 at 08:49:19AM -0700, Paul Menage wrote:
> On Wed, Oct 22, 2008 at 10:43 PM, Bharata B Rao
> <[email protected]> wrote:
> > ---
> > kernel/sched.c | 30 ++++++++++++++++++++++++------
> > 1 file changed, 24 insertions(+), 6 deletions(-)
> >
> > --- a/kernel/sched.c
> > +++ b/kernel/sched.c
> > @@ -9131,10 +9131,15 @@ struct cpuacct {
> > struct cgroup_subsys_state css;
> > /* cpuusage holds pointer to a u64-type object on every cpu */
> > u64 *cpuusage;
> > + struct cpuacct *parent;
> > };
> >
> > +static struct cpuacct init_cpuacct_group;
>
> Why are you making the root state static?

Because it is not used anywhere outside sched.c. Is that a problem ?
>
> > struct cgroup_subsys cpuacct_subsys;
> >
> > +#define for_each_cpuacct_group(ca) \
> > + for (; ca; ca = ca->parent)
> > +
>
> This is only used once, so doesn't seem worth creating a new macro for.

Ok. Removed.

>
> >
> > + if (cgrp->parent) {
> > + ca->css.cgroup = cgrp;
>
> The cgroup pointer in the css is maintained by cgroups - you don't
> need to set it.

Yes, I realized this immediately after my 1st post and posted
a corrected version subsequently.

Paul, thanks for your review.

Updated patch below:

Add hierarchical accounting to cpu accounting controller

Currently, while charging the task's cputime to its accounting group,
the accounting group hierarchy isn't updated. This patch charges the cputime
of a task to its accounting group and all its parent accounting groups.

Reported-by: Srivatsa Vaddagiri <[email protected]>
Signed-off-by: Bharata B Rao <[email protected]>
CC: Peter Zijlstra <[email protected]>
CC: Ingo Molnar <[email protected]>
---
kernel/sched.c | 26 ++++++++++++++++++++------
1 file changed, 20 insertions(+), 6 deletions(-)

--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -9131,8 +9131,10 @@ struct cpuacct {
struct cgroup_subsys_state css;
/* cpuusage holds pointer to a u64-type object on every cpu */
u64 *cpuusage;
+ struct cpuacct *parent;
};

+static struct cpuacct init_cpuacct_group;
struct cgroup_subsys cpuacct_subsys;

/* return cpu accounting group corresponding to this container */
@@ -9153,17 +9155,27 @@ static inline struct cpuacct *task_ca(st
static struct cgroup_subsys_state *cpuacct_create(
struct cgroup_subsys *ss, struct cgroup *cgrp)
{
- struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
+ struct cpuacct *ca;

- if (!ca)
- return ERR_PTR(-ENOMEM);
+ if (!cgrp->parent) {
+ /* This is early initialization for the top cgroup */
+ ca = &init_cpuacct_group;
+ } else {
+ ca = kzalloc(sizeof(*ca), GFP_KERNEL);
+ if (!ca)
+ return ERR_PTR(-ENOMEM);
+ }

ca->cpuusage = alloc_percpu(u64);
if (!ca->cpuusage) {
- kfree(ca);
+ if (cgrp->parent)
+ kfree(ca);
return ERR_PTR(-ENOMEM);
}

+ if (cgrp->parent)
+ ca->parent = cgroup_ca(cgrp->parent);
+
return &ca->css;
}

@@ -9243,14 +9255,16 @@ static int cpuacct_populate(struct cgrou
static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
{
struct cpuacct *ca;
+ int cpu;

if (!cpuacct_subsys.active)
return;

+ cpu = task_cpu(tsk);
ca = task_ca(tsk);
- if (ca) {
- u64 *cpuusage = percpu_ptr(ca->cpuusage, task_cpu(tsk));

+ for (; ca; ca = ca->parent) {
+ u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
*cpuusage += cputime;
}
}

2008-10-24 17:37:51

by Paul Menage

[permalink] [raw]
Subject: Re: [PATCH] Add hierarchical accounting to cpu accounting controller

On Thu, Oct 23, 2008 at 10:08 PM, Bharata B Rao
<[email protected]> wrote:
>> > +static struct cpuacct init_cpuacct_group;
>>
>> Why are you making the root state static?
>
> Because it is not used anywhere outside sched.c. Is that a problem ?

Sorry, I wasn't clear - I mean, why are you changing it from
dynamically-allocated in cpuacct_create to statically-allocated in the
BSS?

Paul

2008-10-25 06:01:56

by Bharata B Rao

[permalink] [raw]
Subject: Re: [PATCH] Add hierarchical accounting to cpu accounting controller

On Fri, Oct 24, 2008 at 10:37:34AM -0700, Paul Menage wrote:
> On Thu, Oct 23, 2008 at 10:08 PM, Bharata B Rao
> <[email protected]> wrote:
> >> > +static struct cpuacct init_cpuacct_group;
> >>
> >> Why are you making the root state static?
> >
> > Because it is not used anywhere outside sched.c. Is that a problem ?
>
> Sorry, I wasn't clear - I mean, why are you changing it from
> dynamically-allocated in cpuacct_create to statically-allocated in the
> BSS?

Ok. I realized that defining init_cpuacct_group explicitly and statically
isn't needed. I was just influenced by init_task_group and init_mem_cgroup.
I guess those controllers have different reasons to have their init groups
statically defined.

Here is the updated (hopefully final) patch:

Add hierarchical accounting to cpu accounting controller

Currently, while charging the task's cputime to its accounting group,
the accounting group hierarchy isn't updated. This patch charges the cputime
of a task to its accounting group and all its parent accounting groups.

Reported-by: Srivatsa Vaddagiri <[email protected]>
Signed-off-by: Bharata B Rao <[email protected]>
CC: Peter Zijlstra <[email protected]>
CC: Ingo Molnar <[email protected]>
CC: Paul Menage <[email protected]>
CC: Srivatsa Vaddagiri <[email protected]>
---
kernel/sched.c | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)

--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -9131,6 +9131,7 @@ struct cpuacct {
struct cgroup_subsys_state css;
/* cpuusage holds pointer to a u64-type object on every cpu */
u64 *cpuusage;
+ struct cpuacct *parent;
};

struct cgroup_subsys cpuacct_subsys;
@@ -9164,6 +9165,9 @@ static struct cgroup_subsys_state *cpuac
return ERR_PTR(-ENOMEM);
}

+ if (cgrp->parent)
+ ca->parent = cgroup_ca(cgrp->parent);
+
return &ca->css;
}

@@ -9243,14 +9247,16 @@ static int cpuacct_populate(struct cgrou
static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
{
struct cpuacct *ca;
+ int cpu;

if (!cpuacct_subsys.active)
return;

+ cpu = task_cpu(tsk);
ca = task_ca(tsk);
- if (ca) {
- u64 *cpuusage = percpu_ptr(ca->cpuusage, task_cpu(tsk));

+ for (; ca; ca = ca->parent) {
+ u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
*cpuusage += cputime;
}
}

2008-10-25 15:39:17

by Paul Menage

[permalink] [raw]
Subject: Re: [PATCH] Add hierarchical accounting to cpu accounting controller

On Fri, Oct 24, 2008 at 11:01 PM, Bharata B Rao
<[email protected]> wrote:
>
> Reported-by: Srivatsa Vaddagiri <[email protected]>
> Signed-off-by: Bharata B Rao <[email protected]>

Reviewed-by: Paul Menage <[email protected]>

So in technical terms this patch looks fine now. There's still the
question of whether it's OK to change the existing API, since it's
been in the kernel in its currently (non-hierarchical) form for
several releases now.

> CC: Peter Zijlstra <[email protected]>
> CC: Ingo Molnar <[email protected]>
> CC: Paul Menage <[email protected]>
> CC: Srivatsa Vaddagiri <[email protected]>
> ---
> kernel/sched.c | 10 ++++++++--
> 1 file changed, 8 insertions(+), 2 deletions(-)
>
> --- a/kernel/sched.c
> +++ b/kernel/sched.c
> @@ -9131,6 +9131,7 @@ struct cpuacct {
> struct cgroup_subsys_state css;
> /* cpuusage holds pointer to a u64-type object on every cpu */
> u64 *cpuusage;
> + struct cpuacct *parent;
> };
>
> struct cgroup_subsys cpuacct_subsys;
> @@ -9164,6 +9165,9 @@ static struct cgroup_subsys_state *cpuac
> return ERR_PTR(-ENOMEM);
> }
>
> + if (cgrp->parent)
> + ca->parent = cgroup_ca(cgrp->parent);
> +
> return &ca->css;
> }
>
> @@ -9243,14 +9247,16 @@ static int cpuacct_populate(struct cgrou
> static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
> {
> struct cpuacct *ca;
> + int cpu;
>
> if (!cpuacct_subsys.active)
> return;
>
> + cpu = task_cpu(tsk);
> ca = task_ca(tsk);
> - if (ca) {
> - u64 *cpuusage = percpu_ptr(ca->cpuusage, task_cpu(tsk));
>
> + for (; ca; ca = ca->parent) {
> + u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
> *cpuusage += cputime;
> }
> }
>

2008-10-27 01:17:43

by Kamezawa Hiroyuki

[permalink] [raw]
Subject: Re: [PATCH] Add hierarchical accounting to cpu accounting controller

On Sat, 25 Oct 2008 08:38:52 -0700
"Paul Menage" <[email protected]> wrote:

> On Fri, Oct 24, 2008 at 11:01 PM, Bharata B Rao
> <[email protected]> wrote:
> >
> > Reported-by: Srivatsa Vaddagiri <[email protected]>
> > Signed-off-by: Bharata B Rao <[email protected]>
>
> Reviewed-by: Paul Menage <[email protected]>
>
> So in technical terms this patch looks fine now. There's still the
> question of whether it's OK to change the existing API, since it's
> been in the kernel in its currently (non-hierarchical) form for
> several releases now.
>
Hmm..how about having 2 params as "aggregated usage" and "private usage" ?

cpuacct.usage.
cpuacct.all_subtree_usage.

heavy ?

-Kame

2008-10-27 04:43:17

by Bharata B Rao

[permalink] [raw]
Subject: Re: [PATCH] Add hierarchical accounting to cpu accounting controller

On Mon, Oct 27, 2008 at 10:17:03AM +0900, KAMEZAWA Hiroyuki wrote:
> On Sat, 25 Oct 2008 08:38:52 -0700
> "Paul Menage" <[email protected]> wrote:
>
> > On Fri, Oct 24, 2008 at 11:01 PM, Bharata B Rao
> > <[email protected]> wrote:
> > >
> > > Reported-by: Srivatsa Vaddagiri <[email protected]>
> > > Signed-off-by: Bharata B Rao <[email protected]>
> >
> > Reviewed-by: Paul Menage <[email protected]>

Thanks Paul.

> >
> > So in technical terms this patch looks fine now. There's still the
> > question of whether it's OK to change the existing API, since it's
> > been in the kernel in its currently (non-hierarchical) form for
> > several releases now.

Hmm... Can we consider this as an API change ? Currently cpuacct.usage
readers of a parent accounting group are missing the usage contributions
from its children groups. I would consider this patch as fixing the
above problem by correctly reflecting the cpu usage for every accounting
group.

> >
> Hmm..how about having 2 params as "aggregated usage" and "private usage" ?
>
> cpuacct.usage.
> cpuacct.all_subtree_usage.

Is there really a need to differentiate between aggregated and private
usage other than to maintain the current behaviour ?

Regards,
Bharata.

2008-10-27 05:57:29

by Li Zefan

[permalink] [raw]
Subject: Re: [PATCH] Add hierarchical accounting to cpu accounting controller

> Ok. I realized that defining init_cpuacct_group explicitly and statically
> isn't needed. I was just influenced by init_task_group and init_mem_cgroup.
> I guess those controllers have different reasons to have their init groups
> statically defined.
>

I think one reason is, cpu_cgroup_subsys has to be initialized very early,
and at that time kmalloc() is not usable, so we use statically defined
init_task_group.

2008-10-27 06:57:55

by Balbir Singh

[permalink] [raw]
Subject: Re: [PATCH] Add hierarchical accounting to cpu accounting controller

On Mon, Oct 27, 2008 at 10:13 AM, Bharata B Rao
<[email protected]> wrote:
> On Mon, Oct 27, 2008 at 10:17:03AM +0900, KAMEZAWA Hiroyuki wrote:
>> On Sat, 25 Oct 2008 08:38:52 -0700
>> "Paul Menage" <[email protected]> wrote:
>>
>> > On Fri, Oct 24, 2008 at 11:01 PM, Bharata B Rao
>> > <[email protected]> wrote:
>> > >
>> > > Reported-by: Srivatsa Vaddagiri <[email protected]>
>> > > Signed-off-by: Bharata B Rao <[email protected]>
>> >
>> > Reviewed-by: Paul Menage <[email protected]>
>
> Thanks Paul.
>
>> >
>> > So in technical terms this patch looks fine now. There's still the
>> > question of whether it's OK to change the existing API, since it's
>> > been in the kernel in its currently (non-hierarchical) form for
>> > several releases now.
>
> Hmm... Can we consider this as an API change ? Currently cpuacct.usage
> readers of a parent accounting group are missing the usage contributions
> from its children groups. I would consider this patch as fixing the
> above problem by correctly reflecting the cpu usage for every accounting
> group.
>

If a particular application desires to derive the usage of its
immediate tasks and does not care about subcgroups, it is a simple
iteration (after this fix)

cpuacct - sigma(cpuacct_child)

and currently if we cared about child accounting, we could do

cpuacct + recursively(sigma(cpuacct_child))

In that sense this fix makes more sense, but like Paul said we need to
figure out if it is an API change. My take is that it is a BUG fix,
since we do care about child subgroups in accounting.


>> >
>> Hmm..how about having 2 params as "aggregated usage" and "private usage" ?
>>
>> cpuacct.usage.
>> cpuacct.all_subtree_usage.
>
> Is there really a need to differentiate between aggregated and private
> usage other than to maintain the current behaviour ?
>

That might be useful to have, but as above it can always be derived.

Balbir

2008-10-27 08:28:09

by Li Zefan

[permalink] [raw]
Subject: Re: [PATCH] Add hierarchical accounting to cpu accounting controller

>>>> So in technical terms this patch looks fine now. There's still the
>>>> question of whether it's OK to change the existing API, since it's
>>>> been in the kernel in its currently (non-hierarchical) form for
>>>> several releases now.
>> Hmm... Can we consider this as an API change ? Currently cpuacct.usage
>> readers of a parent accounting group are missing the usage contributions
>> from its children groups. I would consider this patch as fixing the
>> above problem by correctly reflecting the cpu usage for every accounting
>> group.
>>
>
> If a particular application desires to derive the usage of its
> immediate tasks and does not care about subcgroups, it is a simple
> iteration (after this fix)
>
> cpuacct - sigma(cpuacct_child)
>
> and currently if we cared about child accounting, we could do
>
> cpuacct + recursively(sigma(cpuacct_child))
>
> In that sense this fix makes more sense, but like Paul said we need to
> figure out if it is an API change. My take is that it is a BUG fix,
> since we do care about child subgroups in accounting.
>

cpuacct was designed to count cpu usage of a group of tasks, and now some people
want it to also take child group's usage into account, so I think this is a feature
request but not a bug fix.

How about add a flag to disable/enable hierarchical accounting?

=====

From: Li Zefan <[email protected]>
Date: Mon, 27 Oct 2008 16:00:21 +0800
Subject: [PATCH] cpuacct: add hierarchical accouning

Add hierarchical accouning to cpu accouting subsystem, so the cputime
of a task is chareged to its accounting group and all it's parent
accouning groups.

Also add 'cpuacct.hierarchy' control file, so we can enable/disable
hierarchical accounting. The default is disabled, so we reserve the
original behavior of cpuacct.

Signed-off-by: Bharata B Rao <[email protected]>
Signed-off-by: Li Zefan <[email protected]>
---
kernel/sched.c | 75 ++++++++++++++++++++++++++++++++++++++++++++------------
1 files changed, 59 insertions(+), 16 deletions(-)

diff --git a/kernel/sched.c b/kernel/sched.c
index 6625c3c..1c997bd 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -9232,15 +9232,22 @@ struct cgroup_subsys cpu_cgroup_subsys = {
* ([email protected]).
*/

-/* track cpu usage of a group of tasks */
+/*
+ * Track cpu usage of a group of tasks.
+ *
+ * If cpuacct_hierarchy is set, it's children's usage is also accounted.
+ */
struct cpuacct {
struct cgroup_subsys_state css;
/* cpuusage holds pointer to a u64-type object on every cpu */
u64 *cpuusage;
+ struct cpuacct *parent;
};

struct cgroup_subsys cpuacct_subsys;

+static int cpuacct_hierarchy;
+
/* return cpu accounting group corresponding to this container */
static inline struct cpuacct *cgroup_ca(struct cgroup *cgrp)
{
@@ -9256,8 +9263,8 @@ static inline struct cpuacct *task_ca(struct task_struct *tsk)
}

/* create a new cpu accounting group */
-static struct cgroup_subsys_state *cpuacct_create(
- struct cgroup_subsys *ss, struct cgroup *cgrp)
+static struct cgroup_subsys_state *cpuacct_create(struct cgroup_subsys *ss,
+ struct cgroup *cgrp)
{
struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL);

@@ -9270,12 +9277,14 @@ static struct cgroup_subsys_state *cpuacct_create(
return ERR_PTR(-ENOMEM);
}

+ if (cgrp->parent)
+ ca->parent = cgroup_ca(cgrp->parent);
+
return &ca->css;
}

/* destroy an existing cpu accounting group */
-static void
-cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
+static void cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
{
struct cpuacct *ca = cgroup_ca(cgrp);

@@ -9306,7 +9315,7 @@ static u64 cpuusage_read(struct cgroup *cgrp, struct cftype *cft)
}

static int cpuusage_write(struct cgroup *cgrp, struct cftype *cftype,
- u64 reset)
+ u64 reset)
{
struct cpuacct *ca = cgroup_ca(cgrp);
int err = 0;
@@ -9328,17 +9337,42 @@ out:
return err;
}

-static struct cftype files[] = {
- {
- .name = "usage",
- .read_u64 = cpuusage_read,
- .write_u64 = cpuusage_write,
- },
+static u64 cpuacct_hierarchy_read(struct cgroup *cgrp, struct cftype *cft)
+{
+ return cpuacct_hierarchy;
+}
+
+static int cpuacct_hierarchy_write(struct cgroup *cgrp, struct cftype *cftype,
+ u64 val)
+{
+ cpuacct_hierarchy = !!val;
+ return 0;
+}
+
+static struct cftype cft_cpuusage = {
+ .name = "usage",
+ .read_u64 = cpuusage_read,
+ .write_u64 = cpuusage_write,
+};
+
+static struct cftype cft_hierarchy = {
+ .name = "hierarchy",
+ .read_u64 = cpuacct_hierarchy_read,
+ .write_u64 = cpuacct_hierarchy_write,
};

static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
{
- return cgroup_add_files(cgrp, ss, files, ARRAY_SIZE(files));
+ int ret;
+
+ ret = cgroup_add_file(cgrp, ss, &cft_cpuusage);
+ if (ret)
+ return ret;
+
+ if (!cgrp->parent)
+ ret = cgroup_add_file(cgrp, ss, &cft_hierarchy);
+
+ return ret;
}

/*
@@ -9349,15 +9383,24 @@ static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
{
struct cpuacct *ca;
+ int cpu;

if (!cpuacct_subsys.active)
return;

+ cpu = task_cpu(tsk);
ca = task_ca(tsk);
- if (ca) {
- u64 *cpuusage = percpu_ptr(ca->cpuusage, task_cpu(tsk));

- *cpuusage += cputime;
+ if (cpuacct_hierarchy) {
+ for (; ca; ca = ca->parent) {
+ u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
+ *cpuusage += cputime;
+ }
+ } else {
+ if (ca) {
+ u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
+ *cpuusage += cputime;
+ }
}
}

--
1.5.4.rc3

2008-10-30 17:16:44

by Dhaval Giani

[permalink] [raw]
Subject: Re: [PATCH] Add hierarchical accounting to cpu accounting controller

On Mon, Oct 27, 2008 at 04:25:01PM +0800, Li Zefan wrote:
> >>>> So in technical terms this patch looks fine now. There's still the
> >>>> question of whether it's OK to change the existing API, since it's
> >>>> been in the kernel in its currently (non-hierarchical) form for
> >>>> several releases now.
> >> Hmm... Can we consider this as an API change ? Currently cpuacct.usage
> >> readers of a parent accounting group are missing the usage contributions
> >> from its children groups. I would consider this patch as fixing the
> >> above problem by correctly reflecting the cpu usage for every accounting
> >> group.
> >>
> >
> > If a particular application desires to derive the usage of its
> > immediate tasks and does not care about subcgroups, it is a simple
> > iteration (after this fix)
> >
> > cpuacct - sigma(cpuacct_child)
> >
> > and currently if we cared about child accounting, we could do
> >
> > cpuacct + recursively(sigma(cpuacct_child))
> >
> > In that sense this fix makes more sense, but like Paul said we need to
> > figure out if it is an API change. My take is that it is a BUG fix,
> > since we do care about child subgroups in accounting.
> >
>
> cpuacct was designed to count cpu usage of a group of tasks, and now some people
> want it to also take child group's usage into account, so I think this is a feature
> request but not a bug fix.
>

I disagree. The child is a part of the parent's hierarchy, and therefore
its usage should reflect in the parent's usage.

Thanks,
--
regards,
Dhaval

2008-10-31 00:41:30

by Kamezawa Hiroyuki

[permalink] [raw]
Subject: Re: [PATCH] Add hierarchical accounting to cpu accounting controller

On Thu, 30 Oct 2008 22:46:22 +0530
Dhaval Giani <[email protected]> wrote:

> On Mon, Oct 27, 2008 at 04:25:01PM +0800, Li Zefan wrote:
> > >>>> So in technical terms this patch looks fine now. There's still the
> > >>>> question of whether it's OK to change the existing API, since it's
> > >>>> been in the kernel in its currently (non-hierarchical) form for
> > >>>> several releases now.
> > >> Hmm... Can we consider this as an API change ? Currently cpuacct.usage
> > >> readers of a parent accounting group are missing the usage contributions
> > >> from its children groups. I would consider this patch as fixing the
> > >> above problem by correctly reflecting the cpu usage for every accounting
> > >> group.
> > >>
> > >
> > > If a particular application desires to derive the usage of its
> > > immediate tasks and does not care about subcgroups, it is a simple
> > > iteration (after this fix)
> > >
> > > cpuacct - sigma(cpuacct_child)
> > >
> > > and currently if we cared about child accounting, we could do
> > >
> > > cpuacct + recursively(sigma(cpuacct_child))
> > >
> > > In that sense this fix makes more sense, but like Paul said we need to
> > > figure out if it is an API change. My take is that it is a BUG fix,
> > > since we do care about child subgroups in accounting.
> > >
> >
> > cpuacct was designed to count cpu usage of a group of tasks, and now some people
> > want it to also take child group's usage into account, so I think this is a feature
> > request but not a bug fix.
> >
>
> I disagree. The child is a part of the parent's hierarchy, and therefore
> its usage should reflect in the parent's usage.
>

In my point of view, there is no big difference. It's just whether we need a tool
in userland or not. If there is no performance impact, I have no objections.

One request from me is add Documentation/controllers/cpuacct.txt or some to explain
"what we see".

Thanks,
-Kame

2008-11-04 12:50:45

by Bharata B Rao

[permalink] [raw]
Subject: Re: [PATCH] Add hierarchical accounting to cpu accounting controller

On Fri, Oct 31, 2008 at 09:40:41AM +0900, KAMEZAWA Hiroyuki wrote:
> On Thu, 30 Oct 2008 22:46:22 +0530
> Dhaval Giani <[email protected]> wrote:
> > I disagree. The child is a part of the parent's hierarchy, and therefore
> > its usage should reflect in the parent's usage.
> >
>
> In my point of view, there is no big difference. It's just whether we need a tool
> in userland or not. If there is no performance impact, I have no objections.
>
> One request from me is add Documentation/controllers/cpuacct.txt or some to explain
> "what we see".

I am not sure which version (mine or Li Zefan's) Paul prefers. I am
resending my patch anyway with documentation and performance numbers
included. I don't see any significant improvement or degradation in
hackbench, lmbench and volanomark numbers with this patch.

Regards,
Bharata.

Add hierarchical accounting to cpu accounting controller and cpuacct
documentation.

Currently, while charging the task's cputime to its accounting group,
the accounting group hierarchy isn't updated. This patch charges the cputime
of a task to its accounting group and all its parent accounting groups.

Reported-by: Srivatsa Vaddagiri <[email protected]>
Signed-off-by: Bharata B Rao <[email protected]>
CC: Peter Zijlstra <[email protected]>
CC: Ingo Molnar <[email protected]>
CC: Srivatsa Vaddagiri <[email protected]>
Reviewed-by: Paul Menage <[email protected]>
---
Documentation/controllers/cpuacct.txt | 32 ++++++++++++++++++++++++++++++++
kernel/sched.c | 10 ++++++++--
2 files changed, 40 insertions(+), 2 deletions(-)

--- /dev/null
+++ b/Documentation/controllers/cpuacct.txt
@@ -0,0 +1,32 @@
+CPU Accounting Controller
+-------------------------
+
+The CPU accounting controller is used to group tasks using cgroups and
+account the CPU usage of these group of tasks.
+
+The CPU accounting controller supports multi-hierarchy groups. An accounting
+group accumulates the CPU usage of all of it's child groups and
+the tasks directly present in it's group.
+
+Accounting groups can be created by first mounting the cgroup filesystem.
+
+# mkdir /cgroups
+# mount -t cgroup -ocpuacct none /cgroups
+
+With the above step, the initial or the parent accounting group
+becomes visible at /cgroups. At bootup, this group comprises of all the
+tasks in the system. /cgroups/tasks lists the tasks in this cgroup.
+/cgroups/cpuacct.usage gives the CPU time (in nanoseconds) obtained by
+this group which is essentially the CPU time obtained by all the tasks
+in the system.
+
+New accounting groups can be created under the parent group /cgroups.
+
+# cd /cgroups
+# mkdir g1
+# echo $$ > g1
+
+The above steps create a new group g1 and move the current shell
+process (bash) into it. CPU time consumed by this bash and it's children
+can be obtained from g1/cpuacct.usage and the same gets accumulated in
+/cgroups/cpuacct.usage also.
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -9236,6 +9236,7 @@ struct cpuacct {
struct cgroup_subsys_state css;
/* cpuusage holds pointer to a u64-type object on every cpu */
u64 *cpuusage;
+ struct cpuacct *parent;
};

struct cgroup_subsys cpuacct_subsys;
@@ -9269,6 +9270,9 @@ static struct cgroup_subsys_state *cpuac
return ERR_PTR(-ENOMEM);
}

+ if (cgrp->parent)
+ ca->parent = cgroup_ca(cgrp->parent);
+
return &ca->css;
}

@@ -9348,14 +9352,16 @@ static int cpuacct_populate(struct cgrou
static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
{
struct cpuacct *ca;
+ int cpu;

if (!cpuacct_subsys.active)
return;

+ cpu = task_cpu(tsk);
ca = task_ca(tsk);
- if (ca) {
- u64 *cpuusage = percpu_ptr(ca->cpuusage, task_cpu(tsk));

+ for (; ca; ca = ca->parent) {
+ u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
*cpuusage += cputime;
}
}

Performance numbers
==================
2x2HT=4CPU i386 running 2.6.28-rc3

All benchmarks were run from bash which belonged to the grand
child group of the topmost accounting group. The tests were
first run on 2628rc3 and then on 2628rc3+this patch and the
normalized difference between the two runs is shown below.

I. hackbench
============
# ./hackbench 100 process 100
Running with 100*40 (== 4000) tasks.
Normalized time difference for avg of 5 runs: 1.0059

# ./hackbench 25 thread 100
Running with 25*40 (== 1000) tasks.
Normalized time difference for avg of 5 runs: 1.0139

II. lmbench
===========
---------------------
4 threads
---------------------
size Normalized
(kb) Change (Time)
---------------------
16 1.1017
64 1.1168
128 1.1072
256 1.0085
--------------------
8 threads
-------------------
16 1.1835
64 1.0617
128 0.9980
256 0.9682
-------------------
16 threads
-------------------
16 1.1186
64 0.9921
128 0.9505
256 1.0043
-------------------
32 threads
-------------------
16 1.0005
64 1.0089
128 1.0019
256 1.0226
-------------------
64 threads
-------------------
16 1.0207
64 1.0385
128 1.0109
256 1.0159
-------------------

III. volanomark
===============
Normalized average throughput difference for loopback test

------------------------------------------------------------
Nr runs Normalized average throughput difference
------------------------------------------------------------
10 0.9579
------------------------------------------------------------
4 1.1465
------------------------------------------------------------
9 0.9451
------------------------------------------------------------
19 1.0133
------------------------------------------------------------

2008-11-04 17:21:43

by Randy Dunlap

[permalink] [raw]
Subject: Re: [PATCH] Add hierarchical accounting to cpu accounting controller

On Tue, 4 Nov 2008 18:19:37 +0530 Bharata B Rao wrote:

> Add hierarchical accounting to cpu accounting controller and cpuacct
> documentation.
>
> Currently, while charging the task's cputime to its accounting group,
> the accounting group hierarchy isn't updated. This patch charges the cputime
> of a task to its accounting group and all its parent accounting groups.
>
> Reported-by: Srivatsa Vaddagiri <[email protected]>
> Signed-off-by: Bharata B Rao <[email protected]>
> CC: Peter Zijlstra <[email protected]>
> CC: Ingo Molnar <[email protected]>
> CC: Srivatsa Vaddagiri <[email protected]>
> Reviewed-by: Paul Menage <[email protected]>
> ---
> Documentation/controllers/cpuacct.txt | 32 ++++++++++++++++++++++++++++++++
> kernel/sched.c | 10 ++++++++--
> 2 files changed, 40 insertions(+), 2 deletions(-)
>
> --- /dev/null
> +++ b/Documentation/controllers/cpuacct.txt
> @@ -0,0 +1,32 @@
> +CPU Accounting Controller
> +-------------------------
> +
> +The CPU accounting controller is used to group tasks using cgroups and
> +account the CPU usage of these group of tasks.
> +
> +The CPU accounting controller supports multi-hierarchy groups. An accounting
> +group accumulates the CPU usage of all of it's child groups and

its {it's means "it is"}

> +the tasks directly present in it's group.

its

> +
> +Accounting groups can be created by first mounting the cgroup filesystem.
> +
> +# mkdir /cgroups
> +# mount -t cgroup -ocpuacct none /cgroups
> +
> +With the above step, the initial or the parent accounting group
> +becomes visible at /cgroups. At bootup, this group comprises of all the

"comprises of all" seems awkward to me. Maybe "includes all" or
"comprises all" if you have to use that word. ;)

> +tasks in the system. /cgroups/tasks lists the tasks in this cgroup.
> +/cgroups/cpuacct.usage gives the CPU time (in nanoseconds) obtained by
> +this group which is essentially the CPU time obtained by all the tasks
> +in the system.
> +
> +New accounting groups can be created under the parent group /cgroups.
> +
> +# cd /cgroups
> +# mkdir g1
> +# echo $$ > g1
> +
> +The above steps create a new group g1 and move the current shell
> +process (bash) into it. CPU time consumed by this bash and it's children

its

> +can be obtained from g1/cpuacct.usage and the same gets accumulated in

is accumulated in

> +/cgroups/cpuacct.usage also.



> Performance numbers
> ==================
> 2x2HT=4CPU i386 running 2.6.28-rc3
>
> All benchmarks were run from bash which belonged to the grand

grandchild (?)
Otherwise it says that the "child group" is "grand."
Oh, never mind. I thought that this was part of a doc file, but it's not.


> child group of the topmost accounting group. The tests were
> first run on 2628rc3 and then on 2628rc3+this patch and the
> normalized difference between the two runs is shown below.
>
> I. hackbench
> ============
> # ./hackbench 100 process 100
> Running with 100*40 (== 4000) tasks.
> Normalized time difference for avg of 5 runs: 1.0059
>
> # ./hackbench 25 thread 100
> Running with 25*40 (== 1000) tasks.
> Normalized time difference for avg of 5 runs: 1.0139
>
> II. lmbench
> ===========
> ---------------------
> 4 threads
> ---------------------
> size Normalized
> (kb) Change (Time)
> ---------------------
> 16 1.1017
> 64 1.1168
> 128 1.1072
> 256 1.0085
> --------------------
> 8 threads
> -------------------
> 16 1.1835
> 64 1.0617
> 128 0.9980
> 256 0.9682
> -------------------
> 16 threads
> -------------------
> 16 1.1186
> 64 0.9921
> 128 0.9505
> 256 1.0043
> -------------------
> 32 threads
> -------------------
> 16 1.0005
> 64 1.0089
> 128 1.0019
> 256 1.0226
> -------------------
> 64 threads
> -------------------
> 16 1.0207
> 64 1.0385
> 128 1.0109
> 256 1.0159
> -------------------
>
> III. volanomark
> ===============
> Normalized average throughput difference for loopback test
>
> ------------------------------------------------------------
> Nr runs Normalized average throughput difference
> ------------------------------------------------------------
> 10 0.9579
> ------------------------------------------------------------
> 4 1.1465
> ------------------------------------------------------------
> 9 0.9451
> ------------------------------------------------------------
> 19 1.0133
> ------------------------------------------------------------
> --


---
~Randy

2008-11-05 03:24:32

by Bharata B Rao

[permalink] [raw]
Subject: Re: [PATCH] Add hierarchical accounting to cpu accounting controller

On Tue, Nov 04, 2008 at 09:20:40AM -0800, Randy Dunlap wrote:
> On Tue, 4 Nov 2008 18:19:37 +0530 Bharata B Rao wrote:
>
> > Add hierarchical accounting to cpu accounting controller and cpuacct
> > documentation.
> >
> > Currently, while charging the task's cputime to its accounting group,
> > the accounting group hierarchy isn't updated. This patch charges the cputime
> > of a task to its accounting group and all its parent accounting groups.
> >
> > Reported-by: Srivatsa Vaddagiri <[email protected]>
> > Signed-off-by: Bharata B Rao <[email protected]>
> > CC: Peter Zijlstra <[email protected]>
> > CC: Ingo Molnar <[email protected]>
> > CC: Srivatsa Vaddagiri <[email protected]>
> > Reviewed-by: Paul Menage <[email protected]>
> > ---
> > Documentation/controllers/cpuacct.txt | 32 ++++++++++++++++++++++++++++++++
> > kernel/sched.c | 10 ++++++++--
> > 2 files changed, 40 insertions(+), 2 deletions(-)
> >
> > --- /dev/null
> > +++ b/Documentation/controllers/cpuacct.txt
> > @@ -0,0 +1,32 @@
> > +CPU Accounting Controller
> > +-------------------------
> > +
> > +The CPU accounting controller is used to group tasks using cgroups and
> > +account the CPU usage of these group of tasks.
> > +
> > +The CPU accounting controller supports multi-hierarchy groups. An accounting
> > +group accumulates the CPU usage of all of it's child groups and
>
> its {it's means "it is"}
>
> > +the tasks directly present in it's group.
>
> its

You are right, I was under the wrong impression that possessive case of
"it" is still "it's".

I will resend the patch with other corrections you mentioned only if
Paul/Ingo want this patch to go in.

Thanks for your comments.

Regards,
Bharata.

2008-11-05 03:30:28

by Srivatsa Vaddagiri

[permalink] [raw]
Subject: Re: [PATCH] Add hierarchical accounting to cpu accounting controller

On Wed, Nov 05, 2008 at 08:54:40AM +0530, Bharata B Rao wrote:
> I will resend the patch with other corrections you mentioned only if
> Paul/Ingo want this patch to go in.

I strongly recommend his patch to go in, as otherwise group accounting
will be broken where control hierarchy is not same as accounting
hierarchy. In that sense, I would treat this patch as more of a bug fix
than anything else.

- vatsa

2008-11-05 03:34:34

by Li Zefan

[permalink] [raw]
Subject: Re: [PATCH] Add hierarchical accounting to cpu accounting controller

>> cpuacct was designed to count cpu usage of a group of tasks, and now some people
>> want it to also take child group's usage into account, so I think this is a feature
>> request but not a bug fix.
>>
>
> I disagree. The child is a part of the parent's hierarchy, and therefore
> its usage should reflect in the parent's usage.
>

In memcg the child's usage doesn't reflect in its parent's usage. ;)

Balbir just posted a patchset to add hierarchy support in memcg, and added memory.feature
to disable/enable this feature. Is it for performance only or also for keeping the user
interface/behavior unchanged?

2008-11-05 03:49:57

by Kamezawa Hiroyuki

[permalink] [raw]
Subject: Re: [PATCH] Add hierarchical accounting to cpu accounting controller

On Wed, 05 Nov 2008 11:31:32 +0800
Li Zefan <[email protected]> wrote:

> >> cpuacct was designed to count cpu usage of a group of tasks, and now some people
> >> want it to also take child group's usage into account, so I think this is a feature
> >> request but not a bug fix.
> >>
> >
> > I disagree. The child is a part of the parent's hierarchy, and therefore
> > its usage should reflect in the parent's usage.
> >
>
> In memcg the child's usage doesn't reflect in its parent's usage. ;)
>
> Balbir just posted a patchset to add hierarchy support in memcg, and added memory.feature
> to disable/enable this feature. Is it for performance only or also for keeping the user
> interface/behavior unchanged?

The main reason is performance.

And one of memcg's purpose is isolating resource usage of groups. Sum of usage
is not very important sometimes. (we have /proc/meminfo ;)
Sum of usage can be easily calculcated by user land and we don't have to pay
the cost for it in the kernel.

Thanks,
-Kame

2008-11-05 06:20:39

by Li Zefan

[permalink] [raw]
Subject: Re: [PATCH] Add hierarchical accounting to cpu accounting controller

> +The CPU accounting controller is used to group tasks using cgroups and
> +account the CPU usage of these group of tasks.

this group or these groups?

...

> --- a/kernel/sched.c
> +++ b/kernel/sched.c
> @@ -9236,6 +9236,7 @@ struct cpuacct {
> struct cgroup_subsys_state css;
> /* cpuusage holds pointer to a u64-type object on every cpu */
> u64 *cpuusage;
> + struct cpuacct *parent;
> };
>

Maybe the comment needs to be updated?

/* track cpu usage of a group of tasks */
struct cpuacct {
...
};

->

/* track cpu usage of a group of tasks and its child groups */

2008-11-06 06:48:30

by Balbir Singh

[permalink] [raw]
Subject: Re: [PATCH] Add hierarchical accounting to cpu accounting controller

Bharata B Rao wrote:
> On Fri, Oct 31, 2008 at 09:40:41AM +0900, KAMEZAWA Hiroyuki wrote:
>> On Thu, 30 Oct 2008 22:46:22 +0530
>> Dhaval Giani <[email protected]> wrote:
>>> I disagree. The child is a part of the parent's hierarchy, and therefore
>>> its usage should reflect in the parent's usage.
>>>
>> In my point of view, there is no big difference. It's just whether we need a tool
>> in userland or not. If there is no performance impact, I have no objections.
>>
>> One request from me is add Documentation/controllers/cpuacct.txt or some to explain
>> "what we see".
>
> I am not sure which version (mine or Li Zefan's) Paul prefers. I am
> resending my patch anyway with documentation and performance numbers
> included. I don't see any significant improvement or degradation in
> hackbench, lmbench and volanomark numbers with this patch.
>
> Regards,
> Bharata.
>
> Add hierarchical accounting to cpu accounting controller and cpuacct
> documentation.
>
> Currently, while charging the task's cputime to its accounting group,
> the accounting group hierarchy isn't updated. This patch charges the cputime
> of a task to its accounting group and all its parent accounting groups.
>
> Reported-by: Srivatsa Vaddagiri <[email protected]>
> Signed-off-by: Bharata B Rao <[email protected]>
> CC: Peter Zijlstra <[email protected]>
> CC: Ingo Molnar <[email protected]>
> CC: Srivatsa Vaddagiri <[email protected]>
> Reviewed-by: Paul Menage <[email protected]>

Looks good and straight forward.

Acked-by: Balbir Singh <[email protected]>

--
Balbir

2008-11-10 10:21:38

by Dhaval Giani

[permalink] [raw]
Subject: Re: [PATCH] Add hierarchical accounting to cpu accounting controller

On Wed, Nov 05, 2008 at 08:59:11AM +0530, Srivatsa Vaddagiri wrote:
> On Wed, Nov 05, 2008 at 08:54:40AM +0530, Bharata B Rao wrote:
> > I will resend the patch with other corrections you mentioned only if
> > Paul/Ingo want this patch to go in.
>
> I strongly recommend his patch to go in, as otherwise group accounting
> will be broken where control hierarchy is not same as accounting
> hierarchy. In that sense, I would treat this patch as more of a bug fix
> than anything else.
>

Wondering what the status of this patch is?

thanks,
--
regards,
Dhaval

2008-11-10 10:22:21

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH] Add hierarchical accounting to cpu accounting controller

On Tue, 2008-11-04 at 18:19 +0530, Bharata B Rao wrote:

> Add hierarchical accounting to cpu accounting controller and cpuacct
> documentation.
>
> Currently, while charging the task's cputime to its accounting group,
> the accounting group hierarchy isn't updated. This patch charges the cputime
> of a task to its accounting group and all its parent accounting groups.
>
> Reported-by: Srivatsa Vaddagiri <[email protected]>
> Signed-off-by: Bharata B Rao <[email protected]>
> CC: Peter Zijlstra <[email protected]>
> CC: Ingo Molnar <[email protected]>
> CC: Srivatsa Vaddagiri <[email protected]>
> Reviewed-by: Paul Menage <[email protected]>

Seems sane and simple enough.

Acked-by: Peter Zijlstra <[email protected]>

Ingo?

> ---
> Documentation/controllers/cpuacct.txt | 32 ++++++++++++++++++++++++++++++++
> kernel/sched.c | 10 ++++++++--
> 2 files changed, 40 insertions(+), 2 deletions(-)
>
> --- /dev/null
> +++ b/Documentation/controllers/cpuacct.txt
> @@ -0,0 +1,32 @@
> +CPU Accounting Controller
> +-------------------------
> +
> +The CPU accounting controller is used to group tasks using cgroups and
> +account the CPU usage of these group of tasks.
> +
> +The CPU accounting controller supports multi-hierarchy groups. An accounting
> +group accumulates the CPU usage of all of it's child groups and
> +the tasks directly present in it's group.
> +
> +Accounting groups can be created by first mounting the cgroup filesystem.
> +
> +# mkdir /cgroups
> +# mount -t cgroup -ocpuacct none /cgroups
> +
> +With the above step, the initial or the parent accounting group
> +becomes visible at /cgroups. At bootup, this group comprises of all the
> +tasks in the system. /cgroups/tasks lists the tasks in this cgroup.
> +/cgroups/cpuacct.usage gives the CPU time (in nanoseconds) obtained by
> +this group which is essentially the CPU time obtained by all the tasks
> +in the system.
> +
> +New accounting groups can be created under the parent group /cgroups.
> +
> +# cd /cgroups
> +# mkdir g1
> +# echo $$ > g1
> +
> +The above steps create a new group g1 and move the current shell
> +process (bash) into it. CPU time consumed by this bash and it's children
> +can be obtained from g1/cpuacct.usage and the same gets accumulated in
> +/cgroups/cpuacct.usage also.
> --- a/kernel/sched.c
> +++ b/kernel/sched.c
> @@ -9236,6 +9236,7 @@ struct cpuacct {
> struct cgroup_subsys_state css;
> /* cpuusage holds pointer to a u64-type object on every cpu */
> u64 *cpuusage;
> + struct cpuacct *parent;
> };
>
> struct cgroup_subsys cpuacct_subsys;
> @@ -9269,6 +9270,9 @@ static struct cgroup_subsys_state *cpuac
> return ERR_PTR(-ENOMEM);
> }
>
> + if (cgrp->parent)
> + ca->parent = cgroup_ca(cgrp->parent);
> +
> return &ca->css;
> }
>
> @@ -9348,14 +9352,16 @@ static int cpuacct_populate(struct cgrou
> static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
> {
> struct cpuacct *ca;
> + int cpu;
>
> if (!cpuacct_subsys.active)
> return;
>
> + cpu = task_cpu(tsk);
> ca = task_ca(tsk);
> - if (ca) {
> - u64 *cpuusage = percpu_ptr(ca->cpuusage, task_cpu(tsk));
>
> + for (; ca; ca = ca->parent) {
> + u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
> *cpuusage += cputime;
> }
> }

2008-11-10 13:10:21

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH] Add hierarchical accounting to cpu accounting controller


* Bharata B Rao <[email protected]> wrote:

> I will resend the patch with other corrections you mentioned only if
> Paul/Ingo want this patch to go in.

yes, please resend the fixed version.

Ingo

2008-11-10 13:13:43

by Balbir Singh

[permalink] [raw]
Subject: Re: [PATCH] Add hierarchical accounting to cpu accounting controller

Peter Zijlstra wrote:
> On Tue, 2008-11-04 at 18:19 +0530, Bharata B Rao wrote:
>
>> Add hierarchical accounting to cpu accounting controller and cpuacct
>> documentation.
>>
>> Currently, while charging the task's cputime to its accounting group,
>> the accounting group hierarchy isn't updated. This patch charges the cputime
>> of a task to its accounting group and all its parent accounting groups.
>>
>> Reported-by: Srivatsa Vaddagiri <[email protected]>
>> Signed-off-by: Bharata B Rao <[email protected]>
>> CC: Peter Zijlstra <[email protected]>
>> CC: Ingo Molnar <[email protected]>
>> CC: Srivatsa Vaddagiri <[email protected]>
>> Reviewed-by: Paul Menage <[email protected]>
>
> Seems sane and simple enough.
>
> Acked-by: Peter Zijlstra <[email protected]>

Just in case it was missed

Acked-by: Balbir Singh <[email protected]>

--
Balbir

2008-11-10 15:11:18

by Bharata B Rao

[permalink] [raw]
Subject: Re: [PATCH] Add hierarchical accounting to cpu accounting controller

On Mon, Nov 10, 2008 at 02:09:37PM +0100, Ingo Molnar wrote:
>
> * Bharata B Rao <[email protected]> wrote:
>
> > I will resend the patch with other corrections you mentioned only if
> > Paul/Ingo want this patch to go in.
>
> yes, please resend the fixed version.

Here it is:

Add hierarchical accounting to cpu accounting controller and include
cpuacct documentation.

Currently, while charging the task's cputime to its accounting group,
the accounting group hierarchy isn't updated. This patch charges the cputime
of a task to its accounting group and all its parent accounting groups.

Reported-by: Srivatsa Vaddagiri <[email protected]>
Signed-off-by: Bharata B Rao <[email protected]>
CC: Ingo Molnar <[email protected]>
CC: Srivatsa Vaddagiri <[email protected]>
Reviewed-by: Paul Menage <[email protected]>
Acked-by: Balbir Singh <[email protected]>
Acked-by: Peter Zijlstra <[email protected]>
---
Documentation/controllers/cpuacct.txt | 32 ++++++++++++++++++++++++++++++++
kernel/sched.c | 12 +++++++++---
2 files changed, 41 insertions(+), 3 deletions(-)

--- /dev/null
+++ b/Documentation/controllers/cpuacct.txt
@@ -0,0 +1,32 @@
+CPU Accounting Controller
+-------------------------
+
+The CPU accounting controller is used to group tasks using cgroups and
+account the CPU usage of these groups of tasks.
+
+The CPU accounting controller supports multi-hierarchy groups. An accounting
+group accumulates the CPU usage of all of its child groups and the tasks
+directly present in its group.
+
+Accounting groups can be created by first mounting the cgroup filesystem.
+
+# mkdir /cgroups
+# mount -t cgroup -ocpuacct none /cgroups
+
+With the above step, the initial or the parent accounting group
+becomes visible at /cgroups. At bootup, this group includes all the
+tasks in the system. /cgroups/tasks lists the tasks in this cgroup.
+/cgroups/cpuacct.usage gives the CPU time (in nanoseconds) obtained by
+this group which is essentially the CPU time obtained by all the tasks
+in the system.
+
+New accounting groups can be created under the parent group /cgroups.
+
+# cd /cgroups
+# mkdir g1
+# echo $$ > g1
+
+The above steps create a new group g1 and move the current shell
+process (bash) into it. CPU time consumed by this bash and its children
+can be obtained from g1/cpuacct.usage and the same is accumulated in
+/cgroups/cpuacct.usage also.
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -9231,11 +9231,12 @@ struct cgroup_subsys cpu_cgroup_subsys =
* ([email protected]).
*/

-/* track cpu usage of a group of tasks */
+/* track cpu usage of a group of tasks and its child groups */
struct cpuacct {
struct cgroup_subsys_state css;
/* cpuusage holds pointer to a u64-type object on every cpu */
u64 *cpuusage;
+ struct cpuacct *parent;
};

struct cgroup_subsys cpuacct_subsys;
@@ -9269,6 +9270,9 @@ static struct cgroup_subsys_state *cpuac
return ERR_PTR(-ENOMEM);
}

+ if (cgrp->parent)
+ ca->parent = cgroup_ca(cgrp->parent);
+
return &ca->css;
}

@@ -9348,14 +9352,16 @@ static int cpuacct_populate(struct cgrou
static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
{
struct cpuacct *ca;
+ int cpu;

if (!cpuacct_subsys.active)
return;

+ cpu = task_cpu(tsk);
ca = task_ca(tsk);
- if (ca) {
- u64 *cpuusage = percpu_ptr(ca->cpuusage, task_cpu(tsk));

+ for (; ca; ca = ca->parent) {
+ u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
*cpuusage += cputime;
}
}

Regards,
Bharata.

2008-11-11 11:14:22

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH] Add hierarchical accounting to cpu accounting controller


* Bharata B Rao <[email protected]> wrote:

> On Mon, Nov 10, 2008 at 02:09:37PM +0100, Ingo Molnar wrote:
> >
> > * Bharata B Rao <[email protected]> wrote:
> >
> > > I will resend the patch with other corrections you mentioned only if
> > > Paul/Ingo want this patch to go in.
> >
> > yes, please resend the fixed version.
>
> Here it is:
>
> Add hierarchical accounting to cpu accounting controller and include
> cpuacct documentation.
>
> Currently, while charging the task's cputime to its accounting group,
> the accounting group hierarchy isn't updated. This patch charges the cputime
> of a task to its accounting group and all its parent accounting groups.
>
> Reported-by: Srivatsa Vaddagiri <[email protected]>
> Signed-off-by: Bharata B Rao <[email protected]>
> CC: Ingo Molnar <[email protected]>
> CC: Srivatsa Vaddagiri <[email protected]>
> Reviewed-by: Paul Menage <[email protected]>
> Acked-by: Balbir Singh <[email protected]>
> Acked-by: Peter Zijlstra <[email protected]>
> ---
> Documentation/controllers/cpuacct.txt | 32 ++++++++++++++++++++++++++++++++
> kernel/sched.c | 12 +++++++++---
> 2 files changed, 41 insertions(+), 3 deletions(-)

applied to tip/sched/core, thanks Bharata!

Ingo