The parameter name 'reset' is a little confusing to read: the code
appears to say "if we want to reset usage, return -EINVAL". That is
not true. What it actually means is that the only write we allow the
user to do is a reset (i.e. writing 0). This patch renames 'reset' to
'val' and adds a comment here, making the code more readable.
Signed-off-by: Dongsheng Yang <[email protected]>
---
kernel/sched/cpuacct.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
index dd7cbb5..9c2bbf7 100644
--- a/kernel/sched/cpuacct.c
+++ b/kernel/sched/cpuacct.c
@@ -145,13 +145,16 @@ static u64 cpuusage_read(struct cgroup_subsys_state *css, struct cftype *cft)
}
static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft,
- u64 reset)
+ u64 val)
{
struct cpuacct *ca = css_ca(css);
int err = 0;
int i;
- if (reset) {
+ /*
+ * Only allow '0' here to do a reset.
+ */
+ if (val) {
err = -EINVAL;
goto out;
}
--
1.8.4.2
Sometimes cpuacct.usage is not detailed enough for the user
to see how a group spent its CPU time. We want to know how
much time it used in user mode and how much in kernel mode.
This patch introduces some more files to expose this information to the user.
# ls /sys/fs/cgroup/cpuacct/cpuacct.usage*
/sys/fs/cgroup/cpuacct/cpuacct.usage /sys/fs/cgroup/cpuacct/cpuacct.usage_percpu_user
/sys/fs/cgroup/cpuacct/cpuacct.usage_percpu /sys/fs/cgroup/cpuacct/cpuacct.usage_sys
/sys/fs/cgroup/cpuacct/cpuacct.usage_percpu_sys /sys/fs/cgroup/cpuacct/cpuacct.usage_user
Signed-off-by: Dongsheng Yang <[email protected]>
---
kernel/sched/cpuacct.c | 140 ++++++++++++++++++++++++++++++++++++++++++-------
1 file changed, 120 insertions(+), 20 deletions(-)
diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
index 9c2bbf7..b3e8971 100644
--- a/kernel/sched/cpuacct.c
+++ b/kernel/sched/cpuacct.c
@@ -25,11 +25,22 @@ enum cpuacct_stat_index {
CPUACCT_STAT_NSTATS,
};
+enum cpuacct_usage_index {
+ CPUACCT_USAGE_USER, /* ... user mode */
+ CPUACCT_USAGE_SYSTEM, /* ... kernel mode */
+
+ CPUACCT_USAGE_NRUSAGE,
+};
+
+struct cpuacct_usage {
+ u64 usages[CPUACCT_USAGE_NRUSAGE];
+};
+
/* track cpu usage of a group of tasks and its child groups */
struct cpuacct {
struct cgroup_subsys_state css;
/* cpuusage holds pointer to a u64-type object on every cpu */
- u64 __percpu *cpuusage;
+ struct cpuacct_usage __percpu *cpuusage;
struct kernel_cpustat __percpu *cpustat;
};
@@ -49,7 +60,7 @@ static inline struct cpuacct *parent_ca(struct cpuacct *ca)
return css_ca(ca->css.parent);
}
-static DEFINE_PER_CPU(u64, root_cpuacct_cpuusage);
+static DEFINE_PER_CPU(struct cpuacct_usage, root_cpuacct_cpuusage);
static struct cpuacct root_cpuacct = {
.cpustat = &kernel_cpustat,
.cpuusage = &root_cpuacct_cpuusage,
@@ -68,7 +79,7 @@ cpuacct_css_alloc(struct cgroup_subsys_state *parent_css)
if (!ca)
goto out;
- ca->cpuusage = alloc_percpu(u64);
+ ca->cpuusage = alloc_percpu(struct cpuacct_usage);
if (!ca->cpuusage)
goto out_free_ca;
@@ -96,54 +107,104 @@ static void cpuacct_css_free(struct cgroup_subsys_state *css)
kfree(ca);
}
-static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu)
+static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu,
+ enum cpuacct_usage_index index)
{
- u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
- u64 data;
+ struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
+ u64 data = 0;
+ int i = 0;
+
+ /*
+ * We allow index == CPUACCT_USAGE_NRUSAGE here to read
+ * the sum of usages.
+ */
+ BUG_ON(index > CPUACCT_USAGE_NRUSAGE);
+
+ if (index == CPUACCT_USAGE_NRUSAGE) {
+ raw_spin_lock_irq(&cpu_rq(cpu)->lock);
+ for (i = 0; i < CPUACCT_USAGE_NRUSAGE; i++)
+ data += cpuusage->usages[i];
+ raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
+
+ goto out;
+ }
#ifndef CONFIG_64BIT
/*
* Take rq->lock to make 64-bit read safe on 32-bit platforms.
*/
raw_spin_lock_irq(&cpu_rq(cpu)->lock);
- data = *cpuusage;
+ data = cpuusage->usages[index];
raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
#else
- data = *cpuusage;
+ data = cpuusage->usages[index];
#endif
+out:
return data;
}
-static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
+static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu,
+ enum cpuacct_usage_index index, u64 val)
{
- u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
+ struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
+ int i = 0;
+
+ /*
+ * We allow index == CPUACCT_USAGE_NRUSAGE here to write
+ * val to each index of usages.
+ */
+ BUG_ON(index > CPUACCT_USAGE_NRUSAGE);
+
+ if (index == CPUACCT_USAGE_NRUSAGE) {
+ raw_spin_lock_irq(&cpu_rq(cpu)->lock);
+ for (i = 0; i < CPUACCT_USAGE_NRUSAGE; i++)
+ cpuusage->usages[i] = val;
+ raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
+
+ return;
+ }
#ifndef CONFIG_64BIT
/*
* Take rq->lock to make 64-bit write safe on 32-bit platforms.
*/
raw_spin_lock_irq(&cpu_rq(cpu)->lock);
- *cpuusage = val;
+ cpuusage->usages[index] = val;
raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
#else
- *cpuusage = val;
+ cpuusage->usages[index] = val;
#endif
}
/* return total cpu usage (in nanoseconds) of a group */
-static u64 cpuusage_read(struct cgroup_subsys_state *css, struct cftype *cft)
+static u64 __cpuusage_read(struct cgroup_subsys_state *css, enum cpuacct_usage_index index)
{
struct cpuacct *ca = css_ca(css);
u64 totalcpuusage = 0;
int i;
for_each_present_cpu(i)
- totalcpuusage += cpuacct_cpuusage_read(ca, i);
+ totalcpuusage += cpuacct_cpuusage_read(ca, i, index);
return totalcpuusage;
}
+static u64 cpuusage_user_read(struct cgroup_subsys_state *css, struct cftype *cft)
+{
+ return __cpuusage_read(css, CPUACCT_USAGE_USER);
+}
+
+static u64 cpuusage_sys_read(struct cgroup_subsys_state *css, struct cftype *cft)
+{
+ return __cpuusage_read(css, CPUACCT_USAGE_SYSTEM);
+}
+
+static u64 cpuusage_read(struct cgroup_subsys_state *css, struct cftype *cft)
+{
+ return __cpuusage_read(css, CPUACCT_USAGE_NRUSAGE);
+}
+
static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft,
u64 val)
{
@@ -159,27 +220,43 @@ static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft,
goto out;
}
- for_each_present_cpu(i)
- cpuacct_cpuusage_write(ca, i, 0);
+ for_each_present_cpu(i) {
+ cpuacct_cpuusage_write(ca, i, CPUACCT_USAGE_NRUSAGE, 0);
+ }
out:
return err;
}
-static int cpuacct_percpu_seq_show(struct seq_file *m, void *V)
+static int __cpuacct_percpu_seq_show(struct seq_file *m, enum cpuacct_usage_index index)
{
struct cpuacct *ca = css_ca(seq_css(m));
u64 percpu;
int i;
for_each_present_cpu(i) {
- percpu = cpuacct_cpuusage_read(ca, i);
+ percpu = cpuacct_cpuusage_read(ca, i, index);
seq_printf(m, "%llu ", (unsigned long long) percpu);
}
seq_printf(m, "\n");
return 0;
}
+static int cpuacct_percpu_user_seq_show(struct seq_file *m, void *V)
+{
+ return __cpuacct_percpu_seq_show(m, CPUACCT_USAGE_USER);
+}
+
+static int cpuacct_percpu_sys_seq_show(struct seq_file *m, void *V)
+{
+ return __cpuacct_percpu_seq_show(m, CPUACCT_USAGE_SYSTEM);
+}
+
+static int cpuacct_percpu_seq_show(struct seq_file *m, void *V)
+{
+ return __cpuacct_percpu_seq_show(m, CPUACCT_USAGE_NRUSAGE);
+}
+
static const char * const cpuacct_stat_desc[] = {
[CPUACCT_STAT_USER] = "user",
[CPUACCT_STAT_SYSTEM] = "system",
@@ -220,10 +297,26 @@ static struct cftype files[] = {
.write_u64 = cpuusage_write,
},
{
+ .name = "usage_user",
+ .read_u64 = cpuusage_user_read,
+ },
+ {
+ .name = "usage_sys",
+ .read_u64 = cpuusage_sys_read,
+ },
+ {
.name = "usage_percpu",
.seq_show = cpuacct_percpu_seq_show,
},
{
+ .name = "usage_percpu_user",
+ .seq_show = cpuacct_percpu_user_seq_show,
+ },
+ {
+ .name = "usage_percpu_sys",
+ .seq_show = cpuacct_percpu_sys_seq_show,
+ },
+ {
.name = "stat",
.seq_show = cpuacct_stats_show,
},
@@ -239,6 +332,7 @@ void cpuacct_charge(struct task_struct *tsk, u64 cputime)
{
struct cpuacct *ca;
int cpu;
+ int user_time;
cpu = task_cpu(tsk);
@@ -246,9 +340,15 @@ void cpuacct_charge(struct task_struct *tsk, u64 cputime)
ca = task_ca(tsk);
+ user_time = user_mode(task_pt_regs(tsk));
+
while (true) {
- u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
- *cpuusage += cputime;
+ struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
+
+ if (user_time)
+ cpuusage->usages[CPUACCT_USAGE_USER] += cputime;
+ else
+ cpuusage->usages[CPUACCT_USAGE_SYSTEM] += cputime;
ca = parent_ca(ca);
if (!ca)
--
1.8.4.2
On Mon, Dec 21, 2015 at 07:14:43PM +0800, Dongsheng Yang wrote:
> Sometimes, cpuacct.usage is not detialed enough to user
> to see how much usage a group used. We want to know how
> much time it used in user mode and how much in kernel mode.
cpuusage is being phased out. If you need these stats, please
implement it on cpu side.
Thanks.
--
tejun
On 12/22/2015 05:33 AM, Tejun Heo wrote:
> On Mon, Dec 21, 2015 at 07:14:43PM +0800, Dongsheng Yang wrote:
>> Sometimes, cpuacct.usage is not detialed enough to user
>> to see how much usage a group used. We want to know how
>> much time it used in user mode and how much in kernel mode.
>
> cpuusage is being phased out. If you need these stats, please
> implement it on cpu side.
Hi TJ, thanks. I will look at introducing this feature on the cpu side
then.
Thanx
Yang
>
> Thanks.
>
Hi, Tejun Heo
> From: [email protected]
> [mailto:[email protected]] On Behalf Of Tejun Heo
> Sent: Tuesday, December 22, 2015 5:33 AM
> To: Dongsheng Yang <[email protected]>
> Cc: [email protected]; [email protected];
> [email protected]; [email protected]
> Subject: Re: [RESEND PATCH 2/2] cpuacct: split usage into user_usage and
> sys_usage.
>
> On Mon, Dec 21, 2015 at 07:14:43PM +0800, Dongsheng Yang wrote:
> > Sometimes, cpuacct.usage is not detialed enough to user
> > to see how much usage a group used. We want to know how
> > much time it used in user mode and how much in kernel mode.
>
Yang handed this patch over to me :)
I plan to continue working on this feature.
> cpuusage is being phased out. If you need these stats, please
> implement it on cpu side.
>
Currently, the old stats live in cpuacct. If we put the new stats on the cpu side,
we have the following options:
1: Only put the new stats on the cpu side, and don't change the old interface.
We'll see the following content in the cgroup dir:
/cgroup/cpuacct:
cpuacct.usage
cpuacct.usage_percpu
/cgroup/cpu:
cpu.usage_sys
cpu.usage_percpu_sys
cpu.usage_user
cpu.usage_percpu_user
This is obviously not unified.
2: Move the old stats to the cpu side, and add the new stats.
We'll see the following content in the cgroup dir:
/cgroup/cpuacct:
[NONE]
/cgroup/cpu:
cpu.usage
cpu.usage_percpu
cpu.usage_sys
cpu.usage_percpu_sys
cpu.usage_user
cpu.usage_percpu_user
This has compatibility issues with old systems.
3: Clone the old stats to the cpu side (sharing one copy of the code), and add the
new stats on the cpu side. We'll see the following content in the cgroup dir:
/cgroup/cpuacct:
cpuacct.usage
cpuacct.usage_percpu
/cgroup/cpu:
cpu.usage
cpu.usage_percpu
cpu.usage_sys
cpu.usage_percpu_sys
cpu.usage_user
cpu.usage_percpu_user
This can support both old and new systems, and we can remove the old interface
in the future, but if we mount both cpu and cpuacct into the same dir,
we'll see the following duplicated files:
/cgroup/cpu,cpuacct
cpu.usage
cpuacct.usage
cpu.usage_percpu
cpuacct.usage_percpu
...
Approach 3 above seems to have the fewest drawbacks; could you share
your opinion on it?
Thanks
Zhaolei
Hello,
On Tue, Mar 01, 2016 at 06:35:56PM +0800, Zhao Lei wrote:
> Hi, Tejun Heo
>
> > From: [email protected]
> > [mailto:[email protected]] On Behalf Of Tejun Heo
> > Sent: Tuesday, December 22, 2015 5:33 AM
> > To: Dongsheng Yang <[email protected]>
> > Cc: [email protected]; [email protected];
> > [email protected]; [email protected]
> > Subject: Re: [RESEND PATCH 2/2] cpuacct: split usage into user_usage and
> > sys_usage.
> >
> > On Mon, Dec 21, 2015 at 07:14:43PM +0800, Dongsheng Yang wrote:
> > > Sometimes, cpuacct.usage is not detialed enough to user
> > > to see how much usage a group used. We want to know how
> > > much time it used in user mode and how much in kernel mode.
> >
> Yang gave this patch to me :)
> I'm plan to continue this function.
>
> > cpuusage is being phased out. If you need these stats, please
> > implement it on cpu side.
>
> Currently, old stats are put in cpuacct, if we put new stats into cpu side,
> we have following way:
Yeah, on second thought, at least for now, I think it's best to
put this in cpuacct. We can do the transfer to the cpu controller
later. Please feel free to add
Acked-by: Tejun Heo <[email protected]>
Thanks.
--
tejun
Commit-ID: 1a736b77a3f50910843d076623204ba6e5057dc1
Gitweb: http://git.kernel.org/tip/1a736b77a3f50910843d076623204ba6e5057dc1
Author: Dongsheng Yang <[email protected]>
AuthorDate: Mon, 21 Dec 2015 19:14:42 +0800
Committer: Ingo Molnar <[email protected]>
CommitDate: Mon, 21 Mar 2016 10:59:29 +0100
sched/cpuacct: Rename parameter in cpuusage_write() for readability
The name of the 'reset' parameter to cpuusage_write() is quite confusing,
because the only valid value we allow is '0', so !reset is actually the
case that resets ...
Rename it to 'val' and explain it in a comment that we only allow 0.
Signed-off-by: Dongsheng Yang <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: [email protected]
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
kernel/sched/cpuacct.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
index dd7cbb5..9c2bbf7 100644
--- a/kernel/sched/cpuacct.c
+++ b/kernel/sched/cpuacct.c
@@ -145,13 +145,16 @@ static u64 cpuusage_read(struct cgroup_subsys_state *css, struct cftype *cft)
}
static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft,
- u64 reset)
+ u64 val)
{
struct cpuacct *ca = css_ca(css);
int err = 0;
int i;
- if (reset) {
+ /*
+ * Only allow '0' here to do a reset.
+ */
+ if (val) {
err = -EINVAL;
goto out;
}