2006-05-02 06:22:24

by Balbir Singh

[permalink] [raw]
Subject: [Patch 6/8] delay accounting usage of taskstats interface


Changelog

Fixes suggested by Jay Lan
- check for tidstats before taking the mutex_lock in taskstats_exit_send()
- add back version information for struct taskstats

Fixes comments by akpm (on earlier patch now incorporated here)
- detailed comments on atomicity rules of accounting fields
- replace use of nsec_t

Other changes
- fix coding style

delayacct-taskstats.patch

Usage of taskstats interface by delay accounting.

Signed-off-by: Shailabh Nagar <[email protected]>
Signed-off-by: Balbir Singh <[email protected]>
---

include/linux/delayacct.h | 13 +++++++++++
include/linux/taskstats.h | 53 ++++++++++++++++++++++++++++++++++++++++++++--
init/Kconfig | 1
kernel/delayacct.c | 42 ++++++++++++++++++++++++++++++++++++
kernel/taskstats.c | 13 ++++++++++-
5 files changed, 119 insertions(+), 3 deletions(-)

diff -puN include/linux/delayacct.h~delayacct-taskstats include/linux/delayacct.h
--- linux-2.6.17-rc3/include/linux/delayacct.h~delayacct-taskstats 2006-05-02 10:36:31.000000000 +0530
+++ linux-2.6.17-rc3-balbir/include/linux/delayacct.h 2006-05-02 10:39:30.000000000 +0530
@@ -18,6 +18,7 @@
#define _LINUX_DELAYACCT_H

#include <linux/sched.h>
+#include <linux/taskstats_kern.h>

/*
* Per-task flags relevant to delay accounting
@@ -35,6 +36,7 @@ extern void __delayacct_tsk_init(struct
extern void __delayacct_tsk_exit(struct task_struct *);
extern void __delayacct_blkio_start(void);
extern void __delayacct_blkio_end(void);
+extern int __delayacct_add_tsk(struct taskstats *, struct task_struct *);

static inline void delayacct_set_flag(int flag)
{
@@ -74,6 +76,14 @@ static inline void delayacct_blkio_end(v
__delayacct_blkio_end();
}

+static inline int delayacct_add_tsk(struct taskstats *d,
+ struct task_struct *tsk)
+{
+ if (!tsk->delays)
+ return -EINVAL;
+ return __delayacct_add_tsk(d, tsk);
+}
+
#else
static inline void delayacct_set_flag(int flag)
{}
@@ -89,6 +99,9 @@ static inline void delayacct_blkio_start
{}
static inline void delayacct_blkio_end(void)
{}
+static inline int delayacct_add_tsk(struct taskstats *d,
+ struct task_struct *tsk)
+{ return 0; }
#endif /* CONFIG_TASK_DELAY_ACCT */

#endif
diff -puN include/linux/taskstats.h~delayacct-taskstats include/linux/taskstats.h
--- linux-2.6.17-rc3/include/linux/taskstats.h~delayacct-taskstats 2006-05-02 10:36:31.000000000 +0530
+++ linux-2.6.17-rc3-balbir/include/linux/taskstats.h 2006-05-02 10:36:31.000000000 +0530
@@ -33,9 +33,58 @@

struct taskstats {

- /* Version 1 */
+ /* Delay accounting fields start
+ *
+ * All values, until comment "Delay accounting fields end" are
+ * available only if delay accounting is enabled, even though the last
+ * few fields are not delays
+ *
+ * xxx_count is the number of delay values recorded
+ * xxx_delay_total is the corresponding cumulative delay in nanoseconds
+ *
+ * xxx_delay_total wraps around to zero on overflow
+ * xxx_count incremented regardless of overflow
+ */
+
+ /* Delay waiting for cpu, while runnable
+ * count, delay_total NOT updated atomically
+ */
+ __u64 cpu_count;
+ __u64 cpu_delay_total;
+
+ /* Following four fields atomically updated using task->delays->lock */
+
+ /* Delay waiting for synchronous block I/O to complete
+ * does not account for delays in I/O submission
+ */
+ __u64 blkio_count;
+ __u64 blkio_delay_total;
+
+ /* Delay waiting for page fault I/O (swap in only) */
+ __u64 swapin_count;
+ __u64 swapin_delay_total;
+
+ /* cpu "wall-clock" running time
+ * On some architectures, value will adjust for cpu time stolen
+ * from the kernel in involuntary waits due to virtualization.
+ * Value is cumulative, in nanoseconds, without a corresponding count
+ * and wraps around to zero silently on overflow
+ */
+ __u64 cpu_run_real_total;
+
+ /* cpu "virtual" running time
+ * Uses time intervals seen by the kernel i.e. no adjustment
+ * for kernel's involuntary waits due to virtualization.
+ * Value is cumulative, in nanoseconds, without a corresponding count
+ * and wraps around to zero silently on overflow
+ */
+ __u64 cpu_run_virtual_total;
+ /* Delay accounting fields end */
+ /* version 1 ends here */
+
+ /* version of taskstats */
+ __u64 version;

- int filler_avoids_empty_struct_warnings;
};


diff -puN init/Kconfig~delayacct-taskstats init/Kconfig
--- linux-2.6.17-rc3/init/Kconfig~delayacct-taskstats 2006-05-02 10:36:31.000000000 +0530
+++ linux-2.6.17-rc3-balbir/init/Kconfig 2006-05-02 10:36:31.000000000 +0530
@@ -164,6 +164,7 @@ config TASKSTATS

config TASK_DELAY_ACCT
bool "Enable per-task delay accounting (EXPERIMENTAL)"
+ depends on TASKSTATS
help
Collect information on time spent by a task waiting for system
resources like cpu, synchronous block I/O completion and swapping
diff -puN kernel/delayacct.c~delayacct-taskstats kernel/delayacct.c
--- linux-2.6.17-rc3/kernel/delayacct.c~delayacct-taskstats 2006-05-02 10:36:31.000000000 +0530
+++ linux-2.6.17-rc3-balbir/kernel/delayacct.c 2006-05-02 10:37:57.000000000 +0530
@@ -104,3 +104,45 @@ void __delayacct_blkio_end(void)
&current->delays->blkio_delay,
&current->delays->blkio_count);
}
+
+int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
+{
+ s64 tmp;
+ struct timespec ts;
+ unsigned long t1,t2,t3;
+
+ tmp = (s64)d->cpu_run_real_total;
+ tmp += (u64)(tsk->utime + tsk->stime) * TICK_NSEC;
+ d->cpu_run_real_total = (tmp < (s64)d->cpu_run_real_total) ? 0 : tmp;
+
+ /*
+ * No locking available for sched_info (and too expensive to add one)
+ * Mitigate by taking snapshot of values
+ */
+ t1 = tsk->sched_info.pcnt;
+ t2 = tsk->sched_info.run_delay;
+ t3 = tsk->sched_info.cpu_time;
+
+ d->cpu_count += t1;
+
+ jiffies_to_timespec(t2, &ts);
+ tmp = (s64)d->cpu_delay_total + timespec_to_ns(&ts);
+ d->cpu_delay_total = (tmp < (s64)d->cpu_delay_total) ? 0 : tmp;
+
+ tmp = (s64)d->cpu_run_virtual_total + (s64)jiffies_to_usecs(t3) * 1000;
+ d->cpu_run_virtual_total =
+ (tmp < (s64)d->cpu_run_virtual_total) ? 0 : tmp;
+
+ /* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */
+
+ spin_lock(&tsk->delays->lock);
+ tmp = d->blkio_delay_total + tsk->delays->blkio_delay;
+ d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp;
+ tmp = d->swapin_delay_total + tsk->delays->swapin_delay;
+ d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp;
+ d->blkio_count += tsk->delays->blkio_count;
+ d->swapin_count += tsk->delays->swapin_count;
+ spin_unlock(&tsk->delays->lock);
+
+ return 0;
+}
diff -puN kernel/taskstats.c~delayacct-taskstats kernel/taskstats.c
--- linux-2.6.17-rc3/kernel/taskstats.c~delayacct-taskstats 2006-05-02 10:36:31.000000000 +0530
+++ linux-2.6.17-rc3-balbir/kernel/taskstats.c 2006-05-02 10:36:31.000000000 +0530
@@ -18,6 +18,7 @@

#include <linux/kernel.h>
#include <linux/taskstats_kern.h>
+#include <linux/delayacct.h>
#include <net/genetlink.h>
#include <asm/atomic.h>

@@ -120,7 +121,9 @@ static int fill_pid(pid_t pid, struct ta
* goto err;
*/

-err:
+ rc = delayacct_add_tsk(stats, tsk);
+
+ /* Define err: label here if needed */
put_task_struct(tsk);
return rc;

@@ -152,6 +155,10 @@ static int fill_tgid(pid_t tgid, struct
* break;
*/

+ rc = delayacct_add_tsk(stats, tsk);
+ if (rc)
+ break;
+
} while_each_thread(first, tsk);
read_unlock(&tasklist_lock);

@@ -255,6 +262,8 @@ void taskstats_exit_send(struct task_str
if (rc < 0)
goto err_skb;

+ tidstats->version = TASKSTATS_VERSION;
+
na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_PID);
NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_PID, (u32)tsk->pid);
NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS,
@@ -270,6 +279,8 @@ void taskstats_exit_send(struct task_str
if (rc < 0)
goto err_skb;

+ tgidstats->version = TASKSTATS_VERSION;
+
na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_TGID);
NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_TGID, (u32)tsk->tgid);
NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS,
_


2006-05-04 02:13:54

by Jay Lan

[permalink] [raw]
Subject: Re: [Lse-tech] [Patch 6/8] delay accounting usage of taskstats interface

Balbir Singh wrote:
>Changelog
>
>Fixes suggested by Jay Lan
>- check for tidstats before taking the mutex_lock in taskstats_exit_send()
>- add back version information for struct taskstats
>
><clip>
>
> struct taskstats {
>
>- /* Version 1 */
>+ /* Delay accounting fields start
>+ *
>+ * All values, until comment "Delay accounting fields end" are
>+ * available only if delay accounting is enabled, even though the last
>+ * few fields are not delays
>+ *
>+ * xxx_count is the number of delay values recorded
>+ * xxx_delay_total is the corresponding cumulative delay in nanoseconds
>+ *
>+ * xxx_delay_total wraps around to zero on overflow
>+ * xxx_count incremented regardless of overflow
>+ */
>+
>+ /* Delay waiting for cpu, while runnable
>+ * count, delay_total NOT updated atomically
>+ */
>+ __u64 cpu_count;
>+ __u64 cpu_delay_total;
>+
>+ /* Following four fields atomically updated using task->delays->lock */
>+
>+ /* Delay waiting for synchronous block I/O to complete
>+ * does not account for delays in I/O submission
>+ */
>+ __u64 blkio_count;
>+ __u64 blkio_delay_total;
>+
>+ /* Delay waiting for page fault I/O (swap in only) */
>+ __u64 swapin_count;
>+ __u64 swapin_delay_total;
>+
>+ /* cpu "wall-clock" running time
>+ * On some architectures, value will adjust for cpu time stolen
>+ * from the kernel in involuntary waits due to virtualization.
>+ * Value is cumulative, in nanoseconds, without a corresponding count
>+ * and wraps around to zero silently on overflow
>+ */
>+ __u64 cpu_run_real_total;
>+
>+ /* cpu "virtual" running time
>+ * Uses time intervals seen by the kernel i.e. no adjustment
>+ * for kernel's involuntary waits due to virtualization.
>+ * Value is cumulative, in nanoseconds, without a corresponding count
>+ * and wraps around to zero silently on overflow
>+ */
>+ __u64 cpu_run_virtual_total;
>+ /* Delay accounting fields end */
>+ /* version 1 ends here */
>+
>+ /* version of taskstats */
>+ __u64 version;
>

Could you place the common field "version" before any acct subsystem
specific fields?

As a matter of fact, we do not need
'filler_avoids_empty_struct_warnings' in [patch 5/8] taskstats
interface. Replacing that field with "version" would be great!

Thanks,
- jay


>
>- int filler_avoids_empty_struct_warnings;
> };
>
>
>

2006-05-04 04:26:42

by Balbir Singh

[permalink] [raw]
Subject: Re: [Lse-tech] [Patch 6/8] delay accounting usage of taskstats interface

On Wed, May 03, 2006 at 07:13:54PM -0700, Jay Lan wrote:
> Balbir Singh wrote:
> >Changelog
> >
> >Fixes suggested by Jay Lan
> >- check for tidstats before taking the mutex_lock in taskstats_exit_send()
> >- add back version information for struct taskstats
> >
> ><clip>
> >
> > struct taskstats {
> >
> >- /* Version 1 */
> >+ /* Delay accounting fields start
> >+ *
> >+ * All values, until comment "Delay accounting fields end" are
> >+ * available only if delay accounting is enabled, even though the last
> >+ * few fields are not delays
> >+ *
> >+ * xxx_count is the number of delay values recorded
> >+ * xxx_delay_total is the corresponding cumulative delay in nanoseconds
> >+ *
> >+ * xxx_delay_total wraps around to zero on overflow
> >+ * xxx_count incremented regardless of overflow
> >+ */
> >+
> >+ /* Delay waiting for cpu, while runnable
> >+ * count, delay_total NOT updated atomically
> >+ */
> >+ __u64 cpu_count;
> >+ __u64 cpu_delay_total;
> >+
> >+ /* Following four fields atomically updated using task->delays->lock */
> >+
> >+ /* Delay waiting for synchronous block I/O to complete
> >+ * does not account for delays in I/O submission
> >+ */
> >+ __u64 blkio_count;
> >+ __u64 blkio_delay_total;
> >+
> >+ /* Delay waiting for page fault I/O (swap in only) */
> >+ __u64 swapin_count;
> >+ __u64 swapin_delay_total;
> >+
> >+ /* cpu "wall-clock" running time
> >+ * On some architectures, value will adjust for cpu time stolen
> >+ * from the kernel in involuntary waits due to virtualization.
> >+ * Value is cumulative, in nanoseconds, without a corresponding count
> >+ * and wraps around to zero silently on overflow
> >+ */
> >+ __u64 cpu_run_real_total;
> >+
> >+ /* cpu "virtual" running time
> >+ * Uses time intervals seen by the kernel i.e. no adjustment
> >+ * for kernel's involuntary waits due to virtualization.
> >+ * Value is cumulative, in nanoseconds, without a corresponding count
> >+ * and wraps around to zero silently on overflow
> >+ */
> >+ __u64 cpu_run_virtual_total;
> >+ /* Delay accounting fields end */
> >+ /* version 1 ends here */
> >+
> >+ /* version of taskstats */
> >+ __u64 version;
> >
>
> Could you place the common field "version" before any acct subsystem
> specific fields?
>
> As a matter of fact, we do not need
> 'filler_avoids_empty_struct_warnings' in [patch 5/8] taskstats
> interface. Replacing that field with "version" would be great!

Yes, I thought about it and wanted to put it upfront. To maintain compatibility
with the previous post, I decided to add it to the end.

If putting the version right up helps the readability of taskstats.h,
I can make the changes and repost the patches again.

Thanks for your review,
Balbir

2006-05-04 18:47:57

by Balbir Singh

[permalink] [raw]
Subject: [updated] [Patch 6/8] delay accounting usage of taskstats interface


Changelog

Fixes suggested by Jay Lan
- check for tidstats before taking the mutex_lock in taskstats_exit_send()
- add back and fill version information for struct taskstats

Fixes comments by akpm (on earlier patch now incorporated here)
- detailed comments on atomicity rules of accounting fields
- replace use of nsec_t

delayacct-taskstats.patch

Usage of taskstats interface by delay accounting.

Signed-off-by: Shailabh Nagar <[email protected]>
Signed-off-by: Balbir Singh <[email protected]>
---

include/linux/delayacct.h | 13 ++++++++++++
include/linux/taskstats.h | 49 ++++++++++++++++++++++++++++++++++++++++++++++
init/Kconfig | 1
kernel/delayacct.c | 42 +++++++++++++++++++++++++++++++++++++++
kernel/taskstats.c | 12 ++++++++++-
5 files changed, 116 insertions(+), 1 deletion(-)

diff -puN include/linux/delayacct.h~delayacct-taskstats include/linux/delayacct.h
--- linux-2.6.17-rc3/include/linux/delayacct.h~delayacct-taskstats 2006-05-04 09:31:59.000000000 +0530
+++ linux-2.6.17-rc3-balbir/include/linux/delayacct.h 2006-05-04 11:26:18.000000000 +0530
@@ -18,6 +18,7 @@
#define _LINUX_DELAYACCT_H

#include <linux/sched.h>
+#include <linux/taskstats_kern.h>

/*
* Per-task flags relevant to delay accounting
@@ -35,6 +36,7 @@ extern void __delayacct_tsk_init(struct
extern void __delayacct_tsk_exit(struct task_struct *);
extern void __delayacct_blkio_start(void);
extern void __delayacct_blkio_end(void);
+extern int __delayacct_add_tsk(struct taskstats *, struct task_struct *);

static inline void delayacct_set_flag(int flag)
{
@@ -74,6 +76,14 @@ static inline void delayacct_blkio_end(v
__delayacct_blkio_end();
}

+static inline int delayacct_add_tsk(struct taskstats *d,
+ struct task_struct *tsk)
+{
+ if (!tsk->delays)
+ return -EINVAL;
+ return __delayacct_add_tsk(d, tsk);
+}
+
#else
static inline void delayacct_set_flag(int flag)
{}
@@ -89,6 +99,9 @@ static inline void delayacct_blkio_start
{}
static inline void delayacct_blkio_end(void)
{}
+static inline int delayacct_add_tsk(struct taskstats *d,
+ struct task_struct *tsk)
+{ return 0; }
#endif /* CONFIG_TASK_DELAY_ACCT */

#endif
diff -puN include/linux/taskstats.h~delayacct-taskstats include/linux/taskstats.h
--- linux-2.6.17-rc3/include/linux/taskstats.h~delayacct-taskstats 2006-05-04 09:31:59.000000000 +0530
+++ linux-2.6.17-rc3-balbir/include/linux/taskstats.h 2006-05-04 09:34:11.000000000 +0530
@@ -35,6 +35,55 @@ struct taskstats {

/* Version 1 */
__u64 version;
+
+ /* Delay accounting fields start
+ *
+ * All values, until comment "Delay accounting fields end" are
+ * available only if delay accounting is enabled, even though the last
+ * few fields are not delays
+ *
+ * xxx_count is the number of delay values recorded
+ * xxx_delay_total is the corresponding cumulative delay in nanoseconds
+ *
+ * xxx_delay_total wraps around to zero on overflow
+ * xxx_count incremented regardless of overflow
+ */
+
+ /* Delay waiting for cpu, while runnable
+ * count, delay_total NOT updated atomically
+ */
+ __u64 cpu_count;
+ __u64 cpu_delay_total;
+
+ /* Following four fields atomically updated using task->delays->lock */
+
+ /* Delay waiting for synchronous block I/O to complete
+ * does not account for delays in I/O submission
+ */
+ __u64 blkio_count;
+ __u64 blkio_delay_total;
+
+ /* Delay waiting for page fault I/O (swap in only) */
+ __u64 swapin_count;
+ __u64 swapin_delay_total;
+
+ /* cpu "wall-clock" running time
+ * On some architectures, value will adjust for cpu time stolen
+ * from the kernel in involuntary waits due to virtualization.
+ * Value is cumulative, in nanoseconds, without a corresponding count
+ * and wraps around to zero silently on overflow
+ */
+ __u64 cpu_run_real_total;
+
+ /* cpu "virtual" running time
+ * Uses time intervals seen by the kernel i.e. no adjustment
+ * for kernel's involuntary waits due to virtualization.
+ * Value is cumulative, in nanoseconds, without a corresponding count
+ * and wraps around to zero silently on overflow
+ */
+ __u64 cpu_run_virtual_total;
+ /* Delay accounting fields end */
+ /* version 1 ends here */
};


diff -puN init/Kconfig~delayacct-taskstats init/Kconfig
--- linux-2.6.17-rc3/init/Kconfig~delayacct-taskstats 2006-05-04 09:31:59.000000000 +0530
+++ linux-2.6.17-rc3-balbir/init/Kconfig 2006-05-04 09:31:59.000000000 +0530
@@ -164,6 +164,7 @@ config TASKSTATS

config TASK_DELAY_ACCT
bool "Enable per-task delay accounting (EXPERIMENTAL)"
+ depends on TASKSTATS
help
Collect information on time spent by a task waiting for system
resources like cpu, synchronous block I/O completion and swapping
diff -puN kernel/delayacct.c~delayacct-taskstats kernel/delayacct.c
--- linux-2.6.17-rc3/kernel/delayacct.c~delayacct-taskstats 2006-05-04 09:31:59.000000000 +0530
+++ linux-2.6.17-rc3-balbir/kernel/delayacct.c 2006-05-04 11:26:18.000000000 +0530
@@ -104,3 +104,45 @@ void __delayacct_blkio_end(void)
&current->delays->blkio_delay,
&current->delays->blkio_count);
}
+
+int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
+{
+ s64 tmp;
+ struct timespec ts;
+ unsigned long t1,t2,t3;
+
+ tmp = (s64)d->cpu_run_real_total;
+ tmp += (u64)(tsk->utime + tsk->stime) * TICK_NSEC;
+ d->cpu_run_real_total = (tmp < (s64)d->cpu_run_real_total) ? 0 : tmp;
+
+ /*
+ * No locking available for sched_info (and too expensive to add one)
+ * Mitigate by taking snapshot of values
+ */
+ t1 = tsk->sched_info.pcnt;
+ t2 = tsk->sched_info.run_delay;
+ t3 = tsk->sched_info.cpu_time;
+
+ d->cpu_count += t1;
+
+ jiffies_to_timespec(t2, &ts);
+ tmp = (s64)d->cpu_delay_total + timespec_to_ns(&ts);
+ d->cpu_delay_total = (tmp < (s64)d->cpu_delay_total) ? 0 : tmp;
+
+ tmp = (s64)d->cpu_run_virtual_total + (s64)jiffies_to_usecs(t3) * 1000;
+ d->cpu_run_virtual_total =
+ (tmp < (s64)d->cpu_run_virtual_total) ? 0 : tmp;
+
+ /* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */
+
+ spin_lock(&tsk->delays->lock);
+ tmp = d->blkio_delay_total + tsk->delays->blkio_delay;
+ d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp;
+ tmp = d->swapin_delay_total + tsk->delays->swapin_delay;
+ d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp;
+ d->blkio_count += tsk->delays->blkio_count;
+ d->swapin_count += tsk->delays->swapin_count;
+ spin_unlock(&tsk->delays->lock);
+
+ return 0;
+}
diff -puN kernel/taskstats.c~delayacct-taskstats kernel/taskstats.c
--- linux-2.6.17-rc3/kernel/taskstats.c~delayacct-taskstats 2006-05-04 09:31:59.000000000 +0530
+++ linux-2.6.17-rc3-balbir/kernel/taskstats.c 2006-05-04 11:27:53.000000000 +0530
@@ -18,6 +18,7 @@

#include <linux/kernel.h>
#include <linux/taskstats_kern.h>
+#include <linux/delayacct.h>
#include <net/genetlink.h>
#include <asm/atomic.h>

@@ -120,7 +121,10 @@ static int fill_pid(pid_t pid, struct ta
* goto err;
*/

-err:
+ rc = delayacct_add_tsk(stats, tsk);
+ stats->version = TASKSTATS_VERSION;
+
+ /* Define err: label here if needed */
put_task_struct(tsk);
return rc;

@@ -152,8 +156,14 @@ static int fill_tgid(pid_t tgid, struct
* break;
*/

+ rc = delayacct_add_tsk(stats, tsk);
+ if (rc)
+ break;
+
} while_each_thread(first, tsk);
read_unlock(&tasklist_lock);
+ stats->version = TASKSTATS_VERSION;
+

/*
* Accounting subsytems can also add calls here if they don't
_

2006-05-09 09:46:31

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [updated] [Patch 6/8] delay accounting usage of taskstats interface

On Fri, 2006-05-05 at 00:14 +0530, Balbir Singh wrote:

> #else
> static inline void delayacct_set_flag(int flag)
> {}

Please make this

static inline void delayacct_set_flag(int flag) { }

Same in all other files

> @@ -89,6 +99,9 @@ static inline void delayacct_blkio_start
> {}
> static inline void delayacct_blkio_end(void)
> {}
> +static inline int delayacct_add_tsk(struct taskstats *d,
> + struct task_struct *tsk)
> +{ return 0; }

{
return 0;
}

> + *
> + * xxx_count is the number of delay values recorded
> + * xxx_delay_total is the corresponding cumulative delay in nanoseconds
> + *
> + * xxx_delay_total wraps around to zero on overflow
> + * xxx_count incremented regardless of overflow
> + */

Please use a structure for that.

struct delay_account {
ktime_t delay;
u64 count;
};

Also instead of having tons of fields added, please use something like

enum {
CPU_ACCT,
BLKIO_ACCT,
....,
MAX_ACCT_TYPES
};
struct delay_account stats[MAX_ACCT_TYPES];


> + /* Delay waiting for cpu, while runnable
> + * count, delay_total NOT updated atomically
> + */
> + __u64 cpu_count;
> + __u64 cpu_delay_total;
> + /* Following four fields atomically updated using task->delays->lock */
> +
> + /* Delay waiting for synchronous block I/O to complete
> + * does not account for delays in I/O submission
> + */
> + __u64 blkio_count;
> + __u64 blkio_delay_total;
> +
> + /* Delay waiting for page fault I/O (swap in only) */
> + __u64 swapin_count;
> + __u64 swapin_delay_total;
> +
> + /* cpu "wall-clock" running time
> + * On some architectures, value will adjust for cpu time stolen
> + * from the kernel in involuntary waits due to virtualization.
> + * Value is cumulative, in nanoseconds, without a corresponding count
> + * and wraps around to zero silently on overflow

When will this overflow happen ? 2^64 nsec ~= 584 years


> +int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
> +{

Please add some comment what this code does.

> + s64 tmp;
> + struct timespec ts;
> + unsigned long t1,t2,t3;
> +
> + tmp = (s64)d->cpu_run_real_total;
> + tmp += (u64)(tsk->utime + tsk->stime) * TICK_NSEC;
> + d->cpu_run_real_total = (tmp < (s64)d->cpu_run_real_total) ? 0 : tmp;
> +
> + /*
> + * No locking available for sched_info (and too expensive to add one)
> + * Mitigate by taking snapshot of values
> + */
> + t1 = tsk->sched_info.pcnt;
> + t2 = tsk->sched_info.run_delay;
> + t3 = tsk->sched_info.cpu_time;
> +
> + d->cpu_count += t1;
> +
> + jiffies_to_timespec(t2, &ts);
> + tmp = (s64)d->cpu_delay_total + timespec_to_ns(&ts);
> + d->cpu_delay_total = (tmp < (s64)d->cpu_delay_total) ? 0 : tmp;
> +
> + tmp = (s64)d->cpu_run_virtual_total + (s64)jiffies_to_usecs(t3) * 1000;
> + d->cpu_run_virtual_total =
> + (tmp < (s64)d->cpu_run_virtual_total) ? 0 : tmp;
> +
> + /* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */
> +
> + spin_lock(&tsk->delays->lock);
> + tmp = d->blkio_delay_total + tsk->delays->blkio_delay;
> + d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp;

I really do not get whatfor all these comparisons are. How can those
values get > 2^63 within a reasonable timeframe ?

> + tmp = d->swapin_delay_total + tsk->delays->swapin_delay;
> + d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp;
> + d->blkio_count += tsk->delays->blkio_count;
> + d->swapin_count += tsk->delays->swapin_count;
> + spin_unlock(&tsk->delays->lock);
> + return 0;
> +}
> diff -puN kernel/taskstats.c~delayacct-taskstats kernel/taskstats.c
> --- linux-2.6.17-rc3/kernel/taskstats.c~delayacct-taskstats 2006-05-04 09:31:59.000000000 +0530
> +++ linux-2.6.17-rc3-balbir/kernel/taskstats.c 2006-05-04 11:27:53.000000000 +0530
> @@ -18,6 +18,7 @@
>
> #include <linux/kernel.h>
> #include <linux/taskstats_kern.h>
> +#include <linux/delayacct.h>
> #include <net/genetlink.h>
> #include <asm/atomic.h>
>
> @@ -120,7 +121,10 @@ static int fill_pid(pid_t pid, struct ta
> * goto err;
> */
>
> -err:
> + rc = delayacct_add_tsk(stats, tsk);
> + stats->version = TASKSTATS_VERSION;
> +
> + /* Define err: label here if needed */

Please remove that comment. The err label goes to a place where it makes
sense.

> put_task_struct(tsk);
> return rc;
>
> @@ -152,8 +156,14 @@ static int fill_tgid(pid_t tgid, struct
> * break;
> */
>
> + rc = delayacct_add_tsk(stats, tsk);
> + if (rc)
> + break;
> +
> } while_each_thread(first, tsk);
> read_unlock(&tasklist_lock);
> + stats->version = TASKSTATS_VERSION;
> +
>
> /*
> * Accounting subsytems can also add calls here if they don't
> _
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/

2006-05-09 17:15:05

by Balbir Singh

[permalink] [raw]
Subject: Re: [updated] [Patch 6/8] delay accounting usage of taskstats interface

On Tue, May 09, 2006 at 11:46:48AM +0000, Thomas Gleixner wrote:
> On Fri, 2006-05-05 at 00:14 +0530, Balbir Singh wrote:
>
> > #else
> > static inline void delayacct_set_flag(int flag)
> > {}
>
> Please make this
>
> static inline void delayacct_set_flag(int flag) { }
>
> Same in all other files

Will do.

>
> > @@ -89,6 +99,9 @@ static inline void delayacct_blkio_start
> > {}
> > static inline void delayacct_blkio_end(void)
> > {}
> > +static inline int delayacct_add_tsk(struct taskstats *d,
> > + struct task_struct *tsk)
> > +{ return 0; }
>
> {
> return 0;
> }
>
> > + *
> > + * xxx_count is the number of delay values recorded
> > + * xxx_delay_total is the corresponding cumulative delay in nanoseconds
> > + *
> > + * xxx_delay_total wraps around to zero on overflow
> > + * xxx_count incremented regardless of overflow
> > + */
>
> Please use a structure for that.
>
> struct delay_account {
> ktime_t delay;
> u64 count;
> };
>
> Also instead of having tons of fields added, please use something like
>
> enum {
> CPU_ACCT,
> BLKIO_ACCT,
> ....,
> MAX_ACCT_TYPES
> };
> struct delay_account stats[MAX_ACCT_TYPES];
>

Yes, this would make sense if all the fields were of type delay_account.
currently, only CPU, BLKIO and SWAPIN are of that type. The new
fields that will be added to this structure (since this is a part of
a statistics interface now) could potentially belong to other subsystems.

>
> > + /* Delay waiting for cpu, while runnable
> > + * count, delay_total NOT updated atomically
> > + */
> > + __u64 cpu_count;
> > + __u64 cpu_delay_total;
> > + /* Following four fields atomically updated using task->delays->lock */
> > +
> > + /* Delay waiting for synchronous block I/O to complete
> > + * does not account for delays in I/O submission
> > + */
> > + __u64 blkio_count;
> > + __u64 blkio_delay_total;
> > +
> > + /* Delay waiting for page fault I/O (swap in only) */
> > + __u64 swapin_count;
> > + __u64 swapin_delay_total;
> > +
> > + /* cpu "wall-clock" running time
> > + * On some architectures, value will adjust for cpu time stolen
> > + * from the kernel in involuntary waits due to virtualization.
> > + * Value is cumulative, in nanoseconds, without a corresponding count
> > + * and wraps around to zero silently on overflow
>
> When will this overflow happen ? 2^64 nsec ~= 584 years
>

Detecting a potential overflow is not a bad thing IMHO. Currently
the existing data type makes this overflow very rare.

>
> > +int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
> > +{
>
> Please add some comment what this code does.

Will do

>
> > + s64 tmp;
> > + struct timespec ts;
> > + unsigned long t1,t2,t3;
> > +
> > + tmp = (s64)d->cpu_run_real_total;
> > + tmp += (u64)(tsk->utime + tsk->stime) * TICK_NSEC;
> > + d->cpu_run_real_total = (tmp < (s64)d->cpu_run_real_total) ? 0 : tmp;
> > +
> > + /*
> > + * No locking available for sched_info (and too expensive to add one)
> > + * Mitigate by taking snapshot of values
> > + */
> > + t1 = tsk->sched_info.pcnt;
> > + t2 = tsk->sched_info.run_delay;
> > + t3 = tsk->sched_info.cpu_time;
> > +
> > + d->cpu_count += t1;
> > +
> > + jiffies_to_timespec(t2, &ts);
> > + tmp = (s64)d->cpu_delay_total + timespec_to_ns(&ts);
> > + d->cpu_delay_total = (tmp < (s64)d->cpu_delay_total) ? 0 : tmp;
> > +
> > + tmp = (s64)d->cpu_run_virtual_total + (s64)jiffies_to_usecs(t3) * 1000;
> > + d->cpu_run_virtual_total =
> > + (tmp < (s64)d->cpu_run_virtual_total) ? 0 : tmp;
> > +
> > + /* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */
> > +
> > + spin_lock(&tsk->delays->lock);
> > + tmp = d->blkio_delay_total + tsk->delays->blkio_delay;
> > + d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp;
>
> I really do not get whatfor all these comparisons are. How can those
> values get > 2^63 within a reasonable timeframe ?

Its defensive programming, granted that it is probably not required.

>
> > + tmp = d->swapin_delay_total + tsk->delays->swapin_delay;
> > + d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp;
> > + d->blkio_count += tsk->delays->blkio_count;
> > + d->swapin_count += tsk->delays->swapin_count;
> > + spin_unlock(&tsk->delays->lock);
> > + return 0;
> > +}
> > diff -puN kernel/taskstats.c~delayacct-taskstats kernel/taskstats.c
> > --- linux-2.6.17-rc3/kernel/taskstats.c~delayacct-taskstats 2006-05-04 09:31:59.000000000 +0530
> > +++ linux-2.6.17-rc3-balbir/kernel/taskstats.c 2006-05-04 11:27:53.000000000 +0530
> > @@ -18,6 +18,7 @@
> >
> > #include <linux/kernel.h>
> > #include <linux/taskstats_kern.h>
> > +#include <linux/delayacct.h>
> > #include <net/genetlink.h>
> > #include <asm/atomic.h>
> >
> > @@ -120,7 +121,10 @@ static int fill_pid(pid_t pid, struct ta
> > * goto err;
> > */
> >
> > -err:
> > + rc = delayacct_add_tsk(stats, tsk);
> > + stats->version = TASKSTATS_VERSION;
> > +
> > + /* Define err: label here if needed */
>
> Please remove that comment. The err label goes to a place where it makes
> sense.

Will fix this.

>
> > put_task_struct(tsk);
> > return rc;
> >
> > @@ -152,8 +156,14 @@ static int fill_tgid(pid_t tgid, struct
> > * break;
> > */
> >
> > + rc = delayacct_add_tsk(stats, tsk);
> > + if (rc)
> > + break;
> > +
> > } while_each_thread(first, tsk);
> > read_unlock(&tasklist_lock);
> > + stats->version = TASKSTATS_VERSION;
> > +
> >
> > /*
> > * Accounting subsytems can also add calls here if they don't
> > _
> > -
> > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> > the body of a message to [email protected]
> > More majordomo info at http://vger.kernel.org/majordomo-info.html
> > Please read the FAQ at http://www.tux.org/lkml/


Thanks for your review,
Balbir Singh,
Linux Technology Center,
IBM Software Labs