Index: linux/include/linux/acct.h
===================================================================
--- linux.orig/include/linux/acct.h 2006-08-02 18:39:58.000000000 -0700
+++ linux/include/linux/acct.h 2006-08-02 18:42:33.257363761 -0700
@@ -124,16 +124,12 @@ extern void acct_auto_close(struct super
extern void acct_init_pacct(struct pacct_struct *pacct);
extern void acct_collect(long exitcode, int group_dead);
extern void acct_process(void);
-extern void acct_update_integrals(struct task_struct *tsk);
-extern void acct_clear_integrals(struct task_struct *tsk);
#else
#define acct_auto_close_mnt(x) do { } while (0)
#define acct_auto_close(x) do { } while (0)
#define acct_init_pacct(x) do { } while (0)
#define acct_collect(x,y) do { } while (0)
#define acct_process() do { } while (0)
-#define acct_update_integrals(x) do { } while (0)
-#define acct_clear_integrals(task) do { } while (0)
#endif
/*
Index: linux/kernel/acct.c
===================================================================
--- linux.orig/kernel/acct.c 2006-08-02 18:39:58.000000000 -0700
+++ linux/kernel/acct.c 2006-08-02 18:42:33.261363809 -0700
@@ -598,33 +598,3 @@ void acct_process(void)
do_acct_process(file);
fput(file);
}
-
-
-/**
- * acct_update_integrals - update mm integral fields in task_struct
- * @tsk: task_struct for accounting
- */
-void acct_update_integrals(struct task_struct *tsk)
-{
- if (likely(tsk->mm)) {
- long delta =
- cputime_to_jiffies(tsk->stime) - tsk->acct_stimexpd;
-
- if (delta == 0)
- return;
- tsk->acct_stimexpd = tsk->stime;
- tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm);
- tsk->acct_vm_mem1 += delta * tsk->mm->total_vm;
- }
-}
-
-/**
- * acct_clear_integrals - clear the mm integral fields in task_struct
- * @tsk: task_struct whose accounting fields are cleared
- */
-void acct_clear_integrals(struct task_struct *tsk)
-{
- tsk->acct_stimexpd = 0;
- tsk->acct_rss_mem1 = 0;
- tsk->acct_vm_mem1 = 0;
-}
Index: linux/include/linux/sched.h
===================================================================
--- linux.orig/include/linux/sched.h 2006-08-02 18:39:58.000000000 -0700
+++ linux/include/linux/sched.h 2006-08-02 18:42:33.261363809 -0700
@@ -964,7 +964,7 @@ struct task_struct {
wait_queue_t *io_wait;
/* i/o counters(bytes read/written, #syscalls */
u64 rchar, wchar, syscr, syscw;
-#if defined(CONFIG_BSD_PROCESS_ACCT)
+#if defined(CONFIG_TASK_XACCT)
u64 acct_rss_mem1; /* accumulated rss usage */
u64 acct_vm_mem1; /* accumulated virtual memory usage */
clock_t acct_stimexpd; /* clock_t-converted stime since last update */
Index: linux/fs/compat.c
===================================================================
--- linux.orig/fs/compat.c 2006-08-02 18:39:58.000000000 -0700
+++ linux/fs/compat.c 2006-08-02 18:42:33.265363858 -0700
@@ -44,7 +44,7 @@
#include <linux/nfsd/syscall.h>
#include <linux/personality.h>
#include <linux/rwsem.h>
-#include <linux/acct.h>
+#include <linux/tsacct_kern.h>
#include <linux/mm.h>
#include <net/sock.h> /* siocdevprivate_ioctl */
Index: linux/fs/exec.c
===================================================================
--- linux.orig/fs/exec.c 2006-08-02 18:39:58.000000000 -0700
+++ linux/fs/exec.c 2006-08-02 18:42:33.265363858 -0700
@@ -46,7 +46,7 @@
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/rmap.h>
-#include <linux/acct.h>
+#include <linux/tsacct_kern.h>
#include <linux/cn_proc.h>
#include <linux/audit.h>
Index: linux/kernel/exit.c
===================================================================
--- linux.orig/kernel/exit.c 2006-08-02 18:39:58.000000000 -0700
+++ linux/kernel/exit.c 2006-08-02 18:42:33.269363906 -0700
@@ -18,6 +18,7 @@
#include <linux/security.h>
#include <linux/cpu.h>
#include <linux/acct.h>
+#include <linux/tsacct_kern.h>
#include <linux/file.h>
#include <linux/binfmts.h>
#include <linux/ptrace.h>
Index: linux/kernel/fork.c
===================================================================
--- linux.orig/kernel/fork.c 2006-08-02 18:39:58.000000000 -0700
+++ linux/kernel/fork.c 2006-08-02 18:42:33.269363906 -0700
@@ -42,6 +42,7 @@
#include <linux/profile.h>
#include <linux/rmap.h>
#include <linux/acct.h>
+#include <linux/tsacct_kern.h>
#include <linux/cn_proc.h>
#include <linux/delayacct.h>
#include <linux/taskstats_kern.h>
Index: linux/kernel/sched.c
===================================================================
--- linux.orig/kernel/sched.c 2006-08-02 18:39:58.000000000 -0700
+++ linux/kernel/sched.c 2006-08-02 18:42:33.273363954 -0700
@@ -49,7 +49,7 @@
#include <linux/seq_file.h>
#include <linux/syscalls.h>
#include <linux/times.h>
-#include <linux/acct.h>
+#include <linux/tsacct_kern.h>
#include <linux/kprobes.h>
#include <linux/delayacct.h>
#include <asm/tlb.h>
Index: linux/include/linux/tsacct_kern.h
===================================================================
--- linux.orig/include/linux/tsacct_kern.h 2006-08-02 18:39:58.000000000 -0700
+++ linux/include/linux/tsacct_kern.h 2006-08-02 18:42:33.273363954 -0700
@@ -18,9 +18,15 @@ static inline void bacct_add_tsk(struct
#ifdef CONFIG_TASK_XACCT
extern void xacct_add_tsk(struct taskstats *stats, struct task_struct *p);
+extern void acct_update_integrals(struct task_struct *tsk);
+extern void acct_clear_integrals(struct task_struct *tsk);
#else
static inline void xacct_add_tsk(struct taskstats *stats, struct task_struct *p)
{}
+static inline void acct_update_integrals(struct task_struct *tsk)
+{}
+static inline void acct_clear_integrals(struct task_struct *tsk)
+{}
#endif /* CONFIG_TASK_XACCT */
#endif
Index: linux/kernel/tsacct.c
===================================================================
--- linux.orig/kernel/tsacct.c 2006-08-02 18:42:06.000000000 -0700
+++ linux/kernel/tsacct.c 2006-08-02 18:46:45.232406831 -0700
@@ -85,4 +85,34 @@ void xacct_add_tsk(struct taskstats *sta
stats->read_syscalls = p->syscr;
stats->write_syscalls = p->syscw;
}
+
+
+/**
+ * acct_update_integrals - update mm integral fields in task_struct
+ * @tsk: task_struct for accounting
+ */
+void acct_update_integrals(struct task_struct *tsk)
+{
+ if (likely(tsk->mm)) {
+ long delta =
+ cputime_to_jiffies(tsk->stime) - tsk->acct_stimexpd;
+
+ if (delta == 0)
+ return;
+ tsk->acct_stimexpd = tsk->stime;
+ tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm);
+ tsk->acct_vm_mem1 += delta * tsk->mm->total_vm;
+ }
+}
+
+/**
+ * acct_clear_integrals - clear the mm integral fields in task_struct
+ * @tsk: task_struct whose accounting fields are cleared
+ */
+void acct_clear_integrals(struct task_struct *tsk)
+{
+ tsk->acct_stimexpd = 0;
+ tsk->acct_rss_mem1 = 0;
+ tsk->acct_vm_mem1 = 0;
+}
#endif
On Wed, 02 Aug 2006 21:23:35 -0700
Jay Lan <[email protected]> wrote:
> +/**
> + * acct_update_integrals - update mm integral fields in task_struct
> + * @tsk: task_struct for accounting
> + */
> +void acct_update_integrals(struct task_struct *tsk)
> +{
> + if (likely(tsk->mm)) {
> + long delta =
> + cputime_to_jiffies(tsk->stime) - tsk->acct_stimexpd;
If a 32 architecture chooses to implement a 64-bit cputime_t, this
expression might go wrong for very long-running tasks and high HZ.
Perhaps we should do all this in terms of cputime_t and export everything
to userspace as u64?
> + if (delta == 0)
> + return;
> + tsk->acct_stimexpd = tsk->stime;
> + tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm);
> + tsk->acct_vm_mem1 += delta * tsk->mm->total_vm;
It's a bit weird to be multiplying RSS by time. What unit is a "byte
second"?
If this is not a bug then I guess this is an intermediate term for
additional downstream processing. There is information loss here and I'd
have thought that it would be better to simply send `delta' and the rss
straight to userspace, let userspace work out what math it wants to perform
on it. If that makes sense?
I see that the code has been like this for a long time, so treat this as a
"please educate me about BSD accounting" email ;)
Andrew Morton wrote:
> On Wed, 02 Aug 2006 21:23:35 -0700
> Jay Lan <[email protected]> wrote:
>
>
>>+/**
>>+ * acct_update_integrals - update mm integral fields in task_struct
>>+ * @tsk: task_struct for accounting
>>+ */
>>+void acct_update_integrals(struct task_struct *tsk)
>>+{
>>+ if (likely(tsk->mm)) {
>>+ long delta =
>>+ cputime_to_jiffies(tsk->stime) - tsk->acct_stimexpd;
>
>
> If a 32 architecture chooses to implement a 64-bit cputime_t, this
> expression might go wrong for very long-running tasks and high HZ.
>
> Perhaps we should do all this in terms of cputime_t and export everything
> to userspace as u64?
Andrew,
We export to userspace the acct_rss_mem1 and acct_vm_mem1, both as u64.
The above logic is to calculate stime delta since last update. Note that
acc_update_integrals() is invoked at do_execve, do_exit, _AND_ at
account_system_time, which is called every jiffy by timer interrupt
handler.
The tsk->acct_stimexpd is used to save the tsk->stime of last update.
It should be changed to cputime_t as well. I will include the fix in
my next fix patch.
>
>
>>+ if (delta == 0)
>>+ return;
>>+ tsk->acct_stimexpd = tsk->stime;
>>+ tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm);
>>+ tsk->acct_vm_mem1 += delta * tsk->mm->total_vm;
>
>
> It's a bit weird to be multiplying RSS by time. What unit is a "byte
> second"?
>
> If this is not a bug then I guess this is an intermediate term for
> additional downstream processing. There is information loss here and I'd
> have thought that it would be better to simply send `delta' and the rss
> straight to userspace, let userspace work out what math it wants to perform
> on it. If that makes sense?
>
> I see that the code has been like this for a long time, so treat this as a
> "please educate me about BSD accounting" email ;)
This is not a BSD accounting thing. It came from UNICOS and IRIX.
I am pinging the person who knows how the real world users use these
two fields...
Regards,
- jay
>
Jay Lan wrote:
> Andrew Morton wrote:
>
[snip]
>>
>>> + if (delta == 0)
>>> + return;
>>> + tsk->acct_stimexpd = tsk->stime;
>>> + tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm);
>>> + tsk->acct_vm_mem1 += delta * tsk->mm->total_vm;
>>
>>
>>
>> It's a bit weird to be multiplying RSS by time. What unit is a "byte
>> second"?
>>
>> If this is not a bug then I guess this is an intermediate term for
>> additional downstream processing. There is information loss here and I'd
>> have thought that it would be better to simply send `delta' and the rss
>> straight to userspace, let userspace work out what math it wants to
>> perform
>> on it. If that makes sense?
>>
>> I see that the code has been like this for a long time, so treat this
>> as a
>> "please educate me about BSD accounting" email ;)
>
>
> This is not a BSD accounting thing. It came from UNICOS and IRIX.
> I am pinging the person who knows how the real world users use these
> two fields...
Andrew,
Here is the explanation i owe you.
We accumulated the RSS/VM value at each timer interrupt update in terms
of pages-tick. At userland, the value is divided by tsk->stime (in usec)
to gain average usage of RSS/VM.
I need to do a little bit more processing in the kernel to convert
the pages-tick values to Mbytes-usec unit before delivery to userland
since the calculation are platform dependent. I will include the
change in the upcoming update patch.
Regards,
- jay
>
> Regards,
> - jay
>
>>
>
>