Sometimes, application responses become bad under heavy memory load.
Applications take a bit time to reclaim memory.
The statistics, how long memory reclaim takes, will be useful to
measure memory usage.
This patch adds accounting memory reclaim to per-task-delay-accounting
for accounting the time of try_to_free_pages().
<i.e>
- When System is under low memory load,
memory reclaim may not occur.
$ free
total used free shared buffers cached
Mem: 8197800 1577300 6620500 0 4808 1516724
-/+ buffers/cache: 55768 8142032
Swap: 16386292 0 16386292
$ vmstat 1
procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu----
r b swpd free buff cache si so bi bo in cs us sy id wa
0 0 0 5069748 10612 3014060 0 0 0 0 3 26 0 0 100 0
0 0 0 5069748 10612 3014060 0 0 0 0 4 22 0 0 100 0
0 0 0 5069748 10612 3014060 0 0 0 0 3 18 0 0 100 0
Measure the time of tar command.
$ ls -s test.dat
1501472 test.dat
$ time tar cvf test.tar test.dat
real 0m13.388s
user 0m0.116s
sys 0m5.304s
$ ./delayget -d -p <pid>
CPU count real total virtual total delay total
428 5528345500 5477116080 62749891
IO count delay total
338 8078977189
SWAP count delay total
0 0
RECLAIM count delay total
0 0
- When system is under heavy memory load
memory reclaim may occur.
$ vmstat 1
procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu----
r b swpd free buff cache si so bi bo in cs us sy id wa
0 0 7159032 49724 1812 3012 0 0 0 0 3 24 0 0 100 0
0 0 7159032 49724 1812 3012 0 0 0 0 4 24 0 0 100 0
0 0 7159032 49848 1812 3012 0 0 0 0 3 22 0 0 100 0
In this case, one process uses more 8G memory
by execution of malloc() and memset().
$ time tar cvf test.tar test.dat
real 1m38.563s <- increased by 85 sec
user 0m0.140s
sys 0m7.060s
$ ./delayget -d -p <pid>
CPU count real total virtual total delay total
9021 7140446250 7315277975 923201824
IO count delay total
8965 90466349669
SWAP count delay total
3 21036367
RECLAIM count delay total
740 61011951153
In the later case, the value of RECLAIM is increasing.
So, taskstats can show how much memory reclaim influences TAT.
Signed-off-by: Keika Kobayashi <[email protected]>
---
include/linux/delayacct.h | 19 +++++++++++++++++++
include/linux/sched.h | 4 ++++
kernel/delayacct.c | 13 +++++++++++++
mm/page_alloc.c | 3 +++
4 files changed, 39 insertions(+), 0 deletions(-)
diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h
index ab94bc0..f352f06 100644
--- a/include/linux/delayacct.h
+++ b/include/linux/delayacct.h
@@ -39,6 +39,8 @@ extern void __delayacct_blkio_start(void);
extern void __delayacct_blkio_end(void);
extern int __delayacct_add_tsk(struct taskstats *, struct task_struct *);
extern __u64 __delayacct_blkio_ticks(struct task_struct *);
+extern void __delayacct_freepages_start(void);
+extern void __delayacct_freepages_end(void);
static inline int delayacct_is_task_waiting_on_io(struct task_struct *p)
{
@@ -107,6 +109,18 @@ static inline __u64 delayacct_blkio_ticks(struct task_struct *tsk)
return 0;
}
+static inline void delayacct_freepages_start(void)
+{
+ if (current->delays)
+ __delayacct_freepages_start();
+}
+
+static inline void delayacct_freepages_end(void)
+{
+ if (current->delays)
+ __delayacct_freepages_end();
+}
+
#else
static inline void delayacct_set_flag(int flag)
{}
@@ -129,6 +143,11 @@ static inline __u64 delayacct_blkio_ticks(struct task_struct *tsk)
{ return 0; }
static inline int delayacct_is_task_waiting_on_io(struct task_struct *p)
{ return 0; }
+static inline void delayacct_freepages_start(void)
+{}
+static inline void delayacct_freepages_end(void)
+{}
+
#endif /* CONFIG_TASK_DELAY_ACCT */
#endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3e05e54..e6c557c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -666,6 +666,10 @@ struct task_delay_info {
/* io operations performed */
u32 swapin_count; /* total count of the number of swapin block */
/* io operations performed */
+
+ struct timespec freepages_start, freepages_end;
+ u64 freepages_delay; /* wait for memory reclaim */
+ u32 freepages_count; /* total count of memory reclaim */
};
#endif /* CONFIG_TASK_DELAY_ACCT */
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index 10e43fd..84b6782 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -165,3 +165,16 @@ __u64 __delayacct_blkio_ticks(struct task_struct *tsk)
return ret;
}
+void __delayacct_freepages_start(void)
+{
+ delayacct_start(¤t->delays->freepages_start);
+}
+
+void __delayacct_freepages_end(void)
+{
+ delayacct_end(¤t->delays->freepages_start,
+ ¤t->delays->freepages_end,
+ ¤t->delays->freepages_delay,
+ ¤t->delays->freepages_count);
+}
+
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8e83f02..8b4e606 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -46,6 +46,7 @@
#include <linux/page-isolation.h>
#include <linux/memcontrol.h>
#include <linux/debugobjects.h>
+#include <linux/delayacct.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
@@ -1571,7 +1572,9 @@ nofail_alloc:
reclaim_state.reclaimed_slab = 0;
p->reclaim_state = &reclaim_state;
+ delayacct_freepages_start();
did_some_progress = try_to_free_pages(zonelist, order, gfp_mask);
+ delayacct_freepages_end();
p->reclaim_state = NULL;
p->flags &= ~PF_MEMALLOC;
--
1.5.0.6
Signed-off-by: Keika Kobayashi <[email protected]>
---
include/linux/taskstats.h | 4 ++++
kernel/delayacct.c | 3 +++
2 files changed, 7 insertions(+), 0 deletions(-)
diff --git a/include/linux/taskstats.h b/include/linux/taskstats.h
index 5d69c07..87aae21 100644
--- a/include/linux/taskstats.h
+++ b/include/linux/taskstats.h
@@ -81,6 +81,10 @@ struct taskstats {
__u64 swapin_count;
__u64 swapin_delay_total;
+ /* Delay waiting for memory reclaim */
+ __u64 freepages_count;
+ __u64 freepages_delay_total;
+
/* cpu "wall-clock" running time
* On some architectures, value will adjust for cpu time stolen
* from the kernel in involuntary waits due to virtualization.
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index 84b6782..b3179da 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -145,8 +145,11 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp;
tmp = d->swapin_delay_total + tsk->delays->swapin_delay;
d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp;
+ tmp = d->freepages_delay_total + tsk->delays->freepages_delay;
+ d->freepages_delay_total = (tmp < d->freepages_delay_total) ? 0 : tmp;
d->blkio_count += tsk->delays->blkio_count;
d->swapin_count += tsk->delays->swapin_count;
+ d->freepages_count += tsk->delays->freepages_count;
spin_unlock_irqrestore(&tsk->delays->lock, flags);
done:
--
1.5.0.6
Update document and
make getdelays.c show delay accounting of memory reclaim.
For making a distinction between "swapping in pages" and "memory reclaim"
in getdelays.c, MEM is changed to SWAP.
Signed-off-by: Keika Kobayashi <[email protected]>
---
Documentation/accounting/delay-accounting.txt | 11 ++++++++---
Documentation/accounting/getdelays.c | 8 ++++++--
2 files changed, 14 insertions(+), 5 deletions(-)
diff --git a/Documentation/accounting/delay-accounting.txt b/Documentation/accounting/delay-accounting.txt
index 1443cd7..8a12f07 100644
--- a/Documentation/accounting/delay-accounting.txt
+++ b/Documentation/accounting/delay-accounting.txt
@@ -11,6 +11,7 @@ the delays experienced by a task while
a) waiting for a CPU (while being runnable)
b) completion of synchronous block I/O initiated by the task
c) swapping in pages
+d) memory reclaim
and makes these statistics available to userspace through
the taskstats interface.
@@ -41,7 +42,7 @@ this structure. See
include/linux/taskstats.h
for a description of the fields pertaining to delay accounting.
It will generally be in the form of counters returning the cumulative
-delay seen for cpu, sync block I/O, swapin etc.
+delay seen for cpu, sync block I/O, swapin, memory reclaim etc.
Taking the difference of two successive readings of a given
counter (say cpu_delay_total) for a task will give the delay
@@ -94,7 +95,9 @@ CPU count real total virtual total delay total
7876 92005750 100000000 24001500
IO count delay total
0 0
-MEM count delay total
+SWAP count delay total
+ 0 0
+RECLAIM count delay total
0 0
Get delays seen in executing a given simple command
@@ -108,5 +111,7 @@ CPU count real total virtual total delay total
6 4000250 4000000 0
IO count delay total
0 0
-MEM count delay total
+SWAP count delay total
+ 0 0
+RECLAIM count delay total
0 0
diff --git a/Documentation/accounting/getdelays.c b/Documentation/accounting/getdelays.c
index 40121b5..3f7755f 100644
--- a/Documentation/accounting/getdelays.c
+++ b/Documentation/accounting/getdelays.c
@@ -196,14 +196,18 @@ void print_delayacct(struct taskstats *t)
" %15llu%15llu%15llu%15llu\n"
"IO %15s%15s\n"
" %15llu%15llu\n"
- "MEM %15s%15s\n"
+ "SWAP %15s%15s\n"
+ " %15llu%15llu\n"
+ "RECLAIM %12s%15s\n"
" %15llu%15llu\n",
"count", "real total", "virtual total", "delay total",
t->cpu_count, t->cpu_run_real_total, t->cpu_run_virtual_total,
t->cpu_delay_total,
"count", "delay total",
t->blkio_count, t->blkio_delay_total,
- "count", "delay total", t->swapin_count, t->swapin_delay_total);
+ "count", "delay total", t->swapin_count, t->swapin_delay_total,
+ "count", "delay total",
+ t->freepages_count, t->freepages_delay_total);
}
void task_context_switch_counts(struct taskstats *t)
--
1.5.0.6
Signed-off-by: Keika Kobayashi <[email protected]>
---
fs/proc/array.c | 3 ++-
include/linux/delayacct.h | 10 ++++++++++
kernel/delayacct.c | 11 +++++++++++
3 files changed, 23 insertions(+), 1 deletions(-)
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 9e3b8c3..a3e6e86 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -498,7 +498,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
seq_printf(m, "%d (%s) %c %d %d %d %d %d %u %lu \
%lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \
-%lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu %lu %ld\n",
+%lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu %llu %lu %ld\n",
pid_nr_ns(pid, ns),
tcomm,
state,
@@ -544,6 +544,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
task->rt_priority,
task->policy,
(unsigned long long)delayacct_blkio_ticks(task),
+ (unsigned long long)delayacct_freepages_ticks(task),
cputime_to_clock_t(gtime),
cputime_to_clock_t(cgtime));
if (mm)
diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h
index f352f06..226b754 100644
--- a/include/linux/delayacct.h
+++ b/include/linux/delayacct.h
@@ -41,6 +41,7 @@ extern int __delayacct_add_tsk(struct taskstats *, struct task_struct *);
extern __u64 __delayacct_blkio_ticks(struct task_struct *);
extern void __delayacct_freepages_start(void);
extern void __delayacct_freepages_end(void);
+extern __u64 __delayacct_freepages_ticks(struct task_struct *);
static inline int delayacct_is_task_waiting_on_io(struct task_struct *p)
{
@@ -121,6 +122,13 @@ static inline void delayacct_freepages_end(void)
__delayacct_freepages_end();
}
+static inline __u64 delayacct_freepages_ticks(struct task_struct *tsk)
+{
+ if (tsk->delays)
+ return __delayacct_freepages_ticks(tsk);
+ return 0;
+}
+
#else
static inline void delayacct_set_flag(int flag)
{}
@@ -147,6 +155,8 @@ static inline void delayacct_freepages_start(void)
{}
static inline void delayacct_freepages_end(void)
{}
+static inline __u64 delayacct_freepages_ticks(struct task_struct *tsk)
+{ return 0; }
#endif /* CONFIG_TASK_DELAY_ACCT */
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index b3179da..62601d3 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -181,3 +181,14 @@ void __delayacct_freepages_end(void)
¤t->delays->freepages_count);
}
+__u64 __delayacct_freepages_ticks(struct task_struct *tsk)
+{
+ __u64 ret;
+ unsigned long flags;
+
+ spin_lock_irqsave(&tsk->delays->lock, flags);
+ ret = nsec_to_clock_t(tsk->delays->freepages_delay);
+ spin_unlock_irqrestore(&tsk->delays->lock, flags);
+ return ret;
+}
+
--
1.5.0.6
Hi
> Signed-off-by: Keika Kobayashi <[email protected]>
> ---
Please write patch desctiption :)
Hi
> + delayacct_freepages_start();
> did_some_progress = try_to_free_pages(zonelist, order, gfp_mask);
> + delayacct_freepages_end();
Shouldn't we consider memcgroup reclaim?
Hi
> diff --git a/fs/proc/array.c b/fs/proc/array.c
> index 9e3b8c3..a3e6e86 100644
> --- a/fs/proc/array.c
> +++ b/fs/proc/array.c
> @@ -498,7 +498,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
>
> seq_printf(m, "%d (%s) %c %d %d %d %d %d %u %lu \
> %lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \
> -%lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu %lu %ld\n",
> +%lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu %llu %lu %ld\n",
> pid_nr_ns(pid, ns),
> tcomm,
> state,
> @@ -544,6 +544,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
> task->rt_priority,
> task->policy,
> (unsigned long long)delayacct_blkio_ticks(task),
> + (unsigned long long)delayacct_freepages_ticks(task),
> cputime_to_clock_t(gtime),
> cputime_to_clock_t(cgtime));
userland program oftern below access.
thus, this patch break userland compatibility.
----------------------------------------
#!/usr/bin/perl
$stat = `cat /proc/$pid/stat`;
split
@stat_list = split(/ / , $stat);
print stat_list[$index];
^
|
use array index number
> From: Keika Kobayashi <[email protected]>
> To: [email protected]
> Cc: [email protected]
> Subject: [PATCH 1/4] per-task-delay-accounting: add memory reclaim delay
Why don't you cc'ed delayacct developer nor linux-mm?
Add members for memory reclaim delay to taskstats,
and accumulate them in __delayacct_add_tsk() .
Signed-off-by: Keika Kobayashi <[email protected]>
---
On Wed, 04 Jun 2008 11:59:04 +0900
KOSAKI Motohiro <[email protected]> wrote:
> Hi
>
> > Signed-off-by: Keika Kobayashi <[email protected]>
> > ---
>
> Please write patch desctiption :)
Thank you for your comment.
Here is update version.
include/linux/taskstats.h | 4 ++++
kernel/delayacct.c | 3 +++
2 files changed, 7 insertions(+), 0 deletions(-)
diff --git a/include/linux/taskstats.h b/include/linux/taskstats.h
index 5d69c07..87aae21 100644
--- a/include/linux/taskstats.h
+++ b/include/linux/taskstats.h
@@ -81,6 +81,10 @@ struct taskstats {
__u64 swapin_count;
__u64 swapin_delay_total;
+ /* Delay waiting for memory reclaim */
+ __u64 freepages_count;
+ __u64 freepages_delay_total;
+
/* cpu "wall-clock" running time
* On some architectures, value will adjust for cpu time stolen
* from the kernel in involuntary waits due to virtualization.
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index 84b6782..b3179da 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -145,8 +145,11 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp;
tmp = d->swapin_delay_total + tsk->delays->swapin_delay;
d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp;
+ tmp = d->freepages_delay_total + tsk->delays->freepages_delay;
+ d->freepages_delay_total = (tmp < d->freepages_delay_total) ? 0 : tmp;
d->blkio_count += tsk->delays->blkio_count;
d->swapin_count += tsk->delays->swapin_count;
+ d->freepages_count += tsk->delays->freepages_count;
spin_unlock_irqrestore(&tsk->delays->lock, flags);
done:
--
1.5.0.6
On Wed, 04 Jun 2008 12:15:48 +0900
KOSAKI Motohiro <[email protected]> wrote:
> > From: Keika Kobayashi <[email protected]>
> > To: [email protected]
> > Cc: [email protected]
> > Subject: [PATCH 1/4] per-task-delay-accounting: add memory reclaim delay
>
> Why don't you cc'ed delayacct developer nor linux-mm?
Oops. I forgot to add delayacct developer to CC.
I'll do it.
But are you sure about linux-mm?
On Wed, 04 Jun 2008 12:07:29 +0900
KOSAKI Motohiro <[email protected]> wrote:
> ----------------------------------------
> #!/usr/bin/perl
>
> $stat = `cat /proc/$pid/stat`;
> split
> @stat_list = split(/ / , $stat);
> print stat_list[$index];
> ^
> |
> use array index number
Certainly, it makes sense.
I thought this measurement value should be put
beside other delay account.
> > ----------------------------------------
> > #!/usr/bin/perl
> >
> > $stat = `cat /proc/$pid/stat`;
> > @stat_list = split(/ / , $stat);
> > print stat_list[$index];
> > ^
> > |
> > use array index number
>
> Certainly, it makes sense.
> I thought this measurement value should be put
> beside other delay account.
Yeah, I'm looking forward to your v2 :)
Thanks!
On Tue, 3 Jun 2008 19:38:25 -0700
Keika Kobayashi <[email protected]> wrote:
> + delayacct_freepages_start();
> did_some_progress = try_to_free_pages(zonelist, order, gfp_mask);
> + delayacct_freepages_end();
>
Could you move this to vmscan.c do_try_to_free_pages().
Then, memory reclaim from memory resource controller can be catched as well.
Thanks,
-Kame
KOSAKI Motohiro wrote:
> Hi
>
>> diff --git a/fs/proc/array.c b/fs/proc/array.c
>> index 9e3b8c3..a3e6e86 100644
>> --- a/fs/proc/array.c
>> +++ b/fs/proc/array.c
>> @@ -498,7 +498,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
>>
>> seq_printf(m, "%d (%s) %c %d %d %d %d %d %u %lu \
>> %lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \
>> -%lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu %lu %ld\n",
>> +%lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu %llu %lu %ld\n",
>> pid_nr_ns(pid, ns),
>> tcomm,
>> state,
>> @@ -544,6 +544,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
>> task->rt_priority,
>> task->policy,
>> (unsigned long long)delayacct_blkio_ticks(task),
>> + (unsigned long long)delayacct_freepages_ticks(task),
>> cputime_to_clock_t(gtime),
>> cputime_to_clock_t(cgtime));
>
> userland program oftern below access.
> thus, this patch break userland compatibility.
It's my fault. I suggested him to group delayacct ticks.
I know this breakage, but was not sure what it affects.
I thought that if it'll be a problem we get a claim such you did.
Thanks,
Hiroshi Shimamoto
KOSAKI Motohiro wrote:
> Hi
>
>> + delayacct_freepages_start();
>> did_some_progress = try_to_free_pages(zonelist, order, gfp_mask);
>> + delayacct_freepages_end();
>
> Shouldn't we consider memcgroup reclaim?
thanks for pointing this.
Unfortunately, we're not so familiar to memcgroup.
Will look into this.
Could you tell us pointers to memcgroup?
Thanks,
Hiroshi Shimamoto
On Wed, 04 Jun 2008 12:07:29 +0900
KOSAKI Motohiro <[email protected]> wrote:
> Hi
>
> > diff --git a/fs/proc/array.c b/fs/proc/array.c
> > index 9e3b8c3..a3e6e86 100644
> > --- a/fs/proc/array.c
> > +++ b/fs/proc/array.c
> > @@ -498,7 +498,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
> >
> > seq_printf(m, "%d (%s) %c %d %d %d %d %d %u %lu \
> > %lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \
> > -%lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu %lu %ld\n",
> > +%lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu %llu %lu %ld\n",
> > pid_nr_ns(pid, ns),
> > tcomm,
> > state,
> > @@ -544,6 +544,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
> > task->rt_priority,
> > task->policy,
> > (unsigned long long)delayacct_blkio_ticks(task),
> > + (unsigned long long)delayacct_freepages_ticks(task),
> > cputime_to_clock_t(gtime),
> > cputime_to_clock_t(cgtime));
>
> userland program oftern below access.
> thus, this patch break userland compatibility.
>
> ----------------------------------------
> #!/usr/bin/perl
>
> $stat = `cat /proc/$pid/stat`;
> split
> @stat_list = split(/ / , $stat);
> print stat_list[$index];
> ^
> |
> use array index number
>
Any application which breaks if we add more things to /proc/pid/stat
just needs to be fixed.
But we can't add new fields in the middle of the line! We can only add
new items to the end.
But I think it would be good if we just weren't to make this change.
Do we really want to keep reporting the same information in two
separate ways?
If so, a new /proc/pid/delays would be a better way, but it might be a
bit late for doing that now, given that we already put
delayacct_blkio_ticks into /proc/pid/stat.
> >> + delayacct_freepages_start();
> >> did_some_progress = try_to_free_pages(zonelist, order, gfp_mask);
> >> + delayacct_freepages_end();
> >
> > Shouldn't we consider memcgroup reclaim?
>
> thanks for pointing this.
> Unfortunately, we're not so familiar to memcgroup.
> Will look into this.
> Could you tell us pointers to memcgroup?
Documentation/controllers/memory.txt is good guide :)
<abstract>
try_to_free_pages(): global reclaim entry point
try_to_free_mem_cgroup_pages(): memcgroup reclaim entry point
do_try_to_free_pages(): common layer
Unfortunately, I don't know your requirement and
memcgroup reclaim shold be mesured.
I hope you explain your purpose and benefit more.
Thanks.
On Wed, 4 Jun 2008 15:41:24 -0700
Andrew Morton <[email protected]> wrote:
>
> Any application which breaks if we add more things to /proc/pid/stat
> just needs to be fixed.
>
> But we can't add new fields in the middle of the line! We can only add
> new items to the end.
>
> But I think it would be good if we just weren't to make this change.
> Do we really want to keep reporting the same information in two
> separate ways?
I agree. There is no strong reason with two method.
I'll drop this one from patch series next time.
KOSAKI Motohiro wrote:
>>>> + delayacct_freepages_start();
>>>> did_some_progress = try_to_free_pages(zonelist, order, gfp_mask);
>>>> + delayacct_freepages_end();
>>> Shouldn't we consider memcgroup reclaim?
>> thanks for pointing this.
>> Unfortunately, we're not so familiar to memcgroup.
>> Will look into this.
>> Could you tell us pointers to memcgroup?
>
> Documentation/controllers/memory.txt is good guide :)
thanks, and playing with this is good too.
I see it's easy to use this feature :)
>
> <abstract>
> try_to_free_pages(): global reclaim entry point
> try_to_free_mem_cgroup_pages(): memcgroup reclaim entry point
> do_try_to_free_pages(): common layer
I see the point that, on memcgroup enabled system, there are
memcgroup memory reclaim points and try_to_free_mem_cgroup_pages()
is called when the page charge reach the limit.
So if we want to account delay for memory reclaim, we should
account at both of try_to_free_pages() and try_to_free_mem_cgroup_pages().
Accounting at do_try_to_free_pages() covers this case.
> Unfortunately, I don't know your requirement and
> memcgroup reclaim shold be mesured.
> I hope you explain your purpose and benefit more.
Ah, sure.
We have an issue that a process which accesses a DB sometime slows down.
We want to know what causes it, without the reproducer.
We have only the report of the problem, memory usage, vmstat...
And, I don't know the memory reclaim really causes the problem.
If we can see what kind of delay is the matter in the test bed, it will
help us to take a next action.
Thanks,
Hiroshi Shimamoto