2011-05-19 02:31:12

by KOSAKI Motohiro

[permalink] [raw]
Subject: [PATCH v2 1/3] vmscan,memcg: memcg aware swap token

Currently, memcg reclaim can disable swap token even if the swap token
mm doesn't belong in its memory cgroup. It's slightly risky. If an
admin creates very small mem-cgroup and silly guy runs contentious heavy
memory pressure workload, every tasks are going to lose swap token and
then system may become unresponsive. That's bad.

This patch adds 'memcg' parameter into disable_swap_token(). and if
the parameter doesn't match swap token, VM doesn't disable it.

Signed-off-by: KOSAKI Motohiro <[email protected]>
Reviewed-by: KAMEZAWA Hiroyuki <[email protected]>
Reviewed-by: Rik van Riel<[email protected]>
---
include/linux/memcontrol.h | 6 +++
include/linux/swap.h | 8 +---
mm/memcontrol.c | 16 ++++-----
mm/thrash.c | 73 ++++++++++++++++++++++++++++++++-----------
mm/vmscan.c | 4 +-
5 files changed, 71 insertions(+), 36 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 6a0cffd..df572af 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -84,6 +84,7 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem);

extern struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page);
extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
+extern struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm);

static inline
int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup)
@@ -244,6 +245,11 @@ static inline struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
return NULL;
}

+static inline struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
+{
+ return NULL;
+}
+
static inline int mm_match_cgroup(struct mm_struct *mm, struct mem_cgroup *mem)
{
return 1;
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 384eb5f..e705646 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -358,6 +358,7 @@ struct backing_dev_info;
extern struct mm_struct *swap_token_mm;
extern void grab_swap_token(struct mm_struct *);
extern void __put_swap_token(struct mm_struct *);
+extern void disable_swap_token(struct mem_cgroup *memcg);

static inline int has_swap_token(struct mm_struct *mm)
{
@@ -370,11 +371,6 @@ static inline void put_swap_token(struct mm_struct *mm)
__put_swap_token(mm);
}

-static inline void disable_swap_token(void)
-{
- put_swap_token(swap_token_mm);
-}
-
#ifdef CONFIG_CGROUP_MEM_RES_CTLR
extern void
mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout);
@@ -500,7 +496,7 @@ static inline int has_swap_token(struct mm_struct *mm)
return 0;
}

-static inline void disable_swap_token(void)
+static inline void disable_swap_token(struct mem_cgroup *memcg)
{
}

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c2776f1..1a78b3e 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -735,7 +735,7 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
struct mem_cgroup, css);
}

-static struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
+struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
{
struct mem_cgroup *mem = NULL;

@@ -5194,18 +5194,16 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss,
struct cgroup *old_cont,
struct task_struct *p)
{
- struct mm_struct *mm;
+ struct mm_struct *mm = get_task_mm(p);

- if (!mc.to)
- /* no need to move charge */
- return;
-
- mm = get_task_mm(p);
if (mm) {
- mem_cgroup_move_charge(mm);
+ if (mc.to)
+ mem_cgroup_move_charge(mm);
+ put_swap_token(mm);
mmput(mm);
}
- mem_cgroup_clear_mc();
+ if (mc.to)
+ mem_cgroup_clear_mc();
}
#else /* !CONFIG_MMU */
static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
diff --git a/mm/thrash.c b/mm/thrash.c
index 2372d4e..32c07fd 100644
--- a/mm/thrash.c
+++ b/mm/thrash.c
@@ -21,14 +21,17 @@
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/swap.h>
+#include <linux/memcontrol.h>

static DEFINE_SPINLOCK(swap_token_lock);
struct mm_struct *swap_token_mm;
+struct mem_cgroup *swap_token_memcg;
static unsigned int global_faults;

void grab_swap_token(struct mm_struct *mm)
{
int current_interval;
+ struct mem_cgroup *memcg;

global_faults++;

@@ -38,40 +41,72 @@ void grab_swap_token(struct mm_struct *mm)
return;

/* First come first served */
- if (swap_token_mm == NULL) {
- mm->token_priority = mm->token_priority + 2;
- swap_token_mm = mm;
+ if (!swap_token_mm)
+ goto replace_token;
+
+ if (mm == swap_token_mm) {
+ mm->token_priority += 2;
goto out;
}

- if (mm != swap_token_mm) {
- if (current_interval < mm->last_interval)
- mm->token_priority++;
- else {
- if (likely(mm->token_priority > 0))
- mm->token_priority--;
- }
- /* Check if we deserve the token */
- if (mm->token_priority > swap_token_mm->token_priority) {
- mm->token_priority += 2;
- swap_token_mm = mm;
- }
- } else {
- /* Token holder came in again! */
- mm->token_priority += 2;
+ if (current_interval < mm->last_interval)
+ mm->token_priority++;
+ else {
+ if (likely(mm->token_priority > 0))
+ mm->token_priority--;
}

+ /* Check if we deserve the token */
+ if (mm->token_priority > swap_token_mm->token_priority)
+ goto replace_token;
+
out:
mm->faultstamp = global_faults;
mm->last_interval = current_interval;
spin_unlock(&swap_token_lock);
+ return;
+
+replace_token:
+ mm->token_priority += 2;
+ memcg = try_get_mem_cgroup_from_mm(mm);
+ if (memcg)
+ css_put(mem_cgroup_css(memcg));
+ swap_token_mm = mm;
+ swap_token_memcg = memcg;
+ goto out;
}

/* Called on process exit. */
void __put_swap_token(struct mm_struct *mm)
{
spin_lock(&swap_token_lock);
- if (likely(mm == swap_token_mm))
+ if (likely(mm == swap_token_mm)) {
swap_token_mm = NULL;
+ swap_token_memcg = NULL;
+ }
spin_unlock(&swap_token_lock);
}
+
+static bool match_memcg(struct mem_cgroup *a, struct mem_cgroup *b)
+{
+ if (!a)
+ return true;
+ if (!b)
+ return true;
+ if (a == b)
+ return true;
+ return false;
+}
+
+void disable_swap_token(struct mem_cgroup *memcg)
+{
+ /* memcg reclaim don't disable unrelated mm token. */
+ if (match_memcg(memcg, swap_token_memcg)) {
+ spin_lock(&swap_token_lock);
+ if (match_memcg(memcg, swap_token_memcg)) {
+ swap_token_mm = NULL;
+ swap_token_memcg = NULL;
+ }
+ spin_unlock(&swap_token_lock);
+ }
+}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index b3a569f..19e179b 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2044,7 +2044,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
for (priority = DEF_PRIORITY; priority >= 0; priority--) {
sc->nr_scanned = 0;
if (!priority)
- disable_swap_token();
+ disable_swap_token(sc->mem_cgroup);
shrink_zones(priority, zonelist, sc);
/*
* Don't shrink slabs when reclaiming memory from
@@ -2353,7 +2353,7 @@ loop_again:

/* The swap token gets in the way of swapout... */
if (!priority)
- disable_swap_token();
+ disable_swap_token(NULL);

all_zones_ok = 1;
balanced = 0;
--
1.7.3.1


2011-05-19 02:33:04

by KOSAKI Motohiro

[permalink] [raw]
Subject: [PATCH v2 2/3] vmscan: implement swap token trace

This is useful for observing swap token activity.

example output:

zsh-1845 [000] 598.962716: update_swap_token_priority:
mm=ffff88015eaf7700 old_prio=1 new_prio=0
memtoy-1830 [001] 602.033900: update_swap_token_priority:
mm=ffff880037a45880 old_prio=947 new_prio=949
memtoy-1830 [000] 602.041509: update_swap_token_priority:
mm=ffff880037a45880 old_prio=949 new_prio=951
memtoy-1830 [000] 602.051959: update_swap_token_priority:
mm=ffff880037a45880 old_prio=951 new_prio=953
memtoy-1830 [000] 602.052188: update_swap_token_priority:
mm=ffff880037a45880 old_prio=953 new_prio=955
memtoy-1830 [001] 602.427184: put_swap_token:
token_mm=ffff880037a45880
zsh-1789 [000] 602.427281: replace_swap_token:
old_token_mm= (null) old_prio=0 new_token_mm=ffff88015eaf7018
new_prio=2
zsh-1789 [001] 602.433456: update_swap_token_priority:
mm=ffff88015eaf7018 old_prio=2 new_prio=4
zsh-1789 [000] 602.437613: update_swap_token_priority:
mm=ffff88015eaf7018 old_prio=4 new_prio=6
zsh-1789 [000] 602.443924: update_swap_token_priority:
mm=ffff88015eaf7018 old_prio=6 new_prio=8
zsh-1789 [000] 602.451873: update_swap_token_priority:
mm=ffff88015eaf7018 old_prio=8 new_prio=10
zsh-1789 [001] 602.462639: update_swap_token_priority:
mm=ffff88015eaf7018 old_prio=10 new_prio=12

Signed-off-by: KOSAKI Motohiro <[email protected]>
Acked-by: Rik van Riel<[email protected]>
Reviewed-by: KAMEZAWA Hiroyuki <[email protected]>
---
include/trace/events/vmscan.h | 77 +++++++++++++++++++++++++++++++++++++++++
mm/thrash.c | 11 +++++-
2 files changed, 87 insertions(+), 1 deletions(-)

diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index ea422aa..1798e0c 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -6,6 +6,8 @@

#include <linux/types.h>
#include <linux/tracepoint.h>
+#include <linux/mm.h>
+#include <linux/memcontrol.h>
#include "gfpflags.h"

#define RECLAIM_WB_ANON 0x0001u
@@ -310,6 +312,81 @@ TRACE_EVENT(mm_vmscan_lru_shrink_inactive,
show_reclaim_flags(__entry->reclaim_flags))
);

+TRACE_EVENT(replace_swap_token,
+ TP_PROTO(struct mm_struct *old_mm,
+ struct mm_struct *new_mm),
+
+ TP_ARGS(old_mm, new_mm),
+
+ TP_STRUCT__entry(
+ __field(struct mm_struct*, old_mm)
+ __field(unsigned int, old_prio)
+ __field(struct mm_struct*, new_mm)
+ __field(unsigned int, new_prio)
+ ),
+
+ TP_fast_assign(
+ __entry->old_mm = old_mm;
+ __entry->old_prio = old_mm ? old_mm->token_priority : 0;
+ __entry->new_mm = new_mm;
+ __entry->new_prio = new_mm->token_priority;
+ ),
+
+ TP_printk("old_token_mm=%p old_prio=%u new_token_mm=%p new_prio=%u",
+ __entry->old_mm, __entry->old_prio,
+ __entry->new_mm, __entry->new_prio)
+);
+
+DECLARE_EVENT_CLASS(put_swap_token_template,
+ TP_PROTO(struct mm_struct *swap_token_mm),
+
+ TP_ARGS(swap_token_mm),
+
+ TP_STRUCT__entry(
+ __field(struct mm_struct*, swap_token_mm)
+ ),
+
+ TP_fast_assign(
+ __entry->swap_token_mm = swap_token_mm;
+ ),
+
+ TP_printk("token_mm=%p", __entry->swap_token_mm)
+);
+
+DEFINE_EVENT(put_swap_token_template, put_swap_token,
+ TP_PROTO(struct mm_struct *swap_token_mm),
+ TP_ARGS(swap_token_mm)
+);
+
+DEFINE_EVENT_CONDITION(put_swap_token_template, disable_swap_token,
+ TP_PROTO(struct mm_struct *swap_token_mm),
+ TP_ARGS(swap_token_mm),
+ TP_CONDITION(swap_token_mm != NULL)
+);
+
+TRACE_EVENT_CONDITION(update_swap_token_priority,
+ TP_PROTO(struct mm_struct *mm,
+ unsigned int old_prio),
+
+ TP_ARGS(mm, old_prio),
+
+ TP_CONDITION(mm->token_priority != old_prio),
+
+ TP_STRUCT__entry(
+ __field(struct mm_struct*, mm)
+ __field(unsigned int, old_prio)
+ __field(unsigned int, new_prio)
+ ),
+
+ TP_fast_assign(
+ __entry->mm = mm;
+ __entry->old_prio = old_prio;
+ __entry->new_prio = mm->token_priority;
+ ),
+
+ TP_printk("mm=%p old_prio=%u new_prio=%u",
+ __entry->mm, __entry->old_prio, __entry->new_prio)
+);

#endif /* _TRACE_VMSCAN_H */

diff --git a/mm/thrash.c b/mm/thrash.c
index 32c07fd..14c6c9f 100644
--- a/mm/thrash.c
+++ b/mm/thrash.c
@@ -23,6 +23,8 @@
#include <linux/swap.h>
#include <linux/memcontrol.h>

+#include <trace/events/vmscan.h>
+
static DEFINE_SPINLOCK(swap_token_lock);
struct mm_struct *swap_token_mm;
struct mem_cgroup *swap_token_memcg;
@@ -31,6 +33,7 @@ static unsigned int global_faults;
void grab_swap_token(struct mm_struct *mm)
{
int current_interval;
+ unsigned int old_prio = mm->token_priority;
struct mem_cgroup *memcg;

global_faults++;
@@ -46,7 +49,7 @@ void grab_swap_token(struct mm_struct *mm)

if (mm == swap_token_mm) {
mm->token_priority += 2;
- goto out;
+ goto update_priority;
}

if (current_interval < mm->last_interval)
@@ -60,6 +63,9 @@ void grab_swap_token(struct mm_struct *mm)
if (mm->token_priority > swap_token_mm->token_priority)
goto replace_token;

+update_priority:
+ trace_update_swap_token_priority(mm, old_prio);
+
out:
mm->faultstamp = global_faults;
mm->last_interval = current_interval;
@@ -71,6 +77,7 @@ replace_token:
memcg = try_get_mem_cgroup_from_mm(mm);
if (memcg)
css_put(mem_cgroup_css(memcg));
+ trace_replace_swap_token(swap_token_mm, mm);
swap_token_mm = mm;
swap_token_memcg = memcg;
goto out;
@@ -81,6 +88,7 @@ void __put_swap_token(struct mm_struct *mm)
{
spin_lock(&swap_token_lock);
if (likely(mm == swap_token_mm)) {
+ trace_put_swap_token(swap_token_mm);
swap_token_mm = NULL;
swap_token_memcg = NULL;
}
@@ -104,6 +112,7 @@ void disable_swap_token(struct mem_cgroup *memcg)
if (match_memcg(memcg, swap_token_memcg)) {
spin_lock(&swap_token_lock);
if (match_memcg(memcg, swap_token_memcg)) {
+ trace_disable_swap_token(swap_token_mm);
swap_token_mm = NULL;
swap_token_memcg = NULL;
}
--
1.7.3.1


2011-05-19 02:34:25

by KOSAKI Motohiro

[permalink] [raw]
Subject: [PATCH v2 3/3] vmscan: implement swap token priority aging

While testing for memcg aware swap token, I observed a swap token
was often grabbed an intermittent running process (eg init, auditd)
and they never release a token.

Why?

Some processes (eg init, auditd, audispd) wake up when a process
exiting. And swap token can be get first page-in process when
a process exiting makes no swap token owner. Thus such above
intermittent running process often get a token.

And currently, swap token priority is only decreased at page fault
path. Then, if the process sleep immediately after to grab swap
token, the swap token priority never be decreased. That's obviously
undesirable.

This patch implement very poor (and lightweight) priority aging.
It only be affect to the above corner case and doesn't change swap
tendency workload performance (eg multi process qsbench load)

Signed-off-by: KOSAKI Motohiro <[email protected]>
Reviewed-by: Rik van Riel <[email protected]>
Reviewed-by: KAMEZAWA Hiroyuki <[email protected]>
---
include/trace/events/vmscan.h | 20 +++++++++++++-------
mm/thrash.c | 11 ++++++++++-
2 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index 1798e0c..b2c33bd 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -366,9 +366,10 @@ DEFINE_EVENT_CONDITION(put_swap_token_template, disable_swap_token,

TRACE_EVENT_CONDITION(update_swap_token_priority,
TP_PROTO(struct mm_struct *mm,
- unsigned int old_prio),
+ unsigned int old_prio,
+ struct mm_struct *swap_token_mm),

- TP_ARGS(mm, old_prio),
+ TP_ARGS(mm, old_prio, swap_token_mm),

TP_CONDITION(mm->token_priority != old_prio),

@@ -376,16 +377,21 @@ TRACE_EVENT_CONDITION(update_swap_token_priority,
__field(struct mm_struct*, mm)
__field(unsigned int, old_prio)
__field(unsigned int, new_prio)
+ __field(struct mm_struct*, swap_token_mm)
+ __field(unsigned int, swap_token_prio)
),

TP_fast_assign(
- __entry->mm = mm;
- __entry->old_prio = old_prio;
- __entry->new_prio = mm->token_priority;
+ __entry->mm = mm;
+ __entry->old_prio = old_prio;
+ __entry->new_prio = mm->token_priority;
+ __entry->swap_token_mm = swap_token_mm;
+ __entry->swap_token_prio = swap_token_mm ? swap_token_mm->token_priority : 0;
),

- TP_printk("mm=%p old_prio=%u new_prio=%u",
- __entry->mm, __entry->old_prio, __entry->new_prio)
+ TP_printk("mm=%p old_prio=%u new_prio=%u swap_token_mm=%p token_prio=%u",
+ __entry->mm, __entry->old_prio, __entry->new_prio,
+ __entry->swap_token_mm, __entry->swap_token_prio)
);

#endif /* _TRACE_VMSCAN_H */
diff --git a/mm/thrash.c b/mm/thrash.c
index 14c6c9f..af46d67 100644
--- a/mm/thrash.c
+++ b/mm/thrash.c
@@ -25,10 +25,13 @@

#include <trace/events/vmscan.h>

+#define TOKEN_AGING_INTERVAL (0xFF)
+
static DEFINE_SPINLOCK(swap_token_lock);
struct mm_struct *swap_token_mm;
struct mem_cgroup *swap_token_memcg;
static unsigned int global_faults;
+static unsigned int last_aging;

void grab_swap_token(struct mm_struct *mm)
{
@@ -47,6 +50,11 @@ void grab_swap_token(struct mm_struct *mm)
if (!swap_token_mm)
goto replace_token;

+ if ((global_faults - last_aging) > TOKEN_AGING_INTERVAL) {
+ swap_token_mm->token_priority /= 2;
+ last_aging = global_faults;
+ }
+
if (mm == swap_token_mm) {
mm->token_priority += 2;
goto update_priority;
@@ -64,7 +72,7 @@ void grab_swap_token(struct mm_struct *mm)
goto replace_token;

update_priority:
- trace_update_swap_token_priority(mm, old_prio);
+ trace_update_swap_token_priority(mm, old_prio, swap_token_mm);

out:
mm->faultstamp = global_faults;
@@ -80,6 +88,7 @@ replace_token:
trace_replace_swap_token(swap_token_mm, mm);
swap_token_mm = mm;
swap_token_memcg = memcg;
+ last_aging = global_faults;
goto out;
}

--
1.7.3.1


2011-05-20 19:30:45

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH v2 3/3] vmscan: implement swap token priority aging

On Thu, 19 May 2011 11:34:15 +0900
KOSAKI Motohiro <[email protected]> wrote:

> While testing for memcg aware swap token, I observed a swap token
> was often grabbed an intermittent running process (eg init, auditd)
> and they never release a token.
>
> Why?
>
> Some processes (eg init, auditd, audispd) wake up when a process
> exiting. And swap token can be get first page-in process when
> a process exiting makes no swap token owner. Thus such above
> intermittent running process often get a token.
>
> And currently, swap token priority is only decreased at page fault
> path. Then, if the process sleep immediately after to grab swap
> token, the swap token priority never be decreased. That's obviously
> undesirable.
>
> This patch implement very poor (and lightweight) priority aging.
> It only be affect to the above corner case and doesn't change swap
> tendency workload performance (eg multi process qsbench load)
>
> ...
>
> --- a/mm/thrash.c
> +++ b/mm/thrash.c
> @@ -25,10 +25,13 @@
>
> #include <trace/events/vmscan.h>
>
> +#define TOKEN_AGING_INTERVAL (0xFF)

Needs a comment describing its units and what it does, please.
Sufficient for readers to understand why this value was chosen and what
effect they could expect to see from changing it.


> static DEFINE_SPINLOCK(swap_token_lock);
> struct mm_struct *swap_token_mm;
> struct mem_cgroup *swap_token_memcg;
> static unsigned int global_faults;
> +static unsigned int last_aging;

Is this a good name? Would something like prev_global_faults be better?

`global_faults' and `last_aging' could be made static local in
grab_swap_token().

> void grab_swap_token(struct mm_struct *mm)
> {
> @@ -47,6 +50,11 @@ void grab_swap_token(struct mm_struct *mm)
> if (!swap_token_mm)
> goto replace_token;
>
> + if ((global_faults - last_aging) > TOKEN_AGING_INTERVAL) {
> + swap_token_mm->token_priority /= 2;
> + last_aging = global_faults;
> + }

It's really hard to reverse-engineer the design decisions from the
implementation here, therefore... ?

> if (mm == swap_token_mm) {
> mm->token_priority += 2;
> goto update_priority;
> @@ -64,7 +72,7 @@ void grab_swap_token(struct mm_struct *mm)
> goto replace_token;
>
> update_priority:
> - trace_update_swap_token_priority(mm, old_prio);
> + trace_update_swap_token_priority(mm, old_prio, swap_token_mm);
>
> out:
> mm->faultstamp = global_faults;
> @@ -80,6 +88,7 @@ replace_token:
> trace_replace_swap_token(swap_token_mm, mm);
> swap_token_mm = mm;
> swap_token_memcg = memcg;
> + last_aging = global_faults;
> goto out;
> }

In fact all of grab_swap_token() and the thrash-detection code in
general are pretty tricky and unobvious stuff. So we left it
undocumented :(

2011-05-24 02:10:22

by KOSAKI Motohiro

[permalink] [raw]
Subject: Re: [PATCH v2 3/3] vmscan: implement swap token priority aging

Hi,

(2011/05/21 4:30), Andrew Morton wrote:
> On Thu, 19 May 2011 11:34:15 +0900
> KOSAKI Motohiro<[email protected]> wrote:
>
>> While testing for memcg aware swap token, I observed a swap token
>> was often grabbed an intermittent running process (eg init, auditd)
>> and they never release a token.
>>
>> Why?
>>
>> Some processes (eg init, auditd, audispd) wake up when a process
>> exiting. And swap token can be get first page-in process when
>> a process exiting makes no swap token owner. Thus such above
>> intermittent running process often get a token.
>>
>> And currently, swap token priority is only decreased at page fault
>> path. Then, if the process sleep immediately after to grab swap
>> token, the swap token priority never be decreased. That's obviously
>> undesirable.
>>
>> This patch implement very poor (and lightweight) priority aging.
>> It only be affect to the above corner case and doesn't change swap
>> tendency workload performance (eg multi process qsbench load)
>>
>> ...
>>
>> --- a/mm/thrash.c
>> +++ b/mm/thrash.c
>> @@ -25,10 +25,13 @@
>>
>> #include<trace/events/vmscan.h>
>>
>> +#define TOKEN_AGING_INTERVAL (0xFF)
>
> Needs a comment describing its units and what it does, please.
> Sufficient for readers to understand why this value was chosen and what
> effect they could expect to see from changing it.

Will do.


>> static DEFINE_SPINLOCK(swap_token_lock);
>> struct mm_struct *swap_token_mm;
>> struct mem_cgroup *swap_token_memcg;
>> static unsigned int global_faults;
>> +static unsigned int last_aging;
>
> Is this a good name? Would something like prev_global_faults be better?

Current code is "next-aging = last-aging + 256global-fault". so,
prev_global_faults is slightly misleading to me.


> `global_faults' and `last_aging' could be made static local in
> grab_swap_token().

OK. even though I don't like static in a function.


>
>> void grab_swap_token(struct mm_struct *mm)
>> {
>> @@ -47,6 +50,11 @@ void grab_swap_token(struct mm_struct *mm)
>> if (!swap_token_mm)
>> goto replace_token;
>>
>> + if ((global_faults - last_aging)> TOKEN_AGING_INTERVAL) {
>> + swap_token_mm->token_priority /= 2;
>> + last_aging = global_faults;
>> + }
>
> It's really hard to reverse-engineer the design decisions from the
> implementation here, therefore... ?
>
>> if (mm == swap_token_mm) {
>> mm->token_priority += 2;
>> goto update_priority;
>> @@ -64,7 +72,7 @@ void grab_swap_token(struct mm_struct *mm)
>> goto replace_token;
>>
>> update_priority:
>> - trace_update_swap_token_priority(mm, old_prio);
>> + trace_update_swap_token_priority(mm, old_prio, swap_token_mm);
>>
>> out:
>> mm->faultstamp = global_faults;
>> @@ -80,6 +88,7 @@ replace_token:
>> trace_replace_swap_token(swap_token_mm, mm);
>> swap_token_mm = mm;
>> swap_token_memcg = memcg;
>> + last_aging = global_faults;
>> goto out;
>> }
>
> In fact all of grab_swap_token() and the thrash-detection code in
> general are pretty tricky and unobvious stuff. So we left it
> undocumented :(

Well, original swap token (designed by Rik) is pretty straight forward
implementation on the theory. Therefore we didn't need too verbose doc.
The paper give us good well documentation.

# Oh, now http://www.cs.wm.edu/~sjiang/token.pdf is dead link.
# we should fix it to http://www.cse.ohio-state.edu/hpcs/WWW/HTML/publications/papers/TR-05-1.pdf,
# maybe, or do anybody know better url?

But following commit rewrite almost all code. and we lost good documentation.
It's a bit sad.


commit 7602bdf2fd14a40dd9b104e516fdc05e1bd17952
Author: Ashwin Chaugule <[email protected]>
Date: Wed Dec 6 20:31:57 2006 -0800

[PATCH] new scheme to preempt swap token




2011-05-25 03:02:22

by KOSAKI Motohiro

[permalink] [raw]
Subject: [PATCH 1/3] swap-token: fix dead link

http://www.cs.wm.edu/~sjiang/token.pdf is now dead. Then, this patch
replace it with an alive alternative.

Signed-off-by: KOSAKI Motohiro <[email protected]>
---
mm/thrash.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/mm/thrash.c b/mm/thrash.c
index af46d67..0d41ff0 100644
--- a/mm/thrash.c
+++ b/mm/thrash.c
@@ -6,7 +6,7 @@
* Released under the GPL, see the file COPYING for details.
*
* Simple token based thrashing protection, using the algorithm
- * described in: http://www.cs.wm.edu/~sjiang/token.pdf
+ * described in: http://www.cse.ohio-state.edu/hpcs/WWW/HTML/publications/abs05-1.html
*
* Sep 2006, Ashwin Chaugule <[email protected]>
* Improved algorithm to pass token:
--
1.7.3.1


2011-05-25 03:13:07

by KOSAKI Motohiro

[permalink] [raw]
Subject: [PATCH 2/3] swap-token: makes global variables to function local

global_faults and last_aging are only used in grab_swap_token().
Then, they can be moved into grab_swap_token().

Signed-off-by: KOSAKI Motohiro <[email protected]>
---
mm/thrash.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/thrash.c b/mm/thrash.c
index 0d41ff0..0504e8a 100644
--- a/mm/thrash.c
+++ b/mm/thrash.c
@@ -30,14 +30,14 @@
static DEFINE_SPINLOCK(swap_token_lock);
struct mm_struct *swap_token_mm;
struct mem_cgroup *swap_token_memcg;
-static unsigned int global_faults;
-static unsigned int last_aging;

void grab_swap_token(struct mm_struct *mm)
{
int current_interval;
unsigned int old_prio = mm->token_priority;
struct mem_cgroup *memcg;
+ static unsigned int global_faults;
+ static unsigned int last_aging;

global_faults++;

--
1.7.3.1


2011-05-25 03:13:39

by KOSAKI Motohiro

[permalink] [raw]
Subject: [PATCH 3/3] swap-token: add a comment for priority aging

Add to a few comment of design decision of swap token aging.

Signed-off-by: KOSAKI Motohiro <[email protected]>
---
mm/thrash.c | 11 +++++++++++
1 files changed, 11 insertions(+), 0 deletions(-)

diff --git a/mm/thrash.c b/mm/thrash.c
index 0504e8a..8832edb 100644
--- a/mm/thrash.c
+++ b/mm/thrash.c
@@ -50,6 +50,17 @@ void grab_swap_token(struct mm_struct *mm)
if (!swap_token_mm)
goto replace_token;

+ /*
+ * Usually, we don't need priority aging because long interval faults
+ * makes priority decrease quickly. But there is one exception. If the
+ * token owner task is sleeping, it never make long interval faults.
+ * Thus, we need a priority aging mechanism instead. The requirements
+ * of priority aging are
+ * 1) An aging interval is reasonable enough long. Too short aging
+ * interval makes quick swap token lost and decrease performance.
+ * 2) The swap token owner task have to get priority aging even if
+ * it's under sleep.
+ */
if ((global_faults - last_aging) > TOKEN_AGING_INTERVAL) {
swap_token_mm->token_priority /= 2;
last_aging = global_faults;
--
1.7.3.1


2011-05-25 12:27:42

by Rik van Riel

[permalink] [raw]
Subject: Re: [PATCH 1/3] swap-token: fix dead link

On 05/24/2011 11:02 PM, KOSAKI Motohiro wrote:
> http://www.cs.wm.edu/~sjiang/token.pdf is now dead. Then, this patch
> replace it with an alive alternative.
>
> Signed-off-by: KOSAKI Motohiro<[email protected]>

Acked-by: Rik van Riel <[email protected]>

--
All rights reversed

2011-05-25 12:28:05

by Rik van Riel

[permalink] [raw]
Subject: Re: [PATCH 2/3] swap-token: makes global variables to function local

On 05/24/2011 11:12 PM, KOSAKI Motohiro wrote:
> global_faults and last_aging are only used in grab_swap_token().
> Then, they can be moved into grab_swap_token().
>
> Signed-off-by: KOSAKI Motohiro<[email protected]>

Acked-by: Rik van Riel <[email protected]>

--
All rights reversed

2011-05-25 12:28:31

by Rik van Riel

[permalink] [raw]
Subject: Re: [PATCH 3/3] swap-token: add a comment for priority aging

On 05/24/2011 11:13 PM, KOSAKI Motohiro wrote:
> Add to a few comment of design decision of swap token aging.
>
> Signed-off-by: KOSAKI Motohiro<[email protected]>

Acked-by: Rik van Riel <[email protected]>

--
All rights reversed

2011-05-26 20:36:13

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH v2 1/3] vmscan,memcg: memcg aware swap token

On Thu, 19 May 2011 11:30:53 +0900
KOSAKI Motohiro <[email protected]> wrote:

> Currently, memcg reclaim can disable swap token even if the swap token
> mm doesn't belong in its memory cgroup. It's slightly risky. If an
> admin creates very small mem-cgroup and silly guy runs contentious heavy
> memory pressure workload, every tasks are going to lose swap token and
> then system may become unresponsive. That's bad.
>
> This patch adds 'memcg' parameter into disable_swap_token(). and if
> the parameter doesn't match swap token, VM doesn't disable it.
>
>
> ...
>
> --- a/mm/thrash.c
> +++ b/mm/thrash.c
> @@ -21,14 +21,17 @@
> #include <linux/mm.h>
> #include <linux/sched.h>
> #include <linux/swap.h>
> +#include <linux/memcontrol.h>
>
> static DEFINE_SPINLOCK(swap_token_lock);
> struct mm_struct *swap_token_mm;
> +struct mem_cgroup *swap_token_memcg;
> static unsigned int global_faults;
>
> void grab_swap_token(struct mm_struct *mm)
> {
> int current_interval;
> + struct mem_cgroup *memcg;
>
> global_faults++;
>
> @@ -38,40 +41,72 @@ void grab_swap_token(struct mm_struct *mm)
> return;
>
> /* First come first served */
> - if (swap_token_mm == NULL) {
> - mm->token_priority = mm->token_priority + 2;
> - swap_token_mm = mm;
> + if (!swap_token_mm)
> + goto replace_token;
> +
> + if (mm == swap_token_mm) {
> + mm->token_priority += 2;
> goto out;
> }
>
> - if (mm != swap_token_mm) {
> - if (current_interval < mm->last_interval)
> - mm->token_priority++;
> - else {
> - if (likely(mm->token_priority > 0))
> - mm->token_priority--;
> - }
> - /* Check if we deserve the token */
> - if (mm->token_priority > swap_token_mm->token_priority) {
> - mm->token_priority += 2;
> - swap_token_mm = mm;
> - }
> - } else {
> - /* Token holder came in again! */
> - mm->token_priority += 2;
> + if (current_interval < mm->last_interval)
> + mm->token_priority++;
> + else {
> + if (likely(mm->token_priority > 0))
> + mm->token_priority--;
> }
>
> + /* Check if we deserve the token */
> + if (mm->token_priority > swap_token_mm->token_priority)
> + goto replace_token;
> +
> out:
> mm->faultstamp = global_faults;
> mm->last_interval = current_interval;
> spin_unlock(&swap_token_lock);
> + return;
> +
> +replace_token:
> + mm->token_priority += 2;
> + memcg = try_get_mem_cgroup_from_mm(mm);
> + if (memcg)
> + css_put(mem_cgroup_css(memcg));
> + swap_token_mm = mm;
> + swap_token_memcg = memcg;
> + goto out;
> }

CONFIG_CGROUPS=n:

mm/thrash.c: In function 'grab_swap_token':
mm/thrash.c:73: error: implicit declaration of function 'css_put'

I don't think that adding a null stub for css_put() is the right fix
here...

2011-05-30 07:44:26

by KOSAKI Motohiro

[permalink] [raw]
Subject: Re: [PATCH v2 1/3] vmscan,memcg: memcg aware swap token

> CONFIG_CGROUPS=n:
>
> mm/thrash.c: In function 'grab_swap_token':
> mm/thrash.c:73: error: implicit declaration of function 'css_put'
>
> I don't think that adding a null stub for css_put() is the right fix
> here...

My bad. Following patch fixes this issue.

Thanks.



>From 6a824b46219cb2f11b125e9a33f65e0f01899d09 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <[email protected]>
Date: Mon, 30 May 2011 15:47:32 +0900
Subject: [PATCH] swap-token: fix compilation error when CONFIG_CGROUPS=n

Andrew Morton pointed out css_put() is not defined when
CONFIG_CGROUPS=n.

: CONFIG_CGROUPS=n:
:
: mm/thrash.c: In function 'grab_swap_token':
: mm/thrash.c:73: error: implicit declaration of function 'css_put'

Thus, this patch move the whole logic into CONFIG_CGROUP_MEM_RES_CTLR
condtion.

Reported-by: Andrew Morton <[email protected]>
Signed-off-by: KOSAKI Motohiro <[email protected]>
---
mm/thrash.c | 24 +++++++++++++++++++-----
1 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/mm/thrash.c b/mm/thrash.c
index 8832edb..cd06606 100644
--- a/mm/thrash.c
+++ b/mm/thrash.c
@@ -31,11 +31,28 @@ static DEFINE_SPINLOCK(swap_token_lock);
struct mm_struct *swap_token_mm;
struct mem_cgroup *swap_token_memcg;

+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+static struct mem_cgroup* swap_token_memcg_from_mm(struct mm_struct *mm)
+{
+ struct mem_cgroup *memcg;
+
+ memcg = try_get_mem_cgroup_from_mm(mm);
+ if (memcg)
+ css_put(mem_cgroup_css(memcg));
+
+ return memcg;
+}
+#else
+static struct mem_cgroup* swap_token_memcg_from_mm(struct mm_struct *mm)
+{
+ return NULL;
+}
+#endif
+
void grab_swap_token(struct mm_struct *mm)
{
int current_interval;
unsigned int old_prio = mm->token_priority;
- struct mem_cgroup *memcg;
static unsigned int global_faults;
static unsigned int last_aging;

@@ -93,12 +110,9 @@ out:

replace_token:
mm->token_priority += 2;
- memcg = try_get_mem_cgroup_from_mm(mm);
- if (memcg)
- css_put(mem_cgroup_css(memcg));
trace_replace_swap_token(swap_token_mm, mm);
swap_token_mm = mm;
- swap_token_memcg = memcg;
+ swap_token_memcg = swap_token_memcg_from_mm(mm);
last_aging = global_faults;
goto out;
}
--
1.7.3.1