2011-05-13 11:36:39

by KOSAKI Motohiro

[permalink] [raw]
Subject: [PATCH 0/3] swap token revisit

Hi

They are a patchset of swap token improvement. each patch is independent.
Probably memcg folks are interest to [1/3]. :)


KOSAKI Motohiro (3):
vmscan,memcg: memcg aware swap token
vmscan: implement swap token trace
vmscan: implement swap token priority decay

include/linux/memcontrol.h | 6 +++
include/linux/swap.h | 8 +---
include/trace/events/vmscan.h | 81 ++++++++++++++++++++++++++++++++++++++
mm/memcontrol.c | 16 +++----
mm/thrash.c | 87 +++++++++++++++++++++++++++++++---------
mm/vmscan.c | 4 +-
6 files changed, 165 insertions(+), 37 deletions(-)

--
1.7.3.1


2011-05-13 11:38:37

by KOSAKI Motohiro

[permalink] [raw]
Subject: [PATCH 1/3] vmscan,memcg: memcg aware swap token

Currently, memcg reclaim can disable swap token even if the swap token
mm doesn't belong in its memory cgroup. It's slightly risky. If an
admin creates very small mem-cgroup and silly guy runs contentious heavy
memory pressure workload, every tasks are going to lose swap token and
then system may become unresponsive. That's bad.

This patch adds 'memcg' parameter into disable_swap_token(). and if
the parameter doesn't match swap token, VM doesn't disable it.

Signed-off-by: KOSAKI Motohiro <[email protected]>
---
include/linux/memcontrol.h | 6 +++
include/linux/swap.h | 8 +---
mm/memcontrol.c | 16 ++++-----
mm/thrash.c | 73 ++++++++++++++++++++++++++++++++-----------
mm/vmscan.c | 4 +-
5 files changed, 71 insertions(+), 36 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 6a0cffd..df572af 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -84,6 +84,7 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem);

extern struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page);
extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
+extern struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm);

static inline
int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup)
@@ -244,6 +245,11 @@ static inline struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
return NULL;
}

+static inline struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
+{
+ return NULL;
+}
+
static inline int mm_match_cgroup(struct mm_struct *mm, struct mem_cgroup *mem)
{
return 1;
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 384eb5f..e705646 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -358,6 +358,7 @@ struct backing_dev_info;
extern struct mm_struct *swap_token_mm;
extern void grab_swap_token(struct mm_struct *);
extern void __put_swap_token(struct mm_struct *);
+extern void disable_swap_token(struct mem_cgroup *memcg);

static inline int has_swap_token(struct mm_struct *mm)
{
@@ -370,11 +371,6 @@ static inline void put_swap_token(struct mm_struct *mm)
__put_swap_token(mm);
}

-static inline void disable_swap_token(void)
-{
- put_swap_token(swap_token_mm);
-}
-
#ifdef CONFIG_CGROUP_MEM_RES_CTLR
extern void
mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout);
@@ -500,7 +496,7 @@ static inline int has_swap_token(struct mm_struct *mm)
return 0;
}

-static inline void disable_swap_token(void)
+static inline void disable_swap_token(struct mem_cgroup *memcg)
{
}

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c2776f1..1a78b3e 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -735,7 +735,7 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
struct mem_cgroup, css);
}

-static struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
+struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
{
struct mem_cgroup *mem = NULL;

@@ -5194,18 +5194,16 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss,
struct cgroup *old_cont,
struct task_struct *p)
{
- struct mm_struct *mm;
+ struct mm_struct *mm = get_task_mm(p);

- if (!mc.to)
- /* no need to move charge */
- return;
-
- mm = get_task_mm(p);
if (mm) {
- mem_cgroup_move_charge(mm);
+ if (mc.to)
+ mem_cgroup_move_charge(mm);
+ put_swap_token(mm);
mmput(mm);
}
- mem_cgroup_clear_mc();
+ if (mc.to)
+ mem_cgroup_clear_mc();
}
#else /* !CONFIG_MMU */
static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
diff --git a/mm/thrash.c b/mm/thrash.c
index 2372d4e..32c07fd 100644
--- a/mm/thrash.c
+++ b/mm/thrash.c
@@ -21,14 +21,17 @@
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/swap.h>
+#include <linux/memcontrol.h>

static DEFINE_SPINLOCK(swap_token_lock);
struct mm_struct *swap_token_mm;
+struct mem_cgroup *swap_token_memcg;
static unsigned int global_faults;

void grab_swap_token(struct mm_struct *mm)
{
int current_interval;
+ struct mem_cgroup *memcg;

global_faults++;

@@ -38,40 +41,72 @@ void grab_swap_token(struct mm_struct *mm)
return;

/* First come first served */
- if (swap_token_mm == NULL) {
- mm->token_priority = mm->token_priority + 2;
- swap_token_mm = mm;
+ if (!swap_token_mm)
+ goto replace_token;
+
+ if (mm == swap_token_mm) {
+ mm->token_priority += 2;
goto out;
}

- if (mm != swap_token_mm) {
- if (current_interval < mm->last_interval)
- mm->token_priority++;
- else {
- if (likely(mm->token_priority > 0))
- mm->token_priority--;
- }
- /* Check if we deserve the token */
- if (mm->token_priority > swap_token_mm->token_priority) {
- mm->token_priority += 2;
- swap_token_mm = mm;
- }
- } else {
- /* Token holder came in again! */
- mm->token_priority += 2;
+ if (current_interval < mm->last_interval)
+ mm->token_priority++;
+ else {
+ if (likely(mm->token_priority > 0))
+ mm->token_priority--;
}

+ /* Check if we deserve the token */
+ if (mm->token_priority > swap_token_mm->token_priority)
+ goto replace_token;
+
out:
mm->faultstamp = global_faults;
mm->last_interval = current_interval;
spin_unlock(&swap_token_lock);
+ return;
+
+replace_token:
+ mm->token_priority += 2;
+ memcg = try_get_mem_cgroup_from_mm(mm);
+ if (memcg)
+ css_put(mem_cgroup_css(memcg));
+ swap_token_mm = mm;
+ swap_token_memcg = memcg;
+ goto out;
}

/* Called on process exit. */
void __put_swap_token(struct mm_struct *mm)
{
spin_lock(&swap_token_lock);
- if (likely(mm == swap_token_mm))
+ if (likely(mm == swap_token_mm)) {
swap_token_mm = NULL;
+ swap_token_memcg = NULL;
+ }
spin_unlock(&swap_token_lock);
}
+
+static bool match_memcg(struct mem_cgroup *a, struct mem_cgroup *b)
+{
+ if (!a)
+ return true;
+ if (!b)
+ return true;
+ if (a == b)
+ return true;
+ return false;
+}
+
+void disable_swap_token(struct mem_cgroup *memcg)
+{
+ /* memcg reclaim don't disable unrelated mm token. */
+ if (match_memcg(memcg, swap_token_memcg)) {
+ spin_lock(&swap_token_lock);
+ if (match_memcg(memcg, swap_token_memcg)) {
+ swap_token_mm = NULL;
+ swap_token_memcg = NULL;
+ }
+ spin_unlock(&swap_token_lock);
+ }
+}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index b3a569f..19e179b 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2044,7 +2044,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
for (priority = DEF_PRIORITY; priority >= 0; priority--) {
sc->nr_scanned = 0;
if (!priority)
- disable_swap_token();
+ disable_swap_token(sc->mem_cgroup);
shrink_zones(priority, zonelist, sc);
/*
* Don't shrink slabs when reclaiming memory from
@@ -2353,7 +2353,7 @@ loop_again:

/* The swap token gets in the way of swapout... */
if (!priority)
- disable_swap_token();
+ disable_swap_token(NULL);

all_zones_ok = 1;
balanced = 0;
--
1.7.3.1


2011-05-13 11:39:15

by KOSAKI Motohiro

[permalink] [raw]
Subject: [PATCH 2/3] vmscan: implement swap token trace

This is useful for observing swap token activity.

example output:

zsh-1845 [000] 598.962716: update_swap_token_priority:
mm=ffff88015eaf7700 old_prio=1 new_prio=0
memtoy-1830 [001] 602.033900: update_swap_token_priority:
mm=ffff880037a45880 old_prio=947 new_prio=949
memtoy-1830 [000] 602.041509: update_swap_token_priority:
mm=ffff880037a45880 old_prio=949 new_prio=951
memtoy-1830 [000] 602.051959: update_swap_token_priority:
mm=ffff880037a45880 old_prio=951 new_prio=953
memtoy-1830 [000] 602.052188: update_swap_token_priority:
mm=ffff880037a45880 old_prio=953 new_prio=955
memtoy-1830 [001] 602.427184: put_swap_token:
token_mm=ffff880037a45880
zsh-1789 [000] 602.427281: replace_swap_token:
old_token_mm= (null) old_prio=0 new_token_mm=ffff88015eaf7018
new_prio=2
zsh-1789 [001] 602.433456: update_swap_token_priority:
mm=ffff88015eaf7018 old_prio=2 new_prio=4
zsh-1789 [000] 602.437613: update_swap_token_priority:
mm=ffff88015eaf7018 old_prio=4 new_prio=6
zsh-1789 [000] 602.443924: update_swap_token_priority:
mm=ffff88015eaf7018 old_prio=6 new_prio=8
zsh-1789 [000] 602.451873: update_swap_token_priority:
mm=ffff88015eaf7018 old_prio=8 new_prio=10
zsh-1789 [001] 602.462639: update_swap_token_priority:
mm=ffff88015eaf7018 old_prio=10 new_prio=12

Signed-off-by: KOSAKI Motohiro <[email protected]>
---
include/trace/events/vmscan.h | 77 +++++++++++++++++++++++++++++++++++++++++
mm/thrash.c | 11 +++++-
2 files changed, 87 insertions(+), 1 deletions(-)

diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index ea422aa..1798e0c 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -6,6 +6,8 @@

#include <linux/types.h>
#include <linux/tracepoint.h>
+#include <linux/mm.h>
+#include <linux/memcontrol.h>
#include "gfpflags.h"

#define RECLAIM_WB_ANON 0x0001u
@@ -310,6 +312,81 @@ TRACE_EVENT(mm_vmscan_lru_shrink_inactive,
show_reclaim_flags(__entry->reclaim_flags))
);

+TRACE_EVENT(replace_swap_token,
+ TP_PROTO(struct mm_struct *old_mm,
+ struct mm_struct *new_mm),
+
+ TP_ARGS(old_mm, new_mm),
+
+ TP_STRUCT__entry(
+ __field(struct mm_struct*, old_mm)
+ __field(unsigned int, old_prio)
+ __field(struct mm_struct*, new_mm)
+ __field(unsigned int, new_prio)
+ ),
+
+ TP_fast_assign(
+ __entry->old_mm = old_mm;
+ __entry->old_prio = old_mm ? old_mm->token_priority : 0;
+ __entry->new_mm = new_mm;
+ __entry->new_prio = new_mm->token_priority;
+ ),
+
+ TP_printk("old_token_mm=%p old_prio=%u new_token_mm=%p new_prio=%u",
+ __entry->old_mm, __entry->old_prio,
+ __entry->new_mm, __entry->new_prio)
+);
+
+DECLARE_EVENT_CLASS(put_swap_token_template,
+ TP_PROTO(struct mm_struct *swap_token_mm),
+
+ TP_ARGS(swap_token_mm),
+
+ TP_STRUCT__entry(
+ __field(struct mm_struct*, swap_token_mm)
+ ),
+
+ TP_fast_assign(
+ __entry->swap_token_mm = swap_token_mm;
+ ),
+
+ TP_printk("token_mm=%p", __entry->swap_token_mm)
+);
+
+DEFINE_EVENT(put_swap_token_template, put_swap_token,
+ TP_PROTO(struct mm_struct *swap_token_mm),
+ TP_ARGS(swap_token_mm)
+);
+
+DEFINE_EVENT_CONDITION(put_swap_token_template, disable_swap_token,
+ TP_PROTO(struct mm_struct *swap_token_mm),
+ TP_ARGS(swap_token_mm),
+ TP_CONDITION(swap_token_mm != NULL)
+);
+
+TRACE_EVENT_CONDITION(update_swap_token_priority,
+ TP_PROTO(struct mm_struct *mm,
+ unsigned int old_prio),
+
+ TP_ARGS(mm, old_prio),
+
+ TP_CONDITION(mm->token_priority != old_prio),
+
+ TP_STRUCT__entry(
+ __field(struct mm_struct*, mm)
+ __field(unsigned int, old_prio)
+ __field(unsigned int, new_prio)
+ ),
+
+ TP_fast_assign(
+ __entry->mm = mm;
+ __entry->old_prio = old_prio;
+ __entry->new_prio = mm->token_priority;
+ ),
+
+ TP_printk("mm=%p old_prio=%u new_prio=%u",
+ __entry->mm, __entry->old_prio, __entry->new_prio)
+);

#endif /* _TRACE_VMSCAN_H */

diff --git a/mm/thrash.c b/mm/thrash.c
index 32c07fd..14c6c9f 100644
--- a/mm/thrash.c
+++ b/mm/thrash.c
@@ -23,6 +23,8 @@
#include <linux/swap.h>
#include <linux/memcontrol.h>

+#include <trace/events/vmscan.h>
+
static DEFINE_SPINLOCK(swap_token_lock);
struct mm_struct *swap_token_mm;
struct mem_cgroup *swap_token_memcg;
@@ -31,6 +33,7 @@ static unsigned int global_faults;
void grab_swap_token(struct mm_struct *mm)
{
int current_interval;
+ unsigned int old_prio = mm->token_priority;
struct mem_cgroup *memcg;

global_faults++;
@@ -46,7 +49,7 @@ void grab_swap_token(struct mm_struct *mm)

if (mm == swap_token_mm) {
mm->token_priority += 2;
- goto out;
+ goto update_priority;
}

if (current_interval < mm->last_interval)
@@ -60,6 +63,9 @@ void grab_swap_token(struct mm_struct *mm)
if (mm->token_priority > swap_token_mm->token_priority)
goto replace_token;

+update_priority:
+ trace_update_swap_token_priority(mm, old_prio);
+
out:
mm->faultstamp = global_faults;
mm->last_interval = current_interval;
@@ -71,6 +77,7 @@ replace_token:
memcg = try_get_mem_cgroup_from_mm(mm);
if (memcg)
css_put(mem_cgroup_css(memcg));
+ trace_replace_swap_token(swap_token_mm, mm);
swap_token_mm = mm;
swap_token_memcg = memcg;
goto out;
@@ -81,6 +88,7 @@ void __put_swap_token(struct mm_struct *mm)
{
spin_lock(&swap_token_lock);
if (likely(mm == swap_token_mm)) {
+ trace_put_swap_token(swap_token_mm);
swap_token_mm = NULL;
swap_token_memcg = NULL;
}
@@ -104,6 +112,7 @@ void disable_swap_token(struct mem_cgroup *memcg)
if (match_memcg(memcg, swap_token_memcg)) {
spin_lock(&swap_token_lock);
if (match_memcg(memcg, swap_token_memcg)) {
+ trace_disable_swap_token(swap_token_mm);
swap_token_mm = NULL;
swap_token_memcg = NULL;
}
--
1.7.3.1


2011-05-13 11:40:32

by KOSAKI Motohiro

[permalink] [raw]
Subject: [PATCH 3/3] vmscan: implement swap token priority decay

While testing for memcg aware swap token, I observed a swap token
was often grabbed an intermittent running process (eg init, auditd)
and they never release a token.

Why? Currently, swap toke priority is only decreased at page fault
path. Then, if the process sleep immediately after to grab swap
token, their swap token priority never be decreased. That makes
obviously undesired result.

This patch implement very poor (and lightweight) priority decay
mechanism. It only be affect to the above corner case and doesn't
change swap tendency workload performance (eg multi process qsbench
load)

Signed-off-by: KOSAKI Motohiro <[email protected]>
---
include/trace/events/vmscan.h | 12 ++++++++----
mm/thrash.c | 5 ++++-
2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index 1798e0c..ba18137 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -366,9 +366,10 @@ DEFINE_EVENT_CONDITION(put_swap_token_template, disable_swap_token,

TRACE_EVENT_CONDITION(update_swap_token_priority,
TP_PROTO(struct mm_struct *mm,
- unsigned int old_prio),
+ unsigned int old_prio,
+ struct mm_struct *swap_token_mm),

- TP_ARGS(mm, old_prio),
+ TP_ARGS(mm, old_prio, swap_token_mm),

TP_CONDITION(mm->token_priority != old_prio),

@@ -376,16 +377,19 @@ TRACE_EVENT_CONDITION(update_swap_token_priority,
__field(struct mm_struct*, mm)
__field(unsigned int, old_prio)
__field(unsigned int, new_prio)
+ __field(unsigned int, token_prio)
),

TP_fast_assign(
__entry->mm = mm;
__entry->old_prio = old_prio;
__entry->new_prio = mm->token_priority;
+ __entry->token_prio = swap_token_mm ? swap_token_mm->token_priority : 0;
),

- TP_printk("mm=%p old_prio=%u new_prio=%u",
- __entry->mm, __entry->old_prio, __entry->new_prio)
+ TP_printk("mm=%p old_prio=%u new_prio=%u token_prio=%u",
+ __entry->mm, __entry->old_prio, __entry->new_prio,
+ __entry->token_prio)
);

#endif /* _TRACE_VMSCAN_H */
diff --git a/mm/thrash.c b/mm/thrash.c
index 14c6c9f..0c4f0a8 100644
--- a/mm/thrash.c
+++ b/mm/thrash.c
@@ -47,6 +47,9 @@ void grab_swap_token(struct mm_struct *mm)
if (!swap_token_mm)
goto replace_token;

+ if (!(global_faults & 0xff))
+ mm->token_priority /= 2;
+
if (mm == swap_token_mm) {
mm->token_priority += 2;
goto update_priority;
@@ -64,7 +67,7 @@ void grab_swap_token(struct mm_struct *mm)
goto replace_token;

update_priority:
- trace_update_swap_token_priority(mm, old_prio);
+ trace_update_swap_token_priority(mm, old_prio, swap_token_mm);

out:
mm->faultstamp = global_faults;
--
1.7.3.1


2011-05-15 17:56:38

by Rik van Riel

[permalink] [raw]
Subject: Re: [PATCH 1/3] vmscan,memcg: memcg aware swap token

On 05/13/2011 07:40 AM, KOSAKI Motohiro wrote:
> Currently, memcg reclaim can disable swap token even if the swap token
> mm doesn't belong in its memory cgroup. It's slightly risky. If an
> admin creates very small mem-cgroup and silly guy runs contentious heavy
> memory pressure workload, every tasks are going to lose swap token and
> then system may become unresponsive. That's bad.
>
> This patch adds 'memcg' parameter into disable_swap_token(). and if
> the parameter doesn't match swap token, VM doesn't disable it.
>
> Signed-off-by: KOSAKI Motohiro<[email protected]>

Reviewed-by: Rik van Riel<[email protected]>

--
All rights reversed

2011-05-15 22:27:26

by Rik van Riel

[permalink] [raw]
Subject: Re: [PATCH 2/3] vmscan: implement swap token trace

On 05/13/2011 07:40 AM, KOSAKI Motohiro wrote:
> This is useful for observing swap token activity.
>
> example output:

Acked-by: Rik van Riel<[email protected]>

--
All rights reversed

2011-05-15 22:34:32

by Rik van Riel

[permalink] [raw]
Subject: Re: [PATCH 3/3] vmscan: implement swap token priority decay

On 05/13/2011 07:42 AM, KOSAKI Motohiro wrote:
> While testing for memcg aware swap token, I observed a swap token
> was often grabbed an intermittent running process (eg init, auditd)
> and they never release a token.
>
> Why? Currently, swap toke priority is only decreased at page fault
> path. Then, if the process sleep immediately after to grab swap
> token, their swap token priority never be decreased. That makes
> obviously undesired result.
>
> This patch implement very poor (and lightweight) priority decay
> mechanism. It only be affect to the above corner case and doesn't
> change swap tendency workload performance (eg multi process qsbench
> load)

Ohhh, good catch. The original swap token algorithm did
not have this problem, and I never caught the fact that
the replacement (which is better in many ways) does...

> Signed-off-by: KOSAKI Motohiro<[email protected]>

Reviewed-by: Rik van Riel<[email protected]>

--
All rights reversed

2011-05-16 08:08:26

by Kamezawa Hiroyuki

[permalink] [raw]
Subject: Re: [PATCH 1/3] vmscan,memcg: memcg aware swap token

On Fri, 13 May 2011 20:40:13 +0900
KOSAKI Motohiro <[email protected]> wrote:

> Currently, memcg reclaim can disable swap token even if the swap token
> mm doesn't belong in its memory cgroup. It's slightly risky. If an
> admin creates very small mem-cgroup and silly guy runs contentious heavy
> memory pressure workload, every tasks are going to lose swap token and
> then system may become unresponsive. That's bad.
>
> This patch adds 'memcg' parameter into disable_swap_token(). and if
> the parameter doesn't match swap token, VM doesn't disable it.
>
> Signed-off-by: KOSAKI Motohiro <[email protected]>

Thank you

Reviewed-by: KAMEZAWA Hiroyuki <[email protected]>

2011-05-16 08:09:26

by Kamezawa Hiroyuki

[permalink] [raw]
Subject: Re: [PATCH 2/3] vmscan: implement swap token trace

On Fri, 13 May 2011 20:40:52 +0900
KOSAKI Motohiro <[email protected]> wrote:

> This is useful for observing swap token activity.
>
> example output:
>
> zsh-1845 [000] 598.962716: update_swap_token_priority:
> mm=ffff88015eaf7700 old_prio=1 new_prio=0
> memtoy-1830 [001] 602.033900: update_swap_token_priority:
> mm=ffff880037a45880 old_prio=947 new_prio=949
> memtoy-1830 [000] 602.041509: update_swap_token_priority:
> mm=ffff880037a45880 old_prio=949 new_prio=951
> memtoy-1830 [000] 602.051959: update_swap_token_priority:
> mm=ffff880037a45880 old_prio=951 new_prio=953
> memtoy-1830 [000] 602.052188: update_swap_token_priority:
> mm=ffff880037a45880 old_prio=953 new_prio=955
> memtoy-1830 [001] 602.427184: put_swap_token:
> token_mm=ffff880037a45880
> zsh-1789 [000] 602.427281: replace_swap_token:
> old_token_mm= (null) old_prio=0 new_token_mm=ffff88015eaf7018
> new_prio=2
> zsh-1789 [001] 602.433456: update_swap_token_priority:
> mm=ffff88015eaf7018 old_prio=2 new_prio=4
> zsh-1789 [000] 602.437613: update_swap_token_priority:
> mm=ffff88015eaf7018 old_prio=4 new_prio=6
> zsh-1789 [000] 602.443924: update_swap_token_priority:
> mm=ffff88015eaf7018 old_prio=6 new_prio=8
> zsh-1789 [000] 602.451873: update_swap_token_priority:
> mm=ffff88015eaf7018 old_prio=8 new_prio=10
> zsh-1789 [001] 602.462639: update_swap_token_priority:
> mm=ffff88015eaf7018 old_prio=10 new_prio=12
>
> Signed-off-by: KOSAKI Motohiro <[email protected]>

Reviewed-by: KAMEZAWA Hiroyuki <[email protected]>

2011-05-16 08:29:46

by Kamezawa Hiroyuki

[permalink] [raw]
Subject: Re: [PATCH 3/3] vmscan: implement swap token priority decay

On Fri, 13 May 2011 20:42:11 +0900
KOSAKI Motohiro <[email protected]> wrote:

> While testing for memcg aware swap token, I observed a swap token
> was often grabbed an intermittent running process (eg init, auditd)
> and they never release a token.
>
> Why? Currently, swap toke priority is only decreased at page fault
> path. Then, if the process sleep immediately after to grab swap
> token, their swap token priority never be decreased. That makes
> obviously undesired result.
>
> This patch implement very poor (and lightweight) priority decay
> mechanism. It only be affect to the above corner case and doesn't
> change swap tendency workload performance (eg multi process qsbench
> load)
>
> Signed-off-by: KOSAKI Motohiro <[email protected]>

Reviewed-by: KAMEZAWA Hiroyuki <[email protected]>

But...

> ---
> include/trace/events/vmscan.h | 12 ++++++++----
> mm/thrash.c | 5 ++++-
> 2 files changed, 12 insertions(+), 5 deletions(-)
>
> diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
> index 1798e0c..ba18137 100644
> --- a/include/trace/events/vmscan.h
> +++ b/include/trace/events/vmscan.h
> @@ -366,9 +366,10 @@ DEFINE_EVENT_CONDITION(put_swap_token_template, disable_swap_token,
>
> TRACE_EVENT_CONDITION(update_swap_token_priority,
> TP_PROTO(struct mm_struct *mm,
> - unsigned int old_prio),
> + unsigned int old_prio,
> + struct mm_struct *swap_token_mm),
>
> - TP_ARGS(mm, old_prio),
> + TP_ARGS(mm, old_prio, swap_token_mm),
>
> TP_CONDITION(mm->token_priority != old_prio),
>
> @@ -376,16 +377,19 @@ TRACE_EVENT_CONDITION(update_swap_token_priority,
> __field(struct mm_struct*, mm)
> __field(unsigned int, old_prio)
> __field(unsigned int, new_prio)
> + __field(unsigned int, token_prio)
> ),
>
> TP_fast_assign(
> __entry->mm = mm;
> __entry->old_prio = old_prio;
> __entry->new_prio = mm->token_priority;
> + __entry->token_prio = swap_token_mm ? swap_token_mm->token_priority : 0;
> ),
>
> - TP_printk("mm=%p old_prio=%u new_prio=%u",
> - __entry->mm, __entry->old_prio, __entry->new_prio)
> + TP_printk("mm=%p old_prio=%u new_prio=%u token_prio=%u",
> + __entry->mm, __entry->old_prio, __entry->new_prio,
> + __entry->token_prio)
> );
>
> #endif /* _TRACE_VMSCAN_H */
> diff --git a/mm/thrash.c b/mm/thrash.c
> index 14c6c9f..0c4f0a8 100644
> --- a/mm/thrash.c
> +++ b/mm/thrash.c
> @@ -47,6 +47,9 @@ void grab_swap_token(struct mm_struct *mm)
> if (!swap_token_mm)
> goto replace_token;
>
> + if (!(global_faults & 0xff))
> + mm->token_priority /= 2;
> +

I personally don't like this kind of checking counter with mask.
Hmm. How about this ?

==
#define TOKEN_AGE_MASK ~(0xff)
/*
* If current global_fault is in different age from previous global_fault,
* Aging priority and starts new era.
*/
if ((mm->faultstamp & TOKEN_AGE_MASK) != (global_faults & MM_TOKEN_MASK))
mm->token_priority /= 2;
==

But I'm not sure 0xff is a proper value or not...

Thanks,
-Kame

2011-05-18 01:08:59

by KOSAKI Motohiro

[permalink] [raw]
Subject: Re: [PATCH 3/3] vmscan: implement swap token priority decay

>> diff --git a/mm/thrash.c b/mm/thrash.c
>> index 14c6c9f..0c4f0a8 100644
>> --- a/mm/thrash.c
>> +++ b/mm/thrash.c
>> @@ -47,6 +47,9 @@ void grab_swap_token(struct mm_struct *mm)
>> if (!swap_token_mm)
>> goto replace_token;
>>
>> + if (!(global_faults& 0xff))
>> + mm->token_priority /= 2;
>> +
>
> I personally don't like this kind of checking counter with mask.
> Hmm. How about this ?
>
> ==
> #define TOKEN_AGE_MASK ~(0xff)
> /*
> * If current global_fault is in different age from previous global_fault,
> * Aging priority and starts new era.
> */
> if ((mm->faultstamp& TOKEN_AGE_MASK) != (global_faults& MM_TOKEN_MASK))
> mm->token_priority /= 2;
> ==

OK. will do.