2014-10-21 20:21:49

by Johannes Weiner

Subject: [patch 1/4] mm: memcontrol: inline memcg->move_lock locking

The wrappers around taking and dropping the memcg->move_lock spinlock
add nothing of value. Inline the spinlock calls into the callsites.
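
For example, the callsite in mem_cgroup_migrate() goes from the wrapped
form to the plain spinlock calls (before/after sketch of the last hunk
below):

	/* before: the wrapper obscures a plain irqsave spinlock */
	move_lock_mem_cgroup(memcg, &flags);
	pc->mem_cgroup = NULL;
	move_unlock_mem_cgroup(memcg, &flags);

	/* after: the locking primitive is visible at the callsite */
	spin_lock_irqsave(&memcg->move_lock, flags);
	pc->mem_cgroup = NULL;
	spin_unlock_irqrestore(&memcg->move_lock, flags);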

Signed-off-by: Johannes Weiner <[email protected]>
---
mm/memcontrol.c | 34 +++++++++-------------------------
1 file changed, 9 insertions(+), 25 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 293db8234179..1ff125d2a427 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1507,23 +1507,6 @@ static bool mem_cgroup_wait_acct_move(struct mem_cgroup *memcg)
return false;
}

-/*
- * Take this lock when
- * - a code tries to modify page's memcg while it's USED.
- * - a code tries to modify page state accounting in a memcg.
- */
-static void move_lock_mem_cgroup(struct mem_cgroup *memcg,
- unsigned long *flags)
-{
- spin_lock_irqsave(&memcg->move_lock, *flags);
-}
-
-static void move_unlock_mem_cgroup(struct mem_cgroup *memcg,
- unsigned long *flags)
-{
- spin_unlock_irqrestore(&memcg->move_lock, *flags);
-}
-
#define K(x) ((x) << (PAGE_SHIFT-10))
/**
* mem_cgroup_print_oom_info: Print OOM information relevant to memory controller.
@@ -2013,7 +1996,7 @@ again:
return;
/*
* If this memory cgroup is not under account moving, we don't
- * need to take move_lock_mem_cgroup(). Because we already hold
+ * need to take &memcg->move_lock. Because we already hold
* rcu_read_lock(), any calls to move_account will be delayed until
* rcu_read_unlock().
*/
@@ -2021,9 +2004,9 @@ again:
if (atomic_read(&memcg->moving_account) <= 0)
return;

- move_lock_mem_cgroup(memcg, flags);
+ spin_lock_irqsave(&memcg->move_lock, *flags);
if (memcg != pc->mem_cgroup) {
- move_unlock_mem_cgroup(memcg, flags);
+ spin_unlock_irqrestore(&memcg->move_lock, *flags);
goto again;
}
*locked = true;
@@ -2038,7 +2021,7 @@ void __mem_cgroup_end_update_page_stat(struct page *page, unsigned long *flags)
* lock is held because a routine modifies pc->mem_cgroup
* should take move_lock_mem_cgroup().
*/
- move_unlock_mem_cgroup(pc->mem_cgroup, flags);
+ spin_unlock_irqrestore(&pc->mem_cgroup->move_lock, *flags);
}

void mem_cgroup_update_page_stat(struct page *page,
@@ -3083,7 +3066,7 @@ static int mem_cgroup_move_account(struct page *page,
if (pc->mem_cgroup != from)
goto out_unlock;

- move_lock_mem_cgroup(from, &flags);
+ spin_lock_irqsave(&from->move_lock, flags);

if (!PageAnon(page) && page_mapped(page)) {
__this_cpu_sub(from->stat->count[MEM_CGROUP_STAT_FILE_MAPPED],
@@ -3107,7 +3090,8 @@ static int mem_cgroup_move_account(struct page *page,

/* caller should have done css_get */
pc->mem_cgroup = to;
- move_unlock_mem_cgroup(from, &flags);
+ spin_unlock_irqrestore(&from->move_lock, flags);
+
ret = 0;

local_irq_disable();
@@ -6033,9 +6017,9 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage,
* but there might still be references, e.g. from finishing
* writeback. Follow the charge moving protocol here.
*/
- move_lock_mem_cgroup(memcg, &flags);
+ spin_lock_irqsave(&memcg->move_lock, flags);
pc->mem_cgroup = NULL;
- move_unlock_mem_cgroup(memcg, &flags);
+ spin_unlock_irqrestore(&memcg->move_lock, flags);

if (lrucare)
unlock_page_lru(oldpage, isolated);
--
2.1.2


2014-10-21 20:21:53

by Johannes Weiner

Subject: [patch 3/4] mm: page-writeback: inline account_page_writeback() into single caller

A follow-up patch would have to change the function's call signature
anyway. To save the trouble, just fold it into its single caller
instead.
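
After the fold, __test_set_page_writeback() does the accounting
directly (sketch matching the hunk below), so the follow-up patch only
has to touch this one site when the first argument of
mem_cgroup_inc_page_stat() changes from the page to its memcg:

	if (!ret) {
		mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_WRITEBACK);
		inc_zone_page_state(page, NR_WRITEBACK);
	}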

Signed-off-by: Johannes Weiner <[email protected]>
---
include/linux/mm.h | 1 -
mm/page-writeback.c | 23 ++++-------------------
2 files changed, 4 insertions(+), 20 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 27eb1bfbe704..b46461116cd2 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1235,7 +1235,6 @@ int __set_page_dirty_no_writeback(struct page *page);
int redirty_page_for_writepage(struct writeback_control *wbc,
struct page *page);
void account_page_dirtied(struct page *page, struct address_space *mapping);
-void account_page_writeback(struct page *page);
int set_page_dirty(struct page *page);
int set_page_dirty_lock(struct page *page);
int clear_page_dirty_for_io(struct page *page);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index ff24c9d83112..ff6a5b07211e 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2116,23 +2116,6 @@ void account_page_dirtied(struct page *page, struct address_space *mapping)
EXPORT_SYMBOL(account_page_dirtied);

/*
- * Helper function for set_page_writeback family.
- *
- * The caller must hold mem_cgroup_begin/end_update_page_stat() lock
- * while calling this function.
- * See test_set_page_writeback for example.
- *
- * NOTE: Unlike account_page_dirtied this does not rely on being atomic
- * wrt interrupts.
- */
-void account_page_writeback(struct page *page)
-{
- mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_WRITEBACK);
- inc_zone_page_state(page, NR_WRITEBACK);
-}
-EXPORT_SYMBOL(account_page_writeback);
-
-/*
* For address_spaces which do not use buffers. Just tag the page as dirty in
* its radix tree.
*
@@ -2410,8 +2393,10 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
} else {
ret = TestSetPageWriteback(page);
}
- if (!ret)
- account_page_writeback(page);
+ if (!ret) {
+ mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_WRITEBACK);
+ inc_zone_page_state(page, NR_WRITEBACK);
+ }
mem_cgroup_end_update_page_stat(page, &locked, &memcg_flags);
return ret;

--
2.1.2

2014-10-21 20:21:59

by Johannes Weiner

Subject: [patch 4/4] mm: memcontrol: simplify per-memcg page statistics accounting

The per-page statistics interface is heavily optimized to avoid a
function call and a lookup_page_cgroup() in the file unmap fast path,
but this results in fairly convoluted and repetitive code.

Rework it so that it looks up the page's memcg once at the beginning
of the transaction and then uses it throughout, which makes things a
lot more readable.
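
For illustration, a typical update site then follows one
lookup-and-lock transaction (sketch of the new calling convention,
mirroring the page_add_file_rmap() hunk in the diff below):

	struct mem_cgroup *memcg;
	unsigned long flags;
	bool locked;

	/* look up pc->mem_cgroup once: rcu_read_lock(), plus
	 * memcg->move_lock only while charge moving is in progress */
	memcg = mem_cgroup_begin_page_stat(page, &locked, &flags);

	if (atomic_inc_and_test(&page->_mapcount)) {
		__inc_zone_page_state(page, NR_FILE_MAPPED);
		mem_cgroup_inc_page_stat(memcg, MEM_CGROUP_STAT_FILE_MAPPED);
	}

	/* drop the move_lock (if taken) and rcu_read_unlock() */
	mem_cgroup_end_page_stat(memcg, locked, flags);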

The following test results are from a microbenchmark that maps, faults,
and unmaps a 4GB sparse file three times in a nested fashion, so that
two of the passes do no accounting but still go through the new
transaction overhead. There is no measurable difference:

old: 33.195102545 seconds time elapsed ( +- 0.01% )
new: 33.199231369 seconds time elapsed ( +- 0.03% )

The time spent in page_remove_rmap()'s callees still adds up to the
same, but the time spent in the function itself seems reduced:

# Children Self Command Shared Object Symbol
old: 0.12% 0.11% filemapstress [kernel.kallsyms] [k] page_remove_rmap
new: 0.12% 0.08% filemapstress [kernel.kallsyms] [k] page_remove_rmap

Signed-off-by: Johannes Weiner <[email protected]>
---
include/linux/memcontrol.h | 57 +++++++++++++---------------------------------
mm/memcontrol.c | 52 +++++++++++++++++-------------------------
mm/page-writeback.c | 22 ++++++++++--------
mm/rmap.c | 20 ++++++++--------
4 files changed, 59 insertions(+), 92 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 0daf383f8f1c..4dc4a2aec440 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -139,48 +139,23 @@ static inline bool mem_cgroup_disabled(void)
return false;
}

-void __mem_cgroup_begin_update_page_stat(struct page *page, bool *locked,
- unsigned long *flags);
-
-extern atomic_t memcg_moving;
-
-static inline void mem_cgroup_begin_update_page_stat(struct page *page,
- bool *locked, unsigned long *flags)
-{
- if (mem_cgroup_disabled())
- return;
- rcu_read_lock();
- *locked = false;
- if (atomic_read(&memcg_moving))
- __mem_cgroup_begin_update_page_stat(page, locked, flags);
-}
-
-void __mem_cgroup_end_update_page_stat(struct page *page,
- unsigned long *flags);
-static inline void mem_cgroup_end_update_page_stat(struct page *page,
- bool *locked, unsigned long *flags)
-{
- if (mem_cgroup_disabled())
- return;
- if (*locked)
- __mem_cgroup_end_update_page_stat(page, flags);
- rcu_read_unlock();
-}
-
-void mem_cgroup_update_page_stat(struct page *page,
- enum mem_cgroup_stat_index idx,
- int val);
-
-static inline void mem_cgroup_inc_page_stat(struct page *page,
+struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page, bool *locked,
+ unsigned long *flags);
+void mem_cgroup_end_page_stat(struct mem_cgroup *memcg, bool locked,
+ unsigned long flags);
+void mem_cgroup_update_page_stat(struct mem_cgroup *memcg,
+ enum mem_cgroup_stat_index idx, int val);
+
+static inline void mem_cgroup_inc_page_stat(struct mem_cgroup *memcg,
enum mem_cgroup_stat_index idx)
{
- mem_cgroup_update_page_stat(page, idx, 1);
+ mem_cgroup_update_page_stat(memcg, idx, 1);
}

-static inline void mem_cgroup_dec_page_stat(struct page *page,
+static inline void mem_cgroup_dec_page_stat(struct mem_cgroup *memcg,
enum mem_cgroup_stat_index idx)
{
- mem_cgroup_update_page_stat(page, idx, -1);
+ mem_cgroup_update_page_stat(memcg, idx, -1);
}

unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
@@ -315,13 +290,13 @@ mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
{
}

-static inline void mem_cgroup_begin_update_page_stat(struct page *page,
+static inline void mem_cgroup_begin_page_stat(struct page *page,
bool *locked, unsigned long *flags)
{
}

-static inline void mem_cgroup_end_update_page_stat(struct page *page,
- bool *locked, unsigned long *flags)
+static inline void mem_cgroup_end_page_stat(struct mem_cgroup *memcg,
+ bool locked, unsigned long flags)
{
}

@@ -343,12 +318,12 @@ static inline bool mem_cgroup_oom_synchronize(bool wait)
return false;
}

-static inline void mem_cgroup_inc_page_stat(struct page *page,
+static inline void mem_cgroup_inc_page_stat(struct mem_cgroup *memcg,
enum mem_cgroup_stat_index idx)
{
}

-static inline void mem_cgroup_dec_page_stat(struct page *page,
+static inline void mem_cgroup_dec_page_stat(struct mem_cgroup *memcg,
enum mem_cgroup_stat_index idx)
{
}
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c1fe774d712a..36ab98d6659e 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1440,19 +1440,14 @@ int mem_cgroup_swappiness(struct mem_cgroup *memcg)
* start move here.
*/

-/* for quick checking without looking up memcg */
-atomic_t memcg_moving __read_mostly;
-
static void mem_cgroup_start_move(struct mem_cgroup *memcg)
{
- atomic_inc(&memcg_moving);
atomic_inc(&memcg->moving_account);
synchronize_rcu();
}

static void mem_cgroup_end_move(struct mem_cgroup *memcg)
{
- atomic_dec(&memcg_moving);
atomic_dec(&memcg->moving_account);
}

@@ -1977,26 +1972,32 @@ cleanup:
* account and taking the move_lock in the slowpath.
*/

-void __mem_cgroup_begin_update_page_stat(struct page *page,
- bool *locked, unsigned long *flags)
+struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page,
+ bool *locked,
+ unsigned long *flags)
{
struct mem_cgroup *memcg;
struct page_cgroup *pc;

+ rcu_read_lock();
+
+ if (mem_cgroup_disabled())
+ return NULL;
+
pc = lookup_page_cgroup(page);
again:
memcg = pc->mem_cgroup;
if (unlikely(!memcg))
- return;
+ return NULL;
/*
* If this memory cgroup is not under account moving, we don't
* need to take &memcg->move_lock. Because we already hold
* rcu_read_lock(), any calls to move_account will be delayed until
* rcu_read_unlock().
*/
- VM_BUG_ON(!rcu_read_lock_held());
+ *locked = false;
if (atomic_read(&memcg->moving_account) <= 0)
- return;
+ return memcg;

spin_lock_irqsave(&memcg->move_lock, *flags);
if (memcg != pc->mem_cgroup) {
@@ -2004,37 +2005,26 @@ again:
goto again;
}
*locked = true;
+
+ return memcg;
}

-void __mem_cgroup_end_update_page_stat(struct page *page, unsigned long *flags)
+void mem_cgroup_end_page_stat(struct mem_cgroup *memcg, bool locked,
+ unsigned long flags)
{
- struct page_cgroup *pc = lookup_page_cgroup(page);
+ if (memcg && locked)
+ spin_unlock_irqrestore(&memcg->move_lock, flags);

- /*
- * It's guaranteed that pc->mem_cgroup never changes while
- * lock is held because a routine modifies pc->mem_cgroup
- * should take move_lock_mem_cgroup().
- */
- spin_unlock_irqrestore(&pc->mem_cgroup->move_lock, *flags);
+ rcu_read_unlock();
}

-void mem_cgroup_update_page_stat(struct page *page,
+void mem_cgroup_update_page_stat(struct mem_cgroup *memcg,
enum mem_cgroup_stat_index idx, int val)
{
- struct mem_cgroup *memcg;
- struct page_cgroup *pc;
-
VM_BUG_ON(!rcu_read_lock_held());

- if (mem_cgroup_disabled())
- return;
-
- pc = lookup_page_cgroup(page);
- memcg = pc->mem_cgroup;
- if (unlikely(!memcg))
- return;
-
- this_cpu_add(memcg->stat->count[idx], val);
+ if (memcg)
+ this_cpu_add(memcg->stat->count[idx], val);
}

/*
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index ff6a5b07211e..19ceae87522d 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2327,11 +2327,12 @@ EXPORT_SYMBOL(clear_page_dirty_for_io);
int test_clear_page_writeback(struct page *page)
{
struct address_space *mapping = page_mapping(page);
- int ret;
- bool locked;
unsigned long memcg_flags;
+ struct mem_cgroup *memcg;
+ bool locked;
+ int ret;

- mem_cgroup_begin_update_page_stat(page, &locked, &memcg_flags);
+ memcg = mem_cgroup_begin_page_stat(page, &locked, &memcg_flags);
if (mapping) {
struct backing_dev_info *bdi = mapping->backing_dev_info;
unsigned long flags;
@@ -2352,22 +2353,23 @@ int test_clear_page_writeback(struct page *page)
ret = TestClearPageWriteback(page);
}
if (ret) {
- mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_WRITEBACK);
+ mem_cgroup_dec_page_stat(memcg, MEM_CGROUP_STAT_WRITEBACK);
dec_zone_page_state(page, NR_WRITEBACK);
inc_zone_page_state(page, NR_WRITTEN);
}
- mem_cgroup_end_update_page_stat(page, &locked, &memcg_flags);
+ mem_cgroup_end_page_stat(memcg, locked, memcg_flags);
return ret;
}

int __test_set_page_writeback(struct page *page, bool keep_write)
{
struct address_space *mapping = page_mapping(page);
- int ret;
- bool locked;
unsigned long memcg_flags;
+ struct mem_cgroup *memcg;
+ bool locked;
+ int ret;

- mem_cgroup_begin_update_page_stat(page, &locked, &memcg_flags);
+ memcg = mem_cgroup_begin_page_stat(page, &locked, &memcg_flags);
if (mapping) {
struct backing_dev_info *bdi = mapping->backing_dev_info;
unsigned long flags;
@@ -2394,10 +2396,10 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
ret = TestSetPageWriteback(page);
}
if (!ret) {
- mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_WRITEBACK);
+ mem_cgroup_inc_page_stat(memcg, MEM_CGROUP_STAT_WRITEBACK);
inc_zone_page_state(page, NR_WRITEBACK);
}
- mem_cgroup_end_update_page_stat(page, &locked, &memcg_flags);
+ mem_cgroup_end_page_stat(memcg, locked, memcg_flags);
return ret;

}
diff --git a/mm/rmap.c b/mm/rmap.c
index 116a5053415b..f574046f77d4 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1042,15 +1042,16 @@ void page_add_new_anon_rmap(struct page *page,
*/
void page_add_file_rmap(struct page *page)
{
- bool locked;
+ struct mem_cgroup *memcg;
unsigned long flags;
+ bool locked;

- mem_cgroup_begin_update_page_stat(page, &locked, &flags);
+ memcg = mem_cgroup_begin_page_stat(page, &locked, &flags);
if (atomic_inc_and_test(&page->_mapcount)) {
__inc_zone_page_state(page, NR_FILE_MAPPED);
- mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED);
+ mem_cgroup_inc_page_stat(memcg, MEM_CGROUP_STAT_FILE_MAPPED);
}
- mem_cgroup_end_update_page_stat(page, &locked, &flags);
+ mem_cgroup_end_page_stat(memcg, locked, flags);
}

/**
@@ -1061,9 +1062,10 @@ void page_add_file_rmap(struct page *page)
*/
void page_remove_rmap(struct page *page)
{
+ struct mem_cgroup *uninitialized_var(memcg);
bool anon = PageAnon(page);
- bool locked;
unsigned long flags;
+ bool locked;

/*
* The anon case has no mem_cgroup page_stat to update; but may
@@ -1071,7 +1073,7 @@ void page_remove_rmap(struct page *page)
* we hold the lock against page_stat move: so avoid it on anon.
*/
if (!anon)
- mem_cgroup_begin_update_page_stat(page, &locked, &flags);
+ memcg = mem_cgroup_begin_page_stat(page, &locked, &flags);

/* page still mapped by someone else? */
if (!atomic_add_negative(-1, &page->_mapcount))
@@ -1096,8 +1098,7 @@ void page_remove_rmap(struct page *page)
-hpage_nr_pages(page));
} else {
__dec_zone_page_state(page, NR_FILE_MAPPED);
- mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED);
- mem_cgroup_end_update_page_stat(page, &locked, &flags);
+ mem_cgroup_dec_page_stat(memcg, MEM_CGROUP_STAT_FILE_MAPPED);
}
if (unlikely(PageMlocked(page)))
clear_page_mlock(page);
@@ -1110,10 +1111,9 @@ void page_remove_rmap(struct page *page)
* Leaving it set also helps swapoff to reinstate ptes
* faster for those pages still in swapcache.
*/
- return;
out:
if (!anon)
- mem_cgroup_end_update_page_stat(page, &locked, &flags);
+ mem_cgroup_end_page_stat(memcg, locked, flags);
}

/*
--
2.1.2

2014-10-21 20:22:51

by Johannes Weiner

Subject: [patch 2/4] mm: memcontrol: don't pass a NULL memcg to mem_cgroup_end_move()

mem_cgroup_end_move() checks whether the passed memcg is NULL, along
with a lengthy comment explaining why this seemingly nonsensical
situation is even possible.

Instead, check in cancel_attach() itself whether can_attach() set up
the move context or not; that is a lot more obvious there. Then remove
the check and the comment from mem_cgroup_end_move().
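
With the check moved to the caller, the cancel path reads (sketch
matching the hunk below; a non-NULL mc.to indicates that can_attach()
set up a move context):

	static void mem_cgroup_cancel_attach(struct cgroup_subsys_state *css,
					     struct cgroup_taskset *tset)
	{
		if (mc.to)	/* can_attach() set up a move context */
			mem_cgroup_clear_mc();
	}

and mem_cgroup_end_move() can assume it is never passed a NULL memcg.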

Signed-off-by: Johannes Weiner <[email protected]>
---
mm/memcontrol.c | 13 ++++---------
1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 1ff125d2a427..c1fe774d712a 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1452,14 +1452,8 @@ static void mem_cgroup_start_move(struct mem_cgroup *memcg)

static void mem_cgroup_end_move(struct mem_cgroup *memcg)
{
- /*
- * Now, mem_cgroup_clear_mc() may call this function with NULL.
- * We check NULL in callee rather than caller.
- */
- if (memcg) {
- atomic_dec(&memcg_moving);
- atomic_dec(&memcg->moving_account);
- }
+ atomic_dec(&memcg_moving);
+ atomic_dec(&memcg->moving_account);
}

/*
@@ -5383,7 +5377,8 @@ static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
static void mem_cgroup_cancel_attach(struct cgroup_subsys_state *css,
struct cgroup_taskset *tset)
{
- mem_cgroup_clear_mc();
+ if (mc.to)
+ mem_cgroup_clear_mc();
}

static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
--
2.1.2

2014-10-21 21:06:00

by Johannes Weiner

Subject: Re: [patch 4/4] mm: memcontrol: simplify per-memcg page statistics accounting

On Tue, Oct 21, 2014 at 04:21:36PM -0400, Johannes Weiner wrote:
> @@ -315,13 +290,13 @@ mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
> {
> }
>
> -static inline void mem_cgroup_begin_update_page_stat(struct page *page,
> +static inline void mem_cgroup_begin_page_stat(struct page *page,
> bool *locked, unsigned long *flags)

I forgot to refresh the patch after fixing the allnoconfig build.
Andrew, could you please fold the following into this patch if/when
merging it? Thanks!

Signed-off-by: Johannes Weiner <[email protected]>
---

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 4dc4a2aec440..ea007615e8f9 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -290,9 +290,10 @@ mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
{
}

-static inline void mem_cgroup_begin_page_stat(struct page *page,
+static inline struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page,
bool *locked, unsigned long *flags)
{
+ return NULL;
}

static inline void mem_cgroup_end_page_stat(struct mem_cgroup *memcg,

2014-10-22 06:49:04

by Vladimir Davydov

Subject: Re: [patch 1/4] mm: memcontrol: inline memcg->move_lock locking

On Tue, Oct 21, 2014 at 04:21:33PM -0400, Johannes Weiner wrote:
> The wrappers around taking and dropping the memcg->move_lock spinlock
> add nothing of value. Inline the spinlock calls into the callsites.
>
> Signed-off-by: Johannes Weiner <[email protected]>

Acked-by: Vladimir Davydov <[email protected]>

> ---
> mm/memcontrol.c | 34 +++++++++-------------------------
> 1 file changed, 9 insertions(+), 25 deletions(-)
>
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 293db8234179..1ff125d2a427 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -1507,23 +1507,6 @@ static bool mem_cgroup_wait_acct_move(struct mem_cgroup *memcg)
> return false;
> }
>
> -/*
> - * Take this lock when
> - * - a code tries to modify page's memcg while it's USED.
> - * - a code tries to modify page state accounting in a memcg.
> - */
> -static void move_lock_mem_cgroup(struct mem_cgroup *memcg,
> - unsigned long *flags)
> -{
> - spin_lock_irqsave(&memcg->move_lock, *flags);
> -}
> -
> -static void move_unlock_mem_cgroup(struct mem_cgroup *memcg,
> - unsigned long *flags)
> -{
> - spin_unlock_irqrestore(&memcg->move_lock, *flags);
> -}
> -
> #define K(x) ((x) << (PAGE_SHIFT-10))
> /**
> * mem_cgroup_print_oom_info: Print OOM information relevant to memory controller.
> @@ -2013,7 +1996,7 @@ again:
> return;
> /*
> * If this memory cgroup is not under account moving, we don't
> - * need to take move_lock_mem_cgroup(). Because we already hold
> + * need to take &memcg->move_lock. Because we already hold
> * rcu_read_lock(), any calls to move_account will be delayed until
> * rcu_read_unlock().
> */
> @@ -2021,9 +2004,9 @@ again:
> if (atomic_read(&memcg->moving_account) <= 0)
> return;
>
> - move_lock_mem_cgroup(memcg, flags);
> + spin_lock_irqsave(&memcg->move_lock, *flags);
> if (memcg != pc->mem_cgroup) {
> - move_unlock_mem_cgroup(memcg, flags);
> + spin_unlock_irqrestore(&memcg->move_lock, *flags);
> goto again;
> }
> *locked = true;
> @@ -2038,7 +2021,7 @@ void __mem_cgroup_end_update_page_stat(struct page *page, unsigned long *flags)
> * lock is held because a routine modifies pc->mem_cgroup
> * should take move_lock_mem_cgroup().
> */
> - move_unlock_mem_cgroup(pc->mem_cgroup, flags);
> + spin_unlock_irqrestore(&pc->mem_cgroup->move_lock, *flags);
> }
>
> void mem_cgroup_update_page_stat(struct page *page,
> @@ -3083,7 +3066,7 @@ static int mem_cgroup_move_account(struct page *page,
> if (pc->mem_cgroup != from)
> goto out_unlock;
>
> - move_lock_mem_cgroup(from, &flags);
> + spin_lock_irqsave(&from->move_lock, flags);
>
> if (!PageAnon(page) && page_mapped(page)) {
> __this_cpu_sub(from->stat->count[MEM_CGROUP_STAT_FILE_MAPPED],
> @@ -3107,7 +3090,8 @@ static int mem_cgroup_move_account(struct page *page,
>
> /* caller should have done css_get */
> pc->mem_cgroup = to;
> - move_unlock_mem_cgroup(from, &flags);
> + spin_unlock_irqrestore(&from->move_lock, flags);
> +
> ret = 0;
>
> local_irq_disable();
> @@ -6033,9 +6017,9 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage,
> * but there might still be references, e.g. from finishing
> * writeback. Follow the charge moving protocol here.
> */
> - move_lock_mem_cgroup(memcg, &flags);
> + spin_lock_irqsave(&memcg->move_lock, flags);
> pc->mem_cgroup = NULL;
> - move_unlock_mem_cgroup(memcg, &flags);
> + spin_unlock_irqrestore(&memcg->move_lock, flags);
>
> if (lrucare)
> unlock_page_lru(oldpage, isolated);
> --
> 2.1.2
>

2014-10-22 06:52:26

by Vladimir Davydov

Subject: Re: [patch 2/4] mm: memcontrol: don't pass a NULL memcg to mem_cgroup_end_move()

On Tue, Oct 21, 2014 at 04:21:34PM -0400, Johannes Weiner wrote:
> mem_cgroup_end_move() checks whether the passed memcg is NULL, along
> with a lengthy comment explaining why this seemingly nonsensical
> situation is even possible.
>
> Instead, check in cancel_attach() itself whether can_attach() set up
> the move context or not; that is a lot more obvious there. Then remove
> the check and the comment from mem_cgroup_end_move().
>
> Signed-off-by: Johannes Weiner <[email protected]>

Acked-by: Vladimir Davydov <[email protected]>

> ---
> mm/memcontrol.c | 13 ++++---------
> 1 file changed, 4 insertions(+), 9 deletions(-)
>
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 1ff125d2a427..c1fe774d712a 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -1452,14 +1452,8 @@ static void mem_cgroup_start_move(struct mem_cgroup *memcg)
>
> static void mem_cgroup_end_move(struct mem_cgroup *memcg)
> {
> - /*
> - * Now, mem_cgroup_clear_mc() may call this function with NULL.
> - * We check NULL in callee rather than caller.
> - */
> - if (memcg) {
> - atomic_dec(&memcg_moving);
> - atomic_dec(&memcg->moving_account);
> - }
> + atomic_dec(&memcg_moving);
> + atomic_dec(&memcg->moving_account);
> }
>
> /*
> @@ -5383,7 +5377,8 @@ static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
> static void mem_cgroup_cancel_attach(struct cgroup_subsys_state *css,
> struct cgroup_taskset *tset)
> {
> - mem_cgroup_clear_mc();
> + if (mc.to)
> + mem_cgroup_clear_mc();
> }
>
> static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
> --
> 2.1.2
>