2014-10-20 15:22:22

by Johannes Weiner

Subject: [patch 0/4] mm: memcontrol: remove the page_cgroup->flags field

Hi,

this series gets rid of the remaining page_cgroup flags, thus cutting
the memcg per-page overhead down to one pointer.

include/linux/page_cgroup.h | 12 ----
mm/memcontrol.c | 154 ++++++++++++++++++------------------------
2 files changed, 64 insertions(+), 102 deletions(-)
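
For reference, the net effect of the series on the per-page structure,
pieced together from the hunks in patches 2-4 below (comments added
here for illustration only):

/* before the series */
struct page_cgroup {
	unsigned long flags;		/* PCG_USED | PCG_MEM | PCG_MEMSW */
	struct mem_cgroup *mem_cgroup;
};

/* after the series */
struct page_cgroup {
	struct mem_cgroup *mem_cgroup;	/* NULL when the page is uncharged */
};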


2014-10-20 15:22:24

by Johannes Weiner

Subject: [patch 2/4] mm: memcontrol: remove unnecessary PCG_MEMSW memory+swap charge flag

Now that mem_cgroup_swapout() fully uncharges the page, every page
that is still in use when reaching mem_cgroup_uncharge() is known to
carry both the memory and the memory+swap charge. Simplify the
uncharge path and remove the PCG_MEMSW page flag accordingly.
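
A condensed view of the resulting accounting in uncharge_batch(),
assembled from the hunks below (just the relevant lines, not a
standalone function):

	unsigned long nr_pages = nr_anon + nr_file;

	if (!mem_cgroup_is_root(memcg)) {
		page_counter_uncharge(&memcg->memory, nr_pages);
		if (do_swap_account)
			page_counter_uncharge(&memcg->memsw, nr_pages);
		memcg_oom_recover(memcg);
	}
	...
	if (!mem_cgroup_is_root(memcg))
		css_put_many(&memcg->css, nr_pages);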

Signed-off-by: Johannes Weiner <[email protected]>
---
include/linux/page_cgroup.h | 1 -
mm/memcontrol.c | 34 ++++++++++++----------------------
2 files changed, 12 insertions(+), 23 deletions(-)

diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index 5c831f1eca79..da62ee2be28b 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -5,7 +5,6 @@ enum {
/* flags for mem_cgroup */
PCG_USED = 0x01, /* This page is charged to a memcg */
PCG_MEM = 0x02, /* This page holds a memory charge */
- PCG_MEMSW = 0x04, /* This page holds a memory+swap charge */
};

struct pglist_data;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 7709f17347f3..9bab35fc3e9e 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2606,7 +2606,7 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg,
* have the page locked
*/
pc->mem_cgroup = memcg;
- pc->flags = PCG_USED | PCG_MEM | (do_swap_account ? PCG_MEMSW : 0);
+ pc->flags = PCG_USED | PCG_MEM;

if (lrucare)
unlock_page_lru(page, isolated);
@@ -5815,7 +5815,6 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
if (!PageCgroupUsed(pc))
return;

- VM_BUG_ON_PAGE(!(pc->flags & PCG_MEMSW), page);
memcg = pc->mem_cgroup;

oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
@@ -6010,17 +6009,16 @@ void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg)
}

static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
- unsigned long nr_mem, unsigned long nr_memsw,
unsigned long nr_anon, unsigned long nr_file,
unsigned long nr_huge, struct page *dummy_page)
{
+ unsigned long nr_pages = nr_anon + nr_file;
unsigned long flags;

if (!mem_cgroup_is_root(memcg)) {
- if (nr_mem)
- page_counter_uncharge(&memcg->memory, nr_mem);
- if (nr_memsw)
- page_counter_uncharge(&memcg->memsw, nr_memsw);
+ page_counter_uncharge(&memcg->memory, nr_pages);
+ if (do_swap_account)
+ page_counter_uncharge(&memcg->memsw, nr_pages);
memcg_oom_recover(memcg);
}

@@ -6029,23 +6027,21 @@ static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
__this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_CACHE], nr_file);
__this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE], nr_huge);
__this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_PGPGOUT], pgpgout);
- __this_cpu_add(memcg->stat->nr_page_events, nr_anon + nr_file);
+ __this_cpu_add(memcg->stat->nr_page_events, nr_pages);
memcg_check_events(memcg, dummy_page);
local_irq_restore(flags);

if (!mem_cgroup_is_root(memcg))
- css_put_many(&memcg->css, max(nr_mem, nr_memsw));
+ css_put_many(&memcg->css, nr_pages);
}

static void uncharge_list(struct list_head *page_list)
{
struct mem_cgroup *memcg = NULL;
- unsigned long nr_memsw = 0;
unsigned long nr_anon = 0;
unsigned long nr_file = 0;
unsigned long nr_huge = 0;
unsigned long pgpgout = 0;
- unsigned long nr_mem = 0;
struct list_head *next;
struct page *page;

@@ -6072,10 +6068,9 @@ static void uncharge_list(struct list_head *page_list)

if (memcg != pc->mem_cgroup) {
if (memcg) {
- uncharge_batch(memcg, pgpgout, nr_mem, nr_memsw,
- nr_anon, nr_file, nr_huge, page);
- pgpgout = nr_mem = nr_memsw = 0;
- nr_anon = nr_file = nr_huge = 0;
+ uncharge_batch(memcg, pgpgout, nr_anon, nr_file,
+ nr_huge, page);
+ pgpgout = nr_anon = nr_file = nr_huge = 0;
}
memcg = pc->mem_cgroup;
}
@@ -6091,18 +6086,14 @@ static void uncharge_list(struct list_head *page_list)
else
nr_file += nr_pages;

- if (pc->flags & PCG_MEM)
- nr_mem += nr_pages;
- if (pc->flags & PCG_MEMSW)
- nr_memsw += nr_pages;
pc->flags = 0;

pgpgout++;
} while (next != page_list);

if (memcg)
- uncharge_batch(memcg, pgpgout, nr_mem, nr_memsw,
- nr_anon, nr_file, nr_huge, page);
+ uncharge_batch(memcg, pgpgout, nr_anon, nr_file,
+ nr_huge, page);
}

/**
@@ -6187,7 +6178,6 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage,
return;

VM_BUG_ON_PAGE(!(pc->flags & PCG_MEM), oldpage);
- VM_BUG_ON_PAGE(do_swap_account && !(pc->flags & PCG_MEMSW), oldpage);

if (lrucare)
lock_page_lru(oldpage, &isolated);
--
2.1.2

2014-10-20 15:22:29

by Johannes Weiner

Subject: [patch 4/4] mm: memcontrol: remove unnecessary PCG_USED pc->mem_cgroup valid flag

pc->mem_cgroup had to be left intact after uncharge for the final LRU
removal, and !PCG_USED indicated whether the page was uncharged. But
since 0a31bc97c80c ("mm: memcontrol: rewrite uncharge API") pages are
uncharged after the final LRU removal. Uncharge can simply clear the
pointer and the PCG_USED/PageCgroupUsed sites can test that instead.

Because this is the last page_cgroup flag, this patch reduces the
memcg per-page overhead to a single pointer.
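
With the flag gone, the "is this page charged?" test becomes a plain
NULL check on pc->mem_cgroup, as the hunks below show. Condensed from
the swapout path:

	pc = lookup_page_cgroup(page);
	memcg = pc->mem_cgroup;

	/* Readahead page, never charged */
	if (!memcg)
		return;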

Signed-off-by: Johannes Weiner <[email protected]>
---
include/linux/page_cgroup.h | 10 -----
mm/memcontrol.c | 107 +++++++++++++++++---------------------------
2 files changed, 42 insertions(+), 75 deletions(-)

diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index 97536e685843..1289be6b436c 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -1,11 +1,6 @@
#ifndef __LINUX_PAGE_CGROUP_H
#define __LINUX_PAGE_CGROUP_H

-enum {
- /* flags for mem_cgroup */
- PCG_USED = 0x01, /* This page is charged to a memcg */
-};
-
struct pglist_data;

#ifdef CONFIG_MEMCG
@@ -19,7 +14,6 @@ struct mem_cgroup;
* then the page cgroup for pfn always exists.
*/
struct page_cgroup {
- unsigned long flags;
struct mem_cgroup *mem_cgroup;
};

@@ -39,10 +33,6 @@ static inline void page_cgroup_init(void)

struct page_cgroup *lookup_page_cgroup(struct page *page);

-static inline int PageCgroupUsed(struct page_cgroup *pc)
-{
- return !!(pc->flags & PCG_USED);
-}
#else /* !CONFIG_MEMCG */
struct page_cgroup;

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 1d66ac49e702..48d49c6b08d1 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1284,14 +1284,12 @@ struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct zone *zone)

pc = lookup_page_cgroup(page);
memcg = pc->mem_cgroup;
-
/*
* Swapcache readahead pages are added to the LRU - and
- * possibly migrated - before they are charged. Ensure
- * pc->mem_cgroup is sane.
+ * possibly migrated - before they are charged.
*/
- if (!PageLRU(page) && !PageCgroupUsed(pc) && memcg != root_mem_cgroup)
- pc->mem_cgroup = memcg = root_mem_cgroup;
+ if (!memcg)
+ memcg = root_mem_cgroup;

mz = mem_cgroup_page_zoneinfo(memcg, page);
lruvec = &mz->lruvec;
@@ -2141,7 +2139,7 @@ void __mem_cgroup_begin_update_page_stat(struct page *page,
pc = lookup_page_cgroup(page);
again:
memcg = pc->mem_cgroup;
- if (unlikely(!memcg || !PageCgroupUsed(pc)))
+ if (unlikely(!memcg))
return;
/*
* If this memory cgroup is not under account moving, we don't
@@ -2154,7 +2152,7 @@ again:
return;

move_lock_mem_cgroup(memcg, flags);
- if (memcg != pc->mem_cgroup || !PageCgroupUsed(pc)) {
+ if (memcg != pc->mem_cgroup) {
move_unlock_mem_cgroup(memcg, flags);
goto again;
}
@@ -2186,7 +2184,7 @@ void mem_cgroup_update_page_stat(struct page *page,

pc = lookup_page_cgroup(page);
memcg = pc->mem_cgroup;
- if (unlikely(!memcg || !PageCgroupUsed(pc)))
+ if (unlikely(!memcg))
return;

this_cpu_add(memcg->stat->count[idx], val);
@@ -2525,9 +2523,10 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
VM_BUG_ON_PAGE(!PageLocked(page), page);

pc = lookup_page_cgroup(page);
- if (PageCgroupUsed(pc)) {
- memcg = pc->mem_cgroup;
- if (memcg && !css_tryget_online(&memcg->css))
+ memcg = pc->mem_cgroup;
+
+ if (memcg) {
+ if (!css_tryget_online(&memcg->css))
memcg = NULL;
} else if (PageSwapCache(page)) {
ent.val = page_private(page);
@@ -2578,7 +2577,7 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg,
struct page_cgroup *pc = lookup_page_cgroup(page);
int isolated;

- VM_BUG_ON_PAGE(PageCgroupUsed(pc), page);
+ VM_BUG_ON_PAGE(pc->mem_cgroup, page);
/*
* we don't need page_cgroup_lock about tail pages, becase they are not
* accessed by any other context at this point.
@@ -2593,7 +2592,7 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg,

/*
* Nobody should be changing or seriously looking at
- * pc->mem_cgroup and pc->flags at this point:
+ * pc->mem_cgroup at this point:
*
* - the page is uncharged
*
@@ -2606,7 +2605,6 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg,
* have the page locked
*/
pc->mem_cgroup = memcg;
- pc->flags = PCG_USED;

if (lrucare)
unlock_page_lru(page, isolated);
@@ -3120,37 +3118,22 @@ void __memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg,
memcg_uncharge_kmem(memcg, 1 << order);
return;
}
- /*
- * The page is freshly allocated and not visible to any
- * outside callers yet. Set up pc non-atomically.
- */
pc = lookup_page_cgroup(page);
pc->mem_cgroup = memcg;
- pc->flags = PCG_USED;
}

void __memcg_kmem_uncharge_pages(struct page *page, int order)
{
- struct mem_cgroup *memcg = NULL;
- struct page_cgroup *pc;
-
-
- pc = lookup_page_cgroup(page);
- if (!PageCgroupUsed(pc))
- return;
-
- memcg = pc->mem_cgroup;
- pc->flags = 0;
+ struct page_cgroup *pc = lookup_page_cgroup(page);
+ struct mem_cgroup *memcg = pc->mem_cgroup;

- /*
- * We trust that only if there is a memcg associated with the page, it
- * is a valid allocation
- */
if (!memcg)
return;

VM_BUG_ON_PAGE(mem_cgroup_is_root(memcg), page);
+
memcg_uncharge_kmem(memcg, 1 << order);
+ pc->mem_cgroup = NULL;
}
#else
static inline void memcg_unregister_all_caches(struct mem_cgroup *memcg)
@@ -3168,21 +3151,16 @@ static inline void memcg_unregister_all_caches(struct mem_cgroup *memcg)
*/
void mem_cgroup_split_huge_fixup(struct page *head)
{
- struct page_cgroup *head_pc = lookup_page_cgroup(head);
- struct page_cgroup *pc;
- struct mem_cgroup *memcg;
+ struct page_cgroup *pc = lookup_page_cgroup(head);
int i;

if (mem_cgroup_disabled())
return;

- memcg = head_pc->mem_cgroup;
- for (i = 1; i < HPAGE_PMD_NR; i++) {
- pc = head_pc + i;
- pc->mem_cgroup = memcg;
- pc->flags = head_pc->flags;
- }
- __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE],
+ for (i = 1; i < HPAGE_PMD_NR; i++)
+ pc[i].mem_cgroup = pc[0].mem_cgroup;
+
+ __this_cpu_sub(pc[0].mem_cgroup->stat->count[MEM_CGROUP_STAT_RSS_HUGE],
HPAGE_PMD_NR);
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
@@ -3232,7 +3210,7 @@ static int mem_cgroup_move_account(struct page *page,
goto out;

ret = -EINVAL;
- if (!PageCgroupUsed(pc) || pc->mem_cgroup != from)
+ if (pc->mem_cgroup != from)
goto out_unlock;

move_lock_mem_cgroup(from, &flags);
@@ -3342,7 +3320,7 @@ static struct page_cgroup *lookup_page_cgroup_used(struct page *page)
* the first time, i.e. during boot or memory hotplug;
* or when mem_cgroup_disabled().
*/
- if (likely(pc) && PageCgroupUsed(pc))
+ if (likely(pc) && pc->mem_cgroup)
return pc;
return NULL;
}
@@ -3360,10 +3338,8 @@ void mem_cgroup_print_bad_page(struct page *page)
struct page_cgroup *pc;

pc = lookup_page_cgroup_used(page);
- if (pc) {
- pr_alert("pc:%p pc->flags:%lx pc->mem_cgroup:%p\n",
- pc, pc->flags, pc->mem_cgroup);
- }
+ if (pc)
+ pr_alert("pc:%p pc->mem_cgroup:%p\n", pc, pc->mem_cgroup);
}
#endif

@@ -5330,7 +5306,7 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
* mem_cgroup_move_account() checks the pc is valid or
* not under LRU exclusion.
*/
- if (PageCgroupUsed(pc) && pc->mem_cgroup == mc.from) {
+ if (pc->mem_cgroup == mc.from) {
ret = MC_TARGET_PAGE;
if (target)
target->page = page;
@@ -5366,7 +5342,7 @@ static enum mc_target_type get_mctgt_type_thp(struct vm_area_struct *vma,
if (!move_anon())
return ret;
pc = lookup_page_cgroup(page);
- if (PageCgroupUsed(pc) && pc->mem_cgroup == mc.from) {
+ if (pc->mem_cgroup == mc.from) {
ret = MC_TARGET_PAGE;
if (target) {
get_page(page);
@@ -5810,18 +5786,17 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
return;

pc = lookup_page_cgroup(page);
+ memcg = pc->mem_cgroup;

/* Readahead page, never charged */
- if (!PageCgroupUsed(pc))
+ if (!memcg)
return;

- memcg = pc->mem_cgroup;
-
oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
VM_BUG_ON_PAGE(oldid, page);
mem_cgroup_swap_statistics(memcg, true);

- pc->flags = 0;
+ pc->mem_cgroup = NULL;

if (!mem_cgroup_is_root(memcg))
page_counter_uncharge(&memcg->memory, 1);
@@ -5895,7 +5870,7 @@ int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
* the page lock, which serializes swap cache removal, which
* in turn serializes uncharging.
*/
- if (PageCgroupUsed(pc))
+ if (pc->mem_cgroup)
goto out;
}

@@ -6057,13 +6032,13 @@ static void uncharge_list(struct list_head *page_list)
VM_BUG_ON_PAGE(page_count(page), page);

pc = lookup_page_cgroup(page);
- if (!PageCgroupUsed(pc))
+ if (!pc->mem_cgroup)
continue;

/*
* Nobody should be changing or seriously looking at
- * pc->mem_cgroup and pc->flags at this point, we have
- * fully exclusive access to the page.
+ * pc->mem_cgroup at this point, we have fully
+ * exclusive access to the page.
*/

if (memcg != pc->mem_cgroup) {
@@ -6086,7 +6061,7 @@ static void uncharge_list(struct list_head *page_list)
else
nr_file += nr_pages;

- pc->flags = 0;
+ pc->mem_cgroup = NULL;

pgpgout++;
} while (next != page_list);
@@ -6112,7 +6087,7 @@ void mem_cgroup_uncharge(struct page *page)

/* Don't touch page->lru of any random page, pre-check: */
pc = lookup_page_cgroup(page);
- if (!PageCgroupUsed(pc))
+ if (!pc->mem_cgroup)
return;

INIT_LIST_HEAD(&page->lru);
@@ -6148,6 +6123,7 @@ void mem_cgroup_uncharge_list(struct list_head *page_list)
void mem_cgroup_migrate(struct page *oldpage, struct page *newpage,
bool lrucare)
{
+ struct mem_cgroup *memcg;
struct page_cgroup *pc;
int isolated;

@@ -6164,7 +6140,7 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage,

/* Page cache replacement: new page already charged? */
pc = lookup_page_cgroup(newpage);
- if (PageCgroupUsed(pc))
+ if (pc->mem_cgroup)
return;

/*
@@ -6174,18 +6150,19 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage,
* reclaim just put back on the LRU but has not released yet.
*/
pc = lookup_page_cgroup(oldpage);
- if (!PageCgroupUsed(pc))
+ memcg = pc->mem_cgroup;
+ if (!memcg)
return;

if (lrucare)
lock_page_lru(oldpage, &isolated);

- pc->flags = 0;
+ pc->mem_cgroup = NULL;

if (lrucare)
unlock_page_lru(oldpage, isolated);

- commit_charge(newpage, pc->mem_cgroup, lrucare);
+ commit_charge(newpage, memcg, lrucare);
}

/*
--
2.1.2

2014-10-20 15:22:54

by Johannes Weiner

Subject: [patch 3/4] mm: memcontrol: remove unnecessary PCG_MEM memory charge flag

PCG_MEM is a remnant from an earlier version of 0a31bc97c80c ("mm:
memcontrol: rewrite uncharge API"), used to tell whether migration
cleared a charge while leaving pc->mem_cgroup valid and PCG_USED set.
But in the final version, mem_cgroup_migrate() directly uncharges the
source page, rendering this distinction unnecessary. Remove it.

Signed-off-by: Johannes Weiner <[email protected]>
---
include/linux/page_cgroup.h | 1 -
mm/memcontrol.c | 4 +---
2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index da62ee2be28b..97536e685843 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -4,7 +4,6 @@
enum {
/* flags for mem_cgroup */
PCG_USED = 0x01, /* This page is charged to a memcg */
- PCG_MEM = 0x02, /* This page holds a memory charge */
};

struct pglist_data;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 9bab35fc3e9e..1d66ac49e702 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2606,7 +2606,7 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg,
* have the page locked
*/
pc->mem_cgroup = memcg;
- pc->flags = PCG_USED | PCG_MEM;
+ pc->flags = PCG_USED;

if (lrucare)
unlock_page_lru(page, isolated);
@@ -6177,8 +6177,6 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage,
if (!PageCgroupUsed(pc))
return;

- VM_BUG_ON_PAGE(!(pc->flags & PCG_MEM), oldpage);
-
if (lrucare)
lock_page_lru(oldpage, &isolated);

--
2.1.2

2014-10-20 15:23:17

by Johannes Weiner

Subject: [patch 1/4] mm: memcontrol: uncharge pages on swapout

mem_cgroup_swapout() is called with exclusive access to the page at
the end of the page's lifetime. Instead of clearing the PCG_MEMSW
flag and deferring the uncharge, just do it right away. This allows
follow-up patches to simplify the uncharge code.
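
Condensed from the diff below, the uncharge that is now done inline at
swapout time (the rest of the function is elided):

	if (!mem_cgroup_is_root(memcg))
		page_counter_uncharge(&memcg->memory, 1);

	local_irq_disable();
	mem_cgroup_charge_statistics(memcg, page, -1);
	memcg_check_events(memcg, page);
	local_irq_enable();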

Signed-off-by: Johannes Weiner <[email protected]>
---
mm/memcontrol.c | 17 +++++++++++++----
1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index bea3fddb3372..7709f17347f3 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5799,6 +5799,7 @@ static void __init enable_swap_cgroup(void)
*/
void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
{
+ struct mem_cgroup *memcg;
struct page_cgroup *pc;
unsigned short oldid;

@@ -5815,13 +5816,21 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
return;

VM_BUG_ON_PAGE(!(pc->flags & PCG_MEMSW), page);
+ memcg = pc->mem_cgroup;

- oldid = swap_cgroup_record(entry, mem_cgroup_id(pc->mem_cgroup));
+ oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
VM_BUG_ON_PAGE(oldid, page);
+ mem_cgroup_swap_statistics(memcg, true);

- pc->flags &= ~PCG_MEMSW;
- css_get(&pc->mem_cgroup->css);
- mem_cgroup_swap_statistics(pc->mem_cgroup, true);
+ pc->flags = 0;
+
+ if (!mem_cgroup_is_root(memcg))
+ page_counter_uncharge(&memcg->memory, 1);
+
+ local_irq_disable();
+ mem_cgroup_charge_statistics(memcg, page, -1);
+ memcg_check_events(memcg, page);
+ local_irq_enable();
}

/**
--
2.1.2

2014-10-21 01:08:23

by Kamezawa Hiroyuki

Subject: Re: [patch 1/4] mm: memcontrol: uncharge pages on swapout

(2014/10/21 0:22), Johannes Weiner wrote:
> mem_cgroup_swapout() is called with exclusive access to the page at
> the end of the page's lifetime. Instead of clearing the PCG_MEMSW
> flag and deferring the uncharge, just do it right away. This allows
> follow-up patches to simplify the uncharge code.
>
> Signed-off-by: Johannes Weiner <[email protected]>
> ---
> mm/memcontrol.c | 17 +++++++++++++----
> 1 file changed, 13 insertions(+), 4 deletions(-)
>
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index bea3fddb3372..7709f17347f3 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -5799,6 +5799,7 @@ static void __init enable_swap_cgroup(void)
> */
> void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
> {
> + struct mem_cgroup *memcg;
> struct page_cgroup *pc;
> unsigned short oldid;
>
> @@ -5815,13 +5816,21 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
> return;
>
> VM_BUG_ON_PAGE(!(pc->flags & PCG_MEMSW), page);
shouldn't this be removed?


> + memcg = pc->mem_cgroup;
>
> - oldid = swap_cgroup_record(entry, mem_cgroup_id(pc->mem_cgroup));
> + oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
> VM_BUG_ON_PAGE(oldid, page);
> + mem_cgroup_swap_statistics(memcg, true);
>
> - pc->flags &= ~PCG_MEMSW;
> - css_get(&pc->mem_cgroup->css);
> - mem_cgroup_swap_statistics(pc->mem_cgroup, true);
> + pc->flags = 0;
> +
> + if (!mem_cgroup_is_root(memcg))
> + page_counter_uncharge(&memcg->memory, 1);
> +
> + local_irq_disable();
> + mem_cgroup_charge_statistics(memcg, page, -1);
> + memcg_check_events(memcg, page);
> + local_irq_enable();
> }
>

Reviewed-by: KAMEZAWA Hiroyuki <[email protected]>

2014-10-21 12:53:07

by Vladimir Davydov

Subject: Re: [patch 1/4] mm: memcontrol: uncharge pages on swapout

On Mon, Oct 20, 2014 at 11:22:09AM -0400, Johannes Weiner wrote:
> mem_cgroup_swapout() is called with exclusive access to the page at
> the end of the page's lifetime. Instead of clearing the PCG_MEMSW
> flag and deferring the uncharge, just do it right away. This allows
> follow-up patches to simplify the uncharge code.
>
> Signed-off-by: Johannes Weiner <[email protected]>
> ---
> mm/memcontrol.c | 17 +++++++++++++----
> 1 file changed, 13 insertions(+), 4 deletions(-)
>
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index bea3fddb3372..7709f17347f3 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -5799,6 +5799,7 @@ static void __init enable_swap_cgroup(void)
> */
> void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
> {
> + struct mem_cgroup *memcg;
> struct page_cgroup *pc;
> unsigned short oldid;
>
> @@ -5815,13 +5816,21 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
> return;
>
> VM_BUG_ON_PAGE(!(pc->flags & PCG_MEMSW), page);
> + memcg = pc->mem_cgroup;
>
> - oldid = swap_cgroup_record(entry, mem_cgroup_id(pc->mem_cgroup));
> + oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
> VM_BUG_ON_PAGE(oldid, page);
> + mem_cgroup_swap_statistics(memcg, true);
>
> - pc->flags &= ~PCG_MEMSW;
> - css_get(&pc->mem_cgroup->css);
> - mem_cgroup_swap_statistics(pc->mem_cgroup, true);
> + pc->flags = 0;
> +
> + if (!mem_cgroup_is_root(memcg))
> + page_counter_uncharge(&memcg->memory, 1);

AFAIU it removes batched uncharge of swapped out pages, doesn't it? Will
it affect performance?

Besides, it looks asymmetric with respect to the page cache uncharge
path, where we still defer uncharge to mem_cgroup_uncharge_list(), and I
personally rather dislike this asymmetry.

> +
> + local_irq_disable();
> + mem_cgroup_charge_statistics(memcg, page, -1);
> + memcg_check_events(memcg, page);
> + local_irq_enable();

AFAICT mem_cgroup_swapout() is called under mapping->tree_lock with irqs
disabled, so we should use irq_save/restore here.
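
That is, something along these lines (only a sketch of the suggested
variant, not code from the patch):

	unsigned long flags;

	local_irq_save(flags);
	mem_cgroup_charge_statistics(memcg, page, -1);
	memcg_check_events(memcg, page);
	local_irq_restore(flags);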

Thanks,
Vladimir

> }
>
> /**
> --
> 2.1.2
>

2014-10-21 20:39:21

by Johannes Weiner

Subject: Re: [patch 1/4] mm: memcontrol: uncharge pages on swapout

On Tue, Oct 21, 2014 at 10:07:52AM +0900, Kamezawa Hiroyuki wrote:
> (2014/10/21 0:22), Johannes Weiner wrote:
> > mem_cgroup_swapout() is called with exclusive access to the page at
> > the end of the page's lifetime. Instead of clearing the PCG_MEMSW
> > flag and deferring the uncharge, just do it right away. This allows
> > follow-up patches to simplify the uncharge code.
> >
> > Signed-off-by: Johannes Weiner <[email protected]>
> > ---
> > mm/memcontrol.c | 17 +++++++++++++----
> > 1 file changed, 13 insertions(+), 4 deletions(-)
> >
> > diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> > index bea3fddb3372..7709f17347f3 100644
> > --- a/mm/memcontrol.c
> > +++ b/mm/memcontrol.c
> > @@ -5799,6 +5799,7 @@ static void __init enable_swap_cgroup(void)
> > */
> > void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
> > {
> > + struct mem_cgroup *memcg;
> > struct page_cgroup *pc;
> > unsigned short oldid;
> >
> > @@ -5815,13 +5816,21 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
> > return;
> >
> > VM_BUG_ON_PAGE(!(pc->flags & PCG_MEMSW), page);
> shouldn't this be removed?

It's still a legitimate check at this point. But it's removed later
in the series when PCG_MEMSW itself goes away.

> Reviewed-by: KAMEZAWA Hiroyuki <[email protected]>

Thanks!

2014-10-21 21:03:41

by Johannes Weiner

Subject: Re: [patch 1/4] mm: memcontrol: uncharge pages on swapout

On Tue, Oct 21, 2014 at 04:52:52PM +0400, Vladimir Davydov wrote:
> On Mon, Oct 20, 2014 at 11:22:09AM -0400, Johannes Weiner wrote:
> > mem_cgroup_swapout() is called with exclusive access to the page at
> > the end of the page's lifetime. Instead of clearing the PCG_MEMSW
> > flag and deferring the uncharge, just do it right away. This allows
> > follow-up patches to simplify the uncharge code.
> >
> > Signed-off-by: Johannes Weiner <[email protected]>
> > ---
> > mm/memcontrol.c | 17 +++++++++++++----
> > 1 file changed, 13 insertions(+), 4 deletions(-)
> >
> > diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> > index bea3fddb3372..7709f17347f3 100644
> > --- a/mm/memcontrol.c
> > +++ b/mm/memcontrol.c
> > @@ -5799,6 +5799,7 @@ static void __init enable_swap_cgroup(void)
> > */
> > void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
> > {
> > + struct mem_cgroup *memcg;
> > struct page_cgroup *pc;
> > unsigned short oldid;
> >
> > @@ -5815,13 +5816,21 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
> > return;
> >
> > VM_BUG_ON_PAGE(!(pc->flags & PCG_MEMSW), page);
> > + memcg = pc->mem_cgroup;
> >
> > - oldid = swap_cgroup_record(entry, mem_cgroup_id(pc->mem_cgroup));
> > + oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
> > VM_BUG_ON_PAGE(oldid, page);
> > + mem_cgroup_swap_statistics(memcg, true);
> >
> > - pc->flags &= ~PCG_MEMSW;
> > - css_get(&pc->mem_cgroup->css);
> > - mem_cgroup_swap_statistics(pc->mem_cgroup, true);
> > + pc->flags = 0;
> > +
> > + if (!mem_cgroup_is_root(memcg))
> > + page_counter_uncharge(&memcg->memory, 1);
>
> AFAIU it removes batched uncharge of swapped out pages, doesn't it? Will
> it affect performance?

During swapout and with lockless page counters? I don't think so.

> Besides, it looks asymmetric with respect to the page cache uncharge
> path, where we still defer uncharge to mem_cgroup_uncharge_list(), and I
> personally rather dislike this asymmetry.

The asymmetry is inherent in the fact that we have memory and
memory+swap accounting, and here a memory charge is transferred out to
swap. Before, the asymmetry was in mem_cgroup_uncharge_list() where
we separate out memory and memsw pages (which the next patch fixes).

So nothing changed, the ugliness was just moved around. I actually
like it better now that it's part of the swap controller, because
that's where the nastiness actually comes from. This will all go away
when we account swap separately. Then, swapped pages can keep their
memory charge until mem_cgroup_uncharge() again and the swap charge
will be completely independent from it. This reshuffling is just
necessary because it allows us to get rid of the per-page flag.

> > + local_irq_disable();
> > + mem_cgroup_charge_statistics(memcg, page, -1);
> > + memcg_check_events(memcg, page);
> > + local_irq_enable();
>
> AFAICT mem_cgroup_swapout() is called under mapping->tree_lock with irqs
> disabled, so we should use irq_save/restore here.

Good catch! I don't think this function actually needs to be called
under the tree_lock, so I'd rather send a follow-up that moves it out.
For now, this should be sufficient:

---

From 3a40bd3b85a70db104ade873007dbb84b5117993 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <[email protected]>
Date: Tue, 21 Oct 2014 16:53:14 -0400
Subject: [patch] mm: memcontrol: uncharge pages on swapout fix

Vladimir notes:

> > + local_irq_disable();
> > + mem_cgroup_charge_statistics(memcg, page, -1);
> > + memcg_check_events(memcg, page);
> > + local_irq_enable();
>
> AFAICT mem_cgroup_swapout() is called under mapping->tree_lock with irqs
> disabled, so we should use irq_save/restore here.

But this function doesn't actually need to be called under the tree
lock. So for now, simply remove the irq-disabling altogether and rely
on the caller's IRQ state. Later on, we'll move it out from there and
add back the simple, non-saving IRQ-disabling.

Reported-by: Vladimir Davydov <[email protected]>
Signed-off-by: Johannes Weiner <[email protected]>
---
mm/memcontrol.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 8dc46aa9ae8f..c688fb73ff35 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5806,6 +5806,9 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
VM_BUG_ON_PAGE(PageLRU(page), page);
VM_BUG_ON_PAGE(page_count(page), page);

+ /* XXX: caller holds IRQ-safe mapping->tree_lock */
+ VM_BUG_ON(!irqs_disabled());
+
if (!do_swap_account)
return;

@@ -5827,10 +5830,8 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
if (!mem_cgroup_is_root(memcg))
page_counter_uncharge(&memcg->memory, 1);

- local_irq_disable();
mem_cgroup_charge_statistics(memcg, page, -1);
memcg_check_events(memcg, page);
- local_irq_enable();
}

/**
--
2.1.2

2014-10-22 01:51:16

by Kamezawa Hiroyuki

Subject: Re: [patch 2/4] mm: memcontrol: remove unnecessary PCG_MEMSW memory+swap charge flag

(2014/10/21 0:22), Johannes Weiner wrote:
> Now that mem_cgroup_swapout() fully uncharges the page, every page
> that is still in use when reaching mem_cgroup_uncharge() is known to
> carry both the memory and the memory+swap charge. Simplify the
> uncharge path and remove the PCG_MEMSW page flag accordingly.
>
> Signed-off-by: Johannes Weiner <[email protected]>

Acked-by: KAMEZAWA Hiroyuki <[email protected]>

> ---
> include/linux/page_cgroup.h | 1 -
> mm/memcontrol.c | 34 ++++++++++++----------------------
> 2 files changed, 12 insertions(+), 23 deletions(-)
>
> diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
> index 5c831f1eca79..da62ee2be28b 100644
> --- a/include/linux/page_cgroup.h
> +++ b/include/linux/page_cgroup.h
> @@ -5,7 +5,6 @@ enum {
> /* flags for mem_cgroup */
> PCG_USED = 0x01, /* This page is charged to a memcg */
> PCG_MEM = 0x02, /* This page holds a memory charge */
> - PCG_MEMSW = 0x04, /* This page holds a memory+swap charge */
> };
>
> struct pglist_data;
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 7709f17347f3..9bab35fc3e9e 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -2606,7 +2606,7 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg,
> * have the page locked
> */
> pc->mem_cgroup = memcg;
> - pc->flags = PCG_USED | PCG_MEM | (do_swap_account ? PCG_MEMSW : 0);
> + pc->flags = PCG_USED | PCG_MEM;
>
> if (lrucare)
> unlock_page_lru(page, isolated);
> @@ -5815,7 +5815,6 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
> if (!PageCgroupUsed(pc))
> return;
>
> - VM_BUG_ON_PAGE(!(pc->flags & PCG_MEMSW), page);
> memcg = pc->mem_cgroup;
>
> oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
> @@ -6010,17 +6009,16 @@ void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg)
> }
>
> static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
> - unsigned long nr_mem, unsigned long nr_memsw,
> unsigned long nr_anon, unsigned long nr_file,
> unsigned long nr_huge, struct page *dummy_page)
> {
> + unsigned long nr_pages = nr_anon + nr_file;
> unsigned long flags;
>
> if (!mem_cgroup_is_root(memcg)) {
> - if (nr_mem)
> - page_counter_uncharge(&memcg->memory, nr_mem);
> - if (nr_memsw)
> - page_counter_uncharge(&memcg->memsw, nr_memsw);
> + page_counter_uncharge(&memcg->memory, nr_pages);
> + if (do_swap_account)
> + page_counter_uncharge(&memcg->memsw, nr_pages);
> memcg_oom_recover(memcg);
> }
>
> @@ -6029,23 +6027,21 @@ static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
> __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_CACHE], nr_file);
> __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE], nr_huge);
> __this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_PGPGOUT], pgpgout);
> - __this_cpu_add(memcg->stat->nr_page_events, nr_anon + nr_file);
> + __this_cpu_add(memcg->stat->nr_page_events, nr_pages);
> memcg_check_events(memcg, dummy_page);
> local_irq_restore(flags);
>
> if (!mem_cgroup_is_root(memcg))
> - css_put_many(&memcg->css, max(nr_mem, nr_memsw));
> + css_put_many(&memcg->css, nr_pages);
> }
>
> static void uncharge_list(struct list_head *page_list)
> {
> struct mem_cgroup *memcg = NULL;
> - unsigned long nr_memsw = 0;
> unsigned long nr_anon = 0;
> unsigned long nr_file = 0;
> unsigned long nr_huge = 0;
> unsigned long pgpgout = 0;
> - unsigned long nr_mem = 0;
> struct list_head *next;
> struct page *page;
>
> @@ -6072,10 +6068,9 @@ static void uncharge_list(struct list_head *page_list)
>
> if (memcg != pc->mem_cgroup) {
> if (memcg) {
> - uncharge_batch(memcg, pgpgout, nr_mem, nr_memsw,
> - nr_anon, nr_file, nr_huge, page);
> - pgpgout = nr_mem = nr_memsw = 0;
> - nr_anon = nr_file = nr_huge = 0;
> + uncharge_batch(memcg, pgpgout, nr_anon, nr_file,
> + nr_huge, page);
> + pgpgout = nr_anon = nr_file = nr_huge = 0;
> }
> memcg = pc->mem_cgroup;
> }
> @@ -6091,18 +6086,14 @@ static void uncharge_list(struct list_head *page_list)
> else
> nr_file += nr_pages;
>
> - if (pc->flags & PCG_MEM)
> - nr_mem += nr_pages;
> - if (pc->flags & PCG_MEMSW)
> - nr_memsw += nr_pages;
> pc->flags = 0;
>
> pgpgout++;
> } while (next != page_list);
>
> if (memcg)
> - uncharge_batch(memcg, pgpgout, nr_mem, nr_memsw,
> - nr_anon, nr_file, nr_huge, page);
> + uncharge_batch(memcg, pgpgout, nr_anon, nr_file,
> + nr_huge, page);
> }
>
> /**
> @@ -6187,7 +6178,6 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage,
> return;
>
> VM_BUG_ON_PAGE(!(pc->flags & PCG_MEM), oldpage);
> - VM_BUG_ON_PAGE(do_swap_account && !(pc->flags & PCG_MEMSW), oldpage);
>
> if (lrucare)
> lock_page_lru(oldpage, &isolated);
>

2014-10-22 01:52:29

by Kamezawa Hiroyuki

Subject: Re: [patch 3/4] mm: memcontrol: remove unnecessary PCG_MEM memory charge flag

(2014/10/21 0:22), Johannes Weiner wrote:
> PCG_MEM is a remnant from an earlier version of 0a31bc97c80c ("mm:
> memcontrol: rewrite uncharge API"), used to tell whether migration
> cleared a charge while leaving pc->mem_cgroup valid and PCG_USED set.
> But in the final version, mem_cgroup_migrate() directly uncharges the
> source page, rendering this distinction unnecessary. Remove it.
>
> Signed-off-by: Johannes Weiner <[email protected]>

Acked-by: KAMEZAWA Hiroyuki <[email protected]>

> ---
> include/linux/page_cgroup.h | 1 -
> mm/memcontrol.c | 4 +---
> 2 files changed, 1 insertion(+), 4 deletions(-)
>
> diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
> index da62ee2be28b..97536e685843 100644
> --- a/include/linux/page_cgroup.h
> +++ b/include/linux/page_cgroup.h
> @@ -4,7 +4,6 @@
> enum {
> /* flags for mem_cgroup */
> PCG_USED = 0x01, /* This page is charged to a memcg */
> - PCG_MEM = 0x02, /* This page holds a memory charge */
> };
>
> struct pglist_data;
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 9bab35fc3e9e..1d66ac49e702 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -2606,7 +2606,7 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg,
> * have the page locked
> */
> pc->mem_cgroup = memcg;
> - pc->flags = PCG_USED | PCG_MEM;
> + pc->flags = PCG_USED;
>
> if (lrucare)
> unlock_page_lru(page, isolated);
> @@ -6177,8 +6177,6 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage,
> if (!PageCgroupUsed(pc))
> return;
>
> - VM_BUG_ON_PAGE(!(pc->flags & PCG_MEM), oldpage);
> -
> if (lrucare)
> lock_page_lru(oldpage, &isolated);
>
>

2014-10-22 01:55:53

by Kamezawa Hiroyuki

Subject: Re: [patch 4/4] mm: memcontrol: remove unnecessary PCG_USED pc->mem_cgroup valid flag

(2014/10/21 0:22), Johannes Weiner wrote:
> pc->mem_cgroup had to be left intact after uncharge for the final LRU
> removal, and !PCG_USED indicated whether the page was uncharged. But
> since 0a31bc97c80c ("mm: memcontrol: rewrite uncharge API") pages are
> uncharged after the final LRU removal. Uncharge can simply clear the
> pointer and the PCG_USED/PageCgroupUsed sites can test that instead.
>
> Because this is the last page_cgroup flag, this patch reduces the
> memcg per-page overhead to a single pointer.
>
> Signed-off-by: Johannes Weiner <[email protected]>

awesome.
Acked-by: KAMEZAWA Hiroyuki <[email protected]>

> ---
> include/linux/page_cgroup.h | 10 -----
> mm/memcontrol.c | 107 +++++++++++++++++---------------------------
> 2 files changed, 42 insertions(+), 75 deletions(-)
>
> diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
> index 97536e685843..1289be6b436c 100644
> --- a/include/linux/page_cgroup.h
> +++ b/include/linux/page_cgroup.h
> @@ -1,11 +1,6 @@
> #ifndef __LINUX_PAGE_CGROUP_H
> #define __LINUX_PAGE_CGROUP_H
>
> -enum {
> - /* flags for mem_cgroup */
> - PCG_USED = 0x01, /* This page is charged to a memcg */
> -};
> -
> struct pglist_data;
>
> #ifdef CONFIG_MEMCG
> @@ -19,7 +14,6 @@ struct mem_cgroup;
> * then the page cgroup for pfn always exists.
> */
> struct page_cgroup {
> - unsigned long flags;
> struct mem_cgroup *mem_cgroup;
> };
>
> @@ -39,10 +33,6 @@ static inline void page_cgroup_init(void)
>
> struct page_cgroup *lookup_page_cgroup(struct page *page);
>
> -static inline int PageCgroupUsed(struct page_cgroup *pc)
> -{
> - return !!(pc->flags & PCG_USED);
> -}
> #else /* !CONFIG_MEMCG */
> struct page_cgroup;
>
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 1d66ac49e702..48d49c6b08d1 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -1284,14 +1284,12 @@ struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct zone *zone)
>
> pc = lookup_page_cgroup(page);
> memcg = pc->mem_cgroup;
> -
> /*
> * Swapcache readahead pages are added to the LRU - and
> - * possibly migrated - before they are charged. Ensure
> - * pc->mem_cgroup is sane.
> + * possibly migrated - before they are charged.
> */
> - if (!PageLRU(page) && !PageCgroupUsed(pc) && memcg != root_mem_cgroup)
> - pc->mem_cgroup = memcg = root_mem_cgroup;
> + if (!memcg)
> + memcg = root_mem_cgroup;
>
> mz = mem_cgroup_page_zoneinfo(memcg, page);
> lruvec = &mz->lruvec;
> @@ -2141,7 +2139,7 @@ void __mem_cgroup_begin_update_page_stat(struct page *page,
> pc = lookup_page_cgroup(page);
> again:
> memcg = pc->mem_cgroup;
> - if (unlikely(!memcg || !PageCgroupUsed(pc)))
> + if (unlikely(!memcg))
> return;
> /*
> * If this memory cgroup is not under account moving, we don't
> @@ -2154,7 +2152,7 @@ again:
> return;
>
> move_lock_mem_cgroup(memcg, flags);
> - if (memcg != pc->mem_cgroup || !PageCgroupUsed(pc)) {
> + if (memcg != pc->mem_cgroup) {
> move_unlock_mem_cgroup(memcg, flags);
> goto again;
> }
> @@ -2186,7 +2184,7 @@ void mem_cgroup_update_page_stat(struct page *page,
>
> pc = lookup_page_cgroup(page);
> memcg = pc->mem_cgroup;
> - if (unlikely(!memcg || !PageCgroupUsed(pc)))
> + if (unlikely(!memcg))
> return;
>
> this_cpu_add(memcg->stat->count[idx], val);
> @@ -2525,9 +2523,10 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
> VM_BUG_ON_PAGE(!PageLocked(page), page);
>
> pc = lookup_page_cgroup(page);
> - if (PageCgroupUsed(pc)) {
> - memcg = pc->mem_cgroup;
> - if (memcg && !css_tryget_online(&memcg->css))
> + memcg = pc->mem_cgroup;
> +
> + if (memcg) {
> + if (!css_tryget_online(&memcg->css))
> memcg = NULL;
> } else if (PageSwapCache(page)) {
> ent.val = page_private(page);
> @@ -2578,7 +2577,7 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg,
> struct page_cgroup *pc = lookup_page_cgroup(page);
> int isolated;
>
> - VM_BUG_ON_PAGE(PageCgroupUsed(pc), page);
> + VM_BUG_ON_PAGE(pc->mem_cgroup, page);
> /*
> * we don't need page_cgroup_lock about tail pages, becase they are not
> * accessed by any other context at this point.
> @@ -2593,7 +2592,7 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg,
>
> /*
> * Nobody should be changing or seriously looking at
> - * pc->mem_cgroup and pc->flags at this point:
> + * pc->mem_cgroup at this point:
> *
> * - the page is uncharged
> *
> @@ -2606,7 +2605,6 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg,
> * have the page locked
> */
> pc->mem_cgroup = memcg;
> - pc->flags = PCG_USED;
>
> if (lrucare)
> unlock_page_lru(page, isolated);
> @@ -3120,37 +3118,22 @@ void __memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg,
> memcg_uncharge_kmem(memcg, 1 << order);
> return;
> }
> - /*
> - * The page is freshly allocated and not visible to any
> - * outside callers yet. Set up pc non-atomically.
> - */
> pc = lookup_page_cgroup(page);
> pc->mem_cgroup = memcg;
> - pc->flags = PCG_USED;
> }
>
> void __memcg_kmem_uncharge_pages(struct page *page, int order)
> {
> - struct mem_cgroup *memcg = NULL;
> - struct page_cgroup *pc;
> -
> -
> - pc = lookup_page_cgroup(page);
> - if (!PageCgroupUsed(pc))
> - return;
> -
> - memcg = pc->mem_cgroup;
> - pc->flags = 0;
> + struct page_cgroup *pc = lookup_page_cgroup(page);
> + struct mem_cgroup *memcg = pc->mem_cgroup;
>
> - /*
> - * We trust that only if there is a memcg associated with the page, it
> - * is a valid allocation
> - */
> if (!memcg)
> return;
>
> VM_BUG_ON_PAGE(mem_cgroup_is_root(memcg), page);
> +
> memcg_uncharge_kmem(memcg, 1 << order);
> + pc->mem_cgroup = NULL;
> }
> #else
> static inline void memcg_unregister_all_caches(struct mem_cgroup *memcg)
> @@ -3168,21 +3151,16 @@ static inline void memcg_unregister_all_caches(struct mem_cgroup *memcg)
> */
> void mem_cgroup_split_huge_fixup(struct page *head)
> {
> - struct page_cgroup *head_pc = lookup_page_cgroup(head);
> - struct page_cgroup *pc;
> - struct mem_cgroup *memcg;
> + struct page_cgroup *pc = lookup_page_cgroup(head);
> int i;
>
> if (mem_cgroup_disabled())
> return;
>
> - memcg = head_pc->mem_cgroup;
> - for (i = 1; i < HPAGE_PMD_NR; i++) {
> - pc = head_pc + i;
> - pc->mem_cgroup = memcg;
> - pc->flags = head_pc->flags;
> - }
> - __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE],
> + for (i = 1; i < HPAGE_PMD_NR; i++)
> + pc[i].mem_cgroup = pc[0].mem_cgroup;
> +
> + __this_cpu_sub(pc[0].mem_cgroup->stat->count[MEM_CGROUP_STAT_RSS_HUGE],
> HPAGE_PMD_NR);
> }
> #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
> @@ -3232,7 +3210,7 @@ static int mem_cgroup_move_account(struct page *page,
> goto out;
>
> ret = -EINVAL;
> - if (!PageCgroupUsed(pc) || pc->mem_cgroup != from)
> + if (pc->mem_cgroup != from)
> goto out_unlock;
>
> move_lock_mem_cgroup(from, &flags);
> @@ -3342,7 +3320,7 @@ static struct page_cgroup *lookup_page_cgroup_used(struct page *page)
> * the first time, i.e. during boot or memory hotplug;
> * or when mem_cgroup_disabled().
> */
> - if (likely(pc) && PageCgroupUsed(pc))
> + if (likely(pc) && pc->mem_cgroup)
> return pc;
> return NULL;
> }
> @@ -3360,10 +3338,8 @@ void mem_cgroup_print_bad_page(struct page *page)
> struct page_cgroup *pc;
>
> pc = lookup_page_cgroup_used(page);
> - if (pc) {
> - pr_alert("pc:%p pc->flags:%lx pc->mem_cgroup:%p\n",
> - pc, pc->flags, pc->mem_cgroup);
> - }
> + if (pc)
> + pr_alert("pc:%p pc->mem_cgroup:%p\n", pc, pc->mem_cgroup);
> }
> #endif
>
> @@ -5330,7 +5306,7 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
> * mem_cgroup_move_account() checks the pc is valid or
> * not under LRU exclusion.
> */
> - if (PageCgroupUsed(pc) && pc->mem_cgroup == mc.from) {
> + if (pc->mem_cgroup == mc.from) {
> ret = MC_TARGET_PAGE;
> if (target)
> target->page = page;
> @@ -5366,7 +5342,7 @@ static enum mc_target_type get_mctgt_type_thp(struct vm_area_struct *vma,
> if (!move_anon())
> return ret;
> pc = lookup_page_cgroup(page);
> - if (PageCgroupUsed(pc) && pc->mem_cgroup == mc.from) {
> + if (pc->mem_cgroup == mc.from) {
> ret = MC_TARGET_PAGE;
> if (target) {
> get_page(page);
> @@ -5810,18 +5786,17 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
> return;
>
> pc = lookup_page_cgroup(page);
> + memcg = pc->mem_cgroup;
>
> /* Readahead page, never charged */
> - if (!PageCgroupUsed(pc))
> + if (!memcg)
> return;
>
> - memcg = pc->mem_cgroup;
> -
> oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
> VM_BUG_ON_PAGE(oldid, page);
> mem_cgroup_swap_statistics(memcg, true);
>
> - pc->flags = 0;
> + pc->mem_cgroup = NULL;
>
> if (!mem_cgroup_is_root(memcg))
> page_counter_uncharge(&memcg->memory, 1);
> @@ -5895,7 +5870,7 @@ int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
> * the page lock, which serializes swap cache removal, which
> * in turn serializes uncharging.
> */
> - if (PageCgroupUsed(pc))
> + if (pc->mem_cgroup)
> goto out;
> }
>
> @@ -6057,13 +6032,13 @@ static void uncharge_list(struct list_head *page_list)
> VM_BUG_ON_PAGE(page_count(page), page);
>
> pc = lookup_page_cgroup(page);
> - if (!PageCgroupUsed(pc))
> + if (!pc->mem_cgroup)
> continue;
>
> /*
> * Nobody should be changing or seriously looking at
> - * pc->mem_cgroup and pc->flags at this point, we have
> - * fully exclusive access to the page.
> + * pc->mem_cgroup at this point, we have fully
> + * exclusive access to the page.
> */
>
> if (memcg != pc->mem_cgroup) {
> @@ -6086,7 +6061,7 @@ static void uncharge_list(struct list_head *page_list)
> else
> nr_file += nr_pages;
>
> - pc->flags = 0;
> + pc->mem_cgroup = NULL;
>
> pgpgout++;
> } while (next != page_list);
> @@ -6112,7 +6087,7 @@ void mem_cgroup_uncharge(struct page *page)
>
> /* Don't touch page->lru of any random page, pre-check: */
> pc = lookup_page_cgroup(page);
> - if (!PageCgroupUsed(pc))
> + if (!pc->mem_cgroup)
> return;
>
> INIT_LIST_HEAD(&page->lru);
> @@ -6148,6 +6123,7 @@ void mem_cgroup_uncharge_list(struct list_head *page_list)
> void mem_cgroup_migrate(struct page *oldpage, struct page *newpage,
> bool lrucare)
> {
> + struct mem_cgroup *memcg;
> struct page_cgroup *pc;
> int isolated;
>
> @@ -6164,7 +6140,7 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage,
>
> /* Page cache replacement: new page already charged? */
> pc = lookup_page_cgroup(newpage);
> - if (PageCgroupUsed(pc))
> + if (pc->mem_cgroup)
> return;
>
> /*
> @@ -6174,18 +6150,19 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage,
> * reclaim just put back on the LRU but has not released yet.
> */
> pc = lookup_page_cgroup(oldpage);
> - if (!PageCgroupUsed(pc))
> + memcg = pc->mem_cgroup;
> + if (!memcg)
> return;
>
> if (lrucare)
> lock_page_lru(oldpage, &isolated);
>
> - pc->flags = 0;
> + pc->mem_cgroup = NULL;
>
> if (lrucare)
> unlock_page_lru(oldpage, isolated);
>
> - commit_charge(newpage, pc->mem_cgroup, lrucare);
> + commit_charge(newpage, memcg, lrucare);
> }
>
> /*
>

2014-10-22 08:34:09

by Vladimir Davydov

Subject: Re: [patch 1/4] mm: memcontrol: uncharge pages on swapout

On Tue, Oct 21, 2014 at 05:03:28PM -0400, Johannes Weiner wrote:
> On Tue, Oct 21, 2014 at 04:52:52PM +0400, Vladimir Davydov wrote:
> > On Mon, Oct 20, 2014 at 11:22:09AM -0400, Johannes Weiner wrote:
> > > mem_cgroup_swapout() is called with exclusive access to the page at
> > > the end of the page's lifetime. Instead of clearing the PCG_MEMSW
> > > flag and deferring the uncharge, just do it right away. This allows
> > > follow-up patches to simplify the uncharge code.
> > >
> > > Signed-off-by: Johannes Weiner <[email protected]>
> > > ---
> > > mm/memcontrol.c | 17 +++++++++++++----
> > > 1 file changed, 13 insertions(+), 4 deletions(-)
> > >
> > > diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> > > index bea3fddb3372..7709f17347f3 100644
> > > --- a/mm/memcontrol.c
> > > +++ b/mm/memcontrol.c
> > > @@ -5799,6 +5799,7 @@ static void __init enable_swap_cgroup(void)
> > > */
> > > void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
> > > {
> > > + struct mem_cgroup *memcg;
> > > struct page_cgroup *pc;
> > > unsigned short oldid;
> > >
> > > @@ -5815,13 +5816,21 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
> > > return;
> > >
> > > VM_BUG_ON_PAGE(!(pc->flags & PCG_MEMSW), page);
> > > + memcg = pc->mem_cgroup;
> > >
> > > - oldid = swap_cgroup_record(entry, mem_cgroup_id(pc->mem_cgroup));
> > > + oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
> > > VM_BUG_ON_PAGE(oldid, page);
> > > + mem_cgroup_swap_statistics(memcg, true);
> > >
> > > - pc->flags &= ~PCG_MEMSW;
> > > - css_get(&pc->mem_cgroup->css);
> > > - mem_cgroup_swap_statistics(pc->mem_cgroup, true);
> > > + pc->flags = 0;
> > > +
> > > + if (!mem_cgroup_is_root(memcg))
> > > + page_counter_uncharge(&memcg->memory, 1);
> >
> > AFAIU it removes batched uncharge of swapped out pages, doesn't it? Will
> > it affect performance?
>
> During swapout and with lockless page counters? I don't think so.

How is this different from page cache out? I mean, we can have a lot of
pages in the swap cache that have already been swapped out, and are
waiting to be unmapped, uncharged, and freed, just like usual page
cache. Why do we use batching for file cache pages then?

>
> > Besides, it looks asymmetric with respect to the page cache uncharge
> > path, where we still defer uncharge to mem_cgroup_uncharge_list(), and I
> > personally rather dislike this asymmetry.
>
> The asymmetry is inherent in the fact that we have memory and
> memory+swap accounting, and here a memory charge is transferred out to
> swap. Before, the asymmetry was in mem_cgroup_uncharge_list() where
> we separate out memory and memsw pages (which the next patch fixes).

I agree that memsw is inherently asymmetric, but IMO it isn't the case
for swap *cache* vs page *cache*. We handle them similarly - removing
from a mapping, uncharging, freeing. If one wants batching, why
shouldn't the other?

>
> So nothing changed, the ugliness was just moved around. I actually
> like it better now that it's part of the swap controller, because
> that's where the nastiness actually comes from. This will all go away
> when we account swap separately. Then, swapped pages can keep their
> memory charge until mem_cgroup_uncharge() again and the swap charge
> will be completely independent from it. This reshuffling is just
> necessary because it allows us to get rid of the per-page flag.

Do you mean that swap cache uncharge batching will be back soon?

>
> > > + local_irq_disable();
> > > + mem_cgroup_charge_statistics(memcg, page, -1);
> > > + memcg_check_events(memcg, page);
> > > + local_irq_enable();
> >
> > AFAICT mem_cgroup_swapout() is called under mapping->tree_lock with irqs
> > disabled, so we should use irq_save/restore here.
>
> Good catch! I don't think this function actually needs to be called
> under the tree_lock, so I'd rather send a follow-up that moves it out.

That's exactly what I thought after sending that message.

> For now, this should be sufficient:
>
> ---
>
> From 3a40bd3b85a70db104ade873007dbb84b5117993 Mon Sep 17 00:00:00 2001
> From: Johannes Weiner <[email protected]>
> Date: Tue, 21 Oct 2014 16:53:14 -0400
> Subject: [patch] mm: memcontrol: uncharge pages on swapout fix
>
> Vladimir notes:
>
> > > + local_irq_disable();
> > > + mem_cgroup_charge_statistics(memcg, page, -1);
> > > + memcg_check_events(memcg, page);
> > > + local_irq_enable();
> >
> > AFAICT mem_cgroup_swapout() is called under mapping->tree_lock with irqs
> > disabled, so we should use irq_save/restore here.
>
> But this function doesn't actually need to be called under the tree
> lock. So for now, simply remove the irq-disabling altogether and rely
> on the caller's IRQ state. Later on, we'll move it out from there and
> add back the simple, non-saving IRQ-disabling.
>
> Reported-by: Vladimir Davydov <[email protected]>
> Signed-off-by: Johannes Weiner <[email protected]>

The fix looks good to me.

Acked-by: Vladimir Davydov <[email protected]>

But I'm still unsure about the original patch.

> ---
> mm/memcontrol.c | 5 +++--
> 1 file changed, 3 insertions(+), 2 deletions(-)
>
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 8dc46aa9ae8f..c688fb73ff35 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -5806,6 +5806,9 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
> VM_BUG_ON_PAGE(PageLRU(page), page);
> VM_BUG_ON_PAGE(page_count(page), page);
>
> + /* XXX: caller holds IRQ-safe mapping->tree_lock */
> + VM_BUG_ON(!irqs_disabled());
> +
> if (!do_swap_account)
> return;
>
> @@ -5827,10 +5830,8 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
> if (!mem_cgroup_is_root(memcg))
> page_counter_uncharge(&memcg->memory, 1);
>
> - local_irq_disable();
> mem_cgroup_charge_statistics(memcg, page, -1);
> memcg_check_events(memcg, page);
> - local_irq_enable();
> }
>
> /**
> --
> 2.1.2

2014-10-22 13:20:53

by Johannes Weiner

Subject: Re: [patch 1/4] mm: memcontrol: uncharge pages on swapout

On Wed, Oct 22, 2014 at 12:33:53PM +0400, Vladimir Davydov wrote:
> On Tue, Oct 21, 2014 at 05:03:28PM -0400, Johannes Weiner wrote:
> > On Tue, Oct 21, 2014 at 04:52:52PM +0400, Vladimir Davydov wrote:
> > > On Mon, Oct 20, 2014 at 11:22:09AM -0400, Johannes Weiner wrote:
> > > > mem_cgroup_swapout() is called with exclusive access to the page at
> > > > the end of the page's lifetime. Instead of clearing the PCG_MEMSW
> > > > flag and deferring the uncharge, just do it right away. This allows
> > > > follow-up patches to simplify the uncharge code.
> > > >
> > > > Signed-off-by: Johannes Weiner <[email protected]>
> > > > ---
> > > > mm/memcontrol.c | 17 +++++++++++++----
> > > > 1 file changed, 13 insertions(+), 4 deletions(-)
> > > >
> > > > diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> > > > index bea3fddb3372..7709f17347f3 100644
> > > > --- a/mm/memcontrol.c
> > > > +++ b/mm/memcontrol.c
> > > > @@ -5799,6 +5799,7 @@ static void __init enable_swap_cgroup(void)
> > > > */
> > > > void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
> > > > {
> > > > + struct mem_cgroup *memcg;
> > > > struct page_cgroup *pc;
> > > > unsigned short oldid;
> > > >
> > > > @@ -5815,13 +5816,21 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
> > > > return;
> > > >
> > > > VM_BUG_ON_PAGE(!(pc->flags & PCG_MEMSW), page);
> > > > + memcg = pc->mem_cgroup;
> > > >
> > > > - oldid = swap_cgroup_record(entry, mem_cgroup_id(pc->mem_cgroup));
> > > > + oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
> > > > VM_BUG_ON_PAGE(oldid, page);
> > > > + mem_cgroup_swap_statistics(memcg, true);
> > > >
> > > > - pc->flags &= ~PCG_MEMSW;
> > > > - css_get(&pc->mem_cgroup->css);
> > > > - mem_cgroup_swap_statistics(pc->mem_cgroup, true);
> > > > + pc->flags = 0;
> > > > +
> > > > + if (!mem_cgroup_is_root(memcg))
> > > > + page_counter_uncharge(&memcg->memory, 1);
> > >
> > > AFAIU it removes batched uncharge of swapped out pages, doesn't it? Will
> > > it affect performance?
> >
> > During swapout and with lockless page counters? I don't think so.
>
> How is this different from page cache out? I mean, we can have a lot of
> pages in the swap cache that have already been swapped out, and are
> waiting to be unmapped, uncharged, and freed, just like usual page
> cache. Why do we use batching for file cache pages then?

The batching is mostly for munmap(). We do it for reclaim because
it's convenient, but I don't think an extra word per struct page is a
reasonable trade-off just to batch one, sometimes a few, locked
subtractions per swapped-out page.

> > > Besides, it looks asymmetric with respect to the page cache uncharge
> > > path, where we still defer uncharge to mem_cgroup_uncharge_list(), and I
> > > personally rather dislike this asymmetry.
> >
> > The asymmetry is inherent in the fact that we have memory and
> > memory+swap accounting, and here a memory charge is transferred out to
> > swap. Before, the asymmetry was in mem_cgroup_uncharge_list() where
> > we separate out memory and memsw pages (which the next patch fixes).
>
> I agree that memsw is inherently asymmetric, but IMO it isn't the case
> for swap *cache* vs page *cache*. We handle them similarly - removing
> from a mapping, uncharging, freeing. If one wants batching, why
> shouldn't the other?

It has to be worth it in practical terms. You can argue symmetry
between swap cache and page cache, but swapping simply is a much
colder path than reclaiming page cache. Our reclaim algorithm avoids
it like the plague.

> > So nothing changed, the ugliness was just moved around. I actually
> > like it better now that it's part of the swap controller, because
> > that's where the nastiness actually comes from. This will all go away
> > when we account swap separately. Then, swapped pages can keep their
> > memory charge until mem_cgroup_uncharge() again and the swap charge
> > will be completely independent from it. This reshuffling is just
> > necessary because it allows us to get rid of the per-page flag.
>
> Do you mean that swap cache uncharge batching will be back soon?

Well, yes, once we switch from memsw to a separate swap counter, it
comes automatically. Pages no longer carry two charges, and so the
uncharging of pages doesn't have to distinguish between swapped out
pages and other pages anymore.
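
As a rough illustration of that direction (memcg->swap and the exact
call sequence are hypothetical here; nothing in this series adds
them), swapout would then merely record the swap charge and leave the
memory charge in place for the ordinary batched uncharge path:

	void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
	{
		struct mem_cgroup *memcg = lookup_page_cgroup(page)->mem_cgroup;

		/* Readahead page, never charged */
		if (!memcg)
			return;

		/* charge the (hypothetical) separate swap counter */
		if (!mem_cgroup_is_root(memcg))
			page_counter_charge(&memcg->swap, 1);

		swap_cgroup_record(entry, mem_cgroup_id(memcg));
		mem_cgroup_swap_statistics(memcg, true);

		/*
		 * memcg->memory stays charged and pc->mem_cgroup stays set;
		 * the page is uncharged later through the regular, batched
		 * mem_cgroup_uncharge_list() like any other page.
		 */
	}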

2014-10-22 15:34:59

by Michal Hocko

[permalink] [raw]
Subject: Re: [patch 1/4] mm: memcontrol: uncharge pages on swapout

On Mon 20-10-14 11:22:09, Johannes Weiner wrote:
> mem_cgroup_swapout() is called with exclusive access to the page at
> the end of the page's lifetime. Instead of clearing the PCG_MEMSW
> flag and deferring the uncharge, just do it right away. This allows
> follow-up patches to simplify the uncharge code.
>
> Signed-off-by: Johannes Weiner <[email protected]>

OK, it makes sense. With the irq fixup
Acked-by: Michal Hocko <[email protected]>

> ---
> mm/memcontrol.c | 17 +++++++++++++----
> 1 file changed, 13 insertions(+), 4 deletions(-)
>
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index bea3fddb3372..7709f17347f3 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -5799,6 +5799,7 @@ static void __init enable_swap_cgroup(void)
> */
> void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
> {
> + struct mem_cgroup *memcg;
> struct page_cgroup *pc;
> unsigned short oldid;
>
> @@ -5815,13 +5816,21 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
> return;
>
> VM_BUG_ON_PAGE(!(pc->flags & PCG_MEMSW), page);
> + memcg = pc->mem_cgroup;
>
> - oldid = swap_cgroup_record(entry, mem_cgroup_id(pc->mem_cgroup));
> + oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
> VM_BUG_ON_PAGE(oldid, page);
> + mem_cgroup_swap_statistics(memcg, true);
>
> - pc->flags &= ~PCG_MEMSW;
> - css_get(&pc->mem_cgroup->css);
> - mem_cgroup_swap_statistics(pc->mem_cgroup, true);
> + pc->flags = 0;
> +
> + if (!mem_cgroup_is_root(memcg))
> + page_counter_uncharge(&memcg->memory, 1);
> +
> + local_irq_disable();
> + mem_cgroup_charge_statistics(memcg, page, -1);
> + memcg_check_events(memcg, page);
> + local_irq_enable();
> }
>
> /**
> --
> 2.1.2
>

--
Michal Hocko
SUSE Labs

2014-10-22 15:37:40

by Vladimir Davydov

[permalink] [raw]
Subject: Re: [patch 1/4] mm: memcontrol: uncharge pages on swapout

On Wed, Oct 22, 2014 at 09:20:38AM -0400, Johannes Weiner wrote:
> On Wed, Oct 22, 2014 at 12:33:53PM +0400, Vladimir Davydov wrote:
> > On Tue, Oct 21, 2014 at 05:03:28PM -0400, Johannes Weiner wrote:
> > > On Tue, Oct 21, 2014 at 04:52:52PM +0400, Vladimir Davydov wrote:
> > > > On Mon, Oct 20, 2014 at 11:22:09AM -0400, Johannes Weiner wrote:
> > > > > mem_cgroup_swapout() is called with exclusive access to the page at
> > > > > the end of the page's lifetime. Instead of clearing the PCG_MEMSW
> > > > > flag and deferring the uncharge, just do it right away. This allows
> > > > > follow-up patches to simplify the uncharge code.
> > > > >
> > > > > Signed-off-by: Johannes Weiner <[email protected]>
> > > > > ---
> > > > > mm/memcontrol.c | 17 +++++++++++++----
> > > > > 1 file changed, 13 insertions(+), 4 deletions(-)
> > > > >
> > > > > diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> > > > > index bea3fddb3372..7709f17347f3 100644
> > > > > --- a/mm/memcontrol.c
> > > > > +++ b/mm/memcontrol.c
> > > > > @@ -5799,6 +5799,7 @@ static void __init enable_swap_cgroup(void)
> > > > > */
> > > > > void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
> > > > > {
> > > > > + struct mem_cgroup *memcg;
> > > > > struct page_cgroup *pc;
> > > > > unsigned short oldid;
> > > > >
> > > > > @@ -5815,13 +5816,21 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
> > > > > return;
> > > > >
> > > > > VM_BUG_ON_PAGE(!(pc->flags & PCG_MEMSW), page);
> > > > > + memcg = pc->mem_cgroup;
> > > > >
> > > > > - oldid = swap_cgroup_record(entry, mem_cgroup_id(pc->mem_cgroup));
> > > > > + oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
> > > > > VM_BUG_ON_PAGE(oldid, page);
> > > > > + mem_cgroup_swap_statistics(memcg, true);
> > > > >
> > > > > - pc->flags &= ~PCG_MEMSW;
> > > > > - css_get(&pc->mem_cgroup->css);
> > > > > - mem_cgroup_swap_statistics(pc->mem_cgroup, true);
> > > > > + pc->flags = 0;
> > > > > +
> > > > > + if (!mem_cgroup_is_root(memcg))
> > > > > + page_counter_uncharge(&memcg->memory, 1);
> > > >
> > > > AFAIU it removes batched uncharge of swapped out pages, doesn't it? Will
> > > > it affect performance?
> > >
> > > During swapout and with lockless page counters? I don't think so.
> >
> > How is this different from page cache out? I mean, we can have a lot of
> > pages in the swap cache that have already been swapped out, and are
> > waiting to be unmapped, uncharged, and freed, just like usual page
> > cache. Why do we use batching for file cache pages then?
>
> The batching is mostly for munmap(). We do it for reclaim because
> it's convenient, but I don't think an extra word per struct page to
> batch one, sometimes a few, locked subtractions per swapped out page
> is a reasonable trade-off.
>
> > > > Besides, it looks asymmetric with respect to the page cache uncharge
> > > > path, where we still defer uncharge to mem_cgroup_uncharge_list(), and I
> > > > personally rather dislike this asymmetry.
> > >
> > > The asymmetry is inherent in the fact that we have memory and
> > > memory+swap accounting, and here a memory charge is transferred out to
> > > swap. Before, the asymmetry was in mem_cgroup_uncharge_list() where
> > > we separate out memory and memsw pages (which the next patch fixes).
> >
> > I agree that memsw is inherently asymmetric, but IMO it isn't the case
> > for swap *cache* vs page *cache*. We handle them similarly - removing
> > from a mapping, uncharging, freeing. If one wants batching, why
> > shouldn't the other?
>
> It has to be worth it in practical terms. You can argue symmetry
> between swap cache and page cache, but swapping simply is a much
> colder path than reclaiming page cache. Our reclaim algorithm avoids
> it like the plague.
>
> > > So nothing changed, the ugliness was just moved around. I actually
> > > like it better now that it's part of the swap controller, because
> > > that's where the nastiness actually comes from. This will all go away
> > > when we account swap separately. Then, swapped pages can keep their
> > > memory charge until mem_cgroup_uncharge() again and the swap charge
> > > will be completely independent of it. This reshuffling is just
> > > necessary because it allows us to get rid of the per-page flag.
> >
> > Do you mean that swap cache uncharge batching will be back soon?
>
> Well, yes, once we switch from memsw to a separate swap counter, it
> comes automatically. Pages no longer carry two charges, and so the
> uncharging of pages doesn't have to distinguish between swapped out
> pages and other pages anymore.

With this in mind,

Acked-by: Vladimir Davydov <[email protected]>

2014-10-22 15:43:15

by Vladimir Davydov

[permalink] [raw]
Subject: Re: [patch 2/4] mm: memcontrol: remove unnecessary PCG_MEMSW memory+swap charge flag

On Mon, Oct 20, 2014 at 11:22:10AM -0400, Johannes Weiner wrote:
> Now that mem_cgroup_swapout() fully uncharges the page, every page
> that is still in use when reaching mem_cgroup_uncharge() is known to
> carry both the memory and the memory+swap charge. Simplify the
> uncharge path and remove the PCG_MEMSW page flag accordingly.
>
> Signed-off-by: Johannes Weiner <[email protected]>

Reviewed-by: Vladimir Davydov <[email protected]>

> ---
> include/linux/page_cgroup.h | 1 -
> mm/memcontrol.c | 34 ++++++++++++----------------------
> 2 files changed, 12 insertions(+), 23 deletions(-)
>
> diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
> index 5c831f1eca79..da62ee2be28b 100644
> --- a/include/linux/page_cgroup.h
> +++ b/include/linux/page_cgroup.h
> @@ -5,7 +5,6 @@ enum {
> /* flags for mem_cgroup */
> PCG_USED = 0x01, /* This page is charged to a memcg */
> PCG_MEM = 0x02, /* This page holds a memory charge */
> - PCG_MEMSW = 0x04, /* This page holds a memory+swap charge */
> };
>
> struct pglist_data;
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 7709f17347f3..9bab35fc3e9e 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -2606,7 +2606,7 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg,
> * have the page locked
> */
> pc->mem_cgroup = memcg;
> - pc->flags = PCG_USED | PCG_MEM | (do_swap_account ? PCG_MEMSW : 0);
> + pc->flags = PCG_USED | PCG_MEM;
>
> if (lrucare)
> unlock_page_lru(page, isolated);
> @@ -5815,7 +5815,6 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
> if (!PageCgroupUsed(pc))
> return;
>
> - VM_BUG_ON_PAGE(!(pc->flags & PCG_MEMSW), page);
> memcg = pc->mem_cgroup;
>
> oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
> @@ -6010,17 +6009,16 @@ void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg)
> }
>
> static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
> - unsigned long nr_mem, unsigned long nr_memsw,
> unsigned long nr_anon, unsigned long nr_file,
> unsigned long nr_huge, struct page *dummy_page)
> {
> + unsigned long nr_pages = nr_anon + nr_file;
> unsigned long flags;
>
> if (!mem_cgroup_is_root(memcg)) {
> - if (nr_mem)
> - page_counter_uncharge(&memcg->memory, nr_mem);
> - if (nr_memsw)
> - page_counter_uncharge(&memcg->memsw, nr_memsw);
> + page_counter_uncharge(&memcg->memory, nr_pages);
> + if (do_swap_account)
> + page_counter_uncharge(&memcg->memsw, nr_pages);
> memcg_oom_recover(memcg);
> }
>
> @@ -6029,23 +6027,21 @@ static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
> __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_CACHE], nr_file);
> __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE], nr_huge);
> __this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_PGPGOUT], pgpgout);
> - __this_cpu_add(memcg->stat->nr_page_events, nr_anon + nr_file);
> + __this_cpu_add(memcg->stat->nr_page_events, nr_pages);
> memcg_check_events(memcg, dummy_page);
> local_irq_restore(flags);
>
> if (!mem_cgroup_is_root(memcg))
> - css_put_many(&memcg->css, max(nr_mem, nr_memsw));
> + css_put_many(&memcg->css, nr_pages);
> }
>
> static void uncharge_list(struct list_head *page_list)
> {
> struct mem_cgroup *memcg = NULL;
> - unsigned long nr_memsw = 0;
> unsigned long nr_anon = 0;
> unsigned long nr_file = 0;
> unsigned long nr_huge = 0;
> unsigned long pgpgout = 0;
> - unsigned long nr_mem = 0;
> struct list_head *next;
> struct page *page;
>
> @@ -6072,10 +6068,9 @@ static void uncharge_list(struct list_head *page_list)
>
> if (memcg != pc->mem_cgroup) {
> if (memcg) {
> - uncharge_batch(memcg, pgpgout, nr_mem, nr_memsw,
> - nr_anon, nr_file, nr_huge, page);
> - pgpgout = nr_mem = nr_memsw = 0;
> - nr_anon = nr_file = nr_huge = 0;
> + uncharge_batch(memcg, pgpgout, nr_anon, nr_file,
> + nr_huge, page);
> + pgpgout = nr_anon = nr_file = nr_huge = 0;
> }
> memcg = pc->mem_cgroup;
> }
> @@ -6091,18 +6086,14 @@ static void uncharge_list(struct list_head *page_list)
> else
> nr_file += nr_pages;
>
> - if (pc->flags & PCG_MEM)
> - nr_mem += nr_pages;
> - if (pc->flags & PCG_MEMSW)
> - nr_memsw += nr_pages;
> pc->flags = 0;
>
> pgpgout++;
> } while (next != page_list);
>
> if (memcg)
> - uncharge_batch(memcg, pgpgout, nr_mem, nr_memsw,
> - nr_anon, nr_file, nr_huge, page);
> + uncharge_batch(memcg, pgpgout, nr_anon, nr_file,
> + nr_huge, page);
> }
>
> /**
> @@ -6187,7 +6178,6 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage,
> return;
>
> VM_BUG_ON_PAGE(!(pc->flags & PCG_MEM), oldpage);
> - VM_BUG_ON_PAGE(do_swap_account && !(pc->flags & PCG_MEMSW), oldpage);
>
> if (lrucare)
> lock_page_lru(oldpage, &isolated);
> --
> 2.1.2
>

2014-10-22 15:49:20

by Michal Hocko

[permalink] [raw]
Subject: Re: [patch 3/4] mm: memcontrol: remove unnecessary PCG_MEM memory charge flag

On Mon 20-10-14 11:22:11, Johannes Weiner wrote:
> PCG_MEM is a remnant from an earlier version of 0a31bc97c80c ("mm:
> memcontrol: rewrite uncharge API"), used to tell whether migration
> cleared a charge while leaving pc->mem_cgroup valid and PCG_USED set.
> But in the final version, mem_cgroup_migrate() directly uncharges the
> source page, rendering this distinction unnecessary. Remove it.
>
> Signed-off-by: Johannes Weiner <[email protected]>

Acked-by: Michal Hocko <[email protected]>

> ---
> include/linux/page_cgroup.h | 1 -
> mm/memcontrol.c | 4 +---
> 2 files changed, 1 insertion(+), 4 deletions(-)
>
> diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
> index da62ee2be28b..97536e685843 100644
> --- a/include/linux/page_cgroup.h
> +++ b/include/linux/page_cgroup.h
> @@ -4,7 +4,6 @@
> enum {
> /* flags for mem_cgroup */
> PCG_USED = 0x01, /* This page is charged to a memcg */
> - PCG_MEM = 0x02, /* This page holds a memory charge */
> };
>
> struct pglist_data;
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 9bab35fc3e9e..1d66ac49e702 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -2606,7 +2606,7 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg,
> * have the page locked
> */
> pc->mem_cgroup = memcg;
> - pc->flags = PCG_USED | PCG_MEM;
> + pc->flags = PCG_USED;
>
> if (lrucare)
> unlock_page_lru(page, isolated);
> @@ -6177,8 +6177,6 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage,
> if (!PageCgroupUsed(pc))
> return;
>
> - VM_BUG_ON_PAGE(!(pc->flags & PCG_MEM), oldpage);
> -
> if (lrucare)
> lock_page_lru(oldpage, &isolated);
>
> --
> 2.1.2
>

--
Michal Hocko
SUSE Labs

2014-10-22 15:53:31

by Michal Hocko

[permalink] [raw]
Subject: Re: [patch 2/4] mm: memcontrol: remove unnecessary PCG_MEMSW memory+swap charge flag

On Mon 20-10-14 11:22:10, Johannes Weiner wrote:
> Now that mem_cgroup_swapout() fully uncharges the page, every page
> that is still in use when reaching mem_cgroup_uncharge() is known to
> carry both the memory and the memory+swap charge. Simplify the
> uncharge path and remove the PCG_MEMSW page flag accordingly.
>
> Signed-off-by: Johannes Weiner <[email protected]>

Looks good
Acked-by: Michal Hocko <[email protected]>

> ---
> include/linux/page_cgroup.h | 1 -
> mm/memcontrol.c | 34 ++++++++++++----------------------
> 2 files changed, 12 insertions(+), 23 deletions(-)
>
> diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
> index 5c831f1eca79..da62ee2be28b 100644
> --- a/include/linux/page_cgroup.h
> +++ b/include/linux/page_cgroup.h
> @@ -5,7 +5,6 @@ enum {
> /* flags for mem_cgroup */
> PCG_USED = 0x01, /* This page is charged to a memcg */
> PCG_MEM = 0x02, /* This page holds a memory charge */
> - PCG_MEMSW = 0x04, /* This page holds a memory+swap charge */
> };
>
> struct pglist_data;
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 7709f17347f3..9bab35fc3e9e 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -2606,7 +2606,7 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg,
> * have the page locked
> */
> pc->mem_cgroup = memcg;
> - pc->flags = PCG_USED | PCG_MEM | (do_swap_account ? PCG_MEMSW : 0);
> + pc->flags = PCG_USED | PCG_MEM;
>
> if (lrucare)
> unlock_page_lru(page, isolated);
> @@ -5815,7 +5815,6 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
> if (!PageCgroupUsed(pc))
> return;
>
> - VM_BUG_ON_PAGE(!(pc->flags & PCG_MEMSW), page);
> memcg = pc->mem_cgroup;
>
> oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
> @@ -6010,17 +6009,16 @@ void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg)
> }
>
> static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
> - unsigned long nr_mem, unsigned long nr_memsw,
> unsigned long nr_anon, unsigned long nr_file,
> unsigned long nr_huge, struct page *dummy_page)
> {
> + unsigned long nr_pages = nr_anon + nr_file;
> unsigned long flags;
>
> if (!mem_cgroup_is_root(memcg)) {
> - if (nr_mem)
> - page_counter_uncharge(&memcg->memory, nr_mem);
> - if (nr_memsw)
> - page_counter_uncharge(&memcg->memsw, nr_memsw);
> + page_counter_uncharge(&memcg->memory, nr_pages);
> + if (do_swap_account)
> + page_counter_uncharge(&memcg->memsw, nr_pages);
> memcg_oom_recover(memcg);
> }
>
> @@ -6029,23 +6027,21 @@ static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
> __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_CACHE], nr_file);
> __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE], nr_huge);
> __this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_PGPGOUT], pgpgout);
> - __this_cpu_add(memcg->stat->nr_page_events, nr_anon + nr_file);
> + __this_cpu_add(memcg->stat->nr_page_events, nr_pages);
> memcg_check_events(memcg, dummy_page);
> local_irq_restore(flags);
>
> if (!mem_cgroup_is_root(memcg))
> - css_put_many(&memcg->css, max(nr_mem, nr_memsw));
> + css_put_many(&memcg->css, nr_pages);
> }
>
> static void uncharge_list(struct list_head *page_list)
> {
> struct mem_cgroup *memcg = NULL;
> - unsigned long nr_memsw = 0;
> unsigned long nr_anon = 0;
> unsigned long nr_file = 0;
> unsigned long nr_huge = 0;
> unsigned long pgpgout = 0;
> - unsigned long nr_mem = 0;
> struct list_head *next;
> struct page *page;
>
> @@ -6072,10 +6068,9 @@ static void uncharge_list(struct list_head *page_list)
>
> if (memcg != pc->mem_cgroup) {
> if (memcg) {
> - uncharge_batch(memcg, pgpgout, nr_mem, nr_memsw,
> - nr_anon, nr_file, nr_huge, page);
> - pgpgout = nr_mem = nr_memsw = 0;
> - nr_anon = nr_file = nr_huge = 0;
> + uncharge_batch(memcg, pgpgout, nr_anon, nr_file,
> + nr_huge, page);
> + pgpgout = nr_anon = nr_file = nr_huge = 0;
> }
> memcg = pc->mem_cgroup;
> }
> @@ -6091,18 +6086,14 @@ static void uncharge_list(struct list_head *page_list)
> else
> nr_file += nr_pages;
>
> - if (pc->flags & PCG_MEM)
> - nr_mem += nr_pages;
> - if (pc->flags & PCG_MEMSW)
> - nr_memsw += nr_pages;
> pc->flags = 0;
>
> pgpgout++;
> } while (next != page_list);
>
> if (memcg)
> - uncharge_batch(memcg, pgpgout, nr_mem, nr_memsw,
> - nr_anon, nr_file, nr_huge, page);
> + uncharge_batch(memcg, pgpgout, nr_anon, nr_file,
> + nr_huge, page);
> }
>
> /**
> @@ -6187,7 +6178,6 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage,
> return;
>
> VM_BUG_ON_PAGE(!(pc->flags & PCG_MEM), oldpage);
> - VM_BUG_ON_PAGE(do_swap_account && !(pc->flags & PCG_MEMSW), oldpage);
>
> if (lrucare)
> lock_page_lru(oldpage, &isolated);
> --
> 2.1.2
>

--
Michal Hocko
SUSE Labs

2014-10-22 16:05:27

by Vladimir Davydov

[permalink] [raw]
Subject: Re: [patch 4/4] mm: memcontrol: remove unnecessary PCG_USED pc->mem_cgroup valid flag

On Mon, Oct 20, 2014 at 11:22:12AM -0400, Johannes Weiner wrote:
> pc->mem_cgroup had to be left intact after uncharge for the final LRU
> removal, and !PCG_USED indicated whether the page was uncharged. But
> since 0a31bc97c80c ("mm: memcontrol: rewrite uncharge API") pages are
> uncharged after the final LRU removal. Uncharge can simply clear the
> pointer and the PCG_USED/PageCgroupUsed sites can test that instead.
>
> Because this is the last page_cgroup flag, this patch reduces the
> memcg per-page overhead to a single pointer.
>
> Signed-off-by: Johannes Weiner <[email protected]>

Reviewed-by: Vladimir Davydov <[email protected]>

> ---
> include/linux/page_cgroup.h | 10 -----
> mm/memcontrol.c | 107 +++++++++++++++++---------------------------
> 2 files changed, 42 insertions(+), 75 deletions(-)
>
> diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
> index 97536e685843..1289be6b436c 100644
> --- a/include/linux/page_cgroup.h
> +++ b/include/linux/page_cgroup.h
> @@ -1,11 +1,6 @@
> #ifndef __LINUX_PAGE_CGROUP_H
> #define __LINUX_PAGE_CGROUP_H
>
> -enum {
> - /* flags for mem_cgroup */
> - PCG_USED = 0x01, /* This page is charged to a memcg */
> -};
> -
> struct pglist_data;
>
> #ifdef CONFIG_MEMCG
> @@ -19,7 +14,6 @@ struct mem_cgroup;
> * then the page cgroup for pfn always exists.
> */
> struct page_cgroup {
> - unsigned long flags;
> struct mem_cgroup *mem_cgroup;
> };
>
> @@ -39,10 +33,6 @@ static inline void page_cgroup_init(void)
>
> struct page_cgroup *lookup_page_cgroup(struct page *page);
>
> -static inline int PageCgroupUsed(struct page_cgroup *pc)
> -{
> - return !!(pc->flags & PCG_USED);
> -}
> #else /* !CONFIG_MEMCG */
> struct page_cgroup;
>
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 1d66ac49e702..48d49c6b08d1 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -1284,14 +1284,12 @@ struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct zone *zone)
>
> pc = lookup_page_cgroup(page);
> memcg = pc->mem_cgroup;
> -
> /*
> * Swapcache readahead pages are added to the LRU - and
> - * possibly migrated - before they are charged. Ensure
> - * pc->mem_cgroup is sane.
> + * possibly migrated - before they are charged.
> */
> - if (!PageLRU(page) && !PageCgroupUsed(pc) && memcg != root_mem_cgroup)
> - pc->mem_cgroup = memcg = root_mem_cgroup;
> + if (!memcg)
> + memcg = root_mem_cgroup;
>
> mz = mem_cgroup_page_zoneinfo(memcg, page);
> lruvec = &mz->lruvec;
> @@ -2141,7 +2139,7 @@ void __mem_cgroup_begin_update_page_stat(struct page *page,
> pc = lookup_page_cgroup(page);
> again:
> memcg = pc->mem_cgroup;
> - if (unlikely(!memcg || !PageCgroupUsed(pc)))
> + if (unlikely(!memcg))
> return;
> /*
> * If this memory cgroup is not under account moving, we don't
> @@ -2154,7 +2152,7 @@ again:
> return;
>
> move_lock_mem_cgroup(memcg, flags);
> - if (memcg != pc->mem_cgroup || !PageCgroupUsed(pc)) {
> + if (memcg != pc->mem_cgroup) {
> move_unlock_mem_cgroup(memcg, flags);
> goto again;
> }
> @@ -2186,7 +2184,7 @@ void mem_cgroup_update_page_stat(struct page *page,
>
> pc = lookup_page_cgroup(page);
> memcg = pc->mem_cgroup;
> - if (unlikely(!memcg || !PageCgroupUsed(pc)))
> + if (unlikely(!memcg))
> return;
>
> this_cpu_add(memcg->stat->count[idx], val);
> @@ -2525,9 +2523,10 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
> VM_BUG_ON_PAGE(!PageLocked(page), page);
>
> pc = lookup_page_cgroup(page);
> - if (PageCgroupUsed(pc)) {
> - memcg = pc->mem_cgroup;
> - if (memcg && !css_tryget_online(&memcg->css))
> + memcg = pc->mem_cgroup;
> +
> + if (memcg) {
> + if (!css_tryget_online(&memcg->css))
> memcg = NULL;
> } else if (PageSwapCache(page)) {
> ent.val = page_private(page);
> @@ -2578,7 +2577,7 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg,
> struct page_cgroup *pc = lookup_page_cgroup(page);
> int isolated;
>
> - VM_BUG_ON_PAGE(PageCgroupUsed(pc), page);
> + VM_BUG_ON_PAGE(pc->mem_cgroup, page);
> /*
> * we don't need page_cgroup_lock about tail pages, becase they are not
> * accessed by any other context at this point.
> @@ -2593,7 +2592,7 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg,
>
> /*
> * Nobody should be changing or seriously looking at
> - * pc->mem_cgroup and pc->flags at this point:
> + * pc->mem_cgroup at this point:
> *
> * - the page is uncharged
> *
> @@ -2606,7 +2605,6 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg,
> * have the page locked
> */
> pc->mem_cgroup = memcg;
> - pc->flags = PCG_USED;
>
> if (lrucare)
> unlock_page_lru(page, isolated);
> @@ -3120,37 +3118,22 @@ void __memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg,
> memcg_uncharge_kmem(memcg, 1 << order);
> return;
> }
> - /*
> - * The page is freshly allocated and not visible to any
> - * outside callers yet. Set up pc non-atomically.
> - */
> pc = lookup_page_cgroup(page);
> pc->mem_cgroup = memcg;
> - pc->flags = PCG_USED;
> }
>
> void __memcg_kmem_uncharge_pages(struct page *page, int order)
> {
> - struct mem_cgroup *memcg = NULL;
> - struct page_cgroup *pc;
> -
> -
> - pc = lookup_page_cgroup(page);
> - if (!PageCgroupUsed(pc))
> - return;
> -
> - memcg = pc->mem_cgroup;
> - pc->flags = 0;
> + struct page_cgroup *pc = lookup_page_cgroup(page);
> + struct mem_cgroup *memcg = pc->mem_cgroup;
>
> - /*
> - * We trust that only if there is a memcg associated with the page, it
> - * is a valid allocation
> - */
> if (!memcg)
> return;
>
> VM_BUG_ON_PAGE(mem_cgroup_is_root(memcg), page);
> +
> memcg_uncharge_kmem(memcg, 1 << order);
> + pc->mem_cgroup = NULL;
> }
> #else
> static inline void memcg_unregister_all_caches(struct mem_cgroup *memcg)
> @@ -3168,21 +3151,16 @@ static inline void memcg_unregister_all_caches(struct mem_cgroup *memcg)
> */
> void mem_cgroup_split_huge_fixup(struct page *head)
> {
> - struct page_cgroup *head_pc = lookup_page_cgroup(head);
> - struct page_cgroup *pc;
> - struct mem_cgroup *memcg;
> + struct page_cgroup *pc = lookup_page_cgroup(head);
> int i;
>
> if (mem_cgroup_disabled())
> return;
>
> - memcg = head_pc->mem_cgroup;
> - for (i = 1; i < HPAGE_PMD_NR; i++) {
> - pc = head_pc + i;
> - pc->mem_cgroup = memcg;
> - pc->flags = head_pc->flags;
> - }
> - __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE],
> + for (i = 1; i < HPAGE_PMD_NR; i++)
> + pc[i].mem_cgroup = pc[0].mem_cgroup;
> +
> + __this_cpu_sub(pc[0].mem_cgroup->stat->count[MEM_CGROUP_STAT_RSS_HUGE],
> HPAGE_PMD_NR);
> }
> #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
> @@ -3232,7 +3210,7 @@ static int mem_cgroup_move_account(struct page *page,
> goto out;
>
> ret = -EINVAL;
> - if (!PageCgroupUsed(pc) || pc->mem_cgroup != from)
> + if (pc->mem_cgroup != from)
> goto out_unlock;
>
> move_lock_mem_cgroup(from, &flags);
> @@ -3342,7 +3320,7 @@ static struct page_cgroup *lookup_page_cgroup_used(struct page *page)
> * the first time, i.e. during boot or memory hotplug;
> * or when mem_cgroup_disabled().
> */
> - if (likely(pc) && PageCgroupUsed(pc))
> + if (likely(pc) && pc->mem_cgroup)
> return pc;
> return NULL;
> }
> @@ -3360,10 +3338,8 @@ void mem_cgroup_print_bad_page(struct page *page)
> struct page_cgroup *pc;
>
> pc = lookup_page_cgroup_used(page);
> - if (pc) {
> - pr_alert("pc:%p pc->flags:%lx pc->mem_cgroup:%p\n",
> - pc, pc->flags, pc->mem_cgroup);
> - }
> + if (pc)
> + pr_alert("pc:%p pc->mem_cgroup:%p\n", pc, pc->mem_cgroup);
> }
> #endif
>
> @@ -5330,7 +5306,7 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
> * mem_cgroup_move_account() checks the pc is valid or
> * not under LRU exclusion.
> */
> - if (PageCgroupUsed(pc) && pc->mem_cgroup == mc.from) {
> + if (pc->mem_cgroup == mc.from) {
> ret = MC_TARGET_PAGE;
> if (target)
> target->page = page;
> @@ -5366,7 +5342,7 @@ static enum mc_target_type get_mctgt_type_thp(struct vm_area_struct *vma,
> if (!move_anon())
> return ret;
> pc = lookup_page_cgroup(page);
> - if (PageCgroupUsed(pc) && pc->mem_cgroup == mc.from) {
> + if (pc->mem_cgroup == mc.from) {
> ret = MC_TARGET_PAGE;
> if (target) {
> get_page(page);
> @@ -5810,18 +5786,17 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
> return;
>
> pc = lookup_page_cgroup(page);
> + memcg = pc->mem_cgroup;
>
> /* Readahead page, never charged */
> - if (!PageCgroupUsed(pc))
> + if (!memcg)
> return;
>
> - memcg = pc->mem_cgroup;
> -
> oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
> VM_BUG_ON_PAGE(oldid, page);
> mem_cgroup_swap_statistics(memcg, true);
>
> - pc->flags = 0;
> + pc->mem_cgroup = NULL;
>
> if (!mem_cgroup_is_root(memcg))
> page_counter_uncharge(&memcg->memory, 1);
> @@ -5895,7 +5870,7 @@ int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
> * the page lock, which serializes swap cache removal, which
> * in turn serializes uncharging.
> */
> - if (PageCgroupUsed(pc))
> + if (pc->mem_cgroup)
> goto out;
> }
>
> @@ -6057,13 +6032,13 @@ static void uncharge_list(struct list_head *page_list)
> VM_BUG_ON_PAGE(page_count(page), page);
>
> pc = lookup_page_cgroup(page);
> - if (!PageCgroupUsed(pc))
> + if (!pc->mem_cgroup)
> continue;
>
> /*
> * Nobody should be changing or seriously looking at
> - * pc->mem_cgroup and pc->flags at this point, we have
> - * fully exclusive access to the page.
> + * pc->mem_cgroup at this point, we have fully
> + * exclusive access to the page.
> */
>
> if (memcg != pc->mem_cgroup) {
> @@ -6086,7 +6061,7 @@ static void uncharge_list(struct list_head *page_list)
> else
> nr_file += nr_pages;
>
> - pc->flags = 0;
> + pc->mem_cgroup = NULL;
>
> pgpgout++;
> } while (next != page_list);
> @@ -6112,7 +6087,7 @@ void mem_cgroup_uncharge(struct page *page)
>
> /* Don't touch page->lru of any random page, pre-check: */
> pc = lookup_page_cgroup(page);
> - if (!PageCgroupUsed(pc))
> + if (!pc->mem_cgroup)
> return;
>
> INIT_LIST_HEAD(&page->lru);
> @@ -6148,6 +6123,7 @@ void mem_cgroup_uncharge_list(struct list_head *page_list)
> void mem_cgroup_migrate(struct page *oldpage, struct page *newpage,
> bool lrucare)
> {
> + struct mem_cgroup *memcg;
> struct page_cgroup *pc;
> int isolated;
>
> @@ -6164,7 +6140,7 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage,
>
> /* Page cache replacement: new page already charged? */
> pc = lookup_page_cgroup(newpage);
> - if (PageCgroupUsed(pc))
> + if (pc->mem_cgroup)
> return;
>
> /*
> @@ -6174,18 +6150,19 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage,
> * reclaim just put back on the LRU but has not released yet.
> */
> pc = lookup_page_cgroup(oldpage);
> - if (!PageCgroupUsed(pc))
> + memcg = pc->mem_cgroup;
> + if (!memcg)
> return;
>
> if (lrucare)
> lock_page_lru(oldpage, &isolated);
>
> - pc->flags = 0;
> + pc->mem_cgroup = NULL;
>
> if (lrucare)
> unlock_page_lru(oldpage, isolated);
>
> - commit_charge(newpage, pc->mem_cgroup, lrucare);
> + commit_charge(newpage, memcg, lrucare);
> }
>
> /*
> --
> 2.1.2
>

2014-10-22 16:11:43

by Michal Hocko

[permalink] [raw]
Subject: Re: [patch 4/4] mm: memcontrol: remove unnecessary PCG_USED pc->mem_cgroup valid flag

On Mon 20-10-14 11:22:12, Johannes Weiner wrote:
> pc->mem_cgroup had to be left intact after uncharge for the final LRU
> removal, and !PCG_USED indicated whether the page was uncharged. But
> since 0a31bc97c80c ("mm: memcontrol: rewrite uncharge API") pages are
> uncharged after the final LRU removal. Uncharge can simply clear the
> pointer and the PCG_USED/PageCgroupUsed sites can test that instead.
>
> Because this is the last page_cgroup flag, this patch reduces the
> memcg per-page overhead to a single pointer.

Nice. I have an old patch which stuck this flag into the page_cgroup
pointer, but this is of course much, much better!
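
For reference, such a flag-in-the-pointer trick would look roughly
like the sketch below (helper names made up for illustration; it
relies on mem_cgroup pointers being at least word-aligned, and is not
necessarily what that old patch did):

	#define PCG_USED	0x1UL	/* stashed in the pointer's low bit */

	static inline void pc_set_memcg_used(struct page_cgroup *pc,
					     struct mem_cgroup *memcg)
	{
		pc->mem_cgroup = (struct mem_cgroup *)
			((unsigned long)memcg | PCG_USED);
	}

	static inline struct mem_cgroup *pc_memcg(struct page_cgroup *pc)
	{
		return (struct mem_cgroup *)
			((unsigned long)pc->mem_cgroup & ~PCG_USED);
	}

	static inline bool pc_used(struct page_cgroup *pc)
	{
		return (unsigned long)pc->mem_cgroup & PCG_USED;
	}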

> Signed-off-by: Johannes Weiner <[email protected]>

Acked-by: Michal Hocko <[email protected]>

Just a nit below

[...]
> @@ -2525,9 +2523,10 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)

memcg = NULL initialization is not needed now

> VM_BUG_ON_PAGE(!PageLocked(page), page);
>
> pc = lookup_page_cgroup(page);
> - if (PageCgroupUsed(pc)) {
> - memcg = pc->mem_cgroup;
> - if (memcg && !css_tryget_online(&memcg->css))
> + memcg = pc->mem_cgroup;
> +
> + if (memcg) {
> + if (!css_tryget_online(&memcg->css))
> memcg = NULL;
> } else if (PageSwapCache(page)) {
> ent.val = page_private(page);

> #else
> static inline void memcg_unregister_all_caches(struct mem_cgroup *memcg)
[...]
--
Michal Hocko
SUSE Labs

2014-10-22 16:13:36

by Vladimir Davydov

[permalink] [raw]
Subject: Re: [patch 3/4] mm: memcontrol: remove unnecessary PCG_MEM memory charge flag

On Mon, Oct 20, 2014 at 11:22:11AM -0400, Johannes Weiner wrote:
> PCG_MEM is a remnant from an earlier version of 0a31bc97c80c ("mm:
> memcontrol: rewrite uncharge API"), used to tell whether migration
> cleared a charge while leaving pc->mem_cgroup valid and PCG_USED set.
> But in the final version, mem_cgroup_migrate() directly uncharges the
> source page, rendering this distinction unnecessary. Remove it.
>
> Signed-off-by: Johannes Weiner <[email protected]>

Reviewed-by: Vladimir Davydov <[email protected]>

> ---
> include/linux/page_cgroup.h | 1 -
> mm/memcontrol.c | 4 +---
> 2 files changed, 1 insertion(+), 4 deletions(-)
>
> diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
> index da62ee2be28b..97536e685843 100644
> --- a/include/linux/page_cgroup.h
> +++ b/include/linux/page_cgroup.h
> @@ -4,7 +4,6 @@
> enum {
> /* flags for mem_cgroup */
> PCG_USED = 0x01, /* This page is charged to a memcg */
> - PCG_MEM = 0x02, /* This page holds a memory charge */
> };
>
> struct pglist_data;
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 9bab35fc3e9e..1d66ac49e702 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -2606,7 +2606,7 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg,
> * have the page locked
> */
> pc->mem_cgroup = memcg;
> - pc->flags = PCG_USED | PCG_MEM;
> + pc->flags = PCG_USED;
>
> if (lrucare)
> unlock_page_lru(page, isolated);
> @@ -6177,8 +6177,6 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage,
> if (!PageCgroupUsed(pc))
> return;
>
> - VM_BUG_ON_PAGE(!(pc->flags & PCG_MEM), oldpage);
> -
> if (lrucare)
> lock_page_lru(oldpage, &isolated);
>
> --
> 2.1.2
>