2019-02-16 05:06:30

by Johannes Weiner

[permalink] [raw]
Subject: [PATCH 0/6] mm: memcontrol: clean up the LRU counts tracking

The memcg LRU stats usage is currently a bit messy. Memcg has private
per-zone counters because reclaim needs zone granularity sometimes,
but we also have plenty of users that need to awkwardly sum them up to
node or memcg granularity. Meanwhile the canonical per-memcg vmstats
do not track the LRU counts (NR_INACTIVE_ANON etc.) as you'd expect.

This series enables LRU count tracking in the per-memcg vmstats array
such that lruvec_page_state() and memcg_page_state() work on the enum
node_stat_item items for the LRU counters. Then it converts all the
callers that don't specifically need per-zone numbers over to that.

include/linux/memcontrol.h | 28 ---------------
include/linux/mm_inline.h | 2 +-
include/linux/mmzone.h | 5 ---
mm/memcontrol.c | 85 +++++++++++++++++++++++++-------------------
mm/vmscan.c | 2 +-
mm/workingset.c | 5 +--
6 files changed, 54 insertions(+), 73 deletions(-)




2019-02-16 05:07:30

by Johannes Weiner

[permalink] [raw]
Subject: [PATCH 1/6] mm: memcontrol: track LRU counts in the vmstats array

The memcg code currently maintains private per-zone breakdowns of the
LRU counters. This is necessary for reclaim decisions which are still
zone-based, but there are a variety of users of these counters that
only want the aggregate per-lruvec or per-memcg LRU counts, and they
need to painfully sum up the zone counters on each request for that.

These would be better served using the memcg vmstats arrays, which
track VM statistics at the desired scope already. They just don't have
the LRU counts right now.

So to kick off the conversion, begin tracking LRU counts in those.

Signed-off-by: Johannes Weiner <[email protected]>
---
include/linux/mm_inline.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 04ec454d44ce..6f2fef7b0784 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -29,7 +29,7 @@ static __always_inline void __update_lru_size(struct lruvec *lruvec,
{
struct pglist_data *pgdat = lruvec_pgdat(lruvec);

- __mod_node_page_state(pgdat, NR_LRU_BASE + lru, nr_pages);
+ __mod_lruvec_state(lruvec, NR_LRU_BASE + lru, nr_pages);
__mod_zone_page_state(&pgdat->node_zones[zid],
NR_ZONE_LRU_BASE + lru, nr_pages);
}
--
2.20.1


2019-02-16 05:09:44

by Johannes Weiner

[permalink] [raw]
Subject: [PATCH 3/6] mm: memcontrol: replace node summing with memcg_page_state()

Instead of adding up the node counters, use memcg_page_state() to get
the memcg state directly. This is a bit cheaper and more stream-lined.

Signed-off-by: Johannes Weiner <[email protected]>
---
mm/memcontrol.c | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index a04177f25758..4d573f4e1759 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -739,10 +739,13 @@ static unsigned long mem_cgroup_nr_lru_pages(struct mem_cgroup *memcg,
unsigned int lru_mask)
{
unsigned long nr = 0;
- int nid;
+ enum lru_list lru;

- for_each_node_state(nid, N_MEMORY)
- nr += mem_cgroup_node_nr_lru_pages(memcg, nid, lru_mask);
+ for_each_lru(lru) {
+ if (!(BIT(lru) & lru_mask))
+ continue;
+ nr += memcg_page_state(memcg, NR_LRU_BASE + lru);
+ }
return nr;
}

--
2.20.1


2019-02-16 05:09:48

by Johannes Weiner

[permalink] [raw]
Subject: [PATCH 6/6] mm: memcontrol: quarantine the mem_cgroup_[node_]nr_lru_pages() API

Only memcg_numa_stat_show() uses those wrappers and the lru bitmasks,
group them together.

Signed-off-by: Johannes Weiner <[email protected]>
---
include/linux/mmzone.h | 5 ----
mm/memcontrol.c | 67 +++++++++++++++++++++++-------------------
2 files changed, 36 insertions(+), 36 deletions(-)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 842f9189537b..a93921ba7bd6 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -247,11 +247,6 @@ struct lruvec {
#endif
};

-/* Mask used at gathering information at once (see memcontrol.c) */
-#define LRU_ALL_FILE (BIT(LRU_INACTIVE_FILE) | BIT(LRU_ACTIVE_FILE))
-#define LRU_ALL_ANON (BIT(LRU_INACTIVE_ANON) | BIT(LRU_ACTIVE_ANON))
-#define LRU_ALL ((1 << NR_LRU_LISTS) - 1)
-
/* Isolate unmapped file */
#define ISOLATE_UNMAPPED ((__force isolate_mode_t)0x2)
/* Isolate for asynchronous migration */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 6d0c3374669f..20fb3de8bde4 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -718,37 +718,6 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
__this_cpu_add(memcg->stat_cpu->nr_page_events, nr_pages);
}

-static unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
- int nid, unsigned int lru_mask)
-{
- struct lruvec *lruvec = mem_cgroup_lruvec(NODE_DATA(nid), memcg);
- unsigned long nr = 0;
- enum lru_list lru;
-
- VM_BUG_ON((unsigned)nid >= nr_node_ids);
-
- for_each_lru(lru) {
- if (!(BIT(lru) & lru_mask))
- continue;
- nr += lruvec_page_state(lruvec, NR_LRU_BASE + lru);
- }
- return nr;
-}
-
-static unsigned long mem_cgroup_nr_lru_pages(struct mem_cgroup *memcg,
- unsigned int lru_mask)
-{
- unsigned long nr = 0;
- enum lru_list lru;
-
- for_each_lru(lru) {
- if (!(BIT(lru) & lru_mask))
- continue;
- nr += memcg_page_state(memcg, NR_LRU_BASE + lru);
- }
- return nr;
-}
-
static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
enum mem_cgroup_events_target target)
{
@@ -3328,6 +3297,42 @@ static int mem_cgroup_move_charge_write(struct cgroup_subsys_state *css,
#endif

#ifdef CONFIG_NUMA
+
+#define LRU_ALL_FILE (BIT(LRU_INACTIVE_FILE) | BIT(LRU_ACTIVE_FILE))
+#define LRU_ALL_ANON (BIT(LRU_INACTIVE_ANON) | BIT(LRU_ACTIVE_ANON))
+#define LRU_ALL ((1 << NR_LRU_LISTS) - 1)
+
+static unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
+ int nid, unsigned int lru_mask)
+{
+ struct lruvec *lruvec = mem_cgroup_lruvec(NODE_DATA(nid), memcg);
+ unsigned long nr = 0;
+ enum lru_list lru;
+
+ VM_BUG_ON((unsigned)nid >= nr_node_ids);
+
+ for_each_lru(lru) {
+ if (!(BIT(lru) & lru_mask))
+ continue;
+ nr += lruvec_page_state(lruvec, NR_LRU_BASE + lru);
+ }
+ return nr;
+}
+
+static unsigned long mem_cgroup_nr_lru_pages(struct mem_cgroup *memcg,
+ unsigned int lru_mask)
+{
+ unsigned long nr = 0;
+ enum lru_list lru;
+
+ for_each_lru(lru) {
+ if (!(BIT(lru) & lru_mask))
+ continue;
+ nr += memcg_page_state(memcg, NR_LRU_BASE + lru);
+ }
+ return nr;
+}
+
static int memcg_numa_stat_show(struct seq_file *m, void *v)
{
struct numa_stat {
--
2.20.1


2019-02-16 05:10:06

by Johannes Weiner

[permalink] [raw]
Subject: [PATCH 5/6] mm: memcontrol: push down mem_cgroup_nr_lru_pages()

mem_cgroup_nr_lru_pages() is just a convenience wrapper around
memcg_page_state() that takes bitmasks of lru indexes and aggregates
the counts for those.

Replace callsites where the bitmask is simple enough with direct
memcg_page_state() call(s).

Signed-off-by: Johannes Weiner <[email protected]>
---
mm/memcontrol.c | 13 +++++++------
1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 73eb8333bc73..6d0c3374669f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1354,7 +1354,7 @@ void mem_cgroup_print_oom_meminfo(struct mem_cgroup *memcg)

for (i = 0; i < NR_LRU_LISTS; i++)
pr_cont(" %s:%luKB", mem_cgroup_lru_names[i],
- K(mem_cgroup_nr_lru_pages(iter, BIT(i))));
+ K(memcg_page_state(iter, NR_LRU_BASE + i)));

pr_cont("\n");
}
@@ -2987,8 +2987,8 @@ static void accumulate_memcg_tree(struct mem_cgroup *memcg,
acc->events_array ? acc->events_array[i] : i);

for (i = 0; i < NR_LRU_LISTS; i++)
- acc->lru_pages[i] +=
- mem_cgroup_nr_lru_pages(mi, BIT(i));
+ acc->lru_pages[i] += memcg_page_state(mi,
+ NR_LRU_BASE + i);
}
}

@@ -3418,7 +3418,8 @@ static int memcg_stat_show(struct seq_file *m, void *v)

for (i = 0; i < NR_LRU_LISTS; i++)
seq_printf(m, "%s %lu\n", mem_cgroup_lru_names[i],
- mem_cgroup_nr_lru_pages(memcg, BIT(i)) * PAGE_SIZE);
+ memcg_page_state(memcg, NR_LRU_BASE + i) *
+ PAGE_SIZE);

/* Hierarchical information */
memory = memsw = PAGE_COUNTER_MAX;
@@ -3909,8 +3910,8 @@ void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages,

/* this should eventually include NR_UNSTABLE_NFS */
*pwriteback = memcg_page_state(memcg, NR_WRITEBACK);
- *pfilepages = mem_cgroup_nr_lru_pages(memcg, (1 << LRU_INACTIVE_FILE) |
- (1 << LRU_ACTIVE_FILE));
+ *pfilepages = memcg_page_state(memcg, NR_INACTIVE_FILE) +
+ memcg_page_state(memcg, NR_ACTIVE_FILE);
*pheadroom = PAGE_COUNTER_MAX;

while ((parent = parent_mem_cgroup(memcg))) {
--
2.20.1


2019-02-16 05:11:31

by Johannes Weiner

[permalink] [raw]
Subject: [PATCH 2/6] mm: memcontrol: replace zone summing with lruvec_page_state()

Instead of adding up the zone counters, use lruvec_page_state() to get
the node state directly. This is a bit cheaper and more stream-lined.

Signed-off-by: Johannes Weiner <[email protected]>
---
include/linux/memcontrol.h | 18 ------------------
mm/memcontrol.c | 2 +-
mm/vmscan.c | 2 +-
3 files changed, 2 insertions(+), 20 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 83ae11cbd12c..206090de5d7c 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -499,19 +499,6 @@ void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
int nid, unsigned int lru_mask);

-static inline
-unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru)
-{
- struct mem_cgroup_per_node *mz;
- unsigned long nr_pages = 0;
- int zid;
-
- mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
- for (zid = 0; zid < MAX_NR_ZONES; zid++)
- nr_pages += mz->lru_zone_size[zid][lru];
- return nr_pages;
-}
-
static inline
unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec,
enum lru_list lru, int zone_idx)
@@ -947,11 +934,6 @@ static inline bool mem_cgroup_online(struct mem_cgroup *memcg)
return true;
}

-static inline unsigned long
-mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru)
-{
- return 0;
-}
static inline
unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec,
enum lru_list lru, int zone_idx)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index af7f18b32389..a04177f25758 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -730,7 +730,7 @@ unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
for_each_lru(lru) {
if (!(BIT(lru) & lru_mask))
continue;
- nr += mem_cgroup_get_lru_size(lruvec, lru);
+ nr += lruvec_page_state(lruvec, NR_LRU_BASE + lru);
}
return nr;
}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index e979705bbf32..f88fef03fc04 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -346,7 +346,7 @@ unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone
int zid;

if (!mem_cgroup_disabled())
- lru_size = mem_cgroup_get_lru_size(lruvec, lru);
+ lru_size = lruvec_page_state(lruvec, NR_LRU_BASE + lru);
else
lru_size = node_page_state(lruvec_pgdat(lruvec), NR_LRU_BASE + lru);

--
2.20.1


2019-02-16 05:12:01

by Johannes Weiner

[permalink] [raw]
Subject: [PATCH 4/6] mm: memcontrol: push down mem_cgroup_node_nr_lru_pages()

mem_cgroup_node_nr_lru_pages() is just a convenience wrapper around
lruvec_page_state() that takes bitmasks of lru indexes and aggregates
the counts for those.

Replace callsites where the bitmask is simple enough with direct
lruvec_page_state() calls.

This removes the last extern user of mem_cgroup_node_nr_lru_pages(),
so make that function private again, too.

Signed-off-by: Johannes Weiner <[email protected]>
---
include/linux/memcontrol.h | 10 ----------
mm/memcontrol.c | 10 +++++++---
mm/workingset.c | 5 +++--
3 files changed, 10 insertions(+), 15 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 206090de5d7c..6bf06b9d0260 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -496,9 +496,6 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *memcg);
void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
int zid, int nr_pages);

-unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
- int nid, unsigned int lru_mask);
-
static inline
unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec,
enum lru_list lru, int zone_idx)
@@ -941,13 +938,6 @@ unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec,
return 0;
}

-static inline unsigned long
-mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
- int nid, unsigned int lru_mask)
-{
- return 0;
-}
-
static inline unsigned long mem_cgroup_get_max(struct mem_cgroup *memcg)
{
return 0;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 4d573f4e1759..73eb8333bc73 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -718,7 +718,7 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
__this_cpu_add(memcg->stat_cpu->nr_page_events, nr_pages);
}

-unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
+static unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
int nid, unsigned int lru_mask)
{
struct lruvec *lruvec = mem_cgroup_lruvec(NODE_DATA(nid), memcg);
@@ -1413,11 +1413,15 @@ static bool mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
static bool test_mem_cgroup_node_reclaimable(struct mem_cgroup *memcg,
int nid, bool noswap)
{
- if (mem_cgroup_node_nr_lru_pages(memcg, nid, LRU_ALL_FILE))
+ struct lruvec *lruvec = mem_cgroup_lruvec(NODE_DATA(nid), memcg);
+
+ if (lruvec_page_state(lruvec, NR_INACTIVE_FILE) ||
+ lruvec_page_state(lruvec, NR_ACTIVE_FILE))
return true;
if (noswap || !total_swap_pages)
return false;
- if (mem_cgroup_node_nr_lru_pages(memcg, nid, LRU_ALL_ANON))
+ if (lruvec_page_state(lruvec, NR_INACTIVE_ANON) ||
+ lruvec_page_state(lruvec, NR_ACTIVE_ANON))
return true;
return false;

diff --git a/mm/workingset.c b/mm/workingset.c
index dcb994f2acc2..dbc333a21254 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -427,10 +427,11 @@ static unsigned long count_shadow_nodes(struct shrinker *shrinker,
#ifdef CONFIG_MEMCG
if (sc->memcg) {
struct lruvec *lruvec;
+ int i;

- pages = mem_cgroup_node_nr_lru_pages(sc->memcg, sc->nid,
- LRU_ALL);
lruvec = mem_cgroup_lruvec(NODE_DATA(sc->nid), sc->memcg);
+ for (pages = 0, i = 0; i < NR_LRU_LISTS; i++)
+ pages += lruvec_page_state(lruvec, NR_LRU_BASE + i);
pages += lruvec_page_state(lruvec, NR_SLAB_RECLAIMABLE);
pages += lruvec_page_state(lruvec, NR_SLAB_UNRECLAIMABLE);
} else
--
2.20.1