LinuxLists.cc - [PATCH 7/7] mm: memcontrol: consolidate lruvec stat flushing

2021-02-03 00:42:00

Subject: [PATCH 7/7] mm: memcontrol: consolidate lruvec stat flushing

There are two functions to flush the per-cpu data of an lruvec into
the rest of the cgroup tree: when the cgroup is being freed, and when
a CPU disappears during hotplug. The difference is whether all CPUs or
just one is being collected, but the rest of the flushing code is the
same. Merge them into one function and share the common code.

Signed-off-by: Johannes Weiner <[email protected]>
---
mm/memcontrol.c | 88 +++++++++++++++++++++++--------------------------
1 file changed, 42 insertions(+), 46 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index b205b2413186..88e8afc49a46 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2410,39 +2410,56 @@ static void drain_all_stock(struct mem_cgroup *root_memcg)
mutex_unlock(&percpu_charge_mutex);
}

-static int memcg_hotplug_cpu_dead(unsigned int cpu)
+static void memcg_flush_lruvec_page_state(struct mem_cgroup *memcg, int cpu)
{
- struct memcg_stock_pcp *stock;
- struct mem_cgroup *memcg;
-
- stock = &per_cpu(memcg_stock, cpu);
- drain_stock(stock);
+ int nid;

- for_each_mem_cgroup(memcg) {
+ for_each_node(nid) {
+ struct mem_cgroup_per_node *pn = memcg->nodeinfo[nid];
+ unsigned long stat[NR_VM_NODE_STAT_ITEMS] = { 0, };
+ struct batched_lruvec_stat *lstatc;
int i;

- for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
- int nid;
-
- for_each_node(nid) {
- struct batched_lruvec_stat *lstatc;
- struct mem_cgroup_per_node *pn;
- long x;
-
- pn = memcg->nodeinfo[nid];
+ if (cpu == -1) {
+ int cpui;
+ /*
+ * The memcg is about to be freed, collect all
+ * CPUs, no need to zero anything out.
+ */
+ for_each_online_cpu(cpui) {
+ lstatc = per_cpu_ptr(pn->lruvec_stat_cpu, cpui);
+ for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
+ stat[i] += lstatc->count[i];
+ }
+ } else {
+ /*
+ * The CPU has gone away, collect and zero out
+ * its stats, it may come back later.
+ */
+ for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
lstatc = per_cpu_ptr(pn->lruvec_stat_cpu, cpu);
-
- x = lstatc->count[i];
+ stat[i] = lstatc->count[i];
lstatc->count[i] = 0;
-
- if (x) {
- do {
- atomic_long_add(x, &pn->lruvec_stat[i]);
- } while ((pn = parent_nodeinfo(pn, nid)));
- }
}
}
+
+ do {
+ for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
+ atomic_long_add(stat[i], &pn->lruvec_stat[i]);
+ } while ((pn = parent_nodeinfo(pn, nid)));
}
+}
+
+static int memcg_hotplug_cpu_dead(unsigned int cpu)
+{
+ struct memcg_stock_pcp *stock;
+ struct mem_cgroup *memcg;
+
+ stock = &per_cpu(memcg_stock, cpu);
+ drain_stock(stock);
+
+ for_each_mem_cgroup(memcg)
+ memcg_flush_lruvec_page_state(memcg, cpu);

return 0;
}
@@ -3636,27 +3653,6 @@ static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
}
}

-static void memcg_flush_lruvec_page_state(struct mem_cgroup *memcg)
-{
- int node;
-
- for_each_node(node) {
- struct mem_cgroup_per_node *pn = memcg->nodeinfo[node];
- unsigned long stat[NR_VM_NODE_STAT_ITEMS] = {0, };
- struct mem_cgroup_per_node *pi;
- int cpu, i;
-
- for_each_online_cpu(cpu)
- for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
- stat[i] += per_cpu(
- pn->lruvec_stat_cpu->count[i], cpu);
-
- for (pi = pn; pi; pi = parent_nodeinfo(pi, node))
- for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
- atomic_long_add(stat[i], &pi->lruvec_stat[i]);
- }
-}
-
#ifdef CONFIG_MEMCG_KMEM
static int memcg_online_kmem(struct mem_cgroup *memcg)
{
@@ -5197,7 +5193,7 @@ static void mem_cgroup_free(struct mem_cgroup *memcg)
* Flush percpu lruvec stats to guarantee the value
* correctness on parent's and all ancestor levels.
*/
- memcg_flush_lruvec_page_state(memcg);
+ memcg_flush_lruvec_page_state(memcg, -1);
__mem_cgroup_free(memcg);
}

--
2.30.0

2021-02-03 02:27:50

by Roman Gushchin

[permalink] [raw]

Subject: Re: [PATCH 7/7] mm: memcontrol: consolidate lruvec stat flushing

On Tue, Feb 02, 2021 at 01:47:46PM -0500, Johannes Weiner wrote:
> There are two functions to flush the per-cpu data of an lruvec into
> the rest of the cgroup tree: when the cgroup is being freed, and when
> a CPU disappears during hotplug. The difference is whether all CPUs or
> just one is being collected, but the rest of the flushing code is the
> same. Merge them into one function and share the common code.
>
> Signed-off-by: Johannes Weiner <[email protected]>
> ---
> mm/memcontrol.c | 88 +++++++++++++++++++++++--------------------------
> 1 file changed, 42 insertions(+), 46 deletions(-)
>
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index b205b2413186..88e8afc49a46 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -2410,39 +2410,56 @@ static void drain_all_stock(struct mem_cgroup *root_memcg)
> mutex_unlock(&percpu_charge_mutex);
> }
>
> -static int memcg_hotplug_cpu_dead(unsigned int cpu)
> +static void memcg_flush_lruvec_page_state(struct mem_cgroup *memcg, int cpu)
> {
> - struct memcg_stock_pcp *stock;
> - struct mem_cgroup *memcg;
> -
> - stock = &per_cpu(memcg_stock, cpu);
> - drain_stock(stock);
> + int nid;
>
> - for_each_mem_cgroup(memcg) {
> + for_each_node(nid) {
> + struct mem_cgroup_per_node *pn = memcg->nodeinfo[nid];
> + unsigned long stat[NR_VM_NODE_STAT_ITEMS] = { 0, };
^^^^
Same here.

> + struct batched_lruvec_stat *lstatc;
> int i;
>
> - for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
> - int nid;
> -
> - for_each_node(nid) {
> - struct batched_lruvec_stat *lstatc;
> - struct mem_cgroup_per_node *pn;
> - long x;
> -
> - pn = memcg->nodeinfo[nid];
> + if (cpu == -1) {
> + int cpui;
> + /*
> + * The memcg is about to be freed, collect all
> + * CPUs, no need to zero anything out.
> + */
> + for_each_online_cpu(cpui) {
> + lstatc = per_cpu_ptr(pn->lruvec_stat_cpu, cpui);
> + for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
> + stat[i] += lstatc->count[i];
> + }
> + } else {
> + /*
> + * The CPU has gone away, collect and zero out
> + * its stats, it may come back later.
> + */
> + for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
> lstatc = per_cpu_ptr(pn->lruvec_stat_cpu, cpu);
> -
> - x = lstatc->count[i];
> + stat[i] = lstatc->count[i];
> lstatc->count[i] = 0;
> -
> - if (x) {
> - do {
> - atomic_long_add(x, &pn->lruvec_stat[i]);
> - } while ((pn = parent_nodeinfo(pn, nid)));
> - }
> }
> }
> +
> + do {
> + for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
> + atomic_long_add(stat[i], &pn->lruvec_stat[i]);
> + } while ((pn = parent_nodeinfo(pn, nid)));
> }
> +}
> +
> +static int memcg_hotplug_cpu_dead(unsigned int cpu)
> +{
> + struct memcg_stock_pcp *stock;
> + struct mem_cgroup *memcg;
> +
> + stock = &per_cpu(memcg_stock, cpu);
> + drain_stock(stock);
> +
> + for_each_mem_cgroup(memcg)
> + memcg_flush_lruvec_page_state(memcg, cpu);
>
> return 0;
> }
> @@ -3636,27 +3653,6 @@ static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
> }
> }
>
> -static void memcg_flush_lruvec_page_state(struct mem_cgroup *memcg)
> -{
> - int node;
> -
> - for_each_node(node) {
> - struct mem_cgroup_per_node *pn = memcg->nodeinfo[node];
> - unsigned long stat[NR_VM_NODE_STAT_ITEMS] = {0, };
> - struct mem_cgroup_per_node *pi;
> - int cpu, i;
> -
> - for_each_online_cpu(cpu)
> - for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
> - stat[i] += per_cpu(
> - pn->lruvec_stat_cpu->count[i], cpu);
> -
> - for (pi = pn; pi; pi = parent_nodeinfo(pi, node))
> - for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
> - atomic_long_add(stat[i], &pi->lruvec_stat[i]);
> - }
> -}
> -
> #ifdef CONFIG_MEMCG_KMEM
> static int memcg_online_kmem(struct mem_cgroup *memcg)
> {
> @@ -5197,7 +5193,7 @@ static void mem_cgroup_free(struct mem_cgroup *memcg)
> * Flush percpu lruvec stats to guarantee the value
> * correctness on parent's and all ancestor levels.
> */
> - memcg_flush_lruvec_page_state(memcg);
> + memcg_flush_lruvec_page_state(memcg, -1);

I wonder if adding "cpu" or "percpu" to the function name would make it clearer what -1 means?
E.g. memcg_flush_(per)cpu_lruvec_stats(memcg, -1).

Reviewed-by: Roman Gushchin <[email protected]>

2021-02-04 21:50:05

by Johannes Weiner

[permalink] [raw]

Subject: Re: [PATCH 7/7] mm: memcontrol: consolidate lruvec stat flushing

On Tue, Feb 02, 2021 at 06:25:30PM -0800, Roman Gushchin wrote:
> On Tue, Feb 02, 2021 at 01:47:46PM -0500, Johannes Weiner wrote:
> > There are two functions to flush the per-cpu data of an lruvec into
> > the rest of the cgroup tree: when the cgroup is being freed, and when
> > a CPU disappears during hotplug. The difference is whether all CPUs or
> > just one is being collected, but the rest of the flushing code is the
> > same. Merge them into one function and share the common code.
> >
> > Signed-off-by: Johannes Weiner <[email protected]>
> > ---
> > mm/memcontrol.c | 88 +++++++++++++++++++++++--------------------------
> > 1 file changed, 42 insertions(+), 46 deletions(-)
> >
> > diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> > index b205b2413186..88e8afc49a46 100644
> > --- a/mm/memcontrol.c
> > +++ b/mm/memcontrol.c
> > @@ -2410,39 +2410,56 @@ static void drain_all_stock(struct mem_cgroup *root_memcg)
> > mutex_unlock(&percpu_charge_mutex);
> > }
> >
> > -static int memcg_hotplug_cpu_dead(unsigned int cpu)
> > +static void memcg_flush_lruvec_page_state(struct mem_cgroup *memcg, int cpu)
> > {
> > - struct memcg_stock_pcp *stock;
> > - struct mem_cgroup *memcg;
> > -
> > - stock = &per_cpu(memcg_stock, cpu);
> > - drain_stock(stock);
> > + int nid;
> >
> > - for_each_mem_cgroup(memcg) {
> > + for_each_node(nid) {
> > + struct mem_cgroup_per_node *pn = memcg->nodeinfo[nid];
> > + unsigned long stat[NR_VM_NODE_STAT_ITEMS] = { 0, };
> ^^^^
> Same here.
>
> > + struct batched_lruvec_stat *lstatc;
> > int i;
> >
> > - for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
> > - int nid;
> > -
> > - for_each_node(nid) {
> > - struct batched_lruvec_stat *lstatc;
> > - struct mem_cgroup_per_node *pn;
> > - long x;
> > -
> > - pn = memcg->nodeinfo[nid];
> > + if (cpu == -1) {
> > + int cpui;
> > + /*
> > + * The memcg is about to be freed, collect all
> > + * CPUs, no need to zero anything out.
> > + */
> > + for_each_online_cpu(cpui) {
> > + lstatc = per_cpu_ptr(pn->lruvec_stat_cpu, cpui);
> > + for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
> > + stat[i] += lstatc->count[i];
> > + }
> > + } else {
> > + /*
> > + * The CPU has gone away, collect and zero out
> > + * its stats, it may come back later.
> > + */
> > + for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
> > lstatc = per_cpu_ptr(pn->lruvec_stat_cpu, cpu);
> > -
> > - x = lstatc->count[i];
> > + stat[i] = lstatc->count[i];
> > lstatc->count[i] = 0;
> > -
> > - if (x) {
> > - do {
> > - atomic_long_add(x, &pn->lruvec_stat[i]);
> > - } while ((pn = parent_nodeinfo(pn, nid)));
> > - }
> > }
> > }
> > +
> > + do {
> > + for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
> > + atomic_long_add(stat[i], &pn->lruvec_stat[i]);
> > + } while ((pn = parent_nodeinfo(pn, nid)));
> > }
> > +}
> > +
> > +static int memcg_hotplug_cpu_dead(unsigned int cpu)
> > +{
> > + struct memcg_stock_pcp *stock;
> > + struct mem_cgroup *memcg;
> > +
> > + stock = &per_cpu(memcg_stock, cpu);
> > + drain_stock(stock);
> > +
> > + for_each_mem_cgroup(memcg)
> > + memcg_flush_lruvec_page_state(memcg, cpu);
> >
> > return 0;
> > }
> > @@ -3636,27 +3653,6 @@ static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
> > }
> > }
> >
> > -static void memcg_flush_lruvec_page_state(struct mem_cgroup *memcg)
> > -{
> > - int node;
> > -
> > - for_each_node(node) {
> > - struct mem_cgroup_per_node *pn = memcg->nodeinfo[node];
> > - unsigned long stat[NR_VM_NODE_STAT_ITEMS] = {0, };
> > - struct mem_cgroup_per_node *pi;
> > - int cpu, i;
> > -
> > - for_each_online_cpu(cpu)
> > - for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
> > - stat[i] += per_cpu(
> > - pn->lruvec_stat_cpu->count[i], cpu);
> > -
> > - for (pi = pn; pi; pi = parent_nodeinfo(pi, node))
> > - for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
> > - atomic_long_add(stat[i], &pi->lruvec_stat[i]);
> > - }
> > -}
> > -
> > #ifdef CONFIG_MEMCG_KMEM
> > static int memcg_online_kmem(struct mem_cgroup *memcg)
> > {
> > @@ -5197,7 +5193,7 @@ static void mem_cgroup_free(struct mem_cgroup *memcg)
> > * Flush percpu lruvec stats to guarantee the value
> > * correctness on parent's and all ancestor levels.
> > */
> > - memcg_flush_lruvec_page_state(memcg);
> > + memcg_flush_lruvec_page_state(memcg, -1);
>
> I wonder if adding "cpu" or "percpu" to the function name would make it clearer what -1 means?
> E.g. memcg_flush_(per)cpu_lruvec_stats(memcg, -1).

Yes, it's a bit ominous. I changed it to

memcg_flush_lruvec_page_state_cpu(memcg, -1);

percpu would have pushed the function signature over 80 characters.

> Reviewed-by: Roman Gushchin <[email protected]>

Thanks

2021-02-04 21:50:07

by Roman Gushchin

[permalink] [raw]

Subject: Re: [PATCH 7/7] mm: memcontrol: consolidate lruvec stat flushing

On Thu, Feb 04, 2021 at 04:44:27PM -0500, Johannes Weiner wrote:
> On Tue, Feb 02, 2021 at 06:25:30PM -0800, Roman Gushchin wrote:
> > On Tue, Feb 02, 2021 at 01:47:46PM -0500, Johannes Weiner wrote:
> > > There are two functions to flush the per-cpu data of an lruvec into
> > > the rest of the cgroup tree: when the cgroup is being freed, and when
> > > a CPU disappears during hotplug. The difference is whether all CPUs or
> > > just one is being collected, but the rest of the flushing code is the
> > > same. Merge them into one function and share the common code.
> > >
> > > Signed-off-by: Johannes Weiner <[email protected]>
> > > ---
> > > mm/memcontrol.c | 88 +++++++++++++++++++++++--------------------------
> > > 1 file changed, 42 insertions(+), 46 deletions(-)
> > >
> > > diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> > > index b205b2413186..88e8afc49a46 100644
> > > --- a/mm/memcontrol.c
> > > +++ b/mm/memcontrol.c
> > > @@ -2410,39 +2410,56 @@ static void drain_all_stock(struct mem_cgroup *root_memcg)
> > > mutex_unlock(&percpu_charge_mutex);
> > > }
> > >
> > > -static int memcg_hotplug_cpu_dead(unsigned int cpu)
> > > +static void memcg_flush_lruvec_page_state(struct mem_cgroup *memcg, int cpu)
> > > {
> > > - struct memcg_stock_pcp *stock;
> > > - struct mem_cgroup *memcg;
> > > -
> > > - stock = &per_cpu(memcg_stock, cpu);
> > > - drain_stock(stock);
> > > + int nid;
> > >
> > > - for_each_mem_cgroup(memcg) {
> > > + for_each_node(nid) {
> > > + struct mem_cgroup_per_node *pn = memcg->nodeinfo[nid];
> > > + unsigned long stat[NR_VM_NODE_STAT_ITEMS] = { 0, };
> > ^^^^
> > Same here.
> >
> > > + struct batched_lruvec_stat *lstatc;
> > > int i;
> > >
> > > - for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
> > > - int nid;
> > > -
> > > - for_each_node(nid) {
> > > - struct batched_lruvec_stat *lstatc;
> > > - struct mem_cgroup_per_node *pn;
> > > - long x;
> > > -
> > > - pn = memcg->nodeinfo[nid];
> > > + if (cpu == -1) {
> > > + int cpui;
> > > + /*
> > > + * The memcg is about to be freed, collect all
> > > + * CPUs, no need to zero anything out.
> > > + */
> > > + for_each_online_cpu(cpui) {
> > > + lstatc = per_cpu_ptr(pn->lruvec_stat_cpu, cpui);
> > > + for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
> > > + stat[i] += lstatc->count[i];
> > > + }
> > > + } else {
> > > + /*
> > > + * The CPU has gone away, collect and zero out
> > > + * its stats, it may come back later.
> > > + */
> > > + for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
> > > lstatc = per_cpu_ptr(pn->lruvec_stat_cpu, cpu);
> > > -
> > > - x = lstatc->count[i];
> > > + stat[i] = lstatc->count[i];
> > > lstatc->count[i] = 0;
> > > -
> > > - if (x) {
> > > - do {
> > > - atomic_long_add(x, &pn->lruvec_stat[i]);
> > > - } while ((pn = parent_nodeinfo(pn, nid)));
> > > - }
> > > }
> > > }
> > > +
> > > + do {
> > > + for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
> > > + atomic_long_add(stat[i], &pn->lruvec_stat[i]);
> > > + } while ((pn = parent_nodeinfo(pn, nid)));
> > > }
> > > +}
> > > +
> > > +static int memcg_hotplug_cpu_dead(unsigned int cpu)
> > > +{
> > > + struct memcg_stock_pcp *stock;
> > > + struct mem_cgroup *memcg;
> > > +
> > > + stock = &per_cpu(memcg_stock, cpu);
> > > + drain_stock(stock);
> > > +
> > > + for_each_mem_cgroup(memcg)
> > > + memcg_flush_lruvec_page_state(memcg, cpu);
> > >
> > > return 0;
> > > }
> > > @@ -3636,27 +3653,6 @@ static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
> > > }
> > > }
> > >
> > > -static void memcg_flush_lruvec_page_state(struct mem_cgroup *memcg)
> > > -{
> > > - int node;
> > > -
> > > - for_each_node(node) {
> > > - struct mem_cgroup_per_node *pn = memcg->nodeinfo[node];
> > > - unsigned long stat[NR_VM_NODE_STAT_ITEMS] = {0, };
> > > - struct mem_cgroup_per_node *pi;
> > > - int cpu, i;
> > > -
> > > - for_each_online_cpu(cpu)
> > > - for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
> > > - stat[i] += per_cpu(
> > > - pn->lruvec_stat_cpu->count[i], cpu);
> > > -
> > > - for (pi = pn; pi; pi = parent_nodeinfo(pi, node))
> > > - for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
> > > - atomic_long_add(stat[i], &pi->lruvec_stat[i]);
> > > - }
> > > -}
> > > -
> > > #ifdef CONFIG_MEMCG_KMEM
> > > static int memcg_online_kmem(struct mem_cgroup *memcg)
> > > {
> > > @@ -5197,7 +5193,7 @@ static void mem_cgroup_free(struct mem_cgroup *memcg)
> > > * Flush percpu lruvec stats to guarantee the value
> > > * correctness on parent's and all ancestor levels.
> > > */
> > > - memcg_flush_lruvec_page_state(memcg);
> > > + memcg_flush_lruvec_page_state(memcg, -1);
> >
> > I wonder if adding "cpu" or "percpu" to the function name would make it clearer what -1 means?
> > E.g. memcg_flush_(per)cpu_lruvec_stats(memcg, -1).
>
> Yes, it's a bit ominous. I changed it to
>
> memcg_flush_lruvec_page_state_cpu(memcg, -1);

Works for me!
But honestly I don't understand what "page_state" means in this context.

Thanks!

2021-02-05 19:41:00

by Johannes Weiner

[permalink] [raw]

Subject: Re: [PATCH 7/7] mm: memcontrol: consolidate lruvec stat flushing

On Fri, Feb 05, 2021 at 04:17:27PM +0100, Michal Hocko wrote:
> On Tue 02-02-21 13:47:46, Johannes Weiner wrote:
> > There are two functions to flush the per-cpu data of an lruvec into
> > the rest of the cgroup tree: when the cgroup is being freed, and when
> > a CPU disappears during hotplug. The difference is whether all CPUs or
> > just one is being collected, but the rest of the flushing code is the
> > same. Merge them into one function and share the common code.
>
> IIUC the only reason for the cpu == -1 special case is to avoid
> zeroing, right? Is this optimization worth the special case? The code
> would be slightly easier to follow without this.

Hm, it was less about the optimization and more about which CPU(s)
need(s) to be handled. But it's pretty silly the way it's written,
indeed. I'll move the for_each_online_cpu() to the caller and drop the
cpu==-1 special casing, it makes things much simpler and more obvious.

> > Signed-off-by: Johannes Weiner <[email protected]>
>
> Anyway the above is not really a fundamental objection. It is more important
> to unify the flushing.
>
> Acked-by: Michal Hocko <[email protected]>

Thanks. v2 is different, so I'll wait with taking the ack.

2021-02-05 23:53:16

by Michal Hocko

[permalink] [raw]

Subject: Re: [PATCH 7/7] mm: memcontrol: consolidate lruvec stat flushing

On Tue 02-02-21 13:47:46, Johannes Weiner wrote:
> There are two functions to flush the per-cpu data of an lruvec into
> the rest of the cgroup tree: when the cgroup is being freed, and when
> a CPU disappears during hotplug. The difference is whether all CPUs or
> just one is being collected, but the rest of the flushing code is the
> same. Merge them into one function and share the common code.

IIUC the only reason for the cpu == -1 special case is to avoid
zeroing, right? Is this optimization worth the special case? The code
would be slightly easier to follow without this.

> Signed-off-by: Johannes Weiner <[email protected]>

Anyway the above is not really a fundamental objection. It is more important
to unify the flushing.

Acked-by: Michal Hocko <[email protected]>

> ---
> mm/memcontrol.c | 88 +++++++++++++++++++++++--------------------------
> 1 file changed, 42 insertions(+), 46 deletions(-)
>
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index b205b2413186..88e8afc49a46 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -2410,39 +2410,56 @@ static void drain_all_stock(struct mem_cgroup *root_memcg)
> mutex_unlock(&percpu_charge_mutex);
> }
>
> -static int memcg_hotplug_cpu_dead(unsigned int cpu)
> +static void memcg_flush_lruvec_page_state(struct mem_cgroup *memcg, int cpu)
> {
> - struct memcg_stock_pcp *stock;
> - struct mem_cgroup *memcg;
> -
> - stock = &per_cpu(memcg_stock, cpu);
> - drain_stock(stock);
> + int nid;
>
> - for_each_mem_cgroup(memcg) {
> + for_each_node(nid) {
> + struct mem_cgroup_per_node *pn = memcg->nodeinfo[nid];
> + unsigned long stat[NR_VM_NODE_STAT_ITEMS] = { 0, };
> + struct batched_lruvec_stat *lstatc;
> int i;
>
> - for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
> - int nid;
> -
> - for_each_node(nid) {
> - struct batched_lruvec_stat *lstatc;
> - struct mem_cgroup_per_node *pn;
> - long x;
> -
> - pn = memcg->nodeinfo[nid];
> + if (cpu == -1) {
> + int cpui;
> + /*
> + * The memcg is about to be freed, collect all
> + * CPUs, no need to zero anything out.
> + */
> + for_each_online_cpu(cpui) {
> + lstatc = per_cpu_ptr(pn->lruvec_stat_cpu, cpui);
> + for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
> + stat[i] += lstatc->count[i];
> + }
> + } else {
> + /*
> + * The CPU has gone away, collect and zero out
> + * its stats, it may come back later.
> + */
> + for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
> lstatc = per_cpu_ptr(pn->lruvec_stat_cpu, cpu);
> -
> - x = lstatc->count[i];
> + stat[i] = lstatc->count[i];
> lstatc->count[i] = 0;
> -
> - if (x) {
> - do {
> - atomic_long_add(x, &pn->lruvec_stat[i]);
> - } while ((pn = parent_nodeinfo(pn, nid)));
> - }
> }
> }
> +
> + do {
> + for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
> + atomic_long_add(stat[i], &pn->lruvec_stat[i]);
> + } while ((pn = parent_nodeinfo(pn, nid)));
> }
> +}
> +
> +static int memcg_hotplug_cpu_dead(unsigned int cpu)
> +{
> + struct memcg_stock_pcp *stock;
> + struct mem_cgroup *memcg;
> +
> + stock = &per_cpu(memcg_stock, cpu);
> + drain_stock(stock);
> +
> + for_each_mem_cgroup(memcg)
> + memcg_flush_lruvec_page_state(memcg, cpu);
>
> return 0;
> }
> @@ -3636,27 +3653,6 @@ static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
> }
> }
>
> -static void memcg_flush_lruvec_page_state(struct mem_cgroup *memcg)
> -{
> - int node;
> -
> - for_each_node(node) {
> - struct mem_cgroup_per_node *pn = memcg->nodeinfo[node];
> - unsigned long stat[NR_VM_NODE_STAT_ITEMS] = {0, };
> - struct mem_cgroup_per_node *pi;
> - int cpu, i;
> -
> - for_each_online_cpu(cpu)
> - for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
> - stat[i] += per_cpu(
> - pn->lruvec_stat_cpu->count[i], cpu);
> -
> - for (pi = pn; pi; pi = parent_nodeinfo(pi, node))
> - for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
> - atomic_long_add(stat[i], &pi->lruvec_stat[i]);
> - }
> -}
> -
> #ifdef CONFIG_MEMCG_KMEM
> static int memcg_online_kmem(struct mem_cgroup *memcg)
> {
> @@ -5197,7 +5193,7 @@ static void mem_cgroup_free(struct mem_cgroup *memcg)
> * Flush percpu lruvec stats to guarantee the value
> * correctness on parent's and all ancestor levels.
> */
> - memcg_flush_lruvec_page_state(memcg);
> + memcg_flush_lruvec_page_state(memcg, -1);
> __mem_cgroup_free(memcg);
> }
>
> --
> 2.30.0
>

--
Michal Hocko
SUSE Labs