2023-11-27 19:37:27

by Nhat Pham

[permalink] [raw]
Subject: [PATCH v6 2/6] memcontrol: allows mem_cgroup_iter() to check for onlineness

The new zswap writeback scheme requires an online-only memcg hierarchy
traversal. Add a new parameter to mem_cgroup_iter() to check for
onlineness before returning.

Signed-off-by: Nhat Pham <[email protected]>
---
include/linux/memcontrol.h | 4 ++--
mm/memcontrol.c | 17 ++++++++++-------
mm/shrinker.c | 4 ++--
mm/vmscan.c | 26 +++++++++++++-------------
4 files changed, 27 insertions(+), 24 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 7bdcf3020d7a..86adce081a08 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -832,7 +832,7 @@ static inline void mem_cgroup_put(struct mem_cgroup *memcg)

struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
struct mem_cgroup *,
- struct mem_cgroup_reclaim_cookie *);
+ struct mem_cgroup_reclaim_cookie *, bool online);
void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *);
void mem_cgroup_scan_tasks(struct mem_cgroup *memcg,
int (*)(struct task_struct *, void *), void *arg);
@@ -1381,7 +1381,7 @@ static inline struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio,
static inline struct mem_cgroup *
mem_cgroup_iter(struct mem_cgroup *root,
struct mem_cgroup *prev,
- struct mem_cgroup_reclaim_cookie *reclaim)
+ struct mem_cgroup_reclaim_cookie *reclaim, bool online)
{
return NULL;
}
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 564aa8f25b71..a1f051adaa15 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -221,14 +221,14 @@ enum res_type {
* be used for reference counting.
*/
#define for_each_mem_cgroup_tree(iter, root) \
- for (iter = mem_cgroup_iter(root, NULL, NULL); \
+ for (iter = mem_cgroup_iter(root, NULL, NULL, false); \
iter != NULL; \
- iter = mem_cgroup_iter(root, iter, NULL))
+ iter = mem_cgroup_iter(root, iter, NULL, false))

#define for_each_mem_cgroup(iter) \
- for (iter = mem_cgroup_iter(NULL, NULL, NULL); \
+ for (iter = mem_cgroup_iter(NULL, NULL, NULL, false); \
iter != NULL; \
- iter = mem_cgroup_iter(NULL, iter, NULL))
+ iter = mem_cgroup_iter(NULL, iter, NULL, false))

static inline bool task_is_dying(void)
{
@@ -1115,6 +1115,7 @@ struct mem_cgroup *get_mem_cgroup_from_current(void)
* @root: hierarchy root
* @prev: previously returned memcg, NULL on first invocation
* @reclaim: cookie for shared reclaim walks, NULL for full walks
+ * @online: skip offline memcgs
*
* Returns references to children of the hierarchy below @root, or
* @root itself, or %NULL after a full round-trip.
@@ -1129,7 +1130,8 @@ struct mem_cgroup *get_mem_cgroup_from_current(void)
*/
struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
struct mem_cgroup *prev,
- struct mem_cgroup_reclaim_cookie *reclaim)
+ struct mem_cgroup_reclaim_cookie *reclaim,
+ bool online)
{
struct mem_cgroup_reclaim_iter *iter;
struct cgroup_subsys_state *css = NULL;
@@ -1199,7 +1201,8 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
* is provided by the caller, so we know it's alive
* and kicking, and don't take an extra reference.
*/
- if (css == &root->css || css_tryget(css)) {
+ if (css == &root->css || (!online && css_tryget(css)) ||
+ css_tryget_online(css)) {
memcg = mem_cgroup_from_css(css);
break;
}
@@ -1812,7 +1815,7 @@ static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
excess = soft_limit_excess(root_memcg);

while (1) {
- victim = mem_cgroup_iter(root_memcg, victim, &reclaim);
+ victim = mem_cgroup_iter(root_memcg, victim, &reclaim, false);
if (!victim) {
loop++;
if (loop >= 2) {
diff --git a/mm/shrinker.c b/mm/shrinker.c
index dd91eab43ed3..54f5d3aa4f27 100644
--- a/mm/shrinker.c
+++ b/mm/shrinker.c
@@ -160,7 +160,7 @@ static int expand_shrinker_info(int new_id)
new_size = shrinker_unit_size(new_nr_max);
old_size = shrinker_unit_size(shrinker_nr_max);

- memcg = mem_cgroup_iter(NULL, NULL, NULL);
+ memcg = mem_cgroup_iter(NULL, NULL, NULL, false);
do {
ret = expand_one_shrinker_info(memcg, new_size, old_size,
new_nr_max);
@@ -168,7 +168,7 @@ static int expand_shrinker_info(int new_id)
mem_cgroup_iter_break(NULL, memcg);
goto out;
}
- } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL);
+ } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL, false)) != NULL);
out:
if (!ret)
shrinker_nr_max = new_nr_max;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index d8c3338fee0f..9a65ee3a1bb7 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -397,10 +397,10 @@ static unsigned long drop_slab_node(int nid)
unsigned long freed = 0;
struct mem_cgroup *memcg = NULL;

- memcg = mem_cgroup_iter(NULL, NULL, NULL);
+ memcg = mem_cgroup_iter(NULL, NULL, NULL, false);
do {
freed += shrink_slab(GFP_KERNEL, nid, memcg, 0);
- } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL);
+ } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL, false)) != NULL);

return freed;
}
@@ -3935,7 +3935,7 @@ static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
if (!min_ttl || sc->order || sc->priority == DEF_PRIORITY)
return;

- memcg = mem_cgroup_iter(NULL, NULL, NULL);
+ memcg = mem_cgroup_iter(NULL, NULL, NULL, false);
do {
struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat);

@@ -3945,7 +3945,7 @@ static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
}

cond_resched();
- } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)));
+ } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL, false)));

/*
* The main goal is to OOM kill if every generation from all memcgs is
@@ -5037,7 +5037,7 @@ static void lru_gen_change_state(bool enabled)
else
static_branch_disable_cpuslocked(&lru_gen_caps[LRU_GEN_CORE]);

- memcg = mem_cgroup_iter(NULL, NULL, NULL);
+ memcg = mem_cgroup_iter(NULL, NULL, NULL, false);
do {
int nid;

@@ -5061,7 +5061,7 @@ static void lru_gen_change_state(bool enabled)
}

cond_resched();
- } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)));
+ } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL, false)));
unlock:
mutex_unlock(&state_mutex);
put_online_mems();
@@ -5164,7 +5164,7 @@ static void *lru_gen_seq_start(struct seq_file *m, loff_t *pos)
if (!m->private)
return ERR_PTR(-ENOMEM);

- memcg = mem_cgroup_iter(NULL, NULL, NULL);
+ memcg = mem_cgroup_iter(NULL, NULL, NULL, false);
do {
int nid;

@@ -5172,7 +5172,7 @@ static void *lru_gen_seq_start(struct seq_file *m, loff_t *pos)
if (!nr_to_skip--)
return get_lruvec(memcg, nid);
}
- } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)));
+ } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL, false)));

return NULL;
}
@@ -5195,7 +5195,7 @@ static void *lru_gen_seq_next(struct seq_file *m, void *v, loff_t *pos)

nid = next_memory_node(nid);
if (nid == MAX_NUMNODES) {
- memcg = mem_cgroup_iter(NULL, memcg, NULL);
+ memcg = mem_cgroup_iter(NULL, memcg, NULL, false);
if (!memcg)
return NULL;

@@ -5798,7 +5798,7 @@ static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
struct mem_cgroup *target_memcg = sc->target_mem_cgroup;
struct mem_cgroup *memcg;

- memcg = mem_cgroup_iter(target_memcg, NULL, NULL);
+ memcg = mem_cgroup_iter(target_memcg, NULL, NULL, false);
do {
struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat);
unsigned long reclaimed;
@@ -5855,7 +5855,7 @@ static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
sc->nr_scanned - scanned,
sc->nr_reclaimed - reclaimed);

- } while ((memcg = mem_cgroup_iter(target_memcg, memcg, NULL)));
+ } while ((memcg = mem_cgroup_iter(target_memcg, memcg, NULL, false)));
}

static void shrink_node(pg_data_t *pgdat, struct scan_control *sc)
@@ -6522,12 +6522,12 @@ static void kswapd_age_node(struct pglist_data *pgdat, struct scan_control *sc)
if (!inactive_is_low(lruvec, LRU_INACTIVE_ANON))
return;

- memcg = mem_cgroup_iter(NULL, NULL, NULL);
+ memcg = mem_cgroup_iter(NULL, NULL, NULL, false);
do {
lruvec = mem_cgroup_lruvec(memcg, pgdat);
shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
sc, LRU_ACTIVE_ANON);
- memcg = mem_cgroup_iter(NULL, memcg, NULL);
+ memcg = mem_cgroup_iter(NULL, memcg, NULL, false);
} while (memcg);
}

--
2.34.1


2023-11-27 21:43:13

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH v6 2/6] memcontrol: allows mem_cgroup_iter() to check for onlineness

On Mon, 27 Nov 2023 11:36:59 -0800 Nhat Pham <[email protected]> wrote:

> The new zswap writeback scheme requires an online-only memcg hierarchy
> traversal. Add a new parameter to mem_cgroup_iter() to check for
> onlineness before returning.

I get a few build errors, perhaps because of patch timing issues...

mm/shrinker_debug.c: In function 'shrinker_debugfs_count_show':
mm/shrinker_debug.c:64:17: error: too few arguments to function 'mem_cgroup_iter'
64 | memcg = mem_cgroup_iter(NULL, NULL, NULL);
| ^~~~~~~~~~~~~~~
In file included from mm/shrinker_debug.c:7:
./include/linux/memcontrol.h:833:20: note: declared here
833 | struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
| ^~~~~~~~~~~~~~~
mm/shrinker_debug.c:89:27: error: too few arguments to function 'mem_cgroup_iter'
89 | } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL);
| ^~~~~~~~~~~~~~~
./include/linux/memcontrol.h:833:20: note: declared here
833 | struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
| ^~~~~~~~~~~~~~~
mm/damon/sysfs-schemes.c: In function 'damon_sysfs_memcg_path_to_id':
mm/damon/sysfs-schemes.c:1594:22: error: too few arguments to function 'mem_cgroup_iter'
1594 | for (memcg = mem_cgroup_iter(NULL, NULL, NULL); memcg;
| ^~~~~~~~~~~~~~~
In file included from ./include/linux/damon.h:11,
from mm/damon/sysfs-common.h:8,
from mm/damon/sysfs-schemes.c:10:
./include/linux/memcontrol.h:833:20: note: declared here
833 | struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
| ^~~~~~~~~~~~~~~
mm/damon/sysfs-schemes.c:1595:33: error: too few arguments to function 'mem_cgroup_iter'
1595 | memcg = mem_cgroup_iter(NULL, memcg, NULL)) {
| ^~~~~~~~~~~~~~~
./include/linux/memcontrol.h:833:20: note: declared here
833 | struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
| ^~~~~~~~~~~~~~~

> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -832,7 +832,7 @@ static inline void mem_cgroup_put(struct mem_cgroup *memcg)
>
> struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
> struct mem_cgroup *,
> - struct mem_cgroup_reclaim_cookie *);
> + struct mem_cgroup_reclaim_cookie *, bool online);

How many callsites do we expect to utilize the new `online' argument?
Few, I suspect.

How about we fix the above and simplify the patch by adding a new
mem_cgroup_iter_online() and make mem_cgroup_iter() a one-line wrapper
which calls that and adds the online=false argument?

I also saw this, didn't investigate.

drivers/android/binder_alloc.c: In function 'binder_update_page_range':
drivers/android/binder_alloc.c:237:34: error: too few arguments to function 'list_lru_del'
237 | on_lru = list_lru_del(&binder_alloc_lru, &page->lru);

2023-11-27 22:34:59

by Nhat Pham

[permalink] [raw]
Subject: Re: [PATCH v6 2/6] memcontrol: allows mem_cgroup_iter() to check for onlineness

On Mon, Nov 27, 2023 at 1:43 PM Andrew Morton <[email protected]> wrote:
>
> On Mon, 27 Nov 2023 11:36:59 -0800 Nhat Pham <[email protected]> wrote:
>
> > The new zswap writeback scheme requires an online-only memcg hierarchy
> > traversal. Add a new parameter to mem_cgroup_iter() to check for
> > onlineness before returning.
>
> I get a few build errors, perhaps because of patch timing issues...

Ah I thought I got all of them. Must have somehow missed it.

>
> mm/shrinker_debug.c: In function 'shrinker_debugfs_count_show':
> mm/shrinker_debug.c:64:17: error: too few arguments to function 'mem_cgroup_iter'
> 64 | memcg = mem_cgroup_iter(NULL, NULL, NULL);
> | ^~~~~~~~~~~~~~~
> In file included from mm/shrinker_debug.c:7:
> ./include/linux/memcontrol.h:833:20: note: declared here
> 833 | struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
> | ^~~~~~~~~~~~~~~
> mm/shrinker_debug.c:89:27: error: too few arguments to function 'mem_cgroup_iter'
> 89 | } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL);
> | ^~~~~~~~~~~~~~~
> ./include/linux/memcontrol.h:833:20: note: declared here
> 833 | struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
> | ^~~~~~~~~~~~~~~
> mm/damon/sysfs-schemes.c: In function 'damon_sysfs_memcg_path_to_id':
> mm/damon/sysfs-schemes.c:1594:22: error: too few arguments to function 'mem_cgroup_iter'
> 1594 | for (memcg = mem_cgroup_iter(NULL, NULL, NULL); memcg;
> | ^~~~~~~~~~~~~~~
> In file included from ./include/linux/damon.h:11,
> from mm/damon/sysfs-common.h:8,
> from mm/damon/sysfs-schemes.c:10:
> ./include/linux/memcontrol.h:833:20: note: declared here
> 833 | struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
> | ^~~~~~~~~~~~~~~
> mm/damon/sysfs-schemes.c:1595:33: error: too few arguments to function 'mem_cgroup_iter'
> 1595 | memcg = mem_cgroup_iter(NULL, memcg, NULL)) {
> | ^~~~~~~~~~~~~~~
> ./include/linux/memcontrol.h:833:20: note: declared here
> 833 | struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
> | ^~~~~~~~~~~~~~~
>
> > --- a/include/linux/memcontrol.h
> > +++ b/include/linux/memcontrol.h
> > @@ -832,7 +832,7 @@ static inline void mem_cgroup_put(struct mem_cgroup *memcg)
> >
> > struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
> > struct mem_cgroup *,
> > - struct mem_cgroup_reclaim_cookie *);
> > + struct mem_cgroup_reclaim_cookie *, bool online);
>
> How many callsites do we expect to utilize the new `online' argument?
> Few, I suspect.
>
> How about we fix the above and simplify the patch by adding a new
> mem_cgroup_iter_online() and make mem_cgroup_iter() a one-line wrapper
> which calls that and adds the online=false argument?

But yes, this is a much smarter idea. Should have thought about it initially :)

>
> I also saw this, didn't investigate.
>
> drivers/android/binder_alloc.c: In function 'binder_update_page_range':
> drivers/android/binder_alloc.c:237:34: error: too few arguments to function 'list_lru_del'
> 237 | on_lru = list_lru_del(&binder_alloc_lru, &page->lru);

Oh yeah I missed this too - it's due to the API change introduced to
the previous bug. The old "list_lru_del" is now "list_lru_del_obj".

Let me double check everything again and send out the fixes. My apologies.

>

2023-11-28 09:38:55

by Michal Hocko

[permalink] [raw]
Subject: Re: [PATCH v6 2/6] memcontrol: allows mem_cgroup_iter() to check for onlineness

On Mon 27-11-23 11:36:59, Nhat Pham wrote:
> The new zswap writeback scheme requires an online-only memcg hierarchy
> traversal. Add a new parameter to mem_cgroup_iter() to check for
> onlineness before returning.

Why is this needed?
--
Michal Hocko
SUSE Labs

2023-11-28 16:54:20

by Nhat Pham

[permalink] [raw]
Subject: Re: [PATCH v6 2/6] memcontrol: allows mem_cgroup_iter() to check for onlineness

On Tue, Nov 28, 2023 at 1:38 AM Michal Hocko <[email protected]> wrote:
>
> On Mon 27-11-23 11:36:59, Nhat Pham wrote:
> > The new zswap writeback scheme requires an online-only memcg hierarchy
> > traversal. Add a new parameter to mem_cgroup_iter() to check for
> > onlineness before returning.
>
> Why is this needed?

For context, in patch 3 of this series, Domenico and I are adding
cgroup-aware LRU to zswap, so that we can perform workload-specific
zswap writeback. When the reclaim happens due to the global zswap
limit being hit, a cgroup is selected by the mem_cgroup_iter(), and
the last one selected is saved in the zswap pool (so that the
iteration can follow from there next time the limit is hit).

However, one problem with this scheme is we will be pinning the
reference to that saved memcg until the next global reclaim attempt,
which could prevent it from being killed for quite some time after it
has been offlined. Johannes, Yosry, and I discussed a couple of
approaches for a while, and decided to add a callback that would
release the reference held by the zswap pool when the memcg is
offlined, and the zswap pool will obtain the reference to the next
online memcg in the traversal (or at least one that has not had the
zswap-memcg-release-callback run on it yet).

> --
> Michal Hocko
> SUSE Labs

2023-11-28 16:59:10

by Nhat Pham

[permalink] [raw]
Subject: Re: [PATCH v6 2/6] memcontrol: allows mem_cgroup_iter() to check for onlineness

On Tue, Nov 28, 2023 at 8:53 AM Nhat Pham <[email protected]> wrote:
>
> On Tue, Nov 28, 2023 at 1:38 AM Michal Hocko <[email protected]> wrote:
> >
> > On Mon 27-11-23 11:36:59, Nhat Pham wrote:
> > > The new zswap writeback scheme requires an online-only memcg hierarchy
> > > traversal. Add a new parameter to mem_cgroup_iter() to check for
> > > onlineness before returning.
> >
> > Why is this needed?
>
> For context, in patch 3 of this series, Domenico and I are adding
> cgroup-aware LRU to zswap, so that we can perform workload-specific
> zswap writeback. When the reclaim happens due to the global zswap
> limit being hit, a cgroup is selected by the mem_cgroup_iter(), and
> the last one selected is saved in the zswap pool (so that the
> iteration can follow from there next time the limit is hit).
>
> However, one problem with this scheme is we will be pinning the
> reference to that saved memcg until the next global reclaim attempt,
> which could prevent it from being killed for quite some time after it
> has been offlined. Johannes, Yosry, and I discussed a couple of
> approaches for a while, and decided to add a callback that would
> release the reference held by the zswap pool when the memcg is
> offlined, and the zswap pool will obtain the reference to the next
> online memcg in the traversal (or at least one that has not had the
> zswap-memcg-release-callback run on it yet).

I forgot to add, but as Andrew had pointed out, this is quite a niche
use case (well only zswap is using it specifically). So I have decided
to keep the original behavior for mem_cgroup_iter(), and added a
special mem_cgroup_iter_online() that does this. All the current
mem_cgroup_iter() users should not see any change. This is already in
v7 of this patch series:

https://lore.kernel.org/linux-mm/[email protected]/




>
> > --
> > Michal Hocko
> > SUSE Labs

2023-11-29 09:19:12

by Michal Hocko

[permalink] [raw]
Subject: Re: [PATCH v6 2/6] memcontrol: allows mem_cgroup_iter() to check for onlineness

On Tue 28-11-23 08:53:56, Nhat Pham wrote:
> On Tue, Nov 28, 2023 at 1:38 AM Michal Hocko <[email protected]> wrote:
> >
> > On Mon 27-11-23 11:36:59, Nhat Pham wrote:
> > > The new zswap writeback scheme requires an online-only memcg hierarchy
> > > traversal. Add a new parameter to mem_cgroup_iter() to check for
> > > onlineness before returning.
> >
> > Why is this needed?
>
> For context, in patch 3 of this series, Domenico and I are adding
> cgroup-aware LRU to zswap, so that we can perform workload-specific
> zswap writeback. When the reclaim happens due to the global zswap
> limit being hit, a cgroup is selected by the mem_cgroup_iter(), and
> the last one selected is saved in the zswap pool (so that the
> iteration can follow from there next time the limit is hit).
>
> However, one problem with this scheme is we will be pinning the
> reference to that saved memcg until the next global reclaim attempt,
> which could prevent it from being killed for quite some time after it
> has been offlined. Johannes, Yosry, and I discussed a couple of
> approaches for a while, and decided to add a callback that would
> release the reference held by the zswap pool when the memcg is
> offlined, and the zswap pool will obtain the reference to the next
> online memcg in the traversal (or at least one that has not had the
> zswap-memcg-release-callback run on it yet).

This should be a part of the changelog along with an explanation why
this cannot be handled on the caller level? You have a pin on the memcg,
you can check it is online and scratch it if not, right? Why do we need
to make a rather convoluted iterator interface more complex when most
users simply do not require that?

--
Michal Hocko
SUSE Labs

2023-11-30 00:48:05

by Nhat Pham

[permalink] [raw]
Subject: Re: [PATCH v6 2/6] memcontrol: allows mem_cgroup_iter() to check for onlineness

On Wed, Nov 29, 2023 at 1:18 AM Michal Hocko <[email protected]> wrote:
>
> On Tue 28-11-23 08:53:56, Nhat Pham wrote:
> > On Tue, Nov 28, 2023 at 1:38 AM Michal Hocko <[email protected]> wrote:
> > >
> > > On Mon 27-11-23 11:36:59, Nhat Pham wrote:
> > > > The new zswap writeback scheme requires an online-only memcg hierarchy
> > > > traversal. Add a new parameter to mem_cgroup_iter() to check for
> > > > onlineness before returning.
> > >
> > > Why is this needed?
> >
> > For context, in patch 3 of this series, Domenico and I are adding
> > cgroup-aware LRU to zswap, so that we can perform workload-specific
> > zswap writeback. When the reclaim happens due to the global zswap
> > limit being hit, a cgroup is selected by the mem_cgroup_iter(), and
> > the last one selected is saved in the zswap pool (so that the
> > iteration can follow from there next time the limit is hit).
> >
> > However, one problem with this scheme is we will be pinning the
> > reference to that saved memcg until the next global reclaim attempt,
> > which could prevent it from being killed for quite some time after it
> > has been offlined. Johannes, Yosry, and I discussed a couple of
> > approaches for a while, and decided to add a callback that would
> > release the reference held by the zswap pool when the memcg is
> > offlined, and the zswap pool will obtain the reference to the next
> > online memcg in the traversal (or at least one that has not had the
> > zswap-memcg-release-callback run on it yet).
>
> This should be a part of the changelog along with an explanation why
> this cannot be handled on the caller level? You have a pin on the memcg,
> you can check it is online and scratch it if not, right? Why do we need
> to make a rather convoluted iterator interface more complex when most
> users simply do not require that?

Ah that's a good point. Hmm then I'll just do an extra online check in
the zswap reclaim callsite - cleaner and less invasive.

Thanks for the suggestion!
>
> --
> Michal Hocko
> SUSE Labs