Commit 6be5e186fd65 ("mm: vmscan: restore incremental cgroup
iteration") added a retry reclaim heuristic to iterate all the cgroups
before returning an unsuccessful reclaim, but failed to reset
sc->priority before retrying. Let's fix it.
Reported-and-tested-by: [email protected]
Fixes: 6be5e186fd65 ("mm: vmscan: restore incremental cgroup iteration")
Signed-off-by: Shakeel Butt <[email protected]>
---
mm/vmscan.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index b9170f767353..731b009a142b 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -6317,6 +6317,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
* meaningful forward progress. Avoid false OOMs in this case.
*/
if (!sc->memcg_full_walk) {
+ sc->priority = initial_priority;
sc->memcg_full_walk = 1;
goto retry;
}
--
2.43.0
On Wed, May 29, 2024 at 08:49:11AM -0700, Shakeel Butt wrote:
> Commit 6be5e186fd65 ("mm: vmscan: restore incremental cgroup
> iteration") added a retry reclaim heuristic to iterate all the cgroups
> before returning an unsuccessful reclaim, but failed to reset
> sc->priority before retrying. Let's fix it.
>
> Reported-and-tested-by: [email protected]
> Fixes: 6be5e186fd65 ("mm: vmscan: restore incremental cgroup iteration")
> Signed-off-by: Shakeel Butt <[email protected]>
Reviewed-by: Roman Gushchin <[email protected]>
Good catch!
> ---
> mm/vmscan.c | 1 +
> 1 file changed, 1 insertion(+)
>
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index b9170f767353..731b009a142b 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -6317,6 +6317,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
> * meaningful forward progress. Avoid false OOMs in this case.
> */
> if (!sc->memcg_full_walk) {
> + sc->priority = initial_priority;
> sc->memcg_full_walk = 1;
> goto retry;
> }
> --
> 2.43.0
>
I wonder if it makes sense to refactor things to be more robust like this:
diff --git a/mm/vmscan.c b/mm/vmscan.c
index d3ae6bf1b65c7..f150e79f736da 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -6246,7 +6246,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
if (!cgroup_reclaim(sc))
__count_zid_vm_events(ALLOCSTALL, sc->reclaim_idx, 1);
- do {
+ for (sc->priority = initial_priority; sc->priority >= 0; sc->priority--) {
if (!sc->proactive)
vmpressure_prio(sc->gfp_mask, sc->target_mem_cgroup,
sc->priority);
@@ -6265,7 +6265,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
*/
if (sc->priority < DEF_PRIORITY - 2)
sc->may_writepage = 1;
- } while (--sc->priority >= 0);
+ }
last_pgdat = NULL;
for_each_zone_zonelist_nodemask(zone, z, zonelist, sc->reclaim_idx,
@@ -6318,7 +6318,6 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
* good, and retry with forcible deactivation if that fails.
*/
if (sc->skipped_deactivate) {
- sc->priority = initial_priority;
sc->force_deactivate = 1;
sc->skipped_deactivate = 0;
goto retry;
@@ -6326,7 +6325,6 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
/* Untapped cgroup reserves? Don't OOM, retry. */
if (sc->memcg_low_skipped) {
- sc->priority = initial_priority;
sc->force_deactivate = 0;
sc->memcg_low_reclaim = 1;
sc->memcg_low_skipped = 0;
On Wed, May 29, 2024 at 09:20:46AM GMT, Roman Gushchin wrote:
> On Wed, May 29, 2024 at 08:49:11AM -0700, Shakeel Butt wrote:
> > Commit 6be5e186fd65 ("mm: vmscan: restore incremental cgroup
> > iteration") added a retry reclaim heuristic to iterate all the cgroups
> > before returning an unsuccessful reclaim, but failed to reset
> > sc->priority before retrying. Let's fix it.
> >
> > Reported-and-tested-by: [email protected]
> > Fixes: 6be5e186fd65 ("mm: vmscan: restore incremental cgroup iteration")
> > Signed-off-by: Shakeel Butt <[email protected]>
>
> Reviewed-by: Roman Gushchin <[email protected]>
>
> Good catch!
Thanks.
>
> > ---
> > mm/vmscan.c | 1 +
> > 1 file changed, 1 insertion(+)
> >
> > diff --git a/mm/vmscan.c b/mm/vmscan.c
> > index b9170f767353..731b009a142b 100644
> > --- a/mm/vmscan.c
> > +++ b/mm/vmscan.c
> > @@ -6317,6 +6317,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
> > * meaningful forward progress. Avoid false OOMs in this case.
> > */
> > if (!sc->memcg_full_walk) {
> > + sc->priority = initial_priority;
> > sc->memcg_full_walk = 1;
> > goto retry;
> > }
> > --
> > 2.43.0
> >
>
> I wonder if it makes sense to refactor things to be more robust like this:
Oh, I like this, as it will make the sc->priority values explicit. I hope
we don't have any hidden dependency on do-while semantics in this code
path.
>
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index d3ae6bf1b65c7..f150e79f736da 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -6246,7 +6246,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
> if (!cgroup_reclaim(sc))
> __count_zid_vm_events(ALLOCSTALL, sc->reclaim_idx, 1);
>
> - do {
> + for (sc->priority = initial_priority; sc->priority >= 0; sc->priority--) {
> if (!sc->proactive)
> vmpressure_prio(sc->gfp_mask, sc->target_mem_cgroup,
> sc->priority);
> @@ -6265,7 +6265,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
> */
> if (sc->priority < DEF_PRIORITY - 2)
> sc->may_writepage = 1;
> - } while (--sc->priority >= 0);
> + }
>
> last_pgdat = NULL;
> for_each_zone_zonelist_nodemask(zone, z, zonelist, sc->reclaim_idx,
> @@ -6318,7 +6318,6 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
> * good, and retry with forcible deactivation if that fails.
> */
> if (sc->skipped_deactivate) {
> - sc->priority = initial_priority;
> sc->force_deactivate = 1;
> sc->skipped_deactivate = 0;
> goto retry;
> @@ -6326,7 +6325,6 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
>
> /* Untapped cgroup reserves? Don't OOM, retry. */
> if (sc->memcg_low_skipped) {
> - sc->priority = initial_priority;
> sc->force_deactivate = 0;
> sc->memcg_low_reclaim = 1;
> sc->memcg_low_skipped = 0;