2023-11-04 13:04:49

by Liu Shixin

[permalink] [raw]
Subject: [PATCH v7] mm: vmscan: try to reclaim swapcache pages if no swap space

When swap space is exhausted, only file pages can be reclaimed, even
though there may still be some swapcache pages on the anon LRU list.
This can lead to a premature out-of-memory (OOM) condition.

The problem can be reproduced with the following steps:

First, set up a 9MB disk swap space, then create a cgroup with a 10MB
memory limit, then run a program that allocates about 15MB of memory.

The problem occurs only occasionally; it may take about 100 runs to
reproduce [1].

Fix it by checking the number of swapcache pages in
can_reclaim_anon_pages(). If the number is not zero, return true and set
swapcache_only to 1. When scanning the anon LRU list in swapcache_only
mode, non-swapcache pages are skipped rather than isolated, in order to
improve reclaim efficiency.

However, in swapcache_only mode, the scan count is still increased when
non-swapcache pages are scanned. In this mode there are a large number of
non-swapcache pages and only a few swapcache pages, so if the skipped
non-swapcache pages were not counted, the scan of pages in
isolate_lru_folios() could eventually lead to a hung task, just as Sachin
reported [2].

By the way, since memory reclaim is attempted many times before OOM,
there is no need to isolate too many swapcache pages in a single pass.

[1]. https://lore.kernel.org/lkml/CAJD7tkZAfgncV+KbKr36=eDzMnT=9dZOT0dpMWcurHLr6Do+GA@mail.gmail.com/
[2]. https://lore.kernel.org/linux-mm/CAJD7tkafz_2XAuqE8tGLPEcpLngewhUo=5US14PAtSM9tLBUQg@mail.gmail.com/

Signed-off-by: Liu Shixin <[email protected]>
Tested-by: Yosry Ahmed <[email protected]>
Reviewed-by: "Huang, Ying" <[email protected]>
Reviewed-by: Yosry Ahmed <[email protected]>
---
v6->v7: Reset swapcache_only to zero once swap space is available again.
v5->v6: Fix NULL pointer dereference and hung task problem reported by Sachin.

include/linux/swap.h | 6 ++++++
mm/memcontrol.c | 8 ++++++++
mm/vmscan.c | 36 ++++++++++++++++++++++++++++++++++--
3 files changed, 48 insertions(+), 2 deletions(-)

diff --git a/include/linux/swap.h b/include/linux/swap.h
index f6dd6575b905..3ba146ae7cf5 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -659,6 +659,7 @@ static inline void mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_p
}

extern long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg);
+extern long mem_cgroup_get_nr_swapcache_pages(struct mem_cgroup *memcg);
extern bool mem_cgroup_swap_full(struct folio *folio);
#else
static inline void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry)
@@ -681,6 +682,11 @@ static inline long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg)
return get_nr_swap_pages();
}

+static inline long mem_cgroup_get_nr_swapcache_pages(struct mem_cgroup *memcg)
+{
+ return total_swapcache_pages();
+}
+
static inline bool mem_cgroup_swap_full(struct folio *folio)
{
return vm_swap_full();
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 5b009b233ab8..29e34c06ca83 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -7584,6 +7584,14 @@ long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg)
return nr_swap_pages;
}

+long mem_cgroup_get_nr_swapcache_pages(struct mem_cgroup *memcg)
+{
+ if (mem_cgroup_disabled())
+ return total_swapcache_pages();
+
+ return memcg_page_state(memcg, NR_SWAPCACHE);
+}
+
bool mem_cgroup_swap_full(struct folio *folio)
{
struct mem_cgroup *memcg;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 6f13394b112e..a5e04291662f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -137,6 +137,9 @@ struct scan_control {
/* Always discard instead of demoting to lower tier memory */
unsigned int no_demotion:1;

+ /* Swap space is exhausted, only reclaim swapcache for anon LRU */
+ unsigned int swapcache_only:1;
+
/* Allocation order */
s8 order;

@@ -602,6 +605,12 @@ static bool can_demote(int nid, struct scan_control *sc)
return true;
}

+static void set_swapcache_mode(struct scan_control *sc, bool swapcache_only)
+{
+ if (sc)
+ sc->swapcache_only = swapcache_only;
+}
+
static inline bool can_reclaim_anon_pages(struct mem_cgroup *memcg,
int nid,
struct scan_control *sc)
@@ -611,12 +620,26 @@ static inline bool can_reclaim_anon_pages(struct mem_cgroup *memcg,
* For non-memcg reclaim, is there
* space in any swap device?
*/
- if (get_nr_swap_pages() > 0)
+ if (get_nr_swap_pages() > 0) {
+ set_swapcache_mode(sc, false);
return true;
+ }
+ /* Is there any swapcache pages to reclaim? */
+ if (total_swapcache_pages() > 0) {
+ set_swapcache_mode(sc, true);
+ return true;
+ }
} else {
/* Is the memcg below its swap limit? */
- if (mem_cgroup_get_nr_swap_pages(memcg) > 0)
+ if (mem_cgroup_get_nr_swap_pages(memcg) > 0) {
+ set_swapcache_mode(sc, false);
return true;
+ }
+ /* Is there any swapcache pages in memcg to reclaim? */
+ if (mem_cgroup_get_nr_swapcache_pages(memcg) > 0) {
+ set_swapcache_mode(sc, true);
+ return true;
+ }
}

/*
@@ -2342,6 +2365,15 @@ static unsigned long isolate_lru_folios(unsigned long nr_to_scan,
*/
scan += nr_pages;

+ /*
+ * Count non-swapcache too because the swapcache pages may
+ * be rare and it takes too much times here if not count
+ * the non-swapcache pages.
+ */
+ if (unlikely(sc->swapcache_only && !is_file_lru(lru) &&
+ !folio_test_swapcache(folio)))
+ goto move;
+
if (!folio_test_lru(folio))
goto move;
if (!sc->may_unmap && folio_mapped(folio))
--
2.25.1


2023-11-06 02:21:14

by Huang, Ying

[permalink] [raw]
Subject: Re: [PATCH v7] mm: vmscan: try to reclaim swapcache pages if no swap space

Liu Shixin <[email protected]> writes:

> When spaces of swap devices are exhausted, only file pages can be
> reclaimed. But there are still some swapcache pages in anon lru list.
> This can lead to a premature out-of-memory.
>
> The problem is found with such step:
>
> Firstly, set a 9MB disk swap space, then create a cgroup with 10MB
> memory limit, then runs an program to allocates about 15MB memory.
>
> The problem occurs occasionally, which may need about 100 times [1].
>
> Fix it by checking number of swapcache pages in can_reclaim_anon_pages().
> If the number is not zero, return true and set swapcache_only to 1.
> When scan anon lru list in swapcache_only mode, non-swapcache pages will
> be skipped to isolate in order to accelerate reclaim efficiency.
>
> However, in swapcache_only mode, the scan count still increased when scan
> non-swapcache pages because there are large number of non-swapcache pages
> and rare swapcache pages in swapcache_only mode, and if the non-swapcache
> is skipped and do not count, the scan of pages in isolate_lru_folios() can
> eventually lead to hung task, just as Sachin reported [2].
>
> By the way, since there are enough times of memory reclaim before OOM, it
> is not need to isolate too much swapcache pages in one times.
>
> [1]. https://lore.kernel.org/lkml/CAJD7tkZAfgncV+KbKr36=eDzMnT=9dZOT0dpMWcurHLr6Do+GA@mail.gmail.com/
> [2]. https://lore.kernel.org/linux-mm/CAJD7tkafz_2XAuqE8tGLPEcpLngewhUo=5US14PAtSM9tLBUQg@mail.gmail.com/
>
> Signed-off-by: Liu Shixin <[email protected]>
> Tested-by: Yosry Ahmed <[email protected]>
> Reviewed-by: "Huang, Ying" <[email protected]>
> Reviewed-by: Yosry Ahmed <[email protected]>
> ---
> v6->v7: Reset swapcache_only to zero after there are swap spaces.
> v5->v6: Fix NULL pointing derefence and hung task problem reported by Sachin.
>
> include/linux/swap.h | 6 ++++++
> mm/memcontrol.c | 8 ++++++++
> mm/vmscan.c | 36 ++++++++++++++++++++++++++++++++++--
> 3 files changed, 48 insertions(+), 2 deletions(-)
>
> diff --git a/include/linux/swap.h b/include/linux/swap.h
> index f6dd6575b905..3ba146ae7cf5 100644
> --- a/include/linux/swap.h
> +++ b/include/linux/swap.h
> @@ -659,6 +659,7 @@ static inline void mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_p
> }
>
> extern long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg);
> +extern long mem_cgroup_get_nr_swapcache_pages(struct mem_cgroup *memcg);
> extern bool mem_cgroup_swap_full(struct folio *folio);
> #else
> static inline void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry)
> @@ -681,6 +682,11 @@ static inline long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg)
> return get_nr_swap_pages();
> }
>
> +static inline long mem_cgroup_get_nr_swapcache_pages(struct mem_cgroup *memcg)
> +{
> + return total_swapcache_pages();
> +}
> +
> static inline bool mem_cgroup_swap_full(struct folio *folio)
> {
> return vm_swap_full();
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 5b009b233ab8..29e34c06ca83 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -7584,6 +7584,14 @@ long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg)
> return nr_swap_pages;
> }
>
> +long mem_cgroup_get_nr_swapcache_pages(struct mem_cgroup *memcg)
> +{
> + if (mem_cgroup_disabled())
> + return total_swapcache_pages();
> +
> + return memcg_page_state(memcg, NR_SWAPCACHE);
> +}
> +
> bool mem_cgroup_swap_full(struct folio *folio)
> {
> struct mem_cgroup *memcg;
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index 6f13394b112e..a5e04291662f 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -137,6 +137,9 @@ struct scan_control {
> /* Always discard instead of demoting to lower tier memory */
> unsigned int no_demotion:1;
>
> + /* Swap space is exhausted, only reclaim swapcache for anon LRU */
> + unsigned int swapcache_only:1;
> +
> /* Allocation order */
> s8 order;
>
> @@ -602,6 +605,12 @@ static bool can_demote(int nid, struct scan_control *sc)
> return true;
> }
>
> +static void set_swapcache_mode(struct scan_control *sc, bool swapcache_only)
> +{
> + if (sc)
> + sc->swapcache_only = swapcache_only;
> +}
> +

I think that it's unnecessary to introduce a new function. I understand
that you want to reduce the code duplication. We can add

sc->swapcache_only = false;

at the beginning of can_reclaim_anon_pages() to reduce code duplication.
That can cover even more cases IIUC.

> static inline bool can_reclaim_anon_pages(struct mem_cgroup *memcg,
> int nid,
> struct scan_control *sc)
> @@ -611,12 +620,26 @@ static inline bool can_reclaim_anon_pages(struct mem_cgroup *memcg,
> * For non-memcg reclaim, is there
> * space in any swap device?
> */
> - if (get_nr_swap_pages() > 0)
> + if (get_nr_swap_pages() > 0) {
> + set_swapcache_mode(sc, false);
> return true;
> + }
> + /* Is there any swapcache pages to reclaim? */
> + if (total_swapcache_pages() > 0) {
> + set_swapcache_mode(sc, true);
> + return true;
> + }
> } else {
> /* Is the memcg below its swap limit? */
> - if (mem_cgroup_get_nr_swap_pages(memcg) > 0)
> + if (mem_cgroup_get_nr_swap_pages(memcg) > 0) {
> + set_swapcache_mode(sc, false);
> return true;
> + }
> + /* Is there any swapcache pages in memcg to reclaim? */
> + if (mem_cgroup_get_nr_swapcache_pages(memcg) > 0) {
> + set_swapcache_mode(sc, true);
> + return true;
> + }
> }

If can_demote() returns true, we shouldn't scan swapcache only.

--
Best Regards,
Huang, Ying

> /*
> @@ -2342,6 +2365,15 @@ static unsigned long isolate_lru_folios(unsigned long nr_to_scan,
> */
> scan += nr_pages;
>
> + /*
> + * Count non-swapcache too because the swapcache pages may
> + * be rare and it takes too much times here if not count
> + * the non-swapcache pages.
> + */
> + if (unlikely(sc->swapcache_only && !is_file_lru(lru) &&
> + !folio_test_swapcache(folio)))
> + goto move;
> +
> if (!folio_test_lru(folio))
> goto move;
> if (!sc->may_unmap && folio_mapped(folio))

2023-11-06 06:46:22

by Liu Shixin

[permalink] [raw]
Subject: Re: [PATCH v7] mm: vmscan: try to reclaim swapcache pages if no swap space



On 2023/11/6 10:18, Huang, Ying wrote:
> Liu Shixin <[email protected]> writes:
>
>> When spaces of swap devices are exhausted, only file pages can be
>> reclaimed. But there are still some swapcache pages in anon lru list.
>> This can lead to a premature out-of-memory.
>>
>> The problem is found with such step:
>>
>> Firstly, set a 9MB disk swap space, then create a cgroup with 10MB
>> memory limit, then runs an program to allocates about 15MB memory.
>>
>> The problem occurs occasionally, which may need about 100 times [1].
>>
>> Fix it by checking number of swapcache pages in can_reclaim_anon_pages().
>> If the number is not zero, return true and set swapcache_only to 1.
>> When scan anon lru list in swapcache_only mode, non-swapcache pages will
>> be skipped to isolate in order to accelerate reclaim efficiency.
>>
>> However, in swapcache_only mode, the scan count still increased when scan
>> non-swapcache pages because there are large number of non-swapcache pages
>> and rare swapcache pages in swapcache_only mode, and if the non-swapcache
>> is skipped and do not count, the scan of pages in isolate_lru_folios() can
>> eventually lead to hung task, just as Sachin reported [2].
>>
>> By the way, since there are enough times of memory reclaim before OOM, it
>> is not need to isolate too much swapcache pages in one times.
>>
>> [1]. https://lore.kernel.org/lkml/CAJD7tkZAfgncV+KbKr36=eDzMnT=9dZOT0dpMWcurHLr6Do+GA@mail.gmail.com/
>> [2]. https://lore.kernel.org/linux-mm/CAJD7tkafz_2XAuqE8tGLPEcpLngewhUo=5US14PAtSM9tLBUQg@mail.gmail.com/
>>
>> Signed-off-by: Liu Shixin <[email protected]>
>> Tested-by: Yosry Ahmed <[email protected]>
>> Reviewed-by: "Huang, Ying" <[email protected]>
>> Reviewed-by: Yosry Ahmed <[email protected]>
>> ---
>> v6->v7: Reset swapcache_only to zero after there are swap spaces.
>> v5->v6: Fix NULL pointing derefence and hung task problem reported by Sachin.
>>
>> include/linux/swap.h | 6 ++++++
>> mm/memcontrol.c | 8 ++++++++
>> mm/vmscan.c | 36 ++++++++++++++++++++++++++++++++++--
>> 3 files changed, 48 insertions(+), 2 deletions(-)
>>
>> diff --git a/include/linux/swap.h b/include/linux/swap.h
>> index f6dd6575b905..3ba146ae7cf5 100644
>> --- a/include/linux/swap.h
>> +++ b/include/linux/swap.h
>> @@ -659,6 +659,7 @@ static inline void mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_p
>> }
>>
>> extern long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg);
>> +extern long mem_cgroup_get_nr_swapcache_pages(struct mem_cgroup *memcg);
>> extern bool mem_cgroup_swap_full(struct folio *folio);
>> #else
>> static inline void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry)
>> @@ -681,6 +682,11 @@ static inline long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg)
>> return get_nr_swap_pages();
>> }
>>
>> +static inline long mem_cgroup_get_nr_swapcache_pages(struct mem_cgroup *memcg)
>> +{
>> + return total_swapcache_pages();
>> +}
>> +
>> static inline bool mem_cgroup_swap_full(struct folio *folio)
>> {
>> return vm_swap_full();
>> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
>> index 5b009b233ab8..29e34c06ca83 100644
>> --- a/mm/memcontrol.c
>> +++ b/mm/memcontrol.c
>> @@ -7584,6 +7584,14 @@ long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg)
>> return nr_swap_pages;
>> }
>>
>> +long mem_cgroup_get_nr_swapcache_pages(struct mem_cgroup *memcg)
>> +{
>> + if (mem_cgroup_disabled())
>> + return total_swapcache_pages();
>> +
>> + return memcg_page_state(memcg, NR_SWAPCACHE);
>> +}
>> +
>> bool mem_cgroup_swap_full(struct folio *folio)
>> {
>> struct mem_cgroup *memcg;
>> diff --git a/mm/vmscan.c b/mm/vmscan.c
>> index 6f13394b112e..a5e04291662f 100644
>> --- a/mm/vmscan.c
>> +++ b/mm/vmscan.c
>> @@ -137,6 +137,9 @@ struct scan_control {
>> /* Always discard instead of demoting to lower tier memory */
>> unsigned int no_demotion:1;
>>
>> + /* Swap space is exhausted, only reclaim swapcache for anon LRU */
>> + unsigned int swapcache_only:1;
>> +
>> /* Allocation order */
>> s8 order;
>>
>> @@ -602,6 +605,12 @@ static bool can_demote(int nid, struct scan_control *sc)
>> return true;
>> }
>>
>> +static void set_swapcache_mode(struct scan_control *sc, bool swapcache_only)
>> +{
>> + if (sc)
>> + sc->swapcache_only = swapcache_only;
>> +}
>> +
> I think that it's unnecessary to introduce a new function. I understand
> that you want to reduce the code duplication. We can add
>
> sc->swapcache_only = false;
>
> at the beginning of can_reclaim_anon_pages() to reduce code duplication.
> That can cover even more cases IIUC.
OK, it's more appropriate, I will resend v8, thank you.


>> static inline bool can_reclaim_anon_pages(struct mem_cgroup *memcg,
>> int nid,
>> struct scan_control *sc)
>> @@ -611,12 +620,26 @@ static inline bool can_reclaim_anon_pages(struct mem_cgroup *memcg,
>> * For non-memcg reclaim, is there
>> * space in any swap device?
>> */
>> - if (get_nr_swap_pages() > 0)
>> + if (get_nr_swap_pages() > 0) {
>> + set_swapcache_mode(sc, false);
>> return true;
>> + }
>> + /* Is there any swapcache pages to reclaim? */
>> + if (total_swapcache_pages() > 0) {
>> + set_swapcache_mode(sc, true);
>> + return true;
>> + }
>> } else {
>> /* Is the memcg below its swap limit? */
>> - if (mem_cgroup_get_nr_swap_pages(memcg) > 0)
>> + if (mem_cgroup_get_nr_swap_pages(memcg) > 0) {
>> + set_swapcache_mode(sc, false);
>> return true;
>> + }
>> + /* Is there any swapcache pages in memcg to reclaim? */
>> + if (mem_cgroup_get_nr_swapcache_pages(memcg) > 0) {
>> + set_swapcache_mode(sc, true);
>> + return true;
>> + }
>> }
> If can_demote() returns true, we shouldn't scan swapcache only.
>
> --
> Best Regards,
> Huang, Ying
>
>> /*
>> @@ -2342,6 +2365,15 @@ static unsigned long isolate_lru_folios(unsigned long nr_to_scan,
>> */
>> scan += nr_pages;
>>
>> + /*
>> + * Count non-swapcache too because the swapcache pages may
>> + * be rare and it takes too much times here if not count
>> + * the non-swapcache pages.
>> + */
>> + if (unlikely(sc->swapcache_only && !is_file_lru(lru) &&
>> + !folio_test_swapcache(folio)))
>> + goto move;
>> +
>> if (!folio_test_lru(folio))
>> goto move;
>> if (!sc->may_unmap && folio_mapped(folio))
> .
>