2024-06-11 02:45:29

by Yosry Ahmed

Subject: [PATCH v3 1/3] mm: zswap: rename is_zswap_enabled() to zswap_is_enabled()

In preparation for introducing a similar function, rename
is_zswap_enabled() to use the zswap_* prefix like other zswap functions.

Signed-off-by: Yosry Ahmed <[email protected]>
---
include/linux/zswap.h | 4 ++--
mm/memcontrol.c | 2 +-
mm/zswap.c | 2 +-
3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/linux/zswap.h b/include/linux/zswap.h
index 2a85b941db975..ce5e7bfe8f1ec 100644
--- a/include/linux/zswap.h
+++ b/include/linux/zswap.h
@@ -35,7 +35,7 @@ void zswap_swapoff(int type);
void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg);
void zswap_lruvec_state_init(struct lruvec *lruvec);
void zswap_folio_swapin(struct folio *folio);
-bool is_zswap_enabled(void);
+bool zswap_is_enabled(void);
#else

struct zswap_lruvec_state {};
@@ -60,7 +60,7 @@ static inline void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg) {}
static inline void zswap_lruvec_state_init(struct lruvec *lruvec) {}
static inline void zswap_folio_swapin(struct folio *folio) {}

-static inline bool is_zswap_enabled(void)
+static inline bool zswap_is_enabled(void)
{
return false;
}
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 1303ed01bb5e5..a811dfff10cda 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -8469,7 +8469,7 @@ void obj_cgroup_uncharge_zswap(struct obj_cgroup *objcg, size_t size)
bool mem_cgroup_zswap_writeback_enabled(struct mem_cgroup *memcg)
{
/* if zswap is disabled, do not block pages going to the swapping device */
- return !is_zswap_enabled() || !memcg || READ_ONCE(memcg->zswap_writeback);
+ return !zswap_is_enabled() || !memcg || READ_ONCE(memcg->zswap_writeback);
}

static u64 zswap_current_read(struct cgroup_subsys_state *css,
diff --git a/mm/zswap.c b/mm/zswap.c
index b9b35ef86d9be..a8c8dd8cfe6f5 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -131,7 +131,7 @@ static bool zswap_shrinker_enabled = IS_ENABLED(
CONFIG_ZSWAP_SHRINKER_DEFAULT_ON);
module_param_named(shrinker_enabled, zswap_shrinker_enabled, bool, 0644);

-bool is_zswap_enabled(void)
+bool zswap_is_enabled(void)
{
return zswap_enabled;
}
--
2.45.2.505.gda0bf45e8d-goog



2024-06-11 02:45:36

by Yosry Ahmed

Subject: [PATCH v3 2/3] mm: zswap: add zswap_never_enabled()

Add zswap_never_enabled() to skip the xarray lookup in zswap_load() if
zswap was never enabled on the system. It is implemented using static
branches for efficiency, as enabling zswap should be a rare event. This
could shave some cycles off zswap_load() when CONFIG_ZSWAP is used but
zswap is never enabled.
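
(For context on the flavor choice: static_branch_maybe() picks the
likely/unlikely variant of the branch based on the Kconfig default,
roughly as defined in <linux/jump_label.h>:

	#define static_branch_maybe(config, x)			\
		(IS_ENABLED(config) ? static_branch_likely(x)	\
				    : static_branch_unlikely(x))

so the never-enabled check stays on the fast path whether or not
CONFIG_ZSWAP_DEFAULT_ON is set.)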

However, the real motivation behind this patch is two-fold:
- Incoming large folio swapin work will need to fallback to order-0
folios if zswap was ever enabled, because any part of the folio could
be in zswap, until proper handling of large folios with zswap is
added.

- A warning and recovery attempt will be added in a following change in
case the above is not done correctly. Zswap will fail the read if the
folio is large and zswap was ever enabled.

Signed-off-by: Yosry Ahmed <[email protected]>
---
mm/zswap.c | 10 ++++++++++
1 file changed, 10 insertions(+)

diff --git a/mm/zswap.c b/mm/zswap.c
index a8c8dd8cfe6f5..7fcd751e847d6 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -83,6 +83,7 @@ static bool zswap_pool_reached_full;
static int zswap_setup(void);

/* Enable/disable zswap */
+static DEFINE_STATIC_KEY_MAYBE(CONFIG_ZSWAP_DEFAULT_ON, zswap_ever_enabled);
static bool zswap_enabled = IS_ENABLED(CONFIG_ZSWAP_DEFAULT_ON);
static int zswap_enabled_param_set(const char *,
const struct kernel_param *);
@@ -136,6 +137,11 @@ bool zswap_is_enabled(void)
return zswap_enabled;
}

+static bool zswap_never_enabled(void)
+{
+ return !static_branch_maybe(CONFIG_ZSWAP_DEFAULT_ON, &zswap_ever_enabled);
+}
+
/*********************************
* data structures
**********************************/
@@ -1557,6 +1563,9 @@ bool zswap_load(struct folio *folio)

VM_WARN_ON_ONCE(!folio_test_locked(folio));

+ if (zswap_never_enabled())
+ return false;
+
/*
* When reading into the swapcache, invalidate our entry. The
* swapcache can be the authoritative owner of the page and
@@ -1735,6 +1744,7 @@ static int zswap_setup(void)
zpool_get_type(pool->zpools[0]));
list_add(&pool->list, &zswap_pools);
zswap_has_pool = true;
+ static_branch_enable(&zswap_ever_enabled);
} else {
pr_err("pool creation failed\n");
zswap_enabled = false;
--
2.45.2.505.gda0bf45e8d-goog


2024-06-11 02:45:52

by Yosry Ahmed

Subject: [PATCH v3 3/3] mm: zswap: handle incorrect attempts to load large folios

Zswap does not support storing or loading large folios. Until proper
support is added, attempts to load large folios from zswap are a bug.

For example, if a swapin fault observes that contiguous PTEs are
pointing to contiguous swap entries and tries to swap them in as a large
folio, swap_read_folio() will pass in a large folio to zswap_load(), but
zswap_load() will only effectively load the first page in the folio,
leaving the rest of the folio unfilled. If the first page is not in
zswap, the folio will instead be read from disk, even though other
pages may be in zswap.

In both cases, this will lead to silent data corruption. Proper support
needs to be added before large folio swapins and zswap can work
together.

Looking at the callers of swap_read_folio(), the folios passed in are
allocated either by __read_swap_cache_async() or by do_swap_page() in
the SWP_SYNCHRONOUS_IO path. Both allocate order-0 folios, so
everything is fine for now.

However, there is ongoing work to add support for large folio swapins
[1]. To make sure new development does not break zswap (or get broken by
zswap), add minimal handling of incorrect loads of large folios to
zswap.

First, move the call to folio_mark_uptodate() inside zswap_load().

If a large folio load is attempted, and zswap was ever enabled on the
system, return 'true' without calling folio_mark_uptodate(). This will
prevent the folio from being read from disk, and will emit an IO error
because the folio is not uptodate (e.g. do_swap_page() will return
VM_FAULT_SIGBUS). It may not be a reliable recovery in all cases, but it
is better than nothing.

This was tested by hacking the allocation in __read_swap_cache_async()
to use order 2 and __GFP_COMP.
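
For reference, a sketch of that hack (illustrative only, not part of
the patch; the exact allocation call in __read_swap_cache_async()
varies by tree):

	/* force order-2 compound folios on the swapin path */
	folio = folio_alloc(gfp_mask | __GFP_COMP, 2);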

In the future, to handle this correctly, the swapin code should:
(a) Fallback to order-0 swapins if zswap was ever used on the machine,
because compressed pages remain in zswap after it is disabled.
(b) Add proper support to swapin large folios from zswap (fully or
partially).

Probably start with (a) then follow up with (b).

[1] https://lore.kernel.org/linux-mm/[email protected]/

Signed-off-by: Yosry Ahmed <[email protected]>
---
mm/page_io.c | 1 -
mm/zswap.c | 12 ++++++++++++
2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/mm/page_io.c b/mm/page_io.c
index f1a9cfab6e748..8f441dd8e109f 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -517,7 +517,6 @@ void swap_read_folio(struct folio *folio, struct swap_iocb **plug)
delayacct_swapin_start();

if (zswap_load(folio)) {
- folio_mark_uptodate(folio);
folio_unlock(folio);
} else if (data_race(sis->flags & SWP_FS_OPS)) {
swap_read_folio_fs(folio, plug);
diff --git a/mm/zswap.c b/mm/zswap.c
index 7fcd751e847d6..505f4b9812891 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -1566,6 +1566,17 @@ bool zswap_load(struct folio *folio)
if (zswap_never_enabled())
return false;

+ /*
+ * Large folios should not be swapped in while zswap is being used, as
+ * they are not properly handled. Zswap does not properly load large
+ * folios, and a large folio may only be partially in zswap.
+ *
+ * Return true without marking the folio uptodate so that an IO error is
+ * emitted (e.g. do_swap_page() will sigbus).
+ */
+ if (WARN_ON_ONCE(folio_test_large(folio)))
+ return true;
+
/*
* When reading into the swapcache, invalidate our entry. The
* swapcache can be the authoritative owner of the page and
@@ -1600,6 +1611,7 @@ bool zswap_load(struct folio *folio)
folio_mark_dirty(folio);
}

+ folio_mark_uptodate(folio);
return true;
}

--
2.45.2.505.gda0bf45e8d-goog


2024-06-11 02:59:34

by Barry Song

Subject: Re: [PATCH v3 1/3] mm: zswap: rename is_zswap_enabled() to zswap_is_enabled()

On Tue, Jun 11, 2024 at 2:45 PM Yosry Ahmed <[email protected]> wrote:
>
> In preparation for introducing a similar function, rename
> is_zswap_enabled() to use the zswap_* prefix like other zswap functions.
>
> Signed-off-by: Yosry Ahmed <[email protected]>

Reviewed-by: Barry Song <[email protected]>

> ---
> include/linux/zswap.h | 4 ++--
> mm/memcontrol.c | 2 +-
> mm/zswap.c | 2 +-
> 3 files changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/include/linux/zswap.h b/include/linux/zswap.h
> index 2a85b941db975..ce5e7bfe8f1ec 100644
> --- a/include/linux/zswap.h
> +++ b/include/linux/zswap.h
> @@ -35,7 +35,7 @@ void zswap_swapoff(int type);
> void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg);
> void zswap_lruvec_state_init(struct lruvec *lruvec);
> void zswap_folio_swapin(struct folio *folio);
> -bool is_zswap_enabled(void);
> +bool zswap_is_enabled(void);
> #else
>
> struct zswap_lruvec_state {};
> @@ -60,7 +60,7 @@ static inline void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg) {}
> static inline void zswap_lruvec_state_init(struct lruvec *lruvec) {}
> static inline void zswap_folio_swapin(struct folio *folio) {}
>
> -static inline bool is_zswap_enabled(void)
> +static inline bool zswap_is_enabled(void)
> {
> return false;
> }
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 1303ed01bb5e5..a811dfff10cda 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -8469,7 +8469,7 @@ void obj_cgroup_uncharge_zswap(struct obj_cgroup *objcg, size_t size)
> bool mem_cgroup_zswap_writeback_enabled(struct mem_cgroup *memcg)
> {
> /* if zswap is disabled, do not block pages going to the swapping device */
> - return !is_zswap_enabled() || !memcg || READ_ONCE(memcg->zswap_writeback);
> + return !zswap_is_enabled() || !memcg || READ_ONCE(memcg->zswap_writeback);
> }
>
> static u64 zswap_current_read(struct cgroup_subsys_state *css,
> diff --git a/mm/zswap.c b/mm/zswap.c
> index b9b35ef86d9be..a8c8dd8cfe6f5 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -131,7 +131,7 @@ static bool zswap_shrinker_enabled = IS_ENABLED(
> CONFIG_ZSWAP_SHRINKER_DEFAULT_ON);
> module_param_named(shrinker_enabled, zswap_shrinker_enabled, bool, 0644);
>
> -bool is_zswap_enabled(void)
> +bool zswap_is_enabled(void)
> {
> return zswap_enabled;
> }
> --
> 2.45.2.505.gda0bf45e8d-goog
>

2024-06-11 15:59:01

by Nhat Pham

Subject: Re: [PATCH v3 1/3] mm: zswap: rename is_zswap_enabled() to zswap_is_enabled()

On Mon, Jun 10, 2024 at 7:45 PM Yosry Ahmed <[email protected]> wrote:
>
> In preparation for introducing a similar function, rename
> is_zswap_enabled() to use the zswap_* prefix like other zswap functions.
>
> Signed-off-by: Yosry Ahmed <[email protected]>

Ooops this is my bad :) Thanks for making it more consistent, Yosry!
Reviewed-by: Nhat Pham <[email protected]>

2024-06-11 16:42:23

by Nhat Pham

Subject: Re: [PATCH v3 2/3] mm: zswap: add zswap_never_enabled()

On Mon, Jun 10, 2024 at 7:45 PM Yosry Ahmed <[email protected]> wrote:
>
> Add zswap_never_enabled() to skip the xarray lookup in zswap_load() if
> zswap was never enabled on the system. It is implemented using static
> branches for efficiency, as enabling zswap should be a rare event. This
> could shave some cycles off zswap_load() when CONFIG_ZSWAP is used but
> zswap is never enabled.
>
> However, the real motivation behind this patch is two-fold:
> - Incoming large folio swapin work will need to fallback to order-0
> folios if zswap was ever enabled, because any part of the folio could
> be in zswap, until proper handling of large folios with zswap is
> added.
>
> - A warning and recovery attempt will be added in a following change in
> case the above is not done correctly. Zswap will fail the read if the
> folio is large and zswap was ever enabled.
>
> Signed-off-by: Yosry Ahmed <[email protected]>

This LGTM.
Reviewed-by: Nhat Pham <[email protected]>

2024-06-11 21:54:07

by Barry Song

Subject: Re: [PATCH v3 2/3] mm: zswap: add zswap_never_enabled()

On Tue, Jun 11, 2024 at 2:45 PM Yosry Ahmed <[email protected]> wrote:
>
> Add zswap_never_enabled() to skip the xarray lookup in zswap_load() if
> zswap was never enabled on the system. It is implemented using static
> branches for efficiency, as enabling zswap should be a rare event. This
> could shave some cycles off zswap_load() when CONFIG_ZSWAP is used but
> zswap is never enabled.
>
> However, the real motivation behind this patch is two-fold:
> - Incoming large folio swapin work will need to fallback to order-0
> folios if zswap was ever enabled, because any part of the folio could
> be in zswap, until proper handling of large folios with zswap is
> added.
>
> - A warning and recovery attempt will be added in a following change in
> case the above is not done correctly. Zswap will fail the read if the
> folio is large and zswap was ever enabled.
>
> Signed-off-by: Yosry Ahmed <[email protected]>
> ---
> mm/zswap.c | 10 ++++++++++
> 1 file changed, 10 insertions(+)
>
> diff --git a/mm/zswap.c b/mm/zswap.c
> index a8c8dd8cfe6f5..7fcd751e847d6 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -83,6 +83,7 @@ static bool zswap_pool_reached_full;
> static int zswap_setup(void);
>
> /* Enable/disable zswap */
> +static DEFINE_STATIC_KEY_MAYBE(CONFIG_ZSWAP_DEFAULT_ON, zswap_ever_enabled);
> static bool zswap_enabled = IS_ENABLED(CONFIG_ZSWAP_DEFAULT_ON);
> static int zswap_enabled_param_set(const char *,
> const struct kernel_param *);
> @@ -136,6 +137,11 @@ bool zswap_is_enabled(void)
> return zswap_enabled;
> }
>
> +static bool zswap_never_enabled(void)
> +{
> + return !static_branch_maybe(CONFIG_ZSWAP_DEFAULT_ON, &zswap_ever_enabled);
> +}

Will we "extern" this one so that mm-core can use it to fallback
to small folios?
Or do you prefer this to be done within the coming swapin series?

> +
> /*********************************
> * data structures
> **********************************/
> @@ -1557,6 +1563,9 @@ bool zswap_load(struct folio *folio)
>
> VM_WARN_ON_ONCE(!folio_test_locked(folio));
>
> + if (zswap_never_enabled())
> + return false;
> +
> /*
> * When reading into the swapcache, invalidate our entry. The
> * swapcache can be the authoritative owner of the page and
> @@ -1735,6 +1744,7 @@ static int zswap_setup(void)
> zpool_get_type(pool->zpools[0]));
> list_add(&pool->list, &zswap_pools);
> zswap_has_pool = true;
> + static_branch_enable(&zswap_ever_enabled);
> } else {
> pr_err("pool creation failed\n");
> zswap_enabled = false;
> --
> 2.45.2.505.gda0bf45e8d-goog
>

Thanks
Barry

2024-06-11 21:56:06

by Yosry Ahmed

Subject: Re: [PATCH v3 2/3] mm: zswap: add zswap_never_enabled()

On Tue, Jun 11, 2024 at 2:53 PM Barry Song <[email protected]> wrote:
>
> On Tue, Jun 11, 2024 at 2:45 PM Yosry Ahmed <[email protected]> wrote:
> >
> > Add zswap_never_enabled() to skip the xarray lookup in zswap_load() if
> > zswap was never enabled on the system. It is implemented using static
> > branches for efficiency, as enabling zswap should be a rare event. This
> > could shave some cycles off zswap_load() when CONFIG_ZSWAP is used but
> > zswap is never enabled.
> >
> > However, the real motivation behind this patch is two-fold:
> > - Incoming large folio swapin work will need to fallback to order-0
> > folios if zswap was ever enabled, because any part of the folio could
> > be in zswap, until proper handling of large folios with zswap is
> > added.
> >
> > - A warning and recovery attempt will be added in a following change in
> > case the above is not done correctly. Zswap will fail the read if the
> > folio is large and zswap was ever enabled.
> >
> > Signed-off-by: Yosry Ahmed <[email protected]>
> > ---
> > mm/zswap.c | 10 ++++++++++
> > 1 file changed, 10 insertions(+)
> >
> > diff --git a/mm/zswap.c b/mm/zswap.c
> > index a8c8dd8cfe6f5..7fcd751e847d6 100644
> > --- a/mm/zswap.c
> > +++ b/mm/zswap.c
> > @@ -83,6 +83,7 @@ static bool zswap_pool_reached_full;
> > static int zswap_setup(void);
> >
> > /* Enable/disable zswap */
> > +static DEFINE_STATIC_KEY_MAYBE(CONFIG_ZSWAP_DEFAULT_ON, zswap_ever_enabled);
> > static bool zswap_enabled = IS_ENABLED(CONFIG_ZSWAP_DEFAULT_ON);
> > static int zswap_enabled_param_set(const char *,
> > const struct kernel_param *);
> > @@ -136,6 +137,11 @@ bool zswap_is_enabled(void)
> > return zswap_enabled;
> > }
> >
> > +static bool zswap_never_enabled(void)
> > +{
> > + return !static_branch_maybe(CONFIG_ZSWAP_DEFAULT_ON, &zswap_ever_enabled);
> > +}
>
> Will we "extern" this one so that mm-core can use it to fallback
> to small folios?
> Or do you prefer this to be done within the coming swapin series?

My intention was to keep it static for now, and expose it in the
header when needed (in the swapin series). If others think it's better
to do this now to avoid the churn I am happy to do it as well.
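
For illustration, the kind of fallback Barry is describing might look
like this once the helper is exported (a hypothetical sketch;
alloc_swap_folio() and its surroundings are made up for the example):

	static struct folio *alloc_swap_folio(gfp_t gfp, int order)
	{
		/*
		 * Compressed pages remain in zswap even after it is
		 * disabled, so only attempt a large swapin if zswap
		 * was never enabled; otherwise fall back to order-0.
		 */
		if (!zswap_never_enabled())
			order = 0;
		return folio_alloc(gfp, order);
	}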

2024-06-11 21:57:19

by Barry Song

Subject: Re: [PATCH v3 3/3] mm: zswap: handle incorrect attempts to load large folios

On Tue, Jun 11, 2024 at 2:45 PM Yosry Ahmed <[email protected]> wrote:
>
> Zswap does not support storing or loading large folios. Until proper
> support is added, attempts to load large folios from zswap are a bug.
>
> For example, if a swapin fault observes that contiguous PTEs are
> pointing to contiguous swap entries and tries to swap them in as a large
> folio, swap_read_folio() will pass in a large folio to zswap_load(), but
> zswap_load() will only effectively load the first page in the folio. If
> the first page is not in zswap, the folio will be read from disk, even
> though other pages may be in zswap.
>
> In both cases, this will lead to silent data corruption. Proper support
> needs to be added before large folio swapins and zswap can work
> together.
>
> Looking at the callers of swap_read_folio(), the folios passed in are
> allocated either by __read_swap_cache_async() or by do_swap_page() in
> the SWP_SYNCHRONOUS_IO path. Both allocate order-0 folios, so
> everything is fine for now.
>
> However, there is ongoing work to add support for large folio swapins
> [1]. To make sure new development does not break zswap (or get broken by
> zswap), add minimal handling of incorrect loads of large folios to
> zswap.
>
> First, move the call to folio_mark_uptodate() inside zswap_load().
>
> If a large folio load is attempted, and zswap was ever enabled on the
> system, return 'true' without calling folio_mark_uptodate(). This will
> prevent the folio from being read from disk, and will emit an IO error
> because the folio is not uptodate (e.g. do_swap_page() will return
> VM_FAULT_SIGBUS). It may not be a reliable recovery in all cases, but it
> is better than nothing.
>
> This was tested by hacking the allocation in __read_swap_cache_async()
> to use order 2 and __GFP_COMP.
>
> In the future, to handle this correctly, the swapin code should:
> (a) Fallback to order-0 swapins if zswap was ever used on the machine,
> because compressed pages remain in zswap after it is disabled.
> (b) Add proper support to swapin large folios from zswap (fully or
> partially).
>
> Probably start with (a) then follow up with (b).
>
> [1] https://lore.kernel.org/linux-mm/[email protected]/
>
> Signed-off-by: Yosry Ahmed <[email protected]>

Acked-by: Barry Song <[email protected]>

> ---
> mm/page_io.c | 1 -
> mm/zswap.c | 12 ++++++++++++
> 2 files changed, 12 insertions(+), 1 deletion(-)
>
> diff --git a/mm/page_io.c b/mm/page_io.c
> index f1a9cfab6e748..8f441dd8e109f 100644
> --- a/mm/page_io.c
> +++ b/mm/page_io.c
> @@ -517,7 +517,6 @@ void swap_read_folio(struct folio *folio, struct swap_iocb **plug)
> delayacct_swapin_start();
>
> if (zswap_load(folio)) {
> - folio_mark_uptodate(folio);
> folio_unlock(folio);
> } else if (data_race(sis->flags & SWP_FS_OPS)) {
> swap_read_folio_fs(folio, plug);
> diff --git a/mm/zswap.c b/mm/zswap.c
> index 7fcd751e847d6..505f4b9812891 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -1566,6 +1566,17 @@ bool zswap_load(struct folio *folio)
> if (zswap_never_enabled())
> return false;
>
> + /*
> + * Large folios should not be swapped in while zswap is being used, as
> + * they are not properly handled. Zswap does not properly load large
> + * folios, and a large folio may only be partially in zswap.
> + *
> + * Return true without marking the folio uptodate so that an IO error is
> + * emitted (e.g. do_swap_page() will sigbus).
> + */
> + if (WARN_ON_ONCE(folio_test_large(folio)))
> + return true;
> +
> /*
> * When reading into the swapcache, invalidate our entry. The
> * swapcache can be the authoritative owner of the page and
> @@ -1600,6 +1611,7 @@ bool zswap_load(struct folio *folio)
> folio_mark_dirty(folio);
> }
>
> + folio_mark_uptodate(folio);
> return true;
> }
>
> --
> 2.45.2.505.gda0bf45e8d-goog
>

2024-06-11 22:20:20

by Barry Song

Subject: Re: [PATCH v3 2/3] mm: zswap: add zswap_never_enabled()

On Wed, Jun 12, 2024 at 9:55 AM Yosry Ahmed <[email protected]> wrote:
>
> On Tue, Jun 11, 2024 at 2:53 PM Barry Song <[email protected]> wrote:
> >
> > On Tue, Jun 11, 2024 at 2:45 PM Yosry Ahmed <[email protected]> wrote:
> > >
> > > Add zswap_never_enabled() to skip the xarray lookup in zswap_load() if
> > > zswap was never enabled on the system. It is implemented using static
> > > branches for efficiency, as enabling zswap should be a rare event. This
> > > could shave some cycles off zswap_load() when CONFIG_ZSWAP is used but
> > > zswap is never enabled.
> > >
> > > However, the real motivation behind this patch is two-fold:
> > > - Incoming large folio swapin work will need to fallback to order-0
> > > folios if zswap was ever enabled, because any part of the folio could
> > > be in zswap, until proper handling of large folios with zswap is
> > > added.
> > >
> > > - A warning and recovery attempt will be added in a following change in
> > > case the above is not done correctly. Zswap will fail the read if the
> > > folio is large and zswap was ever enabled.
> > >
> > > Signed-off-by: Yosry Ahmed <[email protected]>
> > > ---
> > > mm/zswap.c | 10 ++++++++++
> > > 1 file changed, 10 insertions(+)
> > >
> > > diff --git a/mm/zswap.c b/mm/zswap.c
> > > index a8c8dd8cfe6f5..7fcd751e847d6 100644
> > > --- a/mm/zswap.c
> > > +++ b/mm/zswap.c
> > > @@ -83,6 +83,7 @@ static bool zswap_pool_reached_full;
> > > static int zswap_setup(void);
> > >
> > > /* Enable/disable zswap */
> > > +static DEFINE_STATIC_KEY_MAYBE(CONFIG_ZSWAP_DEFAULT_ON, zswap_ever_enabled);
> > > static bool zswap_enabled = IS_ENABLED(CONFIG_ZSWAP_DEFAULT_ON);
> > > static int zswap_enabled_param_set(const char *,
> > > const struct kernel_param *);
> > > @@ -136,6 +137,11 @@ bool zswap_is_enabled(void)
> > > return zswap_enabled;
> > > }
> > >
> > > +static bool zswap_never_enabled(void)
> > > +{
> > > + return !static_branch_maybe(CONFIG_ZSWAP_DEFAULT_ON, &zswap_ever_enabled);
> > > +}
> >
> > Will we "extern" this one so that mm-core can use it to fallback
> > to small folios?
> > Or do you prefer this to be done within the coming swapin series?
>
> My intention was to keep it static for now, and expose it in the
> header when needed (in the swapin series). If others think it's better
> to do this now to avoid the churn I am happy to do it as well.

Personally, I'd vote for exposing it now to avoid one more patch which
might come shortly. And this patchset serves the clear purpose of
drawing mm-core's attention to the need to fall back to small folios.

Thanks
Barry

2024-06-11 23:38:05

by Yosry Ahmed

[permalink] [raw]
Subject: Re: [PATCH v3 2/3] mm: zswap: add zswap_never_enabled()

On Wed, Jun 12, 2024 at 10:19:58AM +1200, Barry Song wrote:
> On Wed, Jun 12, 2024 at 9:55 AM Yosry Ahmed <[email protected]> wrote:
> >
> > On Tue, Jun 11, 2024 at 2:53 PM Barry Song <[email protected]> wrote:
> > >
> > > On Tue, Jun 11, 2024 at 2:45 PM Yosry Ahmed <[email protected]> wrote:
> > > >
> > > > Add zswap_never_enabled() to skip the xarray lookup in zswap_load() if
> > > > zswap was never enabled on the system. It is implemented using static
> > > > branches for efficiency, as enabling zswap should be a rare event. This
> > > > could shave some cycles off zswap_load() when CONFIG_ZSWAP is used but
> > > > zswap is never enabled.
> > > >
> > > > However, the real motivation behind this patch is two-fold:
> > > > - Incoming large folio swapin work will need to fallback to order-0
> > > > folios if zswap was ever enabled, because any part of the folio could
> > > > be in zswap, until proper handling of large folios with zswap is
> > > > added.
> > > >
> > > > - A warning and recovery attempt will be added in a following change in
> > > > case the above is not done correctly. Zswap will fail the read if the
> > > > folio is large and zswap was ever enabled.
> > > >
> > > > Signed-off-by: Yosry Ahmed <[email protected]>
> > > > ---
> > > > mm/zswap.c | 10 ++++++++++
> > > > 1 file changed, 10 insertions(+)
> > > >
> > > > diff --git a/mm/zswap.c b/mm/zswap.c
> > > > index a8c8dd8cfe6f5..7fcd751e847d6 100644
> > > > --- a/mm/zswap.c
> > > > +++ b/mm/zswap.c
> > > > @@ -83,6 +83,7 @@ static bool zswap_pool_reached_full;
> > > > static int zswap_setup(void);
> > > >
> > > > /* Enable/disable zswap */
> > > > +static DEFINE_STATIC_KEY_MAYBE(CONFIG_ZSWAP_DEFAULT_ON, zswap_ever_enabled);
> > > > static bool zswap_enabled = IS_ENABLED(CONFIG_ZSWAP_DEFAULT_ON);
> > > > static int zswap_enabled_param_set(const char *,
> > > > const struct kernel_param *);
> > > > @@ -136,6 +137,11 @@ bool zswap_is_enabled(void)
> > > > return zswap_enabled;
> > > > }
> > > >
> > > > +static bool zswap_never_enabled(void)
> > > > +{
> > > > + return !static_branch_maybe(CONFIG_ZSWAP_DEFAULT_ON, &zswap_ever_enabled);
> > > > +}
> > >
> > > Will we "extern" this one so that mm-core can use it to fallback
> > > to small folios?
> > > Or do you prefer this to be done within the coming swapin series?
> >
> > My intention was to keep it static for now, and expose it in the
> > header when needed (in the swapin series). If others think it's better
> > to do this now to avoid the churn I am happy to do it as well.
>
> Personally, I'd vote for exposing it now to avoid one more patch which
> might come shortly. And this patchset serves the clear purpose of
> drawing mm-core's attention to the need to fall back to small folios.

Sure. Andrew, unless anyone objects, could you please squash the
following diff and add the following sentence to the commit log:

"Expose zswap_never_enabled() in the header for the swapin work to use
it later."

diff --git a/include/linux/zswap.h b/include/linux/zswap.h
index ce5e7bfe8f1ec..bf83ae5e285d4 100644
--- a/include/linux/zswap.h
+++ b/include/linux/zswap.h
@@ -36,6 +36,7 @@ void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg);
void zswap_lruvec_state_init(struct lruvec *lruvec);
void zswap_folio_swapin(struct folio *folio);
bool zswap_is_enabled(void);
+bool zswap_never_enabled(void);
#else

struct zswap_lruvec_state {};
@@ -65,6 +66,11 @@ static inline bool zswap_is_enabled(void)
return false;
}

+static inline bool zswap_never_enabled(void)
+{
+ return false;
+}
+
#endif

#endif /* _LINUX_ZSWAP_H */
diff --git a/mm/zswap.c b/mm/zswap.c
index 505f4b9812891..a546c01602aaf 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -137,7 +137,7 @@ bool zswap_is_enabled(void)
return zswap_enabled;
}

-static bool zswap_never_enabled(void)
+bool zswap_never_enabled(void)
{
return !static_branch_maybe(CONFIG_ZSWAP_DEFAULT_ON, &zswap_ever_enabled);
}

>
> Thanks
> Barry