2024-01-30 01:42:27

by Johannes Weiner

Subject: [PATCH 00/20] mm: zswap: cleanups

Cleanups and maintenance items that accumulated while reviewing zswap
patches. Based on akpm/mm-unstable + the UAF fix I sent just now.

mm/zswap.c | 1961 +++++++++++++++++++++++++++++-----------------------------
1 file changed, 971 insertions(+), 990 deletions(-)



2024-01-30 01:42:38

by Johannes Weiner

Subject: [PATCH 01/20] mm: zswap: rename zswap_free_entry to zswap_entry_free

There already is a zswap_entry_ namespace with multiple functions, so make this function's name follow that convention.

Signed-off-by: Johannes Weiner <[email protected]>
---
mm/zswap.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/zswap.c b/mm/zswap.c
index 7f88b3a77e4a..173f2e6657de 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -520,7 +520,7 @@ static struct zpool *zswap_find_zpool(struct zswap_entry *entry)
* Carries out the common pattern of freeing and entry's zpool allocation,
* freeing the entry itself, and decrementing the number of stored pages.
*/
-static void zswap_free_entry(struct zswap_entry *entry)
+static void zswap_entry_free(struct zswap_entry *entry)
{
if (!entry->length)
atomic_dec(&zswap_same_filled_pages);
@@ -555,7 +555,7 @@ static void zswap_entry_put(struct zswap_entry *entry)
WARN_ON_ONCE(refcount < 0);
if (refcount == 0) {
WARN_ON_ONCE(!RB_EMPTY_NODE(&entry->rbnode));
- zswap_free_entry(entry);
+ zswap_entry_free(entry);
}
}

--
2.43.0


2024-01-30 01:42:44

by Johannes Weiner

Subject: [PATCH 02/20] mm: zswap: inline and remove zswap_entry_find_get()

There is only one caller and the function is trivial. Inline it.

Signed-off-by: Johannes Weiner <[email protected]>
---
mm/zswap.c | 17 ++---------------
1 file changed, 2 insertions(+), 15 deletions(-)

diff --git a/mm/zswap.c b/mm/zswap.c
index 173f2e6657de..cf864aaa214d 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -559,19 +559,6 @@ static void zswap_entry_put(struct zswap_entry *entry)
}
}

-/* caller must hold the tree lock */
-static struct zswap_entry *zswap_entry_find_get(struct rb_root *root,
- pgoff_t offset)
-{
- struct zswap_entry *entry;
-
- entry = zswap_rb_search(root, offset);
- if (entry)
- zswap_entry_get(entry);
-
- return entry;
-}
-
/*********************************
* shrinker functions
**********************************/
@@ -1708,13 +1695,13 @@ bool zswap_load(struct folio *folio)

VM_WARN_ON_ONCE(!folio_test_locked(folio));

- /* find */
spin_lock(&tree->lock);
- entry = zswap_entry_find_get(&tree->rbroot, offset);
+ entry = zswap_rb_search(&tree->rbroot, offset);
if (!entry) {
spin_unlock(&tree->lock);
return false;
}
+ zswap_entry_get(entry);
spin_unlock(&tree->lock);

if (entry->length)
--
2.43.0


2024-01-30 01:42:53

by Johannes Weiner

Subject: [PATCH 03/20] mm: zswap: move zswap_invalidate_entry() to related functions

Move it up to join the other tree and refcounting functions.

Signed-off-by: Johannes Weiner <[email protected]>
---
mm/zswap.c | 24 ++++++++++++------------
1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/mm/zswap.c b/mm/zswap.c
index cf864aaa214d..9f05282efe3c 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -559,6 +559,18 @@ static void zswap_entry_put(struct zswap_entry *entry)
}
}

+/*
+ * If the entry is still valid in the tree, drop the initial ref and remove it
+ * from the tree. This function must be called with an additional ref held,
+ * otherwise it may race with another invalidation freeing the entry.
+ */
+static void zswap_invalidate_entry(struct zswap_tree *tree,
+ struct zswap_entry *entry)
+{
+ if (zswap_rb_erase(&tree->rbroot, entry))
+ zswap_entry_put(entry);
+}
+
/*********************************
* shrinker functions
**********************************/
@@ -809,18 +821,6 @@ static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
return NULL;
}

-/*
- * If the entry is still valid in the tree, drop the initial ref and remove it
- * from the tree. This function must be called with an additional ref held,
- * otherwise it may race with another invalidation freeing the entry.
- */
-static void zswap_invalidate_entry(struct zswap_tree *tree,
- struct zswap_entry *entry)
-{
- if (zswap_rb_erase(&tree->rbroot, entry))
- zswap_entry_put(entry);
-}
-
static enum lru_status shrink_memcg_cb(struct list_head *item, struct list_lru_one *l,
spinlock_t *lock, void *arg)
{
--
2.43.0


2024-01-30 01:43:13

by Johannes Weiner

Subject: [PATCH 04/20] mm: zswap: warn when referencing a dead entry

Add a standard sanity check to zswap_entry_get() to catch use-after-free scenarios, where a reference is taken on an entry whose refcount has already dropped to zero.
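
For illustration, here is a minimal userspace model of the invariant the
new check enforces. This is a sketch only, not kernel code: entry_get()
and entry_put() stand in for zswap_entry_get()/zswap_entry_put(), and the
real functions are serialized by the tree lock and use WARN_ON_ONCE()
rather than printing to stderr.

#include <stdio.h>

struct entry { int refcount; };

static void entry_get(struct entry *e)
{
	/* taking a reference on a dead entry is a use-after-free */
	if (!e->refcount)
		fprintf(stderr, "WARN: get on entry with zero refcount\n");
	e->refcount++;
}

static void entry_put(struct entry *e)
{
	if (--e->refcount == 0)
		fprintf(stderr, "entry freed\n");	/* zswap_entry_free() */
}

int main(void)
{
	struct entry e = { .refcount = 1 };	/* initial tree reference */

	entry_put(&e);	/* last reference dropped, entry is dead */
	entry_get(&e);	/* buggy caller: the warning fires here */
	return 0;
}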

Signed-off-by: Johannes Weiner <[email protected]>
---
mm/zswap.c | 1 +
1 file changed, 1 insertion(+)

diff --git a/mm/zswap.c b/mm/zswap.c
index 9f05282efe3c..0c6adaf2fdb6 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -542,6 +542,7 @@ static void zswap_entry_free(struct zswap_entry *entry)
/* caller must hold the tree lock */
static void zswap_entry_get(struct zswap_entry *entry)
{
+ WARN_ON_ONCE(!entry->refcount);
entry->refcount++;
}

--
2.43.0


2024-01-30 01:43:30

by Johannes Weiner

Subject: [PATCH 05/20] mm: zswap: clean up zswap_entry_put()

Remove stale comment and unnecessary local variable.

Signed-off-by: Johannes Weiner <[email protected]>
---
mm/zswap.c | 10 +++-------
1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/mm/zswap.c b/mm/zswap.c
index 0c6adaf2fdb6..7a7e8da2b4f8 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -546,15 +546,11 @@ static void zswap_entry_get(struct zswap_entry *entry)
entry->refcount++;
}

-/* caller must hold the tree lock
-* remove from the tree and free it, if nobody reference the entry
-*/
+/* caller must hold the tree lock */
static void zswap_entry_put(struct zswap_entry *entry)
{
- int refcount = --entry->refcount;
-
- WARN_ON_ONCE(refcount < 0);
- if (refcount == 0) {
+ WARN_ON_ONCE(!entry->refcount);
+ if (--entry->refcount == 0) {
WARN_ON_ONCE(!RB_EMPTY_NODE(&entry->rbnode));
zswap_entry_free(entry);
}
--
2.43.0


2024-01-30 01:44:51

by Johannes Weiner

Subject: [PATCH 10/20] mm: zswap: function ordering: pool alloc & free

The function ordering in zswap.c is a little chaotic, which requires
jumping in unexpected directions when following related code. This is
a series of patches that brings the file into the following order:

- pool functions
- lru functions
- rbtree functions
- zswap entry functions
- compression/backend functions
- writeback & shrinking functions
- store, load, invalidate, swapon, swapoff
- debugfs
- init

But it has to be split up so that the moves still produce halfway
readable diffs.

In this patch, move pool allocation and freeing functions.

Signed-off-by: Johannes Weiner <[email protected]>
---
mm/zswap.c | 297 +++++++++++++++++++++++++++--------------------------
1 file changed, 152 insertions(+), 145 deletions(-)

diff --git a/mm/zswap.c b/mm/zswap.c
index 082d076a758d..805d9a35f633 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -320,6 +320,158 @@ static void zswap_update_total_size(void)
zswap_pool_total_size = total;
}

+/*********************************
+* pool functions
+**********************************/
+
+static void zswap_alloc_shrinker(struct zswap_pool *pool);
+static void shrink_worker(struct work_struct *w);
+
+static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
+{
+ int i;
+ struct zswap_pool *pool;
+ char name[38]; /* 'zswap' + 32 char (max) num + \0 */
+ gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
+ int ret;
+
+ if (!zswap_has_pool) {
+ /* if either are unset, pool initialization failed, and we
+ * need both params to be set correctly before trying to
+ * create a pool.
+ */
+ if (!strcmp(type, ZSWAP_PARAM_UNSET))
+ return NULL;
+ if (!strcmp(compressor, ZSWAP_PARAM_UNSET))
+ return NULL;
+ }
+
+ pool = kzalloc(sizeof(*pool), GFP_KERNEL);
+ if (!pool)
+ return NULL;
+
+ for (i = 0; i < ZSWAP_NR_ZPOOLS; i++) {
+ /* unique name for each pool specifically required by zsmalloc */
+ snprintf(name, 38, "zswap%x",
+ atomic_inc_return(&zswap_pools_count));
+
+ pool->zpools[i] = zpool_create_pool(type, name, gfp);
+ if (!pool->zpools[i]) {
+ pr_err("%s zpool not available\n", type);
+ goto error;
+ }
+ }
+ pr_debug("using %s zpool\n", zpool_get_type(pool->zpools[0]));
+
+ strscpy(pool->tfm_name, compressor, sizeof(pool->tfm_name));
+
+ pool->acomp_ctx = alloc_percpu(*pool->acomp_ctx);
+ if (!pool->acomp_ctx) {
+ pr_err("percpu alloc failed\n");
+ goto error;
+ }
+
+ ret = cpuhp_state_add_instance(CPUHP_MM_ZSWP_POOL_PREPARE,
+ &pool->node);
+ if (ret)
+ goto error;
+
+ zswap_alloc_shrinker(pool);
+ if (!pool->shrinker)
+ goto error;
+
+ pr_debug("using %s compressor\n", pool->tfm_name);
+
+ /* being the current pool takes 1 ref; this func expects the
+ * caller to always add the new pool as the current pool
+ */
+ kref_init(&pool->kref);
+ INIT_LIST_HEAD(&pool->list);
+ if (list_lru_init_memcg(&pool->list_lru, pool->shrinker))
+ goto lru_fail;
+ shrinker_register(pool->shrinker);
+ INIT_WORK(&pool->shrink_work, shrink_worker);
+ atomic_set(&pool->nr_stored, 0);
+
+ zswap_pool_debug("created", pool);
+
+ return pool;
+
+lru_fail:
+ list_lru_destroy(&pool->list_lru);
+ shrinker_free(pool->shrinker);
+error:
+ if (pool->acomp_ctx)
+ free_percpu(pool->acomp_ctx);
+ while (i--)
+ zpool_destroy_pool(pool->zpools[i]);
+ kfree(pool);
+ return NULL;
+}
+
+static struct zswap_pool *__zswap_pool_create_fallback(void)
+{
+ bool has_comp, has_zpool;
+
+ has_comp = crypto_has_acomp(zswap_compressor, 0, 0);
+ if (!has_comp && strcmp(zswap_compressor,
+ CONFIG_ZSWAP_COMPRESSOR_DEFAULT)) {
+ pr_err("compressor %s not available, using default %s\n",
+ zswap_compressor, CONFIG_ZSWAP_COMPRESSOR_DEFAULT);
+ param_free_charp(&zswap_compressor);
+ zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT;
+ has_comp = crypto_has_acomp(zswap_compressor, 0, 0);
+ }
+ if (!has_comp) {
+ pr_err("default compressor %s not available\n",
+ zswap_compressor);
+ param_free_charp(&zswap_compressor);
+ zswap_compressor = ZSWAP_PARAM_UNSET;
+ }
+
+ has_zpool = zpool_has_pool(zswap_zpool_type);
+ if (!has_zpool && strcmp(zswap_zpool_type,
+ CONFIG_ZSWAP_ZPOOL_DEFAULT)) {
+ pr_err("zpool %s not available, using default %s\n",
+ zswap_zpool_type, CONFIG_ZSWAP_ZPOOL_DEFAULT);
+ param_free_charp(&zswap_zpool_type);
+ zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT;
+ has_zpool = zpool_has_pool(zswap_zpool_type);
+ }
+ if (!has_zpool) {
+ pr_err("default zpool %s not available\n",
+ zswap_zpool_type);
+ param_free_charp(&zswap_zpool_type);
+ zswap_zpool_type = ZSWAP_PARAM_UNSET;
+ }
+
+ if (!has_comp || !has_zpool)
+ return NULL;
+
+ return zswap_pool_create(zswap_zpool_type, zswap_compressor);
+}
+
+static void zswap_pool_destroy(struct zswap_pool *pool)
+{
+ int i;
+
+ zswap_pool_debug("destroying", pool);
+
+ shrinker_free(pool->shrinker);
+ cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node);
+ free_percpu(pool->acomp_ctx);
+ list_lru_destroy(&pool->list_lru);
+
+ spin_lock(&zswap_pools_lock);
+ mem_cgroup_iter_break(NULL, pool->next_shrink);
+ pool->next_shrink = NULL;
+ spin_unlock(&zswap_pools_lock);
+
+ for (i = 0; i < ZSWAP_NR_ZPOOLS; i++)
+ zpool_destroy_pool(pool->zpools[i]);
+ kfree(pool);
+}
+
/* should be called under RCU */
#ifdef CONFIG_MEMCG
static inline struct mem_cgroup *mem_cgroup_from_entry(struct zswap_entry *entry)
@@ -970,151 +1122,6 @@ static void shrink_worker(struct work_struct *w)
zswap_pool_put(pool);
}

-static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
-{
- int i;
- struct zswap_pool *pool;
- char name[38]; /* 'zswap' + 32 char (max) num + \0 */
- gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
- int ret;
-
- if (!zswap_has_pool) {
- /* if either are unset, pool initialization failed, and we
- * need both params to be set correctly before trying to
- * create a pool.
- */
- if (!strcmp(type, ZSWAP_PARAM_UNSET))
- return NULL;
- if (!strcmp(compressor, ZSWAP_PARAM_UNSET))
- return NULL;
- }
-
- pool = kzalloc(sizeof(*pool), GFP_KERNEL);
- if (!pool)
- return NULL;
-
- for (i = 0; i < ZSWAP_NR_ZPOOLS; i++) {
- /* unique name for each pool specifically required by zsmalloc */
- snprintf(name, 38, "zswap%x",
- atomic_inc_return(&zswap_pools_count));
-
- pool->zpools[i] = zpool_create_pool(type, name, gfp);
- if (!pool->zpools[i]) {
- pr_err("%s zpool not available\n", type);
- goto error;
- }
- }
- pr_debug("using %s zpool\n", zpool_get_type(pool->zpools[0]));
-
- strscpy(pool->tfm_name, compressor, sizeof(pool->tfm_name));
-
- pool->acomp_ctx = alloc_percpu(*pool->acomp_ctx);
- if (!pool->acomp_ctx) {
- pr_err("percpu alloc failed\n");
- goto error;
- }
-
- ret = cpuhp_state_add_instance(CPUHP_MM_ZSWP_POOL_PREPARE,
- &pool->node);
- if (ret)
- goto error;
-
- zswap_alloc_shrinker(pool);
- if (!pool->shrinker)
- goto error;
-
- pr_debug("using %s compressor\n", pool->tfm_name);
-
- /* being the current pool takes 1 ref; this func expects the
- * caller to always add the new pool as the current pool
- */
- kref_init(&pool->kref);
- INIT_LIST_HEAD(&pool->list);
- if (list_lru_init_memcg(&pool->list_lru, pool->shrinker))
- goto lru_fail;
- shrinker_register(pool->shrinker);
- INIT_WORK(&pool->shrink_work, shrink_worker);
- atomic_set(&pool->nr_stored, 0);
-
- zswap_pool_debug("created", pool);
-
- return pool;
-
-lru_fail:
- list_lru_destroy(&pool->list_lru);
- shrinker_free(pool->shrinker);
-error:
- if (pool->acomp_ctx)
- free_percpu(pool->acomp_ctx);
- while (i--)
- zpool_destroy_pool(pool->zpools[i]);
- kfree(pool);
- return NULL;
-}
-
-static struct zswap_pool *__zswap_pool_create_fallback(void)
-{
- bool has_comp, has_zpool;
-
- has_comp = crypto_has_acomp(zswap_compressor, 0, 0);
- if (!has_comp && strcmp(zswap_compressor,
- CONFIG_ZSWAP_COMPRESSOR_DEFAULT)) {
- pr_err("compressor %s not available, using default %s\n",
- zswap_compressor, CONFIG_ZSWAP_COMPRESSOR_DEFAULT);
- param_free_charp(&zswap_compressor);
- zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT;
- has_comp = crypto_has_acomp(zswap_compressor, 0, 0);
- }
- if (!has_comp) {
- pr_err("default compressor %s not available\n",
- zswap_compressor);
- param_free_charp(&zswap_compressor);
- zswap_compressor = ZSWAP_PARAM_UNSET;
- }
-
- has_zpool = zpool_has_pool(zswap_zpool_type);
- if (!has_zpool && strcmp(zswap_zpool_type,
- CONFIG_ZSWAP_ZPOOL_DEFAULT)) {
- pr_err("zpool %s not available, using default %s\n",
- zswap_zpool_type, CONFIG_ZSWAP_ZPOOL_DEFAULT);
- param_free_charp(&zswap_zpool_type);
- zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT;
- has_zpool = zpool_has_pool(zswap_zpool_type);
- }
- if (!has_zpool) {
- pr_err("default zpool %s not available\n",
- zswap_zpool_type);
- param_free_charp(&zswap_zpool_type);
- zswap_zpool_type = ZSWAP_PARAM_UNSET;
- }
-
- if (!has_comp || !has_zpool)
- return NULL;
-
- return zswap_pool_create(zswap_zpool_type, zswap_compressor);
-}
-
-static void zswap_pool_destroy(struct zswap_pool *pool)
-{
- int i;
-
- zswap_pool_debug("destroying", pool);
-
- shrinker_free(pool->shrinker);
- cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node);
- free_percpu(pool->acomp_ctx);
- list_lru_destroy(&pool->list_lru);
-
- spin_lock(&zswap_pools_lock);
- mem_cgroup_iter_break(NULL, pool->next_shrink);
- pool->next_shrink = NULL;
- spin_unlock(&zswap_pools_lock);
-
- for (i = 0; i < ZSWAP_NR_ZPOOLS; i++)
- zpool_destroy_pool(pool->zpools[i]);
- kfree(pool);
-}
-
static int __must_check zswap_pool_get(struct zswap_pool *pool)
{
if (!pool)
--
2.43.0


2024-01-30 01:44:54

by Johannes Weiner

Subject: [PATCH 11/20] mm: zswap: function ordering: pool refcounting

Move pool refcounting functions into the pool section. First the
destroy functions, then the get and put which uses them.

__zswap_pool_empty() has an upward reference to the global zswap_pools
list, to sanity check that it's not the currently active pool being
freed. That requires a forward declaration for zswap_pool_current().

This also puts the get and put functions above all of their callers, so
kill the existing forward declarations as well.
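
To illustrate the lifecycle, here is a hedged sketch of how a caller is
expected to use the pool refcount after this reordering. example_user()
is hypothetical; the functions it calls are the ones moved in the diff
below, and the comments summarize what the final put triggers.

static void example_user(void)
{
	struct zswap_pool *pool;

	/* takes a reference under RCU via kref_get_unless_zero() */
	pool = zswap_pool_current_get();
	if (!pool)
		return;

	/* ... use the pool for compression/storage ... */

	/*
	 * Final put invokes __zswap_pool_empty(): the pool is unlinked
	 * from zswap_pools and __zswap_pool_release() destroys it after
	 * an RCU grace period.
	 */
	zswap_pool_put(pool);
}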

Signed-off-by: Johannes Weiner <[email protected]>
---
mm/zswap.c | 94 +++++++++++++++++++++++++++---------------------------
1 file changed, 47 insertions(+), 47 deletions(-)

diff --git a/mm/zswap.c b/mm/zswap.c
index 805d9a35f633..33775f2224b7 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -278,8 +278,6 @@ static inline struct zswap_tree *swap_zswap_tree(swp_entry_t swp)

static int zswap_writeback_entry(struct zswap_entry *entry,
swp_entry_t swpentry);
-static int zswap_pool_get(struct zswap_pool *pool);
-static void zswap_pool_put(struct zswap_pool *pool);

static bool zswap_is_full(void)
{
@@ -472,6 +470,53 @@ static void zswap_pool_destroy(struct zswap_pool *pool)
kfree(pool);
}

+static void __zswap_pool_release(struct work_struct *work)
+{
+ struct zswap_pool *pool = container_of(work, typeof(*pool),
+ release_work);
+
+ synchronize_rcu();
+
+ /* nobody should have been able to get a kref... */
+ WARN_ON(kref_get_unless_zero(&pool->kref));
+
+ /* pool is now off zswap_pools list and has no references. */
+ zswap_pool_destroy(pool);
+}
+
+static struct zswap_pool *zswap_pool_current(void);
+
+static void __zswap_pool_empty(struct kref *kref)
+{
+ struct zswap_pool *pool;
+
+ pool = container_of(kref, typeof(*pool), kref);
+
+ spin_lock(&zswap_pools_lock);
+
+ WARN_ON(pool == zswap_pool_current());
+
+ list_del_rcu(&pool->list);
+
+ INIT_WORK(&pool->release_work, __zswap_pool_release);
+ schedule_work(&pool->release_work);
+
+ spin_unlock(&zswap_pools_lock);
+}
+
+static int __must_check zswap_pool_get(struct zswap_pool *pool)
+{
+ if (!pool)
+ return 0;
+
+ return kref_get_unless_zero(&pool->kref);
+}
+
+static void zswap_pool_put(struct zswap_pool *pool)
+{
+ kref_put(&pool->kref, __zswap_pool_empty);
+}
+
/* should be called under RCU */
#ifdef CONFIG_MEMCG
static inline struct mem_cgroup *mem_cgroup_from_entry(struct zswap_entry *entry)
@@ -1122,51 +1167,6 @@ static void shrink_worker(struct work_struct *w)
zswap_pool_put(pool);
}

-static int __must_check zswap_pool_get(struct zswap_pool *pool)
-{
- if (!pool)
- return 0;
-
- return kref_get_unless_zero(&pool->kref);
-}
-
-static void __zswap_pool_release(struct work_struct *work)
-{
- struct zswap_pool *pool = container_of(work, typeof(*pool),
- release_work);
-
- synchronize_rcu();
-
- /* nobody should have been able to get a kref... */
- WARN_ON(kref_get_unless_zero(&pool->kref));
-
- /* pool is now off zswap_pools list and has no references. */
- zswap_pool_destroy(pool);
-}
-
-static void __zswap_pool_empty(struct kref *kref)
-{
- struct zswap_pool *pool;
-
- pool = container_of(kref, typeof(*pool), kref);
-
- spin_lock(&zswap_pools_lock);
-
- WARN_ON(pool == zswap_pool_current());
-
- list_del_rcu(&pool->list);
-
- INIT_WORK(&pool->release_work, __zswap_pool_release);
- schedule_work(&pool->release_work);
-
- spin_unlock(&zswap_pools_lock);
-}
-
-static void zswap_pool_put(struct zswap_pool *pool)
-{
- kref_put(&pool->kref, __zswap_pool_empty);
-}
-
/*********************************
* param callbacks
**********************************/
--
2.43.0


2024-01-30 01:45:10

by Johannes Weiner

Subject: [PATCH 12/20] mm: zswap: function ordering: zswap_pools

Move the operations against the global zswap_pools list (current pool,
last, find) to the pool section.

Signed-off-by: Johannes Weiner <[email protected]>
---
mm/zswap.c | 150 ++++++++++++++++++++++++++---------------------------
1 file changed, 73 insertions(+), 77 deletions(-)

diff --git a/mm/zswap.c b/mm/zswap.c
index 33775f2224b7..168afd6767b3 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -517,6 +517,79 @@ static void zswap_pool_put(struct zswap_pool *pool)
kref_put(&pool->kref, __zswap_pool_empty);
}

+static struct zswap_pool *__zswap_pool_current(void)
+{
+ struct zswap_pool *pool;
+
+ pool = list_first_or_null_rcu(&zswap_pools, typeof(*pool), list);
+ WARN_ONCE(!pool && zswap_has_pool,
+ "%s: no page storage pool!\n", __func__);
+
+ return pool;
+}
+
+static struct zswap_pool *zswap_pool_current(void)
+{
+ assert_spin_locked(&zswap_pools_lock);
+
+ return __zswap_pool_current();
+}
+
+static struct zswap_pool *zswap_pool_current_get(void)
+{
+ struct zswap_pool *pool;
+
+ rcu_read_lock();
+
+ pool = __zswap_pool_current();
+ if (!zswap_pool_get(pool))
+ pool = NULL;
+
+ rcu_read_unlock();
+
+ return pool;
+}
+
+static struct zswap_pool *zswap_pool_last_get(void)
+{
+ struct zswap_pool *pool, *last = NULL;
+
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(pool, &zswap_pools, list)
+ last = pool;
+ WARN_ONCE(!last && zswap_has_pool,
+ "%s: no page storage pool!\n", __func__);
+ if (!zswap_pool_get(last))
+ last = NULL;
+
+ rcu_read_unlock();
+
+ return last;
+}
+
+/* type and compressor must be null-terminated */
+static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
+{
+ struct zswap_pool *pool;
+
+ assert_spin_locked(&zswap_pools_lock);
+
+ list_for_each_entry_rcu(pool, &zswap_pools, list) {
+ if (strcmp(pool->tfm_name, compressor))
+ continue;
+ /* all zpools share the same type */
+ if (strcmp(zpool_get_type(pool->zpools[0]), type))
+ continue;
+ /* if we can't get it, it's about to be destroyed */
+ if (!zswap_pool_get(pool))
+ continue;
+ return pool;
+ }
+
+ return NULL;
+}
+
/* should be called under RCU */
#ifdef CONFIG_MEMCG
static inline struct mem_cgroup *mem_cgroup_from_entry(struct zswap_entry *entry)
@@ -938,83 +1011,6 @@ static int zswap_cpu_comp_dead(unsigned int cpu, struct hlist_node *node)
return 0;
}

-/*********************************
-* pool functions
-**********************************/
-
-static struct zswap_pool *__zswap_pool_current(void)
-{
- struct zswap_pool *pool;
-
- pool = list_first_or_null_rcu(&zswap_pools, typeof(*pool), list);
- WARN_ONCE(!pool && zswap_has_pool,
- "%s: no page storage pool!\n", __func__);
-
- return pool;
-}
-
-static struct zswap_pool *zswap_pool_current(void)
-{
- assert_spin_locked(&zswap_pools_lock);
-
- return __zswap_pool_current();
-}
-
-static struct zswap_pool *zswap_pool_current_get(void)
-{
- struct zswap_pool *pool;
-
- rcu_read_lock();
-
- pool = __zswap_pool_current();
- if (!zswap_pool_get(pool))
- pool = NULL;
-
- rcu_read_unlock();
-
- return pool;
-}
-
-static struct zswap_pool *zswap_pool_last_get(void)
-{
- struct zswap_pool *pool, *last = NULL;
-
- rcu_read_lock();
-
- list_for_each_entry_rcu(pool, &zswap_pools, list)
- last = pool;
- WARN_ONCE(!last && zswap_has_pool,
- "%s: no page storage pool!\n", __func__);
- if (!zswap_pool_get(last))
- last = NULL;
-
- rcu_read_unlock();
-
- return last;
-}
-
-/* type and compressor must be null-terminated */
-static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
-{
- struct zswap_pool *pool;
-
- assert_spin_locked(&zswap_pools_lock);
-
- list_for_each_entry_rcu(pool, &zswap_pools, list) {
- if (strcmp(pool->tfm_name, compressor))
- continue;
- /* all zpools share the same type */
- if (strcmp(zpool_get_type(pool->zpools[0]), type))
- continue;
- /* if we can't get it, it's about to be destroyed */
- if (!zswap_pool_get(pool))
- continue;
- return pool;
- }
-
- return NULL;
-}
-
static enum lru_status shrink_memcg_cb(struct list_head *item, struct list_lru_one *l,
spinlock_t *lock, void *arg)
{
--
2.43.0


2024-01-30 01:45:30

by Johannes Weiner

Subject: [PATCH 13/20] mm: zswap: function ordering: pool params

The parameters primarily control pool attributes. Move those
operations up to the pool section.

Signed-off-by: Johannes Weiner <[email protected]>
---
mm/zswap.c | 312 ++++++++++++++++++++++++++---------------------------
1 file changed, 156 insertions(+), 156 deletions(-)

diff --git a/mm/zswap.c b/mm/zswap.c
index 168afd6767b3..e650fc587116 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -590,6 +590,162 @@ static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
return NULL;
}

+/*********************************
+* param callbacks
+**********************************/
+
+static bool zswap_pool_changed(const char *s, const struct kernel_param *kp)
+{
+ /* no change required */
+ if (!strcmp(s, *(char **)kp->arg) && zswap_has_pool)
+ return false;
+ return true;
+}
+
+/* val must be a null-terminated string */
+static int __zswap_param_set(const char *val, const struct kernel_param *kp,
+ char *type, char *compressor)
+{
+ struct zswap_pool *pool, *put_pool = NULL;
+ char *s = strstrip((char *)val);
+ int ret = 0;
+ bool new_pool = false;
+
+ mutex_lock(&zswap_init_lock);
+ switch (zswap_init_state) {
+ case ZSWAP_UNINIT:
+ /* if this is load-time (pre-init) param setting,
+ * don't create a pool; that's done during init.
+ */
+ ret = param_set_charp(s, kp);
+ break;
+ case ZSWAP_INIT_SUCCEED:
+ new_pool = zswap_pool_changed(s, kp);
+ break;
+ case ZSWAP_INIT_FAILED:
+ pr_err("can't set param, initialization failed\n");
+ ret = -ENODEV;
+ }
+ mutex_unlock(&zswap_init_lock);
+
+ /* no need to create a new pool, return directly */
+ if (!new_pool)
+ return ret;
+
+ if (!type) {
+ if (!zpool_has_pool(s)) {
+ pr_err("zpool %s not available\n", s);
+ return -ENOENT;
+ }
+ type = s;
+ } else if (!compressor) {
+ if (!crypto_has_acomp(s, 0, 0)) {
+ pr_err("compressor %s not available\n", s);
+ return -ENOENT;
+ }
+ compressor = s;
+ } else {
+ WARN_ON(1);
+ return -EINVAL;
+ }
+
+ spin_lock(&zswap_pools_lock);
+
+ pool = zswap_pool_find_get(type, compressor);
+ if (pool) {
+ zswap_pool_debug("using existing", pool);
+ WARN_ON(pool == zswap_pool_current());
+ list_del_rcu(&pool->list);
+ }
+
+ spin_unlock(&zswap_pools_lock);
+
+ if (!pool)
+ pool = zswap_pool_create(type, compressor);
+
+ if (pool)
+ ret = param_set_charp(s, kp);
+ else
+ ret = -EINVAL;
+
+ spin_lock(&zswap_pools_lock);
+
+ if (!ret) {
+ put_pool = zswap_pool_current();
+ list_add_rcu(&pool->list, &zswap_pools);
+ zswap_has_pool = true;
+ } else if (pool) {
+ /* add the possibly pre-existing pool to the end of the pools
+ * list; if it's new (and empty) then it'll be removed and
+ * destroyed by the put after we drop the lock
+ */
+ list_add_tail_rcu(&pool->list, &zswap_pools);
+ put_pool = pool;
+ }
+
+ spin_unlock(&zswap_pools_lock);
+
+ if (!zswap_has_pool && !pool) {
+ /* if initial pool creation failed, and this pool creation also
+ * failed, maybe both compressor and zpool params were bad.
+ * Allow changing this param, so pool creation will succeed
+ * when the other param is changed. We already verified this
+ * param is ok in the zpool_has_pool() or crypto_has_acomp()
+ * checks above.
+ */
+ ret = param_set_charp(s, kp);
+ }
+
+ /* drop the ref from either the old current pool,
+ * or the new pool we failed to add
+ */
+ if (put_pool)
+ zswap_pool_put(put_pool);
+
+ return ret;
+}
+
+static int zswap_compressor_param_set(const char *val,
+ const struct kernel_param *kp)
+{
+ return __zswap_param_set(val, kp, zswap_zpool_type, NULL);
+}
+
+static int zswap_zpool_param_set(const char *val,
+ const struct kernel_param *kp)
+{
+ return __zswap_param_set(val, kp, NULL, zswap_compressor);
+}
+
+static int zswap_enabled_param_set(const char *val,
+ const struct kernel_param *kp)
+{
+ int ret = -ENODEV;
+
+ /* if this is load-time (pre-init) param setting, only set param. */
+ if (system_state != SYSTEM_RUNNING)
+ return param_set_bool(val, kp);
+
+ mutex_lock(&zswap_init_lock);
+ switch (zswap_init_state) {
+ case ZSWAP_UNINIT:
+ if (zswap_setup())
+ break;
+ fallthrough;
+ case ZSWAP_INIT_SUCCEED:
+ if (!zswap_has_pool)
+ pr_err("can't enable, no pool configured\n");
+ else
+ ret = param_set_bool(val, kp);
+ break;
+ case ZSWAP_INIT_FAILED:
+ pr_err("can't enable, initialization failed\n");
+ }
+ mutex_unlock(&zswap_init_lock);
+
+ return ret;
+}
+
/* should be called under RCU */
#ifdef CONFIG_MEMCG
static inline struct mem_cgroup *mem_cgroup_from_entry(struct zswap_entry *entry)
@@ -1163,162 +1319,6 @@ static void shrink_worker(struct work_struct *w)
zswap_pool_put(pool);
}

-/*********************************
-* param callbacks
-**********************************/
-
-static bool zswap_pool_changed(const char *s, const struct kernel_param *kp)
-{
- /* no change required */
- if (!strcmp(s, *(char **)kp->arg) && zswap_has_pool)
- return false;
- return true;
-}
-
-/* val must be a null-terminated string */
-static int __zswap_param_set(const char *val, const struct kernel_param *kp,
- char *type, char *compressor)
-{
- struct zswap_pool *pool, *put_pool = NULL;
- char *s = strstrip((char *)val);
- int ret = 0;
- bool new_pool = false;
-
- mutex_lock(&zswap_init_lock);
- switch (zswap_init_state) {
- case ZSWAP_UNINIT:
- /* if this is load-time (pre-init) param setting,
- * don't create a pool; that's done during init.
- */
- ret = param_set_charp(s, kp);
- break;
- case ZSWAP_INIT_SUCCEED:
- new_pool = zswap_pool_changed(s, kp);
- break;
- case ZSWAP_INIT_FAILED:
- pr_err("can't set param, initialization failed\n");
- ret = -ENODEV;
- }
- mutex_unlock(&zswap_init_lock);
-
- /* no need to create a new pool, return directly */
- if (!new_pool)
- return ret;
-
- if (!type) {
- if (!zpool_has_pool(s)) {
- pr_err("zpool %s not available\n", s);
- return -ENOENT;
- }
- type = s;
- } else if (!compressor) {
- if (!crypto_has_acomp(s, 0, 0)) {
- pr_err("compressor %s not available\n", s);
- return -ENOENT;
- }
- compressor = s;
- } else {
- WARN_ON(1);
- return -EINVAL;
- }
-
- spin_lock(&zswap_pools_lock);
-
- pool = zswap_pool_find_get(type, compressor);
- if (pool) {
- zswap_pool_debug("using existing", pool);
- WARN_ON(pool == zswap_pool_current());
- list_del_rcu(&pool->list);
- }
-
- spin_unlock(&zswap_pools_lock);
-
- if (!pool)
- pool = zswap_pool_create(type, compressor);
-
- if (pool)
- ret = param_set_charp(s, kp);
- else
- ret = -EINVAL;
-
- spin_lock(&zswap_pools_lock);
-
- if (!ret) {
- put_pool = zswap_pool_current();
- list_add_rcu(&pool->list, &zswap_pools);
- zswap_has_pool = true;
- } else if (pool) {
- /* add the possibly pre-existing pool to the end of the pools
- * list; if it's new (and empty) then it'll be removed and
- * destroyed by the put after we drop the lock
- */
- list_add_tail_rcu(&pool->list, &zswap_pools);
- put_pool = pool;
- }
-
- spin_unlock(&zswap_pools_lock);
-
- if (!zswap_has_pool && !pool) {
- /* if initial pool creation failed, and this pool creation also
- * failed, maybe both compressor and zpool params were bad.
- * Allow changing this param, so pool creation will succeed
- * when the other param is changed. We already verified this
- * param is ok in the zpool_has_pool() or crypto_has_acomp()
- * checks above.
- */
- ret = param_set_charp(s, kp);
- }
-
- /* drop the ref from either the old current pool,
- * or the new pool we failed to add
- */
- if (put_pool)
- zswap_pool_put(put_pool);
-
- return ret;
-}
-
-static int zswap_compressor_param_set(const char *val,
- const struct kernel_param *kp)
-{
- return __zswap_param_set(val, kp, zswap_zpool_type, NULL);
-}
-
-static int zswap_zpool_param_set(const char *val,
- const struct kernel_param *kp)
-{
- return __zswap_param_set(val, kp, NULL, zswap_compressor);
-}
-
-static int zswap_enabled_param_set(const char *val,
- const struct kernel_param *kp)
-{
- int ret = -ENODEV;
-
- /* if this is load-time (pre-init) param setting, only set param. */
- if (system_state != SYSTEM_RUNNING)
- return param_set_bool(val, kp);
-
- mutex_lock(&zswap_init_lock);
- switch (zswap_init_state) {
- case ZSWAP_UNINIT:
- if (zswap_setup())
- break;
- fallthrough;
- case ZSWAP_INIT_SUCCEED:
- if (!zswap_has_pool)
- pr_err("can't enable, no pool configured\n");
- else
- ret = param_set_bool(val, kp);
- break;
- case ZSWAP_INIT_FAILED:
- pr_err("can't enable, initialization failed\n");
- }
- mutex_unlock(&zswap_init_lock);
-
- return ret;
-}
-
static bool zswap_compress(struct folio *folio, struct zswap_entry *entry)
{
struct crypto_acomp_ctx *acomp_ctx;
--
2.43.0


2024-01-30 01:45:35

by Johannes Weiner

Subject: [PATCH 14/20] mm: zswap: function ordering: public lru api

The zswap entry section sits awkwardly in the middle of LRU-related
functions. Group the external LRU API functions first.

Signed-off-by: Johannes Weiner <[email protected]>
---
mm/zswap.c | 37 +++++++++++++++++++------------------
1 file changed, 19 insertions(+), 18 deletions(-)

diff --git a/mm/zswap.c b/mm/zswap.c
index e650fc587116..511bfafc1456 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -746,6 +746,10 @@ static int zswap_enabled_param_set(const char *val,
return ret;
}

+/*********************************
+* lru functions
+**********************************/
+
/* should be called under RCU */
#ifdef CONFIG_MEMCG
static inline struct mem_cgroup *mem_cgroup_from_entry(struct zswap_entry *entry)
@@ -764,6 +768,21 @@ static inline int entry_to_nid(struct zswap_entry *entry)
return page_to_nid(virt_to_page(entry));
}

+void zswap_lruvec_state_init(struct lruvec *lruvec)
+{
+ atomic_long_set(&lruvec->zswap_lruvec_state.nr_zswap_protected, 0);
+}
+
+void zswap_folio_swapin(struct folio *folio)
+{
+ struct lruvec *lruvec;
+
+ if (folio) {
+ lruvec = folio_lruvec(folio);
+ atomic_long_inc(&lruvec->zswap_lruvec_state.nr_zswap_protected);
+ }
+}
+
void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg)
{
struct zswap_pool *pool;
@@ -798,24 +817,6 @@ static void zswap_entry_cache_free(struct zswap_entry *entry)
kmem_cache_free(zswap_entry_cache, entry);
}

-/*********************************
-* zswap lruvec functions
-**********************************/
-void zswap_lruvec_state_init(struct lruvec *lruvec)
-{
- atomic_long_set(&lruvec->zswap_lruvec_state.nr_zswap_protected, 0);
-}
-
-void zswap_folio_swapin(struct folio *folio)
-{
- struct lruvec *lruvec;
-
- if (folio) {
- lruvec = folio_lruvec(folio);
- atomic_long_inc(&lruvec->zswap_lruvec_state.nr_zswap_protected);
- }
-}
-
/*********************************
* lru functions
**********************************/
--
2.43.0


2024-01-30 01:45:36

by Johannes Weiner

Subject: [PATCH 08/20] mm: zswap: further cleanup zswap_store()

- Remove dupentry; reusing entry works just fine.
- Rename pool to shrink_pool, as the generic name is genuinely confusing there.
- Remove page; use folio_nid() and kmap_local_folio() directly.
- Set entry->swpentry in a common path.
- Move value and src into the local scope where they are used.

Signed-off-by: Johannes Weiner <[email protected]>
---
mm/zswap.c | 28 +++++++++++++---------------
1 file changed, 13 insertions(+), 15 deletions(-)

diff --git a/mm/zswap.c b/mm/zswap.c
index f9b9494156ba..cde309c539b3 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -1542,14 +1542,11 @@ bool zswap_store(struct folio *folio)
{
swp_entry_t swp = folio->swap;
pgoff_t offset = swp_offset(swp);
- struct page *page = &folio->page;
struct zswap_tree *tree = swap_zswap_tree(swp);
struct zswap_entry *entry, *dupentry;
struct obj_cgroup *objcg = NULL;
struct mem_cgroup *memcg = NULL;
- struct zswap_pool *pool;
- unsigned long value;
- u8 *src;
+ struct zswap_pool *shrink_pool;

VM_WARN_ON_ONCE(!folio_test_locked(folio));
VM_WARN_ON_ONCE(!folio_test_swapcache(folio));
@@ -1567,10 +1564,10 @@ bool zswap_store(struct folio *folio)
* the tree, and it might be written back overriding the new data.
*/
spin_lock(&tree->lock);
- dupentry = zswap_rb_search(&tree->rbroot, offset);
- if (dupentry) {
+ entry = zswap_rb_search(&tree->rbroot, offset);
+ if (entry) {
+ zswap_invalidate_entry(tree, entry);
zswap_duplicate_entry++;
- zswap_invalidate_entry(tree, dupentry);
}
spin_unlock(&tree->lock);
objcg = get_obj_cgroup_from_folio(folio);
@@ -1598,17 +1595,19 @@ bool zswap_store(struct folio *folio)
}

/* allocate entry */
- entry = zswap_entry_cache_alloc(GFP_KERNEL, page_to_nid(page));
+ entry = zswap_entry_cache_alloc(GFP_KERNEL, folio_nid(folio));
if (!entry) {
zswap_reject_kmemcache_fail++;
goto reject;
}

if (zswap_same_filled_pages_enabled) {
- src = kmap_local_page(page);
+ unsigned long value;
+ u8 *src;
+
+ src = kmap_local_folio(folio, 0);
if (zswap_is_page_same_filled(src, &value)) {
kunmap_local(src);
- entry->swpentry = swp;
entry->length = 0;
entry->value = value;
atomic_inc(&zswap_same_filled_pages);
@@ -1637,9 +1636,8 @@ bool zswap_store(struct folio *folio)
if (!zswap_compress(folio, entry))
goto put_pool;

- entry->swpentry = swp;
-
insert_entry:
+ entry->swpentry = swp;
entry->objcg = objcg;
if (objcg) {
obj_cgroup_charge_zswap(objcg, entry->length);
@@ -1684,9 +1682,9 @@ bool zswap_store(struct folio *folio)
return false;

shrink:
- pool = zswap_pool_last_get();
- if (pool && !queue_work(shrink_wq, &pool->shrink_work))
- zswap_pool_put(pool);
+ shrink_pool = zswap_pool_last_get();
+ if (shrink_pool && !queue_work(shrink_wq, &shrink_pool->shrink_work))
+ zswap_pool_put(shrink_pool);
goto reject;
}

--
2.43.0


2024-01-30 01:46:01

by Johannes Weiner

Subject: [PATCH 16/20] mm: zswap: function ordering: move entry section out of tree section

The higher-level entry operations modify the tree, so move the entry
API after the tree section.

Signed-off-by: Johannes Weiner <[email protected]>
---
mm/zswap.c | 42 +++++++++++++++++++++---------------------
1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/mm/zswap.c b/mm/zswap.c
index 756d4d575efe..80adc2f7d1a2 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -848,27 +848,6 @@ void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg)
spin_unlock(&zswap_pools_lock);
}

-/*********************************
-* zswap entry functions
-**********************************/
-static struct kmem_cache *zswap_entry_cache;
-
-static struct zswap_entry *zswap_entry_cache_alloc(gfp_t gfp, int nid)
-{
- struct zswap_entry *entry;
- entry = kmem_cache_alloc_node(zswap_entry_cache, gfp, nid);
- if (!entry)
- return NULL;
- entry->refcount = 1;
- RB_CLEAR_NODE(&entry->rbnode);
- return entry;
-}
-
-static void zswap_entry_cache_free(struct zswap_entry *entry)
-{
- kmem_cache_free(zswap_entry_cache, entry);
-}
-
/*********************************
* rbtree functions
**********************************/
@@ -930,6 +909,27 @@ static bool zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry)
return false;
}

+/*********************************
+* zswap entry functions
+**********************************/
+static struct kmem_cache *zswap_entry_cache;
+
+static struct zswap_entry *zswap_entry_cache_alloc(gfp_t gfp, int nid)
+{
+ struct zswap_entry *entry;
+ entry = kmem_cache_alloc_node(zswap_entry_cache, gfp, nid);
+ if (!entry)
+ return NULL;
+ entry->refcount = 1;
+ RB_CLEAR_NODE(&entry->rbnode);
+ return entry;
+}
+
+static void zswap_entry_cache_free(struct zswap_entry *entry)
+{
+ kmem_cache_free(zswap_entry_cache, entry);
+}
+
static struct zpool *zswap_find_zpool(struct zswap_entry *entry)
{
int i = 0;
--
2.43.0


2024-01-30 01:46:18

by Johannes Weiner

Subject: [PATCH 17/20] mm: zswap: function ordering: compress & decompress functions

Writeback needs to decompress. Move the (de)compression API above what
will be the consolidated shrinking/writeback code.

Signed-off-by: Johannes Weiner <[email protected]>
---
mm/zswap.c | 207 +++++++++++++++++++++++++++--------------------------
1 file changed, 105 insertions(+), 102 deletions(-)

diff --git a/mm/zswap.c b/mm/zswap.c
index 80adc2f7d1a2..17356b2e35c2 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -992,6 +992,111 @@ static void zswap_invalidate_entry(struct zswap_tree *tree,
zswap_entry_put(entry);
}

+/*********************************
+* compressed storage functions
+**********************************/
+static bool zswap_compress(struct folio *folio, struct zswap_entry *entry)
+{
+ struct crypto_acomp_ctx *acomp_ctx;
+ struct scatterlist input, output;
+ unsigned int dlen = PAGE_SIZE;
+ unsigned long handle;
+ struct zpool *zpool;
+ char *buf;
+ gfp_t gfp;
+ int ret;
+ u8 *dst;
+
+ acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
+
+ mutex_lock(&acomp_ctx->mutex);
+
+ dst = acomp_ctx->buffer;
+ sg_init_table(&input, 1);
+ sg_set_page(&input, &folio->page, PAGE_SIZE, 0);
+
+ /*
+ * We need PAGE_SIZE * 2 here since there maybe over-compression case,
+ * and hardware-accelerators may won't check the dst buffer size, so
+ * giving the dst buffer with enough length to avoid buffer overflow.
+ */
+ sg_init_one(&output, dst, PAGE_SIZE * 2);
+ acomp_request_set_params(acomp_ctx->req, &input, &output, PAGE_SIZE, dlen);
+
+ /*
+ * it maybe looks a little bit silly that we send an asynchronous request,
+ * then wait for its completion synchronously. This makes the process look
+ * synchronous in fact.
+ * Theoretically, acomp supports users send multiple acomp requests in one
+ * acomp instance, then get those requests done simultaneously. but in this
+ * case, zswap actually does store and load page by page, there is no
+ * existing method to send the second page before the first page is done
+ * in one thread doing zwap.
+ * but in different threads running on different cpu, we have different
+ * acomp instance, so multiple threads can do (de)compression in parallel.
+ */
+ ret = crypto_wait_req(crypto_acomp_compress(acomp_ctx->req), &acomp_ctx->wait);
+ dlen = acomp_ctx->req->dlen;
+ if (ret) {
+ zswap_reject_compress_fail++;
+ goto unlock;
+ }
+
+ zpool = zswap_find_zpool(entry);
+ gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
+ if (zpool_malloc_support_movable(zpool))
+ gfp |= __GFP_HIGHMEM | __GFP_MOVABLE;
+ ret = zpool_malloc(zpool, dlen, gfp, &handle);
+ if (ret == -ENOSPC) {
+ zswap_reject_compress_poor++;
+ goto unlock;
+ }
+ if (ret) {
+ zswap_reject_alloc_fail++;
+ goto unlock;
+ }
+
+ buf = zpool_map_handle(zpool, handle, ZPOOL_MM_WO);
+ memcpy(buf, dst, dlen);
+ zpool_unmap_handle(zpool, handle);
+
+ entry->handle = handle;
+ entry->length = dlen;
+
+unlock:
+ mutex_unlock(&acomp_ctx->mutex);
+ return ret == 0;
+}
+
+static void zswap_decompress(struct zswap_entry *entry, struct page *page)
+{
+ struct zpool *zpool = zswap_find_zpool(entry);
+ struct scatterlist input, output;
+ struct crypto_acomp_ctx *acomp_ctx;
+ u8 *src;
+
+ acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
+ mutex_lock(&acomp_ctx->mutex);
+
+ src = zpool_map_handle(zpool, entry->handle, ZPOOL_MM_RO);
+ if (!zpool_can_sleep_mapped(zpool)) {
+ memcpy(acomp_ctx->buffer, src, entry->length);
+ src = acomp_ctx->buffer;
+ zpool_unmap_handle(zpool, entry->handle);
+ }
+
+ sg_init_one(&input, src, entry->length);
+ sg_init_table(&output, 1);
+ sg_set_page(&output, page, PAGE_SIZE, 0);
+ acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, PAGE_SIZE);
+ BUG_ON(crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait));
+ BUG_ON(acomp_ctx->req->dlen != PAGE_SIZE);
+ mutex_unlock(&acomp_ctx->mutex);
+
+ if (zpool_can_sleep_mapped(zpool))
+ zpool_unmap_handle(zpool, entry->handle);
+}
+
/*********************************
* shrinker functions
**********************************/
@@ -1317,108 +1422,6 @@ static void shrink_worker(struct work_struct *w)
zswap_pool_put(pool);
}

-static bool zswap_compress(struct folio *folio, struct zswap_entry *entry)
-{
- struct crypto_acomp_ctx *acomp_ctx;
- struct scatterlist input, output;
- unsigned int dlen = PAGE_SIZE;
- unsigned long handle;
- struct zpool *zpool;
- char *buf;
- gfp_t gfp;
- int ret;
- u8 *dst;
-
- acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
-
- mutex_lock(&acomp_ctx->mutex);
-
- dst = acomp_ctx->buffer;
- sg_init_table(&input, 1);
- sg_set_page(&input, &folio->page, PAGE_SIZE, 0);
-
- /*
- * We need PAGE_SIZE * 2 here since there maybe over-compression case,
- * and hardware-accelerators may won't check the dst buffer size, so
- * giving the dst buffer with enough length to avoid buffer overflow.
- */
- sg_init_one(&output, dst, PAGE_SIZE * 2);
- acomp_request_set_params(acomp_ctx->req, &input, &output, PAGE_SIZE, dlen);
-
- /*
- * it maybe looks a little bit silly that we send an asynchronous request,
- * then wait for its completion synchronously. This makes the process look
- * synchronous in fact.
- * Theoretically, acomp supports users send multiple acomp requests in one
- * acomp instance, then get those requests done simultaneously. but in this
- * case, zswap actually does store and load page by page, there is no
- * existing method to send the second page before the first page is done
- * in one thread doing zwap.
- * but in different threads running on different cpu, we have different
- * acomp instance, so multiple threads can do (de)compression in parallel.
- */
- ret = crypto_wait_req(crypto_acomp_compress(acomp_ctx->req), &acomp_ctx->wait);
- dlen = acomp_ctx->req->dlen;
- if (ret) {
- zswap_reject_compress_fail++;
- goto unlock;
- }
-
- zpool = zswap_find_zpool(entry);
- gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
- if (zpool_malloc_support_movable(zpool))
- gfp |= __GFP_HIGHMEM | __GFP_MOVABLE;
- ret = zpool_malloc(zpool, dlen, gfp, &handle);
- if (ret == -ENOSPC) {
- zswap_reject_compress_poor++;
- goto unlock;
- }
- if (ret) {
- zswap_reject_alloc_fail++;
- goto unlock;
- }
-
- buf = zpool_map_handle(zpool, handle, ZPOOL_MM_WO);
- memcpy(buf, dst, dlen);
- zpool_unmap_handle(zpool, handle);
-
- entry->handle = handle;
- entry->length = dlen;
-
-unlock:
- mutex_unlock(&acomp_ctx->mutex);
- return ret == 0;
-}
-
-static void zswap_decompress(struct zswap_entry *entry, struct page *page)
-{
- struct zpool *zpool = zswap_find_zpool(entry);
- struct scatterlist input, output;
- struct crypto_acomp_ctx *acomp_ctx;
- u8 *src;
-
- acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
- mutex_lock(&acomp_ctx->mutex);
-
- src = zpool_map_handle(zpool, entry->handle, ZPOOL_MM_RO);
- if (!zpool_can_sleep_mapped(zpool)) {
- memcpy(acomp_ctx->buffer, src, entry->length);
- src = acomp_ctx->buffer;
- zpool_unmap_handle(zpool, entry->handle);
- }
-
- sg_init_one(&input, src, entry->length);
- sg_init_table(&output, 1);
- sg_set_page(&output, page, PAGE_SIZE, 0);
- acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, PAGE_SIZE);
- BUG_ON(crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait));
- BUG_ON(acomp_ctx->req->dlen != PAGE_SIZE);
- mutex_unlock(&acomp_ctx->mutex);
-
- if (zpool_can_sleep_mapped(zpool))
- zpool_unmap_handle(zpool, entry->handle);
-}
-
/*********************************
* writeback code
**********************************/
--
2.43.0


2024-01-30 01:46:39

by Johannes Weiner

Subject: [PATCH 19/20] mm: zswap: function ordering: writeback

Shrinking needs writeback. Naturally, move the writeback code above
the shrinking code, and delete the now-unneeded forward declaration.

Signed-off-by: Johannes Weiner <[email protected]>
---
mm/zswap.c | 183 ++++++++++++++++++++++++++---------------------------
1 file changed, 90 insertions(+), 93 deletions(-)

diff --git a/mm/zswap.c b/mm/zswap.c
index acd7dcd1e0f2..0cb3437d47eb 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -276,9 +276,6 @@ static inline struct zswap_tree *swap_zswap_tree(swp_entry_t swp)
pr_debug("%s pool %s/%s\n", msg, (p)->tfm_name, \
zpool_get_type((p)->zpools[0]))

-static int zswap_writeback_entry(struct zswap_entry *entry,
- swp_entry_t swpentry);
-
static bool zswap_is_full(void)
{
return totalram_pages() * zswap_max_pool_percent / 100 <
@@ -1163,6 +1160,96 @@ static void zswap_decompress(struct zswap_entry *entry, struct page *page)
zpool_unmap_handle(zpool, entry->handle);
}

+/*********************************
+* writeback code
+**********************************/
+/*
+ * Attempts to free an entry by adding a folio to the swap cache,
+ * decompressing the entry data into the folio, and issuing a
+ * bio write to write the folio back to the swap device.
+ *
+ * This can be thought of as a "resumed writeback" of the folio
+ * to the swap device. We are basically resuming the same swap
+ * writeback path that was intercepted with the zswap_store()
+ * in the first place. After the folio has been decompressed into
+ * the swap cache, the compressed version stored by zswap can be
+ * freed.
+ */
+static int zswap_writeback_entry(struct zswap_entry *entry,
+ swp_entry_t swpentry)
+{
+ struct zswap_tree *tree;
+ struct folio *folio;
+ struct mempolicy *mpol;
+ bool folio_was_allocated;
+ struct writeback_control wbc = {
+ .sync_mode = WB_SYNC_NONE,
+ };
+
+ /* try to allocate swap cache folio */
+ mpol = get_task_policy(current);
+ folio = __read_swap_cache_async(swpentry, GFP_KERNEL, mpol,
+ NO_INTERLEAVE_INDEX, &folio_was_allocated, true);
+ if (!folio)
+ return -ENOMEM;
+
+ /*
+ * Found an existing folio, we raced with swapin or concurrent
+ * shrinker. We generally writeback cold folios from zswap, and
+ * swapin means the folio just became hot, so skip this folio.
+ * For unlikely concurrent shrinker case, it will be unlinked
+ * and freed when invalidated by the concurrent shrinker anyway.
+ */
+ if (!folio_was_allocated) {
+ folio_put(folio);
+ return -EEXIST;
+ }
+
+ /*
+ * folio is locked, and the swapcache is now secured against
+ * concurrent swapping to and from the slot. Verify that the
+ * swap entry hasn't been invalidated and recycled behind our
+ * backs (our zswap_entry reference doesn't prevent that), to
+ * avoid overwriting a new swap folio with old compressed data.
+ */
+ tree = swap_zswap_tree(swpentry);
+ spin_lock(&tree->lock);
+ if (zswap_rb_search(&tree->rbroot, swp_offset(swpentry)) != entry) {
+ spin_unlock(&tree->lock);
+ delete_from_swap_cache(folio);
+ folio_unlock(folio);
+ folio_put(folio);
+ return -ENOMEM;
+ }
+
+ /* Safe to deref entry after the entry is verified above. */
+ zswap_entry_get(entry);
+ spin_unlock(&tree->lock);
+
+ zswap_decompress(entry, &folio->page);
+
+ count_vm_event(ZSWPWB);
+ if (entry->objcg)
+ count_objcg_event(entry->objcg, ZSWPWB);
+
+ spin_lock(&tree->lock);
+ zswap_invalidate_entry(tree, entry);
+ zswap_entry_put(entry);
+ spin_unlock(&tree->lock);
+
+ /* folio is up to date */
+ folio_mark_uptodate(folio);
+
+ /* move it to the tail of the inactive list after end_writeback */
+ folio_set_reclaim(folio);
+
+ /* start writeback */
+ __swap_writepage(folio, &wbc);
+ folio_put(folio);
+
+ return 0;
+}
+
/*********************************
* shrinker functions
**********************************/
@@ -1419,96 +1506,6 @@ static void shrink_worker(struct work_struct *w)
zswap_pool_put(pool);
}

-/*********************************
-* writeback code
-**********************************/
-/*
- * Attempts to free an entry by adding a folio to the swap cache,
- * decompressing the entry data into the folio, and issuing a
- * bio write to write the folio back to the swap device.
- *
- * This can be thought of as a "resumed writeback" of the folio
- * to the swap device. We are basically resuming the same swap
- * writeback path that was intercepted with the zswap_store()
- * in the first place. After the folio has been decompressed into
- * the swap cache, the compressed version stored by zswap can be
- * freed.
- */
-static int zswap_writeback_entry(struct zswap_entry *entry,
- swp_entry_t swpentry)
-{
- struct zswap_tree *tree;
- struct folio *folio;
- struct mempolicy *mpol;
- bool folio_was_allocated;
- struct writeback_control wbc = {
- .sync_mode = WB_SYNC_NONE,
- };
-
- /* try to allocate swap cache folio */
- mpol = get_task_policy(current);
- folio = __read_swap_cache_async(swpentry, GFP_KERNEL, mpol,
- NO_INTERLEAVE_INDEX, &folio_was_allocated, true);
- if (!folio)
- return -ENOMEM;
-
- /*
- * Found an existing folio, we raced with swapin or concurrent
- * shrinker. We generally writeback cold folios from zswap, and
- * swapin means the folio just became hot, so skip this folio.
- * For unlikely concurrent shrinker case, it will be unlinked
- * and freed when invalidated by the concurrent shrinker anyway.
- */
- if (!folio_was_allocated) {
- folio_put(folio);
- return -EEXIST;
- }
-
- /*
- * folio is locked, and the swapcache is now secured against
- * concurrent swapping to and from the slot. Verify that the
- * swap entry hasn't been invalidated and recycled behind our
- * backs (our zswap_entry reference doesn't prevent that), to
- * avoid overwriting a new swap folio with old compressed data.
- */
- tree = swap_zswap_tree(swpentry);
- spin_lock(&tree->lock);
- if (zswap_rb_search(&tree->rbroot, swp_offset(swpentry)) != entry) {
- spin_unlock(&tree->lock);
- delete_from_swap_cache(folio);
- folio_unlock(folio);
- folio_put(folio);
- return -ENOMEM;
- }
-
- /* Safe to deref entry after the entry is verified above. */
- zswap_entry_get(entry);
- spin_unlock(&tree->lock);
-
- zswap_decompress(entry, &folio->page);
-
- count_vm_event(ZSWPWB);
- if (entry->objcg)
- count_objcg_event(entry->objcg, ZSWPWB);
-
- spin_lock(&tree->lock);
- zswap_invalidate_entry(tree, entry);
- zswap_entry_put(entry);
- spin_unlock(&tree->lock);
-
- /* folio is up to date */
- folio_mark_uptodate(folio);
-
- /* move it to the tail of the inactive list after end_writeback */
- folio_set_reclaim(folio);
-
- /* start writeback */
- __swap_writepage(folio, &wbc);
- folio_put(folio);
-
- return 0;
-}
-
static int zswap_is_page_same_filled(void *ptr, unsigned long *value)
{
unsigned long *page;
--
2.43.0


2024-01-30 01:51:37

by Johannes Weiner

Subject: [PATCH 07/20] mm: zswap: break out zswap_compress()

zswap_store() is long and mixes work at the zswap layer with work at
the backend and compression layer. Move compression & backend work to
zswap_compress(), mirroring zswap_decompress().
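
In other words, the compression step in zswap_store() collapses to
roughly the following (condensed from the diff below; on success the
helper fills in entry->handle and entry->length, on failure it bumps the
relevant reject counters):

	/* compress and stash in the zpool; all acomp and zpool_malloc()
	 * details now live inside the helper
	 */
	if (!zswap_compress(folio, entry))
		goto put_pool;

	entry->swpentry = swp;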

Signed-off-by: Johannes Weiner <[email protected]>
---
mm/zswap.c | 145 ++++++++++++++++++++++++++++-------------------------
1 file changed, 77 insertions(+), 68 deletions(-)

diff --git a/mm/zswap.c b/mm/zswap.c
index bdc9f82fe4b9..f9b9494156ba 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -1316,6 +1316,79 @@ static int zswap_enabled_param_set(const char *val,
return ret;
}

+static bool zswap_compress(struct folio *folio, struct zswap_entry *entry)
+{
+ struct crypto_acomp_ctx *acomp_ctx;
+ struct scatterlist input, output;
+ unsigned int dlen = PAGE_SIZE;
+ unsigned long handle;
+ struct zpool *zpool;
+ char *buf;
+ gfp_t gfp;
+ int ret;
+ u8 *dst;
+
+ acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
+
+ mutex_lock(&acomp_ctx->mutex);
+
+ dst = acomp_ctx->buffer;
+ sg_init_table(&input, 1);
+ sg_set_page(&input, &folio->page, PAGE_SIZE, 0);
+
+ /*
+ * We need PAGE_SIZE * 2 here since there maybe over-compression case,
+ * and hardware-accelerators may won't check the dst buffer size, so
+ * giving the dst buffer with enough length to avoid buffer overflow.
+ */
+ sg_init_one(&output, dst, PAGE_SIZE * 2);
+ acomp_request_set_params(acomp_ctx->req, &input, &output, PAGE_SIZE, dlen);
+
+ /*
+ * it maybe looks a little bit silly that we send an asynchronous request,
+ * then wait for its completion synchronously. This makes the process look
+ * synchronous in fact.
+ * Theoretically, acomp supports users send multiple acomp requests in one
+ * acomp instance, then get those requests done simultaneously. but in this
+ * case, zswap actually does store and load page by page, there is no
+ * existing method to send the second page before the first page is done
+ * in one thread doing zwap.
+ * but in different threads running on different cpu, we have different
+ * acomp instance, so multiple threads can do (de)compression in parallel.
+ */
+ ret = crypto_wait_req(crypto_acomp_compress(acomp_ctx->req), &acomp_ctx->wait);
+ dlen = acomp_ctx->req->dlen;
+ if (ret) {
+ zswap_reject_compress_fail++;
+ goto unlock;
+ }
+
+ zpool = zswap_find_zpool(entry);
+ gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
+ if (zpool_malloc_support_movable(zpool))
+ gfp |= __GFP_HIGHMEM | __GFP_MOVABLE;
+ ret = zpool_malloc(zpool, dlen, gfp, &handle);
+ if (ret == -ENOSPC) {
+ zswap_reject_compress_poor++;
+ goto unlock;
+ }
+ if (ret) {
+ zswap_reject_alloc_fail++;
+ goto unlock;
+ }
+
+ buf = zpool_map_handle(zpool, handle, ZPOOL_MM_WO);
+ memcpy(buf, dst, dlen);
+ zpool_unmap_handle(zpool, handle);
+
+ entry->handle = handle;
+ entry->length = dlen;
+
+unlock:
+ mutex_unlock(&acomp_ctx->mutex);
+ return ret == 0;
+}
+
static void zswap_decompress(struct zswap_entry *entry, struct page *page)
{
struct zpool *zpool = zswap_find_zpool(entry);
@@ -1472,18 +1545,11 @@ bool zswap_store(struct folio *folio)
struct page *page = &folio->page;
struct zswap_tree *tree = swap_zswap_tree(swp);
struct zswap_entry *entry, *dupentry;
- struct scatterlist input, output;
- struct crypto_acomp_ctx *acomp_ctx;
struct obj_cgroup *objcg = NULL;
struct mem_cgroup *memcg = NULL;
struct zswap_pool *pool;
- struct zpool *zpool;
- unsigned int dlen = PAGE_SIZE;
- unsigned long handle, value;
- char *buf;
- u8 *src, *dst;
- gfp_t gfp;
- int ret;
+ unsigned long value;
+ u8 *src;

VM_WARN_ON_ONCE(!folio_test_locked(folio));
VM_WARN_ON_ONCE(!folio_test_swapcache(folio));
@@ -1568,65 +1634,10 @@ bool zswap_store(struct folio *folio)
mem_cgroup_put(memcg);
}

- /* compress */
- acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
-
- mutex_lock(&acomp_ctx->mutex);
-
- dst = acomp_ctx->buffer;
- sg_init_table(&input, 1);
- sg_set_page(&input, &folio->page, PAGE_SIZE, 0);
+ if (!zswap_compress(folio, entry))
+ goto put_pool;

- /*
- * We need PAGE_SIZE * 2 here since there maybe over-compression case,
- * and hardware-accelerators may won't check the dst buffer size, so
- * giving the dst buffer with enough length to avoid buffer overflow.
- */
- sg_init_one(&output, dst, PAGE_SIZE * 2);
- acomp_request_set_params(acomp_ctx->req, &input, &output, PAGE_SIZE, dlen);
- /*
- * it maybe looks a little bit silly that we send an asynchronous request,
- * then wait for its completion synchronously. This makes the process look
- * synchronous in fact.
- * Theoretically, acomp supports users send multiple acomp requests in one
- * acomp instance, then get those requests done simultaneously. but in this
- * case, zswap actually does store and load page by page, there is no
- * existing method to send the second page before the first page is done
- * in one thread doing zwap.
- * but in different threads running on different cpu, we have different
- * acomp instance, so multiple threads can do (de)compression in parallel.
- */
- ret = crypto_wait_req(crypto_acomp_compress(acomp_ctx->req), &acomp_ctx->wait);
- dlen = acomp_ctx->req->dlen;
-
- if (ret) {
- zswap_reject_compress_fail++;
- goto put_dstmem;
- }
-
- /* store */
- zpool = zswap_find_zpool(entry);
- gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
- if (zpool_malloc_support_movable(zpool))
- gfp |= __GFP_HIGHMEM | __GFP_MOVABLE;
- ret = zpool_malloc(zpool, dlen, gfp, &handle);
- if (ret == -ENOSPC) {
- zswap_reject_compress_poor++;
- goto put_dstmem;
- }
- if (ret) {
- zswap_reject_alloc_fail++;
- goto put_dstmem;
- }
- buf = zpool_map_handle(zpool, handle, ZPOOL_MM_WO);
- memcpy(buf, dst, dlen);
- zpool_unmap_handle(zpool, handle);
- mutex_unlock(&acomp_ctx->mutex);
-
- /* populate entry */
entry->swpentry = swp;
- entry->handle = handle;
- entry->length = dlen;

insert_entry:
entry->objcg = objcg;
@@ -1663,8 +1674,6 @@ bool zswap_store(struct folio *folio)

return true;

-put_dstmem:
- mutex_unlock(&acomp_ctx->mutex);
put_pool:
zswap_pool_put(entry->pool);
freepage:
--
2.43.0


2024-01-30 01:52:32

by Johannes Weiner

[permalink] [raw]
Subject: [PATCH 15/20] mm: zswap: function ordering: move entry sections out of LRU section

This completes consolidation of the LRU section.

Signed-off-by: Johannes Weiner <[email protected]>
---
mm/zswap.c | 101 ++++++++++++++++++++++++++---------------------------
1 file changed, 49 insertions(+), 52 deletions(-)

diff --git a/mm/zswap.c b/mm/zswap.c
index 511bfafc1456..756d4d575efe 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -768,58 +768,6 @@ static inline int entry_to_nid(struct zswap_entry *entry)
return page_to_nid(virt_to_page(entry));
}

-void zswap_lruvec_state_init(struct lruvec *lruvec)
-{
- atomic_long_set(&lruvec->zswap_lruvec_state.nr_zswap_protected, 0);
-}
-
-void zswap_folio_swapin(struct folio *folio)
-{
- struct lruvec *lruvec;
-
- if (folio) {
- lruvec = folio_lruvec(folio);
- atomic_long_inc(&lruvec->zswap_lruvec_state.nr_zswap_protected);
- }
-}
-
-void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg)
-{
- struct zswap_pool *pool;
-
- /* lock out zswap pools list modification */
- spin_lock(&zswap_pools_lock);
- list_for_each_entry(pool, &zswap_pools, list) {
- if (pool->next_shrink == memcg)
- pool->next_shrink = mem_cgroup_iter(NULL, pool->next_shrink, NULL);
- }
- spin_unlock(&zswap_pools_lock);
-}
-
-/*********************************
-* zswap entry functions
-**********************************/
-static struct kmem_cache *zswap_entry_cache;
-
-static struct zswap_entry *zswap_entry_cache_alloc(gfp_t gfp, int nid)
-{
- struct zswap_entry *entry;
- entry = kmem_cache_alloc_node(zswap_entry_cache, gfp, nid);
- if (!entry)
- return NULL;
- entry->refcount = 1;
- RB_CLEAR_NODE(&entry->rbnode);
- return entry;
-}
-
-static void zswap_entry_cache_free(struct zswap_entry *entry)
-{
- kmem_cache_free(zswap_entry_cache, entry);
-}
-
-/*********************************
-* lru functions
-**********************************/
static void zswap_lru_add(struct list_lru *list_lru, struct zswap_entry *entry)
{
atomic_long_t *nr_zswap_protected;
@@ -872,6 +820,55 @@ static void zswap_lru_del(struct list_lru *list_lru, struct zswap_entry *entry)
rcu_read_unlock();
}

+void zswap_lruvec_state_init(struct lruvec *lruvec)
+{
+ atomic_long_set(&lruvec->zswap_lruvec_state.nr_zswap_protected, 0);
+}
+
+void zswap_folio_swapin(struct folio *folio)
+{
+ struct lruvec *lruvec;
+
+ if (folio) {
+ lruvec = folio_lruvec(folio);
+ atomic_long_inc(&lruvec->zswap_lruvec_state.nr_zswap_protected);
+ }
+}
+
+void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg)
+{
+ struct zswap_pool *pool;
+
+ /* lock out zswap pools list modification */
+ spin_lock(&zswap_pools_lock);
+ list_for_each_entry(pool, &zswap_pools, list) {
+ if (pool->next_shrink == memcg)
+ pool->next_shrink = mem_cgroup_iter(NULL, pool->next_shrink, NULL);
+ }
+ spin_unlock(&zswap_pools_lock);
+}
+
+/*********************************
+* zswap entry functions
+**********************************/
+static struct kmem_cache *zswap_entry_cache;
+
+static struct zswap_entry *zswap_entry_cache_alloc(gfp_t gfp, int nid)
+{
+ struct zswap_entry *entry;
+ entry = kmem_cache_alloc_node(zswap_entry_cache, gfp, nid);
+ if (!entry)
+ return NULL;
+ entry->refcount = 1;
+ RB_CLEAR_NODE(&entry->rbnode);
+ return entry;
+}
+
+static void zswap_entry_cache_free(struct zswap_entry *entry)
+{
+ kmem_cache_free(zswap_entry_cache, entry);
+}
+
/*********************************
* rbtree functions
**********************************/
--
2.43.0


2024-01-30 01:52:59

by Johannes Weiner

[permalink] [raw]
Subject: [PATCH 09/20] mm: zswap: simplify zswap_invalidate()

The branching is awkward and duplicates code. The comment about
writeback is also misleading: yes, the entry might have been written
back. Or it might have never been stored in zswap to begin with due to
a rejection - zswap_invalidate() is called on all exiting swap entries.

Signed-off-by: Johannes Weiner <[email protected]>
---
mm/zswap.c | 9 ++-------
1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/mm/zswap.c b/mm/zswap.c
index cde309c539b3..082d076a758d 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -1739,15 +1739,10 @@ void zswap_invalidate(int type, pgoff_t offset)
struct zswap_tree *tree = swap_zswap_tree(swp_entry(type, offset));
struct zswap_entry *entry;

- /* find */
spin_lock(&tree->lock);
entry = zswap_rb_search(&tree->rbroot, offset);
- if (!entry) {
- /* entry was written back */
- spin_unlock(&tree->lock);
- return;
- }
- zswap_invalidate_entry(tree, entry);
+ if (entry)
+ zswap_invalidate_entry(tree, entry);
spin_unlock(&tree->lock);
}

--
2.43.0


2024-01-30 01:54:16

by Johannes Weiner

[permalink] [raw]
Subject: [PATCH 18/20] mm: zswap: function ordering: per-cpu compression infra

The per-cpu compression init/exit callbacks are awkwardly in the
middle of the shrinker code. Move them up to the compression section.

Signed-off-by: Johannes Weiner <[email protected]>
---
mm/zswap.c | 135 ++++++++++++++++++++++++++---------------------------
1 file changed, 66 insertions(+), 69 deletions(-)

diff --git a/mm/zswap.c b/mm/zswap.c
index 17356b2e35c2..acd7dcd1e0f2 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -995,6 +995,72 @@ static void zswap_invalidate_entry(struct zswap_tree *tree,
/*********************************
* compressed storage functions
**********************************/
+static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node)
+{
+ struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
+ struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);
+ struct crypto_acomp *acomp;
+ struct acomp_req *req;
+ int ret;
+
+ mutex_init(&acomp_ctx->mutex);
+
+ acomp_ctx->buffer = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
+ if (!acomp_ctx->buffer)
+ return -ENOMEM;
+
+ acomp = crypto_alloc_acomp_node(pool->tfm_name, 0, 0, cpu_to_node(cpu));
+ if (IS_ERR(acomp)) {
+ pr_err("could not alloc crypto acomp %s : %ld\n",
+ pool->tfm_name, PTR_ERR(acomp));
+ ret = PTR_ERR(acomp);
+ goto acomp_fail;
+ }
+ acomp_ctx->acomp = acomp;
+
+ req = acomp_request_alloc(acomp_ctx->acomp);
+ if (!req) {
+ pr_err("could not alloc crypto acomp_request %s\n",
+ pool->tfm_name);
+ ret = -ENOMEM;
+ goto req_fail;
+ }
+ acomp_ctx->req = req;
+
+ crypto_init_wait(&acomp_ctx->wait);
+ /*
+ * if the backend of acomp is async zip, crypto_req_done() will wakeup
+ * crypto_wait_req(); if the backend of acomp is scomp, the callback
+ * won't be called, crypto_wait_req() will return without blocking.
+ */
+ acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+ crypto_req_done, &acomp_ctx->wait);
+
+ return 0;
+
+req_fail:
+ crypto_free_acomp(acomp_ctx->acomp);
+acomp_fail:
+ kfree(acomp_ctx->buffer);
+ return ret;
+}
+
+static int zswap_cpu_comp_dead(unsigned int cpu, struct hlist_node *node)
+{
+ struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
+ struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);
+
+ if (!IS_ERR_OR_NULL(acomp_ctx)) {
+ if (!IS_ERR_OR_NULL(acomp_ctx->req))
+ acomp_request_free(acomp_ctx->req);
+ if (!IS_ERR_OR_NULL(acomp_ctx->acomp))
+ crypto_free_acomp(acomp_ctx->acomp);
+ kfree(acomp_ctx->buffer);
+ }
+
+ return 0;
+}
+
static bool zswap_compress(struct folio *folio, struct zswap_entry *entry)
{
struct crypto_acomp_ctx *acomp_ctx;
@@ -1201,75 +1267,6 @@ static void zswap_alloc_shrinker(struct zswap_pool *pool)
pool->shrinker->seeks = DEFAULT_SEEKS;
}

-/*********************************
-* per-cpu code
-**********************************/
-static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node)
-{
- struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
- struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);
- struct crypto_acomp *acomp;
- struct acomp_req *req;
- int ret;
-
- mutex_init(&acomp_ctx->mutex);
-
- acomp_ctx->buffer = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
- if (!acomp_ctx->buffer)
- return -ENOMEM;
-
- acomp = crypto_alloc_acomp_node(pool->tfm_name, 0, 0, cpu_to_node(cpu));
- if (IS_ERR(acomp)) {
- pr_err("could not alloc crypto acomp %s : %ld\n",
- pool->tfm_name, PTR_ERR(acomp));
- ret = PTR_ERR(acomp);
- goto acomp_fail;
- }
- acomp_ctx->acomp = acomp;
-
- req = acomp_request_alloc(acomp_ctx->acomp);
- if (!req) {
- pr_err("could not alloc crypto acomp_request %s\n",
- pool->tfm_name);
- ret = -ENOMEM;
- goto req_fail;
- }
- acomp_ctx->req = req;
-
- crypto_init_wait(&acomp_ctx->wait);
- /*
- * if the backend of acomp is async zip, crypto_req_done() will wakeup
- * crypto_wait_req(); if the backend of acomp is scomp, the callback
- * won't be called, crypto_wait_req() will return without blocking.
- */
- acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
- crypto_req_done, &acomp_ctx->wait);
-
- return 0;
-
-req_fail:
- crypto_free_acomp(acomp_ctx->acomp);
-acomp_fail:
- kfree(acomp_ctx->buffer);
- return ret;
-}
-
-static int zswap_cpu_comp_dead(unsigned int cpu, struct hlist_node *node)
-{
- struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
- struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);
-
- if (!IS_ERR_OR_NULL(acomp_ctx)) {
- if (!IS_ERR_OR_NULL(acomp_ctx->req))
- acomp_request_free(acomp_ctx->req);
- if (!IS_ERR_OR_NULL(acomp_ctx->acomp))
- crypto_free_acomp(acomp_ctx->acomp);
- kfree(acomp_ctx->buffer);
- }
-
- return 0;
-}
-
static enum lru_status shrink_memcg_cb(struct list_head *item, struct list_lru_one *l,
spinlock_t *lock, void *arg)
{
--
2.43.0


2024-01-30 01:54:53

by Johannes Weiner

[permalink] [raw]
Subject: [PATCH 20/20] mm: zswap: function ordering: shrink_memcg_cb

shrink_memcg_cb() is called by the shrinker and is based on
zswap_writeback_entry(). Move it in between. Save one fwd decl.

Signed-off-by: Johannes Weiner <[email protected]>
---
mm/zswap.c | 125 ++++++++++++++++++++++++++---------------------------
1 file changed, 61 insertions(+), 64 deletions(-)

diff --git a/mm/zswap.c b/mm/zswap.c
index 0cb3437d47eb..4aea03285532 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -1254,7 +1254,67 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
* shrinker functions
**********************************/
static enum lru_status shrink_memcg_cb(struct list_head *item, struct list_lru_one *l,
- spinlock_t *lock, void *arg);
+ spinlock_t *lock, void *arg)
+{
+ struct zswap_entry *entry = container_of(item, struct zswap_entry, lru);
+ bool *encountered_page_in_swapcache = (bool *)arg;
+ swp_entry_t swpentry;
+ enum lru_status ret = LRU_REMOVED_RETRY;
+ int writeback_result;
+
+ /*
+ * Rotate the entry to the tail before unlocking the LRU,
+ * so that in case of an invalidation race concurrent
+ * reclaimers don't waste their time on it.
+ *
+ * If writeback succeeds, or failure is due to the entry
+ * being invalidated by the swap subsystem, the invalidation
+ * will unlink and free it.
+ *
+ * Temporary failures, where the same entry should be tried
+ * again immediately, almost never happen for this shrinker.
+ * We don't do any trylocking; -ENOMEM comes closest,
+ * but that's extremely rare and doesn't happen spuriously
+ * either. Don't bother distinguishing this case.
+ *
+ * But since they do exist in theory, the entry cannot just
+ * be unlinked, or we could leak it. Hence, rotate.
+ */
+ list_move_tail(item, &l->list);
+
+ /*
+ * Once the lru lock is dropped, the entry might get freed. The
+ * swpentry is copied to the stack, and entry isn't deref'd again
+ * until the entry is verified to still be alive in the tree.
+ */
+ swpentry = entry->swpentry;
+
+ /*
+ * It's safe to drop the lock here because we return either
+ * LRU_REMOVED_RETRY or LRU_RETRY.
+ */
+ spin_unlock(lock);
+
+ writeback_result = zswap_writeback_entry(entry, swpentry);
+
+ if (writeback_result) {
+ zswap_reject_reclaim_fail++;
+ ret = LRU_RETRY;
+
+ /*
+ * Encountering a page already in swap cache is a sign that we are shrinking
+ * into the warmer region. We should terminate shrinking (if we're in the dynamic
+ * shrinker context).
+ */
+ if (writeback_result == -EEXIST && encountered_page_in_swapcache)
+ *encountered_page_in_swapcache = true;
+ } else {
+ zswap_written_back_pages++;
+ }
+
+ spin_lock(lock);
+ return ret;
+}

static unsigned long zswap_shrinker_scan(struct shrinker *shrinker,
struct shrink_control *sc)
@@ -1354,69 +1414,6 @@ static void zswap_alloc_shrinker(struct zswap_pool *pool)
pool->shrinker->seeks = DEFAULT_SEEKS;
}

-static enum lru_status shrink_memcg_cb(struct list_head *item, struct list_lru_one *l,
- spinlock_t *lock, void *arg)
-{
- struct zswap_entry *entry = container_of(item, struct zswap_entry, lru);
- bool *encountered_page_in_swapcache = (bool *)arg;
- swp_entry_t swpentry;
- enum lru_status ret = LRU_REMOVED_RETRY;
- int writeback_result;
-
- /*
- * Rotate the entry to the tail before unlocking the LRU,
- * so that in case of an invalidation race concurrent
- * reclaimers don't waste their time on it.
- *
- * If writeback succeeds, or failure is due to the entry
- * being invalidated by the swap subsystem, the invalidation
- * will unlink and free it.
- *
- * Temporary failures, where the same entry should be tried
- * again immediately, almost never happen for this shrinker.
- * We don't do any trylocking; -ENOMEM comes closest,
- * but that's extremely rare and doesn't happen spuriously
- * either. Don't bother distinguishing this case.
- *
- * But since they do exist in theory, the entry cannot just
- * be unlinked, or we could leak it. Hence, rotate.
- */
- list_move_tail(item, &l->list);
-
- /*
- * Once the lru lock is dropped, the entry might get freed. The
- * swpentry is copied to the stack, and entry isn't deref'd again
- * until the entry is verified to still be alive in the tree.
- */
- swpentry = entry->swpentry;
-
- /*
- * It's safe to drop the lock here because we return either
- * LRU_REMOVED_RETRY or LRU_RETRY.
- */
- spin_unlock(lock);
-
- writeback_result = zswap_writeback_entry(entry, swpentry);
-
- if (writeback_result) {
- zswap_reject_reclaim_fail++;
- ret = LRU_RETRY;
-
- /*
- * Encountering a page already in swap cache is a sign that we are shrinking
- * into the warmer region. We should terminate shrinking (if we're in the dynamic
- * shrinker context).
- */
- if (writeback_result == -EEXIST && encountered_page_in_swapcache)
- *encountered_page_in_swapcache = true;
- } else {
- zswap_written_back_pages++;
- }
-
- spin_lock(lock);
- return ret;
-}
-
static int shrink_memcg(struct mem_cgroup *memcg)
{
struct zswap_pool *pool;
--
2.43.0


2024-01-30 02:00:27

by Johannes Weiner

[permalink] [raw]
Subject: [PATCH 06/20] mm: zswap: rename __zswap_load() to zswap_decompress()

Signed-off-by: Johannes Weiner <[email protected]>
---
mm/zswap.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/mm/zswap.c b/mm/zswap.c
index 7a7e8da2b4f8..bdc9f82fe4b9 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -1316,7 +1316,7 @@ static int zswap_enabled_param_set(const char *val,
return ret;
}

-static void __zswap_load(struct zswap_entry *entry, struct page *page)
+static void zswap_decompress(struct zswap_entry *entry, struct page *page)
{
struct zpool *zpool = zswap_find_zpool(entry);
struct scatterlist input, output;
@@ -1411,7 +1411,7 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
zswap_entry_get(entry);
spin_unlock(&tree->lock);

- __zswap_load(entry, &folio->page);
+ zswap_decompress(entry, &folio->page);

count_vm_event(ZSWPWB);
if (entry->objcg)
@@ -1702,7 +1702,7 @@ bool zswap_load(struct folio *folio)
spin_unlock(&tree->lock);

if (entry->length)
- __zswap_load(entry, page);
+ zswap_decompress(entry, page);
else {
dst = kmap_local_page(page);
zswap_fill_page(dst, entry->value);
--
2.43.0


2024-01-30 03:13:34

by Chengming Zhou

[permalink] [raw]
Subject: Re: [PATCH 01/20] mm: zswap: rename zswap_free_entry to zswap_entry_free

On 2024/1/30 09:36, Johannes Weiner wrote:
> There is a zswap_entry_ namespace with multiple functions already.

Another is zswap_invalidate_entry(), maybe zswap_entry_invalidate is better.

>
> Signed-off-by: Johannes Weiner <[email protected]>
> ---
> mm/zswap.c | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/mm/zswap.c b/mm/zswap.c
> index 7f88b3a77e4a..173f2e6657de 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -520,7 +520,7 @@ static struct zpool *zswap_find_zpool(struct zswap_entry *entry)
> * Carries out the common pattern of freeing and entry's zpool allocation,
> * freeing the entry itself, and decrementing the number of stored pages.
> */
> -static void zswap_free_entry(struct zswap_entry *entry)
> +static void zswap_entry_free(struct zswap_entry *entry)
> {
> if (!entry->length)
> atomic_dec(&zswap_same_filled_pages);
> @@ -555,7 +555,7 @@ static void zswap_entry_put(struct zswap_entry *entry)
> WARN_ON_ONCE(refcount < 0);
> if (refcount == 0) {
> WARN_ON_ONCE(!RB_EMPTY_NODE(&entry->rbnode));
> - zswap_free_entry(entry);
> + zswap_entry_free(entry);
> }
> }
>

2024-01-30 03:20:55

by Chengming Zhou

[permalink] [raw]
Subject: Re: [PATCH 06/20] mm: zswap: rename __zswap_load() to zswap_decompress()

On 2024/1/30 09:36, Johannes Weiner wrote:
> Signed-off-by: Johannes Weiner <[email protected]>

Reviewed-by: Chengming Zhou <[email protected]>

> ---
> mm/zswap.c | 6 +++---
> 1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/mm/zswap.c b/mm/zswap.c
> index 7a7e8da2b4f8..bdc9f82fe4b9 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -1316,7 +1316,7 @@ static int zswap_enabled_param_set(const char *val,
> return ret;
> }
>
> -static void __zswap_load(struct zswap_entry *entry, struct page *page)
> +static void zswap_decompress(struct zswap_entry *entry, struct page *page)
> {
> struct zpool *zpool = zswap_find_zpool(entry);
> struct scatterlist input, output;
> @@ -1411,7 +1411,7 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
> zswap_entry_get(entry);
> spin_unlock(&tree->lock);
>
> - __zswap_load(entry, &folio->page);
> + zswap_decompress(entry, &folio->page);
>
> count_vm_event(ZSWPWB);
> if (entry->objcg)
> @@ -1702,7 +1702,7 @@ bool zswap_load(struct folio *folio)
> spin_unlock(&tree->lock);
>
> if (entry->length)
> - __zswap_load(entry, page);
> + zswap_decompress(entry, page);
> else {
> dst = kmap_local_page(page);
> zswap_fill_page(dst, entry->value);

2024-01-30 03:21:47

by Johannes Weiner

[permalink] [raw]
Subject: Re: [PATCH 01/20] mm: zswap: rename zswap_free_entry to zswap_entry_free

On Tue, Jan 30, 2024 at 11:13:16AM +0800, Chengming Zhou wrote:
> On 2024/1/30 09:36, Johannes Weiner wrote:
> > There is a zswap_entry_ namespace with multiple functions already.
>
> Another is zswap_invalidate_entry(), maybe zswap_entry_invalidate is better.

You're right.

I originally had a patch for that, but then dropped it because it is
sort of also a tree operation and it looked a bit weird to me.

But I could be persuaded to include it again, no strong preferences
either way.

Thanks

2024-01-30 03:23:56

by Chengming Zhou

[permalink] [raw]
Subject: Re: [PATCH 07/20] mm: zswap: break out zswap_compress()

On 2024/1/30 09:36, Johannes Weiner wrote:
> zswap_store() is long and mixes work at the zswap layer with work at
> the backend and compression layer. Move compression & backend work to
> zswap_compress(), mirroring zswap_decompress().
>
> Signed-off-by: Johannes Weiner <[email protected]>

Reviewed-by: Chengming Zhou <[email protected]>

> ---
> mm/zswap.c | 145 ++++++++++++++++++++++++++++-------------------------
> 1 file changed, 77 insertions(+), 68 deletions(-)
>
> diff --git a/mm/zswap.c b/mm/zswap.c
> index bdc9f82fe4b9..f9b9494156ba 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -1316,6 +1316,79 @@ static int zswap_enabled_param_set(const char *val,
> return ret;
> }
>
> +static bool zswap_compress(struct folio *folio, struct zswap_entry *entry)
> +{
> + struct crypto_acomp_ctx *acomp_ctx;
> + struct scatterlist input, output;
> + unsigned int dlen = PAGE_SIZE;
> + unsigned long handle;
> + struct zpool *zpool;
> + char *buf;
> + gfp_t gfp;
> + int ret;
> + u8 *dst;
> +
> + acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
> +
> + mutex_lock(&acomp_ctx->mutex);
> +
> + dst = acomp_ctx->buffer;
> + sg_init_table(&input, 1);
> + sg_set_page(&input, &folio->page, PAGE_SIZE, 0);
> +
> + /*
> + * We need PAGE_SIZE * 2 here since there maybe over-compression case,
> + * and hardware-accelerators may won't check the dst buffer size, so
> + * giving the dst buffer with enough length to avoid buffer overflow.
> + */
> + sg_init_one(&output, dst, PAGE_SIZE * 2);
> + acomp_request_set_params(acomp_ctx->req, &input, &output, PAGE_SIZE, dlen);
> +
> + /*
> + * it maybe looks a little bit silly that we send an asynchronous request,
> + * then wait for its completion synchronously. This makes the process look
> + * synchronous in fact.
> + * Theoretically, acomp supports users send multiple acomp requests in one
> + * acomp instance, then get those requests done simultaneously. but in this
> + * case, zswap actually does store and load page by page, there is no
> + * existing method to send the second page before the first page is done
> + * in one thread doing zwap.
> + * but in different threads running on different cpu, we have different
> + * acomp instance, so multiple threads can do (de)compression in parallel.
> + */
> + ret = crypto_wait_req(crypto_acomp_compress(acomp_ctx->req), &acomp_ctx->wait);
> + dlen = acomp_ctx->req->dlen;
> + if (ret) {
> + zswap_reject_compress_fail++;
> + goto unlock;
> + }
> +
> + zpool = zswap_find_zpool(entry);
> + gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
> + if (zpool_malloc_support_movable(zpool))
> + gfp |= __GFP_HIGHMEM | __GFP_MOVABLE;
> + ret = zpool_malloc(zpool, dlen, gfp, &handle);
> + if (ret == -ENOSPC) {
> + zswap_reject_compress_poor++;
> + goto unlock;
> + }
> + if (ret) {
> + zswap_reject_alloc_fail++;
> + goto unlock;
> + }
> +
> + buf = zpool_map_handle(zpool, handle, ZPOOL_MM_WO);
> + memcpy(buf, dst, dlen);
> + zpool_unmap_handle(zpool, handle);
> +
> + entry->handle = handle;
> + entry->length = dlen;
> +
> +unlock:
> + mutex_unlock(&acomp_ctx->mutex);
> + return ret == 0;
> +}
> +
> static void zswap_decompress(struct zswap_entry *entry, struct page *page)
> {
> struct zpool *zpool = zswap_find_zpool(entry);
> @@ -1472,18 +1545,11 @@ bool zswap_store(struct folio *folio)
> struct page *page = &folio->page;
> struct zswap_tree *tree = swap_zswap_tree(swp);
> struct zswap_entry *entry, *dupentry;
> - struct scatterlist input, output;
> - struct crypto_acomp_ctx *acomp_ctx;
> struct obj_cgroup *objcg = NULL;
> struct mem_cgroup *memcg = NULL;
> struct zswap_pool *pool;
> - struct zpool *zpool;
> - unsigned int dlen = PAGE_SIZE;
> - unsigned long handle, value;
> - char *buf;
> - u8 *src, *dst;
> - gfp_t gfp;
> - int ret;
> + unsigned long value;
> + u8 *src;
>
> VM_WARN_ON_ONCE(!folio_test_locked(folio));
> VM_WARN_ON_ONCE(!folio_test_swapcache(folio));
> @@ -1568,65 +1634,10 @@ bool zswap_store(struct folio *folio)
> mem_cgroup_put(memcg);
> }
>
> - /* compress */
> - acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
> -
> - mutex_lock(&acomp_ctx->mutex);
> -
> - dst = acomp_ctx->buffer;
> - sg_init_table(&input, 1);
> - sg_set_page(&input, &folio->page, PAGE_SIZE, 0);
> + if (!zswap_compress(folio, entry))
> + goto put_pool;
>
> - /*
> - * We need PAGE_SIZE * 2 here since there maybe over-compression case,
> - * and hardware-accelerators may won't check the dst buffer size, so
> - * giving the dst buffer with enough length to avoid buffer overflow.
> - */
> - sg_init_one(&output, dst, PAGE_SIZE * 2);
> - acomp_request_set_params(acomp_ctx->req, &input, &output, PAGE_SIZE, dlen);
> - /*
> - * it maybe looks a little bit silly that we send an asynchronous request,
> - * then wait for its completion synchronously. This makes the process look
> - * synchronous in fact.
> - * Theoretically, acomp supports users send multiple acomp requests in one
> - * acomp instance, then get those requests done simultaneously. but in this
> - * case, zswap actually does store and load page by page, there is no
> - * existing method to send the second page before the first page is done
> - * in one thread doing zwap.
> - * but in different threads running on different cpu, we have different
> - * acomp instance, so multiple threads can do (de)compression in parallel.
> - */
> - ret = crypto_wait_req(crypto_acomp_compress(acomp_ctx->req), &acomp_ctx->wait);
> - dlen = acomp_ctx->req->dlen;
> -
> - if (ret) {
> - zswap_reject_compress_fail++;
> - goto put_dstmem;
> - }
> -
> - /* store */
> - zpool = zswap_find_zpool(entry);
> - gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
> - if (zpool_malloc_support_movable(zpool))
> - gfp |= __GFP_HIGHMEM | __GFP_MOVABLE;
> - ret = zpool_malloc(zpool, dlen, gfp, &handle);
> - if (ret == -ENOSPC) {
> - zswap_reject_compress_poor++;
> - goto put_dstmem;
> - }
> - if (ret) {
> - zswap_reject_alloc_fail++;
> - goto put_dstmem;
> - }
> - buf = zpool_map_handle(zpool, handle, ZPOOL_MM_WO);
> - memcpy(buf, dst, dlen);
> - zpool_unmap_handle(zpool, handle);
> - mutex_unlock(&acomp_ctx->mutex);
> -
> - /* populate entry */
> entry->swpentry = swp;
> - entry->handle = handle;
> - entry->length = dlen;
>
> insert_entry:
> entry->objcg = objcg;
> @@ -1663,8 +1674,6 @@ bool zswap_store(struct folio *folio)
>
> return true;
>
> -put_dstmem:
> - mutex_unlock(&acomp_ctx->mutex);
> put_pool:
> zswap_pool_put(entry->pool);
> freepage:

2024-01-30 03:32:00

by Chengming Zhou

[permalink] [raw]
Subject: Re: [PATCH 01/20] mm: zswap: rename zswap_free_entry to zswap_entry_free

On 2024/1/30 11:19, Johannes Weiner wrote:
> On Tue, Jan 30, 2024 at 11:13:16AM +0800, Chengming Zhou wrote:
>> On 2024/1/30 09:36, Johannes Weiner wrote:
>>> There is a zswap_entry_ namespace with multiple functions already.
>>
>> Another is zswap_invalidate_entry(), maybe zswap_entry_invalidate is better.
>
> You're right.
>
> I originally had a patch for that, but then dropped it because it is
> sort of also a tree operation and it looked a bit weird to me.

Right.

>
> But I could be persuaded to include it again, no strong preferences
> either way.

Never mind.

2024-01-30 03:35:24

by Chengming Zhou

[permalink] [raw]
Subject: Re: [PATCH 08/20] mm: zswap: further cleanup zswap_store()

On 2024/1/30 09:36, Johannes Weiner wrote:
> - Remove dupentry, reusing entry works just fine.
> - Rename pool to shrink_pool, as this one actually is confusing.
> - Remove page, use folio_nid() and kmap_local_folio() directly.
> - Set entry->swpentry in a common path.
> - Move value and src to local scope of use.
>
> Signed-off-by: Johannes Weiner <[email protected]>

Reviewed-by: Chengming Zhou <[email protected]>

> ---
> mm/zswap.c | 28 +++++++++++++---------------
> 1 file changed, 13 insertions(+), 15 deletions(-)
>
> diff --git a/mm/zswap.c b/mm/zswap.c
> index f9b9494156ba..cde309c539b3 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -1542,14 +1542,11 @@ bool zswap_store(struct folio *folio)
> {
> swp_entry_t swp = folio->swap;
> pgoff_t offset = swp_offset(swp);
> - struct page *page = &folio->page;
> struct zswap_tree *tree = swap_zswap_tree(swp);
> struct zswap_entry *entry, *dupentry;
> struct obj_cgroup *objcg = NULL;
> struct mem_cgroup *memcg = NULL;
> - struct zswap_pool *pool;
> - unsigned long value;
> - u8 *src;
> + struct zswap_pool *shrink_pool;
>
> VM_WARN_ON_ONCE(!folio_test_locked(folio));
> VM_WARN_ON_ONCE(!folio_test_swapcache(folio));
> @@ -1567,10 +1564,10 @@ bool zswap_store(struct folio *folio)
> * the tree, and it might be written back overriding the new data.
> */
> spin_lock(&tree->lock);
> - dupentry = zswap_rb_search(&tree->rbroot, offset);
> - if (dupentry) {
> + entry = zswap_rb_search(&tree->rbroot, offset);
> + if (entry) {
> + zswap_invalidate_entry(tree, entry);
> zswap_duplicate_entry++;
> - zswap_invalidate_entry(tree, dupentry);
> }
> spin_unlock(&tree->lock);
> objcg = get_obj_cgroup_from_folio(folio);
> @@ -1598,17 +1595,19 @@ bool zswap_store(struct folio *folio)
> }
>
> /* allocate entry */
> - entry = zswap_entry_cache_alloc(GFP_KERNEL, page_to_nid(page));
> + entry = zswap_entry_cache_alloc(GFP_KERNEL, folio_nid(folio));
> if (!entry) {
> zswap_reject_kmemcache_fail++;
> goto reject;
> }
>
> if (zswap_same_filled_pages_enabled) {
> - src = kmap_local_page(page);
> + unsigned long value;
> + u8 *src;
> +
> + src = kmap_local_folio(folio, 0);
> if (zswap_is_page_same_filled(src, &value)) {
> kunmap_local(src);
> - entry->swpentry = swp;
> entry->length = 0;
> entry->value = value;
> atomic_inc(&zswap_same_filled_pages);
> @@ -1637,9 +1636,8 @@ bool zswap_store(struct folio *folio)
> if (!zswap_compress(folio, entry))
> goto put_pool;
>
> - entry->swpentry = swp;
> -
> insert_entry:
> + entry->swpentry = swp;
> entry->objcg = objcg;
> if (objcg) {
> obj_cgroup_charge_zswap(objcg, entry->length);
> @@ -1684,9 +1682,9 @@ bool zswap_store(struct folio *folio)
> return false;
>
> shrink:
> - pool = zswap_pool_last_get();
> - if (pool && !queue_work(shrink_wq, &pool->shrink_work))
> - zswap_pool_put(pool);
> + shrink_pool = zswap_pool_last_get();
> + if (shrink_pool && !queue_work(shrink_wq, &shrink_pool->shrink_work))
> + zswap_pool_put(shrink_pool);
> goto reject;
> }
>

2024-01-30 03:36:06

by Chengming Zhou

[permalink] [raw]
Subject: Re: [PATCH 09/20] mm: zswap: simplify zswap_invalidate()

On 2024/1/30 09:36, Johannes Weiner wrote:
> The branching is awkward and duplicates code. The comment about
> writeback is also misleading: yes, the entry might have been written
> back. Or it might have never been stored in zswap to begin with due to
> a rejection - zswap_invalidate() is called on all exiting swap entries.
>
> Signed-off-by: Johannes Weiner <[email protected]>

Reviewed-by: Chengming Zhou <[email protected]>

> ---
> mm/zswap.c | 9 ++-------
> 1 file changed, 2 insertions(+), 7 deletions(-)
>
> diff --git a/mm/zswap.c b/mm/zswap.c
> index cde309c539b3..082d076a758d 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -1739,15 +1739,10 @@ void zswap_invalidate(int type, pgoff_t offset)
> struct zswap_tree *tree = swap_zswap_tree(swp_entry(type, offset));
> struct zswap_entry *entry;
>
> - /* find */
> spin_lock(&tree->lock);
> entry = zswap_rb_search(&tree->rbroot, offset);
> - if (!entry) {
> - /* entry was written back */
> - spin_unlock(&tree->lock);
> - return;
> - }
> - zswap_invalidate_entry(tree, entry);
> + if (entry)
> + zswap_invalidate_entry(tree, entry);
> spin_unlock(&tree->lock);
> }
>

2024-01-30 03:37:01

by Chengming Zhou

[permalink] [raw]
Subject: Re: [PATCH 01/20] mm: zswap: rename zswap_free_entry to zswap_entry_free

On 2024/1/30 09:36, Johannes Weiner wrote:
> There is a zswap_entry_ namespace with multiple functions already.
>
> Signed-off-by: Johannes Weiner <[email protected]>

Reviewed-by: Chengming Zhou <[email protected]>

> ---
> mm/zswap.c | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/mm/zswap.c b/mm/zswap.c
> index 7f88b3a77e4a..173f2e6657de 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -520,7 +520,7 @@ static struct zpool *zswap_find_zpool(struct zswap_entry *entry)
> * Carries out the common pattern of freeing and entry's zpool allocation,
> * freeing the entry itself, and decrementing the number of stored pages.
> */
> -static void zswap_free_entry(struct zswap_entry *entry)
> +static void zswap_entry_free(struct zswap_entry *entry)
> {
> if (!entry->length)
> atomic_dec(&zswap_same_filled_pages);
> @@ -555,7 +555,7 @@ static void zswap_entry_put(struct zswap_entry *entry)
> WARN_ON_ONCE(refcount < 0);
> if (refcount == 0) {
> WARN_ON_ONCE(!RB_EMPTY_NODE(&entry->rbnode));
> - zswap_free_entry(entry);
> + zswap_entry_free(entry);
> }
> }
>

2024-01-30 03:37:55

by Chengming Zhou

[permalink] [raw]
Subject: Re: [PATCH 02/20] mm: zswap: inline and remove zswap_entry_find_get()

On 2024/1/30 09:36, Johannes Weiner wrote:
> There is only one caller and the function is trivial. Inline it.
>
> Signed-off-by: Johannes Weiner <[email protected]>

Reviewed-by: Chengming Zhou <[email protected]>

> ---
> mm/zswap.c | 17 ++---------------
> 1 file changed, 2 insertions(+), 15 deletions(-)
>
> diff --git a/mm/zswap.c b/mm/zswap.c
> index 173f2e6657de..cf864aaa214d 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -559,19 +559,6 @@ static void zswap_entry_put(struct zswap_entry *entry)
> }
> }
>
> -/* caller must hold the tree lock */
> -static struct zswap_entry *zswap_entry_find_get(struct rb_root *root,
> - pgoff_t offset)
> -{
> - struct zswap_entry *entry;
> -
> - entry = zswap_rb_search(root, offset);
> - if (entry)
> - zswap_entry_get(entry);
> -
> - return entry;
> -}
> -
> /*********************************
> * shrinker functions
> **********************************/
> @@ -1708,13 +1695,13 @@ bool zswap_load(struct folio *folio)
>
> VM_WARN_ON_ONCE(!folio_test_locked(folio));
>
> - /* find */
> spin_lock(&tree->lock);
> - entry = zswap_entry_find_get(&tree->rbroot, offset);
> + entry = zswap_rb_search(&tree->rbroot, offset);
> if (!entry) {
> spin_unlock(&tree->lock);
> return false;
> }
> + zswap_entry_get(entry);
> spin_unlock(&tree->lock);
>
> if (entry->length)

2024-01-30 03:38:46

by Chengming Zhou

[permalink] [raw]
Subject: Re: [PATCH 03/20] mm: zswap: move zswap_invalidate_entry() to related functions

On 2024/1/30 09:36, Johannes Weiner wrote:
> Move it up to the other tree and refcounting functions.
>
> Signed-off-by: Johannes Weiner <[email protected]>

Reviewed-by: Chengming Zhou <[email protected]>

> ---
> mm/zswap.c | 24 ++++++++++++------------
> 1 file changed, 12 insertions(+), 12 deletions(-)
>
> diff --git a/mm/zswap.c b/mm/zswap.c
> index cf864aaa214d..9f05282efe3c 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -559,6 +559,18 @@ static void zswap_entry_put(struct zswap_entry *entry)
> }
> }
>
> +/*
> + * If the entry is still valid in the tree, drop the initial ref and remove it
> + * from the tree. This function must be called with an additional ref held,
> + * otherwise it may race with another invalidation freeing the entry.
> + */
> +static void zswap_invalidate_entry(struct zswap_tree *tree,
> + struct zswap_entry *entry)
> +{
> + if (zswap_rb_erase(&tree->rbroot, entry))
> + zswap_entry_put(entry);
> +}
> +
> /*********************************
> * shrinker functions
> **********************************/
> @@ -809,18 +821,6 @@ static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
> return NULL;
> }
>
> -/*
> - * If the entry is still valid in the tree, drop the initial ref and remove it
> - * from the tree. This function must be called with an additional ref held,
> - * otherwise it may race with another invalidation freeing the entry.
> - */
> -static void zswap_invalidate_entry(struct zswap_tree *tree,
> - struct zswap_entry *entry)
> -{
> - if (zswap_rb_erase(&tree->rbroot, entry))
> - zswap_entry_put(entry);
> -}
> -
> static enum lru_status shrink_memcg_cb(struct list_head *item, struct list_lru_one *l,
> spinlock_t *lock, void *arg)
> {

2024-01-30 03:39:37

by Chengming Zhou

[permalink] [raw]
Subject: Re: [PATCH 04/20] mm: zswap: warn when referencing a dead entry

On 2024/1/30 09:36, Johannes Weiner wrote:
> Put a standard sanity check on zswap_entry_get() for UAF scenario.
>
> Signed-off-by: Johannes Weiner <[email protected]>

Reviewed-by: Chengming Zhou <[email protected]>

> ---
> mm/zswap.c | 1 +
> 1 file changed, 1 insertion(+)
>
> diff --git a/mm/zswap.c b/mm/zswap.c
> index 9f05282efe3c..0c6adaf2fdb6 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -542,6 +542,7 @@ static void zswap_entry_free(struct zswap_entry *entry)
> /* caller must hold the tree lock */
> static void zswap_entry_get(struct zswap_entry *entry)
> {
> + WARN_ON_ONCE(!entry->refcount);
> entry->refcount++;
> }
>

2024-01-30 03:42:53

by Chengming Zhou

[permalink] [raw]
Subject: Re: [PATCH 05/20] mm: zswap: clean up zswap_entry_put()

On 2024/1/30 09:36, Johannes Weiner wrote:
> Remove stale comment and unnecessary local variable.
>
> Signed-off-by: Johannes Weiner <[email protected]>

Reviewed-by: Chengming Zhou <[email protected]>

> ---
> mm/zswap.c | 10 +++-------
> 1 file changed, 3 insertions(+), 7 deletions(-)
>
> diff --git a/mm/zswap.c b/mm/zswap.c
> index 0c6adaf2fdb6..7a7e8da2b4f8 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -546,15 +546,11 @@ static void zswap_entry_get(struct zswap_entry *entry)
> entry->refcount++;
> }
>
> -/* caller must hold the tree lock
> -* remove from the tree and free it, if nobody reference the entry
> -*/
> +/* caller must hold the tree lock */
> static void zswap_entry_put(struct zswap_entry *entry)
> {
> - int refcount = --entry->refcount;
> -
> - WARN_ON_ONCE(refcount < 0);
> - if (refcount == 0) {
> + WARN_ON_ONCE(!entry->refcount);
> + if (--entry->refcount == 0) {
> WARN_ON_ONCE(!RB_EMPTY_NODE(&entry->rbnode));
> zswap_entry_free(entry);
> }

2024-01-30 07:52:04

by Yosry Ahmed

[permalink] [raw]
Subject: Re: [PATCH 05/20] mm: zswap: clean up zswap_entry_put()

On Mon, Jan 29, 2024 at 08:36:41PM -0500, Johannes Weiner wrote:
> Remove stale comment and unnecessary local variable.
>
> Signed-off-by: Johannes Weiner <[email protected]>
> ---
> mm/zswap.c | 10 +++-------
> 1 file changed, 3 insertions(+), 7 deletions(-)
>
> diff --git a/mm/zswap.c b/mm/zswap.c
> index 0c6adaf2fdb6..7a7e8da2b4f8 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -546,15 +546,11 @@ static void zswap_entry_get(struct zswap_entry *entry)
> entry->refcount++;
> }
>
> -/* caller must hold the tree lock
> -* remove from the tree and free it, if nobody reference the entry
> -*/
> +/* caller must hold the tree lock */

We should replace all those "caller must hold the tree lock" comments
with lockdep_assert_held() or assert_spin_locked() or something.
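
A minimal sketch of what that could look like (hypothetical, not part of
this series - it assumes the tree is passed in so the lock is reachable
from the callee):

	static void zswap_entry_put(struct zswap_tree *tree,
				    struct zswap_entry *entry)
	{
		lockdep_assert_held(&tree->lock);	/* replaces the comment */
		WARN_ON_ONCE(!entry->refcount);
		if (--entry->refcount == 0) {
			WARN_ON_ONCE(!RB_EMPTY_NODE(&entry->rbnode));
			zswap_entry_free(entry);
		}
	}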

I can send follow-up patches on top if you don't want to resend this
series.

> static void zswap_entry_put(struct zswap_entry *entry)
> {
> - int refcount = --entry->refcount;
> -
> - WARN_ON_ONCE(refcount < 0);
> - if (refcount == 0) {
> + WARN_ON_ONCE(!entry->refcount);
> + if (--entry->refcount == 0) {
> WARN_ON_ONCE(!RB_EMPTY_NODE(&entry->rbnode));
> zswap_entry_free(entry);
> }
> --
> 2.43.0
>

2024-01-30 08:09:21

by Yosry Ahmed

[permalink] [raw]
Subject: Re: [PATCH 02/20] mm: zswap: inline and remove zswap_entry_find_get()

On Mon, Jan 29, 2024 at 08:36:38PM -0500, Johannes Weiner wrote:
> There is only one caller and the function is trivial. Inline it.
>
> Signed-off-by: Johannes Weiner <[email protected]>

Acked-by: Yosry Ahmed <[email protected]>

2024-01-30 08:10:04

by Yosry Ahmed

[permalink] [raw]
Subject: Re: [PATCH 03/20] mm: zswap: move zswap_invalidate_entry() to related functions

On Mon, Jan 29, 2024 at 08:36:39PM -0500, Johannes Weiner wrote:
> Move it up to the other tree and refcounting functions.
>
> Signed-off-by: Johannes Weiner <[email protected]>

Acked-by: Yosry Ahmed <[email protected]>

2024-01-30 08:11:35

by Yosry Ahmed

[permalink] [raw]
Subject: Re: [PATCH 06/20] mm: zswap: rename __zswap_load() to zswap_decompress()

On Mon, Jan 29, 2024 at 08:36:42PM -0500, Johannes Weiner wrote:
> Signed-off-by: Johannes Weiner <[email protected]>

Acked-by: Yosry Ahmed <[email protected]>

2024-01-30 08:11:49

by Yosry Ahmed

[permalink] [raw]
Subject: Re: [PATCH 07/20] mm: zswap: break out zswap_compress()

On Mon, Jan 29, 2024 at 08:36:43PM -0500, Johannes Weiner wrote:
> zswap_store() is long and mixes work at the zswap layer with work at
> the backend and compression layer. Move compression & backend work to
> zswap_compress(), mirroring zswap_decompress().
>
> Signed-off-by: Johannes Weiner <[email protected]>

Acked-by: Yosry Ahmed <[email protected]>

2024-01-30 08:12:35

by Yosry Ahmed

[permalink] [raw]
Subject: Re: [PATCH 08/20] mm: zswap: further cleanup zswap_store()

On Mon, Jan 29, 2024 at 08:36:44PM -0500, Johannes Weiner wrote:
> - Remove dupentry, reusing entry works just fine.
> - Rename pool to shrink_pool, as this one actually is confusing.
> - Remove page, use folio_nid() and kmap_local_folio() directly.
> - Set entry->swpentry in a common path.
> - Move value and src to local scope of use.
>
> Signed-off-by: Johannes Weiner <[email protected]>

Acked-by: Yosry Ahmed <[email protected]>

2024-01-30 08:12:49

by Yosry Ahmed

[permalink] [raw]
Subject: Re: [PATCH 09/20] mm: zswap: simplify zswap_invalidate()

On Mon, Jan 29, 2024 at 08:36:45PM -0500, Johannes Weiner wrote:
> The branching is awkward and duplicates code. The comment about
> writeback is also misleading: yes, the entry might have been written
> back. Or it might have never been stored in zswap to begin with due to
> a rejection - zswap_invalidate() is called on all exiting swap entries.
>
> Signed-off-by: Johannes Weiner <[email protected]>

Acked-by: Yosry Ahmed <[email protected]>

2024-01-30 08:18:46

by Yosry Ahmed

[permalink] [raw]
Subject: Re: [PATCH 01/20] mm: zswap: rename zswap_free_entry to zswap_entry_free

On Mon, Jan 29, 2024 at 08:36:37PM -0500, Johannes Weiner wrote:
> There is a zswap_entry_ namespace with multiple functions already.
>
> Signed-off-by: Johannes Weiner <[email protected]>

Acked-by: Yosry Ahmed <[email protected]>

Thanks.

2024-01-30 08:19:03

by Yosry Ahmed

[permalink] [raw]
Subject: Re: [PATCH 05/20] mm: zswap: clean up zswap_entry_put()

On Mon, Jan 29, 2024 at 08:36:41PM -0500, Johannes Weiner wrote:
> Remove stale comment and unnecessary local variable.
>
> Signed-off-by: Johannes Weiner <[email protected]>

Acked-by: Yosry Ahmed <[email protected]>

2024-01-30 08:19:51

by Yosry Ahmed

[permalink] [raw]
Subject: Re: [PATCH 04/20] mm: zswap: warn when referencing a dead entry

On Mon, Jan 29, 2024 at 08:36:40PM -0500, Johannes Weiner wrote:
> Put a standard sanity check on zswap_entry_get() for UAF scenario.
>
> Signed-off-by: Johannes Weiner <[email protected]>

Acked-by: Yosry Ahmed <[email protected]>

2024-01-30 08:24:06

by Yosry Ahmed

[permalink] [raw]
Subject: Re: [PATCH 00/20] mm: zswap: cleanups

Hey Johannes,

On Mon, Jan 29, 2024 at 08:36:36PM -0500, Johannes Weiner wrote:
> Cleanups and maintenance items that accumulated while reviewing zswap
> patches. Based on akpm/mm-unstable + the UAF fix I sent just now.

Patches 1 to 9 LGTM, thanks for the great cleanups!

I am less excited about patches 10 to 20 though. Don't get me wrong, I
am all for logically ordering the code. However, it feels like in this
case, we will introduce unnecessary layers in the git history in a lot
of places where I find myself checking the history regularly.
Personally, I tend to jump around the file using vim search or using a
cscope extension to find references/definitions, so I don't feel a need
for such reordering.

I am not objecting to it, but I just find it less appealing than the
rest of the series.

>
> mm/zswap.c | 1961 +++++++++++++++++++++++++++++-----------------------------
> 1 file changed, 971 insertions(+), 990 deletions(-)
>

2024-01-30 09:12:53

by Nhat Pham

[permalink] [raw]
Subject: Re: [PATCH 01/20] mm: zswap: rename zswap_free_entry to zswap_entry_free

On Mon, Jan 29, 2024 at 5:42 PM Johannes Weiner <[email protected]> wrote:
>
> There is a zswap_entry_ namespace with multiple functions already.
>
> Signed-off-by: Johannes Weiner <[email protected]>

Acked-by: Nhat Pham <[email protected]>

> ---
> mm/zswap.c | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/mm/zswap.c b/mm/zswap.c
> index 7f88b3a77e4a..173f2e6657de 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -520,7 +520,7 @@ static struct zpool *zswap_find_zpool(struct zswap_entry *entry)
> * Carries out the common pattern of freeing and entry's zpool allocation,
> * freeing the entry itself, and decrementing the number of stored pages.
> */
> -static void zswap_free_entry(struct zswap_entry *entry)
> +static void zswap_entry_free(struct zswap_entry *entry)
> {
> if (!entry->length)
> atomic_dec(&zswap_same_filled_pages);
> @@ -555,7 +555,7 @@ static void zswap_entry_put(struct zswap_entry *entry)
> WARN_ON_ONCE(refcount < 0);
> if (refcount == 0) {
> WARN_ON_ONCE(!RB_EMPTY_NODE(&entry->rbnode));
> - zswap_free_entry(entry);
> + zswap_entry_free(entry);
> }
> }
>
> --
> 2.43.0
>

2024-01-30 12:21:45

by Sergey Senozhatsky

[permalink] [raw]
Subject: Re: [PATCH 00/20] mm: zswap: cleanups

On (24/01/30 08:16), Yosry Ahmed wrote:
> Hey Johannes,
>
> On Mon, Jan 29, 2024 at 08:36:36PM -0500, Johannes Weiner wrote:
> > Cleanups and maintenance items that accumulated while reviewing zswap
> > patches. Based on akpm/mm-unstable + the UAF fix I sent just now.
>
> Patches 1 to 9 LGTM, thanks for the great cleanups!
>
> I am less excited about patches 10 to 20 though. Don't get me wrong, I
> am all for logically ordering the code. However, it feels like in this
> case, we will introduce unnecessary layers in the git history in a lot

This can also complicate cherry-picking of patches to stable, prod, etc.

2024-01-30 15:52:23

by Johannes Weiner

[permalink] [raw]
Subject: Re: [PATCH 00/20] mm: zswap: cleanups

On Tue, Jan 30, 2024 at 09:21:31PM +0900, Sergey Senozhatsky wrote:
> On (24/01/30 08:16), Yosry Ahmed wrote:
> > Hey Johannes,
> >
> > On Mon, Jan 29, 2024 at 08:36:36PM -0500, Johannes Weiner wrote:
> > > Cleanups and maintenance items that accumulated while reviewing zswap
> > > patches. Based on akpm/mm-unstable + the UAF fix I sent just now.
> >
> > Patches 1 to 9 LGTM, thanks for the great cleanups!
> >
> > I am less excited about patches 10 to 20 though. Don't get me wrong, I
> > am all for logically ordering the code. However, it feels like in this
> > case, we will introduce unnecessary layers in the git history in a lot
>
> This can also complicate cherry-picking of patches to stable, prod, etc.

I'm sensitive to that argument, because we run our own kernel at Meta
as well.

But moves are pretty easy. The code doesn't actually change, just the
line offsets. So patch will mostly work with offset warnings. And if
not, it's easy to fix up and verify. Refactoring and API restructuring
(e.g. folios) make this much harder.

2024-01-30 16:09:41

by Johannes Weiner

[permalink] [raw]
Subject: Re: [PATCH 00/20] mm: zswap: cleanups

On Tue, Jan 30, 2024 at 08:16:27AM +0000, Yosry Ahmed wrote:
> Hey Johannes,
>
> On Mon, Jan 29, 2024 at 08:36:36PM -0500, Johannes Weiner wrote:
> > Cleanups and maintenance items that accumulated while reviewing zswap
> > patches. Based on akpm/mm-unstable + the UAF fix I sent just now.
>
> Patches 1 to 9 LGTM, thanks for the great cleanups!
>
> I am less excited about patches 10 to 20 though. Don't get me wrong, I
> am all for logically ordering the code. However, it feels like in this
> case, we will introduce unnecessary layers in the git history in a lot
> of places where I find myself checking the history regularly.
> Personally, I tend to jump around the file using vim search or using a
> cscope extension to find references/definitions, so I don't feel a need
> for such reordering.

I agree that sweeping non-functional changes can be somewhat
painful. However, moves are among the easiest of those because the
code itself doesn't change. git log is not really affected, git blame
<ref-just-before-move> mm/zswap.c works as well. Backports are easy to
adjust and verify - mostly, patch will just warn about line offsets.

What motivated this was figuring out the writeback/swapoff race. I
also use search and multiple buffers in my editor, but keeping track
of the dependencies between shrink_memcg_cb, zswap_writeback_entry and
third places like load and invalidate became quite unwieldy. There is
also the problem of searching in the logical direction and not finding
things, or finding mostly unrelated stuff. It's just less error-prone
to read existing code and
write new code if related layers are grouped together and the order is
logical, despite having those tools.

The problem will also only get worse if there are no natural anchor
points for new code, and the layering isn't clear. The new shrinker
code is a case in point. We missed the opportunity in the memcg
codebase, and I've regretted it for years. It just resulted in more
fragile, less readable and debuggable code. The zswap code has been
stagnant in the last few years, and there are relatively few commits
behind us (git log --pretty=format:"%as %h %s" mm/zswap.c). I figure
now is a good chance, before the more major changes we have planned.

> I am not objecting to it, but I just find it less appealing than the
> rest of the series.

Understood.

2024-01-30 16:21:53

by Nhat Pham

[permalink] [raw]
Subject: Re: [PATCH 07/20] mm: zswap: break out zswap_compress()

On Mon, Jan 29, 2024 at 5:42 PM Johannes Weiner <[email protected]> wrote:
>
> zswap_store() is long and mixes work at the zswap layer with work at
> the backend and compression layer. Move compression & backend work to
> zswap_compress(), mirroring zswap_decompress().
>
> Signed-off-by: Johannes Weiner <[email protected]>
> ---
> mm/zswap.c | 145 ++++++++++++++++++++++++++++-------------------------
> 1 file changed, 77 insertions(+), 68 deletions(-)
>
> diff --git a/mm/zswap.c b/mm/zswap.c
> index bdc9f82fe4b9..f9b9494156ba 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -1316,6 +1316,79 @@ static int zswap_enabled_param_set(const char *val,
> return ret;
> }
>
> +static bool zswap_compress(struct folio *folio, struct zswap_entry *entry)
> +{
> + struct crypto_acomp_ctx *acomp_ctx;
> + struct scatterlist input, output;
> + unsigned int dlen = PAGE_SIZE;
> + unsigned long handle;
> + struct zpool *zpool;
> + char *buf;
> + gfp_t gfp;
> + int ret;
> + u8 *dst;
> +
> + acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
> +
> + mutex_lock(&acomp_ctx->mutex);
> +
> + dst = acomp_ctx->buffer;
> + sg_init_table(&input, 1);
> + sg_set_page(&input, &folio->page, PAGE_SIZE, 0);
> +
> + /*
> + * We need PAGE_SIZE * 2 here since there maybe over-compression case,
> + * and hardware-accelerators may won't check the dst buffer size, so
> + * giving the dst buffer with enough length to avoid buffer overflow.
> + */
> + sg_init_one(&output, dst, PAGE_SIZE * 2);
> + acomp_request_set_params(acomp_ctx->req, &input, &output, PAGE_SIZE, dlen);
> +
> + /*
> + * it maybe looks a little bit silly that we send an asynchronous request,
> + * then wait for its completion synchronously. This makes the process look
> + * synchronous in fact.
> + * Theoretically, acomp supports users send multiple acomp requests in one
> + * acomp instance, then get those requests done simultaneously. but in this
> + * case, zswap actually does store and load page by page, there is no
> + * existing method to send the second page before the first page is done
> + * in one thread doing zwap.
> + * but in different threads running on different cpu, we have different
> + * acomp instance, so multiple threads can do (de)compression in parallel.
> + */
> + ret = crypto_wait_req(crypto_acomp_compress(acomp_ctx->req), &acomp_ctx->wait);
> + dlen = acomp_ctx->req->dlen;
> + if (ret) {
> + zswap_reject_compress_fail++;
> + goto unlock;
> + }
> +
> + zpool = zswap_find_zpool(entry);
> + gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
> + if (zpool_malloc_support_movable(zpool))
> + gfp |= __GFP_HIGHMEM | __GFP_MOVABLE;
> + ret = zpool_malloc(zpool, dlen, gfp, &handle);
> + if (ret == -ENOSPC) {
> + zswap_reject_compress_poor++;
> + goto unlock;
> + }
> + if (ret) {
> + zswap_reject_alloc_fail++;
> + goto unlock;
> + }
> +
> + buf = zpool_map_handle(zpool, handle, ZPOOL_MM_WO);
> + memcpy(buf, dst, dlen);
> + zpool_unmap_handle(zpool, handle);
> +
> + entry->handle = handle;
> + entry->length = dlen;
> +
> +unlock:
> + mutex_unlock(&acomp_ctx->mutex);
> + return ret == 0;
> +}
> +
> static void zswap_decompress(struct zswap_entry *entry, struct page *page)
> {
> struct zpool *zpool = zswap_find_zpool(entry);
> @@ -1472,18 +1545,11 @@ bool zswap_store(struct folio *folio)
> struct page *page = &folio->page;
> struct zswap_tree *tree = swap_zswap_tree(swp);
> struct zswap_entry *entry, *dupentry;
> - struct scatterlist input, output;
> - struct crypto_acomp_ctx *acomp_ctx;
> struct obj_cgroup *objcg = NULL;
> struct mem_cgroup *memcg = NULL;
> struct zswap_pool *pool;
> - struct zpool *zpool;
> - unsigned int dlen = PAGE_SIZE;
> - unsigned long handle, value;
> - char *buf;
> - u8 *src, *dst;
> - gfp_t gfp;
> - int ret;
> + unsigned long value;
> + u8 *src;
>
> VM_WARN_ON_ONCE(!folio_test_locked(folio));
> VM_WARN_ON_ONCE(!folio_test_swapcache(folio));
> @@ -1568,65 +1634,10 @@ bool zswap_store(struct folio *folio)
> mem_cgroup_put(memcg);
> }
>
> - /* compress */
> - acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
> -
> - mutex_lock(&acomp_ctx->mutex);
> -
> - dst = acomp_ctx->buffer;
> - sg_init_table(&input, 1);
> - sg_set_page(&input, &folio->page, PAGE_SIZE, 0);
> + if (!zswap_compress(folio, entry))
> + goto put_pool;
>
> - /*
> - * We need PAGE_SIZE * 2 here since there maybe over-compression case,
> - * and hardware-accelerators may won't check the dst buffer size, so
> - * giving the dst buffer with enough length to avoid buffer overflow.
> - */
> - sg_init_one(&output, dst, PAGE_SIZE * 2);
> - acomp_request_set_params(acomp_ctx->req, &input, &output, PAGE_SIZE, dlen);
> - /*
> - * it maybe looks a little bit silly that we send an asynchronous request,
> - * then wait for its completion synchronously. This makes the process look
> - * synchronous in fact.
> - * Theoretically, acomp supports users send multiple acomp requests in one
> - * acomp instance, then get those requests done simultaneously. but in this
> - * case, zswap actually does store and load page by page, there is no
> - * existing method to send the second page before the first page is done
> - * in one thread doing zwap.
> - * but in different threads running on different cpu, we have different
> - * acomp instance, so multiple threads can do (de)compression in parallel.
> - */
> - ret = crypto_wait_req(crypto_acomp_compress(acomp_ctx->req), &acomp_ctx->wait);
> - dlen = acomp_ctx->req->dlen;
> -
> - if (ret) {
> - zswap_reject_compress_fail++;
> - goto put_dstmem;
> - }
> -
> - /* store */
> - zpool = zswap_find_zpool(entry);
> - gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
> - if (zpool_malloc_support_movable(zpool))
> - gfp |= __GFP_HIGHMEM | __GFP_MOVABLE;
> - ret = zpool_malloc(zpool, dlen, gfp, &handle);
> - if (ret == -ENOSPC) {
> - zswap_reject_compress_poor++;
> - goto put_dstmem;
> - }
> - if (ret) {
> - zswap_reject_alloc_fail++;
> - goto put_dstmem;
> - }
> - buf = zpool_map_handle(zpool, handle, ZPOOL_MM_WO);
> - memcpy(buf, dst, dlen);
> - zpool_unmap_handle(zpool, handle);
> - mutex_unlock(&acomp_ctx->mutex);
> -
> - /* populate entry */
> entry->swpentry = swp;
> - entry->handle = handle;
> - entry->length = dlen;
>
> insert_entry:
> entry->objcg = objcg;
> @@ -1663,8 +1674,6 @@ bool zswap_store(struct folio *folio)
>
> return true;
>
> -put_dstmem:
> - mutex_unlock(&acomp_ctx->mutex);
> put_pool:
> zswap_pool_put(entry->pool);
> freepage:
> --
> 2.43.0
>

LGTM :)
Reviewed-by: Nhat Pham <[email protected]>

2024-01-30 16:24:37

by Nhat Pham

[permalink] [raw]
Subject: Re: [PATCH 02/20] mm: zswap: inline and remove zswap_entry_find_get()

On Mon, Jan 29, 2024 at 5:42 PM Johannes Weiner <[email protected]> wrote:
>
> There is only one caller and the function is trivial. Inline it.
>
> Signed-off-by: Johannes Weiner <[email protected]>
> ---
> mm/zswap.c | 17 ++---------------
> 1 file changed, 2 insertions(+), 15 deletions(-)
>
> diff --git a/mm/zswap.c b/mm/zswap.c
> index 173f2e6657de..cf864aaa214d 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -559,19 +559,6 @@ static void zswap_entry_put(struct zswap_entry *entry)
> }
> }
>
> -/* caller must hold the tree lock */
> -static struct zswap_entry *zswap_entry_find_get(struct rb_root *root,
> - pgoff_t offset)
> -{
> - struct zswap_entry *entry;
> -
> - entry = zswap_rb_search(root, offset);
> - if (entry)
> - zswap_entry_get(entry);
> -
> - return entry;
> -}
> -
> /*********************************
> * shrinker functions
> **********************************/
> @@ -1708,13 +1695,13 @@ bool zswap_load(struct folio *folio)
>
> VM_WARN_ON_ONCE(!folio_test_locked(folio));
>
> - /* find */
> spin_lock(&tree->lock);
> - entry = zswap_entry_find_get(&tree->rbroot, offset);
> + entry = zswap_rb_search(&tree->rbroot, offset);
> if (!entry) {
> spin_unlock(&tree->lock);
> return false;
> }
> + zswap_entry_get(entry);

Neat.
Reviewed-by: Nhat Pham <[email protected]>

> spin_unlock(&tree->lock);
>
> if (entry->length)
> --
> 2.43.0
>

2024-01-30 16:42:41

by Nhat Pham

[permalink] [raw]
Subject: Re: [PATCH 04/20] mm: zswap: warn when referencing a dead entry

On Mon, Jan 29, 2024 at 5:42 PM Johannes Weiner <[email protected]> wrote:
>
> Put a standard sanity check on zswap_entry_get() for UAF scenario.
>
> Signed-off-by: Johannes Weiner <[email protected]>
> ---
> mm/zswap.c | 1 +
> 1 file changed, 1 insertion(+)
>
> diff --git a/mm/zswap.c b/mm/zswap.c
> index 9f05282efe3c..0c6adaf2fdb6 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -542,6 +542,7 @@ static void zswap_entry_free(struct zswap_entry *entry)
> /* caller must hold the tree lock */
> static void zswap_entry_get(struct zswap_entry *entry)
> {
> + WARN_ON_ONCE(!entry->refcount);
> entry->refcount++;
> }
>
> --
> 2.43.0
>

Reviewed-by: Nhat Pham <[email protected]>

2024-01-30 16:42:48

by Nhat Pham

[permalink] [raw]
Subject: Re: [PATCH 03/20] mm: zswap: move zswap_invalidate_entry() to related functions

On Mon, Jan 29, 2024 at 5:42 PM Johannes Weiner <[email protected]> wrote:
>
> Move it up to the other tree and refcounting functions.
>
> Signed-off-by: Johannes Weiner <[email protected]>
> ---
> mm/zswap.c | 24 ++++++++++++------------
> 1 file changed, 12 insertions(+), 12 deletions(-)
>
> diff --git a/mm/zswap.c b/mm/zswap.c
> index cf864aaa214d..9f05282efe3c 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -559,6 +559,18 @@ static void zswap_entry_put(struct zswap_entry *entry)
> }
> }
>
> +/*
> + * If the entry is still valid in the tree, drop the initial ref and remove it
> + * from the tree. This function must be called with an additional ref held,
> + * otherwise it may race with another invalidation freeing the entry.
> + */
> +static void zswap_invalidate_entry(struct zswap_tree *tree,
> + struct zswap_entry *entry)

nit: entry_invalidate? There's an entry_put() above.

Anyway:
Reviewed-by: Nhat Pham <[email protected]>

> +{
> + if (zswap_rb_erase(&tree->rbroot, entry))
> + zswap_entry_put(entry);
> +}
> +
> /*********************************
> * shrinker functions
> **********************************/
> @@ -809,18 +821,6 @@ static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
> return NULL;
> }
>
> -/*
> - * If the entry is still valid in the tree, drop the initial ref and remove it
> - * from the tree. This function must be called with an additional ref held,
> - * otherwise it may race with another invalidation freeing the entry.
> - */
> -static void zswap_invalidate_entry(struct zswap_tree *tree,
> - struct zswap_entry *entry)
> -{
> - if (zswap_rb_erase(&tree->rbroot, entry))
> - zswap_entry_put(entry);
> -}
> -
> static enum lru_status shrink_memcg_cb(struct list_head *item, struct list_lru_one *l,
> spinlock_t *lock, void *arg)
> {
> --
> 2.43.0
>

2024-01-30 16:44:52

by Nhat Pham

[permalink] [raw]
Subject: Re: [PATCH 05/20] mm: zswap: clean up zswap_entry_put()

On Mon, Jan 29, 2024 at 11:51 PM Yosry Ahmed <[email protected]> wrote:
>
> On Mon, Jan 29, 2024 at 08:36:41PM -0500, Johannes Weiner wrote:
> > Remove stale comment and unnecessary local variable.
> >
> > Signed-off-by: Johannes Weiner <[email protected]>
> > ---
> > mm/zswap.c | 10 +++-------
> > 1 file changed, 3 insertions(+), 7 deletions(-)
> >
> > diff --git a/mm/zswap.c b/mm/zswap.c
> > index 0c6adaf2fdb6..7a7e8da2b4f8 100644
> > --- a/mm/zswap.c
> > +++ b/mm/zswap.c
> > @@ -546,15 +546,11 @@ static void zswap_entry_get(struct zswap_entry *entry)
> > entry->refcount++;
> > }
> >
> > -/* caller must hold the tree lock
> > -* remove from the tree and free it, if nobody reference the entry
> > -*/
> > +/* caller must hold the tree lock */
>
> We should replace all those "caller must hold the tree lock" comments
> with lockdep_assert_held() or assert_spin_locked() or something.
>
> I can send follow up patches on top if you don't want to resend this
> series.

Agree. There's also this:

/* should be called under RCU */
#ifdef CONFIG_MEMCG
static inline struct mem_cgroup *mem_cgroup_from_entry(struct zswap_entry *entry)
{
return entry->objcg ? obj_cgroup_memcg(entry->objcg) : NULL;
}

which you pointed out in the per-cgroup zswap LRU review :)
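
For reference, a rough sketch of what those assertions might look like -
purely illustrative, since zswap_entry_put() doesn't currently take a tree
argument, so the exact form is up to the follow-up patch:

	/* hypothetical: assumes the tree is passed in (today it isn't) */
	static void zswap_entry_put(struct zswap_tree *tree,
				    struct zswap_entry *entry)
	{
		assert_spin_locked(&tree->lock);	/* replaces the comment */
		WARN_ON_ONCE(!entry->refcount);
		if (--entry->refcount == 0) {
			WARN_ON_ONCE(!RB_EMPTY_NODE(&entry->rbnode));
			zswap_entry_free(entry);
		}
	}

	/* and for the RCU case above */
	static inline struct mem_cgroup *mem_cgroup_from_entry(struct zswap_entry *entry)
	{
		WARN_ON_ONCE(!rcu_read_lock_held());
		return entry->objcg ? obj_cgroup_memcg(entry->objcg) : NULL;
	}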


>
> > static void zswap_entry_put(struct zswap_entry *entry)
> > {
> > - int refcount = --entry->refcount;
> > -
> > - WARN_ON_ONCE(refcount < 0);
> > - if (refcount == 0) {
> > + WARN_ON_ONCE(!entry->refcount);
> > + if (--entry->refcount == 0) {
> > WARN_ON_ONCE(!RB_EMPTY_NODE(&entry->rbnode));
> > zswap_entry_free(entry);
> > }
> > --
> > 2.43.0
> >

Reviewed-by: Nhat Pham <[email protected]>

2024-01-30 16:55:14

by Nhat Pham

[permalink] [raw]
Subject: Re: [PATCH 06/20] mm: zswap: rename __zswap_load() to zswap_decompress()

On Mon, Jan 29, 2024 at 5:42 PM Johannes Weiner <[email protected]> wrote:
>
> Signed-off-by: Johannes Weiner <[email protected]>
> ---
> mm/zswap.c | 6 +++---
> 1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/mm/zswap.c b/mm/zswap.c
> index 7a7e8da2b4f8..bdc9f82fe4b9 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -1316,7 +1316,7 @@ static int zswap_enabled_param_set(const char *val,
> return ret;
> }
>
> -static void __zswap_load(struct zswap_entry *entry, struct page *page)
> +static void zswap_decompress(struct zswap_entry *entry, struct page *page)

This is a much better name. Was gonna point this out in the original
series - it's clearly just decompression :)
Reviewed-by: Nhat Pham <[email protected]>

> {
> struct zpool *zpool = zswap_find_zpool(entry);
> struct scatterlist input, output;
> @@ -1411,7 +1411,7 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
> zswap_entry_get(entry);
> spin_unlock(&tree->lock);
>
> - __zswap_load(entry, &folio->page);
> + zswap_decompress(entry, &folio->page);
>
> count_vm_event(ZSWPWB);
> if (entry->objcg)
> @@ -1702,7 +1702,7 @@ bool zswap_load(struct folio *folio)
> spin_unlock(&tree->lock);
>
> if (entry->length)
> - __zswap_load(entry, page);
> + zswap_decompress(entry, page);
> else {
> dst = kmap_local_page(page);
> zswap_fill_page(dst, entry->value);
> --
> 2.43.0
>

2024-01-30 17:05:19

by Yosry Ahmed

[permalink] [raw]
Subject: Re: [PATCH 05/20] mm: zswap: clean up zswap_entry_put()

On Tue, Jan 30, 2024 at 08:31:22AM -0800, Nhat Pham wrote:
> On Mon, Jan 29, 2024 at 11:51 PM Yosry Ahmed <[email protected]> wrote:
> >
> > On Mon, Jan 29, 2024 at 08:36:41PM -0500, Johannes Weiner wrote:
> > > Remove stale comment and unnecessary local variable.
> > >
> > > Signed-off-by: Johannes Weiner <[email protected]>
> > > ---
> > > mm/zswap.c | 10 +++-------
> > > 1 file changed, 3 insertions(+), 7 deletions(-)
> > >
> > > diff --git a/mm/zswap.c b/mm/zswap.c
> > > index 0c6adaf2fdb6..7a7e8da2b4f8 100644
> > > --- a/mm/zswap.c
> > > +++ b/mm/zswap.c
> > > @@ -546,15 +546,11 @@ static void zswap_entry_get(struct zswap_entry *entry)
> > > entry->refcount++;
> > > }
> > >
> > > -/* caller must hold the tree lock
> > > -* remove from the tree and free it, if nobody reference the entry
> > > -*/
> > > +/* caller must hold the tree lock */
> >
> > We should replace all those "caller must hold the tree lock" comments
> > with lockdep_assert_held() or assert_spin_locked() or something.
> >
> > I can send follow up patches on top if you don't want to resend this
> > series.
>
> Agree. There's also this:
>
> /* should be called under RCU */
> #ifdef CONFIG_MEMCG
> static inline struct mem_cgroup *mem_cgroup_from_entry(struct zswap_entry *entry)
> {
> return entry->objcg ? obj_cgroup_memcg(entry->objcg) : NULL;
> }
>
> which you pointed out in the per-cgroup zswap LRU review :)

I will send out a patch this week or so.

2024-01-30 17:13:15

by Nhat Pham

[permalink] [raw]
Subject: Re: [PATCH 09/20] mm: zswap: simplify zswap_invalidate()

On Mon, Jan 29, 2024 at 5:42 PM Johannes Weiner <[email protected]> wrote:
>
> The branching is awkward and duplicates code. The comment about
> writeback is also misleading: yes, the entry might have been written
> back. Or it might have never been stored in zswap to begin with due to
> a rejection - zswap_invalidate() is called on all exiting swap entries.
>
> Signed-off-by: Johannes Weiner <[email protected]>

Reviewed-by: Nhat Pham <[email protected]>
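
For context, the simplified version ends up roughly like this (a sketch
based on the description above; the exact tree lookup depends on the base
series):

	void zswap_invalidate(int type, pgoff_t offset)
	{
		struct zswap_tree *tree = swap_zswap_tree(swp_entry(type, offset));
		struct zswap_entry *entry;

		spin_lock(&tree->lock);
		entry = zswap_rb_search(&tree->rbroot, offset);
		if (entry)
			zswap_invalidate_entry(tree, entry);
		spin_unlock(&tree->lock);
	}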

2024-01-30 17:16:05

by Yosry Ahmed

[permalink] [raw]
Subject: Re: [PATCH 00/20] mm: zswap: cleanups

On Tue, Jan 30, 2024 at 10:46:32AM -0500, Johannes Weiner wrote:
> On Tue, Jan 30, 2024 at 08:16:27AM +0000, Yosry Ahmed wrote:
> > Hey Johannes,
> >
> > On Mon, Jan 29, 2024 at 08:36:36PM -0500, Johannes Weiner wrote:
> > > Cleanups and maintenance items that accumulated while reviewing zswap
> > > patches. Based on akpm/mm-unstable + the UAF fix I sent just now.
> >
> > Patches 1 to 9 LGTM, thanks for the great cleanups!
> >
> > I am less excited about patches 10 to 20 though. Don't get me wrong, I
> > am all for logically ordering the code. However, it feels like in this
> > case, we will introduce unnecessary layers in the git history in a lot
> > of places where I find myself checking the history regularly.
> > Personally, I tend to jump around the file using vim search or using a
> > cscope extension to find references/definitions, so I don't feel a need
> > for such reordering.
>
> I agree that sweeping non-functional changes can be somewhat
> painful. However, moves are among the easiest of those because the
> code itself doesn't change. git log is not really affected, git blame
> <ref-just-before-move> mm/zswap.c works as well.

IIUC, git blame -L <lines> <ref-just-before-move> won't really work,
because the line numbers will have shifted significantly. In the future,
going through several layers of git blame will be less convenient,
because the function moves invalidate the line ranges.
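
For what it's worth, blame can be asked to track moved lines - a rough
hint, assuming a recent-enough git; it doesn't fully replace -L with
explicit line ranges, though:

	# attribute moved lines to their original commits, not the move commit
	git blame -M -C mm/zswap.c

	# or follow one function's history across the moves
	git log -L :zswap_pool_create:mm/zswap.c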

> Backports are easy to
> adjust and verify - mostly, patch will just warn about line offsets.

I usually use git cherry-pick, and it sometimes gets confused if things
are moved far enough IIRC.

>
> What motivated this was figuring out the writeback/swapoff race. I
> also use search and multiple buffers in my editor, but keeping track
> of the dependencies between shrink_memcg_cb, zswap_writeback_entry and
> third places like load and invalidate became quite unwieldy. There is
> also the search in the logical direction not finding things, or mostly
> unrelated stuff. It's just less error prone to read existing code and
> write new code if related layers are grouped together and the order is
> logical, despite having those tools.
>
> The problem will also only get worse if there are no natural anchor
> points for new code, and the layering isn't clear. The new shrinker
> code is a case in point. We missed the opportunity in the memcg
> codebase, and I've regretted it for years. It just resulted in more
> fragile, less readable and debuggable code. The zswap code has been
> stagnant in the last few years, and there are relatively few commits
> behind us (git log --pretty=format:"%as %h %s" mm/zswap.c). I figure
> now is a good chance, before the more major changes we have planned.

I agree that if we want to do it, it's much better now than later; I was
just presenting a different perspective. No strong feelings.

2024-01-30 18:18:37

by Nhat Pham

[permalink] [raw]
Subject: Re: [PATCH 08/20] mm: zswap: further cleanup zswap_store()

On Mon, Jan 29, 2024 at 5:42 PM Johannes Weiner <[email protected]> wrote:
>
> - Remove dupentry, reusing entry works just fine.
> - Rename pool to shrink_pool, as this one actually is confusing.
> - Remove page, use folio_nid() and kmap_local_folio() directly.
> - Set entry->swpentry in a common path.
> - Move value and src to local scope of use.
>
> Signed-off-by: Johannes Weiner <[email protected]>
Reviewed-by: Nhat Pham <[email protected]>
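
For instance, the same-filled path ends up working on the folio directly,
roughly like this (a sketch of the idea, not the exact hunk):

	if (zswap_same_filled_pages_enabled) {
		unsigned long value;
		u8 *src;

		src = kmap_local_folio(folio, 0);
		if (zswap_is_page_same_filled(src, &value)) {
			kunmap_local(src);
			entry->length = 0;
			entry->value = value;
			atomic_inc(&zswap_same_filled_pages);
			goto insert_entry;
		}
		kunmap_local(src);
	}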

2024-01-30 19:31:52

by Nhat Pham

[permalink] [raw]
Subject: Re: [PATCH 10/20] mm: zswap: function ordering: pool alloc & free

On Mon, Jan 29, 2024 at 5:42 PM Johannes Weiner <[email protected]> wrote:
>
> The function ordering in zswap.c is a little chaotic, which requires
> jumping in unexpected directions when following related code. This is
> a series of patches that brings the file into the following order:
>
> - pool functions
> - lru functions
> - rbtree functions
> - zswap entry functions
> - compression/backend functions
> - writeback & shrinking functions
> - store, load, invalidate, swapon, swapoff
> - debugfs
> - init

This ordering seems reasonable to me. Then again, we don't have any
coherent ordering of functions, so anything would be an improvement :)


>
> But it has to be split up such that the moving still produces halfway
> readable diffs.
>
> In this patch, move pool allocation and freeing functions.
>
> Signed-off-by: Johannes Weiner <[email protected]>

Reviewed-by: Nhat Pham <[email protected]>

> ---
> mm/zswap.c | 297 +++++++++++++++++++++++++++--------------------------
> 1 file changed, 152 insertions(+), 145 deletions(-)
>
> diff --git a/mm/zswap.c b/mm/zswap.c
> index 082d076a758d..805d9a35f633 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -320,6 +320,158 @@ static void zswap_update_total_size(void)
> zswap_pool_total_size = total;
> }
>
> +/*********************************
> +* pool functions
> +**********************************/
> +
> +static void zswap_alloc_shrinker(struct zswap_pool *pool);
> +static void shrink_worker(struct work_struct *w);
> +
> +static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
> +{
> + int i;
> + struct zswap_pool *pool;
> + char name[38]; /* 'zswap' + 32 char (max) num + \0 */
> + gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
> + int ret;
> +
> + if (!zswap_has_pool) {
> + /* if either are unset, pool initialization failed, and we
> + * need both params to be set correctly before trying to
> + * create a pool.
> + */
> + if (!strcmp(type, ZSWAP_PARAM_UNSET))
> + return NULL;
> + if (!strcmp(compressor, ZSWAP_PARAM_UNSET))
> + return NULL;
> + }
> +
> + pool = kzalloc(sizeof(*pool), GFP_KERNEL);
> + if (!pool)
> + return NULL;
> +
> + for (i = 0; i < ZSWAP_NR_ZPOOLS; i++) {
> + /* unique name for each pool specifically required by zsmalloc */
> + snprintf(name, 38, "zswap%x",
> + atomic_inc_return(&zswap_pools_count));
> +
> + pool->zpools[i] = zpool_create_pool(type, name, gfp);
> + if (!pool->zpools[i]) {
> + pr_err("%s zpool not available\n", type);
> + goto error;
> + }
> + }
> + pr_debug("using %s zpool\n", zpool_get_type(pool->zpools[0]));
> +
> + strscpy(pool->tfm_name, compressor, sizeof(pool->tfm_name));
> +
> + pool->acomp_ctx = alloc_percpu(*pool->acomp_ctx);
> + if (!pool->acomp_ctx) {
> + pr_err("percpu alloc failed\n");
> + goto error;
> + }
> +
> + ret = cpuhp_state_add_instance(CPUHP_MM_ZSWP_POOL_PREPARE,
> + &pool->node);
> + if (ret)
> + goto error;
> +
> + zswap_alloc_shrinker(pool);
> + if (!pool->shrinker)
> + goto error;
> +
> + pr_debug("using %s compressor\n", pool->tfm_name);
> +
> + /* being the current pool takes 1 ref; this func expects the
> + * caller to always add the new pool as the current pool
> + */
> + kref_init(&pool->kref);
> + INIT_LIST_HEAD(&pool->list);
> + if (list_lru_init_memcg(&pool->list_lru, pool->shrinker))
> + goto lru_fail;
> + shrinker_register(pool->shrinker);
> + INIT_WORK(&pool->shrink_work, shrink_worker);
> + atomic_set(&pool->nr_stored, 0);
> +
> + zswap_pool_debug("created", pool);
> +
> + return pool;
> +
> +lru_fail:
> + list_lru_destroy(&pool->list_lru);
> + shrinker_free(pool->shrinker);
> +error:
> + if (pool->acomp_ctx)
> + free_percpu(pool->acomp_ctx);
> + while (i--)
> + zpool_destroy_pool(pool->zpools[i]);
> + kfree(pool);
> + return NULL;
> +}
> +
> +static struct zswap_pool *__zswap_pool_create_fallback(void)
> +{
> + bool has_comp, has_zpool;
> +
> + has_comp = crypto_has_acomp(zswap_compressor, 0, 0);
> + if (!has_comp && strcmp(zswap_compressor,
> + CONFIG_ZSWAP_COMPRESSOR_DEFAULT)) {
> + pr_err("compressor %s not available, using default %s\n",
> + zswap_compressor, CONFIG_ZSWAP_COMPRESSOR_DEFAULT);
> + param_free_charp(&zswap_compressor);
> + zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT;
> + has_comp = crypto_has_acomp(zswap_compressor, 0, 0);
> + }
> + if (!has_comp) {
> + pr_err("default compressor %s not available\n",
> + zswap_compressor);
> + param_free_charp(&zswap_compressor);
> + zswap_compressor = ZSWAP_PARAM_UNSET;
> + }
> +
> + has_zpool = zpool_has_pool(zswap_zpool_type);
> + if (!has_zpool && strcmp(zswap_zpool_type,
> + CONFIG_ZSWAP_ZPOOL_DEFAULT)) {
> + pr_err("zpool %s not available, using default %s\n",
> + zswap_zpool_type, CONFIG_ZSWAP_ZPOOL_DEFAULT);
> + param_free_charp(&zswap_zpool_type);
> + zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT;
> + has_zpool = zpool_has_pool(zswap_zpool_type);
> + }
> + if (!has_zpool) {
> + pr_err("default zpool %s not available\n",
> + zswap_zpool_type);
> + param_free_charp(&zswap_zpool_type);
> + zswap_zpool_type = ZSWAP_PARAM_UNSET;
> + }
> +
> + if (!has_comp || !has_zpool)
> + return NULL;
> +
> + return zswap_pool_create(zswap_zpool_type, zswap_compressor);
> +}
> +
> +static void zswap_pool_destroy(struct zswap_pool *pool)
> +{
> + int i;
> +
> + zswap_pool_debug("destroying", pool);
> +
> + shrinker_free(pool->shrinker);
> + cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node);
> + free_percpu(pool->acomp_ctx);
> + list_lru_destroy(&pool->list_lru);
> +
> + spin_lock(&zswap_pools_lock);
> + mem_cgroup_iter_break(NULL, pool->next_shrink);
> + pool->next_shrink = NULL;
> + spin_unlock(&zswap_pools_lock);
> +
> + for (i = 0; i < ZSWAP_NR_ZPOOLS; i++)
> + zpool_destroy_pool(pool->zpools[i]);
> + kfree(pool);
> +}
> +
> /* should be called under RCU */
> #ifdef CONFIG_MEMCG
> static inline struct mem_cgroup *mem_cgroup_from_entry(struct zswap_entry *entry)
> @@ -970,151 +1122,6 @@ static void shrink_worker(struct work_struct *w)
> zswap_pool_put(pool);
> }
>
> -static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
> -{
> - int i;
> - struct zswap_pool *pool;
> - char name[38]; /* 'zswap' + 32 char (max) num + \0 */
> - gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
> - int ret;
> -
> - if (!zswap_has_pool) {
> - /* if either are unset, pool initialization failed, and we
> - * need both params to be set correctly before trying to
> - * create a pool.
> - */
> - if (!strcmp(type, ZSWAP_PARAM_UNSET))
> - return NULL;
> - if (!strcmp(compressor, ZSWAP_PARAM_UNSET))
> - return NULL;
> - }
> -
> - pool = kzalloc(sizeof(*pool), GFP_KERNEL);
> - if (!pool)
> - return NULL;
> -
> - for (i = 0; i < ZSWAP_NR_ZPOOLS; i++) {
> - /* unique name for each pool specifically required by zsmalloc */
> - snprintf(name, 38, "zswap%x",
> - atomic_inc_return(&zswap_pools_count));
> -
> - pool->zpools[i] = zpool_create_pool(type, name, gfp);
> - if (!pool->zpools[i]) {
> - pr_err("%s zpool not available\n", type);
> - goto error;
> - }
> - }
> - pr_debug("using %s zpool\n", zpool_get_type(pool->zpools[0]));
> -
> - strscpy(pool->tfm_name, compressor, sizeof(pool->tfm_name));
> -
> - pool->acomp_ctx = alloc_percpu(*pool->acomp_ctx);
> - if (!pool->acomp_ctx) {
> - pr_err("percpu alloc failed\n");
> - goto error;
> - }
> -
> - ret = cpuhp_state_add_instance(CPUHP_MM_ZSWP_POOL_PREPARE,
> - &pool->node);
> - if (ret)
> - goto error;
> -
> - zswap_alloc_shrinker(pool);
> - if (!pool->shrinker)
> - goto error;
> -
> - pr_debug("using %s compressor\n", pool->tfm_name);
> -
> - /* being the current pool takes 1 ref; this func expects the
> - * caller to always add the new pool as the current pool
> - */
> - kref_init(&pool->kref);
> - INIT_LIST_HEAD(&pool->list);
> - if (list_lru_init_memcg(&pool->list_lru, pool->shrinker))
> - goto lru_fail;
> - shrinker_register(pool->shrinker);
> - INIT_WORK(&pool->shrink_work, shrink_worker);
> - atomic_set(&pool->nr_stored, 0);
> -
> - zswap_pool_debug("created", pool);
> -
> - return pool;
> -
> -lru_fail:
> - list_lru_destroy(&pool->list_lru);
> - shrinker_free(pool->shrinker);
> -error:
> - if (pool->acomp_ctx)
> - free_percpu(pool->acomp_ctx);
> - while (i--)
> - zpool_destroy_pool(pool->zpools[i]);
> - kfree(pool);
> - return NULL;
> -}
> -
> -static struct zswap_pool *__zswap_pool_create_fallback(void)
> -{
> - bool has_comp, has_zpool;
> -
> - has_comp = crypto_has_acomp(zswap_compressor, 0, 0);
> - if (!has_comp && strcmp(zswap_compressor,
> - CONFIG_ZSWAP_COMPRESSOR_DEFAULT)) {
> - pr_err("compressor %s not available, using default %s\n",
> - zswap_compressor, CONFIG_ZSWAP_COMPRESSOR_DEFAULT);
> - param_free_charp(&zswap_compressor);
> - zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT;
> - has_comp = crypto_has_acomp(zswap_compressor, 0, 0);
> - }
> - if (!has_comp) {
> - pr_err("default compressor %s not available\n",
> - zswap_compressor);
> - param_free_charp(&zswap_compressor);
> - zswap_compressor = ZSWAP_PARAM_UNSET;
> - }
> -
> - has_zpool = zpool_has_pool(zswap_zpool_type);
> - if (!has_zpool && strcmp(zswap_zpool_type,
> - CONFIG_ZSWAP_ZPOOL_DEFAULT)) {
> - pr_err("zpool %s not available, using default %s\n",
> - zswap_zpool_type, CONFIG_ZSWAP_ZPOOL_DEFAULT);
> - param_free_charp(&zswap_zpool_type);
> - zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT;
> - has_zpool = zpool_has_pool(zswap_zpool_type);
> - }
> - if (!has_zpool) {
> - pr_err("default zpool %s not available\n",
> - zswap_zpool_type);
> - param_free_charp(&zswap_zpool_type);
> - zswap_zpool_type = ZSWAP_PARAM_UNSET;
> - }
> -
> - if (!has_comp || !has_zpool)
> - return NULL;
> -
> - return zswap_pool_create(zswap_zpool_type, zswap_compressor);
> -}
> -
> -static void zswap_pool_destroy(struct zswap_pool *pool)
> -{
> - int i;
> -
> - zswap_pool_debug("destroying", pool);
> -
> - shrinker_free(pool->shrinker);
> - cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node);
> - free_percpu(pool->acomp_ctx);
> - list_lru_destroy(&pool->list_lru);
> -
> - spin_lock(&zswap_pools_lock);
> - mem_cgroup_iter_break(NULL, pool->next_shrink);
> - pool->next_shrink = NULL;
> - spin_unlock(&zswap_pools_lock);
> -
> - for (i = 0; i < ZSWAP_NR_ZPOOLS; i++)
> - zpool_destroy_pool(pool->zpools[i]);
> - kfree(pool);
> -}
> -
> static int __must_check zswap_pool_get(struct zswap_pool *pool)
> {
> if (!pool)
> --
> 2.43.0
>

2024-01-30 20:14:02

by Nhat Pham

[permalink] [raw]
Subject: Re: [PATCH 11/20] mm: zswap: function ordering: pool refcounting

On Mon, Jan 29, 2024 at 5:42 PM Johannes Weiner <[email protected]> wrote:
>
> Move pool refcounting functions into the pool section. First the
> destroy functions, then the get and put which uses them.
>
> __zswap_pool_empty() has an upward reference to the global
> zswap_pools, to sanity check it's not the currently active pool that's
> being freed. That gets the forward decl for zswap_pool_cuyrrent().

nit: zswap_pool_cuyrrent() -> zswap_pool_current() :-)

Also, would it make sense to move zswap_pool_current() above
__zswap_pool_empty() to get rid of the forward declaration? I guess
it's now grouped with current_get() etc. - those don't seem to use the
empty check, so maybe they can also go above __zswap_pool_empty()?


>
> This puts the get and put function above all callers, so kill the
> forward decls as well.
>
> Signed-off-by: Johannes Weiner <[email protected]>
> ---
> mm/zswap.c | 94 +++++++++++++++++++++++++++---------------------------
> 1 file changed, 47 insertions(+), 47 deletions(-)
>
> diff --git a/mm/zswap.c b/mm/zswap.c
> index 805d9a35f633..33775f2224b7 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -278,8 +278,6 @@ static inline struct zswap_tree *swap_zswap_tree(swp_entry_t swp)
>
> static int zswap_writeback_entry(struct zswap_entry *entry,
> swp_entry_t swpentry);
> -static int zswap_pool_get(struct zswap_pool *pool);
> -static void zswap_pool_put(struct zswap_pool *pool);
>
> static bool zswap_is_full(void)
> {
> @@ -472,6 +470,53 @@ static void zswap_pool_destroy(struct zswap_pool *pool)
> kfree(pool);
> }
>
> +static void __zswap_pool_release(struct work_struct *work)
> +{
> + struct zswap_pool *pool = container_of(work, typeof(*pool),
> + release_work);
> +
> + synchronize_rcu();
> +
> + /* nobody should have been able to get a kref... */
> + WARN_ON(kref_get_unless_zero(&pool->kref));
> +
> + /* pool is now off zswap_pools list and has no references. */
> + zswap_pool_destroy(pool);
> +}
> +
> +static struct zswap_pool *zswap_pool_current(void);
> +
> +static void __zswap_pool_empty(struct kref *kref)
> +{
> + struct zswap_pool *pool;
> +
> + pool = container_of(kref, typeof(*pool), kref);
> +
> + spin_lock(&zswap_pools_lock);
> +
> + WARN_ON(pool == zswap_pool_current());
> +
> + list_del_rcu(&pool->list);
> +
> + INIT_WORK(&pool->release_work, __zswap_pool_release);
> + schedule_work(&pool->release_work);
> +
> + spin_unlock(&zswap_pools_lock);
> +}
> +
> +static int __must_check zswap_pool_get(struct zswap_pool *pool)
> +{
> + if (!pool)
> + return 0;
> +
> + return kref_get_unless_zero(&pool->kref);
> +}
> +
> +static void zswap_pool_put(struct zswap_pool *pool)
> +{
> + kref_put(&pool->kref, __zswap_pool_empty);
> +}
> +
> /* should be called under RCU */
> #ifdef CONFIG_MEMCG
> static inline struct mem_cgroup *mem_cgroup_from_entry(struct zswap_entry *entry)
> @@ -1122,51 +1167,6 @@ static void shrink_worker(struct work_struct *w)
> zswap_pool_put(pool);
> }
>
> -static int __must_check zswap_pool_get(struct zswap_pool *pool)
> -{
> - if (!pool)
> - return 0;
> -
> - return kref_get_unless_zero(&pool->kref);
> -}
> -
> -static void __zswap_pool_release(struct work_struct *work)
> -{
> - struct zswap_pool *pool = container_of(work, typeof(*pool),
> - release_work);
> -
> - synchronize_rcu();
> -
> - /* nobody should have been able to get a kref... */
> - WARN_ON(kref_get_unless_zero(&pool->kref));
> -
> - /* pool is now off zswap_pools list and has no references. */
> - zswap_pool_destroy(pool);
> -}
> -
> -static void __zswap_pool_empty(struct kref *kref)
> -{
> - struct zswap_pool *pool;
> -
> - pool = container_of(kref, typeof(*pool), kref);
> -
> - spin_lock(&zswap_pools_lock);
> -
> - WARN_ON(pool == zswap_pool_current());
> -
> - list_del_rcu(&pool->list);
> -
> - INIT_WORK(&pool->release_work, __zswap_pool_release);
> - schedule_work(&pool->release_work);
> -
> - spin_unlock(&zswap_pools_lock);
> -}
> -
> -static void zswap_pool_put(struct zswap_pool *pool)
> -{
> - kref_put(&pool->kref, __zswap_pool_empty);
> -}
> -
> /*********************************
> * param callbacks
> **********************************/
> --
> 2.43.0
>

2024-01-30 21:17:10

by Nhat Pham

[permalink] [raw]
Subject: Re: [PATCH 12/20] mm: zswap: function ordering: zswap_pools

On Mon, Jan 29, 2024 at 5:42 PM Johannes Weiner <[email protected]> wrote:
>
> Move the operations against the global zswap_pools list (current pool,
> last, find) to the pool section.
>
> Signed-off-by: Johannes Weiner <[email protected]>

Reviewed-by: Nhat Pham <[email protected]>

2024-01-30 21:22:35

by Nhat Pham

[permalink] [raw]
Subject: Re: [PATCH 13/20] mm: zswap: function ordering: pool params

On Mon, Jan 29, 2024 at 5:42 PM Johannes Weiner <[email protected]> wrote:
>
> The parameters primarily control pool attributes. Move those
> operations up to the pool section.
>
> Signed-off-by: Johannes Weiner <[email protected]>
> ---
> mm/zswap.c | 312 ++++++++++++++++++++++++++---------------------------
> 1 file changed, 156 insertions(+), 156 deletions(-)
>
> diff --git a/mm/zswap.c b/mm/zswap.c
> index 168afd6767b3..e650fc587116 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -590,6 +590,162 @@ static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
> return NULL;
> }
>
> +/*********************************
> +* param callbacks
> +**********************************/
> +
> +static bool zswap_pool_changed(const char *s, const struct kernel_param *kp)
> +{
> + /* no change required */
> + if (!strcmp(s, *(char **)kp->arg) && zswap_has_pool)
> + return false;
> + return true;
> +}
> +
> +/* val must be a null-terminated string */
> +static int __zswap_param_set(const char *val, const struct kernel_param *kp,
> + char *type, char *compressor)
> +{
> + struct zswap_pool *pool, *put_pool = NULL;
> + char *s = strstrip((char *)val);
> + int ret = 0;
> + bool new_pool = false;
> +
> + mutex_lock(&zswap_init_lock);
> + switch (zswap_init_state) {
> + case ZSWAP_UNINIT:
> + /* if this is load-time (pre-init) param setting,
> + * don't create a pool; that's done during init.
> + */
> + ret = param_set_charp(s, kp);
> + break;
> + case ZSWAP_INIT_SUCCEED:
> + new_pool = zswap_pool_changed(s, kp);
> + break;
> + case ZSWAP_INIT_FAILED:
> + pr_err("can't set param, initialization failed\n");
> + ret = -ENODEV;
> + }
> + mutex_unlock(&zswap_init_lock);
> +
> + /* no need to create a new pool, return directly */
> + if (!new_pool)
> + return ret;
> +
> + if (!type) {
> + if (!zpool_has_pool(s)) {
> + pr_err("zpool %s not available\n", s);
> + return -ENOENT;
> + }
> + type = s;
> + } else if (!compressor) {
> + if (!crypto_has_acomp(s, 0, 0)) {
> + pr_err("compressor %s not available\n", s);
> + return -ENOENT;
> + }
> + compressor = s;
> + } else {
> + WARN_ON(1);
> + return -EINVAL;
> + }
> +
> + spin_lock(&zswap_pools_lock);
> +
> + pool = zswap_pool_find_get(type, compressor);
> + if (pool) {
> + zswap_pool_debug("using existing", pool);
> + WARN_ON(pool == zswap_pool_current());
> + list_del_rcu(&pool->list);
> + }
> +
> + spin_unlock(&zswap_pools_lock);
> +
> + if (!pool)
> + pool = zswap_pool_create(type, compressor);
> +
> + if (pool)
> + ret = param_set_charp(s, kp);
> + else
> + ret = -EINVAL;
> +
> + spin_lock(&zswap_pools_lock);
> +
> + if (!ret) {
> + put_pool = zswap_pool_current();
> + list_add_rcu(&pool->list, &zswap_pools);
> + zswap_has_pool = true;
> + } else if (pool) {
> + /* add the possibly pre-existing pool to the end of the pools
> + * list; if it's new (and empty) then it'll be removed and
> + * destroyed by the put after we drop the lock
> + */
> + list_add_tail_rcu(&pool->list, &zswap_pools);
> + put_pool = pool;
> + }
> +
> + spin_unlock(&zswap_pools_lock);
> +
> + if (!zswap_has_pool && !pool) {
> + /* if initial pool creation failed, and this pool creation also
> + * failed, maybe both compressor and zpool params were bad.
> + * Allow changing this param, so pool creation will succeed
> + * when the other param is changed. We already verified this
> + * param is ok in the zpool_has_pool() or crypto_has_acomp()
> + * checks above.
> + */
> + ret = param_set_charp(s, kp);
> + }
> +
> + /* drop the ref from either the old current pool,
> + * or the new pool we failed to add
> + */
> + if (put_pool)
> + zswap_pool_put(put_pool);
> +
> + return ret;
> +}
> +
> +static int zswap_compressor_param_set(const char *val,
> + const struct kernel_param *kp)
> +{
> + return __zswap_param_set(val, kp, zswap_zpool_type, NULL);
> +}
> +
> +static int zswap_zpool_param_set(const char *val,
> + const struct kernel_param *kp)
> +{
> + return __zswap_param_set(val, kp, NULL, zswap_compressor);
> +}
> +
> +static int zswap_enabled_param_set(const char *val,
> + const struct kernel_param *kp)
> +{
> + int ret = -ENODEV;
> +
> + /* if this is load-time (pre-init) param setting, only set param. */
> + if (system_state != SYSTEM_RUNNING)
> + return param_set_bool(val, kp);
> +
> + mutex_lock(&zswap_init_lock);
> + switch (zswap_init_state) {
> + case ZSWAP_UNINIT:
> + if (zswap_setup())
> + break;
> + fallthrough;
> + case ZSWAP_INIT_SUCCEED:
> + if (!zswap_has_pool)
> + pr_err("can't enable, no pool configured\n");
> + else
> + ret = param_set_bool(val, kp);
> + break;
> + case ZSWAP_INIT_FAILED:
> + pr_err("can't enable, initialization failed\n");
> + }
> + mutex_unlock(&zswap_init_lock);
> +
> + return ret;
> +}
> +
> /* should be called under RCU */
> #ifdef CONFIG_MEMCG
> static inline struct mem_cgroup *mem_cgroup_from_entry(struct zswap_entry *entry)
> @@ -1163,162 +1319,6 @@ static void shrink_worker(struct work_struct *w)
> zswap_pool_put(pool);
> }
>
> -/*********************************
> -* param callbacks
> -**********************************/
> -
> -static bool zswap_pool_changed(const char *s, const struct kernel_param *kp)
> -{
> - /* no change required */
> - if (!strcmp(s, *(char **)kp->arg) && zswap_has_pool)
> - return false;
> - return true;
> -}
> -
> -/* val must be a null-terminated string */
> -static int __zswap_param_set(const char *val, const struct kernel_param *kp,
> - char *type, char *compressor)
> -{
> - struct zswap_pool *pool, *put_pool = NULL;
> - char *s = strstrip((char *)val);
> - int ret = 0;
> - bool new_pool = false;
> -
> - mutex_lock(&zswap_init_lock);
> - switch (zswap_init_state) {
> - case ZSWAP_UNINIT:
> - /* if this is load-time (pre-init) param setting,
> - * don't create a pool; that's done during init.
> - */
> - ret = param_set_charp(s, kp);
> - break;
> - case ZSWAP_INIT_SUCCEED:
> - new_pool = zswap_pool_changed(s, kp);
> - break;
> - case ZSWAP_INIT_FAILED:
> - pr_err("can't set param, initialization failed\n");
> - ret = -ENODEV;
> - }
> - mutex_unlock(&zswap_init_lock);
> -
> - /* no need to create a new pool, return directly */
> - if (!new_pool)
> - return ret;
> -
> - if (!type) {
> - if (!zpool_has_pool(s)) {
> - pr_err("zpool %s not available\n", s);
> - return -ENOENT;
> - }
> - type = s;
> - } else if (!compressor) {
> - if (!crypto_has_acomp(s, 0, 0)) {
> - pr_err("compressor %s not available\n", s);
> - return -ENOENT;
> - }
> - compressor = s;
> - } else {
> - WARN_ON(1);
> - return -EINVAL;
> - }
> -
> - spin_lock(&zswap_pools_lock);
> -
> - pool = zswap_pool_find_get(type, compressor);
> - if (pool) {
> - zswap_pool_debug("using existing", pool);
> - WARN_ON(pool == zswap_pool_current());
> - list_del_rcu(&pool->list);
> - }
> -
> - spin_unlock(&zswap_pools_lock);
> -
> - if (!pool)
> - pool = zswap_pool_create(type, compressor);
> -
> - if (pool)
> - ret = param_set_charp(s, kp);
> - else
> - ret = -EINVAL;
> -
> - spin_lock(&zswap_pools_lock);
> -
> - if (!ret) {
> - put_pool = zswap_pool_current();
> - list_add_rcu(&pool->list, &zswap_pools);
> - zswap_has_pool = true;
> - } else if (pool) {
> - /* add the possibly pre-existing pool to the end of the pools
> - * list; if it's new (and empty) then it'll be removed and
> - * destroyed by the put after we drop the lock
> - */
> - list_add_tail_rcu(&pool->list, &zswap_pools);
> - put_pool = pool;
> - }
> -
> - spin_unlock(&zswap_pools_lock);
> -
> - if (!zswap_has_pool && !pool) {
> - /* if initial pool creation failed, and this pool creation also
> - * failed, maybe both compressor and zpool params were bad.
> - * Allow changing this param, so pool creation will succeed
> - * when the other param is changed. We already verified this
> - * param is ok in the zpool_has_pool() or crypto_has_acomp()
> - * checks above.
> - */
> - ret = param_set_charp(s, kp);
> - }
> -
> - /* drop the ref from either the old current pool,
> - * or the new pool we failed to add
> - */
> - if (put_pool)
> - zswap_pool_put(put_pool);
> -
> - return ret;
> -}
> -
> -static int zswap_compressor_param_set(const char *val,
> - const struct kernel_param *kp)
> -{
> - return __zswap_param_set(val, kp, zswap_zpool_type, NULL);
> -}
> -
> -static int zswap_zpool_param_set(const char *val,
> - const struct kernel_param *kp)
> -{
> - return __zswap_param_set(val, kp, NULL, zswap_compressor);
> -}
> -
> -static int zswap_enabled_param_set(const char *val,
> - const struct kernel_param *kp)
> -{
> - int ret = -ENODEV;
> -
> - /* if this is load-time (pre-init) param setting, only set param. */
> - if (system_state != SYSTEM_RUNNING)
> - return param_set_bool(val, kp);
> -
> - mutex_lock(&zswap_init_lock);
> - switch (zswap_init_state) {
> - case ZSWAP_UNINIT:
> - if (zswap_setup())
> - break;
> - fallthrough;
> - case ZSWAP_INIT_SUCCEED:
> - if (!zswap_has_pool)
> - pr_err("can't enable, no pool configured\n");
> - else
> - ret = param_set_bool(val, kp);
> - break;
> - case ZSWAP_INIT_FAILED:
> - pr_err("can't enable, initialization failed\n");
> - }
> - mutex_unlock(&zswap_init_lock);
> -
> - return ret;
> -}
> -
> static bool zswap_compress(struct folio *folio, struct zswap_entry *entry)
> {
> struct crypto_acomp_ctx *acomp_ctx;
> --
> 2.43.0
>

Reviewed-by: Nhat Pham <[email protected]>

2024-01-30 23:47:49

by Nhat Pham

[permalink] [raw]
Subject: Re: [PATCH 14/20] mm: zswap: function ordering: public lru api

On Mon, Jan 29, 2024 at 5:42 PM Johannes Weiner <[email protected]> wrote:
>
> The zswap entry section sits awkwardly in the middle of LRU-related
> functions. Group the external LRU API functions first.
>
> Signed-off-by: Johannes Weiner <[email protected]>
> ---
> mm/zswap.c | 37 +++++++++++++++++++------------------
> 1 file changed, 19 insertions(+), 18 deletions(-)
>
> diff --git a/mm/zswap.c b/mm/zswap.c
> index e650fc587116..511bfafc1456 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -746,6 +746,10 @@ static int zswap_enabled_param_set(const char *val,
> return ret;
> }
>
> +/*********************************
> +* lru functions
> +**********************************/
> +

nit: looks like there are 2 "lru functions" headers after this patch?
You remove the "lruvec functions" header, then add another "lru
functions" header it seems. The next patch removes one of them, so end
result is fine I guess - just seems a bit odd.

That aside:
Reviewed-by: Nhat Pham <[email protected]>


> /* should be called under RCU */
> #ifdef CONFIG_MEMCG
> static inline struct mem_cgroup *mem_cgroup_from_entry(struct zswap_entry *entry)
> @@ -764,6 +768,21 @@ static inline int entry_to_nid(struct zswap_entry *entry)
> return page_to_nid(virt_to_page(entry));
> }
>
> +void zswap_lruvec_state_init(struct lruvec *lruvec)
> +{
> + atomic_long_set(&lruvec->zswap_lruvec_state.nr_zswap_protected, 0);
> +}
> +
> +void zswap_folio_swapin(struct folio *folio)
> +{
> + struct lruvec *lruvec;
> +
> + if (folio) {
> + lruvec = folio_lruvec(folio);
> + atomic_long_inc(&lruvec->zswap_lruvec_state.nr_zswap_protected);
> + }
> +}
> +
> void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg)
> {
> struct zswap_pool *pool;
> @@ -798,24 +817,6 @@ static void zswap_entry_cache_free(struct zswap_entry *entry)
> kmem_cache_free(zswap_entry_cache, entry);
> }
>
> -/*********************************
> -* zswap lruvec functions
> -**********************************/

Here's the removed zswap lruvec functions header.

> -void zswap_lruvec_state_init(struct lruvec *lruvec)
> -{
> - atomic_long_set(&lruvec->zswap_lruvec_state.nr_zswap_protected, 0);
> -}
> -
> -void zswap_folio_swapin(struct folio *folio)
> -{
> - struct lruvec *lruvec;
> -
> - if (folio) {
> - lruvec = folio_lruvec(folio);
> - atomic_long_inc(&lruvec->zswap_lruvec_state.nr_zswap_protected);
> - }
> -}
> -
> /*********************************
> * lru functions
> **********************************/
> --

Here's the second (original) lru functions header.

> 2.43.0
>

2024-01-30 23:48:35

by Nhat Pham

[permalink] [raw]
Subject: Re: [PATCH 15/20] mm: zswap: function ordering: move entry sections out of LRU section

On Mon, Jan 29, 2024 at 5:42 PM Johannes Weiner <[email protected]> wrote:
>
> This completes consolidation of the LRU section.
>
> Signed-off-by: Johannes Weiner <[email protected]>

LGTM.
Reviewed-by: Nhat Pham <[email protected]>

> ---
> mm/zswap.c | 101 ++++++++++++++++++++++++++---------------------------
> 1 file changed, 49 insertions(+), 52 deletions(-)
>
> diff --git a/mm/zswap.c b/mm/zswap.c
> index 511bfafc1456..756d4d575efe 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -768,58 +768,6 @@ static inline int entry_to_nid(struct zswap_entry *entry)
> return page_to_nid(virt_to_page(entry));
> }
>
> -void zswap_lruvec_state_init(struct lruvec *lruvec)
> -{
> - atomic_long_set(&lruvec->zswap_lruvec_state.nr_zswap_protected, 0);
> -}
> -
> -void zswap_folio_swapin(struct folio *folio)
> -{
> - struct lruvec *lruvec;
> -
> - if (folio) {
> - lruvec = folio_lruvec(folio);
> - atomic_long_inc(&lruvec->zswap_lruvec_state.nr_zswap_protected);
> - }
> -}
> -
> -void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg)
> -{
> - struct zswap_pool *pool;
> -
> - /* lock out zswap pools list modification */
> - spin_lock(&zswap_pools_lock);
> - list_for_each_entry(pool, &zswap_pools, list) {
> - if (pool->next_shrink == memcg)
> - pool->next_shrink = mem_cgroup_iter(NULL, pool->next_shrink, NULL);
> - }
> - spin_unlock(&zswap_pools_lock);
> -}
> -
> -/*********************************
> -* zswap entry functions
> -**********************************/
> -static struct kmem_cache *zswap_entry_cache;
> -
> -static struct zswap_entry *zswap_entry_cache_alloc(gfp_t gfp, int nid)
> -{
> - struct zswap_entry *entry;
> - entry = kmem_cache_alloc_node(zswap_entry_cache, gfp, nid);
> - if (!entry)
> - return NULL;
> - entry->refcount = 1;
> - RB_CLEAR_NODE(&entry->rbnode);
> - return entry;
> -}
> -
> -static void zswap_entry_cache_free(struct zswap_entry *entry)
> -{
> - kmem_cache_free(zswap_entry_cache, entry);
> -}
> -
> -/*********************************
> -* lru functions
> -**********************************/
> static void zswap_lru_add(struct list_lru *list_lru, struct zswap_entry *entry)
> {
> atomic_long_t *nr_zswap_protected;
> @@ -872,6 +820,55 @@ static void zswap_lru_del(struct list_lru *list_lru, struct zswap_entry *entry)
> rcu_read_unlock();
> }
>
> +void zswap_lruvec_state_init(struct lruvec *lruvec)
> +{
> + atomic_long_set(&lruvec->zswap_lruvec_state.nr_zswap_protected, 0);
> +}
> +
> +void zswap_folio_swapin(struct folio *folio)
> +{
> + struct lruvec *lruvec;
> +
> + if (folio) {
> + lruvec = folio_lruvec(folio);
> + atomic_long_inc(&lruvec->zswap_lruvec_state.nr_zswap_protected);
> + }
> +}
> +
> +void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg)
> +{
> + struct zswap_pool *pool;
> +
> + /* lock out zswap pools list modification */
> + spin_lock(&zswap_pools_lock);
> + list_for_each_entry(pool, &zswap_pools, list) {
> + if (pool->next_shrink == memcg)
> + pool->next_shrink = mem_cgroup_iter(NULL, pool->next_shrink, NULL);
> + }
> + spin_unlock(&zswap_pools_lock);
> +}
> +
> +/*********************************
> +* zswap entry functions
> +**********************************/
> +static struct kmem_cache *zswap_entry_cache;
> +
> +static struct zswap_entry *zswap_entry_cache_alloc(gfp_t gfp, int nid)
> +{
> + struct zswap_entry *entry;
> + entry = kmem_cache_alloc_node(zswap_entry_cache, gfp, nid);
> + if (!entry)
> + return NULL;
> + entry->refcount = 1;
> + RB_CLEAR_NODE(&entry->rbnode);
> + return entry;
> +}
> +
> +static void zswap_entry_cache_free(struct zswap_entry *entry)
> +{
> + kmem_cache_free(zswap_entry_cache, entry);
> +}
> +
> /*********************************
> * rbtree functions
> **********************************/
> --
> 2.43.0
>

2024-01-30 23:55:24

by Nhat Pham

[permalink] [raw]
Subject: Re: [PATCH 00/20] mm: zswap: cleanups

On Tue, Jan 30, 2024 at 12:16 AM Yosry Ahmed <[email protected]> wrote:
>
> Hey Johannes,
>
> On Mon, Jan 29, 2024 at 08:36:36PM -0500, Johannes Weiner wrote:
> > Cleanups and maintenance items that accumulated while reviewing zswap
> > patches. Based on akpm/mm-unstable + the UAF fix I sent just now.
>
> Patches 1 to 9 LGTM, thanks for the great cleanups!
>
> I am less excited about patches 10 to 20 though. Don't get me wrong, I
> am all for logically ordering the code. However, it feels like in this
> case, we will introduce unnecessary layers in the git history in a lot
> of places where I find myself checking the history regularly.
> Personally, I tend to jump around the file using vim search or using a
> cscope extension to find references/definitions, so I don't feel a need
> for such reordering.
>
> I am not objecting to it, but I just find it less appealing that the
> rest of the series.

As a frequent user of git blame, I kinda agree with it.

That said, zswap's function ordering hurts my brain a lot. So I vote
for the reordering, and for paying the price sooner rather than later.
The alternative is reordering sometime in the future (which just delays
the pain), or never reordering at all (which sucks).

>
> >
> > mm/zswap.c | 1961 +++++++++++++++++++++++++++++-----------------------------
> > 1 file changed, 971 insertions(+), 990 deletions(-)
> >

2024-01-31 01:03:47

by Sergey Senozhatsky

[permalink] [raw]
Subject: Re: [PATCH 00/20] mm: zswap: cleanups

On (24/01/30 10:52), Johannes Weiner wrote:
> On Tue, Jan 30, 2024 at 09:21:31PM +0900, Sergey Senozhatsky wrote:
> > On (24/01/30 08:16), Yosry Ahmed wrote:
> > > Hey Johannes,
> > >
> > > On Mon, Jan 29, 2024 at 08:36:36PM -0500, Johannes Weiner wrote:
> > > > Cleanups and maintenance items that accumulated while reviewing zswap
> > > > patches. Based on akpm/mm-unstable + the UAF fix I sent just now.
> > >
> > > Patches 1 to 9 LGTM, thanks for the great cleanups!
> > >
> > > I am less excited about patches 10 to 20 though. Don't get me wrong, I
> > > am all for logically ordering the code. However, it feels like in this
> > > case, we will introduce unnecessary layers in the git history in a lot
> >
> > This can also complicate cherry-picking of patches to stable, prod, etc.
>
> I'm sensitive to that argument, because we run our own kernel at Meta
> as well.

Well, it was less of an argument and more of a "let's consider that too".

> But moves are pretty easy. The code doesn't actually change, just the
> line offsets. So patch will mostly work with offset warnings. And if
> not, it's easy to fix up and verify. Refactoring and API restructuring
> (folios e.g.) make it much harder when it comes to this.

If the pros of doing it outweigh the cons, then OK.
Either way, I'm not against the patches.

2024-01-31 11:23:28

by Johannes Weiner

[permalink] [raw]
Subject: Re: [PATCH 11/20] mm: zswap: function ordering: pool refcounting

On Tue, Jan 30, 2024 at 12:13:30PM -0800, Nhat Pham wrote:
> On Mon, Jan 29, 2024 at 5:42 PM Johannes Weiner <[email protected]> wrote:
> >
> > Move pool refcounting functions into the pool section. First the
> > destroy functions, then the get and put which uses them.
> >
> > __zswap_pool_empty() has an upward reference to the global
> > zswap_pools, to sanity check it's not the currently active pool that's
> > being freed. That gets the forward decl for zswap_pool_cuyrrent().
>
> nit: zswap_pool_cuyrrent() -> zswap_pool_current() :-)

Whoops, my bad.

Andrew, would you mind removing that typo inside your copy?

> Also, would it make sense to move zswap_pool_current() above
> __zswap_pool_empty() to get rid of the forward declaration? I guess
> it's now grouped with current_get() etc. - those don't seem to use the
> empty check, so maybe they can also go above __zswap_pool_empty()?

There is a grouping to these functions:

- low-level functions that create and destroy individual struct zswap_pool
(create, destroy, get, release, empty, put)
- high-level functions that operate on pool collections, i.e. zswap_pools
(current, last, find)

They were actually already grouped like that, just in the reverse
order. The only way to avoid ALL forward decls would be to interleave
the layers, but I think the grouping makes sense so I wanted to
preserve that. I went with low to high ordering, and forward decl the
odd one where a low-level function does one high-level sanity check.

Does that make sense?

2024-01-31 11:46:40

by Johannes Weiner

[permalink] [raw]
Subject: Re: [PATCH 14/20] mm: zswap: function ordering: public lru api

On Tue, Jan 30, 2024 at 03:47:25PM -0800, Nhat Pham wrote:
> On Mon, Jan 29, 2024 at 5:42 PM Johannes Weiner <[email protected]> wrote:
> >
> > The zswap entry section sits awkwardly in the middle of LRU-related
> > functions. Group the external LRU API functions first.
> >
> > Signed-off-by: Johannes Weiner <[email protected]>
> > ---
> > mm/zswap.c | 37 +++++++++++++++++++------------------
> > 1 file changed, 19 insertions(+), 18 deletions(-)
> >
> > diff --git a/mm/zswap.c b/mm/zswap.c
> > index e650fc587116..511bfafc1456 100644
> > --- a/mm/zswap.c
> > +++ b/mm/zswap.c
> > @@ -746,6 +746,10 @@ static int zswap_enabled_param_set(const char *val,
> > return ret;
> > }
> >
> > +/*********************************
> > +* lru functions
> > +**********************************/
> > +
>
> nit: looks like there are 2 "lru functions" headers after this patch?
> You remove the "lruvec functions" header, then add another "lru
> functions" header it seems. The next patch removes one of them, so end
> result is fine I guess - just seems a bit odd.

Yeah that's an artifact of trying to make git produce readable
diffs. Since the lru functions are right next to the entry functions,
I went through several attempts where it wouldn't generate clean moves
but instead would interleave entry and lru functions line by line to
overwrite one with the other in place. I think the above helped in
making it not do that, although I'm not positive it would still be
required in the final form of this patch. It was kind of brute force.
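
An aside that may help when reviewing such moves (not something this
series relies on) - git has a couple of knobs for exactly this:

	# highlight blocks that were moved rather than added/removed
	git diff --color-moved=dimmed-zebra <base>..<branch> -- mm/zswap.c

	# nudge hunk generation to anchor on a given line
	git diff --anchored='* lru functions' <base>..<branch> -- mm/zswap.c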

> That aside:
> Reviewed-by: Nhat Pham <[email protected]>

Thanks!

2024-01-31 23:37:04

by Nhat Pham

[permalink] [raw]
Subject: Re: [PATCH 17/20] mm: zswap: function ordering: compress & decompress functions

On Mon, Jan 29, 2024 at 5:42 PM Johannes Weiner <[email protected]> wrote:
>
> Writeback needs to decompress. Move the (de)compression API above what
> will be the consolidated shrinking/writeback code.
>
> Signed-off-by: Johannes Weiner <[email protected]>

Reviewed-by: Nhat Pham <[email protected]>

2024-01-31 23:42:31

by Nhat Pham

[permalink] [raw]
Subject: Re: [PATCH 18/20] mm: zswap: function ordering: per-cpu compression infra

On Mon, Jan 29, 2024 at 5:42 PM Johannes Weiner <[email protected]> wrote:
>
> The per-cpu compression init/exit callbacks are awkwardly in the
> middle of the shrinker code. Move them up to the compression section.
>
> Signed-off-by: Johannes Weiner <[email protected]>
> ---

Reviewed-by: Nhat Pham <[email protected]>

2024-01-31 23:42:54

by Nhat Pham

[permalink] [raw]
Subject: Re: [PATCH 20/20] mm: zswap: function ordering: shrink_memcg_cb

On Mon, Jan 29, 2024 at 5:42 PM Johannes Weiner <[email protected]> wrote:
>
> shrink_memcg_cb() is called by the shrinker and is based on
> zswap_writeback_entry(). Move it in between. Save one fwd decl.
>
> Signed-off-by: Johannes Weiner <[email protected]>

Reviewed-by: Nhat Pham <[email protected]>

Nice! I just went through all the patches. LGTM.

2024-01-31 23:45:41

by Nhat Pham

[permalink] [raw]
Subject: Re: [PATCH 16/20] mm: zswap: function ordering: move entry section out of tree section

On Mon, Jan 29, 2024 at 5:42 PM Johannes Weiner <[email protected]> wrote:
>
> The higher-level entry operations modify the tree, so move the entry
> API after the tree section.
>
> Signed-off-by: Johannes Weiner <[email protected]>

Reviewed-by: Nhat Pham <[email protected]>

> ---
> mm/zswap.c | 42 +++++++++++++++++++++---------------------
> 1 file changed, 21 insertions(+), 21 deletions(-)
>
> diff --git a/mm/zswap.c b/mm/zswap.c
> index 756d4d575efe..80adc2f7d1a2 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -848,27 +848,6 @@ void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg)
> spin_unlock(&zswap_pools_lock);
> }
>
> -/*********************************
> -* zswap entry functions
> -**********************************/
> -static struct kmem_cache *zswap_entry_cache;
> -
> -static struct zswap_entry *zswap_entry_cache_alloc(gfp_t gfp, int nid)
> -{
> - struct zswap_entry *entry;
> - entry = kmem_cache_alloc_node(zswap_entry_cache, gfp, nid);
> - if (!entry)
> - return NULL;
> - entry->refcount = 1;
> - RB_CLEAR_NODE(&entry->rbnode);
> - return entry;
> -}
> -
> -static void zswap_entry_cache_free(struct zswap_entry *entry)
> -{
> - kmem_cache_free(zswap_entry_cache, entry);
> -}
> -
> /*********************************
> * rbtree functions
> **********************************/
> @@ -930,6 +909,27 @@ static bool zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry)
> return false;
> }
>
> +/*********************************
> +* zswap entry functions
> +**********************************/
> +static struct kmem_cache *zswap_entry_cache;
> +
> +static struct zswap_entry *zswap_entry_cache_alloc(gfp_t gfp, int nid)
> +{
> + struct zswap_entry *entry;
> + entry = kmem_cache_alloc_node(zswap_entry_cache, gfp, nid);
> + if (!entry)
> + return NULL;
> + entry->refcount = 1;
> + RB_CLEAR_NODE(&entry->rbnode);
> + return entry;
> +}
> +
> +static void zswap_entry_cache_free(struct zswap_entry *entry)
> +{
> + kmem_cache_free(zswap_entry_cache, entry);
> +}
> +
> static struct zpool *zswap_find_zpool(struct zswap_entry *entry)
> {
> int i = 0;
> --
> 2.43.0
>

2024-01-31 23:51:45

by Nhat Pham

[permalink] [raw]
Subject: Re: [PATCH 19/20] mm: zswap: function ordering: writeback

On Mon, Jan 29, 2024 at 5:42 PM Johannes Weiner <[email protected]> wrote:
>
> Shrinking needs writeback. Naturally, move the writeback code above
> the shrinking code. Delete the forward decl.

Honestly, do we even need a separate section, haha? Writeback and
shrink are used interchangeably in my head - they're the same
category.

>
> Signed-off-by: Johannes Weiner <[email protected]>

Anyway:
Reviewed-by: Nhat Pham <[email protected]>

2024-02-01 00:02:31

by Nhat Pham

[permalink] [raw]
Subject: Re: [PATCH 11/20] mm: zswap: function ordering: pool refcounting

On Wed, Jan 31, 2024 at 3:23 AM Johannes Weiner <[email protected]> wrote:
>
> On Tue, Jan 30, 2024 at 12:13:30PM -0800, Nhat Pham wrote:
> > On Mon, Jan 29, 2024 at 5:42 PM Johannes Weiner <[email protected]> wrote:
> > >
> > > Move pool refcounting functions into the pool section. First the
> > > destroy functions, then the get and put which uses them.
> > >
> > > __zswap_pool_empty() has an upward reference to the global
> > > zswap_pools, to sanity check it's not the currently active pool that's
> > > being freed. That gets the forward decl for zswap_pool_cuyrrent().
> >
> > nit: zswap_pool_cuyrrent() -> zswap_pool_current() :-)
>
> Whoops, my bad.
>
> Andrew, would you mind removing that typo inside your copy?
>
> > Also, would it make sense to move zswap_pool_current() above
> > __zswap_pool_empty() to get rid of the forward declaration? I guess
> > it's now grouped with current_get() etc. - those don't seem to use the
> > empty check, so maybe they can also go above __zswap_pool_empty()?
>
> There is a grouping to these functions:
>
> - low-level functions that create and destroy individual struct zswap_pool
> (create, destroy, get, release, empty, put)
> - high-level functions that operate on pool collections, i.e. zswap_pools
> (current, last, find)
>
> They were actually already grouped like that, just in the reverse
> order. The only way to avoid ALL forward decls would be to interleave
> the layers, but I think the grouping makes sense so I wanted to
> preserve that. I went with low to high ordering, and forward decl the
> odd one where a low-level function does one high-level sanity check.
>
> Does that make sense?

Makes sense to me - just double checking :)
Reviewed-by: Nhat Pham <[email protected]>