2023-06-05 09:17:25

by Domenico Cerasuolo

[permalink] [raw]
Subject: [RFC PATCH 4/7] mm: zswap: remove page reclaim logic from zsmalloc

With the recent enhancement to zswap enabling direct page writeback, the
need for the shrink code in zsmalloc has become obsolete. As a result,
this commit removes the page reclaim logic from zsmalloc entirely.

Signed-off-by: Domenico Cerasuolo <[email protected]>
---
mm/zsmalloc.c | 291 +-------------------------------------------------
1 file changed, 2 insertions(+), 289 deletions(-)

diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 02f7f414aade..c87a60514f21 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -250,13 +250,6 @@ struct zs_pool {
/* Compact classes */
struct shrinker shrinker;

-#ifdef CONFIG_ZPOOL
- /* List tracking the zspages in LRU order by most recently added object */
- struct list_head lru;
- struct zpool *zpool;
- const struct zpool_ops *zpool_ops;
-#endif
-
#ifdef CONFIG_ZSMALLOC_STAT
struct dentry *stat_dentry;
#endif
@@ -279,13 +272,6 @@ struct zspage {
unsigned int freeobj;
struct page *first_page;
struct list_head list; /* fullness list */
-
-#ifdef CONFIG_ZPOOL
- /* links the zspage to the lru list in the pool */
- struct list_head lru;
- bool under_reclaim;
-#endif
-
struct zs_pool *pool;
rwlock_t lock;
};
@@ -393,14 +379,7 @@ static void *zs_zpool_create(const char *name, gfp_t gfp,
* different contexts and its caller must provide a valid
* gfp mask.
*/
- struct zs_pool *pool = zs_create_pool(name);
-
- if (pool) {
- pool->zpool = zpool;
- pool->zpool_ops = zpool_ops;
- }
-
- return pool;
+ return zs_create_pool(name);
}

static void zs_zpool_destroy(void *pool)
@@ -422,27 +401,6 @@ static void zs_zpool_free(void *pool, unsigned long handle)
zs_free(pool, handle);
}

-static int zs_reclaim_page(struct zs_pool *pool, unsigned int retries);
-
-static int zs_zpool_shrink(void *pool, unsigned int pages,
- unsigned int *reclaimed)
-{
- unsigned int total = 0;
- int ret = -EINVAL;
-
- while (total < pages) {
- ret = zs_reclaim_page(pool, 8);
- if (ret < 0)
- break;
- total++;
- }
-
- if (reclaimed)
- *reclaimed = total;
-
- return ret;
-}
-
static void *zs_zpool_map(void *pool, unsigned long handle,
enum zpool_mapmode mm)
{
@@ -481,7 +439,7 @@ static struct zpool_driver zs_zpool_driver = {
.malloc_support_movable = true,
.malloc = zs_zpool_malloc,
.free = zs_zpool_free,
- .shrink = zs_zpool_shrink,
+ .shrink = NULL,
.map = zs_zpool_map,
.unmap = zs_zpool_unmap,
.total_size = zs_zpool_total_size,
@@ -884,14 +842,6 @@ static inline bool obj_allocated(struct page *page, void *obj, unsigned long *ph
return obj_tagged(page, obj, phandle, OBJ_ALLOCATED_TAG);
}

-#ifdef CONFIG_ZPOOL
-static bool obj_stores_deferred_handle(struct page *page, void *obj,
- unsigned long *phandle)
-{
- return obj_tagged(page, obj, phandle, OBJ_DEFERRED_HANDLE_TAG);
-}
-#endif
-
static void reset_page(struct page *page)
{
__ClearPageMovable(page);
@@ -1006,9 +956,6 @@ static void free_zspage(struct zs_pool *pool, struct size_class *class,
}

remove_zspage(class, zspage, ZS_INUSE_RATIO_0);
-#ifdef CONFIG_ZPOOL
- list_del(&zspage->lru);
-#endif
__free_zspage(pool, class, zspage);
}

@@ -1054,11 +1001,6 @@ static void init_zspage(struct size_class *class, struct zspage *zspage)
off %= PAGE_SIZE;
}

-#ifdef CONFIG_ZPOOL
- INIT_LIST_HEAD(&zspage->lru);
- zspage->under_reclaim = false;
-#endif
-
set_freeobj(zspage, 0);
}

@@ -1525,13 +1467,6 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp)
/* We completely set up zspage so mark them as movable */
SetZsPageMovable(pool, zspage);
out:
-#ifdef CONFIG_ZPOOL
- /* Add/move zspage to beginning of LRU */
- if (!list_empty(&zspage->lru))
- list_del(&zspage->lru);
- list_add(&zspage->lru, &pool->lru);
-#endif
-
spin_unlock(&pool->lock);

return handle;
@@ -1600,20 +1535,6 @@ void zs_free(struct zs_pool *pool, unsigned long handle)
class = zspage_class(pool, zspage);

class_stat_dec(class, ZS_OBJS_INUSE, 1);
-
-#ifdef CONFIG_ZPOOL
- if (zspage->under_reclaim) {
- /*
- * Reclaim needs the handles during writeback. It'll free
- * them along with the zspage when it's done with them.
- *
- * Record current deferred handle in the object's header.
- */
- obj_free(class->size, obj, &handle);
- spin_unlock(&pool->lock);
- return;
- }
-#endif
obj_free(class->size, obj, NULL);

fullness = fix_fullness_group(class, zspage);
@@ -1890,23 +1811,6 @@ static void lock_zspage(struct zspage *zspage)
}
#endif /* defined(CONFIG_ZPOOL) || defined(CONFIG_COMPACTION) */

-#ifdef CONFIG_ZPOOL
-/*
- * Unlocks all the pages of the zspage.
- *
- * pool->lock must be held before this function is called
- * to prevent the underlying pages from migrating.
- */
-static void unlock_zspage(struct zspage *zspage)
-{
- struct page *page = get_first_page(zspage);
-
- do {
- unlock_page(page);
- } while ((page = get_next_page(page)) != NULL);
-}
-#endif /* CONFIG_ZPOOL */
-
static void migrate_lock_init(struct zspage *zspage)
{
rwlock_init(&zspage->lock);
@@ -2126,9 +2030,6 @@ static void async_free_zspage(struct work_struct *work)
VM_BUG_ON(fullness != ZS_INUSE_RATIO_0);
class = pool->size_class[class_idx];
spin_lock(&pool->lock);
-#ifdef CONFIG_ZPOOL
- list_del(&zspage->lru);
-#endif
__free_zspage(pool, class, zspage);
spin_unlock(&pool->lock);
}
@@ -2474,10 +2375,6 @@ struct zs_pool *zs_create_pool(const char *name)
*/
zs_register_shrinker(pool);

-#ifdef CONFIG_ZPOOL
- INIT_LIST_HEAD(&pool->lru);
-#endif
-
return pool;

err:
@@ -2520,190 +2417,6 @@ void zs_destroy_pool(struct zs_pool *pool)
}
EXPORT_SYMBOL_GPL(zs_destroy_pool);

-#ifdef CONFIG_ZPOOL
-static void restore_freelist(struct zs_pool *pool, struct size_class *class,
- struct zspage *zspage)
-{
- unsigned int obj_idx = 0;
- unsigned long handle, off = 0; /* off is within-page offset */
- struct page *page = get_first_page(zspage);
- struct link_free *prev_free = NULL;
- void *prev_page_vaddr = NULL;
-
- /* in case no free object found */
- set_freeobj(zspage, (unsigned int)(-1UL));
-
- while (page) {
- void *vaddr = kmap_atomic(page);
- struct page *next_page;
-
- while (off < PAGE_SIZE) {
- void *obj_addr = vaddr + off;
-
- /* skip allocated object */
- if (obj_allocated(page, obj_addr, &handle)) {
- obj_idx++;
- off += class->size;
- continue;
- }
-
- /* free deferred handle from reclaim attempt */
- if (obj_stores_deferred_handle(page, obj_addr, &handle))
- cache_free_handle(pool, handle);
-
- if (prev_free)
- prev_free->next = obj_idx << OBJ_TAG_BITS;
- else /* first free object found */
- set_freeobj(zspage, obj_idx);
-
- prev_free = (struct link_free *)vaddr + off / sizeof(*prev_free);
- /* if last free object in a previous page, need to unmap */
- if (prev_page_vaddr) {
- kunmap_atomic(prev_page_vaddr);
- prev_page_vaddr = NULL;
- }
-
- obj_idx++;
- off += class->size;
- }
-
- /*
- * Handle the last (full or partial) object on this page.
- */
- next_page = get_next_page(page);
- if (next_page) {
- if (!prev_free || prev_page_vaddr) {
- /*
- * There is no free object in this page, so we can safely
- * unmap it.
- */
- kunmap_atomic(vaddr);
- } else {
- /* update prev_page_vaddr since prev_free is on this page */
- prev_page_vaddr = vaddr;
- }
- } else { /* this is the last page */
- if (prev_free) {
- /*
- * Reset OBJ_TAG_BITS bit to last link to tell
- * whether it's allocated object or not.
- */
- prev_free->next = -1UL << OBJ_TAG_BITS;
- }
-
- /* unmap previous page (if not done yet) */
- if (prev_page_vaddr) {
- kunmap_atomic(prev_page_vaddr);
- prev_page_vaddr = NULL;
- }
-
- kunmap_atomic(vaddr);
- }
-
- page = next_page;
- off %= PAGE_SIZE;
- }
-}
-
-static int zs_reclaim_page(struct zs_pool *pool, unsigned int retries)
-{
- int i, obj_idx, ret = 0;
- unsigned long handle;
- struct zspage *zspage;
- struct page *page;
- int fullness;
-
- /* Lock LRU and fullness list */
- spin_lock(&pool->lock);
- if (list_empty(&pool->lru)) {
- spin_unlock(&pool->lock);
- return -EINVAL;
- }
-
- for (i = 0; i < retries; i++) {
- struct size_class *class;
-
- zspage = list_last_entry(&pool->lru, struct zspage, lru);
- list_del(&zspage->lru);
-
- /* zs_free may free objects, but not the zspage and handles */
- zspage->under_reclaim = true;
-
- class = zspage_class(pool, zspage);
- fullness = get_fullness_group(class, zspage);
-
- /* Lock out object allocations and object compaction */
- remove_zspage(class, zspage, fullness);
-
- spin_unlock(&pool->lock);
- cond_resched();
-
- /* Lock backing pages into place */
- lock_zspage(zspage);
-
- obj_idx = 0;
- page = get_first_page(zspage);
- while (1) {
- handle = find_alloced_obj(class, page, &obj_idx);
- if (!handle) {
- page = get_next_page(page);
- if (!page)
- break;
- obj_idx = 0;
- continue;
- }
-
- /*
- * This will write the object and call zs_free.
- *
- * zs_free will free the object, but the
- * under_reclaim flag prevents it from freeing
- * the zspage altogether. This is necessary so
- * that we can continue working with the
- * zspage potentially after the last object
- * has been freed.
- */
- ret = pool->zpool_ops->evict(pool->zpool, handle);
- if (ret)
- goto next;
-
- obj_idx++;
- }
-
-next:
- /* For freeing the zspage, or putting it back in the pool and LRU list. */
- spin_lock(&pool->lock);
- zspage->under_reclaim = false;
-
- if (!get_zspage_inuse(zspage)) {
- /*
- * Fullness went stale as zs_free() won't touch it
- * while the page is removed from the pool. Fix it
- * up for the check in __free_zspage().
- */
- zspage->fullness = ZS_INUSE_RATIO_0;
-
- __free_zspage(pool, class, zspage);
- spin_unlock(&pool->lock);
- return 0;
- }
-
- /*
- * Eviction fails on one of the handles, so we need to restore zspage.
- * We need to rebuild its freelist (and free stored deferred handles),
- * put it back to the correct size class, and add it to the LRU list.
- */
- restore_freelist(pool, class, zspage);
- putback_zspage(class, zspage);
- list_add(&zspage->lru, &pool->lru);
- unlock_zspage(zspage);
- }
-
- spin_unlock(&pool->lock);
- return -EAGAIN;
-}
-#endif /* CONFIG_ZPOOL */
-
static int __init zs_init(void)
{
int ret;
--
2.34.1



2023-06-05 15:41:42

by Johannes Weiner

[permalink] [raw]
Subject: Re: [RFC PATCH 4/7] mm: zswap: remove page reclaim logic from zsmalloc

On Mon, Jun 05, 2023 at 10:54:16AM +0200, Domenico Cerasuolo wrote:
> @@ -884,14 +842,6 @@ static inline bool obj_allocated(struct page *page, void *obj, unsigned long *ph
> return obj_tagged(page, obj, phandle, OBJ_ALLOCATED_TAG);
> }
>
> -#ifdef CONFIG_ZPOOL
> -static bool obj_stores_deferred_handle(struct page *page, void *obj,
> - unsigned long *phandle)
> -{
> - return obj_tagged(page, obj, phandle, OBJ_DEFERRED_HANDLE_TAG);
> -}
> -#endif

You can actually remove even more here.

The entire concept of deferred_handle is about serializing free with
reclaim. It can all go: OBJ_DEFERRED_HANDLE_TAG, the member in struct
link_free, this function here, find_deferred_handle_obj() (declaration
and implementation), free_handles(), and the deferred handle bits in
obj_free() including the handle parameter itself.

2023-06-05 23:39:14

by Nhat Pham

[permalink] [raw]
Subject: Re: [RFC PATCH 4/7] mm: zswap: remove page reclaim logic from zsmalloc

On Mon, Jun 5, 2023 at 8:37 AM Johannes Weiner <[email protected]> wrote:
>
> On Mon, Jun 05, 2023 at 10:54:16AM +0200, Domenico Cerasuolo wrote:
> > @@ -884,14 +842,6 @@ static inline bool obj_allocated(struct page *page, void *obj, unsigned long *ph
> > return obj_tagged(page, obj, phandle, OBJ_ALLOCATED_TAG);
> > }
> >
> > -#ifdef CONFIG_ZPOOL
> > -static bool obj_stores_deferred_handle(struct page *page, void *obj,
> > - unsigned long *phandle)
> > -{
> > - return obj_tagged(page, obj, phandle, OBJ_DEFERRED_HANDLE_TAG);
> > -}
> > -#endif
>
> You can actually remove even more here.
>
> The entire concept of deferred_handle is about serializing free with
> reclaim. It can all go: OBJ_DEFERRED_HANDLE_TAG, the member in struct
> link_free, this function here, find_deferred_handle_obj() (declaration
> and implementation), free_handles(), and the deferred handle bits in
> obj_free() including the handle parameter itself.

For more context on this:
https://lore.kernel.org/all/[email protected]/T/#u

2023-06-05 23:52:46

by Nhat Pham

[permalink] [raw]
Subject: Re: [RFC PATCH 4/7] mm: zswap: remove page reclaim logic from zsmalloc

On Mon, Jun 5, 2023 at 1:54 AM Domenico Cerasuolo
<[email protected]> wrote:
>
> With the recent enhancement to zswap enabling direct page writeback, the
> need for the shrink code in zsmalloc has become obsolete. As a result,
> this commit removes the page reclaim logic from zsmalloc entirely.
>
> Signed-off-by: Domenico Cerasuolo <[email protected]>
> ---
> mm/zsmalloc.c | 291 +-------------------------------------------------
> 1 file changed, 2 insertions(+), 289 deletions(-)
>
> diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
> index 02f7f414aade..c87a60514f21 100644
> --- a/mm/zsmalloc.c
> +++ b/mm/zsmalloc.c
> @@ -250,13 +250,6 @@ struct zs_pool {
> /* Compact classes */
> struct shrinker shrinker;
>
> -#ifdef CONFIG_ZPOOL
> - /* List tracking the zspages in LRU order by most recently added object */
> - struct list_head lru;
> - struct zpool *zpool;
> - const struct zpool_ops *zpool_ops;
> -#endif
> -
> #ifdef CONFIG_ZSMALLOC_STAT
> struct dentry *stat_dentry;
> #endif
> @@ -279,13 +272,6 @@ struct zspage {
> unsigned int freeobj;
> struct page *first_page;
> struct list_head list; /* fullness list */
> -
> -#ifdef CONFIG_ZPOOL
> - /* links the zspage to the lru list in the pool */
> - struct list_head lru;
> - bool under_reclaim;
> -#endif
> -
> struct zs_pool *pool;
> rwlock_t lock;
> };
> @@ -393,14 +379,7 @@ static void *zs_zpool_create(const char *name, gfp_t gfp,
> * different contexts and its caller must provide a valid
> * gfp mask.
> */
> - struct zs_pool *pool = zs_create_pool(name);
> -
> - if (pool) {
> - pool->zpool = zpool;
> - pool->zpool_ops = zpool_ops;
> - }
> -
> - return pool;
> + return zs_create_pool(name);
> }
>
> static void zs_zpool_destroy(void *pool)
> @@ -422,27 +401,6 @@ static void zs_zpool_free(void *pool, unsigned long handle)
> zs_free(pool, handle);
> }
>
> -static int zs_reclaim_page(struct zs_pool *pool, unsigned int retries);
> -
> -static int zs_zpool_shrink(void *pool, unsigned int pages,
> - unsigned int *reclaimed)
> -{
> - unsigned int total = 0;
> - int ret = -EINVAL;
> -
> - while (total < pages) {
> - ret = zs_reclaim_page(pool, 8);
> - if (ret < 0)
> - break;
> - total++;
> - }
> -
> - if (reclaimed)
> - *reclaimed = total;
> -
> - return ret;
> -}
> -
> static void *zs_zpool_map(void *pool, unsigned long handle,
> enum zpool_mapmode mm)
> {
> @@ -481,7 +439,7 @@ static struct zpool_driver zs_zpool_driver = {
> .malloc_support_movable = true,
> .malloc = zs_zpool_malloc,
> .free = zs_zpool_free,
> - .shrink = zs_zpool_shrink,
> + .shrink = NULL,
> .map = zs_zpool_map,
> .unmap = zs_zpool_unmap,
> .total_size = zs_zpool_total_size,
> @@ -884,14 +842,6 @@ static inline bool obj_allocated(struct page *page, void *obj, unsigned long *ph
> return obj_tagged(page, obj, phandle, OBJ_ALLOCATED_TAG);
> }
>
> -#ifdef CONFIG_ZPOOL
> -static bool obj_stores_deferred_handle(struct page *page, void *obj,
> - unsigned long *phandle)
> -{
> - return obj_tagged(page, obj, phandle, OBJ_DEFERRED_HANDLE_TAG);
> -}
> -#endif
> -
> static void reset_page(struct page *page)
> {
> __ClearPageMovable(page);
> @@ -1006,9 +956,6 @@ static void free_zspage(struct zs_pool *pool, struct size_class *class,
> }
>
> remove_zspage(class, zspage, ZS_INUSE_RATIO_0);
> -#ifdef CONFIG_ZPOOL
> - list_del(&zspage->lru);
> -#endif
> __free_zspage(pool, class, zspage);
> }
>
> @@ -1054,11 +1001,6 @@ static void init_zspage(struct size_class *class, struct zspage *zspage)
> off %= PAGE_SIZE;
> }
>
> -#ifdef CONFIG_ZPOOL
> - INIT_LIST_HEAD(&zspage->lru);
> - zspage->under_reclaim = false;
> -#endif
> -
> set_freeobj(zspage, 0);
> }
>
> @@ -1525,13 +1467,6 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp)
> /* We completely set up zspage so mark them as movable */
> SetZsPageMovable(pool, zspage);
> out:
> -#ifdef CONFIG_ZPOOL
> - /* Add/move zspage to beginning of LRU */
> - if (!list_empty(&zspage->lru))
> - list_del(&zspage->lru);
> - list_add(&zspage->lru, &pool->lru);
> -#endif
> -
> spin_unlock(&pool->lock);
>
> return handle;
> @@ -1600,20 +1535,6 @@ void zs_free(struct zs_pool *pool, unsigned long handle)
> class = zspage_class(pool, zspage);
>
> class_stat_dec(class, ZS_OBJS_INUSE, 1);
> -
> -#ifdef CONFIG_ZPOOL
> - if (zspage->under_reclaim) {
> - /*
> - * Reclaim needs the handles during writeback. It'll free
> - * them along with the zspage when it's done with them.
> - *
> - * Record current deferred handle in the object's header.
> - */
> - obj_free(class->size, obj, &handle);
> - spin_unlock(&pool->lock);
> - return;
> - }
> -#endif
> obj_free(class->size, obj, NULL);
>
> fullness = fix_fullness_group(class, zspage);
> @@ -1890,23 +1811,6 @@ static void lock_zspage(struct zspage *zspage)
> }
> #endif /* defined(CONFIG_ZPOOL) || defined(CONFIG_COMPACTION) */

If I recall correctly, the defined(CONFIG_ZPOOL) condition is
only needed for the lock_zspage() call in zs_reclaim_page().

You might be able to get away with just
#ifdef CONFIG_COMPACTION if you're removing writeback.

Do fact-check me of course - try to build with these CONFIGs turned
off and see. I'm surprised kernel test robot has not complained about
unused static function lock_zspage() in the case
CONFIG_ZPOOL && !CONFIG_COMPACTION

>
> -#ifdef CONFIG_ZPOOL
> -/*
> - * Unlocks all the pages of the zspage.
> - *
> - * pool->lock must be held before this function is called
> - * to prevent the underlying pages from migrating.
> - */
> -static void unlock_zspage(struct zspage *zspage)
> -{
> - struct page *page = get_first_page(zspage);
> -
> - do {
> - unlock_page(page);
> - } while ((page = get_next_page(page)) != NULL);
> -}
> -#endif /* CONFIG_ZPOOL */
> -
> static void migrate_lock_init(struct zspage *zspage)
> {
> rwlock_init(&zspage->lock);
> @@ -2126,9 +2030,6 @@ static void async_free_zspage(struct work_struct *work)
> VM_BUG_ON(fullness != ZS_INUSE_RATIO_0);
> class = pool->size_class[class_idx];
> spin_lock(&pool->lock);
> -#ifdef CONFIG_ZPOOL
> - list_del(&zspage->lru);
> -#endif
> __free_zspage(pool, class, zspage);
> spin_unlock(&pool->lock);
> }
> @@ -2474,10 +2375,6 @@ struct zs_pool *zs_create_pool(const char *name)
> */
> zs_register_shrinker(pool);
>
> -#ifdef CONFIG_ZPOOL
> - INIT_LIST_HEAD(&pool->lru);
> -#endif
> -
> return pool;
>
> err:
> @@ -2520,190 +2417,6 @@ void zs_destroy_pool(struct zs_pool *pool)
> }
> EXPORT_SYMBOL_GPL(zs_destroy_pool);
>
> -#ifdef CONFIG_ZPOOL
> -static void restore_freelist(struct zs_pool *pool, struct size_class *class,
> - struct zspage *zspage)
> -{
> - unsigned int obj_idx = 0;
> - unsigned long handle, off = 0; /* off is within-page offset */
> - struct page *page = get_first_page(zspage);
> - struct link_free *prev_free = NULL;
> - void *prev_page_vaddr = NULL;
> -
> - /* in case no free object found */
> - set_freeobj(zspage, (unsigned int)(-1UL));
> -
> - while (page) {
> - void *vaddr = kmap_atomic(page);
> - struct page *next_page;
> -
> - while (off < PAGE_SIZE) {
> - void *obj_addr = vaddr + off;
> -
> - /* skip allocated object */
> - if (obj_allocated(page, obj_addr, &handle)) {
> - obj_idx++;
> - off += class->size;
> - continue;
> - }
> -
> - /* free deferred handle from reclaim attempt */
> - if (obj_stores_deferred_handle(page, obj_addr, &handle))
> - cache_free_handle(pool, handle);
> -
> - if (prev_free)
> - prev_free->next = obj_idx << OBJ_TAG_BITS;
> - else /* first free object found */
> - set_freeobj(zspage, obj_idx);
> -
> - prev_free = (struct link_free *)vaddr + off / sizeof(*prev_free);
> - /* if last free object in a previous page, need to unmap */
> - if (prev_page_vaddr) {
> - kunmap_atomic(prev_page_vaddr);
> - prev_page_vaddr = NULL;
> - }
> -
> - obj_idx++;
> - off += class->size;
> - }
> -
> - /*
> - * Handle the last (full or partial) object on this page.
> - */
> - next_page = get_next_page(page);
> - if (next_page) {
> - if (!prev_free || prev_page_vaddr) {
> - /*
> - * There is no free object in this page, so we can safely
> - * unmap it.
> - */
> - kunmap_atomic(vaddr);
> - } else {
> - /* update prev_page_vaddr since prev_free is on this page */
> - prev_page_vaddr = vaddr;
> - }
> - } else { /* this is the last page */
> - if (prev_free) {
> - /*
> - * Reset OBJ_TAG_BITS bit to last link to tell
> - * whether it's allocated object or not.
> - */
> - prev_free->next = -1UL << OBJ_TAG_BITS;
> - }
> -
> - /* unmap previous page (if not done yet) */
> - if (prev_page_vaddr) {
> - kunmap_atomic(prev_page_vaddr);
> - prev_page_vaddr = NULL;
> - }
> -
> - kunmap_atomic(vaddr);
> - }
> -
> - page = next_page;
> - off %= PAGE_SIZE;
> - }
> -}
> -
> -static int zs_reclaim_page(struct zs_pool *pool, unsigned int retries)
> -{
> - int i, obj_idx, ret = 0;
> - unsigned long handle;
> - struct zspage *zspage;
> - struct page *page;
> - int fullness;
> -
> - /* Lock LRU and fullness list */
> - spin_lock(&pool->lock);
> - if (list_empty(&pool->lru)) {
> - spin_unlock(&pool->lock);
> - return -EINVAL;
> - }
> -
> - for (i = 0; i < retries; i++) {
> - struct size_class *class;
> -
> - zspage = list_last_entry(&pool->lru, struct zspage, lru);
> - list_del(&zspage->lru);
> -
> - /* zs_free may free objects, but not the zspage and handles */
> - zspage->under_reclaim = true;
> -
> - class = zspage_class(pool, zspage);
> - fullness = get_fullness_group(class, zspage);
> -
> - /* Lock out object allocations and object compaction */
> - remove_zspage(class, zspage, fullness);
> -
> - spin_unlock(&pool->lock);
> - cond_resched();
> -
> - /* Lock backing pages into place */
> - lock_zspage(zspage);
> -
> - obj_idx = 0;
> - page = get_first_page(zspage);
> - while (1) {
> - handle = find_alloced_obj(class, page, &obj_idx);
> - if (!handle) {
> - page = get_next_page(page);
> - if (!page)
> - break;
> - obj_idx = 0;
> - continue;
> - }
> -
> - /*
> - * This will write the object and call zs_free.
> - *
> - * zs_free will free the object, but the
> - * under_reclaim flag prevents it from freeing
> - * the zspage altogether. This is necessary so
> - * that we can continue working with the
> - * zspage potentially after the last object
> - * has been freed.
> - */
> - ret = pool->zpool_ops->evict(pool->zpool, handle);
> - if (ret)
> - goto next;
> -
> - obj_idx++;
> - }
> -
> -next:
> - /* For freeing the zspage, or putting it back in the pool and LRU list. */
> - spin_lock(&pool->lock);
> - zspage->under_reclaim = false;
> -
> - if (!get_zspage_inuse(zspage)) {
> - /*
> - * Fullness went stale as zs_free() won't touch it
> - * while the page is removed from the pool. Fix it
> - * up for the check in __free_zspage().
> - */
> - zspage->fullness = ZS_INUSE_RATIO_0;
> -
> - __free_zspage(pool, class, zspage);
> - spin_unlock(&pool->lock);
> - return 0;
> - }
> -
> - /*
> - * Eviction fails on one of the handles, so we need to restore zspage.
> - * We need to rebuild its freelist (and free stored deferred handles),
> - * put it back to the correct size class, and add it to the LRU list.
> - */
> - restore_freelist(pool, class, zspage);
> - putback_zspage(class, zspage);
> - list_add(&zspage->lru, &pool->lru);
> - unlock_zspage(zspage);
> - }
> -
> - spin_unlock(&pool->lock);
> - return -EAGAIN;
> -}
> -#endif /* CONFIG_ZPOOL */
> -
> static int __init zs_init(void)
> {
> int ret;
> --
> 2.34.1
>

2023-06-06 10:05:59

by Domenico Cerasuolo

[permalink] [raw]
Subject: Re: [RFC PATCH 4/7] mm: zswap: remove page reclaim logic from zsmalloc

On Tue, Jun 6, 2023 at 1:37 AM Nhat Pham <[email protected]> wrote:
>
> On Mon, Jun 5, 2023 at 1:54 AM Domenico Cerasuolo
> <[email protected]> wrote:
> >
> > With the recent enhancement to zswap enabling direct page writeback, the
> > need for the shrink code in zsmalloc has become obsolete. As a result,
> > this commit removes the page reclaim logic from zsmalloc entirely.
> >
> > Signed-off-by: Domenico Cerasuolo <[email protected]>
> > ---
> > mm/zsmalloc.c | 291 +-------------------------------------------------
> > 1 file changed, 2 insertions(+), 289 deletions(-)
> >
> > diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
> > index 02f7f414aade..c87a60514f21 100644
> > --- a/mm/zsmalloc.c
> > +++ b/mm/zsmalloc.c
> > @@ -250,13 +250,6 @@ struct zs_pool {
> > /* Compact classes */
> > struct shrinker shrinker;
> >
> > -#ifdef CONFIG_ZPOOL
> > - /* List tracking the zspages in LRU order by most recently added object */
> > - struct list_head lru;
> > - struct zpool *zpool;
> > - const struct zpool_ops *zpool_ops;
> > -#endif
> > -
> > #ifdef CONFIG_ZSMALLOC_STAT
> > struct dentry *stat_dentry;
> > #endif
> > @@ -279,13 +272,6 @@ struct zspage {
> > unsigned int freeobj;
> > struct page *first_page;
> > struct list_head list; /* fullness list */
> > -
> > -#ifdef CONFIG_ZPOOL
> > - /* links the zspage to the lru list in the pool */
> > - struct list_head lru;
> > - bool under_reclaim;
> > -#endif
> > -
> > struct zs_pool *pool;
> > rwlock_t lock;
> > };
> > @@ -393,14 +379,7 @@ static void *zs_zpool_create(const char *name, gfp_t gfp,
> > * different contexts and its caller must provide a valid
> > * gfp mask.
> > */
> > - struct zs_pool *pool = zs_create_pool(name);
> > -
> > - if (pool) {
> > - pool->zpool = zpool;
> > - pool->zpool_ops = zpool_ops;
> > - }
> > -
> > - return pool;
> > + return zs_create_pool(name);
> > }
> >
> > static void zs_zpool_destroy(void *pool)
> > @@ -422,27 +401,6 @@ static void zs_zpool_free(void *pool, unsigned long handle)
> > zs_free(pool, handle);
> > }
> >
> > -static int zs_reclaim_page(struct zs_pool *pool, unsigned int retries);
> > -
> > -static int zs_zpool_shrink(void *pool, unsigned int pages,
> > - unsigned int *reclaimed)
> > -{
> > - unsigned int total = 0;
> > - int ret = -EINVAL;
> > -
> > - while (total < pages) {
> > - ret = zs_reclaim_page(pool, 8);
> > - if (ret < 0)
> > - break;
> > - total++;
> > - }
> > -
> > - if (reclaimed)
> > - *reclaimed = total;
> > -
> > - return ret;
> > -}
> > -
> > static void *zs_zpool_map(void *pool, unsigned long handle,
> > enum zpool_mapmode mm)
> > {
> > @@ -481,7 +439,7 @@ static struct zpool_driver zs_zpool_driver = {
> > .malloc_support_movable = true,
> > .malloc = zs_zpool_malloc,
> > .free = zs_zpool_free,
> > - .shrink = zs_zpool_shrink,
> > + .shrink = NULL,
> > .map = zs_zpool_map,
> > .unmap = zs_zpool_unmap,
> > .total_size = zs_zpool_total_size,
> > @@ -884,14 +842,6 @@ static inline bool obj_allocated(struct page *page, void *obj, unsigned long *ph
> > return obj_tagged(page, obj, phandle, OBJ_ALLOCATED_TAG);
> > }
> >
> > -#ifdef CONFIG_ZPOOL
> > -static bool obj_stores_deferred_handle(struct page *page, void *obj,
> > - unsigned long *phandle)
> > -{
> > - return obj_tagged(page, obj, phandle, OBJ_DEFERRED_HANDLE_TAG);
> > -}
> > -#endif
> > -
> > static void reset_page(struct page *page)
> > {
> > __ClearPageMovable(page);
> > @@ -1006,9 +956,6 @@ static void free_zspage(struct zs_pool *pool, struct size_class *class,
> > }
> >
> > remove_zspage(class, zspage, ZS_INUSE_RATIO_0);
> > -#ifdef CONFIG_ZPOOL
> > - list_del(&zspage->lru);
> > -#endif
> > __free_zspage(pool, class, zspage);
> > }
> >
> > @@ -1054,11 +1001,6 @@ static void init_zspage(struct size_class *class, struct zspage *zspage)
> > off %= PAGE_SIZE;
> > }
> >
> > -#ifdef CONFIG_ZPOOL
> > - INIT_LIST_HEAD(&zspage->lru);
> > - zspage->under_reclaim = false;
> > -#endif
> > -
> > set_freeobj(zspage, 0);
> > }
> >
> > @@ -1525,13 +1467,6 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp)
> > /* We completely set up zspage so mark them as movable */
> > SetZsPageMovable(pool, zspage);
> > out:
> > -#ifdef CONFIG_ZPOOL
> > - /* Add/move zspage to beginning of LRU */
> > - if (!list_empty(&zspage->lru))
> > - list_del(&zspage->lru);
> > - list_add(&zspage->lru, &pool->lru);
> > -#endif
> > -
> > spin_unlock(&pool->lock);
> >
> > return handle;
> > @@ -1600,20 +1535,6 @@ void zs_free(struct zs_pool *pool, unsigned long handle)
> > class = zspage_class(pool, zspage);
> >
> > class_stat_dec(class, ZS_OBJS_INUSE, 1);
> > -
> > -#ifdef CONFIG_ZPOOL
> > - if (zspage->under_reclaim) {
> > - /*
> > - * Reclaim needs the handles during writeback. It'll free
> > - * them along with the zspage when it's done with them.
> > - *
> > - * Record current deferred handle in the object's header.
> > - */
> > - obj_free(class->size, obj, &handle);
> > - spin_unlock(&pool->lock);
> > - return;
> > - }
> > -#endif
> > obj_free(class->size, obj, NULL);
> >
> > fullness = fix_fullness_group(class, zspage);
> > @@ -1890,23 +1811,6 @@ static void lock_zspage(struct zspage *zspage)
> > }
> > #endif /* defined(CONFIG_ZPOOL) || defined(CONFIG_COMPACTION) */
>
> If I recall correctly, the defined(CONFIG_ZPOOL) condition is
> only needed for the lock_zspage() call in zs_reclaim_page().
>
> You might be able to get away with just
> #ifdef CONFIG_COMPACTION if you're removing writeback.
>
> Do fact-check me of course - try to build with these CONFIGs turned
> off and see. I'm surprised kernel test robot has not complained about
> unused static function lock_zspage() in the case
> CONFIG_ZPOOL && !CONFIG_COMPACTION
>

Thanks! It is indeed the case: with CONFIG_ZPOOL && !CONFIG_COMPACTION,
lock_zspage() becomes unused, so I will go with #ifdef CONFIG_COMPACTION alone.

> >
> > -#ifdef CONFIG_ZPOOL
> > -/*
> > - * Unlocks all the pages of the zspage.
> > - *
> > - * pool->lock must be held before this function is called
> > - * to prevent the underlying pages from migrating.
> > - */
> > -static void unlock_zspage(struct zspage *zspage)
> > -{
> > - struct page *page = get_first_page(zspage);
> > -
> > - do {
> > - unlock_page(page);
> > - } while ((page = get_next_page(page)) != NULL);
> > -}
> > -#endif /* CONFIG_ZPOOL */
> > -
> > static void migrate_lock_init(struct zspage *zspage)
> > {
> > rwlock_init(&zspage->lock);
> > @@ -2126,9 +2030,6 @@ static void async_free_zspage(struct work_struct *work)
> > VM_BUG_ON(fullness != ZS_INUSE_RATIO_0);
> > class = pool->size_class[class_idx];
> > spin_lock(&pool->lock);
> > -#ifdef CONFIG_ZPOOL
> > - list_del(&zspage->lru);
> > -#endif
> > __free_zspage(pool, class, zspage);
> > spin_unlock(&pool->lock);
> > }
> > @@ -2474,10 +2375,6 @@ struct zs_pool *zs_create_pool(const char *name)
> > */
> > zs_register_shrinker(pool);
> >
> > -#ifdef CONFIG_ZPOOL
> > - INIT_LIST_HEAD(&pool->lru);
> > -#endif
> > -
> > return pool;
> >
> > err:
> > @@ -2520,190 +2417,6 @@ void zs_destroy_pool(struct zs_pool *pool)
> > }
> > EXPORT_SYMBOL_GPL(zs_destroy_pool);
> >
> > -#ifdef CONFIG_ZPOOL
> > -static void restore_freelist(struct zs_pool *pool, struct size_class *class,
> > - struct zspage *zspage)
> > -{
> > - unsigned int obj_idx = 0;
> > - unsigned long handle, off = 0; /* off is within-page offset */
> > - struct page *page = get_first_page(zspage);
> > - struct link_free *prev_free = NULL;
> > - void *prev_page_vaddr = NULL;
> > -
> > - /* in case no free object found */
> > - set_freeobj(zspage, (unsigned int)(-1UL));
> > -
> > - while (page) {
> > - void *vaddr = kmap_atomic(page);
> > - struct page *next_page;
> > -
> > - while (off < PAGE_SIZE) {
> > - void *obj_addr = vaddr + off;
> > -
> > - /* skip allocated object */
> > - if (obj_allocated(page, obj_addr, &handle)) {
> > - obj_idx++;
> > - off += class->size;
> > - continue;
> > - }
> > -
> > - /* free deferred handle from reclaim attempt */
> > - if (obj_stores_deferred_handle(page, obj_addr, &handle))
> > - cache_free_handle(pool, handle);
> > -
> > - if (prev_free)
> > - prev_free->next = obj_idx << OBJ_TAG_BITS;
> > - else /* first free object found */
> > - set_freeobj(zspage, obj_idx);
> > -
> > - prev_free = (struct link_free *)vaddr + off / sizeof(*prev_free);
> > - /* if last free object in a previous page, need to unmap */
> > - if (prev_page_vaddr) {
> > - kunmap_atomic(prev_page_vaddr);
> > - prev_page_vaddr = NULL;
> > - }
> > -
> > - obj_idx++;
> > - off += class->size;
> > - }
> > -
> > - /*
> > - * Handle the last (full or partial) object on this page.
> > - */
> > - next_page = get_next_page(page);
> > - if (next_page) {
> > - if (!prev_free || prev_page_vaddr) {
> > - /*
> > - * There is no free object in this page, so we can safely
> > - * unmap it.
> > - */
> > - kunmap_atomic(vaddr);
> > - } else {
> > - /* update prev_page_vaddr since prev_free is on this page */
> > - prev_page_vaddr = vaddr;
> > - }
> > - } else { /* this is the last page */
> > - if (prev_free) {
> > - /*
> > - * Reset OBJ_TAG_BITS bit to last link to tell
> > - * whether it's allocated object or not.
> > - */
> > - prev_free->next = -1UL << OBJ_TAG_BITS;
> > - }
> > -
> > - /* unmap previous page (if not done yet) */
> > - if (prev_page_vaddr) {
> > - kunmap_atomic(prev_page_vaddr);
> > - prev_page_vaddr = NULL;
> > - }
> > -
> > - kunmap_atomic(vaddr);
> > - }
> > -
> > - page = next_page;
> > - off %= PAGE_SIZE;
> > - }
> > -}
> > -
> > -static int zs_reclaim_page(struct zs_pool *pool, unsigned int retries)
> > -{
> > - int i, obj_idx, ret = 0;
> > - unsigned long handle;
> > - struct zspage *zspage;
> > - struct page *page;
> > - int fullness;
> > -
> > - /* Lock LRU and fullness list */
> > - spin_lock(&pool->lock);
> > - if (list_empty(&pool->lru)) {
> > - spin_unlock(&pool->lock);
> > - return -EINVAL;
> > - }
> > -
> > - for (i = 0; i < retries; i++) {
> > - struct size_class *class;
> > -
> > - zspage = list_last_entry(&pool->lru, struct zspage, lru);
> > - list_del(&zspage->lru);
> > -
> > - /* zs_free may free objects, but not the zspage and handles */
> > - zspage->under_reclaim = true;
> > -
> > - class = zspage_class(pool, zspage);
> > - fullness = get_fullness_group(class, zspage);
> > -
> > - /* Lock out object allocations and object compaction */
> > - remove_zspage(class, zspage, fullness);
> > -
> > - spin_unlock(&pool->lock);
> > - cond_resched();
> > -
> > - /* Lock backing pages into place */
> > - lock_zspage(zspage);
> > -
> > - obj_idx = 0;
> > - page = get_first_page(zspage);
> > - while (1) {
> > - handle = find_alloced_obj(class, page, &obj_idx);
> > - if (!handle) {
> > - page = get_next_page(page);
> > - if (!page)
> > - break;
> > - obj_idx = 0;
> > - continue;
> > - }
> > -
> > - /*
> > - * This will write the object and call zs_free.
> > - *
> > - * zs_free will free the object, but the
> > - * under_reclaim flag prevents it from freeing
> > - * the zspage altogether. This is necessary so
> > - * that we can continue working with the
> > - * zspage potentially after the last object
> > - * has been freed.
> > - */
> > - ret = pool->zpool_ops->evict(pool->zpool, handle);
> > - if (ret)
> > - goto next;
> > -
> > - obj_idx++;
> > - }
> > -
> > -next:
> > - /* For freeing the zspage, or putting it back in the pool and LRU list. */
> > - spin_lock(&pool->lock);
> > - zspage->under_reclaim = false;
> > -
> > - if (!get_zspage_inuse(zspage)) {
> > - /*
> > - * Fullness went stale as zs_free() won't touch it
> > - * while the page is removed from the pool. Fix it
> > - * up for the check in __free_zspage().
> > - */
> > - zspage->fullness = ZS_INUSE_RATIO_0;
> > -
> > - __free_zspage(pool, class, zspage);
> > - spin_unlock(&pool->lock);
> > - return 0;
> > - }
> > -
> > - /*
> > - * Eviction fails on one of the handles, so we need to restore zspage.
> > - * We need to rebuild its freelist (and free stored deferred handles),
> > - * put it back to the correct size class, and add it to the LRU list.
> > - */
> > - restore_freelist(pool, class, zspage);
> > - putback_zspage(class, zspage);
> > - list_add(&zspage->lru, &pool->lru);
> > - unlock_zspage(zspage);
> > - }
> > -
> > - spin_unlock(&pool->lock);
> > - return -EAGAIN;
> > -}
> > -#endif /* CONFIG_ZPOOL */
> > -
> > static int __init zs_init(void)
> > {
> > int ret;
> > --
> > 2.34.1
> >