2024-03-12 15:39:50

by Johannes Weiner

[permalink] [raw]
Subject: [PATCH V2 1/2] mm: zswap: optimize zswap pool size tracking

Profiling the munmap() of a zswapped memory region shows 60% of the
total cycles currently going into updating the zswap_pool_total_size.

There are three consumers of this counter:
- store, to enforce the globally configured pool limit
- meminfo & debugfs, to report the size to the user
- shrink, to determine the batch size for each cycle

Instead of aggregating every time an entry enters or exits the zswap
pool, aggregate the value from the zpools on-demand:

- Stores aggregate the counter anyway upon success. Aggregating to
check the limit instead is the same amount of work.

- Meminfo & debugfs might benefit somewhat from a pre-aggregated
counter, but aren't exactly hotpaths.

- Shrinking can aggregate once for every cycle instead of doing it for
every freed entry. As the shrinker might work on tens or hundreds of
objects per scan cycle, this is a large reduction in aggregations.

The paths that benefit dramatically are swapin, swapoff, and
unmaps. There could be millions of pages being processed until
somebody asks for the pool size again. This eliminates the pool size
updates from those paths entirely.

Top profile entries for a 24G range munmap(), before:

38.54% zswap-unmap [kernel.kallsyms] [k] zs_zpool_total_size
12.51% zswap-unmap [kernel.kallsyms] [k] zpool_get_total_size
9.10% zswap-unmap [kernel.kallsyms] [k] zswap_update_total_size
2.95% zswap-unmap [kernel.kallsyms] [k] obj_cgroup_uncharge_zswap
2.88% zswap-unmap [kernel.kallsyms] [k] __slab_free
2.86% zswap-unmap [kernel.kallsyms] [k] xas_store

and after:

7.70% zswap-unmap [kernel.kallsyms] [k] __slab_free
7.16% zswap-unmap [kernel.kallsyms] [k] obj_cgroup_uncharge_zswap
6.74% zswap-unmap [kernel.kallsyms] [k] xas_store

It was also briefly considered to move to a single atomic in zswap
that is updated by the backends, since zswap only cares about the sum
of all pools anyway. However, zram directly needs per-pool information
out of zsmalloc. To keep the backend from having to update two atomics
every time, I opted for the lazy aggregation instead for now.

Signed-off-by: Johannes Weiner <[email protected]>
Acked-by: Yosry Ahmed <[email protected]>
Reviewed-by: Chengming Zhou <[email protected]>
Reviewed-by: Nhat Pham <[email protected]>
---
fs/proc/meminfo.c | 3 +-
include/linux/zswap.h | 2 +-
mm/zswap.c | 101 +++++++++++++++++++++---------------------
3 files changed, 52 insertions(+), 54 deletions(-)

v2:
- added profile info (Yosry). Counter footprint is actually 60%; I had
missed the third line in perf's graphed output previously.
- zswap_accept_thr_pages() helper (Yosry)
- fixed debugfs file missing newline (Yosry)
- added changelog note on a single zswap atomic for the backend size (Yosry)
- collected acks and reviews

diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 45af9a989d40..245171d9164b 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -89,8 +89,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
show_val_kb(m, "SwapTotal: ", i.totalswap);
show_val_kb(m, "SwapFree: ", i.freeswap);
#ifdef CONFIG_ZSWAP
- seq_printf(m, "Zswap: %8lu kB\n",
- (unsigned long)(zswap_pool_total_size >> 10));
+ show_val_kb(m, "Zswap: ", zswap_total_pages());
seq_printf(m, "Zswapped: %8lu kB\n",
(unsigned long)atomic_read(&zswap_stored_pages) <<
(PAGE_SHIFT - 10));
diff --git a/include/linux/zswap.h b/include/linux/zswap.h
index 341aea490070..2a85b941db97 100644
--- a/include/linux/zswap.h
+++ b/include/linux/zswap.h
@@ -7,7 +7,6 @@

struct lruvec;

-extern u64 zswap_pool_total_size;
extern atomic_t zswap_stored_pages;

#ifdef CONFIG_ZSWAP
@@ -27,6 +26,7 @@ struct zswap_lruvec_state {
atomic_long_t nr_zswap_protected;
};

+unsigned long zswap_total_pages(void);
bool zswap_store(struct folio *folio);
bool zswap_load(struct folio *folio);
void zswap_invalidate(swp_entry_t swp);
diff --git a/mm/zswap.c b/mm/zswap.c
index 9a3237752082..1a5cc7298306 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -43,8 +43,6 @@
/*********************************
* statistics
**********************************/
-/* Total bytes used by the compressed storage */
-u64 zswap_pool_total_size;
/* The number of compressed pages currently stored in zswap */
atomic_t zswap_stored_pages = ATOMIC_INIT(0);
/* The number of same-value filled pages currently stored in zswap */
@@ -264,45 +262,6 @@ static inline struct zswap_tree *swap_zswap_tree(swp_entry_t swp)
pr_debug("%s pool %s/%s\n", msg, (p)->tfm_name, \
zpool_get_type((p)->zpools[0]))

-static bool zswap_is_full(void)
-{
- return totalram_pages() * zswap_max_pool_percent / 100 <
- DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
-}
-
-static bool zswap_can_accept(void)
-{
- return totalram_pages() * zswap_accept_thr_percent / 100 *
- zswap_max_pool_percent / 100 >
- DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
-}
-
-static u64 get_zswap_pool_size(struct zswap_pool *pool)
-{
- u64 pool_size = 0;
- int i;
-
- for (i = 0; i < ZSWAP_NR_ZPOOLS; i++)
- pool_size += zpool_get_total_size(pool->zpools[i]);
-
- return pool_size;
-}
-
-static void zswap_update_total_size(void)
-{
- struct zswap_pool *pool;
- u64 total = 0;
-
- rcu_read_lock();
-
- list_for_each_entry_rcu(pool, &zswap_pools, list)
- total += get_zswap_pool_size(pool);
-
- rcu_read_unlock();
-
- zswap_pool_total_size = total;
-}
-
/*********************************
* pool functions
**********************************/
@@ -540,6 +499,33 @@ static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
return NULL;
}

+static unsigned long zswap_max_pages(void)
+{
+ return totalram_pages() * zswap_max_pool_percent / 100;
+}
+
+static unsigned long zswap_accept_thr_pages(void)
+{
+ return zswap_max_pages() * zswap_accept_thr_percent / 100;
+}
+
+unsigned long zswap_total_pages(void)
+{
+ struct zswap_pool *pool;
+ u64 total = 0;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(pool, &zswap_pools, list) {
+ int i;
+
+ for (i = 0; i < ZSWAP_NR_ZPOOLS; i++)
+ total += zpool_get_total_size(pool->zpools[i]);
+ }
+ rcu_read_unlock();
+
+ return total >> PAGE_SHIFT;
+}
+
/*********************************
* param callbacks
**********************************/
@@ -912,7 +898,6 @@ static void zswap_entry_free(struct zswap_entry *entry)
}
zswap_entry_cache_free(entry);
atomic_dec(&zswap_stored_pages);
- zswap_update_total_size();
}

/*
@@ -1317,7 +1302,7 @@ static unsigned long zswap_shrinker_count(struct shrinker *shrinker,
nr_stored = memcg_page_state(memcg, MEMCG_ZSWAPPED);
#else
/* use pool stats instead of memcg stats */
- nr_backing = zswap_pool_total_size >> PAGE_SHIFT;
+ nr_backing = zswap_total_pages();
nr_stored = atomic_read(&zswap_nr_stored);
#endif

@@ -1385,6 +1370,10 @@ static void shrink_worker(struct work_struct *w)
{
struct mem_cgroup *memcg;
int ret, failures = 0;
+ unsigned long thr;
+
+ /* Reclaim down to the accept threshold */
+ thr = zswap_accept_thr_pages();

/* global reclaim will select cgroup in a round-robin fashion. */
do {
@@ -1432,10 +1421,9 @@ static void shrink_worker(struct work_struct *w)
break;
if (ret && ++failures == MAX_RECLAIM_RETRIES)
break;
-
resched:
cond_resched();
- } while (!zswap_can_accept());
+ } while (zswap_total_pages() > thr);
}

static int zswap_is_page_same_filled(void *ptr, unsigned long *value)
@@ -1476,6 +1464,7 @@ bool zswap_store(struct folio *folio)
struct zswap_entry *entry, *dupentry;
struct obj_cgroup *objcg = NULL;
struct mem_cgroup *memcg = NULL;
+ unsigned long max_pages, cur_pages;

VM_WARN_ON_ONCE(!folio_test_locked(folio));
VM_WARN_ON_ONCE(!folio_test_swapcache(folio));
@@ -1487,6 +1476,7 @@ bool zswap_store(struct folio *folio)
if (!zswap_enabled)
goto check_old;

+ /* Check cgroup limits */
objcg = get_obj_cgroup_from_folio(folio);
if (objcg && !obj_cgroup_may_zswap(objcg)) {
memcg = get_mem_cgroup_from_objcg(objcg);
@@ -1497,15 +1487,18 @@ bool zswap_store(struct folio *folio)
mem_cgroup_put(memcg);
}

- /* reclaim space if needed */
- if (zswap_is_full()) {
+ /* Check global limits */
+ cur_pages = zswap_total_pages();
+ max_pages = zswap_max_pages();
+
+ if (cur_pages >= max_pages) {
zswap_pool_limit_hit++;
zswap_pool_reached_full = true;
goto shrink;
}

if (zswap_pool_reached_full) {
- if (!zswap_can_accept())
+ if (cur_pages > zswap_accept_thr_pages())
goto shrink;
else
zswap_pool_reached_full = false;
@@ -1581,7 +1574,6 @@ bool zswap_store(struct folio *folio)

/* update stats */
atomic_inc(&zswap_stored_pages);
- zswap_update_total_size();
count_vm_event(ZSWPOUT);

return true;
@@ -1711,6 +1703,13 @@ void zswap_swapoff(int type)

static struct dentry *zswap_debugfs_root;

+static int debugfs_get_total_size(void *data, u64 *val)
+{
+ *val = zswap_total_pages() * PAGE_SIZE;
+ return 0;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(total_size_fops, debugfs_get_total_size, NULL, "%llu\n");
+
static int zswap_debugfs_init(void)
{
if (!debugfs_initialized())
@@ -1732,8 +1731,8 @@ static int zswap_debugfs_init(void)
zswap_debugfs_root, &zswap_reject_compress_poor);
debugfs_create_u64("written_back_pages", 0444,
zswap_debugfs_root, &zswap_written_back_pages);
- debugfs_create_u64("pool_total_size", 0444,
- zswap_debugfs_root, &zswap_pool_total_size);
+ debugfs_create_file("pool_total_size", 0444,
+ zswap_debugfs_root, NULL, &total_size_fops);
debugfs_create_atomic_t("stored_pages", 0444,
zswap_debugfs_root, &zswap_stored_pages);
debugfs_create_atomic_t("same_filled_pages", 0444,
--
2.44.0



2024-03-12 15:39:51

by Johannes Weiner

[permalink] [raw]
Subject: [PATCH 2/2] mm: zpool: return pool size in pages

All zswap backends track their pool sizes in pages. Currently they
multiply by PAGE_SIZE for zswap, only for zswap to divide again in
order to do limit math. Report pages directly.

Signed-off-by: Johannes Weiner <[email protected]>
Acked-by: Yosry Ahmed <[email protected]>
Reviewed-by: Chengming Zhou <[email protected]>
Reviewed-by: Nhat Pham <[email protected]>
---
include/linux/zpool.h | 4 ++--
mm/z3fold.c | 10 +++++-----
mm/zbud.c | 10 +++++-----
mm/zpool.c | 10 +++++-----
mm/zsmalloc.c | 6 +++---
mm/zswap.c | 6 +++---
6 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/include/linux/zpool.h b/include/linux/zpool.h
index 3296438eec06..a67d62b79698 100644
--- a/include/linux/zpool.h
+++ b/include/linux/zpool.h
@@ -53,7 +53,7 @@ void *zpool_map_handle(struct zpool *pool, unsigned long handle,

void zpool_unmap_handle(struct zpool *pool, unsigned long handle);

-u64 zpool_get_total_size(struct zpool *pool);
+u64 zpool_get_total_pages(struct zpool *pool);


/**
@@ -91,7 +91,7 @@ struct zpool_driver {
enum zpool_mapmode mm);
void (*unmap)(void *pool, unsigned long handle);

- u64 (*total_size)(void *pool);
+ u64 (*total_pages)(void *pool);
};

void zpool_register_driver(struct zpool_driver *driver);
diff --git a/mm/z3fold.c b/mm/z3fold.c
index 7ab05621052d..2ebfed32871b 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -1237,12 +1237,12 @@ static void z3fold_unmap(struct z3fold_pool *pool, unsigned long handle)
}

/**
- * z3fold_get_pool_size() - gets the z3fold pool size in pages
+ * z3fold_get_pool_pages() - gets the z3fold pool size in pages
* @pool: pool whose size is being queried
*
* Returns: size in pages of the given pool.
*/
-static u64 z3fold_get_pool_size(struct z3fold_pool *pool)
+static u64 z3fold_get_pool_pages(struct z3fold_pool *pool)
{
return atomic64_read(&pool->pages_nr);
}
@@ -1402,9 +1402,9 @@ static void z3fold_zpool_unmap(void *pool, unsigned long handle)
z3fold_unmap(pool, handle);
}

-static u64 z3fold_zpool_total_size(void *pool)
+static u64 z3fold_zpool_total_pages(void *pool)
{
- return z3fold_get_pool_size(pool) * PAGE_SIZE;
+ return z3fold_get_pool_pages(pool);
}

static struct zpool_driver z3fold_zpool_driver = {
@@ -1417,7 +1417,7 @@ static struct zpool_driver z3fold_zpool_driver = {
.free = z3fold_zpool_free,
.map = z3fold_zpool_map,
.unmap = z3fold_zpool_unmap,
- .total_size = z3fold_zpool_total_size,
+ .total_pages = z3fold_zpool_total_pages,
};

MODULE_ALIAS("zpool-z3fold");
diff --git a/mm/zbud.c b/mm/zbud.c
index 2190cc1f37b3..e9836fff9438 100644
--- a/mm/zbud.c
+++ b/mm/zbud.c
@@ -365,13 +365,13 @@ static void zbud_unmap(struct zbud_pool *pool, unsigned long handle)
}

/**
- * zbud_get_pool_size() - gets the zbud pool size in pages
+ * zbud_get_pool_pages() - gets the zbud pool size in pages
* @pool: pool whose size is being queried
*
* Returns: size in pages of the given pool. The pool lock need not be
* taken to access pages_nr.
*/
-static u64 zbud_get_pool_size(struct zbud_pool *pool)
+static u64 zbud_get_pool_pages(struct zbud_pool *pool)
{
return pool->pages_nr;
}
@@ -410,9 +410,9 @@ static void zbud_zpool_unmap(void *pool, unsigned long handle)
zbud_unmap(pool, handle);
}

-static u64 zbud_zpool_total_size(void *pool)
+static u64 zbud_zpool_total_pages(void *pool)
{
- return zbud_get_pool_size(pool) * PAGE_SIZE;
+ return zbud_get_pool_pages(pool);
}

static struct zpool_driver zbud_zpool_driver = {
@@ -425,7 +425,7 @@ static struct zpool_driver zbud_zpool_driver = {
.free = zbud_zpool_free,
.map = zbud_zpool_map,
.unmap = zbud_zpool_unmap,
- .total_size = zbud_zpool_total_size,
+ .total_pages = zbud_zpool_total_pages,
};

MODULE_ALIAS("zpool-zbud");
diff --git a/mm/zpool.c b/mm/zpool.c
index 846410479c2f..b9fda1fa857d 100644
--- a/mm/zpool.c
+++ b/mm/zpool.c
@@ -321,16 +321,16 @@ void zpool_unmap_handle(struct zpool *zpool, unsigned long handle)
}

/**
- * zpool_get_total_size() - The total size of the pool
+ * zpool_get_total_pages() - The total size of the pool
* @zpool: The zpool to check
*
- * This returns the total size in bytes of the pool.
+ * This returns the total size in pages of the pool.
*
- * Returns: Total size of the zpool in bytes.
+ * Returns: Total size of the zpool in pages.
*/
-u64 zpool_get_total_size(struct zpool *zpool)
+u64 zpool_get_total_pages(struct zpool *zpool)
{
- return zpool->driver->total_size(zpool->pool);
+ return zpool->driver->total_pages(zpool->pool);
}

/**
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 7d7cb3eaabe0..b42d3545ca85 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -399,9 +399,9 @@ static void zs_zpool_unmap(void *pool, unsigned long handle)
zs_unmap_object(pool, handle);
}

-static u64 zs_zpool_total_size(void *pool)
+static u64 zs_zpool_total_pages(void *pool)
{
- return zs_get_total_pages(pool) << PAGE_SHIFT;
+ return zs_get_total_pages(pool);
}

static struct zpool_driver zs_zpool_driver = {
@@ -414,7 +414,7 @@ static struct zpool_driver zs_zpool_driver = {
.free = zs_zpool_free,
.map = zs_zpool_map,
.unmap = zs_zpool_unmap,
- .total_size = zs_zpool_total_size,
+ .total_pages = zs_zpool_total_pages,
};

MODULE_ALIAS("zpool-zsmalloc");
diff --git a/mm/zswap.c b/mm/zswap.c
index 1a5cc7298306..9fdf4c76d5ea 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -512,18 +512,18 @@ static unsigned long zswap_accept_thr_pages(void)
unsigned long zswap_total_pages(void)
{
struct zswap_pool *pool;
- u64 total = 0;
+ unsigned long total = 0;

rcu_read_lock();
list_for_each_entry_rcu(pool, &zswap_pools, list) {
int i;

for (i = 0; i < ZSWAP_NR_ZPOOLS; i++)
- total += zpool_get_total_size(pool->zpools[i]);
+ total += zpool_get_total_pages(pool->zpools[i]);
}
rcu_read_unlock();

- return total >> PAGE_SHIFT;
+ return total;
}

/*********************************
--
2.44.0