zsmalloc pool can be compacted concurrently by many contexts, e.g.:

  cc1 handle_mm_fault()
       do_anonymous_page()
        __alloc_pages_slowpath()
         try_to_free_pages()
          do_try_to_free_pages()
           lru_gen_shrink_node()
            shrink_slab()
             do_shrink_slab()
              zs_shrinker_scan()
               zs_compact()
This creates unnecessary contention, as all those processes compete
for access to the same size classes. A single compaction process is
enough. Moreover, the contention created by multiple compaction
processes impacts other zsmalloc functions, e.g. zs_malloc(), since
zsmalloc uses a "global" pool->lock to synchronize access to the pool.
Introduce a pool compaction mutex and permit only one compaction
context at a time.
/proc/lock_stat after a Linux kernel build with make -j$((`nproc`+1)),
stats for &pool->lock#3 (wait/hold times in microseconds):

                           Base          Patched
                --------------------------------
con-bounces             9797655          8125860
contentions            11131185          9242153
waittime-min               0.09             0.10
waittime-max         4171695.76       3926258.74
waittime-total     506197629.16     417061026.20
waittime-avg              45.48            45.13
acq-bounces            13809103         11383480
acquisitions           21145155         18049364
holdtime-min               0.06             0.07
holdtime-max         7379928.80       3926274.89
holdtime-total      46273950.89      37279624.53
holdtime-avg               2.19             2.07
Signed-off-by: Sergey Senozhatsky <[email protected]>
---
mm/zsmalloc.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index cc81dfba05a0..0e036ec56c3c 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -264,6 +264,7 @@ struct zs_pool {
 	struct work_struct free_work;
 #endif
 	spinlock_t lock;
+	struct mutex compact_lock;
 };
 
 struct zspage {
@@ -2274,6 +2275,9 @@ unsigned long zs_compact(struct zs_pool *pool)
 	struct size_class *class;
 	unsigned long pages_freed = 0;
 
+	if (!mutex_trylock(&pool->compact_lock))
+		return 0;
+
 	for (i = ZS_SIZE_CLASSES - 1; i >= 0; i--) {
 		class = pool->size_class[i];
 		if (class->index != i)
@@ -2281,6 +2285,7 @@ unsigned long zs_compact(struct zs_pool *pool)
 		pages_freed += __zs_compact(pool, class);
 	}
 	atomic_long_add(pages_freed, &pool->stats.pages_compacted);
+	mutex_unlock(&pool->compact_lock);
 
 	return pages_freed;
 }
@@ -2388,6 +2393,7 @@ struct zs_pool *zs_create_pool(const char *name)
 
 	init_deferred_free(pool);
 	spin_lock_init(&pool->lock);
+	mutex_init(&pool->compact_lock);
 
 	pool->name = kstrdup(name, GFP_KERNEL);
 	if (!pool->name)
--
2.40.0.634.g4ca3ef3211-goog
Hi Sergey,
On Mon, Apr 17, 2023 at 4:03 AM Sergey Senozhatsky
<[email protected]> wrote:
[..]
> Introduce a pool compaction mutex and permit only one compaction
> context at a time.

I am not sure what the best practice is here, but if the only use of
the mutex is a trylock, do we need a mutex at all? It seems like a
simple atomic would do the trick. Perhaps something like:
static atomic_t ongoing_compaction = ATOMIC_INIT(0);
...
if (atomic_xchg(&ongoing_compaction, 1))
	return;
...
atomic_set(&ongoing_compaction, 0);
FWIW, I am suggesting this because it was recently brought up in a
review of one of my patches that locks should protect data, not code
paths [1], so I am trying to apply what I learned there :)

[1] https://lore.kernel.org/lkml/[email protected]/
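
To make the suggested pattern concrete, here is a minimal, self-contained
userspace analogue using C11 atomics (an illustrative sketch, not kernel
code; all names are made up). atomic_exchange() returns the previous
value, so only the thread that flips the flag from 0 to 1 proceeds, and
every other caller bails out immediately, much like mutex_trylock():

#include <stdatomic.h>
#include <stdio.h>
#include <pthread.h>

static atomic_int ongoing_compaction;

static void *compact(void *arg)
{
	/* Non-zero previous value: someone else is already compacting. */
	if (atomic_exchange(&ongoing_compaction, 1)) {
		printf("thread %ld: compaction in progress, bailing out\n",
		       (long)arg);
		return NULL;
	}

	printf("thread %ld: compacting\n", (long)arg);
	/* ... the actual compaction work would happen here ... */

	/* Drop the flag so the next caller can compact. */
	atomic_store(&ongoing_compaction, 0);
	return NULL;
}

int main(void)
{
	pthread_t tid[4];
	long i;

	for (i = 0; i < 4; i++)
		pthread_create(&tid[i], NULL, compact, (void *)i);
	for (i = 0; i < 4; i++)
		pthread_join(tid[i], NULL);
	return 0;
}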
Hi,
On (23/04/17 04:53), Yosry Ahmed wrote:
> > Introduce a pool compaction mutex and permit only one compaction
> > context at a time.
>
> I am not sure what the best practice is here, but if the only use of
> the mutex is a trylock, do we need a mutex at all? It seems like a
> simple atomic would do the trick. Perhaps something like:
>
> static atomic_t ongoing_compaction = ATOMIC_INIT(0);
> ...
> if (atomic_xchg(&ongoing_compaction, 1))
> 	return;
> ...
> atomic_set(&ongoing_compaction, 0);
Looks good to me. Will switch to atomic_t in v2.
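
For reference, the v2 might end up looking roughly like the sketch below,
with the mutex from the hunks above replaced by Yosry's atomic. The field
name compaction_in_progress and the comments are illustrative, not
necessarily the final code:

struct zs_pool {
	...
	spinlock_t lock;
	/* No init needed: the pool is kzalloc()-ed, so the flag starts at 0. */
	atomic_t compaction_in_progress;
};

unsigned long zs_compact(struct zs_pool *pool)
{
	int i;
	struct size_class *class;
	unsigned long pages_freed = 0;

	/*
	 * A single compaction context is enough: atomic_xchg() returns
	 * the previous value, so only the context that flips 0 -> 1
	 * proceeds; all concurrent callers see 1 and return, just like
	 * the mutex_trylock() variant.
	 */
	if (atomic_xchg(&pool->compaction_in_progress, 1))
		return 0;

	for (i = ZS_SIZE_CLASSES - 1; i >= 0; i--) {
		class = pool->size_class[i];
		if (class->index != i)
			continue;
		pages_freed += __zs_compact(pool, class);
	}
	atomic_long_add(pages_freed, &pool->stats.pages_compacted);
	atomic_set(&pool->compaction_in_progress, 0);

	return pages_freed;
}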