From: Peifeng Li <[email protected]>
When memory is insufficient, threads enter direct_reclaim to reclaim
memory, and direct_reclaim calls shrink_slab to run each shrinker callback
sequentially. If there is lock contention in a shrinker callback (on a
spinlock, a mutex and so on), threads are likely to be stuck in
direct_reclaim for a long time, even after memfree has reached the high
watermark of the zone, resulting in poor performance of those threads.
Example 1: shrinker callback may wait for spinlock
/*
 * Walk cache->c_list from its head and free entries that are neither
 * recently referenced nor in use, returning the number of entries freed.
 *
 * @cache:      cache whose c_list is scanned; c_list_lock guards the list.
 * @nr_to_scan: maximum number of loop iterations; entries that are skipped
 *              (rotated to the tail) consume this budget as well.
 *
 * c_list_lock is taken on entry and re-taken after every freed entry, so a
 * caller can spin here whenever another context holds the lock.
 */
static unsigned long mb_cache_shrink(struct mb_cache *cache,
unsigned long nr_to_scan)
{
struct mb_cache_entry *entry;
unsigned long shrunk = 0;
spin_lock(&cache->c_list_lock);
while (nr_to_scan-- && !list_empty(&cache->c_list)) {
entry = list_first_entry(&cache->c_list,
struct mb_cache_entry, e_list);
/*
 * Skip the entry if it is flagged as referenced, or if its refcount
 * could not be switched from 1 to 0: clear the referenced flag and
 * rotate the entry to the tail of the list.
 */
if (test_bit(MBE_REFERENCED_B, &entry->e_flags) ||
atomic_cmpxchg(&entry->e_refcnt, 1, 0) != 1) {
clear_bit(MBE_REFERENCED_B, &entry->e_flags);
list_move_tail(&entry->e_list, &cache->c_list);
continue;
}
/* Unlink the entry and update the count while the lock is still held. */
list_del_init(&entry->e_list);
cache->c_entry_count--;
/* Drop the lock around the free and the reschedule point. */
spin_unlock(&cache->c_list_lock);
__mb_cache_entry_free(cache, entry);
shrunk++;
cond_resched();
/* Re-take the lock before looking at the list head again. */
spin_lock(&cache->c_list_lock);
}
spin_unlock(&cache->c_list_lock);
return shrunk;
}
Example 2: shrinker callback may wait for mutex lock
/*
 * Shrinker scan callback: evict allocations queued on the context's
 * evict_list and return the number of pages accounted as freed.
 *
 * @s:  shrinker embedded in a struct kbase_context as its 'reclaim' member.
 * @sc: shrink control; only sc->nr_to_scan is read here.
 *
 * The whole walk runs under kctx->jit_evict_lock, so the caller sleeps in
 * mutex_lock() for as long as another context holds that mutex.
 *
 * Return: sum of alloc->evicted over the evicted allocations, or the value
 * produced by assigning -1 to an unsigned long when shrinking a GPU mapping
 * fails (presumably intended as SHRINK_STOP - TODO confirm).
 */
static
unsigned long kbase_mem_evictable_reclaim_scan_objects(struct shrinker *s,
struct shrink_control *sc)
{
struct kbase_context *kctx;
struct kbase_mem_phy_alloc *alloc;
struct kbase_mem_phy_alloc *tmp;
unsigned long freed = 0;
/* Recover the owning context from the embedded shrinker. */
kctx = container_of(s, struct kbase_context, reclaim);
// MTK add to prevent false alarm
lockdep_off();
mutex_lock(&kctx->jit_evict_lock);
/* The _safe iterator is needed because entries are unlinked in the loop. */
list_for_each_entry_safe(alloc, tmp, &kctx->evict_list, evict_node) {
int err;
/* Shrink the GPU mapping of the region from alloc->nents pages to 0. */
err = kbase_mem_shrink_gpu_mapping(kctx, alloc->reg,
0, alloc->nents);
if (err != 0) {
/* freed is unsigned long, so -1 wraps to the largest value. */
freed = -1;
goto out_unlock;
}
/* Record how many pages are evicted, then free that many pages. */
alloc->evicted = alloc->nents;
kbase_free_phy_pages_helper(alloc, alloc->evicted);
freed += alloc->evicted;
list_del_init(&alloc->evict_node);
kbase_jit_backing_lost(alloc->reg);
/* Stop once more than the requested number of pages has been freed. */
if (freed > sc->nr_to_scan)
break;
}
out_unlock:
mutex_unlock(&kctx->jit_evict_lock);
// MTK add to prevent false alarm
lockdep_on();
return freed;
}
On mobile phones, threads are likely to get stuck in a shrinker callback
during direct_reclaim, as in the following example:
<...>-2806 [004] ..... 866458.339840: mm_shrink_slab_start:
dynamic_mem_shrink_scan+0x0/0xb8 ... priority 2
<...>-2806 [004] ..... 866459.339933: mm_shrink_slab_end:
dynamic_mem_shrink_scan+0x0/0xb8 ...
For the above reason, this patch introduces SHRINKER_NO_DIRECT_RECLAIM,
which lets a driver mark its shrinker callback as not to be called in
direct_reclaim unless sc->priority is 0.
The shrinker callback is still called in direct_reclaim when sc->priority
is 0, for two reasons:
1. drop_slab runs with priority 0 and must always call every shrinker
callback.
2. When sc->priority is 0 during direct_reclaim, direct_reclaim is allowed
to call the shrinker callback so that memory is reclaimed in time.
Note:
1. register_shrinker_prepared() does not set SHRINKER_NO_DIRECT_RECLAIM by
default, so the current behavior of the code is maintained.
2. The logic by which kswapd and drop_slab call shrinker callbacks isn't
affected.
Signed-off-by: Peifeng Li <[email protected]>
---
include/linux/shrinker.h | 5 +++++
mm/shrinker.c | 36 ++++++++++++++++++++++++++++++++++--
2 files changed, 39 insertions(+), 2 deletions(-)
diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
index 1a00be90d93a..2d5a8b3a720b 100644
--- a/include/linux/shrinker.h
+++ b/include/linux/shrinker.h
@@ -130,6 +130,11 @@ struct shrinker {
* non-MEMCG_AWARE shrinker should not have this flag set.
*/
#define SHRINKER_NONSLAB BIT(4)
+/*
+ * The shrinker callback must not be called in direct_reclaim unless
+ * sc->priority is 0.
+ */
+#define SHRINKER_NO_DIRECT_RECLAIM BIT(5)
__printf(2, 3)
struct shrinker *shrinker_alloc(unsigned int flags, const char *fmt, ...);
diff --git a/mm/shrinker.c b/mm/shrinker.c
index dc5d2a6fcfc4..3ac50da72494 100644
--- a/mm/shrinker.c
+++ b/mm/shrinker.c
@@ -544,7 +544,23 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
if (!memcg_kmem_online() &&
!(shrinker->flags & SHRINKER_NONSLAB))
continue;
-
+ /*
+ * SHRINKER_NO_DIRECT_RECLAIM, mean that shrinker callback
+ * should not be called in direct_reclaim unless priority
+ * is 0.
+ */
+ if ((shrinker->flags & SHRINKER_NO_DIRECT_RECLAIM) &&
+ !current_is_kswapd()) {
+ /*
+ * 1.Always call shrinker callback in drop_slab that
+ * priority is 0.
+ * 2.sc->priority is 0 during direct_reclaim, allow
+ * direct_reclaim to call shrinker callback, to reclaim
+ * memory timely.
+ */
+ if (priority)
+ continue;
+ }
ret = do_shrink_slab(&sc, shrinker, priority);
if (ret == SHRINK_EMPTY) {
clear_bit(offset, unit->map);
@@ -658,7 +674,23 @@ unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg,
continue;
rcu_read_unlock();
-
+ /*
+ * SHRINKER_NO_DIRECT_RECLAIM, mean that shrinker callback
+ * should not be called in direct_reclaim unless priority
+ * is 0.
+ */
+ if ((shrinker->flags & SHRINKER_NO_DIRECT_RECLAIM) &&
+ !current_is_kswapd()) {
+ /*
+ * 1.Always call shrinker callback in drop_slab that
+ * priority is 0.
+ * 2.sc->priority is 0 during direct_reclaim, allow
+ * direct_reclaim to call shrinker callback, to reclaim
+ * memory timely.
+ */
+ if (priority)
+ continue;
+ }
ret = do_shrink_slab(&sc, shrinker, priority);
if (ret == SHRINK_EMPTY)
ret = 0;
--
2.34.1
Hi Peifeng,
On 2024/4/12 16:07, [email protected] wrote:
> From: Peifeng Li <[email protected]>
>
> In the case of insufficient memory, threads will be in direct_reclaim to
> reclaim memory, direct_reclaim will call shrink_slab to run sequentially
> each shrinker callback. If there is a lock-contention in the shrinker
> callback,such as spinlock,mutex_lock and so on, threads may be likely to
> be stuck in direct_reclaim for a long time, even if the memfree has reached
> the high watermarks of the zone, resulting in poor performance of threads.
>
> Example 1: shrinker callback may wait for spinlock
> static unsigned long mb_cache_shrink(struct mb_cache *cache,
> unsigned long nr_to_scan)
> {
> struct mb_cache_entry *entry;
> unsigned long shrunk = 0;
>
> spin_lock(&cache->c_list_lock);
> while (nr_to_scan-- && !list_empty(&cache->c_list)) {
> entry = list_first_entry(&cache->c_list,
> struct mb_cache_entry, e_list);
> if (test_bit(MBE_REFERENCED_B, &entry->e_flags) ||
> atomic_cmpxchg(&entry->e_refcnt, 1, 0) != 1) {
> clear_bit(MBE_REFERENCED_B, &entry->e_flags);
> list_move_tail(&entry->e_list, &cache->c_list);
> continue;
> }
> list_del_init(&entry->e_list);
> cache->c_entry_count--;
> spin_unlock(&cache->c_list_lock);
> __mb_cache_entry_free(cache, entry);
> shrunk++;
> cond_resched();
> spin_lock(&cache->c_list_lock);
> }
> spin_unlock(&cache->c_list_lock);
>
> return shrunk;
> }
> Example 2: shrinker callback may wait for mutex lock
> static
> unsigned long kbase_mem_evictable_reclaim_scan_objects(struct shrinker *s,
> struct shrink_control *sc)
> {
> struct kbase_context *kctx;
> struct kbase_mem_phy_alloc *alloc;
> struct kbase_mem_phy_alloc *tmp;
> unsigned long freed = 0;
>
> kctx = container_of(s, struct kbase_context, reclaim);
>
> // MTK add to prevent false alarm
> lockdep_off();
>
> mutex_lock(&kctx->jit_evict_lock);
>
> list_for_each_entry_safe(alloc, tmp, &kctx->evict_list, evict_node) {
> int err;
>
> err = kbase_mem_shrink_gpu_mapping(kctx, alloc->reg,
> 0, alloc->nents);
> if (err != 0) {
> freed = -1;
> goto out_unlock;
> }
>
> alloc->evicted = alloc->nents;
>
> kbase_free_phy_pages_helper(alloc, alloc->evicted);
> freed += alloc->evicted;
> list_del_init(&alloc->evict_node);
>
> kbase_jit_backing_lost(alloc->reg);
>
> if (freed > sc->nr_to_scan)
> break;
> }
> out_unlock:
> mutex_unlock(&kctx->jit_evict_lock);
>
> // MTK add to prevent false alarm
> lockdep_on();
>
> return freed;
> }
>
> In mobile-phone,threads are likely to be stuck in shrinker callback during
> direct_reclaim, with example like the following:
> <...>-2806 [004] ..... 866458.339840: mm_shrink_slab_start:
> dynamic_mem_shrink_scan+0x0/0xb8 ... priority 2
> <...>-2806 [004] ..... 866459.339933: mm_shrink_slab_end:
> dynamic_mem_shrink_scan+0x0/0xb8 ...
>
> For the above reason, the patch introduces SHRINKER_NO_DIRECT_RECLAIM that
> allows driver to set shrinker callback not to be called in direct_reclaim
> unless sc->priority is 0.
Hmm, this is just a workaround, no shrinker will want to set this flag.
If a shrinker has a lock contention problem, then it needs to be fixed.
Perhaps executing do_shrink_slab() asynchronously may be a more
fundamental solution, but this may result in untimely reclamation.
>
> The reason why sc->priority=0 allows shrinker callback to be called in
> direct_reclaim is for two reasons:
> 1.Always call all shrinker callback in drop_slab that priority is 0.
> 2.sc->priority is 0 during direct_reclaim, allow direct_reclaim to call
> shrinker callback, to reclaim memory timely.
>
> Note:
> 1.Register_shrinker_prepared() default not to set
This API is no longer included in the latest upstream code. Please
submit a patch based on the latest code.
Thanks,
Qi
> SHRINKER_NO_DIRECT_RECLAIM, to maintain the current behavior of the code.
> 2.Logic of kswapd and drop_slab to call shrinker callback isn't affected.
>
> Signed-off-by: Peifeng Li <[email protected]>
> ---
> include/linux/shrinker.h | 5 +++++
> mm/shrinker.c | 36 ++++++++++++++++++++++++++++++++++--
> 2 files changed, 39 insertions(+), 2 deletions(-)
>
> diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
> index 1a00be90d93a..2d5a8b3a720b 100644
> --- a/include/linux/shrinker.h
> +++ b/include/linux/shrinker.h
> @@ -130,6 +130,11 @@ struct shrinker {
> * non-MEMCG_AWARE shrinker should not have this flag set.
> */
> #define SHRINKER_NONSLAB BIT(4)
> +/*
> + * Can shrinker callback be called in direct_relcaim unless
> + * sc->priority is 0?
> + */
> +#define SHRINKER_NO_DIRECT_RECLAIM BIT(5)
>
> __printf(2, 3)
> struct shrinker *shrinker_alloc(unsigned int flags, const char *fmt, ...);
> diff --git a/mm/shrinker.c b/mm/shrinker.c
> index dc5d2a6fcfc4..3ac50da72494 100644
> --- a/mm/shrinker.c
> +++ b/mm/shrinker.c
> @@ -544,7 +544,23 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
> if (!memcg_kmem_online() &&
> !(shrinker->flags & SHRINKER_NONSLAB))
> continue;
> -
> + /*
> + * SHRINKER_NO_DIRECT_RECLAIM, mean that shrinker callback
> + * should not be called in direct_reclaim unless priority
> + * is 0.
> + */
> + if ((shrinker->flags & SHRINKER_NO_DIRECT_RECLAIM) &&
> + !current_is_kswapd()) {
> + /*
> + * 1.Always call shrinker callback in drop_slab that
> + * priority is 0.
> + * 2.sc->priority is 0 during direct_reclaim, allow
> + * direct_reclaim to call shrinker callback, to reclaim
> + * memory timely.
> + */
> + if (priority)
> + continue;
> + }
> ret = do_shrink_slab(&sc, shrinker, priority);
> if (ret == SHRINK_EMPTY) {
> clear_bit(offset, unit->map);
> @@ -658,7 +674,23 @@ unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg,
> continue;
>
> rcu_read_unlock();
> -
> + /*
> + * SHRINKER_NO_DIRECT_RECLAIM, mean that shrinker callback
> + * should not be called in direct_reclaim unless priority
> + * is 0.
> + */
> + if ((shrinker->flags & SHRINKER_NO_DIRECT_RECLAIM) &&
> + !current_is_kswapd()) {
> + /*
> + * 1.Always call shrinker callback in drop_slab that
> + * priority is 0.
> + * 2.sc->priority is 0 during direct_reclaim, allow
> + * direct_reclaim to call shrinker callback, to reclaim
> + * memory timely.
> + */
> + if (priority)
> + continue;
> + }
> ret = do_shrink_slab(&sc, shrinker, priority);
> if (ret == SHRINK_EMPTY)
> ret = 0;
On 2024/4/12 16:47, Qi Zheng wrote:
> Hi Peifeng,
>
> On 2024/4/12 16:07, [email protected] wrote:
>> From: Peifeng Li <[email protected]>
>>
>> In the case of insufficient memory, threads will be in direct_reclaim to
>> reclaim memory, direct_reclaim will call shrink_slab to run sequentially
>> each shrinker callback. If there is a lock-contention in the shrinker
>> callback,such as spinlock,mutex_lock and so on, threads may be likely to
>> be stuck in direct_reclaim for a long time, even if the memfree has
>> reached
>> the high watermarks of the zone, resulting in poor performance of
>> threads.
>>
>> Example 1: shrinker callback may wait for spinlock
>> static unsigned long mb_cache_shrink(struct mb_cache *cache,
>> unsigned long nr_to_scan)
>> {
>> struct mb_cache_entry *entry;
>> unsigned long shrunk = 0;
>>
>> spin_lock(&cache->c_list_lock);
>> while (nr_to_scan-- && !list_empty(&cache->c_list)) {
>> entry = list_first_entry(&cache->c_list,
>> struct mb_cache_entry,
>> e_list);
>> if (test_bit(MBE_REFERENCED_B, &entry->e_flags) ||
>> atomic_cmpxchg(&entry->e_refcnt, 1, 0) != 1) {
>> clear_bit(MBE_REFERENCED_B, &entry->e_flags);
>> list_move_tail(&entry->e_list, &cache->c_list);
>> continue;
>> }
>> list_del_init(&entry->e_list);
>> cache->c_entry_count--;
>> spin_unlock(&cache->c_list_lock);
>> __mb_cache_entry_free(cache, entry);
>> shrunk++;
>> cond_resched();
>> spin_lock(&cache->c_list_lock);
>> }
>> spin_unlock(&cache->c_list_lock);
>>
>> return shrunk;
>> }
>> Example 2: shrinker callback may wait for mutex lock
>> static
>> unsigned long kbase_mem_evictable_reclaim_scan_objects(struct
>> shrinker *s,
>> struct shrink_control *sc)
>> {
>> struct kbase_context *kctx;
>> struct kbase_mem_phy_alloc *alloc;
>> struct kbase_mem_phy_alloc *tmp;
>> unsigned long freed = 0;
>>
>> kctx = container_of(s, struct kbase_context, reclaim);
>>
>> // MTK add to prevent false alarm
>> lockdep_off();
>>
>> mutex_lock(&kctx->jit_evict_lock);
>>
>> list_for_each_entry_safe(alloc, tmp, &kctx->evict_list,
>> evict_node) {
>> int err;
>>
>> err = kbase_mem_shrink_gpu_mapping(kctx, alloc->reg,
>> 0, alloc->nents);
>> if (err != 0) {
>> freed = -1;
>> goto out_unlock;
>> }
>>
>> alloc->evicted = alloc->nents;
>>
>> kbase_free_phy_pages_helper(alloc, alloc->evicted);
>> freed += alloc->evicted;
>> list_del_init(&alloc->evict_node);
>>
>> kbase_jit_backing_lost(alloc->reg);
>>
>> if (freed > sc->nr_to_scan)
>> break;
>> }
>> out_unlock:
>> mutex_unlock(&kctx->jit_evict_lock);
>>
>> // MTK add to prevent false alarm
>> lockdep_on();
>>
>> return freed;
>> }
>>
>> In mobile-phone,threads are likely to be stuck in shrinker callback
>> during
>> direct_reclaim, with example like the following:
>> <...>-2806 [004] ..... 866458.339840: mm_shrink_slab_start:
>> dynamic_mem_shrink_scan+0x0/0xb8 ... priority 2
>> <...>-2806 [004] ..... 866459.339933: mm_shrink_slab_end:
>> dynamic_mem_shrink_scan+0x0/0xb8 ...
>>
>> For the above reason, the patch introduces SHRINKER_NO_DIRECT_RECLAIM
>> that
>> allows driver to set shrinker callback not to be called in
>> direct_reclaim
>> unless sc->priority is 0.
>
> Hmm, this is just a workaround, no shrinker will want to set this flag.
> If a shrinker has a lock contention problem, then it needs to be fixed.
>
> Perhaps executing do_shrink_slab() asynchronously may be a more
> fundamental solution, but this may result in untimely reclamation.
>
In fact, we have implemented an asynchronous do_shrink_slab(), but the
code changes are relatively large and may have a large impact on
different products. With this submission we also want to ask the
community experts which solution they prefer.
In real projects, most shrinker callbacks use some synchronization
mechanism, and for many drivers, such as ARM's Mali driver, removing that
synchronization would be difficult. If the kernel's memory reclaim path
can be affected by a driver, the robustness of the kernel is greatly
reduced.
Back to this patch: with this flag, at least in cases where a driver
cannot remove its synchronization mechanism, we can recommend that the
driver set this flag to improve the performance of kernel memory reclaim.
>>
>> The reason why sc->priority=0 allows shrinker callback to be called in
>> direct_reclaim is for two reasons:
>> 1.Always call all shrinker callback in drop_slab that priority is 0.
>> 2.sc->priority is 0 during direct_reclaim, allow direct_reclaim to call
>> shrinker callback, to reclaim memory timely.
>>
>> Note:
>> 1.Register_shrinker_prepared() default not to set
>
> This API is no longer included in the latest upstream code. Please
> submit a patch based on the latest code.
>
> Thanks,
> Qi
All right, I will submit a V2 patch with the new commit message.
>
>> SHRINKER_NO_DIRECT_RECLAIM, to maintain the current behavior of the
>> code.
>> 2.Logic of kswapd and drop_slab to call shrinker callback isn't
>> affected.
>>
>> Signed-off-by: Peifeng Li <[email protected]>
>> ---
>> include/linux/shrinker.h | 5 +++++
>> mm/shrinker.c | 36 ++++++++++++++++++++++++++++++++++--
>> 2 files changed, 39 insertions(+), 2 deletions(-)
>>
>> diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
>> index 1a00be90d93a..2d5a8b3a720b 100644
>> --- a/include/linux/shrinker.h
>> +++ b/include/linux/shrinker.h
>> @@ -130,6 +130,11 @@ struct shrinker {
>> * non-MEMCG_AWARE shrinker should not have this flag set.
>> */
>> #define SHRINKER_NONSLAB BIT(4)
>> +/*
>> + * Can shrinker callback be called in direct_relcaim unless
>> + * sc->priority is 0?
>> + */
>> +#define SHRINKER_NO_DIRECT_RECLAIM BIT(5)
>> __printf(2, 3)
>> struct shrinker *shrinker_alloc(unsigned int flags, const char
>> *fmt, ...);
>> diff --git a/mm/shrinker.c b/mm/shrinker.c
>> index dc5d2a6fcfc4..3ac50da72494 100644
>> --- a/mm/shrinker.c
>> +++ b/mm/shrinker.c
>> @@ -544,7 +544,23 @@ static unsigned long shrink_slab_memcg(gfp_t
>> gfp_mask, int nid,
>> if (!memcg_kmem_online() &&
>> !(shrinker->flags & SHRINKER_NONSLAB))
>> continue;
>> -
>> + /*
>> + * SHRINKER_NO_DIRECT_RECLAIM, mean that shrinker callback
>> + * should not be called in direct_reclaim unless priority
>> + * is 0.
>> + */
>> + if ((shrinker->flags & SHRINKER_NO_DIRECT_RECLAIM) &&
>> + !current_is_kswapd()) {
>> + /*
>> + * 1.Always call shrinker callback in drop_slab that
>> + * priority is 0.
>> + * 2.sc->priority is 0 during direct_reclaim, allow
>> + * direct_reclaim to call shrinker callback, to reclaim
>> + * memory timely.
>> + */
>> + if (priority)
>> + continue;
>> + }
>> ret = do_shrink_slab(&sc, shrinker, priority);
>> if (ret == SHRINK_EMPTY) {
>> clear_bit(offset, unit->map);
>> @@ -658,7 +674,23 @@ unsigned long shrink_slab(gfp_t gfp_mask, int
>> nid, struct mem_cgroup *memcg,
>> continue;
>> rcu_read_unlock();
>> -
>> + /*
>> + * SHRINKER_NO_DIRECT_RECLAIM, mean that shrinker callback
>> + * should not be called in direct_reclaim unless priority
>> + * is 0.
>> + */
>> + if ((shrinker->flags & SHRINKER_NO_DIRECT_RECLAIM) &&
>> + !current_is_kswapd()) {
>> + /*
>> + * 1.Always call shrinker callback in drop_slab that
>> + * priority is 0.
>> + * 2.sc->priority is 0 during direct_reclaim, allow
>> + * direct_reclaim to call shrinker callback, to reclaim
>> + * memory timely.
>> + */
>> + if (priority)
>> + continue;
>> + }
>> ret = do_shrink_slab(&sc, shrinker, priority);
>> if (ret == SHRINK_EMPTY)
>> ret = 0;