2023-06-09 08:28:19

by Qi Zheng

[permalink] [raw]
Subject: [PATCH 0/7] revert shrinker_srcu related changes

From: Qi Zheng <[email protected]>

Hi all,

Kernel test robot reports a -88.8% regression in the
stress-ng.ramfs.ops_per_sec test case [1], which is caused by commit
f95bdb700bc6 ("mm: vmscan: make global slab shrink lockless"). The root
cause is that SRCU must avoid checking too frequently for SRCU read-side
critical section exits. Therefore, even if no one is currently in an SRCU
read-side critical section, synchronize_srcu() cannot return quickly.
That's why unregister_shrinker() has become slower.

After discussion, we will try to re-implement the lockless slab shrink
using the refcount+RCU method [2] proposed by Dave Chinner. So revert the
shrinker_srcu-related changes first.

[1]. https://lore.kernel.org/lkml/[email protected]/
[2]. https://lore.kernel.org/lkml/[email protected]/

Andrew, commit c3b5cb881de6 ("mm: vmscan: move
shrinker_debugfs_remove() before synchronize_srcu()") is still in the
mm-unstable branch, so you can drop it directly.

This series is based on v6.4-rc5.

Thanks,
Qi

Qi Zheng (7):
  Revert "mm: shrinkers: convert shrinker_rwsem to mutex"
  Revert "mm: vmscan: remove shrinker_rwsem from
    synchronize_shrinkers()"
  Revert "mm: vmscan: hold write lock to reparent shrinker nr_deferred"
  Revert "mm: shrinkers: make count and scan in shrinker debugfs
    lockless"
  Revert "mm: vmscan: add shrinker_srcu_generation"
  Revert "mm: vmscan: make memcg slab shrink lockless"
  Revert "mm: vmscan: make global slab shrink lockless"

drivers/md/dm-cache-metadata.c | 2 +-
drivers/md/dm-thin-metadata.c | 2 +-
fs/super.c | 2 +-
mm/shrinker_debug.c | 39 ++++++----
mm/vmscan.c | 125 +++++++++++++++------------------
5 files changed, 82 insertions(+), 88 deletions(-)

--
2.30.2



2023-06-09 08:28:39

by Qi Zheng

[permalink] [raw]
Subject: [PATCH 6/7] Revert "mm: vmscan: make memcg slab shrink lockless"

From: Qi Zheng <[email protected]>

This reverts commit caa05325c9126c77ebf114edce51536a0d0a9a08.

Kernel test robot reports a -88.8% regression in the
stress-ng.ramfs.ops_per_sec test case [1], which is caused by commit
f95bdb700bc6 ("mm: vmscan: make global slab shrink lockless"). The root
cause is that SRCU must avoid checking too frequently for SRCU read-side
critical section exits. Therefore, even if no one is currently in an SRCU
read-side critical section, synchronize_srcu() cannot return quickly.
That's why unregister_shrinker() has become slower.

After discussion, we will try to re-implement the lockless slab shrink
using the refcount+RCU method [2] proposed by Dave Chinner. So revert the
shrinker_srcu-related changes first.

[1]. https://lore.kernel.org/lkml/[email protected]/
[2]. https://lore.kernel.org/lkml/[email protected]/

Reported-by: kernel test robot <[email protected]>
Closes: https://lore.kernel.org/oe-lkp/[email protected]
Signed-off-by: Qi Zheng <[email protected]>
---
mm/vmscan.c | 45 +++++++++++++++++++--------------------------
1 file changed, 19 insertions(+), 26 deletions(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 50775b73d0c7..a008d7f2d0fc 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -210,21 +210,8 @@ static inline int shrinker_defer_size(int nr_items)
static struct shrinker_info *shrinker_info_protected(struct mem_cgroup *memcg,
int nid)
{
- return srcu_dereference_check(memcg->nodeinfo[nid]->shrinker_info,
- &shrinker_srcu,
- lockdep_is_held(&shrinker_rwsem));
-}
-
-static struct shrinker_info *shrinker_info_srcu(struct mem_cgroup *memcg,
- int nid)
-{
- return srcu_dereference(memcg->nodeinfo[nid]->shrinker_info,
- &shrinker_srcu);
-}
-
-static void free_shrinker_info_rcu(struct rcu_head *head)
-{
- kvfree(container_of(head, struct shrinker_info, rcu));
+ return rcu_dereference_protected(memcg->nodeinfo[nid]->shrinker_info,
+ lockdep_is_held(&shrinker_rwsem));
}

static int expand_one_shrinker_info(struct mem_cgroup *memcg,
@@ -265,7 +252,7 @@ static int expand_one_shrinker_info(struct mem_cgroup *memcg,
defer_size - old_defer_size);

rcu_assign_pointer(pn->shrinker_info, new);
- call_srcu(&shrinker_srcu, &old->rcu, free_shrinker_info_rcu);
+ kvfree_rcu(old, rcu);
}

return 0;
@@ -351,16 +338,15 @@ void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id)
{
if (shrinker_id >= 0 && memcg && !mem_cgroup_is_root(memcg)) {
struct shrinker_info *info;
- int srcu_idx;

- srcu_idx = srcu_read_lock(&shrinker_srcu);
- info = shrinker_info_srcu(memcg, nid);
+ rcu_read_lock();
+ info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info);
if (!WARN_ON_ONCE(shrinker_id >= info->map_nr_max)) {
/* Pairs with smp mb in shrink_slab() */
smp_mb__before_atomic();
set_bit(shrinker_id, info->map);
}
- srcu_read_unlock(&shrinker_srcu, srcu_idx);
+ rcu_read_unlock();
}
}

@@ -374,6 +360,7 @@ static int prealloc_memcg_shrinker(struct shrinker *shrinker)
return -ENOSYS;

down_write(&shrinker_rwsem);
+ /* This may call shrinker, so it must use down_read_trylock() */
id = idr_alloc(&shrinker_idr, shrinker, 0, 0, GFP_KERNEL);
if (id < 0)
goto unlock;
@@ -407,7 +394,7 @@ static long xchg_nr_deferred_memcg(int nid, struct shrinker *shrinker,
{
struct shrinker_info *info;

- info = shrinker_info_srcu(memcg, nid);
+ info = shrinker_info_protected(memcg, nid);
return atomic_long_xchg(&info->nr_deferred[shrinker->id], 0);
}

@@ -416,7 +403,7 @@ static long add_nr_deferred_memcg(long nr, int nid, struct shrinker *shrinker,
{
struct shrinker_info *info;

- info = shrinker_info_srcu(memcg, nid);
+ info = shrinker_info_protected(memcg, nid);
return atomic_long_add_return(nr, &info->nr_deferred[shrinker->id]);
}

@@ -947,14 +934,15 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
{
struct shrinker_info *info;
unsigned long ret, freed = 0;
- int srcu_idx;
int i;

if (!mem_cgroup_online(memcg))
return 0;

- srcu_idx = srcu_read_lock(&shrinker_srcu);
- info = shrinker_info_srcu(memcg, nid);
+ if (!down_read_trylock(&shrinker_rwsem))
+ return 0;
+
+ info = shrinker_info_protected(memcg, nid);
if (unlikely(!info))
goto unlock;

@@ -1004,9 +992,14 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
set_shrinker_bit(memcg, nid, i);
}
freed += ret;
+
+ if (rwsem_is_contended(&shrinker_rwsem)) {
+ freed = freed ? : 1;
+ break;
+ }
}
unlock:
- srcu_read_unlock(&shrinker_srcu, srcu_idx);
+ up_read(&shrinker_rwsem);
return freed;
}
#else /* CONFIG_MEMCG */
--
2.30.2


2023-06-09 08:28:51

by Qi Zheng

[permalink] [raw]
Subject: [PATCH 5/7] Revert "mm: vmscan: add shrinker_srcu_generation"

From: Qi Zheng <[email protected]>

This reverts commit 475733dda5aedba9e086379aafe6b5ffd53e8f5e.

Kernel test robot reports a -88.8% regression in the
stress-ng.ramfs.ops_per_sec test case [1], which is caused by commit
f95bdb700bc6 ("mm: vmscan: make global slab shrink lockless"). The root
cause is that SRCU must avoid checking too frequently for SRCU read-side
critical section exits. Therefore, even if no one is currently in an SRCU
read-side critical section, synchronize_srcu() cannot return quickly.
That's why unregister_shrinker() has become slower.

We will try to re-implement the lockless slab shrink using the
refcount+RCU method [2] proposed by Dave Chinner. So revert the
shrinker_srcu-related changes first.

[1]. https://lore.kernel.org/lkml/[email protected]/
[2]. https://lore.kernel.org/lkml/[email protected]/

Reported-by: kernel test robot <[email protected]>
Closes: https://lore.kernel.org/oe-lkp/[email protected]
Signed-off-by: Qi Zheng <[email protected]>
---
mm/vmscan.c | 24 ++++--------------------
1 file changed, 4 insertions(+), 20 deletions(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index d1d309fc3212..50775b73d0c7 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -192,7 +192,6 @@ int vm_swappiness = 60;
LIST_HEAD(shrinker_list);
DECLARE_RWSEM(shrinker_rwsem);
DEFINE_SRCU(shrinker_srcu);
-static atomic_t shrinker_srcu_generation = ATOMIC_INIT(0);

#ifdef CONFIG_MEMCG
static int shrinker_nr_max;
@@ -818,7 +817,6 @@ void unregister_shrinker(struct shrinker *shrinker)
debugfs_entry = shrinker_debugfs_detach(shrinker, &debugfs_id);
up_write(&shrinker_rwsem);

- atomic_inc(&shrinker_srcu_generation);
synchronize_srcu(&shrinker_srcu);

shrinker_debugfs_remove(debugfs_entry, debugfs_id);
@@ -840,7 +838,6 @@ void synchronize_shrinkers(void)
{
down_write(&shrinker_rwsem);
up_write(&shrinker_rwsem);
- atomic_inc(&shrinker_srcu_generation);
synchronize_srcu(&shrinker_srcu);
}
EXPORT_SYMBOL(synchronize_shrinkers);
@@ -950,20 +947,18 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
{
struct shrinker_info *info;
unsigned long ret, freed = 0;
- int srcu_idx, generation;
- int i = 0;
+ int srcu_idx;
+ int i;

if (!mem_cgroup_online(memcg))
return 0;

-again:
srcu_idx = srcu_read_lock(&shrinker_srcu);
info = shrinker_info_srcu(memcg, nid);
if (unlikely(!info))
goto unlock;

- generation = atomic_read(&shrinker_srcu_generation);
- for_each_set_bit_from(i, info->map, info->map_nr_max) {
+ for_each_set_bit(i, info->map, info->map_nr_max) {
struct shrink_control sc = {
.gfp_mask = gfp_mask,
.nid = nid,
@@ -1009,11 +1004,6 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
set_shrinker_bit(memcg, nid, i);
}
freed += ret;
- if (atomic_read(&shrinker_srcu_generation) != generation) {
- srcu_read_unlock(&shrinker_srcu, srcu_idx);
- i++;
- goto again;
- }
}
unlock:
srcu_read_unlock(&shrinker_srcu, srcu_idx);
@@ -1053,7 +1043,7 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
{
unsigned long ret, freed = 0;
struct shrinker *shrinker;
- int srcu_idx, generation;
+ int srcu_idx;

/*
* The root memcg might be allocated even though memcg is disabled
@@ -1067,7 +1057,6 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid,

srcu_idx = srcu_read_lock(&shrinker_srcu);

- generation = atomic_read(&shrinker_srcu_generation);
list_for_each_entry_srcu(shrinker, &shrinker_list, list,
srcu_read_lock_held(&shrinker_srcu)) {
struct shrink_control sc = {
@@ -1080,11 +1069,6 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
if (ret == SHRINK_EMPTY)
ret = 0;
freed += ret;
-
- if (atomic_read(&shrinker_srcu_generation) != generation) {
- freed = freed ? : 1;
- break;
- }
}

srcu_read_unlock(&shrinker_srcu, srcu_idx);
--
2.30.2


2023-06-09 08:30:49

by Qi Zheng

[permalink] [raw]
Subject: [PATCH 1/7] Revert "mm: shrinkers: convert shrinker_rwsem to mutex"

From: Qi Zheng <[email protected]>

This reverts commit cf2e309ebca7bb0916771839f9b580b06c778530.

Kernel test robot reports a -88.8% regression in the
stress-ng.ramfs.ops_per_sec test case [1], which is caused by commit
f95bdb700bc6 ("mm: vmscan: make global slab shrink lockless"). The root
cause is that SRCU must avoid checking too frequently for SRCU read-side
critical section exits. Therefore, even if no one is currently in an SRCU
read-side critical section, synchronize_srcu() cannot return quickly.
That's why unregister_shrinker() has become slower.

After discussion, we will try to re-implement the lockless slab shrink
using the refcount+RCU method [2] proposed by Dave Chinner. So convert
shrinker_mutex back to shrinker_rwsem first.

[1]. https://lore.kernel.org/lkml/[email protected]/
[2]. https://lore.kernel.org/lkml/[email protected]/

Reported-by: kernel test robot <[email protected]>
Closes: https://lore.kernel.org/oe-lkp/[email protected]
Signed-off-by: Qi Zheng <[email protected]>
---
drivers/md/dm-cache-metadata.c | 2 +-
drivers/md/dm-thin-metadata.c | 2 +-
fs/super.c | 2 +-
mm/shrinker_debug.c | 14 +++++++-------
mm/vmscan.c | 34 +++++++++++++++++-----------------
5 files changed, 27 insertions(+), 27 deletions(-)

diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c
index 9e0c69958587..acffed750e3e 100644
--- a/drivers/md/dm-cache-metadata.c
+++ b/drivers/md/dm-cache-metadata.c
@@ -1828,7 +1828,7 @@ int dm_cache_metadata_abort(struct dm_cache_metadata *cmd)
* Replacement block manager (new_bm) is created and old_bm destroyed outside of
* cmd root_lock to avoid ABBA deadlock that would result (due to life-cycle of
* shrinker associated with the block manager's bufio client vs cmd root_lock).
- * - must take shrinker_mutex without holding cmd->root_lock
+ * - must take shrinker_rwsem without holding cmd->root_lock
*/
new_bm = dm_block_manager_create(cmd->bdev, DM_CACHE_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
CACHE_MAX_CONCURRENT_LOCKS);
diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
index 9f5cb52c5763..fd464fb024c3 100644
--- a/drivers/md/dm-thin-metadata.c
+++ b/drivers/md/dm-thin-metadata.c
@@ -1887,7 +1887,7 @@ int dm_pool_abort_metadata(struct dm_pool_metadata *pmd)
* Replacement block manager (new_bm) is created and old_bm destroyed outside of
* pmd root_lock to avoid ABBA deadlock that would result (due to life-cycle of
* shrinker associated with the block manager's bufio client vs pmd root_lock).
- * - must take shrinker_mutex without holding pmd->root_lock
+ * - must take shrinker_rwsem without holding pmd->root_lock
*/
new_bm = dm_block_manager_create(pmd->bdev, THIN_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
THIN_MAX_CONCURRENT_LOCKS);
diff --git a/fs/super.c b/fs/super.c
index 34afe411cf2b..04bc62ab7dfe 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -54,7 +54,7 @@ static char *sb_writers_name[SB_FREEZE_LEVELS] = {
* One thing we have to be careful of with a per-sb shrinker is that we don't
* drop the last active reference to the superblock from within the shrinker.
* If that happens we could trigger unregistering the shrinker from within the
- * shrinker path and that leads to deadlock on the shrinker_mutex. Hence we
+ * shrinker path and that leads to deadlock on the shrinker_rwsem. Hence we
* take a passive reference to the superblock to avoid this from occurring.
*/
static unsigned long super_cache_scan(struct shrinker *shrink,
diff --git a/mm/shrinker_debug.c b/mm/shrinker_debug.c
index fe10436d9911..2be15b8a6d0b 100644
--- a/mm/shrinker_debug.c
+++ b/mm/shrinker_debug.c
@@ -8,7 +8,7 @@
#include <linux/srcu.h>

/* defined in vmscan.c */
-extern struct mutex shrinker_mutex;
+extern struct rw_semaphore shrinker_rwsem;
extern struct list_head shrinker_list;
extern struct srcu_struct shrinker_srcu;

@@ -168,7 +168,7 @@ int shrinker_debugfs_add(struct shrinker *shrinker)
char buf[128];
int id;

- lockdep_assert_held(&shrinker_mutex);
+ lockdep_assert_held(&shrinker_rwsem);

/* debugfs isn't initialized yet, add debugfs entries later. */
if (!shrinker_debugfs_root)
@@ -211,7 +211,7 @@ int shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...)
if (!new)
return -ENOMEM;

- mutex_lock(&shrinker_mutex);
+ down_write(&shrinker_rwsem);

old = shrinker->name;
shrinker->name = new;
@@ -229,7 +229,7 @@ int shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...)
shrinker->debugfs_entry = entry;
}

- mutex_unlock(&shrinker_mutex);
+ up_write(&shrinker_rwsem);

kfree_const(old);

@@ -242,7 +242,7 @@ struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker,
{
struct dentry *entry = shrinker->debugfs_entry;

- lockdep_assert_held(&shrinker_mutex);
+ lockdep_assert_held(&shrinker_rwsem);

kfree_const(shrinker->name);
shrinker->name = NULL;
@@ -271,14 +271,14 @@ static int __init shrinker_debugfs_init(void)
shrinker_debugfs_root = dentry;

/* Create debugfs entries for shrinkers registered at boot */
- mutex_lock(&shrinker_mutex);
+ down_write(&shrinker_rwsem);
list_for_each_entry(shrinker, &shrinker_list, list)
if (!shrinker->debugfs_entry) {
ret = shrinker_debugfs_add(shrinker);
if (ret)
break;
}
- mutex_unlock(&shrinker_mutex);
+ up_write(&shrinker_rwsem);

return ret;
}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 6d0cd2840cf0..4730dba253c8 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -35,7 +35,7 @@
#include <linux/cpuset.h>
#include <linux/compaction.h>
#include <linux/notifier.h>
-#include <linux/mutex.h>
+#include <linux/rwsem.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
@@ -190,7 +190,7 @@ struct scan_control {
int vm_swappiness = 60;

LIST_HEAD(shrinker_list);
-DEFINE_MUTEX(shrinker_mutex);
+DECLARE_RWSEM(shrinker_rwsem);
DEFINE_SRCU(shrinker_srcu);
static atomic_t shrinker_srcu_generation = ATOMIC_INIT(0);

@@ -213,7 +213,7 @@ static struct shrinker_info *shrinker_info_protected(struct mem_cgroup *memcg,
{
return srcu_dereference_check(memcg->nodeinfo[nid]->shrinker_info,
&shrinker_srcu,
- lockdep_is_held(&shrinker_mutex));
+ lockdep_is_held(&shrinker_rwsem));
}

static struct shrinker_info *shrinker_info_srcu(struct mem_cgroup *memcg,
@@ -292,7 +292,7 @@ int alloc_shrinker_info(struct mem_cgroup *memcg)
int nid, size, ret = 0;
int map_size, defer_size = 0;

- mutex_lock(&shrinker_mutex);
+ down_write(&shrinker_rwsem);
map_size = shrinker_map_size(shrinker_nr_max);
defer_size = shrinker_defer_size(shrinker_nr_max);
size = map_size + defer_size;
@@ -308,7 +308,7 @@ int alloc_shrinker_info(struct mem_cgroup *memcg)
info->map_nr_max = shrinker_nr_max;
rcu_assign_pointer(memcg->nodeinfo[nid]->shrinker_info, info);
}
- mutex_unlock(&shrinker_mutex);
+ up_write(&shrinker_rwsem);

return ret;
}
@@ -324,7 +324,7 @@ static int expand_shrinker_info(int new_id)
if (!root_mem_cgroup)
goto out;

- lockdep_assert_held(&shrinker_mutex);
+ lockdep_assert_held(&shrinker_rwsem);

map_size = shrinker_map_size(new_nr_max);
defer_size = shrinker_defer_size(new_nr_max);
@@ -374,7 +374,7 @@ static int prealloc_memcg_shrinker(struct shrinker *shrinker)
if (mem_cgroup_disabled())
return -ENOSYS;

- mutex_lock(&shrinker_mutex);
+ down_write(&shrinker_rwsem);
id = idr_alloc(&shrinker_idr, shrinker, 0, 0, GFP_KERNEL);
if (id < 0)
goto unlock;
@@ -388,7 +388,7 @@ static int prealloc_memcg_shrinker(struct shrinker *shrinker)
shrinker->id = id;
ret = 0;
unlock:
- mutex_unlock(&shrinker_mutex);
+ up_write(&shrinker_rwsem);
return ret;
}

@@ -398,7 +398,7 @@ static void unregister_memcg_shrinker(struct shrinker *shrinker)

BUG_ON(id < 0);

- lockdep_assert_held(&shrinker_mutex);
+ lockdep_assert_held(&shrinker_rwsem);

idr_remove(&shrinker_idr, id);
}
@@ -433,7 +433,7 @@ void reparent_shrinker_deferred(struct mem_cgroup *memcg)
parent = root_mem_cgroup;

/* Prevent from concurrent shrinker_info expand */
- mutex_lock(&shrinker_mutex);
+ down_write(&shrinker_rwsem);
for_each_node(nid) {
child_info = shrinker_info_protected(memcg, nid);
parent_info = shrinker_info_protected(parent, nid);
@@ -442,7 +442,7 @@ void reparent_shrinker_deferred(struct mem_cgroup *memcg)
atomic_long_add(nr, &parent_info->nr_deferred[i]);
}
}
- mutex_unlock(&shrinker_mutex);
+ up_write(&shrinker_rwsem);
}

static bool cgroup_reclaim(struct scan_control *sc)
@@ -743,9 +743,9 @@ void free_prealloced_shrinker(struct shrinker *shrinker)
shrinker->name = NULL;
#endif
if (shrinker->flags & SHRINKER_MEMCG_AWARE) {
- mutex_lock(&shrinker_mutex);
+ down_write(&shrinker_rwsem);
unregister_memcg_shrinker(shrinker);
- mutex_unlock(&shrinker_mutex);
+ up_write(&shrinker_rwsem);
return;
}

@@ -755,11 +755,11 @@ void free_prealloced_shrinker(struct shrinker *shrinker)

void register_shrinker_prepared(struct shrinker *shrinker)
{
- mutex_lock(&shrinker_mutex);
+ down_write(&shrinker_rwsem);
list_add_tail_rcu(&shrinker->list, &shrinker_list);
shrinker->flags |= SHRINKER_REGISTERED;
shrinker_debugfs_add(shrinker);
- mutex_unlock(&shrinker_mutex);
+ up_write(&shrinker_rwsem);
}

static int __register_shrinker(struct shrinker *shrinker)
@@ -810,13 +810,13 @@ void unregister_shrinker(struct shrinker *shrinker)
if (!(shrinker->flags & SHRINKER_REGISTERED))
return;

- mutex_lock(&shrinker_mutex);
+ down_write(&shrinker_rwsem);
list_del_rcu(&shrinker->list);
shrinker->flags &= ~SHRINKER_REGISTERED;
if (shrinker->flags & SHRINKER_MEMCG_AWARE)
unregister_memcg_shrinker(shrinker);
debugfs_entry = shrinker_debugfs_detach(shrinker, &debugfs_id);
- mutex_unlock(&shrinker_mutex);
+ up_write(&shrinker_rwsem);

atomic_inc(&shrinker_srcu_generation);
synchronize_srcu(&shrinker_srcu);
--
2.30.2


2023-06-09 08:32:55

by Qi Zheng

[permalink] [raw]
Subject: [PATCH 4/7] Revert "mm: shrinkers: make count and scan in shrinker debugfs lockless"

From: Qi Zheng <[email protected]>

This reverts commit 20cd1892fcc3efc10a7ac327cc3790494bec46b5.

Kernel test robot reports a -88.8% regression in the
stress-ng.ramfs.ops_per_sec test case [1], which is caused by commit
f95bdb700bc6 ("mm: vmscan: make global slab shrink lockless"). The root
cause is that SRCU must avoid checking too frequently for SRCU read-side
critical section exits. Therefore, even if no one is currently in an SRCU
read-side critical section, synchronize_srcu() cannot return quickly.
That's why unregister_shrinker() has become slower.

We will try to re-implement the lockless slab shrink using the
refcount+RCU method [2] proposed by Dave Chinner. So revert the
shrinker_srcu-related changes first.

[1]. https://lore.kernel.org/lkml/[email protected]/
[2]. https://lore.kernel.org/lkml/[email protected]/

Reported-by: kernel test robot <[email protected]>
Closes: https://lore.kernel.org/oe-lkp/[email protected]
Signed-off-by: Qi Zheng <[email protected]>
---
mm/shrinker_debug.c | 25 +++++++++++++++++--------
1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/mm/shrinker_debug.c b/mm/shrinker_debug.c
index 2be15b8a6d0b..3ab53fad8876 100644
--- a/mm/shrinker_debug.c
+++ b/mm/shrinker_debug.c
@@ -5,12 +5,10 @@
#include <linux/seq_file.h>
#include <linux/shrinker.h>
#include <linux/memcontrol.h>
-#include <linux/srcu.h>

/* defined in vmscan.c */
extern struct rw_semaphore shrinker_rwsem;
extern struct list_head shrinker_list;
-extern struct srcu_struct shrinker_srcu;

static DEFINE_IDA(shrinker_debugfs_ida);
static struct dentry *shrinker_debugfs_root;
@@ -51,13 +49,18 @@ static int shrinker_debugfs_count_show(struct seq_file *m, void *v)
struct mem_cgroup *memcg;
unsigned long total;
bool memcg_aware;
- int ret = 0, nid, srcu_idx;
+ int ret, nid;

count_per_node = kcalloc(nr_node_ids, sizeof(unsigned long), GFP_KERNEL);
if (!count_per_node)
return -ENOMEM;

- srcu_idx = srcu_read_lock(&shrinker_srcu);
+ ret = down_read_killable(&shrinker_rwsem);
+ if (ret) {
+ kfree(count_per_node);
+ return ret;
+ }
+ rcu_read_lock();

memcg_aware = shrinker->flags & SHRINKER_MEMCG_AWARE;

@@ -88,7 +91,8 @@ static int shrinker_debugfs_count_show(struct seq_file *m, void *v)
}
} while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL);

- srcu_read_unlock(&shrinker_srcu, srcu_idx);
+ rcu_read_unlock();
+ up_read(&shrinker_rwsem);

kfree(count_per_node);
return ret;
@@ -111,8 +115,9 @@ static ssize_t shrinker_debugfs_scan_write(struct file *file,
.gfp_mask = GFP_KERNEL,
};
struct mem_cgroup *memcg = NULL;
- int nid, srcu_idx;
+ int nid;
char kbuf[72];
+ ssize_t ret;

read_len = size < (sizeof(kbuf) - 1) ? size : (sizeof(kbuf) - 1);
if (copy_from_user(kbuf, buf, read_len))
@@ -141,7 +146,11 @@ static ssize_t shrinker_debugfs_scan_write(struct file *file,
return -EINVAL;
}

- srcu_idx = srcu_read_lock(&shrinker_srcu);
+ ret = down_read_killable(&shrinker_rwsem);
+ if (ret) {
+ mem_cgroup_put(memcg);
+ return ret;
+ }

sc.nid = nid;
sc.memcg = memcg;
@@ -150,7 +159,7 @@ static ssize_t shrinker_debugfs_scan_write(struct file *file,

shrinker->scan_objects(shrinker, &sc);

- srcu_read_unlock(&shrinker_srcu, srcu_idx);
+ up_read(&shrinker_rwsem);
mem_cgroup_put(memcg);

return size;
--
2.30.2