2021-03-17 07:59:02

by Xunlei Pang

[permalink] [raw]
Subject: [PATCH v4 0/3] mm/slub: Fix count_partial() problem

count_partial() can hold the n->list_lock spinlock for quite a long time,
which causes a lot of trouble for the system. This series eliminates the problem.

v1->v2:
- Improved changelog and variable naming for PATCH 1~2.
- PATCH3 adds a per-cpu counter to avoid a performance regression
in concurrent __slab_free().

v2->v3:
- Changed "page->inuse" to the safe "new.inuse", etc.
- Used CONFIG_SLUB_DEBUG and CONFIG_SYSFS condition for new counters.
- atomic_long_t -> unsigned long

v3->v4:
- Introduced a new CONFIG_SLUB_DEBUG_PARTIAL option so that the counters can be enabled for production use.
- Merged PATCH 4 into PATCH 1.

[Testing]
According to my tests, there might be a small performance impact
under extremely concurrent __slab_free() calls.

On my 32-cpu 2-socket physical machine:
Intel(R) Xeon(R) CPU E5-2650 v2 @ 2.60GHz

1) perf stat --null --repeat 10 -- hackbench 20 thread 20000

== original, not patched
Performance counter stats for 'hackbench 20 thread 20000' (10 runs):

24.536050899 seconds time elapsed ( +- 0.24% )


Performance counter stats for 'hackbench 20 thread 20000' (10 runs):

24.588049142 seconds time elapsed ( +- 0.35% )


== patched with patch1~4
Performance counter stats for 'hackbench 20 thread 20000' (10 runs):

24.670892273 seconds time elapsed ( +- 0.29% )


Performance counter stats for 'hackbench 20 thread 20000' (10 runs):

24.746755689 seconds time elapsed ( +- 0.21% )


2) perf stat --null --repeat 10 -- hackbench 32 thread 20000

== original, not patched
Performance counter stats for 'hackbench 32 thread 20000' (10 runs):

39.784911855 seconds time elapsed ( +- 0.14% )

Performance counter stats for 'hackbench 32 thread 20000' (10 runs):

39.868687608 seconds time elapsed ( +- 0.19% )

== patched with patch1~4
Performance counter stats for 'hackbench 32 thread 20000' (10 runs):

39.681273015 seconds time elapsed ( +- 0.21% )

Performance counter stats for 'hackbench 32 thread 20000' (10 runs):

39.681238459 seconds time elapsed ( +- 0.09% )

Xunlei Pang (3):
mm/slub: Introduce two counters for partial objects
percpu: Export per_cpu_sum()
mm/slub: Get rid of count_partial()

include/linux/percpu-defs.h | 10 ++++
init/Kconfig | 13 +++++
kernel/locking/percpu-rwsem.c | 10 ----
mm/slab.h | 6 ++
mm/slub.c | 129 +++++++++++++++++++++++++++++-------------
5 files changed, 120 insertions(+), 48 deletions(-)

--
1.8.3.1


2021-03-17 07:59:45

by Xunlei Pang

[permalink] [raw]
Subject: [PATCH v4 3/3] mm/slub: Get rid of count_partial()

Now that the partial counters are ready, let's use them to get rid
of count_partial().

The partial counters are used to calculate the accurate partial
usage when CONFIG_SLUB_DEBUG_PARTIAL is on; otherwise the partial
usage statistics are simply reported as zero.

Tested-by: James Wang <[email protected]>
Signed-off-by: Xunlei Pang <[email protected]>
---
mm/slub.c | 64 +++++++++++++++++++++++++++++++--------------------------------
1 file changed, 31 insertions(+), 33 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index 856aea4..9bff669 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2533,11 +2533,6 @@ static inline int node_match(struct page *page, int node)
}

#ifdef CONFIG_SLUB_DEBUG
-static int count_free(struct page *page)
-{
- return page->objects - page->inuse;
-}
-
static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
{
return atomic_long_read(&n->total_objects);
@@ -2545,18 +2540,33 @@ static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
#endif /* CONFIG_SLUB_DEBUG */

#if defined(CONFIG_SLUB_DEBUG) || defined(CONFIG_SYSFS)
-static unsigned long count_partial(struct kmem_cache_node *n,
- int (*get_count)(struct page *))
+enum partial_item { PARTIAL_FREE, PARTIAL_INUSE, PARTIAL_TOTAL, PARTIAL_SLAB };
+
+static unsigned long partial_counter(struct kmem_cache_node *n,
+ enum partial_item item)
{
- unsigned long flags;
- unsigned long x = 0;
- struct page *page;
+ unsigned long ret = 0;

- spin_lock_irqsave(&n->list_lock, flags);
- list_for_each_entry(page, &n->partial, slab_list)
- x += get_count(page);
- spin_unlock_irqrestore(&n->list_lock, flags);
- return x;
+#ifdef CONFIG_SLUB_DEBUG_PARTIAL
+ if (item == PARTIAL_FREE) {
+ ret = per_cpu_sum(*n->partial_free_objs);
+ if ((long)ret < 0)
+ ret = 0;
+ } else if (item == PARTIAL_TOTAL) {
+ ret = n->partial_total_objs;
+ } else if (item == PARTIAL_INUSE) {
+ ret = per_cpu_sum(*n->partial_free_objs);
+ if ((long)ret < 0)
+ ret = 0;
+ ret = n->partial_total_objs - ret;
+ if ((long)ret < 0)
+ ret = 0;
+ } else { /* item == PARTIAL_SLAB */
+ ret = n->nr_partial;
+ }
+#endif
+
+ return ret;
}
#endif /* CONFIG_SLUB_DEBUG || CONFIG_SYSFS */

@@ -2587,7 +2597,7 @@ static unsigned long count_partial(struct kmem_cache_node *n,
unsigned long nr_objs;
unsigned long nr_free;

- nr_free = count_partial(n, count_free);
+ nr_free = partial_counter(n, PARTIAL_FREE);
nr_slabs = node_nr_slabs(n);
nr_objs = node_nr_objs(n);

@@ -4654,18 +4664,6 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
EXPORT_SYMBOL(__kmalloc_node_track_caller);
#endif

-#ifdef CONFIG_SYSFS
-static int count_inuse(struct page *page)
-{
- return page->inuse;
-}
-
-static int count_total(struct page *page)
-{
- return page->objects;
-}
-#endif
-
#ifdef CONFIG_SLUB_DEBUG
static void validate_slab(struct kmem_cache *s, struct page *page)
{
@@ -5102,7 +5100,7 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
x = atomic_long_read(&n->total_objects);
else if (flags & SO_OBJECTS)
x = atomic_long_read(&n->total_objects) -
- count_partial(n, count_free);
+ partial_counter(n, PARTIAL_FREE);
else
x = atomic_long_read(&n->nr_slabs);
total += x;
@@ -5116,11 +5114,11 @@ static ssize_t show_slab_objects(struct kmem_cache *s,

for_each_kmem_cache_node(s, node, n) {
if (flags & SO_TOTAL)
- x = count_partial(n, count_total);
+ x = partial_counter(n, PARTIAL_TOTAL);
else if (flags & SO_OBJECTS)
- x = count_partial(n, count_inuse);
+ x = partial_counter(n, PARTIAL_INUSE);
else
- x = n->nr_partial;
+ x = partial_counter(n, PARTIAL_SLAB);
total += x;
nodes[node] += x;
}
@@ -5884,7 +5882,7 @@ void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo)
for_each_kmem_cache_node(s, node, n) {
nr_slabs += node_nr_slabs(n);
nr_objs += node_nr_objs(n);
- nr_free += count_partial(n, count_free);
+ nr_free += partial_counter(n, PARTIAL_FREE);
}

sinfo->active_objs = nr_objs - nr_free;
--
1.8.3.1