2023-10-24 09:35:05

by Chengming Zhou

Subject: [RFC PATCH v3 6/7] slub: Delay freezing of partial slabs

From: Chengming Zhou <[email protected]>

Currently we freeze slabs when moving them from the node partial list to
the cpu partial list; this method needs two cmpxchg_double operations:

1. freeze the slab (acquire_slab()) under the node list_lock
2. get_freelist() when the slab is picked for use in ___slab_alloc()

Actually we don't need to freeze when moving slabs off the node partial
list; we can delay freezing until the slab freelist is used in
___slab_alloc(), saving one cmpxchg_double().

There are other good points as well:
- Moving slabs between the node partial list and the cpu partial list
becomes simpler, since we don't need to freeze or unfreeze at all.

- Contention on the node list_lock is reduced, since we no longer need
to freeze any slab under the node list_lock.

We can achieve this because no concurrent path manipulates the partial
slab list except the __slab_free() path, which is now serialized.

Since the slab returned by the get_partial() interfaces is not frozen
anymore and there is no freelist in the partial_context, we need to use
the newly introduced freeze_slab() to freeze it and get its freelist.

Similarly, the slabs on the CPU partial list are not frozen anymore, so
we need to call freeze_slab() on them before use.
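
For reference, freeze_slab() is introduced by an earlier patch in this
series and is not visible in this diff: it takes the whole freelist and
sets the frozen bit in one cmpxchg loop. A minimal sketch of its shape,
mirroring the removed acquire_slab() logic below (a sketch, not
necessarily the exact code):

static inline void *freeze_slab(struct kmem_cache *s, struct slab *slab)
{
	struct slab new;
	unsigned long counters;
	void *freelist;

	do {
		freelist = slab->freelist;
		counters = slab->counters;

		new.counters = counters;
		VM_BUG_ON(new.frozen);

		/* Take the whole freelist for the per-cpu slab. */
		new.inuse = slab->objects;
		new.frozen = 1;

	} while (!__slab_update_freelist(s, slab,
			freelist, counters,
			NULL, new.counters,
			"freeze_slab"));

	return freelist;
}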

Signed-off-by: Chengming Zhou <[email protected]>
---
mm/slub.c | 111 +++++++++++-------------------------------------------
1 file changed, 21 insertions(+), 90 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index 5b428648021f..486d44421432 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2215,51 +2215,6 @@ static void *alloc_single_from_new_slab(struct kmem_cache *s,
return object;
}

-/*
- * Remove slab from the partial list, freeze it and
- * return the pointer to the freelist.
- *
- * Returns a list of objects or NULL if it fails.
- */
-static inline void *acquire_slab(struct kmem_cache *s,
- struct kmem_cache_node *n, struct slab *slab,
- int mode)
-{
- void *freelist;
- unsigned long counters;
- struct slab new;
-
- lockdep_assert_held(&n->list_lock);
-
- /*
- * Zap the freelist and set the frozen bit.
- * The old freelist is the list of objects for the
- * per cpu allocation list.
- */
- freelist = slab->freelist;
- counters = slab->counters;
- new.counters = counters;
- if (mode) {
- new.inuse = slab->objects;
- new.freelist = NULL;
- } else {
- new.freelist = freelist;
- }
-
- VM_BUG_ON(new.frozen);
- new.frozen = 1;
-
- if (!__slab_update_freelist(s, slab,
- freelist, counters,
- new.freelist, new.counters,
- "acquire_slab"))
- return NULL;
-
- remove_partial(n, slab);
- WARN_ON(!freelist);
- return freelist;
-}
-
#ifdef CONFIG_SLUB_CPU_PARTIAL
static void put_cpu_partial(struct kmem_cache *s, struct slab *slab, int drain);
#else
@@ -2276,7 +2231,6 @@ static struct slab *get_partial_node(struct kmem_cache *s,
struct partial_context *pc)
{
struct slab *slab, *slab2, *partial = NULL;
- void *object = NULL;
unsigned long flags;
unsigned int partial_slabs = 0;

@@ -2295,7 +2249,7 @@ static struct slab *get_partial_node(struct kmem_cache *s,
continue;

if (IS_ENABLED(CONFIG_SLUB_TINY) || kmem_cache_debug(s)) {
- object = alloc_single_from_partial(s, n, slab,
+ void *object = alloc_single_from_partial(s, n, slab,
pc->orig_size);
if (object) {
partial = slab;
@@ -2305,13 +2259,10 @@ static struct slab *get_partial_node(struct kmem_cache *s,
continue;
}

- object = acquire_slab(s, n, slab, object == NULL);
- if (!object)
- break;
+ remove_partial(n, slab);

if (!partial) {
partial = slab;
- pc->object = object;
stat(s, ALLOC_FROM_PARTIAL);
} else {
put_cpu_partial(s, slab, 0);
@@ -2610,9 +2561,6 @@ static void __unfreeze_partials(struct kmem_cache *s, struct slab *partial_slab)
unsigned long flags = 0;

while (partial_slab) {
- struct slab new;
- struct slab old;
-
slab = partial_slab;
partial_slab = slab->next;

@@ -2625,23 +2573,7 @@ static void __unfreeze_partials(struct kmem_cache *s, struct slab *partial_slab)
spin_lock_irqsave(&n->list_lock, flags);
}

- do {
-
- old.freelist = slab->freelist;
- old.counters = slab->counters;
- VM_BUG_ON(!old.frozen);
-
- new.counters = old.counters;
- new.freelist = old.freelist;
-
- new.frozen = 0;
-
- } while (!__slab_update_freelist(s, slab,
- old.freelist, old.counters,
- new.freelist, new.counters,
- "unfreezing slab"));
-
- if (unlikely(!new.inuse && n->nr_partial >= s->min_partial)) {
+ if (unlikely(!slab->inuse && n->nr_partial >= s->min_partial)) {
slab->next = slab_to_discard;
slab_to_discard = slab;
} else {
@@ -3148,7 +3080,6 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
node = NUMA_NO_NODE;
goto new_slab;
}
-redo:

if (unlikely(!node_match(slab, node))) {
/*
@@ -3224,7 +3155,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,

new_slab:

- if (slub_percpu_partial(c)) {
+ while (slub_percpu_partial(c)) {
local_lock_irqsave(&s->cpu_slab->lock, flags);
if (unlikely(c->slab)) {
local_unlock_irqrestore(&s->cpu_slab->lock, flags);
@@ -3236,11 +3167,20 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
goto new_objects;
}

- slab = c->slab = slub_percpu_partial(c);
+ slab = slub_percpu_partial(c);
slub_set_percpu_partial(c, slab);
local_unlock_irqrestore(&s->cpu_slab->lock, flags);
stat(s, CPU_PARTIAL_ALLOC);
- goto redo;
+
+ if (unlikely(!node_match(slab, node) ||
+ !pfmemalloc_match(slab, gfpflags))) {
+ slab->next = NULL;
+ __unfreeze_partials(s, slab);
+ continue;
+ }
+
+ freelist = freeze_slab(s, slab);
+ goto retry_load_slab;
}

new_objects:
@@ -3249,8 +3189,8 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
pc.orig_size = orig_size;
slab = get_partial(s, node, &pc);
if (slab) {
- freelist = pc.object;
if (kmem_cache_debug(s)) {
+ freelist = pc.object;
/*
* For debug caches here we had to go through
* alloc_single_from_partial() so just store the
@@ -3262,6 +3202,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
return freelist;
}

+ freelist = freeze_slab(s, slab);
goto retry_load_slab;
}

@@ -3663,18 +3604,8 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
was_frozen = new.frozen;
new.inuse -= cnt;
if ((!new.inuse || !prior) && !was_frozen) {
-
- if (kmem_cache_has_cpu_partial(s) && !prior) {
-
- /*
- * Slab was on no list before and will be
- * partially empty
- * We can defer the list move and instead
- * freeze it.
- */
- new.frozen = 1;
-
- } else { /* Needs to be taken off a list */
+ /* Needs to be taken off a list */
+ if (!kmem_cache_has_cpu_partial(s) || prior) {

n = get_node(s, slab_nid(slab));
/*
@@ -3704,9 +3635,9 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
* activity can be necessary.
*/
stat(s, FREE_FROZEN);
- } else if (new.frozen) {
+ } else if (kmem_cache_has_cpu_partial(s) && !prior) {
/*
- * If we just froze the slab then put it onto the
+ * If we started with a full slab then put it onto the
* per cpu partial list.
*/
put_cpu_partial(s, slab, 1);
--
2.40.1


2023-10-25 02:19:07

by Chengming Zhou

Subject: Re: [RFC PATCH v3 6/7] slub: Delay freezing of partial slabs

On 2023/10/24 17:33, [email protected] wrote:
> From: Chengming Zhou <[email protected]>
>
> [...]
>
> @@ -3224,7 +3155,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
>
> new_slab:
>
> - if (slub_percpu_partial(c)) {
> + while (slub_percpu_partial(c)) {
> local_lock_irqsave(&s->cpu_slab->lock, flags);
> if (unlikely(c->slab)) {
> local_unlock_irqrestore(&s->cpu_slab->lock, flags);
> @@ -3236,11 +3167,20 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
> goto new_objects;
> }
>
> - slab = c->slab = slub_percpu_partial(c);
> + slab = slub_percpu_partial(c);
> slub_set_percpu_partial(c, slab);
> local_unlock_irqrestore(&s->cpu_slab->lock, flags);
> stat(s, CPU_PARTIAL_ALLOC);
> - goto redo;
> +
> + if (unlikely(!node_match(slab, node) ||
> + !pfmemalloc_match(slab, gfpflags))) {
> + slab->next = NULL;
> + __unfreeze_partials(s, slab);
> + continue;
> + }
> +
> + freelist = freeze_slab(s, slab);
> + goto retry_load_slab;
> }

Oops, this while (slub_percpu_partial(c)) loop block should be put inside
#ifdef CONFIG_SLUB_CPU_PARTIAL, since slab->next and __unfreeze_partials()
are only defined when CONFIG_SLUB_CPU_PARTIAL is enabled.
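
A sketch of the intended guard (loop body elided):

new_slab:

#ifdef CONFIG_SLUB_CPU_PARTIAL
	while (slub_percpu_partial(c)) {
		/*
		 * ... take a slab off c->partial, then freeze_slab() it,
		 * or put it back via __unfreeze_partials() on mismatch ...
		 */
	}
#endif /* CONFIG_SLUB_CPU_PARTIAL */

new_objects: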

I should also append a cleanup patch to rename all *unfreeze_partials*
functions to *put_partials*, since there is no "unfreeze" left in these
functions anymore.

Will do in the next version.

Thanks.


2023-10-26 05:50:00

by kernel test robot

Subject: Re: [RFC PATCH v3 6/7] slub: Delay freezing of partial slabs



Hello,

kernel test robot noticed "WARNING:at_mm/slub.c:#___slab_alloc" on:

commit: b34342e2732b0dc92b29d6807c5314e2e5e0c27c ("[RFC PATCH v3 6/7] slub: Delay freezing of partial slabs")
url: https://github.com/intel-lab-lkp/linux/commits/chengming-zhou-linux-dev/slub-Keep-track-of-whether-slub-is-on-the-per-node-partial-list/20231024-173519
base: git://git.kernel.org/cgit/linux/kernel/git/vbabka/slab.git for-next
patch link: https://lore.kernel.org/all/[email protected]/
patch subject: [RFC PATCH v3 6/7] slub: Delay freezing of partial slabs

in testcase: rcutorture
version:
with following parameters:

runtime: 300s
test: default
torture_type: rcu



compiler: gcc-12
test machine: qemu-system-x86_64 -enable-kvm -cpu SandyBridge -smp 2 -m 16G

(please refer to attached dmesg/kmsg for entire log/backtrace)


+------------------------------------------------+------------+------------+
| | e87d1d9973 | b34342e273 |
+------------------------------------------------+------------+------------+
| boot_successes | 15 | 0 |
| boot_failures | 0 | 15 |
| WARNING:at_mm/slub.c:#___slab_alloc | 0 | 15 |
| RIP:___slab_alloc | 0 | 15 |
+------------------------------------------------+------------+------------+


If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-lkp/[email protected]


[ 4.907500][ T1] ------------[ cut here ]------------
[ 4.908232][ T1] WARNING: CPU: 0 PID: 1 at mm/slub.c:577 ___slab_alloc (mm/slub.c:577 mm/slub.c:3033 mm/slub.c:3205)
[ 4.909242][ T1] Modules linked in:
[ 4.909739][ T1] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.6.0-rc5-00013-gb34342e2732b #1
[ 4.910746][ T1] RIP: 0010:___slab_alloc (mm/slub.c:577 mm/slub.c:3033 mm/slub.c:3205)
[ 4.911433][ T1] Code: 9f 05 66 25 ff 7f 66 89 45 b8 48 8b 4d b8 45 85 e4 74 1a 65 8b 05 dd c4 a3 7e 85 c0 75 0f 65 8b 05 de c1 87 7e 85 c0 74 04 90 <0f> 0b 90 81 e6 00 00 00 40 74 31 4c 89 f8 f0 49 0f c7 48 20 0f 84
All code
========
0: 9f lahf
1: 05 66 25 ff 7f add $0x7fff2566,%eax
6: 66 89 45 b8 mov %ax,-0x48(%rbp)
a: 48 8b 4d b8 mov -0x48(%rbp),%rcx
e: 45 85 e4 test %r12d,%r12d
11: 74 1a je 0x2d
13: 65 8b 05 dd c4 a3 7e mov %gs:0x7ea3c4dd(%rip),%eax # 0x7ea3c4f7
1a: 85 c0 test %eax,%eax
1c: 75 0f jne 0x2d
1e: 65 8b 05 de c1 87 7e mov %gs:0x7e87c1de(%rip),%eax # 0x7e87c203
25: 85 c0 test %eax,%eax
27: 74 04 je 0x2d
29: 90 nop
2a:* 0f 0b ud2 <-- trapping instruction
2c: 90 nop
2d: 81 e6 00 00 00 40 and $0x40000000,%esi
33: 74 31 je 0x66
35: 4c 89 f8 mov %r15,%rax
38: f0 49 0f c7 48 20 lock cmpxchg16b 0x20(%r8)
3e: 0f .byte 0xf
3f: 84 .byte 0x84

Code starting with the faulting instruction
===========================================
0: 0f 0b ud2
2: 90 nop
3: 81 e6 00 00 00 40 and $0x40000000,%esi
9: 74 31 je 0x3c
b: 4c 89 f8 mov %r15,%rax
e: f0 49 0f c7 48 20 lock cmpxchg16b 0x20(%r8)
14: 0f .byte 0xf
15: 84 .byte 0x84
[ 4.913822][ T1] RSP: 0000:ffffc9000001f830 EFLAGS: 00010202
[ 4.914602][ T1] RAX: 0000000000000001 RBX: 0000000000000000 RCX: 0000000080270027
[ 4.914743][ T1] RDX: 0000000000270021 RSI: 0000000048000000 RDI: ffffffff842e066b
[ 4.915745][ T1] RBP: ffffc9000001f8e0 R08: ffffea0005931d40 R09: fffffbfff0e33a5a
[ 4.916743][ T1] R10: ffffffff8719d2d7 R11: 0000000000000000 R12: 0000000000000001
[ 4.917696][ T1] R13: ffff8883ae809430 R14: ffff88810033e3c0 R15: ffff888164c75dd0
[ 4.918688][ T1] FS: 0000000000000000(0000) GS:ffff8883ae600000(0000) knlGS:0000000000000000
[ 4.918754][ T1] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 4.919522][ T1] CR2: ffff88843ffff000 CR3: 0000000005c89000 CR4: 00000000000406f0
[ 4.920500][ T1] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 4.921496][ T1] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 4.922461][ T1] Call Trace:
[ 4.922742][ T1] <TASK>
[ 4.923107][ T1] ? __warn (kernel/panic.c:673)
[ 4.923632][ T1] ? ___slab_alloc (mm/slub.c:577 mm/slub.c:3033 mm/slub.c:3205)
[ 4.924250][ T1] ? report_bug (lib/bug.c:180 lib/bug.c:219)
[ 4.924831][ T1] ? handle_bug (arch/x86/kernel/traps.c:237)
[ 4.925376][ T1] ? exc_invalid_op (arch/x86/kernel/traps.c:258 (discriminator 1))
[ 4.925962][ T1] ? asm_exc_invalid_op (arch/x86/include/asm/idtentry.h:568)
[ 4.926595][ T1] ? _raw_spin_unlock_irqrestore (arch/x86/include/asm/irqflags.h:42 arch/x86/include/asm/irqflags.h:77 arch/x86/include/asm/irqflags.h:135 include/linux/spinlock_api_smp.h:151 kernel/locking/spinlock.c:194)
[ 4.926741][ T1] ? ___slab_alloc (mm/slub.c:577 mm/slub.c:3033 mm/slub.c:3205)
[ 4.927370][ T1] ? acpi_ut_allocate_object_desc_dbg (include/linux/slab.h:710 include/acpi/platform/aclinuxex.h:67 drivers/acpi/acpica/utobject.c:359)
[ 4.928186][ T1] ? kmem_cache_alloc (mm/slub.c:3295 mm/slub.c:3348 mm/slub.c:3440 mm/slub.c:3458 mm/slub.c:3465 mm/slub.c:3474)
[ 4.928824][ T1] kmem_cache_alloc (mm/slub.c:3295 mm/slub.c:3348 mm/slub.c:3440 mm/slub.c:3458 mm/slub.c:3465 mm/slub.c:3474)
[ 4.929442][ T1] ? acpi_ut_allocate_object_desc_dbg (include/linux/slab.h:710 include/acpi/platform/aclinuxex.h:67 drivers/acpi/acpica/utobject.c:359)
[ 4.930221][ T1] acpi_ut_allocate_object_desc_dbg (include/linux/slab.h:710 include/acpi/platform/aclinuxex.h:67 drivers/acpi/acpica/utobject.c:359)
[ 4.930743][ T1] acpi_ut_create_internal_object_dbg (drivers/acpi/acpica/utobject.c:71)
[ 4.931576][ T1] acpi_ut_copy_esimple_to_isimple (drivers/acpi/acpica/utcopy.c:434)
[ 4.932394][ T1] acpi_ut_copy_eobject_to_iobject (drivers/acpi/acpica/utcopy.c:618)
[ 4.933185][ T1] ? __kasan_kmalloc (mm/kasan/common.c:374 mm/kasan/common.c:383)
[ 4.933816][ T1] acpi_evaluate_object (drivers/acpi/acpica/nsxfeval.c:259)
[ 4.934486][ T1] ? acpi_get_data_full (drivers/acpi/acpica/nsxfeval.c:167)
[ 4.934756][ T1] ? hlock_class (arch/x86/include/asm/bitops.h:228 arch/x86/include/asm/bitops.h:240 include/asm-generic/bitops/instrumented-non-atomic.h:142 kernel/locking/lockdep.c:228)
[ 4.935345][ T1] ? __uuid_parse+0xd0/0x1b0
[ 4.936024][ T1] acpi_run_osc (drivers/acpi/bus.c:217)
[ 4.936609][ T1] ? acpi_bus_detach_private_data (drivers/acpi/bus.c:187)
[ 4.937363][ T1] ? _raw_spin_unlock_irqrestore (arch/x86/include/asm/irqflags.h:42 arch/x86/include/asm/irqflags.h:77 arch/x86/include/asm/irqflags.h:135 include/linux/spinlock_api_smp.h:151 kernel/locking/spinlock.c:194)
[ 4.938127][ T1] ? acpi_get_data (drivers/acpi/acpica/nsxfname.c:48)
[ 4.938710][ T1] acpi_bus_init (drivers/acpi/bus.c:352 drivers/acpi/bus.c:1329)
[ 4.938742][ T1] ? up (include/linux/list.h:373 kernel/locking/semaphore.c:188)
[ 4.939205][ T1] ? acpi_sleep_proc_init (drivers/acpi/bus.c:1289)
[ 4.939862][ T1] ? _raw_spin_unlock_irqrestore (arch/x86/include/asm/irqflags.h:42 arch/x86/include/asm/irqflags.h:77 arch/x86/include/asm/irqflags.h:135 include/linux/spinlock_api_smp.h:151 kernel/locking/spinlock.c:194)
[ 4.940581][ T1] ? acpi_os_signal_semaphore (drivers/acpi/osl.c:1294 (discriminator 5))
[ 4.941313][ T1] ? acpi_ut_release_mutex (drivers/acpi/acpica/utmutex.c:329)
[ 4.942007][ T1] ? acpi_install_address_space_handler_internal (drivers/acpi/acpica/evxfregn.c:94)
[ 4.942744][ T1] ? acpi_bus_init (drivers/acpi/bus.c:1390)
[ 4.943325][ T1] acpi_init (drivers/acpi/bus.c:1404)
[ 4.943832][ T1] ? acpi_bus_init (drivers/acpi/bus.c:1390)
[ 4.944420][ T1] ? kb3886_init (drivers/video/fbdev/core/fbmem.c:1111)
[ 4.944976][ T1] ? acpi_bus_init (drivers/acpi/bus.c:1390)
[ 4.945579][ T1] do_one_initcall (init/main.c:1232)
[ 4.946157][ T1] ? trace_event_raw_event_initcall_level (init/main.c:1223)
[ 4.946747][ T1] ? kasan_set_track (mm/kasan/common.c:52)
[ 4.947373][ T1] ? __kasan_kmalloc (mm/kasan/common.c:374 mm/kasan/common.c:383)
[ 4.948010][ T1] do_initcalls (init/main.c:1293 init/main.c:1310)
[ 4.948614][ T1] kernel_init_freeable (init/main.c:1549)
[ 4.949315][ T1] ? rest_init (init/main.c:1429)
[ 4.949894][ T1] kernel_init (init/main.c:1439)
[ 4.950743][ T1] ? _raw_spin_unlock_irq (arch/x86/include/asm/irqflags.h:42 arch/x86/include/asm/irqflags.h:77 include/linux/spinlock_api_smp.h:159 kernel/locking/spinlock.c:202)
[ 4.951386][ T1] ret_from_fork (arch/x86/kernel/process.c:153)
[ 4.951957][ T1] ? rest_init (init/main.c:1429)
[ 4.952520][ T1] ret_from_fork_asm (arch/x86/entry/entry_64.S:312)
[ 4.953143][ T1] </TASK>
[ 4.953514][ T1] irq event stamp: 85121
[ 4.954040][ T1] hardirqs last enabled at (85129): console_unlock (arch/x86/include/asm/irqflags.h:42 arch/x86/include/asm/irqflags.h:77 arch/x86/include/asm/irqflags.h:135 kernel/printk/printk.c:347 kernel/printk/printk.c:2718 kernel/printk/printk.c:3037)
[ 4.954736][ T1] hardirqs last disabled at (85138): console_unlock (kernel/printk/printk.c:345 kernel/printk/printk.c:2718 kernel/printk/printk.c:3037)
[ 4.955884][ T1] softirqs last enabled at (84862): __do_softirq (arch/x86/include/asm/preempt.h:27 kernel/softirq.c:400 kernel/softirq.c:582)
[ 4.957017][ T1] softirqs last disabled at (84857): irq_exit_rcu (kernel/softirq.c:427 kernel/softirq.c:632 kernel/softirq.c:644)
[ 4.958128][ T1] ---[ end trace 0000000000000000 ]---


The kernel config and materials to reproduce are available at:
https://download.01.org/0day-ci/archive/20231026/[email protected]



--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

2023-10-26 07:42:24

by Chengming Zhou

Subject: Re: [RFC PATCH v3 6/7] slub: Delay freezing of partial slabs

On 2023/10/26 13:49, kernel test robot wrote:
>
>
> Hello,
>
> kernel test robot noticed "WARNING:at_mm/slub.c:#___slab_alloc" on:
>
> commit: b34342e2732b0dc92b29d6807c5314e2e5e0c27c ("[RFC PATCH v3 6/7] slub: Delay freezing of partial slabs")
> [...]
>
> [ 4.907500][ T1] ------------[ cut here ]------------
> [ 4.908232][ T1] WARNING: CPU: 0 PID: 1 at mm/slub.c:577 ___slab_alloc (mm/slub.c:577 mm/slub.c:3033 mm/slub.c:3205)
> [ 4.909242][ T1] Modules linked in:
> [ 4.909739][ T1] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.6.0-rc5-00013-gb34342e2732b #1
> [ 4.910746][ T1] RIP: 0010:___slab_alloc (mm/slub.c:577 mm/slub.c:3033 mm/slub.c:3205)

The warning is triggered by "lockdep_assert_irqs_disabled()" in
__slab_update_freelist(), which is called from the newly introduced
freeze_slab().

We should fix it by using "slab_update_freelist()" in freeze_slab()
instead, which disables interrupts correctly.
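
Concretely, that would be a one-line change in freeze_slab(), along these
lines (a sketch; the exact context lines may differ):

-	} while (!__slab_update_freelist(s, slab,
+	} while (!slab_update_freelist(s, slab,
 			freelist, counters,
 			NULL, new.counters,
 			"freeze_slab"));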

Thanks!

2023-10-31 09:50:32

by Vlastimil Babka

Subject: Re: [RFC PATCH v3 6/7] slub: Delay freezing of partial slabs

On 10/24/23 11:33, [email protected] wrote:
> From: Chengming Zhou <[email protected]>
>
> Now we will freeze slabs when moving them out of node partial list to
> cpu partial list, this method needs two cmpxchg_double operations:
>
> 1. freeze slab (acquire_slab()) under the node list_lock
> 2. get_freelist() when pick used in ___slab_alloc()
>
> Actually we don't need to freeze when moving slabs out of node partial
> list, we can delay freezing to when use slab freelist in ___slab_alloc(),
> so we can save one cmpxchg_double().
>
> And there are other good points:
> - The moving of slabs between node partial list and cpu partial list
> becomes simpler, since we don't need to freeze or unfreeze at all.
>
> - The node list_lock contention would be less, since we don't need to
> freeze any slab under the node list_lock.
>
> We can achieve this because there is no concurrent path would manipulate
> the partial slab list except the __slab_free() path, which is serialized
> now.

"which is now serialized by slab_test_node_partial() under the list_lock." ?

> Since the slab returned by get_partial() interfaces is not frozen anymore
> and no freelist in the partial_context, so we need to use the introduced

^ is returned in

> freeze_slab() to freeze it and get its freelist.
>
> Similarly, the slabs on the CPU partial list are not frozen anymore,
> we need to freeze_slab() on it before use.

We can now delete acquire_slab() as it became unused.

> Signed-off-by: Chengming Zhou <[email protected]>

With the fixup for CONFIG_SLUB_CPU_PARTIAL you mentioned,
Reviewed-by: Vlastimil Babka <[email protected]>

Also agreed with the followup patch to rename unfreeze_partials().
Thanks!