The two functions slab_update_freelist() and __slab_update_freelist()
are nearly identical. Fold them into one, add a boolean argument, and
rely on constant propagation.
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
---
mm/slub.c | 80 +++++++++++++++++++++-----------------------------------------
1 file changed, 28 insertions(+), 52 deletions(-)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -559,53 +559,29 @@ __update_freelist_slow(struct slab *slab
* allocation/ free operation in hardirq context. Therefore nothing can
* interrupt the operation.
*/
-static inline bool __slab_update_freelist(struct kmem_cache *s, struct slab *slab,
- void *freelist_old, unsigned long counters_old,
- void *freelist_new, unsigned long counters_new,
- const char *n)
+static __always_inline
+bool slab_update_freelist(struct kmem_cache *s, struct slab *slab,
+ void *freelist_old, unsigned long counters_old,
+ void *freelist_new, unsigned long counters_new,
+ bool irq_save, const char *n)
{
bool ret;
- if (USE_LOCKLESS_FAST_PATH())
+ if (!irq_save && USE_LOCKLESS_FAST_PATH())
lockdep_assert_irqs_disabled();
if (s->flags & __CMPXCHG_DOUBLE) {
ret = __update_freelist_fast(slab, freelist_old, counters_old,
freelist_new, counters_new);
} else {
- ret = __update_freelist_slow(slab, freelist_old, counters_old,
- freelist_new, counters_new);
- }
- if (likely(ret))
- return true;
-
- cpu_relax();
- stat(s, CMPXCHG_DOUBLE_FAIL);
-
-#ifdef SLUB_DEBUG_CMPXCHG
- pr_info("%s %s: cmpxchg double redo ", n, s->name);
-#endif
-
- return false;
-}
-
-static inline bool slab_update_freelist(struct kmem_cache *s, struct slab *slab,
- void *freelist_old, unsigned long counters_old,
- void *freelist_new, unsigned long counters_new,
- const char *n)
-{
- bool ret;
-
- if (s->flags & __CMPXCHG_DOUBLE) {
- ret = __update_freelist_fast(slab, freelist_old, counters_old,
- freelist_new, counters_new);
- } else {
unsigned long flags;
- local_irq_save(flags);
+ if (irq_save)
+ local_irq_save(flags);
ret = __update_freelist_slow(slab, freelist_old, counters_old,
freelist_new, counters_new);
- local_irq_restore(flags);
+ if (irq_save)
+ local_irq_restore(flags);
}
if (likely(ret))
return true;
@@ -2250,10 +2226,10 @@ static inline void *acquire_slab(struct
VM_BUG_ON(new.frozen);
new.frozen = 1;
- if (!__slab_update_freelist(s, slab,
- freelist, counters,
- new.freelist, new.counters,
- "acquire_slab"))
+ if (!slab_update_freelist(s, slab,
+ freelist, counters,
+ new.freelist, new.counters,
+ false, "acquire_slab"))
return NULL;
remove_partial(n, slab);
@@ -2577,9 +2553,9 @@ static void deactivate_slab(struct kmem_
if (!slab_update_freelist(s, slab,
- old.freelist, old.counters,
- new.freelist, new.counters,
- "unfreezing slab")) {
+ old.freelist, old.counters,
+ new.freelist, new.counters,
+ true, "unfreezing slab")) {
if (mode == M_PARTIAL)
spin_unlock_irqrestore(&n->list_lock, flags);
goto redo;
@@ -2633,10 +2609,10 @@ static void __unfreeze_partials(struct k
new.frozen = 0;
- } while (!__slab_update_freelist(s, slab,
- old.freelist, old.counters,
- new.freelist, new.counters,
- "unfreezing slab"));
+ } while (!slab_update_freelist(s, slab,
+ old.freelist, old.counters,
+ new.freelist, new.counters,
+ false, "unfreezing slab"));
if (unlikely(!new.inuse && n->nr_partial >= s->min_partial)) {
slab->next = slab_to_discard;
@@ -3072,10 +3048,10 @@ static inline void *get_freelist(struct
new.inuse = slab->objects;
new.frozen = freelist != NULL;
- } while (!__slab_update_freelist(s, slab,
- freelist, counters,
- NULL, new.counters,
- "get_freelist"));
+ } while (!slab_update_freelist(s, slab,
+ freelist, counters,
+ NULL, new.counters,
+ false, "get_freelist"));
return freelist;
}
@@ -3666,9 +3642,9 @@ static void __slab_free(struct kmem_cach
}
} while (!slab_update_freelist(s, slab,
- prior, counters,
- head, new.counters,
- "__slab_free"));
+ prior, counters,
+ head, new.counters,
+ true, "__slab_free"));
if (likely(!n)) {
On 5/15/23 09:57, Peter Zijlstra wrote:
> The two functions slab_update_freelist() and __slab_update_freelist()
> are nearly identical, fold and add a boolean argument and rely on
> constant propagation.
>
> Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Something like that has been tried before, and this was the result:
https://lore.kernel.org/all/CAHk-=wiJLqL2cUhJbvpyPQpkbVOu1rVSzgO2=S2jC55hneLtfQ@mail.gmail.com/
Your parameter is not called 'locked' but 'irq_save' which is better, but
that's just one detail.
After your refactoring in 08/11 which puts most of the code into
__update_freelist_fast() and _slow() I'd say the result is not so bad already.
BTW I have some suspicion that some SLUB code is based on assumptions that
are no longer true these days. IIRC I've seen some microbenchmark results a
while ago that showed that disabling/enabling irqs is surprisingly (to me)
very cheap today, so maybe it's not so useful to keep using
this_cpu_cmpxchg128 for the struct kmem_cache_cpu operations (this applies
less to the struct slab cmpxchg128, where different cpus may actually be
involved). But it needs a closer look.
> ---
> mm/slub.c | 80 +++++++++++++++++++++-----------------------------------------
> 1 file changed, 28 insertions(+), 52 deletions(-)
>
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -559,53 +559,29 @@ __update_freelist_slow(struct slab *slab
> * allocation/ free operation in hardirq context. Therefore nothing can
> * interrupt the operation.
> */
> -static inline bool __slab_update_freelist(struct kmem_cache *s, struct slab *slab,
> - void *freelist_old, unsigned long counters_old,
> - void *freelist_new, unsigned long counters_new,
> - const char *n)
> +static __always_inline
> +bool slab_update_freelist(struct kmem_cache *s, struct slab *slab,
> + void *freelist_old, unsigned long counters_old,
> + void *freelist_new, unsigned long counters_new,
> + bool irq_save, const char *n)
> {
> bool ret;
>
> - if (USE_LOCKLESS_FAST_PATH())
> + if (!irq_save && USE_LOCKLESS_FAST_PATH())
> lockdep_assert_irqs_disabled();
>
> if (s->flags & __CMPXCHG_DOUBLE) {
> ret = __update_freelist_fast(slab, freelist_old, counters_old,
> freelist_new, counters_new);
> } else {
> - ret = __update_freelist_slow(slab, freelist_old, counters_old,
> - freelist_new, counters_new);
> - }
> - if (likely(ret))
> - return true;
> -
> - cpu_relax();
> - stat(s, CMPXCHG_DOUBLE_FAIL);
> -
> -#ifdef SLUB_DEBUG_CMPXCHG
> - pr_info("%s %s: cmpxchg double redo ", n, s->name);
> -#endif
> -
> - return false;
> -}
> -
> -static inline bool slab_update_freelist(struct kmem_cache *s, struct slab *slab,
> - void *freelist_old, unsigned long counters_old,
> - void *freelist_new, unsigned long counters_new,
> - const char *n)
> -{
> - bool ret;
> -
> - if (s->flags & __CMPXCHG_DOUBLE) {
> - ret = __update_freelist_fast(slab, freelist_old, counters_old,
> - freelist_new, counters_new);
> - } else {
> unsigned long flags;
>
> - local_irq_save(flags);
> + if (irq_save)
> + local_irq_save(flags);
> ret = __update_freelist_slow(slab, freelist_old, counters_old,
> freelist_new, counters_new);
> - local_irq_restore(flags);
> + if (irq_save)
> + local_irq_restore(flags);
> }
> if (likely(ret))
> return true;
> @@ -2250,10 +2226,10 @@ static inline void *acquire_slab(struct
> VM_BUG_ON(new.frozen);
> new.frozen = 1;
>
> - if (!__slab_update_freelist(s, slab,
> - freelist, counters,
> - new.freelist, new.counters,
> - "acquire_slab"))
> + if (!slab_update_freelist(s, slab,
> + freelist, counters,
> + new.freelist, new.counters,
> + false, "acquire_slab"))
> return NULL;
>
> remove_partial(n, slab);
> @@ -2577,9 +2553,9 @@ static void deactivate_slab(struct kmem_
>
>
> if (!slab_update_freelist(s, slab,
> - old.freelist, old.counters,
> - new.freelist, new.counters,
> - "unfreezing slab")) {
> + old.freelist, old.counters,
> + new.freelist, new.counters,
> + true, "unfreezing slab")) {
> if (mode == M_PARTIAL)
> spin_unlock_irqrestore(&n->list_lock, flags);
> goto redo;
> @@ -2633,10 +2609,10 @@ static void __unfreeze_partials(struct k
>
> new.frozen = 0;
>
> - } while (!__slab_update_freelist(s, slab,
> - old.freelist, old.counters,
> - new.freelist, new.counters,
> - "unfreezing slab"));
> + } while (!slab_update_freelist(s, slab,
> + old.freelist, old.counters,
> + new.freelist, new.counters,
> + false, "unfreezing slab"));
>
> if (unlikely(!new.inuse && n->nr_partial >= s->min_partial)) {
> slab->next = slab_to_discard;
> @@ -3072,10 +3048,10 @@ static inline void *get_freelist(struct
> new.inuse = slab->objects;
> new.frozen = freelist != NULL;
>
> - } while (!__slab_update_freelist(s, slab,
> - freelist, counters,
> - NULL, new.counters,
> - "get_freelist"));
> + } while (!slab_update_freelist(s, slab,
> + freelist, counters,
> + NULL, new.counters,
> + false, "get_freelist"));
>
> return freelist;
> }
> @@ -3666,9 +3642,9 @@ static void __slab_free(struct kmem_cach
> }
>
> } while (!slab_update_freelist(s, slab,
> - prior, counters,
> - head, new.counters,
> - "__slab_free"));
> + prior, counters,
> + head, new.counters,
> + true, "__slab_free"));
>
> if (likely(!n)) {
>
>
>