2022-03-04 10:44:11

by Hyeonggon Yoo

Subject: [PATCH v2 1/5] mm/slab: kmalloc: pass requests larger than order-1 page to page allocator

There is not much benefit in serving large objects from kmalloc().
Let's pass large requests to the page allocator, as SLUB does, for
better maintenance of the common code.

Signed-off-by: Hyeonggon Yoo <[email protected]>
---
include/linux/slab.h | 35 ++++++++++++++++-------------------
mm/slab.c | 31 +++++++++++++++++++++++++++----
mm/slab.h | 19 +++++++++++++++++++
mm/slub.c | 19 -------------------
4 files changed, 62 insertions(+), 42 deletions(-)

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 37bde99b74af..e7b3330db4f3 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -224,29 +224,19 @@ void kmem_dump_obj(void *object);
* Kmalloc array related definitions
*/

-#ifdef CONFIG_SLAB
/*
- * The largest kmalloc size supported by the SLAB allocators is
- * 32 megabyte (2^25) or the maximum allocatable page order if that is
- * less than 32 MB.
- *
- * WARNING: Its not easy to increase this value since the allocators have
- * to do various tricks to work around compiler limitations in order to
- * ensure proper constant folding.
+ * SLAB and SLUB directly allocates requests fitting in to an order-1 page
+ * (PAGE_SIZE*2). Larger requests are passed to the page allocator.
*/
-#define KMALLOC_SHIFT_HIGH ((MAX_ORDER + PAGE_SHIFT - 1) <= 25 ? \
- (MAX_ORDER + PAGE_SHIFT - 1) : 25)
-#define KMALLOC_SHIFT_MAX KMALLOC_SHIFT_HIGH
+#ifdef CONFIG_SLAB
+#define KMALLOC_SHIFT_HIGH (PAGE_SHIFT + 1)
+#define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT - 1)
#ifndef KMALLOC_SHIFT_LOW
#define KMALLOC_SHIFT_LOW 5
#endif
#endif

#ifdef CONFIG_SLUB
-/*
- * SLUB directly allocates requests fitting in to an order-1 page
- * (PAGE_SIZE*2). Larger requests are passed to the page allocator.
- */
#define KMALLOC_SHIFT_HIGH (PAGE_SHIFT + 1)
#define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT - 1)
#ifndef KMALLOC_SHIFT_LOW
@@ -564,15 +554,15 @@ static __always_inline __alloc_size(1) void *kmalloc_large(size_t size, gfp_t fl
* Try really hard to succeed the allocation but fail
* eventually.
*/
+#ifndef CONFIG_SLOB
static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
{
if (__builtin_constant_p(size)) {
-#ifndef CONFIG_SLOB
unsigned int index;
-#endif
+
if (size > KMALLOC_MAX_CACHE_SIZE)
return kmalloc_large(size, flags);
-#ifndef CONFIG_SLOB
+
index = kmalloc_index(size);

if (!index)
@@ -581,10 +571,17 @@ static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
return kmem_cache_alloc_trace(
kmalloc_caches[kmalloc_type(flags)][index],
flags, size);
-#endif
}
return __kmalloc(size, flags);
}
+#else
+static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
+{
+ if (__builtin_constant_p(size) && size > KMALLOC_MAX_CACHE_SIZE)
+ return kmalloc_large(size, flags);
+ return __kmalloc(size, flags);
+}
+#endif

static __always_inline __alloc_size(1) void *kmalloc_node(size_t size, gfp_t flags, int node)
{
diff --git a/mm/slab.c b/mm/slab.c
index ddf5737c63d9..570af6dc3478 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3624,7 +3624,8 @@ __do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller)
void *ret;

if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
- return NULL;
+ return kmalloc_large(size, flags);
+
cachep = kmalloc_slab(size, flags);
if (unlikely(ZERO_OR_NULL_PTR(cachep)))
return cachep;
@@ -3685,7 +3686,8 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
void *ret;

if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
- return NULL;
+ return kmalloc_large(size, flags);
+
cachep = kmalloc_slab(size, flags);
if (unlikely(ZERO_OR_NULL_PTR(cachep)))
return cachep;
@@ -3739,14 +3741,21 @@ void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p)
{
struct kmem_cache *s;
size_t i;
+ struct folio *folio;

local_irq_disable();
for (i = 0; i < size; i++) {
void *objp = p[i];

- if (!orig_s) /* called via kfree_bulk */
+ if (!orig_s) {
+ /* called via kfree_bulk */
+ folio = virt_to_folio(objp);
+ if (unlikely(!folio_test_slab(folio))) {
+ free_large_kmalloc(folio, objp);
+ continue;
+ }
s = virt_to_cache(objp);
- else
+ } else
s = cache_from_obj(orig_s, objp);
if (!s)
continue;
@@ -3776,11 +3785,20 @@ void kfree(const void *objp)
{
struct kmem_cache *c;
unsigned long flags;
+ struct folio *folio;
+ void *object = (void *) objp;

trace_kfree(_RET_IP_, objp);

if (unlikely(ZERO_OR_NULL_PTR(objp)))
return;
+
+ folio = virt_to_folio(objp);
+ if (unlikely(!folio_test_slab(folio))) {
+ free_large_kmalloc(folio, object);
+ return;
+ }
+
local_irq_save(flags);
kfree_debugcheck(objp);
c = virt_to_cache(objp);
@@ -4211,12 +4229,17 @@ void __check_heap_object(const void *ptr, unsigned long n,
size_t __ksize(const void *objp)
{
struct kmem_cache *c;
+ struct folio *folio;
size_t size;

BUG_ON(!objp);
if (unlikely(objp == ZERO_SIZE_PTR))
return 0;

+ folio = virt_to_folio(objp);
+ if (!folio_test_slab(folio))
+ return folio_size(folio);
+
c = virt_to_cache(objp);
size = c ? c->object_size : 0;

diff --git a/mm/slab.h b/mm/slab.h
index c7f2abc2b154..31e98beb47a3 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -664,6 +664,25 @@ static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
print_tracking(cachep, x);
return cachep;
}
+
+static __always_inline void kfree_hook(void *x)
+{
+ kmemleak_free(x);
+ kasan_kfree_large(x);
+}
+
+static inline void free_large_kmalloc(struct folio *folio, void *object)
+{
+ unsigned int order = folio_order(folio);
+
+ if (WARN_ON_ONCE(order == 0))
+ pr_warn_once("object pointer: 0x%p\n", object);
+
+ kfree_hook(object);
+ mod_lruvec_page_state(folio_page(folio, 0), NR_SLAB_UNRECLAIMABLE_B,
+ -(PAGE_SIZE << order));
+ __free_pages(folio_page(folio, 0), order);
+}
#endif /* CONFIG_SLOB */

static inline size_t slab_ksize(const struct kmem_cache *s)
diff --git a/mm/slub.c b/mm/slub.c
index 261474092e43..04fd084f4709 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1686,12 +1686,6 @@ static inline void *kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
return ptr;
}

-static __always_inline void kfree_hook(void *x)
-{
- kmemleak_free(x);
- kasan_kfree_large(x);
-}
-
static __always_inline bool slab_free_hook(struct kmem_cache *s,
void *x, bool init)
{
@@ -3535,19 +3529,6 @@ struct detached_freelist {
struct kmem_cache *s;
};

-static inline void free_large_kmalloc(struct folio *folio, void *object)
-{
- unsigned int order = folio_order(folio);
-
- if (WARN_ON_ONCE(order == 0))
- pr_warn_once("object pointer: 0x%p\n", object);
-
- kfree_hook(object);
- mod_lruvec_page_state(folio_page(folio, 0), NR_SLAB_UNRECLAIMABLE_B,
- -(PAGE_SIZE << order));
- __free_pages(folio_page(folio, 0), order);
-}
-
/*
* This function progressively scans the array with free objects (with
* a limited look ahead) and extract objects belonging to the same
--
2.33.1


2022-03-04 15:12:57

by Vlastimil Babka

Subject: Re: [PATCH v2 1/5] mm/slab: kmalloc: pass requests larger than order-1 page to page allocator

On 3/4/22 07:34, Hyeonggon Yoo wrote:
> There is not much benefit in serving large objects from kmalloc().
> Let's pass large requests to the page allocator, as SLUB does, for
> better maintenance of the common code.
>
> Signed-off-by: Hyeonggon Yoo <[email protected]>

Looks like it's on the right path! Some suggestions follow.

> ---
> include/linux/slab.h | 35 ++++++++++++++++-------------------
> mm/slab.c | 31 +++++++++++++++++++++++++++----
> mm/slab.h | 19 +++++++++++++++++++
> mm/slub.c | 19 -------------------
> 4 files changed, 62 insertions(+), 42 deletions(-)
>
> diff --git a/include/linux/slab.h b/include/linux/slab.h
> index 37bde99b74af..e7b3330db4f3 100644
> --- a/include/linux/slab.h
> +++ b/include/linux/slab.h
> @@ -224,29 +224,19 @@ void kmem_dump_obj(void *object);
> * Kmalloc array related definitions
> */
>
> -#ifdef CONFIG_SLAB
> /*
> - * The largest kmalloc size supported by the SLAB allocators is
> - * 32 megabyte (2^25) or the maximum allocatable page order if that is
> - * less than 32 MB.
> - *
> - * WARNING: Its not easy to increase this value since the allocators have
> - * to do various tricks to work around compiler limitations in order to
> - * ensure proper constant folding.
> + * SLAB and SLUB directly allocates requests fitting in to an order-1 page
> + * (PAGE_SIZE*2). Larger requests are passed to the page allocator.
> */
> -#define KMALLOC_SHIFT_HIGH ((MAX_ORDER + PAGE_SHIFT - 1) <= 25 ? \
> - (MAX_ORDER + PAGE_SHIFT - 1) : 25)
> -#define KMALLOC_SHIFT_MAX KMALLOC_SHIFT_HIGH
> +#ifdef CONFIG_SLAB
> +#define KMALLOC_SHIFT_HIGH (PAGE_SHIFT + 1)
> +#define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT - 1)

I think we should be able to also remove the larger sizes from
__kmalloc_index() later in this file.

> #ifndef KMALLOC_SHIFT_LOW
> #define KMALLOC_SHIFT_LOW 5
> #endif
> #endif
>
> #ifdef CONFIG_SLUB
> -/*
> - * SLUB directly allocates requests fitting in to an order-1 page
> - * (PAGE_SIZE*2). Larger requests are passed to the page allocator.
> - */
> #define KMALLOC_SHIFT_HIGH (PAGE_SHIFT + 1)
> #define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT - 1)
> #ifndef KMALLOC_SHIFT_LOW
> @@ -564,15 +554,15 @@ static __always_inline __alloc_size(1) void *kmalloc_large(size_t size, gfp_t fl
> * Try really hard to succeed the allocation but fail
> * eventually.
> */
> +#ifndef CONFIG_SLOB
> static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
> {
> if (__builtin_constant_p(size)) {
> -#ifndef CONFIG_SLOB
> unsigned int index;
> -#endif
> +
> if (size > KMALLOC_MAX_CACHE_SIZE)
> return kmalloc_large(size, flags);
> -#ifndef CONFIG_SLOB
> +
> index = kmalloc_index(size);
>
> if (!index)
> @@ -581,10 +571,17 @@ static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
> return kmem_cache_alloc_trace(
> kmalloc_caches[kmalloc_type(flags)][index],
> flags, size);
> -#endif
> }
> return __kmalloc(size, flags);
> }
> +#else
> +static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
> +{
> + if (__builtin_constant_p(size) && size > KMALLOC_MAX_CACHE_SIZE)
> + return kmalloc_large(size, flags);
> + return __kmalloc(size, flags);
> +}
> +#endif
>
> static __always_inline __alloc_size(1) void *kmalloc_node(size_t size, gfp_t flags, int node)
> {
> diff --git a/mm/slab.c b/mm/slab.c
> index ddf5737c63d9..570af6dc3478 100644
> --- a/mm/slab.c
> +++ b/mm/slab.c
> @@ -3624,7 +3624,8 @@ __do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller)
> void *ret;
>
> if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
> - return NULL;
> + return kmalloc_large(size, flags);

This loses the node information and NUMA locality. It would be better to
generalize kmalloc_large_node() from mm/slub.c instead.
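
Something like this, moved to common code (an untested sketch based on what
mm/slub.c does today; the kasan/kmemleak handling is folded in just for
illustration):

void *kmalloc_large_node(size_t size, gfp_t flags, int node)
{
	struct page *page;
	void *ptr = NULL;
	unsigned int order = get_order(size);

	flags |= __GFP_COMP;
	page = alloc_pages_node(node, flags, order);
	if (page) {
		ptr = page_address(page);
		mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
				      PAGE_SIZE << order);
	}

	ptr = kasan_kmalloc_large(ptr, size, flags);
	/* ptr might get tagged, so call the kmemleak hook after KASAN */
	kmemleak_alloc(ptr, size, 1, flags);

	return ptr;
}

Then __do_kmalloc_node() could just do
	return kmalloc_large_node(size, flags, node);
and keep the NUMA node of the caller.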

> +
> cachep = kmalloc_slab(size, flags);
> if (unlikely(ZERO_OR_NULL_PTR(cachep)))
> return cachep;
> @@ -3685,7 +3686,8 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
> void *ret;
>
> if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
> - return NULL;
> + return kmalloc_large(size, flags);
> +
> cachep = kmalloc_slab(size, flags);
> if (unlikely(ZERO_OR_NULL_PTR(cachep)))
> return cachep;
> @@ -3739,14 +3741,21 @@ void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p)
> {
> struct kmem_cache *s;
> size_t i;
> + struct folio *folio;
>
> local_irq_disable();
> for (i = 0; i < size; i++) {
> void *objp = p[i];
>
> - if (!orig_s) /* called via kfree_bulk */
> + if (!orig_s) {
> + /* called via kfree_bulk */
> + folio = virt_to_folio(objp);
> + if (unlikely(!folio_test_slab(folio))) {
> + free_large_kmalloc(folio, objp);

Hmm, I'm not sure it's a good idea to call this and the page allocator with
IRQs disabled. Maybe simply re-enable them around it?
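I mean roughly (untested):

		if (unlikely(!folio_test_slab(folio))) {
			local_irq_enable();
			free_large_kmalloc(folio, objp);
			local_irq_disable();
			continue;
		}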

> + continue;
> + }
> s = virt_to_cache(objp);

Since we already have the folio, we should instead use folio_slab() and
slab->slab_cache (see how mm/slub.c does it).
The same applies to the functions below as well.
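I.e. here, once the folio check is in place, instead of virt_to_cache():

			s = folio_slab(folio)->slab_cache;

which avoids looking the page up a second time.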

> - else
> + } else
> s = cache_from_obj(orig_s, objp);
> if (!s)
> continue;
> @@ -3776,11 +3785,20 @@ void kfree(const void *objp)
> {
> struct kmem_cache *c;
> unsigned long flags;
> + struct folio *folio;
> + void *object = (void *) objp;
>
> trace_kfree(_RET_IP_, objp);
>
> if (unlikely(ZERO_OR_NULL_PTR(objp)))
> return;
> +
> + folio = virt_to_folio(objp);
> + if (unlikely(!folio_test_slab(folio))) {
> + free_large_kmalloc(folio, object);
> + return;
> + }
> +
> local_irq_save(flags);
> kfree_debugcheck(objp);
> c = virt_to_cache(objp);
> @@ -4211,12 +4229,17 @@ void __check_heap_object(const void *ptr, unsigned long n,
> size_t __ksize(const void *objp)
> {
> struct kmem_cache *c;
> + struct folio *folio;
> size_t size;
>
> BUG_ON(!objp);
> if (unlikely(objp == ZERO_SIZE_PTR))
> return 0;
>
> + folio = virt_to_folio(objp);
> + if (!folio_test_slab(folio))
> + return folio_size(folio);
> +
> c = virt_to_cache(objp);
> size = c ? c->object_size : 0;
>
> diff --git a/mm/slab.h b/mm/slab.h
> index c7f2abc2b154..31e98beb47a3 100644
> --- a/mm/slab.h
> +++ b/mm/slab.h
> @@ -664,6 +664,25 @@ static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
> print_tracking(cachep, x);
> return cachep;
> }
> +
> +static __always_inline void kfree_hook(void *x)
> +{
> + kmemleak_free(x);
> + kasan_kfree_large(x);

Looks like there's only one caller of kfree_hook() so we could do that
directly there.
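I.e. (untested sketch):

static inline void free_large_kmalloc(struct folio *folio, void *object)
{
	unsigned int order = folio_order(folio);

	if (WARN_ON_ONCE(order == 0))
		pr_warn_once("object pointer: 0x%p\n", object);

	kmemleak_free(object);
	kasan_kfree_large(object);

	mod_lruvec_page_state(folio_page(folio, 0), NR_SLAB_UNRECLAIMABLE_B,
			      -(PAGE_SIZE << order));
	__free_pages(folio_page(folio, 0), order);
}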

> +}
> +
> +static inline void free_large_kmalloc(struct folio *folio, void *object)
> +{
> + unsigned int order = folio_order(folio);
> +
> + if (WARN_ON_ONCE(order == 0))
> + pr_warn_once("object pointer: 0x%p\n", object);
> +
> + kfree_hook(object);
> + mod_lruvec_page_state(folio_page(folio, 0), NR_SLAB_UNRECLAIMABLE_B,
> + -(PAGE_SIZE << order));
> + __free_pages(folio_page(folio, 0), order);
> +}
> #endif /* CONFIG_SLOB */
>
> static inline size_t slab_ksize(const struct kmem_cache *s)
> diff --git a/mm/slub.c b/mm/slub.c
> index 261474092e43..04fd084f4709 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -1686,12 +1686,6 @@ static inline void *kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
> return ptr;
> }
>
> -static __always_inline void kfree_hook(void *x)
> -{
> - kmemleak_free(x);
> - kasan_kfree_large(x);
> -}
> -
> static __always_inline bool slab_free_hook(struct kmem_cache *s,
> void *x, bool init)
> {
> @@ -3535,19 +3529,6 @@ struct detached_freelist {
> struct kmem_cache *s;
> };
>
> -static inline void free_large_kmalloc(struct folio *folio, void *object)
> -{
> - unsigned int order = folio_order(folio);
> -
> - if (WARN_ON_ONCE(order == 0))
> - pr_warn_once("object pointer: 0x%p\n", object);
> -
> - kfree_hook(object);
> - mod_lruvec_page_state(folio_page(folio, 0), NR_SLAB_UNRECLAIMABLE_B,
> - -(PAGE_SIZE << order));
> - __free_pages(folio_page(folio, 0), order);
> -}
> -
> /*
> * This function progressively scans the array with free objects (with
> * a limited look ahead) and extract objects belonging to the same

2022-03-05 08:57:59

by Hyeonggon Yoo

Subject: Re: [PATCH v2 1/5] mm/slab: kmalloc: pass requests larger than order-1 page to page allocator

On Fri, Mar 04, 2022 at 01:45:16PM +0100, Vlastimil Babka wrote:
> On 3/4/22 07:34, Hyeonggon Yoo wrote:
> > There is not much benefit in serving large objects from kmalloc().
> > Let's pass large requests to the page allocator, as SLUB does, for
> > better maintenance of the common code.
> >
> > Signed-off-by: Hyeonggon Yoo <[email protected]>
>
> Looks like it's on the right path! Some suggestions follow.
>

Thank you for reviewing this!
You have a really good eye.

> > ---
> > include/linux/slab.h | 35 ++++++++++++++++-------------------
> > mm/slab.c | 31 +++++++++++++++++++++++++++----
> > mm/slab.h | 19 +++++++++++++++++++
> > mm/slub.c | 19 -------------------
> > 4 files changed, 62 insertions(+), 42 deletions(-)
> >
> > diff --git a/include/linux/slab.h b/include/linux/slab.h
> > index 37bde99b74af..e7b3330db4f3 100644
> > --- a/include/linux/slab.h
> > +++ b/include/linux/slab.h
> > @@ -224,29 +224,19 @@ void kmem_dump_obj(void *object);
> > * Kmalloc array related definitions
> > */
> >
> > -#ifdef CONFIG_SLAB
> > /*
> > - * The largest kmalloc size supported by the SLAB allocators is
> > - * 32 megabyte (2^25) or the maximum allocatable page order if that is
> > - * less than 32 MB.
> > - *
> > - * WARNING: Its not easy to increase this value since the allocators have
> > - * to do various tricks to work around compiler limitations in order to
> > - * ensure proper constant folding.
> > + * SLAB and SLUB directly allocates requests fitting in to an order-1 page
> > + * (PAGE_SIZE*2). Larger requests are passed to the page allocator.
> > */
> > -#define KMALLOC_SHIFT_HIGH ((MAX_ORDER + PAGE_SHIFT - 1) <= 25 ? \
> > - (MAX_ORDER + PAGE_SHIFT - 1) : 25)
> > -#define KMALLOC_SHIFT_MAX KMALLOC_SHIFT_HIGH
> > +#ifdef CONFIG_SLAB
> > +#define KMALLOC_SHIFT_HIGH (PAGE_SHIFT + 1)
> > +#define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT - 1)
>
> I think we should be able to also remove the larger sizes from
> __kmalloc_index() later in this file.
>

Right, we can remove the sizes above 2^(maximum PAGE_SHIFT + 1).

Searching with cscope, the maximum PAGE_SHIFT is 20 (1MB pages), on hexagon,
so I think we can remove everything larger than 2MB.

But hmm, I'll also add something like static_assert(PAGE_SHIFT <= 20) and
see if any architecture/config complains about this.
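
For illustration only (untested, and the exact cut-off may end up different),
the idea would be to end __kmalloc_index() at 2MB:

	if (size <=  1024 * 1024) return 20;
	if (size <=  2 * 1024 * 1024) return 21;
	/* larger requests go to the page allocator now */

plus a guard next to the kmalloc definitions:

	static_assert(PAGE_SHIFT <= 20);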

> > #ifndef KMALLOC_SHIFT_LOW
> > #define KMALLOC_SHIFT_LOW 5
> > #endif
> > #endif
> >
> > #ifdef CONFIG_SLUB
> > -/*
> > - * SLUB directly allocates requests fitting in to an order-1 page
> > - * (PAGE_SIZE*2). Larger requests are passed to the page allocator.
> > - */
> > #define KMALLOC_SHIFT_HIGH (PAGE_SHIFT + 1)
> > #define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT - 1)
> > #ifndef KMALLOC_SHIFT_LOW
> > @@ -564,15 +554,15 @@ static __always_inline __alloc_size(1) void *kmalloc_large(size_t size, gfp_t fl
> > * Try really hard to succeed the allocation but fail
> > * eventually.
> > */
> > +#ifndef CONFIG_SLOB
> > static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
> > {
> > if (__builtin_constant_p(size)) {
> > -#ifndef CONFIG_SLOB
> > unsigned int index;
> > -#endif
> > +
> > if (size > KMALLOC_MAX_CACHE_SIZE)
> > return kmalloc_large(size, flags);
> > -#ifndef CONFIG_SLOB
> > +
> > index = kmalloc_index(size);
> >
> > if (!index)
> > @@ -581,10 +571,17 @@ static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
> > return kmem_cache_alloc_trace(
> > kmalloc_caches[kmalloc_type(flags)][index],
> > flags, size);
> > -#endif
> > }
> > return __kmalloc(size, flags);
> > }
> > +#else
> > +static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
> > +{
> > + if (__builtin_constant_p(size) && size > KMALLOC_MAX_CACHE_SIZE)
> > + return kmalloc_large(size, flags);
> > + return __kmalloc(size, flags);
> > +}
> > +#endif
> >
> > static __always_inline __alloc_size(1) void *kmalloc_node(size_t size, gfp_t flags, int node)
> > {
> > diff --git a/mm/slab.c b/mm/slab.c
> > index ddf5737c63d9..570af6dc3478 100644
> > --- a/mm/slab.c
> > +++ b/mm/slab.c
> > @@ -3624,7 +3624,8 @@ __do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller)
> > void *ret;
> >
> > if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
> > - return NULL;
> > + return kmalloc_large(size, flags);
>
> This loses the node information and NUMA locality.

Oh, my fault. Thank you for catching this.

> It would be better to generalize kmalloc_large_node() from mm/slub.c instead.

Sounds nice.

> > +
> > cachep = kmalloc_slab(size, flags);
> > if (unlikely(ZERO_OR_NULL_PTR(cachep)))
> > return cachep;
> > @@ -3685,7 +3686,8 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
> > void *ret;
> >
> > if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
> > - return NULL;
> > + return kmalloc_large(size, flags);
> > +
> > cachep = kmalloc_slab(size, flags);
> > if (unlikely(ZERO_OR_NULL_PTR(cachep)))
> > return cachep;
> > @@ -3739,14 +3741,21 @@ void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p)
> > {
> > struct kmem_cache *s;
> > size_t i;
> > + struct folio *folio;
> >
> > local_irq_disable();
> > for (i = 0; i < size; i++) {
> > void *objp = p[i];
> >
> > - if (!orig_s) /* called via kfree_bulk */
> > + if (!orig_s) {
> > + /* called via kfree_bulk */
> > + folio = virt_to_folio(objp);
> > + if (unlikely(!folio_test_slab(folio))) {
> > + free_large_kmalloc(folio, objp);
>
> Hmm, I'm not sure it's a good idea to call this and the page allocator with
> IRQs disabled. Maybe simply re-enable them around it?

Seems reasonable; re-enabling IRQs around it will be sufficient for this
function.

By the way, it seems SLAB does not really care about disabling IRQs even
when it's unnecessary.

I would like to try reducing the IRQ-disabled areas in SLAB and see how
that affects macro/micro benchmarks after this series is done :)

> > + continue;
> > + }
> > s = virt_to_cache(objp);
>
> Since we already have the folio, we should instead use folio_slab() and
> slab->slab_cache (see how mm/slub.c does it).
> The same applies to the functions below as well.

Right. I'll change them in v3.

> > - else
> > + } else
> > s = cache_from_obj(orig_s, objp);
> > if (!s)
> > continue;
> > @@ -3776,11 +3785,20 @@ void kfree(const void *objp)
> > {
> > struct kmem_cache *c;
> > unsigned long flags;
> > + struct folio *folio;
> > + void *object = (void *) objp;
> >
> > trace_kfree(_RET_IP_, objp);
> >
> > if (unlikely(ZERO_OR_NULL_PTR(objp)))
> > return;
> > +
> > + folio = virt_to_folio(objp);
> > + if (unlikely(!folio_test_slab(folio))) {
> > + free_large_kmalloc(folio, object);
> > + return;
> > + }
> > +
> > local_irq_save(flags);
> > kfree_debugcheck(objp);
> > c = virt_to_cache(objp);
> > @@ -4211,12 +4229,17 @@ void __check_heap_object(const void *ptr, unsigned long n,
> > size_t __ksize(const void *objp)
> > {
> > struct kmem_cache *c;
> > + struct folio *folio;
> > size_t size;
> >
> > BUG_ON(!objp);
> > if (unlikely(objp == ZERO_SIZE_PTR))
> > return 0;
> >
> > + folio = virt_to_folio(objp);
> > + if (!folio_test_slab(folio))
> > + return folio_size(folio);
> > +
> > c = virt_to_cache(objp);
> > size = c ? c->object_size : 0;
> >
> > diff --git a/mm/slab.h b/mm/slab.h
> > index c7f2abc2b154..31e98beb47a3 100644
> > --- a/mm/slab.h
> > +++ b/mm/slab.h
> > @@ -664,6 +664,25 @@ static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
> > print_tracking(cachep, x);
> > return cachep;
> > }
> > +
> > +static __always_inline void kfree_hook(void *x)
> > +{
> > + kmemleak_free(x);
> > + kasan_kfree_large(x);
>
> Looks like there's only one caller of kfree_hook() so we could do that
> directly there.
>

Right.

> > +}
> > +
> > +static inline void free_large_kmalloc(struct folio *folio, void *object)
> > +{
> > + unsigned int order = folio_order(folio);
> > +
> > + if (WARN_ON_ONCE(order == 0))
> > + pr_warn_once("object pointer: 0x%p\n", object);
> > +
> > + kfree_hook(object);
> > + mod_lruvec_page_state(folio_page(folio, 0), NR_SLAB_UNRECLAIMABLE_B,
> > + -(PAGE_SIZE << order));
> > + __free_pages(folio_page(folio, 0), order);
> > +}
> > #endif /* CONFIG_SLOB */
> >
> > static inline size_t slab_ksize(const struct kmem_cache *s)
> > diff --git a/mm/slub.c b/mm/slub.c
> > index 261474092e43..04fd084f4709 100644
> > --- a/mm/slub.c
> > +++ b/mm/slub.c
> > @@ -1686,12 +1686,6 @@ static inline void *kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
> > return ptr;
> > }
> >
> > -static __always_inline void kfree_hook(void *x)
> > -{
> > - kmemleak_free(x);
> > - kasan_kfree_large(x);
> > -}
> > -
> > static __always_inline bool slab_free_hook(struct kmem_cache *s,
> > void *x, bool init)
> > {
> > @@ -3535,19 +3529,6 @@ struct detached_freelist {
> > struct kmem_cache *s;
> > };
> >
> > -static inline void free_large_kmalloc(struct folio *folio, void *object)
> > -{
> > - unsigned int order = folio_order(folio);
> > -
> > - if (WARN_ON_ONCE(order == 0))
> > - pr_warn_once("object pointer: 0x%p\n", object);
> > -
> > - kfree_hook(object);
> > - mod_lruvec_page_state(folio_page(folio, 0), NR_SLAB_UNRECLAIMABLE_B,
> > - -(PAGE_SIZE << order));
> > - __free_pages(folio_page(folio, 0), order);
> > -}
> > -
> > /*
> > * This function progressively scans the array with free objects (with
> > * a limited look ahead) and extract objects belonging to the same
>

Thank you so much!

--
Thank you, You are awesome!
Hyeonggon :-)