ARCH_KMALLOC_MINALIGN represents the minimum guaranteed kmalloc()
alignment but an architecture may require a larger run-time alignment.
Do not create kmalloc caches smaller than arch_kmalloc_minalign().
Signed-off-by: Catalin Marinas <[email protected]>
Cc: Andrew Morton <[email protected]>
---
include/linux/slab.h | 2 ++
mm/slab.c | 6 +-----
mm/slab.h | 2 ++
mm/slab_common.c | 33 +++++++++++++++++++++++----------
4 files changed, 28 insertions(+), 15 deletions(-)
diff --git a/include/linux/slab.h b/include/linux/slab.h
index d58211bdeceb..2137dba85691 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -332,6 +332,8 @@ enum kmalloc_cache_type {
extern struct kmem_cache *
kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1];
+unsigned int arch_kmalloc_minalign(void);
+
/*
* Define gfp bits that should not be set for KMALLOC_NORMAL.
*/
diff --git a/mm/slab.c b/mm/slab.c
index b04e40078bdf..4aaeeb9c994d 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1256,11 +1256,7 @@ void __init kmem_cache_init(void)
* Initialize the caches that provide memory for the kmem_cache_node
* structures first. Without this, further allocations will bug.
*/
- kmalloc_caches[KMALLOC_NORMAL][INDEX_NODE] = create_kmalloc_cache(
- kmalloc_info[INDEX_NODE].name[KMALLOC_NORMAL],
- kmalloc_info[INDEX_NODE].size,
- ARCH_KMALLOC_FLAGS, 0,
- kmalloc_info[INDEX_NODE].size);
+ new_kmalloc_cache(INDEX_NODE, KMALLOC_NORMAL, ARCH_KMALLOC_FLAGS);
slab_state = PARTIAL_NODE;
setup_kmalloc_cache_index_table();
diff --git a/mm/slab.h b/mm/slab.h
index fd7ae2024897..e9238406602a 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -283,6 +283,8 @@ int __kmem_cache_create(struct kmem_cache *, slab_flags_t flags);
struct kmem_cache *create_kmalloc_cache(const char *name, unsigned int size,
slab_flags_t flags, unsigned int useroffset,
unsigned int usersize);
+void __init new_kmalloc_cache(int idx, enum kmalloc_cache_type type,
+ slab_flags_t flags);
extern void create_boot_cache(struct kmem_cache *, const char *name,
unsigned int size, slab_flags_t flags,
unsigned int useroffset, unsigned int usersize);
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 6ee64d6208b3..594d8a8a68d0 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -838,9 +838,18 @@ void __init setup_kmalloc_cache_index_table(void)
}
}
-static void __init
+unsigned int __weak arch_kmalloc_minalign(void)
+{
+ return ARCH_KMALLOC_MINALIGN;
+}
+
+void __init
new_kmalloc_cache(int idx, enum kmalloc_cache_type type, slab_flags_t flags)
{
+ unsigned int minalign = arch_kmalloc_minalign();
+ unsigned int aligned_size = kmalloc_info[idx].size;
+ int aligned_idx = idx;
+
if (type == KMALLOC_RECLAIM) {
flags |= SLAB_RECLAIM_ACCOUNT;
} else if (IS_ENABLED(CONFIG_MEMCG_KMEM) && (type == KMALLOC_CGROUP)) {
@@ -851,10 +860,17 @@ new_kmalloc_cache(int idx, enum kmalloc_cache_type type, slab_flags_t flags)
flags |= SLAB_ACCOUNT;
}
- kmalloc_caches[type][idx] = create_kmalloc_cache(
- kmalloc_info[idx].name[type],
- kmalloc_info[idx].size, flags, 0,
- kmalloc_info[idx].size);
+ if (minalign > ARCH_KMALLOC_MINALIGN) {
+ aligned_size = ALIGN(aligned_size, minalign);
+ aligned_idx = __kmalloc_index(aligned_size, false);
+ }
+
+ if (!kmalloc_caches[type][aligned_idx])
+ kmalloc_caches[type][aligned_idx] = create_kmalloc_cache(
+ kmalloc_info[aligned_idx].name[type],
+ aligned_size, flags, 0, aligned_size);
+ if (idx != aligned_idx)
+ kmalloc_caches[type][idx] = kmalloc_caches[type][aligned_idx];
/*
* If CONFIG_MEMCG_KMEM is enabled, disable cache merging for
@@ -904,11 +920,8 @@ void __init create_kmalloc_caches(slab_flags_t flags)
struct kmem_cache *s = kmalloc_caches[KMALLOC_NORMAL][i];
if (s) {
- kmalloc_caches[KMALLOC_DMA][i] = create_kmalloc_cache(
- kmalloc_info[i].name[KMALLOC_DMA],
- kmalloc_info[i].size,
- SLAB_CACHE_DMA | flags, 0,
- kmalloc_info[i].size);
+ new_kmalloc_cache(i, KMALLOC_DMA,
+ SLAB_CACHE_DMA | flags);
}
}
#endif
On Tue, Apr 05, 2022 at 02:57:56PM +0100, Catalin Marinas wrote:
> ARCH_KMALLOC_MINALIGN represents the minimum guaranteed kmalloc()
> alignment but an architecture may require a larger run-time alignment.
> Do not create kmalloc caches smaller than arch_kmalloc_minalign().
>
> Signed-off-by: Catalin Marinas <[email protected]>
> Cc: Andrew Morton <[email protected]>
> ---
> include/linux/slab.h | 2 ++
> mm/slab.c | 6 +-----
> mm/slab.h | 2 ++
> mm/slab_common.c | 33 +++++++++++++++++++++++----------
> 4 files changed, 28 insertions(+), 15 deletions(-)
>
> diff --git a/include/linux/slab.h b/include/linux/slab.h
> index d58211bdeceb..2137dba85691 100644
> --- a/include/linux/slab.h
> +++ b/include/linux/slab.h
> @@ -332,6 +332,8 @@ enum kmalloc_cache_type {
> extern struct kmem_cache *
> kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1];
>
> +unsigned int arch_kmalloc_minalign(void);
> +
> /*
> * Define gfp bits that should not be set for KMALLOC_NORMAL.
> */
> diff --git a/mm/slab.c b/mm/slab.c
> index b04e40078bdf..4aaeeb9c994d 100644
> --- a/mm/slab.c
> +++ b/mm/slab.c
> @@ -1256,11 +1256,7 @@ void __init kmem_cache_init(void)
> * Initialize the caches that provide memory for the kmem_cache_node
> * structures first. Without this, further allocations will bug.
> */
> - kmalloc_caches[KMALLOC_NORMAL][INDEX_NODE] = create_kmalloc_cache(
> - kmalloc_info[INDEX_NODE].name[KMALLOC_NORMAL],
> - kmalloc_info[INDEX_NODE].size,
> - ARCH_KMALLOC_FLAGS, 0,
> - kmalloc_info[INDEX_NODE].size);
> + new_kmalloc_cache(INDEX_NODE, KMALLOC_NORMAL, ARCH_KMALLOC_FLAGS);
> slab_state = PARTIAL_NODE;
> setup_kmalloc_cache_index_table();
>
> diff --git a/mm/slab.h b/mm/slab.h
> index fd7ae2024897..e9238406602a 100644
> --- a/mm/slab.h
> +++ b/mm/slab.h
> @@ -283,6 +283,8 @@ int __kmem_cache_create(struct kmem_cache *, slab_flags_t flags);
> struct kmem_cache *create_kmalloc_cache(const char *name, unsigned int size,
> slab_flags_t flags, unsigned int useroffset,
> unsigned int usersize);
> +void __init new_kmalloc_cache(int idx, enum kmalloc_cache_type type,
> + slab_flags_t flags);
> extern void create_boot_cache(struct kmem_cache *, const char *name,
> unsigned int size, slab_flags_t flags,
> unsigned int useroffset, unsigned int usersize);
> diff --git a/mm/slab_common.c b/mm/slab_common.c
> index 6ee64d6208b3..594d8a8a68d0 100644
> --- a/mm/slab_common.c
> +++ b/mm/slab_common.c
> @@ -838,9 +838,18 @@ void __init setup_kmalloc_cache_index_table(void)
> }
> }
>
> -static void __init
> +unsigned int __weak arch_kmalloc_minalign(void)
> +{
> + return ARCH_KMALLOC_MINALIGN;
> +}
> +
As ARCH_KMALLOC_MINALIGN and arch_kmalloc_minalign() may not be the same after
patch 10, I think s/ARCH_KMALLOC_MINALIGN/arch_kmalloc_minalign/g
for every user of it would be more correct?
> +void __init
> new_kmalloc_cache(int idx, enum kmalloc_cache_type type, slab_flags_t flags)
> {
> + unsigned int minalign = arch_kmalloc_minalign();
> + unsigned int aligned_size = kmalloc_info[idx].size;
> + int aligned_idx = idx;
> +
> if (type == KMALLOC_RECLAIM) {
> flags |= SLAB_RECLAIM_ACCOUNT;
> } else if (IS_ENABLED(CONFIG_MEMCG_KMEM) && (type == KMALLOC_CGROUP)) {
> @@ -851,10 +860,17 @@ new_kmalloc_cache(int idx, enum kmalloc_cache_type type, slab_flags_t flags)
> flags |= SLAB_ACCOUNT;
> }
>
> - kmalloc_caches[type][idx] = create_kmalloc_cache(
> - kmalloc_info[idx].name[type],
> - kmalloc_info[idx].size, flags, 0,
> - kmalloc_info[idx].size);
> + if (minalign > ARCH_KMALLOC_MINALIGN) {
> + aligned_size = ALIGN(aligned_size, minalign);
> + aligned_idx = __kmalloc_index(aligned_size, false);
> + }
> +
> + if (!kmalloc_caches[type][aligned_idx])
> + kmalloc_caches[type][aligned_idx] = create_kmalloc_cache(
> + kmalloc_info[aligned_idx].name[type],
> + aligned_size, flags, 0, aligned_size);
> + if (idx != aligned_idx)
> + kmalloc_caches[type][idx] = kmalloc_caches[type][aligned_idx];
I would prefer detecting the minimum kmalloc size in create_kmalloc_caches()
at runtime instead of changing the behavior of new_kmalloc_cache().
> /*
> * If CONFIG_MEMCG_KMEM is enabled, disable cache merging for
> @@ -904,11 +920,8 @@ void __init create_kmalloc_caches(slab_flags_t flags)
> struct kmem_cache *s = kmalloc_caches[KMALLOC_NORMAL][i];
>
> if (s) {
> - kmalloc_caches[KMALLOC_DMA][i] = create_kmalloc_cache(
> - kmalloc_info[i].name[KMALLOC_DMA],
> - kmalloc_info[i].size,
> - SLAB_CACHE_DMA | flags, 0,
> - kmalloc_info[i].size);
> + new_kmalloc_cache(i, KMALLOC_DMA,
> + SLAB_CACHE_DMA | flags);
> }
> }
> #endif
--
Thank you, You are awesome!
Hyeonggon :-)
On Thu, Apr 07, 2022 at 06:18:16PM +0900, Hyeonggon Yoo wrote:
> On Thu, Apr 07, 2022 at 09:50:23AM +0100, Catalin Marinas wrote:
> > On Thu, Apr 07, 2022 at 03:46:37AM +0000, Hyeonggon Yoo wrote:
> > > On Tue, Apr 05, 2022 at 02:57:56PM +0100, Catalin Marinas wrote:
> > > > --- a/mm/slab_common.c
> > > > +++ b/mm/slab_common.c
> > > > @@ -838,9 +838,18 @@ void __init setup_kmalloc_cache_index_table(void)
> > > > }
> > > > }
> > > >
> > > > -static void __init
> > > > +unsigned int __weak arch_kmalloc_minalign(void)
> > > > +{
> > > > + return ARCH_KMALLOC_MINALIGN;
> > > > +}
> > > > +
> > >
> > > As ARCH_KMALLOC_ALIGN and arch_kmalloc_minalign() may not be same after
> > > patch 10, I think s/ARCH_KMALLOC_ALIGN/arch_kmalloc_minalign/g
> > > for every user of it would be more correct?
> >
> > Not if the code currently using ARCH_KMALLOC_MINALIGN needs a constant.
> > Yes, there probably are a few places where the code can cope with a
> > dynamic arch_kmalloc_minalign() but there are two other cases where a
> > constant is needed:
> >
> > 1. As a BUILD_BUG check because the code is storing some flags in the
> > bottom bits of a pointer. A smaller ARCH_KMALLOC_MINALIGN works just
> > fine here.
> >
> > 2. As a static alignment for DMA requirements. That's where the newly
> > exposed ARCH_DMA_MINALIGN should be used.
> >
> > Note that this series doesn't make the situation any worse than before
> > since ARCH_DMA_MINALIGN stays at 128 bytes for arm64. Current users can
> > evolve to use a dynamic alignment in future patches. My main aim with
> > this series is to be able to create kmalloc-64 caches on arm64.
>
AFAIK there are a bunch of drivers that directly call kmalloc().
Well, lots of drivers call kmalloc() ;).
It becomes tricky when, e.g., a driver allocates just 32 bytes,
but the architecture requires it to be 128-byte aligned.
That's the current behaviour, a 32 byte allocation would return an
object from kmalloc-128. I want to reduce this to at least kmalloc-64
(or smaller) if the CPU/SoC allows it.
> That's why everything allocated from kmalloc() need to be aligned in
> ARCH_DMA_MINALIGN.
I don't get your conclusion here. Would you mind explaining?
> So I'm yet skeptical on decoupling ARCH_DMA/KMALLOC_MINALIGN. Instead
> of decoupling it, I'm more into dynamically decreasing it.
The reason for decoupling is mostly that there are some static uses of
ARCH_KMALLOC_MINALIGN as per point 1 above. The other is the
__assume_kmalloc_alignment attribute. We shouldn't have such assumed
alignment larger than what a dynamic kmalloc() would return. To me it
makes a lot more sense for ARCH_KMALLOC_MINALIGN to be the minimum
guaranteed in a kernel build but kmalloc() returning a larger alignment
at run-time than the other way around.
Thanks.
--
Catalin
On Thu, Apr 07, 2022 at 03:46:37AM +0000, Hyeonggon Yoo wrote:
> On Tue, Apr 05, 2022 at 02:57:56PM +0100, Catalin Marinas wrote:
> > --- a/mm/slab_common.c
> > +++ b/mm/slab_common.c
> > @@ -838,9 +838,18 @@ void __init setup_kmalloc_cache_index_table(void)
> > }
> > }
> >
> > -static void __init
> > +unsigned int __weak arch_kmalloc_minalign(void)
> > +{
> > + return ARCH_KMALLOC_MINALIGN;
> > +}
> > +
>
> As ARCH_KMALLOC_ALIGN and arch_kmalloc_minalign() may not be same after
> patch 10, I think s/ARCH_KMALLOC_ALIGN/arch_kmalloc_minalign/g
> for every user of it would be more correct?
Not if the code currently using ARCH_KMALLOC_MINALIGN needs a constant.
Yes, there probably are a few places where the code can cope with a
dynamic arch_kmalloc_minalign() but there are two other cases where a
constant is needed:
1. As a BUILD_BUG check because the code is storing some flags in the
bottom bits of a pointer. A smaller ARCH_KMALLOC_MINALIGN works just
fine here.
2. As a static alignment for DMA requirements. That's where the newly
exposed ARCH_DMA_MINALIGN should be used.
Note that this series doesn't make the situation any worse than before
since ARCH_DMA_MINALIGN stays at 128 bytes for arm64. Current users can
evolve to use a dynamic alignment in future patches. My main aim with
this series is to be able to create kmalloc-64 caches on arm64.
> > @@ -851,10 +860,17 @@ new_kmalloc_cache(int idx, enum kmalloc_cache_type type, slab_flags_t flags)
> > flags |= SLAB_ACCOUNT;
> > }
> >
> > - kmalloc_caches[type][idx] = create_kmalloc_cache(
> > - kmalloc_info[idx].name[type],
> > - kmalloc_info[idx].size, flags, 0,
> > - kmalloc_info[idx].size);
> > + if (minalign > ARCH_KMALLOC_MINALIGN) {
> > + aligned_size = ALIGN(aligned_size, minalign);
> > + aligned_idx = __kmalloc_index(aligned_size, false);
> > + }
> > +
> > + if (!kmalloc_caches[type][aligned_idx])
> > + kmalloc_caches[type][aligned_idx] = create_kmalloc_cache(
> > + kmalloc_info[aligned_idx].name[type],
> > + aligned_size, flags, 0, aligned_size);
> > + if (idx != aligned_idx)
> > + kmalloc_caches[type][idx] = kmalloc_caches[type][aligned_idx];
>
> I would prefer detecting minimum kmalloc size in create_kmalloc_caches()
> in runtime instead of changing behavior of new_kmalloc_cache().
That was my initial attempt but we have a couple of
create_kmalloc_cache() (not *_caches) calls directly, one of them in
mm/slab.c kmem_cache_init(). So I wanted all the minalign logic in a
single place, hence I replaced the explicit create_kmalloc_cache() call
with new_kmalloc_cache(). See this patch and patch 9 for some clean-up.
--
Catalin
On Thu, Apr 07, 2022 at 09:50:23AM +0100, Catalin Marinas wrote:
> On Thu, Apr 07, 2022 at 03:46:37AM +0000, Hyeonggon Yoo wrote:
> > On Tue, Apr 05, 2022 at 02:57:56PM +0100, Catalin Marinas wrote:
> > > --- a/mm/slab_common.c
> > > +++ b/mm/slab_common.c
> > > @@ -838,9 +838,18 @@ void __init setup_kmalloc_cache_index_table(void)
> > > }
> > > }
> > >
> > > -static void __init
> > > +unsigned int __weak arch_kmalloc_minalign(void)
> > > +{
> > > + return ARCH_KMALLOC_MINALIGN;
> > > +}
> > > +
> >
> > As ARCH_KMALLOC_ALIGN and arch_kmalloc_minalign() may not be same after
> > patch 10, I think s/ARCH_KMALLOC_ALIGN/arch_kmalloc_minalign/g
> > for every user of it would be more correct?
>
> Not if the code currently using ARCH_KMALLOC_MINALIGN needs a constant.
> Yes, there probably are a few places where the code can cope with a
> dynamic arch_kmalloc_minalign() but there are two other cases where a
> constant is needed:
>
> 1. As a BUILD_BUG check because the code is storing some flags in the
> bottom bits of a pointer. A smaller ARCH_KMALLOC_MINALIGN works just
> fine here.
>
> 2. As a static alignment for DMA requirements. That's where the newly
> exposed ARCH_DMA_MINALIGN should be used.
>
> Note that this series doesn't make the situation any worse than before
> since ARCH_DMA_MINALIGN stays at 128 bytes for arm64. Current users can
> evolve to use a dynamic alignment in future patches. My main aim with
> this series is to be able to create kmalloc-64 caches on arm64.
AFAIK there are a bunch of drivers that directly call kmalloc().
It becomes tricky when, e.g., a driver allocates just 32 bytes,
but the architecture requires it to be 128-byte aligned.
That's why everything allocated from kmalloc() needs to be aligned to
ARCH_DMA_MINALIGN. And it's too hard to update all of the drivers
that depend on this fact.
So I'm still skeptical about decoupling ARCH_DMA/KMALLOC_MINALIGN.
Instead of decoupling it, I'm more into dynamically decreasing it.
Please kindly let me know if I'm missing something ;-)
> > > @@ -851,10 +860,17 @@ new_kmalloc_cache(int idx, enum kmalloc_cache_type type, slab_flags_t flags)
> > > flags |= SLAB_ACCOUNT;
> > > }
> > >
> > > - kmalloc_caches[type][idx] = create_kmalloc_cache(
> > > - kmalloc_info[idx].name[type],
> > > - kmalloc_info[idx].size, flags, 0,
> > > - kmalloc_info[idx].size);
> > > + if (minalign > ARCH_KMALLOC_MINALIGN) {
> > > + aligned_size = ALIGN(aligned_size, minalign);
> > > + aligned_idx = __kmalloc_index(aligned_size, false);
> > > + }
> > > +
> > > + if (!kmalloc_caches[type][aligned_idx])
> > > + kmalloc_caches[type][aligned_idx] = create_kmalloc_cache(
> > > + kmalloc_info[aligned_idx].name[type],
> > > + aligned_size, flags, 0, aligned_size);
> > > + if (idx != aligned_idx)
> > > + kmalloc_caches[type][idx] = kmalloc_caches[type][aligned_idx];
> >
> > I would prefer detecting minimum kmalloc size in create_kmalloc_caches()
> > in runtime instead of changing behavior of new_kmalloc_cache().
>
> That was my initial attempt but we have a couple of
> create_kmalloc_cache() (not *_caches) calls directly, one of them in
> mm/slab.c kmem_cache_init(). So I wanted all the minalign logic in a
> single place, hence I replaced the explicit create_kmalloc_cache() call
> with new_kmalloc_cache(). See this patch and patch 9 for some clean-up.
>
> --
> Catalin
--
Thanks,
Hyeonggon
On Thu, Apr 07, 2022 at 10:35:04AM +0100, Catalin Marinas wrote:
> On Thu, Apr 07, 2022 at 06:18:16PM +0900, Hyeonggon Yoo wrote:
> > On Thu, Apr 07, 2022 at 09:50:23AM +0100, Catalin Marinas wrote:
> > > On Thu, Apr 07, 2022 at 03:46:37AM +0000, Hyeonggon Yoo wrote:
> > > > On Tue, Apr 05, 2022 at 02:57:56PM +0100, Catalin Marinas wrote:
> > > > > --- a/mm/slab_common.c
> > > > > +++ b/mm/slab_common.c
> > > > > @@ -838,9 +838,18 @@ void __init setup_kmalloc_cache_index_table(void)
> > > > > }
> > > > > }
> > > > >
> > > > > -static void __init
> > > > > +unsigned int __weak arch_kmalloc_minalign(void)
> > > > > +{
> > > > > + return ARCH_KMALLOC_MINALIGN;
> > > > > +}
> > > > > +
> > > >
> > > > As ARCH_KMALLOC_ALIGN and arch_kmalloc_minalign() may not be same after
> > > > patch 10, I think s/ARCH_KMALLOC_ALIGN/arch_kmalloc_minalign/g
> > > > for every user of it would be more correct?
> > >
> > > Not if the code currently using ARCH_KMALLOC_MINALIGN needs a constant.
> > > Yes, there probably are a few places where the code can cope with a
> > > dynamic arch_kmalloc_minalign() but there are two other cases where a
> > > constant is needed:
> > >
> > > 1. As a BUILD_BUG check because the code is storing some flags in the
> > > bottom bits of a pointer. A smaller ARCH_KMALLOC_MINALIGN works just
> > > fine here.
> > >
> > > 2. As a static alignment for DMA requirements. That's where the newly
> > > exposed ARCH_DMA_MINALIGN should be used.
> > >
> > > Note that this series doesn't make the situation any worse than before
> > > since ARCH_DMA_MINALIGN stays at 128 bytes for arm64. Current users can
> > > evolve to use a dynamic alignment in future patches. My main aim with
> > > this series is to be able to create kmalloc-64 caches on arm64.
> >
> > AFAIK there are bunch of drivers that directly calls kmalloc().
>
> Well, lots of drivers call kmalloc() ;).
>
> > It becomes tricky when e.g.) a driver allocates just 32 bytes,
> > but architecture requires it to be 128-byte aligned.
>
> That's the current behaviour, a 32 byte allocation would return an
> object from kmalloc-128. I want to reduce this to at least kmalloc-64
> (or smaller) if the CPU/SoC allows it.
Yeah I agree the change is worth :) Thanks for the work.
> > That's why everything allocated from kmalloc() need to be aligned in
> > ARCH_DMA_MINALIGN.
>
> I don't get your conclusion here. Would you mind explaining?
What I wanted to ask was why ARCH_DMA_MINALIGN should be
different from ARCH_KMALLOC_MINALIGN.
I thought the two were basically the same thing. Instead of
decoupling them, I thought just decreasing them at runtime made more sense.
> > So I'm yet skeptical on decoupling ARCH_DMA/KMALLOC_MINALIGN. Instead
> > of decoupling it, I'm more into dynamically decreasing it.
>
> The reason for decoupling is mostly that there are some static uses of
> ARCH_KMALLOC_MINALIGN as per point 1 above. The other is the
> __assume_kmalloc_alignment attribute. We shouldn't have such assumed
> alignment larger than what a dynamic kmalloc() would return. To me it
> makes a lot more sense for ARCH_KMALLOC_MINALIGN to be the minimum
> guaranteed in a kernel build but kmalloc() returning a larger alignment
> at run-time than the other way around.
But yeah, considering the problems you mentioned, it seems unavoidable
to decouple them.
Thank you for explanation and I will review slab part soon.
> Thanks.
>
> --
> Catalin
--
Thanks,
Hyeonggon
On Tue, Apr 05, 2022 at 02:57:56PM +0100, Catalin Marinas wrote:
> ARCH_KMALLOC_MINALIGN represents the minimum guaranteed kmalloc()
> alignment but an architecture may require a larger run-time alignment.
> Do not create kmalloc caches smaller than arch_kmalloc_minalign().
>
> Signed-off-by: Catalin Marinas <[email protected]>
> Cc: Andrew Morton <[email protected]>
> ---
> include/linux/slab.h | 2 ++
> mm/slab.c | 6 +-----
> mm/slab.h | 2 ++
> mm/slab_common.c | 33 +++++++++++++++++++++++----------
> 4 files changed, 28 insertions(+), 15 deletions(-)
>
> diff --git a/include/linux/slab.h b/include/linux/slab.h
> index d58211bdeceb..2137dba85691 100644
> --- a/include/linux/slab.h
> +++ b/include/linux/slab.h
> @@ -332,6 +332,8 @@ enum kmalloc_cache_type {
> extern struct kmem_cache *
> kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1];
>
> +unsigned int arch_kmalloc_minalign(void);
> +
> /*
> * Define gfp bits that should not be set for KMALLOC_NORMAL.
> */
> diff --git a/mm/slab.c b/mm/slab.c
> index b04e40078bdf..4aaeeb9c994d 100644
> --- a/mm/slab.c
> +++ b/mm/slab.c
> @@ -1256,11 +1256,7 @@ void __init kmem_cache_init(void)
> * Initialize the caches that provide memory for the kmem_cache_node
> * structures first. Without this, further allocations will bug.
> */
> - kmalloc_caches[KMALLOC_NORMAL][INDEX_NODE] = create_kmalloc_cache(
> - kmalloc_info[INDEX_NODE].name[KMALLOC_NORMAL],
> - kmalloc_info[INDEX_NODE].size,
> - ARCH_KMALLOC_FLAGS, 0,
> - kmalloc_info[INDEX_NODE].size);
> + new_kmalloc_cache(INDEX_NODE, KMALLOC_NORMAL, ARCH_KMALLOC_FLAGS);
> slab_state = PARTIAL_NODE;
> setup_kmalloc_cache_index_table();
>
> diff --git a/mm/slab.h b/mm/slab.h
> index fd7ae2024897..e9238406602a 100644
> --- a/mm/slab.h
> +++ b/mm/slab.h
> @@ -283,6 +283,8 @@ int __kmem_cache_create(struct kmem_cache *, slab_flags_t flags);
> struct kmem_cache *create_kmalloc_cache(const char *name, unsigned int size,
> slab_flags_t flags, unsigned int useroffset,
> unsigned int usersize);
> +void __init new_kmalloc_cache(int idx, enum kmalloc_cache_type type,
> + slab_flags_t flags);
> extern void create_boot_cache(struct kmem_cache *, const char *name,
> unsigned int size, slab_flags_t flags,
> unsigned int useroffset, unsigned int usersize);
> diff --git a/mm/slab_common.c b/mm/slab_common.c
> index 6ee64d6208b3..594d8a8a68d0 100644
> --- a/mm/slab_common.c
> +++ b/mm/slab_common.c
> @@ -838,9 +838,18 @@ void __init setup_kmalloc_cache_index_table(void)
> }
> }
>
> -static void __init
> +unsigned int __weak arch_kmalloc_minalign(void)
> +{
> + return ARCH_KMALLOC_MINALIGN;
> +}
> +
> +void __init
> new_kmalloc_cache(int idx, enum kmalloc_cache_type type, slab_flags_t flags)
> {
> + unsigned int minalign = arch_kmalloc_minalign();
> + unsigned int aligned_size = kmalloc_info[idx].size;
> + int aligned_idx = idx;
> +
> if (type == KMALLOC_RECLAIM) {
> flags |= SLAB_RECLAIM_ACCOUNT;
> } else if (IS_ENABLED(CONFIG_MEMCG_KMEM) && (type == KMALLOC_CGROUP)) {
> @@ -851,10 +860,17 @@ new_kmalloc_cache(int idx, enum kmalloc_cache_type type, slab_flags_t flags)
> flags |= SLAB_ACCOUNT;
> }
>
> - kmalloc_caches[type][idx] = create_kmalloc_cache(
> - kmalloc_info[idx].name[type],
> - kmalloc_info[idx].size, flags, 0,
> - kmalloc_info[idx].size);
> + if (minalign > ARCH_KMALLOC_MINALIGN) {
> + aligned_size = ALIGN(aligned_size, minalign);
> + aligned_idx = __kmalloc_index(aligned_size, false);
> + }
> +
> + if (!kmalloc_caches[type][aligned_idx])
> + kmalloc_caches[type][aligned_idx] = create_kmalloc_cache(
> + kmalloc_info[aligned_idx].name[type],
> + aligned_size, flags, 0, aligned_size);
> + if (idx != aligned_idx)
> + kmalloc_caches[type][idx] = kmalloc_caches[type][aligned_idx];
>
> /*
> * If CONFIG_MEMCG_KMEM is enabled, disable cache merging for
> @@ -904,11 +920,8 @@ void __init create_kmalloc_caches(slab_flags_t flags)
> struct kmem_cache *s = kmalloc_caches[KMALLOC_NORMAL][i];
>
> if (s) {
> - kmalloc_caches[KMALLOC_DMA][i] = create_kmalloc_cache(
> - kmalloc_info[i].name[KMALLOC_DMA],
> - kmalloc_info[i].size,
> - SLAB_CACHE_DMA | flags, 0,
> - kmalloc_info[i].size);
> + new_kmalloc_cache(i, KMALLOC_DMA,
> + SLAB_CACHE_DMA | flags);
> }
> }
> #endif
Looks good to me.
Reviewed-by: Hyeonggon Yoo <[email protected]>
And works fine with SLAB/SLUB/SLOB on my arm64 machine.
Tested-by: Hyeonggon Yoo <[email protected]>
Thanks!
--
Thanks,
Hyeonggon