alloc_cpumask_var_node() and friends allocate the cpumask var dynamically
for CONFIG_CPUMASK_OFFSTACK=y kernels. The allocated size of a cpumask var
is cpumask_size(), which is a runtime constant once nr_cpu_ids is frozen.
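Roughly speaking (modulo the exact helpers used in the current tree), for
CONFIG_CPUMASK_OFFSTACK=y that size works out to:

	/* bytes needed to hold nr_cpu_ids bits, rounded up to whole longs */
	static inline unsigned int cpumask_size(void)
	{
		return BITS_TO_LONGS(nr_cpu_ids) * sizeof(long);
	}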
Create a dedicated kmem cache for dynamic allocation of cpumask vars.

The window for creating the cache is somewhat narrow:
- after the last update of nr_cpu_ids (via set_nr_cpu_ids()),
- after the kmem cache infrastructure is available,
- before any alloc_cpumask_var_node() invocation (e.g. in sched_init()).
Signed-off-by: Dawei Li <[email protected]>
---
include/linux/cpumask.h | 7 +++++++
init/main.c | 1 +
lib/cpumask.c | 15 +++++++++++++--
3 files changed, 21 insertions(+), 2 deletions(-)
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 1c29947db848..2963155f1776 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -902,6 +902,8 @@ typedef struct cpumask *cpumask_var_t;
#define this_cpu_cpumask_var_ptr(x) this_cpu_read(x)
#define __cpumask_var_read_mostly __read_mostly
+int __init cpumask_cache_init(void);
+
bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node);
static inline
@@ -949,6 +951,11 @@ typedef struct cpumask cpumask_var_t[1];
#define this_cpu_cpumask_var_ptr(x) this_cpu_ptr(x)
#define __cpumask_var_read_mostly
+static inline int cpumask_cache_init(void)
+{
+ return 0;
+}
+
static inline bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags)
{
return true;
diff --git a/init/main.c b/init/main.c
index 2ca52474d0c3..c7a20779aba1 100644
--- a/init/main.c
+++ b/init/main.c
@@ -931,6 +931,7 @@ void start_kernel(void)
sort_main_extable();
trap_init();
mm_core_init();
+ cpumask_cache_init();
poking_init();
ftrace_init();
diff --git a/lib/cpumask.c b/lib/cpumask.c
index e77ee9d46f71..f3f68c45caba 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -57,9 +57,20 @@ EXPORT_SYMBOL(cpumask_next_wrap);
* CONFIG_CPUMASK_OFFSTACK=n, so does code elimination in that case
* too.
*/
+
+static struct kmem_cache *cpumask_cache __ro_after_init;
+
+int __init cpumask_cache_init(void)
+{
+ cpumask_cache = kmem_cache_create("cpumask", cpumask_size(), sizeof(long),
+ SLAB_HWCACHE_ALIGN, NULL);
+
+ return cpumask_cache ? 0 : -ENOMEM;
+}
+
bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node)
{
- *mask = kmalloc_node(cpumask_size(), flags, node);
+ *mask = kmem_cache_alloc_node(cpumask_cache, flags, node);
#ifdef CONFIG_DEBUG_PER_CPU_MAPS
if (!*mask) {
@@ -97,7 +108,7 @@ void __init alloc_bootmem_cpumask_var(cpumask_var_t *mask)
*/
void free_cpumask_var(cpumask_var_t mask)
{
- kfree(mask);
+ kmem_cache_free(cpumask_cache, mask);
}
EXPORT_SYMBOL(free_cpumask_var);
--
2.27.0
On 19/03/2024 13.24, Dawei Li wrote:
> alloc_cpumask_var_node() and friends allocate the cpumask var dynamically
> for CONFIG_CPUMASK_OFFSTACK=y kernels. The allocated size of a cpumask var
> is cpumask_size(), which is a runtime constant once nr_cpu_ids is frozen.
>
> Create a dedicated kmem cache for dynamic allocation of cpumask vars.
Why?
> The window for creating the cache is somewhat narrow:
> - after the last update of nr_cpu_ids (via set_nr_cpu_ids()),
> - after the kmem cache infrastructure is available,
> - before any alloc_cpumask_var_node() invocation (e.g. in sched_init()).
OK, so this sounds somewhat fragile. It may be correct, but I fail to
see what is gained by it, and the commit message does not provide any
hints.
Rasmus
On Wed, Mar 20, 2024 at 10:03:02AM +0100, Rasmus Villemoes wrote:
> On 19/03/2024 13.24, Dawei Li wrote:
> > alloc_cpumask_var_node() and friends allocate the cpumask var dynamically
> > for CONFIG_CPUMASK_OFFSTACK=y kernels. The allocated size of a cpumask var
> > is cpumask_size(), which is a runtime constant once nr_cpu_ids is frozen.
> >
> > Create a dedicated kmem cache for dynamic allocation of cpumask vars.
>
> Why?
Hi Dawei,
Agree with Rasmus. CPUMASK_OFFSTACK=y is quite a rare configuration,
normally disabled. Can you show which architecture you're working with
and how the cache you're adding affects performance?
> > The window for creating the cache is somewhat narrow:
> > - after the last update of nr_cpu_ids (via set_nr_cpu_ids()),
> > - after the kmem cache infrastructure is available,
> > - before any alloc_cpumask_var_node() invocation (e.g. in sched_init()).
Not only narrow but also not uniform across platforms. For example, on
XEN, xen_smp_count_cpus() may adjust nr_cpu_ids. I don't think that
people run XEN with CPUMASK_OFFSTACK=y, but you have to make sure that
your cache is always created before the 1st allocation.
> OK, so this sounds somewhat fragile. It may be correct, but I fail to
> see what is gained by it, and the commit message does not provide any
> hints.
Agree. To make it less fragile, you'd have to enforce something like:

	static bool cpumask_cache_used = false;

	static inline void set_nr_cpu_ids(unsigned int nr)
	{
		if (WARN_ON(cpumask_cache_used))
			return;

		nr_cpu_ids = nr;
		cpumask_cache_destroy();
		cpumask_cache_init();
	}

	bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node)
	{
		cpumask_cache_used = true;
		*mask = kmem_cache_alloc_node(cpumask_cache, flags, node);
		...
	}
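where cpumask_cache_destroy() would be something trivial like (just a
sketch, the helper doesn't exist in your patch):

	static void cpumask_cache_destroy(void)
	{
		kmem_cache_destroy(cpumask_cache);
		cpumask_cache = NULL;
	}

That way the cache is recreated whenever nr_cpu_ids changes, and the
WARN catches any allocation that happened before the final value was
set.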
But first of all, we need to understand in which scenarios the new
cache would benefit performance.

Thanks,
Yury