Subject: [tip: x86/cpu] x86/cpu: Take NUMA node into account when allocating per-CPU cpumasks

The following commit has been merged into the x86/cpu branch of tip:

Commit-ID: e0a9ac192fd62322b932c6018db60217b3ad866d
Gitweb: https://git.kernel.org/tip/e0a9ac192fd62322b932c6018db60217b3ad866d
Author: Li RongQing <[email protected]>
AuthorDate: Wed, 10 Apr 2024 11:01:14 +08:00
Committer: Ingo Molnar <[email protected]>
CommitterDate: Wed, 10 Apr 2024 06:55:31 +02:00

x86/cpu: Take NUMA node into account when allocating per-CPU cpumasks

per-CPU cpumasks are predominantly accessed from their own local CPUs,
so allocate them node-locally to improve performance.
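
A minimal sketch of the pattern this patch applies (the per-CPU
variable example_mask and the init function are hypothetical;
zalloc_cpumask_var_node(), cpu_to_node(), per_cpu() and
for_each_possible_cpu() are the real kernel APIs the patch uses):

#include <linux/cpumask.h>
#include <linux/gfp.h>
#include <linux/percpu.h>
#include <linux/topology.h>	/* cpu_to_node() */

static DEFINE_PER_CPU(cpumask_var_t, example_mask);

static int __init example_alloc_masks(void)
{
	unsigned int cpu;

	for_each_possible_cpu(cpu) {
		/*
		 * Allocate each mask on the memory node of the CPU
		 * that will access it most often, rather than on the
		 * node of whichever CPU runs this init code.
		 */
		if (!zalloc_cpumask_var_node(&per_cpu(example_mask, cpu),
					     GFP_KERNEL, cpu_to_node(cpu)))
			return -ENOMEM;
	}
	return 0;
}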

[ mingo: Rewrote the changelog. ]

Signed-off-by: Li RongQing <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
---
 arch/x86/kernel/apic/x2apic_cluster.c |  3 ++-
 arch/x86/kernel/smpboot.c             | 13 +++++++------
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 567dbd2..afbb885 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -183,7 +183,8 @@ static int x2apic_prepare_cpu(unsigned int cpu)

if (alloc_clustermask(cpu, cluster, cpu_to_node(cpu)) < 0)
return -ENOMEM;
- if (!zalloc_cpumask_var(&per_cpu(ipi_mask, cpu), GFP_KERNEL))
+ if (!zalloc_cpumask_var_node(&per_cpu(ipi_mask, cpu), GFP_KERNEL,
+ cpu_to_node(cpu)))
return -ENOMEM;
return 0;
}
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 76bb650..536dad1 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1033,7 +1033,7 @@ static __init void disable_smp(void)

void __init smp_prepare_cpus_common(void)
{
- unsigned int i;
+ unsigned int i, n;

/* Mark all except the boot CPU as hotpluggable */
for_each_possible_cpu(i) {
@@ -1042,11 +1042,12 @@ void __init smp_prepare_cpus_common(void)
}

for_each_possible_cpu(i) {
- zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
- zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
- zalloc_cpumask_var(&per_cpu(cpu_die_map, i), GFP_KERNEL);
- zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL);
- zalloc_cpumask_var(&per_cpu(cpu_l2c_shared_map, i), GFP_KERNEL);
+ n = cpu_to_node(i);
+ zalloc_cpumask_var_node(&per_cpu(cpu_sibling_map, i), GFP_KERNEL, n);
+ zalloc_cpumask_var_node(&per_cpu(cpu_core_map, i), GFP_KERNEL, n);
+ zalloc_cpumask_var_node(&per_cpu(cpu_die_map, i), GFP_KERNEL, n);
+ zalloc_cpumask_var_node(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL, n);
+ zalloc_cpumask_var_node(&per_cpu(cpu_l2c_shared_map, i), GFP_KERNEL, n);
}

set_cpu_sibling_map(0);
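
Note that the node hint only takes effect with CONFIG_CPUMASK_OFFSTACK=y,
where cpumask_var_t is a pointer and zalloc_cpumask_var_node() performs a
real node-aware heap allocation; with CONFIG_CPUMASK_OFFSTACK=n the mask
is embedded in the per-CPU variable itself and the node argument is
ignored. A simplified sketch of the two shapes (not the verbatim
<linux/cpumask.h> definitions):

#ifdef CONFIG_CPUMASK_OFFSTACK
typedef struct cpumask	*cpumask_var_t;		/* heap-allocated, node-aware */
#else
typedef struct cpumask	cpumask_var_t[1];	/* embedded, node hint ignored */
#endif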