2024-03-12 17:43:36

by Saurabh Sengar

[permalink] [raw]
Subject: [PATCH] x86/numa: Map NUMA node to CPUs as per DeviceTree

Currently for DeviceTree bootup, x86 code does the default mapping of
CPUs to NUMA, which is wrong. This can cause incorrect mapping and WARN
on a SMT enabled system like below:

[0.417551] ------------[ cut here ]------------
[0.417551] Saurabh sched: CPU #1's smt-sibling CPU #0 is not on the same node! [node: 1 != 0]. Ignoring dependency.
[0.417551] WARNING: CPU: 1 PID: 0 at topology_sane.isra.0+0x5c/0x6d
[0.417551] Modules linked in:
[0.417551] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 6.1.71-microsoft-hcl+ #4
[0.417551] RIP: 0010:topology_sane.isra.0+0x5c/0x6d
[0.417551] Code: 41 39 dc 74 27 80 3d 32 ae 2d 00 00 75 1e 41 89 d9 45 89 e0 44 89 d6 48 c7 c7 00 a6 4a 88 c6 05 19 ae 2d 00 01 e8 6e 1f cb ff <0f> 0b 41 39 dc 5b 41 5c 0f 94 c0 5d c3 cc cc cc cc 55 48 8b 05 05
[0.417551] RSP: 0000:ffffc9000013feb0 EFLAGS: 00010086
[0.417551] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000
[0.417551] RDX: 0000000000000003 RSI: 0000000000000086 RDI: 00000000ffffffff
[0.417551] RBP: ffffc9000013fec0 R08: ffffffff88778160 R09: ffffffff88778160
[0.417551] R10: ffff888227fe26da R11: ffff888227fe26c1 R12: 0000000000000001
[0.417551] R13: 0000000000000000 R14: ffff888216415040 R15: 0000000000000000
[0.417551] FS: 0000000000000000(0000) GS:ffff888216400000(0000) knlGS:0000000000000000
[0.417551] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[0.417551] CR2: 0000000000000000 CR3: 0000000208809001 CR4: 0000000000330ea0
[0.417551] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[0.417551] DR3: 0000000000000000 DR6: 00000000fffe07f0 DR7: 0000000000000400
[0.417551] Call Trace:
[0.417551] <TASK>
[0.417551] ? show_regs.cold+0x1a/0x1f
[0.417551] ? __warn+0x6e/0xc0
[0.417551] ? report_bug+0x101/0x1a0
[0.417551] ? handle_bug+0x40/0x70
[0.417551] ? exc_invalid_op+0x19/0x70
[0.417551] ? asm_exc_invalid_op+0x1b/0x20
[0.417551] ? topology_sane.isra.0+0x5c/0x6d
[0.417551] match_smt+0xf6/0xfc
[0.417551] set_cpu_sibling_map.cold+0x24f/0x512
[0.417551] start_secondary+0x5c/0x110
[0.417551] secondary_startup_64_no_verify+0xcd/0xdb
[0.417551] </TASK>
[0.417551] ---[ end trace 0000000000000000 ]---

Add the correct mapping of CPUs to NUMA node as per DeviceTree to fix
this issue.

Signed-off-by: Saurabh Sengar <[email protected]>
---
arch/x86/mm/numa.c | 20 ++++++++++++++++++++
1 file changed, 20 insertions(+)

diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 65e9a6e..9dacf60 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -601,6 +601,23 @@ static void __init numa_init_array(void)
}
}

+static void __init of_parse_and_init_cpus(void)
+{
+ struct device_node *dn;
+ int cpuid = 0;
+ int nid;
+
+ for_each_of_cpu_node(dn) {
+ if (cpuid >= NR_CPUS) {
+ pr_warn("NR_CPUS too small for %d cpuid\n", cpuid);
+ return;
+ }
+ nid = of_node_to_nid(dn);
+ numa_set_node(cpuid, nid);
+ cpuid++;
+ }
+}
+
static int __init numa_init(int (*init_func)(void))
{
int i;
@@ -645,6 +662,9 @@ static int __init numa_init(int (*init_func)(void))
if (ret < 0)
return ret;

+ if (acpi_disabled)
+ of_parse_and_init_cpus();
+
for (i = 0; i < nr_cpu_ids; i++) {
int nid = early_cpu_to_node(i);

--
1.8.3.1



2024-03-24 14:59:47

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH] x86/numa: Map NUMA node to CPUs as per DeviceTree

On Tue, Mar 12 2024 at 10:43, Saurabh Sengar wrote:
> Currently for DeviceTree bootup, x86 code does the default mapping of
> CPUs to NUMA, which is wrong. This can cause incorrect mapping and WARN
> on a SMT enabled system like below:
>
> [0.417551] ------------[ cut here ]------------
> [0.417551] Saurabh sched: CPU #1's smt-sibling CPU #0 is not on the same node! [node: 1 != 0]. Ignoring dependency.
> [0.417551] WARNING: CPU: 1 PID: 0 at topology_sane.isra.0+0x5c/0x6d
> [0.417551] Modules linked in:
> [0.417551] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 6.1.71-microsoft-hcl+ #4
> [0.417551] RIP: 0010:topology_sane.isra.0+0x5c/0x6d
> [0.417551] Code: 41 39 dc 74 27 80 3d 32 ae 2d 00 00 75 1e 41 89 d9 45 89 e0 44 89 d6 48 c7 c7 00 a6 4a 88 c6 05 19 ae 2d 00 01 e8 6e 1f cb ff <0f> 0b 41 39 dc 5b 41 5c 0f 94 c0 5d c3 cc cc cc cc 55 48 8b 05 05
> [0.417551] RSP: 0000:ffffc9000013feb0 EFLAGS: 00010086
> [0.417551] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000
> [0.417551] RDX: 0000000000000003 RSI: 0000000000000086 RDI: 00000000ffffffff
> [0.417551] RBP: ffffc9000013fec0 R08: ffffffff88778160 R09: ffffffff88778160
> [0.417551] R10: ffff888227fe26da R11: ffff888227fe26c1 R12: 0000000000000001
> [0.417551] R13: 0000000000000000 R14: ffff888216415040 R15: 0000000000000000
> [0.417551] FS: 0000000000000000(0000) GS:ffff888216400000(0000) knlGS:0000000000000000
> [0.417551] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [0.417551] CR2: 0000000000000000 CR3: 0000000208809001 CR4: 0000000000330ea0
> [0.417551] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> [0.417551] DR3: 0000000000000000 DR6: 00000000fffe07f0 DR7: 0000000000000400
> [0.417551] Call Trace:
> [0.417551] <TASK>
> [0.417551] ? show_regs.cold+0x1a/0x1f
> [0.417551] ? __warn+0x6e/0xc0
> [0.417551] ? report_bug+0x101/0x1a0
> [0.417551] ? handle_bug+0x40/0x70
> [0.417551] ? exc_invalid_op+0x19/0x70
> [0.417551] ? asm_exc_invalid_op+0x1b/0x20
> [0.417551] ? topology_sane.isra.0+0x5c/0x6d
> [0.417551] match_smt+0xf6/0xfc
> [0.417551] set_cpu_sibling_map.cold+0x24f/0x512
> [0.417551] start_secondary+0x5c/0x110
> [0.417551] secondary_startup_64_no_verify+0xcd/0xdb
> [0.417551] </TASK>
> [0.417551] ---[ end trace 0000000000000000 ]---

Can you please trim the backtrace like documented. 95% of the pasted
information above is completely irrelevant to understand the issue.

CPU #1's smt-sibling CPU #0 is not on the same node! [node: 1 != 0]. Ignoring dependency.
WARNING: CPU: 1 PID: 0 at topology_sane.isra.0+0x5c/0x6d
match_smt+0xf6/0xfc
set_cpu_sibling_map.cold+0x24f/0x512
start_secondary+0x5c/0x110

is more than sufficient, no?

> +static void __init of_parse_and_init_cpus(void)
> +{
> + struct device_node *dn;
> + int cpuid = 0;
> + int nid;
> +
> + for_each_of_cpu_node(dn) {
> + if (cpuid >= NR_CPUS) {

This condition is wrong. nr_cpu_ids != NR_CPUS.

> + pr_warn("NR_CPUS too small for %d cpuid\n", cpuid);

What's this for? The APIC enumeration code warns about this already.

> + return;
> + }
> + nid = of_node_to_nid(dn);
> + numa_set_node(cpuid, nid);
> + cpuid++;
> + }
> +}
> +
> static int __init numa_init(int (*init_func)(void))
> {
> int i;
> @@ -645,6 +662,9 @@ static int __init numa_init(int (*init_func)(void))
> if (ret < 0)
> return ret;
>
> + if (acpi_disabled)
> + of_parse_and_init_cpus();

numa_init() is invoked from x86_numa_init() with the various NUMA init
functions as argument and x86_numa_init() already has OF NUMA logic:

#ifdef CONFIG_ACPI_NUMA
if (!numa_init(x86_acpi_numa_init))
return;
#endif
#ifdef CONFIG_AMD_NUMA
if (!numa_init(amd_numa_init))
return;
#endif
if (acpi_disabled && !numa_init(of_numa_init))
return;

of_numa_init() does not do the numa_set_node() part, but that's not a
justification to glue this into numa_init() which is firmware
independent except for the firmware specific callback argument.

Also x86_numa_init() is invoked _before_ the APIC ID to Linux CPU number
association happens, so doing the CPU number to node mapping at this
point is just wrong for any CPU number != 0.

It "works" for OF just by chance as the actual APIC enumeration which
associates Linux CPU numbers works in the same order, but that does not
make it correct in any way.

x86_acpi_numa_init() and amd_numa_init() set up the nodes like
of_numa_init() does and aside of that save the APIC ID to node mapping.

Aside of that the numa_set_node() variant happens to "work" for Intel
CPUs as srat_detect_node() falls back to cpu_to_node() when
numa_cpu_node() returns NO_NUMA_NODE.

Though the AMD variant falls back to cpu_info.topo.llc_id which is not
necessarily the same result as what the device tree enumerated.

I can see why you glued it into numa_init():

of_node_to_nid() requires node_possible_map to be initialized, which
happens in numa_register_memblks() invoked from numa_init() if the
firmware specific callback which enumerates the nodes was successful.

Of course the change log is silent about this.

But there is no reason to scan this right in numa_init() as nothing
needs the information to be set at this point, unless I'm missing
something obscure, which might be the case when staring at this NUMA
enumeration maze.

The CPU to node mapping based on the APIC ID to node mapping happens in
init_cpu_to_node() which is after the APIC enumeration and the
finalizing of cpu_possible_map completed. At this point the CPU number
to APIC ID mapping is stable.

So the uncompiled below should just work, no?

Thanks,

tglx
---
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -24,6 +24,7 @@
#include <asm/pci_x86.h>
#include <asm/setup.h>
#include <asm/i8259.h>
+#include <asm/numa.h>
#include <asm/prom.h>

__initdata u64 initial_dtb;
@@ -137,6 +138,7 @@ static void __init dtb_cpu_setup(void)
continue;
}
topology_register_apic(apic_id, CPU_ACPIID_INVALID, true);
+ set_apicid_to_node(apic_id, of_node_to_nid(dn));
}
}


2024-03-24 16:28:48

by Saurabh Sengar

[permalink] [raw]
Subject: Re: [PATCH] x86/numa: Map NUMA node to CPUs as per DeviceTree

On Sun, Mar 24, 2024 at 03:59:27PM +0100, Thomas Gleixner wrote:
> On Tue, Mar 12 2024 at 10:43, Saurabh Sengar wrote:
> > Currently for DeviceTree bootup, x86 code does the default mapping of
> > CPUs to NUMA, which is wrong. This can cause incorrect mapping and WARN
> > on a SMT enabled system like below:
> >
> > [0.417551] ------------[ cut here ]------------
> > [0.417551] Saurabh sched: CPU #1's smt-sibling CPU #0 is not on the same node! [node: 1 != 0]. Ignoring dependency.
> > [0.417551] WARNING: CPU: 1 PID: 0 at topology_sane.isra.0+0x5c/0x6d
> > [0.417551] Modules linked in:
> > [0.417551] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 6.1.71-microsoft-hcl+ #4
> > [0.417551] RIP: 0010:topology_sane.isra.0+0x5c/0x6d
> > [0.417551] Code: 41 39 dc 74 27 80 3d 32 ae 2d 00 00 75 1e 41 89 d9 45 89 e0 44 89 d6 48 c7 c7 00 a6 4a 88 c6 05 19 ae 2d 00 01 e8 6e 1f cb ff <0f> 0b 41 39 dc 5b 41 5c 0f 94 c0 5d c3 cc cc cc cc 55 48 8b 05 05
> > [0.417551] RSP: 0000:ffffc9000013feb0 EFLAGS: 00010086
> > [0.417551] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000
> > [0.417551] RDX: 0000000000000003 RSI: 0000000000000086 RDI: 00000000ffffffff
> > [0.417551] RBP: ffffc9000013fec0 R08: ffffffff88778160 R09: ffffffff88778160
> > [0.417551] R10: ffff888227fe26da R11: ffff888227fe26c1 R12: 0000000000000001
> > [0.417551] R13: 0000000000000000 R14: ffff888216415040 R15: 0000000000000000
> > [0.417551] FS: 0000000000000000(0000) GS:ffff888216400000(0000) knlGS:0000000000000000
> > [0.417551] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > [0.417551] CR2: 0000000000000000 CR3: 0000000208809001 CR4: 0000000000330ea0
> > [0.417551] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> > [0.417551] DR3: 0000000000000000 DR6: 00000000fffe07f0 DR7: 0000000000000400
> > [0.417551] Call Trace:
> > [0.417551] <TASK>
> > [0.417551] ? show_regs.cold+0x1a/0x1f
> > [0.417551] ? __warn+0x6e/0xc0
> > [0.417551] ? report_bug+0x101/0x1a0
> > [0.417551] ? handle_bug+0x40/0x70
> > [0.417551] ? exc_invalid_op+0x19/0x70
> > [0.417551] ? asm_exc_invalid_op+0x1b/0x20
> > [0.417551] ? topology_sane.isra.0+0x5c/0x6d
> > [0.417551] match_smt+0xf6/0xfc
> > [0.417551] set_cpu_sibling_map.cold+0x24f/0x512
> > [0.417551] start_secondary+0x5c/0x110
> > [0.417551] secondary_startup_64_no_verify+0xcd/0xdb
> > [0.417551] </TASK>
> > [0.417551] ---[ end trace 0000000000000000 ]---
>
> Can you please trim the backtrace like documented. 95% of the pasted
> information above is completely irrelevant to understand the issue.
>
> CPU #1's smt-sibling CPU #0 is not on the same node! [node: 1 != 0]. Ignoring dependency.
> WARNING: CPU: 1 PID: 0 at topology_sane.isra.0+0x5c/0x6d
> match_smt+0xf6/0xfc
> set_cpu_sibling_map.cold+0x24f/0x512
> start_secondary+0x5c/0x110
>
> is more than sufficient, no?
>
> > +static void __init of_parse_and_init_cpus(void)
> > +{
> > + struct device_node *dn;
> > + int cpuid = 0;
> > + int nid;
> > +
> > + for_each_of_cpu_node(dn) {
> > + if (cpuid >= NR_CPUS) {
>
> This condition is wrong. nr_cpu_ids != NR_CPUS.
>
> > + pr_warn("NR_CPUS too small for %d cpuid\n", cpuid);
>
> What's this for? The APIC enumeration code warns about this already.
>
> > + return;
> > + }
> > + nid = of_node_to_nid(dn);
> > + numa_set_node(cpuid, nid);
> > + cpuid++;
> > + }
> > +}
> > +
> > static int __init numa_init(int (*init_func)(void))
> > {
> > int i;
> > @@ -645,6 +662,9 @@ static int __init numa_init(int (*init_func)(void))
> > if (ret < 0)
> > return ret;
> >
> > + if (acpi_disabled)
> > + of_parse_and_init_cpus();
>
> numa_init() is invoked from x86_numa_init() with the various NUMA init
> functions as argument and x86_numa_init() already has OF NUMA logic:
>
> #ifdef CONFIG_ACPI_NUMA
> if (!numa_init(x86_acpi_numa_init))
> return;
> #endif
> #ifdef CONFIG_AMD_NUMA
> if (!numa_init(amd_numa_init))
> return;
> #endif
> if (acpi_disabled && !numa_init(of_numa_init))
> return;
>
> of_numa_init() does not do the numa_set_node() part, but that's not a
> justification to glue this into numa_init() which is firmware
> independent except for the firmware specific callback argument.
>
> Also x86_numa_init() is invoked _before_ the APIC ID to Linux CPU number
> association happens, so doing the CPU number to node mapping at this
> point is just wrong for any CPU number != 0.
>
> It "works" for OF just by chance as the actual APIC enumeration which
> associates Linux CPU numbers works in the same order, but that does not
> make it correct in any way.
>
> x86_acpi_numa_init() and amd_numa_init() set up the nodes like
> of_numa_init() does and aside of that save the APIC ID to node mapping.
>
> Aside of that the numa_set_node() variant happens to "work" for Intel
> CPUs as srat_detect_node() falls back to cpu_to_node() when
> numa_cpu_node() returns NO_NUMA_NODE.
>
> Though the AMD variant falls back to cpu_info.topo.llc_id which is not
> necessarily the same result as what the device tree enumerated.
>
> I can see why you glued it into numa_init():
>
> of_node_to_nid() requires node_possible_map to be initialized, which
> happens in numa_register_memblks() invoked from numa_init() if the
> firmware specific callback which enumerates the nodes was successful.
>
> Of course the change log is silent about this.
>
> But there is no reason to scan this right in numa_init() as nothing
> needs the information to be set at this point, unless I'm missing
> something obscure, which might be the case when staring at this NUMA
> enumeration maze.
>
> The CPU to node mapping based on the APIC ID to node mapping happens in
> init_cpu_to_node() which is after the APIC enumeration and the
> finalizing of cpu_possible_map completed. At this point the CPU number
> to APIC ID mapping is stable.
>
> So the uncompiled below should just work, no?
>
> Thanks,
>
> tglx
> ---
> --- a/arch/x86/kernel/devicetree.c
> +++ b/arch/x86/kernel/devicetree.c
> @@ -24,6 +24,7 @@
> #include <asm/pci_x86.h>
> #include <asm/setup.h>
> #include <asm/i8259.h>
> +#include <asm/numa.h>
> #include <asm/prom.h>
>
> __initdata u64 initial_dtb;
> @@ -137,6 +138,7 @@ static void __init dtb_cpu_setup(void)
> continue;
> }
> topology_register_apic(apic_id, CPU_ACPIID_INVALID, true);
> + set_apicid_to_node(apic_id, of_node_to_nid(dn));
> }
> }
>

Thanks for your suggestion. I use this approach because ARM64 and riscv
platforms were having a function of_parse_and_init_cpus to do the same.
But I agree in x86 DeviceTree is handled differently, and we can restrict
the DT related code from numa_init. I will look in to making this approach
work for our platform and send the new patch.

Few thoughts related to recent change wrt removal of x86_dtb_init:

I recognize that due to recent changes, each dtb platform will now need to set
a pointer for x86_init.mpparse.early_parse_smp_cfg to get the dtb_cpu_setup
executed. This was not the requirement before because earlier x86_dtb_init was
anyway getting called. Do you think we should improve this as well by setting
x86_init.mpparse.early_parse_smp_cfg to x86_dtb_parse_smp_config for all the
dtb platforms by default.

I see the ce4100 platform is setting the parse_smp_cfg, shouldn't the
early_parse_smp_cfg be more accurate there ?

Let me know if my understanding is correct, I can accommodate these improvements
as well in my patch series.

- Saurabh

2024-03-25 16:36:49

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH] x86/numa: Map NUMA node to CPUs as per DeviceTree

On Sun, Mar 24 2024 at 09:28, Saurabh Singh Sengar wrote:
> I recognize that due to recent changes, each dtb platform will now need to set
> a pointer for x86_init.mpparse.early_parse_smp_cfg to get the dtb_cpu_setup
> executed.

No. DT does not need the early parse call. The early parse call _cannot_
enumerate APICs.

> This was not the requirement before because earlier x86_dtb_init was
> anyway getting called.

For the wrong reasons.

> Do you think we should improve this as well by setting
> x86_init.mpparse.early_parse_smp_cfg to x86_dtb_parse_smp_config for all the
> dtb platforms by default.

No.

> I see the ce4100 platform is setting the parse_smp_cfg, shouldn't the
> early_parse_smp_cfg be more accurate there ?

Again. No. Early is not the point where APICs can be enumerated.

What we can do is the below.

Thanks,

tglx
---
--- a/arch/x86/include/asm/prom.h
+++ b/arch/x86/include/asm/prom.h
@@ -24,18 +24,15 @@ extern u64 initial_dtb;
extern void add_dtb(u64 data);
void x86_of_pci_init(void);
void x86_dtb_parse_smp_config(void);
+void x86_flattree_get_config(void);
#else
static inline void add_dtb(u64 data) { }
static inline void x86_of_pci_init(void) { }
static inline void x86_dtb_parse_smp_config(void) { }
+static inline void x86_flattree_get_config(void) { }
#define of_ioapic 0
#endif

-#ifdef CONFIG_OF_EARLY_FLATTREE
-void x86_flattree_get_config(void);
-#else
-static inline void x86_flattree_get_config(void) { }
-#endif
extern char cmd_line[COMMAND_LINE_SIZE];

#endif /* __ASSEMBLY__ */
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -277,9 +277,9 @@ static void __init dtb_apic_setup(void)
dtb_ioapic_setup();
}

-#ifdef CONFIG_OF_EARLY_FLATTREE
void __init x86_flattree_get_config(void)
{
+#ifdef CONFIG_OF_EARLY_FLATTREE
u32 size, map_len;
void *dt;

@@ -301,8 +301,10 @@ void __init x86_flattree_get_config(void

if (initial_dtb)
early_memunmap(dt, map_len);
-}
#endif
+ if (of_have_populated_dt())
+ x86_init.mpparse.parse_smp_cfg = x86_dtb_parse_smp_config;
+}

void __init x86_dtb_parse_smp_config(void)
{

2024-03-25 17:39:08

by Saurabh Sengar

[permalink] [raw]
Subject: Re: [PATCH] x86/numa: Map NUMA node to CPUs as per DeviceTree

On Mon, Mar 25, 2024 at 03:42:32PM +0100, Thomas Gleixner wrote:
> On Sun, Mar 24 2024 at 09:28, Saurabh Singh Sengar wrote:
> > I recognize that due to recent changes, each dtb platform will now need to set
> > a pointer for x86_init.mpparse.early_parse_smp_cfg to get the dtb_cpu_setup
> > executed.
>
> No. DT does not need the early parse call. The early parse call _cannot_
> enumerate APICs.
>
> > This was not the requirement before because earlier x86_dtb_init was
> > anyway getting called.
>
> For the wrong reasons.
>
> > Do you think we should improve this as well by setting
> > x86_init.mpparse.early_parse_smp_cfg to x86_dtb_parse_smp_config for all the
> > dtb platforms by default.
>
> No.
>
> > I see the ce4100 platform is setting the parse_smp_cfg, shouldn't the
> > early_parse_smp_cfg be more accurate there ?
>
> Again. No. Early is not the point where APICs can be enumerated.
>
> What we can do is the below.
>
> Thanks,
>
> tglx
> ---
> --- a/arch/x86/include/asm/prom.h
> +++ b/arch/x86/include/asm/prom.h
> @@ -24,18 +24,15 @@ extern u64 initial_dtb;
> extern void add_dtb(u64 data);
> void x86_of_pci_init(void);
> void x86_dtb_parse_smp_config(void);
> +void x86_flattree_get_config(void);
> #else
> static inline void add_dtb(u64 data) { }
> static inline void x86_of_pci_init(void) { }
> static inline void x86_dtb_parse_smp_config(void) { }
> +static inline void x86_flattree_get_config(void) { }
> #define of_ioapic 0
> #endif
>
> -#ifdef CONFIG_OF_EARLY_FLATTREE
> -void x86_flattree_get_config(void);
> -#else
> -static inline void x86_flattree_get_config(void) { }
> -#endif
> extern char cmd_line[COMMAND_LINE_SIZE];
>
> #endif /* __ASSEMBLY__ */
> --- a/arch/x86/kernel/devicetree.c
> +++ b/arch/x86/kernel/devicetree.c
> @@ -277,9 +277,9 @@ static void __init dtb_apic_setup(void)
> dtb_ioapic_setup();
> }
>
> -#ifdef CONFIG_OF_EARLY_FLATTREE
> void __init x86_flattree_get_config(void)
> {
> +#ifdef CONFIG_OF_EARLY_FLATTREE
> u32 size, map_len;
> void *dt;
>
> @@ -301,8 +301,10 @@ void __init x86_flattree_get_config(void
>
> if (initial_dtb)
> early_memunmap(dt, map_len);
> -}
> #endif
> + if (of_have_populated_dt())
> + x86_init.mpparse.parse_smp_cfg = x86_dtb_parse_smp_config;
> +}
>
> void __init x86_dtb_parse_smp_config(void)
> {

Thanks for suggestion, this will fix for all the x86 dtb platforms at once.
I wonder if there will be any user of x86_dtb_parse_smp_config outside
of this file after this change. Possibly we can make it static ?

I can send the v2, however if you prefer pushing all these changes yourself
please let me know.

- Saurabh

2024-03-26 00:14:55

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH] x86/numa: Map NUMA node to CPUs as per DeviceTree

On Mon, Mar 25 2024 at 09:24, Saurabh Singh Sengar wrote:
> On Mon, Mar 25, 2024 at 03:42:32PM +0100, Thomas Gleixner wrote:
> Thanks for suggestion, this will fix for all the x86 dtb platforms at once.
> I wonder if there will be any user of x86_dtb_parse_smp_config outside
> of this file after this change. Possibly we can make it static ?
>
> I can send the v2, however if you prefer pushing all these changes yourself
> please let me know.

It's a suggestion. Feel free to make it work and write the change logs
for it :)

Thanks,

tglx