2024-04-11 01:05:36

by Tiezhu Yang

[permalink] [raw]
Subject: [PATCH v3 1/4] LoongArch: Move CONFIG_HAVE_SETUP_PER_CPU_AREA related code to smp.c

Currently, if CONFIG_NUMA is not set but CONFIG_SMP is set, the arch
specified setup_per_cpu_areas() in arch/loongarch/kernel/numa.c will
not be built and the generic setup_per_cpu_areas() in mm/percpu.c is
actually used, this is not reasonable and does not work as intended.

Let us select HAVE_SETUP_PER_CPU_AREA unconditionally and then move
CONFIG_HAVE_SETUP_PER_CPU_AREA related code from numa.c to smp.c to
avoid this problem.

Signed-off-by: Tiezhu Yang <[email protected]>
---
arch/loongarch/Kconfig | 2 +-
arch/loongarch/kernel/numa.c | 58 -----------------------------------
arch/loongarch/kernel/smp.c | 59 ++++++++++++++++++++++++++++++++++++
3 files changed, 60 insertions(+), 59 deletions(-)

diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index a5f300ec6f28..64052ae2c2af 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -151,7 +151,7 @@ config LOONGARCH
select HAVE_RUST
select HAVE_SAMPLE_FTRACE_DIRECT
select HAVE_SAMPLE_FTRACE_DIRECT_MULTI
- select HAVE_SETUP_PER_CPU_AREA if NUMA
+ select HAVE_SETUP_PER_CPU_AREA
select HAVE_STACK_VALIDATION if HAVE_OBJTOOL
select HAVE_STACKPROTECTOR
select HAVE_SYSCALL_TRACEPOINTS
diff --git a/arch/loongarch/kernel/numa.c b/arch/loongarch/kernel/numa.c
index 8fe21f868f72..49dc1d932ce2 100644
--- a/arch/loongarch/kernel/numa.c
+++ b/arch/loongarch/kernel/numa.c
@@ -48,64 +48,6 @@ EXPORT_SYMBOL(__cpuid_to_node);

nodemask_t numa_nodes_parsed __initdata;

-#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
-unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
-EXPORT_SYMBOL(__per_cpu_offset);
-
-static int __init pcpu_cpu_to_node(int cpu)
-{
- return early_cpu_to_node(cpu);
-}
-
-static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
-{
- if (early_cpu_to_node(from) == early_cpu_to_node(to))
- return LOCAL_DISTANCE;
- else
- return REMOTE_DISTANCE;
-}
-
-void __init pcpu_populate_pte(unsigned long addr)
-{
- populate_kernel_pte(addr);
-}
-
-void __init setup_per_cpu_areas(void)
-{
- unsigned long delta;
- unsigned int cpu;
- int rc = -EINVAL;
-
- if (pcpu_chosen_fc == PCPU_FC_AUTO) {
- if (nr_node_ids >= 8)
- pcpu_chosen_fc = PCPU_FC_PAGE;
- else
- pcpu_chosen_fc = PCPU_FC_EMBED;
- }
-
- /*
- * Always reserve area for module percpu variables. That's
- * what the legacy allocator did.
- */
- if (pcpu_chosen_fc != PCPU_FC_PAGE) {
- rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
- PERCPU_DYNAMIC_RESERVE, PMD_SIZE,
- pcpu_cpu_distance, pcpu_cpu_to_node);
- if (rc < 0)
- pr_warn("%s allocator failed (%d), falling back to page size\n",
- pcpu_fc_names[pcpu_chosen_fc], rc);
- }
- if (rc < 0)
- rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE, pcpu_cpu_to_node);
- if (rc < 0)
- panic("cannot initialize percpu area (err=%d)", rc);
-
- delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
- for_each_possible_cpu(cpu)
- __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
-}
-#endif
-
/*
* Get nodeid by logical cpu number.
* __cpuid_to_node maps phyical cpu id to node, so we
diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
index aabee0b280fe..88b9c6b68d1e 100644
--- a/arch/loongarch/kernel/smp.c
+++ b/arch/loongarch/kernel/smp.c
@@ -29,6 +29,7 @@
#include <asm/loongson.h>
#include <asm/mmu_context.h>
#include <asm/numa.h>
+#include <asm/pgalloc.h>
#include <asm/processor.h>
#include <asm/setup.h>
#include <asm/time.h>
@@ -717,3 +718,61 @@ void flush_tlb_one(unsigned long vaddr)
on_each_cpu(flush_tlb_one_ipi, (void *)vaddr, 1);
}
EXPORT_SYMBOL(flush_tlb_one);
+
+#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
+unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
+EXPORT_SYMBOL(__per_cpu_offset);
+
+static int __init pcpu_cpu_to_node(int cpu)
+{
+ return early_cpu_to_node(cpu);
+}
+
+static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
+{
+ if (early_cpu_to_node(from) == early_cpu_to_node(to))
+ return LOCAL_DISTANCE;
+ else
+ return REMOTE_DISTANCE;
+}
+
+void __init pcpu_populate_pte(unsigned long addr)
+{
+ populate_kernel_pte(addr);
+}
+
+void __init setup_per_cpu_areas(void)
+{
+ unsigned long delta;
+ unsigned int cpu;
+ int rc = -EINVAL;
+
+ if (pcpu_chosen_fc == PCPU_FC_AUTO) {
+ if (nr_node_ids >= 8)
+ pcpu_chosen_fc = PCPU_FC_PAGE;
+ else
+ pcpu_chosen_fc = PCPU_FC_EMBED;
+ }
+
+ /*
+ * Always reserve area for module percpu variables. That's
+ * what the legacy allocator did.
+ */
+ if (pcpu_chosen_fc != PCPU_FC_PAGE) {
+ rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
+ PERCPU_DYNAMIC_RESERVE, PMD_SIZE,
+ pcpu_cpu_distance, pcpu_cpu_to_node);
+ if (rc < 0)
+ pr_warn("%s allocator failed (%d), falling back to page size\n",
+ pcpu_fc_names[pcpu_chosen_fc], rc);
+ }
+ if (rc < 0)
+ rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE, pcpu_cpu_to_node);
+ if (rc < 0)
+ panic("cannot initialize percpu area (err=%d)", rc);
+
+ delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
+ for_each_possible_cpu(cpu)
+ __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
+}
+#endif
--
2.42.0



2024-04-12 04:16:55

by Huacai Chen

[permalink] [raw]
Subject: Re: [PATCH v3 1/4] LoongArch: Move CONFIG_HAVE_SETUP_PER_CPU_AREA related code to smp.c

On Thu, Apr 11, 2024 at 9:05 AM Tiezhu Yang <[email protected]> wrote:
>
> Currently, if CONFIG_NUMA is not set but CONFIG_SMP is set, the arch
> specified setup_per_cpu_areas() in arch/loongarch/kernel/numa.c will
> not be built and the generic setup_per_cpu_areas() in mm/percpu.c is
> actually used, this is not reasonable and does not work as intended.
Why is the generic version not reasonable? We need a custom version
just because it can put the percpu variable in the best node. If NUMA
disabled, software can only see one node, how to optimize?

Huacai

>
> Let us select HAVE_SETUP_PER_CPU_AREA unconditionally and then move
> CONFIG_HAVE_SETUP_PER_CPU_AREA related code from numa.c to smp.c to
> avoid this problem.
>
> Signed-off-by: Tiezhu Yang <[email protected]>
> ---
> arch/loongarch/Kconfig | 2 +-
> arch/loongarch/kernel/numa.c | 58 -----------------------------------
> arch/loongarch/kernel/smp.c | 59 ++++++++++++++++++++++++++++++++++++
> 3 files changed, 60 insertions(+), 59 deletions(-)
>
> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
> index a5f300ec6f28..64052ae2c2af 100644
> --- a/arch/loongarch/Kconfig
> +++ b/arch/loongarch/Kconfig
> @@ -151,7 +151,7 @@ config LOONGARCH
> select HAVE_RUST
> select HAVE_SAMPLE_FTRACE_DIRECT
> select HAVE_SAMPLE_FTRACE_DIRECT_MULTI
> - select HAVE_SETUP_PER_CPU_AREA if NUMA
> + select HAVE_SETUP_PER_CPU_AREA
> select HAVE_STACK_VALIDATION if HAVE_OBJTOOL
> select HAVE_STACKPROTECTOR
> select HAVE_SYSCALL_TRACEPOINTS
> diff --git a/arch/loongarch/kernel/numa.c b/arch/loongarch/kernel/numa.c
> index 8fe21f868f72..49dc1d932ce2 100644
> --- a/arch/loongarch/kernel/numa.c
> +++ b/arch/loongarch/kernel/numa.c
> @@ -48,64 +48,6 @@ EXPORT_SYMBOL(__cpuid_to_node);
>
> nodemask_t numa_nodes_parsed __initdata;
>
> -#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
> -unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
> -EXPORT_SYMBOL(__per_cpu_offset);
> -
> -static int __init pcpu_cpu_to_node(int cpu)
> -{
> - return early_cpu_to_node(cpu);
> -}
> -
> -static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
> -{
> - if (early_cpu_to_node(from) == early_cpu_to_node(to))
> - return LOCAL_DISTANCE;
> - else
> - return REMOTE_DISTANCE;
> -}
> -
> -void __init pcpu_populate_pte(unsigned long addr)
> -{
> - populate_kernel_pte(addr);
> -}
> -
> -void __init setup_per_cpu_areas(void)
> -{
> - unsigned long delta;
> - unsigned int cpu;
> - int rc = -EINVAL;
> -
> - if (pcpu_chosen_fc == PCPU_FC_AUTO) {
> - if (nr_node_ids >= 8)
> - pcpu_chosen_fc = PCPU_FC_PAGE;
> - else
> - pcpu_chosen_fc = PCPU_FC_EMBED;
> - }
> -
> - /*
> - * Always reserve area for module percpu variables. That's
> - * what the legacy allocator did.
> - */
> - if (pcpu_chosen_fc != PCPU_FC_PAGE) {
> - rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
> - PERCPU_DYNAMIC_RESERVE, PMD_SIZE,
> - pcpu_cpu_distance, pcpu_cpu_to_node);
> - if (rc < 0)
> - pr_warn("%s allocator failed (%d), falling back to page size\n",
> - pcpu_fc_names[pcpu_chosen_fc], rc);
> - }
> - if (rc < 0)
> - rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE, pcpu_cpu_to_node);
> - if (rc < 0)
> - panic("cannot initialize percpu area (err=%d)", rc);
> -
> - delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
> - for_each_possible_cpu(cpu)
> - __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
> -}
> -#endif
> -
> /*
> * Get nodeid by logical cpu number.
> * __cpuid_to_node maps phyical cpu id to node, so we
> diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
> index aabee0b280fe..88b9c6b68d1e 100644
> --- a/arch/loongarch/kernel/smp.c
> +++ b/arch/loongarch/kernel/smp.c
> @@ -29,6 +29,7 @@
> #include <asm/loongson.h>
> #include <asm/mmu_context.h>
> #include <asm/numa.h>
> +#include <asm/pgalloc.h>
> #include <asm/processor.h>
> #include <asm/setup.h>
> #include <asm/time.h>
> @@ -717,3 +718,61 @@ void flush_tlb_one(unsigned long vaddr)
> on_each_cpu(flush_tlb_one_ipi, (void *)vaddr, 1);
> }
> EXPORT_SYMBOL(flush_tlb_one);
> +
> +#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
> +unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
> +EXPORT_SYMBOL(__per_cpu_offset);
> +
> +static int __init pcpu_cpu_to_node(int cpu)
> +{
> + return early_cpu_to_node(cpu);
> +}
> +
> +static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
> +{
> + if (early_cpu_to_node(from) == early_cpu_to_node(to))
> + return LOCAL_DISTANCE;
> + else
> + return REMOTE_DISTANCE;
> +}
> +
> +void __init pcpu_populate_pte(unsigned long addr)
> +{
> + populate_kernel_pte(addr);
> +}
> +
> +void __init setup_per_cpu_areas(void)
> +{
> + unsigned long delta;
> + unsigned int cpu;
> + int rc = -EINVAL;
> +
> + if (pcpu_chosen_fc == PCPU_FC_AUTO) {
> + if (nr_node_ids >= 8)
> + pcpu_chosen_fc = PCPU_FC_PAGE;
> + else
> + pcpu_chosen_fc = PCPU_FC_EMBED;
> + }
> +
> + /*
> + * Always reserve area for module percpu variables. That's
> + * what the legacy allocator did.
> + */
> + if (pcpu_chosen_fc != PCPU_FC_PAGE) {
> + rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
> + PERCPU_DYNAMIC_RESERVE, PMD_SIZE,
> + pcpu_cpu_distance, pcpu_cpu_to_node);
> + if (rc < 0)
> + pr_warn("%s allocator failed (%d), falling back to page size\n",
> + pcpu_fc_names[pcpu_chosen_fc], rc);
> + }
> + if (rc < 0)
> + rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE, pcpu_cpu_to_node);
> + if (rc < 0)
> + panic("cannot initialize percpu area (err=%d)", rc);
> +
> + delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
> + for_each_possible_cpu(cpu)
> + __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
> +}
> +#endif
> --
> 2.42.0
>
>

2024-04-12 09:27:45

by Tiezhu Yang

[permalink] [raw]
Subject: Re: [PATCH v3 1/4] LoongArch: Move CONFIG_HAVE_SETUP_PER_CPU_AREA related code to smp.c



On 04/12/2024 12:12 PM, Huacai Chen wrote:
> On Thu, Apr 11, 2024 at 9:05 AM Tiezhu Yang <[email protected]> wrote:
>>
>> Currently, if CONFIG_NUMA is not set but CONFIG_SMP is set, the arch
>> specified setup_per_cpu_areas() in arch/loongarch/kernel/numa.c will
>> not be built and the generic setup_per_cpu_areas() in mm/percpu.c is
>> actually used, this is not reasonable and does not work as intended.
> Why is the generic version not reasonable? We need a custom version
> just because it can put the percpu variable in the best node. If NUMA
> disabled, software can only see one node, how to optimize?

The initial aim is to use the arch specific setup_per_cpu_areas()
in any case under CONFIG_SMP, this patch can be dropped if it is
meaningless for the case of !CONFIG_NUMA and CONFIG_SMP.

Thanks,
Tiezhu


2024-04-13 07:20:25

by Huacai Chen

[permalink] [raw]
Subject: Re: [PATCH v3 1/4] LoongArch: Move CONFIG_HAVE_SETUP_PER_CPU_AREA related code to smp.c

On Fri, Apr 12, 2024 at 5:27 PM Tiezhu Yang <[email protected]> wrote:
>
>
>
> On 04/12/2024 12:12 PM, Huacai Chen wrote:
> > On Thu, Apr 11, 2024 at 9:05 AM Tiezhu Yang <[email protected]> wrote:
> >>
> >> Currently, if CONFIG_NUMA is not set but CONFIG_SMP is set, the arch
> >> specified setup_per_cpu_areas() in arch/loongarch/kernel/numa.c will
> >> not be built and the generic setup_per_cpu_areas() in mm/percpu.c is
> >> actually used, this is not reasonable and does not work as intended.
> > Why is the generic version not reasonable? We need a custom version
> > just because it can put the percpu variable in the best node. If NUMA
> > disabled, software can only see one node, how to optimize?
>
> The initial aim is to use the arch specific setup_per_cpu_areas()
> in any case under CONFIG_SMP, this patch can be dropped if it is
> meaningless for the case of !CONFIG_NUMA and CONFIG_SMP.
Yes, it is better to drop this patch.

Huacai
>
> Thanks,
> Tiezhu
>
>