2021-11-21 09:25:14

by Kefeng Wang

Subject: [PATCH RFC 0/4] mm: percpu: Cleanup percpu first chunk function

While adding support for the page mapping percpu first chunk allocator
on arm64, we found a lot of duplicated code in the percpu embed/page
first chunk allocators. This patchset cleans that up and should
introduce no functional change; it has only been tested on arm64.
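
For orientation, a condensed sketch of what an architecture's
setup_per_cpu_areas() looks like once the whole series is applied
(distilled from the sparc64/powerpc hunks in the later patches; error
reporting trimmed, not a verbatim copy of any one architecture):

void __init setup_per_cpu_areas(void)
{
        int rc = -EINVAL;

        if (pcpu_chosen_fc != PCPU_FC_PAGE) {
                /* embed allocator: only the distance and cpu-to-node
                 * callbacks remain; alloc/free become generic */
                rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
                                            PERCPU_DYNAMIC_RESERVE,
                                            PAGE_SIZE, pcpu_cpu_distance,
                                            cpu_to_node);
        }
        if (rc < 0)     /* fall back to page-sized mappings */
                rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE, cpu_to_node);
        if (rc < 0)
                panic("cannot initialize percpu area (err=%d)", rc);

        /* ... set up __per_cpu_offset[] from the returned base as before ... */
}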

Kefeng Wang (4):
mm: percpu: Generalize percpu related config
mm: percpu: Add pcpu_fc_cpu_to_node_fn_t typedef
mm: percpu: Add generic pcpu_fc_alloc/free function
mm: percpu: Add generic pcpu_populate_pte() function

arch/arm64/Kconfig | 20 +----
arch/ia64/Kconfig | 9 +--
arch/mips/Kconfig | 10 +--
arch/mips/mm/init.c | 14 +---
arch/powerpc/Kconfig | 17 +---
arch/powerpc/kernel/setup_64.c | 92 +--------------------
arch/riscv/Kconfig | 10 +--
arch/sparc/Kconfig | 12 +--
arch/sparc/kernel/smp_64.c | 105 +-----------------------
arch/x86/Kconfig | 17 +---
arch/x86/kernel/setup_percpu.c | 66 ++-------------
drivers/base/arch_numa.c | 68 +---------------
include/linux/percpu.h | 13 +--
mm/Kconfig | 12 +++
mm/percpu.c | 143 +++++++++++++++++++++++++--------
15 files changed, 165 insertions(+), 443 deletions(-)

--
2.26.2



2021-11-21 09:25:24

by Kefeng Wang

Subject: [PATCH RFC 1/4] mm: percpu: Generalize percpu related config

The HAVE_SETUP_PER_CPU_AREA/NEED_PER_CPU_EMBED_FIRST_CHUNK/
NEED_PER_CPU_PAGE_FIRST_CHUNK/USE_PERCPU_NUMA_NODE_ID configs
are duplicated on every platform that subscribes to them.

Move them into mm/Kconfig, drop the redundant per-arch definitions,
and instead just select them on the applicable platforms.

Signed-off-by: Kefeng Wang <[email protected]>
---
arch/arm64/Kconfig | 20 ++++----------------
arch/ia64/Kconfig | 9 ++-------
arch/mips/Kconfig | 10 ++--------
arch/powerpc/Kconfig | 17 ++++-------------
arch/riscv/Kconfig | 10 ++--------
arch/sparc/Kconfig | 12 +++---------
arch/x86/Kconfig | 17 ++++-------------
mm/Kconfig | 12 ++++++++++++
8 files changed, 33 insertions(+), 74 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index c4207cf9bb17..4ff73299f8a9 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1135,6 +1135,10 @@ config NUMA
select GENERIC_ARCH_NUMA
select ACPI_NUMA if ACPI
select OF_NUMA
+ select HAVE_SETUP_PER_CPU_AREA
+ select NEED_PER_CPU_EMBED_FIRST_CHUNK
+ select NEED_PER_CPU_PAGE_FIRST_CHUNK
+ select USE_PERCPU_NUMA_NODE_ID
help
Enable NUMA (Non-Uniform Memory Access) support.

@@ -1151,22 +1155,6 @@ config NODES_SHIFT
Specify the maximum number of NUMA Nodes available on the target
system. Increases memory reserved to accommodate various tables.

-config USE_PERCPU_NUMA_NODE_ID
- def_bool y
- depends on NUMA
-
-config HAVE_SETUP_PER_CPU_AREA
- def_bool y
- depends on NUMA
-
-config NEED_PER_CPU_EMBED_FIRST_CHUNK
- def_bool y
- depends on NUMA
-
-config NEED_PER_CPU_PAGE_FIRST_CHUNK
- def_bool y
- depends on NUMA
-
source "kernel/Kconfig.hz"

config ARCH_SPARSEMEM_ENABLE
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 1e33666fa679..703952819e10 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -32,6 +32,7 @@ config IA64
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_DYNAMIC_FTRACE if (!ITANIUM)
select HAVE_FUNCTION_TRACER
+ select HAVE_SETUP_PER_CPU_AREA
select TTY
select HAVE_ARCH_TRACEHOOK
select HAVE_VIRT_CPU_ACCOUNTING
@@ -88,9 +89,6 @@ config GENERIC_CALIBRATE_DELAY
bool
default y

-config HAVE_SETUP_PER_CPU_AREA
- def_bool y
-
config DMI
bool
default y
@@ -292,6 +290,7 @@ config NUMA
bool "NUMA support"
depends on !FLATMEM
select SMP
+ select USE_PERCPU_NUMA_NODE_ID
help
Say Y to compile the kernel to support NUMA (Non-Uniform Memory
Access). This option is for configuring high-end multiprocessor
@@ -311,10 +310,6 @@ config HAVE_ARCH_NODEDATA_EXTENSION
def_bool y
depends on NUMA

-config USE_PERCPU_NUMA_NODE_ID
- def_bool y
- depends on NUMA
-
config HAVE_MEMORYLESS_NODES
def_bool NUMA

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index de60ad190057..c106a2080877 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -2666,6 +2666,8 @@ config NUMA
bool "NUMA Support"
depends on SYS_SUPPORTS_NUMA
select SMP
+ select HAVE_SETUP_PER_CPU_AREA
+ select NEED_PER_CPU_EMBED_FIRST_CHUNK
help
Say Y to compile the kernel to support NUMA (Non-Uniform Memory
Access). This option improves performance on systems with more
@@ -2676,14 +2678,6 @@ config NUMA
config SYS_SUPPORTS_NUMA
bool

-config HAVE_SETUP_PER_CPU_AREA
- def_bool y
- depends on NUMA
-
-config NEED_PER_CPU_EMBED_FIRST_CHUNK
- def_bool y
- depends on NUMA
-
config RELOCATABLE
bool "Relocatable kernel"
depends on SYS_SUPPORTS_RELOCATABLE
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index dea74d7717c0..8badd39854a0 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -55,15 +55,6 @@ config ARCH_MMAP_RND_COMPAT_BITS_MIN
default 9 if PPC_16K_PAGES # 9 = 23 (8MB) - 14 (16K)
default 11 # 11 = 23 (8MB) - 12 (4K)

-config HAVE_SETUP_PER_CPU_AREA
- def_bool PPC64
-
-config NEED_PER_CPU_EMBED_FIRST_CHUNK
- def_bool y if PPC64
-
-config NEED_PER_CPU_PAGE_FIRST_CHUNK
- def_bool y if PPC64
-
config NR_IRQS
int "Number of virtual interrupt numbers"
range 32 1048576
@@ -240,6 +231,7 @@ config PPC
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RELIABLE_STACKTRACE
select HAVE_RSEQ
+ select HAVE_SETUP_PER_CPU_AREA if PPC64
select HAVE_SOFTIRQ_ON_OWN_STACK
select HAVE_STACKPROTECTOR if PPC32 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r2)
select HAVE_STACKPROTECTOR if PPC64 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r13)
@@ -254,6 +246,8 @@ config PPC
select MMU_GATHER_RCU_TABLE_FREE
select MODULES_USE_ELF_RELA
select NEED_DMA_MAP_STATE if PPC64 || NOT_COHERENT_CACHE
+ select NEED_PER_CPU_EMBED_FIRST_CHUNK if PPC64
+ select NEED_PER_CPU_PAGE_FIRST_CHUNK if PPC64
select NEED_SG_DMA_LENGTH
select OF
select OF_DMA_DEFAULT_COHERENT if !NOT_COHERENT_CACHE
@@ -659,6 +653,7 @@ config NUMA
bool "NUMA Memory Allocation and Scheduler Support"
depends on PPC64 && SMP
default y if PPC_PSERIES || PPC_POWERNV
+ select USE_PERCPU_NUMA_NODE_ID
help
Enable NUMA (Non-Uniform Memory Access) support.

@@ -672,10 +667,6 @@ config NODES_SHIFT
default "4"
depends on NUMA

-config USE_PERCPU_NUMA_NODE_ID
- def_bool y
- depends on NUMA
-
config HAVE_MEMORYLESS_NODES
def_bool y
depends on NUMA
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 821252b65f89..bf66bcbc5a39 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -334,6 +334,8 @@ config NUMA
select GENERIC_ARCH_NUMA
select OF_NUMA
select ARCH_SUPPORTS_NUMA_BALANCING
+ select USE_PERCPU_NUMA_NODE_ID
+ select NEED_PER_CPU_EMBED_FIRST_CHUNK
help
Enable NUMA (Non-Uniform Memory Access) support.

@@ -349,14 +351,6 @@ config NODES_SHIFT
Specify the maximum number of NUMA Nodes available on the target
system. Increases memory reserved to accommodate various tables.

-config USE_PERCPU_NUMA_NODE_ID
- def_bool y
- depends on NUMA
-
-config NEED_PER_CPU_EMBED_FIRST_CHUNK
- def_bool y
- depends on NUMA
-
config RISCV_ISA_C
bool "Emit compressed instructions when building Linux"
default y
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 66fc08646be5..a6765e0fe6a8 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -97,6 +97,9 @@ config SPARC64
select PCI_DOMAINS if PCI
select ARCH_HAS_GIGANTIC_PAGE
select HAVE_SOFTIRQ_ON_OWN_STACK
+ select HAVE_SETUP_PER_CPU_AREA
+ select NEED_PER_CPU_EMBED_FIRST_CHUNK
+ select NEED_PER_CPU_PAGE_FIRST_CHUNK

config ARCH_PROC_KCORE_TEXT
def_bool y
@@ -123,15 +126,6 @@ config AUDIT_ARCH
bool
default y

-config HAVE_SETUP_PER_CPU_AREA
- def_bool y if SPARC64
-
-config NEED_PER_CPU_EMBED_FIRST_CHUNK
- def_bool y if SPARC64
-
-config NEED_PER_CPU_PAGE_FIRST_CHUNK
- def_bool y if SPARC64
-
config MMU
bool
default y
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 7399327d1eff..ca120a1f5857 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -239,6 +239,7 @@ config X86
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RELIABLE_STACKTRACE if X86_64 && (UNWINDER_FRAME_POINTER || UNWINDER_ORC) && STACK_VALIDATION
select HAVE_FUNCTION_ARG_ACCESS_API
+ select HAVE_SETUP_PER_CPU_AREA
select HAVE_SOFTIRQ_ON_OWN_STACK
select HAVE_STACKPROTECTOR if CC_HAS_SANE_STACKPROTECTOR
select HAVE_STACK_VALIDATION if X86_64
@@ -252,6 +253,8 @@ config X86
select HAVE_GENERIC_VDSO
select HOTPLUG_SMT if SMP
select IRQ_FORCED_THREADING
+ select NEED_PER_CPU_EMBED_FIRST_CHUNK
+ select NEED_PER_CPU_PAGE_FIRST_CHUNK
select NEED_SG_DMA_LENGTH
select PCI_DOMAINS if PCI
select PCI_LOCKLESS_CONFIG if PCI
@@ -331,15 +334,6 @@ config ARCH_HAS_CPU_RELAX
config ARCH_HAS_FILTER_PGPROT
def_bool y

-config HAVE_SETUP_PER_CPU_AREA
- def_bool y
-
-config NEED_PER_CPU_EMBED_FIRST_CHUNK
- def_bool y
-
-config NEED_PER_CPU_PAGE_FIRST_CHUNK
- def_bool y
-
config ARCH_HIBERNATION_POSSIBLE
def_bool y

@@ -1557,6 +1551,7 @@ config NUMA
depends on SMP
depends on X86_64 || (X86_32 && HIGHMEM64G && X86_BIGSMP)
default y if X86_BIGSMP
+ select USE_PERCPU_NUMA_NODE_ID
help
Enable NUMA (Non-Uniform Memory Access) support.

@@ -2430,10 +2425,6 @@ config ARCH_HAS_ADD_PAGES
config ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE
def_bool y

-config USE_PERCPU_NUMA_NODE_ID
- def_bool y
- depends on NUMA
-
menu "Power management and ACPI options"

config ARCH_HIBERNATION_HEADER
diff --git a/mm/Kconfig b/mm/Kconfig
index 28edafc820ad..6bc5d780c51b 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -432,6 +432,18 @@ config NEED_PER_CPU_KM
bool
default y

+config NEED_PER_CPU_EMBED_FIRST_CHUNK
+ bool
+
+config NEED_PER_CPU_PAGE_FIRST_CHUNK
+ bool
+
+config USE_PERCPU_NUMA_NODE_ID
+ bool
+
+config HAVE_SETUP_PER_CPU_AREA
+ bool
+
config CLEANCACHE
bool "Enable cleancache driver to cache clean pages if tmem is present"
help
--
2.26.2


2021-11-21 09:25:24

by Kefeng Wang

Subject: [PATCH RFC 4/4] mm: percpu: Add generic pcpu_populate_pte() function

When NEED_PER_CPU_PAGE_FIRST_CHUNK is enabled, each architecture must
supply a function to populate the page tables for the first chunk's
mapping. Add a generic, weak pcpu_populate_pte() function and switch
the architectures over to it, as sketched below.
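
The pattern, in brief (condensed from the hunks below): mm/percpu.c
gains a weak default that walks pgd -> p4d -> pud -> pmd and allocates
any missing page-table level from memblock, while an architecture with
a cheaper primitive simply provides a strong definition, as x86 does:

/* mm/percpu.c: weak default, overridable per architecture */
void __init __weak pcpu_populate_pte(unsigned long addr)
{
        /* generic page-table walk; full body in the mm/percpu.c hunk */
}

/* arch/x86/kernel/setup_percpu.c: strong override */
void __init pcpu_populate_pte(unsigned long addr)
{
        populate_extra_pte(addr);
}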

Signed-off-by: Kefeng Wang <[email protected]>
---
arch/powerpc/kernel/setup_64.c | 47 +--------------------
arch/sparc/kernel/smp_64.c | 57 +------------------------
arch/x86/kernel/setup_percpu.c | 5 +--
drivers/base/arch_numa.c | 51 +---------------------
include/linux/percpu.h | 5 +--
mm/percpu.c | 77 +++++++++++++++++++++++++++++++---
6 files changed, 79 insertions(+), 163 deletions(-)

diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 364b1567f822..1a17828af77f 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -788,51 +788,6 @@ static int pcpu_cpu_distance(unsigned int from, unsigned int to)
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(__per_cpu_offset);

-static void __init pcpu_populate_pte(unsigned long addr)
-{
- pgd_t *pgd = pgd_offset_k(addr);
- p4d_t *p4d;
- pud_t *pud;
- pmd_t *pmd;
-
- p4d = p4d_offset(pgd, addr);
- if (p4d_none(*p4d)) {
- pud_t *new;
-
- new = memblock_alloc(PUD_TABLE_SIZE, PUD_TABLE_SIZE);
- if (!new)
- goto err_alloc;
- p4d_populate(&init_mm, p4d, new);
- }
-
- pud = pud_offset(p4d, addr);
- if (pud_none(*pud)) {
- pmd_t *new;
-
- new = memblock_alloc(PMD_TABLE_SIZE, PMD_TABLE_SIZE);
- if (!new)
- goto err_alloc;
- pud_populate(&init_mm, pud, new);
- }
-
- pmd = pmd_offset(pud, addr);
- if (!pmd_present(*pmd)) {
- pte_t *new;
-
- new = memblock_alloc(PTE_TABLE_SIZE, PTE_TABLE_SIZE);
- if (!new)
- goto err_alloc;
- pmd_populate_kernel(&init_mm, pmd, new);
- }
-
- return;
-
-err_alloc:
- panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n",
- __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
-}
-
-
void __init setup_per_cpu_areas(void)
{
const size_t dyn_size = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE;
@@ -861,7 +816,7 @@ void __init setup_per_cpu_areas(void)
}

if (rc < 0)
- rc = pcpu_page_first_chunk(0, pcpu_cpu_to_node, pcpu_populate_pte);
+ rc = pcpu_page_first_chunk(0, pcpu_cpu_to_node);
if (rc < 0)
panic("cannot initialize percpu area (err=%d)", rc);

diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index 198dadddb75d..00dffe2d834b 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1534,59 +1534,6 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
return REMOTE_DISTANCE;
}

-static void __init pcpu_populate_pte(unsigned long addr)
-{
- pgd_t *pgd = pgd_offset_k(addr);
- p4d_t *p4d;
- pud_t *pud;
- pmd_t *pmd;
-
- if (pgd_none(*pgd)) {
- pud_t *new;
-
- new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
- if (!new)
- goto err_alloc;
- pgd_populate(&init_mm, pgd, new);
- }
-
- p4d = p4d_offset(pgd, addr);
- if (p4d_none(*p4d)) {
- pud_t *new;
-
- new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
- if (!new)
- goto err_alloc;
- p4d_populate(&init_mm, p4d, new);
- }
-
- pud = pud_offset(p4d, addr);
- if (pud_none(*pud)) {
- pmd_t *new;
-
- new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
- if (!new)
- goto err_alloc;
- pud_populate(&init_mm, pud, new);
- }
-
- pmd = pmd_offset(pud, addr);
- if (!pmd_present(*pmd)) {
- pte_t *new;
-
- new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
- if (!new)
- goto err_alloc;
- pmd_populate_kernel(&init_mm, pmd, new);
- }
-
- return;
-
-err_alloc:
- panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n",
- __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
-}
-
void __init setup_per_cpu_areas(void)
{
unsigned long delta;
@@ -1604,9 +1551,7 @@ void __init setup_per_cpu_areas(void)
pcpu_fc_names[pcpu_chosen_fc], rc);
}
if (rc < 0)
- rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE,
- cpu_to_node,
- pcpu_populate_pte);
+ rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE, cpu_to_node);
if (rc < 0)
panic("cannot initialize percpu area (err=%d)", rc);

diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index cd672bd46241..4eadbe45078e 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -101,7 +101,7 @@ static int __init pcpu_cpu_to_node(int cpu)
return IS_ENABLED(CONFIG_NUMA) ? early_cpu_to_node(cpu) : NUMA_NO_NODE;
}

-static void __init pcpup_populate_pte(unsigned long addr)
+void __init pcpu_populate_pte(unsigned long addr)
{
populate_extra_pte(addr);
}
@@ -163,8 +163,7 @@ void __init setup_per_cpu_areas(void)
}
if (rc < 0)
rc = pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
- pcpu_cpu_to_node,
- pcpup_populate_pte);
+ pcpu_cpu_to_node);
if (rc < 0)
panic("cannot initialize percpu area (err=%d)", rc);

diff --git a/drivers/base/arch_numa.c b/drivers/base/arch_numa.c
index 23a10cc36165..eaa31e567d1e 100644
--- a/drivers/base/arch_numa.c
+++ b/drivers/base/arch_numa.c
@@ -14,7 +14,6 @@
#include <linux/of.h>

#include <asm/sections.h>
-#include <asm/pgalloc.h>

struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
EXPORT_SYMBOL(node_data);
@@ -155,52 +154,6 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
return node_distance(early_cpu_to_node(from), early_cpu_to_node(to));
}

-#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
-static void __init pcpu_populate_pte(unsigned long addr)
-{
- pgd_t *pgd = pgd_offset_k(addr);
- p4d_t *p4d;
- pud_t *pud;
- pmd_t *pmd;
-
- p4d = p4d_offset(pgd, addr);
- if (p4d_none(*p4d)) {
- pud_t *new;
-
- new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
- if (!new)
- goto err_alloc;
- p4d_populate(&init_mm, p4d, new);
- }
-
- pud = pud_offset(p4d, addr);
- if (pud_none(*pud)) {
- pmd_t *new;
-
- new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
- if (!new)
- goto err_alloc;
- pud_populate(&init_mm, pud, new);
- }
-
- pmd = pmd_offset(pud, addr);
- if (!pmd_present(*pmd)) {
- pte_t *new;
-
- new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
- if (!new)
- goto err_alloc;
- pmd_populate_kernel(&init_mm, pmd, new);
- }
-
- return;
-
-err_alloc:
- panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n",
- __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
-}
-#endif
-
void __init setup_per_cpu_areas(void)
{
unsigned long delta;
@@ -225,9 +178,7 @@ void __init setup_per_cpu_areas(void)

#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
if (rc < 0)
- rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE,
- early_cpu_to_node,
- pcpu_populate_pte);
+ rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE, early_cpu_to_node);
#endif
if (rc < 0)
panic("Failed to initialize percpu areas (err=%d).", rc);
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index d73c97ef4ff4..f1ec5ad1351c 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -95,7 +95,6 @@ extern const char * const pcpu_fc_names[PCPU_FC_NR];
extern enum pcpu_fc pcpu_chosen_fc;

typedef int (pcpu_fc_cpu_to_node_fn_t)(int cpu);
-typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr);
typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to);

extern struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups,
@@ -113,9 +112,9 @@ extern int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
#endif

#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
+void __init pcpu_populate_pte(unsigned long addr);
extern int __init pcpu_page_first_chunk(size_t reserved_size,
- pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
- pcpu_fc_populate_pte_fn_t populate_pte_fn);
+ pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn);
#endif

extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align) __alloc_size(1);
diff --git a/mm/percpu.c b/mm/percpu.c
index efaa1cbaf73d..d907daed04eb 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -3162,11 +3162,80 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
#endif /* BUILD_EMBED_FIRST_CHUNK */

#ifdef BUILD_PAGE_FIRST_CHUNK
+#include <asm/pgalloc.h>
+
+#ifndef P4D_TABLE_SIZE
+#define P4D_TABLE_SIZE PAGE_SIZE
+#endif
+
+#ifndef PUD_TABLE_SIZE
+#define PUD_TABLE_SIZE PAGE_SIZE
+#endif
+
+#ifndef PMD_TABLE_SIZE
+#define PMD_TABLE_SIZE PAGE_SIZE
+#endif
+
+#ifndef PTE_TABLE_SIZE
+#define PTE_TABLE_SIZE PAGE_SIZE
+#endif
+void __init __weak pcpu_populate_pte(unsigned long addr)
+{
+ pgd_t *pgd = pgd_offset_k(addr);
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+
+ if (pgd_none(*pgd)) {
+ p4d_t *new;
+
+ new = memblock_alloc_from(P4D_TABLE_SIZE, P4D_TABLE_SIZE, PAGE_SIZE);
+ if (!new)
+ goto err_alloc;
+ pgd_populate(&init_mm, pgd, new);
+ }
+
+ p4d = p4d_offset(pgd, addr);
+ if (p4d_none(*p4d)) {
+ pud_t *new;
+
+ new = memblock_alloc_from(PUD_TABLE_SIZE, PUD_TABLE_SIZE, PAGE_SIZE);
+ if (!new)
+ goto err_alloc;
+ p4d_populate(&init_mm, p4d, new);
+ }
+
+ pud = pud_offset(p4d, addr);
+ if (pud_none(*pud)) {
+ pmd_t *new;
+
+ new = memblock_alloc_from(PMD_TABLE_SIZE, PMD_TABLE_SIZE, PAGE_SIZE);
+ if (!new)
+ goto err_alloc;
+ pud_populate(&init_mm, pud, new);
+ }
+
+ pmd = pmd_offset(pud, addr);
+ if (!pmd_present(*pmd)) {
+ pte_t *new;
+
+ new = memblock_alloc_from(PTE_TABLE_SIZE, PTE_TABLE_SIZE, PAGE_SIZE);
+ if (!new)
+ goto err_alloc;
+ pmd_populate_kernel(&init_mm, pmd, new);
+ }
+
+ return;
+
+err_alloc:
+ panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n",
+ __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
+}
+
/**
* pcpu_page_first_chunk - map the first chunk using PAGE_SIZE pages
* @reserved_size: the size of reserved percpu area in bytes
* @cpu_to_nd_fn: callback to convert cpu to its node, optional
- * @populate_pte_fn: function to populate pte
*
* This is a helper to ease setting up page-remapped first percpu
* chunk and can be called where pcpu_setup_first_chunk() is expected.
@@ -3177,9 +3246,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
* RETURNS:
* 0 on success, -errno on failure.
*/
-int __init pcpu_page_first_chunk(size_t reserved_size,
- pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
- pcpu_fc_populate_pte_fn_t populate_pte_fn)
+int __init pcpu_page_first_chunk(size_t reserved_size, pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
{
static struct vm_struct vm;
struct pcpu_alloc_info *ai;
@@ -3243,7 +3310,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
(unsigned long)vm.addr + unit * ai->unit_size;

for (i = 0; i < unit_pages; i++)
- populate_pte_fn(unit_addr + (i << PAGE_SHIFT));
+ pcpu_populate_pte(unit_addr + (i << PAGE_SHIFT));

/* pte already populated, the following shouldn't fail */
rc = __pcpu_map_pages(unit_addr, &pages[unit * unit_pages],
--
2.26.2


2021-11-21 09:25:24

by Kefeng Wang

Subject: [PATCH RFC 2/4] mm: percpu: Add pcpu_fc_cpu_to_node_fn_t typedef

Add a pcpu_fc_cpu_to_node_fn_t typedef and pass such a callback into
pcpu_fc_alloc_fn_t; percpu first chunk allocation will call it to
allocate memblock memory on the node that corresponds to each cpu,
as sketched below.
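
A minimal sketch of the resulting callback chain (names taken from the
hunks below; the pcpu_fc_alloc() in the mips hunk is one concrete
pcpu_fc_alloc_fn_t user):

/* include/linux/percpu.h */
typedef int (pcpu_fc_cpu_to_node_fn_t)(int cpu);

/* arch side, x86/powerpc style: resolve a cpu to its node, or report
 * NUMA_NO_NODE on !NUMA configurations */
static int __init pcpu_cpu_to_node(int cpu)
{
        return IS_ENABLED(CONFIG_NUMA) ? early_cpu_to_node(cpu) : NUMA_NO_NODE;
}

pcpu_embed_first_chunk()/pcpu_page_first_chunk() then thread the
callback down to the pcpu_fc_alloc_fn_t implementations, replacing
their hard-coded early_cpu_to_node()/cpu_to_node() calls.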

Signed-off-by: Kefeng Wang <[email protected]>
---
arch/mips/mm/init.c | 12 +++++++++---
arch/powerpc/kernel/setup_64.c | 14 +++++++++++---
arch/sparc/kernel/smp_64.c | 8 +++++---
arch/x86/kernel/setup_percpu.c | 18 +++++++++++++-----
drivers/base/arch_numa.c | 8 +++++---
include/linux/percpu.h | 7 +++++--
mm/percpu.c | 14 +++++++++-----
7 files changed, 57 insertions(+), 24 deletions(-)

diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 325e1552cbea..ebbf6923532c 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -519,12 +519,17 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
return node_distance(cpu_to_node(from), cpu_to_node(to));
}

-static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size,
- size_t align)
+static int __init pcpu_cpu_to_node(int cpu)
+{
+ return cpu_to_node(cpu);
+}
+
+static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align,
+ pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
{
return memblock_alloc_try_nid(size, align, __pa(MAX_DMA_ADDRESS),
MEMBLOCK_ALLOC_ACCESSIBLE,
- cpu_to_node(cpu));
+ cpu_to_nd_fn(cpu));
}

static void __init pcpu_fc_free(void *ptr, size_t size)
@@ -545,6 +550,7 @@ void __init setup_per_cpu_areas(void)
rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
PERCPU_DYNAMIC_RESERVE, PAGE_SIZE,
pcpu_cpu_distance,
+ pcpu_cpu_to_node,
pcpu_fc_alloc, pcpu_fc_free);
if (rc < 0)
panic("Failed to initialize percpu areas.");
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 6052f5d5ded3..9a5609c821df 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -771,6 +771,12 @@ void __init emergency_stack_init(void)
}

#ifdef CONFIG_SMP
+
+static __init int pcpu_cpu_to_node(int cpu)
+{
+ return IS_ENABLED(CONFIG_NUMA) ? early_cpu_to_node(cpu) : NUMA_NO_NODE;
+}
+
/**
* pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu
* @cpu: cpu to allocate for
@@ -784,12 +790,12 @@ void __init emergency_stack_init(void)
* RETURNS:
* Pointer to the allocated area on success, NULL on failure.
*/
-static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size,
- size_t align)
+static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size, size_t align,
+ pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
{
const unsigned long goal = __pa(MAX_DMA_ADDRESS);
#ifdef CONFIG_NUMA
- int node = early_cpu_to_node(cpu);
+ int node = cpu_to_nd_fn(cpu);
void *ptr;

if (!node_online(node) || !NODE_DATA(node)) {
@@ -891,6 +897,7 @@ void __init setup_per_cpu_areas(void)

if (pcpu_chosen_fc != PCPU_FC_PAGE) {
rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance,
+ pcpu_cpu_to_node,
pcpu_alloc_bootmem, pcpu_free_bootmem);
if (rc)
pr_warn("PERCPU: %s allocator failed (%d), "
@@ -900,6 +907,7 @@ void __init setup_per_cpu_areas(void)

if (rc < 0)
rc = pcpu_page_first_chunk(0, pcpu_alloc_bootmem, pcpu_free_bootmem,
+ pcpu_cpu_to_node,
pcpu_populate_pte);
if (rc < 0)
panic("cannot initialize percpu area (err=%d)", rc);
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index b98a7bbe6728..026aa3ccbc30 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1539,12 +1539,12 @@ void smp_send_stop(void)
* RETURNS:
* Pointer to the allocated area on success, NULL on failure.
*/
-static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size,
- size_t align)
+static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size, size_t align,
+ pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
{
const unsigned long goal = __pa(MAX_DMA_ADDRESS);
#ifdef CONFIG_NUMA
- int node = cpu_to_node(cpu);
+ int node = cpu_to_nd_fn(cpu);
void *ptr;

if (!node_online(node) || !NODE_DATA(node)) {
@@ -1641,6 +1641,7 @@ void __init setup_per_cpu_areas(void)
rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
PERCPU_DYNAMIC_RESERVE, 4 << 20,
pcpu_cpu_distance,
+ cpu_to_node,
pcpu_alloc_bootmem,
pcpu_free_bootmem);
if (rc)
@@ -1652,6 +1653,7 @@ void __init setup_per_cpu_areas(void)
rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE,
pcpu_alloc_bootmem,
pcpu_free_bootmem,
+ cpu_to_node,
pcpu_populate_pte);
if (rc < 0)
panic("cannot initialize percpu area (err=%d)", rc);
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 7b65275544b2..bba4fa174a16 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -97,12 +97,12 @@ static bool __init pcpu_need_numa(void)
* RETURNS:
* Pointer to the allocated area on success, NULL on failure.
*/
-static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size,
- unsigned long align)
+static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size, unsigned long align,
+ pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
{
const unsigned long goal = __pa(MAX_DMA_ADDRESS);
#ifdef CONFIG_NUMA
- int node = early_cpu_to_node(cpu);
+ int node = cpu_to_nd_fn(cpu);
void *ptr;

if (!node_online(node) || !NODE_DATA(node)) {
@@ -128,9 +128,10 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size,
/*
* Helpers for first chunk memory allocation
*/
-static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
+static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align,
+ pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
{
- return pcpu_alloc_bootmem(cpu, size, align);
+ return pcpu_alloc_bootmem(cpu, size, align, cpu_to_nd_fn);
}

static void __init pcpu_fc_free(void *ptr, size_t size)
@@ -150,6 +151,11 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
#endif
}

+static int __init pcpu_cpu_to_node(int cpu)
+{
+ return IS_ENABLED(CONFIG_NUMA) ? early_cpu_to_node(cpu) : NUMA_NO_NODE;
+}
+
static void __init pcpup_populate_pte(unsigned long addr)
{
populate_extra_pte(addr);
@@ -205,6 +211,7 @@ void __init setup_per_cpu_areas(void)
rc = pcpu_embed_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
dyn_size, atom_size,
pcpu_cpu_distance,
+ pcpu_cpu_to_node,
pcpu_fc_alloc, pcpu_fc_free);
if (rc < 0)
pr_warn("%s allocator failed (%d), falling back to page size\n",
@@ -213,6 +220,7 @@ void __init setup_per_cpu_areas(void)
if (rc < 0)
rc = pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
pcpu_fc_alloc, pcpu_fc_free,
+ pcpu_cpu_to_node,
pcpup_populate_pte);
if (rc < 0)
panic("cannot initialize percpu area (err=%d)", rc);
diff --git a/drivers/base/arch_numa.c b/drivers/base/arch_numa.c
index bc1876915457..273543d9ff85 100644
--- a/drivers/base/arch_numa.c
+++ b/drivers/base/arch_numa.c
@@ -155,10 +155,10 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
return node_distance(early_cpu_to_node(from), early_cpu_to_node(to));
}

-static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size,
- size_t align)
+static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align,
+ pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
{
- int nid = early_cpu_to_node(cpu);
+ int nid = cpu_to_nd_fn(cpu);

return memblock_alloc_try_nid(size, align,
__pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, nid);
@@ -229,6 +229,7 @@ void __init setup_per_cpu_areas(void)
rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
PERCPU_DYNAMIC_RESERVE, PAGE_SIZE,
pcpu_cpu_distance,
+ early_cpu_to_node,
pcpu_fc_alloc, pcpu_fc_free);
#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
if (rc < 0)
@@ -242,6 +243,7 @@ void __init setup_per_cpu_areas(void)
rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE,
pcpu_fc_alloc,
pcpu_fc_free,
+ early_cpu_to_node,
pcpu_populate_pte);
#endif
if (rc < 0)
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index ae4004e7957e..41bb54715b0c 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -94,8 +94,9 @@ extern const char * const pcpu_fc_names[PCPU_FC_NR];

extern enum pcpu_fc pcpu_chosen_fc;

-typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size,
- size_t align);
+typedef int (pcpu_fc_cpu_to_node_fn_t)(int cpu);
+typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size, size_t align,
+ pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn);
typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size);
typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr);
typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to);
@@ -111,6 +112,7 @@ extern void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
extern int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
size_t atom_size,
pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
+ pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
pcpu_fc_alloc_fn_t alloc_fn,
pcpu_fc_free_fn_t free_fn);
#endif
@@ -119,6 +121,7 @@ extern int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
extern int __init pcpu_page_first_chunk(size_t reserved_size,
pcpu_fc_alloc_fn_t alloc_fn,
pcpu_fc_free_fn_t free_fn,
+ pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
pcpu_fc_populate_pte_fn_t populate_pte_fn);
#endif

diff --git a/mm/percpu.c b/mm/percpu.c
index f5b2c2ea5a54..3f6cf1ff0be2 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -3001,6 +3001,7 @@ static struct pcpu_alloc_info * __init __flatten pcpu_build_alloc_info(
* @dyn_size: minimum free size for dynamic allocation in bytes
* @atom_size: allocation atom size
* @cpu_distance_fn: callback to determine distance between cpus, optional
+ * @cpu_to_nd_fn: callback to convert cpu to its node, optional
* @alloc_fn: function to allocate percpu page
* @free_fn: function to free percpu page
*
@@ -3030,6 +3031,7 @@ static struct pcpu_alloc_info * __init __flatten pcpu_build_alloc_info(
int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
size_t atom_size,
pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
+ pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
pcpu_fc_alloc_fn_t alloc_fn,
pcpu_fc_free_fn_t free_fn)
{
@@ -3066,7 +3068,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
BUG_ON(cpu == NR_CPUS);

/* allocate space for the whole group */
- ptr = alloc_fn(cpu, gi->nr_units * ai->unit_size, atom_size);
+ ptr = alloc_fn(cpu, gi->nr_units * ai->unit_size, atom_size, cpu_to_nd_fn);
if (!ptr) {
rc = -ENOMEM;
goto out_free_areas;
@@ -3145,6 +3147,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
* @reserved_size: the size of reserved percpu area in bytes
* @alloc_fn: function to allocate percpu page, always called with PAGE_SIZE
* @free_fn: function to free percpu page, always called with PAGE_SIZE
+ * @cpu_to_nd_fn: callback to convert cpu to its node, optional
* @populate_pte_fn: function to populate pte
*
* This is a helper to ease setting up page-remapped first percpu
@@ -3159,6 +3162,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
int __init pcpu_page_first_chunk(size_t reserved_size,
pcpu_fc_alloc_fn_t alloc_fn,
pcpu_fc_free_fn_t free_fn,
+ pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
pcpu_fc_populate_pte_fn_t populate_pte_fn)
{
static struct vm_struct vm;
@@ -3201,7 +3205,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
for (i = 0; i < unit_pages; i++) {
void *ptr;

- ptr = alloc_fn(cpu, PAGE_SIZE, PAGE_SIZE);
+ ptr = alloc_fn(cpu, PAGE_SIZE, PAGE_SIZE, cpu_to_nd_fn);
if (!ptr) {
pr_warn("failed to allocate %s page for cpu%u\n",
psize_str, cpu);
@@ -3278,8 +3282,8 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(__per_cpu_offset);

-static void * __init pcpu_dfl_fc_alloc(unsigned int cpu, size_t size,
- size_t align)
+static void * __init pcpu_dfl_fc_alloc(unsigned int cpu, size_t size, size_t align,
+ pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
{
return memblock_alloc_from(size, align, __pa(MAX_DMA_ADDRESS));
}
@@ -3300,7 +3304,7 @@ void __init setup_per_cpu_areas(void)
* what the legacy allocator did.
*/
rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
- PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, NULL,
+ PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, NULL, NULL,
pcpu_dfl_fc_alloc, pcpu_dfl_fc_free);
if (rc < 0)
panic("Failed to initialize percpu areas.");
--
2.26.2


2021-11-21 09:25:24

by Kefeng Wang

Subject: [PATCH RFC 3/4] mm: percpu: Add generic pcpu_fc_alloc/free function

With the previous patch in place, we can add generic percpu first
chunk allocation and free functions and clean up the duplicated
definitions on each architecture. The core of the generic allocator
is sketched below.
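
Condensed from the mm/percpu.c hunk below: the optional cpu_to_nd_fn
callback resolves the node, and the allocation falls back to a plain
low allocation whenever the node is unknown, offline, or memoryless:

static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align,
                                   pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
{
        const unsigned long goal = __pa(MAX_DMA_ADDRESS);
        int node = cpu_to_nd_fn ? cpu_to_nd_fn(cpu) : NUMA_NO_NODE;

        if (node == NUMA_NO_NODE || !node_online(node) || !NODE_DATA(node))
                return memblock_alloc_from(size, align, goal);

        return memblock_alloc_try_nid(size, align, goal,
                                      MEMBLOCK_ALLOC_ACCESSIBLE, node);
}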

Signed-off-by: Kefeng Wang <[email protected]>
---
arch/mips/mm/init.c | 16 +--------
arch/powerpc/kernel/setup_64.c | 51 ++------------------------
arch/sparc/kernel/smp_64.c | 50 +-------------------------
arch/x86/kernel/setup_percpu.c | 59 +-----------------------------
drivers/base/arch_numa.c | 19 +---------
include/linux/percpu.h | 9 +----
mm/percpu.c | 66 ++++++++++++++++++----------------
7 files changed, 42 insertions(+), 228 deletions(-)

diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index ebbf6923532c..5a8002839550 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -524,19 +524,6 @@ static int __init pcpu_cpu_to_node(int cpu)
return cpu_to_node(cpu);
}

-static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align,
- pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
-{
- return memblock_alloc_try_nid(size, align, __pa(MAX_DMA_ADDRESS),
- MEMBLOCK_ALLOC_ACCESSIBLE,
- cpu_to_nd_fn(cpu));
-}
-
-static void __init pcpu_fc_free(void *ptr, size_t size)
-{
- memblock_free(ptr, size);
-}
-
void __init setup_per_cpu_areas(void)
{
unsigned long delta;
@@ -550,8 +537,7 @@ void __init setup_per_cpu_areas(void)
rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
PERCPU_DYNAMIC_RESERVE, PAGE_SIZE,
pcpu_cpu_distance,
- pcpu_cpu_to_node,
- pcpu_fc_alloc, pcpu_fc_free);
+ pcpu_cpu_to_node);
if (rc < 0)
panic("Failed to initialize percpu areas.");

diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 9a5609c821df..364b1567f822 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -777,50 +777,6 @@ static __init int pcpu_cpu_to_node(int cpu)
return IS_ENABLED(CONFIG_NUMA) ? early_cpu_to_node(cpu) : NUMA_NO_NODE;
}

-/**
- * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu
- * @cpu: cpu to allocate for
- * @size: size allocation in bytes
- * @align: alignment
- *
- * Allocate @size bytes aligned at @align for cpu @cpu. This wrapper
- * does the right thing for NUMA regardless of the current
- * configuration.
- *
- * RETURNS:
- * Pointer to the allocated area on success, NULL on failure.
- */
-static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size, size_t align,
- pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
-{
- const unsigned long goal = __pa(MAX_DMA_ADDRESS);
-#ifdef CONFIG_NUMA
- int node = cpu_to_nd_fn(cpu);
- void *ptr;
-
- if (!node_online(node) || !NODE_DATA(node)) {
- ptr = memblock_alloc_from(size, align, goal);
- pr_info("cpu %d has no node %d or node-local memory\n",
- cpu, node);
- pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
- cpu, size, __pa(ptr));
- } else {
- ptr = memblock_alloc_try_nid(size, align, goal,
- MEMBLOCK_ALLOC_ACCESSIBLE, node);
- pr_debug("per cpu data for cpu%d %lu bytes on node%d at "
- "%016lx\n", cpu, size, node, __pa(ptr));
- }
- return ptr;
-#else
- return memblock_alloc_from(size, align, goal);
-#endif
-}
-
-static void __init pcpu_free_bootmem(void *ptr, size_t size)
-{
- memblock_free(ptr, size);
-}
-
static int pcpu_cpu_distance(unsigned int from, unsigned int to)
{
if (early_cpu_to_node(from) == early_cpu_to_node(to))
@@ -897,8 +853,7 @@ void __init setup_per_cpu_areas(void)

if (pcpu_chosen_fc != PCPU_FC_PAGE) {
rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance,
- pcpu_cpu_to_node,
- pcpu_alloc_bootmem, pcpu_free_bootmem);
+ pcpu_cpu_to_node);
if (rc)
pr_warn("PERCPU: %s allocator failed (%d), "
"falling back to page size\n",
@@ -906,9 +861,7 @@ void __init setup_per_cpu_areas(void)
}

if (rc < 0)
- rc = pcpu_page_first_chunk(0, pcpu_alloc_bootmem, pcpu_free_bootmem,
- pcpu_cpu_to_node,
- pcpu_populate_pte);
+ rc = pcpu_page_first_chunk(0, pcpu_cpu_to_node, pcpu_populate_pte);
if (rc < 0)
panic("cannot initialize percpu area (err=%d)", rc);

diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index 026aa3ccbc30..198dadddb75d 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1526,50 +1526,6 @@ void smp_send_stop(void)
smp_call_function(stop_this_cpu, NULL, 0);
}

-/**
- * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu
- * @cpu: cpu to allocate for
- * @size: size allocation in bytes
- * @align: alignment
- *
- * Allocate @size bytes aligned at @align for cpu @cpu. This wrapper
- * does the right thing for NUMA regardless of the current
- * configuration.
- *
- * RETURNS:
- * Pointer to the allocated area on success, NULL on failure.
- */
-static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size, size_t align,
- pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
-{
- const unsigned long goal = __pa(MAX_DMA_ADDRESS);
-#ifdef CONFIG_NUMA
- int node = cpu_to_nd_fn(cpu);
- void *ptr;
-
- if (!node_online(node) || !NODE_DATA(node)) {
- ptr = memblock_alloc_from(size, align, goal);
- pr_info("cpu %d has no node %d or node-local memory\n",
- cpu, node);
- pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
- cpu, size, __pa(ptr));
- } else {
- ptr = memblock_alloc_try_nid(size, align, goal,
- MEMBLOCK_ALLOC_ACCESSIBLE, node);
- pr_debug("per cpu data for cpu%d %lu bytes on node%d at "
- "%016lx\n", cpu, size, node, __pa(ptr));
- }
- return ptr;
-#else
- return memblock_alloc_from(size, align, goal);
-#endif
-}
-
-static void __init pcpu_free_bootmem(void *ptr, size_t size)
-{
- memblock_free(ptr, size);
-}
-
static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
{
if (cpu_to_node(from) == cpu_to_node(to))
@@ -1641,9 +1597,7 @@ void __init setup_per_cpu_areas(void)
rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
PERCPU_DYNAMIC_RESERVE, 4 << 20,
pcpu_cpu_distance,
- cpu_to_node,
- pcpu_alloc_bootmem,
- pcpu_free_bootmem);
+ cpu_to_node);
if (rc)
pr_warn("PERCPU: %s allocator failed (%d), "
"falling back to page size\n",
@@ -1651,8 +1605,6 @@ void __init setup_per_cpu_areas(void)
}
if (rc < 0)
rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE,
- pcpu_alloc_bootmem,
- pcpu_free_bootmem,
cpu_to_node,
pcpu_populate_pte);
if (rc < 0)
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index bba4fa174a16..cd672bd46241 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -84,61 +84,6 @@ static bool __init pcpu_need_numa(void)
}
#endif

-/**
- * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu
- * @cpu: cpu to allocate for
- * @size: size allocation in bytes
- * @align: alignment
- *
- * Allocate @size bytes aligned at @align for cpu @cpu. This wrapper
- * does the right thing for NUMA regardless of the current
- * configuration.
- *
- * RETURNS:
- * Pointer to the allocated area on success, NULL on failure.
- */
-static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size, unsigned long align,
- pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
-{
- const unsigned long goal = __pa(MAX_DMA_ADDRESS);
-#ifdef CONFIG_NUMA
- int node = cpu_to_nd_fn(cpu);
- void *ptr;
-
- if (!node_online(node) || !NODE_DATA(node)) {
- ptr = memblock_alloc_from(size, align, goal);
- pr_info("cpu %d has no node %d or node-local memory\n",
- cpu, node);
- pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
- cpu, size, __pa(ptr));
- } else {
- ptr = memblock_alloc_try_nid(size, align, goal,
- MEMBLOCK_ALLOC_ACCESSIBLE,
- node);
-
- pr_debug("per cpu data for cpu%d %lu bytes on node%d at %016lx\n",
- cpu, size, node, __pa(ptr));
- }
- return ptr;
-#else
- return memblock_alloc_from(size, align, goal);
-#endif
-}
-
-/*
- * Helpers for first chunk memory allocation
- */
-static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align,
- pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
-{
- return pcpu_alloc_bootmem(cpu, size, align, cpu_to_nd_fn);
-}
-
-static void __init pcpu_fc_free(void *ptr, size_t size)
-{
- memblock_free(ptr, size);
-}
-
static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
{
#ifdef CONFIG_NUMA
@@ -211,15 +156,13 @@ void __init setup_per_cpu_areas(void)
rc = pcpu_embed_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
dyn_size, atom_size,
pcpu_cpu_distance,
- pcpu_cpu_to_node,
- pcpu_fc_alloc, pcpu_fc_free);
+ pcpu_cpu_to_node);
if (rc < 0)
pr_warn("%s allocator failed (%d), falling back to page size\n",
pcpu_fc_names[pcpu_chosen_fc], rc);
}
if (rc < 0)
rc = pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
- pcpu_fc_alloc, pcpu_fc_free,
pcpu_cpu_to_node,
pcpup_populate_pte);
if (rc < 0)
diff --git a/drivers/base/arch_numa.c b/drivers/base/arch_numa.c
index 273543d9ff85..23a10cc36165 100644
--- a/drivers/base/arch_numa.c
+++ b/drivers/base/arch_numa.c
@@ -155,20 +155,6 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
return node_distance(early_cpu_to_node(from), early_cpu_to_node(to));
}

-static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align,
- pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
-{
- int nid = cpu_to_nd_fn(cpu);
-
- return memblock_alloc_try_nid(size, align,
- __pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, nid);
-}
-
-static void __init pcpu_fc_free(void *ptr, size_t size)
-{
- memblock_free(ptr, size);
-}
-
#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
static void __init pcpu_populate_pte(unsigned long addr)
{
@@ -229,8 +215,7 @@ void __init setup_per_cpu_areas(void)
rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
PERCPU_DYNAMIC_RESERVE, PAGE_SIZE,
pcpu_cpu_distance,
- early_cpu_to_node,
- pcpu_fc_alloc, pcpu_fc_free);
+ early_cpu_to_node);
#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
if (rc < 0)
pr_warn("PERCPU: %s allocator failed (%d), falling back to page size\n",
@@ -241,8 +226,6 @@ void __init setup_per_cpu_areas(void)
#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
if (rc < 0)
rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE,
- pcpu_fc_alloc,
- pcpu_fc_free,
early_cpu_to_node,
pcpu_populate_pte);
#endif
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 41bb54715b0c..d73c97ef4ff4 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -95,9 +95,6 @@ extern const char * const pcpu_fc_names[PCPU_FC_NR];
extern enum pcpu_fc pcpu_chosen_fc;

typedef int (pcpu_fc_cpu_to_node_fn_t)(int cpu);
-typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size, size_t align,
- pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn);
-typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size);
typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr);
typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to);

@@ -112,15 +109,11 @@ extern void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
extern int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
size_t atom_size,
pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
- pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
- pcpu_fc_alloc_fn_t alloc_fn,
- pcpu_fc_free_fn_t free_fn);
+ pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn);
#endif

#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
extern int __init pcpu_page_first_chunk(size_t reserved_size,
- pcpu_fc_alloc_fn_t alloc_fn,
- pcpu_fc_free_fn_t free_fn,
pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
pcpu_fc_populate_pte_fn_t populate_pte_fn);
#endif
diff --git a/mm/percpu.c b/mm/percpu.c
index 3f6cf1ff0be2..efaa1cbaf73d 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -2992,6 +2992,30 @@ static struct pcpu_alloc_info * __init __flatten pcpu_build_alloc_info(

return ai;
}
+
+static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align,
+ pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
+{
+ const unsigned long goal = __pa(MAX_DMA_ADDRESS);
+ int node = NUMA_NO_NODE;
+ void *ptr;
+
+ if (cpu_to_nd_fn)
+ node = cpu_to_nd_fn(cpu);
+
+ if (node == NUMA_NO_NODE || !node_online(node) || !NODE_DATA(node)) {
+ ptr = memblock_alloc_from(size, align, goal);
+ } else {
+ ptr = memblock_alloc_try_nid(size, align, goal,
+ MEMBLOCK_ALLOC_ACCESSIBLE, node);
+ }
+ return ptr;
+}
+
+static void __init pcpu_fc_free(void *ptr, size_t size)
+{
+ memblock_free(ptr, size);
+}
#endif /* BUILD_EMBED_FIRST_CHUNK || BUILD_PAGE_FIRST_CHUNK */

#if defined(BUILD_EMBED_FIRST_CHUNK)
@@ -3002,14 +3026,12 @@ static struct pcpu_alloc_info * __init __flatten pcpu_build_alloc_info(
* @atom_size: allocation atom size
* @cpu_distance_fn: callback to determine distance between cpus, optional
* @cpu_to_nd_fn: callback to convert cpu to its node, optional
- * @alloc_fn: function to allocate percpu page
- * @free_fn: function to free percpu page
*
* This is a helper to ease setting up embedded first percpu chunk and
* can be called where pcpu_setup_first_chunk() is expected.
*
* If this function is used to setup the first chunk, it is allocated
- * by calling @alloc_fn and used as-is without being mapped into
+ * by calling pcpu_fc_alloc and used as-is without being mapped into
* vmalloc area. Allocations are always whole multiples of @atom_size
* aligned to @atom_size.
*
@@ -3023,7 +3045,7 @@ static struct pcpu_alloc_info * __init __flatten pcpu_build_alloc_info(
* @dyn_size specifies the minimum dynamic area size.
*
* If the needed size is smaller than the minimum or specified unit
- * size, the leftover is returned using @free_fn.
+ * size, the leftover is returned using pcpu_fc_free.
*
* RETURNS:
* 0 on success, -errno on failure.
@@ -3031,9 +3053,7 @@ static struct pcpu_alloc_info * __init __flatten pcpu_build_alloc_info(
int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
size_t atom_size,
pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
- pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
- pcpu_fc_alloc_fn_t alloc_fn,
- pcpu_fc_free_fn_t free_fn)
+ pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
{
void *base = (void *)ULONG_MAX;
void **areas = NULL;
@@ -3068,7 +3088,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
BUG_ON(cpu == NR_CPUS);

/* allocate space for the whole group */
- ptr = alloc_fn(cpu, gi->nr_units * ai->unit_size, atom_size, cpu_to_nd_fn);
+ ptr = pcpu_fc_alloc(cpu, gi->nr_units * ai->unit_size, atom_size, cpu_to_nd_fn);
if (!ptr) {
rc = -ENOMEM;
goto out_free_areas;
@@ -3107,12 +3127,12 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
for (i = 0; i < gi->nr_units; i++, ptr += ai->unit_size) {
if (gi->cpu_map[i] == NR_CPUS) {
/* unused unit, free whole */
- free_fn(ptr, ai->unit_size);
+ pcpu_fc_free(ptr, ai->unit_size);
continue;
}
/* copy and return the unused part */
memcpy(ptr, __per_cpu_load, ai->static_size);
- free_fn(ptr + size_sum, ai->unit_size - size_sum);
+ pcpu_fc_free(ptr + size_sum, ai->unit_size - size_sum);
}
}

@@ -3131,7 +3151,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
out_free_areas:
for (group = 0; group < ai->nr_groups; group++)
if (areas[group])
- free_fn(areas[group],
+ pcpu_fc_free(areas[group],
ai->groups[group].nr_units * ai->unit_size);
out_free:
pcpu_free_alloc_info(ai);
@@ -3145,8 +3165,6 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
/**
* pcpu_page_first_chunk - map the first chunk using PAGE_SIZE pages
* @reserved_size: the size of reserved percpu area in bytes
- * @alloc_fn: function to allocate percpu page, always called with PAGE_SIZE
- * @free_fn: function to free percpu page, always called with PAGE_SIZE
* @cpu_to_nd_fn: callback to convert cpu to its node, optional
* @populate_pte_fn: function to populate pte
*
@@ -3160,8 +3178,6 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
* 0 on success, -errno on failure.
*/
int __init pcpu_page_first_chunk(size_t reserved_size,
- pcpu_fc_alloc_fn_t alloc_fn,
- pcpu_fc_free_fn_t free_fn,
pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
pcpu_fc_populate_pte_fn_t populate_pte_fn)
{
@@ -3205,7 +3221,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
for (i = 0; i < unit_pages; i++) {
void *ptr;

- ptr = alloc_fn(cpu, PAGE_SIZE, PAGE_SIZE, cpu_to_nd_fn);
+ ptr = pcpu_fc_alloc(cpu, PAGE_SIZE, PAGE_SIZE, cpu_to_nd_fn);
if (!ptr) {
pr_warn("failed to allocate %s page for cpu%u\n",
psize_str, cpu);
@@ -3257,7 +3273,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size,

enomem:
while (--j >= 0)
- free_fn(page_address(pages[j]), PAGE_SIZE);
+ pcpu_fc_free(page_address(pages[j]), PAGE_SIZE);
rc = -ENOMEM;
out_free_ar:
memblock_free(pages, pages_size);
@@ -3282,17 +3298,6 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(__per_cpu_offset);

-static void * __init pcpu_dfl_fc_alloc(unsigned int cpu, size_t size, size_t align,
- pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
-{
- return memblock_alloc_from(size, align, __pa(MAX_DMA_ADDRESS));
-}
-
-static void __init pcpu_dfl_fc_free(void *ptr, size_t size)
-{
- memblock_free(ptr, size);
-}
-
void __init setup_per_cpu_areas(void)
{
unsigned long delta;
@@ -3303,9 +3308,8 @@ void __init setup_per_cpu_areas(void)
* Always reserve area for module percpu variables. That's
* what the legacy allocator did.
*/
- rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
- PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, NULL, NULL,
- pcpu_dfl_fc_alloc, pcpu_dfl_fc_free);
+ rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE, PERCPU_DYNAMIC_RESERVE,
+ PAGE_SIZE, NULL, NULL);
if (rc < 0)
panic("Failed to initialize percpu areas.");

--
2.26.2


2021-11-29 02:53:48

by Kefeng Wang

Subject: Re: [PATCH RFC 0/4] mm: percpu: Cleanup percpu first chunk function

Hi Dennis and all maintainers, any comments about the changes? Many thanks.

On 2021/11/21 17:35, Kefeng Wang wrote:
> While adding support for the page mapping percpu first chunk allocator
> on arm64, we found a lot of duplicated code in the percpu embed/page
> first chunk allocators. This patchset cleans that up and should
> introduce no functional change; it has only been tested on arm64.
>
> Kefeng Wang (4):
> mm: percpu: Generalize percpu related config
> mm: percpu: Add pcpu_fc_cpu_to_node_fn_t typedef
> mm: percpu: Add generic pcpu_fc_alloc/free function
> mm: percpu: Add generic pcpu_populate_pte() function
>
> arch/arm64/Kconfig | 20 +----
> arch/ia64/Kconfig | 9 +--
> arch/mips/Kconfig | 10 +--
> arch/mips/mm/init.c | 14 +---
> arch/powerpc/Kconfig | 17 +---
> arch/powerpc/kernel/setup_64.c | 92 +--------------------
> arch/riscv/Kconfig | 10 +--
> arch/sparc/Kconfig | 12 +--
> arch/sparc/kernel/smp_64.c | 105 +-----------------------
> arch/x86/Kconfig | 17 +---
> arch/x86/kernel/setup_percpu.c | 66 ++-------------
> drivers/base/arch_numa.c | 68 +---------------
> include/linux/percpu.h | 13 +--
> mm/Kconfig | 12 +++
> mm/percpu.c | 143 +++++++++++++++++++++++++--------
> 15 files changed, 165 insertions(+), 443 deletions(-)
>

2021-11-29 02:56:39

by Dennis Zhou

Subject: Re: [PATCH RFC 0/4] mm: percpu: Cleanup percpu first chunk function

On Mon, Nov 29, 2021 at 10:51:18AM +0800, Kefeng Wang wrote:
> Hi Dennis and all maintainers, any comments about the changes? Many thanks.
>
> On 2021/11/21 17:35, Kefeng Wang wrote:
> > While adding support for the page mapping percpu first chunk allocator
> > on arm64, we found a lot of duplicated code in the percpu embed/page
> > first chunk allocators. This patchset cleans that up and should
> > introduce no functional change; it has only been tested on arm64.
> >
> > Kefeng Wang (4):
> > mm: percpu: Generalize percpu related config
> > mm: percpu: Add pcpu_fc_cpu_to_node_fn_t typedef
> > mm: percpu: Add generic pcpu_fc_alloc/free function
> > mm: percpu: Add generic pcpu_populate_pte() function
> >
> > arch/arm64/Kconfig | 20 +----
> > arch/ia64/Kconfig | 9 +--
> > arch/mips/Kconfig | 10 +--
> > arch/mips/mm/init.c | 14 +---
> > arch/powerpc/Kconfig | 17 +---
> > arch/powerpc/kernel/setup_64.c | 92 +--------------------
> > arch/riscv/Kconfig | 10 +--
> > arch/sparc/Kconfig | 12 +--
> > arch/sparc/kernel/smp_64.c | 105 +-----------------------
> > arch/x86/Kconfig | 17 +---
> > arch/x86/kernel/setup_percpu.c | 66 ++-------------
> > drivers/base/arch_numa.c | 68 +---------------
> > include/linux/percpu.h | 13 +--
> > mm/Kconfig | 12 +++
> > mm/percpu.c | 143 +++++++++++++++++++++++++--------
> > 15 files changed, 165 insertions(+), 443 deletions(-)
> >

Hi Kefeng,

I apologize for the delay. It's a holiday week in the US + I had some
personal things come up at the beginning of last week. I'll have it
reviewed by tomorrow.

Thanks,
Dennis

2021-11-29 03:08:29

by Kefeng Wang

Subject: Re: [PATCH RFC 0/4] mm: percpu: Cleanup percpu first chunk function


On 2021/11/29 10:54, Dennis Zhou wrote:
> On Mon, Nov 29, 2021 at 10:51:18AM +0800, Kefeng Wang wrote:
>> Hi Dennis and all maintainers, any comments about the changes? Many thanks.
>>
>> On 2021/11/21 17:35, Kefeng Wang wrote:
>>> While adding support for the page mapping percpu first chunk allocator
>>> on arm64, we found a lot of duplicated code in the percpu embed/page
>>> first chunk allocators. This patchset cleans that up and should
>>> introduce no functional change; it has only been tested on arm64.
>>>
>>> Kefeng Wang (4):
>>> mm: percpu: Generalize percpu related config
>>> mm: percpu: Add pcpu_fc_cpu_to_node_fn_t typedef
>>> mm: percpu: Add generic pcpu_fc_alloc/free function
>>> mm: percpu: Add generic pcpu_populate_pte() function
>>>
>>> arch/arm64/Kconfig | 20 +----
>>> arch/ia64/Kconfig | 9 +--
>>> arch/mips/Kconfig | 10 +--
>>> arch/mips/mm/init.c | 14 +---
>>> arch/powerpc/Kconfig | 17 +---
>>> arch/powerpc/kernel/setup_64.c | 92 +--------------------
>>> arch/riscv/Kconfig | 10 +--
>>> arch/sparc/Kconfig | 12 +--
>>> arch/sparc/kernel/smp_64.c | 105 +-----------------------
>>> arch/x86/Kconfig | 17 +---
>>> arch/x86/kernel/setup_percpu.c | 66 ++-------------
>>> drivers/base/arch_numa.c | 68 +---------------
>>> include/linux/percpu.h | 13 +--
>>> mm/Kconfig | 12 +++
>>> mm/percpu.c | 143 +++++++++++++++++++++++++--------
>>> 15 files changed, 165 insertions(+), 443 deletions(-)
>>>
> Hi Kefang,
>
> I apologize for the delay. It's a holiday week in the US + I had some
> personal things come up at the beginning of last week. I'll have it
> reviewed by tomorrow.
It's great to hear from you, thanks.
>
> Thanks,
> Dennis

2021-11-29 22:37:04

by Dennis Zhou

Subject: Re: [PATCH RFC 1/4] mm: percpu: Generalize percpu related config

Hello,

On Sun, Nov 21, 2021 at 05:35:54PM +0800, Kefeng Wang wrote:
> The HAVE_SETUP_PER_CPU_AREA/NEED_PER_CPU_EMBED_FIRST_CHUNK/
> NEED_PER_CPU_PAGE_FIRST_CHUNK/USE_PERCPU_NUMA_NODE_ID configs
> are duplicated on every platform that subscribes to them.
>
> Move them into mm/Kconfig, drop the redundant per-arch definitions,
> and instead just select them on the applicable platforms.
>
> Signed-off-by: Kefeng Wang <[email protected]>
> ---
> arch/arm64/Kconfig | 20 ++++----------------
> arch/ia64/Kconfig | 9 ++-------
> arch/mips/Kconfig | 10 ++--------
> arch/powerpc/Kconfig | 17 ++++-------------
> arch/riscv/Kconfig | 10 ++--------
> arch/sparc/Kconfig | 12 +++---------
> arch/x86/Kconfig | 17 ++++-------------
> mm/Kconfig | 12 ++++++++++++
> 8 files changed, 33 insertions(+), 74 deletions(-)
>
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index c4207cf9bb17..4ff73299f8a9 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -1135,6 +1135,10 @@ config NUMA
> select GENERIC_ARCH_NUMA
> select ACPI_NUMA if ACPI
> select OF_NUMA
> + select HAVE_SETUP_PER_CPU_AREA
> + select NEED_PER_CPU_EMBED_FIRST_CHUNK
> + select NEED_PER_CPU_PAGE_FIRST_CHUNK
> + select USE_PERCPU_NUMA_NODE_ID
> help
> Enable NUMA (Non-Uniform Memory Access) support.
>
> @@ -1151,22 +1155,6 @@ config NODES_SHIFT
> Specify the maximum number of NUMA Nodes available on the target
> system. Increases memory reserved to accommodate various tables.
>
> -config USE_PERCPU_NUMA_NODE_ID
> - def_bool y
> - depends on NUMA
> -
> -config HAVE_SETUP_PER_CPU_AREA
> - def_bool y
> - depends on NUMA
> -
> -config NEED_PER_CPU_EMBED_FIRST_CHUNK
> - def_bool y
> - depends on NUMA
> -
> -config NEED_PER_CPU_PAGE_FIRST_CHUNK
> - def_bool y
> - depends on NUMA
> -
> source "kernel/Kconfig.hz"
>
> config ARCH_SPARSEMEM_ENABLE
> diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
> index 1e33666fa679..703952819e10 100644
> --- a/arch/ia64/Kconfig
> +++ b/arch/ia64/Kconfig
> @@ -32,6 +32,7 @@ config IA64
> select HAVE_FTRACE_MCOUNT_RECORD
> select HAVE_DYNAMIC_FTRACE if (!ITANIUM)
> select HAVE_FUNCTION_TRACER
> + select HAVE_SETUP_PER_CPU_AREA
> select TTY
> select HAVE_ARCH_TRACEHOOK
> select HAVE_VIRT_CPU_ACCOUNTING
> @@ -88,9 +89,6 @@ config GENERIC_CALIBRATE_DELAY
> bool
> default y
>
> -config HAVE_SETUP_PER_CPU_AREA
> - def_bool y
> -
> config DMI
> bool
> default y
> @@ -292,6 +290,7 @@ config NUMA
> bool "NUMA support"
> depends on !FLATMEM
> select SMP
> + select USE_PERCPU_NUMA_NODE_ID
> help
> Say Y to compile the kernel to support NUMA (Non-Uniform Memory
> Access). This option is for configuring high-end multiprocessor
> @@ -311,10 +310,6 @@ config HAVE_ARCH_NODEDATA_EXTENSION
> def_bool y
> depends on NUMA
>
> -config USE_PERCPU_NUMA_NODE_ID
> - def_bool y
> - depends on NUMA
> -
> config HAVE_MEMORYLESS_NODES
> def_bool NUMA
>
> diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
> index de60ad190057..c106a2080877 100644
> --- a/arch/mips/Kconfig
> +++ b/arch/mips/Kconfig
> @@ -2666,6 +2666,8 @@ config NUMA
> bool "NUMA Support"
> depends on SYS_SUPPORTS_NUMA
> select SMP
> + select HAVE_SETUP_PER_CPU_AREA
> + select NEED_PER_CPU_EMBED_FIRST_CHUNK
> help
> Say Y to compile the kernel to support NUMA (Non-Uniform Memory
> Access). This option improves performance on systems with more
> @@ -2676,14 +2678,6 @@ config NUMA
> config SYS_SUPPORTS_NUMA
> bool
>
> -config HAVE_SETUP_PER_CPU_AREA
> - def_bool y
> - depends on NUMA
> -
> -config NEED_PER_CPU_EMBED_FIRST_CHUNK
> - def_bool y
> - depends on NUMA
> -
> config RELOCATABLE
> bool "Relocatable kernel"
> depends on SYS_SUPPORTS_RELOCATABLE
> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> index dea74d7717c0..8badd39854a0 100644
> --- a/arch/powerpc/Kconfig
> +++ b/arch/powerpc/Kconfig
> @@ -55,15 +55,6 @@ config ARCH_MMAP_RND_COMPAT_BITS_MIN
> default 9 if PPC_16K_PAGES # 9 = 23 (8MB) - 14 (16K)
> default 11 # 11 = 23 (8MB) - 12 (4K)
>
> -config HAVE_SETUP_PER_CPU_AREA
> - def_bool PPC64
> -
> -config NEED_PER_CPU_EMBED_FIRST_CHUNK
> - def_bool y if PPC64
> -
> -config NEED_PER_CPU_PAGE_FIRST_CHUNK
> - def_bool y if PPC64
> -
> config NR_IRQS
> int "Number of virtual interrupt numbers"
> range 32 1048576
> @@ -240,6 +231,7 @@ config PPC
> select HAVE_REGS_AND_STACK_ACCESS_API
> select HAVE_RELIABLE_STACKTRACE
> select HAVE_RSEQ
> + select HAVE_SETUP_PER_CPU_AREA if PPC64
> select HAVE_SOFTIRQ_ON_OWN_STACK
> select HAVE_STACKPROTECTOR if PPC32 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r2)
> select HAVE_STACKPROTECTOR if PPC64 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r13)
> @@ -254,6 +246,8 @@ config PPC
> select MMU_GATHER_RCU_TABLE_FREE
> select MODULES_USE_ELF_RELA
> select NEED_DMA_MAP_STATE if PPC64 || NOT_COHERENT_CACHE
> + select NEED_PER_CPU_EMBED_FIRST_CHUNK if PPC64
> + select NEED_PER_CPU_PAGE_FIRST_CHUNK if PPC64
> select NEED_SG_DMA_LENGTH
> select OF
> select OF_DMA_DEFAULT_COHERENT if !NOT_COHERENT_CACHE
> @@ -659,6 +653,7 @@ config NUMA
> bool "NUMA Memory Allocation and Scheduler Support"
> depends on PPC64 && SMP
> default y if PPC_PSERIES || PPC_POWERNV
> + select USE_PERCPU_NUMA_NODE_ID
> help
> Enable NUMA (Non-Uniform Memory Access) support.
>
> @@ -672,10 +667,6 @@ config NODES_SHIFT
> default "4"
> depends on NUMA
>
> -config USE_PERCPU_NUMA_NODE_ID
> - def_bool y
> - depends on NUMA
> -
> config HAVE_MEMORYLESS_NODES
> def_bool y
> depends on NUMA
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index 821252b65f89..bf66bcbc5a39 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -334,6 +334,8 @@ config NUMA
> select GENERIC_ARCH_NUMA
> select OF_NUMA
> select ARCH_SUPPORTS_NUMA_BALANCING
> + select USE_PERCPU_NUMA_NODE_ID
> + select NEED_PER_CPU_EMBED_FIRST_CHUNK
> help
> Enable NUMA (Non-Uniform Memory Access) support.
>
> @@ -349,14 +351,6 @@ config NODES_SHIFT
> Specify the maximum number of NUMA Nodes available on the target
> system. Increases memory reserved to accommodate various tables.
>
> -config USE_PERCPU_NUMA_NODE_ID
> - def_bool y
> - depends on NUMA
> -
> -config NEED_PER_CPU_EMBED_FIRST_CHUNK
> - def_bool y
> - depends on NUMA
> -
> config RISCV_ISA_C
> bool "Emit compressed instructions when building Linux"
> default y
> diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
> index 66fc08646be5..a6765e0fe6a8 100644
> --- a/arch/sparc/Kconfig
> +++ b/arch/sparc/Kconfig
> @@ -97,6 +97,9 @@ config SPARC64
> select PCI_DOMAINS if PCI
> select ARCH_HAS_GIGANTIC_PAGE
> select HAVE_SOFTIRQ_ON_OWN_STACK
> + select HAVE_SETUP_PER_CPU_AREA
> + select NEED_PER_CPU_EMBED_FIRST_CHUNK
> + select NEED_PER_CPU_PAGE_FIRST_CHUUNK
>
> config ARCH_PROC_KCORE_TEXT
> def_bool y
> @@ -123,15 +126,6 @@ config AUDIT_ARCH
> bool
> default y
>
> -config HAVE_SETUP_PER_CPU_AREA
> - def_bool y if SPARC64
> -
> -config NEED_PER_CPU_EMBED_FIRST_CHUNK
> - def_bool y if SPARC64
> -
> -config NEED_PER_CPU_PAGE_FIRST_CHUNK
> - def_bool y if SPARC64
> -
> config MMU
> bool
> default y
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index 7399327d1eff..ca120a1f5857 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -239,6 +239,7 @@ config X86
> select HAVE_REGS_AND_STACK_ACCESS_API
> select HAVE_RELIABLE_STACKTRACE if X86_64 && (UNWINDER_FRAME_POINTER || UNWINDER_ORC) && STACK_VALIDATION
> select HAVE_FUNCTION_ARG_ACCESS_API
> + select HAVE_SETUP_PER_CPU_AREA
> select HAVE_SOFTIRQ_ON_OWN_STACK
> select HAVE_STACKPROTECTOR if CC_HAS_SANE_STACKPROTECTOR
> select HAVE_STACK_VALIDATION if X86_64
> @@ -252,6 +253,8 @@ config X86
> select HAVE_GENERIC_VDSO
> select HOTPLUG_SMT if SMP
> select IRQ_FORCED_THREADING
> + select NEED_PER_CPU_EMBED_FIRST_CHUNK
> + select NEED_PER_CPU_PAGE_FIRST_CHUNK
> select NEED_SG_DMA_LENGTH
> select PCI_DOMAINS if PCI
> select PCI_LOCKLESS_CONFIG if PCI
> @@ -331,15 +334,6 @@ config ARCH_HAS_CPU_RELAX
> config ARCH_HAS_FILTER_PGPROT
> def_bool y
>
> -config HAVE_SETUP_PER_CPU_AREA
> - def_bool y
> -
> -config NEED_PER_CPU_EMBED_FIRST_CHUNK
> - def_bool y
> -
> -config NEED_PER_CPU_PAGE_FIRST_CHUNK
> - def_bool y
> -
> config ARCH_HIBERNATION_POSSIBLE
> def_bool y
>
> @@ -1557,6 +1551,7 @@ config NUMA
> depends on SMP
> depends on X86_64 || (X86_32 && HIGHMEM64G && X86_BIGSMP)
> default y if X86_BIGSMP
> + select USE_PERCPU_NUMA_NODE_ID
> help
> Enable NUMA (Non-Uniform Memory Access) support.
>
> @@ -2430,10 +2425,6 @@ config ARCH_HAS_ADD_PAGES
> config ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE
> def_bool y
>
> -config USE_PERCPU_NUMA_NODE_ID
> - def_bool y
> - depends on NUMA
> -
> menu "Power management and ACPI options"
>
> config ARCH_HIBERNATION_HEADER
> diff --git a/mm/Kconfig b/mm/Kconfig
> index 28edafc820ad..6bc5d780c51b 100644
> --- a/mm/Kconfig
> +++ b/mm/Kconfig
> @@ -432,6 +432,18 @@ config NEED_PER_CPU_KM
> bool
> default y
>
> +config NEED_PER_CPU_EMBED_FIRST_CHUNK
> + bool
> +
> +config NEED_PER_CPU_PAGE_FIRST_CHUNK
> + bool
> +
> +config USE_PERCPU_NUMA_NODE_ID
> + bool
> +
> +config HAVE_SETUP_PER_CPU_AREA
> + bool
> +
> config CLEANCACHE
> bool "Enable cleancache driver to cache clean pages if tmem is present"
> help
> --
> 2.26.2
>

This makes sense and looks good. A series like this is a little tricky.
The latter patches change the contracts so it'd be easiest to run it
through my tree. We'd need to get explicit acks from each arch
maintainer to make sure they're fine with this.

Thanks,
Dennis

2021-11-29 22:41:19

by Dennis Zhou

[permalink] [raw]
Subject: Re: [PATCH RFC 2/4] mm: percpu: Add pcpu_fc_cpu_to_node_fn_t typedef

On Sun, Nov 21, 2021 at 05:35:55PM +0800, Kefeng Wang wrote:
> Add pcpu_fc_cpu_to_node_fn_t and pass it into pcpu_fc_alloc_fn_t;
> pcpu first chunk allocation will call it to allocate memblock memory
> on the corresponding node.
>
> Signed-off-by: Kefeng Wang <[email protected]>
> ---
> arch/mips/mm/init.c | 12 +++++++++---
> arch/powerpc/kernel/setup_64.c | 14 +++++++++++---
> arch/sparc/kernel/smp_64.c | 8 +++++---
> arch/x86/kernel/setup_percpu.c | 18 +++++++++++++-----
> drivers/base/arch_numa.c | 8 +++++---
> include/linux/percpu.h | 7 +++++--
> mm/percpu.c | 14 +++++++++-----
> 7 files changed, 57 insertions(+), 24 deletions(-)
>
> diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
> index 325e1552cbea..ebbf6923532c 100644
> --- a/arch/mips/mm/init.c
> +++ b/arch/mips/mm/init.c
> @@ -519,12 +519,17 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
> return node_distance(cpu_to_node(from), cpu_to_node(to));
> }
>
> -static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size,
> - size_t align)
> +static int __init pcpu_cpu_to_node(int cpu)
> +{
> + return cpu_to_node(cpu);
> +}
> +
> +static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align,
> + pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
> {
> return memblock_alloc_try_nid(size, align, __pa(MAX_DMA_ADDRESS),
> MEMBLOCK_ALLOC_ACCESSIBLE,
> - cpu_to_node(cpu));
> + cpu_to_nd_fun(cpu));
> }
>
> static void __init pcpu_fc_free(void *ptr, size_t size)
> @@ -545,6 +550,7 @@ void __init setup_per_cpu_areas(void)
> rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
> PERCPU_DYNAMIC_RESERVE, PAGE_SIZE,
> pcpu_cpu_distance,
> + pcpu_cpu_to_node,
> pcpu_fc_alloc, pcpu_fc_free);
> if (rc < 0)
> panic("Failed to initialize percpu areas.");
> diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
> index 6052f5d5ded3..9a5609c821df 100644
> --- a/arch/powerpc/kernel/setup_64.c
> +++ b/arch/powerpc/kernel/setup_64.c
> @@ -771,6 +771,12 @@ void __init emergency_stack_init(void)
> }
>
> #ifdef CONFIG_SMP
> +
> +static __init int pcpu_cpu_to_node(int cpu)
> +{
> + return IS_ENABLED(CONFIG_NUMA) ? early_cpu_to_node(cpu) : NUMA_NO_NODE;
> +}
> +
> /**
> * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu
> * @cpu: cpu to allocate for
> @@ -784,12 +790,12 @@ void __init emergency_stack_init(void)
> * RETURNS:
> * Pointer to the allocated area on success, NULL on failure.
> */
> -static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size,
> - size_t align)
> +static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size, size_t align,
> + pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
> {
> const unsigned long goal = __pa(MAX_DMA_ADDRESS);
> #ifdef CONFIG_NUMA
> - int node = early_cpu_to_node(cpu);
> + int node = cpu_to_nd_fun(cpu);

^ typo - cpu_to_nd_fn().
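That is, the line should read (trivial sketch of the fix):

	int node = cpu_to_nd_fn(cpu);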

> void *ptr;
>
> if (!node_online(node) || !NODE_DATA(node)) {
> @@ -891,6 +897,7 @@ void __init setup_per_cpu_areas(void)
>
> if (pcpu_chosen_fc != PCPU_FC_PAGE) {
> rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance,
> + pcpu_cpu_to_node,
> pcpu_alloc_bootmem, pcpu_free_bootmem);
> if (rc)
> pr_warn("PERCPU: %s allocator failed (%d), "
> @@ -900,6 +907,7 @@ void __init setup_per_cpu_areas(void)
>
> if (rc < 0)
> rc = pcpu_page_first_chunk(0, pcpu_alloc_bootmem, pcpu_free_bootmem,
> + pcpu_cpu_to_node,
> pcpu_populate_pte);
> if (rc < 0)
> panic("cannot initialize percpu area (err=%d)", rc);
> diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
> index b98a7bbe6728..026aa3ccbc30 100644
> --- a/arch/sparc/kernel/smp_64.c
> +++ b/arch/sparc/kernel/smp_64.c
> @@ -1539,12 +1539,12 @@ void smp_send_stop(void)
> * RETURNS:
> * Pointer to the allocated area on success, NULL on failure.
> */
> -static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size,
> - size_t align)
> +static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size, size_t align,
> + pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
> {
> const unsigned long goal = __pa(MAX_DMA_ADDRESS);
> #ifdef CONFIG_NUMA
> - int node = cpu_to_node(cpu);
> + int node = cpu_to_nd_fn(cpu);
> void *ptr;
>
> if (!node_online(node) || !NODE_DATA(node)) {
> @@ -1641,6 +1641,7 @@ void __init setup_per_cpu_areas(void)
> rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
> PERCPU_DYNAMIC_RESERVE, 4 << 20,
> pcpu_cpu_distance,
> + cpu_to_node,
> pcpu_alloc_bootmem,
> pcpu_free_bootmem);
> if (rc)
> @@ -1652,6 +1653,7 @@ void __init setup_per_cpu_areas(void)
> rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE,
> pcpu_alloc_bootmem,
> pcpu_free_bootmem,
> + cpu_to_node,
> pcpu_populate_pte);
> if (rc < 0)
> panic("cannot initialize percpu area (err=%d)", rc);
> diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
> index 7b65275544b2..bba4fa174a16 100644
> --- a/arch/x86/kernel/setup_percpu.c
> +++ b/arch/x86/kernel/setup_percpu.c
> @@ -97,12 +97,12 @@ static bool __init pcpu_need_numa(void)
> * RETURNS:
> * Pointer to the allocated area on success, NULL on failure.
> */
> -static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size,
> - unsigned long align)
> +static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size, unsigned long align,
> + pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
> {
> const unsigned long goal = __pa(MAX_DMA_ADDRESS);
> #ifdef CONFIG_NUMA
> - int node = early_cpu_to_node(cpu);
> + int node = cpu_to_nd_fn(cpu);
> void *ptr;
>
> if (!node_online(node) || !NODE_DATA(node)) {
> @@ -128,9 +128,10 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size,
> /*
> * Helpers for first chunk memory allocation
> */
> -static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
> +static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align,
> + pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
> {
> - return pcpu_alloc_bootmem(cpu, size, align);
> + return pcpu_alloc_bootmem(cpu, size, align, cpu_to_nd_fn);
> }
>
> static void __init pcpu_fc_free(void *ptr, size_t size)
> @@ -150,6 +151,11 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
> #endif
> }
>
> +static int __init pcpu_cpu_to_node(int cpu)
> +{
> + return IS_ENABLED(CONFIG_NUMA) ? early_cpu_to_node(cpu) : NUMA_NO_NODE;
> +}
> +
> static void __init pcpup_populate_pte(unsigned long addr)
> {
> populate_extra_pte(addr);
> @@ -205,6 +211,7 @@ void __init setup_per_cpu_areas(void)
> rc = pcpu_embed_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
> dyn_size, atom_size,
> pcpu_cpu_distance,
> + pcpu_cpu_to_node,
> pcpu_fc_alloc, pcpu_fc_free);
> if (rc < 0)
> pr_warn("%s allocator failed (%d), falling back to page size\n",
> @@ -213,6 +220,7 @@ void __init setup_per_cpu_areas(void)
> if (rc < 0)
> rc = pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
> pcpu_fc_alloc, pcpu_fc_free,
> + pcpu_cpu_to_node,
> pcpup_populate_pte);
> if (rc < 0)
> panic("cannot initialize percpu area (err=%d)", rc);
> diff --git a/drivers/base/arch_numa.c b/drivers/base/arch_numa.c
> index bc1876915457..273543d9ff85 100644
> --- a/drivers/base/arch_numa.c
> +++ b/drivers/base/arch_numa.c
> @@ -155,10 +155,10 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
> return node_distance(early_cpu_to_node(from), early_cpu_to_node(to));
> }
>
> -static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size,
> - size_t align)
> +static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align,
> + pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
> {
> - int nid = early_cpu_to_node(cpu);
> + int nid = cpu_to_nd_fn(cpu);
>
> return memblock_alloc_try_nid(size, align,
> __pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, nid);
> @@ -229,6 +229,7 @@ void __init setup_per_cpu_areas(void)
> rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
> PERCPU_DYNAMIC_RESERVE, PAGE_SIZE,
> pcpu_cpu_distance,
> + early_cpu_to_node,
> pcpu_fc_alloc, pcpu_fc_free);
> #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
> if (rc < 0)
> @@ -242,6 +243,7 @@ void __init setup_per_cpu_areas(void)
> rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE,
> pcpu_fc_alloc,
> pcpu_fc_free,
> + early_cpu_to_node,
> pcpu_populate_pte);
> #endif
> if (rc < 0)
> diff --git a/include/linux/percpu.h b/include/linux/percpu.h
> index ae4004e7957e..41bb54715b0c 100644
> --- a/include/linux/percpu.h
> +++ b/include/linux/percpu.h
> @@ -94,8 +94,9 @@ extern const char * const pcpu_fc_names[PCPU_FC_NR];
>
> extern enum pcpu_fc pcpu_chosen_fc;
>
> -typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size,
> - size_t align);
> +typedef int (pcpu_fc_cpu_to_node_fn_t)(int cpu);
> +typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size, size_t align,
> + pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn);
> typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size);
> typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr);
> typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to);
> @@ -111,6 +112,7 @@ extern void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
> extern int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
> size_t atom_size,
> pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
> + pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
> pcpu_fc_alloc_fn_t alloc_fn,
> pcpu_fc_free_fn_t free_fn);
> #endif
> @@ -119,6 +121,7 @@ extern int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
> extern int __init pcpu_page_first_chunk(size_t reserved_size,
> pcpu_fc_alloc_fn_t alloc_fn,
> pcpu_fc_free_fn_t free_fn,
> + pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
> pcpu_fc_populate_pte_fn_t populate_pte_fn);
> #endif

Be consistent here. In pcpu_embed_first_chunk() you add the
cpu_to_node() callback before alloc()/free() and then in
pcpu_page_first_chunk() you add it after. I'd prefer to add it before
so as to keep the cpu_distance()/cpu_to_node() grouping.
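
For illustration, a sketch of the ordering being asked for (hypothetical
prototypes, not the final signatures):

	extern int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
				size_t atom_size,
				pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
				pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
				pcpu_fc_alloc_fn_t alloc_fn,
				pcpu_fc_free_fn_t free_fn);

	extern int __init pcpu_page_first_chunk(size_t reserved_size,
				pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
				pcpu_fc_alloc_fn_t alloc_fn,
				pcpu_fc_free_fn_t free_fn,
				pcpu_fc_populate_pte_fn_t populate_pte_fn);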

>
> diff --git a/mm/percpu.c b/mm/percpu.c
> index f5b2c2ea5a54..3f6cf1ff0be2 100644
> --- a/mm/percpu.c
> +++ b/mm/percpu.c
> @@ -3001,6 +3001,7 @@ static struct pcpu_alloc_info * __init __flatten pcpu_build_alloc_info(
> * @dyn_size: minimum free size for dynamic allocation in bytes
> * @atom_size: allocation atom size
> * @cpu_distance_fn: callback to determine distance between cpus, optional
> + * @cpu_to_nd_fn: callback to convert cpu to it's node, optional
> * @alloc_fn: function to allocate percpu page
> * @free_fn: function to free percpu page
> *
> @@ -3030,6 +3031,7 @@ static struct pcpu_alloc_info * __init __flatten pcpu_build_alloc_info(
> int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
> size_t atom_size,
> pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
> + pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
> pcpu_fc_alloc_fn_t alloc_fn,
> pcpu_fc_free_fn_t free_fn)
> {
> @@ -3066,7 +3068,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
> BUG_ON(cpu == NR_CPUS);
>
> /* allocate space for the whole group */
> - ptr = alloc_fn(cpu, gi->nr_units * ai->unit_size, atom_size);
> + ptr = alloc_fn(cpu, gi->nr_units * ai->unit_size, atom_size, cpu_to_nd_fn);
> if (!ptr) {
> rc = -ENOMEM;
> goto out_free_areas;
> @@ -3145,6 +3147,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
> * @reserved_size: the size of reserved percpu area in bytes
> * @alloc_fn: function to allocate percpu page, always called with PAGE_SIZE
> * @free_fn: function to free percpu page, always called with PAGE_SIZE
> + * @cpu_to_nd_fn: callback to convert cpu to it's node, optional
> * @populate_pte_fn: function to populate pte
> *
> * This is a helper to ease setting up page-remapped first percpu
> @@ -3159,6 +3162,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
> int __init pcpu_page_first_chunk(size_t reserved_size,
> pcpu_fc_alloc_fn_t alloc_fn,
> pcpu_fc_free_fn_t free_fn,
> + pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
> pcpu_fc_populate_pte_fn_t populate_pte_fn)
> {
> static struct vm_struct vm;
> @@ -3201,7 +3205,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
> for (i = 0; i < unit_pages; i++) {
> void *ptr;
>
> - ptr = alloc_fn(cpu, PAGE_SIZE, PAGE_SIZE);
> + ptr = alloc_fn(cpu, PAGE_SIZE, PAGE_SIZE, cpu_to_nd_fn);
> if (!ptr) {
> pr_warn("failed to allocate %s page for cpu%u\n",
> psize_str, cpu);
> @@ -3278,8 +3282,8 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
> unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
> EXPORT_SYMBOL(__per_cpu_offset);
>
> -static void * __init pcpu_dfl_fc_alloc(unsigned int cpu, size_t size,
> - size_t align)
> +static void * __init pcpu_dfl_fc_alloc(unsigned int cpu, size_t size, size_t align,
> + pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
> {
> return memblock_alloc_from(size, align, __pa(MAX_DMA_ADDRESS));
> }
> @@ -3300,7 +3304,7 @@ void __init setup_per_cpu_areas(void)
> * what the legacy allocator did.
> */
> rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
> - PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, NULL,
> + PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, NULL, NULL,
> pcpu_dfl_fc_alloc, pcpu_dfl_fc_free);
> if (rc < 0)
> panic("Failed to initialize percpu areas.");
> --
> 2.26.2
>

Thanks,
Dennis

2021-11-29 22:46:15

by Dennis Zhou

[permalink] [raw]
Subject: Re: [PATCH RFC 3/4] mm: percpu: Add generic pcpu_fc_alloc/free funciton

On Sun, Nov 21, 2021 at 05:35:56PM +0800, Kefeng Wang wrote:
> With the previous patch, we can add generic pcpu first chunk
> allocation and free functions to clean up the duplicated definitions
> on each architecture.
>
> Signed-off-by: Kefeng Wang <[email protected]>
> ---
> arch/mips/mm/init.c | 16 +--------
> arch/powerpc/kernel/setup_64.c | 51 ++------------------------
> arch/sparc/kernel/smp_64.c | 50 +-------------------------
> arch/x86/kernel/setup_percpu.c | 59 +-----------------------------
> drivers/base/arch_numa.c | 19 +---------
> include/linux/percpu.h | 9 +----
> mm/percpu.c | 66 ++++++++++++++++++----------------
> 7 files changed, 42 insertions(+), 228 deletions(-)
>
> diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
> index ebbf6923532c..5a8002839550 100644
> --- a/arch/mips/mm/init.c
> +++ b/arch/mips/mm/init.c
> @@ -524,19 +524,6 @@ static int __init pcpu_cpu_to_node(int cpu)
> return cpu_to_node(cpu);
> }
>
> -static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align,
> - pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
> -{
> - return memblock_alloc_try_nid(size, align, __pa(MAX_DMA_ADDRESS),
> - MEMBLOCK_ALLOC_ACCESSIBLE,
> - cpu_to_nd_fun(cpu));
> -}
> -
> -static void __init pcpu_fc_free(void *ptr, size_t size)
> -{
> - memblock_free(ptr, size);
> -}
> -
> void __init setup_per_cpu_areas(void)
> {
> unsigned long delta;
> @@ -550,8 +537,7 @@ void __init setup_per_cpu_areas(void)
> rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
> PERCPU_DYNAMIC_RESERVE, PAGE_SIZE,
> pcpu_cpu_distance,
> - pcpu_cpu_to_node,
> - pcpu_fc_alloc, pcpu_fc_free);
> + pcpu_cpu_to_node);
> if (rc < 0)
> panic("Failed to initialize percpu areas.");
>
> diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
> index 9a5609c821df..364b1567f822 100644
> --- a/arch/powerpc/kernel/setup_64.c
> +++ b/arch/powerpc/kernel/setup_64.c
> @@ -777,50 +777,6 @@ static __init int pcpu_cpu_to_node(int cpu)
> return IS_ENABLED(CONFIG_NUMA) ? early_cpu_to_node(cpu) : NUMA_NO_NODE;
> }
>
> -/**
> - * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu
> - * @cpu: cpu to allocate for
> - * @size: size allocation in bytes
> - * @align: alignment
> - *
> - * Allocate @size bytes aligned at @align for cpu @cpu. This wrapper
> - * does the right thing for NUMA regardless of the current
> - * configuration.
> - *
> - * RETURNS:
> - * Pointer to the allocated area on success, NULL on failure.
> - */
> -static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size, size_t align,
> - pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
> -{
> - const unsigned long goal = __pa(MAX_DMA_ADDRESS);
> -#ifdef CONFIG_NUMA
> - int node = cpu_to_nd_fun(cpu);
> - void *ptr;
> -
> - if (!node_online(node) || !NODE_DATA(node)) {
> - ptr = memblock_alloc_from(size, align, goal);
> - pr_info("cpu %d has no node %d or node-local memory\n",
> - cpu, node);
> - pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
> - cpu, size, __pa(ptr));
> - } else {
> - ptr = memblock_alloc_try_nid(size, align, goal,
> - MEMBLOCK_ALLOC_ACCESSIBLE, node);
> - pr_debug("per cpu data for cpu%d %lu bytes on node%d at "
> - "%016lx\n", cpu, size, node, __pa(ptr));
> - }
> - return ptr;
> -#else
> - return memblock_alloc_from(size, align, goal);
> -#endif
> -}
> -
> -static void __init pcpu_free_bootmem(void *ptr, size_t size)
> -{
> - memblock_free(ptr, size);
> -}
> -
> static int pcpu_cpu_distance(unsigned int from, unsigned int to)
> {
> if (early_cpu_to_node(from) == early_cpu_to_node(to))
> @@ -897,8 +853,7 @@ void __init setup_per_cpu_areas(void)
>
> if (pcpu_chosen_fc != PCPU_FC_PAGE) {
> rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance,
> - pcpu_cpu_to_node,
> - pcpu_alloc_bootmem, pcpu_free_bootmem);
> + pcpu_cpu_to_node);
> if (rc)
> pr_warn("PERCPU: %s allocator failed (%d), "
> "falling back to page size\n",
> @@ -906,9 +861,7 @@ void __init setup_per_cpu_areas(void)
> }
>
> if (rc < 0)
> - rc = pcpu_page_first_chunk(0, pcpu_alloc_bootmem, pcpu_free_bootmem,
> - pcpu_cpu_to_node,
> - pcpu_populate_pte);
> + rc = pcpu_page_first_chunk(0, pcpu_cpu_to_node, pcpu_populate_pte);
> if (rc < 0)
> panic("cannot initialize percpu area (err=%d)", rc);
>
> diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
> index 026aa3ccbc30..198dadddb75d 100644
> --- a/arch/sparc/kernel/smp_64.c
> +++ b/arch/sparc/kernel/smp_64.c
> @@ -1526,50 +1526,6 @@ void smp_send_stop(void)
> smp_call_function(stop_this_cpu, NULL, 0);
> }
>
> -/**
> - * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu
> - * @cpu: cpu to allocate for
> - * @size: size allocation in bytes
> - * @align: alignment
> - *
> - * Allocate @size bytes aligned at @align for cpu @cpu. This wrapper
> - * does the right thing for NUMA regardless of the current
> - * configuration.
> - *
> - * RETURNS:
> - * Pointer to the allocated area on success, NULL on failure.
> - */
> -static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size, size_t align,
> - pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
> -{
> - const unsigned long goal = __pa(MAX_DMA_ADDRESS);
> -#ifdef CONFIG_NUMA
> - int node = cpu_to_nd_fn(cpu);
> - void *ptr;
> -
> - if (!node_online(node) || !NODE_DATA(node)) {
> - ptr = memblock_alloc_from(size, align, goal);
> - pr_info("cpu %d has no node %d or node-local memory\n",
> - cpu, node);
> - pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
> - cpu, size, __pa(ptr));
> - } else {
> - ptr = memblock_alloc_try_nid(size, align, goal,
> - MEMBLOCK_ALLOC_ACCESSIBLE, node);
> - pr_debug("per cpu data for cpu%d %lu bytes on node%d at "
> - "%016lx\n", cpu, size, node, __pa(ptr));
> - }
> - return ptr;
> -#else
> - return memblock_alloc_from(size, align, goal);
> -#endif
> -}
> -
> -static void __init pcpu_free_bootmem(void *ptr, size_t size)
> -{
> - memblock_free(ptr, size);
> -}
> -
> static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
> {
> if (cpu_to_node(from) == cpu_to_node(to))
> @@ -1641,9 +1597,7 @@ void __init setup_per_cpu_areas(void)
> rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
> PERCPU_DYNAMIC_RESERVE, 4 << 20,
> pcpu_cpu_distance,
> - cpu_to_node,
> - pcpu_alloc_bootmem,
> - pcpu_free_bootmem);
> + cpu_to_node);
> if (rc)
> pr_warn("PERCPU: %s allocator failed (%d), "
> "falling back to page size\n",
> @@ -1651,8 +1605,6 @@ void __init setup_per_cpu_areas(void)
> }
> if (rc < 0)
> rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE,
> - pcpu_alloc_bootmem,
> - pcpu_free_bootmem,
> cpu_to_node,
> pcpu_populate_pte);
> if (rc < 0)
> diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
> index bba4fa174a16..cd672bd46241 100644
> --- a/arch/x86/kernel/setup_percpu.c
> +++ b/arch/x86/kernel/setup_percpu.c
> @@ -84,61 +84,6 @@ static bool __init pcpu_need_numa(void)
> }
> #endif
>
> -/**
> - * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu
> - * @cpu: cpu to allocate for
> - * @size: size allocation in bytes
> - * @align: alignment
> - *
> - * Allocate @size bytes aligned at @align for cpu @cpu. This wrapper
> - * does the right thing for NUMA regardless of the current
> - * configuration.
> - *
> - * RETURNS:
> - * Pointer to the allocated area on success, NULL on failure.
> - */
> -static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size, unsigned long align,
> - pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
> -{
> - const unsigned long goal = __pa(MAX_DMA_ADDRESS);
> -#ifdef CONFIG_NUMA
> - int node = cpu_to_nd_fn(cpu);
> - void *ptr;
> -
> - if (!node_online(node) || !NODE_DATA(node)) {
> - ptr = memblock_alloc_from(size, align, goal);
> - pr_info("cpu %d has no node %d or node-local memory\n",
> - cpu, node);
> - pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
> - cpu, size, __pa(ptr));
> - } else {
> - ptr = memblock_alloc_try_nid(size, align, goal,
> - MEMBLOCK_ALLOC_ACCESSIBLE,
> - node);
> -
> - pr_debug("per cpu data for cpu%d %lu bytes on node%d at %016lx\n",
> - cpu, size, node, __pa(ptr));
> - }
> - return ptr;
> -#else
> - return memblock_alloc_from(size, align, goal);
> -#endif
> -}
> -
> -/*
> - * Helpers for first chunk memory allocation
> - */
> -static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align,
> - pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
> -{
> - return pcpu_alloc_bootmem(cpu, size, align, cpu_to_nd_fn);
> -}
> -
> -static void __init pcpu_fc_free(void *ptr, size_t size)
> -{
> - memblock_free(ptr, size);
> -}
> -
> static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
> {
> #ifdef CONFIG_NUMA
> @@ -211,15 +156,13 @@ void __init setup_per_cpu_areas(void)
> rc = pcpu_embed_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
> dyn_size, atom_size,
> pcpu_cpu_distance,
> - pcpu_cpu_to_node,
> - pcpu_fc_alloc, pcpu_fc_free);
> + pcpu_cpu_to_node);
> if (rc < 0)
> pr_warn("%s allocator failed (%d), falling back to page size\n",
> pcpu_fc_names[pcpu_chosen_fc], rc);
> }
> if (rc < 0)
> rc = pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
> - pcpu_fc_alloc, pcpu_fc_free,
> pcpu_cpu_to_node,
> pcpup_populate_pte);
> if (rc < 0)
> diff --git a/drivers/base/arch_numa.c b/drivers/base/arch_numa.c
> index 273543d9ff85..23a10cc36165 100644
> --- a/drivers/base/arch_numa.c
> +++ b/drivers/base/arch_numa.c
> @@ -155,20 +155,6 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
> return node_distance(early_cpu_to_node(from), early_cpu_to_node(to));
> }
>
> -static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align,
> - pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
> -{
> - int nid = cpu_to_nd_fn(cpu);
> -
> - return memblock_alloc_try_nid(size, align,
> - __pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, nid);
> -}
> -
> -static void __init pcpu_fc_free(void *ptr, size_t size)
> -{
> - memblock_free(ptr, size);
> -}
> -
> #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
> static void __init pcpu_populate_pte(unsigned long addr)
> {
> @@ -229,8 +215,7 @@ void __init setup_per_cpu_areas(void)
> rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
> PERCPU_DYNAMIC_RESERVE, PAGE_SIZE,
> pcpu_cpu_distance,
> - early_cpu_to_node,
> - pcpu_fc_alloc, pcpu_fc_free);
> + early_cpu_to_node);
> #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
> if (rc < 0)
> pr_warn("PERCPU: %s allocator failed (%d), falling back to page size\n",
> @@ -241,8 +226,6 @@ void __init setup_per_cpu_areas(void)
> #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
> if (rc < 0)
> rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE,
> - pcpu_fc_alloc,
> - pcpu_fc_free,
> early_cpu_to_node,
> pcpu_populate_pte);
> #endif
> diff --git a/include/linux/percpu.h b/include/linux/percpu.h
> index 41bb54715b0c..d73c97ef4ff4 100644
> --- a/include/linux/percpu.h
> +++ b/include/linux/percpu.h
> @@ -95,9 +95,6 @@ extern const char * const pcpu_fc_names[PCPU_FC_NR];
> extern enum pcpu_fc pcpu_chosen_fc;
>
> typedef int (pcpu_fc_cpu_to_node_fn_t)(int cpu);
> -typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size, size_t align,
> - pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn);
> -typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size);
> typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr);
> typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to);
>
> @@ -112,15 +109,11 @@ extern void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
> extern int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
> size_t atom_size,
> pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
> - pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
> - pcpu_fc_alloc_fn_t alloc_fn,
> - pcpu_fc_free_fn_t free_fn);
> + pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn);
> #endif
>
> #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
> extern int __init pcpu_page_first_chunk(size_t reserved_size,
> - pcpu_fc_alloc_fn_t alloc_fn,
> - pcpu_fc_free_fn_t free_fn,
> pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
> pcpu_fc_populate_pte_fn_t populate_pte_fn);
> #endif
> diff --git a/mm/percpu.c b/mm/percpu.c
> index 3f6cf1ff0be2..efaa1cbaf73d 100644
> --- a/mm/percpu.c
> +++ b/mm/percpu.c
> @@ -2992,6 +2992,30 @@ static struct pcpu_alloc_info * __init __flatten pcpu_build_alloc_info(
>
> return ai;
> }
> +
> +static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align,
> + pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
> +{
> + const unsigned long goal = __pa(MAX_DMA_ADDRESS);
> + int node = NUMA_NO_NODE;
> + void *ptr;
> +
> + if (cpu_to_nd_fn)
> + node = cpu_to_nd_fn(cpu);
> +
> + if (node == NUMA_NO_NODE || !node_online(node) || !NODE_DATA(node)) {
> + ptr = memblock_alloc_from(size, align, goal);
> + } else {
> + ptr = memblock_alloc_try_nid(size, align, goal,
> + MEMBLOCK_ALLOC_ACCESSIBLE, node);
> + }
> + return ptr;
> +}

My preference here would be to keep this identical to the x86
implementation where we #ifdef CONFIG_NUMA.
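
Something along these lines, mirroring the x86 version being removed (a
sketch only, using the same helpers as in the patch):

	static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align,
					   pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
	{
		const unsigned long goal = __pa(MAX_DMA_ADDRESS);
	#ifdef CONFIG_NUMA
		/* cpu_to_nd_fn is optional, so guard against a NULL callback */
		int node = cpu_to_nd_fn ? cpu_to_nd_fn(cpu) : NUMA_NO_NODE;

		if (node == NUMA_NO_NODE || !node_online(node) || !NODE_DATA(node))
			return memblock_alloc_from(size, align, goal);

		return memblock_alloc_try_nid(size, align, goal,
					      MEMBLOCK_ALLOC_ACCESSIBLE, node);
	#else
		return memblock_alloc_from(size, align, goal);
	#endif
	}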

> +
> +static void __init pcpu_fc_free(void *ptr, size_t size)
> +{
> + memblock_free(ptr, size);
> +}
> #endif /* BUILD_EMBED_FIRST_CHUNK || BUILD_PAGE_FIRST_CHUNK */
>
> #if defined(BUILD_EMBED_FIRST_CHUNK)
> @@ -3002,14 +3026,12 @@ static struct pcpu_alloc_info * __init __flatten pcpu_build_alloc_info(
> * @atom_size: allocation atom size
> * @cpu_distance_fn: callback to determine distance between cpus, optional
> * @cpu_to_nd_fn: callback to convert cpu to it's node, optional
> - * @alloc_fn: function to allocate percpu page
> - * @free_fn: function to free percpu page
> *
> * This is a helper to ease setting up embedded first percpu chunk and
> * can be called where pcpu_setup_first_chunk() is expected.
> *
> * If this function is used to setup the first chunk, it is allocated
> - * by calling @alloc_fn and used as-is without being mapped into
> + * by calling pcpu_fc_alloc and used as-is without being mapped into
> * vmalloc area. Allocations are always whole multiples of @atom_size
> * aligned to @atom_size.
> *
> @@ -3023,7 +3045,7 @@ static struct pcpu_alloc_info * __init __flatten pcpu_build_alloc_info(
> * @dyn_size specifies the minimum dynamic area size.
> *
> * If the needed size is smaller than the minimum or specified unit
> - * size, the leftover is returned using @free_fn.
> + * size, the leftover is returned using pcpu_fc_free.
> *
> * RETURNS:
> * 0 on success, -errno on failure.
> @@ -3031,9 +3053,7 @@ static struct pcpu_alloc_info * __init __flatten pcpu_build_alloc_info(
> int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
> size_t atom_size,
> pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
> - pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
> - pcpu_fc_alloc_fn_t alloc_fn,
> - pcpu_fc_free_fn_t free_fn)
> + pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
> {
> void *base = (void *)ULONG_MAX;
> void **areas = NULL;
> @@ -3068,7 +3088,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
> BUG_ON(cpu == NR_CPUS);
>
> /* allocate space for the whole group */
> - ptr = alloc_fn(cpu, gi->nr_units * ai->unit_size, atom_size, cpu_to_nd_fn);
> + ptr = pcpu_fc_alloc(cpu, gi->nr_units * ai->unit_size, atom_size, cpu_to_nd_fn);
> if (!ptr) {
> rc = -ENOMEM;
> goto out_free_areas;
> @@ -3107,12 +3127,12 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
> for (i = 0; i < gi->nr_units; i++, ptr += ai->unit_size) {
> if (gi->cpu_map[i] == NR_CPUS) {
> /* unused unit, free whole */
> - free_fn(ptr, ai->unit_size);
> + pcpu_fc_free(ptr, ai->unit_size);
> continue;
> }
> /* copy and return the unused part */
> memcpy(ptr, __per_cpu_load, ai->static_size);
> - free_fn(ptr + size_sum, ai->unit_size - size_sum);
> + pcpu_fc_free(ptr + size_sum, ai->unit_size - size_sum);
> }
> }
>
> @@ -3131,7 +3151,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
> out_free_areas:
> for (group = 0; group < ai->nr_groups; group++)
> if (areas[group])
> - free_fn(areas[group],
> + pcpu_fc_free(areas[group],
> ai->groups[group].nr_units * ai->unit_size);
> out_free:
> pcpu_free_alloc_info(ai);
> @@ -3145,8 +3165,6 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
> /**
> * pcpu_page_first_chunk - map the first chunk using PAGE_SIZE pages
> * @reserved_size: the size of reserved percpu area in bytes
> - * @alloc_fn: function to allocate percpu page, always called with PAGE_SIZE
> - * @free_fn: function to free percpu page, always called with PAGE_SIZE
> * @cpu_to_nd_fn: callback to convert cpu to it's node, optional
> * @populate_pte_fn: function to populate pte
> *
> @@ -3160,8 +3178,6 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
> * 0 on success, -errno on failure.
> */
> int __init pcpu_page_first_chunk(size_t reserved_size,
> - pcpu_fc_alloc_fn_t alloc_fn,
> - pcpu_fc_free_fn_t free_fn,
> pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
> pcpu_fc_populate_pte_fn_t populate_pte_fn)
> {
> @@ -3205,7 +3221,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
> for (i = 0; i < unit_pages; i++) {
> void *ptr;
>
> - ptr = alloc_fn(cpu, PAGE_SIZE, PAGE_SIZE, cpu_to_nd_fn);
> + ptr = pcpu_fc_alloc(cpu, PAGE_SIZE, PAGE_SIZE, cpu_to_nd_fn);
> if (!ptr) {
> pr_warn("failed to allocate %s page for cpu%u\n",
> psize_str, cpu);
> @@ -3257,7 +3273,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
>
> enomem:
> while (--j >= 0)
> - free_fn(page_address(pages[j]), PAGE_SIZE);
> + pcpu_fc_free(page_address(pages[j]), PAGE_SIZE);
> rc = -ENOMEM;
> out_free_ar:
> memblock_free(pages, pages_size);
> @@ -3282,17 +3298,6 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
> unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
> EXPORT_SYMBOL(__per_cpu_offset);
>
> -static void * __init pcpu_dfl_fc_alloc(unsigned int cpu, size_t size, size_t align,
> - pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
> -{
> - return memblock_alloc_from(size, align, __pa(MAX_DMA_ADDRESS));
> -}
> -
> -static void __init pcpu_dfl_fc_free(void *ptr, size_t size)
> -{
> - memblock_free(ptr, size);
> -}
> -
> void __init setup_per_cpu_areas(void)
> {
> unsigned long delta;
> @@ -3303,9 +3308,8 @@ void __init setup_per_cpu_areas(void)
> * Always reserve area for module percpu variables. That's
> * what the legacy allocator did.
> */
> - rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
> - PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, NULL, NULL,
> - pcpu_dfl_fc_alloc, pcpu_dfl_fc_free);
> + rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE, PERCPU_DYNAMIC_RESERVE,
> + PAGE_SIZE, NULL, NULL);
> if (rc < 0)
> panic("Failed to initialize percpu areas.");
>
> --
> 2.26.2
>

Overall this makes sense.

Thanks,
Dennis

2021-11-29 22:50:49

by Dennis Zhou

[permalink] [raw]
Subject: Re: [PATCH RFC 4/4] mm: percpu: Add generic pcpu_populate_pte() function

On Sun, Nov 21, 2021 at 05:35:57PM +0800, Kefeng Wang wrote:
> When NEED_PER_CPU_PAGE_FIRST_CHUNK is enabled, we need a function to
> populate the pte. Add a generic pcpu_populate_pte() function and
> switch to using it.
>
> Signed-off-by: Kefeng Wang <[email protected]>
> ---
> arch/powerpc/kernel/setup_64.c | 47 +--------------------
> arch/sparc/kernel/smp_64.c | 57 +------------------------
> arch/x86/kernel/setup_percpu.c | 5 +--
> drivers/base/arch_numa.c | 51 +---------------------
> include/linux/percpu.h | 5 +--
> mm/percpu.c | 77 +++++++++++++++++++++++++++++++---
> 6 files changed, 79 insertions(+), 163 deletions(-)
>
> diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
> index 364b1567f822..1a17828af77f 100644
> --- a/arch/powerpc/kernel/setup_64.c
> +++ b/arch/powerpc/kernel/setup_64.c
> @@ -788,51 +788,6 @@ static int pcpu_cpu_distance(unsigned int from, unsigned int to)
> unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
> EXPORT_SYMBOL(__per_cpu_offset);
>
> -static void __init pcpu_populate_pte(unsigned long addr)
> -{
> - pgd_t *pgd = pgd_offset_k(addr);
> - p4d_t *p4d;
> - pud_t *pud;
> - pmd_t *pmd;
> -
> - p4d = p4d_offset(pgd, addr);
> - if (p4d_none(*p4d)) {
> - pud_t *new;
> -
> - new = memblock_alloc(PUD_TABLE_SIZE, PUD_TABLE_SIZE);
> - if (!new)
> - goto err_alloc;
> - p4d_populate(&init_mm, p4d, new);
> - }
> -
> - pud = pud_offset(p4d, addr);
> - if (pud_none(*pud)) {
> - pmd_t *new;
> -
> - new = memblock_alloc(PMD_TABLE_SIZE, PMD_TABLE_SIZE);
> - if (!new)
> - goto err_alloc;
> - pud_populate(&init_mm, pud, new);
> - }
> -
> - pmd = pmd_offset(pud, addr);
> - if (!pmd_present(*pmd)) {
> - pte_t *new;
> -
> - new = memblock_alloc(PTE_TABLE_SIZE, PTE_TABLE_SIZE);
> - if (!new)
> - goto err_alloc;
> - pmd_populate_kernel(&init_mm, pmd, new);
> - }
> -
> - return;
> -
> -err_alloc:
> - panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n",
> - __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
> -}
> -
> -
> void __init setup_per_cpu_areas(void)
> {
> const size_t dyn_size = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE;
> @@ -861,7 +816,7 @@ void __init setup_per_cpu_areas(void)
> }
>
> if (rc < 0)
> - rc = pcpu_page_first_chunk(0, pcpu_cpu_to_node, pcpu_populate_pte);
> + rc = pcpu_page_first_chunk(0, pcpu_cpu_to_node);
> if (rc < 0)
> panic("cannot initialize percpu area (err=%d)", rc);
>
> diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
> index 198dadddb75d..00dffe2d834b 100644
> --- a/arch/sparc/kernel/smp_64.c
> +++ b/arch/sparc/kernel/smp_64.c
> @@ -1534,59 +1534,6 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
> return REMOTE_DISTANCE;
> }
>
> -static void __init pcpu_populate_pte(unsigned long addr)
> -{
> - pgd_t *pgd = pgd_offset_k(addr);
> - p4d_t *p4d;
> - pud_t *pud;
> - pmd_t *pmd;
> -
> - if (pgd_none(*pgd)) {
> - pud_t *new;
> -
> - new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
> - if (!new)
> - goto err_alloc;
> - pgd_populate(&init_mm, pgd, new);
> - }
> -
> - p4d = p4d_offset(pgd, addr);
> - if (p4d_none(*p4d)) {
> - pud_t *new;
> -
> - new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
> - if (!new)
> - goto err_alloc;
> - p4d_populate(&init_mm, p4d, new);
> - }
> -
> - pud = pud_offset(p4d, addr);
> - if (pud_none(*pud)) {
> - pmd_t *new;
> -
> - new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
> - if (!new)
> - goto err_alloc;
> - pud_populate(&init_mm, pud, new);
> - }
> -
> - pmd = pmd_offset(pud, addr);
> - if (!pmd_present(*pmd)) {
> - pte_t *new;
> -
> - new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
> - if (!new)
> - goto err_alloc;
> - pmd_populate_kernel(&init_mm, pmd, new);
> - }
> -
> - return;
> -
> -err_alloc:
> - panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n",
> - __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
> -}
> -
> void __init setup_per_cpu_areas(void)
> {
> unsigned long delta;
> @@ -1604,9 +1551,7 @@ void __init setup_per_cpu_areas(void)
> pcpu_fc_names[pcpu_chosen_fc], rc);
> }
> if (rc < 0)
> - rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE,
> - cpu_to_node,
> - pcpu_populate_pte);
> + rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE, cpu_to_node);
> if (rc < 0)
> panic("cannot initialize percpu area (err=%d)", rc);
>
> diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
> index cd672bd46241..4eadbe45078e 100644
> --- a/arch/x86/kernel/setup_percpu.c
> +++ b/arch/x86/kernel/setup_percpu.c
> @@ -101,7 +101,7 @@ static int __init pcpu_cpu_to_node(int cpu)
> return IS_ENABLED(CONFIG_NUMA) ? early_cpu_to_node(cpu) : NUMA_NO_NODE;
> }
>
> -static void __init pcpup_populate_pte(unsigned long addr)
> +void __init pcpu_populate_pte(unsigned long addr)
> {
> populate_extra_pte(addr);
> }
> @@ -163,8 +163,7 @@ void __init setup_per_cpu_areas(void)
> }
> if (rc < 0)
> rc = pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
> - pcpu_cpu_to_node,
> - pcpup_populate_pte);
> + pcpu_cpu_to_node);

x86 has its own implementation that differs for 32-bit. I'm not
confident this is correct as a drop-in replacement for x86, so I'd
prefer to keep populate_pte_fn() around.

> if (rc < 0)
> panic("cannot initialize percpu area (err=%d)", rc);
>
> diff --git a/drivers/base/arch_numa.c b/drivers/base/arch_numa.c
> index 23a10cc36165..eaa31e567d1e 100644
> --- a/drivers/base/arch_numa.c
> +++ b/drivers/base/arch_numa.c
> @@ -14,7 +14,6 @@
> #include <linux/of.h>
>
> #include <asm/sections.h>
> -#include <asm/pgalloc.h>
>
> struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
> EXPORT_SYMBOL(node_data);
> @@ -155,52 +154,6 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
> return node_distance(early_cpu_to_node(from), early_cpu_to_node(to));
> }
>
> -#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
> -static void __init pcpu_populate_pte(unsigned long addr)
> -{
> - pgd_t *pgd = pgd_offset_k(addr);
> - p4d_t *p4d;
> - pud_t *pud;
> - pmd_t *pmd;
> -
> - p4d = p4d_offset(pgd, addr);
> - if (p4d_none(*p4d)) {
> - pud_t *new;
> -
> - new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
> - if (!new)
> - goto err_alloc;
> - p4d_populate(&init_mm, p4d, new);
> - }
> -
> - pud = pud_offset(p4d, addr);
> - if (pud_none(*pud)) {
> - pmd_t *new;
> -
> - new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
> - if (!new)
> - goto err_alloc;
> - pud_populate(&init_mm, pud, new);
> - }
> -
> - pmd = pmd_offset(pud, addr);
> - if (!pmd_present(*pmd)) {
> - pte_t *new;
> -
> - new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
> - if (!new)
> - goto err_alloc;
> - pmd_populate_kernel(&init_mm, pmd, new);
> - }
> -
> - return;
> -
> -err_alloc:
> - panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n",
> - __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
> -}
> -#endif
> -
> void __init setup_per_cpu_areas(void)
> {
> unsigned long delta;
> @@ -225,9 +178,7 @@ void __init setup_per_cpu_areas(void)
>
> #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
> if (rc < 0)
> - rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE,
> - early_cpu_to_node,
> - pcpu_populate_pte);
> + rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE, early_cpu_to_node);
> #endif
> if (rc < 0)
> panic("Failed to initialize percpu areas (err=%d).", rc);
> diff --git a/include/linux/percpu.h b/include/linux/percpu.h
> index d73c97ef4ff4..f1ec5ad1351c 100644
> --- a/include/linux/percpu.h
> +++ b/include/linux/percpu.h
> @@ -95,7 +95,6 @@ extern const char * const pcpu_fc_names[PCPU_FC_NR];
> extern enum pcpu_fc pcpu_chosen_fc;
>
> typedef int (pcpu_fc_cpu_to_node_fn_t)(int cpu);
> -typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr);
> typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to);
>
> extern struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups,
> @@ -113,9 +112,9 @@ extern int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
> #endif
>
> #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
> +void __init pcpu_populate_pte(unsigned long addr);
> extern int __init pcpu_page_first_chunk(size_t reserved_size,
> - pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
> - pcpu_fc_populate_pte_fn_t populate_pte_fn);
> + pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn);
> #endif
>
> extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align) __alloc_size(1);
> diff --git a/mm/percpu.c b/mm/percpu.c
> index efaa1cbaf73d..d907daed04eb 100644
> --- a/mm/percpu.c
> +++ b/mm/percpu.c
> @@ -3162,11 +3162,80 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
> #endif /* BUILD_EMBED_FIRST_CHUNK */
>
> #ifdef BUILD_PAGE_FIRST_CHUNK
> +#include <asm/pgalloc.h>
> +
> +#ifndef P4D_TABLE_SIZE
> +#define P4D_TABLE_SIZE PAGE_SIZE
> +#endif
> +
> +#ifndef PUD_TABLE_SIZE
> +#define PUD_TABLE_SIZE PAGE_SIZE
> +#endif
> +
> +#ifndef PMD_TABLE_SIZE
> +#define PMD_TABLE_SIZE PAGE_SIZE
> +#endif
> +
> +#ifndef PTE_TABLE_SIZE
> +#define PTE_TABLE_SIZE PAGE_SIZE
> +#endif
> +void __init __weak pcpu_populate_pte(unsigned long addr)
> +{
> + pgd_t *pgd = pgd_offset_k(addr);
> + p4d_t *p4d;
> + pud_t *pud;
> + pmd_t *pmd;
> +
> + if (pgd_none(*pgd)) {
> + p4d_t *new;
> +
> + new = memblock_alloc_from(P4D_TABLE_SIZE, P4D_TABLE_SIZE, PAGE_SIZE);

It's unnecessary to specify a min_addr to memblock_alloc_from() as it
won't allocate 0 anyway. So please use memblock_alloc() instead.
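That is (sketch):

	new = memblock_alloc(P4D_TABLE_SIZE, P4D_TABLE_SIZE);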

> + if (!new)
> + goto err_alloc;
> + pgd_populate(&init_mm, pgd, new);
> + }
> +
> + p4d = p4d_offset(pgd, addr);
> + if (p4d_none(*p4d)) {
> + pud_t *new;
> +
> + new = memblock_alloc_from(PUD_TABLE_SIZE, PUD_TABLE_SIZE, PAGE_SIZE);

See above.

> + if (!new)
> + goto err_alloc;
> + p4d_populate(&init_mm, p4d, new);
> + }
> +
> + pud = pud_offset(p4d, addr);
> + if (pud_none(*pud)) {
> + pmd_t *new;
> +
> + new = memblock_alloc_from(PMD_TABLE_SIZE, PMD_TABLE_SIZE, PAGE_SIZE);

See above.

> + if (!new)
> + goto err_alloc;
> + pud_populate(&init_mm, pud, new);
> + }
> +
> + pmd = pmd_offset(pud, addr);
> + if (!pmd_present(*pmd)) {
> + pte_t *new;
> +
> + new = memblock_alloc_from(PTE_TABLE_SIZE, PTE_TABLE_SIZE, PAGE_SIZE);

See above.

> + if (!new)
> + goto err_alloc;
> + pmd_populate_kernel(&init_mm, pmd, new);
> + }
> +
> + return;
> +
> +err_alloc:
> + panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n",
> + __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
> +}
> +
> /**
> * pcpu_page_first_chunk - map the first chunk using PAGE_SIZE pages
> * @reserved_size: the size of reserved percpu area in bytes
> * @cpu_to_nd_fn: callback to convert cpu to it's node, optional
> - * @populate_pte_fn: function to populate pte
> *
> * This is a helper to ease setting up page-remapped first percpu
> * chunk and can be called where pcpu_setup_first_chunk() is expected.
> @@ -3177,9 +3246,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
> * RETURNS:
> * 0 on success, -errno on failure.
> */
> -int __init pcpu_page_first_chunk(size_t reserved_size,
> - pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
> - pcpu_fc_populate_pte_fn_t populate_pte_fn)
> +int __init pcpu_page_first_chunk(size_t reserved_size, pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
> {
> static struct vm_struct vm;
> struct pcpu_alloc_info *ai;
> @@ -3243,7 +3310,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
> (unsigned long)vm.addr + unit * ai->unit_size;
>
> for (i = 0; i < unit_pages; i++)
> - populate_pte_fn(unit_addr + (i << PAGE_SHIFT));
> + pcpu_populate_pte(unit_addr + (i << PAGE_SHIFT));
>
> /* pte already populated, the following shouldn't fail */
> rc = __pcpu_map_pages(unit_addr, &pages[unit * unit_pages],
> --
> 2.26.2
>

2021-11-29 22:56:03

by Dennis Zhou

[permalink] [raw]
Subject: Re: [PATCH RFC 0/4] mm: percpu: Cleanup percpu first chunk funciton

Hello,

On Sun, Nov 21, 2021 at 05:35:53PM +0800, Kefeng Wang wrote:
> When support page mapping percpu first chunk allocator on arm64, we
> found there are lots of duplicated codes in percpu embed/page first
> chunk allocator. This patchset is aimed to cleanup them and should
> no funciton change, only test on arm64.
>
> Kefeng Wang (4):
> mm: percpu: Generalize percpu related config
> mm: percpu: Add pcpu_fc_cpu_to_node_fn_t typedef
> mm: percpu: Add generic pcpu_fc_alloc/free funciton
> mm: percpu: Add generic pcpu_populate_pte() function
>
> arch/arm64/Kconfig | 20 +----
> arch/ia64/Kconfig | 9 +--
> arch/mips/Kconfig | 10 +--
> arch/mips/mm/init.c | 14 +---
> arch/powerpc/Kconfig | 17 +---
> arch/powerpc/kernel/setup_64.c | 92 +--------------------
> arch/riscv/Kconfig | 10 +--
> arch/sparc/Kconfig | 12 +--
> arch/sparc/kernel/smp_64.c | 105 +-----------------------
> arch/x86/Kconfig | 17 +---
> arch/x86/kernel/setup_percpu.c | 66 ++-------------
> drivers/base/arch_numa.c | 68 +---------------
> include/linux/percpu.h | 13 +--
> mm/Kconfig | 12 +++
> mm/percpu.c | 143 +++++++++++++++++++++++++--------
> 15 files changed, 165 insertions(+), 443 deletions(-)
>
> --
> 2.26.2
>

I've made a few comments. I think this will be a bit of a challenge to
get through since it touches so many architectures. For ease, it
probably makes sense to run it through my tree, but we'll need explicit
acks as I mentioned.

I like getting rid of the pcpu_alloc_bootmem()/pcpu_free_bootmem()
functions. However, let's keep the implementation identical to x86.


I don't think we should get rid of populate_pte_fn(). I'm not
comfortable changing x86's implementation. Simply allow passing NULL,
and use the default when it is NULL.
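
A minimal sketch of that fallback (pcpu_populate_one() is a hypothetical
helper name; pcpu_populate_pte() is the generic helper from patch 4):

	/*
	 * Sketch only: keep pcpu_fc_populate_pte_fn_t around and fall
	 * back to the generic page-table walk when the caller passes NULL.
	 */
	static void __init pcpu_populate_one(unsigned long addr,
					     pcpu_fc_populate_pte_fn_t populate_pte_fn)
	{
		if (populate_pte_fn)
			populate_pte_fn(addr);		/* arch override, e.g. x86 */
		else
			pcpu_populate_pte(addr);	/* generic default */
	}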

Do you have a tree that Intel's lkp bot pulls? I suggest cleaning up
the patches and pushing them to a remote branch that the bot picks up;
that would have caught the mips typo. Send a PR creating a file in [1]
for your branch, GitHub is fine. Basic validation on more than just
arm64 needs to be done before I can pick this up.

[1] https://github.com/intel/lkp-tests/tree/master/repo/linux

Thanks,
Dennis

2021-11-30 06:22:15

by Kefeng Wang

[permalink] [raw]
Subject: Re: [PATCH RFC 1/4] mm: percpu: Generalize percpu related config


On 2021/11/30 6:36, Dennis Zhou wrote:
> Hello,
>
> On Sun, Nov 21, 2021 at 05:35:54PM +0800, Kefeng Wang wrote:
>> The HAVE_SETUP_PER_CPU_AREA/NEED_PER_CPU_EMBED_FIRST_CHUNK/
>> NEED_PER_CPU_PAGE_FIRST_CHUNK/USE_PERCPU_NUMA_NODE_ID configs,
>> which has duplicate definitions on platforms that subscribe it.
>>
>> Move them into mm, drop these redundant definitions and instead
>> just select it on applicable platforms.
>>
>> Signed-off-by: Kefeng Wang <[email protected]>
>> ---
>> arch/arm64/Kconfig | 20 ++++----------------
>> arch/ia64/Kconfig | 9 ++-------
>> arch/mips/Kconfig | 10 ++--------
>> arch/powerpc/Kconfig | 17 ++++-------------
>> arch/riscv/Kconfig | 10 ++--------
>> arch/sparc/Kconfig | 12 +++---------
>> arch/x86/Kconfig | 17 ++++-------------
>> mm/Kconfig | 12 ++++++++++++
>> 8 files changed, 33 insertions(+), 74 deletions(-)
...
>>
> This makes sense and looks good. A series like this is a little tricky.
> The latter patches change the contracts so it'd be easiest to run it
> through my tree. We'd need to get explicit acks from each arch
> maintainer to make sure they're fine with this.

Got it. I will resend without the RFC tag and hope to get acks from the
related arch maintainers.


>
> Thanks,
> Dennis
> .

2021-11-30 06:22:29

by Kefeng Wang

[permalink] [raw]
Subject: Re: [PATCH RFC 2/4] mm: percpu: Add pcpu_fc_cpu_to_node_fn_t typedef


On 2021/11/30 6:40, Dennis Zhou wrote:
> On Sun, Nov 21, 2021 at 05:35:55PM +0800, Kefeng Wang wrote:
>> Add pcpu_fc_cpu_to_node_fn_t and pass it into pcpu_fc_alloc_fn_t;
>> pcpu first chunk allocation will call it to allocate memblock memory
>> on the corresponding node.
>>
>> Signed-off-by: Kefeng Wang <[email protected]>
>> ---
>> arch/mips/mm/init.c | 12 +++++++++---
>> arch/powerpc/kernel/setup_64.c | 14 +++++++++++---
>> arch/sparc/kernel/smp_64.c | 8 +++++---
>> arch/x86/kernel/setup_percpu.c | 18 +++++++++++++-----
>> drivers/base/arch_numa.c | 8 +++++---
>> include/linux/percpu.h | 7 +++++--
>> mm/percpu.c | 14 +++++++++-----
>> 7 files changed, 57 insertions(+), 24 deletions(-)
>>
>> diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
...
>> diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
>> index 6052f5d5ded3..9a5609c821df 100644
>> --- a/arch/powerpc/kernel/setup_64.c
>> +++ b/arch/powerpc/kernel/setup_64.c
>> @@ -771,6 +771,12 @@ void __init emergency_stack_init(void)
>> }
>>
>> #ifdef CONFIG_SMP
>> +
>> +static __init int pcpu_cpu_to_node(int cpu)
>> +{
>> + return IS_ENABLED(CONFIG_NUMA) ? early_cpu_to_node(cpu) : NUMA_NO_NODE;
>> +}
>> +
>> /**
>> * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu
>> * @cpu: cpu to allocate for
>> @@ -784,12 +790,12 @@ void __init emergency_stack_init(void)
>> * RETURNS:
>> * Pointer to the allocated area on success, NULL on failure.
>> */
>> -static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size,
>> - size_t align)
>> +static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size, size_t align,
>> + pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
>> {
>> const unsigned long goal = __pa(MAX_DMA_ADDRESS);
>> #ifdef CONFIG_NUMA
>> - int node = early_cpu_to_node(cpu);
>> + int node = cpu_to_nd_fun(cpu);
> ^ typo - cpu_to_nd_fn().

Will fix.
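
(i.e. the corrected line would simply read

	int node = cpu_to_nd_fn(cpu);

matching the cpu_to_nd_fn parameter name.)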

...

>> if (rc < 0)
>> diff --git a/include/linux/percpu.h b/include/linux/percpu.h
>> index ae4004e7957e..41bb54715b0c 100644
>> --- a/include/linux/percpu.h
>> +++ b/include/linux/percpu.h
>> @@ -94,8 +94,9 @@ extern const char * const pcpu_fc_names[PCPU_FC_NR];
>>
>> extern enum pcpu_fc pcpu_chosen_fc;
>>
>> -typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size,
>> - size_t align);
>> +typedef int (pcpu_fc_cpu_to_node_fn_t)(int cpu);
>> +typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size, size_t align,
>> + pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn);
>> typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size);
>> typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr);
>> typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to);
>> @@ -111,6 +112,7 @@ extern void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
>> extern int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
>> size_t atom_size,
>> pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
>> + pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
>> pcpu_fc_alloc_fn_t alloc_fn,
>> pcpu_fc_free_fn_t free_fn);
>> #endif
>> @@ -119,6 +121,7 @@ extern int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
>> extern int __init pcpu_page_first_chunk(size_t reserved_size,
>> pcpu_fc_alloc_fn_t alloc_fn,
>> pcpu_fc_free_fn_t free_fn,
>> + pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
>> pcpu_fc_populate_pte_fn_t populate_pte_fn);
>> #endif
> Be consistent here. In pcpu_setup_first_chunk() you add the
> cpu_to_node() before alloc()/free() and then in pcpu_embed_first_chunk()
> you add it after. I'd prefer to add it before as to keep the
> cpu_distance()/cpu_to_node() grouping.
Sure, will adjust the order.
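
For illustration, a sketch of the consistently ordered declarations
(my reading of the suggestion, not the final v2 hunk — both keep the
cpu_distance()/cpu_to_node() pair ahead of alloc()/free()):

extern int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
				size_t atom_size,
				pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
				pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
				pcpu_fc_alloc_fn_t alloc_fn,
				pcpu_fc_free_fn_t free_fn);

extern int __init pcpu_page_first_chunk(size_t reserved_size,
				pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
				pcpu_fc_alloc_fn_t alloc_fn,
				pcpu_fc_free_fn_t free_fn,
				pcpu_fc_populate_pte_fn_t populate_pte_fn);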

2021-11-30 06:27:32

by Kefeng Wang

[permalink] [raw]
Subject: Re: [PATCH RFC 3/4] mm: percpu: Add generic pcpu_fc_alloc/free function


On 2021/11/30 6:45, Dennis Zhou wrote:
> On Sun, Nov 21, 2021 at 05:35:56PM +0800, Kefeng Wang wrote:
>> With the previous patch, we can add generic pcpu first chunk
>> allocation and free functions to clean up the duplicated definitions
>> on each architecture.
>>
>> Signed-off-by: Kefeng Wang <[email protected]>
>> ---
>> arch/mips/mm/init.c | 16 +--------
>> arch/powerpc/kernel/setup_64.c | 51 ++------------------------
>> arch/sparc/kernel/smp_64.c | 50 +-------------------------
>> arch/x86/kernel/setup_percpu.c | 59 +-----------------------------
>> drivers/base/arch_numa.c | 19 +---------
>> include/linux/percpu.h | 9 +----
>> mm/percpu.c | 66 ++++++++++++++++++----------------
>> 7 files changed, 42 insertions(+), 228 deletions(-)
...
>> diff --git a/include/linux/percpu.h b/include/linux/percpu.h
>> index 41bb54715b0c..d73c97ef4ff4 100644
>> --- a/include/linux/percpu.h
>> +++ b/include/linux/percpu.h
>> @@ -95,9 +95,6 @@ extern const char * const pcpu_fc_names[PCPU_FC_NR];
>> extern enum pcpu_fc pcpu_chosen_fc;
>>
>> typedef int (pcpu_fc_cpu_to_node_fn_t)(int cpu);
>> -typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size, size_t align,
>> - pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn);
>> -typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size);
>> typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr);
>> typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to);
>>
>> @@ -112,15 +109,11 @@ extern void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
>> extern int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
>> size_t atom_size,
>> pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
>> - pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
>> - pcpu_fc_alloc_fn_t alloc_fn,
>> - pcpu_fc_free_fn_t free_fn);
>> + pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn);
>> #endif
>>
>> #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
>> extern int __init pcpu_page_first_chunk(size_t reserved_size,
>> - pcpu_fc_alloc_fn_t alloc_fn,
>> - pcpu_fc_free_fn_t free_fn,
>> pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
>> pcpu_fc_populate_pte_fn_t populate_pte_fn);
>> #endif
>> diff --git a/mm/percpu.c b/mm/percpu.c
>> index 3f6cf1ff0be2..efaa1cbaf73d 100644
>> --- a/mm/percpu.c
>> +++ b/mm/percpu.c
>> @@ -2992,6 +2992,30 @@ static struct pcpu_alloc_info * __init __flatten pcpu_build_alloc_info(
>>
>> return ai;
>> }
>> +
>> +static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align,
>> + pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
>> +{
>> + const unsigned long goal = __pa(MAX_DMA_ADDRESS);
>> + int node = NUMA_NO_NODE;
>> + void *ptr;
>> +
>> + if (cpu_to_nd_fn)
>> + node = cpu_to_nd_fn(cpu);
>> +
>> + if (node == NUMA_NO_NODE || !node_online(node) || !NODE_DATA(node)) {
>> + ptr = memblock_alloc_from(size, align, goal);
>> + } else {
>> + ptr = memblock_alloc_try_nid(size, align, goal,
>> + MEMBLOCK_ALLOC_ACCESSIBLE, node);
>> + }
>> + return ptr;
>> +}
> My preference here would be to keep this identical to the x86
> implementation where we #ifdef CONFIG_NUMA.

I will add back the '#ifdef CONFIG_NUMA', and also add back the
pr_debug/pr_info parts as x86 does.
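
Roughly along these lines (a sketch mirroring x86's
pcpu_alloc_bootmem(), not the final v2 hunk):

static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align,
				   pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
{
	const unsigned long goal = __pa(MAX_DMA_ADDRESS);
#ifdef CONFIG_NUMA
	int node = cpu_to_nd_fn ? cpu_to_nd_fn(cpu) : NUMA_NO_NODE;
	void *ptr;

	if (node == NUMA_NO_NODE || !node_online(node) || !NODE_DATA(node)) {
		/* no usable node info: fall back to any node, as x86 does */
		ptr = memblock_alloc_from(size, align, goal);
		pr_info("cpu %d has no node %d or node-local memory\n",
			cpu, node);
	} else {
		ptr = memblock_alloc_try_nid(size, align, goal,
					     MEMBLOCK_ALLOC_ACCESSIBLE, node);
		pr_debug("per cpu data for cpu%d %zu bytes on node%d\n",
			 cpu, size, node);
	}
	return ptr;
#else
	return memblock_alloc_from(size, align, goal);
#endif
}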

> Overall this makes sense.
>
> Thanks,
> Dennis
> .

2021-11-30 06:42:27

by Kefeng Wang

[permalink] [raw]
Subject: Re: [PATCH RFC 4/4] mm: percpu: Add generic pcpu_populate_pte() function


On 2021/11/30 6:49, Dennis Zhou wrote:
> On Sun, Nov 21, 2021 at 05:35:57PM +0800, Kefeng Wang wrote:
>> When NEED_PER_CPU_PAGE_FIRST_CHUNK is enabled, we need a function to
>> populate the pte. Add a generic pcpu_populate_pte() function and
>> switch to using it.
>>
>> Signed-off-by: Kefeng Wang <[email protected]>
>> ---
>> arch/powerpc/kernel/setup_64.c | 47 +--------------------
>> arch/sparc/kernel/smp_64.c | 57 +------------------------
>> arch/x86/kernel/setup_percpu.c | 5 +--
>> drivers/base/arch_numa.c | 51 +---------------------
>> include/linux/percpu.h | 5 +--
>> mm/percpu.c | 77 +++++++++++++++++++++++++++++++---
>> 6 files changed, 79 insertions(+), 163 deletions(-)
>>
...
>> diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
>> index cd672bd46241..4eadbe45078e 100644
>> --- a/arch/x86/kernel/setup_percpu.c
>> +++ b/arch/x86/kernel/setup_percpu.c
>> @@ -101,7 +101,7 @@ static int __init pcpu_cpu_to_node(int cpu)
>> return IS_ENABLED(CONFIG_NUMA) ? early_cpu_to_node(cpu) : NUMA_NO_NODE;
>> }
>>
>> -static void __init pcpup_populate_pte(unsigned long addr)
>> +void __init pcpu_populate_pte(unsigned long addr)
>> {
>> populate_extra_pte(addr);
>> }
>> @@ -163,8 +163,7 @@ void __init setup_per_cpu_areas(void)
>> }
>> if (rc < 0)
>> rc = pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
>> - pcpu_cpu_to_node,
>> - pcpup_populate_pte);
>> + pcpu_cpu_to_node);
> x86 has its own implementation that differs for 32-bit. I'm not
> confident this is correct to drop in as a replacement for x86, so I'd
> prefer to keep populate_pte_fn() around.


x86's pcpup_populate_pte() version is not dropped.

We define a __weak pcpu_populate_pte() function in mm/percpu.c, and x86
keeps its own strong version, so there is no functional change on x86.

I will add this to the changelog:

arch/x86/kernel/setup_percpu.c: void __init pcpu_populate_pte(unsigned long addr)
include/linux/percpu.h:         void __init pcpu_populate_pte(unsigned long addr);
mm/percpu.c:                    void __init __weak pcpu_populate_pte(unsigned long addr)
mm/percpu.c:                    pcpu_populate_pte(unit_addr + (i << PAGE_SHIFT));
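
For clarity, the mechanism at work is a weak default symbol that any
architecture may override with a strong definition of the same name;
an illustrative sketch (not the exact hunks):

/* mm/percpu.c: weak generic fallback */
void __init __weak pcpu_populate_pte(unsigned long addr)
{
	/* generic page-table walk/population for addr */
}

/* arch/x86/kernel/setup_percpu.c: strong definition, overrides the weak one */
void __init pcpu_populate_pte(unsigned long addr)
{
	populate_extra_pte(addr);
}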


>> diff --git a/include/linux/percpu.h b/include/linux/percpu.h
>> index d73c97ef4ff4..f1ec5ad1351c 100644
>> --- a/include/linux/percpu.h
>> +++ b/include/linux/percpu.h
>> @@ -95,7 +95,6 @@ extern const char * const pcpu_fc_names[PCPU_FC_NR];
>> extern enum pcpu_fc pcpu_chosen_fc;
>>
>> typedef int (pcpu_fc_cpu_to_node_fn_t)(int cpu);
>> -typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr);
>> typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to);
>>
>> extern struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups,
>> @@ -113,9 +112,9 @@ extern int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
>> #endif
>>
>> #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
>> +void __init pcpu_populate_pte(unsigned long addr);
>> extern int __init pcpu_page_first_chunk(size_t reserved_size,
>> - pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
>> - pcpu_fc_populate_pte_fn_t populate_pte_fn);
>> + pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn);
>> #endif
>>
>> extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align) __alloc_size(1);
>> diff --git a/mm/percpu.c b/mm/percpu.c
>> index efaa1cbaf73d..d907daed04eb 100644
>> --- a/mm/percpu.c
>> +++ b/mm/percpu.c
>> @@ -3162,11 +3162,80 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
>> #endif /* BUILD_EMBED_FIRST_CHUNK */
>>
...
>> +void __init __weak pcpu_populate_pte(unsigned long addr)
>> +{
>> + pgd_t *pgd = pgd_offset_k(addr);
>> + p4d_t *p4d;
>> + pud_t *pud;
>> + pmd_t *pmd;
>> +
>> + if (pgd_none(*pgd)) {
>> + p4d_t *new;
>> +
>> + new = memblock_alloc_from(P4D_TABLE_SIZE, P4D_TABLE_SIZE, PAGE_SIZE);
> It's unnecessary to specify a min_addr to memblock_alloc_from() as it
> won't allocate 0 anyway. So please use memblock_alloc() instead.

OK, will use memblock_alloc() in this function, e.g. (a sketch of the
intended change):
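
-		new = memblock_alloc_from(P4D_TABLE_SIZE, P4D_TABLE_SIZE, PAGE_SIZE);
+		new = memblock_alloc(P4D_TABLE_SIZE, P4D_TABLE_SIZE);

(and likewise for the pud/pmd table allocations)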


2021-11-30 06:53:39

by Kefeng Wang

[permalink] [raw]
Subject: Re: [PATCH RFC 0/4] mm: percpu: Cleanup percpu first chunk function


On 2021/11/30 6:55, Dennis Zhou wrote:
> Hello,
>
> On Sun, Nov 21, 2021 at 05:35:53PM +0800, Kefeng Wang wrote:
>> When support page mapping percpu first chunk allocator on arm64, we
>> found there are lots of duplicated codes in percpu embed/page first
>> chunk allocator. This patchset is aimed to clean them up and should
>> introduce no functional change; only tested on arm64.
>>
>> Kefeng Wang (4):
>> mm: percpu: Generalize percpu related config
>> mm: percpu: Add pcpu_fc_cpu_to_node_fn_t typedef
>> mm: percpu: Add generic pcpu_fc_alloc/free function
>> mm: percpu: Add generic pcpu_populate_pte() function
>>
>> arch/arm64/Kconfig | 20 +----
>> arch/ia64/Kconfig | 9 +--
>> arch/mips/Kconfig | 10 +--
>> arch/mips/mm/init.c | 14 +---
>> arch/powerpc/Kconfig | 17 +---
>> arch/powerpc/kernel/setup_64.c | 92 +--------------------
>> arch/riscv/Kconfig | 10 +--
>> arch/sparc/Kconfig | 12 +--
>> arch/sparc/kernel/smp_64.c | 105 +-----------------------
>> arch/x86/Kconfig | 17 +---
>> arch/x86/kernel/setup_percpu.c | 66 ++-------------
>> drivers/base/arch_numa.c | 68 +---------------
>> include/linux/percpu.h | 13 +--
>> mm/Kconfig | 12 +++
>> mm/percpu.c | 143 +++++++++++++++++++++++++--------
>> 15 files changed, 165 insertions(+), 443 deletions(-)
>>
>> --
>> 2.26.2
>>
> I've made a few comments. I think this will be a little bit of a
> challenge to get through due to it touching so many architectures. For
> ease, it probably makes sense to run it through my tree, but we'll need
> explicit acks as I mentioned.
>
> I like getting rid of the pcpu_alloc_bootmem()/pcpu_free_bootmem()
> functions. However, let's keep the implementation identical to x86.
ok , will change patch3 in v2
>
>
> I don't think we should get rid of the populate_pte_fn(). I'm not
> comfortable changing x86's implementation. Simply offer a NULL, and if
> NULL use the default.

As replied in patch 4, we use the __weak method, and x86's
implementation is not changed in patch 4. Is this OK?

>
> Do you have a tree that intel pulls? I suggest cleaning up the patches
> and pushing to a remote branch that they pick up. That would have caught
> the mips typo. Send a PR creating a file in [1] for your branch, github
> is fine. Basic validation needs to be done on more than just arm64
> before I can pick this up, too.

OK, x86/arm64/riscv are tested, but I don't have ppc/mips/sparc compilers.

I will try to push the new version to GitHub and have it tested by LKP;
a hypothetical branch entry is sketched below.
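
(The url: field follows the existing entries in the lkp-tests repo;
the file name and any extra fields are assumptions to be checked
against that repo:

# repo/linux/<your-branch-name>  (hypothetical path)
url: https://github.com/<your-github-user>/linux.git
)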

Thanks.

>
> [1] https://github.com/intel/lkp-tests/tree/master/repo/linux
>
> Thanks,
> Dennis
> .

2021-12-03 18:54:50

by Catalin Marinas

[permalink] [raw]
Subject: Re: [PATCH RFC 1/4] mm: percpu: Generalize percpu related config

On Sun, Nov 21, 2021 at 05:35:54PM +0800, Kefeng Wang wrote:
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index c4207cf9bb17..4ff73299f8a9 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -1135,6 +1135,10 @@ config NUMA
> select GENERIC_ARCH_NUMA
> select ACPI_NUMA if ACPI
> select OF_NUMA
> + select HAVE_SETUP_PER_CPU_AREA
> + select NEED_PER_CPU_EMBED_FIRST_CHUNK
> + select NEED_PER_CPU_PAGE_FIRST_CHUNK
> + select USE_PERCPU_NUMA_NODE_ID
> help
> Enable NUMA (Non-Uniform Memory Access) support.
>
> @@ -1151,22 +1155,6 @@ config NODES_SHIFT
> Specify the maximum number of NUMA Nodes available on the target
> system. Increases memory reserved to accommodate various tables.
>
> -config USE_PERCPU_NUMA_NODE_ID
> - def_bool y
> - depends on NUMA
> -
> -config HAVE_SETUP_PER_CPU_AREA
> - def_bool y
> - depends on NUMA
> -
> -config NEED_PER_CPU_EMBED_FIRST_CHUNK
> - def_bool y
> - depends on NUMA
> -
> -config NEED_PER_CPU_PAGE_FIRST_CHUNK
> - def_bool y
> - depends on NUMA
> -
> source "kernel/Kconfig.hz"
>
> config ARCH_SPARSEMEM_ENABLE

For arm64:

Acked-by: Catalin Marinas <[email protected]>