This series is in order to enable sparse-vmemmap for LoongArch. But
LoongArch cannot use generic helpers directly because MIPS&LoongArch
need to call pgd_init()/pud_init()/pmd_init() when populating page
tables. So we adjust the prototypes of p?d_init() to make generic
helpers can call them, then enable sparse-vmemmap with generic helpers,
and to be further, generalise vmemmap_populate_hugepages() for ARM64,
X86 and LoongArch.
V1 -> V2:
Split ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP to a separate patch.
V2 -> V3:
1, Change the Signed-off-by order of author and committer;
2, Update commit message about the build error on LoongArch.
V3 -> V4:
Change pmd to pmdp for ARM64 for consistency.
Huacai Chen and Feiyang Chen(4):
MIPS&LoongArch: Adjust prototypes of p?d_init().
LoongArch: Add sparse memory vmemmap support.
mm/sparse-vmemmap: Generalise vmemmap_populate_hugepages().
LoongArch: Enable ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP.
Signed-off-by: Huacai Chen <[email protected]>
Signed-off-by: Feiyang Chen <[email protected]>
---
arch/arm64/mm/mmu.c | 53 ++++++--------------
arch/loongarch/Kconfig | 2 +
arch/loongarch/include/asm/pgalloc.h | 13 +----
arch/loongarch/include/asm/pgtable.h | 13 +++--
arch/loongarch/include/asm/sparsemem.h | 8 +++
arch/loongarch/kernel/numa.c | 4 +-
arch/loongarch/mm/init.c | 44 +++++++++++++++-
arch/loongarch/mm/pgtable.c | 23 +++++----
arch/mips/include/asm/pgalloc.h | 8 +--
arch/mips/include/asm/pgtable-64.h | 8 +--
arch/mips/kvm/mmu.c | 3 +-
arch/mips/mm/pgtable-32.c | 10 ++--
arch/mips/mm/pgtable-64.c | 18 ++++---
arch/mips/mm/pgtable.c | 2 +-
arch/x86/mm/init_64.c | 92 ++++++++++++----------------------
include/linux/mm.h | 8 +++
include/linux/page-flags.h | 1 +
mm/sparse-vmemmap.c | 64 +++++++++++++++++++++++
18 files changed, 222 insertions(+), 152 deletions(-)
--
2.27.0
From: Feiyang Chen <[email protected]>
Generalise vmemmap_populate_hugepages() so ARM64 & X86 & LoongArch can
share its implementation.
Signed-off-by: Feiyang Chen <[email protected]>
Signed-off-by: Huacai Chen <[email protected]>
---
arch/arm64/mm/mmu.c | 53 ++++++-----------------
arch/loongarch/mm/init.c | 63 ++++++++-------------------
arch/x86/mm/init_64.c | 92 ++++++++++++++--------------------------
include/linux/mm.h | 6 +++
mm/sparse-vmemmap.c | 54 +++++++++++++++++++++++
5 files changed, 124 insertions(+), 144 deletions(-)
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 626ec32873c6..b080a65c719d 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -1158,49 +1158,24 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
return vmemmap_populate_basepages(start, end, node, altmap);
}
#else /* !ARM64_KERNEL_USES_PMD_MAPS */
+void __meminit vmemmap_set_pmd(pmd_t *pmdp, void *p, int node,
+ unsigned long addr, unsigned long next)
+{
+ pmd_set_huge(pmdp, __pa(p), __pgprot(PROT_SECT_NORMAL));
+}
+
+int __meminit vmemmap_check_pmd(pmd_t *pmdp, int node, unsigned long addr,
+ unsigned long next)
+{
+ vmemmap_verify((pte_t *)pmdp, node, addr, next);
+ return 1;
+}
+
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
struct vmem_altmap *altmap)
{
- unsigned long addr = start;
- unsigned long next;
- pgd_t *pgdp;
- p4d_t *p4dp;
- pud_t *pudp;
- pmd_t *pmdp;
-
WARN_ON((start < VMEMMAP_START) || (end > VMEMMAP_END));
- do {
- next = pmd_addr_end(addr, end);
-
- pgdp = vmemmap_pgd_populate(addr, node);
- if (!pgdp)
- return -ENOMEM;
-
- p4dp = vmemmap_p4d_populate(pgdp, addr, node);
- if (!p4dp)
- return -ENOMEM;
-
- pudp = vmemmap_pud_populate(p4dp, addr, node);
- if (!pudp)
- return -ENOMEM;
-
- pmdp = pmd_offset(pudp, addr);
- if (pmd_none(READ_ONCE(*pmdp))) {
- void *p = NULL;
-
- p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
- if (!p) {
- if (vmemmap_populate_basepages(addr, next, node, altmap))
- return -ENOMEM;
- continue;
- }
-
- pmd_set_huge(pmdp, __pa(p), __pgprot(PROT_SECT_NORMAL));
- } else
- vmemmap_verify((pte_t *)pmdp, node, addr, next);
- } while (addr = next, addr != end);
-
- return 0;
+ return vmemmap_populate_hugepages(start, end, node, altmap);
}
#endif /* !ARM64_KERNEL_USES_PMD_MAPS */
diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c
index 35128229fe46..3190b3cd52d1 100644
--- a/arch/loongarch/mm/init.c
+++ b/arch/loongarch/mm/init.c
@@ -158,52 +158,25 @@ void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
#endif
#ifdef CONFIG_SPARSEMEM_VMEMMAP
-int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end,
- int node, struct vmem_altmap *altmap)
+void __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
+ unsigned long addr, unsigned long next)
{
- unsigned long addr = start;
- unsigned long next;
- pgd_t *pgd;
- p4d_t *p4d;
- pud_t *pud;
- pmd_t *pmd;
-
- for (addr = start; addr < end; addr = next) {
- next = pmd_addr_end(addr, end);
-
- pgd = vmemmap_pgd_populate(addr, node);
- if (!pgd)
- return -ENOMEM;
- p4d = vmemmap_p4d_populate(pgd, addr, node);
- if (!p4d)
- return -ENOMEM;
- pud = vmemmap_pud_populate(p4d, addr, node);
- if (!pud)
- return -ENOMEM;
-
- pmd = pmd_offset(pud, addr);
- if (pmd_none(*pmd)) {
- void *p = NULL;
-
- p = vmemmap_alloc_block_buf(PMD_SIZE, node, NULL);
- if (p) {
- pmd_t entry;
-
- entry = pfn_pmd(virt_to_pfn(p), PAGE_KERNEL);
- pmd_val(entry) |= _PAGE_HUGE | _PAGE_HGLOBAL;
- set_pmd_at(&init_mm, addr, pmd, entry);
-
- continue;
- }
- } else if (pmd_val(*pmd) & _PAGE_HUGE) {
- vmemmap_verify((pte_t *)pmd, node, addr, next);
- continue;
- }
- if (vmemmap_populate_basepages(addr, next, node, NULL))
- return -ENOMEM;
- }
-
- return 0;
+ pmd_t entry;
+
+ entry = pfn_pmd(virt_to_pfn(p), PAGE_KERNEL);
+ pmd_val(entry) |= _PAGE_HUGE | _PAGE_HGLOBAL;
+ set_pmd_at(&init_mm, addr, pmd, entry);
+}
+
+int __meminit vmemmap_check_pmd(pmd_t *pmd, int node, unsigned long addr,
+ unsigned long next)
+{
+ int huge = pmd_val(*pmd) & _PAGE_HUGE;
+
+ if (huge)
+ vmemmap_verify((pte_t *)pmd, node, addr, next);
+
+ return huge;
}
#if CONFIG_PGTABLE_LEVELS == 2
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 39c5246964a9..4911093ee2f3 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1532,72 +1532,44 @@ static long __meminitdata addr_start, addr_end;
static void __meminitdata *p_start, *p_end;
static int __meminitdata node_start;
-static int __meminit vmemmap_populate_hugepages(unsigned long start,
- unsigned long end, int node, struct vmem_altmap *altmap)
+void __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
+ unsigned long addr, unsigned long next)
{
- unsigned long addr;
- unsigned long next;
- pgd_t *pgd;
- p4d_t *p4d;
- pud_t *pud;
- pmd_t *pmd;
-
- for (addr = start; addr < end; addr = next) {
- next = pmd_addr_end(addr, end);
-
- pgd = vmemmap_pgd_populate(addr, node);
- if (!pgd)
- return -ENOMEM;
-
- p4d = vmemmap_p4d_populate(pgd, addr, node);
- if (!p4d)
- return -ENOMEM;
-
- pud = vmemmap_pud_populate(p4d, addr, node);
- if (!pud)
- return -ENOMEM;
-
- pmd = pmd_offset(pud, addr);
- if (pmd_none(*pmd)) {
- void *p;
-
- p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
- if (p) {
- pte_t entry;
-
- entry = pfn_pte(__pa(p) >> PAGE_SHIFT,
- PAGE_KERNEL_LARGE);
- set_pmd(pmd, __pmd(pte_val(entry)));
+ pte_t entry;
+
+ entry = pfn_pte(__pa(p) >> PAGE_SHIFT,
+ PAGE_KERNEL_LARGE);
+ set_pmd(pmd, __pmd(pte_val(entry)));
+
+ /* check to see if we have contiguous blocks */
+ if (p_end != p || node_start != node) {
+ if (p_start)
+ pr_debug(" [%lx-%lx] PMD -> [%p-%p] on node %d\n",
+ addr_start, addr_end-1, p_start, p_end-1, node_start);
+ addr_start = addr;
+ node_start = node;
+ p_start = p;
+ }
- /* check to see if we have contiguous blocks */
- if (p_end != p || node_start != node) {
- if (p_start)
- pr_debug(" [%lx-%lx] PMD -> [%p-%p] on node %d\n",
- addr_start, addr_end-1, p_start, p_end-1, node_start);
- addr_start = addr;
- node_start = node;
- p_start = p;
- }
+ addr_end = addr + PMD_SIZE;
+ p_end = p + PMD_SIZE;
- addr_end = addr + PMD_SIZE;
- p_end = p + PMD_SIZE;
+ if (!IS_ALIGNED(addr, PMD_SIZE) ||
+ !IS_ALIGNED(next, PMD_SIZE))
+ vmemmap_use_new_sub_pmd(addr, next);
+}
- if (!IS_ALIGNED(addr, PMD_SIZE) ||
- !IS_ALIGNED(next, PMD_SIZE))
- vmemmap_use_new_sub_pmd(addr, next);
+int __meminit vmemmap_check_pmd(pmd_t *pmd, int node, unsigned long addr,
+ unsigned long next)
+{
+ int large = pmd_large(*pmd);
- continue;
- } else if (altmap)
- return -ENOMEM; /* no fallback */
- } else if (pmd_large(*pmd)) {
- vmemmap_verify((pte_t *)pmd, node, addr, next);
- vmemmap_use_sub_pmd(addr, next);
- continue;
- }
- if (vmemmap_populate_basepages(addr, next, node, NULL))
- return -ENOMEM;
+ if (pmd_large(*pmd)) {
+ vmemmap_verify((pte_t *)pmd, node, addr, next);
+ vmemmap_use_sub_pmd(addr, next);
}
- return 0;
+
+ return large;
}
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index f6ed6bc0a65f..894d59e1ffd6 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3216,8 +3216,14 @@ struct vmem_altmap;
void *vmemmap_alloc_block_buf(unsigned long size, int node,
struct vmem_altmap *altmap);
void vmemmap_verify(pte_t *, int, unsigned long, unsigned long);
+void vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
+ unsigned long addr, unsigned long next);
+int vmemmap_check_pmd(pmd_t *pmd, int node, unsigned long addr,
+ unsigned long next);
int vmemmap_populate_basepages(unsigned long start, unsigned long end,
int node, struct vmem_altmap *altmap);
+int vmemmap_populate_hugepages(unsigned long start, unsigned long end,
+ int node, struct vmem_altmap *altmap);
int vmemmap_populate(unsigned long start, unsigned long end, int node,
struct vmem_altmap *altmap);
void vmemmap_populate_print_last(void);
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index 33e2a1ceee72..6f2e40bb695d 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -686,6 +686,60 @@ int __meminit vmemmap_populate_basepages(unsigned long start, unsigned long end,
return vmemmap_populate_range(start, end, node, altmap, NULL);
}
+void __weak __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
+ unsigned long addr, unsigned long next)
+{
+}
+
+int __weak __meminit vmemmap_check_pmd(pmd_t *pmd, int node, unsigned long addr,
+ unsigned long next)
+{
+ return 0;
+}
+
+int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end,
+ int node, struct vmem_altmap *altmap)
+{
+ unsigned long addr;
+ unsigned long next;
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+
+ for (addr = start; addr < end; addr = next) {
+ next = pmd_addr_end(addr, end);
+
+ pgd = vmemmap_pgd_populate(addr, node);
+ if (!pgd)
+ return -ENOMEM;
+
+ p4d = vmemmap_p4d_populate(pgd, addr, node);
+ if (!p4d)
+ return -ENOMEM;
+
+ pud = vmemmap_pud_populate(p4d, addr, node);
+ if (!pud)
+ return -ENOMEM;
+
+ pmd = pmd_offset(pud, addr);
+ if (pmd_none(READ_ONCE(*pmd))) {
+ void *p;
+
+ p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
+ if (p) {
+ vmemmap_set_pmd(pmd, p, node, addr, next);
+ continue;
+ } else if (altmap)
+ return -ENOMEM; /* no fallback */
+ } else if (vmemmap_check_pmd(pmd, node, addr, next))
+ continue;
+ if (vmemmap_populate_basepages(addr, next, node, altmap))
+ return -ENOMEM;
+ }
+ return 0;
+}
+
/*
* For compound pages bigger than section size (e.g. x86 1G compound
* pages with 2M subsection size) fill the rest of sections as tail
--
2.27.0
From: Feiyang Chen <[email protected]>
The feature of minimizing overhead of struct page associated with each
HugeTLB page is implemented on x86_64. However, the infrastructure of
this feature is already there, so just select ARCH_WANT_HUGETLB_PAGE_
OPTIMIZE_VMEMMAP is enough to enable this feature for LoongArch.
To avoid the following build error on LoongArch we should include linux/
static_key.h in page-flags.h.
In file included from ./include/linux/mmzone.h:22,
from ./include/linux/gfp.h:6,
from ./include/linux/mm.h:7,
from arch/loongarch/kernel/asm-offsets.c:9:
./include/linux/page-flags.h:208:1: warning: data definition has no
type or storage class
208 | DECLARE_STATIC_KEY_MAYBE(CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON,
| ^~~~~~~~~~~~~~~~~~~~~~~~
./include/linux/page-flags.h:208:1: error: type defaults to 'int' in
declaration of 'DECLARE_STATIC_KEY_MAYBE' [-Werror=implicit-int]
./include/linux/page-flags.h:209:26: warning: parameter names (without
types) in function declaration
209 | hugetlb_optimize_vmemmap_key);
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~
./include/linux/page-flags.h: In function 'hugetlb_optimize_vmemmap_enabled':
./include/linux/page-flags.h:213:16: error: implicit declaration of
function 'static_branch_maybe' [-Werror=implicit-function-declaration]
213 | return static_branch_maybe(CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON,
| ^~~~~~~~~~~~~~~~~~~
./include/linux/page-flags.h:213:36: error:
'CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON' undeclared (first
use in this function); did you mean
'CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP'?
213 | return static_branch_maybe(CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON,
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
| CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
./include/linux/page-flags.h:213:36: note: each undeclared identifier
is reported only once for each function it appears in
./include/linux/page-flags.h:214:37: error:
'hugetlb_optimize_vmemmap_key' undeclared (first use in this
function); did you mean 'hugetlb_optimize_vmemmap_enabled'?
214 | &hugetlb_optimize_vmemmap_key);
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~
| hugetlb_optimize_vmemmap_enabled
Signed-off-by: Feiyang Chen <[email protected]>
Signed-off-by: Huacai Chen <[email protected]>
---
arch/loongarch/Kconfig | 1 +
include/linux/page-flags.h | 1 +
2 files changed, 2 insertions(+)
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 55ab84fd70e5..8e56ca28165e 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -49,6 +49,7 @@ config LOONGARCH
select ARCH_USE_QUEUED_RWLOCKS
select ARCH_USE_QUEUED_SPINLOCKS
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
+ select ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
select ARCH_WANTS_NO_INSTR
select BUILDTIME_TABLE_SORT
select COMMON_CLK
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index e66f7aa3191d..28a53ac7aa3e 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -9,6 +9,7 @@
#include <linux/types.h>
#include <linux/bug.h>
#include <linux/mmdebug.h>
+#include <linux/static_key.h>
#ifndef __GENERATING_BOUNDS_H
#include <linux/mm_types.h>
#include <generated/bounds.h>
--
2.27.0
From: Feiyang Chen <[email protected]>
Add sparse memory vmemmap support for LoongArch. SPARSEMEM_VMEMMAP
uses a virtually mapped memmap to optimise pfn_to_page and page_to_pfn
operations. This is the most efficient option when sufficient kernel
resources are available.
Signed-off-by: Min Zhou <[email protected]>
Signed-off-by: Feiyang Chen <[email protected]>
Signed-off-by: Huacai Chen <[email protected]>
---
arch/loongarch/Kconfig | 1 +
arch/loongarch/include/asm/pgtable.h | 5 +-
arch/loongarch/include/asm/sparsemem.h | 8 +++
arch/loongarch/mm/init.c | 71 +++++++++++++++++++++++++-
include/linux/mm.h | 2 +
mm/sparse-vmemmap.c | 10 ++++
6 files changed, 95 insertions(+), 2 deletions(-)
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index dc19cf3071ea..55ab84fd70e5 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -422,6 +422,7 @@ config ARCH_FLATMEM_ENABLE
config ARCH_SPARSEMEM_ENABLE
def_bool y
+ select SPARSEMEM_VMEMMAP_ENABLE
help
Say Y to support efficient handling of sparse physical memory,
for architectures which are either NUMA (Non-Uniform Memory Access)
diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h
index 9c811c3f7572..b701ec7a0309 100644
--- a/arch/loongarch/include/asm/pgtable.h
+++ b/arch/loongarch/include/asm/pgtable.h
@@ -92,7 +92,10 @@ extern unsigned long zero_page_mask;
#define VMALLOC_START MODULES_END
#define VMALLOC_END \
(vm_map_base + \
- min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE)
+ min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE - VMEMMAP_SIZE)
+
+#define vmemmap ((struct page *)((VMALLOC_END + PMD_SIZE) & PMD_MASK))
+#define VMEMMAP_END ((unsigned long)vmemmap + VMEMMAP_SIZE - 1)
#define pte_ERROR(e) \
pr_err("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e))
diff --git a/arch/loongarch/include/asm/sparsemem.h b/arch/loongarch/include/asm/sparsemem.h
index 3d18cdf1b069..a1e440f6bec7 100644
--- a/arch/loongarch/include/asm/sparsemem.h
+++ b/arch/loongarch/include/asm/sparsemem.h
@@ -11,6 +11,14 @@
#define SECTION_SIZE_BITS 29 /* 2^29 = Largest Huge Page Size */
#define MAX_PHYSMEM_BITS 48
+#ifndef CONFIG_SPARSEMEM_VMEMMAP
+#define VMEMMAP_SIZE 0
+#else
+#define VMEMMAP_SIZE (sizeof(struct page) * (1UL << (cpu_pabits + 1 - PAGE_SHIFT)))
+#endif
+
+#include <linux/mm_types.h>
+
#endif /* CONFIG_SPARSEMEM */
#ifdef CONFIG_MEMORY_HOTPLUG
diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c
index 7094a68c9b83..35128229fe46 100644
--- a/arch/loongarch/mm/init.c
+++ b/arch/loongarch/mm/init.c
@@ -22,7 +22,7 @@
#include <linux/pfn.h>
#include <linux/hardirq.h>
#include <linux/gfp.h>
-#include <linux/initrd.h>
+#include <linux/hugetlb.h>
#include <linux/mmzone.h>
#include <asm/asm-offsets.h>
@@ -157,6 +157,75 @@ void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
#endif
#endif
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end,
+ int node, struct vmem_altmap *altmap)
+{
+ unsigned long addr = start;
+ unsigned long next;
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+
+ for (addr = start; addr < end; addr = next) {
+ next = pmd_addr_end(addr, end);
+
+ pgd = vmemmap_pgd_populate(addr, node);
+ if (!pgd)
+ return -ENOMEM;
+ p4d = vmemmap_p4d_populate(pgd, addr, node);
+ if (!p4d)
+ return -ENOMEM;
+ pud = vmemmap_pud_populate(p4d, addr, node);
+ if (!pud)
+ return -ENOMEM;
+
+ pmd = pmd_offset(pud, addr);
+ if (pmd_none(*pmd)) {
+ void *p = NULL;
+
+ p = vmemmap_alloc_block_buf(PMD_SIZE, node, NULL);
+ if (p) {
+ pmd_t entry;
+
+ entry = pfn_pmd(virt_to_pfn(p), PAGE_KERNEL);
+ pmd_val(entry) |= _PAGE_HUGE | _PAGE_HGLOBAL;
+ set_pmd_at(&init_mm, addr, pmd, entry);
+
+ continue;
+ }
+ } else if (pmd_val(*pmd) & _PAGE_HUGE) {
+ vmemmap_verify((pte_t *)pmd, node, addr, next);
+ continue;
+ }
+ if (vmemmap_populate_basepages(addr, next, node, NULL))
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+#if CONFIG_PGTABLE_LEVELS == 2
+int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
+ struct vmem_altmap *altmap)
+{
+ return vmemmap_populate_basepages(start, end, node, NULL);
+}
+#else
+int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
+ struct vmem_altmap *altmap)
+{
+ return vmemmap_populate_hugepages(start, end, node, NULL);
+}
+#endif
+
+void vmemmap_free(unsigned long start, unsigned long end,
+ struct vmem_altmap *altmap)
+{
+}
+#endif
+
/*
* Align swapper_pg_dir in to 64K, allows its address to be loaded
* with a single LUI instruction in the TLB handlers. If we used
diff --git a/include/linux/mm.h b/include/linux/mm.h
index cf3d0d673f6b..f6ed6bc0a65f 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3203,6 +3203,8 @@ void *sparse_buffer_alloc(unsigned long size);
struct page * __populate_section_memmap(unsigned long pfn,
unsigned long nr_pages, int nid, struct vmem_altmap *altmap,
struct dev_pagemap *pgmap);
+void pmd_init(void *addr);
+void pud_init(void *addr);
pgd_t *vmemmap_pgd_populate(unsigned long addr, int node);
p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node);
pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node);
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index f4fa61dbbee3..33e2a1ceee72 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -587,6 +587,10 @@ pmd_t * __meminit vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node)
return pmd;
}
+void __weak __meminit pmd_init(void *addr)
+{
+}
+
pud_t * __meminit vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node)
{
pud_t *pud = pud_offset(p4d, addr);
@@ -594,11 +598,16 @@ pud_t * __meminit vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node)
void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
if (!p)
return NULL;
+ pmd_init(p);
pud_populate(&init_mm, pud, p);
}
return pud;
}
+void __weak __meminit pud_init(void *addr)
+{
+}
+
p4d_t * __meminit vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node)
{
p4d_t *p4d = p4d_offset(pgd, addr);
@@ -606,6 +615,7 @@ p4d_t * __meminit vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node)
void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
if (!p)
return NULL;
+ pud_init(p);
p4d_populate(&init_mm, p4d, p);
}
return p4d;
--
2.27.0
On Mon, Jul 4, 2022 at 1:25 PM Huacai Chen <[email protected]> wrote:
> To avoid the following build error on LoongArch we should include linux/
> static_key.h in page-flags.h.
>
> In file included from ./include/linux/mmzone.h:22,
> from ./include/linux/gfp.h:6,
> from ./include/linux/mm.h:7,
> from arch/loongarch/kernel/asm-offsets.c:9:
> ./include/linux/page-flags.h:208:1: warning: data definition has no
> type or storage class
> 208 | DECLARE_STATIC_KEY_MAYBE(CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON,
> | ^~~~~~~~~~~~~~~~~~~~~~~~
> ./include/linux/page-flags.h:208:1: error: type defaults to 'int' in
> declaration of 'DECLARE_STATIC_KEY_MAYBE' [-Werror=implicit-int]
> ./include/linux/page-flags.h:209:26: warning: parameter names (without
> types) in function declaration
I wonder if page_fixed_fake_head() should be moved out of line to avoid
this, it's already nontrivial here, and that would avoid the static key
in a central header.
Arnd
Hi, Arnd,
On Mon, Jul 4, 2022 at 8:18 PM Arnd Bergmann <[email protected]> wrote:
>
> On Mon, Jul 4, 2022 at 1:25 PM Huacai Chen <[email protected]> wrote:
> > To avoid the following build error on LoongArch we should include linux/
> > static_key.h in page-flags.h.
> >
> > In file included from ./include/linux/mmzone.h:22,
> > from ./include/linux/gfp.h:6,
> > from ./include/linux/mm.h:7,
> > from arch/loongarch/kernel/asm-offsets.c:9:
> > ./include/linux/page-flags.h:208:1: warning: data definition has no
> > type or storage class
> > 208 | DECLARE_STATIC_KEY_MAYBE(CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON,
> > | ^~~~~~~~~~~~~~~~~~~~~~~~
> > ./include/linux/page-flags.h:208:1: error: type defaults to 'int' in
> > declaration of 'DECLARE_STATIC_KEY_MAYBE' [-Werror=implicit-int]
> > ./include/linux/page-flags.h:209:26: warning: parameter names (without
> > types) in function declaration
>
> I wonder if page_fixed_fake_head() should be moved out of line to avoid
> this, it's already nontrivial here, and that would avoid the static key
> in a central header.
I have some consideration here. I think both inline function and
static key are instruments to make things faster, in other words,
page_fixed_fake_head() is a performance critical function. If so, it
is not suitable to move it out of line.
Huacai
>
> Arnd
>
On Tue, Jul 5, 2022 at 9:51 AM Muchun Song <[email protected]> wrote:
>
> On Tue, Jul 5, 2022 at 2:22 PM Huacai Chen <[email protected]> wrote:
> >
> > Hi, Arnd,
> >
> > On Mon, Jul 4, 2022 at 8:18 PM Arnd Bergmann <[email protected]> wrote:
> > >
> > > On Mon, Jul 4, 2022 at 1:25 PM Huacai Chen <[email protected]> wrote:
> > > > To avoid the following build error on LoongArch we should include linux/
> > > > static_key.h in page-flags.h.
> > > >
> > > > In file included from ./include/linux/mmzone.h:22,
> > > > from ./include/linux/gfp.h:6,
> > > > from ./include/linux/mm.h:7,
> > > > from arch/loongarch/kernel/asm-offsets.c:9:
> > > > ./include/linux/page-flags.h:208:1: warning: data definition has no
> > > > type or storage class
> > > > 208 | DECLARE_STATIC_KEY_MAYBE(CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON,
> > > > | ^~~~~~~~~~~~~~~~~~~~~~~~
> > > > ./include/linux/page-flags.h:208:1: error: type defaults to 'int' in
> > > > declaration of 'DECLARE_STATIC_KEY_MAYBE' [-Werror=implicit-int]
> > > > ./include/linux/page-flags.h:209:26: warning: parameter names (without
> > > > types) in function declaration
> > >
> > > I wonder if page_fixed_fake_head() should be moved out of line to avoid
> > > this, it's already nontrivial here, and that would avoid the static key
> > > in a central header.
> > I have some consideration here. I think both inline function and
> > static key are instruments to make things faster, in other words,
> > page_fixed_fake_head() is a performance critical function. If so, it
> > is not suitable to move it out of line.
>
> +1
>
> The static key is an optimization when HVO is disabled.
How about splitting up linux/page_flags.h so the static_key header is
only included
in those places that use one of the inline functions that depend on it?
Arnd
On Tue, Jul 5, 2022 at 2:22 PM Huacai Chen <[email protected]> wrote:
>
> Hi, Arnd,
>
> On Mon, Jul 4, 2022 at 8:18 PM Arnd Bergmann <[email protected]> wrote:
> >
> > On Mon, Jul 4, 2022 at 1:25 PM Huacai Chen <[email protected]> wrote:
> > > To avoid the following build error on LoongArch we should include linux/
> > > static_key.h in page-flags.h.
> > >
> > > In file included from ./include/linux/mmzone.h:22,
> > > from ./include/linux/gfp.h:6,
> > > from ./include/linux/mm.h:7,
> > > from arch/loongarch/kernel/asm-offsets.c:9:
> > > ./include/linux/page-flags.h:208:1: warning: data definition has no
> > > type or storage class
> > > 208 | DECLARE_STATIC_KEY_MAYBE(CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON,
> > > | ^~~~~~~~~~~~~~~~~~~~~~~~
> > > ./include/linux/page-flags.h:208:1: error: type defaults to 'int' in
> > > declaration of 'DECLARE_STATIC_KEY_MAYBE' [-Werror=implicit-int]
> > > ./include/linux/page-flags.h:209:26: warning: parameter names (without
> > > types) in function declaration
> >
> > I wonder if page_fixed_fake_head() should be moved out of line to avoid
> > this, it's already nontrivial here, and that would avoid the static key
> > in a central header.
> I have some consideration here. I think both inline function and
> static key are instruments to make things faster, in other words,
> page_fixed_fake_head() is a performance critical function. If so, it
> is not suitable to move it out of line.
+1
The static key is an optimization when HVO is disabled.
Thanks.
>
> Huacai
> >
> > Arnd
> >
On Tue, Jul 5, 2022 at 4:06 PM Arnd Bergmann <[email protected]> wrote:
>
> On Tue, Jul 5, 2022 at 9:51 AM Muchun Song <[email protected]> wrote:
> >
> > On Tue, Jul 5, 2022 at 2:22 PM Huacai Chen <[email protected]> wrote:
> > >
> > > Hi, Arnd,
> > >
> > > On Mon, Jul 4, 2022 at 8:18 PM Arnd Bergmann <[email protected]> wrote:
> > > >
> > > > On Mon, Jul 4, 2022 at 1:25 PM Huacai Chen <[email protected]> wrote:
> > > > > To avoid the following build error on LoongArch we should include linux/
> > > > > static_key.h in page-flags.h.
> > > > >
> > > > > In file included from ./include/linux/mmzone.h:22,
> > > > > from ./include/linux/gfp.h:6,
> > > > > from ./include/linux/mm.h:7,
> > > > > from arch/loongarch/kernel/asm-offsets.c:9:
> > > > > ./include/linux/page-flags.h:208:1: warning: data definition has no
> > > > > type or storage class
> > > > > 208 | DECLARE_STATIC_KEY_MAYBE(CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON,
> > > > > | ^~~~~~~~~~~~~~~~~~~~~~~~
> > > > > ./include/linux/page-flags.h:208:1: error: type defaults to 'int' in
> > > > > declaration of 'DECLARE_STATIC_KEY_MAYBE' [-Werror=implicit-int]
> > > > > ./include/linux/page-flags.h:209:26: warning: parameter names (without
> > > > > types) in function declaration
> > > >
> > > > I wonder if page_fixed_fake_head() should be moved out of line to avoid
> > > > this, it's already nontrivial here, and that would avoid the static key
> > > > in a central header.
> > > I have some consideration here. I think both inline function and
> > > static key are instruments to make things faster, in other words,
> > > page_fixed_fake_head() is a performance critical function. If so, it
> > > is not suitable to move it out of line.
> >
> > +1
> >
> > The static key is an optimization when HVO is disabled.
>
> How about splitting up linux/page_flags.h so the static_key header is
> only included
> in those places that use one of the inline functions that depend on it?
>
How about including the static key header in the scope of
CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP?
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 465ff35a8c00..4dd005ad43fc 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -205,6 +205,8 @@ enum pageflags {
#ifndef __GENERATING_BOUNDS_H
#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
+#include <linux/static_key.h>
+
DECLARE_STATIC_KEY_FALSE(hugetlb_optimize_vmemmap_key);
/*
On Tue, Jul 5, 2022 at 10:38 AM Muchun Song <[email protected]> wrote:
> On Tue, Jul 5, 2022 at 4:06 PM Arnd Bergmann <[email protected]> wrote:
> > On Tue, Jul 5, 2022 at 9:51 AM Muchun Song <[email protected]> wrote:
>
> How about including the static key header in the scope of
> CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP?
That helps a little, but it means we still pay for it on x86 and
arm64, which are the
most common architectures.
Arnd
On Tue, Jul 5, 2022 at 4:46 PM Arnd Bergmann <[email protected]> wrote:
>
> On Tue, Jul 5, 2022 at 10:38 AM Muchun Song <[email protected]> wrote:
> > On Tue, Jul 5, 2022 at 4:06 PM Arnd Bergmann <[email protected]> wrote:
> > > On Tue, Jul 5, 2022 at 9:51 AM Muchun Song <[email protected]> wrote:
> >
> > How about including the static key header in the scope of
> > CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP?
>
> That helps a little, but it means we still pay for it on x86 and
> arm64, which are the
> most common architectures.
Alright. Make sense.
>
> Arnd
On Mon, Jul 04, 2022 at 07:25:25PM +0800, Huacai Chen wrote:
> From: Feiyang Chen <[email protected]>
>
> Generalise vmemmap_populate_hugepages() so ARM64 & X86 & LoongArch can
> share its implementation.
>
> Signed-off-by: Feiyang Chen <[email protected]>
> Signed-off-by: Huacai Chen <[email protected]>
> ---
> arch/arm64/mm/mmu.c | 53 ++++++-----------------
> arch/loongarch/mm/init.c | 63 ++++++++-------------------
> arch/x86/mm/init_64.c | 92 ++++++++++++++--------------------------
> include/linux/mm.h | 6 +++
> mm/sparse-vmemmap.c | 54 +++++++++++++++++++++++
> 5 files changed, 124 insertions(+), 144 deletions(-)
>
> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> index 626ec32873c6..b080a65c719d 100644
> --- a/arch/arm64/mm/mmu.c
> +++ b/arch/arm64/mm/mmu.c
> @@ -1158,49 +1158,24 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
> return vmemmap_populate_basepages(start, end, node, altmap);
> }
> #else /* !ARM64_KERNEL_USES_PMD_MAPS */
> +void __meminit vmemmap_set_pmd(pmd_t *pmdp, void *p, int node,
> + unsigned long addr, unsigned long next)
> +{
> + pmd_set_huge(pmdp, __pa(p), __pgprot(PROT_SECT_NORMAL));
> +}
> +
> +int __meminit vmemmap_check_pmd(pmd_t *pmdp, int node, unsigned long addr,
> + unsigned long next)
> +{
> + vmemmap_verify((pte_t *)pmdp, node, addr, next);
> + return 1;
> +}
> +
> int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
> struct vmem_altmap *altmap)
> {
> - unsigned long addr = start;
> - unsigned long next;
> - pgd_t *pgdp;
> - p4d_t *p4dp;
> - pud_t *pudp;
> - pmd_t *pmdp;
> -
> WARN_ON((start < VMEMMAP_START) || (end > VMEMMAP_END));
> - do {
> - next = pmd_addr_end(addr, end);
> -
> - pgdp = vmemmap_pgd_populate(addr, node);
> - if (!pgdp)
> - return -ENOMEM;
> -
> - p4dp = vmemmap_p4d_populate(pgdp, addr, node);
> - if (!p4dp)
> - return -ENOMEM;
> -
> - pudp = vmemmap_pud_populate(p4dp, addr, node);
> - if (!pudp)
> - return -ENOMEM;
> -
> - pmdp = pmd_offset(pudp, addr);
> - if (pmd_none(READ_ONCE(*pmdp))) {
> - void *p = NULL;
> -
> - p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
> - if (!p) {
> - if (vmemmap_populate_basepages(addr, next, node, altmap))
> - return -ENOMEM;
> - continue;
> - }
> -
> - pmd_set_huge(pmdp, __pa(p), __pgprot(PROT_SECT_NORMAL));
> - } else
> - vmemmap_verify((pte_t *)pmdp, node, addr, next);
> - } while (addr = next, addr != end);
> -
> - return 0;
> + return vmemmap_populate_hugepages(start, end, node, altmap);
> }
> #endif /* !ARM64_KERNEL_USES_PMD_MAPS */
I think the arm64 change is mostly ok (thanks!), but I have a question about
the core code you're introducing:
> diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
> index 33e2a1ceee72..6f2e40bb695d 100644
> --- a/mm/sparse-vmemmap.c
> +++ b/mm/sparse-vmemmap.c
> @@ -686,6 +686,60 @@ int __meminit vmemmap_populate_basepages(unsigned long start, unsigned long end,
> return vmemmap_populate_range(start, end, node, altmap, NULL);
> }
>
> +void __weak __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
> + unsigned long addr, unsigned long next)
> +{
> +}
> +
> +int __weak __meminit vmemmap_check_pmd(pmd_t *pmd, int node, unsigned long addr,
> + unsigned long next)
> +{
> + return 0;
> +}
> +
> +int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end,
> + int node, struct vmem_altmap *altmap)
> +{
> + unsigned long addr;
> + unsigned long next;
> + pgd_t *pgd;
> + p4d_t *p4d;
> + pud_t *pud;
> + pmd_t *pmd;
> +
> + for (addr = start; addr < end; addr = next) {
> + next = pmd_addr_end(addr, end);
> +
> + pgd = vmemmap_pgd_populate(addr, node);
> + if (!pgd)
> + return -ENOMEM;
> +
> + p4d = vmemmap_p4d_populate(pgd, addr, node);
> + if (!p4d)
> + return -ENOMEM;
> +
> + pud = vmemmap_pud_populate(p4d, addr, node);
> + if (!pud)
> + return -ENOMEM;
> +
> + pmd = pmd_offset(pud, addr);
> + if (pmd_none(READ_ONCE(*pmd))) {
> + void *p;
> +
> + p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
> + if (p) {
> + vmemmap_set_pmd(pmd, p, node, addr, next);
> + continue;
> + } else if (altmap)
> + return -ENOMEM; /* no fallback */
Why do you return -ENOMEM if 'altmap' here? That seems to be different to
what we currently have on arm64 and it's not clear to me why we're happy
with an altmap for the pmd case, but not for the pte case.
Will
On Tue, 5 Jul 2022 at 16:45, Arnd Bergmann <[email protected]> wrote:
>
> On Tue, Jul 5, 2022 at 10:38 AM Muchun Song <[email protected]> wrote:
> > On Tue, Jul 5, 2022 at 4:06 PM Arnd Bergmann <[email protected]> wrote:
> > > On Tue, Jul 5, 2022 at 9:51 AM Muchun Song <[email protected]> wrote:
> >
> > How about including the static key header in the scope of
> > CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP?
>
> That helps a little, but it means we still pay for it on x86 and
> arm64, which are the
> most common architectures.
>
Hi, Arnd,
It seems that arm64 and x86 include static_key.h or jump_label.h in some
more basic header files, otherwise they would not compile successfully
when they including page-flags.h.
In file included from ./arch/arm64/include/asm/lse.h:13,
from ./arch/arm64/include/asm/cmpxchg.h:14,
from ./arch/arm64/include/asm/atomic.h:16,
from ./include/linux/atomic.h:7,
from ./include/asm-generic/bitops/atomic.h:5,
from ./arch/arm64/include/asm/bitops.h:25,
from ./include/linux/bitops.h:33,
from ./include/linux/log2.h:12,
from kernel/bounds.c:13:
./include/linux/jump_label.h:5:2: error: #error "Hi"
5 | #error "Hi"
In file included from ./arch/x86/include/asm/nospec-branch.h:6,
from ./arch/x86/include/asm/paravirt_types.h:40,
from ./arch/x86/include/asm/ptrace.h:97,
from ./arch/x86/include/asm/math_emu.h:5,
from ./arch/x86/include/asm/processor.h:13,
from ./arch/x86/include/asm/cpufeature.h:5,
from ./arch/x86/include/asm/thread_info.h:53,
from ./include/linux/thread_info.h:60,
from ./arch/x86/include/asm/preempt.h:7,
from ./include/linux/preempt.h:78,
from ./include/linux/spinlock.h:55,
from ./include/linux/mmzone.h:8,
from ./include/linux/gfp.h:6,
from ./include/linux/slab.h:15,
from ./include/linux/crypto.h:20,
from arch/x86/kernel/asm-offsets.c:9:
./include/linux/static_key.h:3:2: error: #error "Hi"
#error "Hi"
^~~~~
In file included from ./include/linux/tracepoint-defs.h:12,
from ./arch/x86/include/asm/msr.h:58,
from ./arch/x86/include/asm/processor.h:22,
from ./arch/x86/include/asm/cpufeature.h:5,
from ./arch/x86/include/asm/thread_info.h:53,
from ./include/linux/thread_info.h:60,
from ./arch/x86/include/asm/preempt.h:7,
from ./include/linux/preempt.h:78,
from ./include/linux/spinlock.h:55,
from ./include/linux/mmzone.h:8,
from ./include/linux/gfp.h:6,
from ./include/linux/slab.h:15,
from ./include/linux/crypto.h:20,
from arch/x86/kernel/asm-offsets.c:9:
./include/linux/static_key.h:3:2: error: #error "Hi"
#error "Hi"
^~~~~
In file included from ./include/linux/kasan-enabled.h:5,
from ./include/linux/kasan.h:6,
from ./include/linux/slab.h:140,
from ./include/linux/crypto.h:20,
from arch/x86/kernel/asm-offsets.c:9:
./include/linux/static_key.h:3:2: error: #error "Hi"
#error "Hi"
^~~~~
In file included from ./include/linux/kasan.h:8,
from ./include/linux/slab.h:140,
from ./include/linux/crypto.h:20,
from arch/x86/kernel/asm-offsets.c:9:
./include/linux/static_key.h:3:2: error: #error "Hi"
#error "Hi"
^~~~~
In file included from ./include/linux/context_tracking_state.h:6,
from ./include/linux/hardirq.h:5,
from arch/x86/kernel/asm-offsets.c:12:
./include/linux/static_key.h:3:2: error: #error "Hi"
#error "Hi"
^~~~~
In file included from ./include/linux/vmstat.h:10,
from ./include/linux/mm.h:1771,
from ./include/linux/memcontrol.h:20,
from ./include/linux/swap.h:9,
from ./include/linux/suspend.h:5,
from arch/x86/kernel/asm-offsets.c:13:
./include/linux/static_key.h:3:2: error: #error "Hi"
#error "Hi"
Thanks,
Feiyang
> Arnd
>
Hi, Will,
On Tue, Jul 5, 2022 at 5:29 PM Will Deacon <[email protected]> wrote:
>
> On Mon, Jul 04, 2022 at 07:25:25PM +0800, Huacai Chen wrote:
> > From: Feiyang Chen <[email protected]>
> >
> > Generalise vmemmap_populate_hugepages() so ARM64 & X86 & LoongArch can
> > share its implementation.
> >
> > Signed-off-by: Feiyang Chen <[email protected]>
> > Signed-off-by: Huacai Chen <[email protected]>
> > ---
> > arch/arm64/mm/mmu.c | 53 ++++++-----------------
> > arch/loongarch/mm/init.c | 63 ++++++++-------------------
> > arch/x86/mm/init_64.c | 92 ++++++++++++++--------------------------
> > include/linux/mm.h | 6 +++
> > mm/sparse-vmemmap.c | 54 +++++++++++++++++++++++
> > 5 files changed, 124 insertions(+), 144 deletions(-)
> >
> > diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> > index 626ec32873c6..b080a65c719d 100644
> > --- a/arch/arm64/mm/mmu.c
> > +++ b/arch/arm64/mm/mmu.c
> > @@ -1158,49 +1158,24 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
> > return vmemmap_populate_basepages(start, end, node, altmap);
> > }
> > #else /* !ARM64_KERNEL_USES_PMD_MAPS */
> > +void __meminit vmemmap_set_pmd(pmd_t *pmdp, void *p, int node,
> > + unsigned long addr, unsigned long next)
> > +{
> > + pmd_set_huge(pmdp, __pa(p), __pgprot(PROT_SECT_NORMAL));
> > +}
> > +
> > +int __meminit vmemmap_check_pmd(pmd_t *pmdp, int node, unsigned long addr,
> > + unsigned long next)
> > +{
> > + vmemmap_verify((pte_t *)pmdp, node, addr, next);
> > + return 1;
> > +}
> > +
> > int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
> > struct vmem_altmap *altmap)
> > {
> > - unsigned long addr = start;
> > - unsigned long next;
> > - pgd_t *pgdp;
> > - p4d_t *p4dp;
> > - pud_t *pudp;
> > - pmd_t *pmdp;
> > -
> > WARN_ON((start < VMEMMAP_START) || (end > VMEMMAP_END));
> > - do {
> > - next = pmd_addr_end(addr, end);
> > -
> > - pgdp = vmemmap_pgd_populate(addr, node);
> > - if (!pgdp)
> > - return -ENOMEM;
> > -
> > - p4dp = vmemmap_p4d_populate(pgdp, addr, node);
> > - if (!p4dp)
> > - return -ENOMEM;
> > -
> > - pudp = vmemmap_pud_populate(p4dp, addr, node);
> > - if (!pudp)
> > - return -ENOMEM;
> > -
> > - pmdp = pmd_offset(pudp, addr);
> > - if (pmd_none(READ_ONCE(*pmdp))) {
> > - void *p = NULL;
> > -
> > - p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
> > - if (!p) {
> > - if (vmemmap_populate_basepages(addr, next, node, altmap))
> > - return -ENOMEM;
> > - continue;
> > - }
> > -
> > - pmd_set_huge(pmdp, __pa(p), __pgprot(PROT_SECT_NORMAL));
> > - } else
> > - vmemmap_verify((pte_t *)pmdp, node, addr, next);
> > - } while (addr = next, addr != end);
> > -
> > - return 0;
> > + return vmemmap_populate_hugepages(start, end, node, altmap);
> > }
> > #endif /* !ARM64_KERNEL_USES_PMD_MAPS */
>
>
> I think the arm64 change is mostly ok (thanks!), but I have a question about
> the core code you're introducing:
>
> > diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
> > index 33e2a1ceee72..6f2e40bb695d 100644
> > --- a/mm/sparse-vmemmap.c
> > +++ b/mm/sparse-vmemmap.c
> > @@ -686,6 +686,60 @@ int __meminit vmemmap_populate_basepages(unsigned long start, unsigned long end,
> > return vmemmap_populate_range(start, end, node, altmap, NULL);
> > }
> >
> > +void __weak __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
> > + unsigned long addr, unsigned long next)
> > +{
> > +}
> > +
> > +int __weak __meminit vmemmap_check_pmd(pmd_t *pmd, int node, unsigned long addr,
> > + unsigned long next)
> > +{
> > + return 0;
> > +}
> > +
> > +int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end,
> > + int node, struct vmem_altmap *altmap)
> > +{
> > + unsigned long addr;
> > + unsigned long next;
> > + pgd_t *pgd;
> > + p4d_t *p4d;
> > + pud_t *pud;
> > + pmd_t *pmd;
> > +
> > + for (addr = start; addr < end; addr = next) {
> > + next = pmd_addr_end(addr, end);
> > +
> > + pgd = vmemmap_pgd_populate(addr, node);
> > + if (!pgd)
> > + return -ENOMEM;
> > +
> > + p4d = vmemmap_p4d_populate(pgd, addr, node);
> > + if (!p4d)
> > + return -ENOMEM;
> > +
> > + pud = vmemmap_pud_populate(p4d, addr, node);
> > + if (!pud)
> > + return -ENOMEM;
> > +
> > + pmd = pmd_offset(pud, addr);
> > + if (pmd_none(READ_ONCE(*pmd))) {
> > + void *p;
> > +
> > + p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
> > + if (p) {
> > + vmemmap_set_pmd(pmd, p, node, addr, next);
> > + continue;
> > + } else if (altmap)
> > + return -ENOMEM; /* no fallback */
>
> Why do you return -ENOMEM if 'altmap' here? That seems to be different to
> what we currently have on arm64 and it's not clear to me why we're happy
> with an altmap for the pmd case, but not for the pte case.
The generic version is the same as X86. It seems that ARM64 always
fallback whether there is an altmap, but X86 only fallback in the no
altmap case. I don't know the reason of X86, can Dan Williams give
some explaination?
Huacai
>
> Will
On Tue, Jul 05, 2022 at 09:07:59PM +0800, Huacai Chen wrote:
> On Tue, Jul 5, 2022 at 5:29 PM Will Deacon <[email protected]> wrote:
> > On Mon, Jul 04, 2022 at 07:25:25PM +0800, Huacai Chen wrote:
> > > diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
> > > index 33e2a1ceee72..6f2e40bb695d 100644
> > > --- a/mm/sparse-vmemmap.c
> > > +++ b/mm/sparse-vmemmap.c
> > > @@ -686,6 +686,60 @@ int __meminit vmemmap_populate_basepages(unsigned long start, unsigned long end,
> > > return vmemmap_populate_range(start, end, node, altmap, NULL);
> > > }
> > >
> > > +void __weak __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
> > > + unsigned long addr, unsigned long next)
> > > +{
> > > +}
> > > +
> > > +int __weak __meminit vmemmap_check_pmd(pmd_t *pmd, int node, unsigned long addr,
> > > + unsigned long next)
> > > +{
> > > + return 0;
> > > +}
> > > +
> > > +int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end,
> > > + int node, struct vmem_altmap *altmap)
> > > +{
> > > + unsigned long addr;
> > > + unsigned long next;
> > > + pgd_t *pgd;
> > > + p4d_t *p4d;
> > > + pud_t *pud;
> > > + pmd_t *pmd;
> > > +
> > > + for (addr = start; addr < end; addr = next) {
> > > + next = pmd_addr_end(addr, end);
> > > +
> > > + pgd = vmemmap_pgd_populate(addr, node);
> > > + if (!pgd)
> > > + return -ENOMEM;
> > > +
> > > + p4d = vmemmap_p4d_populate(pgd, addr, node);
> > > + if (!p4d)
> > > + return -ENOMEM;
> > > +
> > > + pud = vmemmap_pud_populate(p4d, addr, node);
> > > + if (!pud)
> > > + return -ENOMEM;
> > > +
> > > + pmd = pmd_offset(pud, addr);
> > > + if (pmd_none(READ_ONCE(*pmd))) {
> > > + void *p;
> > > +
> > > + p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
> > > + if (p) {
> > > + vmemmap_set_pmd(pmd, p, node, addr, next);
> > > + continue;
> > > + } else if (altmap)
> > > + return -ENOMEM; /* no fallback */
> >
> > Why do you return -ENOMEM if 'altmap' here? That seems to be different to
> > what we currently have on arm64 and it's not clear to me why we're happy
> > with an altmap for the pmd case, but not for the pte case.
> The generic version is the same as X86. It seems that ARM64 always
> fallback whether there is an altmap, but X86 only fallback in the no
> altmap case. I don't know the reason of X86, can Dan Williams give
> some explaination?
Right, I think we need to understand the new behaviour here before we adopt
it on arm64.
Will
+Dan Williams
+Sudarshan Rajagopalan
On Thu, Jul 7, 2022 at 12:17 AM Will Deacon <[email protected]> wrote:
>
> On Tue, Jul 05, 2022 at 09:07:59PM +0800, Huacai Chen wrote:
> > On Tue, Jul 5, 2022 at 5:29 PM Will Deacon <[email protected]> wrote:
> > > On Mon, Jul 04, 2022 at 07:25:25PM +0800, Huacai Chen wrote:
> > > > diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
> > > > index 33e2a1ceee72..6f2e40bb695d 100644
> > > > --- a/mm/sparse-vmemmap.c
> > > > +++ b/mm/sparse-vmemmap.c
> > > > @@ -686,6 +686,60 @@ int __meminit vmemmap_populate_basepages(unsigned long start, unsigned long end,
> > > > return vmemmap_populate_range(start, end, node, altmap, NULL);
> > > > }
> > > >
> > > > +void __weak __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
> > > > + unsigned long addr, unsigned long next)
> > > > +{
> > > > +}
> > > > +
> > > > +int __weak __meminit vmemmap_check_pmd(pmd_t *pmd, int node, unsigned long addr,
> > > > + unsigned long next)
> > > > +{
> > > > + return 0;
> > > > +}
> > > > +
> > > > +int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end,
> > > > + int node, struct vmem_altmap *altmap)
> > > > +{
> > > > + unsigned long addr;
> > > > + unsigned long next;
> > > > + pgd_t *pgd;
> > > > + p4d_t *p4d;
> > > > + pud_t *pud;
> > > > + pmd_t *pmd;
> > > > +
> > > > + for (addr = start; addr < end; addr = next) {
> > > > + next = pmd_addr_end(addr, end);
> > > > +
> > > > + pgd = vmemmap_pgd_populate(addr, node);
> > > > + if (!pgd)
> > > > + return -ENOMEM;
> > > > +
> > > > + p4d = vmemmap_p4d_populate(pgd, addr, node);
> > > > + if (!p4d)
> > > > + return -ENOMEM;
> > > > +
> > > > + pud = vmemmap_pud_populate(p4d, addr, node);
> > > > + if (!pud)
> > > > + return -ENOMEM;
> > > > +
> > > > + pmd = pmd_offset(pud, addr);
> > > > + if (pmd_none(READ_ONCE(*pmd))) {
> > > > + void *p;
> > > > +
> > > > + p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
> > > > + if (p) {
> > > > + vmemmap_set_pmd(pmd, p, node, addr, next);
> > > > + continue;
> > > > + } else if (altmap)
> > > > + return -ENOMEM; /* no fallback */
> > >
> > > Why do you return -ENOMEM if 'altmap' here? That seems to be different to
> > > what we currently have on arm64 and it's not clear to me why we're happy
> > > with an altmap for the pmd case, but not for the pte case.
> > The generic version is the same as X86. It seems that ARM64 always
> > fallback whether there is an altmap, but X86 only fallback in the no
> > altmap case. I don't know the reason of X86, can Dan Williams give
> > some explaination?
>
> Right, I think we need to understand the new behaviour here before we adopt
> it on arm64.
Hi, Dan,
Could you please tell us the reason? Thanks.
And Sudarshan,
You are the author of adding a fallback mechanism to ARM64, do you
know why ARM64 is different from X86 (only fallback in no altmap
case)?
Huacai
>
> Will
Oh, Sudarshan Rajagopalan's Email has changed, Let's update.
Huacai
On Fri, Jul 8, 2022 at 5:47 PM Huacai Chen <[email protected]> wrote:
>
> +Dan Williams
> +Sudarshan Rajagopalan
>
> On Thu, Jul 7, 2022 at 12:17 AM Will Deacon <[email protected]> wrote:
> >
> > On Tue, Jul 05, 2022 at 09:07:59PM +0800, Huacai Chen wrote:
> > > On Tue, Jul 5, 2022 at 5:29 PM Will Deacon <[email protected]> wrote:
> > > > On Mon, Jul 04, 2022 at 07:25:25PM +0800, Huacai Chen wrote:
> > > > > diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
> > > > > index 33e2a1ceee72..6f2e40bb695d 100644
> > > > > --- a/mm/sparse-vmemmap.c
> > > > > +++ b/mm/sparse-vmemmap.c
> > > > > @@ -686,6 +686,60 @@ int __meminit vmemmap_populate_basepages(unsigned long start, unsigned long end,
> > > > > return vmemmap_populate_range(start, end, node, altmap, NULL);
> > > > > }
> > > > >
> > > > > +void __weak __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
> > > > > + unsigned long addr, unsigned long next)
> > > > > +{
> > > > > +}
> > > > > +
> > > > > +int __weak __meminit vmemmap_check_pmd(pmd_t *pmd, int node, unsigned long addr,
> > > > > + unsigned long next)
> > > > > +{
> > > > > + return 0;
> > > > > +}
> > > > > +
> > > > > +int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end,
> > > > > + int node, struct vmem_altmap *altmap)
> > > > > +{
> > > > > + unsigned long addr;
> > > > > + unsigned long next;
> > > > > + pgd_t *pgd;
> > > > > + p4d_t *p4d;
> > > > > + pud_t *pud;
> > > > > + pmd_t *pmd;
> > > > > +
> > > > > + for (addr = start; addr < end; addr = next) {
> > > > > + next = pmd_addr_end(addr, end);
> > > > > +
> > > > > + pgd = vmemmap_pgd_populate(addr, node);
> > > > > + if (!pgd)
> > > > > + return -ENOMEM;
> > > > > +
> > > > > + p4d = vmemmap_p4d_populate(pgd, addr, node);
> > > > > + if (!p4d)
> > > > > + return -ENOMEM;
> > > > > +
> > > > > + pud = vmemmap_pud_populate(p4d, addr, node);
> > > > > + if (!pud)
> > > > > + return -ENOMEM;
> > > > > +
> > > > > + pmd = pmd_offset(pud, addr);
> > > > > + if (pmd_none(READ_ONCE(*pmd))) {
> > > > > + void *p;
> > > > > +
> > > > > + p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
> > > > > + if (p) {
> > > > > + vmemmap_set_pmd(pmd, p, node, addr, next);
> > > > > + continue;
> > > > > + } else if (altmap)
> > > > > + return -ENOMEM; /* no fallback */
> > > >
> > > > Why do you return -ENOMEM if 'altmap' here? That seems to be different to
> > > > what we currently have on arm64 and it's not clear to me why we're happy
> > > > with an altmap for the pmd case, but not for the pte case.
> > > The generic version is the same as X86. It seems that ARM64 always
> > > fallback whether there is an altmap, but X86 only fallback in the no
> > > altmap case. I don't know the reason of X86, can Dan Williams give
> > > some explaination?
> >
> > Right, I think we need to understand the new behaviour here before we adopt
> > it on arm64.
> Hi, Dan,
> Could you please tell us the reason? Thanks.
>
> And Sudarshan,
> You are the author of adding a fallback mechanism to ARM64, do you
> know why ARM64 is different from X86 (only fallback in no altmap
> case)?
>
> Huacai
>
> >
> > Will
On 14.07.22 14:34, Huacai Chen wrote:
> Oh, Sudarshan Rajagopalan's Email has changed, Let's update.
>
> Huacai
>
> On Fri, Jul 8, 2022 at 5:47 PM Huacai Chen <[email protected]> wrote:
>>
>> +Dan Williams
>> +Sudarshan Rajagopalan
>>
>> On Thu, Jul 7, 2022 at 12:17 AM Will Deacon <[email protected]> wrote:
>>>
>>> On Tue, Jul 05, 2022 at 09:07:59PM +0800, Huacai Chen wrote:
>>>> On Tue, Jul 5, 2022 at 5:29 PM Will Deacon <[email protected]> wrote:
>>>>> On Mon, Jul 04, 2022 at 07:25:25PM +0800, Huacai Chen wrote:
>>>>>> diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
>>>>>> index 33e2a1ceee72..6f2e40bb695d 100644
>>>>>> --- a/mm/sparse-vmemmap.c
>>>>>> +++ b/mm/sparse-vmemmap.c
>>>>>> @@ -686,6 +686,60 @@ int __meminit vmemmap_populate_basepages(unsigned long start, unsigned long end,
>>>>>> return vmemmap_populate_range(start, end, node, altmap, NULL);
>>>>>> }
>>>>>>
>>>>>> +void __weak __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
>>>>>> + unsigned long addr, unsigned long next)
>>>>>> +{
>>>>>> +}
>>>>>> +
>>>>>> +int __weak __meminit vmemmap_check_pmd(pmd_t *pmd, int node, unsigned long addr,
>>>>>> + unsigned long next)
>>>>>> +{
>>>>>> + return 0;
>>>>>> +}
>>>>>> +
>>>>>> +int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end,
>>>>>> + int node, struct vmem_altmap *altmap)
>>>>>> +{
>>>>>> + unsigned long addr;
>>>>>> + unsigned long next;
>>>>>> + pgd_t *pgd;
>>>>>> + p4d_t *p4d;
>>>>>> + pud_t *pud;
>>>>>> + pmd_t *pmd;
>>>>>> +
>>>>>> + for (addr = start; addr < end; addr = next) {
>>>>>> + next = pmd_addr_end(addr, end);
>>>>>> +
>>>>>> + pgd = vmemmap_pgd_populate(addr, node);
>>>>>> + if (!pgd)
>>>>>> + return -ENOMEM;
>>>>>> +
>>>>>> + p4d = vmemmap_p4d_populate(pgd, addr, node);
>>>>>> + if (!p4d)
>>>>>> + return -ENOMEM;
>>>>>> +
>>>>>> + pud = vmemmap_pud_populate(p4d, addr, node);
>>>>>> + if (!pud)
>>>>>> + return -ENOMEM;
>>>>>> +
>>>>>> + pmd = pmd_offset(pud, addr);
>>>>>> + if (pmd_none(READ_ONCE(*pmd))) {
>>>>>> + void *p;
>>>>>> +
>>>>>> + p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
>>>>>> + if (p) {
>>>>>> + vmemmap_set_pmd(pmd, p, node, addr, next);
>>>>>> + continue;
>>>>>> + } else if (altmap)
>>>>>> + return -ENOMEM; /* no fallback */
>>>>>
>>>>> Why do you return -ENOMEM if 'altmap' here? That seems to be different to
>>>>> what we currently have on arm64 and it's not clear to me why we're happy
>>>>> with an altmap for the pmd case, but not for the pte case.
>>>> The generic version is the same as X86. It seems that ARM64 always
>>>> fallback whether there is an altmap, but X86 only fallback in the no
>>>> altmap case. I don't know the reason of X86, can Dan Williams give
>>>> some explaination?
>>>
>>> Right, I think we need to understand the new behaviour here before we adopt
>>> it on arm64.
>> Hi, Dan,
>> Could you please tell us the reason? Thanks.
>>
>> And Sudarshan,
>> You are the author of adding a fallback mechanism to ARM64, do you
>> know why ARM64 is different from X86 (only fallback in no altmap
>> case)?
I think that's a purely theoretical issue: I assume that in any case we
care about, the altmap should be reasonably sized and aligned such that
this will always succeed.
To me it even sounds like the best idea to *consistently* fail if there
is no more space in the altmap, even if we'd have to fallback to PTE
(again, highly unlikely that this is relevant in practice). Could
indicate an altmap-size configuration issue.
--
Thanks,
David / dhildenb
Hi, Will,
On Wed, Jul 20, 2022 at 5:34 PM David Hildenbrand <[email protected]> wrote:
>
> On 14.07.22 14:34, Huacai Chen wrote:
> > Oh, Sudarshan Rajagopalan's Email has changed, Let's update.
> >
> > Huacai
> >
> > On Fri, Jul 8, 2022 at 5:47 PM Huacai Chen <[email protected]> wrote:
> >>
> >> +Dan Williams
> >> +Sudarshan Rajagopalan
> >>
> >> On Thu, Jul 7, 2022 at 12:17 AM Will Deacon <[email protected]> wrote:
> >>>
> >>> On Tue, Jul 05, 2022 at 09:07:59PM +0800, Huacai Chen wrote:
> >>>> On Tue, Jul 5, 2022 at 5:29 PM Will Deacon <[email protected]> wrote:
> >>>>> On Mon, Jul 04, 2022 at 07:25:25PM +0800, Huacai Chen wrote:
> >>>>>> diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
> >>>>>> index 33e2a1ceee72..6f2e40bb695d 100644
> >>>>>> --- a/mm/sparse-vmemmap.c
> >>>>>> +++ b/mm/sparse-vmemmap.c
> >>>>>> @@ -686,6 +686,60 @@ int __meminit vmemmap_populate_basepages(unsigned long start, unsigned long end,
> >>>>>> return vmemmap_populate_range(start, end, node, altmap, NULL);
> >>>>>> }
> >>>>>>
> >>>>>> +void __weak __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
> >>>>>> + unsigned long addr, unsigned long next)
> >>>>>> +{
> >>>>>> +}
> >>>>>> +
> >>>>>> +int __weak __meminit vmemmap_check_pmd(pmd_t *pmd, int node, unsigned long addr,
> >>>>>> + unsigned long next)
> >>>>>> +{
> >>>>>> + return 0;
> >>>>>> +}
> >>>>>> +
> >>>>>> +int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end,
> >>>>>> + int node, struct vmem_altmap *altmap)
> >>>>>> +{
> >>>>>> + unsigned long addr;
> >>>>>> + unsigned long next;
> >>>>>> + pgd_t *pgd;
> >>>>>> + p4d_t *p4d;
> >>>>>> + pud_t *pud;
> >>>>>> + pmd_t *pmd;
> >>>>>> +
> >>>>>> + for (addr = start; addr < end; addr = next) {
> >>>>>> + next = pmd_addr_end(addr, end);
> >>>>>> +
> >>>>>> + pgd = vmemmap_pgd_populate(addr, node);
> >>>>>> + if (!pgd)
> >>>>>> + return -ENOMEM;
> >>>>>> +
> >>>>>> + p4d = vmemmap_p4d_populate(pgd, addr, node);
> >>>>>> + if (!p4d)
> >>>>>> + return -ENOMEM;
> >>>>>> +
> >>>>>> + pud = vmemmap_pud_populate(p4d, addr, node);
> >>>>>> + if (!pud)
> >>>>>> + return -ENOMEM;
> >>>>>> +
> >>>>>> + pmd = pmd_offset(pud, addr);
> >>>>>> + if (pmd_none(READ_ONCE(*pmd))) {
> >>>>>> + void *p;
> >>>>>> +
> >>>>>> + p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
> >>>>>> + if (p) {
> >>>>>> + vmemmap_set_pmd(pmd, p, node, addr, next);
> >>>>>> + continue;
> >>>>>> + } else if (altmap)
> >>>>>> + return -ENOMEM; /* no fallback */
> >>>>>
> >>>>> Why do you return -ENOMEM if 'altmap' here? That seems to be different to
> >>>>> what we currently have on arm64 and it's not clear to me why we're happy
> >>>>> with an altmap for the pmd case, but not for the pte case.
> >>>> The generic version is the same as X86. It seems that ARM64 always
> >>>> fallback whether there is an altmap, but X86 only fallback in the no
> >>>> altmap case. I don't know the reason of X86, can Dan Williams give
> >>>> some explaination?
> >>>
> >>> Right, I think we need to understand the new behaviour here before we adopt
> >>> it on arm64.
> >> Hi, Dan,
> >> Could you please tell us the reason? Thanks.
> >>
> >> And Sudarshan,
> >> You are the author of adding a fallback mechanism to ARM64, do you
> >> know why ARM64 is different from X86 (only fallback in no altmap
> >> case)?
>
> I think that's a purely theoretical issue: I assume that in any case we
> care about, the altmap should be reasonably sized and aligned such that
> this will always succeed.
>
> To me it even sounds like the best idea to *consistently* fail if there
> is no more space in the altmap, even if we'd have to fallback to PTE
> (again, highly unlikely that this is relevant in practice). Could
> indicate an altmap-size configuration issue.
Does David's explanation make things clear? Moreover, I think Dan's
dedicated comments "no fallback" implies that his design is carefully
considered. So I think the generic version using the X86 logic is just
OK.
Huacai
>
> --
> Thanks,
>
> David / dhildenb
>
>
On Thu, Jul 21, 2022 at 10:08:10AM +0800, Huacai Chen wrote:
> On Wed, Jul 20, 2022 at 5:34 PM David Hildenbrand <[email protected]> wrote:
> > On 14.07.22 14:34, Huacai Chen wrote:
> > > On Fri, Jul 8, 2022 at 5:47 PM Huacai Chen <[email protected]> wrote:
> > >> On Thu, Jul 7, 2022 at 12:17 AM Will Deacon <[email protected]> wrote:
> > >>> On Tue, Jul 05, 2022 at 09:07:59PM +0800, Huacai Chen wrote:
> > >>>> On Tue, Jul 5, 2022 at 5:29 PM Will Deacon <[email protected]> wrote:
> > >>>>> On Mon, Jul 04, 2022 at 07:25:25PM +0800, Huacai Chen wrote:
> > >>>>>> +int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end,
> > >>>>>> + int node, struct vmem_altmap *altmap)
> > >>>>>> +{
> > >>>>>> + unsigned long addr;
> > >>>>>> + unsigned long next;
> > >>>>>> + pgd_t *pgd;
> > >>>>>> + p4d_t *p4d;
> > >>>>>> + pud_t *pud;
> > >>>>>> + pmd_t *pmd;
> > >>>>>> +
> > >>>>>> + for (addr = start; addr < end; addr = next) {
> > >>>>>> + next = pmd_addr_end(addr, end);
> > >>>>>> +
> > >>>>>> + pgd = vmemmap_pgd_populate(addr, node);
> > >>>>>> + if (!pgd)
> > >>>>>> + return -ENOMEM;
> > >>>>>> +
> > >>>>>> + p4d = vmemmap_p4d_populate(pgd, addr, node);
> > >>>>>> + if (!p4d)
> > >>>>>> + return -ENOMEM;
> > >>>>>> +
> > >>>>>> + pud = vmemmap_pud_populate(p4d, addr, node);
> > >>>>>> + if (!pud)
> > >>>>>> + return -ENOMEM;
> > >>>>>> +
> > >>>>>> + pmd = pmd_offset(pud, addr);
> > >>>>>> + if (pmd_none(READ_ONCE(*pmd))) {
> > >>>>>> + void *p;
> > >>>>>> +
> > >>>>>> + p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
> > >>>>>> + if (p) {
> > >>>>>> + vmemmap_set_pmd(pmd, p, node, addr, next);
> > >>>>>> + continue;
> > >>>>>> + } else if (altmap)
> > >>>>>> + return -ENOMEM; /* no fallback */
> > >>>>>
> > >>>>> Why do you return -ENOMEM if 'altmap' here? That seems to be different to
> > >>>>> what we currently have on arm64 and it's not clear to me why we're happy
> > >>>>> with an altmap for the pmd case, but not for the pte case.
> > >>>> The generic version is the same as X86. It seems that ARM64 always
> > >>>> fallback whether there is an altmap, but X86 only fallback in the no
> > >>>> altmap case. I don't know the reason of X86, can Dan Williams give
> > >>>> some explaination?
> > >>>
> > >>> Right, I think we need to understand the new behaviour here before we adopt
> > >>> it on arm64.
> > >> Hi, Dan,
> > >> Could you please tell us the reason? Thanks.
> > >>
> > >> And Sudarshan,
> > >> You are the author of adding a fallback mechanism to ARM64, do you
> > >> know why ARM64 is different from X86 (only fallback in no altmap
> > >> case)?
> >
> > I think that's a purely theoretical issue: I assume that in any case we
> > care about, the altmap should be reasonably sized and aligned such that
> > this will always succeed.
> >
> > To me it even sounds like the best idea to *consistently* fail if there
> > is no more space in the altmap, even if we'd have to fallback to PTE
> > (again, highly unlikely that this is relevant in practice). Could
> > indicate an altmap-size configuration issue.
>
> Does David's explanation make things clear? Moreover, I think Dan's
> dedicated comments "no fallback" implies that his design is carefully
> considered. So I think the generic version using the X86 logic is just
> OK.
I think the comment isn't worth the metaphorical paper that it's written
on! If you can bulk it up a bit based on David's reasoning, then that would
help. But yes, I'm happy with the code now, thanks both.
Will
Hi, Will,
On Thu, Jul 21, 2022 at 5:55 PM Will Deacon <[email protected]> wrote:
>
> On Thu, Jul 21, 2022 at 10:08:10AM +0800, Huacai Chen wrote:
> > On Wed, Jul 20, 2022 at 5:34 PM David Hildenbrand <[email protected]> wrote:
> > > On 14.07.22 14:34, Huacai Chen wrote:
> > > > On Fri, Jul 8, 2022 at 5:47 PM Huacai Chen <[email protected]> wrote:
> > > >> On Thu, Jul 7, 2022 at 12:17 AM Will Deacon <[email protected]> wrote:
> > > >>> On Tue, Jul 05, 2022 at 09:07:59PM +0800, Huacai Chen wrote:
> > > >>>> On Tue, Jul 5, 2022 at 5:29 PM Will Deacon <[email protected]> wrote:
> > > >>>>> On Mon, Jul 04, 2022 at 07:25:25PM +0800, Huacai Chen wrote:
> > > >>>>>> +int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end,
> > > >>>>>> + int node, struct vmem_altmap *altmap)
> > > >>>>>> +{
> > > >>>>>> + unsigned long addr;
> > > >>>>>> + unsigned long next;
> > > >>>>>> + pgd_t *pgd;
> > > >>>>>> + p4d_t *p4d;
> > > >>>>>> + pud_t *pud;
> > > >>>>>> + pmd_t *pmd;
> > > >>>>>> +
> > > >>>>>> + for (addr = start; addr < end; addr = next) {
> > > >>>>>> + next = pmd_addr_end(addr, end);
> > > >>>>>> +
> > > >>>>>> + pgd = vmemmap_pgd_populate(addr, node);
> > > >>>>>> + if (!pgd)
> > > >>>>>> + return -ENOMEM;
> > > >>>>>> +
> > > >>>>>> + p4d = vmemmap_p4d_populate(pgd, addr, node);
> > > >>>>>> + if (!p4d)
> > > >>>>>> + return -ENOMEM;
> > > >>>>>> +
> > > >>>>>> + pud = vmemmap_pud_populate(p4d, addr, node);
> > > >>>>>> + if (!pud)
> > > >>>>>> + return -ENOMEM;
> > > >>>>>> +
> > > >>>>>> + pmd = pmd_offset(pud, addr);
> > > >>>>>> + if (pmd_none(READ_ONCE(*pmd))) {
> > > >>>>>> + void *p;
> > > >>>>>> +
> > > >>>>>> + p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
> > > >>>>>> + if (p) {
> > > >>>>>> + vmemmap_set_pmd(pmd, p, node, addr, next);
> > > >>>>>> + continue;
> > > >>>>>> + } else if (altmap)
> > > >>>>>> + return -ENOMEM; /* no fallback */
> > > >>>>>
> > > >>>>> Why do you return -ENOMEM if 'altmap' here? That seems to be different to
> > > >>>>> what we currently have on arm64 and it's not clear to me why we're happy
> > > >>>>> with an altmap for the pmd case, but not for the pte case.
> > > >>>> The generic version is the same as X86. It seems that ARM64 always
> > > >>>> fallback whether there is an altmap, but X86 only fallback in the no
> > > >>>> altmap case. I don't know the reason of X86, can Dan Williams give
> > > >>>> some explaination?
> > > >>>
> > > >>> Right, I think we need to understand the new behaviour here before we adopt
> > > >>> it on arm64.
> > > >> Hi, Dan,
> > > >> Could you please tell us the reason? Thanks.
> > > >>
> > > >> And Sudarshan,
> > > >> You are the author of adding a fallback mechanism to ARM64, do you
> > > >> know why ARM64 is different from X86 (only fallback in no altmap
> > > >> case)?
> > >
> > > I think that's a purely theoretical issue: I assume that in any case we
> > > care about, the altmap should be reasonably sized and aligned such that
> > > this will always succeed.
> > >
> > > To me it even sounds like the best idea to *consistently* fail if there
> > > is no more space in the altmap, even if we'd have to fallback to PTE
> > > (again, highly unlikely that this is relevant in practice). Could
> > > indicate an altmap-size configuration issue.
> >
> > Does David's explanation make things clear? Moreover, I think Dan's
> > dedicated comments "no fallback" implies that his design is carefully
> > considered. So I think the generic version using the X86 logic is just
> > OK.
>
> I think the comment isn't worth the metaphorical paper that it's written
> on! If you can bulk it up a bit based on David's reasoning, then that would
> help. But yes, I'm happy with the code now, thanks both.
OK, I will add a detailed comment here.
Huacai
>
> Will
Huacai Chen wrote:
> Hi, Will,
>
> On Thu, Jul 21, 2022 at 5:55 PM Will Deacon <[email protected]> wrote:
> >
> > On Thu, Jul 21, 2022 at 10:08:10AM +0800, Huacai Chen wrote:
> > > On Wed, Jul 20, 2022 at 5:34 PM David Hildenbrand <[email protected]> wrote:
> > > > On 14.07.22 14:34, Huacai Chen wrote:
> > > > > On Fri, Jul 8, 2022 at 5:47 PM Huacai Chen <[email protected]> wrote:
> > > > >> On Thu, Jul 7, 2022 at 12:17 AM Will Deacon <[email protected]> wrote:
> > > > >>> On Tue, Jul 05, 2022 at 09:07:59PM +0800, Huacai Chen wrote:
> > > > >>>> On Tue, Jul 5, 2022 at 5:29 PM Will Deacon <[email protected]> wrote:
> > > > >>>>> On Mon, Jul 04, 2022 at 07:25:25PM +0800, Huacai Chen wrote:
> > > > >>>>>> +int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end,
> > > > >>>>>> + int node, struct vmem_altmap *altmap)
> > > > >>>>>> +{
> > > > >>>>>> + unsigned long addr;
> > > > >>>>>> + unsigned long next;
> > > > >>>>>> + pgd_t *pgd;
> > > > >>>>>> + p4d_t *p4d;
> > > > >>>>>> + pud_t *pud;
> > > > >>>>>> + pmd_t *pmd;
> > > > >>>>>> +
> > > > >>>>>> + for (addr = start; addr < end; addr = next) {
> > > > >>>>>> + next = pmd_addr_end(addr, end);
> > > > >>>>>> +
> > > > >>>>>> + pgd = vmemmap_pgd_populate(addr, node);
> > > > >>>>>> + if (!pgd)
> > > > >>>>>> + return -ENOMEM;
> > > > >>>>>> +
> > > > >>>>>> + p4d = vmemmap_p4d_populate(pgd, addr, node);
> > > > >>>>>> + if (!p4d)
> > > > >>>>>> + return -ENOMEM;
> > > > >>>>>> +
> > > > >>>>>> + pud = vmemmap_pud_populate(p4d, addr, node);
> > > > >>>>>> + if (!pud)
> > > > >>>>>> + return -ENOMEM;
> > > > >>>>>> +
> > > > >>>>>> + pmd = pmd_offset(pud, addr);
> > > > >>>>>> + if (pmd_none(READ_ONCE(*pmd))) {
> > > > >>>>>> + void *p;
> > > > >>>>>> +
> > > > >>>>>> + p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
> > > > >>>>>> + if (p) {
> > > > >>>>>> + vmemmap_set_pmd(pmd, p, node, addr, next);
> > > > >>>>>> + continue;
> > > > >>>>>> + } else if (altmap)
> > > > >>>>>> + return -ENOMEM; /* no fallback */
> > > > >>>>>
> > > > >>>>> Why do you return -ENOMEM if 'altmap' here? That seems to be different to
> > > > >>>>> what we currently have on arm64 and it's not clear to me why we're happy
> > > > >>>>> with an altmap for the pmd case, but not for the pte case.
> > > > >>>> The generic version is the same as X86. It seems that ARM64 always
> > > > >>>> fallback whether there is an altmap, but X86 only fallback in the no
> > > > >>>> altmap case. I don't know the reason of X86, can Dan Williams give
> > > > >>>> some explaination?
> > > > >>>
> > > > >>> Right, I think we need to understand the new behaviour here before we adopt
> > > > >>> it on arm64.
> > > > >> Hi, Dan,
> > > > >> Could you please tell us the reason? Thanks.
> > > > >>
> > > > >> And Sudarshan,
> > > > >> You are the author of adding a fallback mechanism to ARM64, do you
> > > > >> know why ARM64 is different from X86 (only fallback in no altmap
> > > > >> case)?
> > > >
> > > > I think that's a purely theoretical issue: I assume that in any case we
> > > > care about, the altmap should be reasonably sized and aligned such that
> > > > this will always succeed.
> > > >
> > > > To me it even sounds like the best idea to *consistently* fail if there
> > > > is no more space in the altmap, even if we'd have to fallback to PTE
> > > > (again, highly unlikely that this is relevant in practice). Could
> > > > indicate an altmap-size configuration issue.
> > >
> > > Does David's explanation make things clear? Moreover, I think Dan's
> > > dedicated comments "no fallback" implies that his design is carefully
> > > considered. So I think the generic version using the X86 logic is just
> > > OK.
> >
> > I think the comment isn't worth the metaphorical paper that it's written
> > on! If you can bulk it up a bit based on David's reasoning, then that would
> > help. But yes, I'm happy with the code now, thanks both.
> OK, I will add a detailed comment here.
Apologies for coming late to the party here, original ping came while I
was on vacation and I only just now noticed the direct questions. All
resolved now or is a question still pending?
Hi, Dan,
On Fri, Jul 22, 2022 at 5:54 AM Dan Williams <[email protected]> wrote:
>
> Huacai Chen wrote:
> > Hi, Will,
> >
> > On Thu, Jul 21, 2022 at 5:55 PM Will Deacon <[email protected]> wrote:
> > >
> > > On Thu, Jul 21, 2022 at 10:08:10AM +0800, Huacai Chen wrote:
> > > > On Wed, Jul 20, 2022 at 5:34 PM David Hildenbrand <[email protected]> wrote:
> > > > > On 14.07.22 14:34, Huacai Chen wrote:
> > > > > > On Fri, Jul 8, 2022 at 5:47 PM Huacai Chen <[email protected]> wrote:
> > > > > >> On Thu, Jul 7, 2022 at 12:17 AM Will Deacon <[email protected]> wrote:
> > > > > >>> On Tue, Jul 05, 2022 at 09:07:59PM +0800, Huacai Chen wrote:
> > > > > >>>> On Tue, Jul 5, 2022 at 5:29 PM Will Deacon <[email protected]> wrote:
> > > > > >>>>> On Mon, Jul 04, 2022 at 07:25:25PM +0800, Huacai Chen wrote:
> > > > > >>>>>> +int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end,
> > > > > >>>>>> + int node, struct vmem_altmap *altmap)
> > > > > >>>>>> +{
> > > > > >>>>>> + unsigned long addr;
> > > > > >>>>>> + unsigned long next;
> > > > > >>>>>> + pgd_t *pgd;
> > > > > >>>>>> + p4d_t *p4d;
> > > > > >>>>>> + pud_t *pud;
> > > > > >>>>>> + pmd_t *pmd;
> > > > > >>>>>> +
> > > > > >>>>>> + for (addr = start; addr < end; addr = next) {
> > > > > >>>>>> + next = pmd_addr_end(addr, end);
> > > > > >>>>>> +
> > > > > >>>>>> + pgd = vmemmap_pgd_populate(addr, node);
> > > > > >>>>>> + if (!pgd)
> > > > > >>>>>> + return -ENOMEM;
> > > > > >>>>>> +
> > > > > >>>>>> + p4d = vmemmap_p4d_populate(pgd, addr, node);
> > > > > >>>>>> + if (!p4d)
> > > > > >>>>>> + return -ENOMEM;
> > > > > >>>>>> +
> > > > > >>>>>> + pud = vmemmap_pud_populate(p4d, addr, node);
> > > > > >>>>>> + if (!pud)
> > > > > >>>>>> + return -ENOMEM;
> > > > > >>>>>> +
> > > > > >>>>>> + pmd = pmd_offset(pud, addr);
> > > > > >>>>>> + if (pmd_none(READ_ONCE(*pmd))) {
> > > > > >>>>>> + void *p;
> > > > > >>>>>> +
> > > > > >>>>>> + p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
> > > > > >>>>>> + if (p) {
> > > > > >>>>>> + vmemmap_set_pmd(pmd, p, node, addr, next);
> > > > > >>>>>> + continue;
> > > > > >>>>>> + } else if (altmap)
> > > > > >>>>>> + return -ENOMEM; /* no fallback */
> > > > > >>>>>
> > > > > >>>>> Why do you return -ENOMEM if 'altmap' here? That seems to be different to
> > > > > >>>>> what we currently have on arm64 and it's not clear to me why we're happy
> > > > > >>>>> with an altmap for the pmd case, but not for the pte case.
> > > > > >>>> The generic version is the same as X86. It seems that ARM64 always
> > > > > >>>> fallback whether there is an altmap, but X86 only fallback in the no
> > > > > >>>> altmap case. I don't know the reason of X86, can Dan Williams give
> > > > > >>>> some explaination?
> > > > > >>>
> > > > > >>> Right, I think we need to understand the new behaviour here before we adopt
> > > > > >>> it on arm64.
> > > > > >> Hi, Dan,
> > > > > >> Could you please tell us the reason? Thanks.
> > > > > >>
> > > > > >> And Sudarshan,
> > > > > >> You are the author of adding a fallback mechanism to ARM64, do you
> > > > > >> know why ARM64 is different from X86 (only fallback in no altmap
> > > > > >> case)?
> > > > >
> > > > > I think that's a purely theoretical issue: I assume that in any case we
> > > > > care about, the altmap should be reasonably sized and aligned such that
> > > > > this will always succeed.
> > > > >
> > > > > To me it even sounds like the best idea to *consistently* fail if there
> > > > > is no more space in the altmap, even if we'd have to fallback to PTE
> > > > > (again, highly unlikely that this is relevant in practice). Could
> > > > > indicate an altmap-size configuration issue.
> > > >
> > > > Does David's explanation make things clear? Moreover, I think Dan's
> > > > dedicated comments "no fallback" implies that his design is carefully
> > > > considered. So I think the generic version using the X86 logic is just
> > > > OK.
> > >
> > > I think the comment isn't worth the metaphorical paper that it's written
> > > on! If you can bulk it up a bit based on David's reasoning, then that would
> > > help. But yes, I'm happy with the code now, thanks both.
> > OK, I will add a detailed comment here.
>
> Apologies for coming late to the party here, original ping came while I
> was on vacation and I only just now noticed the direct questions. All
> resolved now or is a question still pending?
I updated the patch and added a detailed comment based on David's
explanation [1]. If that description is correct, I think there are no
more questions. Thank you.
[1] https://lore.kernel.org/loongarch/[email protected]/T/#u
Huacai
>