This series enables sparse-vmemmap for LoongArch. LoongArch cannot use
the generic helpers directly, because MIPS and LoongArch need to call
pgd_init()/pud_init()/pmd_init() when populating page tables. So we
first adjust the prototypes of p?d_init() so that the generic helpers
can call them, then enable sparse-vmemmap with the generic helpers, and
finally generalise vmemmap_populate_hugepages() for ARM64, X86 and
LoongArch.
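For context, the prototype adjustment in patch 1 is roughly along these
lines (a sketch for illustration; see the patch itself for the exact
signatures):

	/* Before: MIPS-style prototypes that generic code cannot call */
	extern void pgd_init(unsigned long page);
	extern void pmd_init(unsigned long page, unsigned long pagetable);

	/* After: uniform prototypes, callable from the generic helpers */
	extern void pgd_init(void *addr);
	extern void pud_init(void *addr);
	extern void pmd_init(void *addr);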
V1 -> V2:
Split ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP into a separate patch.
Huacai Chen and Feiyang Chen (4):
MIPS&LoongArch: Adjust prototypes of p?d_init().
LoongArch: Add sparse memory vmemmap support.
mm/sparse-vmemmap: Generalise vmemmap_populate_hugepages().
LoongArch: Enable ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP.
Signed-off-by: Huacai Chen <[email protected]>
Signed-off-by: Feiyang Chen <[email protected]>
---
arch/arm64/mm/mmu.c | 53 ++++++--------------
arch/loongarch/Kconfig | 2 +
arch/loongarch/include/asm/pgalloc.h | 13 +----
arch/loongarch/include/asm/pgtable.h | 13 +++--
arch/loongarch/include/asm/sparsemem.h | 8 +++
arch/loongarch/kernel/numa.c | 4 +-
arch/loongarch/mm/init.c | 44 +++++++++++++++-
arch/loongarch/mm/pgtable.c | 23 +++++----
arch/mips/include/asm/pgalloc.h | 8 +--
arch/mips/include/asm/pgtable-64.h | 8 +--
arch/mips/kvm/mmu.c | 3 +-
arch/mips/mm/pgtable-32.c | 10 ++--
arch/mips/mm/pgtable-64.c | 18 ++++---
arch/mips/mm/pgtable.c | 2 +-
arch/x86/mm/init_64.c | 92 ++++++++++++----------------------
include/linux/mm.h | 8 +++
include/linux/page-flags.h | 1 +
mm/sparse-vmemmap.c | 64 +++++++++++++++++++++++
18 files changed, 222 insertions(+), 152 deletions(-)
--
2.27.0
From: Feiyang Chen <[email protected]>
Generalise vmemmap_populate_hugepages() so ARM64 & X86 & LoongArch can
share its implementation.
Signed-off-by: Huacai Chen <[email protected]>
Signed-off-by: Feiyang Chen <[email protected]>
---
arch/arm64/mm/mmu.c | 53 ++++++-----------------
arch/loongarch/mm/init.c | 63 ++++++++-------------------
arch/x86/mm/init_64.c | 92 ++++++++++++++--------------------------
include/linux/mm.h | 6 +++
mm/sparse-vmemmap.c | 54 +++++++++++++++++++++++
5 files changed, 124 insertions(+), 144 deletions(-)
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 626ec32873c6..b080a65c719d 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -1158,49 +1158,24 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
return vmemmap_populate_basepages(start, end, node, altmap);
}
#else /* !ARM64_KERNEL_USES_PMD_MAPS */
+void __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
+ unsigned long addr, unsigned long next)
+{
+ pmd_set_huge(pmd, __pa(p), __pgprot(PROT_SECT_NORMAL));
+}
+
+int __meminit vmemmap_check_pmd(pmd_t *pmd, int node, unsigned long addr,
+ unsigned long next)
+{
+ vmemmap_verify((pte_t *)pmd, node, addr, next);
+ return 1;
+}
+
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
struct vmem_altmap *altmap)
{
- unsigned long addr = start;
- unsigned long next;
- pgd_t *pgdp;
- p4d_t *p4dp;
- pud_t *pudp;
- pmd_t *pmdp;
-
WARN_ON((start < VMEMMAP_START) || (end > VMEMMAP_END));
- do {
- next = pmd_addr_end(addr, end);
-
- pgdp = vmemmap_pgd_populate(addr, node);
- if (!pgdp)
- return -ENOMEM;
-
- p4dp = vmemmap_p4d_populate(pgdp, addr, node);
- if (!p4dp)
- return -ENOMEM;
-
- pudp = vmemmap_pud_populate(p4dp, addr, node);
- if (!pudp)
- return -ENOMEM;
-
- pmdp = pmd_offset(pudp, addr);
- if (pmd_none(READ_ONCE(*pmdp))) {
- void *p = NULL;
-
- p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
- if (!p) {
- if (vmemmap_populate_basepages(addr, next, node, altmap))
- return -ENOMEM;
- continue;
- }
-
- pmd_set_huge(pmdp, __pa(p), __pgprot(PROT_SECT_NORMAL));
- } else
- vmemmap_verify((pte_t *)pmdp, node, addr, next);
- } while (addr = next, addr != end);
-
- return 0;
+ return vmemmap_populate_hugepages(start, end, node, altmap);
}
#endif /* !ARM64_KERNEL_USES_PMD_MAPS */
diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c
index 35128229fe46..3190b3cd52d1 100644
--- a/arch/loongarch/mm/init.c
+++ b/arch/loongarch/mm/init.c
@@ -158,52 +158,25 @@ void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
#endif
#ifdef CONFIG_SPARSEMEM_VMEMMAP
-int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end,
- int node, struct vmem_altmap *altmap)
+void __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
+ unsigned long addr, unsigned long next)
{
- unsigned long addr = start;
- unsigned long next;
- pgd_t *pgd;
- p4d_t *p4d;
- pud_t *pud;
- pmd_t *pmd;
-
- for (addr = start; addr < end; addr = next) {
- next = pmd_addr_end(addr, end);
-
- pgd = vmemmap_pgd_populate(addr, node);
- if (!pgd)
- return -ENOMEM;
- p4d = vmemmap_p4d_populate(pgd, addr, node);
- if (!p4d)
- return -ENOMEM;
- pud = vmemmap_pud_populate(p4d, addr, node);
- if (!pud)
- return -ENOMEM;
-
- pmd = pmd_offset(pud, addr);
- if (pmd_none(*pmd)) {
- void *p = NULL;
-
- p = vmemmap_alloc_block_buf(PMD_SIZE, node, NULL);
- if (p) {
- pmd_t entry;
-
- entry = pfn_pmd(virt_to_pfn(p), PAGE_KERNEL);
- pmd_val(entry) |= _PAGE_HUGE | _PAGE_HGLOBAL;
- set_pmd_at(&init_mm, addr, pmd, entry);
-
- continue;
- }
- } else if (pmd_val(*pmd) & _PAGE_HUGE) {
- vmemmap_verify((pte_t *)pmd, node, addr, next);
- continue;
- }
- if (vmemmap_populate_basepages(addr, next, node, NULL))
- return -ENOMEM;
- }
-
- return 0;
+ pmd_t entry;
+
+ entry = pfn_pmd(virt_to_pfn(p), PAGE_KERNEL);
+ pmd_val(entry) |= _PAGE_HUGE | _PAGE_HGLOBAL;
+ set_pmd_at(&init_mm, addr, pmd, entry);
+}
+
+int __meminit vmemmap_check_pmd(pmd_t *pmd, int node, unsigned long addr,
+ unsigned long next)
+{
+ int huge = pmd_val(*pmd) & _PAGE_HUGE;
+
+ if (huge)
+ vmemmap_verify((pte_t *)pmd, node, addr, next);
+
+ return huge;
}
#if CONFIG_PGTABLE_LEVELS == 2
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 39c5246964a9..4911093ee2f3 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1532,72 +1532,44 @@ static long __meminitdata addr_start, addr_end;
static void __meminitdata *p_start, *p_end;
static int __meminitdata node_start;
-static int __meminit vmemmap_populate_hugepages(unsigned long start,
- unsigned long end, int node, struct vmem_altmap *altmap)
+void __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
+ unsigned long addr, unsigned long next)
{
- unsigned long addr;
- unsigned long next;
- pgd_t *pgd;
- p4d_t *p4d;
- pud_t *pud;
- pmd_t *pmd;
-
- for (addr = start; addr < end; addr = next) {
- next = pmd_addr_end(addr, end);
-
- pgd = vmemmap_pgd_populate(addr, node);
- if (!pgd)
- return -ENOMEM;
-
- p4d = vmemmap_p4d_populate(pgd, addr, node);
- if (!p4d)
- return -ENOMEM;
-
- pud = vmemmap_pud_populate(p4d, addr, node);
- if (!pud)
- return -ENOMEM;
-
- pmd = pmd_offset(pud, addr);
- if (pmd_none(*pmd)) {
- void *p;
-
- p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
- if (p) {
- pte_t entry;
-
- entry = pfn_pte(__pa(p) >> PAGE_SHIFT,
- PAGE_KERNEL_LARGE);
- set_pmd(pmd, __pmd(pte_val(entry)));
+ pte_t entry;
+
+ entry = pfn_pte(__pa(p) >> PAGE_SHIFT,
+ PAGE_KERNEL_LARGE);
+ set_pmd(pmd, __pmd(pte_val(entry)));
+
+ /* check to see if we have contiguous blocks */
+ if (p_end != p || node_start != node) {
+ if (p_start)
+ pr_debug(" [%lx-%lx] PMD -> [%p-%p] on node %d\n",
+ addr_start, addr_end-1, p_start, p_end-1, node_start);
+ addr_start = addr;
+ node_start = node;
+ p_start = p;
+ }
- /* check to see if we have contiguous blocks */
- if (p_end != p || node_start != node) {
- if (p_start)
- pr_debug(" [%lx-%lx] PMD -> [%p-%p] on node %d\n",
- addr_start, addr_end-1, p_start, p_end-1, node_start);
- addr_start = addr;
- node_start = node;
- p_start = p;
- }
+ addr_end = addr + PMD_SIZE;
+ p_end = p + PMD_SIZE;
- addr_end = addr + PMD_SIZE;
- p_end = p + PMD_SIZE;
+ if (!IS_ALIGNED(addr, PMD_SIZE) ||
+ !IS_ALIGNED(next, PMD_SIZE))
+ vmemmap_use_new_sub_pmd(addr, next);
+}
- if (!IS_ALIGNED(addr, PMD_SIZE) ||
- !IS_ALIGNED(next, PMD_SIZE))
- vmemmap_use_new_sub_pmd(addr, next);
+int __meminit vmemmap_check_pmd(pmd_t *pmd, int node, unsigned long addr,
+ unsigned long next)
+{
+ int large = pmd_large(*pmd);
- continue;
- } else if (altmap)
- return -ENOMEM; /* no fallback */
- } else if (pmd_large(*pmd)) {
- vmemmap_verify((pte_t *)pmd, node, addr, next);
- vmemmap_use_sub_pmd(addr, next);
- continue;
- }
- if (vmemmap_populate_basepages(addr, next, node, NULL))
- return -ENOMEM;
+ if (pmd_large(*pmd)) {
+ vmemmap_verify((pte_t *)pmd, node, addr, next);
+ vmemmap_use_sub_pmd(addr, next);
}
- return 0;
+
+ return large;
}
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index f6ed6bc0a65f..894d59e1ffd6 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3216,8 +3216,14 @@ struct vmem_altmap;
void *vmemmap_alloc_block_buf(unsigned long size, int node,
struct vmem_altmap *altmap);
void vmemmap_verify(pte_t *, int, unsigned long, unsigned long);
+void vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
+ unsigned long addr, unsigned long next);
+int vmemmap_check_pmd(pmd_t *pmd, int node, unsigned long addr,
+ unsigned long next);
int vmemmap_populate_basepages(unsigned long start, unsigned long end,
int node, struct vmem_altmap *altmap);
+int vmemmap_populate_hugepages(unsigned long start, unsigned long end,
+ int node, struct vmem_altmap *altmap);
int vmemmap_populate(unsigned long start, unsigned long end, int node,
struct vmem_altmap *altmap);
void vmemmap_populate_print_last(void);
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index 33e2a1ceee72..6f2e40bb695d 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -686,6 +686,60 @@ int __meminit vmemmap_populate_basepages(unsigned long start, unsigned long end,
return vmemmap_populate_range(start, end, node, altmap, NULL);
}
+void __weak __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
+ unsigned long addr, unsigned long next)
+{
+}
+
+int __weak __meminit vmemmap_check_pmd(pmd_t *pmd, int node, unsigned long addr,
+ unsigned long next)
+{
+ return 0;
+}
+
+int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end,
+ int node, struct vmem_altmap *altmap)
+{
+ unsigned long addr;
+ unsigned long next;
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+
+ for (addr = start; addr < end; addr = next) {
+ next = pmd_addr_end(addr, end);
+
+ pgd = vmemmap_pgd_populate(addr, node);
+ if (!pgd)
+ return -ENOMEM;
+
+ p4d = vmemmap_p4d_populate(pgd, addr, node);
+ if (!p4d)
+ return -ENOMEM;
+
+ pud = vmemmap_pud_populate(p4d, addr, node);
+ if (!pud)
+ return -ENOMEM;
+
+ pmd = pmd_offset(pud, addr);
+ if (pmd_none(READ_ONCE(*pmd))) {
+ void *p;
+
+ p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
+ if (p) {
+ vmemmap_set_pmd(pmd, p, node, addr, next);
+ continue;
+ } else if (altmap)
+ return -ENOMEM; /* no fallback */
+ } else if (vmemmap_check_pmd(pmd, node, addr, next))
+ continue;
+ if (vmemmap_populate_basepages(addr, next, node, altmap))
+ return -ENOMEM;
+ }
+ return 0;
+}
+
/*
* For compound pages bigger than section size (e.g. x86 1G compound
* pages with 2M subsection size) fill the rest of sections as tail
--
2.27.0
On Thu, Jun 30, 2022 at 6:32 AM Huacai Chen <[email protected]> wrote:
>
> From: Feiyang Chen <[email protected]>
>
> Generalise vmemmap_populate_hugepages() so ARM64 & X86 & LoongArch can
> share its implementation.
Sharing this function is good, thanks for consolidating this.
> Signed-off-by: Huacai Chen <[email protected]>
> Signed-off-by: Feiyang Chen <[email protected]>
The Signed-off-by lines are in the wrong order: they should start with the author
and end with the final submitter.
> index 33e2a1ceee72..6f2e40bb695d 100644
> --- a/mm/sparse-vmemmap.c
> +++ b/mm/sparse-vmemmap.c
> @@ -686,6 +686,60 @@ int __meminit vmemmap_populate_basepages(unsigned long start, unsigned long end,
> return vmemmap_populate_range(start, end, node, altmap, NULL);
> }
>
> +void __weak __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
> + unsigned long addr, unsigned long next)
> +{
> +}
> +
> +int __weak __meminit vmemmap_check_pmd(pmd_t *pmd, int node, unsigned long addr,
> + unsigned long next)
> +{
> + return 0;
> +}
> +
I think inline functions would be better here, both for compiler optimization
and to make it easier to track the code flow. The normal way we do these
in architecture-specific headers is to override the functions by defining a
macro of the same name.
Arnd
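For readers unfamiliar with the convention Arnd describes, the
macro-override pattern typically looks like the following sketch (the
arch placement and the pmd_leaf() check are hypothetical; this is not
code from this series):

	/* arch header, e.g. arch/foo/include/asm/pgtable.h */
	static inline int vmemmap_check_pmd(pmd_t *pmd, int node,
					    unsigned long addr, unsigned long next)
	{
		return pmd_leaf(*pmd);	/* arch-specific huge-mapping check */
	}
	#define vmemmap_check_pmd vmemmap_check_pmd

	/* generic header: used only if no arch defined the macro */
	#ifndef vmemmap_check_pmd
	static inline int vmemmap_check_pmd(pmd_t *pmd, int node,
					    unsigned long addr, unsigned long next)
	{
		return 0;
	}
	#endif

This lets the compiler inline the arch implementation while the generic
fallback compiles away entirely.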
Hi, Arnd,
On Thu, Jun 30, 2022 at 2:05 PM Arnd Bergmann <[email protected]> wrote:
>
> On Thu, Jun 30, 2022 at 6:32 AM Huacai Chen <[email protected]> wrote:
> >
> > From: Feiyang Chen <[email protected]>
> >
> > Generalise vmemmap_populate_hugepages() so ARM64 & X86 & LoongArch can
> > share its implementation.
>
> Sharing this function is good, thanks for consolidating this.
>
> > Signed-off-by: Huacai Chen <[email protected]>
> > Signed-off-by: Feiyang Chen <[email protected]>
>
> The Signed-off-by lines are in the wrong order: they should start with the author
> and end with the final submitter.
OK, I will change the order.
>
> > index 33e2a1ceee72..6f2e40bb695d 100644
> > --- a/mm/sparse-vmemmap.c
> > +++ b/mm/sparse-vmemmap.c
> > @@ -686,6 +686,60 @@ int __meminit vmemmap_populate_basepages(unsigned long start, unsigned long end,
> > return vmemmap_populate_range(start, end, node, altmap, NULL);
> > }
> >
> > +void __weak __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
> > + unsigned long addr, unsigned long next)
> > +{
> > +}
> > +
> > +int __weak __meminit vmemmap_check_pmd(pmd_t *pmd, int node, unsigned long addr,
> > + unsigned long next)
> > +{
> > + return 0;
> > +}
> > +
>
> I think inline functions would be better here, both for compiler optimization
> and to make it easier to track the code flow. The normal way we do these
> in architecture-specific headers is to override the functions by defining a
> macro of the same name.
In my opinion, weak functions are suitable for overriding when they are
only used in a single .c file, as is the case here. If we don't use weak
functions, this series needs as many as four #ifdefs, for pud_init(),
pmd_init(), vmemmap_set_pmd() and vmemmap_check_pmd() respectively,
which increases the maintenance burden (just my own opinion, maybe not
an objective fact). A sketch of that #ifdef alternative follows below.
Huacai
>
>
> Arnd
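For illustration, the #ifdef-based alternative Huacai describes would
mean repeating a guard like this four times in generic code (a sketch,
not code from this series):

	/* mm/sparse-vmemmap.c or a generic header */
	#ifndef vmemmap_set_pmd
	static inline void vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
					   unsigned long addr, unsigned long next)
	{
	}
	#endif

	/*
	 * ...plus matching #ifndef blocks for vmemmap_check_pmd(),
	 * pud_init() and pmd_init(), each paired with a
	 * "#define foo foo" in every overriding architecture.
	 */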
Hello!
Your subject looks unfinished...
MBR, Sergey
Hi, Sergei,
On Thu, Jun 30, 2022 at 4:30 PM Sergei Shtylyov
<[email protected]> wrote:
>
> Hello!
>
> Your subject looks unfinished...
Thank you, this seems to be because the subject is too long.
Huacai
>
> MBR, Sergey