2022-06-25 09:59:05

by Huacai Chen

Subject: [PATCH 1/3] MIPS&LoongArch: Adjust prototypes of p?d_init()

From: Feiyang Chen <[email protected]>

We are preparing to add sparse vmemmap support to LoongArch. MIPS and
LoongArch need to call pgd_init()/pud_init()/pmd_init() when populating
page tables, so adjust their prototypes so that generic helpers can
call them.
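
For illustration (not part of the patch), this is the kind of generic
caller the new prototypes enable. The sketch below is modelled on the
vmemmap_pud_populate() change made later in this series: a freshly
allocated table is initialised without the caller having to know the
architecture's invalid_pte_table address:

pud_t * __meminit vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node)
{
	pud_t *pud = pud_offset(p4d, addr);

	if (pud_none(*pud)) {
		void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);

		if (!p)
			return NULL;
		pmd_init(p);	/* new prototype: just a void *, no pagetable arg */
		pud_populate(&init_mm, pud, p);
	}
	return pud;
}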

Signed-off-by: Huacai Chen <[email protected]>
Signed-off-by: Feiyang Chen <[email protected]>
---
arch/loongarch/include/asm/pgalloc.h | 13 ++-----------
arch/loongarch/include/asm/pgtable.h | 8 ++++----
arch/loongarch/kernel/numa.c | 4 ++--
arch/loongarch/mm/pgtable.c | 23 +++++++++++++----------
arch/mips/include/asm/pgalloc.h | 8 ++++----
arch/mips/include/asm/pgtable-64.h | 8 ++++----
arch/mips/kvm/mmu.c | 3 +--
arch/mips/mm/pgtable-32.c | 10 +++++-----
arch/mips/mm/pgtable-64.c | 18 ++++++++++--------
arch/mips/mm/pgtable.c | 2 +-
10 files changed, 46 insertions(+), 51 deletions(-)

diff --git a/arch/loongarch/include/asm/pgalloc.h b/arch/loongarch/include/asm/pgalloc.h
index b0a57b25c131..6a492d7aeeec 100644
--- a/arch/loongarch/include/asm/pgalloc.h
+++ b/arch/loongarch/include/asm/pgalloc.h
@@ -42,15 +42,6 @@ static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud)

extern void pagetable_init(void);

-/*
- * Initialize a new pmd table with invalid pointers.
- */
-extern void pmd_init(unsigned long page, unsigned long pagetable);
-
-/*
- * Initialize a new pgd / pmd table with invalid pointers.
- */
-extern void pgd_init(unsigned long page);
extern pgd_t *pgd_alloc(struct mm_struct *mm);

#define __pte_free_tlb(tlb, pte, address) \
@@ -76,7 +67,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
}

pmd = (pmd_t *)page_address(pg);
- pmd_init((unsigned long)pmd, (unsigned long)invalid_pte_table);
+ pmd_init(pmd);
return pmd;
}

@@ -92,7 +83,7 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)

pud = (pud_t *) __get_free_pages(GFP_KERNEL, PUD_ORDER);
if (pud)
- pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table);
+ pud_init(pud);
return pud;
}

diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h
index d9e86cfa53e2..9c811c3f7572 100644
--- a/arch/loongarch/include/asm/pgtable.h
+++ b/arch/loongarch/include/asm/pgtable.h
@@ -243,11 +243,11 @@ extern void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pm
#define pfn_pmd(pfn, prot) __pmd(((pfn) << _PFN_SHIFT) | pgprot_val(prot))

/*
- * Initialize a new pgd / pmd table with invalid pointers.
+ * Initialize a new pgd / pud / pmd table with invalid pointers.
*/
-extern void pgd_init(unsigned long page);
-extern void pud_init(unsigned long page, unsigned long pagetable);
-extern void pmd_init(unsigned long page, unsigned long pagetable);
+extern void pgd_init(void *addr);
+extern void pud_init(void *addr);
+extern void pmd_init(void *addr);

/*
* Non-present pages: high 40 bits are offset, next 8 bits type,
diff --git a/arch/loongarch/kernel/numa.c b/arch/loongarch/kernel/numa.c
index a76f547a5aa3..839ce601f220 100644
--- a/arch/loongarch/kernel/numa.c
+++ b/arch/loongarch/kernel/numa.c
@@ -78,7 +78,7 @@ void __init pcpu_populate_pte(unsigned long addr)
new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
pgd_populate(&init_mm, pgd, new);
#ifndef __PAGETABLE_PUD_FOLDED
- pud_init((unsigned long)new, (unsigned long)invalid_pmd_table);
+ pud_init(new);
#endif
}

@@ -89,7 +89,7 @@ void __init pcpu_populate_pte(unsigned long addr)
new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
pud_populate(&init_mm, pud, new);
#ifndef __PAGETABLE_PMD_FOLDED
- pmd_init((unsigned long)new, (unsigned long)invalid_pte_table);
+ pmd_init(new);
#endif
}

diff --git a/arch/loongarch/mm/pgtable.c b/arch/loongarch/mm/pgtable.c
index 0569647152e9..e79cc41acac5 100644
--- a/arch/loongarch/mm/pgtable.c
+++ b/arch/loongarch/mm/pgtable.c
@@ -16,7 +16,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
ret = (pgd_t *) __get_free_pages(GFP_KERNEL, PGD_ORDER);
if (ret) {
init = pgd_offset(&init_mm, 0UL);
- pgd_init((unsigned long)ret);
+ pgd_init(ret);
memcpy(ret + USER_PTRS_PER_PGD, init + USER_PTRS_PER_PGD,
(PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
}
@@ -25,7 +25,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
}
EXPORT_SYMBOL_GPL(pgd_alloc);

-void pgd_init(unsigned long page)
+void pgd_init(void *addr)
{
unsigned long *p, *end;
unsigned long entry;
@@ -38,7 +38,7 @@ void pgd_init(unsigned long page)
entry = (unsigned long)invalid_pte_table;
#endif

- p = (unsigned long *) page;
+ p = (unsigned long *)addr;
end = p + PTRS_PER_PGD;

do {
@@ -56,11 +56,12 @@ void pgd_init(unsigned long page)
EXPORT_SYMBOL_GPL(pgd_init);

#ifndef __PAGETABLE_PMD_FOLDED
-void pmd_init(unsigned long addr, unsigned long pagetable)
+void pmd_init(void *addr)
{
unsigned long *p, *end;
+ unsigned long pagetable = (unsigned long)invalid_pte_table;

- p = (unsigned long *) addr;
+ p = (unsigned long *)addr;
end = p + PTRS_PER_PMD;

do {
@@ -79,9 +80,10 @@ EXPORT_SYMBOL_GPL(pmd_init);
#endif

#ifndef __PAGETABLE_PUD_FOLDED
-void pud_init(unsigned long addr, unsigned long pagetable)
+void pud_init(void *addr)
{
unsigned long *p, *end;
+ unsigned long pagetable = (unsigned long)invalid_pmd_table;

p = (unsigned long *)addr;
end = p + PTRS_PER_PUD;
@@ -98,6 +100,7 @@ void pud_init(unsigned long addr, unsigned long pagetable)
p[-1] = pagetable;
} while (p != end);
}
+EXPORT_SYMBOL_GPL(pud_init);
#endif

pmd_t mk_pmd(struct page *page, pgprot_t prot)
@@ -119,12 +122,12 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
void __init pagetable_init(void)
{
/* Initialize the entire pgd. */
- pgd_init((unsigned long)swapper_pg_dir);
- pgd_init((unsigned long)invalid_pg_dir);
+ pgd_init(swapper_pg_dir);
+ pgd_init(invalid_pg_dir);
#ifndef __PAGETABLE_PUD_FOLDED
- pud_init((unsigned long)invalid_pud_table, (unsigned long)invalid_pmd_table);
+ pud_init(invalid_pud_table);
#endif
#ifndef __PAGETABLE_PMD_FOLDED
- pmd_init((unsigned long)invalid_pmd_table, (unsigned long)invalid_pte_table);
+ pmd_init(invalid_pmd_table);
#endif
}
diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h
index 867e9c3db76e..9f7d117c5ebf 100644
--- a/arch/mips/include/asm/pgalloc.h
+++ b/arch/mips/include/asm/pgalloc.h
@@ -33,7 +33,7 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
/*
* Initialize a new pmd table with invalid pointers.
*/
-extern void pmd_init(unsigned long page, unsigned long pagetable);
+extern void pmd_init(void *addr);

#ifndef __PAGETABLE_PMD_FOLDED

@@ -44,9 +44,9 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
#endif

/*
- * Initialize a new pgd / pmd table with invalid pointers.
+ * Initialize a new pgd table with invalid pointers.
*/
-extern void pgd_init(unsigned long page);
+extern void pgd_init(void *addr);
extern pgd_t *pgd_alloc(struct mm_struct *mm);

static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
@@ -77,7 +77,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
}

pmd = (pmd_t *)page_address(pg);
- pmd_init((unsigned long)pmd, (unsigned long)invalid_pte_table);
+ pmd_init(pmd);
return pmd;
}

diff --git a/arch/mips/include/asm/pgtable-64.h b/arch/mips/include/asm/pgtable-64.h
index 41921acdc9d8..8ac76bbb4b38 100644
--- a/arch/mips/include/asm/pgtable-64.h
+++ b/arch/mips/include/asm/pgtable-64.h
@@ -323,11 +323,11 @@ static inline pmd_t *pud_pgtable(pud_t pud)
#endif

/*
- * Initialize a new pgd / pmd table with invalid pointers.
+ * Initialize a new pgd / pud / pmd table with invalid pointers.
*/
-extern void pgd_init(unsigned long page);
-extern void pud_init(unsigned long page, unsigned long pagetable);
-extern void pmd_init(unsigned long page, unsigned long pagetable);
+extern void pgd_init(void *addr);
+extern void pud_init(void *addr);
+extern void pmd_init(void *addr);

/*
* Non-present pages: high 40 bits are offset, next 8 bits type,
diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c
index 1bfd1b501d82..cb10a92ad3ae 100644
--- a/arch/mips/kvm/mmu.c
+++ b/arch/mips/kvm/mmu.c
@@ -122,8 +122,7 @@ static pte_t *kvm_mips_walk_pgd(pgd_t *pgd, struct kvm_mmu_memory_cache *cache,
if (!cache)
return NULL;
new_pmd = kvm_mmu_memory_cache_alloc(cache);
- pmd_init((unsigned long)new_pmd,
- (unsigned long)invalid_pte_table);
+ pmd_init(new_pmd);
pud_populate(NULL, pud, new_pmd);
}
pmd = pmd_offset(pud, addr);
diff --git a/arch/mips/mm/pgtable-32.c b/arch/mips/mm/pgtable-32.c
index 61891af25019..88819a21d97e 100644
--- a/arch/mips/mm/pgtable-32.c
+++ b/arch/mips/mm/pgtable-32.c
@@ -13,9 +13,9 @@
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>

-void pgd_init(unsigned long page)
+void pgd_init(void *addr)
{
- unsigned long *p = (unsigned long *) page;
+ unsigned long *p = (unsigned long *)addr;
int i;

for (i = 0; i < USER_PTRS_PER_PGD; i+=8) {
@@ -61,9 +61,9 @@ void __init pagetable_init(void)
#endif

/* Initialize the entire pgd. */
- pgd_init((unsigned long)swapper_pg_dir);
- pgd_init((unsigned long)swapper_pg_dir
- + sizeof(pgd_t) * USER_PTRS_PER_PGD);
+ pgd_init(swapper_pg_dir);
+ pgd_init((void *)((unsigned long)swapper_pg_dir
+ + sizeof(pgd_t) * USER_PTRS_PER_PGD));

pgd_base = swapper_pg_dir;

diff --git a/arch/mips/mm/pgtable-64.c b/arch/mips/mm/pgtable-64.c
index 7536f7804c44..b4386a0e2ef8 100644
--- a/arch/mips/mm/pgtable-64.c
+++ b/arch/mips/mm/pgtable-64.c
@@ -13,7 +13,7 @@
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>

-void pgd_init(unsigned long page)
+void pgd_init(void *addr)
{
unsigned long *p, *end;
unsigned long entry;
@@ -26,7 +26,7 @@ void pgd_init(unsigned long page)
entry = (unsigned long)invalid_pte_table;
#endif

- p = (unsigned long *) page;
+ p = (unsigned long *) addr;
end = p + PTRS_PER_PGD;

do {
@@ -43,11 +43,12 @@ void pgd_init(unsigned long page)
}

#ifndef __PAGETABLE_PMD_FOLDED
-void pmd_init(unsigned long addr, unsigned long pagetable)
+void pmd_init(void *addr)
{
unsigned long *p, *end;
+ unsigned long pagetable = (unsigned long)invalid_pte_table;

- p = (unsigned long *) addr;
+ p = (unsigned long *)addr;
end = p + PTRS_PER_PMD;

do {
@@ -66,9 +67,10 @@ EXPORT_SYMBOL_GPL(pmd_init);
#endif

#ifndef __PAGETABLE_PUD_FOLDED
-void pud_init(unsigned long addr, unsigned long pagetable)
+void pud_init(void *addr)
{
unsigned long *p, *end;
+ unsigned long pagetable = (unsigned long)invalid_pmd_table;

p = (unsigned long *)addr;
end = p + PTRS_PER_PUD;
@@ -108,12 +110,12 @@ void __init pagetable_init(void)
pgd_t *pgd_base;

/* Initialize the entire pgd. */
- pgd_init((unsigned long)swapper_pg_dir);
+ pgd_init(swapper_pg_dir);
#ifndef __PAGETABLE_PUD_FOLDED
- pud_init((unsigned long)invalid_pud_table, (unsigned long)invalid_pmd_table);
+ pud_init(invalid_pud_table);
#endif
#ifndef __PAGETABLE_PMD_FOLDED
- pmd_init((unsigned long)invalid_pmd_table, (unsigned long)invalid_pte_table);
+ pmd_init(invalid_pmd_table);
#endif
pgd_base = swapper_pg_dir;
/*
diff --git a/arch/mips/mm/pgtable.c b/arch/mips/mm/pgtable.c
index 05560b042d82..98e5e880927a 100644
--- a/arch/mips/mm/pgtable.c
+++ b/arch/mips/mm/pgtable.c
@@ -15,7 +15,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
ret = (pgd_t *) __get_free_pages(GFP_KERNEL, PGD_ORDER);
if (ret) {
init = pgd_offset(&init_mm, 0UL);
- pgd_init((unsigned long)ret);
+ pgd_init(ret);
memcpy(ret + USER_PTRS_PER_PGD, init + USER_PTRS_PER_PGD,
(PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
}
--
2.27.0


2022-06-25 10:00:06

by Huacai Chen

Subject: [PATCH 3/3] mm/sparse-vmemmap: Generalise vmemmap_populate_hugepages()

From: Feiyang Chen <[email protected]>

Generalise vmemmap_populate_hugepages() so that ARM64, X86 and LoongArch
can share its implementation.
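
For illustration (not part of the patch), a minimal sketch of how an
architecture opts in after this change: it supplies the two PMD hooks
(which otherwise fall back to the __weak no-op defaults added in
mm/sparse-vmemmap.c) and forwards vmemmap_populate() to the now-generic
loop. The names here are placeholders; the huge-mapping flags and the
"is it a huge mapping?" predicate are arch-specific assumptions:

/* hypothetical arch/foo/mm/init.c */
void __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
			       unsigned long addr, unsigned long next)
{
	/* install a PMD-sized mapping of the block at p (arch-specific) */
	set_pmd(pmd, __pmd(__pa(p) | FOO_HUGE_KERNEL_FLAGS));
}

int __meminit vmemmap_check_pmd(pmd_t *pmd, int node, unsigned long addr,
				unsigned long next)
{
	if (!pmd_huge(*pmd))	/* arch-specific huge-mapping test */
		return 0;
	vmemmap_verify((pte_t *)pmd, node, addr, next);
	return 1;	/* non-zero: generic loop skips base-page fallback */
}

int __meminit vmemmap_populate(unsigned long start, unsigned long end,
			       int node, struct vmem_altmap *altmap)
{
	return vmemmap_populate_hugepages(start, end, node, altmap);
}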

Signed-off-by: Huacai Chen <[email protected]>
Signed-off-by: Feiyang Chen <[email protected]>
---
arch/arm64/mm/mmu.c | 53 ++++++-----------------
arch/loongarch/mm/init.c | 63 ++++++++-------------------
arch/x86/mm/init_64.c | 92 ++++++++++++++--------------------------
include/linux/mm.h | 6 +++
mm/sparse-vmemmap.c | 54 +++++++++++++++++++++++
5 files changed, 124 insertions(+), 144 deletions(-)

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 626ec32873c6..b080a65c719d 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -1158,49 +1158,24 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
return vmemmap_populate_basepages(start, end, node, altmap);
}
#else /* !ARM64_KERNEL_USES_PMD_MAPS */
+void __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
+ unsigned long addr, unsigned long next)
+{
+ pmd_set_huge(pmd, __pa(p), __pgprot(PROT_SECT_NORMAL));
+}
+
+int __meminit vmemmap_check_pmd(pmd_t *pmd, int node, unsigned long addr,
+ unsigned long next)
+{
+ vmemmap_verify((pte_t *)pmd, node, addr, next);
+ return 1;
+}
+
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
struct vmem_altmap *altmap)
{
- unsigned long addr = start;
- unsigned long next;
- pgd_t *pgdp;
- p4d_t *p4dp;
- pud_t *pudp;
- pmd_t *pmdp;
-
WARN_ON((start < VMEMMAP_START) || (end > VMEMMAP_END));
- do {
- next = pmd_addr_end(addr, end);
-
- pgdp = vmemmap_pgd_populate(addr, node);
- if (!pgdp)
- return -ENOMEM;
-
- p4dp = vmemmap_p4d_populate(pgdp, addr, node);
- if (!p4dp)
- return -ENOMEM;
-
- pudp = vmemmap_pud_populate(p4dp, addr, node);
- if (!pudp)
- return -ENOMEM;
-
- pmdp = pmd_offset(pudp, addr);
- if (pmd_none(READ_ONCE(*pmdp))) {
- void *p = NULL;
-
- p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
- if (!p) {
- if (vmemmap_populate_basepages(addr, next, node, altmap))
- return -ENOMEM;
- continue;
- }
-
- pmd_set_huge(pmdp, __pa(p), __pgprot(PROT_SECT_NORMAL));
- } else
- vmemmap_verify((pte_t *)pmdp, node, addr, next);
- } while (addr = next, addr != end);
-
- return 0;
+ return vmemmap_populate_hugepages(start, end, node, altmap);
}
#endif /* !ARM64_KERNEL_USES_PMD_MAPS */

diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c
index 35128229fe46..3190b3cd52d1 100644
--- a/arch/loongarch/mm/init.c
+++ b/arch/loongarch/mm/init.c
@@ -158,52 +158,25 @@ void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
#endif

#ifdef CONFIG_SPARSEMEM_VMEMMAP
-int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end,
- int node, struct vmem_altmap *altmap)
+void __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
+ unsigned long addr, unsigned long next)
{
- unsigned long addr = start;
- unsigned long next;
- pgd_t *pgd;
- p4d_t *p4d;
- pud_t *pud;
- pmd_t *pmd;
-
- for (addr = start; addr < end; addr = next) {
- next = pmd_addr_end(addr, end);
-
- pgd = vmemmap_pgd_populate(addr, node);
- if (!pgd)
- return -ENOMEM;
- p4d = vmemmap_p4d_populate(pgd, addr, node);
- if (!p4d)
- return -ENOMEM;
- pud = vmemmap_pud_populate(p4d, addr, node);
- if (!pud)
- return -ENOMEM;
-
- pmd = pmd_offset(pud, addr);
- if (pmd_none(*pmd)) {
- void *p = NULL;
-
- p = vmemmap_alloc_block_buf(PMD_SIZE, node, NULL);
- if (p) {
- pmd_t entry;
-
- entry = pfn_pmd(virt_to_pfn(p), PAGE_KERNEL);
- pmd_val(entry) |= _PAGE_HUGE | _PAGE_HGLOBAL;
- set_pmd_at(&init_mm, addr, pmd, entry);
-
- continue;
- }
- } else if (pmd_val(*pmd) & _PAGE_HUGE) {
- vmemmap_verify((pte_t *)pmd, node, addr, next);
- continue;
- }
- if (vmemmap_populate_basepages(addr, next, node, NULL))
- return -ENOMEM;
- }
-
- return 0;
+ pmd_t entry;
+
+ entry = pfn_pmd(virt_to_pfn(p), PAGE_KERNEL);
+ pmd_val(entry) |= _PAGE_HUGE | _PAGE_HGLOBAL;
+ set_pmd_at(&init_mm, addr, pmd, entry);
+}
+
+int __meminit vmemmap_check_pmd(pmd_t *pmd, int node, unsigned long addr,
+ unsigned long next)
+{
+ int huge = pmd_val(*pmd) & _PAGE_HUGE;
+
+ if (huge)
+ vmemmap_verify((pte_t *)pmd, node, addr, next);
+
+ return huge;
}

#if CONFIG_PGTABLE_LEVELS == 2
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 39c5246964a9..4911093ee2f3 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1532,72 +1532,44 @@ static long __meminitdata addr_start, addr_end;
static void __meminitdata *p_start, *p_end;
static int __meminitdata node_start;

-static int __meminit vmemmap_populate_hugepages(unsigned long start,
- unsigned long end, int node, struct vmem_altmap *altmap)
+void __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
+ unsigned long addr, unsigned long next)
{
- unsigned long addr;
- unsigned long next;
- pgd_t *pgd;
- p4d_t *p4d;
- pud_t *pud;
- pmd_t *pmd;
-
- for (addr = start; addr < end; addr = next) {
- next = pmd_addr_end(addr, end);
-
- pgd = vmemmap_pgd_populate(addr, node);
- if (!pgd)
- return -ENOMEM;
-
- p4d = vmemmap_p4d_populate(pgd, addr, node);
- if (!p4d)
- return -ENOMEM;
-
- pud = vmemmap_pud_populate(p4d, addr, node);
- if (!pud)
- return -ENOMEM;
-
- pmd = pmd_offset(pud, addr);
- if (pmd_none(*pmd)) {
- void *p;
-
- p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
- if (p) {
- pte_t entry;
-
- entry = pfn_pte(__pa(p) >> PAGE_SHIFT,
- PAGE_KERNEL_LARGE);
- set_pmd(pmd, __pmd(pte_val(entry)));
+ pte_t entry;
+
+ entry = pfn_pte(__pa(p) >> PAGE_SHIFT,
+ PAGE_KERNEL_LARGE);
+ set_pmd(pmd, __pmd(pte_val(entry)));
+
+ /* check to see if we have contiguous blocks */
+ if (p_end != p || node_start != node) {
+ if (p_start)
+ pr_debug(" [%lx-%lx] PMD -> [%p-%p] on node %d\n",
+ addr_start, addr_end-1, p_start, p_end-1, node_start);
+ addr_start = addr;
+ node_start = node;
+ p_start = p;
+ }

- /* check to see if we have contiguous blocks */
- if (p_end != p || node_start != node) {
- if (p_start)
- pr_debug(" [%lx-%lx] PMD -> [%p-%p] on node %d\n",
- addr_start, addr_end-1, p_start, p_end-1, node_start);
- addr_start = addr;
- node_start = node;
- p_start = p;
- }
+ addr_end = addr + PMD_SIZE;
+ p_end = p + PMD_SIZE;

- addr_end = addr + PMD_SIZE;
- p_end = p + PMD_SIZE;
+ if (!IS_ALIGNED(addr, PMD_SIZE) ||
+ !IS_ALIGNED(next, PMD_SIZE))
+ vmemmap_use_new_sub_pmd(addr, next);
+}

- if (!IS_ALIGNED(addr, PMD_SIZE) ||
- !IS_ALIGNED(next, PMD_SIZE))
- vmemmap_use_new_sub_pmd(addr, next);
+int __meminit vmemmap_check_pmd(pmd_t *pmd, int node, unsigned long addr,
+ unsigned long next)
+{
+ int large = pmd_large(*pmd);

- continue;
- } else if (altmap)
- return -ENOMEM; /* no fallback */
- } else if (pmd_large(*pmd)) {
- vmemmap_verify((pte_t *)pmd, node, addr, next);
- vmemmap_use_sub_pmd(addr, next);
- continue;
- }
- if (vmemmap_populate_basepages(addr, next, node, NULL))
- return -ENOMEM;
+ if (large) {
+ vmemmap_verify((pte_t *)pmd, node, addr, next);
+ vmemmap_use_sub_pmd(addr, next);
}
- return 0;
+
+ return large;
}

int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 3472b924a1ea..45df4b7562bc 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3216,8 +3216,14 @@ struct vmem_altmap;
void *vmemmap_alloc_block_buf(unsigned long size, int node,
struct vmem_altmap *altmap);
void vmemmap_verify(pte_t *, int, unsigned long, unsigned long);
+void vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
+ unsigned long addr, unsigned long next);
+int vmemmap_check_pmd(pmd_t *pmd, int node, unsigned long addr,
+ unsigned long next);
int vmemmap_populate_basepages(unsigned long start, unsigned long end,
int node, struct vmem_altmap *altmap);
+int vmemmap_populate_hugepages(unsigned long start, unsigned long end,
+ int node, struct vmem_altmap *altmap);
int vmemmap_populate(unsigned long start, unsigned long end, int node,
struct vmem_altmap *altmap);
void vmemmap_populate_print_last(void);
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index 33e2a1ceee72..6f2e40bb695d 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -686,6 +686,60 @@ int __meminit vmemmap_populate_basepages(unsigned long start, unsigned long end,
return vmemmap_populate_range(start, end, node, altmap, NULL);
}

+void __weak __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
+ unsigned long addr, unsigned long next)
+{
+}
+
+int __weak __meminit vmemmap_check_pmd(pmd_t *pmd, int node, unsigned long addr,
+ unsigned long next)
+{
+ return 0;
+}
+
+int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end,
+ int node, struct vmem_altmap *altmap)
+{
+ unsigned long addr;
+ unsigned long next;
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+
+ for (addr = start; addr < end; addr = next) {
+ next = pmd_addr_end(addr, end);
+
+ pgd = vmemmap_pgd_populate(addr, node);
+ if (!pgd)
+ return -ENOMEM;
+
+ p4d = vmemmap_p4d_populate(pgd, addr, node);
+ if (!p4d)
+ return -ENOMEM;
+
+ pud = vmemmap_pud_populate(p4d, addr, node);
+ if (!pud)
+ return -ENOMEM;
+
+ pmd = pmd_offset(pud, addr);
+ if (pmd_none(READ_ONCE(*pmd))) {
+ void *p;
+
+ p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
+ if (p) {
+ vmemmap_set_pmd(pmd, p, node, addr, next);
+ continue;
+ } else if (altmap)
+ return -ENOMEM; /* no fallback */
+ } else if (vmemmap_check_pmd(pmd, node, addr, next))
+ continue;
+ if (vmemmap_populate_basepages(addr, next, node, altmap))
+ return -ENOMEM;
+ }
+ return 0;
+}
+
/*
* For compound pages bigger than section size (e.g. x86 1G compound
* pages with 2M subsection size) fill the rest of sections as tail
--
2.27.0

2022-06-25 10:17:08

by Huacai Chen

Subject: [PATCH 2/3] LoongArch: Add sparse memory vmemmap support

From: Feiyang Chen <[email protected]>

Add sparse memory vmemmap support for LoongArch. SPARSEMEM_VMEMMAP
uses a virtually mapped memmap to optimise pfn_to_page and page_to_pfn
operations. This is the most efficient option when sufficient kernel
resources are available.
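
For context (not part of the patch): with SPARSEMEM_VMEMMAP the two
conversions collapse to pointer arithmetic against the virtually
contiguous memmap, as in include/asm-generic/memory_model.h:

/* memmap is virtually contiguous */
#define __pfn_to_page(pfn)	(vmemmap + (pfn))
#define __page_to_pfn(page)	(unsigned long)((page) - vmemmap)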

Signed-off-by: Min Zhou <[email protected]>
Signed-off-by: Huacai Chen <[email protected]>
Signed-off-by: Feiyang Chen <[email protected]>
---
arch/loongarch/Kconfig | 2 +
arch/loongarch/include/asm/pgtable.h | 5 +-
arch/loongarch/include/asm/sparsemem.h | 8 +++
arch/loongarch/mm/init.c | 71 +++++++++++++++++++++++++-
include/linux/mm.h | 2 +
mm/sparse-vmemmap.c | 10 ++++
6 files changed, 96 insertions(+), 2 deletions(-)

diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index dc19cf3071ea..8e56ca28165e 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -49,6 +49,7 @@ config LOONGARCH
select ARCH_USE_QUEUED_RWLOCKS
select ARCH_USE_QUEUED_SPINLOCKS
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
+ select ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
select ARCH_WANTS_NO_INSTR
select BUILDTIME_TABLE_SORT
select COMMON_CLK
@@ -422,6 +423,7 @@ config ARCH_FLATMEM_ENABLE

config ARCH_SPARSEMEM_ENABLE
def_bool y
+ select SPARSEMEM_VMEMMAP_ENABLE
help
Say Y to support efficient handling of sparse physical memory,
for architectures which are either NUMA (Non-Uniform Memory Access)
diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h
index 9c811c3f7572..b701ec7a0309 100644
--- a/arch/loongarch/include/asm/pgtable.h
+++ b/arch/loongarch/include/asm/pgtable.h
@@ -92,7 +92,10 @@ extern unsigned long zero_page_mask;
#define VMALLOC_START MODULES_END
#define VMALLOC_END \
(vm_map_base + \
- min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE)
+ min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE - VMEMMAP_SIZE)
+
+#define vmemmap ((struct page *)((VMALLOC_END + PMD_SIZE) & PMD_MASK))
+#define VMEMMAP_END ((unsigned long)vmemmap + VMEMMAP_SIZE - 1)

#define pte_ERROR(e) \
pr_err("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e))
diff --git a/arch/loongarch/include/asm/sparsemem.h b/arch/loongarch/include/asm/sparsemem.h
index 3d18cdf1b069..a1e440f6bec7 100644
--- a/arch/loongarch/include/asm/sparsemem.h
+++ b/arch/loongarch/include/asm/sparsemem.h
@@ -11,6 +11,14 @@
#define SECTION_SIZE_BITS 29 /* 2^29 = Largest Huge Page Size */
#define MAX_PHYSMEM_BITS 48

+#ifndef CONFIG_SPARSEMEM_VMEMMAP
+#define VMEMMAP_SIZE 0
+#else
+#define VMEMMAP_SIZE (sizeof(struct page) * (1UL << (cpu_pabits + 1 - PAGE_SHIFT)))
+#endif
+
+#include <linux/mm_types.h>
+
#endif /* CONFIG_SPARSEMEM */

#ifdef CONFIG_MEMORY_HOTPLUG
diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c
index 7094a68c9b83..35128229fe46 100644
--- a/arch/loongarch/mm/init.c
+++ b/arch/loongarch/mm/init.c
@@ -22,7 +22,7 @@
#include <linux/pfn.h>
#include <linux/hardirq.h>
#include <linux/gfp.h>
-#include <linux/initrd.h>
+#include <linux/hugetlb.h>
#include <linux/mmzone.h>

#include <asm/asm-offsets.h>
@@ -157,6 +157,75 @@ void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
#endif
#endif

+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end,
+ int node, struct vmem_altmap *altmap)
+{
+ unsigned long addr = start;
+ unsigned long next;
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+
+ for (addr = start; addr < end; addr = next) {
+ next = pmd_addr_end(addr, end);
+
+ pgd = vmemmap_pgd_populate(addr, node);
+ if (!pgd)
+ return -ENOMEM;
+ p4d = vmemmap_p4d_populate(pgd, addr, node);
+ if (!p4d)
+ return -ENOMEM;
+ pud = vmemmap_pud_populate(p4d, addr, node);
+ if (!pud)
+ return -ENOMEM;
+
+ pmd = pmd_offset(pud, addr);
+ if (pmd_none(*pmd)) {
+ void *p = NULL;
+
+ p = vmemmap_alloc_block_buf(PMD_SIZE, node, NULL);
+ if (p) {
+ pmd_t entry;
+
+ entry = pfn_pmd(virt_to_pfn(p), PAGE_KERNEL);
+ pmd_val(entry) |= _PAGE_HUGE | _PAGE_HGLOBAL;
+ set_pmd_at(&init_mm, addr, pmd, entry);
+
+ continue;
+ }
+ } else if (pmd_val(*pmd) & _PAGE_HUGE) {
+ vmemmap_verify((pte_t *)pmd, node, addr, next);
+ continue;
+ }
+ if (vmemmap_populate_basepages(addr, next, node, NULL))
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+#if CONFIG_PGTABLE_LEVELS == 2
+int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
+ struct vmem_altmap *altmap)
+{
+ return vmemmap_populate_basepages(start, end, node, NULL);
+}
+#else
+int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
+ struct vmem_altmap *altmap)
+{
+ return vmemmap_populate_hugepages(start, end, node, NULL);
+}
+#endif
+
+void vmemmap_free(unsigned long start, unsigned long end,
+ struct vmem_altmap *altmap)
+{
+}
+#endif
+
/*
* Align swapper_pg_dir in to 64K, allows its address to be loaded
* with a single LUI instruction in the TLB handlers. If we used
diff --git a/include/linux/mm.h b/include/linux/mm.h
index bc8f326be0ce..3472b924a1ea 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3203,6 +3203,8 @@ void *sparse_buffer_alloc(unsigned long size);
struct page * __populate_section_memmap(unsigned long pfn,
unsigned long nr_pages, int nid, struct vmem_altmap *altmap,
struct dev_pagemap *pgmap);
+void pmd_init(void *addr);
+void pud_init(void *addr);
pgd_t *vmemmap_pgd_populate(unsigned long addr, int node);
p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node);
pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node);
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index f4fa61dbbee3..33e2a1ceee72 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -587,6 +587,10 @@ pmd_t * __meminit vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node)
return pmd;
}

+void __weak __meminit pmd_init(void *addr)
+{
+}
+
pud_t * __meminit vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node)
{
pud_t *pud = pud_offset(p4d, addr);
@@ -594,11 +598,16 @@ pud_t * __meminit vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node)
void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
if (!p)
return NULL;
+ pmd_init(p);
pud_populate(&init_mm, pud, p);
}
return pud;
}

+void __weak __meminit pud_init(void *addr)
+{
+}
+
p4d_t * __meminit vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node)
{
p4d_t *p4d = p4d_offset(pgd, addr);
@@ -606,6 +615,7 @@ p4d_t * __meminit vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node)
void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
if (!p)
return NULL;
+ pud_init(p);
p4d_populate(&init_mm, p4d, p);
}
return p4d;
--
2.27.0

2022-06-27 10:54:52

by Muchun Song

Subject: Re: [PATCH 2/3] LoongArch: Add sparse memory vmemmap support

On Sat, Jun 25, 2022 at 5:54 PM Huacai Chen <[email protected]> wrote:
>
> From: Feiyang Chen <[email protected]>
>
> Add sparse memory vmemmap support for LoongArch. SPARSEMEM_VMEMMAP
> uses a virtually mapped memmap to optimise pfn_to_page and page_to_pfn
> operations. This is the most efficient option when sufficient kernel
> resources are available.
>
> Signed-off-by: Min Zhou <[email protected]>
> Signed-off-by: Huacai Chen <[email protected]>
> Signed-off-by: Feiyang Chen <[email protected]>
> ---
> arch/loongarch/Kconfig | 2 +
> arch/loongarch/include/asm/pgtable.h | 5 +-
> arch/loongarch/include/asm/sparsemem.h | 8 +++
> arch/loongarch/mm/init.c | 71 +++++++++++++++++++++++++-
> include/linux/mm.h | 2 +
> mm/sparse-vmemmap.c | 10 ++++
> 6 files changed, 96 insertions(+), 2 deletions(-)
>
> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
> index dc19cf3071ea..8e56ca28165e 100644
> --- a/arch/loongarch/Kconfig
> +++ b/arch/loongarch/Kconfig
> @@ -49,6 +49,7 @@ config LOONGARCH
> select ARCH_USE_QUEUED_RWLOCKS
> select ARCH_USE_QUEUED_SPINLOCKS
> select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
> + select ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP

I think this should be a separate patch to enable HVO (HugeTLB Vmemmap
Optimization) since it is irrelevant to this patch.

Thanks.

> [...]

2022-06-28 03:47:11

by Huacai Chen

Subject: Re: [PATCH 2/3] LoongArch: Add sparse memory vmemmap support

Hi, Muchun,

On Mon, Jun 27, 2022 at 6:33 PM Muchun Song <[email protected]> wrote:
>
> On Sat, Jun 25, 2022 at 5:54 PM Huacai Chen <[email protected]> wrote:
> >
> > From: Feiyang Chen <[email protected]>
> >
> > Add sparse memory vmemmap support for LoongArch. SPARSEMEM_VMEMMAP
> > uses a virtually mapped memmap to optimise pfn_to_page and page_to_pfn
> > operations. This is the most efficient option when sufficient kernel
> > resources are available.
> >
> > Signed-off-by: Min Zhou <[email protected]>
> > Signed-off-by: Huacai Chen <[email protected]>
> > Signed-off-by: Feiyang Chen <[email protected]>
> > ---
> > arch/loongarch/Kconfig | 2 +
> > arch/loongarch/include/asm/pgtable.h | 5 +-
> > arch/loongarch/include/asm/sparsemem.h | 8 +++
> > arch/loongarch/mm/init.c | 71 +++++++++++++++++++++++++-
> > include/linux/mm.h | 2 +
> > mm/sparse-vmemmap.c | 10 ++++
> > 6 files changed, 96 insertions(+), 2 deletions(-)
> >
> > diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
> > index dc19cf3071ea..8e56ca28165e 100644
> > --- a/arch/loongarch/Kconfig
> > +++ b/arch/loongarch/Kconfig
> > @@ -49,6 +49,7 @@ config LOONGARCH
> > select ARCH_USE_QUEUED_RWLOCKS
> > select ARCH_USE_QUEUED_SPINLOCKS
> > select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
> > + select ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
>
> I think this should be a separate patch to enable HVO (HugeTLB Vmemmap
> Optimization) since it is irrelevant to this patch.
It seems I have misunderstood HVO; I will remove the HVO parts from
this patch. Thank you.

Huacai
>
> Thanks.
>
> > [...]