2021-04-28 17:42:08

by Christophe Leroy

[permalink] [raw]
Subject: [RFC PATCH v1 0/4] Implement huge VMAP and VMALLOC on powerpc 8xx

This series is a first attempt at implementing huge VMAP and VMALLOC
on powerpc 8xx. This series applies on top of linux-next.
For the time being the 8xx specificities are plugged directly into
generic mm functions. I have no real idea on how to make it a nice
beautiful generic implementation for the time being, hence this RFC
in order to get suggestions.

powerpc 8xx has 4 page sizes:
- 4k
- 16k
- 512k
- 8M

At present, vmalloc and vmap only support huge pages which are
leaf at PMD level.

Here the PMD level is 4M, it doesn't correspond to any supported
page size.

For the time being, implement use of 16k and 512k pages which is done
at PTE level.

Support of 8M pages will be implemented later, it requires use of
hugepd tables.

Christophe Leroy (4):
mm/ioremap: Fix iomap_max_page_shift
mm/hugetlb: Change parameters of arch_make_huge_pte()
mm/pgtable: Add stubs for {pmd/pud}_{set/clear}_huge
mm/vmalloc: Add support for huge pages on VMAP and VMALLOC for powerpc
8xx

arch/arm64/include/asm/hugetlb.h | 3 +-
arch/arm64/mm/hugetlbpage.c | 5 +-
arch/powerpc/Kconfig | 3 +-
.../include/asm/nohash/32/hugetlb-8xx.h | 5 +-
arch/sparc/include/asm/pgtable_64.h | 3 +-
arch/sparc/mm/hugetlbpage.c | 6 +-
include/linux/hugetlb.h | 4 +-
include/linux/pgtable.h | 26 ++++++-
mm/hugetlb.c | 6 +-
mm/ioremap.c | 6 +-
mm/migrate.c | 4 +-
mm/vmalloc.c | 74 ++++++++++++++++---
12 files changed, 111 insertions(+), 34 deletions(-)

--
2.25.0


2021-04-28 17:42:09

by Christophe Leroy

[permalink] [raw]
Subject: [RFC PATCH v1 2/4] mm/hugetlb: Change parameters of arch_make_huge_pte()

At present, arch_make_huge_pte() has the following prototype:

pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
struct page *page, int writable);

vma is used to get the pages shift or size.
vma is also used on Sparc to get vm_flags.
page is not used.
writable is not used.

In order to use this function without a vma, replace vma by shift
and flags. Also remove the unused parameters.

Signed-off-by: Christophe Leroy <[email protected]>
---
arch/arm64/include/asm/hugetlb.h | 3 +--
arch/arm64/mm/hugetlbpage.c | 5 ++---
arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h | 5 ++---
arch/sparc/include/asm/pgtable_64.h | 3 +--
arch/sparc/mm/hugetlbpage.c | 6 ++----
include/linux/hugetlb.h | 4 ++--
mm/hugetlb.c | 6 ++++--
mm/migrate.c | 4 +++-
8 files changed, 17 insertions(+), 19 deletions(-)

diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h
index 5abf91e3494c..1242f71937f8 100644
--- a/arch/arm64/include/asm/hugetlb.h
+++ b/arch/arm64/include/asm/hugetlb.h
@@ -23,8 +23,7 @@ static inline void arch_clear_hugepage_flags(struct page *page)
}
#define arch_clear_hugepage_flags arch_clear_hugepage_flags

-extern pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
- struct page *page, int writable);
+pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags);
#define arch_make_huge_pte arch_make_huge_pte
#define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT
extern void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 58987a98e179..23505fc35324 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -339,10 +339,9 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
return NULL;
}

-pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
- struct page *page, int writable)
+pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags)
{
- size_t pagesize = huge_page_size(hstate_vma(vma));
+ size_t pagesize = 1UL << shift;

if (pagesize == CONT_PTE_SIZE) {
entry = pte_mkcont(entry);
diff --git a/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h
index 39be9aea86db..64b6c608eca4 100644
--- a/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h
@@ -66,10 +66,9 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
}

#ifdef CONFIG_PPC_4K_PAGES
-static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
- struct page *page, int writable)
+static inline pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags)
{
- size_t size = huge_page_size(hstate_vma(vma));
+ size_t size = 1UL << shift;

if (size == SZ_16K)
return __pte(pte_val(entry) & ~_PAGE_HUGE);
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index 550d3904de65..2cd80a0a9795 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -377,8 +377,7 @@ static inline pgprot_t pgprot_noncached(pgprot_t prot)
#define pgprot_noncached pgprot_noncached

#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
-extern pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
- struct page *page, int writable);
+pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags);
#define arch_make_huge_pte arch_make_huge_pte
static inline unsigned long __pte_default_huge_mask(void)
{
diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c
index 04d8790f6c32..0f49fada2093 100644
--- a/arch/sparc/mm/hugetlbpage.c
+++ b/arch/sparc/mm/hugetlbpage.c
@@ -177,10 +177,8 @@ static pte_t hugepage_shift_to_tte(pte_t entry, unsigned int shift)
return sun4u_hugepage_shift_to_tte(entry, shift);
}

-pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
- struct page *page, int writeable)
+pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags)
{
- unsigned int shift = huge_page_shift(hstate_vma(vma));
pte_t pte;

pte = hugepage_shift_to_tte(entry, shift);
@@ -188,7 +186,7 @@ pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
#ifdef CONFIG_SPARC64
/* If this vma has ADI enabled on it, turn on TTE.mcd
*/
- if (vma->vm_flags & VM_SPARC_ADI)
+ if (flags & VM_SPARC_ADI)
return pte_mkmcd(pte);
else
return pte_mknotmcd(pte);
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index b92f25ccef58..24f47981c166 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -710,8 +710,8 @@ static inline void arch_clear_hugepage_flags(struct page *page) { }
#endif

#ifndef arch_make_huge_pte
-static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
- struct page *page, int writable)
+static inline pte_t arch_make_huge_pte(pte_t entry, unsigned int shift,
+ vm_flags_t flags)
{
return entry;
}
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 3db405dea3dc..396285b16dd8 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3856,6 +3856,7 @@ static pte_t make_huge_pte(struct vm_area_struct *vma, struct page *page,
int writable)
{
pte_t entry;
+ unsigned int shift = huge_page_shift(hstate_vma(vma));

if (writable) {
entry = huge_pte_mkwrite(huge_pte_mkdirty(mk_huge_pte(page,
@@ -3866,7 +3867,7 @@ static pte_t make_huge_pte(struct vm_area_struct *vma, struct page *page,
}
entry = pte_mkyoung(entry);
entry = pte_mkhuge(entry);
- entry = arch_make_huge_pte(entry, vma, page, writable);
+ entry = arch_make_huge_pte(entry, shift, vma->vm_flags);

return entry;
}
@@ -5250,10 +5251,11 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
}
if (!huge_pte_none(pte)) {
pte_t old_pte;
+ unsigned int shift = huge_page_shift(hstate_vma(vma));

old_pte = huge_ptep_modify_prot_start(vma, address, ptep);
pte = pte_mkhuge(huge_pte_modify(old_pte, newprot));
- pte = arch_make_huge_pte(pte, vma, NULL, 0);
+ pte = arch_make_huge_pte(pte, shift, vma->vm_flags);
huge_ptep_modify_prot_commit(vma, address, ptep, old_pte, pte);
pages++;
}
diff --git a/mm/migrate.c b/mm/migrate.c
index b234c3f3acb7..49ee64cd2ff3 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -226,8 +226,10 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma,

#ifdef CONFIG_HUGETLB_PAGE
if (PageHuge(new)) {
+ unsigned int shift = huge_page_shift(hstate_vma(vma));
+
pte = pte_mkhuge(pte);
- pte = arch_make_huge_pte(pte, vma, new, 0);
+ pte = arch_make_huge_pte(pte, shift, vma->vm_flags);
set_huge_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
if (PageAnon(new))
hugepage_add_anon_rmap(new, vma, pvmw.address);
--
2.25.0

2021-04-28 17:42:13

by Christophe Leroy

[permalink] [raw]
Subject: [RFC PATCH v1 4/4] mm/vmalloc: Add support for huge pages on VMAP and VMALLOC for powerpc 8xx

powerpc 8xx has 4 page sizes:
- 4k
- 16k
- 512k
- 8M

At present, vmalloc and vmap only support huge pages which are
leaf at PMD level.

Here the PMD level is 4M, it doesn't correspond to any supported
page size.

For the time being, implement use of 16k and 512k pages which is done
at PTE level.

Support of 8M pages will be implemented later, it requires use of
hugepd tables.

Signed-off-by: Christophe Leroy <[email protected]>
---
arch/powerpc/Kconfig | 3 +-
mm/vmalloc.c | 74 ++++++++++++++++++++++++++++++++++++++------
2 files changed, 66 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 27e88c38fdf7..b443716f7413 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -188,7 +188,8 @@ config PPC
select GENERIC_TIME_VSYSCALL
select GENERIC_VDSO_TIME_NS
select HAVE_ARCH_AUDITSYSCALL
- select HAVE_ARCH_HUGE_VMAP if PPC_BOOK3S_64 && PPC_RADIX_MMU
+ select HAVE_ARCH_HUGE_VMAP if (PPC_BOOK3S_64 && PPC_RADIX_MMU) || PPC_8xx
+ select HAVE_ARCH_HUGE_VMALLOC if PPC_8xx
select HAVE_ARCH_JUMP_LABEL
select HAVE_ARCH_JUMP_LABEL_RELATIVE
select HAVE_ARCH_KASAN if PPC32 && PPC_PAGE_SHIFT <= 14
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 5d96fee17226..1f9f9be8ec01 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -36,6 +36,7 @@
#include <linux/overflow.h>
#include <linux/pgtable.h>
#include <linux/uaccess.h>
+#include <linux/hugetlb.h>
#include <asm/tlbflush.h>
#include <asm/shmparam.h>

@@ -81,12 +82,55 @@ static void free_work(struct work_struct *w)
}

/*** Page table manipulation functions ***/
+static int vmap_try_huge_pte(pte_t *ptep, unsigned long addr, unsigned long end,
+ u64 pfn, pgprot_t prot, unsigned int max_page_shift)
+{
+ unsigned long size = end - addr;
+ pte_t pte;
+
+ if (!IS_ENABLED(CONFIG_PPC_8xx))
+ return 0;
+
+ if (PAGE_SIZE == SZ_16K && size < SZ_512K)
+ return 0;
+
+ if (size < SZ_16K)
+ return 0;
+
+ if (max_page_shift < 14)
+ return 0;
+
+ if (size > SZ_512K)
+ size = SZ_512K;
+
+ if (max_page_shift < 19 && size > SZ_16K)
+ size = SZ_16K;
+
+ if (!IS_ALIGNED(addr, size))
+ return 0;
+
+ if (!IS_ALIGNED(PFN_PHYS(pfn), size))
+ return 0;
+
+ if (pte_present(*ptep))
+ return 0;
+
+ pte = pfn_pte(pfn, prot);
+ pte = pte_mkhuge(pte);
+ pte = arch_make_huge_pte(pte, ilog2(size), 0);
+
+ set_huge_pte_at(&init_mm, addr, ptep, pte);
+
+ return PFN_DOWN(size);
+}
+
static int vmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
phys_addr_t phys_addr, pgprot_t prot,
- pgtbl_mod_mask *mask)
+ unsigned int max_page_shift, pgtbl_mod_mask *mask)
{
pte_t *pte;
u64 pfn;
+ int npages;

pfn = phys_addr >> PAGE_SHIFT;
pte = pte_alloc_kernel_track(pmd, addr, mask);
@@ -94,9 +138,14 @@ static int vmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
return -ENOMEM;
do {
BUG_ON(!pte_none(*pte));
- set_pte_at(&init_mm, addr, pte, pfn_pte(pfn, prot));
- pfn++;
- } while (pte++, addr += PAGE_SIZE, addr != end);
+
+ npages = vmap_try_huge_pte(pte, addr, end, pfn, prot, max_page_shift);
+ if (!npages) {
+ set_pte_at(&init_mm, addr, pte, pfn_pte(pfn, prot));
+ npages = 1;
+ }
+ pfn += npages;
+ } while (pte += npages, addr += PAGE_SIZE * npages, addr != end);
*mask |= PGTBL_PTE_MODIFIED;
return 0;
}
@@ -145,7 +194,7 @@ static int vmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
continue;
}

- if (vmap_pte_range(pmd, addr, next, phys_addr, prot, mask))
+ if (vmap_pte_range(pmd, addr, next, phys_addr, prot, max_page_shift, mask))
return -ENOMEM;
} while (pmd++, phys_addr += (next - addr), addr = next, addr != end);
return 0;
@@ -2881,8 +2930,7 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
return NULL;
}

- if (vmap_allow_huge && !(vm_flags & VM_NO_HUGE_VMAP) &&
- arch_vmap_pmd_supported(prot)) {
+ if (vmap_allow_huge && !(vm_flags & VM_NO_HUGE_VMAP)) {
unsigned long size_per_node;

/*
@@ -2895,11 +2943,17 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
size_per_node = size;
if (node == NUMA_NO_NODE)
size_per_node /= num_online_nodes();
- if (size_per_node >= PMD_SIZE) {
+ if (arch_vmap_pmd_supported(prot) && size_per_node >= PMD_SIZE) {
shift = PMD_SHIFT;
- align = max(real_align, 1UL << shift);
- size = ALIGN(real_size, 1UL << shift);
+ } else if (IS_ENABLED(CONFIG_PPC_8xx)) {
+ if (size_per_node >= SZ_512K) {
+ shift = 19;
+ } else if (size_per_node >= SZ_16K) {
+ shift = 14;
+ }
}
+ align = max(real_align, 1UL << shift);
+ size = ALIGN(real_size, 1UL << shift);
}

again:
--
2.25.0

2021-04-28 20:50:56

by Christophe Leroy

[permalink] [raw]
Subject: [RFC PATCH v1 1/4] mm/ioremap: Fix iomap_max_page_shift

iomap_max_page_shift is expected to contain a page shift,
so it can't be a 'bool', has to be an 'unsigned int'

Also fix the default values: P4D_SHIFT is the correct default when huge
iomap is allowed, and PAGE_SHIFT is the value to use when it is disabled.

Signed-off-by: Christophe Leroy <[email protected]>
---
mm/ioremap.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/mm/ioremap.c b/mm/ioremap.c
index d1dcc7e744ac..2f7193c6a99e 100644
--- a/mm/ioremap.c
+++ b/mm/ioremap.c
@@ -16,16 +16,16 @@
#include "pgalloc-track.h"

#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
-static bool __ro_after_init iomap_max_page_shift = PAGE_SHIFT;
+static unsigned int __ro_after_init iomap_max_page_shift = P4D_SHIFT;

static int __init set_nohugeiomap(char *str)
{
- iomap_max_page_shift = P4D_SHIFT;
+ iomap_max_page_shift = PAGE_SHIFT;
return 0;
}
early_param("nohugeiomap", set_nohugeiomap);
#else /* CONFIG_HAVE_ARCH_HUGE_VMAP */
-static const bool iomap_max_page_shift = PAGE_SHIFT;
+static const unsigned int iomap_max_page_shift = PAGE_SHIFT;
#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */

int ioremap_page_range(unsigned long addr,
--
2.25.0

2021-04-28 20:52:11

by Christophe Leroy

[permalink] [raw]
Subject: [RFC PATCH v1 3/4] mm/pgtable: Add stubs for {pmd/pud}_{set/clear}_huge

For architectures with no PMD and/or no PUD, add stubs
similar to what we have for architectures without P4D.

Signed-off-by: Christophe Leroy <[email protected]>
---
include/linux/pgtable.h | 26 +++++++++++++++++++++++++-
1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 46b13780c2c8..d41474a2d255 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -1371,10 +1371,34 @@ static inline int p4d_clear_huge(p4d_t *p4d)
}
#endif /* !__PAGETABLE_P4D_FOLDED */

+#ifndef __PAGETABLE_PUD_FOLDED
int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot);
-int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot);
int pud_clear_huge(pud_t *pud);
+#else
+static inline int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
+{
+ return 0;
+}
+static inline int pud_clear_huge(pud_t *pud)
+{
+ return 0;
+}
+#endif /* !__PAGETABLE_PUD_FOLDED */
+
+#ifndef __PAGETABLE_PMD_FOLDED
+int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot);
int pmd_clear_huge(pmd_t *pmd);
+#else
+static inline int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
+{
+ return 0;
+}
+static inline int pmd_clear_huge(pmd_t *pmd)
+{
+ return 0;
+}
+#endif /* !__PAGETABLE_PMD_FOLDED */
+
int p4d_free_pud_page(p4d_t *p4d, unsigned long addr);
int pud_free_pmd_page(pud_t *pud, unsigned long addr);
int pmd_free_pte_page(pmd_t *pmd, unsigned long addr);
--
2.25.0

2021-04-29 17:55:18

by Mike Kravetz

[permalink] [raw]
Subject: Re: [RFC PATCH v1 2/4] mm/hugetlb: Change parameters of arch_make_huge_pte()

On 4/28/21 9:46 AM, Christophe Leroy wrote:
> At the time being, arch_make_huge_pte() has the following prototype:
>
> pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
> struct page *page, int writable);
>
> vma is used to get the pages shift or size.
> vma is also used on Sparc to get vm_flags.
> page is not used.
> writable is not used.
>
> In order to use this function without a vma, replace vma by shift
> and flags. Also remove the unused parameters.
>
> Signed-off-by: Christophe Leroy <[email protected]>
> ---
> arch/arm64/include/asm/hugetlb.h | 3 +--
> arch/arm64/mm/hugetlbpage.c | 5 ++---
> arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h | 5 ++---
> arch/sparc/include/asm/pgtable_64.h | 3 +--
> arch/sparc/mm/hugetlbpage.c | 6 ++----
> include/linux/hugetlb.h | 4 ++--
> mm/hugetlb.c | 6 ++++--
> mm/migrate.c | 4 +++-
> 8 files changed, 17 insertions(+), 19 deletions(-)

Hi Christophe,

Sorry, no suggestion for how to make a beautiful generic implementation.

This patch is straight forward.
Acked-by: Mike Kravetz <[email protected]>
--
Mike Kravetz