2021-04-30 07:57:25

by Nanyong Sun

[permalink] [raw]
Subject: [PATCH -next 0/4] THP support for RISCV

This series brings transparent huge pages to RISCV on 64-bit.

I have tested these testcases of vm selftest under qemu-riscv64:
khugepaged
split_huge_page_test
transhuge-stress

Nanyong Sun (4):
riscv: mm: add _PAGE_LEAF macro
riscv: mm: make pmd_bad() check leaf condition
riscv: mm: add param stride for __sbi_tlb_flush_range
riscv: mm: add THP support on 64-bit

arch/riscv/Kconfig | 1 +
arch/riscv/include/asm/pgtable-64.h | 3 +-
arch/riscv/include/asm/pgtable-bits.h | 5 +
arch/riscv/include/asm/pgtable.h | 164 +++++++++++++++++++++++++-
arch/riscv/mm/tlbflush.c | 17 ++-
5 files changed, 178 insertions(+), 12 deletions(-)

--
2.25.1


2021-04-30 07:57:25

by Nanyong Sun

[permalink] [raw]
Subject: [PATCH -next 3/4] riscv: mm: add param stride for __sbi_tlb_flush_range

Add a stride parameter to __sbi_tlb_flush_range(). It represents
the page stride between the start and end addresses.
Normally the stride is PAGE_SIZE; when flushing a huge page
address, the stride can be the huge page size, such as PMD_SIZE,
so that only one TLB entry needs to be flushed if the address range
is within PMD_SIZE.

Signed-off-by: Nanyong Sun <[email protected]>
---
arch/riscv/mm/tlbflush.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
index 720b443c4..382781abf 100644
--- a/arch/riscv/mm/tlbflush.c
+++ b/arch/riscv/mm/tlbflush.c
@@ -15,7 +15,7 @@ void flush_tlb_all(void)
* Kernel may panic if cmask is NULL.
*/
static void __sbi_tlb_flush_range(struct cpumask *cmask, unsigned long start,
- unsigned long size)
+ unsigned long size, unsigned long stride)
{
struct cpumask hmask;
unsigned int cpuid;
@@ -27,7 +27,7 @@ static void __sbi_tlb_flush_range(struct cpumask *cmask, unsigned long start,

if (cpumask_any_but(cmask, cpuid) >= nr_cpu_ids) {
/* local cpu is the only cpu present in cpumask */
- if (size <= PAGE_SIZE)
+ if (size <= stride)
local_flush_tlb_page(start);
else
local_flush_tlb_all();
@@ -41,16 +41,16 @@ static void __sbi_tlb_flush_range(struct cpumask *cmask, unsigned long start,

void flush_tlb_mm(struct mm_struct *mm)
{
- __sbi_tlb_flush_range(mm_cpumask(mm), 0, -1);
+ __sbi_tlb_flush_range(mm_cpumask(mm), 0, -1, PAGE_SIZE);
}

void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
{
- __sbi_tlb_flush_range(mm_cpumask(vma->vm_mm), addr, PAGE_SIZE);
+ __sbi_tlb_flush_range(mm_cpumask(vma->vm_mm), addr, PAGE_SIZE, PAGE_SIZE);
}

void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end)
{
- __sbi_tlb_flush_range(mm_cpumask(vma->vm_mm), start, end - start);
+ __sbi_tlb_flush_range(mm_cpumask(vma->vm_mm), start, end - start, PAGE_SIZE);
}
--
2.25.1

2021-04-30 07:57:26

by Nanyong Sun

[permalink] [raw]
Subject: [PATCH -next 1/4] riscv: mm: add _PAGE_LEAF macro

In riscv, a page table entry is a leaf when any of the read, write,
or execute bits is set. So add a macro, _PAGE_LEAF, to use instead of
(_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC), which is frequently used
to determine whether an entry is a leaf page. This makes the code
easier to read, without any functional change.

Signed-off-by: Nanyong Sun <[email protected]>
---
arch/riscv/include/asm/pgtable-64.h | 3 +--
arch/riscv/include/asm/pgtable-bits.h | 5 +++++
arch/riscv/include/asm/pgtable.h | 6 ++----
3 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h
index f3b0da64c..e3b7c5dd6 100644
--- a/arch/riscv/include/asm/pgtable-64.h
+++ b/arch/riscv/include/asm/pgtable-64.h
@@ -46,8 +46,7 @@ static inline int pud_bad(pud_t pud)
#define pud_leaf pud_leaf
static inline int pud_leaf(pud_t pud)
{
- return pud_present(pud) &&
- (pud_val(pud) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC));
+ return pud_present(pud) && (pud_val(pud) & _PAGE_LEAF);
}

static inline void set_pud(pud_t *pudp, pud_t pud)
diff --git a/arch/riscv/include/asm/pgtable-bits.h b/arch/riscv/include/asm/pgtable-bits.h
index bbaeb5d35..2ee413912 100644
--- a/arch/riscv/include/asm/pgtable-bits.h
+++ b/arch/riscv/include/asm/pgtable-bits.h
@@ -39,5 +39,10 @@
#define _PAGE_CHG_MASK (~(unsigned long)(_PAGE_PRESENT | _PAGE_READ | \
_PAGE_WRITE | _PAGE_EXEC | \
_PAGE_USER | _PAGE_GLOBAL))
+/*
+ * when all of R/W/X are zero, the PTE is a pointer to the next level
+ * of the page table; otherwise, it is a leaf PTE.
+ */
+#define _PAGE_LEAF (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)

#endif /* _ASM_RISCV_PGTABLE_BITS_H */
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 80e63a93e..f489e412f 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -166,8 +166,7 @@ static inline int pmd_bad(pmd_t pmd)
#define pmd_leaf pmd_leaf
static inline int pmd_leaf(pmd_t pmd)
{
- return pmd_present(pmd) &&
- (pmd_val(pmd) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC));
+ return pmd_present(pmd) && (pmd_val(pmd) & _PAGE_LEAF);
}

static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
@@ -243,8 +242,7 @@ static inline int pte_exec(pte_t pte)

static inline int pte_huge(pte_t pte)
{
- return pte_present(pte)
- && (pte_val(pte) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC));
+ return pte_present(pte) && (pte_val(pte) & _PAGE_LEAF);
}

static inline int pte_dirty(pte_t pte)
--
2.25.1

2021-04-30 07:59:20

by Nanyong Sun

[permalink] [raw]
Subject: [PATCH -next 4/4] riscv: mm: add THP support on 64-bit

Bring Transparent HugePage support to riscv. A
transparent huge page is always represented as a pmd.

Signed-off-by: Nanyong Sun <[email protected]>
---
arch/riscv/Kconfig | 1 +
arch/riscv/include/asm/pgtable.h | 156 +++++++++++++++++++++++++++++++
arch/riscv/mm/tlbflush.c | 7 ++
3 files changed, 164 insertions(+)

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 4e124b2eb..3628f9f12 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -101,6 +101,7 @@ config RISCV
select SYSCTL_EXCEPTION_TRACE
select THREAD_INFO_IN_TASK
select UACCESS_MEMCPY if !MMU
+ select HAVE_ARCH_TRANSPARENT_HUGEPAGE if 64BIT

config ARCH_MMAP_RND_BITS_MIN
default 18 if 64BIT
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index f7fc47c58..ceb4b9c82 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -148,10 +148,23 @@ extern pgd_t swapper_pg_dir[];
#define __S110 PAGE_SHARED_EXEC
#define __S111 PAGE_SHARED_EXEC

+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline int pmd_present(pmd_t pmd)
+{
+ /*
+ * Checking for _PAGE_LEAF is needed too because:
+ * When splitting a THP, split_huge_page() will temporarily clear
+ * the present bit, in this situation, pmd_present() and
+ * pmd_trans_huge() still needs to return true.
+ */
+ return (pmd_val(pmd) & (_PAGE_PRESENT | _PAGE_PROT_NONE | _PAGE_LEAF));
+}
+#else
static inline int pmd_present(pmd_t pmd)
{
return (pmd_val(pmd) & (_PAGE_PRESENT | _PAGE_PROT_NONE));
}
+#endif

static inline int pmd_none(pmd_t pmd)
{
@@ -345,6 +358,14 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
local_flush_tlb_page(address);
}

+static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp)
+{
+ pte_t *ptep = (pte_t *)pmdp;
+
+ update_mmu_cache(vma, address, ptep);
+}
+
#define __HAVE_ARCH_PTE_SAME
static inline int pte_same(pte_t pte_a, pte_t pte_b)
{
@@ -438,6 +459,141 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
return ptep_test_and_clear_young(vma, address, ptep);
}

+/*
+ * THP functions
+ */
+static inline pmd_t pte_pmd(pte_t pte)
+{
+ return __pmd(pte_val(pte));
+}
+
+static inline pmd_t pmd_mkhuge(pmd_t pmd)
+{
+ return pmd;
+}
+
+static inline pmd_t pmd_mkinvalid(pmd_t pmd)
+{
+ return __pmd(pmd_val(pmd) & ~(_PAGE_PRESENT|_PAGE_PROT_NONE));
+}
+
+#define __pmd_to_phys(pmd) (pmd_val(pmd) >> _PAGE_PFN_SHIFT << PAGE_SHIFT)
+
+static inline unsigned long pmd_pfn(pmd_t pmd)
+{
+ return ((__pmd_to_phys(pmd) & PMD_MASK) >> PAGE_SHIFT);
+}
+
+static inline pmd_t mk_pmd(struct page *page, pgprot_t prot)
+{
+ return pfn_pmd(page_to_pfn(page), prot);
+}
+
+static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
+{
+ return pte_pmd(pte_modify(pmd_pte(pmd), newprot));
+}
+
+#define pmd_write pmd_write
+static inline int pmd_write(pmd_t pmd)
+{
+ return pte_write(pmd_pte(pmd));
+}
+
+static inline int pmd_dirty(pmd_t pmd)
+{
+ return pte_dirty(pmd_pte(pmd));
+}
+
+static inline int pmd_young(pmd_t pmd)
+{
+ return pte_young(pmd_pte(pmd));
+}
+
+static inline pmd_t pmd_mkold(pmd_t pmd)
+{
+ return pte_pmd(pte_mkold(pmd_pte(pmd)));
+}
+
+static inline pmd_t pmd_mkyoung(pmd_t pmd)
+{
+ return pte_pmd(pte_mkyoung(pmd_pte(pmd)));
+}
+
+static inline pmd_t pmd_mkwrite(pmd_t pmd)
+{
+ return pte_pmd(pte_mkwrite(pmd_pte(pmd)));
+}
+
+static inline pmd_t pmd_wrprotect(pmd_t pmd)
+{
+ return pte_pmd(pte_wrprotect(pmd_pte(pmd)));
+}
+
+static inline pmd_t pmd_mkclean(pmd_t pmd)
+{
+ return pte_pmd(pte_mkclean(pmd_pte(pmd)));
+}
+
+static inline pmd_t pmd_mkdirty(pmd_t pmd)
+{
+ return pte_pmd(pte_mkdirty(pmd_pte(pmd)));
+}
+
+static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, pmd_t pmd)
+{
+ return set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd));
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline int pmd_trans_huge(pmd_t pmd)
+{
+ return pmd_leaf(pmd);
+}
+
+#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
+static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp,
+ pmd_t entry, int dirty)
+{
+ return ptep_set_access_flags(vma, address, (pte_t *)pmdp, pmd_pte(entry), dirty);
+}
+
+#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
+static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp)
+{
+ return ptep_test_and_clear_young(vma, address, (pte_t *)pmdp);
+}
+
+#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
+static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
+ unsigned long address, pmd_t *pmdp)
+{
+ return pte_pmd(ptep_get_and_clear(mm, address, (pte_t *)pmdp));
+}
+
+#define __HAVE_ARCH_PMDP_SET_WRPROTECT
+static inline void pmdp_set_wrprotect(struct mm_struct *mm,
+ unsigned long address, pmd_t *pmdp)
+{
+ ptep_set_wrprotect(mm, address, (pte_t *)pmdp);
+}
+
+#define pmdp_establish pmdp_establish
+static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp, pmd_t pmd)
+{
+ return __pmd(atomic_long_xchg((atomic_long_t *)pmdp, pmd_val(pmd)));
+}
+
+#define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
+void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end);
+
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
/*
* Encode and decode a swap entry
*
diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
index 382781abf..fea45af91 100644
--- a/arch/riscv/mm/tlbflush.c
+++ b/arch/riscv/mm/tlbflush.c
@@ -54,3 +54,10 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
{
__sbi_tlb_flush_range(mm_cpumask(vma->vm_mm), start, end - start, PAGE_SIZE);
}
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end)
+{
+ __sbi_tlb_flush_range(mm_cpumask(vma->vm_mm), start, end - start, PMD_SIZE);
+}
+#endif
--
2.25.1

2021-04-30 08:00:00

by Nanyong Sun

[permalink] [raw]
Subject: [PATCH -next 2/4] riscv: mm: make pmd_bad() check leaf condition

According to the definition in Documentation/vm/arch_pgtable_helpers.rst,
pmd_bad() tests for a non-table mapped PMD, so it should also
return true when the PMD is a leaf page.

Signed-off-by: Nanyong Sun <[email protected]>
---
arch/riscv/include/asm/pgtable.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index f489e412f..f7fc47c58 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -160,7 +160,7 @@ static inline int pmd_none(pmd_t pmd)

static inline int pmd_bad(pmd_t pmd)
{
- return !pmd_present(pmd);
+ return !pmd_present(pmd) || (pmd_val(pmd) & _PAGE_LEAF);
}

#define pmd_leaf pmd_leaf
--
2.25.1

2021-05-06 08:28:04

by Palmer Dabbelt

[permalink] [raw]
Subject: Re: [PATCH -next 0/4] THP support for RISCV

On Fri, 30 Apr 2021 01:28:46 PDT (-0700), [email protected] wrote:
> This series brings transparent huge pages to RISCV on 64-bit.
>
> I have tested these testcases of vm selftest under qemu-riscv64:
> khugepaged
> split_huge_page_test
> transhuge-stress
>
> Nanyong Sun (4):
> riscv: mm: add _PAGE_LEAF macro
> riscv: mm: make pmd_bad() check leaf condition
> riscv: mm: add param stride for __sbi_tlb_flush_range
> riscv: mm: add THP support on 64-bit
>
> arch/riscv/Kconfig | 1 +
> arch/riscv/include/asm/pgtable-64.h | 3 +-
> arch/riscv/include/asm/pgtable-bits.h | 5 +
> arch/riscv/include/asm/pgtable.h | 164 +++++++++++++++++++++++++-
> arch/riscv/mm/tlbflush.c | 17 ++-
> 5 files changed, 178 insertions(+), 12 deletions(-)

Thanks. These were too late for the current merge window. We still
have some stuff in flight, I'll take a look after things calm down a
bit.

2021-05-22 18:56:57

by Palmer Dabbelt

[permalink] [raw]
Subject: Re: [PATCH -next 0/4] THP support for RISCV

On Fri, 30 Apr 2021 01:28:46 PDT (-0700), [email protected] wrote:
> This series brings transparent huge pages to RISCV on 64-bit.
>
> I have tested these testcases of vm selftest under qemu-riscv64:
> khugepaged
> split_huge_page_test
> transhuge-stress
>
> Nanyong Sun (4):
> riscv: mm: add _PAGE_LEAF macro
> riscv: mm: make pmd_bad() check leaf condition
> riscv: mm: add param stride for __sbi_tlb_flush_range
> riscv: mm: add THP support on 64-bit
>
> arch/riscv/Kconfig | 1 +
> arch/riscv/include/asm/pgtable-64.h | 3 +-
> arch/riscv/include/asm/pgtable-bits.h | 5 +
> arch/riscv/include/asm/pgtable.h | 164 +++++++++++++++++++++++++-
> arch/riscv/mm/tlbflush.c | 17 ++-
> 5 files changed, 178 insertions(+), 12 deletions(-)

Thanks, these are on for-next.

2021-05-25 12:16:29

by Geert Uytterhoeven

[permalink] [raw]
Subject: Re: [PATCH -next 4/4] riscv: mm: add THP support on 64-bit

Hi Nanyong,

On Fri, Apr 30, 2021 at 9:58 AM Nanyong Sun <[email protected]> wrote:
> Bring Transparent HugePage support to riscv. A
> transparent huge page is always represented as a pmd.
>
> Signed-off-by: Nanyong Sun <[email protected]>

Thanks for your patch, which is now commit e88b333142e4aba7 ("riscv:
mm: add THP support on 64-bit") in riscv/for-next.

> --- a/arch/riscv/include/asm/pgtable.h
> +++ b/arch/riscv/include/asm/pgtable.h

> @@ -438,6 +459,141 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
> return ptep_test_and_clear_young(vma, address, ptep);
> }
>
> +/*
> + * THP functions
> + */
> +static inline pmd_t pte_pmd(pte_t pte)
> +{
> + return __pmd(pte_val(pte));
> +}
> +
> +static inline pmd_t pmd_mkhuge(pmd_t pmd)
> +{
> + return pmd;
> +}
> +
> +static inline pmd_t pmd_mkinvalid(pmd_t pmd)
> +{
> + return __pmd(pmd_val(pmd) & ~(_PAGE_PRESENT|_PAGE_PROT_NONE));
> +}
> +
> +#define __pmd_to_phys(pmd) (pmd_val(pmd) >> _PAGE_PFN_SHIFT << PAGE_SHIFT)
> +
> +static inline unsigned long pmd_pfn(pmd_t pmd)
> +{
> + return ((__pmd_to_phys(pmd) & PMD_MASK) >> PAGE_SHIFT);
> +}
> +
> +static inline pmd_t mk_pmd(struct page *page, pgprot_t prot)
> +{
> + return pfn_pmd(page_to_pfn(page), prot);
> +}
> +
> +static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
> +{
> + return pte_pmd(pte_modify(pmd_pte(pmd), newprot));
> +}
> +
> +#define pmd_write pmd_write
> +static inline int pmd_write(pmd_t pmd)
> +{
> + return pte_write(pmd_pte(pmd));
> +}
> +
> +static inline int pmd_dirty(pmd_t pmd)
> +{
> + return pte_dirty(pmd_pte(pmd));
> +}
> +
> +static inline int pmd_young(pmd_t pmd)
> +{
> + return pte_young(pmd_pte(pmd));
> +}
> +
> +static inline pmd_t pmd_mkold(pmd_t pmd)
> +{
> + return pte_pmd(pte_mkold(pmd_pte(pmd)));
> +}
> +
> +static inline pmd_t pmd_mkyoung(pmd_t pmd)
> +{
> + return pte_pmd(pte_mkyoung(pmd_pte(pmd)));
> +}
> +
> +static inline pmd_t pmd_mkwrite(pmd_t pmd)
> +{
> + return pte_pmd(pte_mkwrite(pmd_pte(pmd)));
> +}
> +
> +static inline pmd_t pmd_wrprotect(pmd_t pmd)
> +{
> + return pte_pmd(pte_wrprotect(pmd_pte(pmd)));
> +}
> +
> +static inline pmd_t pmd_mkclean(pmd_t pmd)
> +{
> + return pte_pmd(pte_mkclean(pmd_pte(pmd)));
> +}
> +
> +static inline pmd_t pmd_mkdirty(pmd_t pmd)
> +{
> + return pte_pmd(pte_mkdirty(pmd_pte(pmd)));
> +}
> +
> +static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
> + pmd_t *pmdp, pmd_t pmd)
> +{
> + return set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd));
> +}

This conflicts with the existing definition recently added in commit
756a8896b6166bba ("riscv: mremap speedup - enable HAVE_MOVE_PUD and
HAVE_MOVE_PMD"):

arch/riscv/include/asm/pgtable.h:579:20: error: redefinition of ‘set_pmd_at’
579 | static inline void set_pmd_at(struct mm_struct *mm,
unsigned long addr,
| ^~~~~~~~~~
arch/riscv/include/asm/pgtable.h:420:20: note: previous definition
of ‘set_pmd_at’ was here
420 | static inline void set_pmd_at(struct mm_struct *mm,
unsigned long addr,
| ^~~~~~~~~~

> +
> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> +static inline int pmd_trans_huge(pmd_t pmd)
> +{
> + return pmd_leaf(pmd);
> +}
> +
> +#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
> +static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
> + unsigned long address, pmd_t *pmdp,
> + pmd_t entry, int dirty)
> +{
> + return ptep_set_access_flags(vma, address, (pte_t *)pmdp, pmd_pte(entry), dirty);
> +}
> +
> +#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
> +static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
> + unsigned long address, pmd_t *pmdp)
> +{
> + return ptep_test_and_clear_young(vma, address, (pte_t *)pmdp);
> +}
> +
> +#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
> +static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
> + unsigned long address, pmd_t *pmdp)
> +{
> + return pte_pmd(ptep_get_and_clear(mm, address, (pte_t *)pmdp));
> +}
> +
> +#define __HAVE_ARCH_PMDP_SET_WRPROTECT
> +static inline void pmdp_set_wrprotect(struct mm_struct *mm,
> + unsigned long address, pmd_t *pmdp)
> +{
> + ptep_set_wrprotect(mm, address, (pte_t *)pmdp);
> +}
> +
> +#define pmdp_establish pmdp_establish
> +static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
> + unsigned long address, pmd_t *pmdp, pmd_t pmd)
> +{
> + return __pmd(atomic_long_xchg((atomic_long_t *)pmdp, pmd_val(pmd)));
> +}
> +
> +#define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
> +void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
> + unsigned long end);
> +
> +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
> +
> /*
> * Encode and decode a swap entry
> *

Gr{oetje,eeting}s,

Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- [email protected]

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds

2021-05-26 06:14:03

by Palmer Dabbelt

[permalink] [raw]
Subject: Re: [PATCH -next 4/4] riscv: mm: add THP support on 64-bit

On Tue, 25 May 2021 05:14:45 PDT (-0700), [email protected] wrote:
> Hi Nanyong,
>
> On Fri, Apr 30, 2021 at 9:58 AM Nanyong Sun <[email protected]> wrote:
>> Bring Transparent HugePage support to riscv. A
>> transparent huge page is always represented as a pmd.
>>
>> Signed-off-by: Nanyong Sun <[email protected]>
>
> Thanks for your patch, which is now commit e88b333142e4aba7 ("riscv:
> mm: add THP support on 64-bit") in riscv/for-next.
>
>> --- a/arch/riscv/include/asm/pgtable.h
>> +++ b/arch/riscv/include/asm/pgtable.h
>
>> @@ -438,6 +459,141 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
>> return ptep_test_and_clear_young(vma, address, ptep);
>> }
>>
>> +/*
>> + * THP functions
>> + */
>> +static inline pmd_t pte_pmd(pte_t pte)
>> +{
>> + return __pmd(pte_val(pte));
>> +}
>> +
>> +static inline pmd_t pmd_mkhuge(pmd_t pmd)
>> +{
>> + return pmd;
>> +}
>> +
>> +static inline pmd_t pmd_mkinvalid(pmd_t pmd)
>> +{
>> + return __pmd(pmd_val(pmd) & ~(_PAGE_PRESENT|_PAGE_PROT_NONE));
>> +}
>> +
>> +#define __pmd_to_phys(pmd) (pmd_val(pmd) >> _PAGE_PFN_SHIFT << PAGE_SHIFT)
>> +
>> +static inline unsigned long pmd_pfn(pmd_t pmd)
>> +{
>> + return ((__pmd_to_phys(pmd) & PMD_MASK) >> PAGE_SHIFT);
>> +}
>> +
>> +static inline pmd_t mk_pmd(struct page *page, pgprot_t prot)
>> +{
>> + return pfn_pmd(page_to_pfn(page), prot);
>> +}
>> +
>> +static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
>> +{
>> + return pte_pmd(pte_modify(pmd_pte(pmd), newprot));
>> +}
>> +
>> +#define pmd_write pmd_write
>> +static inline int pmd_write(pmd_t pmd)
>> +{
>> + return pte_write(pmd_pte(pmd));
>> +}
>> +
>> +static inline int pmd_dirty(pmd_t pmd)
>> +{
>> + return pte_dirty(pmd_pte(pmd));
>> +}
>> +
>> +static inline int pmd_young(pmd_t pmd)
>> +{
>> + return pte_young(pmd_pte(pmd));
>> +}
>> +
>> +static inline pmd_t pmd_mkold(pmd_t pmd)
>> +{
>> + return pte_pmd(pte_mkold(pmd_pte(pmd)));
>> +}
>> +
>> +static inline pmd_t pmd_mkyoung(pmd_t pmd)
>> +{
>> + return pte_pmd(pte_mkyoung(pmd_pte(pmd)));
>> +}
>> +
>> +static inline pmd_t pmd_mkwrite(pmd_t pmd)
>> +{
>> + return pte_pmd(pte_mkwrite(pmd_pte(pmd)));
>> +}
>> +
>> +static inline pmd_t pmd_wrprotect(pmd_t pmd)
>> +{
>> + return pte_pmd(pte_wrprotect(pmd_pte(pmd)));
>> +}
>> +
>> +static inline pmd_t pmd_mkclean(pmd_t pmd)
>> +{
>> + return pte_pmd(pte_mkclean(pmd_pte(pmd)));
>> +}
>> +
>> +static inline pmd_t pmd_mkdirty(pmd_t pmd)
>> +{
>> + return pte_pmd(pte_mkdirty(pmd_pte(pmd)));
>> +}
>> +
>> +static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
>> + pmd_t *pmdp, pmd_t pmd)
>> +{
>> + return set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd));
>> +}
>
> This conflicts with the existing definition recently added in commit
> 756a8896b6166bba ("riscv: mremap speedup - enable HAVE_MOVE_PUD and
> HAVE_MOVE_PMD"):
>
> arch/riscv/include/asm/pgtable.h:579:20: error: redefinition of ‘set_pmd_at’
> 579 | static inline void set_pmd_at(struct mm_struct *mm,
> unsigned long addr,
> | ^~~~~~~~~~
> arch/riscv/include/asm/pgtable.h:420:20: note: previous definition
> of ‘set_pmd_at’ was here
> 420 | static inline void set_pmd_at(struct mm_struct *mm,
> unsigned long addr,
> | ^~~~~~~~~~

Sorry about that. I was trying to re-arrange for-next to keep patch
sets on branches to avoid the rebasing like we had last time, but that
makes merges like this more of a headache and just makes the whole thing
a bit clunky.

I've fixed this up, along with just making the history linear again.

Thanks!

>
>> +
>> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
>> +static inline int pmd_trans_huge(pmd_t pmd)
>> +{
>> + return pmd_leaf(pmd);
>> +}
>> +
>> +#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
>> +static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
>> + unsigned long address, pmd_t *pmdp,
>> + pmd_t entry, int dirty)
>> +{
>> + return ptep_set_access_flags(vma, address, (pte_t *)pmdp, pmd_pte(entry), dirty);
>> +}
>> +
>> +#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
>> +static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
>> + unsigned long address, pmd_t *pmdp)
>> +{
>> + return ptep_test_and_clear_young(vma, address, (pte_t *)pmdp);
>> +}
>> +
>> +#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
>> +static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
>> + unsigned long address, pmd_t *pmdp)
>> +{
>> + return pte_pmd(ptep_get_and_clear(mm, address, (pte_t *)pmdp));
>> +}
>> +
>> +#define __HAVE_ARCH_PMDP_SET_WRPROTECT
>> +static inline void pmdp_set_wrprotect(struct mm_struct *mm,
>> + unsigned long address, pmd_t *pmdp)
>> +{
>> + ptep_set_wrprotect(mm, address, (pte_t *)pmdp);
>> +}
>> +
>> +#define pmdp_establish pmdp_establish
>> +static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
>> + unsigned long address, pmd_t *pmdp, pmd_t pmd)
>> +{
>> + return __pmd(atomic_long_xchg((atomic_long_t *)pmdp, pmd_val(pmd)));
>> +}
>> +
>> +#define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
>> +void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
>> + unsigned long end);
>> +
>> +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
>> +
>> /*
>> * Encode and decode a swap entry
>> *
>
> Gr{oetje,eeting}s,
>
> Geert
>
> --
> Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- [email protected]
>
> In personal conversations with technical people, I call myself a hacker. But
> when I'm talking to journalists I just say "programmer" or something like that.
> -- Linus Torvalds