HVO was previously disabled on arm64 [1] due to the lack of the necessary
BBM (break-before-make) logic when changing page tables.
This set of patches fixes this by adding the necessary BBM sequence when
changing page tables, and by supporting vmemmap page fault handling to
fix up kernel address faults if the vmemmap is concurrently accessed.
[1] commit 060a2c92d1b6 ("arm64: mm: hugetlb: Disable HUGETLB_PAGE_OPTIMIZE_VMEMMAP")
Nanyong Sun (3):
mm: HVO: introduce helper function to update and flush pgtable
arm64: mm: HVO: support BBM of vmemmap pgtable safely
arm64: mm: Re-enable OPTIMIZE_HUGETLB_VMEMMAP
arch/arm64/Kconfig | 1 +
arch/arm64/include/asm/esr.h | 4 ++
arch/arm64/include/asm/mmu.h | 20 ++++++++
arch/arm64/mm/fault.c | 94 ++++++++++++++++++++++++++++++++++++
arch/arm64/mm/mmu.c | 28 +++++++++++
mm/hugetlb_vmemmap.c | 55 ++++++++++++++++-----
6 files changed, 190 insertions(+), 12 deletions(-)
--
2.25.1
Now that updates of the vmemmap page table can safely follow the
break-before-make rule on the arm64 architecture, re-enable
HVO on arm64.
Signed-off-by: Nanyong Sun <[email protected]>
---
arch/arm64/Kconfig | 1 +
1 file changed, 1 insertion(+)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 7b071a00425d..43e3d5576fb2 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -103,6 +103,7 @@ config ARM64
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
select ARCH_WANT_FRAME_POINTERS
select ARCH_WANT_HUGE_PMD_SHARE if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36)
+ select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP
select ARCH_WANT_LD_ORPHAN_WARN
select ARCH_WANTS_NO_INSTR
select ARCH_WANTS_THP_SWAP if ARM64_4K_PAGES
--
2.25.1
Add pmd/pte update and TLB flush helper functions for updating the page
table. This refactoring patch allows each architecture to implement
its own special logic, in preparation for the arm64 architecture
following the necessary break-before-make sequence when updating
page tables.
Signed-off-by: Nanyong Sun <[email protected]>
---
mm/hugetlb_vmemmap.c | 55 ++++++++++++++++++++++++++++++++++----------
1 file changed, 43 insertions(+), 12 deletions(-)
diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
index 87818ee7f01d..49e8b351def3 100644
--- a/mm/hugetlb_vmemmap.c
+++ b/mm/hugetlb_vmemmap.c
@@ -45,6 +45,37 @@ struct vmemmap_remap_walk {
unsigned long flags;
};
+#ifndef vmemmap_update_pmd
+static inline void vmemmap_update_pmd(unsigned long start,
+ pmd_t *pmd, pte_t *pgtable)
+{
+ pmd_populate_kernel(&init_mm, pmd, pgtable);
+}
+#endif
+
+#ifndef vmemmap_update_pte
+static inline void vmemmap_update_pte(unsigned long addr,
+ pte_t *pte, pte_t entry)
+{
+ set_pte_at(&init_mm, addr, pte, entry);
+}
+#endif
+
+#ifndef flush_tlb_vmemmap_all
+static inline void flush_tlb_vmemmap_all(void)
+{
+ flush_tlb_all();
+}
+#endif
+
+#ifndef flush_tlb_vmemmap_range
+static inline void flush_tlb_vmemmap_range(unsigned long start,
+ unsigned long end)
+{
+ flush_tlb_kernel_range(start, end);
+}
+#endif
+
static int split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start, bool flush)
{
pmd_t __pmd;
@@ -87,9 +118,9 @@ static int split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start, bool flush)
/* Make pte visible before pmd. See comment in pmd_install(). */
smp_wmb();
- pmd_populate_kernel(&init_mm, pmd, pgtable);
+ vmemmap_update_pmd(start, pmd, pgtable);
if (flush)
- flush_tlb_kernel_range(start, start + PMD_SIZE);
+ flush_tlb_vmemmap_range(start, start + PMD_SIZE);
} else {
pte_free_kernel(&init_mm, pgtable);
}
@@ -217,7 +248,7 @@ static int vmemmap_remap_range(unsigned long start, unsigned long end,
} while (pgd++, addr = next, addr != end);
if (walk->remap_pte && !(walk->flags & VMEMMAP_REMAP_NO_TLB_FLUSH))
- flush_tlb_kernel_range(start, end);
+ flush_tlb_vmemmap_range(start, end);
return 0;
}
@@ -263,15 +294,15 @@ static void vmemmap_remap_pte(pte_t *pte, unsigned long addr,
/*
* Makes sure that preceding stores to the page contents from
- * vmemmap_remap_free() become visible before the set_pte_at()
- * write.
+ * vmemmap_remap_free() become visible before the
+ * vmemmap_update_pte() write.
*/
smp_wmb();
}
entry = mk_pte(walk->reuse_page, pgprot);
list_add(&page->lru, walk->vmemmap_pages);
- set_pte_at(&init_mm, addr, pte, entry);
+ vmemmap_update_pte(addr, pte, entry);
}
/*
@@ -310,10 +341,10 @@ static void vmemmap_restore_pte(pte_t *pte, unsigned long addr,
/*
* Makes sure that preceding stores to the page contents become visible
- * before the set_pte_at() write.
+ * before the vmemmap_update_pte() write.
*/
smp_wmb();
- set_pte_at(&init_mm, addr, pte, mk_pte(page, pgprot));
+ vmemmap_update_pte(addr, pte, mk_pte(page, pgprot));
}
/**
@@ -576,7 +607,7 @@ long hugetlb_vmemmap_restore_folios(const struct hstate *h,
}
if (restored)
- flush_tlb_all();
+ flush_tlb_vmemmap_all();
if (!ret)
ret = restored;
return ret;
@@ -744,7 +775,7 @@ void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_l
break;
}
- flush_tlb_all();
+ flush_tlb_vmemmap_all();
list_for_each_entry(folio, folio_list, lru) {
int ret = __hugetlb_vmemmap_optimize_folio(h, folio,
@@ -760,7 +791,7 @@ void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_l
* allowing more vmemmap remaps to occur.
*/
if (ret == -ENOMEM && !list_empty(&vmemmap_pages)) {
- flush_tlb_all();
+ flush_tlb_vmemmap_all();
free_vmemmap_page_list(&vmemmap_pages);
INIT_LIST_HEAD(&vmemmap_pages);
__hugetlb_vmemmap_optimize_folio(h, folio,
@@ -769,7 +800,7 @@ void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_l
}
}
- flush_tlb_all();
+ flush_tlb_vmemmap_all();
free_vmemmap_page_list(&vmemmap_pages);
}
--
2.25.1
On 2023/12/14 15:39, Nanyong Sun wrote:
> Add pmd/pte update and tlb flush helper function to update page
> table. This refactoring patch is designed to facilitate each
> architecture to implement its own special logic in preparation
> for the arm64 architecture to follow the necessary break-before-make
> sequence when updating page tables.
>
> Signed-off-by: Nanyong Sun <[email protected]>
> ---
> mm/hugetlb_vmemmap.c | 55 ++++++++++++++++++++++++++++++++++----------
> 1 file changed, 43 insertions(+), 12 deletions(-)
>
> diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
> index 87818ee7f01d..49e8b351def3 100644
> --- a/mm/hugetlb_vmemmap.c
> +++ b/mm/hugetlb_vmemmap.c
> @@ -45,6 +45,37 @@ struct vmemmap_remap_walk {
> unsigned long flags;
> };
>
> +#ifndef vmemmap_update_pmd
> +static inline void vmemmap_update_pmd(unsigned long start,
> + pmd_t *pmd, pte_t *pgtable)
> +{
> + pmd_populate_kernel(&init_mm, pmd, pgtable);
> +}
> +#endif
> +
> +#ifndef vmemmap_update_pte
> +static inline void vmemmap_update_pte(unsigned long addr,
> + pte_t *pte, pte_t entry)
> +{
> + set_pte_at(&init_mm, addr, pte, entry);
> +}
> +#endif
> +
> +#ifndef flush_tlb_vmemmap_all
> +static inline void flush_tlb_vmemmap_all(void)
> +{
> + flush_tlb_all();
> +}
> +#endif
> +
> +#ifndef flush_tlb_vmemmap_range
> +static inline void flush_tlb_vmemmap_range(unsigned long start,
> + unsigned long end)
> +{
> + flush_tlb_kernel_range(start, end);
> +}
> +#endif
I'd like to rename both tlb-flush helpers to vmemmap_flush_tlb_all/range
since the other helpers are all prefixed with "vmemmap". That would be more
consistent to me.
Otherwise LGTM. Thanks.
Reviewed-by: Muchun Song <[email protected]>
> +
> static int split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start, bool flush)
> {
> pmd_t __pmd;
> @@ -87,9 +118,9 @@ static int split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start, bool flush)
>
> /* Make pte visible before pmd. See comment in pmd_install(). */
> smp_wmb();
> - pmd_populate_kernel(&init_mm, pmd, pgtable);
> + vmemmap_update_pmd(start, pmd, pgtable);
> if (flush)
> - flush_tlb_kernel_range(start, start + PMD_SIZE);
> + flush_tlb_vmemmap_range(start, start + PMD_SIZE);
> } else {
> pte_free_kernel(&init_mm, pgtable);
> }
> @@ -217,7 +248,7 @@ static int vmemmap_remap_range(unsigned long start, unsigned long end,
> } while (pgd++, addr = next, addr != end);
>
> if (walk->remap_pte && !(walk->flags & VMEMMAP_REMAP_NO_TLB_FLUSH))
> - flush_tlb_kernel_range(start, end);
> + flush_tlb_vmemmap_range(start, end);
>
> return 0;
> }
> @@ -263,15 +294,15 @@ static void vmemmap_remap_pte(pte_t *pte, unsigned long addr,
>
> /*
> * Makes sure that preceding stores to the page contents from
> - * vmemmap_remap_free() become visible before the set_pte_at()
> - * write.
> + * vmemmap_remap_free() become visible before the
> + * vmemmap_update_pte() write.
> */
> smp_wmb();
> }
>
> entry = mk_pte(walk->reuse_page, pgprot);
> list_add(&page->lru, walk->vmemmap_pages);
> - set_pte_at(&init_mm, addr, pte, entry);
> + vmemmap_update_pte(addr, pte, entry);
> }
>
> /*
> @@ -310,10 +341,10 @@ static void vmemmap_restore_pte(pte_t *pte, unsigned long addr,
>
> /*
> * Makes sure that preceding stores to the page contents become visible
> - * before the set_pte_at() write.
> + * before the vmemmap_update_pte() write.
> */
> smp_wmb();
> - set_pte_at(&init_mm, addr, pte, mk_pte(page, pgprot));
> + vmemmap_update_pte(addr, pte, mk_pte(page, pgprot));
> }
>
> /**
> @@ -576,7 +607,7 @@ long hugetlb_vmemmap_restore_folios(const struct hstate *h,
> }
>
> if (restored)
> - flush_tlb_all();
> + flush_tlb_vmemmap_all();
> if (!ret)
> ret = restored;
> return ret;
> @@ -744,7 +775,7 @@ void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_l
> break;
> }
>
> - flush_tlb_all();
> + flush_tlb_vmemmap_all();
>
> list_for_each_entry(folio, folio_list, lru) {
> int ret = __hugetlb_vmemmap_optimize_folio(h, folio,
> @@ -760,7 +791,7 @@ void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_l
> * allowing more vmemmap remaps to occur.
> */
> if (ret == -ENOMEM && !list_empty(&vmemmap_pages)) {
> - flush_tlb_all();
> + flush_tlb_vmemmap_all();
> free_vmemmap_page_list(&vmemmap_pages);
> INIT_LIST_HEAD(&vmemmap_pages);
> __hugetlb_vmemmap_optimize_folio(h, folio,
> @@ -769,7 +800,7 @@ void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_l
> }
> }
>
> - flush_tlb_all();
> + flush_tlb_vmemmap_all();
> free_vmemmap_page_list(&vmemmap_pages);
> }
>
> On Dec 14, 2023, at 15:39, Nanyong Sun <[email protected]> wrote:
>
> Now update of vmemmap page table can follow the rule of
> break-before-make safely for arm64 architecture, re-enable
> HVO on arm64.
>
> Signed-off-by: Nanyong Sun <[email protected]>
Reviewed-by: Muchun Song <[email protected]>
Thanks.
On 2023/12/14 15:39, Nanyong Sun wrote:
> Add pmd/pte update and tlb flush helper function to update page
> table. This refactoring patch is designed to facilitate each
> architecture to implement its own special logic in preparation
> for the arm64 architecture to follow the necessary break-before-make
> sequence when updating page tables.
>
> Signed-off-by: Nanyong Sun <[email protected]>
> ---
> mm/hugetlb_vmemmap.c | 55 ++++++++++++++++++++++++++++++++++----------
> 1 file changed, 43 insertions(+), 12 deletions(-)
>
> diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
> index 87818ee7f01d..49e8b351def3 100644
> --- a/mm/hugetlb_vmemmap.c
> +++ b/mm/hugetlb_vmemmap.c
> @@ -45,6 +45,37 @@ struct vmemmap_remap_walk {
> unsigned long flags;
> };
>
> +#ifndef vmemmap_update_pmd
> +static inline void vmemmap_update_pmd(unsigned long start,
> + pmd_t *pmd, pte_t *pgtable)
pgtable -> ptep
> +{
> + pmd_populate_kernel(&init_mm, pmd, pgtable);
> +}
> +#endif
> +
> +#ifndef vmemmap_update_pte
> +static inline void vmemmap_update_pte(unsigned long addr,
> + pte_t *pte, pte_t entry)
pte -> ptep
entry -> pte
> +{
> + set_pte_at(&init_mm, addr, pte, entry);
> +}
> +#endif
On 2023/12/15 11:36, Muchun Song wrote:
>
>
> On 2023/12/14 15:39, Nanyong Sun wrote:
>> Add pmd/pte update and tlb flush helper function to update page
>> table. This refactoring patch is designed to facilitate each
>> architecture to implement its own special logic in preparation
>> for the arm64 architecture to follow the necessary break-before-make
>> sequence when updating page tables.
>>
>> Signed-off-by: Nanyong Sun <[email protected]>
>> ---
>> mm/hugetlb_vmemmap.c | 55 ++++++++++++++++++++++++++++++++++----------
>> 1 file changed, 43 insertions(+), 12 deletions(-)
>>
>> diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
>> index 87818ee7f01d..49e8b351def3 100644
>> --- a/mm/hugetlb_vmemmap.c
>> +++ b/mm/hugetlb_vmemmap.c
>> @@ -45,6 +45,37 @@ struct vmemmap_remap_walk {
>> unsigned long flags;
>> };
>> +#ifndef vmemmap_update_pmd
>> +static inline void vmemmap_update_pmd(unsigned long start,
>> + pmd_t *pmd, pte_t *pgtable)
>> +{
>> + pmd_populate_kernel(&init_mm, pmd, pgtable);
>> +}
>> +#endif
>> +
>> +#ifndef vmemmap_update_pte
>> +static inline void vmemmap_update_pte(unsigned long addr,
>> + pte_t *pte, pte_t entry)
>> +{
>> + set_pte_at(&init_mm, addr, pte, entry);
>> +}
>> +#endif
>> +
>> +#ifndef flush_tlb_vmemmap_all
>> +static inline void flush_tlb_vmemmap_all(void)
>> +{
>> + flush_tlb_all();
>> +}
>> +#endif
>> +
>> +#ifndef flush_tlb_vmemmap_range
>> +static inline void flush_tlb_vmemmap_range(unsigned long start,
>> + unsigned long end)
>> +{
>> + flush_tlb_kernel_range(start, end);
>> +}
>> +#endif
>
> I'd like to rename both tlb-flush helpers to vmemmap_flush_tlb_all/range
> since other helpers all are prefixed with "vmemmap". It'll be more
> consistent for me.
>
> Otherwise LGTM. Thanks.
>
> Reviewed-by: Muchun Song <[email protected]>
Hi Muchun,
Thank you for your review of this patch set. I'll fix them and send out
the v2 version later.
>
>> +
>> static int split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start,
>> bool flush)
>> {
>> pmd_t __pmd;
>> @@ -87,9 +118,9 @@ static int split_vmemmap_huge_pmd(pmd_t *pmd,
>> unsigned long start, bool flush)
>> /* Make pte visible before pmd. See comment in
>> pmd_install(). */
>> smp_wmb();
>> - pmd_populate_kernel(&init_mm, pmd, pgtable);
>> + vmemmap_update_pmd(start, pmd, pgtable);
>> if (flush)
>> - flush_tlb_kernel_range(start, start + PMD_SIZE);
>> + flush_tlb_vmemmap_range(start, start + PMD_SIZE);
>> } else {
>> pte_free_kernel(&init_mm, pgtable);
>> }
>> @@ -217,7 +248,7 @@ static int vmemmap_remap_range(unsigned long
>> start, unsigned long end,
>> } while (pgd++, addr = next, addr != end);
>> if (walk->remap_pte && !(walk->flags &
>> VMEMMAP_REMAP_NO_TLB_FLUSH))
>> - flush_tlb_kernel_range(start, end);
>> + flush_tlb_vmemmap_range(start, end);
>> return 0;
>> }
>> @@ -263,15 +294,15 @@ static void vmemmap_remap_pte(pte_t *pte,
>> unsigned long addr,
>> /*
>> * Makes sure that preceding stores to the page contents from
>> - * vmemmap_remap_free() become visible before the set_pte_at()
>> - * write.
>> + * vmemmap_remap_free() become visible before the
>> + * vmemmap_update_pte() write.
>> */
>> smp_wmb();
>> }
>> entry = mk_pte(walk->reuse_page, pgprot);
>> list_add(&page->lru, walk->vmemmap_pages);
>> - set_pte_at(&init_mm, addr, pte, entry);
>> + vmemmap_update_pte(addr, pte, entry);
>> }
>> /*
>> @@ -310,10 +341,10 @@ static void vmemmap_restore_pte(pte_t *pte,
>> unsigned long addr,
>> /*
>> * Makes sure that preceding stores to the page contents become
>> visible
>> - * before the set_pte_at() write.
>> + * before the vmemmap_update_pte() write.
>> */
>> smp_wmb();
>> - set_pte_at(&init_mm, addr, pte, mk_pte(page, pgprot));
>> + vmemmap_update_pte(addr, pte, mk_pte(page, pgprot));
>> }
>> /**
>> @@ -576,7 +607,7 @@ long hugetlb_vmemmap_restore_folios(const struct
>> hstate *h,
>> }
>> if (restored)
>> - flush_tlb_all();
>> + flush_tlb_vmemmap_all();
>> if (!ret)
>> ret = restored;
>> return ret;
>> @@ -744,7 +775,7 @@ void hugetlb_vmemmap_optimize_folios(struct
>> hstate *h, struct list_head *folio_l
>> break;
>> }
>> - flush_tlb_all();
>> + flush_tlb_vmemmap_all();
>> list_for_each_entry(folio, folio_list, lru) {
>> int ret = __hugetlb_vmemmap_optimize_folio(h, folio,
>> @@ -760,7 +791,7 @@ void hugetlb_vmemmap_optimize_folios(struct
>> hstate *h, struct list_head *folio_l
>> * allowing more vmemmap remaps to occur.
>> */
>> if (ret == -ENOMEM && !list_empty(&vmemmap_pages)) {
>> - flush_tlb_all();
>> + flush_tlb_vmemmap_all();
>> free_vmemmap_page_list(&vmemmap_pages);
>> INIT_LIST_HEAD(&vmemmap_pages);
>> __hugetlb_vmemmap_optimize_folio(h, folio,
>> @@ -769,7 +800,7 @@ void hugetlb_vmemmap_optimize_folios(struct
>> hstate *h, struct list_head *folio_l
>> }
>> }
>> - flush_tlb_all();
>> + flush_tlb_vmemmap_all();
>> free_vmemmap_page_list(&vmemmap_pages);
>> }
>
> .
On 2023/12/15 14:16, Kefeng Wang wrote:
>
>
> On 2023/12/14 15:39, Nanyong Sun wrote:
>> Add pmd/pte update and tlb flush helper function to update page
>> table. This refactoring patch is designed to facilitate each
>> architecture to implement its own special logic in preparation
>> for the arm64 architecture to follow the necessary break-before-make
>> sequence when updating page tables.
>>
>> Signed-off-by: Nanyong Sun <[email protected]>
>> ---
>> mm/hugetlb_vmemmap.c | 55 ++++++++++++++++++++++++++++++++++----------
>> 1 file changed, 43 insertions(+), 12 deletions(-)
>>
>> diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
>> index 87818ee7f01d..49e8b351def3 100644
>> --- a/mm/hugetlb_vmemmap.c
>> +++ b/mm/hugetlb_vmemmap.c
>> @@ -45,6 +45,37 @@ struct vmemmap_remap_walk {
>> unsigned long flags;
>> };
>> +#ifndef vmemmap_update_pmd
>> +static inline void vmemmap_update_pmd(unsigned long start,
>> + pmd_t *pmd, pte_t *pgtable)
>
> pgtable -> ptep
Hi Kefeng,
Thank you for your review of this patch set. I'll fix them and send out
the v2 version later.
>
>> +{
>> + pmd_populate_kernel(&init_mm, pmd, pgtable);
>> +}
>> +#endif
>> +
>> +#ifndef vmemmap_update_pte
>> +static inline void vmemmap_update_pte(unsigned long addr,
>> + pte_t *pte, pte_t entry)
>
> pte -> ptep
> entry -> pte
>
>> +{
>> + set_pte_at(&init_mm, addr, pte, entry);
>> +}
>> +#endif
>
> .