Page table check performs extra verification at the time when new
pages become accessible from userspace by getting their page
table entries (PTEs, PMDs, etc.) added into the table. It is currently
supported on x86 [1].
This patchset makes some simple changes that make it easier to support
new architectures, and then enables this feature on arm64 and RISC-V.
[1]https://lore.kernel.org/lkml/[email protected]/
v6 -> v7:
According to Anshuman and Catalin's suggestion, optimized code
implementation:
1. __set_pte_at() code specification modification.
2. pmdp_huge_get_and_clear() implementation optimization.
v5 -> v6:
According to Anshuman's suggestion, optimized partial implementation and
commit message:
1. Remove redundant IS_ENABLED() in ptep_clear().
2. Remove redundant __HAVE_ARCH_PTEP_CLEAR usage in pgtable.h.
3. Remove redundant __ptep_get_and_clear() on arm64 and riscv.
v4 -> v5:
According to Anshuman's suggestion, using PxD_SIZE instead of
PxD_PAGE_SIZE in mm/page_table_check.c and it is checked by Pasha.
v3 -> v4:
Adapt to next-20220414
v2 -> v3:
Modify ptep_clear() in include/linux/pgtable.h, using IS_ENABLED according
to the suggestions of Pasha.
v1 -> v2:
1. Fix arm64's pte/pmd/pud_user_accessible_page() according to the
suggestions of Catalin.
2. Also fix riscv's pte/pmd/pud_user_accessible_page().
Kefeng Wang (2):
mm: page_table_check: move pxx_user_accessible_page into x86
arm64/mm: Enable ARCH_SUPPORTS_PAGE_TABLE_CHECK
Tong Tiangen (4):
mm: page_table_check: using PxD_SIZE instead of PxD_PAGE_SIZE
mm: page_table_check: add hooks to public helpers
mm: remove __HAVE_ARCH_PTEP_CLEAR in pgtable.h
riscv/mm: Enable ARCH_SUPPORTS_PAGE_TABLE_CHECK
arch/arm64/Kconfig | 1 +
arch/arm64/include/asm/pgtable.h | 61 ++++++++++++++++++++++++---
arch/riscv/Kconfig | 1 +
arch/riscv/include/asm/pgtable.h | 71 +++++++++++++++++++++++++++++---
arch/x86/include/asm/pgtable.h | 27 +++++++-----
include/linux/pgtable.h | 21 ++++++----
mm/page_table_check.c | 25 ++---------
7 files changed, 156 insertions(+), 51 deletions(-)
--
2.25.1
From: Kefeng Wang <[email protected]>
The pxx_user_accessible_page() helpers check PTE bits, which makes them
architecture-specific code, so move them into x86's pgtable.h.
Moving these helpers out makes the page table check framework
platform independent.
Signed-off-by: Kefeng Wang <[email protected]>
Signed-off-by: Tong Tiangen <[email protected]>
Acked-by: Pasha Tatashin <[email protected]>
Reviewed-by: Anshuman Khandual <[email protected]>
---
arch/x86/include/asm/pgtable.h | 17 +++++++++++++++++
mm/page_table_check.c | 17 -----------------
2 files changed, 17 insertions(+), 17 deletions(-)
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 0821f87d495f..46fa65d818bd 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -1447,6 +1447,23 @@ static inline bool arch_faults_on_old_pte(void)
return false;
}
+#ifdef CONFIG_PAGE_TABLE_CHECK
+static inline bool pte_user_accessible_page(pte_t pte)
+{
+ return (pte_val(pte) & _PAGE_PRESENT) && (pte_val(pte) & _PAGE_USER);
+}
+
+static inline bool pmd_user_accessible_page(pmd_t pmd)
+{
+ return pmd_leaf(pmd) && (pmd_val(pmd) & _PAGE_PRESENT) && (pmd_val(pmd) & _PAGE_USER);
+}
+
+static inline bool pud_user_accessible_page(pud_t pud)
+{
+ return pud_leaf(pud) && (pud_val(pud) & _PAGE_PRESENT) && (pud_val(pud) & _PAGE_USER);
+}
+#endif
+
#endif /* __ASSEMBLY__ */
#endif /* _ASM_X86_PGTABLE_H */
diff --git a/mm/page_table_check.c b/mm/page_table_check.c
index eb0d0b71cdf6..3692bea2ea2c 100644
--- a/mm/page_table_check.c
+++ b/mm/page_table_check.c
@@ -52,23 +52,6 @@ static struct page_table_check *get_page_table_check(struct page_ext *page_ext)
return (void *)(page_ext) + page_table_check_ops.offset;
}
-static inline bool pte_user_accessible_page(pte_t pte)
-{
- return (pte_val(pte) & _PAGE_PRESENT) && (pte_val(pte) & _PAGE_USER);
-}
-
-static inline bool pmd_user_accessible_page(pmd_t pmd)
-{
- return pmd_leaf(pmd) && (pmd_val(pmd) & _PAGE_PRESENT) &&
- (pmd_val(pmd) & _PAGE_USER);
-}
-
-static inline bool pud_user_accessible_page(pud_t pud)
-{
- return pud_leaf(pud) && (pud_val(pud) & _PAGE_PRESENT) &&
- (pud_val(pud) & _PAGE_USER);
-}
-
/*
* An enty is removed from the page table, decrement the counters for that page
* verify that it is of correct type and counters do not become negative.
--
2.25.1
From: Kefeng Wang <[email protected]>
As in commit d283d422c6c4 ("x86: mm: add x86_64 support for page table
check"), enable ARCH_SUPPORTS_PAGE_TABLE_CHECK on arm64.
Add additional page table check stubs to the page table helpers; these
stubs can be used to check the existing page table entries.
Signed-off-by: Kefeng Wang <[email protected]>
Signed-off-by: Tong Tiangen <[email protected]>
Reviewed-by: Pasha Tatashin <[email protected]>
---
arch/arm64/Kconfig | 1 +
arch/arm64/include/asm/pgtable.h | 61 ++++++++++++++++++++++++++++----
2 files changed, 56 insertions(+), 6 deletions(-)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 272c75af9302..3055fb5b3fb4 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -92,6 +92,7 @@ config ARM64
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_INT128 if CC_HAS_INT128
select ARCH_SUPPORTS_NUMA_BALANCING
+ select ARCH_SUPPORTS_PAGE_TABLE_CHECK
select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT
select ARCH_WANT_DEFAULT_BPF_JIT
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 8ebf1cec5d90..4e61cde27f9f 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -33,6 +33,7 @@
#include <linux/mmdebug.h>
#include <linux/mm_types.h>
#include <linux/sched.h>
+#include <linux/page_table_check.h>
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
@@ -96,6 +97,7 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
#define pte_young(pte) (!!(pte_val(pte) & PTE_AF))
#define pte_special(pte) (!!(pte_val(pte) & PTE_SPECIAL))
#define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE))
+#define pte_user(pte) (!!(pte_val(pte) & PTE_USER))
#define pte_user_exec(pte) (!(pte_val(pte) & PTE_UXN))
#define pte_cont(pte) (!!(pte_val(pte) & PTE_CONT))
#define pte_devmap(pte) (!!(pte_val(pte) & PTE_DEVMAP))
@@ -312,8 +314,8 @@ static inline void __check_racy_pte_update(struct mm_struct *mm, pte_t *ptep,
__func__, pte_val(old_pte), pte_val(pte));
}
-static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t pte)
+static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte)
{
if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte))
__sync_icache_dcache(pte);
@@ -343,6 +345,13 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
set_pte(ptep, pte);
}
+static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte)
+{
+ page_table_check_pte_set(mm, addr, ptep, pte);
+ return __set_pte_at(mm, addr, ptep, pte);
+}
+
/*
* Huge pte definitions.
*/
@@ -454,6 +463,8 @@ static inline int pmd_trans_huge(pmd_t pmd)
#define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd))
#define pmd_young(pmd) pte_young(pmd_pte(pmd))
#define pmd_valid(pmd) pte_valid(pmd_pte(pmd))
+#define pmd_user(pmd) pte_user(pmd_pte(pmd))
+#define pmd_user_exec(pmd) pte_user_exec(pmd_pte(pmd))
#define pmd_cont(pmd) pte_cont(pmd_pte(pmd))
#define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd)))
#define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd)))
@@ -501,8 +512,19 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd)
#define pud_pfn(pud) ((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT)
#define pfn_pud(pfn,prot) __pud(__phys_to_pud_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))
-#define set_pmd_at(mm, addr, pmdp, pmd) set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd))
-#define set_pud_at(mm, addr, pudp, pud) set_pte_at(mm, addr, (pte_t *)pudp, pud_pte(pud))
+static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, pmd_t pmd)
+{
+ page_table_check_pmd_set(mm, addr, pmdp, pmd);
+ return __set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd));
+}
+
+static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
+ pud_t *pudp, pud_t pud)
+{
+ page_table_check_pud_set(mm, addr, pudp, pud);
+ return __set_pte_at(mm, addr, (pte_t *)pudp, pud_pte(pud));
+}
#define __p4d_to_phys(p4d) __pte_to_phys(p4d_pte(p4d))
#define __phys_to_p4d_val(phys) __phys_to_pte_val(phys)
@@ -643,6 +665,24 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
#define pud_present(pud) pte_present(pud_pte(pud))
#define pud_leaf(pud) (pud_present(pud) && !pud_table(pud))
#define pud_valid(pud) pte_valid(pud_pte(pud))
+#define pud_user(pud) pte_user(pud_pte(pud))
+
+#ifdef CONFIG_PAGE_TABLE_CHECK
+static inline bool pte_user_accessible_page(pte_t pte)
+{
+ return pte_present(pte) && (pte_user(pte) || pte_user_exec(pte));
+}
+
+static inline bool pmd_user_accessible_page(pmd_t pmd)
+{
+ return pmd_present(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd));
+}
+
+static inline bool pud_user_accessible_page(pud_t pud)
+{
+ return pud_present(pud) && pud_user(pud);
+}
+#endif
static inline void set_pud(pud_t *pudp, pud_t pud)
{
@@ -876,7 +916,11 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
unsigned long address, pte_t *ptep)
{
- return __pte(xchg_relaxed(&pte_val(*ptep), 0));
+ pte_t pte = __pte(xchg_relaxed(&pte_val(*ptep), 0));
+
+ page_table_check_pte_clear(mm, address, pte);
+
+ return pte;
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -884,7 +928,11 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
unsigned long address, pmd_t *pmdp)
{
- return pte_pmd(ptep_get_and_clear(mm, address, (pte_t *)pmdp));
+ pmd_t pmd = __pmd(xchg_relaxed(&pmd_val(*pmdp), 0));
+
+ page_table_check_pmd_clear(mm, address, pmd);
+
+ return pmd;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
@@ -918,6 +966,7 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp, pmd_t pmd)
{
+ page_table_check_pmd_set(vma->vm_mm, address, pmdp, pmd);
return __pmd(xchg_relaxed(&pmd_val(*pmdp), pmd_val(pmd)));
}
#endif
--
2.25.1
On 5/7/22 16:31, Tong Tiangen wrote:
> From: Kefeng Wang <[email protected]>
>
> As commit d283d422c6c4 ("x86: mm: add x86_64 support for page table check")
> , enable ARCH_SUPPORTS_PAGE_TABLE_CHECK on arm64.
>
> Add additional page table check stubs for page table helpers, these stubs
> can be used to check the existing page table entries.
>
> Signed-off-by: Kefeng Wang <[email protected]>
> Signed-off-by: Tong Tiangen <[email protected]>
> Reviewed-by: Pasha Tatashin <[email protected]>
Reviewed-by: Anshuman Khandual <[email protected]>
> ---
> arch/arm64/Kconfig | 1 +
> arch/arm64/include/asm/pgtable.h | 61 ++++++++++++++++++++++++++++----
> 2 files changed, 56 insertions(+), 6 deletions(-)
>
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 272c75af9302..3055fb5b3fb4 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -92,6 +92,7 @@ config ARM64
> select ARCH_SUPPORTS_ATOMIC_RMW
> select ARCH_SUPPORTS_INT128 if CC_HAS_INT128
> select ARCH_SUPPORTS_NUMA_BALANCING
> + select ARCH_SUPPORTS_PAGE_TABLE_CHECK
> select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT
> select ARCH_WANT_DEFAULT_BPF_JIT
> select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
> diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
> index 8ebf1cec5d90..4e61cde27f9f 100644
> --- a/arch/arm64/include/asm/pgtable.h
> +++ b/arch/arm64/include/asm/pgtable.h
> @@ -33,6 +33,7 @@
> #include <linux/mmdebug.h>
> #include <linux/mm_types.h>
> #include <linux/sched.h>
> +#include <linux/page_table_check.h>
>
> #ifdef CONFIG_TRANSPARENT_HUGEPAGE
> #define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
> @@ -96,6 +97,7 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
> #define pte_young(pte) (!!(pte_val(pte) & PTE_AF))
> #define pte_special(pte) (!!(pte_val(pte) & PTE_SPECIAL))
> #define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE))
> +#define pte_user(pte) (!!(pte_val(pte) & PTE_USER))
> #define pte_user_exec(pte) (!(pte_val(pte) & PTE_UXN))
> #define pte_cont(pte) (!!(pte_val(pte) & PTE_CONT))
> #define pte_devmap(pte) (!!(pte_val(pte) & PTE_DEVMAP))
> @@ -312,8 +314,8 @@ static inline void __check_racy_pte_update(struct mm_struct *mm, pte_t *ptep,
> __func__, pte_val(old_pte), pte_val(pte));
> }
>
> -static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
> - pte_t *ptep, pte_t pte)
> +static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
> + pte_t *ptep, pte_t pte)
> {
> if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte))
> __sync_icache_dcache(pte);
> @@ -343,6 +345,13 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
> set_pte(ptep, pte);
> }
>
> +static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
> + pte_t *ptep, pte_t pte)
> +{
> + page_table_check_pte_set(mm, addr, ptep, pte);
> + return __set_pte_at(mm, addr, ptep, pte);
> +}
> +
> /*
> * Huge pte definitions.
> */
> @@ -454,6 +463,8 @@ static inline int pmd_trans_huge(pmd_t pmd)
> #define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd))
> #define pmd_young(pmd) pte_young(pmd_pte(pmd))
> #define pmd_valid(pmd) pte_valid(pmd_pte(pmd))
> +#define pmd_user(pmd) pte_user(pmd_pte(pmd))
> +#define pmd_user_exec(pmd) pte_user_exec(pmd_pte(pmd))
> #define pmd_cont(pmd) pte_cont(pmd_pte(pmd))
> #define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd)))
> #define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd)))
> @@ -501,8 +512,19 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd)
> #define pud_pfn(pud) ((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT)
> #define pfn_pud(pfn,prot) __pud(__phys_to_pud_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))
>
> -#define set_pmd_at(mm, addr, pmdp, pmd) set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd))
> -#define set_pud_at(mm, addr, pudp, pud) set_pte_at(mm, addr, (pte_t *)pudp, pud_pte(pud))
> +static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
> + pmd_t *pmdp, pmd_t pmd)
> +{
> + page_table_check_pmd_set(mm, addr, pmdp, pmd);
> + return __set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd));
> +}
> +
> +static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
> + pud_t *pudp, pud_t pud)
> +{
> + page_table_check_pud_set(mm, addr, pudp, pud);
> + return __set_pte_at(mm, addr, (pte_t *)pudp, pud_pte(pud));
> +}
>
> #define __p4d_to_phys(p4d) __pte_to_phys(p4d_pte(p4d))
> #define __phys_to_p4d_val(phys) __phys_to_pte_val(phys)
> @@ -643,6 +665,24 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
> #define pud_present(pud) pte_present(pud_pte(pud))
> #define pud_leaf(pud) (pud_present(pud) && !pud_table(pud))
> #define pud_valid(pud) pte_valid(pud_pte(pud))
> +#define pud_user(pud) pte_user(pud_pte(pud))
> +
> +#ifdef CONFIG_PAGE_TABLE_CHECK
> +static inline bool pte_user_accessible_page(pte_t pte)
> +{
> + return pte_present(pte) && (pte_user(pte) || pte_user_exec(pte));
> +}
> +
> +static inline bool pmd_user_accessible_page(pmd_t pmd)
> +{
> + return pmd_present(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd));
> +}
> +
> +static inline bool pud_user_accessible_page(pud_t pud)
> +{
> + return pud_present(pud) && pud_user(pud);
> +}
> +#endif
>
> static inline void set_pud(pud_t *pudp, pud_t pud)
> {
> @@ -876,7 +916,11 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
> static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
> unsigned long address, pte_t *ptep)
> {
> - return __pte(xchg_relaxed(&pte_val(*ptep), 0));
> + pte_t pte = __pte(xchg_relaxed(&pte_val(*ptep), 0));
> +
> + page_table_check_pte_clear(mm, address, pte);
> +
> + return pte;
> }
>
> #ifdef CONFIG_TRANSPARENT_HUGEPAGE
> @@ -884,7 +928,11 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
> static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
> unsigned long address, pmd_t *pmdp)
> {
> - return pte_pmd(ptep_get_and_clear(mm, address, (pte_t *)pmdp));
> + pmd_t pmd = __pmd(xchg_relaxed(&pmd_val(*pmdp), 0));
> +
> + page_table_check_pmd_clear(mm, address, pmd);
> +
> + return pmd;
> }
> #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
>
> @@ -918,6 +966,7 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
> static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
> unsigned long address, pmd_t *pmdp, pmd_t pmd)
> {
> + page_table_check_pmd_set(vma->vm_mm, address, pmdp, pmd);
> return __pmd(xchg_relaxed(&pmd_val(*pmdp), pmd_val(pmd)));
> }
> #endif
Compared with PxD_PAGE_SIZE, which is defined and used only on x86,
PxD_SIZE is more commonly available across architectures. Therefore, it is
more reasonable to use PxD_SIZE instead of PxD_PAGE_SIZE in
page_table_check.c. This also makes it easier to support page table check
on other architectures. The substitution has no functional impact on x86.
Suggested-by: Anshuman Khandual <[email protected]>
Signed-off-by: Tong Tiangen <[email protected]>
Acked-by: Pasha Tatashin <[email protected]>
Reviewed-by: Anshuman Khandual <[email protected]>
---
mm/page_table_check.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/mm/page_table_check.c b/mm/page_table_check.c
index 2458281bff89..eb0d0b71cdf6 100644
--- a/mm/page_table_check.c
+++ b/mm/page_table_check.c
@@ -177,7 +177,7 @@ void __page_table_check_pmd_clear(struct mm_struct *mm, unsigned long addr,
if (pmd_user_accessible_page(pmd)) {
page_table_check_clear(mm, addr, pmd_pfn(pmd),
- PMD_PAGE_SIZE >> PAGE_SHIFT);
+ PMD_SIZE >> PAGE_SHIFT);
}
}
EXPORT_SYMBOL(__page_table_check_pmd_clear);
@@ -190,7 +190,7 @@ void __page_table_check_pud_clear(struct mm_struct *mm, unsigned long addr,
if (pud_user_accessible_page(pud)) {
page_table_check_clear(mm, addr, pud_pfn(pud),
- PUD_PAGE_SIZE >> PAGE_SHIFT);
+ PUD_SIZE >> PAGE_SHIFT);
}
}
EXPORT_SYMBOL(__page_table_check_pud_clear);
@@ -219,7 +219,7 @@ void __page_table_check_pmd_set(struct mm_struct *mm, unsigned long addr,
__page_table_check_pmd_clear(mm, addr, *pmdp);
if (pmd_user_accessible_page(pmd)) {
page_table_check_set(mm, addr, pmd_pfn(pmd),
- PMD_PAGE_SIZE >> PAGE_SHIFT,
+ PMD_SIZE >> PAGE_SHIFT,
pmd_write(pmd));
}
}
@@ -234,7 +234,7 @@ void __page_table_check_pud_set(struct mm_struct *mm, unsigned long addr,
__page_table_check_pud_clear(mm, addr, *pudp);
if (pud_user_accessible_page(pud)) {
page_table_check_set(mm, addr, pud_pfn(pud),
- PUD_PAGE_SIZE >> PAGE_SHIFT,
+ PUD_SIZE >> PAGE_SHIFT,
pud_write(pud));
}
}
--
2.25.1
On 2022-05-07 11:01, Tong Tiangen wrote:
> From: Kefeng Wang <[email protected]>
>
> As commit d283d422c6c4 ("x86: mm: add x86_64 support for page table check")
> , enable ARCH_SUPPORTS_PAGE_TABLE_CHECK on arm64.
>
> Add additional page table check stubs for page table helpers, these stubs
> can be used to check the existing page table entries.
>
> Signed-off-by: Kefeng Wang <[email protected]>
> Signed-off-by: Tong Tiangen <[email protected]>
> Reviewed-by: Pasha Tatashin <[email protected]>
When building and booting an arm64 allmodconfig kernel from the next tree, branch next-20220516,
I see the following kernel oops when booting in QEMU [1]:
T35] ------------[ cut here ]------------
[ 578.695796][ T35] kernel BUG at mm/page_table_check.c:82!
[ 578.697292][ T35] Internal error: Oops - BUG: 0 [#1] PREEMPT SMP
[ 578.704318][ T35] Modules linked in:
[ 578.705907][ T35] CPU: 0 PID: 35 Comm: khugepaged Tainted: G T 5.18.0-rc6-next-20220513 #1 893498a5d8159d9fb26e12492a93c07e83dd4b7f
[ 578.711170][ T35] Hardware name: linux,dummy-virt (DT)
[ 578.713315][ T35] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[ 578.716398][ T35] pc : page_table_check_clear.constprop.0+0x1f4/0x280
[ 578.719107][ T35] lr : page_table_check_clear.constprop.0+0x1cc/0x280
[ 578.721781][ T35] sp : ffff80000f3778b0
[ 578.723446][ T35] x29: ffff80000f3778b0 x28: ffff80000b891218 x27: ffff000012dd55f0
[ 578.726667][ T35] x26: 0000000000000008 x25: ffff80000c38cd80 x24: 0000000000000000
[ 578.729870][ T35] x23: ffff80000c38c9c0 x22: 0000000000000000 x21: 0000000000000200
[ 578.733079][ T35] x20: ffff000007bae000 x19: ffff000007bae008 x18: 0000000000000000
[ 578.736299][ T35] x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000
[ 578.739505][ T35] x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000
[ 578.742735][ T35] x11: 0000000000000000 x10: 0000000000000000 x9 : 0000000000000000
[ 578.745925][ T35] x8 : 0000000000000000 x7 : 0000000000000000 x6 : 0000000000000000
[ 578.749145][ T35] x5 : 0000000000000000 x4 : 0000000000000000 x3 : ffff000007bae00c
[ 578.752348][ T35] x2 : 0000000000000000 x1 : 0000000000000001 x0 : 00000000ffffffff
[ 578.755556][ T35] Call trace:
[ 578.756877][ T35] page_table_check_clear.constprop.0+0x1f4/0x280
[ 578.759446][ T35] __page_table_check_pmd_clear+0xc4/0x140
[ 578.761757][ T35] pmdp_collapse_flush+0xa4/0x1c0
[ 578.763771][ T35] collapse_huge_page+0x4e4/0xb00
[ 578.765778][ T35] khugepaged_scan_pmd+0xc18/0xd00
[ 578.767840][ T35] khugepaged_scan_mm_slot+0x580/0x780
[ 578.770018][ T35] khugepaged+0x2dc/0x400
[ 578.771786][ T35] kthread+0x164/0x180
[ 578.773430][ T35] ret_from_fork+0x10/0x20
[ 578.775253][ T35] Code: 52800021 91001263 14000388 36f80040 (d4210000)
[ 578.777990][ T35] ---[ end trace 0000000000000000 ]---
[ 578.778021][ T35] Kernel panic - not syncing: Oops - BUG: Fatal exception
[ 578.782934][ T35] Kernel Offset: disabled
[ 578.784642][ T35] CPU features: 0x000,00100010,00001086
[ 578.786848][ T35] Memory Limit: none
[ 578.788433][ T35] ---[ end Kernel panic - not syncing: Oops - BUG: Fatal exception ]---
Bisected down to this patch, see the bisect log [2].
When I revert this patch I don't see the issue anymore.
Cheers,
Anders
[1] https://people.linaro.org/~anders.roxell/output-next-20220513.log
[2] http://ix.io/3XZB
> ---
> arch/arm64/Kconfig | 1 +
> arch/arm64/include/asm/pgtable.h | 61 ++++++++++++++++++++++++++++----
> 2 files changed, 56 insertions(+), 6 deletions(-)
>
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 272c75af9302..3055fb5b3fb4 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -92,6 +92,7 @@ config ARM64
> select ARCH_SUPPORTS_ATOMIC_RMW
> select ARCH_SUPPORTS_INT128 if CC_HAS_INT128
> select ARCH_SUPPORTS_NUMA_BALANCING
> + select ARCH_SUPPORTS_PAGE_TABLE_CHECK
> select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT
> select ARCH_WANT_DEFAULT_BPF_JIT
> select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
> diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
> index 8ebf1cec5d90..4e61cde27f9f 100644
> --- a/arch/arm64/include/asm/pgtable.h
> +++ b/arch/arm64/include/asm/pgtable.h
> @@ -33,6 +33,7 @@
> #include <linux/mmdebug.h>
> #include <linux/mm_types.h>
> #include <linux/sched.h>
> +#include <linux/page_table_check.h>
>
> #ifdef CONFIG_TRANSPARENT_HUGEPAGE
> #define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
> @@ -96,6 +97,7 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
> #define pte_young(pte) (!!(pte_val(pte) & PTE_AF))
> #define pte_special(pte) (!!(pte_val(pte) & PTE_SPECIAL))
> #define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE))
> +#define pte_user(pte) (!!(pte_val(pte) & PTE_USER))
> #define pte_user_exec(pte) (!(pte_val(pte) & PTE_UXN))
> #define pte_cont(pte) (!!(pte_val(pte) & PTE_CONT))
> #define pte_devmap(pte) (!!(pte_val(pte) & PTE_DEVMAP))
> @@ -312,8 +314,8 @@ static inline void __check_racy_pte_update(struct mm_struct *mm, pte_t *ptep,
> __func__, pte_val(old_pte), pte_val(pte));
> }
>
> -static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
> - pte_t *ptep, pte_t pte)
> +static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
> + pte_t *ptep, pte_t pte)
> {
> if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte))
> __sync_icache_dcache(pte);
> @@ -343,6 +345,13 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
> set_pte(ptep, pte);
> }
>
> +static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
> + pte_t *ptep, pte_t pte)
> +{
> + page_table_check_pte_set(mm, addr, ptep, pte);
> + return __set_pte_at(mm, addr, ptep, pte);
> +}
> +
> /*
> * Huge pte definitions.
> */
> @@ -454,6 +463,8 @@ static inline int pmd_trans_huge(pmd_t pmd)
> #define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd))
> #define pmd_young(pmd) pte_young(pmd_pte(pmd))
> #define pmd_valid(pmd) pte_valid(pmd_pte(pmd))
> +#define pmd_user(pmd) pte_user(pmd_pte(pmd))
> +#define pmd_user_exec(pmd) pte_user_exec(pmd_pte(pmd))
> #define pmd_cont(pmd) pte_cont(pmd_pte(pmd))
> #define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd)))
> #define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd)))
> @@ -501,8 +512,19 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd)
> #define pud_pfn(pud) ((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT)
> #define pfn_pud(pfn,prot) __pud(__phys_to_pud_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))
>
> -#define set_pmd_at(mm, addr, pmdp, pmd) set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd))
> -#define set_pud_at(mm, addr, pudp, pud) set_pte_at(mm, addr, (pte_t *)pudp, pud_pte(pud))
> +static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
> + pmd_t *pmdp, pmd_t pmd)
> +{
> + page_table_check_pmd_set(mm, addr, pmdp, pmd);
> + return __set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd));
> +}
> +
> +static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
> + pud_t *pudp, pud_t pud)
> +{
> + page_table_check_pud_set(mm, addr, pudp, pud);
> + return __set_pte_at(mm, addr, (pte_t *)pudp, pud_pte(pud));
> +}
>
> #define __p4d_to_phys(p4d) __pte_to_phys(p4d_pte(p4d))
> #define __phys_to_p4d_val(phys) __phys_to_pte_val(phys)
> @@ -643,6 +665,24 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
> #define pud_present(pud) pte_present(pud_pte(pud))
> #define pud_leaf(pud) (pud_present(pud) && !pud_table(pud))
> #define pud_valid(pud) pte_valid(pud_pte(pud))
> +#define pud_user(pud) pte_user(pud_pte(pud))
> +
> +#ifdef CONFIG_PAGE_TABLE_CHECK
> +static inline bool pte_user_accessible_page(pte_t pte)
> +{
> + return pte_present(pte) && (pte_user(pte) || pte_user_exec(pte));
> +}
> +
> +static inline bool pmd_user_accessible_page(pmd_t pmd)
> +{
> + return pmd_present(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd));
> +}
> +
> +static inline bool pud_user_accessible_page(pud_t pud)
> +{
> + return pud_present(pud) && pud_user(pud);
> +}
> +#endif
>
> static inline void set_pud(pud_t *pudp, pud_t pud)
> {
> @@ -876,7 +916,11 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
> static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
> unsigned long address, pte_t *ptep)
> {
> - return __pte(xchg_relaxed(&pte_val(*ptep), 0));
> + pte_t pte = __pte(xchg_relaxed(&pte_val(*ptep), 0));
> +
> + page_table_check_pte_clear(mm, address, pte);
> +
> + return pte;
> }
>
> #ifdef CONFIG_TRANSPARENT_HUGEPAGE
> @@ -884,7 +928,11 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
> static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
> unsigned long address, pmd_t *pmdp)
> {
> - return pte_pmd(ptep_get_and_clear(mm, address, (pte_t *)pmdp));
> + pmd_t pmd = __pmd(xchg_relaxed(&pmd_val(*pmdp), 0));
> +
> + page_table_check_pmd_clear(mm, address, pmd);
> +
> + return pmd;
> }
> #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
>
> @@ -918,6 +966,7 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
> static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
> unsigned long address, pmd_t *pmdp, pmd_t pmd)
> {
> + page_table_check_pmd_set(vma->vm_mm, address, pmdp, pmd);
> return __pmd(xchg_relaxed(&pmd_val(*pmdp), pmd_val(pmd)));
> }
> #endif
> --
> 2.25.1
>
--
Anders Roxell
[email protected]
M: +46 708 22 71 05 | IRC: roxell
On Tue, May 17, 2022 at 9:54 AM Anders Roxell <[email protected]> wrote:
>
> On 2022-05-07 11:01, Tong Tiangen wrote:
> > From: Kefeng Wang <[email protected]>
> >
> > As commit d283d422c6c4 ("x86: mm: add x86_64 support for page table check")
> > , enable ARCH_SUPPORTS_PAGE_TABLE_CHECK on arm64.
> >
> > Add additional page table check stubs for page table helpers, these stubs
> > can be used to check the existing page table entries.
> >
> > Signed-off-by: Kefeng Wang <[email protected]>
> > Signed-off-by: Tong Tiangen <[email protected]>
> > Reviewed-by: Pasha Tatashin <[email protected]>
>
> When building and booting an arm64 allmodconfig kernel on the next tree, branch next-20220516,
> see the following kernel oops when booting in QEMU [1]:
>
> T35] ------------[ cut here ]------------
> [ 578.695796][ T35] kernel BUG at mm/page_table_check.c:82!
> [ 578.697292][ T35] Internal error: Oops - BUG: 0 [#1] PREEMPT SMP
> [ 578.704318][ T35] Modules linked in:
> [ 578.705907][ T35] CPU: 0 PID: 35 Comm: khugepaged Tainted: G T 5.18.0-rc6-next-20220513 #1 893498a5d8159d9fb26e12492a93c07e83dd4b7f
> [ 578.711170][ T35] Hardware name: linux,dummy-virt (DT)
> [ 578.713315][ T35] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
> [ 578.716398][ T35] pc : page_table_check_clear.constprop.0+0x1f4/0x280
> [ 578.719107][ T35] lr : page_table_check_clear.constprop.0+0x1cc/0x280
> [ 578.721781][ T35] sp : ffff80000f3778b0
> [ 578.723446][ T35] x29: ffff80000f3778b0 x28: ffff80000b891218 x27: ffff000012dd55f0
> [ 578.726667][ T35] x26: 0000000000000008 x25: ffff80000c38cd80 x24: 0000000000000000
> [ 578.729870][ T35] x23: ffff80000c38c9c0 x22: 0000000000000000 x21: 0000000000000200
> [ 578.733079][ T35] x20: ffff000007bae000 x19: ffff000007bae008 x18: 0000000000000000
> [ 578.736299][ T35] x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000
> [ 578.739505][ T35] x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000
> [ 578.742735][ T35] x11: 0000000000000000 x10: 0000000000000000 x9 : 0000000000000000
> [ 578.745925][ T35] x8 : 0000000000000000 x7 : 0000000000000000 x6 : 0000000000000000
> [ 578.749145][ T35] x5 : 0000000000000000 x4 : 0000000000000000 x3 : ffff000007bae00c
> [ 578.752348][ T35] x2 : 0000000000000000 x1 : 0000000000000001 x0 : 00000000ffffffff
> [ 578.755556][ T35] Call trace:
> [ 578.756877][ T35] page_table_check_clear.constprop.0+0x1f4/0x280
> [ 578.759446][ T35] __page_table_check_pmd_clear+0xc4/0x140
> [ 578.761757][ T35] pmdp_collapse_flush+0xa4/0x1c0
> [ 578.763771][ T35] collapse_huge_page+0x4e4/0xb00
> [ 578.765778][ T35] khugepaged_scan_pmd+0xc18/0xd00
> [ 578.767840][ T35] khugepaged_scan_mm_slot+0x580/0x780
> [ 578.770018][ T35] khugepaged+0x2dc/0x400
> [ 578.771786][ T35] kthread+0x164/0x180
> [ 578.773430][ T35] ret_from_fork+0x10/0x20
> [ 578.775253][ T35] Code: 52800021 91001263 14000388 36f80040 (d4210000)
> [ 578.777990][ T35] ---[ end trace 0000000000000000 ]---
> [ 578.778021][ T35] Kernel panic - not syncing: Oops - BUG: Fatal exception
> [ 578.782934][ T35] Kernel Offset: disabled
> [ 578.784642][ T35] CPU features: 0x000,00100010,00001086
> [ 578.786848][ T35] Memory Limit: none
> [ 578.788433][ T35] ---[ end Kernel panic - not syncing: Oops - BUG: Fatal exception ]---
>
> Bisected down to this patch, see the bisect log [2].
>
> When I revert this patch I don't see the issue anymore.
Thank you for reporting this. I believe this is the same problem that
Anshuman saw [1]. However, at that time he could no longer reproduce it.
Could you please provide the QEMU command line, the QEMU version, the
kernel config, and information about the base image you are using?
Thank you,
Pasha
[1] https://lore.kernel.org/all/[email protected]/
>
> Cheers,
> Anders
> [1] https://people.linaro.org/~anders.roxell/output-next-20220513.log
> [2] http://ix.io/3XZB
>
> > ---
> > arch/arm64/Kconfig | 1 +
> > arch/arm64/include/asm/pgtable.h | 61 ++++++++++++++++++++++++++++----
> > 2 files changed, 56 insertions(+), 6 deletions(-)
> >
> > diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> > index 272c75af9302..3055fb5b3fb4 100644
> > --- a/arch/arm64/Kconfig
> > +++ b/arch/arm64/Kconfig
> > @@ -92,6 +92,7 @@ config ARM64
> > select ARCH_SUPPORTS_ATOMIC_RMW
> > select ARCH_SUPPORTS_INT128 if CC_HAS_INT128
> > select ARCH_SUPPORTS_NUMA_BALANCING
> > + select ARCH_SUPPORTS_PAGE_TABLE_CHECK
> > select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT
> > select ARCH_WANT_DEFAULT_BPF_JIT
> > select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
> > diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
> > index 8ebf1cec5d90..4e61cde27f9f 100644
> > --- a/arch/arm64/include/asm/pgtable.h
> > +++ b/arch/arm64/include/asm/pgtable.h
> > @@ -33,6 +33,7 @@
> > #include <linux/mmdebug.h>
> > #include <linux/mm_types.h>
> > #include <linux/sched.h>
> > +#include <linux/page_table_check.h>
> >
> > #ifdef CONFIG_TRANSPARENT_HUGEPAGE
> > #define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
> > @@ -96,6 +97,7 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
> > #define pte_young(pte) (!!(pte_val(pte) & PTE_AF))
> > #define pte_special(pte) (!!(pte_val(pte) & PTE_SPECIAL))
> > #define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE))
> > +#define pte_user(pte) (!!(pte_val(pte) & PTE_USER))
> > #define pte_user_exec(pte) (!(pte_val(pte) & PTE_UXN))
> > #define pte_cont(pte) (!!(pte_val(pte) & PTE_CONT))
> > #define pte_devmap(pte) (!!(pte_val(pte) & PTE_DEVMAP))
> > @@ -312,8 +314,8 @@ static inline void __check_racy_pte_update(struct mm_struct *mm, pte_t *ptep,
> > __func__, pte_val(old_pte), pte_val(pte));
> > }
> >
> > -static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
> > - pte_t *ptep, pte_t pte)
> > +static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
> > + pte_t *ptep, pte_t pte)
> > {
> > if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte))
> > __sync_icache_dcache(pte);
> > @@ -343,6 +345,13 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
> > set_pte(ptep, pte);
> > }
> >
> > +static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
> > + pte_t *ptep, pte_t pte)
> > +{
> > + page_table_check_pte_set(mm, addr, ptep, pte);
> > + return __set_pte_at(mm, addr, ptep, pte);
> > +}
> > +
> > /*
> > * Huge pte definitions.
> > */
> > @@ -454,6 +463,8 @@ static inline int pmd_trans_huge(pmd_t pmd)
> > #define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd))
> > #define pmd_young(pmd) pte_young(pmd_pte(pmd))
> > #define pmd_valid(pmd) pte_valid(pmd_pte(pmd))
> > +#define pmd_user(pmd) pte_user(pmd_pte(pmd))
> > +#define pmd_user_exec(pmd) pte_user_exec(pmd_pte(pmd))
> > #define pmd_cont(pmd) pte_cont(pmd_pte(pmd))
> > #define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd)))
> > #define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd)))
> > @@ -501,8 +512,19 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd)
> > #define pud_pfn(pud) ((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT)
> > #define pfn_pud(pfn,prot) __pud(__phys_to_pud_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))
> >
> > -#define set_pmd_at(mm, addr, pmdp, pmd) set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd))
> > -#define set_pud_at(mm, addr, pudp, pud) set_pte_at(mm, addr, (pte_t *)pudp, pud_pte(pud))
> > +static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
> > + pmd_t *pmdp, pmd_t pmd)
> > +{
> > + page_table_check_pmd_set(mm, addr, pmdp, pmd);
> > + return __set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd));
> > +}
> > +
> > +static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
> > + pud_t *pudp, pud_t pud)
> > +{
> > + page_table_check_pud_set(mm, addr, pudp, pud);
> > + return __set_pte_at(mm, addr, (pte_t *)pudp, pud_pte(pud));
> > +}
> >
> > #define __p4d_to_phys(p4d) __pte_to_phys(p4d_pte(p4d))
> > #define __phys_to_p4d_val(phys) __phys_to_pte_val(phys)
> > @@ -643,6 +665,24 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
> > #define pud_present(pud) pte_present(pud_pte(pud))
> > #define pud_leaf(pud) (pud_present(pud) && !pud_table(pud))
> > #define pud_valid(pud) pte_valid(pud_pte(pud))
> > +#define pud_user(pud) pte_user(pud_pte(pud))
> > +
> > +#ifdef CONFIG_PAGE_TABLE_CHECK
> > +static inline bool pte_user_accessible_page(pte_t pte)
> > +{
> > + return pte_present(pte) && (pte_user(pte) || pte_user_exec(pte));
> > +}
> > +
> > +static inline bool pmd_user_accessible_page(pmd_t pmd)
> > +{
> > + return pmd_present(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd));
> > +}
> > +
> > +static inline bool pud_user_accessible_page(pud_t pud)
> > +{
> > + return pud_present(pud) && pud_user(pud);
> > +}
> > +#endif
> >
> > static inline void set_pud(pud_t *pudp, pud_t pud)
> > {
> > @@ -876,7 +916,11 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
> > static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
> > unsigned long address, pte_t *ptep)
> > {
> > - return __pte(xchg_relaxed(&pte_val(*ptep), 0));
> > + pte_t pte = __pte(xchg_relaxed(&pte_val(*ptep), 0));
> > +
> > + page_table_check_pte_clear(mm, address, pte);
> > +
> > + return pte;
> > }
> >
> > #ifdef CONFIG_TRANSPARENT_HUGEPAGE
> > @@ -884,7 +928,11 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
> > static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
> > unsigned long address, pmd_t *pmdp)
> > {
> > - return pte_pmd(ptep_get_and_clear(mm, address, (pte_t *)pmdp));
> > + pmd_t pmd = __pmd(xchg_relaxed(&pmd_val(*pmdp), 0));
> > +
> > + page_table_check_pmd_clear(mm, address, pmd);
> > +
> > + return pmd;
> > }
> > #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
> >
> > @@ -918,6 +966,7 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
> > static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
> > unsigned long address, pmd_t *pmdp, pmd_t pmd)
> > {
> > + page_table_check_pmd_set(vma->vm_mm, address, pmdp, pmd);
> > return __pmd(xchg_relaxed(&pmd_val(*pmdp), pmd_val(pmd)));
> > }
> > #endif
> > --
> > 2.25.1
> >
>
> --
> Anders Roxell
> [email protected]
> M: +46 708 22 71 05 | IRC: roxell
On Tue, 17 May 2022 at 16:02, Pasha Tatashin <[email protected]> wrote:
>
> On Tue, May 17, 2022 at 9:54 AM Anders Roxell <[email protected]> wrote:
> >
> > On 2022-05-07 11:01, Tong Tiangen wrote:
> > > From: Kefeng Wang <[email protected]>
> > >
> > > As commit d283d422c6c4 ("x86: mm: add x86_64 support for page table check")
> > > , enable ARCH_SUPPORTS_PAGE_TABLE_CHECK on arm64.
> > >
> > > Add additional page table check stubs for page table helpers, these stubs
> > > can be used to check the existing page table entries.
> > >
> > > Signed-off-by: Kefeng Wang <[email protected]>
> > > Signed-off-by: Tong Tiangen <[email protected]>
> > > Reviewed-by: Pasha Tatashin <[email protected]>
> >
> > When building and booting an arm64 allmodconfig kernel on the next tree, branch next-20220516,
> > see the following kernel oops when booting in QEMU [1]:
> >
> > T35] ------------[ cut here ]------------
> > [ 578.695796][ T35] kernel BUG at mm/page_table_check.c:82!
> > [ 578.697292][ T35] Internal error: Oops - BUG: 0 [#1] PREEMPT SMP
> > [ 578.704318][ T35] Modules linked in:
> > [ 578.705907][ T35] CPU: 0 PID: 35 Comm: khugepaged Tainted: G T 5.18.0-rc6-next-20220513 #1 893498a5d8159d9fb26e12492a93c07e83dd4b7f
> > [ 578.711170][ T35] Hardware name: linux,dummy-virt (DT)
> > [ 578.713315][ T35] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
> > [ 578.716398][ T35] pc : page_table_check_clear.constprop.0+0x1f4/0x280
> > [ 578.719107][ T35] lr : page_table_check_clear.constprop.0+0x1cc/0x280
> > [ 578.721781][ T35] sp : ffff80000f3778b0
> > [ 578.723446][ T35] x29: ffff80000f3778b0 x28: ffff80000b891218 x27: ffff000012dd55f0
> > [ 578.726667][ T35] x26: 0000000000000008 x25: ffff80000c38cd80 x24: 0000000000000000
> > [ 578.729870][ T35] x23: ffff80000c38c9c0 x22: 0000000000000000 x21: 0000000000000200
> > [ 578.733079][ T35] x20: ffff000007bae000 x19: ffff000007bae008 x18: 0000000000000000
> > [ 578.736299][ T35] x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000
> > [ 578.739505][ T35] x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000
> > [ 578.742735][ T35] x11: 0000000000000000 x10: 0000000000000000 x9 : 0000000000000000
> > [ 578.745925][ T35] x8 : 0000000000000000 x7 : 0000000000000000 x6 : 0000000000000000
> > [ 578.749145][ T35] x5 : 0000000000000000 x4 : 0000000000000000 x3 : ffff000007bae00c
> > [ 578.752348][ T35] x2 : 0000000000000000 x1 : 0000000000000001 x0 : 00000000ffffffff
> > [ 578.755556][ T35] Call trace:
> > [ 578.756877][ T35] page_table_check_clear.constprop.0+0x1f4/0x280
> > [ 578.759446][ T35] __page_table_check_pmd_clear+0xc4/0x140
> > [ 578.761757][ T35] pmdp_collapse_flush+0xa4/0x1c0
> > [ 578.763771][ T35] collapse_huge_page+0x4e4/0xb00
> > [ 578.765778][ T35] khugepaged_scan_pmd+0xc18/0xd00
> > [ 578.767840][ T35] khugepaged_scan_mm_slot+0x580/0x780
> > [ 578.770018][ T35] khugepaged+0x2dc/0x400
> > [ 578.771786][ T35] kthread+0x164/0x180
> > [ 578.773430][ T35] ret_from_fork+0x10/0x20
> > [ 578.775253][ T35] Code: 52800021 91001263 14000388 36f80040 (d4210000)
> > [ 578.777990][ T35] ---[ end trace 0000000000000000 ]---
> > [ 578.778021][ T35] Kernel panic - not syncing: Oops - BUG: Fatal exception
> > [ 578.782934][ T35] Kernel Offset: disabled
> > [ 578.784642][ T35] CPU features: 0x000,00100010,00001086
> > [ 578.786848][ T35] Memory Limit: none
> > [ 578.788433][ T35] ---[ end Kernel panic - not syncing: Oops - BUG: Fatal exception ]---
> >
> > Bisected down to this patch, see the bisect log [2].
> >
> > When I revert this patch I don't see the issue anymore.
>
> Thank you for reporting this. I believe, this is the same problem that
> Anshuman saw [1].
Yes, it looks like the same issue; I missed that.
> However, at that time he could not reproduce it
> anymore. Can you please provide QEMU command line
$ qemu-system-aarch64 --enable-kvm -cpu cortex-a53 -kernel
Image-20220517-1.gz -serial stdio -monitor none -nographic -m 2G -M
virt -fsdev local,id=root,path=/srv/kvm/tmp/stretch/arm64,security_model=none,writeout=immediate
-device virtio-rng-pci -device
virtio-9p-pci,fsdev=root,mount_tag=/dev/root -append "root=/dev/root
rootfstype=9p rootflags=trans=virtio,msize=131072
console=ttyAMA0,38400n8 earlycon=pl011,0x9000000 initcall_debug
softlockup_panic=0 security=none kpti=no kfence.sample_interval=0"
-object rng-random,id=rng0,filename=/dev/urandom -device
virtio-rng-pci,rng=rng0
>, QEMU version,
$ qemu-system-aarch64 --version
QEMU emulator version 5.2.0 (Debian 1:5.2+dfsg-11+deb11u1)
Copyright (c) 2003-2020 Fabrice Bellard and the QEMU Project developers
I'm running on an arm64 host.
> kernel config
Kernel config [1].
I build the kernel with tuxmake [2] like this:
$ tuxmake --runtime podman --target-arch arm64 --toolchain gcc-11
--kconfig http://ix.io/3Y06
>, and information about the base image you are using.
Using a debian:stretch release when booting up QEMU.
Host system debian:bullseye
Cheers,
Anders
[1] http://ix.io/3Y06
[2] https://tuxmake.org/install-pypi/
>
> Thank you,
> Pasha
>
> [1] https://lore.kernel.org/all/[email protected]/
>
>
> >
> > Cheers,
> > Anders
> > [1] https://people.linaro.org/~anders.roxell/output-next-20220513.log
> > [2] http://ix.io/3XZB
> >
> > > ---
> > > arch/arm64/Kconfig | 1 +
> > > arch/arm64/include/asm/pgtable.h | 61 ++++++++++++++++++++++++++++----
> > > 2 files changed, 56 insertions(+), 6 deletions(-)
> > >
> > > diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> > > index 272c75af9302..3055fb5b3fb4 100644
> > > --- a/arch/arm64/Kconfig
> > > +++ b/arch/arm64/Kconfig
> > > @@ -92,6 +92,7 @@ config ARM64
> > > select ARCH_SUPPORTS_ATOMIC_RMW
> > > select ARCH_SUPPORTS_INT128 if CC_HAS_INT128
> > > select ARCH_SUPPORTS_NUMA_BALANCING
> > > + select ARCH_SUPPORTS_PAGE_TABLE_CHECK
> > > select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT
> > > select ARCH_WANT_DEFAULT_BPF_JIT
> > > select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
> > > diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
> > > index 8ebf1cec5d90..4e61cde27f9f 100644
> > > --- a/arch/arm64/include/asm/pgtable.h
> > > +++ b/arch/arm64/include/asm/pgtable.h
> > > @@ -33,6 +33,7 @@
> > > #include <linux/mmdebug.h>
> > > #include <linux/mm_types.h>
> > > #include <linux/sched.h>
> > > +#include <linux/page_table_check.h>
> > >
> > > #ifdef CONFIG_TRANSPARENT_HUGEPAGE
> > > #define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
> > > @@ -96,6 +97,7 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
> > > #define pte_young(pte) (!!(pte_val(pte) & PTE_AF))
> > > #define pte_special(pte) (!!(pte_val(pte) & PTE_SPECIAL))
> > > #define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE))
> > > +#define pte_user(pte) (!!(pte_val(pte) & PTE_USER))
> > > #define pte_user_exec(pte) (!(pte_val(pte) & PTE_UXN))
> > > #define pte_cont(pte) (!!(pte_val(pte) & PTE_CONT))
> > > #define pte_devmap(pte) (!!(pte_val(pte) & PTE_DEVMAP))
> > > @@ -312,8 +314,8 @@ static inline void __check_racy_pte_update(struct mm_struct *mm, pte_t *ptep,
> > > __func__, pte_val(old_pte), pte_val(pte));
> > > }
> > >
> > > -static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
> > > - pte_t *ptep, pte_t pte)
> > > +static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
> > > + pte_t *ptep, pte_t pte)
> > > {
> > > if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte))
> > > __sync_icache_dcache(pte);
> > > @@ -343,6 +345,13 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
> > > set_pte(ptep, pte);
> > > }
> > >
> > > +static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
> > > + pte_t *ptep, pte_t pte)
> > > +{
> > > + page_table_check_pte_set(mm, addr, ptep, pte);
> > > + return __set_pte_at(mm, addr, ptep, pte);
> > > +}
> > > +
> > > /*
> > > * Huge pte definitions.
> > > */
> > > @@ -454,6 +463,8 @@ static inline int pmd_trans_huge(pmd_t pmd)
> > > #define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd))
> > > #define pmd_young(pmd) pte_young(pmd_pte(pmd))
> > > #define pmd_valid(pmd) pte_valid(pmd_pte(pmd))
> > > +#define pmd_user(pmd) pte_user(pmd_pte(pmd))
> > > +#define pmd_user_exec(pmd) pte_user_exec(pmd_pte(pmd))
> > > #define pmd_cont(pmd) pte_cont(pmd_pte(pmd))
> > > #define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd)))
> > > #define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd)))
> > > @@ -501,8 +512,19 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd)
> > > #define pud_pfn(pud) ((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT)
> > > #define pfn_pud(pfn,prot) __pud(__phys_to_pud_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))
> > >
> > > -#define set_pmd_at(mm, addr, pmdp, pmd) set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd))
> > > -#define set_pud_at(mm, addr, pudp, pud) set_pte_at(mm, addr, (pte_t *)pudp, pud_pte(pud))
> > > +static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
> > > + pmd_t *pmdp, pmd_t pmd)
> > > +{
> > > + page_table_check_pmd_set(mm, addr, pmdp, pmd);
> > > + return __set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd));
> > > +}
> > > +
> > > +static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
> > > + pud_t *pudp, pud_t pud)
> > > +{
> > > + page_table_check_pud_set(mm, addr, pudp, pud);
> > > + return __set_pte_at(mm, addr, (pte_t *)pudp, pud_pte(pud));
> > > +}
> > >
> > > #define __p4d_to_phys(p4d) __pte_to_phys(p4d_pte(p4d))
> > > #define __phys_to_p4d_val(phys) __phys_to_pte_val(phys)
> > > @@ -643,6 +665,24 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
> > > #define pud_present(pud) pte_present(pud_pte(pud))
> > > #define pud_leaf(pud) (pud_present(pud) && !pud_table(pud))
> > > #define pud_valid(pud) pte_valid(pud_pte(pud))
> > > +#define pud_user(pud) pte_user(pud_pte(pud))
> > > +
> > > +#ifdef CONFIG_PAGE_TABLE_CHECK
> > > +static inline bool pte_user_accessible_page(pte_t pte)
> > > +{
> > > + return pte_present(pte) && (pte_user(pte) || pte_user_exec(pte));
> > > +}
> > > +
> > > +static inline bool pmd_user_accessible_page(pmd_t pmd)
> > > +{
> > > + return pmd_present(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd));
> > > +}
> > > +
> > > +static inline bool pud_user_accessible_page(pud_t pud)
> > > +{
> > > + return pud_present(pud) && pud_user(pud);
> > > +}
> > > +#endif
> > >
> > > static inline void set_pud(pud_t *pudp, pud_t pud)
> > > {
> > > @@ -876,7 +916,11 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
> > > static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
> > > unsigned long address, pte_t *ptep)
> > > {
> > > - return __pte(xchg_relaxed(&pte_val(*ptep), 0));
> > > + pte_t pte = __pte(xchg_relaxed(&pte_val(*ptep), 0));
> > > +
> > > + page_table_check_pte_clear(mm, address, pte);
> > > +
> > > + return pte;
> > > }
> > >
> > > #ifdef CONFIG_TRANSPARENT_HUGEPAGE
> > > @@ -884,7 +928,11 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
> > > static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
> > > unsigned long address, pmd_t *pmdp)
> > > {
> > > - return pte_pmd(ptep_get_and_clear(mm, address, (pte_t *)pmdp));
> > > + pmd_t pmd = __pmd(xchg_relaxed(&pmd_val(*pmdp), 0));
> > > +
> > > + page_table_check_pmd_clear(mm, address, pmd);
> > > +
> > > + return pmd;
> > > }
> > > #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
> > >
> > > @@ -918,6 +966,7 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
> > > static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
> > > unsigned long address, pmd_t *pmdp, pmd_t pmd)
> > > {
> > > + page_table_check_pmd_set(vma->vm_mm, address, pmdp, pmd);
> > > return __pmd(xchg_relaxed(&pmd_val(*pmdp), pmd_val(pmd)));
> > > }
> > > #endif
> > > --
> > > 2.25.1
> > >
> >
> > --
> > Anders Roxell
> > [email protected]
> > M: +46 708 22 71 05 | IRC: roxell
On Tue, 17 May 2022 at 16:33, Anders Roxell <[email protected]> wrote:
>
> On Tue, 17 May 2022 at 16:02, Pasha Tatashin <[email protected]> wrote:
> >
> > On Tue, May 17, 2022 at 9:54 AM Anders Roxell <[email protected]> wrote:
> > >
> > > On 2022-05-07 11:01, Tong Tiangen wrote:
> > > > From: Kefeng Wang <[email protected]>
> > > >
> > > > As commit d283d422c6c4 ("x86: mm: add x86_64 support for page table check")
> > > > , enable ARCH_SUPPORTS_PAGE_TABLE_CHECK on arm64.
> > > >
> > > > Add additional page table check stubs for page table helpers, these stubs
> > > > can be used to check the existing page table entries.
> > > >
> > > > Signed-off-by: Kefeng Wang <[email protected]>
> > > > Signed-off-by: Tong Tiangen <[email protected]>
> > > > Reviewed-by: Pasha Tatashin <[email protected]>
> > >
> > > When building and booting an arm64 allmodconfig kernel on the next tree, branch next-20220516,
> > > see the following kernel oops when booting in QEMU [1]:
> > >
> > > T35] ------------[ cut here ]------------
> > > [ 578.695796][ T35] kernel BUG at mm/page_table_check.c:82!
> > > [ 578.697292][ T35] Internal error: Oops - BUG: 0 [#1] PREEMPT SMP
> > > [ 578.704318][ T35] Modules linked in:
> > > [ 578.705907][ T35] CPU: 0 PID: 35 Comm: khugepaged Tainted: G T 5.18.0-rc6-next-20220513 #1 893498a5d8159d9fb26e12492a93c07e83dd4b7f
> > > [ 578.711170][ T35] Hardware name: linux,dummy-virt (DT)
> > > [ 578.713315][ T35] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
> > > [ 578.716398][ T35] pc : page_table_check_clear.constprop.0+0x1f4/0x280
> > > [ 578.719107][ T35] lr : page_table_check_clear.constprop.0+0x1cc/0x280
> > > [ 578.721781][ T35] sp : ffff80000f3778b0
> > > [ 578.723446][ T35] x29: ffff80000f3778b0 x28: ffff80000b891218 x27: ffff000012dd55f0
> > > [ 578.726667][ T35] x26: 0000000000000008 x25: ffff80000c38cd80 x24: 0000000000000000
> > > [ 578.729870][ T35] x23: ffff80000c38c9c0 x22: 0000000000000000 x21: 0000000000000200
> > > [ 578.733079][ T35] x20: ffff000007bae000 x19: ffff000007bae008 x18: 0000000000000000
> > > [ 578.736299][ T35] x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000
> > > [ 578.739505][ T35] x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000
> > > [ 578.742735][ T35] x11: 0000000000000000 x10: 0000000000000000 x9 : 0000000000000000
> > > [ 578.745925][ T35] x8 : 0000000000000000 x7 : 0000000000000000 x6 : 0000000000000000
> > > [ 578.749145][ T35] x5 : 0000000000000000 x4 : 0000000000000000 x3 : ffff000007bae00c
> > > [ 578.752348][ T35] x2 : 0000000000000000 x1 : 0000000000000001 x0 : 00000000ffffffff
> > > [ 578.755556][ T35] Call trace:
> > > [ 578.756877][ T35] page_table_check_clear.constprop.0+0x1f4/0x280
> > > [ 578.759446][ T35] __page_table_check_pmd_clear+0xc4/0x140
> > > [ 578.761757][ T35] pmdp_collapse_flush+0xa4/0x1c0
> > > [ 578.763771][ T35] collapse_huge_page+0x4e4/0xb00
> > > [ 578.765778][ T35] khugepaged_scan_pmd+0xc18/0xd00
> > > [ 578.767840][ T35] khugepaged_scan_mm_slot+0x580/0x780
> > > [ 578.770018][ T35] khugepaged+0x2dc/0x400
> > > [ 578.771786][ T35] kthread+0x164/0x180
> > > [ 578.773430][ T35] ret_from_fork+0x10/0x20
> > > [ 578.775253][ T35] Code: 52800021 91001263 14000388 36f80040 (d4210000)
> > > [ 578.777990][ T35] ---[ end trace 0000000000000000 ]---
> > > [ 578.778021][ T35] Kernel panic - not syncing: Oops - BUG: Fatal exception
> > > [ 578.782934][ T35] Kernel Offset: disabled
> > > [ 578.784642][ T35] CPU features: 0x000,00100010,00001086
> > > [ 578.786848][ T35] Memory Limit: none
> > > [ 578.788433][ T35] ---[ end Kernel panic - not syncing: Oops - BUG: Fatal exception ]---
Now I see this oops on the mainline kernel too, when building and booting an
arm64 allmodconfig kernel at sha
9d004b2f4fea ("Merge tag 'cxl-for-5.19' of
git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl").
When I revert 42b2547137f5 ("arm64/mm: enable
ARCH_SUPPORTS_PAGE_TABLE_CHECK"), the kernel boots fine.
This is the config [1].
Building with tuxmake[2].
$ tuxmake --runtime podman --target-arch arm64 --toolchain gcc-11
--kconfig http://ix.io/3YPH
Cheers,
Anders
[1] http://ix.io/3YPH
> > >
> > > Bisected down to this patch, see the bisect log [2].
> > >
> > > When I revert this patch I don't see the issue anymore.
> >
> > Thank you for reporting this. I believe, this is the same problem that
> > Anshuman saw [1].
>
> Yes looks like the same issue, I missed that.
>
> > However, at that time he could not reproduce it
> > anymore. Can you please provide QEMU command line
>
> $ qemu-system-aarch64 --enable-kvm -cpu cortex-a53 -kernel
> Image-20220517-1.gz -serial stdio -monitor none -nographic -m 2G -M
> virt -fsdev local,id=root,path=/srv/kvm/tmp/stretch/arm64,security_model=none,writeout=immediate
> -device virtio-rng-pci -device
> virtio-9p-pci,fsdev=root,mount_tag=/dev/root -append "root=/dev/root
> rootfstype=9p rootflags=trans=virtio,msize=131072
> console=ttyAMA0,38400n8 earlycon=pl011,0x9000000 initcall_debug
> softlockup_panic=0 security=none kpti=no kfence.sample_interval=0"
> -object rng-random,id=rng0,filename=/dev/urandom -device
> virtio-rng-pci,rng=rng0
>
> >, QEMU version,
>
> $ qemu-system-aarch64 --version
> QEMU emulator version 5.2.0 (Debian 1:5.2+dfsg-11+deb11u1)
> Copyright (c) 2003-2020 Fabrice Bellard and the QEMU Project developers
>
> I'm running on an arm64 host.
>
> > kernel config
>
> Kernel config [1].
>
> I build the kernel with tuxmake [2] like this:
> $ tuxmake --runtime podman --target-arch arm64 --toolchain gcc-11
> --kconfig http://ix.io/3Y06
>
> >, and information about the base image you are using.
>
> Using a debian:stretch release when booting up QEMU.
> Host system debian:bullseye
>
> Cheers,
> Anders
> [1] http://ix.io/3Y06
> [2] https://tuxmake.org/install-pypi/
>
> >
> > Thank you,
> > Pasha
> >
> > [1] https://lore.kernel.org/all/[email protected]/
> >
> >
> > >
> > > Cheers,
> > > Anders
> > > [1] https://people.linaro.org/~anders.roxell/output-next-20220513.log
> > > [2] http://ix.io/3XZB
> > >
> > > > ---
> > > > arch/arm64/Kconfig | 1 +
> > > > arch/arm64/include/asm/pgtable.h | 61 ++++++++++++++++++++++++++++----
> > > > 2 files changed, 56 insertions(+), 6 deletions(-)
> > > >
> > > > diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> > > > index 272c75af9302..3055fb5b3fb4 100644
> > > > --- a/arch/arm64/Kconfig
> > > > +++ b/arch/arm64/Kconfig
> > > > @@ -92,6 +92,7 @@ config ARM64
> > > > select ARCH_SUPPORTS_ATOMIC_RMW
> > > > select ARCH_SUPPORTS_INT128 if CC_HAS_INT128
> > > > select ARCH_SUPPORTS_NUMA_BALANCING
> > > > + select ARCH_SUPPORTS_PAGE_TABLE_CHECK
> > > > select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT
> > > > select ARCH_WANT_DEFAULT_BPF_JIT
> > > > select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
> > > > diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
> > > > index 8ebf1cec5d90..4e61cde27f9f 100644
> > > > --- a/arch/arm64/include/asm/pgtable.h
> > > > +++ b/arch/arm64/include/asm/pgtable.h
> > > > @@ -33,6 +33,7 @@
> > > > #include <linux/mmdebug.h>
> > > > #include <linux/mm_types.h>
> > > > #include <linux/sched.h>
> > > > +#include <linux/page_table_check.h>
> > > >
> > > > #ifdef CONFIG_TRANSPARENT_HUGEPAGE
> > > > #define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
> > > > @@ -96,6 +97,7 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
> > > > #define pte_young(pte) (!!(pte_val(pte) & PTE_AF))
> > > > #define pte_special(pte) (!!(pte_val(pte) & PTE_SPECIAL))
> > > > #define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE))
> > > > +#define pte_user(pte) (!!(pte_val(pte) & PTE_USER))
> > > > #define pte_user_exec(pte) (!(pte_val(pte) & PTE_UXN))
> > > > #define pte_cont(pte) (!!(pte_val(pte) & PTE_CONT))
> > > > #define pte_devmap(pte) (!!(pte_val(pte) & PTE_DEVMAP))
> > > > @@ -312,8 +314,8 @@ static inline void __check_racy_pte_update(struct mm_struct *mm, pte_t *ptep,
> > > > __func__, pte_val(old_pte), pte_val(pte));
> > > > }
> > > >
> > > > -static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
> > > > - pte_t *ptep, pte_t pte)
> > > > +static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
> > > > + pte_t *ptep, pte_t pte)
> > > > {
> > > > if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte))
> > > > __sync_icache_dcache(pte);
> > > > @@ -343,6 +345,13 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
> > > > set_pte(ptep, pte);
> > > > }
> > > >
> > > > +static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
> > > > + pte_t *ptep, pte_t pte)
> > > > +{
> > > > + page_table_check_pte_set(mm, addr, ptep, pte);
> > > > + return __set_pte_at(mm, addr, ptep, pte);
> > > > +}
> > > > +
> > > > /*
> > > > * Huge pte definitions.
> > > > */
> > > > @@ -454,6 +463,8 @@ static inline int pmd_trans_huge(pmd_t pmd)
> > > > #define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd))
> > > > #define pmd_young(pmd) pte_young(pmd_pte(pmd))
> > > > #define pmd_valid(pmd) pte_valid(pmd_pte(pmd))
> > > > +#define pmd_user(pmd) pte_user(pmd_pte(pmd))
> > > > +#define pmd_user_exec(pmd) pte_user_exec(pmd_pte(pmd))
> > > > #define pmd_cont(pmd) pte_cont(pmd_pte(pmd))
> > > > #define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd)))
> > > > #define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd)))
> > > > @@ -501,8 +512,19 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd)
> > > > #define pud_pfn(pud) ((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT)
> > > > #define pfn_pud(pfn,prot) __pud(__phys_to_pud_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))
> > > >
> > > > -#define set_pmd_at(mm, addr, pmdp, pmd) set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd))
> > > > -#define set_pud_at(mm, addr, pudp, pud) set_pte_at(mm, addr, (pte_t *)pudp, pud_pte(pud))
> > > > +static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
> > > > + pmd_t *pmdp, pmd_t pmd)
> > > > +{
> > > > + page_table_check_pmd_set(mm, addr, pmdp, pmd);
> > > > + return __set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd));
> > > > +}
> > > > +
> > > > +static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
> > > > + pud_t *pudp, pud_t pud)
> > > > +{
> > > > + page_table_check_pud_set(mm, addr, pudp, pud);
> > > > + return __set_pte_at(mm, addr, (pte_t *)pudp, pud_pte(pud));
> > > > +}
> > > >
> > > > #define __p4d_to_phys(p4d) __pte_to_phys(p4d_pte(p4d))
> > > > #define __phys_to_p4d_val(phys) __phys_to_pte_val(phys)
> > > > @@ -643,6 +665,24 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
> > > > #define pud_present(pud) pte_present(pud_pte(pud))
> > > > #define pud_leaf(pud) (pud_present(pud) && !pud_table(pud))
> > > > #define pud_valid(pud) pte_valid(pud_pte(pud))
> > > > +#define pud_user(pud) pte_user(pud_pte(pud))
> > > > +
> > > > +#ifdef CONFIG_PAGE_TABLE_CHECK
> > > > +static inline bool pte_user_accessible_page(pte_t pte)
> > > > +{
> > > > + return pte_present(pte) && (pte_user(pte) || pte_user_exec(pte));
> > > > +}
> > > > +
> > > > +static inline bool pmd_user_accessible_page(pmd_t pmd)
> > > > +{
> > > > + return pmd_present(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd));
> > > > +}
> > > > +
> > > > +static inline bool pud_user_accessible_page(pud_t pud)
> > > > +{
> > > > + return pud_present(pud) && pud_user(pud);
> > > > +}
> > > > +#endif
> > > >
> > > > static inline void set_pud(pud_t *pudp, pud_t pud)
> > > > {
> > > > @@ -876,7 +916,11 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
> > > > static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
> > > > unsigned long address, pte_t *ptep)
> > > > {
> > > > - return __pte(xchg_relaxed(&pte_val(*ptep), 0));
> > > > + pte_t pte = __pte(xchg_relaxed(&pte_val(*ptep), 0));
> > > > +
> > > > + page_table_check_pte_clear(mm, address, pte);
> > > > +
> > > > + return pte;
> > > > }
> > > >
> > > > #ifdef CONFIG_TRANSPARENT_HUGEPAGE
> > > > @@ -884,7 +928,11 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
> > > > static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
> > > > unsigned long address, pmd_t *pmdp)
> > > > {
> > > > - return pte_pmd(ptep_get_and_clear(mm, address, (pte_t *)pmdp));
> > > > + pmd_t pmd = __pmd(xchg_relaxed(&pmd_val(*pmdp), 0));
> > > > +
> > > > + page_table_check_pmd_clear(mm, address, pmd);
> > > > +
> > > > + return pmd;
> > > > }
> > > > #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
> > > >
> > > > @@ -918,6 +966,7 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
> > > > static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
> > > > unsigned long address, pmd_t *pmdp, pmd_t pmd)
> > > > {
> > > > + page_table_check_pmd_set(vma->vm_mm, address, pmdp, pmd);
> > > > return __pmd(xchg_relaxed(&pmd_val(*pmdp), pmd_val(pmd)));
> > > > }
> > > > #endif
> > > > --
> > > > 2.25.1
> > > >
> > >
> > > --
> > > Anders Roxell
> > > [email protected]
> > > M: +46 708 22 71 05 | IRC: roxell
On Sat, May 28, 2022 at 06:41:55PM +0200, Anders Roxell wrote:
> On Tue, 17 May 2022 at 16:33, Anders Roxell <[email protected]> wrote:
> >
> > On Tue, 17 May 2022 at 16:02, Pasha Tatashin <[email protected]> wrote:
> > >
> > > On Tue, May 17, 2022 at 9:54 AM Anders Roxell <[email protected]> wrote:
> > > >
> > > > On 2022-05-07 11:01, Tong Tiangen wrote:
> > > > > From: Kefeng Wang <[email protected]>
> > > > >
> > > > > As commit d283d422c6c4 ("x86: mm: add x86_64 support for page table check")
> > > > > , enable ARCH_SUPPORTS_PAGE_TABLE_CHECK on arm64.
> > > > >
> > > > > Add additional page table check stubs for page table helpers, these stubs
> > > > > can be used to check the existing page table entries.
> > > > >
> > > > > Signed-off-by: Kefeng Wang <[email protected]>
> > > > > Signed-off-by: Tong Tiangen <[email protected]>
> > > > > Reviewed-by: Pasha Tatashin <[email protected]>
> > > >
> > > > When building and booting an arm64 allmodconfig kernel on the next tree, branch next-20220516,
> > > > see the following kernel oops when booting in QEMU [1]:
> > > >
> > > > T35] ------------[ cut here ]------------
> > > > [ 578.695796][ T35] kernel BUG at mm/page_table_check.c:82!
That seems to be:
BUG_ON(atomic_dec_return(&ptc->file_map_count) < 0);
> > > > [ 578.697292][ T35] Internal error: Oops - BUG: 0 [#1] PREEMPT SMP
> > > > [ 578.704318][ T35] Modules linked in:
> > > > [ 578.705907][ T35] CPU: 0 PID: 35 Comm: khugepaged Tainted: G T 5.18.0-rc6-next-20220513 #1 893498a5d8159d9fb26e12492a93c07e83dd4b7f
> > > > [ 578.711170][ T35] Hardware name: linux,dummy-virt (DT)
> > > > [ 578.713315][ T35] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
> > > > [ 578.716398][ T35] pc : page_table_check_clear.constprop.0+0x1f4/0x280
> > > > [ 578.719107][ T35] lr : page_table_check_clear.constprop.0+0x1cc/0x280
> > > > [ 578.721781][ T35] sp : ffff80000f3778b0
> > > > [ 578.723446][ T35] x29: ffff80000f3778b0 x28: ffff80000b891218 x27: ffff000012dd55f0
> > > > [ 578.726667][ T35] x26: 0000000000000008 x25: ffff80000c38cd80 x24: 0000000000000000
> > > > [ 578.729870][ T35] x23: ffff80000c38c9c0 x22: 0000000000000000 x21: 0000000000000200
> > > > [ 578.733079][ T35] x20: ffff000007bae000 x19: ffff000007bae008 x18: 0000000000000000
> > > > [ 578.736299][ T35] x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000
> > > > [ 578.739505][ T35] x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000
> > > > [ 578.742735][ T35] x11: 0000000000000000 x10: 0000000000000000 x9 : 0000000000000000
> > > > [ 578.745925][ T35] x8 : 0000000000000000 x7 : 0000000000000000 x6 : 0000000000000000
> > > > [ 578.749145][ T35] x5 : 0000000000000000 x4 : 0000000000000000 x3 : ffff000007bae00c
> > > > [ 578.752348][ T35] x2 : 0000000000000000 x1 : 0000000000000001 x0 : 00000000ffffffff
> > > > [ 578.755556][ T35] Call trace:
> > > > [ 578.756877][ T35] page_table_check_clear.constprop.0+0x1f4/0x280
> > > > [ 578.759446][ T35] __page_table_check_pmd_clear+0xc4/0x140
> > > > [ 578.761757][ T35] pmdp_collapse_flush+0xa4/0x1c0
> > > > [ 578.763771][ T35] collapse_huge_page+0x4e4/0xb00
> > > > [ 578.765778][ T35] khugepaged_scan_pmd+0xc18/0xd00
> > > > [ 578.767840][ T35] khugepaged_scan_mm_slot+0x580/0x780
> > > > [ 578.770018][ T35] khugepaged+0x2dc/0x400
> > > > [ 578.771786][ T35] kthread+0x164/0x180
> > > > [ 578.773430][ T35] ret_from_fork+0x10/0x20
> > > > [ 578.775253][ T35] Code: 52800021 91001263 14000388 36f80040 (d4210000)
> > > > [ 578.777990][ T35] ---[ end trace 0000000000000000 ]---
> > > > [ 578.778021][ T35] Kernel panic - not syncing: Oops - BUG: Fatal exception
> > > > [ 578.782934][ T35] Kernel Offset: disabled
> > > > [ 578.784642][ T35] CPU features: 0x000,00100010,00001086
> > > > [ 578.786848][ T35] Memory Limit: none
> > > > [ 578.788433][ T35] ---[ end Kernel panic - not syncing: Oops - BUG: Fatal exception ]---
>
> Now I see this oops on the mainline kernel too when building and booting an
> arm64 allmodconfig kernel at sha
> 9d004b2f4fea ("Merge tag 'cxl-for-5.19' of
> git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl").
>
> When I revert 42b2547137f5 ("arm64/mm: enable
> ARCH_SUPPORTS_PAGE_TABLE_CHECK"), the kernel boots fine.
I don't think disabling the check is the right thing to do, and I'm not
really seeing anything arm64-specific from the information here either. It's
more likely that one of the many other options (or combination of options)
enabled in allmodconfig is causing the problem. Are you able to reproduce on
x86?
Anshuman -- any ideas?
Will