2023-12-13 20:30:44

by Alexandre Ghiti

[permalink] [raw]
Subject: [PATCH v2 0/4] riscv: Use READ_ONCE()/WRITE_ONCE() for pte accesses

This series is a follow-up for riscv of a recent series from Ryan [1] which
converts all direct dereferences of pte_t into a ptet_get() access.

The goal here for riscv is to use READ_ONCE()/WRITE_ONCE() for all page
table entries accesses to avoid any compiler transformation when the
hardware can concurrently modify the page tables entries (A/D bits for
example).

I went a bit further and added pud/p4d/pgd_get() helpers as such concurrent
modifications can happen too at those levels.

[1] https://lore.kernel.org/all/[email protected]/

Changes in v2:
- Fix the kernel test report on arm32
- Remove the pte suffix patch
- Fix pud_offset/p4d_offset which were missing the use of accessors
- Rebase on top of 6.7-rc4

Alexandre Ghiti (4):
riscv: Use WRITE_ONCE() when setting page table entries
mm: Introduce pudp/p4dp/pgdp_get() functions
riscv: mm: Only compile pgtable.c if MMU
riscv: Use accessors to page table entries instead of direct
dereference

arch/arm/include/asm/pgtable.h | 2 ++
arch/riscv/include/asm/kfence.h | 4 +--
arch/riscv/include/asm/pgtable-64.h | 22 +++----------
arch/riscv/include/asm/pgtable.h | 33 +++++--------------
arch/riscv/kernel/efi.c | 2 +-
arch/riscv/kvm/mmu.c | 22 ++++++-------
arch/riscv/mm/Makefile | 3 +-
arch/riscv/mm/fault.c | 16 ++++-----
arch/riscv/mm/hugetlbpage.c | 12 +++----
arch/riscv/mm/kasan_init.c | 45 +++++++++++++------------
arch/riscv/mm/pageattr.c | 44 ++++++++++++-------------
arch/riscv/mm/pgtable.c | 51 ++++++++++++++++++++++++++---
include/linux/pgtable.h | 21 ++++++++++++
13 files changed, 157 insertions(+), 120 deletions(-)

--
2.39.2


2023-12-13 20:31:33

by Alexandre Ghiti

[permalink] [raw]
Subject: [PATCH v2 1/4] riscv: Use WRITE_ONCE() when setting page table entries

To avoid any compiler "weirdness" when accessing page table entries which
are concurrently modified by the HW, let's use WRITE_ONCE() macro
(commit 20a004e7b017 ("arm64: mm: Use READ_ONCE/WRITE_ONCE when accessing
page tables") gives a great explanation with more details).

Signed-off-by: Alexandre Ghiti <[email protected]>
---
arch/riscv/include/asm/pgtable-64.h | 6 +++---
arch/riscv/include/asm/pgtable.h | 4 ++--
2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h
index 9a2c780a11e9..5d8431a390dd 100644
--- a/arch/riscv/include/asm/pgtable-64.h
+++ b/arch/riscv/include/asm/pgtable-64.h
@@ -202,7 +202,7 @@ static inline int pud_user(pud_t pud)

static inline void set_pud(pud_t *pudp, pud_t pud)
{
- *pudp = pud;
+ WRITE_ONCE(*pudp, pud);
}

static inline void pud_clear(pud_t *pudp)
@@ -278,7 +278,7 @@ static inline unsigned long _pmd_pfn(pmd_t pmd)
static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
{
if (pgtable_l4_enabled)
- *p4dp = p4d;
+ WRITE_ONCE(*p4dp, p4d);
else
set_pud((pud_t *)p4dp, (pud_t){ p4d_val(p4d) });
}
@@ -351,7 +351,7 @@ static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
{
if (pgtable_l5_enabled)
- *pgdp = pgd;
+ WRITE_ONCE(*pgdp, pgd);
else
set_p4d((p4d_t *)pgdp, (p4d_t){ pgd_val(pgd) });
}
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 294044429e8e..c9f4b250b4ee 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -248,7 +248,7 @@ static inline int pmd_leaf(pmd_t pmd)

static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
{
- *pmdp = pmd;
+ WRITE_ONCE(*pmdp, pmd);
}

static inline void pmd_clear(pmd_t *pmdp)
@@ -510,7 +510,7 @@ static inline int pte_same(pte_t pte_a, pte_t pte_b)
*/
static inline void set_pte(pte_t *ptep, pte_t pteval)
{
- *ptep = pteval;
+ WRITE_ONCE(*ptep, pteval);
}

void flush_icache_pte(pte_t pte);
--
2.39.2

2023-12-13 20:32:34

by Alexandre Ghiti

[permalink] [raw]
Subject: [PATCH v2 2/4] mm: Introduce pudp/p4dp/pgdp_get() functions

Instead of directly dereferencing page tables entries, which can cause
issues (see commit 20a004e7b017 ("arm64: mm: Use READ_ONCE/WRITE_ONCE when
accessing page tables"), let's introduce new functions to get the
pud/p4d/pgd entries (the pte and pmd versions already exist).

Note that arm pgd_t is actually an array so pgdp_get() is defined as a
macro to avoid a build error.

Those new functions will be used in subsequent commits by the riscv
architecture.

Signed-off-by: Alexandre Ghiti <[email protected]>
---
arch/arm/include/asm/pgtable.h | 2 ++
include/linux/pgtable.h | 21 +++++++++++++++++++++
2 files changed, 23 insertions(+)

diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 16b02f44c7d3..d657b84b6bf7 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -151,6 +151,8 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,

extern pgd_t swapper_pg_dir[PTRS_PER_PGD];

+#define pgdp_get(pgpd) READ_ONCE(*pgdp)
+
#define pud_page(pud) pmd_page(__pmd(pud_val(pud)))
#define pud_write(pud) pmd_write(__pmd(pud_val(pud)))

diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index af7639c3b0a3..8b7daccd11be 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -292,6 +292,27 @@ static inline pmd_t pmdp_get(pmd_t *pmdp)
}
#endif

+#ifndef pudp_get
+static inline pud_t pudp_get(pud_t *pudp)
+{
+ return READ_ONCE(*pudp);
+}
+#endif
+
+#ifndef p4dp_get
+static inline p4d_t p4dp_get(p4d_t *p4dp)
+{
+ return READ_ONCE(*p4dp);
+}
+#endif
+
+#ifndef pgdp_get
+static inline pgd_t pgdp_get(pgd_t *pgdp)
+{
+ return READ_ONCE(*pgdp);
+}
+#endif
+
#ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
unsigned long address,
--
2.39.2

2023-12-13 20:33:33

by Alexandre Ghiti

[permalink] [raw]
Subject: [PATCH v2 3/4] riscv: mm: Only compile pgtable.c if MMU

All functions defined in there depend on MMU, so no need to compile it
for !MMU configs.

Signed-off-by: Alexandre Ghiti <[email protected]>
---
arch/riscv/mm/Makefile | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile
index 3a4dfc8babcf..2c869f8026a8 100644
--- a/arch/riscv/mm/Makefile
+++ b/arch/riscv/mm/Makefile
@@ -13,10 +13,9 @@ endif
KCOV_INSTRUMENT_init.o := n

obj-y += init.o
-obj-$(CONFIG_MMU) += extable.o fault.o pageattr.o
+obj-$(CONFIG_MMU) += extable.o fault.o pageattr.o pgtable.o
obj-y += cacheflush.o
obj-y += context.o
-obj-y += pgtable.o
obj-y += pmem.o

ifeq ($(CONFIG_MMU),y)
--
2.39.2

2023-12-13 20:36:12

by Alexandre Ghiti

[permalink] [raw]
Subject: [PATCH v2 4/4] riscv: Use accessors to page table entries instead of direct dereference

As very well explained in commit 20a004e7b017 ("arm64: mm: Use
READ_ONCE/WRITE_ONCE when accessing page tables"), an architecture whose
page table walker can modify the PTE in parallel must use
READ_ONCE()/WRITE_ONCE() macro to avoid any compiler transformation.

So apply that to riscv which is such architecture.

Signed-off-by: Alexandre Ghiti <[email protected]>
---
arch/riscv/include/asm/kfence.h | 4 +--
arch/riscv/include/asm/pgtable-64.h | 16 ++-------
arch/riscv/include/asm/pgtable.h | 29 ++++------------
arch/riscv/kernel/efi.c | 2 +-
arch/riscv/kvm/mmu.c | 22 ++++++-------
arch/riscv/mm/fault.c | 16 ++++-----
arch/riscv/mm/hugetlbpage.c | 12 +++----
arch/riscv/mm/kasan_init.c | 45 +++++++++++++------------
arch/riscv/mm/pageattr.c | 44 ++++++++++++-------------
arch/riscv/mm/pgtable.c | 51 ++++++++++++++++++++++++++---
10 files changed, 128 insertions(+), 113 deletions(-)

diff --git a/arch/riscv/include/asm/kfence.h b/arch/riscv/include/asm/kfence.h
index 0bbffd528096..7388edd88986 100644
--- a/arch/riscv/include/asm/kfence.h
+++ b/arch/riscv/include/asm/kfence.h
@@ -18,9 +18,9 @@ static inline bool kfence_protect_page(unsigned long addr, bool protect)
pte_t *pte = virt_to_kpte(addr);

if (protect)
- set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
+ set_pte(pte, __pte(pte_val(ptep_get(pte)) & ~_PAGE_PRESENT));
else
- set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
+ set_pte(pte, __pte(pte_val(ptep_get(pte)) | _PAGE_PRESENT));

flush_tlb_kernel_range(addr, addr + PAGE_SIZE);

diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h
index 5d8431a390dd..b42017d76924 100644
--- a/arch/riscv/include/asm/pgtable-64.h
+++ b/arch/riscv/include/asm/pgtable-64.h
@@ -340,13 +340,7 @@ static inline struct page *p4d_page(p4d_t p4d)
#define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))

#define pud_offset pud_offset
-static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
-{
- if (pgtable_l4_enabled)
- return p4d_pgtable(*p4d) + pud_index(address);
-
- return (pud_t *)p4d;
-}
+pud_t *pud_offset(p4d_t *p4d, unsigned long address);

static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
{
@@ -404,12 +398,6 @@ static inline struct page *pgd_page(pgd_t pgd)
#define p4d_index(addr) (((addr) >> P4D_SHIFT) & (PTRS_PER_P4D - 1))

#define p4d_offset p4d_offset
-static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
-{
- if (pgtable_l5_enabled)
- return pgd_pgtable(*pgd) + p4d_index(address);
-
- return (p4d_t *)pgd;
-}
+p4d_t *p4d_offset(pgd_t *pgd, unsigned long address);

#endif /* _ASM_RISCV_PGTABLE_64_H */
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index c9f4b250b4ee..3773f454f0fa 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -544,19 +544,12 @@ static inline void pte_clear(struct mm_struct *mm,
__set_pte_at(ptep, __pte(0));
}

-#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
-static inline int ptep_set_access_flags(struct vm_area_struct *vma,
- unsigned long address, pte_t *ptep,
- pte_t entry, int dirty)
-{
- if (!pte_same(*ptep, entry))
- __set_pte_at(ptep, entry);
- /*
- * update_mmu_cache will unconditionally execute, handling both
- * the case that the PTE changed and the spurious fault case.
- */
- return true;
-}
+#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS /* defined in mm/pgtable.c */
+extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
+ pte_t *ptep, pte_t entry, int dirty);
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG /* defined in mm/pgtable.c */
+extern int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long address,
+ pte_t *ptep);

#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
@@ -569,16 +562,6 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
return pte;
}

-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
-static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
- unsigned long address,
- pte_t *ptep)
-{
- if (!pte_young(*ptep))
- return 0;
- return test_and_clear_bit(_PAGE_ACCESSED_OFFSET, &pte_val(*ptep));
-}
-
#define __HAVE_ARCH_PTEP_SET_WRPROTECT
static inline void ptep_set_wrprotect(struct mm_struct *mm,
unsigned long address, pte_t *ptep)
diff --git a/arch/riscv/kernel/efi.c b/arch/riscv/kernel/efi.c
index aa6209a74c83..b64bf1624a05 100644
--- a/arch/riscv/kernel/efi.c
+++ b/arch/riscv/kernel/efi.c
@@ -60,7 +60,7 @@ int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md)
static int __init set_permissions(pte_t *ptep, unsigned long addr, void *data)
{
efi_memory_desc_t *md = data;
- pte_t pte = READ_ONCE(*ptep);
+ pte_t pte = ptep_get(ptep);
unsigned long val;

if (md->attribute & EFI_MEMORY_RO) {
diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c
index 068c74593871..a9e2fd7245e1 100644
--- a/arch/riscv/kvm/mmu.c
+++ b/arch/riscv/kvm/mmu.c
@@ -103,7 +103,7 @@ static bool gstage_get_leaf_entry(struct kvm *kvm, gpa_t addr,
*ptep_level = current_level;
ptep = (pte_t *)kvm->arch.pgd;
ptep = &ptep[gstage_pte_index(addr, current_level)];
- while (ptep && pte_val(*ptep)) {
+ while (ptep && pte_val(ptep_get(ptep))) {
if (gstage_pte_leaf(ptep)) {
*ptep_level = current_level;
*ptepp = ptep;
@@ -113,7 +113,7 @@ static bool gstage_get_leaf_entry(struct kvm *kvm, gpa_t addr,
if (current_level) {
current_level--;
*ptep_level = current_level;
- ptep = (pte_t *)gstage_pte_page_vaddr(*ptep);
+ ptep = (pte_t *)gstage_pte_page_vaddr(ptep_get(ptep));
ptep = &ptep[gstage_pte_index(addr, current_level)];
} else {
ptep = NULL;
@@ -149,25 +149,25 @@ static int gstage_set_pte(struct kvm *kvm, u32 level,
if (gstage_pte_leaf(ptep))
return -EEXIST;

- if (!pte_val(*ptep)) {
+ if (!pte_val(ptep_get(ptep))) {
if (!pcache)
return -ENOMEM;
next_ptep = kvm_mmu_memory_cache_alloc(pcache);
if (!next_ptep)
return -ENOMEM;
- *ptep = pfn_pte(PFN_DOWN(__pa(next_ptep)),
- __pgprot(_PAGE_TABLE));
+ set_pte(ptep, pfn_pte(PFN_DOWN(__pa(next_ptep)),
+ __pgprot(_PAGE_TABLE)));
} else {
if (gstage_pte_leaf(ptep))
return -EEXIST;
- next_ptep = (pte_t *)gstage_pte_page_vaddr(*ptep);
+ next_ptep = (pte_t *)gstage_pte_page_vaddr(ptep_get(ptep));
}

current_level--;
ptep = &next_ptep[gstage_pte_index(addr, current_level)];
}

- *ptep = *new_pte;
+ set_pte(ptep, *new_pte);
if (gstage_pte_leaf(ptep))
gstage_remote_tlb_flush(kvm, current_level, addr);

@@ -239,11 +239,11 @@ static void gstage_op_pte(struct kvm *kvm, gpa_t addr,

BUG_ON(addr & (page_size - 1));

- if (!pte_val(*ptep))
+ if (!pte_val(ptep_get(ptep)))
return;

if (ptep_level && !gstage_pte_leaf(ptep)) {
- next_ptep = (pte_t *)gstage_pte_page_vaddr(*ptep);
+ next_ptep = (pte_t *)gstage_pte_page_vaddr(ptep_get(ptep));
next_ptep_level = ptep_level - 1;
ret = gstage_level_to_page_size(next_ptep_level,
&next_page_size);
@@ -261,7 +261,7 @@ static void gstage_op_pte(struct kvm *kvm, gpa_t addr,
if (op == GSTAGE_OP_CLEAR)
set_pte(ptep, __pte(0));
else if (op == GSTAGE_OP_WP)
- set_pte(ptep, __pte(pte_val(*ptep) & ~_PAGE_WRITE));
+ set_pte(ptep, __pte(pte_val(ptep_get(ptep)) & ~_PAGE_WRITE));
gstage_remote_tlb_flush(kvm, ptep_level, addr);
}
}
@@ -603,7 +603,7 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
&ptep, &ptep_level))
return false;

- return pte_young(*ptep);
+ return pte_young(ptep_get(ptep));
}

int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu,
diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
index 90d4ba36d1d0..76f1df709a21 100644
--- a/arch/riscv/mm/fault.c
+++ b/arch/riscv/mm/fault.c
@@ -136,24 +136,24 @@ static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long a
pgd = (pgd_t *)pfn_to_virt(pfn) + index;
pgd_k = init_mm.pgd + index;

- if (!pgd_present(*pgd_k)) {
+ if (!pgd_present(pgdp_get(pgd_k))) {
no_context(regs, addr);
return;
}
- set_pgd(pgd, *pgd_k);
+ set_pgd(pgd, pgdp_get(pgd_k));

p4d_k = p4d_offset(pgd_k, addr);
- if (!p4d_present(*p4d_k)) {
+ if (!p4d_present(p4dp_get(p4d_k))) {
no_context(regs, addr);
return;
}

pud_k = pud_offset(p4d_k, addr);
- if (!pud_present(*pud_k)) {
+ if (!pud_present(pudp_get(pud_k))) {
no_context(regs, addr);
return;
}
- if (pud_leaf(*pud_k))
+ if (pud_leaf(pudp_get(pud_k)))
goto flush_tlb;

/*
@@ -161,11 +161,11 @@ static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long a
* to copy individual PTEs
*/
pmd_k = pmd_offset(pud_k, addr);
- if (!pmd_present(*pmd_k)) {
+ if (!pmd_present(pmdp_get(pmd_k))) {
no_context(regs, addr);
return;
}
- if (pmd_leaf(*pmd_k))
+ if (pmd_leaf(pmdp_get(pmd_k)))
goto flush_tlb;

/*
@@ -175,7 +175,7 @@ static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long a
* silently loop forever.
*/
pte_k = pte_offset_kernel(pmd_k, addr);
- if (!pte_present(*pte_k)) {
+ if (!pte_present(ptep_get(pte_k))) {
no_context(regs, addr);
return;
}
diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c
index b52f0210481f..431596c0e20e 100644
--- a/arch/riscv/mm/hugetlbpage.c
+++ b/arch/riscv/mm/hugetlbpage.c
@@ -54,7 +54,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
}

if (sz == PMD_SIZE) {
- if (want_pmd_share(vma, addr) && pud_none(*pud))
+ if (want_pmd_share(vma, addr) && pud_none(pudp_get(pud)))
pte = huge_pmd_share(mm, vma, addr, pud);
else
pte = (pte_t *)pmd_alloc(mm, pud, addr);
@@ -93,11 +93,11 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
pmd_t *pmd;

pgd = pgd_offset(mm, addr);
- if (!pgd_present(*pgd))
+ if (!pgd_present(pgdp_get(pgd)))
return NULL;

p4d = p4d_offset(pgd, addr);
- if (!p4d_present(*p4d))
+ if (!p4d_present(p4dp_get(p4d)))
return NULL;

pud = pud_offset(p4d, addr);
@@ -105,7 +105,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
/* must be pud huge, non-present or none */
return (pte_t *)pud;

- if (!pud_present(*pud))
+ if (!pud_present(pudp_get(pud)))
return NULL;

pmd = pmd_offset(pud, addr);
@@ -113,7 +113,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
/* must be pmd huge, non-present or none */
return (pte_t *)pmd;

- if (!pmd_present(*pmd))
+ if (!pmd_present(pmdp_get(pmd)))
return NULL;

for_each_napot_order(order) {
@@ -293,7 +293,7 @@ void huge_pte_clear(struct mm_struct *mm,
pte_t *ptep,
unsigned long sz)
{
- pte_t pte = READ_ONCE(*ptep);
+ pte_t pte = ptep_get(ptep);
int i, pte_num;

if (!pte_napot(pte)) {
diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c
index 5e39dcf23fdb..e96251853037 100644
--- a/arch/riscv/mm/kasan_init.c
+++ b/arch/riscv/mm/kasan_init.c
@@ -31,7 +31,7 @@ static void __init kasan_populate_pte(pmd_t *pmd, unsigned long vaddr, unsigned
phys_addr_t phys_addr;
pte_t *ptep, *p;

- if (pmd_none(*pmd)) {
+ if (pmd_none(pmdp_get(pmd))) {
p = memblock_alloc(PTRS_PER_PTE * sizeof(pte_t), PAGE_SIZE);
set_pmd(pmd, pfn_pmd(PFN_DOWN(__pa(p)), PAGE_TABLE));
}
@@ -39,7 +39,7 @@ static void __init kasan_populate_pte(pmd_t *pmd, unsigned long vaddr, unsigned
ptep = pte_offset_kernel(pmd, vaddr);

do {
- if (pte_none(*ptep)) {
+ if (pte_none(ptep_get(ptep))) {
phys_addr = memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
set_pte(ptep, pfn_pte(PFN_DOWN(phys_addr), PAGE_KERNEL));
memset(__va(phys_addr), KASAN_SHADOW_INIT, PAGE_SIZE);
@@ -53,7 +53,7 @@ static void __init kasan_populate_pmd(pud_t *pud, unsigned long vaddr, unsigned
pmd_t *pmdp, *p;
unsigned long next;

- if (pud_none(*pud)) {
+ if (pud_none(pudp_get(pud))) {
p = memblock_alloc(PTRS_PER_PMD * sizeof(pmd_t), PAGE_SIZE);
set_pud(pud, pfn_pud(PFN_DOWN(__pa(p)), PAGE_TABLE));
}
@@ -63,7 +63,8 @@ static void __init kasan_populate_pmd(pud_t *pud, unsigned long vaddr, unsigned
do {
next = pmd_addr_end(vaddr, end);

- if (pmd_none(*pmdp) && IS_ALIGNED(vaddr, PMD_SIZE) && (next - vaddr) >= PMD_SIZE) {
+ if (pmd_none(pmdp_get(pmdp)) && IS_ALIGNED(vaddr, PMD_SIZE) &&
+ (next - vaddr) >= PMD_SIZE) {
phys_addr = memblock_phys_alloc(PMD_SIZE, PMD_SIZE);
if (phys_addr) {
set_pmd(pmdp, pfn_pmd(PFN_DOWN(phys_addr), PAGE_KERNEL));
@@ -83,7 +84,7 @@ static void __init kasan_populate_pud(p4d_t *p4d,
pud_t *pudp, *p;
unsigned long next;

- if (p4d_none(*p4d)) {
+ if (p4d_none(p4dp_get(p4d))) {
p = memblock_alloc(PTRS_PER_PUD * sizeof(pud_t), PAGE_SIZE);
set_p4d(p4d, pfn_p4d(PFN_DOWN(__pa(p)), PAGE_TABLE));
}
@@ -93,7 +94,8 @@ static void __init kasan_populate_pud(p4d_t *p4d,
do {
next = pud_addr_end(vaddr, end);

- if (pud_none(*pudp) && IS_ALIGNED(vaddr, PUD_SIZE) && (next - vaddr) >= PUD_SIZE) {
+ if (pud_none(pudp_get(pudp)) && IS_ALIGNED(vaddr, PUD_SIZE) &&
+ (next - vaddr) >= PUD_SIZE) {
phys_addr = memblock_phys_alloc(PUD_SIZE, PUD_SIZE);
if (phys_addr) {
set_pud(pudp, pfn_pud(PFN_DOWN(phys_addr), PAGE_KERNEL));
@@ -113,7 +115,7 @@ static void __init kasan_populate_p4d(pgd_t *pgd,
p4d_t *p4dp, *p;
unsigned long next;

- if (pgd_none(*pgd)) {
+ if (pgd_none(pgdp_get(pgd))) {
p = memblock_alloc(PTRS_PER_P4D * sizeof(p4d_t), PAGE_SIZE);
set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa(p)), PAGE_TABLE));
}
@@ -123,7 +125,8 @@ static void __init kasan_populate_p4d(pgd_t *pgd,
do {
next = p4d_addr_end(vaddr, end);

- if (p4d_none(*p4dp) && IS_ALIGNED(vaddr, P4D_SIZE) && (next - vaddr) >= P4D_SIZE) {
+ if (p4d_none(p4dp_get(p4dp)) && IS_ALIGNED(vaddr, P4D_SIZE) &&
+ (next - vaddr) >= P4D_SIZE) {
phys_addr = memblock_phys_alloc(P4D_SIZE, P4D_SIZE);
if (phys_addr) {
set_p4d(p4dp, pfn_p4d(PFN_DOWN(phys_addr), PAGE_KERNEL));
@@ -145,7 +148,7 @@ static void __init kasan_populate_pgd(pgd_t *pgdp,
do {
next = pgd_addr_end(vaddr, end);

- if (pgd_none(*pgdp) && IS_ALIGNED(vaddr, PGDIR_SIZE) &&
+ if (pgd_none(pgdp_get(pgdp)) && IS_ALIGNED(vaddr, PGDIR_SIZE) &&
(next - vaddr) >= PGDIR_SIZE) {
phys_addr = memblock_phys_alloc(PGDIR_SIZE, PGDIR_SIZE);
if (phys_addr) {
@@ -168,7 +171,7 @@ static void __init kasan_early_clear_pud(p4d_t *p4dp,
if (!pgtable_l4_enabled) {
pudp = (pud_t *)p4dp;
} else {
- base_pud = pt_ops.get_pud_virt(pfn_to_phys(_p4d_pfn(*p4dp)));
+ base_pud = pt_ops.get_pud_virt(pfn_to_phys(_p4d_pfn(p4dp_get(p4dp))));
pudp = base_pud + pud_index(vaddr);
}

@@ -193,7 +196,7 @@ static void __init kasan_early_clear_p4d(pgd_t *pgdp,
if (!pgtable_l5_enabled) {
p4dp = (p4d_t *)pgdp;
} else {
- base_p4d = pt_ops.get_p4d_virt(pfn_to_phys(_pgd_pfn(*pgdp)));
+ base_p4d = pt_ops.get_p4d_virt(pfn_to_phys(_pgd_pfn(pgdp_get(pgdp))));
p4dp = base_p4d + p4d_index(vaddr);
}

@@ -239,14 +242,14 @@ static void __init kasan_early_populate_pud(p4d_t *p4dp,
if (!pgtable_l4_enabled) {
pudp = (pud_t *)p4dp;
} else {
- base_pud = pt_ops.get_pud_virt(pfn_to_phys(_p4d_pfn(*p4dp)));
+ base_pud = pt_ops.get_pud_virt(pfn_to_phys(_p4d_pfn(p4dp_get(p4dp))));
pudp = base_pud + pud_index(vaddr);
}

do {
next = pud_addr_end(vaddr, end);

- if (pud_none(*pudp) && IS_ALIGNED(vaddr, PUD_SIZE) &&
+ if (pud_none(pudp_get(pudp)) && IS_ALIGNED(vaddr, PUD_SIZE) &&
(next - vaddr) >= PUD_SIZE) {
phys_addr = __pa((uintptr_t)kasan_early_shadow_pmd);
set_pud(pudp, pfn_pud(PFN_DOWN(phys_addr), PAGE_TABLE));
@@ -277,14 +280,14 @@ static void __init kasan_early_populate_p4d(pgd_t *pgdp,
if (!pgtable_l5_enabled) {
p4dp = (p4d_t *)pgdp;
} else {
- base_p4d = pt_ops.get_p4d_virt(pfn_to_phys(_pgd_pfn(*pgdp)));
+ base_p4d = pt_ops.get_p4d_virt(pfn_to_phys(_pgd_pfn(pgdp_get(pgdp))));
p4dp = base_p4d + p4d_index(vaddr);
}

do {
next = p4d_addr_end(vaddr, end);

- if (p4d_none(*p4dp) && IS_ALIGNED(vaddr, P4D_SIZE) &&
+ if (p4d_none(p4dp_get(p4dp)) && IS_ALIGNED(vaddr, P4D_SIZE) &&
(next - vaddr) >= P4D_SIZE) {
phys_addr = __pa((uintptr_t)kasan_early_shadow_pud);
set_p4d(p4dp, pfn_p4d(PFN_DOWN(phys_addr), PAGE_TABLE));
@@ -305,7 +308,7 @@ static void __init kasan_early_populate_pgd(pgd_t *pgdp,
do {
next = pgd_addr_end(vaddr, end);

- if (pgd_none(*pgdp) && IS_ALIGNED(vaddr, PGDIR_SIZE) &&
+ if (pgd_none(pgdp_get(pgdp)) && IS_ALIGNED(vaddr, PGDIR_SIZE) &&
(next - vaddr) >= PGDIR_SIZE) {
phys_addr = __pa((uintptr_t)kasan_early_shadow_p4d);
set_pgd(pgdp, pfn_pgd(PFN_DOWN(phys_addr), PAGE_TABLE));
@@ -381,7 +384,7 @@ static void __init kasan_shallow_populate_pud(p4d_t *p4d,
do {
next = pud_addr_end(vaddr, end);

- if (pud_none(*pud_k)) {
+ if (pud_none(pudp_get(pud_k))) {
p = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
set_pud(pud_k, pfn_pud(PFN_DOWN(__pa(p)), PAGE_TABLE));
continue;
@@ -401,7 +404,7 @@ static void __init kasan_shallow_populate_p4d(pgd_t *pgd,
do {
next = p4d_addr_end(vaddr, end);

- if (p4d_none(*p4d_k)) {
+ if (p4d_none(p4dp_get(p4d_k))) {
p = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
set_p4d(p4d_k, pfn_p4d(PFN_DOWN(__pa(p)), PAGE_TABLE));
continue;
@@ -420,7 +423,7 @@ static void __init kasan_shallow_populate_pgd(unsigned long vaddr, unsigned long
do {
next = pgd_addr_end(vaddr, end);

- if (pgd_none(*pgd_k)) {
+ if (pgd_none(pgdp_get(pgd_k))) {
p = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
set_pgd(pgd_k, pfn_pgd(PFN_DOWN(__pa(p)), PAGE_TABLE));
continue;
@@ -451,7 +454,7 @@ static void __init create_tmp_mapping(void)

/* Copy the last p4d since it is shared with the kernel mapping. */
if (pgtable_l5_enabled) {
- ptr = (p4d_t *)pgd_page_vaddr(*pgd_offset_k(KASAN_SHADOW_END));
+ ptr = (p4d_t *)pgd_page_vaddr(pgdp_get(pgd_offset_k(KASAN_SHADOW_END)));
memcpy(tmp_p4d, ptr, sizeof(p4d_t) * PTRS_PER_P4D);
set_pgd(&tmp_pg_dir[pgd_index(KASAN_SHADOW_END)],
pfn_pgd(PFN_DOWN(__pa(tmp_p4d)), PAGE_TABLE));
@@ -462,7 +465,7 @@ static void __init create_tmp_mapping(void)

/* Copy the last pud since it is shared with the kernel mapping. */
if (pgtable_l4_enabled) {
- ptr = (pud_t *)p4d_page_vaddr(*(base_p4d + p4d_index(KASAN_SHADOW_END)));
+ ptr = (pud_t *)p4d_page_vaddr(p4dp_get(base_p4d + p4d_index(KASAN_SHADOW_END)));
memcpy(tmp_pud, ptr, sizeof(pud_t) * PTRS_PER_PUD);
set_p4d(&base_p4d[p4d_index(KASAN_SHADOW_END)],
pfn_p4d(PFN_DOWN(__pa(tmp_pud)), PAGE_TABLE));
diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c
index fc5fc4f785c4..0b5e38e018c8 100644
--- a/arch/riscv/mm/pageattr.c
+++ b/arch/riscv/mm/pageattr.c
@@ -29,7 +29,7 @@ static unsigned long set_pageattr_masks(unsigned long val, struct mm_walk *walk)
static int pageattr_p4d_entry(p4d_t *p4d, unsigned long addr,
unsigned long next, struct mm_walk *walk)
{
- p4d_t val = READ_ONCE(*p4d);
+ p4d_t val = p4dp_get(p4d);

if (p4d_leaf(val)) {
val = __p4d(set_pageattr_masks(p4d_val(val), walk));
@@ -42,7 +42,7 @@ static int pageattr_p4d_entry(p4d_t *p4d, unsigned long addr,
static int pageattr_pud_entry(pud_t *pud, unsigned long addr,
unsigned long next, struct mm_walk *walk)
{
- pud_t val = READ_ONCE(*pud);
+ pud_t val = pudp_get(pud);

if (pud_leaf(val)) {
val = __pud(set_pageattr_masks(pud_val(val), walk));
@@ -55,7 +55,7 @@ static int pageattr_pud_entry(pud_t *pud, unsigned long addr,
static int pageattr_pmd_entry(pmd_t *pmd, unsigned long addr,
unsigned long next, struct mm_walk *walk)
{
- pmd_t val = READ_ONCE(*pmd);
+ pmd_t val = pmdp_get(pmd);

if (pmd_leaf(val)) {
val = __pmd(set_pageattr_masks(pmd_val(val), walk));
@@ -68,7 +68,7 @@ static int pageattr_pmd_entry(pmd_t *pmd, unsigned long addr,
static int pageattr_pte_entry(pte_t *pte, unsigned long addr,
unsigned long next, struct mm_walk *walk)
{
- pte_t val = READ_ONCE(*pte);
+ pte_t val = ptep_get(pte);

val = __pte(set_pageattr_masks(pte_val(val), walk));
set_pte(pte, val);
@@ -108,10 +108,10 @@ static int __split_linear_mapping_pmd(pud_t *pudp,
vaddr <= (vaddr & PMD_MASK) && end >= next)
continue;

- if (pmd_leaf(*pmdp)) {
+ if (pmd_leaf(pmdp_get(pmdp))) {
struct page *pte_page;
- unsigned long pfn = _pmd_pfn(*pmdp);
- pgprot_t prot = __pgprot(pmd_val(*pmdp) & ~_PAGE_PFN_MASK);
+ unsigned long pfn = _pmd_pfn(pmdp_get(pmdp));
+ pgprot_t prot = __pgprot(pmd_val(pmdp_get(pmdp)) & ~_PAGE_PFN_MASK);
pte_t *ptep_new;
int i;

@@ -148,10 +148,10 @@ static int __split_linear_mapping_pud(p4d_t *p4dp,
vaddr <= (vaddr & PUD_MASK) && end >= next)
continue;

- if (pud_leaf(*pudp)) {
+ if (pud_leaf(pudp_get(pudp))) {
struct page *pmd_page;
- unsigned long pfn = _pud_pfn(*pudp);
- pgprot_t prot = __pgprot(pud_val(*pudp) & ~_PAGE_PFN_MASK);
+ unsigned long pfn = _pud_pfn(pudp_get(pudp));
+ pgprot_t prot = __pgprot(pud_val(pudp_get(pudp)) & ~_PAGE_PFN_MASK);
pmd_t *pmdp_new;
int i;

@@ -197,10 +197,10 @@ static int __split_linear_mapping_p4d(pgd_t *pgdp,
vaddr <= (vaddr & P4D_MASK) && end >= next)
continue;

- if (p4d_leaf(*p4dp)) {
+ if (p4d_leaf(p4dp_get(p4dp))) {
struct page *pud_page;
- unsigned long pfn = _p4d_pfn(*p4dp);
- pgprot_t prot = __pgprot(p4d_val(*p4dp) & ~_PAGE_PFN_MASK);
+ unsigned long pfn = _p4d_pfn(p4dp_get(p4dp));
+ pgprot_t prot = __pgprot(p4d_val(p4dp_get(p4dp)) & ~_PAGE_PFN_MASK);
pud_t *pudp_new;
int i;

@@ -406,29 +406,29 @@ bool kernel_page_present(struct page *page)
pte_t *pte;

pgd = pgd_offset_k(addr);
- if (!pgd_present(*pgd))
+ if (!pgd_present(pgdp_get(pgd)))
return false;
- if (pgd_leaf(*pgd))
+ if (pgd_leaf(pgdp_get(pgd)))
return true;

p4d = p4d_offset(pgd, addr);
- if (!p4d_present(*p4d))
+ if (!p4d_present(p4dp_get(p4d)))
return false;
- if (p4d_leaf(*p4d))
+ if (p4d_leaf(p4dp_get(p4d)))
return true;

pud = pud_offset(p4d, addr);
- if (!pud_present(*pud))
+ if (!pud_present(pudp_get(pud)))
return false;
- if (pud_leaf(*pud))
+ if (pud_leaf(pudp_get(pud)))
return true;

pmd = pmd_offset(pud, addr);
- if (!pmd_present(*pmd))
+ if (!pmd_present(pmdp_get(pmd)))
return false;
- if (pmd_leaf(*pmd))
+ if (pmd_leaf(pmdp_get(pmd)))
return true;

pte = pte_offset_kernel(pmd, addr);
- return pte_present(*pte);
+ return pte_present(ptep_get(pte));
}
diff --git a/arch/riscv/mm/pgtable.c b/arch/riscv/mm/pgtable.c
index fef4e7328e49..ef887efcb679 100644
--- a/arch/riscv/mm/pgtable.c
+++ b/arch/riscv/mm/pgtable.c
@@ -5,6 +5,47 @@
#include <linux/kernel.h>
#include <linux/pgtable.h>

+int ptep_set_access_flags(struct vm_area_struct *vma,
+ unsigned long address, pte_t *ptep,
+ pte_t entry, int dirty)
+{
+ if (!pte_same(ptep_get(ptep), entry))
+ __set_pte_at(ptep, entry);
+ /*
+ * update_mmu_cache will unconditionally execute, handling both
+ * the case that the PTE changed and the spurious fault case.
+ */
+ return true;
+}
+
+int ptep_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long address,
+ pte_t *ptep)
+{
+ if (!pte_young(ptep_get(ptep)))
+ return 0;
+ return test_and_clear_bit(_PAGE_ACCESSED_OFFSET, &pte_val(*ptep));
+}
+EXPORT_SYMBOL_GPL(ptep_test_and_clear_young);
+
+#ifdef CONFIG_64BIT
+pud_t *pud_offset(p4d_t *p4d, unsigned long address)
+{
+ if (pgtable_l4_enabled)
+ return p4d_pgtable(p4dp_get(p4d)) + pud_index(address);
+
+ return (pud_t *)p4d;
+}
+
+p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
+{
+ if (pgtable_l5_enabled)
+ return pgd_pgtable(pgdp_get(pgd)) + p4d_index(address);
+
+ return (p4d_t *)pgd;
+}
+#endif
+
#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot)
{
@@ -25,7 +66,7 @@ int pud_set_huge(pud_t *pud, phys_addr_t phys, pgprot_t prot)

int pud_clear_huge(pud_t *pud)
{
- if (!pud_leaf(READ_ONCE(*pud)))
+ if (!pud_leaf(pudp_get(pud)))
return 0;
pud_clear(pud);
return 1;
@@ -33,7 +74,7 @@ int pud_clear_huge(pud_t *pud)

int pud_free_pmd_page(pud_t *pud, unsigned long addr)
{
- pmd_t *pmd = pud_pgtable(*pud);
+ pmd_t *pmd = pud_pgtable(pudp_get(pud));
int i;

pud_clear(pud);
@@ -63,7 +104,7 @@ int pmd_set_huge(pmd_t *pmd, phys_addr_t phys, pgprot_t prot)

int pmd_clear_huge(pmd_t *pmd)
{
- if (!pmd_leaf(READ_ONCE(*pmd)))
+ if (!pmd_leaf(pmdp_get(pmd)))
return 0;
pmd_clear(pmd);
return 1;
@@ -71,7 +112,7 @@ int pmd_clear_huge(pmd_t *pmd)

int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
{
- pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd);
+ pte_t *pte = (pte_t *)pmd_page_vaddr(pmdp_get(pmd));

pmd_clear(pmd);

@@ -88,7 +129,7 @@ pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
pmd_t pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp);

VM_BUG_ON(address & ~HPAGE_PMD_MASK);
- VM_BUG_ON(pmd_trans_huge(*pmdp));
+ VM_BUG_ON(pmd_trans_huge(pmdp_get(pmdp)));
/*
* When leaf PTE entries (regular pages) are collapsed into a leaf
* PMD entry (huge page), a valid non-leaf PTE is converted into a
--
2.39.2

2023-12-14 05:36:15

by Anup Patel

[permalink] [raw]
Subject: Re: [PATCH v2 4/4] riscv: Use accessors to page table entries instead of direct dereference

On Thu, Dec 14, 2023 at 2:04 AM Alexandre Ghiti <[email protected]> wrote:
>
> As very well explained in commit 20a004e7b017 ("arm64: mm: Use
> READ_ONCE/WRITE_ONCE when accessing page tables"), an architecture whose
> page table walker can modify the PTE in parallel must use
> READ_ONCE()/WRITE_ONCE() macro to avoid any compiler transformation.
>
> So apply that to riscv which is such architecture.
>
> Signed-off-by: Alexandre Ghiti <[email protected]>

For KVM RISC-V:
Acked-by: Anup Patel <[email protected]>

Thanks,
Anup

> ---
> arch/riscv/include/asm/kfence.h | 4 +--
> arch/riscv/include/asm/pgtable-64.h | 16 ++-------
> arch/riscv/include/asm/pgtable.h | 29 ++++------------
> arch/riscv/kernel/efi.c | 2 +-
> arch/riscv/kvm/mmu.c | 22 ++++++-------
> arch/riscv/mm/fault.c | 16 ++++-----
> arch/riscv/mm/hugetlbpage.c | 12 +++----
> arch/riscv/mm/kasan_init.c | 45 +++++++++++++------------
> arch/riscv/mm/pageattr.c | 44 ++++++++++++-------------
> arch/riscv/mm/pgtable.c | 51 ++++++++++++++++++++++++++---
> 10 files changed, 128 insertions(+), 113 deletions(-)
>
> diff --git a/arch/riscv/include/asm/kfence.h b/arch/riscv/include/asm/kfence.h
> index 0bbffd528096..7388edd88986 100644
> --- a/arch/riscv/include/asm/kfence.h
> +++ b/arch/riscv/include/asm/kfence.h
> @@ -18,9 +18,9 @@ static inline bool kfence_protect_page(unsigned long addr, bool protect)
> pte_t *pte = virt_to_kpte(addr);
>
> if (protect)
> - set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
> + set_pte(pte, __pte(pte_val(ptep_get(pte)) & ~_PAGE_PRESENT));
> else
> - set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
> + set_pte(pte, __pte(pte_val(ptep_get(pte)) | _PAGE_PRESENT));
>
> flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
>
> diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h
> index 5d8431a390dd..b42017d76924 100644
> --- a/arch/riscv/include/asm/pgtable-64.h
> +++ b/arch/riscv/include/asm/pgtable-64.h
> @@ -340,13 +340,7 @@ static inline struct page *p4d_page(p4d_t p4d)
> #define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
>
> #define pud_offset pud_offset
> -static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
> -{
> - if (pgtable_l4_enabled)
> - return p4d_pgtable(*p4d) + pud_index(address);
> -
> - return (pud_t *)p4d;
> -}
> +pud_t *pud_offset(p4d_t *p4d, unsigned long address);
>
> static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
> {
> @@ -404,12 +398,6 @@ static inline struct page *pgd_page(pgd_t pgd)
> #define p4d_index(addr) (((addr) >> P4D_SHIFT) & (PTRS_PER_P4D - 1))
>
> #define p4d_offset p4d_offset
> -static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
> -{
> - if (pgtable_l5_enabled)
> - return pgd_pgtable(*pgd) + p4d_index(address);
> -
> - return (p4d_t *)pgd;
> -}
> +p4d_t *p4d_offset(pgd_t *pgd, unsigned long address);
>
> #endif /* _ASM_RISCV_PGTABLE_64_H */
> diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
> index c9f4b250b4ee..3773f454f0fa 100644
> --- a/arch/riscv/include/asm/pgtable.h
> +++ b/arch/riscv/include/asm/pgtable.h
> @@ -544,19 +544,12 @@ static inline void pte_clear(struct mm_struct *mm,
> __set_pte_at(ptep, __pte(0));
> }
>
> -#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
> -static inline int ptep_set_access_flags(struct vm_area_struct *vma,
> - unsigned long address, pte_t *ptep,
> - pte_t entry, int dirty)
> -{
> - if (!pte_same(*ptep, entry))
> - __set_pte_at(ptep, entry);
> - /*
> - * update_mmu_cache will unconditionally execute, handling both
> - * the case that the PTE changed and the spurious fault case.
> - */
> - return true;
> -}
> +#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS /* defined in mm/pgtable.c */
> +extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
> + pte_t *ptep, pte_t entry, int dirty);
> +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG /* defined in mm/pgtable.c */
> +extern int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long address,
> + pte_t *ptep);
>
> #define __HAVE_ARCH_PTEP_GET_AND_CLEAR
> static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
> @@ -569,16 +562,6 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
> return pte;
> }
>
> -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
> -static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
> - unsigned long address,
> - pte_t *ptep)
> -{
> - if (!pte_young(*ptep))
> - return 0;
> - return test_and_clear_bit(_PAGE_ACCESSED_OFFSET, &pte_val(*ptep));
> -}
> -
> #define __HAVE_ARCH_PTEP_SET_WRPROTECT
> static inline void ptep_set_wrprotect(struct mm_struct *mm,
> unsigned long address, pte_t *ptep)
> diff --git a/arch/riscv/kernel/efi.c b/arch/riscv/kernel/efi.c
> index aa6209a74c83..b64bf1624a05 100644
> --- a/arch/riscv/kernel/efi.c
> +++ b/arch/riscv/kernel/efi.c
> @@ -60,7 +60,7 @@ int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md)
> static int __init set_permissions(pte_t *ptep, unsigned long addr, void *data)
> {
> efi_memory_desc_t *md = data;
> - pte_t pte = READ_ONCE(*ptep);
> + pte_t pte = ptep_get(ptep);
> unsigned long val;
>
> if (md->attribute & EFI_MEMORY_RO) {
> diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c
> index 068c74593871..a9e2fd7245e1 100644
> --- a/arch/riscv/kvm/mmu.c
> +++ b/arch/riscv/kvm/mmu.c
> @@ -103,7 +103,7 @@ static bool gstage_get_leaf_entry(struct kvm *kvm, gpa_t addr,
> *ptep_level = current_level;
> ptep = (pte_t *)kvm->arch.pgd;
> ptep = &ptep[gstage_pte_index(addr, current_level)];
> - while (ptep && pte_val(*ptep)) {
> + while (ptep && pte_val(ptep_get(ptep))) {
> if (gstage_pte_leaf(ptep)) {
> *ptep_level = current_level;
> *ptepp = ptep;
> @@ -113,7 +113,7 @@ static bool gstage_get_leaf_entry(struct kvm *kvm, gpa_t addr,
> if (current_level) {
> current_level--;
> *ptep_level = current_level;
> - ptep = (pte_t *)gstage_pte_page_vaddr(*ptep);
> + ptep = (pte_t *)gstage_pte_page_vaddr(ptep_get(ptep));
> ptep = &ptep[gstage_pte_index(addr, current_level)];
> } else {
> ptep = NULL;
> @@ -149,25 +149,25 @@ static int gstage_set_pte(struct kvm *kvm, u32 level,
> if (gstage_pte_leaf(ptep))
> return -EEXIST;
>
> - if (!pte_val(*ptep)) {
> + if (!pte_val(ptep_get(ptep))) {
> if (!pcache)
> return -ENOMEM;
> next_ptep = kvm_mmu_memory_cache_alloc(pcache);
> if (!next_ptep)
> return -ENOMEM;
> - *ptep = pfn_pte(PFN_DOWN(__pa(next_ptep)),
> - __pgprot(_PAGE_TABLE));
> + set_pte(ptep, pfn_pte(PFN_DOWN(__pa(next_ptep)),
> + __pgprot(_PAGE_TABLE)));
> } else {
> if (gstage_pte_leaf(ptep))
> return -EEXIST;
> - next_ptep = (pte_t *)gstage_pte_page_vaddr(*ptep);
> + next_ptep = (pte_t *)gstage_pte_page_vaddr(ptep_get(ptep));
> }
>
> current_level--;
> ptep = &next_ptep[gstage_pte_index(addr, current_level)];
> }
>
> - *ptep = *new_pte;
> + set_pte(ptep, *new_pte);
> if (gstage_pte_leaf(ptep))
> gstage_remote_tlb_flush(kvm, current_level, addr);
>
> @@ -239,11 +239,11 @@ static void gstage_op_pte(struct kvm *kvm, gpa_t addr,
>
> BUG_ON(addr & (page_size - 1));
>
> - if (!pte_val(*ptep))
> + if (!pte_val(ptep_get(ptep)))
> return;
>
> if (ptep_level && !gstage_pte_leaf(ptep)) {
> - next_ptep = (pte_t *)gstage_pte_page_vaddr(*ptep);
> + next_ptep = (pte_t *)gstage_pte_page_vaddr(ptep_get(ptep));
> next_ptep_level = ptep_level - 1;
> ret = gstage_level_to_page_size(next_ptep_level,
> &next_page_size);
> @@ -261,7 +261,7 @@ static void gstage_op_pte(struct kvm *kvm, gpa_t addr,
> if (op == GSTAGE_OP_CLEAR)
> set_pte(ptep, __pte(0));
> else if (op == GSTAGE_OP_WP)
> - set_pte(ptep, __pte(pte_val(*ptep) & ~_PAGE_WRITE));
> + set_pte(ptep, __pte(pte_val(ptep_get(ptep)) & ~_PAGE_WRITE));
> gstage_remote_tlb_flush(kvm, ptep_level, addr);
> }
> }
> @@ -603,7 +603,7 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
> &ptep, &ptep_level))
> return false;
>
> - return pte_young(*ptep);
> + return pte_young(ptep_get(ptep));
> }
>
> int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu,
> diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
> index 90d4ba36d1d0..76f1df709a21 100644
> --- a/arch/riscv/mm/fault.c
> +++ b/arch/riscv/mm/fault.c
> @@ -136,24 +136,24 @@ static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long a
> pgd = (pgd_t *)pfn_to_virt(pfn) + index;
> pgd_k = init_mm.pgd + index;
>
> - if (!pgd_present(*pgd_k)) {
> + if (!pgd_present(pgdp_get(pgd_k))) {
> no_context(regs, addr);
> return;
> }
> - set_pgd(pgd, *pgd_k);
> + set_pgd(pgd, pgdp_get(pgd_k));
>
> p4d_k = p4d_offset(pgd_k, addr);
> - if (!p4d_present(*p4d_k)) {
> + if (!p4d_present(p4dp_get(p4d_k))) {
> no_context(regs, addr);
> return;
> }
>
> pud_k = pud_offset(p4d_k, addr);
> - if (!pud_present(*pud_k)) {
> + if (!pud_present(pudp_get(pud_k))) {
> no_context(regs, addr);
> return;
> }
> - if (pud_leaf(*pud_k))
> + if (pud_leaf(pudp_get(pud_k)))
> goto flush_tlb;
>
> /*
> @@ -161,11 +161,11 @@ static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long a
> * to copy individual PTEs
> */
> pmd_k = pmd_offset(pud_k, addr);
> - if (!pmd_present(*pmd_k)) {
> + if (!pmd_present(pmdp_get(pmd_k))) {
> no_context(regs, addr);
> return;
> }
> - if (pmd_leaf(*pmd_k))
> + if (pmd_leaf(pmdp_get(pmd_k)))
> goto flush_tlb;
>
> /*
> @@ -175,7 +175,7 @@ static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long a
> * silently loop forever.
> */
> pte_k = pte_offset_kernel(pmd_k, addr);
> - if (!pte_present(*pte_k)) {
> + if (!pte_present(ptep_get(pte_k))) {
> no_context(regs, addr);
> return;
> }
> diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c
> index b52f0210481f..431596c0e20e 100644
> --- a/arch/riscv/mm/hugetlbpage.c
> +++ b/arch/riscv/mm/hugetlbpage.c
> @@ -54,7 +54,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
> }
>
> if (sz == PMD_SIZE) {
> - if (want_pmd_share(vma, addr) && pud_none(*pud))
> + if (want_pmd_share(vma, addr) && pud_none(pudp_get(pud)))
> pte = huge_pmd_share(mm, vma, addr, pud);
> else
> pte = (pte_t *)pmd_alloc(mm, pud, addr);
> @@ -93,11 +93,11 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
> pmd_t *pmd;
>
> pgd = pgd_offset(mm, addr);
> - if (!pgd_present(*pgd))
> + if (!pgd_present(pgdp_get(pgd)))
> return NULL;
>
> p4d = p4d_offset(pgd, addr);
> - if (!p4d_present(*p4d))
> + if (!p4d_present(p4dp_get(p4d)))
> return NULL;
>
> pud = pud_offset(p4d, addr);
> @@ -105,7 +105,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
> /* must be pud huge, non-present or none */
> return (pte_t *)pud;
>
> - if (!pud_present(*pud))
> + if (!pud_present(pudp_get(pud)))
> return NULL;
>
> pmd = pmd_offset(pud, addr);
> @@ -113,7 +113,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
> /* must be pmd huge, non-present or none */
> return (pte_t *)pmd;
>
> - if (!pmd_present(*pmd))
> + if (!pmd_present(pmdp_get(pmd)))
> return NULL;
>
> for_each_napot_order(order) {
> @@ -293,7 +293,7 @@ void huge_pte_clear(struct mm_struct *mm,
> pte_t *ptep,
> unsigned long sz)
> {
> - pte_t pte = READ_ONCE(*ptep);
> + pte_t pte = ptep_get(ptep);
> int i, pte_num;
>
> if (!pte_napot(pte)) {
> diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c
> index 5e39dcf23fdb..e96251853037 100644
> --- a/arch/riscv/mm/kasan_init.c
> +++ b/arch/riscv/mm/kasan_init.c
> @@ -31,7 +31,7 @@ static void __init kasan_populate_pte(pmd_t *pmd, unsigned long vaddr, unsigned
> phys_addr_t phys_addr;
> pte_t *ptep, *p;
>
> - if (pmd_none(*pmd)) {
> + if (pmd_none(pmdp_get(pmd))) {
> p = memblock_alloc(PTRS_PER_PTE * sizeof(pte_t), PAGE_SIZE);
> set_pmd(pmd, pfn_pmd(PFN_DOWN(__pa(p)), PAGE_TABLE));
> }
> @@ -39,7 +39,7 @@ static void __init kasan_populate_pte(pmd_t *pmd, unsigned long vaddr, unsigned
> ptep = pte_offset_kernel(pmd, vaddr);
>
> do {
> - if (pte_none(*ptep)) {
> + if (pte_none(ptep_get(ptep))) {
> phys_addr = memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
> set_pte(ptep, pfn_pte(PFN_DOWN(phys_addr), PAGE_KERNEL));
> memset(__va(phys_addr), KASAN_SHADOW_INIT, PAGE_SIZE);
> @@ -53,7 +53,7 @@ static void __init kasan_populate_pmd(pud_t *pud, unsigned long vaddr, unsigned
> pmd_t *pmdp, *p;
> unsigned long next;
>
> - if (pud_none(*pud)) {
> + if (pud_none(pudp_get(pud))) {
> p = memblock_alloc(PTRS_PER_PMD * sizeof(pmd_t), PAGE_SIZE);
> set_pud(pud, pfn_pud(PFN_DOWN(__pa(p)), PAGE_TABLE));
> }
> @@ -63,7 +63,8 @@ static void __init kasan_populate_pmd(pud_t *pud, unsigned long vaddr, unsigned
> do {
> next = pmd_addr_end(vaddr, end);
>
> - if (pmd_none(*pmdp) && IS_ALIGNED(vaddr, PMD_SIZE) && (next - vaddr) >= PMD_SIZE) {
> + if (pmd_none(pmdp_get(pmdp)) && IS_ALIGNED(vaddr, PMD_SIZE) &&
> + (next - vaddr) >= PMD_SIZE) {
> phys_addr = memblock_phys_alloc(PMD_SIZE, PMD_SIZE);
> if (phys_addr) {
> set_pmd(pmdp, pfn_pmd(PFN_DOWN(phys_addr), PAGE_KERNEL));
> @@ -83,7 +84,7 @@ static void __init kasan_populate_pud(p4d_t *p4d,
> pud_t *pudp, *p;
> unsigned long next;
>
> - if (p4d_none(*p4d)) {
> + if (p4d_none(p4dp_get(p4d))) {
> p = memblock_alloc(PTRS_PER_PUD * sizeof(pud_t), PAGE_SIZE);
> set_p4d(p4d, pfn_p4d(PFN_DOWN(__pa(p)), PAGE_TABLE));
> }
> @@ -93,7 +94,8 @@ static void __init kasan_populate_pud(p4d_t *p4d,
> do {
> next = pud_addr_end(vaddr, end);
>
> - if (pud_none(*pudp) && IS_ALIGNED(vaddr, PUD_SIZE) && (next - vaddr) >= PUD_SIZE) {
> + if (pud_none(pudp_get(pudp)) && IS_ALIGNED(vaddr, PUD_SIZE) &&
> + (next - vaddr) >= PUD_SIZE) {
> phys_addr = memblock_phys_alloc(PUD_SIZE, PUD_SIZE);
> if (phys_addr) {
> set_pud(pudp, pfn_pud(PFN_DOWN(phys_addr), PAGE_KERNEL));
> @@ -113,7 +115,7 @@ static void __init kasan_populate_p4d(pgd_t *pgd,
> p4d_t *p4dp, *p;
> unsigned long next;
>
> - if (pgd_none(*pgd)) {
> + if (pgd_none(pgdp_get(pgd))) {
> p = memblock_alloc(PTRS_PER_P4D * sizeof(p4d_t), PAGE_SIZE);
> set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa(p)), PAGE_TABLE));
> }
> @@ -123,7 +125,8 @@ static void __init kasan_populate_p4d(pgd_t *pgd,
> do {
> next = p4d_addr_end(vaddr, end);
>
> - if (p4d_none(*p4dp) && IS_ALIGNED(vaddr, P4D_SIZE) && (next - vaddr) >= P4D_SIZE) {
> + if (p4d_none(p4dp_get(p4dp)) && IS_ALIGNED(vaddr, P4D_SIZE) &&
> + (next - vaddr) >= P4D_SIZE) {
> phys_addr = memblock_phys_alloc(P4D_SIZE, P4D_SIZE);
> if (phys_addr) {
> set_p4d(p4dp, pfn_p4d(PFN_DOWN(phys_addr), PAGE_KERNEL));
> @@ -145,7 +148,7 @@ static void __init kasan_populate_pgd(pgd_t *pgdp,
> do {
> next = pgd_addr_end(vaddr, end);
>
> - if (pgd_none(*pgdp) && IS_ALIGNED(vaddr, PGDIR_SIZE) &&
> + if (pgd_none(pgdp_get(pgdp)) && IS_ALIGNED(vaddr, PGDIR_SIZE) &&
> (next - vaddr) >= PGDIR_SIZE) {
> phys_addr = memblock_phys_alloc(PGDIR_SIZE, PGDIR_SIZE);
> if (phys_addr) {
> @@ -168,7 +171,7 @@ static void __init kasan_early_clear_pud(p4d_t *p4dp,
> if (!pgtable_l4_enabled) {
> pudp = (pud_t *)p4dp;
> } else {
> - base_pud = pt_ops.get_pud_virt(pfn_to_phys(_p4d_pfn(*p4dp)));
> + base_pud = pt_ops.get_pud_virt(pfn_to_phys(_p4d_pfn(p4dp_get(p4dp))));
> pudp = base_pud + pud_index(vaddr);
> }
>
> @@ -193,7 +196,7 @@ static void __init kasan_early_clear_p4d(pgd_t *pgdp,
> if (!pgtable_l5_enabled) {
> p4dp = (p4d_t *)pgdp;
> } else {
> - base_p4d = pt_ops.get_p4d_virt(pfn_to_phys(_pgd_pfn(*pgdp)));
> + base_p4d = pt_ops.get_p4d_virt(pfn_to_phys(_pgd_pfn(pgdp_get(pgdp))));
> p4dp = base_p4d + p4d_index(vaddr);
> }
>
> @@ -239,14 +242,14 @@ static void __init kasan_early_populate_pud(p4d_t *p4dp,
> if (!pgtable_l4_enabled) {
> pudp = (pud_t *)p4dp;
> } else {
> - base_pud = pt_ops.get_pud_virt(pfn_to_phys(_p4d_pfn(*p4dp)));
> + base_pud = pt_ops.get_pud_virt(pfn_to_phys(_p4d_pfn(p4dp_get(p4dp))));
> pudp = base_pud + pud_index(vaddr);
> }
>
> do {
> next = pud_addr_end(vaddr, end);
>
> - if (pud_none(*pudp) && IS_ALIGNED(vaddr, PUD_SIZE) &&
> + if (pud_none(pudp_get(pudp)) && IS_ALIGNED(vaddr, PUD_SIZE) &&
> (next - vaddr) >= PUD_SIZE) {
> phys_addr = __pa((uintptr_t)kasan_early_shadow_pmd);
> set_pud(pudp, pfn_pud(PFN_DOWN(phys_addr), PAGE_TABLE));
> @@ -277,14 +280,14 @@ static void __init kasan_early_populate_p4d(pgd_t *pgdp,
> if (!pgtable_l5_enabled) {
> p4dp = (p4d_t *)pgdp;
> } else {
> - base_p4d = pt_ops.get_p4d_virt(pfn_to_phys(_pgd_pfn(*pgdp)));
> + base_p4d = pt_ops.get_p4d_virt(pfn_to_phys(_pgd_pfn(pgdp_get(pgdp))));
> p4dp = base_p4d + p4d_index(vaddr);
> }
>
> do {
> next = p4d_addr_end(vaddr, end);
>
> - if (p4d_none(*p4dp) && IS_ALIGNED(vaddr, P4D_SIZE) &&
> + if (p4d_none(p4dp_get(p4dp)) && IS_ALIGNED(vaddr, P4D_SIZE) &&
> (next - vaddr) >= P4D_SIZE) {
> phys_addr = __pa((uintptr_t)kasan_early_shadow_pud);
> set_p4d(p4dp, pfn_p4d(PFN_DOWN(phys_addr), PAGE_TABLE));
> @@ -305,7 +308,7 @@ static void __init kasan_early_populate_pgd(pgd_t *pgdp,
> do {
> next = pgd_addr_end(vaddr, end);
>
> - if (pgd_none(*pgdp) && IS_ALIGNED(vaddr, PGDIR_SIZE) &&
> + if (pgd_none(pgdp_get(pgdp)) && IS_ALIGNED(vaddr, PGDIR_SIZE) &&
> (next - vaddr) >= PGDIR_SIZE) {
> phys_addr = __pa((uintptr_t)kasan_early_shadow_p4d);
> set_pgd(pgdp, pfn_pgd(PFN_DOWN(phys_addr), PAGE_TABLE));
> @@ -381,7 +384,7 @@ static void __init kasan_shallow_populate_pud(p4d_t *p4d,
> do {
> next = pud_addr_end(vaddr, end);
>
> - if (pud_none(*pud_k)) {
> + if (pud_none(pudp_get(pud_k))) {
> p = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
> set_pud(pud_k, pfn_pud(PFN_DOWN(__pa(p)), PAGE_TABLE));
> continue;
> @@ -401,7 +404,7 @@ static void __init kasan_shallow_populate_p4d(pgd_t *pgd,
> do {
> next = p4d_addr_end(vaddr, end);
>
> - if (p4d_none(*p4d_k)) {
> + if (p4d_none(p4dp_get(p4d_k))) {
> p = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
> set_p4d(p4d_k, pfn_p4d(PFN_DOWN(__pa(p)), PAGE_TABLE));
> continue;
> @@ -420,7 +423,7 @@ static void __init kasan_shallow_populate_pgd(unsigned long vaddr, unsigned long
> do {
> next = pgd_addr_end(vaddr, end);
>
> - if (pgd_none(*pgd_k)) {
> + if (pgd_none(pgdp_get(pgd_k))) {
> p = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
> set_pgd(pgd_k, pfn_pgd(PFN_DOWN(__pa(p)), PAGE_TABLE));
> continue;
> @@ -451,7 +454,7 @@ static void __init create_tmp_mapping(void)
>
> /* Copy the last p4d since it is shared with the kernel mapping. */
> if (pgtable_l5_enabled) {
> - ptr = (p4d_t *)pgd_page_vaddr(*pgd_offset_k(KASAN_SHADOW_END));
> + ptr = (p4d_t *)pgd_page_vaddr(pgdp_get(pgd_offset_k(KASAN_SHADOW_END)));
> memcpy(tmp_p4d, ptr, sizeof(p4d_t) * PTRS_PER_P4D);
> set_pgd(&tmp_pg_dir[pgd_index(KASAN_SHADOW_END)],
> pfn_pgd(PFN_DOWN(__pa(tmp_p4d)), PAGE_TABLE));
> @@ -462,7 +465,7 @@ static void __init create_tmp_mapping(void)
>
> /* Copy the last pud since it is shared with the kernel mapping. */
> if (pgtable_l4_enabled) {
> - ptr = (pud_t *)p4d_page_vaddr(*(base_p4d + p4d_index(KASAN_SHADOW_END)));
> + ptr = (pud_t *)p4d_page_vaddr(p4dp_get(base_p4d + p4d_index(KASAN_SHADOW_END)));
> memcpy(tmp_pud, ptr, sizeof(pud_t) * PTRS_PER_PUD);
> set_p4d(&base_p4d[p4d_index(KASAN_SHADOW_END)],
> pfn_p4d(PFN_DOWN(__pa(tmp_pud)), PAGE_TABLE));
> diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c
> index fc5fc4f785c4..0b5e38e018c8 100644
> --- a/arch/riscv/mm/pageattr.c
> +++ b/arch/riscv/mm/pageattr.c
> @@ -29,7 +29,7 @@ static unsigned long set_pageattr_masks(unsigned long val, struct mm_walk *walk)
> static int pageattr_p4d_entry(p4d_t *p4d, unsigned long addr,
> unsigned long next, struct mm_walk *walk)
> {
> - p4d_t val = READ_ONCE(*p4d);
> + p4d_t val = p4dp_get(p4d);
>
> if (p4d_leaf(val)) {
> val = __p4d(set_pageattr_masks(p4d_val(val), walk));
> @@ -42,7 +42,7 @@ static int pageattr_p4d_entry(p4d_t *p4d, unsigned long addr,
> static int pageattr_pud_entry(pud_t *pud, unsigned long addr,
> unsigned long next, struct mm_walk *walk)
> {
> - pud_t val = READ_ONCE(*pud);
> + pud_t val = pudp_get(pud);
>
> if (pud_leaf(val)) {
> val = __pud(set_pageattr_masks(pud_val(val), walk));
> @@ -55,7 +55,7 @@ static int pageattr_pud_entry(pud_t *pud, unsigned long addr,
> static int pageattr_pmd_entry(pmd_t *pmd, unsigned long addr,
> unsigned long next, struct mm_walk *walk)
> {
> - pmd_t val = READ_ONCE(*pmd);
> + pmd_t val = pmdp_get(pmd);
>
> if (pmd_leaf(val)) {
> val = __pmd(set_pageattr_masks(pmd_val(val), walk));
> @@ -68,7 +68,7 @@ static int pageattr_pmd_entry(pmd_t *pmd, unsigned long addr,
> static int pageattr_pte_entry(pte_t *pte, unsigned long addr,
> unsigned long next, struct mm_walk *walk)
> {
> - pte_t val = READ_ONCE(*pte);
> + pte_t val = ptep_get(pte);
>
> val = __pte(set_pageattr_masks(pte_val(val), walk));
> set_pte(pte, val);
> @@ -108,10 +108,10 @@ static int __split_linear_mapping_pmd(pud_t *pudp,
> vaddr <= (vaddr & PMD_MASK) && end >= next)
> continue;
>
> - if (pmd_leaf(*pmdp)) {
> + if (pmd_leaf(pmdp_get(pmdp))) {
> struct page *pte_page;
> - unsigned long pfn = _pmd_pfn(*pmdp);
> - pgprot_t prot = __pgprot(pmd_val(*pmdp) & ~_PAGE_PFN_MASK);
> + unsigned long pfn = _pmd_pfn(pmdp_get(pmdp));
> + pgprot_t prot = __pgprot(pmd_val(pmdp_get(pmdp)) & ~_PAGE_PFN_MASK);
> pte_t *ptep_new;
> int i;
>
> @@ -148,10 +148,10 @@ static int __split_linear_mapping_pud(p4d_t *p4dp,
> vaddr <= (vaddr & PUD_MASK) && end >= next)
> continue;
>
> - if (pud_leaf(*pudp)) {
> + if (pud_leaf(pudp_get(pudp))) {
> struct page *pmd_page;
> - unsigned long pfn = _pud_pfn(*pudp);
> - pgprot_t prot = __pgprot(pud_val(*pudp) & ~_PAGE_PFN_MASK);
> + unsigned long pfn = _pud_pfn(pudp_get(pudp));
> + pgprot_t prot = __pgprot(pud_val(pudp_get(pudp)) & ~_PAGE_PFN_MASK);
> pmd_t *pmdp_new;
> int i;
>
> @@ -197,10 +197,10 @@ static int __split_linear_mapping_p4d(pgd_t *pgdp,
> vaddr <= (vaddr & P4D_MASK) && end >= next)
> continue;
>
> - if (p4d_leaf(*p4dp)) {
> + if (p4d_leaf(p4dp_get(p4dp))) {
> struct page *pud_page;
> - unsigned long pfn = _p4d_pfn(*p4dp);
> - pgprot_t prot = __pgprot(p4d_val(*p4dp) & ~_PAGE_PFN_MASK);
> + unsigned long pfn = _p4d_pfn(p4dp_get(p4dp));
> + pgprot_t prot = __pgprot(p4d_val(p4dp_get(p4dp)) & ~_PAGE_PFN_MASK);
> pud_t *pudp_new;
> int i;
>
> @@ -406,29 +406,29 @@ bool kernel_page_present(struct page *page)
> pte_t *pte;
>
> pgd = pgd_offset_k(addr);
> - if (!pgd_present(*pgd))
> + if (!pgd_present(pgdp_get(pgd)))
> return false;
> - if (pgd_leaf(*pgd))
> + if (pgd_leaf(pgdp_get(pgd)))
> return true;
>
> p4d = p4d_offset(pgd, addr);
> - if (!p4d_present(*p4d))
> + if (!p4d_present(p4dp_get(p4d)))
> return false;
> - if (p4d_leaf(*p4d))
> + if (p4d_leaf(p4dp_get(p4d)))
> return true;
>
> pud = pud_offset(p4d, addr);
> - if (!pud_present(*pud))
> + if (!pud_present(pudp_get(pud)))
> return false;
> - if (pud_leaf(*pud))
> + if (pud_leaf(pudp_get(pud)))
> return true;
>
> pmd = pmd_offset(pud, addr);
> - if (!pmd_present(*pmd))
> + if (!pmd_present(pmdp_get(pmd)))
> return false;
> - if (pmd_leaf(*pmd))
> + if (pmd_leaf(pmdp_get(pmd)))
> return true;
>
> pte = pte_offset_kernel(pmd, addr);
> - return pte_present(*pte);
> + return pte_present(ptep_get(pte));
> }
> diff --git a/arch/riscv/mm/pgtable.c b/arch/riscv/mm/pgtable.c
> index fef4e7328e49..ef887efcb679 100644
> --- a/arch/riscv/mm/pgtable.c
> +++ b/arch/riscv/mm/pgtable.c
> @@ -5,6 +5,47 @@
> #include <linux/kernel.h>
> #include <linux/pgtable.h>
>
> +int ptep_set_access_flags(struct vm_area_struct *vma,
> + unsigned long address, pte_t *ptep,
> + pte_t entry, int dirty)
> +{
> + if (!pte_same(ptep_get(ptep), entry))
> + __set_pte_at(ptep, entry);
> + /*
> + * update_mmu_cache will unconditionally execute, handling both
> + * the case that the PTE changed and the spurious fault case.
> + */
> + return true;
> +}
> +
> +int ptep_test_and_clear_young(struct vm_area_struct *vma,
> + unsigned long address,
> + pte_t *ptep)
> +{
> + if (!pte_young(ptep_get(ptep)))
> + return 0;
> + return test_and_clear_bit(_PAGE_ACCESSED_OFFSET, &pte_val(*ptep));
> +}
> +EXPORT_SYMBOL_GPL(ptep_test_and_clear_young);
> +
> +#ifdef CONFIG_64BIT
> +pud_t *pud_offset(p4d_t *p4d, unsigned long address)
> +{
> + if (pgtable_l4_enabled)
> + return p4d_pgtable(p4dp_get(p4d)) + pud_index(address);
> +
> + return (pud_t *)p4d;
> +}
> +
> +p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
> +{
> + if (pgtable_l5_enabled)
> + return pgd_pgtable(pgdp_get(pgd)) + p4d_index(address);
> +
> + return (p4d_t *)pgd;
> +}
> +#endif
> +
> #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
> int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot)
> {
> @@ -25,7 +66,7 @@ int pud_set_huge(pud_t *pud, phys_addr_t phys, pgprot_t prot)
>
> int pud_clear_huge(pud_t *pud)
> {
> - if (!pud_leaf(READ_ONCE(*pud)))
> + if (!pud_leaf(pudp_get(pud)))
> return 0;
> pud_clear(pud);
> return 1;
> @@ -33,7 +74,7 @@ int pud_clear_huge(pud_t *pud)
>
> int pud_free_pmd_page(pud_t *pud, unsigned long addr)
> {
> - pmd_t *pmd = pud_pgtable(*pud);
> + pmd_t *pmd = pud_pgtable(pudp_get(pud));
> int i;
>
> pud_clear(pud);
> @@ -63,7 +104,7 @@ int pmd_set_huge(pmd_t *pmd, phys_addr_t phys, pgprot_t prot)
>
> int pmd_clear_huge(pmd_t *pmd)
> {
> - if (!pmd_leaf(READ_ONCE(*pmd)))
> + if (!pmd_leaf(pmdp_get(pmd)))
> return 0;
> pmd_clear(pmd);
> return 1;
> @@ -71,7 +112,7 @@ int pmd_clear_huge(pmd_t *pmd)
>
> int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
> {
> - pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd);
> + pte_t *pte = (pte_t *)pmd_page_vaddr(pmdp_get(pmd));
>
> pmd_clear(pmd);
>
> @@ -88,7 +129,7 @@ pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
> pmd_t pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp);
>
> VM_BUG_ON(address & ~HPAGE_PMD_MASK);
> - VM_BUG_ON(pmd_trans_huge(*pmdp));
> + VM_BUG_ON(pmd_trans_huge(pmdp_get(pmdp)));
> /*
> * When leaf PTE entries (regular pages) are collapsed into a leaf
> * PMD entry (huge page), a valid non-leaf PTE is converted into a
> --
> 2.39.2
>

Subject: Re: [PATCH v2 0/4] riscv: Use READ_ONCE()/WRITE_ONCE() for pte accesses

Hello:

This series was applied to riscv/linux.git (for-next)
by Palmer Dabbelt <[email protected]>:

On Wed, 13 Dec 2023 21:29:57 +0100 you wrote:
> This series is a follow-up for riscv of a recent series from Ryan [1] which
> converts all direct dereferences of pte_t into a ptet_get() access.
>
> The goal here for riscv is to use READ_ONCE()/WRITE_ONCE() for all page
> table entries accesses to avoid any compiler transformation when the
> hardware can concurrently modify the page tables entries (A/D bits for
> example).
>
> [...]

Here is the summary with links:
- [v2,1/4] riscv: Use WRITE_ONCE() when setting page table entries
https://git.kernel.org/riscv/c/c30fa83b4989
- [v2,2/4] mm: Introduce pudp/p4dp/pgdp_get() functions
https://git.kernel.org/riscv/c/eba2591d99d1
- [v2,3/4] riscv: mm: Only compile pgtable.c if MMU
https://git.kernel.org/riscv/c/d6508999d188
- [v2,4/4] riscv: Use accessors to page table entries instead of direct dereference
https://git.kernel.org/riscv/c/edf955647269

You are awesome, thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html