2023-03-15 05:15:59

by Matthew Wilcox

Subject: [PATCH v4 16/36] mips: Implement the new page table range API

Rename _PFN_SHIFT to PFN_PTE_SHIFT. Convert a few places
to call set_pte() instead of set_pte_at(). Add set_ptes(),
update_mmu_cache_range(), flush_icache_pages() and flush_dcache_folio().
Change the PG_arch_1 (aka PG_dcache_dirty) flag from being per-page
to per-folio.

Signed-off-by: Matthew Wilcox (Oracle) <[email protected]>
Cc: Thomas Bogendoerfer <[email protected]>
Cc: [email protected]
---
arch/mips/bcm47xx/prom.c | 2 +-
arch/mips/include/asm/cacheflush.h | 32 ++++++++++------
arch/mips/include/asm/pgtable-32.h | 10 ++---
arch/mips/include/asm/pgtable-64.h | 6 +--
arch/mips/include/asm/pgtable-bits.h | 6 +--
arch/mips/include/asm/pgtable.h | 44 +++++++++++++---------
arch/mips/mm/c-r4k.c | 5 ++-
arch/mips/mm/cache.c | 56 ++++++++++++++--------------
arch/mips/mm/init.c | 21 +++++++----
arch/mips/mm/pgtable-32.c | 2 +-
arch/mips/mm/pgtable-64.c | 2 +-
arch/mips/mm/tlbex.c | 2 +-
12 files changed, 107 insertions(+), 81 deletions(-)

diff --git a/arch/mips/bcm47xx/prom.c b/arch/mips/bcm47xx/prom.c
index a9bea411d928..99a1ba5394e0 100644
--- a/arch/mips/bcm47xx/prom.c
+++ b/arch/mips/bcm47xx/prom.c
@@ -116,7 +116,7 @@ void __init prom_init(void)
#if defined(CONFIG_BCM47XX_BCMA) && defined(CONFIG_HIGHMEM)

#define EXTVBASE 0xc0000000
-#define ENTRYLO(x) ((pte_val(pfn_pte((x) >> _PFN_SHIFT, PAGE_KERNEL_UNCACHED)) >> 6) | 1)
+#define ENTRYLO(x) ((pte_val(pfn_pte((x) >> PFN_PTE_SHIFT, PAGE_KERNEL_UNCACHED)) >> 6) | 1)

#include <asm/tlbflush.h>

diff --git a/arch/mips/include/asm/cacheflush.h b/arch/mips/include/asm/cacheflush.h
index b3dc9c589442..2683cade42ef 100644
--- a/arch/mips/include/asm/cacheflush.h
+++ b/arch/mips/include/asm/cacheflush.h
@@ -36,12 +36,12 @@
*/
#define PG_dcache_dirty PG_arch_1

-#define Page_dcache_dirty(page) \
- test_bit(PG_dcache_dirty, &(page)->flags)
-#define SetPageDcacheDirty(page) \
- set_bit(PG_dcache_dirty, &(page)->flags)
-#define ClearPageDcacheDirty(page) \
- clear_bit(PG_dcache_dirty, &(page)->flags)
+#define folio_test_dcache_dirty(folio) \
+ test_bit(PG_dcache_dirty, &(folio)->flags)
+#define folio_set_dcache_dirty(folio) \
+ set_bit(PG_dcache_dirty, &(folio)->flags)
+#define folio_clear_dcache_dirty(folio) \
+ clear_bit(PG_dcache_dirty, &(folio)->flags)

extern void (*flush_cache_all)(void);
extern void (*__flush_cache_all)(void);
@@ -50,15 +50,24 @@ extern void (*flush_cache_mm)(struct mm_struct *mm);
extern void (*flush_cache_range)(struct vm_area_struct *vma,
unsigned long start, unsigned long end);
extern void (*flush_cache_page)(struct vm_area_struct *vma, unsigned long page, unsigned long pfn);
-extern void __flush_dcache_page(struct page *page);
+extern void __flush_dcache_pages(struct page *page, unsigned int nr);

#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
+static inline void flush_dcache_folio(struct folio *folio)
+{
+ if (cpu_has_dc_aliases)
+ __flush_dcache_pages(&folio->page, folio_nr_pages(folio));
+ else if (!cpu_has_ic_fills_f_dc)
+ folio_set_dcache_dirty(folio);
+}
+#define flush_dcache_folio flush_dcache_folio
+
static inline void flush_dcache_page(struct page *page)
{
if (cpu_has_dc_aliases)
- __flush_dcache_page(page);
+ __flush_dcache_pages(page, 1);
else if (!cpu_has_ic_fills_f_dc)
- SetPageDcacheDirty(page);
+ folio_set_dcache_dirty(page_folio(page));
}

#define flush_dcache_mmap_lock(mapping) do { } while (0)
@@ -73,10 +82,11 @@ static inline void flush_anon_page(struct vm_area_struct *vma,
__flush_anon_page(page, vmaddr);
}

-static inline void flush_icache_page(struct vm_area_struct *vma,
- struct page *page)
+static inline void flush_icache_pages(struct vm_area_struct *vma,
+ struct page *page, unsigned int nr)
{
}
+#define flush_icache_page(vma, page) flush_icache_pages(vma, page, 1)

extern void (*flush_icache_range)(unsigned long start, unsigned long end);
extern void (*local_flush_icache_range)(unsigned long start, unsigned long end);
diff --git a/arch/mips/include/asm/pgtable-32.h b/arch/mips/include/asm/pgtable-32.h
index ba0016709a1a..0e196650f4f4 100644
--- a/arch/mips/include/asm/pgtable-32.h
+++ b/arch/mips/include/asm/pgtable-32.h
@@ -153,7 +153,7 @@ static inline void pmd_clear(pmd_t *pmdp)
#if defined(CONFIG_XPA)

#define MAX_POSSIBLE_PHYSMEM_BITS 40
-#define pte_pfn(x) (((unsigned long)((x).pte_high >> _PFN_SHIFT)) | (unsigned long)((x).pte_low << _PAGE_PRESENT_SHIFT))
+#define pte_pfn(x) (((unsigned long)((x).pte_high >> PFN_PTE_SHIFT)) | (unsigned long)((x).pte_low << _PAGE_PRESENT_SHIFT))
static inline pte_t
pfn_pte(unsigned long pfn, pgprot_t prot)
{
@@ -161,7 +161,7 @@ pfn_pte(unsigned long pfn, pgprot_t prot)

pte.pte_low = (pfn >> _PAGE_PRESENT_SHIFT) |
(pgprot_val(prot) & ~_PFNX_MASK);
- pte.pte_high = (pfn << _PFN_SHIFT) |
+ pte.pte_high = (pfn << PFN_PTE_SHIFT) |
(pgprot_val(prot) & ~_PFN_MASK);
return pte;
}
@@ -184,9 +184,9 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot)
#else

#define MAX_POSSIBLE_PHYSMEM_BITS 32
-#define pte_pfn(x) ((unsigned long)((x).pte >> _PFN_SHIFT))
-#define pfn_pte(pfn, prot) __pte(((unsigned long long)(pfn) << _PFN_SHIFT) | pgprot_val(prot))
-#define pfn_pmd(pfn, prot) __pmd(((unsigned long long)(pfn) << _PFN_SHIFT) | pgprot_val(prot))
+#define pte_pfn(x) ((unsigned long)((x).pte >> PFN_PTE_SHIFT))
+#define pfn_pte(pfn, prot) __pte(((unsigned long long)(pfn) << PFN_PTE_SHIFT) | pgprot_val(prot))
+#define pfn_pmd(pfn, prot) __pmd(((unsigned long long)(pfn) << PFN_PTE_SHIFT) | pgprot_val(prot))
#endif /* defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32) */

#define pte_page(x) pfn_to_page(pte_pfn(x))
diff --git a/arch/mips/include/asm/pgtable-64.h b/arch/mips/include/asm/pgtable-64.h
index 98e24e3e7f2b..20ca48c1b606 100644
--- a/arch/mips/include/asm/pgtable-64.h
+++ b/arch/mips/include/asm/pgtable-64.h
@@ -298,9 +298,9 @@ static inline void pud_clear(pud_t *pudp)

#define pte_page(x) pfn_to_page(pte_pfn(x))

-#define pte_pfn(x) ((unsigned long)((x).pte >> _PFN_SHIFT))
-#define pfn_pte(pfn, prot) __pte(((pfn) << _PFN_SHIFT) | pgprot_val(prot))
-#define pfn_pmd(pfn, prot) __pmd(((pfn) << _PFN_SHIFT) | pgprot_val(prot))
+#define pte_pfn(x) ((unsigned long)((x).pte >> PFN_PTE_SHIFT))
+#define pfn_pte(pfn, prot) __pte(((pfn) << PFN_PTE_SHIFT) | pgprot_val(prot))
+#define pfn_pmd(pfn, prot) __pmd(((pfn) << PFN_PTE_SHIFT) | pgprot_val(prot))

#ifndef __PAGETABLE_PMD_FOLDED
static inline pmd_t *pud_pgtable(pud_t pud)
diff --git a/arch/mips/include/asm/pgtable-bits.h b/arch/mips/include/asm/pgtable-bits.h
index 2362842ee2b5..744abba9111f 100644
--- a/arch/mips/include/asm/pgtable-bits.h
+++ b/arch/mips/include/asm/pgtable-bits.h
@@ -182,10 +182,10 @@ enum pgtable_bits {
#if defined(CONFIG_CPU_R3K_TLB)
# define _CACHE_UNCACHED (1 << _CACHE_UNCACHED_SHIFT)
# define _CACHE_MASK _CACHE_UNCACHED
-# define _PFN_SHIFT PAGE_SHIFT
+# define PFN_PTE_SHIFT PAGE_SHIFT
#else
# define _CACHE_MASK (7 << _CACHE_SHIFT)
-# define _PFN_SHIFT (PAGE_SHIFT - 12 + _CACHE_SHIFT + 3)
+# define PFN_PTE_SHIFT (PAGE_SHIFT - 12 + _CACHE_SHIFT + 3)
#endif

#ifndef _PAGE_NO_EXEC
@@ -195,7 +195,7 @@ enum pgtable_bits {
#define _PAGE_SILENT_READ _PAGE_VALID
#define _PAGE_SILENT_WRITE _PAGE_DIRTY

-#define _PFN_MASK (~((1 << (_PFN_SHIFT)) - 1))
+#define _PFN_MASK (~((1 << (PFN_PTE_SHIFT)) - 1))

/*
* The final layouts of the PTE bits are:
diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index 574fa14ac8b2..cfcd6a8ba8ef 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -66,7 +66,7 @@ extern void paging_init(void);

static inline unsigned long pmd_pfn(pmd_t pmd)
{
- return pmd_val(pmd) >> _PFN_SHIFT;
+ return pmd_val(pmd) >> PFN_PTE_SHIFT;
}

#ifndef CONFIG_MIPS_HUGE_TLB_SUPPORT
@@ -105,9 +105,6 @@ do { \
} \
} while(0)

-static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t pteval);
-
#if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32)

#ifdef CONFIG_XPA
@@ -157,7 +154,7 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *pt
null.pte_low = null.pte_high = _PAGE_GLOBAL;
}

- set_pte_at(mm, addr, ptep, null);
+ set_pte(ptep, null);
htw_start();
}
#else
@@ -196,28 +193,41 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *pt
#if !defined(CONFIG_CPU_R3K_TLB)
/* Preserve global status for the pair */
if (pte_val(*ptep_buddy(ptep)) & _PAGE_GLOBAL)
- set_pte_at(mm, addr, ptep, __pte(_PAGE_GLOBAL));
+ set_pte(ptep, __pte(_PAGE_GLOBAL));
else
#endif
- set_pte_at(mm, addr, ptep, __pte(0));
+ set_pte(ptep, __pte(0));
htw_start();
}
#endif

-static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t pteval)
+static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte, unsigned int nr)
{
+ unsigned int i;
+ bool do_sync = false;

- if (!pte_present(pteval))
- goto cache_sync_done;
+ for (i = 0; i < nr; i++) {
+ if (!pte_present(pte))
+ continue;
+ if (pte_present(ptep[i]) &&
+ (pte_pfn(ptep[i]) == pte_pfn(pte)))
+ continue;
+ do_sync = true;
+ }

- if (pte_present(*ptep) && (pte_pfn(*ptep) == pte_pfn(pteval)))
- goto cache_sync_done;
+ if (do_sync)
+ __update_cache(addr, pte);

- __update_cache(addr, pteval);
-cache_sync_done:
- set_pte(ptep, pteval);
+ for (;;) {
+ set_pte(ptep, pte);
+ if (--nr == 0)
+ break;
+ ptep++;
+ pte_val(pte) += 1 << PFN_PTE_SHIFT;
+ }
}
+#define set_ptes set_ptes

/*
* (pmds are folded into puds so this doesn't get actually called,
@@ -486,7 +496,7 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma,
pte_t entry, int dirty)
{
if (!pte_same(*ptep, entry))
- set_pte_at(vma->vm_mm, address, ptep, entry);
+ set_pte(ptep, entry);
/*
* update_mmu_cache will unconditionally execute, handling both
* the case that the PTE changed and the spurious fault case.
diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
index a549fa98c2f4..7d2a42f0cffd 100644
--- a/arch/mips/mm/c-r4k.c
+++ b/arch/mips/mm/c-r4k.c
@@ -679,13 +679,14 @@ static inline void local_r4k_flush_cache_page(void *args)
if ((mm == current->active_mm) && (pte_val(*ptep) & _PAGE_VALID))
vaddr = NULL;
else {
+ struct folio *folio = page_folio(page);
/*
* Use kmap_coherent or kmap_atomic to do flushes for
* another ASID than the current one.
*/
map_coherent = (cpu_has_dc_aliases &&
- page_mapcount(page) &&
- !Page_dcache_dirty(page));
+ folio_mapped(folio) &&
+ !folio_test_dcache_dirty(folio));
if (map_coherent)
vaddr = kmap_coherent(page, addr);
else
diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c
index 11b3e7ddafd5..0668435521fc 100644
--- a/arch/mips/mm/cache.c
+++ b/arch/mips/mm/cache.c
@@ -82,13 +82,15 @@ SYSCALL_DEFINE3(cacheflush, unsigned long, addr, unsigned long, bytes,
return 0;
}

-void __flush_dcache_page(struct page *page)
+void __flush_dcache_pages(struct page *page, unsigned int nr)
{
- struct address_space *mapping = page_mapping_file(page);
+ struct folio *folio = page_folio(page);
+ struct address_space *mapping = folio_flush_mapping(folio);
unsigned long addr;
+ unsigned int i;

if (mapping && !mapping_mapped(mapping)) {
- SetPageDcacheDirty(page);
+ folio_set_dcache_dirty(folio);
return;
}

@@ -97,25 +99,21 @@ void __flush_dcache_page(struct page *page)
* case is for exec env/arg pages and those are %99 certainly going to
* get faulted into the tlb (and thus flushed) anyways.
*/
- if (PageHighMem(page))
- addr = (unsigned long)kmap_atomic(page);
- else
- addr = (unsigned long)page_address(page);
-
- flush_data_cache_page(addr);
-
- if (PageHighMem(page))
- kunmap_atomic((void *)addr);
+ for (i = 0; i < nr; i++) {
+ addr = (unsigned long)kmap_local_page(page + i);
+ flush_data_cache_page(addr);
+ kunmap_local((void *)addr);
+ }
}
-
-EXPORT_SYMBOL(__flush_dcache_page);
+EXPORT_SYMBOL(__flush_dcache_pages);

void __flush_anon_page(struct page *page, unsigned long vmaddr)
{
unsigned long addr = (unsigned long) page_address(page);
+ struct folio *folio = page_folio(page);

if (pages_do_alias(addr, vmaddr)) {
- if (page_mapcount(page) && !Page_dcache_dirty(page)) {
+ if (folio_mapped(folio) && !folio_test_dcache_dirty(folio)) {
void *kaddr;

kaddr = kmap_coherent(page, vmaddr);
@@ -130,27 +128,29 @@ EXPORT_SYMBOL(__flush_anon_page);

void __update_cache(unsigned long address, pte_t pte)
{
- struct page *page;
+ struct folio *folio;
unsigned long pfn, addr;
int exec = !pte_no_exec(pte) && !cpu_has_ic_fills_f_dc;
+ unsigned int i;

pfn = pte_pfn(pte);
if (unlikely(!pfn_valid(pfn)))
return;
- page = pfn_to_page(pfn);
- if (Page_dcache_dirty(page)) {
- if (PageHighMem(page))
- addr = (unsigned long)kmap_atomic(page);
- else
- addr = (unsigned long)page_address(page);
-
- if (exec || pages_do_alias(addr, address & PAGE_MASK))
- flush_data_cache_page(addr);

- if (PageHighMem(page))
- kunmap_atomic((void *)addr);
+ folio = page_folio(pfn_to_page(pfn));
+ address &= PAGE_MASK;
+ address -= offset_in_folio(folio, pfn << PAGE_SHIFT);
+
+ if (folio_test_dcache_dirty(folio)) {
+ for (i = 0; i < folio_nr_pages(folio); i++) {
+ addr = (unsigned long)kmap_local_folio(folio, i);

- ClearPageDcacheDirty(page);
+ if (exec || pages_do_alias(addr, address))
+ flush_data_cache_page(addr);
+ kunmap_local((void *)addr);
+ address += PAGE_SIZE;
+ }
+ folio_clear_dcache_dirty(folio);
}
}

diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 5a8002839550..5dcb525a8995 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -88,7 +88,7 @@ static void *__kmap_pgprot(struct page *page, unsigned long addr, pgprot_t prot)
pte_t pte;
int tlbidx;

- BUG_ON(Page_dcache_dirty(page));
+ BUG_ON(folio_test_dcache_dirty(page_folio(page)));

preempt_disable();
pagefault_disable();
@@ -169,11 +169,12 @@ void kunmap_coherent(void)
void copy_user_highpage(struct page *to, struct page *from,
unsigned long vaddr, struct vm_area_struct *vma)
{
+ struct folio *src = page_folio(from);
void *vfrom, *vto;

vto = kmap_atomic(to);
if (cpu_has_dc_aliases &&
- page_mapcount(from) && !Page_dcache_dirty(from)) {
+ folio_mapped(src) && !folio_test_dcache_dirty(src)) {
vfrom = kmap_coherent(from, vaddr);
copy_page(vto, vfrom);
kunmap_coherent();
@@ -194,15 +195,17 @@ void copy_to_user_page(struct vm_area_struct *vma,
struct page *page, unsigned long vaddr, void *dst, const void *src,
unsigned long len)
{
+ struct folio *folio = page_folio(page);
+
if (cpu_has_dc_aliases &&
- page_mapcount(page) && !Page_dcache_dirty(page)) {
+ folio_mapped(folio) && !folio_test_dcache_dirty(folio)) {
void *vto = kmap_coherent(page, vaddr) + (vaddr & ~PAGE_MASK);
memcpy(vto, src, len);
kunmap_coherent();
} else {
memcpy(dst, src, len);
if (cpu_has_dc_aliases)
- SetPageDcacheDirty(page);
+ folio_set_dcache_dirty(folio);
}
if (vma->vm_flags & VM_EXEC)
flush_cache_page(vma, vaddr, page_to_pfn(page));
@@ -212,15 +215,17 @@ void copy_from_user_page(struct vm_area_struct *vma,
struct page *page, unsigned long vaddr, void *dst, const void *src,
unsigned long len)
{
+ struct folio *folio = page_folio(page);
+
if (cpu_has_dc_aliases &&
- page_mapcount(page) && !Page_dcache_dirty(page)) {
+ folio_mapped(folio) && !folio_test_dcache_dirty(folio)) {
void *vfrom = kmap_coherent(page, vaddr) + (vaddr & ~PAGE_MASK);
memcpy(dst, vfrom, len);
kunmap_coherent();
} else {
memcpy(dst, src, len);
if (cpu_has_dc_aliases)
- SetPageDcacheDirty(page);
+ folio_set_dcache_dirty(folio);
}
}
EXPORT_SYMBOL_GPL(copy_from_user_page);
@@ -448,10 +453,10 @@ static inline void __init mem_init_free_highmem(void)
void __init mem_init(void)
{
/*
- * When _PFN_SHIFT is greater than PAGE_SHIFT we won't have enough PTE
+ * When PFN_PTE_SHIFT is greater than PAGE_SHIFT we won't have enough PTE
* bits to hold a full 32b physical address on MIPS32 systems.
*/
- BUILD_BUG_ON(IS_ENABLED(CONFIG_32BIT) && (_PFN_SHIFT > PAGE_SHIFT));
+ BUILD_BUG_ON(IS_ENABLED(CONFIG_32BIT) && (PFN_PTE_SHIFT > PAGE_SHIFT));

#ifdef CONFIG_HIGHMEM
max_mapnr = highend_pfn ? highend_pfn : max_low_pfn;
diff --git a/arch/mips/mm/pgtable-32.c b/arch/mips/mm/pgtable-32.c
index f57fb69472f8..84dd5136d53a 100644
--- a/arch/mips/mm/pgtable-32.c
+++ b/arch/mips/mm/pgtable-32.c
@@ -35,7 +35,7 @@ pmd_t mk_pmd(struct page *page, pgprot_t prot)
{
pmd_t pmd;

- pmd_val(pmd) = (page_to_pfn(page) << _PFN_SHIFT) | pgprot_val(prot);
+ pmd_val(pmd) = (page_to_pfn(page) << PFN_PTE_SHIFT) | pgprot_val(prot);

return pmd;
}
diff --git a/arch/mips/mm/pgtable-64.c b/arch/mips/mm/pgtable-64.c
index b4386a0e2ef8..c76d21f7dffb 100644
--- a/arch/mips/mm/pgtable-64.c
+++ b/arch/mips/mm/pgtable-64.c
@@ -93,7 +93,7 @@ pmd_t mk_pmd(struct page *page, pgprot_t prot)
{
pmd_t pmd;

- pmd_val(pmd) = (page_to_pfn(page) << _PFN_SHIFT) | pgprot_val(prot);
+ pmd_val(pmd) = (page_to_pfn(page) << PFN_PTE_SHIFT) | pgprot_val(prot);

return pmd;
}
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index 80e05ee98d62..1393a11af539 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -253,7 +253,7 @@ static void output_pgtable_bits_defines(void)
pr_define("_PAGE_GLOBAL_SHIFT %d\n", _PAGE_GLOBAL_SHIFT);
pr_define("_PAGE_VALID_SHIFT %d\n", _PAGE_VALID_SHIFT);
pr_define("_PAGE_DIRTY_SHIFT %d\n", _PAGE_DIRTY_SHIFT);
- pr_define("_PFN_SHIFT %d\n", _PFN_SHIFT);
+ pr_define("PFN_PTE_SHIFT %d\n", PFN_PTE_SHIFT);
pr_debug("\n");
}

--
2.39.2
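
For readers new to the range API, here is a minimal caller-side sketch of how
generic MM code is expected to use the primitives added above; the helper name
and its arguments are hypothetical, for illustration only, and not part of the
series:

static void map_folio_pages(struct vm_area_struct *vma, unsigned long addr,
			    pte_t *ptep, struct page *page, unsigned int nr,
			    pgprot_t prot)
{
	/* One PTE value for the first page; set_ptes() steps the PFN itself. */
	pte_t pte = mk_pte(page, prot);

	flush_icache_pages(vma, page, nr);	/* a no-op on MIPS */
	set_ptes(vma->vm_mm, addr, ptep, pte, nr);
	/* update_mmu_cache_range() follows here; see the add-on further down. */
}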



2023-03-15 10:09:22

by Mike Rapoport

Subject: Re: [PATCH v4 16/36] mips: Implement the new page table range API

On Wed, Mar 15, 2023 at 05:14:24AM +0000, Matthew Wilcox (Oracle) wrote:
> Rename _PFN_SHIFT to PFN_PTE_SHIFT. Convert a few places
> to call set_pte() instead of set_pte_at(). Add set_ptes(),
> update_mmu_cache_range(), flush_icache_pages() and flush_dcache_folio().
> Change the PG_arch_1 (aka PG_dcache_dirty) flag from being per-page
> to per-folio.
>
> Signed-off-by: Matthew Wilcox (Oracle) <[email protected]>
> Cc: Thomas Bogendoerfer <[email protected]>
> Cc: [email protected]

Acked-by: Mike Rapoport (IBM) <[email protected]>

--
Sincerely yours,
Mike.

2023-03-15 10:57:55

by Thomas Bogendoerfer

Subject: Re: [PATCH v4 16/36] mips: Implement the new page table range API

On Wed, Mar 15, 2023 at 05:14:24AM +0000, Matthew Wilcox (Oracle) wrote:
> Rename _PFN_SHIFT to PFN_PTE_SHIFT. Convert a few places
> to call set_pte() instead of set_pte_at(). Add set_ptes(),
> update_mmu_cache_range(), flush_icache_pages() and flush_dcache_folio().

/local/tbogendoerfer/korg/linux/mm/memory.c: In function ‘set_pte_range’:
/local/tbogendoerfer/korg/linux/mm/memory.c:4290:2: error: implicit declaration of function ‘update_mmu_cache_range’ [-Werror=implicit-function-declaration]
update_mmu_cache_range(vma, addr, vmf->pte, nr);

update_mmu_cache_range() is missing in this patch.

Thomas.

--
Crap can work. Given enough thrust pigs will fly, but it's not necessarily a
good idea. [ RFC1925, 2.3 ]

2023-03-15 20:33:31

by Matthew Wilcox

Subject: Re: [PATCH v4 16/36] mips: Implement the new page table range API

On Wed, Mar 15, 2023 at 11:50:22AM +0100, Thomas Bogendoerfer wrote:
> On Wed, Mar 15, 2023 at 05:14:24AM +0000, Matthew Wilcox (Oracle) wrote:
> > Rename _PFN_SHIFT to PFN_PTE_SHIFT. Convert a few places
> > to call set_pte() instead of set_pte_at(). Add set_ptes(),
> > update_mmu_cache_range(), flush_icache_pages() and flush_dcache_folio().
>
> /local/tbogendoerfer/korg/linux/mm/memory.c: In function ‘set_pte_range’:
> /local/tbogendoerfer/korg/linux/mm/memory.c:4290:2: error: implicit declaration of function ‘update_mmu_cache_range’ [-Werror=implicit-function-declaration]
> update_mmu_cache_range(vma, addr, vmf->pte, nr);
>
> update_mmu_cache_range() is missing in this patch.

Oops. And mips was one of the arches I did a test build for!

Looks like we could try to gain some efficiency by passing 'nr' to
__update_tlb(), but as far as I can tell, that's only called for r3k and
r4k, so maybe it's not worth optimising at this point? Anyway, this
add-on makes the mips build compile for me and I'll fold it into v5.

diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index cfcd6a8ba8ef..9f51b0813dc6 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -578,12 +578,20 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte)
extern void __update_tlb(struct vm_area_struct *vma, unsigned long address,
pte_t pte);

-static inline void update_mmu_cache(struct vm_area_struct *vma,
- unsigned long address, pte_t *ptep)
+static inline void update_mmu_cache_range(struct vm_area_struct *vma,
+ unsigned long address, pte_t *ptep, unsigned int nr)
{
- pte_t pte = *ptep;
- __update_tlb(vma, address, pte);
+ for (;;) {
+ pte_t pte = *ptep;
+ __update_tlb(vma, address, pte);
+ if (--nr == 0)
+ break;
+ ptep++;
+ address += PAGE_SIZE;
+ }
}
+#define update_mmu_cache(vma, address, ptep) \
+ update_mmu_cache_range(vma, address, ptep, 1)

#define __HAVE_ARCH_UPDATE_MMU_TLB
#define update_mmu_tlb update_mmu_cache

2023-03-17 15:30:12

by Thomas Bogendoerfer

Subject: Re: [PATCH v4 16/36] mips: Implement the new page table range API

On Wed, Mar 15, 2023 at 08:33:21PM +0000, Matthew Wilcox wrote:
> On Wed, Mar 15, 2023 at 11:50:22AM +0100, Thomas Bogendoerfer wrote:
> > On Wed, Mar 15, 2023 at 05:14:24AM +0000, Matthew Wilcox (Oracle) wrote:
> > > Rename _PFN_SHIFT to PFN_PTE_SHIFT. Convert a few places
> > > to call set_pte() instead of set_pte_at(). Add set_ptes(),
> > > update_mmu_cache_range(), flush_icache_pages() and flush_dcache_folio().
> >
> > /local/tbogendoerfer/korg/linux/mm/memory.c: In function ‘set_pte_range’:
> > /local/tbogendoerfer/korg/linux/mm/memory.c:4290:2: error: implicit declaration of function ‘update_mmu_cache_range’ [-Werror=implicit-function-declaration]
> > update_mmu_cache_range(vma, addr, vmf->pte, nr);
> >
> > update_mmu_cache_range() is missing in this patch.
>
> Oops. And mips was one of the arches I did a test build for!
>
> Looks like we could try to gain some efficiency by passing 'nr' to
> __update_tlb(), but as far as I can tell, that's only called for r3k and
> r4k, so maybe it's not worth optimising at this point?

hmm, not sure if that would help. R4k style TLB has two PTEs mapped
per TLB entry. So by advancing per page __update_tlb() is called more
often than needed.
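
To illustrate, a per-pair variant of the update_mmu_cache_range() loop from the
add-on, purely as a sketch (assuming __update_tlb() refills both halves of an
even/odd pair from the buddy PTEs):

static inline void update_mmu_cache_range(struct vm_area_struct *vma,
		unsigned long address, pte_t *ptep, unsigned int nr)
{
	while (nr) {
		/* pages left in the current even/odd pair */
		unsigned int step = 2 - ((address >> PAGE_SHIFT) & 1);

		if (step > nr)
			step = nr;
		/* one call covers the whole pair via the buddy PTE */
		__update_tlb(vma, address, *ptep);
		ptep += step;
		address += step * PAGE_SIZE;
		nr -= step;
	}
}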

> Anyway, this add-on makes the mips build compile for me and I'll fold
> it into v5.

tested your v4 with the add-on on QEMU Malta and real hardware without
problems so far. I'll give v5 another spin.

Thomas.

--
Crap can work. Given enough thrust pigs will fly, but it's not necessarily a
good idea. [ RFC1925, 2.3 ]

2023-03-19 18:45:56

by Thomas Bogendoerfer

Subject: Re: [PATCH v4 16/36] mips: Implement the new page table range API

On Fri, Mar 17, 2023 at 04:29:20PM +0100, Thomas Bogendoerfer wrote:
> On Wed, Mar 15, 2023 at 08:33:21PM +0000, Matthew Wilcox wrote:
> > On Wed, Mar 15, 2023 at 11:50:22AM +0100, Thomas Bogendoerfer wrote:
> > > On Wed, Mar 15, 2023 at 05:14:24AM +0000, Matthew Wilcox (Oracle) wrote:
> > > > Rename _PFN_SHIFT to PFN_PTE_SHIFT. Convert a few places
> > > > to call set_pte() instead of set_pte_at(). Add set_ptes(),
> > > > update_mmu_cache_range(), flush_icache_pages() and flush_dcache_folio().
> > >
> > > /local/tbogendoerfer/korg/linux/mm/memory.c: In function ‘set_pte_range’:
> > > /local/tbogendoerfer/korg/linux/mm/memory.c:4290:2: error: implicit declaration of function ‘update_mmu_cache_range’ [-Werror=implicit-function-declaration]
> > > update_mmu_cache_range(vma, addr, vmf->pte, nr);
> > >
> > > update_mmu_cache_range() is missing in this patch.
> >
> > Oops. And mips was one of the arches I did a test build for!
> >
> > Looks like we could try to gain some efficiency by passing 'nr' to
> > __update_tlb(), but as far as I can tell, that's only called for r3k and
> > r4k, so maybe it's not worth optimising at this point?
>
> hmm, not sure if that would help. R4k style TLB has two PTEs mapped
> per TLB entry. So by advancing per page __update_tlb() is called more
> often than needed.

btw. how big is nr going to be ? There are MIPS SoCs out there, which
just have 16 TLBs...

Thomas.

--
Crap can work. Given enough thrust pigs will fly, but it's not necessarily a
good idea. [ RFC1925, 2.3 ]

2023-03-19 20:16:44

by Matthew Wilcox

Subject: Re: [PATCH v4 16/36] mips: Implement the new page table range API

On Sun, Mar 19, 2023 at 07:45:36PM +0100, Thomas Bogendoerfer wrote:
> On Fri, Mar 17, 2023 at 04:29:20PM +0100, Thomas Bogendoerfer wrote:
> > hmm, not sure if that would help. R4k style TLB has two PTEs mapped
> > per TLB entry. So by advancing per page __update_tlb() is called more
> > often than needed.
>
> btw. how big is nr going to be ? There are MIPS SoCs out there, which
> just have 16 TLBs...

Oof. The biggest we're going to see for now is one less than PTRS_PER_PMD
(that'd be a PMD-sized allocation that's mapped askew with 1 page in
one PMD and n-1 pages in the adjacent PMD). That'd be 511 on x86 and
I presume something similar on MIPS. More than 16, for sure.

Now, this isn't a new problem with this patchset. With fault-around,
we already call set_pte_at() N times. And we don't say which ones are
speculative entries vs the one actually faulted in.

But let's see if we can fix it. What if we passed in the vmf? That would
give you the actual faulting address, so you'd know to only put the PTE
into the Linux page tables and not go as far as putting it into the TLB.
Open to other ideas.
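
A rough sketch of that idea (the signature and the vmf handling are
hypothetical, not part of this series; whether skipping the TLB preload for
the other pages is safe is exactly the open question):

static inline void update_mmu_cache_range(struct vm_fault *vmf,
		struct vm_area_struct *vma, unsigned long address,
		pte_t *ptep, unsigned int nr)
{
	unsigned long faddr = vmf ? vmf->address & PAGE_MASK : address;

	for (; nr; nr--, ptep++, address += PAGE_SIZE) {
		/* only preload the TLB for the address that actually faulted */
		if (address == faddr)
			__update_tlb(vma, address, *ptep);
	}
}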

2023-03-21 11:31:14

by Thomas Bogendoerfer

Subject: Re: [PATCH v4 16/36] mips: Implement the new page table range API

On Sun, Mar 19, 2023 at 08:16:36PM +0000, Matthew Wilcox wrote:
> On Sun, Mar 19, 2023 at 07:45:36PM +0100, Thomas Bogendoerfer wrote:
> > On Fri, Mar 17, 2023 at 04:29:20PM +0100, Thomas Bogendoerfer wrote:
> > > hmm, not sure if that would help. R4k style TLB has two PTEs mapped
> > > per TLB entry. So by advancing per page __update_tlb() is called more
> > > often than needed.
> >
> > btw. how big is nr going to be ? There are MIPS SoCs out there, which
> > just have 16 TLBs...
>
> Oof. The biggest we're going to see for now is one less than PTRS_PER_PMD
> (that'd be a PMD-sized allocation that's mapped askew with 1 page in
> one PMD and n-1 pages in the adjacent PMD). That'd be 511 on x86 and
> I presume something similar on MIPS. More than 16, for sure.

biggest TLB I could find is 256 entries, which can map 512 pages.

> Now, this isn't a new problem with this patchset. With fault-around,
> we already call set_pte_at() N times. And we don't say which ones are
> speculative entries vs the one actually faulted in.

ic

> But let's see if we can fix it. What if we passed in the vmf? That would
> give you the actual faulting address, so you'd know to only put the PTE
> into the Linux page tables and not go as far as putting it into the TLB.
> Open to other ideas.

that would help to optimize that case. But update_mmu_cache_range() needs to
call __update_tlb() for every page to avoid stale data in the TLB. If I
understood correctly, only the way TLB updates are done has changed, so there
shouldn't be any performance regression. Optimizations like moving the loop
over the pages into __update_tlb() could be done in a second step.

Thomas.

--
Crap can work. Given enough thrust pigs will fly, but it's not necessarily a
good idea. [ RFC1925, 2.3 ]