2021-04-08 15:32:38

by Christophe Leroy

[permalink] [raw]
Subject: [PATCH v2 1/9] powerpc/mem: Move cache flushing functions into mm/cacheflush.c

Cache flushing functions are in the middle of completely
unrelated stuff in mm/mem.c.

Create a dedicated mm/cacheflush.c for those functions.

Also clean up the list of included headers.

Signed-off-by: Christophe Leroy <[email protected]>
---
arch/powerpc/mm/Makefile | 3 +-
arch/powerpc/mm/cacheflush.c | 255 +++++++++++++++++++++++++++++++
arch/powerpc/mm/mem.c | 281 -----------------------------------
3 files changed, 257 insertions(+), 282 deletions(-)
create mode 100644 arch/powerpc/mm/cacheflush.c

diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 3b4e9e4e25ea..c3df3a8501d4 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -8,7 +8,8 @@ ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
obj-y := fault.o mem.o pgtable.o mmap.o maccess.o \
init_$(BITS).o pgtable_$(BITS).o \
pgtable-frag.o ioremap.o ioremap_$(BITS).o \
- init-common.o mmu_context.o drmem.o
+ init-common.o mmu_context.o drmem.o \
+ cacheflush.o
obj-$(CONFIG_PPC_MMU_NOHASH) += nohash/
obj-$(CONFIG_PPC_BOOK3S_32) += book3s32/
obj-$(CONFIG_PPC_BOOK3S_64) += book3s64/
diff --git a/arch/powerpc/mm/cacheflush.c b/arch/powerpc/mm/cacheflush.c
new file mode 100644
index 000000000000..40613d2fda37
--- /dev/null
+++ b/arch/powerpc/mm/cacheflush.c
@@ -0,0 +1,255 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/highmem.h>
+#include <linux/kprobes.h>
+
+/**
+ * flush_coherent_icache() - if a CPU has a coherent icache, flush it
+ * @addr: The base address to use (can be any valid address, the whole cache will be flushed)
+ * Return true if the cache was flushed, false otherwise
+ */
+static inline bool flush_coherent_icache(unsigned long addr)
+{
+ /*
+ * For a snooping icache, we still need a dummy icbi to purge all the
+ * prefetched instructions from the ifetch buffers. We also need a sync
+ * before the icbi to order the the actual stores to memory that might
+ * have modified instructions with the icbi.
+ */
+ if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) {
+ mb(); /* sync */
+ allow_read_from_user((const void __user *)addr, L1_CACHE_BYTES);
+ icbi((void *)addr);
+ prevent_read_from_user((const void __user *)addr, L1_CACHE_BYTES);
+ mb(); /* sync */
+ isync();
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * invalidate_icache_range() - Flush the icache by issuing icbi across an address range
+ * @start: the start address
+ * @stop: the stop address (exclusive)
+ */
+static void invalidate_icache_range(unsigned long start, unsigned long stop)
+{
+ unsigned long shift = l1_icache_shift();
+ unsigned long bytes = l1_icache_bytes();
+ char *addr = (char *)(start & ~(bytes - 1));
+ unsigned long size = stop - (unsigned long)addr + (bytes - 1);
+ unsigned long i;
+
+ for (i = 0; i < size >> shift; i++, addr += bytes)
+ icbi(addr);
+
+ mb(); /* sync */
+ isync();
+}
+
+/**
+ * flush_icache_range: Write any modified data cache blocks out to memory
+ * and invalidate the corresponding blocks in the instruction cache
+ *
+ * Generic code will call this after writing memory, before executing from it.
+ *
+ * @start: the start address
+ * @stop: the stop address (exclusive)
+ */
+void flush_icache_range(unsigned long start, unsigned long stop)
+{
+ if (flush_coherent_icache(start))
+ return;
+
+ clean_dcache_range(start, stop);
+
+ if (IS_ENABLED(CONFIG_44x)) {
+ /*
+ * Flash invalidate on 44x because we are passed kmapped
+ * addresses and this doesn't work for userspace pages due to
+ * the virtually tagged icache.
+ */
+ iccci((void *)start);
+ mb(); /* sync */
+ isync();
+ } else
+ invalidate_icache_range(start, stop);
+}
+EXPORT_SYMBOL(flush_icache_range);
+
+#if !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC64)
+/**
+ * flush_dcache_icache_phys() - Flush a page by it's physical address
+ * @physaddr: the physical address of the page
+ */
+static void flush_dcache_icache_phys(unsigned long physaddr)
+{
+ unsigned long bytes = l1_dcache_bytes();
+ unsigned long nb = PAGE_SIZE / bytes;
+ unsigned long addr = physaddr & PAGE_MASK;
+ unsigned long msr, msr0;
+ unsigned long loop1 = addr, loop2 = addr;
+
+ msr0 = mfmsr();
+ msr = msr0 & ~MSR_DR;
+ /*
+ * This must remain as ASM to prevent potential memory accesses
+ * while the data MMU is disabled
+ */
+ asm volatile(
+ " mtctr %2;\n"
+ " mtmsr %3;\n"
+ " isync;\n"
+ "0: dcbst 0, %0;\n"
+ " addi %0, %0, %4;\n"
+ " bdnz 0b;\n"
+ " sync;\n"
+ " mtctr %2;\n"
+ "1: icbi 0, %1;\n"
+ " addi %1, %1, %4;\n"
+ " bdnz 1b;\n"
+ " sync;\n"
+ " mtmsr %5;\n"
+ " isync;\n"
+ : "+&r" (loop1), "+&r" (loop2)
+ : "r" (nb), "r" (msr), "i" (bytes), "r" (msr0)
+ : "ctr", "memory");
+}
+NOKPROBE_SYMBOL(flush_dcache_icache_phys)
+#endif // !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC64)
+
+/*
+ * This is called when a page has been modified by the kernel.
+ * It just marks the page as not i-cache clean. We do the i-cache
+ * flush later when the page is given to a user process, if necessary.
+ */
+void flush_dcache_page(struct page *page)
+{
+ if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
+ return;
+ /* avoid an atomic op if possible */
+ if (test_bit(PG_dcache_clean, &page->flags))
+ clear_bit(PG_dcache_clean, &page->flags);
+}
+EXPORT_SYMBOL(flush_dcache_page);
+
+static void flush_dcache_icache_hugepage(struct page *page)
+{
+ int i;
+ void *start;
+
+ BUG_ON(!PageCompound(page));
+
+ for (i = 0; i < compound_nr(page); i++) {
+ if (!PageHighMem(page)) {
+ __flush_dcache_icache(page_address(page+i));
+ } else {
+ start = kmap_atomic(page+i);
+ __flush_dcache_icache(start);
+ kunmap_atomic(start);
+ }
+ }
+}
+
+void flush_dcache_icache_page(struct page *page)
+{
+
+ if (PageCompound(page))
+ return flush_dcache_icache_hugepage(page);
+
+#if defined(CONFIG_PPC_8xx) || defined(CONFIG_PPC64)
+ /* On 8xx there is no need to kmap since highmem is not supported */
+ __flush_dcache_icache(page_address(page));
+#else
+ if (IS_ENABLED(CONFIG_BOOKE) || sizeof(phys_addr_t) > sizeof(void *)) {
+ void *start = kmap_atomic(page);
+ __flush_dcache_icache(start);
+ kunmap_atomic(start);
+ } else {
+ unsigned long addr = page_to_pfn(page) << PAGE_SHIFT;
+
+ if (flush_coherent_icache(addr))
+ return;
+ flush_dcache_icache_phys(addr);
+ }
+#endif
+}
+EXPORT_SYMBOL(flush_dcache_icache_page);
+
+/**
+ * __flush_dcache_icache(): Flush a particular page from the data cache to RAM.
+ * Note: this is necessary because the instruction cache does *not*
+ * snoop from the data cache.
+ *
+ * @page: the address of the page to flush
+ */
+void __flush_dcache_icache(void *p)
+{
+ unsigned long addr = (unsigned long)p;
+
+ if (flush_coherent_icache(addr))
+ return;
+
+ clean_dcache_range(addr, addr + PAGE_SIZE);
+
+ /*
+ * We don't flush the icache on 44x. Those have a virtual icache and we
+ * don't have access to the virtual address here (it's not the page
+ * vaddr but where it's mapped in user space). The flushing of the
+ * icache on these is handled elsewhere, when a change in the address
+ * space occurs, before returning to user space.
+ */
+
+ if (mmu_has_feature(MMU_FTR_TYPE_44x))
+ return;
+
+ invalidate_icache_range(addr, addr + PAGE_SIZE);
+}
+
+void clear_user_page(void *page, unsigned long vaddr, struct page *pg)
+{
+ clear_page(page);
+
+ /*
+ * We shouldn't have to do this, but some versions of glibc
+ * require it (ld.so assumes zero filled pages are icache clean)
+ * - Anton
+ */
+ flush_dcache_page(pg);
+}
+EXPORT_SYMBOL(clear_user_page);
+
+void copy_user_page(void *vto, void *vfrom, unsigned long vaddr,
+ struct page *pg)
+{
+ copy_page(vto, vfrom);
+
+ /*
+ * We should be able to use the following optimisation, however
+ * there are two problems.
+ * Firstly a bug in some versions of binutils meant PLT sections
+ * were not marked executable.
+ * Secondly the first word in the GOT section is blrl, used
+ * to establish the GOT address. Until recently the GOT was
+ * not marked executable.
+ * - Anton
+ */
+#if 0
+ if (!vma->vm_file && ((vma->vm_flags & VM_EXEC) == 0))
+ return;
+#endif
+
+ flush_dcache_page(pg);
+}
+
+void flush_icache_user_page(struct vm_area_struct *vma, struct page *page,
+ unsigned long addr, int len)
+{
+ unsigned long maddr;
+
+ maddr = (unsigned long) kmap(page) + (addr & ~PAGE_MASK);
+ flush_icache_range(maddr, maddr + len);
+ kunmap(page);
+}
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 7a59a5c9aa5d..6564b4d81324 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -12,45 +12,15 @@
* Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
*/

-#include <linux/export.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/gfp.h>
-#include <linux/types.h>
-#include <linux/mm.h>
-#include <linux/stddef.h>
-#include <linux/init.h>
#include <linux/memblock.h>
#include <linux/highmem.h>
-#include <linux/initrd.h>
-#include <linux/pagemap.h>
#include <linux/suspend.h>
-#include <linux/hugetlb.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-#include <linux/memremap.h>
#include <linux/dma-direct.h>
-#include <linux/kprobes.h>

-#include <asm/prom.h>
-#include <asm/io.h>
-#include <asm/mmu_context.h>
-#include <asm/mmu.h>
-#include <asm/smp.h>
#include <asm/machdep.h>
-#include <asm/btext.h>
-#include <asm/tlb.h>
-#include <asm/sections.h>
-#include <asm/sparsemem.h>
-#include <asm/vdso.h>
-#include <asm/fixmap.h>
-#include <asm/swiotlb.h>
#include <asm/rtas.h>
#include <asm/kasan.h>
#include <asm/svm.h>
-#include <asm/mmzone.h>

#include <mm/mmu_decl.h>

@@ -340,257 +310,6 @@ void free_initmem(void)
free_initmem_default(POISON_FREE_INITMEM);
}

-/**
- * flush_coherent_icache() - if a CPU has a coherent icache, flush it
- * @addr: The base address to use (can be any valid address, the whole cache will be flushed)
- * Return true if the cache was flushed, false otherwise
- */
-static inline bool flush_coherent_icache(unsigned long addr)
-{
- /*
- * For a snooping icache, we still need a dummy icbi to purge all the
- * prefetched instructions from the ifetch buffers. We also need a sync
- * before the icbi to order the the actual stores to memory that might
- * have modified instructions with the icbi.
- */
- if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) {
- mb(); /* sync */
- allow_read_from_user((const void __user *)addr, L1_CACHE_BYTES);
- icbi((void *)addr);
- prevent_read_from_user((const void __user *)addr, L1_CACHE_BYTES);
- mb(); /* sync */
- isync();
- return true;
- }
-
- return false;
-}
-
-/**
- * invalidate_icache_range() - Flush the icache by issuing icbi across an address range
- * @start: the start address
- * @stop: the stop address (exclusive)
- */
-static void invalidate_icache_range(unsigned long start, unsigned long stop)
-{
- unsigned long shift = l1_icache_shift();
- unsigned long bytes = l1_icache_bytes();
- char *addr = (char *)(start & ~(bytes - 1));
- unsigned long size = stop - (unsigned long)addr + (bytes - 1);
- unsigned long i;
-
- for (i = 0; i < size >> shift; i++, addr += bytes)
- icbi(addr);
-
- mb(); /* sync */
- isync();
-}
-
-/**
- * flush_icache_range: Write any modified data cache blocks out to memory
- * and invalidate the corresponding blocks in the instruction cache
- *
- * Generic code will call this after writing memory, before executing from it.
- *
- * @start: the start address
- * @stop: the stop address (exclusive)
- */
-void flush_icache_range(unsigned long start, unsigned long stop)
-{
- if (flush_coherent_icache(start))
- return;
-
- clean_dcache_range(start, stop);
-
- if (IS_ENABLED(CONFIG_44x)) {
- /*
- * Flash invalidate on 44x because we are passed kmapped
- * addresses and this doesn't work for userspace pages due to
- * the virtually tagged icache.
- */
- iccci((void *)start);
- mb(); /* sync */
- isync();
- } else
- invalidate_icache_range(start, stop);
-}
-EXPORT_SYMBOL(flush_icache_range);
-
-#if !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC64)
-/**
- * flush_dcache_icache_phys() - Flush a page by it's physical address
- * @physaddr: the physical address of the page
- */
-static void flush_dcache_icache_phys(unsigned long physaddr)
-{
- unsigned long bytes = l1_dcache_bytes();
- unsigned long nb = PAGE_SIZE / bytes;
- unsigned long addr = physaddr & PAGE_MASK;
- unsigned long msr, msr0;
- unsigned long loop1 = addr, loop2 = addr;
-
- msr0 = mfmsr();
- msr = msr0 & ~MSR_DR;
- /*
- * This must remain as ASM to prevent potential memory accesses
- * while the data MMU is disabled
- */
- asm volatile(
- " mtctr %2;\n"
- " mtmsr %3;\n"
- " isync;\n"
- "0: dcbst 0, %0;\n"
- " addi %0, %0, %4;\n"
- " bdnz 0b;\n"
- " sync;\n"
- " mtctr %2;\n"
- "1: icbi 0, %1;\n"
- " addi %1, %1, %4;\n"
- " bdnz 1b;\n"
- " sync;\n"
- " mtmsr %5;\n"
- " isync;\n"
- : "+&r" (loop1), "+&r" (loop2)
- : "r" (nb), "r" (msr), "i" (bytes), "r" (msr0)
- : "ctr", "memory");
-}
-NOKPROBE_SYMBOL(flush_dcache_icache_phys)
-#endif // !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC64)
-
-/*
- * This is called when a page has been modified by the kernel.
- * It just marks the page as not i-cache clean. We do the i-cache
- * flush later when the page is given to a user process, if necessary.
- */
-void flush_dcache_page(struct page *page)
-{
- if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
- return;
- /* avoid an atomic op if possible */
- if (test_bit(PG_dcache_clean, &page->flags))
- clear_bit(PG_dcache_clean, &page->flags);
-}
-EXPORT_SYMBOL(flush_dcache_page);
-
-static void flush_dcache_icache_hugepage(struct page *page)
-{
- int i;
- void *start;
-
- BUG_ON(!PageCompound(page));
-
- for (i = 0; i < compound_nr(page); i++) {
- if (!PageHighMem(page)) {
- __flush_dcache_icache(page_address(page+i));
- } else {
- start = kmap_atomic(page+i);
- __flush_dcache_icache(start);
- kunmap_atomic(start);
- }
- }
-}
-
-void flush_dcache_icache_page(struct page *page)
-{
-
- if (PageCompound(page))
- return flush_dcache_icache_hugepage(page);
-
-#if defined(CONFIG_PPC_8xx) || defined(CONFIG_PPC64)
- /* On 8xx there is no need to kmap since highmem is not supported */
- __flush_dcache_icache(page_address(page));
-#else
- if (IS_ENABLED(CONFIG_BOOKE) || sizeof(phys_addr_t) > sizeof(void *)) {
- void *start = kmap_atomic(page);
- __flush_dcache_icache(start);
- kunmap_atomic(start);
- } else {
- unsigned long addr = page_to_pfn(page) << PAGE_SHIFT;
-
- if (flush_coherent_icache(addr))
- return;
- flush_dcache_icache_phys(addr);
- }
-#endif
-}
-EXPORT_SYMBOL(flush_dcache_icache_page);
-
-/**
- * __flush_dcache_icache(): Flush a particular page from the data cache to RAM.
- * Note: this is necessary because the instruction cache does *not*
- * snoop from the data cache.
- *
- * @page: the address of the page to flush
- */
-void __flush_dcache_icache(void *p)
-{
- unsigned long addr = (unsigned long)p;
-
- if (flush_coherent_icache(addr))
- return;
-
- clean_dcache_range(addr, addr + PAGE_SIZE);
-
- /*
- * We don't flush the icache on 44x. Those have a virtual icache and we
- * don't have access to the virtual address here (it's not the page
- * vaddr but where it's mapped in user space). The flushing of the
- * icache on these is handled elsewhere, when a change in the address
- * space occurs, before returning to user space.
- */
-
- if (mmu_has_feature(MMU_FTR_TYPE_44x))
- return;
-
- invalidate_icache_range(addr, addr + PAGE_SIZE);
-}
-
-void clear_user_page(void *page, unsigned long vaddr, struct page *pg)
-{
- clear_page(page);
-
- /*
- * We shouldn't have to do this, but some versions of glibc
- * require it (ld.so assumes zero filled pages are icache clean)
- * - Anton
- */
- flush_dcache_page(pg);
-}
-EXPORT_SYMBOL(clear_user_page);
-
-void copy_user_page(void *vto, void *vfrom, unsigned long vaddr,
- struct page *pg)
-{
- copy_page(vto, vfrom);
-
- /*
- * We should be able to use the following optimisation, however
- * there are two problems.
- * Firstly a bug in some versions of binutils meant PLT sections
- * were not marked executable.
- * Secondly the first word in the GOT section is blrl, used
- * to establish the GOT address. Until recently the GOT was
- * not marked executable.
- * - Anton
- */
-#if 0
- if (!vma->vm_file && ((vma->vm_flags & VM_EXEC) == 0))
- return;
-#endif
-
- flush_dcache_page(pg);
-}
-
-void flush_icache_user_page(struct vm_area_struct *vma, struct page *page,
- unsigned long addr, int len)
-{
- unsigned long maddr;
-
- maddr = (unsigned long) kmap(page) + (addr & ~PAGE_MASK);
- flush_icache_range(maddr, maddr + len);
- kunmap(page);
-}
-
/*
* System memory should not be in /proc/iomem but various tools expect it
* (eg kdump).
--
2.25.0


2021-04-08 15:32:41

by Christophe Leroy

[permalink] [raw]
Subject: [PATCH v2 7/9] powerpc/mem: Help GCC realise __flush_dcache_icache() flushes single pages

'And' the given page address with PAGE_MASK so that GCC can see the
range is page aligned and use a constant loop count.

With the patch:

00000024 <__flush_dcache_icache>:
24: 54 63 00 26 rlwinm r3,r3,0,0,19
28: 39 40 00 40 li r10,64
2c: 7c 69 1b 78 mr r9,r3
30: 7d 49 03 a6 mtctr r10
34: 7c 00 48 6c dcbst 0,r9
38: 39 29 00 20 addi r9,r9,32
3c: 7c 00 48 6c dcbst 0,r9
40: 39 29 00 20 addi r9,r9,32
44: 42 00 ff f0 bdnz 34 <__flush_dcache_icache+0x10>
48: 7c 00 04 ac hwsync
4c: 39 20 00 40 li r9,64
50: 7d 29 03 a6 mtctr r9
54: 7c 00 1f ac icbi 0,r3
58: 38 63 00 20 addi r3,r3,32
5c: 7c 00 1f ac icbi 0,r3
60: 38 63 00 20 addi r3,r3,32
64: 42 00 ff f0 bdnz 54 <__flush_dcache_icache+0x30>
68: 7c 00 04 ac hwsync
6c: 4c 00 01 2c isync
70: 4e 80 00 20 blr

Without the patch:

00000024 <__flush_dcache_icache>:
24: 54 6a 00 34 rlwinm r10,r3,0,0,26
28: 39 23 10 1f addi r9,r3,4127
2c: 7d 2a 48 50 subf r9,r10,r9
30: 55 29 d9 7f rlwinm. r9,r9,27,5,31
34: 41 82 00 94 beq c8 <__flush_dcache_icache+0xa4>
38: 71 28 00 01 andi. r8,r9,1
3c: 38 c9 ff ff addi r6,r9,-1
40: 7d 48 53 78 mr r8,r10
44: 7d 27 4b 78 mr r7,r9
48: 40 82 00 6c bne b4 <__flush_dcache_icache+0x90>
4c: 54 e7 f8 7e rlwinm r7,r7,31,1,31
50: 7c e9 03 a6 mtctr r7
54: 7c 00 40 6c dcbst 0,r8
58: 39 08 00 20 addi r8,r8,32
5c: 7c 00 40 6c dcbst 0,r8
60: 39 08 00 20 addi r8,r8,32
64: 42 00 ff f0 bdnz 54 <__flush_dcache_icache+0x30>
68: 7c 00 04 ac hwsync
6c: 71 28 00 01 andi. r8,r9,1
70: 39 09 ff ff addi r8,r9,-1
74: 40 82 00 2c bne a0 <__flush_dcache_icache+0x7c>
78: 55 29 f8 7e rlwinm r9,r9,31,1,31
7c: 7d 29 03 a6 mtctr r9
80: 7c 00 57 ac icbi 0,r10
84: 39 4a 00 20 addi r10,r10,32
88: 7c 00 57 ac icbi 0,r10
8c: 39 4a 00 20 addi r10,r10,32
90: 42 00 ff f0 bdnz 80 <__flush_dcache_icache+0x5c>
94: 7c 00 04 ac hwsync
98: 4c 00 01 2c isync
9c: 4e 80 00 20 blr
a0: 7c 00 57 ac icbi 0,r10
a4: 2c 08 00 00 cmpwi r8,0
a8: 39 4a 00 20 addi r10,r10,32
ac: 40 82 ff cc bne 78 <__flush_dcache_icache+0x54>
b0: 4b ff ff e4 b 94 <__flush_dcache_icache+0x70>
b4: 7c 00 50 6c dcbst 0,r10
b8: 2c 06 00 00 cmpwi r6,0
bc: 39 0a 00 20 addi r8,r10,32
c0: 40 82 ff 8c bne 4c <__flush_dcache_icache+0x28>
c4: 4b ff ff a4 b 68 <__flush_dcache_icache+0x44>
c8: 7c 00 04 ac hwsync
cc: 7c 00 04 ac hwsync
d0: 4c 00 01 2c isync
d4: 4e 80 00 20 blr

Signed-off-by: Christophe Leroy <[email protected]>
---
arch/powerpc/mm/cacheflush.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/mm/cacheflush.c b/arch/powerpc/mm/cacheflush.c
index 2d92cb6bc423..abeef69ed4e4 100644
--- a/arch/powerpc/mm/cacheflush.c
+++ b/arch/powerpc/mm/cacheflush.c
@@ -145,7 +145,7 @@ EXPORT_SYMBOL(flush_dcache_page);
*/
static void __flush_dcache_icache(void *p)
{
- unsigned long addr = (unsigned long)p;
+ unsigned long addr = (unsigned long)p & PAGE_MASK;

clean_dcache_range(addr, addr + PAGE_SIZE);

--
2.25.0

2021-04-08 15:33:14

by Christophe Leroy

[permalink] [raw]
Subject: [PATCH v2 6/9] powerpc/mem: flush_dcache_icache_phys() is for HIGHMEM pages only

__flush_dcache_icache() is usable for non HIGHMEM pages on
every platform.

It is only for HIGHMEM pages that BOOKE needs kmap() and
BOOK3S needs flush_dcache_icache_phys().

So make flush_dcache_icache_phys() dependent on CONFIG_HIGHMEM and
call it only when it is a HIGHMEM page.

We could make flush_dcache_icache_phys() available at all times,
but as it is declared NOKPROBE_SYMBOL(), GCC doesn't optimise
it out when it is not used.

So define a stub for !CONFIG_HIGHMEM in order to remove the #ifdef in
flush_dcache_icache_page() and use IS_ENABLED() instead.

Signed-off-by: Christophe Leroy <[email protected]>
---
arch/powerpc/mm/cacheflush.c | 17 +++++++++--------
1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/mm/cacheflush.c b/arch/powerpc/mm/cacheflush.c
index 3268a3e55c3f..2d92cb6bc423 100644
--- a/arch/powerpc/mm/cacheflush.c
+++ b/arch/powerpc/mm/cacheflush.c
@@ -76,7 +76,7 @@ void flush_icache_range(unsigned long start, unsigned long stop)
}
EXPORT_SYMBOL(flush_icache_range);

-#if !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC64)
+#ifdef CONFIG_HIGHMEM
/**
* flush_dcache_icache_phys() - Flush a page by it's physical address
* @physaddr: the physical address of the page
@@ -115,7 +115,11 @@ static void flush_dcache_icache_phys(unsigned long physaddr)
: "ctr", "memory");
}
NOKPROBE_SYMBOL(flush_dcache_icache_phys)
-#endif // !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC64)
+#else
+static void flush_dcache_icache_phys(unsigned long physaddr)
+{
+}
+#endif

/*
* This is called when a page has been modified by the kernel.
@@ -185,18 +189,15 @@ void flush_dcache_icache_page(struct page *page)
if (PageCompound(page))
return flush_dcache_icache_hugepage(page);

-#if defined(CONFIG_PPC_8xx) || defined(CONFIG_PPC64)
- /* On 8xx there is no need to kmap since highmem is not supported */
- __flush_dcache_icache(page_address(page));
-#else
- if (IS_ENABLED(CONFIG_BOOKE) || sizeof(phys_addr_t) > sizeof(void *)) {
+ if (!PageHighMem(page)) {
+ __flush_dcache_icache(lowmem_page_address(page));
+ } else if (IS_ENABLED(CONFIG_BOOKE) || sizeof(phys_addr_t) > sizeof(void *)) {
void *start = kmap_atomic(page);
__flush_dcache_icache(start);
kunmap_atomic(start);
} else {
flush_dcache_icache_phys(page_to_phys(page));
}
-#endif
}
EXPORT_SYMBOL(flush_dcache_icache_page);

--
2.25.0

2021-04-08 15:33:27

by Christophe Leroy

[permalink] [raw]
Subject: [PATCH v2 8/9] powerpc/mem: Inline flush_dcache_page()

flush_dcache_page() is only a few lines, so it is worth
inlining.

ia64, csky, mips, openrisc and riscv have a similar
flush_dcache_page() and inline it.

On pmac32_defconfig, we get a small size reduction.
On ppc64_defconfig, we get a very small size increase.

In both cases that's in the noise (less than 0.1%).

text data bss dec hex filename
18991155 5934744 1497624 26423523 19330e3 vmlinux64.before
18994829 5936732 1497624 26429185 1934701 vmlinux64.after
9150963 2467502 184548 11803013 b41985 vmlinux32.before
9149689 2467302 184548 11801539 b413c3 vmlinux32.after

Signed-off-by: Christophe Leroy <[email protected]>
---
arch/powerpc/include/asm/cacheflush.h | 14 +++++++++++++-
arch/powerpc/mm/cacheflush.c | 15 ---------------
2 files changed, 13 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h
index 9110489ea411..7564dd4fd12b 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -30,7 +30,19 @@ static inline void flush_cache_vmap(unsigned long start, unsigned long end)
#endif /* CONFIG_PPC_BOOK3S_64 */

#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
-extern void flush_dcache_page(struct page *page);
+/*
+ * This is called when a page has been modified by the kernel.
+ * It just marks the page as not i-cache clean. We do the i-cache
+ * flush later when the page is given to a user process, if necessary.
+ */
+static inline void flush_dcache_page(struct page *page)
+{
+ if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
+ return;
+ /* avoid an atomic op if possible */
+ if (test_bit(PG_dcache_clean, &page->flags))
+ clear_bit(PG_dcache_clean, &page->flags);
+}

void flush_icache_range(unsigned long start, unsigned long stop);
#define flush_icache_range flush_icache_range
diff --git a/arch/powerpc/mm/cacheflush.c b/arch/powerpc/mm/cacheflush.c
index abeef69ed4e4..d9eafa077c09 100644
--- a/arch/powerpc/mm/cacheflush.c
+++ b/arch/powerpc/mm/cacheflush.c
@@ -121,21 +121,6 @@ static void flush_dcache_icache_phys(unsigned long physaddr)
}
#endif

-/*
- * This is called when a page has been modified by the kernel.
- * It just marks the page as not i-cache clean. We do the i-cache
- * flush later when the page is given to a user process, if necessary.
- */
-void flush_dcache_page(struct page *page)
-{
- if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
- return;
- /* avoid an atomic op if possible */
- if (test_bit(PG_dcache_clean, &page->flags))
- clear_bit(PG_dcache_clean, &page->flags);
-}
-EXPORT_SYMBOL(flush_dcache_page);
-
/**
* __flush_dcache_icache(): Flush a particular page from the data cache to RAM.
* Note: this is necessary because the instruction cache does *not*
--
2.25.0

2021-04-08 15:34:54

by Christophe Leroy

[permalink] [raw]
Subject: [PATCH v2 9/9] powerpc/mem: Use kmap_local_page() in flushing functions

Flushing functions don't rely on preemption being disabled, so
use kmap_local_page() instead of kmap_atomic().

Signed-off-by: Christophe Leroy <[email protected]>
---
arch/powerpc/mm/cacheflush.c | 19 ++++++++++---------
1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/mm/cacheflush.c b/arch/powerpc/mm/cacheflush.c
index d9eafa077c09..63363787e000 100644
--- a/arch/powerpc/mm/cacheflush.c
+++ b/arch/powerpc/mm/cacheflush.c
@@ -152,16 +152,16 @@ static void flush_dcache_icache_hugepage(struct page *page)
{
int i;
int nr = compound_nr(page);
- void *start;

if (!PageHighMem(page)) {
for (i = 0; i < nr; i++)
__flush_dcache_icache(lowmem_page_address(page + i));
} else {
for (i = 0; i < nr; i++) {
- start = kmap_atomic(page+i);
+ void *start = kmap_local_page(page + i);
+
__flush_dcache_icache(start);
- kunmap_atomic(start);
+ kunmap_local(start);
}
}
}
@@ -177,9 +177,10 @@ void flush_dcache_icache_page(struct page *page)
if (!PageHighMem(page)) {
__flush_dcache_icache(lowmem_page_address(page));
} else if (IS_ENABLED(CONFIG_BOOKE) || sizeof(phys_addr_t) > sizeof(void *)) {
- void *start = kmap_atomic(page);
+ void *start = kmap_local_page(page);
+
__flush_dcache_icache(start);
- kunmap_atomic(start);
+ kunmap_local(start);
} else {
flush_dcache_icache_phys(page_to_phys(page));
}
@@ -225,9 +226,9 @@ void copy_user_page(void *vto, void *vfrom, unsigned long vaddr,
void flush_icache_user_page(struct vm_area_struct *vma, struct page *page,
unsigned long addr, int len)
{
- unsigned long maddr;
+ void *maddr;

- maddr = (unsigned long) kmap(page) + (addr & ~PAGE_MASK);
- flush_icache_range(maddr, maddr + len);
- kunmap(page);
+ maddr = kmap_local_page(page) + (addr & ~PAGE_MASK);
+ flush_icache_range((unsigned long)maddr, (unsigned long)maddr + len);
+ kunmap_local(maddr);
}
--
2.25.0

2021-04-19 04:06:59

by Michael Ellerman

[permalink] [raw]
Subject: Re: [PATCH v2 1/9] powerpc/mem: Move cache flushing functions into mm/cacheflush.c

On Thu, 8 Apr 2021 15:30:24 +0000 (UTC), Christophe Leroy wrote:
> Cache flushing functions are in the middle of completely
> unrelated stuff in mm/mem.c
>
> Create a dedicated mm/cacheflush.c for those functions.
>
> Also cleanup the list of included headers.

Applied to powerpc/next.

[1/9] powerpc/mem: Move cache flushing functions into mm/cacheflush.c
https://git.kernel.org/powerpc/c/b26e8f27253a47bff90972b987112fd8396e9b8d
[2/9] powerpc/mem: Declare __flush_dcache_icache() static
https://git.kernel.org/powerpc/c/bf26e0bbd2f82b52605cd7c880245eefe67e09f3
[3/9] powerpc/mem: Remove address argument to flush_coherent_icache()
https://git.kernel.org/powerpc/c/131637a17dc97fde3d007ab224e30c7ff4e62f6e
[4/9] powerpc/mem: Call flush_coherent_icache() at higher level
https://git.kernel.org/powerpc/c/e618c7aea1f2a2d615a99948f1f5cb4c11b6bf57
[5/9] powerpc/mem: Optimise flush_dcache_icache_hugepage()
https://git.kernel.org/powerpc/c/cd97d9e8b5aa45a7f867a10e99f1d6ce0a5deb8b
[6/9] powerpc/mem: flush_dcache_icache_phys() is for HIGHMEM pages only
https://git.kernel.org/powerpc/c/52d490437ffb1bab0a63ab7b1a64514d8c17dd4d
[7/9] powerpc/mem: Help GCC realise __flush_dcache_icache() flushes single pages
https://git.kernel.org/powerpc/c/67b8e6af191a6ed717be548307eb15048f8181d8
[8/9] powerpc/mem: Inline flush_dcache_page()
https://git.kernel.org/powerpc/c/6c96020882b17fb6f4fbf7f8cef8c606460fc14d
[9/9] powerpc/mem: Use kmap_local_page() in flushing functions
https://git.kernel.org/powerpc/c/7e9ab144c128df7660a2f33c9c6d1422fe798060

cheers