From: Alastair D'Silva <[email protected]>
This series addresses a few issues discovered in how we flush caches:
1. Flushes were truncated at 4GB, so larger flushes were incorrect.
2. Flushing the dcache in arch_add_memory was unnecessary.
This series also converts much of the cache assembler to C, with the
aim of making it easier to maintain.
Alastair D'Silva (6):
powerpc: Allow flush_icache_range to work across ranges >4GB
powerpc: define helpers to get L1 icache sizes
powerpc: Convert flush_icache_range & friends to C
powerpc: Chunk calls to flush_dcache_range in arch_*_memory
powerpc: Remove 'extern' from func prototypes in cache headers
powerpc: Don't flush caches when adding memory
arch/powerpc/include/asm/cache.h | 63 +++++++++-----
arch/powerpc/include/asm/cacheflush.h | 49 ++++++-----
arch/powerpc/kernel/misc_32.S | 117 --------------------------
arch/powerpc/kernel/misc_64.S | 97 ---------------------
arch/powerpc/mm/mem.c | 80 +++++++++++++++++-
5 files changed, 146 insertions(+), 260 deletions(-)
--
2.21.0
From: Alastair D'Silva <[email protected]>
When calling flush_icache_range with a size >4GB, we were masking
off the upper 32 bits, so we would incorrectly flush a range smaller
than intended.
This patch replaces the 32-bit shifts (srw) with 64-bit ones (srd), so
that the full size is accounted for.
Signed-off-by: Alastair D'Silva <[email protected]>
Cc: [email protected]
---
arch/powerpc/kernel/misc_64.S | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index b55a7b4cb543..9bc0aa9aeb65 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -82,7 +82,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
subf r8,r6,r4 /* compute length */
add r8,r8,r5 /* ensure we get enough */
lwz r9,DCACHEL1LOGBLOCKSIZE(r10) /* Get log-2 of cache block size */
- srw. r8,r8,r9 /* compute line count */
+ srd. r8,r8,r9 /* compute line count */
beqlr /* nothing to do? */
mtctr r8
1: dcbst 0,r6
@@ -98,7 +98,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
subf r8,r6,r4 /* compute length */
add r8,r8,r5
lwz r9,ICACHEL1LOGBLOCKSIZE(r10) /* Get log-2 of Icache block size */
- srw. r8,r8,r9 /* compute line count */
+ srd. r8,r8,r9 /* compute line count */
beqlr /* nothing to do? */
mtctr r8
2: icbi 0,r6
--
2.21.0
From: Alastair D'Silva <[email protected]>
When presented with large amounts of memory being hotplugged
(in my test case, ~890GB), the call to flush_dcache_range takes
a while (~50 seconds), triggering RCU stalls.
This patch breaks up the call into 16GB chunks, calling
cond_resched() in between to allow the scheduler to run.
Signed-off-by: Alastair D'Silva <[email protected]>
---
arch/powerpc/mm/mem.c | 16 ++++++++++++++--
1 file changed, 14 insertions(+), 2 deletions(-)
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 5400da87a804..fb0d5e9aa11b 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -104,11 +104,14 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end)
return -ENODEV;
}
+#define FLUSH_CHUNK_SIZE (16ull * 1024ull * 1024ull * 1024ull)
+
int __ref arch_add_memory(int nid, u64 start, u64 size,
struct mhp_restrictions *restrictions)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
+ unsigned long i;
int rc;
resize_hpt_for_hotplug(memblock_phys_mem_size());
@@ -120,7 +123,11 @@ int __ref arch_add_memory(int nid, u64 start, u64 size,
start, start + size, rc);
return -EFAULT;
}
- flush_dcache_range(start, start + size);
+
+ for (i = 0; i < size; i += FLUSH_CHUNK_SIZE) {
+ flush_dcache_range(start + i, min(start + size, start + i + FLUSH_CHUNK_SIZE));
+ cond_resched();
+ }
return __add_pages(nid, start_pfn, nr_pages, restrictions);
}
@@ -131,13 +138,18 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size,
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
struct page *page = pfn_to_page(start_pfn) + vmem_altmap_offset(altmap);
+ unsigned long i;
int ret;
__remove_pages(page_zone(page), start_pfn, nr_pages, altmap);
/* Remove htab bolted mappings for this section of memory */
start = (unsigned long)__va(start);
- flush_dcache_range(start, start + size);
+ for (i = 0; i < size; i += FLUSH_CHUNK_SIZE) {
+ flush_dcache_range(start + i, min(start + size, start + i + FLUSH_CHUNK_SIZE));
+ cond_resched();
+ }
+
ret = remove_section_mapping(start, start + size);
WARN_ON_ONCE(ret);
--
2.21.0
From: Alastair D'Silva <[email protected]>
The 'extern' keyword adds no value to function prototypes.
Signed-off-by: Alastair D'Silva <[email protected]>
---
arch/powerpc/include/asm/cache.h | 8 ++++----
arch/powerpc/include/asm/cacheflush.h | 6 +++---
2 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h
index 728f154204db..c5c096e968e0 100644
--- a/arch/powerpc/include/asm/cache.h
+++ b/arch/powerpc/include/asm/cache.h
@@ -102,10 +102,10 @@ static inline u32 l1_icache_bytes(void)
#define __read_mostly __attribute__((__section__(".data..read_mostly")))
#ifdef CONFIG_PPC_BOOK3S_32
-extern long _get_L2CR(void);
-extern long _get_L3CR(void);
-extern void _set_L2CR(unsigned long);
-extern void _set_L3CR(unsigned long);
+long _get_L2CR(void);
+long _get_L3CR(void);
+void _set_L2CR(unsigned long val);
+void _set_L3CR(unsigned long val);
#else
#define _get_L2CR() 0L
#define _get_L3CR() 0L
diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h
index 4c3377aff8ed..1826bf2cc137 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -38,15 +38,15 @@ static inline void flush_cache_vmap(unsigned long start, unsigned long end) { }
#endif
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
-extern void flush_dcache_page(struct page *page);
+void flush_dcache_page(struct page *page);
#define flush_dcache_mmap_lock(mapping) do { } while (0)
#define flush_dcache_mmap_unlock(mapping) do { } while (0)
void flush_icache_range(unsigned long start, unsigned long stop);
-extern void flush_icache_user_range(struct vm_area_struct *vma,
+void flush_icache_user_range(struct vm_area_struct *vma,
struct page *page, unsigned long addr,
int len);
-extern void flush_dcache_icache_page(struct page *page);
+void flush_dcache_icache_page(struct page *page);
/**
* flush_dcache_range(): Write any modified data cache blocks out to memory and invalidate them.
--
2.21.0
On Thu, Aug 15, 2019 at 02:10:49PM +1000, Alastair D'Silva wrote:
> From: Alastair D'Silva <[email protected]>
>
> When presented with large amounts of memory being hotplugged
> (in my test case, ~890GB), the call to flush_dcache_range takes
> a while (~50 seconds), triggering RCU stalls.
>
> This patch breaks up the call into 16GB chunks, calling
> cond_resched() inbetween to allow the scheduler to run.
>
> Signed-off-by: Alastair D'Silva <[email protected]>
> ---
> arch/powerpc/mm/mem.c | 16 ++++++++++++++--
> 1 file changed, 14 insertions(+), 2 deletions(-)
>
> diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
> index 5400da87a804..fb0d5e9aa11b 100644
> --- a/arch/powerpc/mm/mem.c
> +++ b/arch/powerpc/mm/mem.c
> @@ -104,11 +104,14 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end)
> return -ENODEV;
> }
>
> +#define FLUSH_CHUNK_SIZE (16ull * 1024ull * 1024ull * 1024ull)
IMHO this begs for adding SZ_16G to include/linux/sizes.h and using it here
> +
> int __ref arch_add_memory(int nid, u64 start, u64 size,
> struct mhp_restrictions *restrictions)
> {
> unsigned long start_pfn = start >> PAGE_SHIFT;
> unsigned long nr_pages = size >> PAGE_SHIFT;
> + unsigned long i;
> int rc;
>
> resize_hpt_for_hotplug(memblock_phys_mem_size());
> @@ -120,7 +123,11 @@ int __ref arch_add_memory(int nid, u64 start, u64 size,
> start, start + size, rc);
> return -EFAULT;
> }
> - flush_dcache_range(start, start + size);
> +
> + for (i = 0; i < size; i += FLUSH_CHUNK_SIZE) {
> + flush_dcache_range(start + i, min(start + size, start + i + FLUSH_CHUNK_SIZE));
> + cond_resched();
> + }
>
> return __add_pages(nid, start_pfn, nr_pages, restrictions);
> }
> @@ -131,13 +138,18 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size,
> unsigned long start_pfn = start >> PAGE_SHIFT;
> unsigned long nr_pages = size >> PAGE_SHIFT;
> struct page *page = pfn_to_page(start_pfn) + vmem_altmap_offset(altmap);
> + unsigned long i;
> int ret;
>
> __remove_pages(page_zone(page), start_pfn, nr_pages, altmap);
>
> /* Remove htab bolted mappings for this section of memory */
> start = (unsigned long)__va(start);
> - flush_dcache_range(start, start + size);
> + for (i = 0; i < size; i += FLUSH_CHUNK_SIZE) {
> + flush_dcache_range(start + i, min(start + size, start + i + FLUSH_CHUNK_SIZE));
> + cond_resched();
> + }
> +
> ret = remove_section_mapping(start, start + size);
> WARN_ON_ONCE(ret);
>
> --
> 2.21.0
>
--
Sincerely yours,
Mike.
Le 15/08/2019 à 06:10, Alastair D'Silva a écrit :
> From: Alastair D'Silva <[email protected]>
>
> When presented with large amounts of memory being hotplugged
> (in my test case, ~890GB), the call to flush_dcache_range takes
> a while (~50 seconds), triggering RCU stalls.
>
> This patch breaks up the call into 16GB chunks, calling
> cond_resched() inbetween to allow the scheduler to run.
Is 16GB small enough? If 890GB takes 50s, 16GB still takes about 1s.
I'd use 1GB chunks to remain below 100ms.
>
> Signed-off-by: Alastair D'Silva <[email protected]>
> ---
> arch/powerpc/mm/mem.c | 16 ++++++++++++++--
> 1 file changed, 14 insertions(+), 2 deletions(-)
>
> diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
> index 5400da87a804..fb0d5e9aa11b 100644
> --- a/arch/powerpc/mm/mem.c
> +++ b/arch/powerpc/mm/mem.c
> @@ -104,11 +104,14 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end)
> return -ENODEV;
> }
>
> +#define FLUSH_CHUNK_SIZE (16ull * 1024ull * 1024ull * 1024ull)
Can we use SZ_16GB ?
> +
> int __ref arch_add_memory(int nid, u64 start, u64 size,
> struct mhp_restrictions *restrictions)
> {
> unsigned long start_pfn = start >> PAGE_SHIFT;
> unsigned long nr_pages = size >> PAGE_SHIFT;
> + unsigned long i;
> int rc;
>
> resize_hpt_for_hotplug(memblock_phys_mem_size());
> @@ -120,7 +123,11 @@ int __ref arch_add_memory(int nid, u64 start, u64 size,
> start, start + size, rc);
> return -EFAULT;
> }
> - flush_dcache_range(start, start + size);
> +
> + for (i = 0; i < size; i += FLUSH_CHUNK_SIZE) {
> + flush_dcache_range(start + i, min(start + size, start + i + FLUSH_CHUNK_SIZE));
Isn't the line a bit long? (I have not checked.)
> + cond_resched();
> + }
>
> return __add_pages(nid, start_pfn, nr_pages, restrictions);
> }
> @@ -131,13 +138,18 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size,
> unsigned long start_pfn = start >> PAGE_SHIFT;
> unsigned long nr_pages = size >> PAGE_SHIFT;
> struct page *page = pfn_to_page(start_pfn) + vmem_altmap_offset(altmap);
> + unsigned long i;
> int ret;
>
> __remove_pages(page_zone(page), start_pfn, nr_pages, altmap);
>
> /* Remove htab bolted mappings for this section of memory */
> start = (unsigned long)__va(start);
> - flush_dcache_range(start, start + size);
> + for (i = 0; i < size; i += FLUSH_CHUNK_SIZE) {
> + flush_dcache_range(start + i, min(start + size, start + i + FLUSH_CHUNK_SIZE));
> + cond_resched();
> + }
> +
> ret = remove_section_mapping(start, start + size);
> WARN_ON_ONCE(ret);
>
>
Christophe
---
L'absence de virus dans ce courrier électronique a été vérifiée par le logiciel antivirus Avast.
https://www.avast.com/antivirus
On Thu, 2019-08-15 at 09:36 +0200, christophe leroy wrote:
>
> Le 15/08/2019 à 06:10, Alastair D'Silva a écrit :
> > From: Alastair D'Silva <[email protected]>
> >
> > When presented with large amounts of memory being hotplugged
> > (in my test case, ~890GB), the call to flush_dcache_range takes
> > a while (~50 seconds), triggering RCU stalls.
> >
> > This patch breaks up the call into 16GB chunks, calling
> > cond_resched() inbetween to allow the scheduler to run.
>
> Is 16GB small enough ? If 890GB takes 50s, 16GB still takes about 1s.
> I'd use 1GB chuncks to remain below 100ms.
>
> > Signed-off-by: Alastair D'Silva <[email protected]>
> > ---
> > arch/powerpc/mm/mem.c | 16 ++++++++++++++--
> > 1 file changed, 14 insertions(+), 2 deletions(-)
> >
> > diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
> > index 5400da87a804..fb0d5e9aa11b 100644
> > --- a/arch/powerpc/mm/mem.c
> > +++ b/arch/powerpc/mm/mem.c
> > @@ -104,11 +104,14 @@ int __weak remove_section_mapping(unsigned
> > long start, unsigned long end)
> > return -ENODEV;
> > }
> >
> > +#define FLUSH_CHUNK_SIZE (16ull * 1024ull * 1024ull * 1024ull)
>
> Can we use SZ_16GB ?
Sure, I'll go with 1GB as you recommended above
> > +
> > int __ref arch_add_memory(int nid, u64 start, u64 size,
> > struct mhp_restrictions *restrictions)
> > {
> > unsigned long start_pfn = start >> PAGE_SHIFT;
> > unsigned long nr_pages = size >> PAGE_SHIFT;
> > + unsigned long i;
> > int rc;
> >
> > resize_hpt_for_hotplug(memblock_phys_mem_size());
> > @@ -120,7 +123,11 @@ int __ref arch_add_memory(int nid, u64 start,
> > u64 size,
> > start, start + size, rc);
> > return -EFAULT;
> > }
> > - flush_dcache_range(start, start + size);
> > +
> > + for (i = 0; i < size; i += FLUSH_CHUNK_SIZE) {
> > + flush_dcache_range(start + i, min(start + size, start +
> > i + FLUSH_CHUNK_SIZE));
>
> Isn't the line a bit long (I have not checked).
>
> > + cond_resched();
> > + }
> >
> > return __add_pages(nid, start_pfn, nr_pages, restrictions);
> > }
> > @@ -131,13 +138,18 @@ void __ref arch_remove_memory(int nid, u64
> > start, u64 size,
> > unsigned long start_pfn = start >> PAGE_SHIFT;
> > unsigned long nr_pages = size >> PAGE_SHIFT;
> > struct page *page = pfn_to_page(start_pfn) +
> > vmem_altmap_offset(altmap);
> > + unsigned long i;
> > int ret;
> >
> > __remove_pages(page_zone(page), start_pfn, nr_pages, altmap);
> >
> > /* Remove htab bolted mappings for this section of memory */
> > start = (unsigned long)__va(start);
> > - flush_dcache_range(start, start + size);
> > + for (i = 0; i < size; i += FLUSH_CHUNK_SIZE) {
> > + flush_dcache_range(start + i, min(start + size, start +
> > i + FLUSH_CHUNK_SIZE));
> > + cond_resched();
> > + }
> > +
> > ret = remove_section_mapping(start, start + size);
> > WARN_ON_ONCE(ret);
> >
> >
>
> Christophe
>
> ---
> L'absence de virus dans ce courrier électronique a été vérifiée par
> le logiciel antivirus Avast.
> https://urldefense.proofpoint.com/v2/url?u=https-3A__www.avast.com_antivirus&d=DwIDaQ&c=jf_iaSHvJObTbx-siA1ZOg&r=cT4tgeEQ0Ll3SIlZDHE5AEXyKy6uKADMtf9_Eb7-vec&m=TBT2NNM2DXqDWHhSb_WdFPcfAjYk9hP2cvGksF001cQ&s=XURKAOQQ4h3_RhJlezSguD2kpSitAF-uBhQqVZLU4GU&e=
>
--
Alastair D'Silva
Open Source Developer
Linux Technology Centre, IBM Australia
mob: 0423 762 819