2018-01-09 06:58:02

by Christophe Leroy

[permalink] [raw]
Subject: [PATCH] powerpc/32: Remove memory clobber asm constraint on dcbX() functions

Instead of just telling GCC that dcbz(), dcbi(), dcbf() and dcbst()
clobber memory, tell it what it clobbers:
* dcbz(), dcbi() and dcbf() clobber one cacheline as output
* dcbf() and dcbst() take one cacheline as input

Signed-off-by: Christophe Leroy <[email protected]>
---
arch/powerpc/include/asm/cache.h | 17 +++++++++++++----
1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h
index c1d257aa4c2d..fc8fe18acf8c 100644
--- a/arch/powerpc/include/asm/cache.h
+++ b/arch/powerpc/include/asm/cache.h
@@ -82,22 +82,31 @@ extern void _set_L3CR(unsigned long);

static inline void dcbz(void *addr)
{
- __asm__ __volatile__ ("dcbz 0, %0" : : "r"(addr) : "memory");
+ __asm__ __volatile__ ("dcbz 0, %1" :
+ "=m"(*(char (*)[L1_CACHE_BYTES])addr) :
+ "r"(addr) :);
}

static inline void dcbi(void *addr)
{
- __asm__ __volatile__ ("dcbi 0, %0" : : "r"(addr) : "memory");
+ __asm__ __volatile__ ("dcbi 0, %1" :
+ "=m"(*(char (*)[L1_CACHE_BYTES])addr) :
+ "r"(addr) :);
}

static inline void dcbf(void *addr)
{
- __asm__ __volatile__ ("dcbf 0, %0" : : "r"(addr) : "memory");
+ __asm__ __volatile__ ("dcbf 0, %1" :
+ "=m"(*(char (*)[L1_CACHE_BYTES])addr) :
+ "r"(addr), "m"(*(char (*)[L1_CACHE_BYTES])addr) :
+ );
}

static inline void dcbst(void *addr)
{
- __asm__ __volatile__ ("dcbst 0, %0" : : "r"(addr) : "memory");
+ __asm__ __volatile__ ("dcbst 0, %0" : :
+ "r"(addr), "m"(*(char (*)[L1_CACHE_BYTES])addr) :
+ );
}
#endif /* !__ASSEMBLY__ */
#endif /* __KERNEL__ */
--
2.13.3


2019-05-03 14:32:22

by Christophe Leroy

[permalink] [raw]
Subject: Re: [PATCH] powerpc/32: Remove memory clobber asm constraint on dcbX() functions

Segher,

A while ago I proposed the following patch, and didn't get any comment
back on it.

Do you have any opinion on it ? Is it good and worth it ?

Thanks
Christophe

Le 09/01/2018 à 07:57, Christophe Leroy a écrit :
> Instead of just telling GCC that dcbz(), dcbi(), dcbf() and dcbst()
> clobber memory, tell it what it clobbers:
> * dcbz(), dcbi() and dcbf() clobbers one cacheline as output
> * dcbf() and dcbst() clobbers one cacheline as input
>
> Signed-off-by: Christophe Leroy <[email protected]>
> ---
> arch/powerpc/include/asm/cache.h | 17 +++++++++++++----
> 1 file changed, 13 insertions(+), 4 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h
> index c1d257aa4c2d..fc8fe18acf8c 100644
> --- a/arch/powerpc/include/asm/cache.h
> +++ b/arch/powerpc/include/asm/cache.h
> @@ -82,22 +82,31 @@ extern void _set_L3CR(unsigned long);
>
> static inline void dcbz(void *addr)
> {
> - __asm__ __volatile__ ("dcbz 0, %0" : : "r"(addr) : "memory");
> + __asm__ __volatile__ ("dcbz 0, %1" :
> + "=m"(*(char (*)[L1_CACHE_BYTES])addr) :
> + "r"(addr) :);
> }
>
> static inline void dcbi(void *addr)
> {
> - __asm__ __volatile__ ("dcbi 0, %0" : : "r"(addr) : "memory");
> + __asm__ __volatile__ ("dcbi 0, %1" :
> + "=m"(*(char (*)[L1_CACHE_BYTES])addr) :
> + "r"(addr) :);
> }
>
> static inline void dcbf(void *addr)
> {
> - __asm__ __volatile__ ("dcbf 0, %0" : : "r"(addr) : "memory");
> + __asm__ __volatile__ ("dcbf 0, %1" :
> + "=m"(*(char (*)[L1_CACHE_BYTES])addr) :
> + "r"(addr), "m"(*(char (*)[L1_CACHE_BYTES])addr) :
> + );
> }
>
> static inline void dcbst(void *addr)
> {
> - __asm__ __volatile__ ("dcbst 0, %0" : : "r"(addr) : "memory");
> + __asm__ __volatile__ ("dcbst 0, %0" : :
> + "r"(addr), "m"(*(char (*)[L1_CACHE_BYTES])addr) :
> + );
> }
> #endif /* !__ASSEMBLY__ */
> #endif /* __KERNEL__ */
>

2019-05-03 14:34:41

by Christophe Leroy

[permalink] [raw]
Subject: Re: [PATCH] powerpc/32: Remove memory clobber asm constraint on dcbX() functions

Segher,

A while ago I proposed the following patch, and didn't get any comment
back on it.

Do you have any opinion on it ? Is it good and worth it ?

Thanks
Christophe

Le 09/01/2018 à 07:57, Christophe Leroy a écrit :
> Instead of just telling GCC that dcbz(), dcbi(), dcbf() and dcbst()
> clobber memory, tell it what it clobbers:
> * dcbz(), dcbi() and dcbf() clobbers one cacheline as output
> * dcbf() and dcbst() clobbers one cacheline as input
>
> Signed-off-by: Christophe Leroy <[email protected]>
> ---
> arch/powerpc/include/asm/cache.h | 17 +++++++++++++----
> 1 file changed, 13 insertions(+), 4 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/cache.h
> b/arch/powerpc/include/asm/cache.h
> index c1d257aa4c2d..fc8fe18acf8c 100644
> --- a/arch/powerpc/include/asm/cache.h
> +++ b/arch/powerpc/include/asm/cache.h
> @@ -82,22 +82,31 @@ extern void _set_L3CR(unsigned long);
> static inline void dcbz(void *addr)
> {
> - __asm__ __volatile__ ("dcbz 0, %0" : : "r"(addr) : "memory");
> + __asm__ __volatile__ ("dcbz 0, %1" :
> + "=m"(*(char (*)[L1_CACHE_BYTES])addr) :
> + "r"(addr) :);
> }
> static inline void dcbi(void *addr)
> {
> - __asm__ __volatile__ ("dcbi 0, %0" : : "r"(addr) : "memory");
> + __asm__ __volatile__ ("dcbi 0, %1" :
> + "=m"(*(char (*)[L1_CACHE_BYTES])addr) :
> + "r"(addr) :);
> }
> static inline void dcbf(void *addr)
> {
> - __asm__ __volatile__ ("dcbf 0, %0" : : "r"(addr) : "memory");
> + __asm__ __volatile__ ("dcbf 0, %1" :
> + "=m"(*(char (*)[L1_CACHE_BYTES])addr) :
> + "r"(addr), "m"(*(char (*)[L1_CACHE_BYTES])addr) :
> + );
> }
> static inline void dcbst(void *addr)
> {
> - __asm__ __volatile__ ("dcbst 0, %0" : : "r"(addr) : "memory");
> + __asm__ __volatile__ ("dcbst 0, %0" : :
> + "r"(addr), "m"(*(char (*)[L1_CACHE_BYTES])addr) :
> + );
> }
> #endif /* !__ASSEMBLY__ */
> #endif /* __KERNEL__ */
>

2019-05-03 18:19:40

by Segher Boessenkool

[permalink] [raw]
Subject: Re: [PATCH] powerpc/32: Remove memory clobber asm constraint on dcbX() functions

Hi Christophe,

On Fri, May 03, 2019 at 04:14:13PM +0200, Christophe Leroy wrote:
> A while ago I proposed the following patch, and didn't get any comment
> back on it.

I didn't see it. Maybe because of holiday :-)

> Do you have any opinion on it ? Is it good and worth it ?

> Le 09/01/2018 à 07:57, Christophe Leroy a écrit :
> >Instead of just telling GCC that dcbz(), dcbi(), dcbf() and dcbst()
> >clobber memory, tell it what it clobbers:
> >* dcbz(), dcbi() and dcbf() clobbers one cacheline as output
> >* dcbf() and dcbst() clobbers one cacheline as input

You cannot "clobber input".

Seen another way, only dcbi clobbers anything; dcbz zeroes it instead,
and dcbf and dcbst only change in what caches the data hangs out.

> >--- a/arch/powerpc/include/asm/cache.h
> >+++ b/arch/powerpc/include/asm/cache.h
> >@@ -82,22 +82,31 @@ extern void _set_L3CR(unsigned long);
> >
> > static inline void dcbz(void *addr)
> > {
> >- __asm__ __volatile__ ("dcbz 0, %0" : : "r"(addr) : "memory");
> >+ __asm__ __volatile__ ("dcbz 0, %1" :
> >+ "=m"(*(char (*)[L1_CACHE_BYTES])addr) :
> >+ "r"(addr) :);
> > }

The instruction does *not* work on the memory pointed to by addr. It
works on the cache line containing the address addr.

If you want to have addr always aligned, you need to document this, and
check all callers, etc.

> > static inline void dcbf(void *addr)
> > {
> >- __asm__ __volatile__ ("dcbf 0, %0" : : "r"(addr) : "memory");
> >+ __asm__ __volatile__ ("dcbf 0, %1" :
> >+ "=m"(*(char (*)[L1_CACHE_BYTES])addr) :
> >+ "r"(addr), "m"(*(char
> >(*)[L1_CACHE_BYTES])addr) :
> >+ );
> > }

Newline damage... Was that your mailer?


Also, you may want a "memory" clobber anyway, to get ordering correct
for the synchronisation instructions.

I think your changes make things less robust than they were before.


[ Btw. Instead of

__asm__ __volatile__ ("dcbf 0, %0" : : "r"(addr) : "memory");

you can do

__asm__ __volatile__ ("dcbf %0" : : "Z"(addr) : "memory");

to save some insns here and there. ]


Segher

2019-05-06 16:33:19

by Christophe Leroy

[permalink] [raw]
Subject: Re: [PATCH] powerpc/32: Remove memory clobber asm constraint on dcbX() functions

Hi Segher,


On 05/03/2019 06:15 PM, Segher Boessenkool wrote:
> Hi Christophe,
>
> On Fri, May 03, 2019 at 04:14:13PM +0200, Christophe Leroy wrote:
>> A while ago I proposed the following patch, and didn't get any comment
>> back on it.
>
> I didn't see it. Maybe because of holiday :-)

Thanks for this answer, I guess I'll drop it for the time being.

However, I've tried your suggestion below and get an unexpected result.

[...]

>
>
> [ Btw. Instead of
>
> __asm__ __volatile__ ("dcbf 0, %0" : : "r"(addr) : "memory");
>
> you can do
>
> __asm__ __volatile__ ("dcbf %0" : : "Z"(addr) : "memory");
>
> to save some insns here and there. ]

Tried that change on dcbz() and checked function clear_page()

static inline void clear_page(void *addr)
{
unsigned int i;

for (i = 0; i < PAGE_SIZE / L1_CACHE_BYTES; i++, addr += L1_CACHE_BYTES)
dcbz(addr);
}

void clear_user_page(void *page, unsigned long vaddr, struct page *pg)
{
clear_page(page);

/*
* We shouldn't have to do this, but some versions of glibc
* require it (ld.so assumes zero filled pages are icache clean)
* - Anton
*/
flush_dcache_page(pg);
}
EXPORT_SYMBOL(clear_user_page);


Before the change,

clear_user_page:
mflr 0
stw 0,4(1)
bl _mcount
stwu 1,-16(1)
li 9,128
mflr 0
mtctr 9
stw 0,20(1)
.L46:
#APP
# 88 "./arch/powerpc/include/asm/cache.h" 1
dcbz 0, 3
# 0 "" 2
#NO_APP
addi 3,3,32
bdnz .L46
lwz 0,20(1)
mr 3,5
mtlr 0
addi 1,1,16
b flush_dcache_page


After the change


clear_user_page:
mflr 0
stw 0,4(1)
bl _mcount
stwu 1,-32(1)
li 9,128
mflr 0
mtctr 9
stw 0,36(1)
.L46:
stw 3,8(1)
addi 9,1,8
#APP
# 88 "./arch/powerpc/include/asm/cache.h" 1
dcbz 0(9)
# 0 "" 2
#NO_APP
addi 3,3,32
bdnz .L46
mr 3,5
bl flush_dcache_page
lwz 0,36(1)
addi 1,1,32
mtlr 0
blr


So first of all it uses a nonexistent form of dcbz : "dcbz 0(9)"
And in addition, it stores r3 somewhere and I guess expects to read it
with dcbz ???

Looks like 'Z' is not the right constraint to use.

Christophe

2019-05-06 16:55:01

by Segher Boessenkool

[permalink] [raw]
Subject: Re: [PATCH] powerpc/32: Remove memory clobber asm constraint on dcbX() functions

Hi!

On Mon, May 06, 2019 at 04:31:38PM +0000, Christophe Leroy wrote:
> However, I've tried your suggestion below and get an unexpected result.

> >you can do
> >
> > __asm__ __volatile__ ("dcbf %0" : : "Z"(addr) : "memory");
> >
> >to save some insns here and there. ]

This should be "dcbf %y0". Sorry. And not addr -- it needs a mem there,
so deref addr as usual.


Segher