2018-04-17 17:10:53

by Christophe Leroy

Subject: [PATCH v2 1/2] powerpc: avoid an unnecessary test and branch in longjmp()

Doing the test at the exit of the function avoids an unnecessary
test and branch inside longjmp().
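
For reference, a minimal C sketch of the return-value contract the asm
implements (the usual setjmp/longjmp semantics):

	/* Sketch only: the value longjmp() must make setjmp() appear to
	 * return; 0 is reserved for the direct setjmp() return path. */
	int longjmp_retval(int val)
	{
		return val ? val : 1;
	}

On the common non-zero path, the new sequence drops the compare and
branch at entry and folds the test into the final mr./bnelr.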

Signed-off-by: Christophe Leroy <[email protected]>
---
v2: Swapped both patches in the series to reduce the number of impacted lines

arch/powerpc/kernel/misc.S | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S
index e1f3a5d054c4..746ee0320ad4 100644
--- a/arch/powerpc/kernel/misc.S
+++ b/arch/powerpc/kernel/misc.S
@@ -74,10 +74,7 @@ _GLOBAL(setjmp)
blr

_GLOBAL(longjmp)
- PPC_LCMPI r4,0
- bne 1f
- li r4,1
-1: PPC_LL r13,4*SZL(r3)
+ PPC_LL r13,4*SZL(r3)
PPC_LL r14,5*SZL(r3)
PPC_LL r15,6*SZL(r3)
PPC_LL r16,7*SZL(r3)
@@ -102,7 +99,9 @@ _GLOBAL(longjmp)
PPC_LL r1,SZL(r3)
PPC_LL r2,2*SZL(r3)
mtlr r0
- mr r3,r4
+ mr. r3, r4
+ bnelr
+ li r3, 1
blr

_GLOBAL(current_stack_pointer)
--
2.13.3



2018-04-17 17:10:50

by Christophe Leroy

Subject: [PATCH v2 2/2] powerpc/32be: use stmw/lmw for registers save/restore in asm

arch/powerpc/Makefile activates -mmultiple on BE PPC32 configs
in order to use multiple-word instructions in function entry/exit.

This patch does the same in the asm parts, for consistency.

On processors like the 8xx, where instruction fetching is pretty slow,
this speeds up register save/restore.
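
As an illustration (not part of the patch): stmw rN,D(rA) stores
registers rN through r31 as consecutive words starting at D(rA), and
lmw is the mirror load. Moving CR into r12 first lets a single stmw
cover CR plus all of r13-r31, since the CR slot at 3*SZL sits directly
before the r13 slot:

	mfcr	r12			/* CR image into r12 */
	stmw	r12, 3*SZL(r3)		/* stores r12 (CR), r13, ..., r31 */

	/* ...standing in for an mfcr plus 20 individual stores: */
	mfcr	r0
	stw	r0, 3*SZL(r3)
	stw	r13, 4*SZL(r3)
	/* ... */
	stw	r31, 22*SZL(r3)

On the restore side, mtcrf 0x38,r12 rewrites only CR fields 2-4
(cr2-cr4), the ones the ABI requires to be preserved.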

Signed-off-by: Christophe Leroy <[email protected]>
---
v2: Swapped both patches in the series to reduce the number of impacted
lines and added the same modification in ppc_save_regs()

arch/powerpc/include/asm/ppc_asm.h | 5 +++++
arch/powerpc/kernel/misc.S | 10 ++++++++++
arch/powerpc/kernel/ppc_save_regs.S | 4 ++++
3 files changed, 19 insertions(+)

diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index 13f7f4c0e1ea..4bb765d0b758 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -80,11 +80,16 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
#else
#define SAVE_GPR(n, base) stw n,GPR0+4*(n)(base)
#define REST_GPR(n, base) lwz n,GPR0+4*(n)(base)
+#ifdef CONFIG_CPU_BIG_ENDIAN
+#define SAVE_NVGPRS(base) stmw 13, GPR0+4*13(base)
+#define REST_NVGPRS(base) lmw 13, GPR0+4*13(base)
+#else
#define SAVE_NVGPRS(base) SAVE_GPR(13, base); SAVE_8GPRS(14, base); \
SAVE_10GPRS(22, base)
#define REST_NVGPRS(base) REST_GPR(13, base); REST_8GPRS(14, base); \
REST_10GPRS(22, base)
#endif
+#endif

#define SAVE_2GPRS(n, base) SAVE_GPR(n, base); SAVE_GPR(n+1, base)
#define SAVE_4GPRS(n, base) SAVE_2GPRS(n, base); SAVE_2GPRS(n+2, base)
diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S
index 746ee0320ad4..a316d90a5c26 100644
--- a/arch/powerpc/kernel/misc.S
+++ b/arch/powerpc/kernel/misc.S
@@ -49,6 +49,10 @@ _GLOBAL(setjmp)
PPC_STL r0,0(r3)
PPC_STL r1,SZL(r3)
PPC_STL r2,2*SZL(r3)
+#if defined(CONFIG_PPC32) && defined(CONFIG_CPU_BIG_ENDIAN)
+ mfcr r12
+ stmw r12, 3*SZL(r3)
+#else
mfcr r0
PPC_STL r0,3*SZL(r3)
PPC_STL r13,4*SZL(r3)
@@ -70,10 +74,15 @@ _GLOBAL(setjmp)
PPC_STL r29,20*SZL(r3)
PPC_STL r30,21*SZL(r3)
PPC_STL r31,22*SZL(r3)
+#endif
li r3,0
blr

_GLOBAL(longjmp)
+#if defined(CONFIG_PPC32) && defined(CONFIG_CPU_BIG_ENDIAN)
+ lmw r12, 3*SZL(r3)
+ mtcrf 0x38, r12
+#else
PPC_LL r13,4*SZL(r3)
PPC_LL r14,5*SZL(r3)
PPC_LL r15,6*SZL(r3)
@@ -95,6 +104,7 @@ _GLOBAL(longjmp)
PPC_LL r31,22*SZL(r3)
PPC_LL r0,3*SZL(r3)
mtcrf 0x38,r0
+#endif
PPC_LL r0,0(r3)
PPC_LL r1,SZL(r3)
PPC_LL r2,2*SZL(r3)
diff --git a/arch/powerpc/kernel/ppc_save_regs.S b/arch/powerpc/kernel/ppc_save_regs.S
index 1b1787d52896..d60316e70514 100644
--- a/arch/powerpc/kernel/ppc_save_regs.S
+++ b/arch/powerpc/kernel/ppc_save_regs.S
@@ -25,6 +25,9 @@
*/
_GLOBAL(ppc_save_regs)
PPC_STL r0,0*SZL(r3)
+#if defined(CONFIG_PPC32) && defined(CONFIG_CPU_BIG_ENDIAN)
+ stmw r2, 2*SZL(r3)
+#else
PPC_STL r2,2*SZL(r3)
PPC_STL r3,3*SZL(r3)
PPC_STL r4,4*SZL(r3)
@@ -55,6 +58,7 @@ _GLOBAL(ppc_save_regs)
PPC_STL r29,29*SZL(r3)
PPC_STL r30,30*SZL(r3)
PPC_STL r31,31*SZL(r3)
+#endif
/* go up one stack frame for SP */
PPC_LL r4,0(r1)
PPC_STL r4,1*SZL(r3)
--
2.13.3


2018-05-17 12:11:44

by Michael Ellerman

Subject: Re: [PATCH v2 2/2] powerpc/32be: use stmw/lmw for registers save/restore in asm

Christophe Leroy <[email protected]> writes:
> arch/powerpc/Makefile activates -mmultiple on BE PPC32 configs
> in order to use multiple-word instructions in function entry/exit.

True, though that could be a lot simpler because the MULTIPLEWORD value
is only used for PPC32, which is always big endian. I'll send a patch
for that.
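
For reference, roughly the Makefile logic in question (sketched from
memory, so details may differ):

	# arch/powerpc/Makefile (approximate)
	ifdef CONFIG_CPU_LITTLE_ENDIAN
	MULTIPLEWORD	:= -mno-multiple
	else
	MULTIPLEWORD	:= -mmultiple
	endif
	...
	CFLAGS-$(CONFIG_PPC32)	:= -ffixed-r2 $(MULTIPLEWORD)

Since only the PPC32 (hence BE) flags consume MULTIPLEWORD, it could
presumably become -mmultiple unconditionally.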

> This patch does the same in the asm parts, for consistency.
>
> On processors like the 8xx, where instruction fetching is pretty slow,
> this speeds up register save/restore.

OK. I've always heard that they should be avoided, but that's coming
from 64-bit land.

I guess we've been enabling this for all 32-bit targets for ever so it
must be a reasonable option.

> Signed-off-by: Christophe Leroy <[email protected]>
> ---
> v2: Swapped both patches in the series to reduce the number of impacted
> lines and added the same modification in ppc_save_regs()
>
> arch/powerpc/include/asm/ppc_asm.h | 5 +++++
> arch/powerpc/kernel/misc.S | 10 ++++++++++
> arch/powerpc/kernel/ppc_save_regs.S | 4 ++++
> 3 files changed, 19 insertions(+)
>
> diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
> index 13f7f4c0e1ea..4bb765d0b758 100644
> --- a/arch/powerpc/include/asm/ppc_asm.h
> +++ b/arch/powerpc/include/asm/ppc_asm.h
> @@ -80,11 +80,16 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
> #else
> #define SAVE_GPR(n, base) stw n,GPR0+4*(n)(base)
> #define REST_GPR(n, base) lwz n,GPR0+4*(n)(base)
> +#ifdef CONFIG_CPU_BIG_ENDIAN
> +#define SAVE_NVGPRS(base) stmw 13, GPR0+4*13(base)
> +#define REST_NVGPRS(base) lmw 13, GPR0+4*13(base)
> +#else
> #define SAVE_NVGPRS(base) SAVE_GPR(13, base); SAVE_8GPRS(14, base); \
> SAVE_10GPRS(22, base)
> #define REST_NVGPRS(base) REST_GPR(13, base); REST_8GPRS(14, base); \
> REST_10GPRS(22, base)

There is no 32-bit little endian, so this is basically dead code now.

Maybe there'll be a 32-bit LE port one day, but if so we can put the
code back then.

So I'll just drop the else case.
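
Presumably leaving just (sketch):

	#define SAVE_NVGPRS(base)	stmw	13, GPR0+4*13(base)
	#define REST_NVGPRS(base)	lmw	13, GPR0+4*13(base)

with the CONFIG_CPU_BIG_ENDIAN guard dropped as well.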

> #endif
> +#endif
>
> #define SAVE_2GPRS(n, base) SAVE_GPR(n, base); SAVE_GPR(n+1, base)
> #define SAVE_4GPRS(n, base) SAVE_2GPRS(n, base); SAVE_2GPRS(n+2, base)
> diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S
> index 746ee0320ad4..a316d90a5c26 100644
> --- a/arch/powerpc/kernel/misc.S
> +++ b/arch/powerpc/kernel/misc.S
> @@ -49,6 +49,10 @@ _GLOBAL(setjmp)
> PPC_STL r0,0(r3)
> PPC_STL r1,SZL(r3)
> PPC_STL r2,2*SZL(r3)
> +#if defined(CONFIG_PPC32) && defined(CONFIG_CPU_BIG_ENDIAN)

And this could just be:

#ifdef CONFIG_PPC32

> + mfcr r12
> + stmw r12, 3*SZL(r3)
> +#else
> mfcr r0
> PPC_STL r0,3*SZL(r3)
> PPC_STL r13,4*SZL(r3)
> @@ -70,10 +74,15 @@ _GLOBAL(setjmp)
> PPC_STL r29,20*SZL(r3)
> PPC_STL r30,21*SZL(r3)
> PPC_STL r31,22*SZL(r3)
> +#endif

It's a pity to end up with this basically split in half by ifdefs for
32/64-bit, but maybe we can clean that up later.

cheers

2018-05-17 13:17:43

by Segher Boessenkool

Subject: Re: [PATCH v2 2/2] powerpc/32be: use stmw/lmw for registers save/restore in asm

On Thu, May 17, 2018 at 10:10:21PM +1000, Michael Ellerman wrote:
> Christophe Leroy <[email protected]> writes:
> > arch/powerpc/Makefile activates -mmultiple on BE PPC32 configs
> > in order to use multiple-word instructions in function entry/exit.
>
> True, though that could be a lot simpler because the MULTIPLEWORD value
> is only used for PPC32, which is always big endian. I'll send a patch
> for that.

Do you mean in the kernel? Many 32-bit processors can do LE, and many
do not implement multiple or string insns in LE mode.

> > This patch does the same in the asm parts, for consistency.
> >
> > On processors like the 8xx, where instruction fetching is pretty slow,
> > this speeds up register save/restore.
>
> OK. I've always heard that they should be avoided, but that's coming
> from 64-bit land.
>
> I guess we've been enabling this for all 32-bit targets for ever so it
> must be a reasonable option.

On 603, load multiple (and string) are one cycle slower than doing all the
loads separately, and store is essentially the same as separate stores.
On 7xx and 7xxx both loads and stores are one cycle slower as multiple
than as separate insns.

Load/store multiple are nice for saving/restoring registers.


Segher

2018-05-17 13:29:47

by Christophe Leroy

Subject: Re: [PATCH v2 2/2] powerpc/32be: use stmw/lmw for registers save/restore in asm



On 17/05/2018 at 15:15, Segher Boessenkool wrote:
> On Thu, May 17, 2018 at 10:10:21PM +1000, Michael Ellerman wrote:
>> Christophe Leroy <[email protected]> writes:
>>> arch/powerpc/Makefile activates -mmultiple on BE PPC32 configs
>>> in order to use multiple-word instructions in function entry/exit.
>>
>> True, though that could be a lot simpler because the MULTIPLEWORD value
>> is only used for PPC32, which is always big endian. I'll send a patch
>> for that.
>
> Do you mean in the kernel? Many 32-bit processors can do LE, and many
> do not implement multiple or string insns in LE mode.
>
>>> This patch does the same in the asm parts, for consistency.
>>>
>>> On processors like the 8xx, where instruction fetching is pretty slow,
>>> this speeds up register save/restore.
>>
>> OK. I've always heard that they should be avoided, but that's coming
>> from 64-bit land.
>>
>> I guess we've been enabling this for all 32-bit targets for ever so it
>> must be a reasonable option.
>
> On 603, load multiple (and string) are one cycle slower than doing all the
> loads separately, and store is essentially the same as separate stores.
> On 7xx and 7xxx both loads and stores are one cycle slower as multiple
> than as separate insns.

That's in theory, when the instructions are already in the cache.

But loading several instructions into the cache takes time.
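
As a rough illustration (assuming 4-byte instructions and 16-byte
cache lines on the 8xx):

	19 x stw  -> 76 bytes of code, i.e. ~5 cache lines to fetch
	 1 x stmw ->  4 bytes, a single fetch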

Christophe


2018-05-17 13:41:39

by Benjamin Herrenschmidt

Subject: Re: [PATCH v2 2/2] powerpc/32be: use stmw/lmw for registers save/restore in asm

On Thu, 2018-05-17 at 22:10 +1000, Michael Ellerman wrote:
> Christophe Leroy <[email protected]> writes:
> > arch/powerpc/Makefile activates -mmultiple on BE PPC32 configs
> > in order to use multiple-word instructions in function entry/exit.
>
> True, though that could be a lot simpler because the MULTIPLEWORD value
> is only used for PPC32, which is always big endian. I'll send a patch
> for that.

There have been known cases of 4xx LE ports, though none ever made it
upstream ...


2018-05-17 14:28:58

by Segher Boessenkool

Subject: Re: [PATCH v2 2/2] powerpc/32be: use stmw/lmw for registers save/restore in asm

On Thu, May 17, 2018 at 03:27:37PM +0200, Christophe LEROY wrote:
> On 17/05/2018 at 15:15, Segher Boessenkool wrote:
> >> I guess we've been enabling this for all 32-bit targets for ever so it
> >> must be a reasonable option.
> >
> > On 603, load multiple (and string) are one cycle slower than doing all the
> > loads separately, and store is essentially the same as separate stores.
> > On 7xx and 7xxx both loads and stores are one cycle slower as multiple
> > than as separate insns.
>
> That's in theory, when the instructions are already in the cache.
>
> But loading several instructions into the cache takes time.

Yes, of course, that's why I wrote:

> > Load/store multiple are nice for saving/restoring registers.

:-)


Segher

2018-05-21 10:04:52

by Michael Ellerman

Subject: Re: [v2,1/2] powerpc: avoid an unnecessary test and branch in longjmp()

On Tue, 2018-04-17 at 17:08:16 UTC, Christophe Leroy wrote:
> Doing the test at the exit of the function avoids an unnecessary
> test and branch inside longjmp().
>
> Signed-off-by: Christophe Leroy <[email protected]>

Series applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/24c78586cc6798028205e12c34febf

cheers