2019-08-13 10:58:10

by Christophe Leroy

[permalink] [raw]
Subject: [PATCH 1/2] powerpc: rewrite LOAD_REG_IMMEDIATE() as an intelligent macro

Today LOAD_REG_IMMEDIATE() is a basic #define which loads all
parts on a value into a register, including the parts that are NUL.

This means always 2 instructions on PPC32 and always 5 instructions
on PPC64. And those instructions cannot run in parallele as they are
updating the same register.

Ex: LOAD_REG_IMMEDIATE(r1,THREAD_SIZE) in head_64.S results in:

3c 20 00 00 lis r1,0
60 21 00 00 ori r1,r1,0
78 21 07 c6 rldicr r1,r1,32,31
64 21 00 00 oris r1,r1,0
60 21 40 00 ori r1,r1,16384

Rewrite LOAD_REG_IMMEDIATE() with GAS macro in order to skip
the parts that are NUL.

Rename existing LOAD_REG_IMMEDIATE() as LOAD_REG_IMMEDIATE_SYM()
and use that one for loading value of symbols which are not known
at compile time.

Now LOAD_REG_IMMEDIATE(r1,THREAD_SIZE) in head_64.S results in:

38 20 40 00 li r1,16384

Signed-off-by: Christophe Leroy <[email protected]>
---
arch/powerpc/include/asm/ppc_asm.h | 42 +++++++++++++++++++++++++++++++-----
arch/powerpc/kernel/exceptions-64e.S | 10 ++++-----
arch/powerpc/kernel/head_64.S | 2 +-
3 files changed, 43 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index e0637730a8e7..9a7c2ca9b714 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -311,13 +311,43 @@ GLUE(.,name):
addis reg,reg,(name - 0b)@ha; \
addi reg,reg,(name - 0b)@l;

-#ifdef __powerpc64__
-#ifdef HAVE_AS_ATHIGH
+#if defined(__powerpc64__) && defined(HAVE_AS_ATHIGH)
#define __AS_ATHIGH high
#else
#define __AS_ATHIGH h
#endif
-#define LOAD_REG_IMMEDIATE(reg,expr) \
+
+.macro __LOAD_REG_IMMEDIATE_32 r, x
+ .if (\x) >= 0x8000 || (\x) < -0x8000
+ lis \r, (\x)@__AS_ATHIGH
+ .if (\x) & 0xffff != 0
+ ori \r, \r, (\x)@l
+ .endif
+ .else
+ li \r, (\x)@l
+ .endif
+.endm
+
+.macro __LOAD_REG_IMMEDIATE r, x
+ .if \x & ~0xffffffff != 0
+ __LOAD_REG_IMMEDIATE_32 \r, (\x) >> 32
+ rldicr \r, \r, 32, 31
+ .if (\x) & 0xffff0000 != 0
+ oris \r, \r, (\x)@__AS_ATHIGH
+ .endif
+ .if (\x) & 0xffff != 0
+ oris \r, \r, (\x)@l
+ .endif
+ .else
+ __LOAD_REG_IMMEDIATE_32 \r, \x
+ .endif
+.endm
+
+#ifdef __powerpc64__
+
+#define LOAD_REG_IMMEDIATE(reg, expr) __LOAD_REG_IMMEDIATE reg, expr
+
+#define LOAD_REG_IMMEDIATE_SYM(reg,expr) \
lis reg,(expr)@highest; \
ori reg,reg,(expr)@higher; \
rldicr reg,reg,32,31; \
@@ -335,11 +365,13 @@ GLUE(.,name):

#else /* 32-bit */

-#define LOAD_REG_IMMEDIATE(reg,expr) \
+#define LOAD_REG_IMMEDIATE(reg, expr) __LOAD_REG_IMMEDIATE_32 reg, expr
+
+#define LOAD_REG_IMMEDIATE_SYM(reg,expr) \
lis reg,(expr)@ha; \
addi reg,reg,(expr)@l;

-#define LOAD_REG_ADDR(reg,name) LOAD_REG_IMMEDIATE(reg, name)
+#define LOAD_REG_ADDR(reg,name) LOAD_REG_IMMEDIATE_SYM(reg, name)

#define LOAD_REG_ADDRBASE(reg, name) lis reg,name@ha
#define ADDROFF(name) name@l
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index 1cfb3da4a84a..898aae6da167 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -751,8 +751,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
ld r14,interrupt_base_book3e@got(r15)
ld r15,__end_interrupts@got(r15)
#else
- LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e)
- LOAD_REG_IMMEDIATE(r15,__end_interrupts)
+ LOAD_REG_IMMEDIATE_SYM(r14,interrupt_base_book3e)
+ LOAD_REG_IMMEDIATE_SYM(r15,__end_interrupts)
#endif
cmpld cr0,r10,r14
cmpld cr1,r10,r15
@@ -821,8 +821,8 @@ kernel_dbg_exc:
ld r14,interrupt_base_book3e@got(r15)
ld r15,__end_interrupts@got(r15)
#else
- LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e)
- LOAD_REG_IMMEDIATE(r15,__end_interrupts)
+ LOAD_REG_IMMEDIATE_SYM(r14,interrupt_base_book3e)
+ LOAD_REG_IMMEDIATE_SYM(r15,__end_interrupts)
#endif
cmpld cr0,r10,r14
cmpld cr1,r10,r15
@@ -1449,7 +1449,7 @@ a2_tlbinit_code_start:
a2_tlbinit_after_linear_map:

/* Now we branch the new virtual address mapped by this entry */
- LOAD_REG_IMMEDIATE(r3,1f)
+ LOAD_REG_IMMEDIATE_SYM(r3,1f)
mtctr r3
bctr

diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 91d297e696dd..1fd44761e997 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -635,7 +635,7 @@ __after_prom_start:
sub r5,r5,r11
#else
/* just copy interrupts */
- LOAD_REG_IMMEDIATE(r5, FIXED_SYMBOL_ABS_ADDR(__end_interrupts))
+ LOAD_REG_IMMEDIATE_SYM(r5, FIXED_SYMBOL_ABS_ADDR(__end_interrupts))
#endif
b 5f
3:
--
2.13.3


2019-08-13 11:17:15

by Christophe Leroy

[permalink] [raw]
Subject: [PATCH 2/2] powerpc/32: replace LOAD_MSR_KERNEL() by LOAD_REG_IMMEDIATE()

LOAD_MSR_KERNEL() and LOAD_REG_IMMEDIATE() are doing the same thing
in the same way. Drop LOAD_MSR_KERNEL()

Signed-off-by: Christophe Leroy <[email protected]>
---
arch/powerpc/kernel/entry_32.S | 18 +++++++++---------
arch/powerpc/kernel/head_32.h | 21 ++++-----------------
2 files changed, 13 insertions(+), 26 deletions(-)

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 54fab22c9a43..972b05504a0a 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -230,7 +230,7 @@ transfer_to_handler_cont:
*/
lis r12,reenable_mmu@h
ori r12,r12,reenable_mmu@l
- LOAD_MSR_KERNEL(r0, MSR_KERNEL)
+ LOAD_REG_IMMEDIATE(r0, MSR_KERNEL)
mtspr SPRN_SRR0,r12
mtspr SPRN_SRR1,r0
SYNC
@@ -304,7 +304,7 @@ stack_ovf:
addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
lis r9,StackOverflow@ha
addi r9,r9,StackOverflow@l
- LOAD_MSR_KERNEL(r10,MSR_KERNEL)
+ LOAD_REG_IMMEDIATE(r10,MSR_KERNEL)
#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
mtspr SPRN_NRI, r0
#endif
@@ -324,7 +324,7 @@ trace_syscall_entry_irq_off:
bl trace_hardirqs_on

/* Now enable for real */
- LOAD_MSR_KERNEL(r10, MSR_KERNEL | MSR_EE)
+ LOAD_REG_IMMEDIATE(r10, MSR_KERNEL | MSR_EE)
mtmsr r10

REST_GPR(0, r1)
@@ -394,7 +394,7 @@ ret_from_syscall:
#endif
mr r6,r3
/* disable interrupts so current_thread_info()->flags can't change */
- LOAD_MSR_KERNEL(r10,MSR_KERNEL) /* doesn't include MSR_EE */
+ LOAD_REG_IMMEDIATE(r10,MSR_KERNEL) /* doesn't include MSR_EE */
/* Note: We don't bother telling lockdep about it */
SYNC
MTMSRD(r10)
@@ -824,7 +824,7 @@ ret_from_except:
* can't change between when we test it and when we return
* from the interrupt. */
/* Note: We don't bother telling lockdep about it */
- LOAD_MSR_KERNEL(r10,MSR_KERNEL)
+ LOAD_REG_IMMEDIATE(r10,MSR_KERNEL)
SYNC /* Some chip revs have problems here... */
MTMSRD(r10) /* disable interrupts */

@@ -991,7 +991,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
* can restart the exception exit path at the label
* exc_exit_restart below. -- paulus
*/
- LOAD_MSR_KERNEL(r10,MSR_KERNEL & ~MSR_RI)
+ LOAD_REG_IMMEDIATE(r10,MSR_KERNEL & ~MSR_RI)
SYNC
MTMSRD(r10) /* clear the RI bit */
.globl exc_exit_restart
@@ -1066,7 +1066,7 @@ exc_exit_restart_end:
REST_NVGPRS(r1); \
lwz r3,_MSR(r1); \
andi. r3,r3,MSR_PR; \
- LOAD_MSR_KERNEL(r10,MSR_KERNEL); \
+ LOAD_REG_IMMEDIATE(r10,MSR_KERNEL); \
bne user_exc_return; \
lwz r0,GPR0(r1); \
lwz r2,GPR2(r1); \
@@ -1236,7 +1236,7 @@ recheck:
* neither. Those disable/enable cycles used to peek at
* TI_FLAGS aren't advertised.
*/
- LOAD_MSR_KERNEL(r10,MSR_KERNEL)
+ LOAD_REG_IMMEDIATE(r10,MSR_KERNEL)
SYNC
MTMSRD(r10) /* disable interrupts */
lwz r9,TI_FLAGS(r2)
@@ -1329,7 +1329,7 @@ _GLOBAL(enter_rtas)
lwz r4,RTASBASE(r4)
mfmsr r9
stw r9,8(r1)
- LOAD_MSR_KERNEL(r0,MSR_KERNEL)
+ LOAD_REG_IMMEDIATE(r0,MSR_KERNEL)
SYNC /* disable interrupts so SRR0/1 */
MTMSRD(r0) /* don't get trashed */
li r9,MSR_KERNEL & ~(MSR_IR|MSR_DR)
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index 4a692553651f..8abc7783dbe5 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -5,19 +5,6 @@
#include <asm/ptrace.h> /* for STACK_FRAME_REGS_MARKER */

/*
- * MSR_KERNEL is > 0x8000 on 4xx/Book-E since it include MSR_CE.
- */
-.macro __LOAD_MSR_KERNEL r, x
-.if \x >= 0x8000
- lis \r, (\x)@h
- ori \r, \r, (\x)@l
-.else
- li \r, (\x)
-.endif
-.endm
-#define LOAD_MSR_KERNEL(r, x) __LOAD_MSR_KERNEL r, x
-
-/*
* Exception entry code. This code runs with address translation
* turned off, i.e. using physical addresses.
* We assume sprg3 has the physical address of the current
@@ -92,7 +79,7 @@
#ifdef CONFIG_40x
rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */
#else
- LOAD_MSR_KERNEL(r10, MSR_KERNEL & ~(MSR_IR|MSR_DR)) /* can take exceptions */
+ LOAD_REG_IMMEDIATE(r10, MSR_KERNEL & ~(MSR_IR|MSR_DR)) /* can take exceptions */
MTMSRD(r10) /* (except for mach check in rtas) */
#endif
lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
@@ -140,10 +127,10 @@
* otherwise we might risk taking an interrupt before we tell lockdep
* they are enabled.
*/
- LOAD_MSR_KERNEL(r10, MSR_KERNEL)
+ LOAD_REG_IMMEDIATE(r10, MSR_KERNEL)
rlwimi r10, r9, 0, MSR_EE
#else
- LOAD_MSR_KERNEL(r10, MSR_KERNEL | MSR_EE)
+ LOAD_REG_IMMEDIATE(r10, MSR_KERNEL | MSR_EE)
#endif
#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
mtspr SPRN_NRI, r0
@@ -187,7 +174,7 @@
#define EXC_XFER_TEMPLATE(hdlr, trap, msr, tfer, ret) \
li r10,trap; \
stw r10,_TRAP(r11); \
- LOAD_MSR_KERNEL(r10, msr); \
+ LOAD_REG_IMMEDIATE(r10, msr); \
bl tfer; \
.long hdlr; \
.long ret
--
2.13.3

2019-08-14 02:11:57

by Paul Mackerras

[permalink] [raw]
Subject: Re: [PATCH 1/2] powerpc: rewrite LOAD_REG_IMMEDIATE() as an intelligent macro

On Tue, Aug 13, 2019 at 09:59:35AM +0000, Christophe Leroy wrote:

[snip]

> +.macro __LOAD_REG_IMMEDIATE r, x
> + .if \x & ~0xffffffff != 0
> + __LOAD_REG_IMMEDIATE_32 \r, (\x) >> 32
> + rldicr \r, \r, 32, 31
> + .if (\x) & 0xffff0000 != 0
> + oris \r, \r, (\x)@__AS_ATHIGH
> + .endif
> + .if (\x) & 0xffff != 0
> + oris \r, \r, (\x)@l
> + .endif
> + .else
> + __LOAD_REG_IMMEDIATE_32 \r, \x
> + .endif
> +.endm

Doesn't this force all negative constants, even small ones, to use
the long sequence? For example, __LOAD_REG_IMMEDIATE r3, -1 will
generate (as far as I can see):

li r3, -1
rldicr r3, r3, 32, 31
oris r3, r3, 0xffff
ori r3, r3, 0xffff

which seems suboptimal.

Paul.

2019-08-14 09:48:22

by Christophe Leroy

[permalink] [raw]
Subject: Re: [PATCH 1/2] powerpc: rewrite LOAD_REG_IMMEDIATE() as an intelligent macro



Le 14/08/2019 à 04:08, Paul Mackerras a écrit :
> On Tue, Aug 13, 2019 at 09:59:35AM +0000, Christophe Leroy wrote:
>
> [snip]
>
>> +.macro __LOAD_REG_IMMEDIATE r, x
>> + .if \x & ~0xffffffff != 0
>> + __LOAD_REG_IMMEDIATE_32 \r, (\x) >> 32
>> + rldicr \r, \r, 32, 31
>> + .if (\x) & 0xffff0000 != 0
>> + oris \r, \r, (\x)@__AS_ATHIGH
>> + .endif
>> + .if (\x) & 0xffff != 0
>> + oris \r, \r, (\x)@l
>> + .endif
>> + .else
>> + __LOAD_REG_IMMEDIATE_32 \r, \x
>> + .endif
>> +.endm
>
> Doesn't this force all negative constants, even small ones, to use
> the long sequence? For example, __LOAD_REG_IMMEDIATE r3, -1 will
> generate (as far as I can see):
>
> li r3, -1
> rldicr r3, r3, 32, 31
> oris r3, r3, 0xffff
> ori r3, r3, 0xffff
>
> which seems suboptimal.

Ah yes, thanks. And it is also buggy when \x is over 0x80000000 because
lis is a signed ops

I'll send v2

Christophe

2019-08-14 20:13:45

by Segher Boessenkool

[permalink] [raw]
Subject: Re: [PATCH 1/2] powerpc: rewrite LOAD_REG_IMMEDIATE() as an intelligent macro

Hi Christophe,

On Tue, Aug 13, 2019 at 09:59:35AM +0000, Christophe Leroy wrote:
> + rldicr \r, \r, 32, 31

Could you please write this as
sldi \r, \r, 32
? It's much easier to read, imo (it's the exact same instruction).

You can do a lot cheaper sequences if you have a temporary reg, as well
(longest path of 3 insns instead of 5):
lis rt,A
ori rt,B
lis rd,C
ori rd,D
rldimi rd,rt,32,0
to load ABCD.


Segher