call_do_irq() and call_do_softirq() are simple enough to be
worth inlining.
Inlining them avoids an mflr/mtlr pair plus a save/reload on stack.
It also allows GCC to keep the saved ksp_limit in a nonvolatile reg.
This is inspired by the S390 arch. Several other arches do more or
less the same. The way the sparc arch does it seems odd, though.
Signed-off-by: Christophe Leroy <[email protected]>
Reviewed-by: Segher Boessenkool <[email protected]>
---
v2: no change.
v3: no change.
v4:
- comment reminding the purpose of the inline asm block.
- added r2 as clobbered reg
v5:
- Limiting the change to PPC32 for now.
- removed r2 from the clobbered regs list (on PPC32 r2 points to current all the time)
- Removed patch 1 and merged ksp_limit handling in here.
---
arch/powerpc/include/asm/irq.h | 2 ++
arch/powerpc/kernel/irq.c | 48 ++++++++++++++++++++++++++++++++++++++++++
arch/powerpc/kernel/misc_32.S | 39 ----------------------------------
3 files changed, 50 insertions(+), 39 deletions(-)
diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
index 814dfab7e392..e4a92f0b4ad4 100644
--- a/arch/powerpc/include/asm/irq.h
+++ b/arch/powerpc/include/asm/irq.h
@@ -56,8 +56,10 @@ extern void *mcheckirq_ctx[NR_CPUS];
extern void *hardirq_ctx[NR_CPUS];
extern void *softirq_ctx[NR_CPUS];
+#ifdef CONFIG_PPC64
void call_do_softirq(void *sp);
void call_do_irq(struct pt_regs *regs, void *sp);
+#endif
extern void do_IRQ(struct pt_regs *regs);
extern void __init init_IRQ(void);
extern void __do_irq(struct pt_regs *regs);
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 5645bc9cbc09..240eca12c71d 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -611,6 +611,54 @@ static inline void check_stack_overflow(void)
#endif
}
+#ifdef CONFIG_PPC32
+static inline void call_do_softirq(const void *sp)
+{
+ register unsigned long ret asm("r3");
+ unsigned long limit = current->thread.ksp_limit;
+
+ /* Adjust the stack limit */
+ current->thread.ksp_limit = (unsigned long)sp;
+
+ /* Temporarily switch r1 to sp, call __do_softirq() then restore r1. */
+ asm volatile(
+ " "PPC_STLU" 1, %2(%1);\n"
+ " mr 1, %1;\n"
+ " bl %3;\n"
+ " "PPC_LL" 1, 0(1);\n" :
+ "=r"(ret) :
+ "b"(sp), "i"(THREAD_SIZE - STACK_FRAME_OVERHEAD), "i"(__do_softirq) :
+ "lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6", "cr7",
+ "r0", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12");
+
+ /* Restore stack limit */
+ current->thread.ksp_limit = limit;
+}
+
+static inline void call_do_irq(struct pt_regs *regs, void *sp)
+{
+ register unsigned long r3 asm("r3") = (unsigned long)regs;
+ unsigned long limit = current->thread.ksp_limit;
+
+ /* Adjust the stack limit */
+ current->thread.ksp_limit = (unsigned long)sp;
+
+ /* Temporarily switch r1 to sp, call __do_irq() then restore r1 */
+ asm volatile(
+ " "PPC_STLU" 1, %2(%1);\n"
+ " mr 1, %1;\n"
+ " bl %3;\n"
+ " "PPC_LL" 1, 0(1);\n" :
+ "+r"(r3) :
+ "b"(sp), "i"(THREAD_SIZE - STACK_FRAME_OVERHEAD), "i"(__do_irq) :
+ "lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6", "cr7",
+ "r0", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12");
+
+ /* Restore stack limit */
+ current->thread.ksp_limit = limit;
+}
+#endif
+
void __do_irq(struct pt_regs *regs)
{
unsigned int irq;
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index d80212be8698..341a3cd199cb 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -28,45 +28,6 @@
.text
/*
- * We store the saved ksp_limit in the unused part
- * of the STACK_FRAME_OVERHEAD
- */
-_GLOBAL(call_do_softirq)
- mflr r0
- stw r0,4(r1)
- lwz r10,THREAD+KSP_LIMIT(r2)
- stw r3, THREAD+KSP_LIMIT(r2)
- stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3)
- mr r1,r3
- stw r10,8(r1)
- bl __do_softirq
- lwz r10,8(r1)
- lwz r1,0(r1)
- lwz r0,4(r1)
- stw r10,THREAD+KSP_LIMIT(r2)
- mtlr r0
- blr
-
-/*
- * void call_do_irq(struct pt_regs *regs, void *sp);
- */
-_GLOBAL(call_do_irq)
- mflr r0
- stw r0,4(r1)
- lwz r10,THREAD+KSP_LIMIT(r2)
- stw r4, THREAD+KSP_LIMIT(r2)
- stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4)
- mr r1,r4
- stw r10,8(r1)
- bl __do_irq
- lwz r10,8(r1)
- lwz r1,0(r1)
- lwz r0,4(r1)
- stw r10,THREAD+KSP_LIMIT(r2)
- mtlr r0
- blr
-
-/*
* This returns the high 64 bits of the product of two 64-bit numbers.
*/
_GLOBAL(mulhdu)
--
2.13.3
On Sat, Dec 07, 2019 at 05:20:04PM +0000, Christophe Leroy wrote:
> call_do_irq() and call_do_softirq() are simple enough to be
> worth inlining.
>
> Inlining them avoids an mflr/mtlr pair plus a save/reload on stack.
> It also allows GCC to keep the saved ksp_limit in an nonvolatile reg.
>
> This is inspired from S390 arch. Several other arches do more or
> less the same. The way sparc arch does seems odd thought.
Any reason you only do this for 32-bit and not 64-bit as well?
Le 12/12/2019 à 13:52, Christoph Hellwig a écrit :
> On Sat, Dec 07, 2019 at 05:20:04PM +0000, Christophe Leroy wrote:
>> call_do_irq() and call_do_softirq() are simple enough to be
>> worth inlining.
>>
>> Inlining them avoids an mflr/mtlr pair plus a save/reload on stack.
>> It also allows GCC to keep the saved ksp_limit in an nonvolatile reg.
>>
>> This is inspired from S390 arch. Several other arches do more or
>> less the same. The way sparc arch does seems odd thought.
>
> Any reason you only do this for 32-bit and not 64-bit as well?
>
Yes ... There has been a long discussion on this in v4, see
https://patchwork.ozlabs.org/patch/1174288/
The problem is that on PPC64, the r2 register is used as the TOC pointer
and it is apparently not straightforward to make sure the caller and the
callee are using the same TOC.
On PPC32 it's simpler: r2 is the current task_struct at all times, it
never changes.
Christophe
Hi Michael,
Le 07/12/2019 à 18:20, Christophe Leroy a écrit :
> call_do_irq() and call_do_softirq() are simple enough to be
> worth inlining.
>
> Inlining them avoids an mflr/mtlr pair plus a save/reload on stack.
> It also allows GCC to keep the saved ksp_limit in an nonvolatile reg.
>
> This is inspired from S390 arch. Several other arches do more or
> less the same. The way sparc arch does seems odd thought.
>
> Signed-off-by: Christophe Leroy <[email protected]>
> Reviewed-by: Segher Boessenkool <[email protected]>
Is there anything more I need to do for this patch to get merged?
Thanks
Christophe
>
> ---
> v2: no change.
> v3: no change.
> v4:
> - comment reminding the purpose of the inline asm block.
> - added r2 as clobbered reg
> v5:
> - Limiting the change to PPC32 for now.
> - removed r2 from the clobbered regs list (on PPC32 r2 points to current all the time)
> - Removed patch 1 and merged ksp_limit handling in here.
> ---
> arch/powerpc/include/asm/irq.h | 2 ++
> arch/powerpc/kernel/irq.c | 48 ++++++++++++++++++++++++++++++++++++++++++
> arch/powerpc/kernel/misc_32.S | 39 ----------------------------------
> 3 files changed, 50 insertions(+), 39 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
> index 814dfab7e392..e4a92f0b4ad4 100644
> --- a/arch/powerpc/include/asm/irq.h
> +++ b/arch/powerpc/include/asm/irq.h
> @@ -56,8 +56,10 @@ extern void *mcheckirq_ctx[NR_CPUS];
> extern void *hardirq_ctx[NR_CPUS];
> extern void *softirq_ctx[NR_CPUS];
>
> +#ifdef CONFIG_PPC64
> void call_do_softirq(void *sp);
> void call_do_irq(struct pt_regs *regs, void *sp);
> +#endif
> extern void do_IRQ(struct pt_regs *regs);
> extern void __init init_IRQ(void);
> extern void __do_irq(struct pt_regs *regs);
> diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
> index 5645bc9cbc09..240eca12c71d 100644
> --- a/arch/powerpc/kernel/irq.c
> +++ b/arch/powerpc/kernel/irq.c
> @@ -611,6 +611,54 @@ static inline void check_stack_overflow(void)
> #endif
> }
>
> +#ifdef CONFIG_PPC32
> +static inline void call_do_softirq(const void *sp)
> +{
> + register unsigned long ret asm("r3");
> + unsigned long limit = current->thread.ksp_limit;
> +
> + /* Adjust the stack limit */
> + current->thread.ksp_limit = (unsigned long)sp;
> +
> + /* Temporarily switch r1 to sp, call __do_softirq() then restore r1. */
> + asm volatile(
> + " "PPC_STLU" 1, %2(%1);\n"
> + " mr 1, %1;\n"
> + " bl %3;\n"
> + " "PPC_LL" 1, 0(1);\n" :
> + "=r"(ret) :
> + "b"(sp), "i"(THREAD_SIZE - STACK_FRAME_OVERHEAD), "i"(__do_softirq) :
> + "lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6", "cr7",
> + "r0", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12");
> +
> + /* Restore stack limit */
> + current->thread.ksp_limit = limit;
> +}
> +
> +static inline void call_do_irq(struct pt_regs *regs, void *sp)
> +{
> + register unsigned long r3 asm("r3") = (unsigned long)regs;
> + unsigned long limit = current->thread.ksp_limit;
> +
> + /* Adjust the stack limit */
> + current->thread.ksp_limit = (unsigned long)sp;
> +
> + /* Temporarily switch r1 to sp, call __do_irq() then restore r1 */
> + asm volatile(
> + " "PPC_STLU" 1, %2(%1);\n"
> + " mr 1, %1;\n"
> + " bl %3;\n"
> + " "PPC_LL" 1, 0(1);\n" :
> + "+r"(r3) :
> + "b"(sp), "i"(THREAD_SIZE - STACK_FRAME_OVERHEAD), "i"(__do_irq) :
> + "lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6", "cr7",
> + "r0", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12");
> +
> + /* Restore stack limit */
> + current->thread.ksp_limit = limit;
> +}
> +#endif
> +
> void __do_irq(struct pt_regs *regs)
> {
> unsigned int irq;
> diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
> index d80212be8698..341a3cd199cb 100644
> --- a/arch/powerpc/kernel/misc_32.S
> +++ b/arch/powerpc/kernel/misc_32.S
> @@ -28,45 +28,6 @@
> .text
>
> /*
> - * We store the saved ksp_limit in the unused part
> - * of the STACK_FRAME_OVERHEAD
> - */
> -_GLOBAL(call_do_softirq)
> - mflr r0
> - stw r0,4(r1)
> - lwz r10,THREAD+KSP_LIMIT(r2)
> - stw r3, THREAD+KSP_LIMIT(r2)
> - stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3)
> - mr r1,r3
> - stw r10,8(r1)
> - bl __do_softirq
> - lwz r10,8(r1)
> - lwz r1,0(r1)
> - lwz r0,4(r1)
> - stw r10,THREAD+KSP_LIMIT(r2)
> - mtlr r0
> - blr
> -
> -/*
> - * void call_do_irq(struct pt_regs *regs, void *sp);
> - */
> -_GLOBAL(call_do_irq)
> - mflr r0
> - stw r0,4(r1)
> - lwz r10,THREAD+KSP_LIMIT(r2)
> - stw r4, THREAD+KSP_LIMIT(r2)
> - stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4)
> - mr r1,r4
> - stw r10,8(r1)
> - bl __do_irq
> - lwz r10,8(r1)
> - lwz r1,0(r1)
> - lwz r0,4(r1)
> - stw r10,THREAD+KSP_LIMIT(r2)
> - mtlr r0
> - blr
> -
> -/*
> * This returns the high 64 bits of the product of two 64-bit numbers.
> */
> _GLOBAL(mulhdu)
>