2019-10-10 05:37:03

by Christophe Leroy

[permalink] [raw]
Subject: [PATCH v4 2/2] powerpc/irq: inline call_do_irq() and call_do_softirq()

call_do_irq() and call_do_softirq() are quite similar on PPC32 and
PPC64 and are simple enough to be worth inlining.

Inlining them avoids an mflr/mtlr pair plus a save/reload on stack.

This is inspired by the S390 arch. Several other arches do more or
less the same. The way the sparc arch does it seems odd though.

Signed-off-by: Christophe Leroy <[email protected]>
Reviewed-by: Segher Boessenkool <[email protected]>

---
v2: no change.
v3: no change.
v4:
- comment reminding the purpose of the inline asm block.
- added r2 as clobbered reg
---
arch/powerpc/include/asm/irq.h | 2 --
arch/powerpc/kernel/irq.c | 32 ++++++++++++++++++++++++++++++++
arch/powerpc/kernel/misc_32.S | 25 -------------------------
arch/powerpc/kernel/misc_64.S | 22 ----------------------
4 files changed, 32 insertions(+), 49 deletions(-)

diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
index 0c6469983c66..10476d5283dc 100644
--- a/arch/powerpc/include/asm/irq.h
+++ b/arch/powerpc/include/asm/irq.h
@@ -57,8 +57,6 @@ extern void *mcheckirq_ctx[NR_CPUS];
extern void *hardirq_ctx[NR_CPUS];
extern void *softirq_ctx[NR_CPUS];

-void call_do_softirq(void *sp);
-void call_do_irq(struct pt_regs *regs, void *sp);
extern void do_IRQ(struct pt_regs *regs);
extern void __init init_IRQ(void);
extern void __do_irq(struct pt_regs *regs);
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 04204be49577..d62fe18405a0 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -642,6 +642,22 @@ void __do_irq(struct pt_regs *regs)
irq_exit();
}

+static inline void call_do_irq(struct pt_regs *regs, void *sp)
+{
+ register unsigned long r3 asm("r3") = (unsigned long)regs;
+
+ /* Temporarily switch r1 to sp, call __do_irq() then restore r1 */
+ asm volatile(
+ " "PPC_STLU" 1, %2(%1);\n"
+ " mr 1, %1;\n"
+ " bl %3;\n"
+ " "PPC_LL" 1, 0(1);\n" :
+ "+r"(r3) :
+ "b"(sp), "i"(THREAD_SIZE - STACK_FRAME_OVERHEAD), "i"(__do_irq) :
+ "lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6", "cr7",
+ "r0", "r2", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12");
+}
+
void do_IRQ(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
@@ -686,6 +702,22 @@ void *mcheckirq_ctx[NR_CPUS] __read_mostly;
void *softirq_ctx[NR_CPUS] __read_mostly;
void *hardirq_ctx[NR_CPUS] __read_mostly;

+static inline void call_do_softirq(const void *sp)
+{
+ register unsigned long ret asm("r3");
+
+ /* Temporarily switch r1 to sp, call __do_softirq() then restore r1 */
+ asm volatile(
+ " "PPC_STLU" 1, %2(%1);\n"
+ " mr 1, %1;\n"
+ " bl %3;\n"
+ " "PPC_LL" 1, 0(1);\n" :
+ "=r"(ret) :
+ "b"(sp), "i"(THREAD_SIZE - STACK_FRAME_OVERHEAD), "i"(__do_softirq) :
+ "lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6", "cr7",
+ "r0", "r2", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12");
+}
+
void do_softirq_own_stack(void)
{
void *irqsp = softirq_ctx[smp_processor_id()];
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index a5422f7782b3..307307b57743 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -33,31 +33,6 @@

.text

-_GLOBAL(call_do_softirq)
- mflr r0
- stw r0,4(r1)
- stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3)
- mr r1,r3
- bl __do_softirq
- lwz r1,0(r1)
- lwz r0,4(r1)
- mtlr r0
- blr
-
-/*
- * void call_do_irq(struct pt_regs *regs, void *sp);
- */
-_GLOBAL(call_do_irq)
- mflr r0
- stw r0,4(r1)
- stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4)
- mr r1,r4
- bl __do_irq
- lwz r1,0(r1)
- lwz r0,4(r1)
- mtlr r0
- blr
-
/*
* This returns the high 64 bits of the product of two 64-bit numbers.
*/
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index b55a7b4cb543..69fd714a5236 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -27,28 +27,6 @@

.text

-_GLOBAL(call_do_softirq)
- mflr r0
- std r0,16(r1)
- stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3)
- mr r1,r3
- bl __do_softirq
- ld r1,0(r1)
- ld r0,16(r1)
- mtlr r0
- blr
-
-_GLOBAL(call_do_irq)
- mflr r0
- std r0,16(r1)
- stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4)
- mr r1,r4
- bl __do_irq
- ld r1,0(r1)
- ld r0,16(r1)
- mtlr r0
- blr
-
.section ".toc","aw"
PPC64_CACHES:
.tc ppc64_caches[TC],ppc64_caches
--
2.13.3


2019-11-21 06:17:27

by Michael Ellerman

[permalink] [raw]
Subject: Re: [PATCH v4 2/2] powerpc/irq: inline call_do_irq() and call_do_softirq()

Christophe Leroy <[email protected]> writes:

> call_do_irq() and call_do_softirq() are quite similar on PPC32 and
> PPC64 and are simple enough to be worth inlining.
>
> Inlining them avoids an mflr/mtlr pair plus a save/reload on stack.
>
> This is inspired from S390 arch. Several other arches do more or
> less the same. The way sparc arch does seems odd thought.
>
> Signed-off-by: Christophe Leroy <[email protected]>
> Reviewed-by: Segher Boessenkool <[email protected]>
>
> ---
> v2: no change.
> v3: no change.
> v4:
> - comment reminding the purpose of the inline asm block.
> - added r2 as clobbered reg

That breaks 64-bit with GCC9:

arch/powerpc/kernel/irq.c: In function 'do_IRQ':
arch/powerpc/kernel/irq.c:650:2: error: PIC register clobbered by 'r2' in 'asm'
650 | asm volatile(
| ^~~
arch/powerpc/kernel/irq.c: In function 'do_softirq_own_stack':
arch/powerpc/kernel/irq.c:711:2: error: PIC register clobbered by 'r2' in 'asm'
711 | asm volatile(
| ^~~


> diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
> index 04204be49577..d62fe18405a0 100644
> --- a/arch/powerpc/kernel/irq.c
> +++ b/arch/powerpc/kernel/irq.c
> @@ -642,6 +642,22 @@ void __do_irq(struct pt_regs *regs)
> irq_exit();
> }
>
> +static inline void call_do_irq(struct pt_regs *regs, void *sp)
> +{
> + register unsigned long r3 asm("r3") = (unsigned long)regs;
> +
> + /* Temporarily switch r1 to sp, call __do_irq() then restore r1 */
> + asm volatile(
> + " "PPC_STLU" 1, %2(%1);\n"
> + " mr 1, %1;\n"
> + " bl %3;\n"
> + " "PPC_LL" 1, 0(1);\n" :
> + "+r"(r3) :
> + "b"(sp), "i"(THREAD_SIZE - STACK_FRAME_OVERHEAD), "i"(__do_irq) :
> + "lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6", "cr7",
> + "r0", "r2", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12");
> +}

If we add a nop after the bl, so the linker could insert a TOC restore,
then I don't think there's any circumstance under which we expect this
to actually clobber r2, is there?

cheers

2019-11-21 10:18:14

by Segher Boessenkool

[permalink] [raw]
Subject: Re: [PATCH v4 2/2] powerpc/irq: inline call_do_irq() and call_do_softirq()

On Thu, Nov 21, 2019 at 05:14:45PM +1100, Michael Ellerman wrote:
> Christophe Leroy <[email protected]> writes:
> That breaks 64-bit with GCC9:
>
> arch/powerpc/kernel/irq.c: In function 'do_IRQ':
> arch/powerpc/kernel/irq.c:650:2: error: PIC register clobbered by 'r2' in 'asm'
> 650 | asm volatile(
> | ^~~
> arch/powerpc/kernel/irq.c: In function 'do_softirq_own_stack':
> arch/powerpc/kernel/irq.c:711:2: error: PIC register clobbered by 'r2' in 'asm'
> 711 | asm volatile(
> | ^~~
>
>
> > diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
> > index 04204be49577..d62fe18405a0 100644
> > --- a/arch/powerpc/kernel/irq.c
> > +++ b/arch/powerpc/kernel/irq.c
> > @@ -642,6 +642,22 @@ void __do_irq(struct pt_regs *regs)
> > irq_exit();
> > }
> >
> > +static inline void call_do_irq(struct pt_regs *regs, void *sp)
> > +{
> > + register unsigned long r3 asm("r3") = (unsigned long)regs;
> > +
> > + /* Temporarily switch r1 to sp, call __do_irq() then restore r1 */
> > + asm volatile(
> > + " "PPC_STLU" 1, %2(%1);\n"
> > + " mr 1, %1;\n"
> > + " bl %3;\n"
> > + " "PPC_LL" 1, 0(1);\n" :
> > + "+r"(r3) :
> > + "b"(sp), "i"(THREAD_SIZE - STACK_FRAME_OVERHEAD), "i"(__do_irq) :
> > + "lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6", "cr7",
> > + "r0", "r2", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12");
> > +}
>
> If we add a nop after the bl, so the linker could insert a TOC restore,
> then I don't think there's any circumstance under which we expect this
> to actually clobber r2, is there?

That is mostly correct.

If call_do_irq was a no-inline function, there would not be problems.

What TOC does __do_irq require in r2 on entry, and what will be there
when it returns?


Segher

2019-11-25 10:35:02

by Michael Ellerman

[permalink] [raw]
Subject: Re: [PATCH v4 2/2] powerpc/irq: inline call_do_irq() and call_do_softirq()

Segher Boessenkool <[email protected]> writes:
> On Thu, Nov 21, 2019 at 05:14:45PM +1100, Michael Ellerman wrote:
>> Christophe Leroy <[email protected]> writes:
>> That breaks 64-bit with GCC9:
>>
>> arch/powerpc/kernel/irq.c: In function 'do_IRQ':
>> arch/powerpc/kernel/irq.c:650:2: error: PIC register clobbered by 'r2' in 'asm'
>> 650 | asm volatile(
>> | ^~~
>> arch/powerpc/kernel/irq.c: In function 'do_softirq_own_stack':
>> arch/powerpc/kernel/irq.c:711:2: error: PIC register clobbered by 'r2' in 'asm'
>> 711 | asm volatile(
>> | ^~~
>>
>>
>> > diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
>> > index 04204be49577..d62fe18405a0 100644
>> > --- a/arch/powerpc/kernel/irq.c
>> > +++ b/arch/powerpc/kernel/irq.c
>> > @@ -642,6 +642,22 @@ void __do_irq(struct pt_regs *regs)
>> > irq_exit();
>> > }
>> >
>> > +static inline void call_do_irq(struct pt_regs *regs, void *sp)
>> > +{
>> > + register unsigned long r3 asm("r3") = (unsigned long)regs;
>> > +
>> > + /* Temporarily switch r1 to sp, call __do_irq() then restore r1 */
>> > + asm volatile(
>> > + " "PPC_STLU" 1, %2(%1);\n"
>> > + " mr 1, %1;\n"
>> > + " bl %3;\n"
>> > + " "PPC_LL" 1, 0(1);\n" :
>> > + "+r"(r3) :
>> > + "b"(sp), "i"(THREAD_SIZE - STACK_FRAME_OVERHEAD), "i"(__do_irq) :
>> > + "lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6", "cr7",
>> > + "r0", "r2", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12");
>> > +}
>>
>> If we add a nop after the bl, so the linker could insert a TOC restore,
>> then I don't think there's any circumstance under which we expect this
>> to actually clobber r2, is there?
>
> That is mostly correct.

That's the standard I aspire to :P

> If call_do_irq was a no-inline function, there would not be problems.
>
> What TOC does __do_irq require in r2 on entry, and what will be there
> when it returns?

The kernel TOC, and also the kernel TOC, unless something's gone wrong
or I'm missing something.

cheers

2019-11-25 18:38:27

by Segher Boessenkool

[permalink] [raw]
Subject: Re: [PATCH v4 2/2] powerpc/irq: inline call_do_irq() and call_do_softirq()

On Mon, Nov 25, 2019 at 09:32:23PM +1100, Michael Ellerman wrote:
> Segher Boessenkool <[email protected]> writes:
> >> > +static inline void call_do_irq(struct pt_regs *regs, void *sp)
> >> > +{
> >> > + register unsigned long r3 asm("r3") = (unsigned long)regs;
> >> > +
> >> > + /* Temporarily switch r1 to sp, call __do_irq() then restore r1 */
> >> > + asm volatile(
> >> > + " "PPC_STLU" 1, %2(%1);\n"
> >> > + " mr 1, %1;\n"
> >> > + " bl %3;\n"
> >> > + " "PPC_LL" 1, 0(1);\n" :
> >> > + "+r"(r3) :
> >> > + "b"(sp), "i"(THREAD_SIZE - STACK_FRAME_OVERHEAD), "i"(__do_irq) :
> >> > + "lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6", "cr7",
> >> > + "r0", "r2", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12");
> >> > +}
> >>
> >> If we add a nop after the bl, so the linker could insert a TOC restore,
> >> then I don't think there's any circumstance under which we expect this
> >> to actually clobber r2, is there?
> >
> > That is mostly correct.
>
> That's the standard I aspire to :P
>
> > If call_do_irq was a no-inline function, there would not be problems.
> >
> > What TOC does __do_irq require in r2 on entry, and what will be there
> > when it returns?
>
> The kernel TOC, and also the kernel TOC, unless something's gone wrong
> or I'm missing something.

If that is the case, we can just do the bl, no nop at all? And that works
for all of our ABIs.

If we can be certain that we have the kernel TOC in r2 on entry to
call_do_irq, that is! (Or it establishes it itself).


Segher

2019-11-27 13:54:02

by Christophe Leroy

[permalink] [raw]
Subject: Re: [PATCH v4 2/2] powerpc/irq: inline call_do_irq() and call_do_softirq()



Le 25/11/2019 à 15:25, Segher Boessenkool a écrit :
> On Mon, Nov 25, 2019 at 09:32:23PM +1100, Michael Ellerman wrote:
>> Segher Boessenkool <[email protected]> writes:
>>>>> +static inline void call_do_irq(struct pt_regs *regs, void *sp)
>>>>> +{
>>>>> + register unsigned long r3 asm("r3") = (unsigned long)regs;
>>>>> +
>>>>> + /* Temporarily switch r1 to sp, call __do_irq() then restore r1 */
>>>>> + asm volatile(
>>>>> + " "PPC_STLU" 1, %2(%1);\n"
>>>>> + " mr 1, %1;\n"
>>>>> + " bl %3;\n"
>>>>> + " "PPC_LL" 1, 0(1);\n" :
>>>>> + "+r"(r3) :
>>>>> + "b"(sp), "i"(THREAD_SIZE - STACK_FRAME_OVERHEAD), "i"(__do_irq) :
>>>>> + "lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6", "cr7",
>>>>> + "r0", "r2", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12");
>>>>> +}
>>>>
>>>> If we add a nop after the bl, so the linker could insert a TOC restore,
>>>> then I don't think there's any circumstance under which we expect this
>>>> to actually clobber r2, is there?
>>>
>>> That is mostly correct.
>>
>> That's the standard I aspire to :P
>>
>>> If call_do_irq was a no-inline function, there would not be problems.
>>>
>>> What TOC does __do_irq require in r2 on entry, and what will be there
>>> when it returns?
>>
>> The kernel TOC, and also the kernel TOC, unless something's gone wrong
>> or I'm missing something.
>
> If that is the case, we can just do the bl, no nop at all? And that works
> for all of our ABIs.
>
> If we can be certain that we have the kernel TOC in r2 on entry to
> call_do_irq, that is! (Or it establishes it itself).

So what do we do ? We just drop the "r2" clobber ?

Otherwise, to be on the safe side we can just save r2 in a local var
before the bl and restore it after. I guess it won't collapse CPU time
on a performant PPC64.

Christophe

2019-11-27 15:19:36

by Christophe Leroy

[permalink] [raw]
Subject: Re: [PATCH v4 2/2] powerpc/irq: inline call_do_irq() and call_do_softirq()



Le 27/11/2019 à 15:59, Segher Boessenkool a écrit :
> On Wed, Nov 27, 2019 at 02:50:30PM +0100, Christophe Leroy wrote:
>> So what do we do ? We just drop the "r2" clobber ?
>
> You have to make sure your asm code works for all ABIs. This is quite
> involved if you do a call to an external function. The compiler does
> *not* see this call, so you will have to make sure that all that the
> compiler and linker do will work, or prevent some of those things (say,
> inlining of the function containing the call).

But the whole purpose of the patch is to inline the call to __do_irq()
in order to avoid the trampoline function.

>
>> Otherwise, to be on the safe side we can just save r2 in a local var
>> before the bl and restore it after. I guess it won't collapse CPU time
>> on a performant PPC64.
>
> That does not fix everything. The called function requires a specific
> value in r2 on entry.

Euh ... but there is nothing like that when using existing
call_do_irq(). How does GCC know that call_do_irq() has same TOC as
__do_irq() ?

>
> So all this needs verification. Hopefully you can get away with just
> not clobbering r2 (and not adding a nop after the bl), sure. But this
> needs to be checked.
>
> Changing control flow inside inline assembler always is problematic.
> Another problem in this case (on all ABIs) is that the compiler does
> not see you call __do_irq. Again, you can probably get away with that
> too, but :-)

Anyway it sees I reference it, as it is in input arguments. Isn't it
enough ?

Christophe

2019-11-27 15:42:12

by Segher Boessenkool

[permalink] [raw]
Subject: Re: [PATCH v4 2/2] powerpc/irq: inline call_do_irq() and call_do_softirq()

On Wed, Nov 27, 2019 at 02:50:30PM +0100, Christophe Leroy wrote:
> So what do we do ? We just drop the "r2" clobber ?

You have to make sure your asm code works for all ABIs. This is quite
involved if you do a call to an external function. The compiler does
*not* see this call, so you will have to make sure that all that the
compiler and linker do will work, or prevent some of those things (say,
inlining of the function containing the call).

> Otherwise, to be on the safe side we can just save r2 in a local var
> before the bl and restore it after. I guess it won't collapse CPU time
> on a performant PPC64.

That does not fix everything. The called function requires a specific
value in r2 on entry.

So all this needs verification. Hopefully you can get away with just
not clobbering r2 (and not adding a nop after the bl), sure. But this
needs to be checked.

Changing control flow inside inline assembler always is problematic.
Another problem in this case (on all ABIs) is that the compiler does
not see you call __do_irq. Again, you can probably get away with that
too, but :-)


Segher

2019-11-29 18:50:27

by Segher Boessenkool

[permalink] [raw]
Subject: Re: [PATCH v4 2/2] powerpc/irq: inline call_do_irq() and call_do_softirq()

Hi!

On Wed, Nov 27, 2019 at 04:15:15PM +0100, Christophe Leroy wrote:
> Le 27/11/2019 à 15:59, Segher Boessenkool a écrit :
> >On Wed, Nov 27, 2019 at 02:50:30PM +0100, Christophe Leroy wrote:
> >>So what do we do ? We just drop the "r2" clobber ?
> >
> >You have to make sure your asm code works for all ABIs. This is quite
> >involved if you do a call to an external function. The compiler does
> >*not* see this call, so you will have to make sure that all that the
> >compiler and linker do will work, or prevent some of those things (say,
> >inlining of the function containing the call).
>
> But the whole purpose of the patch is to inline the call to __do_irq()
> in order to avoid the trampoline function.

Yes, so you call __do_irq. You have to make sure that what you tell the
compiler -- and what you *don't* tell the compiler -- works with what the
ABIs require, and what the called function expects and provides.

> >That does not fix everything. The called function requires a specific
> >value in r2 on entry.
>
> Euh ... but there is nothing like that when using existing
> call_do_irq().

> How does GCC know that call_do_irq() has same TOC as __do_irq() ?

The existing call_do_irq isn't C code. It doesn't do anything with r2,
as far as I can see; __do_irq just gets whatever the caller of call_do_irq
has.

So I guess all the callers of call_do_irq have the correct r2 value always
already? In that case everything Just Works.

> >So all this needs verification. Hopefully you can get away with just
> >not clobbering r2 (and not adding a nop after the bl), sure. But this
> >needs to be checked.
> >
> >Changing control flow inside inline assembler always is problematic.
> >Another problem in this case (on all ABIs) is that the compiler does
> >not see you call __do_irq. Again, you can probably get away with that
> >too, but :-)
>
> Anyway it sees I reference it, as it is in input arguments. Isn't it
> enough ?

It is enough for some things, sure. But not all.


Segher

2019-12-04 04:34:15

by Christophe Leroy

[permalink] [raw]
Subject: Re: [PATCH v4 2/2] powerpc/irq: inline call_do_irq() and call_do_softirq()

Hi,

Le 29/11/2019 à 19:46, Segher Boessenkool a écrit :
> Hi!
>
> On Wed, Nov 27, 2019 at 04:15:15PM +0100, Christophe Leroy wrote:
>> Le 27/11/2019 à 15:59, Segher Boessenkool a écrit :
>>> On Wed, Nov 27, 2019 at 02:50:30PM +0100, Christophe Leroy wrote:
>>>> So what do we do ? We just drop the "r2" clobber ?
>>>
>>> You have to make sure your asm code works for all ABIs. This is quite
>>> involved if you do a call to an external function. The compiler does
>>> *not* see this call, so you will have to make sure that all that the
>>> compiler and linker do will work, or prevent some of those things (say,
>>> inlining of the function containing the call).
>>
>> But the whole purpose of the patch is to inline the call to __do_irq()
>> in order to avoid the trampoline function.
>
> Yes, so you call __do_irq. You have to make sure that what you tell the
> compiler -- and what you *don't tell the compiler -- works with what the
> ABIs require, and what the called function expects and provides.
>
>>> That does not fix everything. The called function requires a specific
>>> value in r2 on entry.
>>
>> Euh ... but there is nothing like that when using existing
>> call_do_irq().
>
>> How does GCC know that call_do_irq() has same TOC as __do_irq() ?
>
> The existing call_do_irq isn't C code. It doesn't do anything with r2,
> as far as I can see; __do_irq just gets whatever the caller of call_do_irq
> has.
>
> So I guess all the callers of call_do_irq have the correct r2 value always
> already? In that case everything Just Works.

Indeed, there is only one caller for call_do_irq() which is do_IRQ().
And do_IRQ() is also calling __do_irq() directly (when the stack pointer
is already set to IRQ stack). do_IRQ() and __do_irq() are both in
arch/powerpc/kernel/irq.c

As far as I can see when replacing the call to call_do_irq() by a call
to __do_irq(), the compiler doesn't do anything special with r2, and
doesn't add any nop after the bl either, whereas for all calls outside
irq.c, there is a nop added. So I guess that's ok ?

Now that call_do_irq() is inlined, we can even define __do_irq() as static.

And that's the same for do_softirq_own_stack(), it is only called from
do_softirq() which is defined in the same file as __do_softirq()
(kernel/softirq.c)

Christophe

2019-12-06 21:01:07

by Segher Boessenkool

[permalink] [raw]
Subject: Re: [PATCH v4 2/2] powerpc/irq: inline call_do_irq() and call_do_softirq()

On Wed, Dec 04, 2019 at 05:32:54AM +0100, Christophe Leroy wrote:
> Le 29/11/2019 à 19:46, Segher Boessenkool a écrit :
> >The existing call_do_irq isn't C code. It doesn't do anything with r2,
> >as far as I can see; __do_irq just gets whatever the caller of call_do_irq
> >has.
> >
> >So I guess all the callers of call_do_irq have the correct r2 value always
> >already? In that case everything Just Works.
>
> Indeed, there is only one caller for call_do_irq() which is do_IRQ().
> And do_IRQ() is also calling __do_irq() directly (when the stack pointer
> is already set to IRQ stack). do_IRQ() and __do_irq() are both in
> arch/powerpc/kernel/irq.c
>
> As far as I can see when replacing the call to call_do_irq() by a call
> to __do_irq(), the compiler doesn't do anything special with r2, and
> doesn't add any nop after the bl either, whereas for all calls outside
> irq.c, there is a nop added. So I guess that's ok ?

If the compiler can see the callee wants the same TOC as the caller has,
it does not arrange to set (and restore) it, no. If it sees it may be
different, it does arrange for that (and the linker then will check if
it actually needs to do anything, and do that if needed).

In this case, the compiler cannot know the callee wants the same TOC,
which complicates things a lot -- but it all works out.

> Now that call_do_irq() is inlined, we can even define __do_irq() as static.
>
> And that's the same for do_softirq_own_stack(), it is only called from
> do_softirq() which is defined in the same file as __do_softirq()
> (kernel/softirq.c)

I think things can still go wrong if any of this is inlined into a kernel
module? Is there anything that prevents this / can this not happen for
some fundamental reason I don't see?


Segher

2019-12-07 09:46:30

by Christophe Leroy

[permalink] [raw]
Subject: Re: [PATCH v4 2/2] powerpc/irq: inline call_do_irq() and call_do_softirq()



Le 06/12/2019 à 21:59, Segher Boessenkool a écrit :
> On Wed, Dec 04, 2019 at 05:32:54AM +0100, Christophe Leroy wrote:
>> Le 29/11/2019 à 19:46, Segher Boessenkool a écrit :
>>> The existing call_do_irq isn't C code. It doesn't do anything with r2,
>>> as far as I can see; __do_irq just gets whatever the caller of call_do_irq
>>> has.
>>>
>>> So I guess all the callers of call_do_irq have the correct r2 value always
>>> already? In that case everything Just Works.
>>
>> Indeed, there is only one caller for call_do_irq() which is do_IRQ().
>> And do_IRQ() is also calling __do_irq() directly (when the stack pointer
>> is already set to IRQ stack). do_IRQ() and __do_irq() are both in
>> arch/powerpc/kernel/irq.c
>>
>> As far as I can see when replacing the call to call_do_irq() by a call
>> to __do_irq(), the compiler doesn't do anything special with r2, and
>> doesn't add any nop after the bl either, whereas for all calls outside
>> irq.c, there is a nop added. So I guess that's ok ?
>
> If the compiler can see the callee wants the same TOC as the caller has,
> it does not arrange to set (and restore) it, no. If it sees it may be
> different, it does arrange for that (and the linker then will check if
> it actually needs to do anything, and do that if needed).
>
> In this case, the compiler cannot know the callee wants the same TOC,
> which complicates thing a lot -- but it all works out.

Do we have a way to check which TOC the functions are using ? Are
there several TOCs at all in kernel code ?

>
>> Now that call_do_irq() is inlined, we can even define __do_irq() as static.
>>
>> And that's the same for do_softirq_own_stack(), it is only called from
>> do_softirq() which is defined in the same file as __do_softirq()
>> (kernel/softirq.c)
>
> I think things can still go wrong if any of this is inlined into a kernel
> module? Is there anything that prevents this / can this not happen for
> some fundamental reason I don't see?

This can't happen can it ?
do_softirq_own_stack() is an outline function, defined in powerpc irq.c
Its only caller is do_softirq() which is an outline function defined in
kernel/softirq.c

That prevents inlining, doesn't it ?


Anyway, until we clarify all this I'll limit my patch to PPC32 which is
where the real benefit is I guess.

At the end, maybe the solution should be to switch to IRQ stack
immediately in the exception entry as x86_64 do ?

And do_softirq_own_stack() could be entirely written in assembly like
x86_64 as well ?

Christophe

2019-12-07 17:43:00

by Segher Boessenkool

[permalink] [raw]
Subject: Re: [PATCH v4 2/2] powerpc/irq: inline call_do_irq() and call_do_softirq()

On Sat, Dec 07, 2019 at 10:42:28AM +0100, Christophe Leroy wrote:
> Le 06/12/2019 à 21:59, Segher Boessenkool a écrit :
> >If the compiler can see the callee wants the same TOC as the caller has,
> >it does not arrange to set (and restore) it, no. If it sees it may be
> >different, it does arrange for that (and the linker then will check if
> >it actually needs to do anything, and do that if needed).
> >
> >In this case, the compiler cannot know the callee wants the same TOC,
> >which complicates thing a lot -- but it all works out.
>
> Do we have a way to make sure which TOC the functions are using ? Is
> there several TOC at all in kernel code ?

Kernel modules have their own TOC, I think?

> >I think things can still go wrong if any of this is inlined into a kernel
> >module? Is there anything that prevents this / can this not happen for
> >some fundamental reason I don't see?
>
> This can't happen can it ?
> do_softirq_own_stack() is an outline function, defined in powerpc irq.c
> Its only caller is do_softirq() which is an outline function defined in
> kernel/softirq.c
>
> That prevents inlining, doesn't it ?

Hopefully, sure. Would be nice if it was clearer that this works... It
is too much like working by chance, the way it is :-(

> Anyway, until we clarify all this I'll limit my patch to PPC32 which is
> where the real benefit is I guess.
>
> At the end, maybe the solution should be to switch to IRQ stack
> immediately in the exception entry as x86_64 do ?
>
> And do_softirq_own_stack() could be entirely written in assembly like
> x86_64 as well ?

Maybe? I'm out of my depth there.


Segher

2019-12-09 10:55:35

by Michael Ellerman

[permalink] [raw]
Subject: Re: [PATCH v4 2/2] powerpc/irq: inline call_do_irq() and call_do_softirq()

Segher Boessenkool <[email protected]> writes:
> On Sat, Dec 07, 2019 at 10:42:28AM +0100, Christophe Leroy wrote:
>> Le 06/12/2019 à 21:59, Segher Boessenkool a écrit :
>> >If the compiler can see the callee wants the same TOC as the caller has,
>> >it does not arrange to set (and restore) it, no. If it sees it may be
>> >different, it does arrange for that (and the linker then will check if
>> >it actually needs to do anything, and do that if needed).
>> >
>> >In this case, the compiler cannot know the callee wants the same TOC,
>> >which complicates thing a lot -- but it all works out.
>>
>> Do we have a way to make sure which TOC the functions are using ? Is
>> there several TOC at all in kernel code ?
>
> Kernel modules have their own TOC, I think?

Yes.

>> >I think things can still go wrong if any of this is inlined into a kernel
>> >module? Is there anything that prevents this / can this not happen for
>> >some fundamental reason I don't see?
>>
>> This can't happen can it ?
>> do_softirq_own_stack() is an outline function, defined in powerpc irq.c
>> Its only caller is do_softirq() which is an outline function defined in
>> kernel/softirq.c
>>
>> That prevents inlining, doesn't it ?
>
> Hopefully, sure. Would be nice if it was clearer that this works... It
> is too much like working by chance, the way it is :-(

There's no way any of that code can end up in a module. Or at least if
there is, that's a bug.

>> Anyway, until we clarify all this I'll limit my patch to PPC32 which is
>> where the real benefit is I guess.
>>
>> At the end, maybe the solution should be to switch to IRQ stack
>> immediately in the exception entry as x86_64 do ?

Yeah that might be cleaner.

cheers

2019-12-19 06:59:36

by Christophe Leroy

[permalink] [raw]
Subject: Re: [PATCH v4 2/2] powerpc/irq: inline call_do_irq() and call_do_softirq()



Le 09/12/2019 à 11:53, Michael Ellerman a écrit :
> Segher Boessenkool <[email protected]> writes:
>> On Sat, Dec 07, 2019 at 10:42:28AM +0100, Christophe Leroy wrote:
>>> Le 06/12/2019 à 21:59, Segher Boessenkool a écrit :
>>>> If the compiler can see the callee wants the same TOC as the caller has,
>>>> it does not arrange to set (and restore) it, no. If it sees it may be
>>>> different, it does arrange for that (and the linker then will check if
>>>> it actually needs to do anything, and do that if needed).
>>>>
>>>> In this case, the compiler cannot know the callee wants the same TOC,
>>>> which complicates thing a lot -- but it all works out.
>>>
>>> Do we have a way to make sure which TOC the functions are using ? Is
>>> there several TOC at all in kernel code ?
>>
>> Kernel modules have their own TOC, I think?
>
> Yes.

Yes, this means that exported functions have to care about that, right ?
And that's the reason why exported assembly functions like copy_page()
use _GLOBAL_TOC() and not _GLOBAL()

But main part of the kernel only has one TOC, so r2 can be assumed
constant for non exported functions, can't it ?

>
>>>> I think things can still go wrong if any of this is inlined into a kernel
>>>> module? Is there anything that prevents this / can this not happen for
>>>> some fundamental reason I don't see?
>>>
>>> This can't happen can it ?
>>> do_softirq_own_stack() is an outline function, defined in powerpc irq.c
>>> Its only caller is do_softirq() which is an outline function defined in
>>> kernel/softirq.c
>>>
>>> That prevents inlining, doesn't it ?
>>
>> Hopefully, sure. Would be nice if it was clearer that this works... It
>> is too much like working by chance, the way it is :-(
>
> There's no way any of that code can end up in a module. Or at least if
> there is, that's a bug.

That's my conclusion as well. So I guess we can consider r2 as constant
over those functions.

>
>>> Anyway, until we clarify all this I'll limit my patch to PPC32 which is
>>> where the real benefit is I guess.
>>>
>>> At the end, maybe the solution should be to switch to IRQ stack
>>> immediately in the exception entry as x86_64 do ?
>
> Yeah that might be cleaner.
>

I prepared a patch for that on PPC32, but it doesn't get rid of the IRQ
stack switch completely because do_IRQ() is also called from other
places like the timer interrupt.

And we will still have the switch for softirqs. We could move
do_softirq_own_stack() to assembly and merge it with call_do_softirq(),
but I find it cleaner to inline call_do_softirq() instead, now that we
have demonstrated that r2 can't change.

Christophe