2020-04-05 17:47:17

by Christophe Leroy

Subject: [RFC PATCH v2 01/13] powerpc/radix: Make kuap_check_amr() and kuap_restore_amr() generic

In preparation for porting powerpc32 to the C syscall entry/exit,
rename kuap_check_amr() and kuap_restore_amr() to kuap_check()
and kuap_restore(), and move the stubs used when CONFIG_PPC_KUAP
is not selected into the generic asm/kup.h.

Signed-off-by: Christophe Leroy <[email protected]>
---
arch/powerpc/include/asm/book3s/64/kup-radix.h | 12 ++----------
arch/powerpc/include/asm/kup.h | 2 ++
arch/powerpc/kernel/syscall_64.c | 10 +++++-----
3 files changed, 9 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h
index 3bcef989a35d..1f2716a0dcd8 100644
--- a/arch/powerpc/include/asm/book3s/64/kup-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h
@@ -60,13 +60,13 @@
#include <asm/mmu.h>
#include <asm/ptrace.h>

-static inline void kuap_restore_amr(struct pt_regs *regs)
+static inline void kuap_restore(struct pt_regs *regs)
{
if (mmu_has_feature(MMU_FTR_RADIX_KUAP))
mtspr(SPRN_AMR, regs->kuap);
}

-static inline void kuap_check_amr(void)
+static inline void kuap_check(void)
{
if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && mmu_has_feature(MMU_FTR_RADIX_KUAP))
WARN_ON_ONCE(mfspr(SPRN_AMR) != AMR_KUAP_BLOCKED);
@@ -141,14 +141,6 @@ bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
(regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)),
"Bug: %s fault blocked by AMR!", is_write ? "Write" : "Read");
}
-#else /* CONFIG_PPC_KUAP */
-static inline void kuap_restore_amr(struct pt_regs *regs)
-{
-}
-
-static inline void kuap_check_amr(void)
-{
-}
#endif /* CONFIG_PPC_KUAP */

#endif /* __ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h
index 92bcd1a26d73..1100c13b6d9e 100644
--- a/arch/powerpc/include/asm/kup.h
+++ b/arch/powerpc/include/asm/kup.h
@@ -62,6 +62,8 @@ bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
{
return false;
}
+static inline void kuap_restore(struct pt_regs *regs) { }
+static inline void kuap_check(void) { }
#endif /* CONFIG_PPC_KUAP */

static inline void allow_read_from_user(const void __user *from, unsigned long size)
diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c
index cf06eb443a80..72f3d2f0a823 100644
--- a/arch/powerpc/kernel/syscall_64.c
+++ b/arch/powerpc/kernel/syscall_64.c
@@ -2,7 +2,7 @@

#include <linux/err.h>
#include <asm/asm-prototypes.h>
-#include <asm/book3s/64/kup-radix.h>
+#include <asm/kup.h>
#include <asm/cputime.h>
#include <asm/hw_irq.h>
#include <asm/kprobes.h>
@@ -48,7 +48,7 @@ notrace long system_call_exception(long r3, long r4, long r5,
}
#endif

- kuap_check_amr();
+ kuap_check();

/*
* This is not required for the syscall exit path, but makes the
@@ -206,7 +206,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
local_paca->tm_scratch = regs->msr;
#endif

- kuap_check_amr();
+ kuap_check();

account_cpu_user_exit();

@@ -294,7 +294,7 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned
local_paca->tm_scratch = regs->msr;
#endif

- kuap_check_amr();
+ kuap_check();

account_cpu_user_exit();

@@ -372,7 +372,7 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign
* We don't need to restore AMR on the way back to userspace for KUAP.
* The value of AMR only matters while we're in the kernel.
*/
- kuap_restore_amr(regs);
+ kuap_restore(regs);

return ret;
}
--
2.25.0
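
The move follows a common kernel header pattern: the subarch header provides the real implementations when the feature can exist, and the generic header carries empty stubs so that callers need no #ifdef. A minimal sketch of the idea (simplified, not the actual kup.h layout):

/* Generic asm/kup.h, simplified sketch. When KUAP is configured the
 * subarch header (e.g. book3s/64/kup-radix.h) defines the real
 * kuap_restore()/kuap_check(); otherwise these empty stubs apply.
 */
#ifndef CONFIG_PPC_KUAP
static inline void kuap_restore(struct pt_regs *regs) { }
static inline void kuap_check(void) { }
#endif

With the stubs in the generic header, the C syscall code can call kuap_check() unconditionally on every platform.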


2020-04-05 17:47:24

by Christophe Leroy

Subject: [RFC PATCH v2 02/13] powerpc/32s: Create C version of kuap_restore() and kuap_check()

In preparation for porting PPC32 to the C syscall entry/exit,
create C versions of kuap_restore() and kuap_check() for book3s/32.

Signed-off-by: Christophe Leroy <[email protected]>
---
arch/powerpc/include/asm/book3s/32/kup.h | 21 +++++++++++++++++++++
1 file changed, 21 insertions(+)

diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h
index 3c0ba22dc360..c85bc5b56366 100644
--- a/arch/powerpc/include/asm/book3s/32/kup.h
+++ b/arch/powerpc/include/asm/book3s/32/kup.h
@@ -102,6 +102,27 @@ static inline void kuap_update_sr(u32 sr, u32 addr, u32 end)
isync(); /* Context sync required after mtsrin() */
}

+static inline void kuap_restore(struct pt_regs *regs)
+{
+ u32 kuap = current->thread.kuap;
+ u32 addr = kuap & 0xf0000000;
+ u32 end = kuap << 28;
+
+ if (unlikely(!kuap))
+ return;
+
+ current->thread.kuap = 0;
+ kuap_update_sr(mfsrin(addr) & ~SR_KS, addr, end); /* Clear Ks */
+}
+
+static inline void kuap_check(void)
+{
+ if (!IS_ENABLED(CONFIG_PPC_KUAP_DEBUG))
+ return;
+
+ WARN_ON_ONCE(current->thread.kuap != 0);
+}
+
static __always_inline void allow_user_access(void __user *to, const void __user *from,
u32 size, unsigned long dir)
{
--
2.25.0
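
For readers following the bit twiddling: on book3s/32, thread.kuap packs the currently-opened user window as segment numbers, with the start address in the top nibble and the end segment in the low nibble (assuming the encoding written by allow_user_access() in the same header). A worked example:

/* Illustration only. Suppose an earlier allow_user_access() opened
 * user segments 3 and 4 and recorded thread.kuap = 0x30000005.
 */
u32 kuap = 0x30000005;
u32 addr = kuap & 0xf0000000;   /* 0x30000000: start of the window */
u32 end  = kuap << 28;          /* 0x50000000: first address past it */

kuap_restore() then clears Ks on the segment registers covering [addr, end), re-opening the user window that was active when the interrupt was taken.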

2020-04-05 17:57:20

by Christophe Leroy

Subject: [RFC PATCH v2 12/13] powerpc/kernel: Do not unconditionally save non-volatile registers on system call

Before: 347 cycles on null_syscall
After: 327 cycles on null_syscall

Signed-off-by: Christophe Leroy <[email protected]>
---
arch/powerpc/kernel/entry_32.S | 15 +++++++++++++++
arch/powerpc/kernel/head_32.h | 3 +--
arch/powerpc/kernel/syscall.c | 2 +-
3 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 103f5158bc44..b5113593e57f 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -315,13 +315,28 @@ stack_ovf:
RFI
#endif

+save_nvgprs:
+ lwz r11, _TRAP(r1)
+ andi. r12, r11, 1
+ rlwinm r11, r11, 0, ~1
+ beqlr
+ SAVE_NVGPRS(r1)
+ stw r11, _TRAP(r1)
+ blr
+
.globl transfer_to_syscall
transfer_to_syscall:
+ lwz r10, TI_FLAGS(r2)
mr r9, r0
+ andi. r10, r10, _TIF_SYSCALL_DOTRACE
addi r10, r1, STACK_FRAME_OVERHEAD
+ bnel- save_nvgprs
bl system_call_exception
ret_from_syscall:
+ lwz r9, TI_FLAGS(r2)
addi r4, r1, STACK_FRAME_OVERHEAD
+ andi. r0, r9, _TIF_SYSCALL_DOTRACE | _TIF_SINGLESTEP | _TIF_USER_WORK_MASK
+ bnel- save_nvgprs
bl syscall_exit_prepare
lwz r2, _CCR(r1)
lwz r4, _NIP(r1)
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index c301d666a3e5..1cc9a67cb42c 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -174,13 +174,12 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
stw r2,GPR2(r11)
addi r10,r10,STACK_FRAME_REGS_MARKER@l
stw r9,_MSR(r11)
- li r2, \trapno
+ li r2, \trapno + 1
stw r10,8(r11)
stw r2,_TRAP(r11)
SAVE_GPR(0, r11)
SAVE_4GPRS(3, r11)
SAVE_2GPRS(7, r11)
- SAVE_NVGPRS(r11)
addi r11,r1,STACK_FRAME_OVERHEAD
addi r2,r12,-THREAD
stw r11,PT_REGS(r12)
diff --git a/arch/powerpc/kernel/syscall.c b/arch/powerpc/kernel/syscall.c
index 630c423e089a..34fd66fd11a2 100644
--- a/arch/powerpc/kernel/syscall.c
+++ b/arch/powerpc/kernel/syscall.c
@@ -39,7 +39,7 @@ notrace long system_call_exception(long r3, long r4, long r5,
if (!IS_ENABLED(CONFIG_PPC_BOOK3E))
BUG_ON(!(regs->msr & MSR_RI));
BUG_ON(IS_ENABLED(CONFIG_PPC64) && !(regs->msr & MSR_PR));
- BUG_ON(!FULL_REGS(regs));
+ BUG_ON(IS_ENABLED(CONFIG_PPC64) && !FULL_REGS(regs));
#ifdef CONFIG_PPC64
BUG_ON(regs->softe != IRQS_ENABLED);
#endif
--
2.25.0
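
The mechanism behind save_nvgprs: real trap numbers are always even, so bit 0 of the trap word is free to act as a "non-volatile registers not yet saved" marker. Storing \trapno + 1 at entry sets the marker; save_nvgprs tests it, returns straight away (beqlr) when the NVGPRs are already saved, and otherwise saves them and writes the trap word back with bit 0 cleared. On the C side, FULL_REGS() tests that same bit, roughly (a sketch; see asm/ptrace.h for the real PPC32 macro):

#define FULL_REGS(regs) (((regs)->trap & 1) == 0)

This is also why the BUG_ON(!FULL_REGS(regs)) in syscall.c becomes PPC64-only above: on PPC32 a syscall now legitimately enters with partial regs.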

2020-04-05 17:57:20

by Christophe Leroy

Subject: [RFC PATCH v2 06/13] powerpc/syscall: Make syscall_64.c buildable on PPC32

ifdef out the PPC64-specific code to allow building
syscall_64.c on PPC32.

Modify the Makefile to always build syscall.o

Signed-off-by: Christophe Leroy <[email protected]>
---
arch/powerpc/kernel/Makefile | 5 ++---
arch/powerpc/kernel/syscall.c | 10 ++++++++++
2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 8cc3c831dccd..e4be425b7718 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -45,11 +45,10 @@ obj-y := cputable.o syscalls.o \
signal.o sysfs.o cacheinfo.o time.o \
prom.o traps.o setup-common.o \
udbg.o misc.o io.o misc_$(BITS).o \
- of_platform.o prom_parse.o
+ of_platform.o prom_parse.o syscall.o
obj-y += ptrace/
obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o signal_64.o \
- paca.o nvram_64.o firmware.o note.o \
- syscall.o
+ paca.o nvram_64.o firmware.o note.o
obj-$(CONFIG_VDSO32) += vdso32/
obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
diff --git a/arch/powerpc/kernel/syscall.c b/arch/powerpc/kernel/syscall.c
index 72f3d2f0a823..28bd43db8755 100644
--- a/arch/powerpc/kernel/syscall.c
+++ b/arch/powerpc/kernel/syscall.c
@@ -25,8 +25,10 @@ notrace long system_call_exception(long r3, long r4, long r5,
unsigned long ti_flags;
syscall_fn f;

+#ifdef CONFIG_PPC64
if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
+#endif

trace_hardirqs_off(); /* finish reconciling */

@@ -34,7 +36,9 @@ notrace long system_call_exception(long r3, long r4, long r5,
BUG_ON(!(regs->msr & MSR_RI));
BUG_ON(!(regs->msr & MSR_PR));
BUG_ON(!FULL_REGS(regs));
+#ifdef CONFIG_PPC64
BUG_ON(regs->softe != IRQS_ENABLED);
+#endif

account_cpu_user_entry();

@@ -56,7 +60,9 @@ notrace long system_call_exception(long r3, long r4, long r5,
* frame, or if the unwinder was taught the first stack frame always
* returns to user with IRQS_ENABLED, this store could be avoided!
*/
+#ifdef CONFIG_PPC64
regs->softe = IRQS_ENABLED;
+#endif

local_irq_enable();

@@ -148,7 +154,9 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
ret |= _TIF_RESTOREALL;
}

+#ifdef CONFIG_PPC64
again:
+#endif
local_irq_disable();
ti_flags = READ_ONCE(*ti_flagsp);
while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
@@ -191,6 +199,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,

/* This pattern matches prep_irq_for_idle */
__hard_EE_RI_disable();
+#ifdef CONFIG_PPC64
if (unlikely(lazy_irq_pending())) {
__hard_RI_enable();
trace_hardirqs_off();
@@ -201,6 +210,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
}
local_paca->irq_happened = 0;
irq_soft_mask_set(IRQS_ENABLED);
+#endif

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
local_paca->tm_scratch = regs->msr;
--
2.25.0
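
Most of these #ifdef blocks cannot simply become IS_ENABLED(CONFIG_PPC64) checks, because IS_ENABLED() leaves both branches visible to the compiler and the guarded code references symbols that do not exist on PPC32 (regs->softe, local_paca, the soft-mask helpers). A sketch of the distinction, using softe as the example:

/* Would not build on PPC32: the dead branch must still parse, and
 * pt_regs has no 'softe' field there.
 */
if (IS_ENABLED(CONFIG_PPC64))
    regs->softe = IRQS_ENABLED;

/* The preprocessor removes the text before the compiler sees it, so
 * this works even though the field only exists on PPC64.
 */
#ifdef CONFIG_PPC64
    regs->softe = IRQS_ENABLED;
#endif

Hiding such field accesses behind helpers, as discussed later in the thread, is what would eventually let the #ifdefs go.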

2020-04-05 17:57:22

by Christophe Leroy

Subject: [RFC PATCH v2 07/13] powerpc/syscall: Use is_compat_task()

Instead of directly comparing task flags with _TIF_32BIT, use
is_compat_task(). The advantage is that it returns 0 on PPC32,
even though _TIF_32BIT is always set there.

Signed-off-by: Christophe Leroy <[email protected]>
---
arch/powerpc/kernel/syscall.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/syscall.c b/arch/powerpc/kernel/syscall.c
index 28bd43db8755..4674cfd2916d 100644
--- a/arch/powerpc/kernel/syscall.c
+++ b/arch/powerpc/kernel/syscall.c
@@ -1,6 +1,8 @@
// SPDX-License-Identifier: GPL-2.0-or-later

#include <linux/err.h>
+#include <linux/compat.h>
+
#include <asm/asm-prototypes.h>
#include <asm/kup.h>
#include <asm/cputime.h>
@@ -92,7 +94,7 @@ notrace long system_call_exception(long r3, long r4, long r5,
/* May be faster to do array_index_nospec? */
barrier_nospec();

- if (unlikely(ti_flags & _TIF_32BIT)) {
+ if (is_compat_task()) {
f = (void *)compat_sys_call_table[r0];

r3 &= 0x00000000ffffffffULL;
--
2.25.0
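
The reason this also builds and behaves on PPC32: when CONFIG_COMPAT is not set, <linux/compat.h> stubs is_compat_task() out as a constant, approximately:

/* From the !CONFIG_COMPAT side of <linux/compat.h> (approximate): */
#define is_compat_task() (0)

So the compiler can discard the compat branch entirely at build time, whereas a runtime test of _TIF_32BIT would always be true on PPC32 and would wrongly steer every syscall down the compat path.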

2020-04-06 01:27:05

by Nicholas Piggin

Subject: Re: [RFC PATCH v2 12/13] powerpc/kernel: Do not unconditionally save non-volatile registers on system call

Excerpts from Christophe Leroy's message of April 6, 2020 3:44 am:
> Before: 347 cycles on null_syscall
> After: 327 cycles on null_syscall

The problem I had doing this is that signal delivery wants full regs,
and you don't know if you have a signal pending ahead of time if you
have interrupts enabled.

I began to try bailing out back to asm to save nvgprs and call again.
I think that can be made to work, but it adds more complication in the asm,
and I soon found that 64s CPUs don't care about NVGPRs too much so it's
nice to get rid of the !fullregs state.

Possibly another approach would be to leave interrupts disabled for the
case where you have no work to do. You could create a small
syscall_exit_prepare_nowork fastpath for that case for 32-bit, perhaps?

Thanks,
Nick
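
As a rough idea of the suggested shape (an untested sketch; the name and the exact checks are invented here, and the v3 discussed below ends up taking a different split):

/* Hypothetical PPC32 fast path, called with IRQs still disabled so a
 * late signal cannot be missed. Returns true when the caller may skip
 * the full exit-work loop and return to user immediately.
 */
static notrace bool syscall_exit_prepare_nowork(struct pt_regs *regs)
{
    unsigned long ti_flags = READ_ONCE(current_thread_info()->flags);

    if (unlikely(ti_flags & _TIF_USER_WORK_MASK))
        return false;   /* pending work: fall back to the slow path */

    account_cpu_user_exit();
    return true;        /* nothing to do: straight back to user */
}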

2020-04-06 01:43:53

by Nicholas Piggin

Subject: Re: [RFC PATCH v2 01/13] powerpc/radix: Make kuap_check_amr() and kuap_restore_amr() generic

Excerpts from Christophe Leroy's message of April 6, 2020 3:44 am:
> In preparation for porting powerpc32 to the C syscall entry/exit,
> rename kuap_check_amr() and kuap_restore_amr() to kuap_check()
> and kuap_restore(), and move the stubs used when CONFIG_PPC_KUAP
> is not selected into the generic asm/kup.h.

I have no problem with this,

Reviewed-by: Nicholas Piggin <[email protected]>

> [...]

2020-04-06 01:54:13

by Nicholas Piggin

Subject: Re: [RFC PATCH v2 06/13] powerpc/syscall: Make syscall_64.c buildable on PPC32

Excerpts from Christophe Leroy's message of April 6, 2020 3:44 am:
> ifdef out the PPC64-specific code to allow building
> syscall_64.c on PPC32.
>
> Modify the Makefile to always build syscall.o
>
> Signed-off-by: Christophe Leroy <[email protected]>
> [...]

The #ifdefs disappoint me!

Here is (untested) something that should help 32-bit avoid several ifdefs
in the main part of the function. I should have done this as part of the
merged series, but that's okay, I'll submit it as a cleanup.

The rest looks okay for now. Maybe we grow some helpers to manage the
soft-mask state, though I'm not really sure it would make sense for
32-bit code to ever call them. Maybe keeping them confined to this file
would be okay, but for now the ifdefs are fine.

---
arch/powerpc/kernel/syscall_64.c | 58 +++++++++++++++-----------------
1 file changed, 28 insertions(+), 30 deletions(-)

diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c
index cf06eb443a80..f021db893ec2 100644
--- a/arch/powerpc/kernel/syscall_64.c
+++ b/arch/powerpc/kernel/syscall_64.c
@@ -103,6 +103,31 @@ notrace long system_call_exception(long r3, long r4, long r5,
return f(r3, r4, r5, r6, r7, r8);
}

+/*
+ * local irqs must be disabled. Returns false if the caller must re-enable
+ * them, check for new work, and try again.
+ */
+static notrace inline bool prep_irq_for_enabled_exit(void)
+{
+ /* This must be done with RI=1 because tracing may touch vmaps */
+ trace_hardirqs_on();
+
+ /* This pattern matches prep_irq_for_idle */
+ __hard_EE_RI_disable();
+ if (unlikely(lazy_irq_pending())) {
+ /* Took an interrupt, may have more exit work to do. */
+ __hard_RI_enable();
+ trace_hardirqs_off();
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+ return false;
+ }
+ local_paca->irq_happened = 0;
+ irq_soft_mask_set(IRQS_ENABLED);
+
+ return true;
+}
+
/*
* This should be called after a syscall returns, with r3 the return value
* from the syscall. If this function returns non-zero, the system call
@@ -186,21 +211,10 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
}
}

- /* This must be done with RI=1 because tracing may touch vmaps */
- trace_hardirqs_on();
-
- /* This pattern matches prep_irq_for_idle */
- __hard_EE_RI_disable();
- if (unlikely(lazy_irq_pending())) {
- __hard_RI_enable();
- trace_hardirqs_off();
- local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+ if (unlikely(!prep_irq_for_enabled_exit())) {
local_irq_enable();
- /* Took an interrupt, may have more exit work to do. */
goto again;
}
- local_paca->irq_happened = 0;
- irq_soft_mask_set(IRQS_ENABLED);

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
local_paca->tm_scratch = regs->msr;
@@ -264,19 +278,11 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned
}
}

- trace_hardirqs_on();
- __hard_EE_RI_disable();
- if (unlikely(lazy_irq_pending())) {
- __hard_RI_enable();
- trace_hardirqs_off();
- local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+ if (unlikely(!prep_irq_for_enabled_exit())) {
local_irq_enable();
local_irq_disable();
- /* Took an interrupt, may have more exit work to do. */
goto again;
}
- local_paca->irq_happened = 0;
- irq_soft_mask_set(IRQS_ENABLED);

#ifdef CONFIG_PPC_BOOK3E
if (unlikely(ts->debug.dbcr0 & DBCR0_IDM)) {
@@ -334,13 +340,7 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign
}
}

- trace_hardirqs_on();
- __hard_EE_RI_disable();
- if (unlikely(lazy_irq_pending())) {
- __hard_RI_enable();
- irq_soft_mask_set(IRQS_ALL_DISABLED);
- trace_hardirqs_off();
- local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+ if (unlikely(!prep_irq_for_enabled_exit())) {
/*
* Can't local_irq_restore to replay if we were in
* interrupt context. Must replay directly.
@@ -354,8 +354,6 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign
/* Took an interrupt, may have more exit work to do. */
goto again;
}
- local_paca->irq_happened = 0;
- irq_soft_mask_set(IRQS_ENABLED);
} else {
/* Returning to a kernel context with local irqs disabled. */
__hard_EE_RI_disable();
--
2.23.0

2020-04-06 18:19:26

by Christophe Leroy

Subject: Re: [RFC PATCH v2 12/13] powerpc/kernel: Do not unconditionally save non-volatile registers on system call



On 06/04/2020 at 03:25, Nicholas Piggin wrote:
> Excerpts from Christophe Leroy's message of April 6, 2020 3:44 am:
>> Before: 347 cycles on null_syscall
>> After: 327 cycles on null_syscall
>
> The problem I had doing this is that signal delivery wants full regs,
> and you don't know if you have a signal pending ahead of time if you
> have interrupts enabled.
>
> I began to try bailing out back to asm to save nvgprs and call again.
> I think that can be made to work, but it adds more complication in the asm,
> and I soon found that 64s CPUs don't care about NVGPRs too much so it's
> nice to get rid of the !fullregs state.

I tried a new approach in v3, please have a look. I split
syscall_exit_prepare() into 3 parts, and the result is unexpectedly
good: it is better than before the series (307 cycles now versus 311
cycles with the full ASM syscall entry/exit).

>
> Possibly another approach would be to leave interrupts disabled for the
> case where you have no work to do. You could create a small
> syscall_exit_prepare_nowork fastpath for that case for 32-bit, perhaps?
>
> Thanks,
> Nick
>

Christophe
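
The v3 code is not quoted in this thread; purely as an illustration of the shape being described (function names hypothetical, not the actual v3 naming):

/* Hypothetical three-way split: a quick check with IRQs off, a work
 * loop that may re-enable IRQs for signals/resched, and a final stage
 * that leaves the CPU ready for the rfi back to user.
 */
ret = syscall_exit_prepare_begin(r3, regs);
if (unlikely(ret))
    ret = syscall_exit_prepare_loop(ret, regs);
syscall_exit_prepare_end(ret, regs);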

2020-04-06 18:24:11

by Christophe Leroy

Subject: Re: [RFC PATCH v2 06/13] powerpc/syscall: Make syscall_64.c buildable on PPC32



On 06/04/2020 at 03:52, Nicholas Piggin wrote:
> Excerpts from Christophe Leroy's message of April 6, 2020 3:44 am:
>> ifdef out the PPC64-specific code to allow building
>> syscall_64.c on PPC32.
>>
>> Modify the Makefile to always build syscall.o
>>
>> Signed-off-by: Christophe Leroy <[email protected]>
>> [...]
>
> The #ifdefs disappoint me!

They disappoint me as well. I tried to removed most of them in v3, I
also took your patch up front the series.

>
> Here is (untested) something that should help 32-bit avoid several ifdefs
> in the main part of the function. I should have done this as part of the
> merged series, but that's okay, I'll submit it as a cleanup.
>
> The rest looks okay for now. Maybe we grow some helpers to manage the
> soft-mask state, though I'm not really sure it would make sense for
> 32-bit code to ever call them. Maybe keeping them confined to this file
> would be okay, but for now the ifdefs are fine.

Thanks
Christophe

2020-04-07 00:29:13

by Nicholas Piggin

Subject: Re: [RFC PATCH v2 12/13] powerpc/kernel: Do not unconditionally save non-volatile registers on system call

Excerpts from Christophe Leroy's message of April 7, 2020 4:18 am:
>
>
> On 06/04/2020 at 03:25, Nicholas Piggin wrote:
>> Excerpts from Christophe Leroy's message of April 6, 2020 3:44 am:
>>> Before: 347 cycles on null_syscall
>>> After: 327 cycles on null_syscall
>>
>> The problem I had doing this is that signal delivery wants full regs,
>> and you don't know if you have a signal pending ahead of time if you
>> have interrupts enabled.
>>
>> I began to try bailing out back to asm to save nvgprs and call again.
>> I think that can be made to work, but it adds more complication in the asm,
>> and I soon found that 64s CPUs don't care about NVGPRs too much so it's
>> nice to get rid of the !fullregs state.
>
> I tried a new approach in v3, please have a look. I split
> syscall_exit_prepare() into 3 parts, and the result is unexpectedly
> good: it is better than before the series (307 cycles now versus 311
> cycles with the full ASM syscall entry/exit).

Great! Well I don't really see a problem with how you changed the C code
around. I'll have to look at the assembly but I don't think it would
have caused a problem for 64s.

Thanks,
Nick