2020-03-31 15:24:35

by Christophe Leroy

[permalink] [raw]
Subject: [RFC WIP PATCH] powerpc/32: system call implement entry/exit logic in C

That's first try to port PPC64 syscall entry/exit logic in C to PPC32.
I've do the minimum to get it work. I have not reworked calls
to sys_fork() and friends for instance.

For the time being, it seems to work more or less but:
- ping reports EINVAL on recvfrom
- strace shows NULL instead of strings in call like open() for instance.
- the performance is definitively bad

On an 8xx, null_syscall test is about 30% slower after this patch:
- Without the patch: 284 cycles
- With the patch: 371 cycles

@nick and others, any suggestion to fix and improve ?

Signed-off-by: Christophe Leroy <[email protected]>
---
arch/powerpc/include/asm/book3s/32/kup.h | 21 ++
.../powerpc/include/asm/book3s/64/kup-radix.h | 12 +-
arch/powerpc/include/asm/hw_irq.h | 15 +
arch/powerpc/include/asm/kup.h | 2 +
arch/powerpc/include/asm/nohash/32/kup-8xx.h | 13 +
arch/powerpc/kernel/Makefile | 5 +-
arch/powerpc/kernel/entry_32.S | 259 ++----------------
arch/powerpc/kernel/head_32.h | 3 +-
.../kernel/{syscall_64.c => syscall.c} | 25 +-
9 files changed, 102 insertions(+), 253 deletions(-)
rename arch/powerpc/kernel/{syscall_64.c => syscall.c} (97%)

diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h
index 3c0ba22dc360..c85bc5b56366 100644
--- a/arch/powerpc/include/asm/book3s/32/kup.h
+++ b/arch/powerpc/include/asm/book3s/32/kup.h
@@ -102,6 +102,27 @@ static inline void kuap_update_sr(u32 sr, u32 addr, u32 end)
isync(); /* Context sync required after mtsrin() */
}

+static inline void kuap_restore(struct pt_regs *regs)
+{
+ u32 kuap = current->thread.kuap;
+ u32 addr = kuap & 0xf0000000;
+ u32 end = kuap << 28;
+
+ if (unlikely(!kuap))
+ return;
+
+ current->thread.kuap = 0;
+ kuap_update_sr(mfsrin(addr) & ~SR_KS, addr, end); /* Clear Ks */
+}
+
+static inline void kuap_check(void)
+{
+ if (!IS_ENABLED(CONFIG_PPC_KUAP_DEBUG))
+ return;
+
+ WARN_ON_ONCE(current->thread.kuap != 0);
+}
+
static __always_inline void allow_user_access(void __user *to, const void __user *from,
u32 size, unsigned long dir)
{
diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h
index 3bcef989a35d..1f2716a0dcd8 100644
--- a/arch/powerpc/include/asm/book3s/64/kup-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h
@@ -60,13 +60,13 @@
#include <asm/mmu.h>
#include <asm/ptrace.h>

-static inline void kuap_restore_amr(struct pt_regs *regs)
+static inline void kuap_restore(struct pt_regs *regs)
{
if (mmu_has_feature(MMU_FTR_RADIX_KUAP))
mtspr(SPRN_AMR, regs->kuap);
}

-static inline void kuap_check_amr(void)
+static inline void kuap_check(void)
{
if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && mmu_has_feature(MMU_FTR_RADIX_KUAP))
WARN_ON_ONCE(mfspr(SPRN_AMR) != AMR_KUAP_BLOCKED);
@@ -141,14 +141,6 @@ bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
(regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)),
"Bug: %s fault blocked by AMR!", is_write ? "Write" : "Read");
}
-#else /* CONFIG_PPC_KUAP */
-static inline void kuap_restore_amr(struct pt_regs *regs)
-{
-}
-
-static inline void kuap_check_amr(void)
-{
-}
#endif /* CONFIG_PPC_KUAP */

#endif /* __ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index e0e71777961f..6ccf07de6665 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -321,6 +321,16 @@ static inline void arch_local_irq_disable(void)
mtmsr(mfmsr() & ~MSR_EE);
}

+static inline void arch_local_recovery_disable(void)
+{
+ if (IS_ENABLED(CONFIG_BOOKE))
+ wrtee(0);
+ else if (IS_ENABLED(CONFIG_PPC_8xx))
+ wrtspr(SPRN_NRI);
+ else
+ mtmsr(mfmsr() & ~(MSR_EE | MSR_RI));
+}
+
static inline void arch_local_irq_enable(void)
{
if (IS_ENABLED(CONFIG_BOOKE))
@@ -343,6 +353,11 @@ static inline bool arch_irqs_disabled(void)

#define hard_irq_disable() arch_local_irq_disable()

+#define __hard_irq_enable() arch_local_irq_enable()
+#define __hard_irq_disable() arch_local_irq_disable()
+#define __hard_EE_RI_disable() arch_local_recovery_disable()
+#define __hard_RI_enable() arch_local_irq_disable()
+
static inline bool arch_irq_disabled_regs(struct pt_regs *regs)
{
return !(regs->msr & MSR_EE);
diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h
index 92bcd1a26d73..1100c13b6d9e 100644
--- a/arch/powerpc/include/asm/kup.h
+++ b/arch/powerpc/include/asm/kup.h
@@ -62,6 +62,8 @@ bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
{
return false;
}
+static inline void kuap_restore(struct pt_regs *regs) { }
+static inline void kuap_check(void) { }
#endif /* CONFIG_PPC_KUAP */

static inline void allow_read_from_user(const void __user *from, unsigned long size)
diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
index 85ed2390fb99..1918d2e55da3 100644
--- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
@@ -34,6 +34,19 @@

#include <asm/reg.h>

+static inline void kuap_restore(struct pt_regs *regs)
+{
+ mtspr(SPRN_MD_AP, regs->kuap);
+}
+
+static inline void kuap_check(void)
+{
+ if (!IS_ENABLED(CONFIG_PPC_KUAP_DEBUG))
+ return;
+
+ WARN_ON_ONCE((mfspr(SPRN_MD_AP) & 0xffff0000) != (MD_APG_KUAP & 0xffff0000));
+}
+
static inline void allow_user_access(void __user *to, const void __user *from,
unsigned long size, unsigned long dir)
{
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 570660efbb3d..e4be425b7718 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -45,11 +45,10 @@ obj-y := cputable.o syscalls.o \
signal.o sysfs.o cacheinfo.o time.o \
prom.o traps.o setup-common.o \
udbg.o misc.o io.o misc_$(BITS).o \
- of_platform.o prom_parse.o
+ of_platform.o prom_parse.o syscall.o
obj-y += ptrace/
obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o signal_64.o \
- paca.o nvram_64.o firmware.o note.o \
- syscall_64.o
+ paca.o nvram_64.o firmware.o note.o
obj-$(CONFIG_VDSO32) += vdso32/
obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index a6371fb8f761..103f5158bc44 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -315,162 +315,37 @@ stack_ovf:
RFI
#endif

-#ifdef CONFIG_TRACE_IRQFLAGS
-trace_syscall_entry_irq_off:
- /*
- * Syscall shouldn't happen while interrupts are disabled,
- * so let's do a warning here.
- */
-0: trap
- EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING
- bl trace_hardirqs_on
-
- /* Now enable for real */
- LOAD_REG_IMMEDIATE(r10, MSR_KERNEL | MSR_EE)
- mtmsr r10
-
- REST_GPR(0, r1)
- REST_4GPRS(3, r1)
- REST_2GPRS(7, r1)
- b DoSyscall
-#endif /* CONFIG_TRACE_IRQFLAGS */
-
.globl transfer_to_syscall
transfer_to_syscall:
-#ifdef CONFIG_TRACE_IRQFLAGS
- andi. r12,r9,MSR_EE
- beq- trace_syscall_entry_irq_off
-#endif /* CONFIG_TRACE_IRQFLAGS */
-
-/*
- * Handle a system call.
- */
- .stabs "arch/powerpc/kernel/",N_SO,0,0,0f
- .stabs "entry_32.S",N_SO,0,0,0f
-0:
-
-_GLOBAL(DoSyscall)
- stw r3,ORIG_GPR3(r1)
- li r12,0
- stw r12,RESULT(r1)
-#ifdef CONFIG_TRACE_IRQFLAGS
- /* Make sure interrupts are enabled */
- mfmsr r11
- andi. r12,r11,MSR_EE
- /* We came in with interrupts disabled, we WARN and mark them enabled
- * for lockdep now */
-0: tweqi r12, 0
- EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING
-#endif /* CONFIG_TRACE_IRQFLAGS */
- lwz r11,TI_FLAGS(r2)
- andi. r11,r11,_TIF_SYSCALL_DOTRACE
- bne- syscall_dotrace
-syscall_dotrace_cont:
- cmplwi 0,r0,NR_syscalls
- lis r10,sys_call_table@h
- ori r10,r10,sys_call_table@l
- slwi r0,r0,2
- bge- 66f
-
- barrier_nospec_asm
- /*
- * Prevent the load of the handler below (based on the user-passed
- * system call number) being speculatively executed until the test
- * against NR_syscalls and branch to .66f above has
- * committed.
- */
-
- lwzx r10,r10,r0 /* Fetch system call handler [ptr] */
- mtlr r10
- addi r9,r1,STACK_FRAME_OVERHEAD
- PPC440EP_ERR42
- blrl /* Call handler */
- .globl ret_from_syscall
+ mr r9, r0
+ addi r10, r1, STACK_FRAME_OVERHEAD
+ bl system_call_exception
ret_from_syscall:
-#ifdef CONFIG_DEBUG_RSEQ
- /* Check whether the syscall is issued inside a restartable sequence */
- stw r3,GPR3(r1)
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl rseq_syscall
- lwz r3,GPR3(r1)
-#endif
- mr r6,r3
- /* disable interrupts so current_thread_info()->flags can't change */
- LOAD_REG_IMMEDIATE(r10,MSR_KERNEL) /* doesn't include MSR_EE */
- /* Note: We don't bother telling lockdep about it */
- SYNC
- mtmsr r10
- lwz r9,TI_FLAGS(r2)
- li r8,-MAX_ERRNO
- andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
- bne- syscall_exit_work
- cmplw 0,r3,r8
- blt+ syscall_exit_cont
- lwz r11,_CCR(r1) /* Load CR */
- neg r3,r3
- oris r11,r11,0x1000 /* Set SO bit in CR */
- stw r11,_CCR(r1)
-syscall_exit_cont:
- lwz r8,_MSR(r1)
-#ifdef CONFIG_TRACE_IRQFLAGS
- /* If we are going to return from the syscall with interrupts
- * off, we trace that here. It shouldn't normally happen.
- */
- andi. r10,r8,MSR_EE
- bne+ 1f
- stw r3,GPR3(r1)
- bl trace_hardirqs_off
- lwz r3,GPR3(r1)
-1:
-#endif /* CONFIG_TRACE_IRQFLAGS */
-#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
- /* If the process has its own DBCR0 value, load it up. The internal
- debug mode bit tells us that dbcr0 should be loaded. */
- lwz r0,THREAD+THREAD_DBCR0(r2)
- andis. r10,r0,DBCR0_IDM@h
- bnel- load_dbcr0
-#endif
-#ifdef CONFIG_44x
-BEGIN_MMU_FTR_SECTION
- lis r4,icache_44x_need_flush@ha
- lwz r5,icache_44x_need_flush@l(r4)
- cmplwi cr0,r5,0
- bne- 2f
-1:
-END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_47x)
-#endif /* CONFIG_44x */
-BEGIN_FTR_SECTION
- lwarx r7,0,r1
-END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
- stwcx. r0,0,r1 /* to clear the reservation */
- ACCOUNT_CPU_USER_EXIT(r2, r5, r7)
-#ifdef CONFIG_PPC_BOOK3S_32
- kuep_unlock r5, r7
-#endif
- kuap_check r2, r4
- lwz r4,_LINK(r1)
- lwz r5,_CCR(r1)
- mtlr r4
- mtcr r5
- lwz r7,_NIP(r1)
- lwz r2,GPR2(r1)
- lwz r1,GPR1(r1)
-#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
- mtspr SPRN_NRI, r0
-#endif
- mtspr SPRN_SRR0,r7
- mtspr SPRN_SRR1,r8
- SYNC
- RFI
-#ifdef CONFIG_44x
-2: li r7,0
- iccci r0,r0
- stw r7,icache_44x_need_flush@l(r4)
+ addi r4, r1, STACK_FRAME_OVERHEAD
+ bl syscall_exit_prepare
+ lwz r2, _CCR(r1)
+ lwz r4, _NIP(r1)
+ lwz r5, _MSR(r1)
+ lwz r6, _LINK(r1)
+ mtspr SPRN_SRR0, r4
+ mtspr SPRN_SRR1, r5
+ mtlr r6
+ cmpwi r3, 0
+ bne 2f
+1: mtcr r2
+ REST_GPR(2, r1)
+ REST_GPR(3, r1)
+ REST_GPR(1, r1)
+ rfi
+2: lwz r3, _CTR(r1)
+ lwz r4, _XER(r1)
+ REST_NVGPRS(r1)
+ mtctr r3
+ mtspr SPRN_XER, r4
+ REST_GPR(0, r1)
+ REST_8GPRS(4, r1)
+ REST_GPR(12, r1)
b 1b
-#endif /* CONFIG_44x */
-
-66: li r3,-ENOSYS
- b ret_from_syscall

.globl ret_from_fork
ret_from_fork:
@@ -490,86 +365,6 @@ ret_from_kernel_thread:
li r3,0
b ret_from_syscall

-/* Traced system call support */
-syscall_dotrace:
- SAVE_NVGPRS(r1)
- li r0,0xc00
- stw r0,_TRAP(r1)
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl do_syscall_trace_enter
- /*
- * Restore argument registers possibly just changed.
- * We use the return value of do_syscall_trace_enter
- * for call number to look up in the table (r0).
- */
- mr r0,r3
- lwz r3,GPR3(r1)
- lwz r4,GPR4(r1)
- lwz r5,GPR5(r1)
- lwz r6,GPR6(r1)
- lwz r7,GPR7(r1)
- lwz r8,GPR8(r1)
- REST_NVGPRS(r1)
-
- cmplwi r0,NR_syscalls
- /* Return code is already in r3 thanks to do_syscall_trace_enter() */
- bge- ret_from_syscall
- b syscall_dotrace_cont
-
-syscall_exit_work:
- andi. r0,r9,_TIF_RESTOREALL
- beq+ 0f
- REST_NVGPRS(r1)
- b 2f
-0: cmplw 0,r3,r8
- blt+ 1f
- andi. r0,r9,_TIF_NOERROR
- bne- 1f
- lwz r11,_CCR(r1) /* Load CR */
- neg r3,r3
- oris r11,r11,0x1000 /* Set SO bit in CR */
- stw r11,_CCR(r1)
-
-1: stw r6,RESULT(r1) /* Save result */
- stw r3,GPR3(r1) /* Update return value */
-2: andi. r0,r9,(_TIF_PERSYSCALL_MASK)
- beq 4f
-
- /* Clear per-syscall TIF flags if any are set. */
-
- li r11,_TIF_PERSYSCALL_MASK
- addi r12,r2,TI_FLAGS
-3: lwarx r8,0,r12
- andc r8,r8,r11
-#ifdef CONFIG_IBM405_ERR77
- dcbt 0,r12
-#endif
- stwcx. r8,0,r12
- bne- 3b
-
-4: /* Anything which requires enabling interrupts? */
- andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP)
- beq ret_from_except
-
- /* Re-enable interrupts. There is no need to trace that with
- * lockdep as we are supposed to have IRQs on at this point
- */
- ori r10,r10,MSR_EE
- SYNC
- mtmsr r10
-
- /* Save NVGPRS if they're not saved already */
- lwz r4,_TRAP(r1)
- andi. r4,r4,1
- beq 5f
- SAVE_NVGPRS(r1)
- li r4,0xc00
- stw r4,_TRAP(r1)
-5:
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl do_syscall_trace_leave
- b ret_from_except_full
-
/*
* System call was called from kernel. We get here with SRR1 in r9.
* Mark the exception as recoverable once we have retrieved SRR0,
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index 9abec6cd099c..97691931a306 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -174,12 +174,13 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
stw r2,GPR2(r11)
addi r10,r10,STACK_FRAME_REGS_MARKER@l
stw r9,_MSR(r11)
- li r2, \trapno + 1
+ li r2, \trapno
stw r10,8(r11)
stw r2,_TRAP(r11)
SAVE_GPR(0, r11)
SAVE_4GPRS(3, r11)
SAVE_2GPRS(7, r11)
+ SAVE_NVGPRS(r11)
addi r11,r1,STACK_FRAME_OVERHEAD
addi r2,r12,-THREAD
stw r11,PT_REGS(r12)
diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall.c
similarity index 97%
rename from arch/powerpc/kernel/syscall_64.c
rename to arch/powerpc/kernel/syscall.c
index cf06eb443a80..a07702a85f81 100644
--- a/arch/powerpc/kernel/syscall_64.c
+++ b/arch/powerpc/kernel/syscall.c
@@ -1,8 +1,9 @@
// SPDX-License-Identifier: GPL-2.0-or-later

#include <linux/err.h>
+#include <linux/compat.h>
+
#include <asm/asm-prototypes.h>
-#include <asm/book3s/64/kup-radix.h>
#include <asm/cputime.h>
#include <asm/hw_irq.h>
#include <asm/kprobes.h>
@@ -25,16 +26,20 @@ notrace long system_call_exception(long r3, long r4, long r5,
unsigned long ti_flags;
syscall_fn f;

+#ifdef CONFIG_PPC64
if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
+#endif

trace_hardirqs_off(); /* finish reconciling */

- if (IS_ENABLED(CONFIG_PPC_BOOK3S))
+ if (!IS_ENABLED(CONFIG_PPC_BOOK3E))
BUG_ON(!(regs->msr & MSR_RI));
BUG_ON(!(regs->msr & MSR_PR));
BUG_ON(!FULL_REGS(regs));
+#ifdef CONFIG_PPC64
BUG_ON(regs->softe != IRQS_ENABLED);
+#endif

account_cpu_user_entry();

@@ -48,7 +53,7 @@ notrace long system_call_exception(long r3, long r4, long r5,
}
#endif

- kuap_check_amr();
+ kuap_check();

/*
* This is not required for the syscall exit path, but makes the
@@ -56,7 +61,9 @@ notrace long system_call_exception(long r3, long r4, long r5,
* frame, or if the unwinder was taught the first stack frame always
* returns to user with IRQS_ENABLED, this store could be avoided!
*/
+#ifdef CONFIG_PPC64
regs->softe = IRQS_ENABLED;
+#endif

local_irq_enable();

@@ -86,7 +93,7 @@ notrace long system_call_exception(long r3, long r4, long r5,
/* May be faster to do array_index_nospec? */
barrier_nospec();

- if (unlikely(ti_flags & _TIF_32BIT)) {
+ if (is_compat_task()) {
f = (void *)compat_sys_call_table[r0];

r3 &= 0x00000000ffffffffULL;
@@ -148,7 +155,9 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
ret |= _TIF_RESTOREALL;
}

+#ifdef CONFIG_PPC64
again:
+#endif
local_irq_disable();
ti_flags = READ_ONCE(*ti_flagsp);
while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
@@ -191,6 +200,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,

/* This pattern matches prep_irq_for_idle */
__hard_EE_RI_disable();
+#ifdef CONFIG_PPC64
if (unlikely(lazy_irq_pending())) {
__hard_RI_enable();
trace_hardirqs_off();
@@ -201,12 +211,13 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
}
local_paca->irq_happened = 0;
irq_soft_mask_set(IRQS_ENABLED);
+#endif

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
local_paca->tm_scratch = regs->msr;
#endif

- kuap_check_amr();
+ kuap_check();

account_cpu_user_exit();

@@ -294,7 +305,7 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned
local_paca->tm_scratch = regs->msr;
#endif

- kuap_check_amr();
+ kuap_check();

account_cpu_user_exit();

@@ -372,7 +383,7 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign
* We don't need to restore AMR on the way back to userspace for KUAP.
* The value of AMR only matters while we're in the kernel.
*/
- kuap_restore_amr(regs);
+ kuap_restore(regs);

return ret;
}
--
2.25.0


2020-04-01 11:49:09

by Christophe Leroy

[permalink] [raw]
Subject: Re: [RFC WIP PATCH] powerpc/32: system call implement entry/exit logic in C



Le 31/03/2020 à 17:22, Christophe Leroy a écrit :
> That's first try to port PPC64 syscall entry/exit logic in C to PPC32.
> I've do the minimum to get it work. I have not reworked calls
> to sys_fork() and friends for instance.
>
> For the time being, it seems to work more or less but:
> - ping reports EINVAL on recvfrom
> - strace shows NULL instead of strings in call like open() for instance.

For the two above problems, that's because system_call_exception()
doesn't set orig_gpr3 whereas DoSycall() does in entry_32.S . Is that
only done on PPC32 ?

With the following line at the begining of system_call_exception(), it
works perfectly:

regs->orig_gpr3 = r3;

I will now focus on performance to see if we can do something about it.

Christophe

> - the performance is definitively bad
>
> On an 8xx, null_syscall test is about 30% slower after this patch:
> - Without the patch: 284 cycles
> - With the patch: 371 cycles
>
> @nick and others, any suggestion to fix and improve ?
>
> Signed-off-by: Christophe Leroy <[email protected]>
> ---
> arch/powerpc/include/asm/book3s/32/kup.h | 21 ++
> .../powerpc/include/asm/book3s/64/kup-radix.h | 12 +-
> arch/powerpc/include/asm/hw_irq.h | 15 +
> arch/powerpc/include/asm/kup.h | 2 +
> arch/powerpc/include/asm/nohash/32/kup-8xx.h | 13 +
> arch/powerpc/kernel/Makefile | 5 +-
> arch/powerpc/kernel/entry_32.S | 259 ++----------------
> arch/powerpc/kernel/head_32.h | 3 +-
> .../kernel/{syscall_64.c => syscall.c} | 25 +-
> 9 files changed, 102 insertions(+), 253 deletions(-)
> rename arch/powerpc/kernel/{syscall_64.c => syscall.c} (97%)
>
> diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h
> index 3c0ba22dc360..c85bc5b56366 100644
> --- a/arch/powerpc/include/asm/book3s/32/kup.h
> +++ b/arch/powerpc/include/asm/book3s/32/kup.h
> @@ -102,6 +102,27 @@ static inline void kuap_update_sr(u32 sr, u32 addr, u32 end)
> isync(); /* Context sync required after mtsrin() */
> }
>
> +static inline void kuap_restore(struct pt_regs *regs)
> +{
> + u32 kuap = current->thread.kuap;
> + u32 addr = kuap & 0xf0000000;
> + u32 end = kuap << 28;
> +
> + if (unlikely(!kuap))
> + return;
> +
> + current->thread.kuap = 0;
> + kuap_update_sr(mfsrin(addr) & ~SR_KS, addr, end); /* Clear Ks */
> +}
> +
> +static inline void kuap_check(void)
> +{
> + if (!IS_ENABLED(CONFIG_PPC_KUAP_DEBUG))
> + return;
> +
> + WARN_ON_ONCE(current->thread.kuap != 0);
> +}
> +
> static __always_inline void allow_user_access(void __user *to, const void __user *from,
> u32 size, unsigned long dir)
> {
> diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h
> index 3bcef989a35d..1f2716a0dcd8 100644
> --- a/arch/powerpc/include/asm/book3s/64/kup-radix.h
> +++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h
> @@ -60,13 +60,13 @@
> #include <asm/mmu.h>
> #include <asm/ptrace.h>
>
> -static inline void kuap_restore_amr(struct pt_regs *regs)
> +static inline void kuap_restore(struct pt_regs *regs)
> {
> if (mmu_has_feature(MMU_FTR_RADIX_KUAP))
> mtspr(SPRN_AMR, regs->kuap);
> }
>
> -static inline void kuap_check_amr(void)
> +static inline void kuap_check(void)
> {
> if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && mmu_has_feature(MMU_FTR_RADIX_KUAP))
> WARN_ON_ONCE(mfspr(SPRN_AMR) != AMR_KUAP_BLOCKED);
> @@ -141,14 +141,6 @@ bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
> (regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)),
> "Bug: %s fault blocked by AMR!", is_write ? "Write" : "Read");
> }
> -#else /* CONFIG_PPC_KUAP */
> -static inline void kuap_restore_amr(struct pt_regs *regs)
> -{
> -}
> -
> -static inline void kuap_check_amr(void)
> -{
> -}
> #endif /* CONFIG_PPC_KUAP */
>
> #endif /* __ASSEMBLY__ */
> diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
> index e0e71777961f..6ccf07de6665 100644
> --- a/arch/powerpc/include/asm/hw_irq.h
> +++ b/arch/powerpc/include/asm/hw_irq.h
> @@ -321,6 +321,16 @@ static inline void arch_local_irq_disable(void)
> mtmsr(mfmsr() & ~MSR_EE);
> }
>
> +static inline void arch_local_recovery_disable(void)
> +{
> + if (IS_ENABLED(CONFIG_BOOKE))
> + wrtee(0);
> + else if (IS_ENABLED(CONFIG_PPC_8xx))
> + wrtspr(SPRN_NRI);
> + else
> + mtmsr(mfmsr() & ~(MSR_EE | MSR_RI));
> +}
> +
> static inline void arch_local_irq_enable(void)
> {
> if (IS_ENABLED(CONFIG_BOOKE))
> @@ -343,6 +353,11 @@ static inline bool arch_irqs_disabled(void)
>
> #define hard_irq_disable() arch_local_irq_disable()
>
> +#define __hard_irq_enable() arch_local_irq_enable()
> +#define __hard_irq_disable() arch_local_irq_disable()
> +#define __hard_EE_RI_disable() arch_local_recovery_disable()
> +#define __hard_RI_enable() arch_local_irq_disable()
> +
> static inline bool arch_irq_disabled_regs(struct pt_regs *regs)
> {
> return !(regs->msr & MSR_EE);
> diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h
> index 92bcd1a26d73..1100c13b6d9e 100644
> --- a/arch/powerpc/include/asm/kup.h
> +++ b/arch/powerpc/include/asm/kup.h
> @@ -62,6 +62,8 @@ bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
> {
> return false;
> }
> +static inline void kuap_restore(struct pt_regs *regs) { }
> +static inline void kuap_check(void) { }
> #endif /* CONFIG_PPC_KUAP */
>
> static inline void allow_read_from_user(const void __user *from, unsigned long size)
> diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
> index 85ed2390fb99..1918d2e55da3 100644
> --- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h
> +++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
> @@ -34,6 +34,19 @@
>
> #include <asm/reg.h>
>
> +static inline void kuap_restore(struct pt_regs *regs)
> +{
> + mtspr(SPRN_MD_AP, regs->kuap);
> +}
> +
> +static inline void kuap_check(void)
> +{
> + if (!IS_ENABLED(CONFIG_PPC_KUAP_DEBUG))
> + return;
> +
> + WARN_ON_ONCE((mfspr(SPRN_MD_AP) & 0xffff0000) != (MD_APG_KUAP & 0xffff0000));
> +}
> +
> static inline void allow_user_access(void __user *to, const void __user *from,
> unsigned long size, unsigned long dir)
> {
> diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
> index 570660efbb3d..e4be425b7718 100644
> --- a/arch/powerpc/kernel/Makefile
> +++ b/arch/powerpc/kernel/Makefile
> @@ -45,11 +45,10 @@ obj-y := cputable.o syscalls.o \
> signal.o sysfs.o cacheinfo.o time.o \
> prom.o traps.o setup-common.o \
> udbg.o misc.o io.o misc_$(BITS).o \
> - of_platform.o prom_parse.o
> + of_platform.o prom_parse.o syscall.o
> obj-y += ptrace/
> obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o signal_64.o \
> - paca.o nvram_64.o firmware.o note.o \
> - syscall_64.o
> + paca.o nvram_64.o firmware.o note.o
> obj-$(CONFIG_VDSO32) += vdso32/
> obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o
> obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
> diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
> index a6371fb8f761..103f5158bc44 100644
> --- a/arch/powerpc/kernel/entry_32.S
> +++ b/arch/powerpc/kernel/entry_32.S
> @@ -315,162 +315,37 @@ stack_ovf:
> RFI
> #endif
>
> -#ifdef CONFIG_TRACE_IRQFLAGS
> -trace_syscall_entry_irq_off:
> - /*
> - * Syscall shouldn't happen while interrupts are disabled,
> - * so let's do a warning here.
> - */
> -0: trap
> - EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING
> - bl trace_hardirqs_on
> -
> - /* Now enable for real */
> - LOAD_REG_IMMEDIATE(r10, MSR_KERNEL | MSR_EE)
> - mtmsr r10
> -
> - REST_GPR(0, r1)
> - REST_4GPRS(3, r1)
> - REST_2GPRS(7, r1)
> - b DoSyscall
> -#endif /* CONFIG_TRACE_IRQFLAGS */
> -
> .globl transfer_to_syscall
> transfer_to_syscall:
> -#ifdef CONFIG_TRACE_IRQFLAGS
> - andi. r12,r9,MSR_EE
> - beq- trace_syscall_entry_irq_off
> -#endif /* CONFIG_TRACE_IRQFLAGS */
> -
> -/*
> - * Handle a system call.
> - */
> - .stabs "arch/powerpc/kernel/",N_SO,0,0,0f
> - .stabs "entry_32.S",N_SO,0,0,0f
> -0:
> -
> -_GLOBAL(DoSyscall)
> - stw r3,ORIG_GPR3(r1)
> - li r12,0
> - stw r12,RESULT(r1)
> -#ifdef CONFIG_TRACE_IRQFLAGS
> - /* Make sure interrupts are enabled */
> - mfmsr r11
> - andi. r12,r11,MSR_EE
> - /* We came in with interrupts disabled, we WARN and mark them enabled
> - * for lockdep now */
> -0: tweqi r12, 0
> - EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING
> -#endif /* CONFIG_TRACE_IRQFLAGS */
> - lwz r11,TI_FLAGS(r2)
> - andi. r11,r11,_TIF_SYSCALL_DOTRACE
> - bne- syscall_dotrace
> -syscall_dotrace_cont:
> - cmplwi 0,r0,NR_syscalls
> - lis r10,sys_call_table@h
> - ori r10,r10,sys_call_table@l
> - slwi r0,r0,2
> - bge- 66f
> -
> - barrier_nospec_asm
> - /*
> - * Prevent the load of the handler below (based on the user-passed
> - * system call number) being speculatively executed until the test
> - * against NR_syscalls and branch to .66f above has
> - * committed.
> - */
> -
> - lwzx r10,r10,r0 /* Fetch system call handler [ptr] */
> - mtlr r10
> - addi r9,r1,STACK_FRAME_OVERHEAD
> - PPC440EP_ERR42
> - blrl /* Call handler */
> - .globl ret_from_syscall
> + mr r9, r0
> + addi r10, r1, STACK_FRAME_OVERHEAD
> + bl system_call_exception
> ret_from_syscall:
> -#ifdef CONFIG_DEBUG_RSEQ
> - /* Check whether the syscall is issued inside a restartable sequence */
> - stw r3,GPR3(r1)
> - addi r3,r1,STACK_FRAME_OVERHEAD
> - bl rseq_syscall
> - lwz r3,GPR3(r1)
> -#endif
> - mr r6,r3
> - /* disable interrupts so current_thread_info()->flags can't change */
> - LOAD_REG_IMMEDIATE(r10,MSR_KERNEL) /* doesn't include MSR_EE */
> - /* Note: We don't bother telling lockdep about it */
> - SYNC
> - mtmsr r10
> - lwz r9,TI_FLAGS(r2)
> - li r8,-MAX_ERRNO
> - andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
> - bne- syscall_exit_work
> - cmplw 0,r3,r8
> - blt+ syscall_exit_cont
> - lwz r11,_CCR(r1) /* Load CR */
> - neg r3,r3
> - oris r11,r11,0x1000 /* Set SO bit in CR */
> - stw r11,_CCR(r1)
> -syscall_exit_cont:
> - lwz r8,_MSR(r1)
> -#ifdef CONFIG_TRACE_IRQFLAGS
> - /* If we are going to return from the syscall with interrupts
> - * off, we trace that here. It shouldn't normally happen.
> - */
> - andi. r10,r8,MSR_EE
> - bne+ 1f
> - stw r3,GPR3(r1)
> - bl trace_hardirqs_off
> - lwz r3,GPR3(r1)
> -1:
> -#endif /* CONFIG_TRACE_IRQFLAGS */
> -#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
> - /* If the process has its own DBCR0 value, load it up. The internal
> - debug mode bit tells us that dbcr0 should be loaded. */
> - lwz r0,THREAD+THREAD_DBCR0(r2)
> - andis. r10,r0,DBCR0_IDM@h
> - bnel- load_dbcr0
> -#endif
> -#ifdef CONFIG_44x
> -BEGIN_MMU_FTR_SECTION
> - lis r4,icache_44x_need_flush@ha
> - lwz r5,icache_44x_need_flush@l(r4)
> - cmplwi cr0,r5,0
> - bne- 2f
> -1:
> -END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_47x)
> -#endif /* CONFIG_44x */
> -BEGIN_FTR_SECTION
> - lwarx r7,0,r1
> -END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
> - stwcx. r0,0,r1 /* to clear the reservation */
> - ACCOUNT_CPU_USER_EXIT(r2, r5, r7)
> -#ifdef CONFIG_PPC_BOOK3S_32
> - kuep_unlock r5, r7
> -#endif
> - kuap_check r2, r4
> - lwz r4,_LINK(r1)
> - lwz r5,_CCR(r1)
> - mtlr r4
> - mtcr r5
> - lwz r7,_NIP(r1)
> - lwz r2,GPR2(r1)
> - lwz r1,GPR1(r1)
> -#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
> - mtspr SPRN_NRI, r0
> -#endif
> - mtspr SPRN_SRR0,r7
> - mtspr SPRN_SRR1,r8
> - SYNC
> - RFI
> -#ifdef CONFIG_44x
> -2: li r7,0
> - iccci r0,r0
> - stw r7,icache_44x_need_flush@l(r4)
> + addi r4, r1, STACK_FRAME_OVERHEAD
> + bl syscall_exit_prepare
> + lwz r2, _CCR(r1)
> + lwz r4, _NIP(r1)
> + lwz r5, _MSR(r1)
> + lwz r6, _LINK(r1)
> + mtspr SPRN_SRR0, r4
> + mtspr SPRN_SRR1, r5
> + mtlr r6
> + cmpwi r3, 0
> + bne 2f
> +1: mtcr r2
> + REST_GPR(2, r1)
> + REST_GPR(3, r1)
> + REST_GPR(1, r1)
> + rfi
> +2: lwz r3, _CTR(r1)
> + lwz r4, _XER(r1)
> + REST_NVGPRS(r1)
> + mtctr r3
> + mtspr SPRN_XER, r4
> + REST_GPR(0, r1)
> + REST_8GPRS(4, r1)
> + REST_GPR(12, r1)
> b 1b
> -#endif /* CONFIG_44x */
> -
> -66: li r3,-ENOSYS
> - b ret_from_syscall
>
> .globl ret_from_fork
> ret_from_fork:
> @@ -490,86 +365,6 @@ ret_from_kernel_thread:
> li r3,0
> b ret_from_syscall
>
> -/* Traced system call support */
> -syscall_dotrace:
> - SAVE_NVGPRS(r1)
> - li r0,0xc00
> - stw r0,_TRAP(r1)
> - addi r3,r1,STACK_FRAME_OVERHEAD
> - bl do_syscall_trace_enter
> - /*
> - * Restore argument registers possibly just changed.
> - * We use the return value of do_syscall_trace_enter
> - * for call number to look up in the table (r0).
> - */
> - mr r0,r3
> - lwz r3,GPR3(r1)
> - lwz r4,GPR4(r1)
> - lwz r5,GPR5(r1)
> - lwz r6,GPR6(r1)
> - lwz r7,GPR7(r1)
> - lwz r8,GPR8(r1)
> - REST_NVGPRS(r1)
> -
> - cmplwi r0,NR_syscalls
> - /* Return code is already in r3 thanks to do_syscall_trace_enter() */
> - bge- ret_from_syscall
> - b syscall_dotrace_cont
> -
> -syscall_exit_work:
> - andi. r0,r9,_TIF_RESTOREALL
> - beq+ 0f
> - REST_NVGPRS(r1)
> - b 2f
> -0: cmplw 0,r3,r8
> - blt+ 1f
> - andi. r0,r9,_TIF_NOERROR
> - bne- 1f
> - lwz r11,_CCR(r1) /* Load CR */
> - neg r3,r3
> - oris r11,r11,0x1000 /* Set SO bit in CR */
> - stw r11,_CCR(r1)
> -
> -1: stw r6,RESULT(r1) /* Save result */
> - stw r3,GPR3(r1) /* Update return value */
> -2: andi. r0,r9,(_TIF_PERSYSCALL_MASK)
> - beq 4f
> -
> - /* Clear per-syscall TIF flags if any are set. */
> -
> - li r11,_TIF_PERSYSCALL_MASK
> - addi r12,r2,TI_FLAGS
> -3: lwarx r8,0,r12
> - andc r8,r8,r11
> -#ifdef CONFIG_IBM405_ERR77
> - dcbt 0,r12
> -#endif
> - stwcx. r8,0,r12
> - bne- 3b
> -
> -4: /* Anything which requires enabling interrupts? */
> - andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP)
> - beq ret_from_except
> -
> - /* Re-enable interrupts. There is no need to trace that with
> - * lockdep as we are supposed to have IRQs on at this point
> - */
> - ori r10,r10,MSR_EE
> - SYNC
> - mtmsr r10
> -
> - /* Save NVGPRS if they're not saved already */
> - lwz r4,_TRAP(r1)
> - andi. r4,r4,1
> - beq 5f
> - SAVE_NVGPRS(r1)
> - li r4,0xc00
> - stw r4,_TRAP(r1)
> -5:
> - addi r3,r1,STACK_FRAME_OVERHEAD
> - bl do_syscall_trace_leave
> - b ret_from_except_full
> -
> /*
> * System call was called from kernel. We get here with SRR1 in r9.
> * Mark the exception as recoverable once we have retrieved SRR0,
> diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
> index 9abec6cd099c..97691931a306 100644
> --- a/arch/powerpc/kernel/head_32.h
> +++ b/arch/powerpc/kernel/head_32.h
> @@ -174,12 +174,13 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
> stw r2,GPR2(r11)
> addi r10,r10,STACK_FRAME_REGS_MARKER@l
> stw r9,_MSR(r11)
> - li r2, \trapno + 1
> + li r2, \trapno
> stw r10,8(r11)
> stw r2,_TRAP(r11)
> SAVE_GPR(0, r11)
> SAVE_4GPRS(3, r11)
> SAVE_2GPRS(7, r11)
> + SAVE_NVGPRS(r11)
> addi r11,r1,STACK_FRAME_OVERHEAD
> addi r2,r12,-THREAD
> stw r11,PT_REGS(r12)
> diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall.c
> similarity index 97%
> rename from arch/powerpc/kernel/syscall_64.c
> rename to arch/powerpc/kernel/syscall.c
> index cf06eb443a80..a07702a85f81 100644
> --- a/arch/powerpc/kernel/syscall_64.c
> +++ b/arch/powerpc/kernel/syscall.c
> @@ -1,8 +1,9 @@
> // SPDX-License-Identifier: GPL-2.0-or-later
>
> #include <linux/err.h>
> +#include <linux/compat.h>
> +
> #include <asm/asm-prototypes.h>
> -#include <asm/book3s/64/kup-radix.h>
> #include <asm/cputime.h>
> #include <asm/hw_irq.h>
> #include <asm/kprobes.h>
> @@ -25,16 +26,20 @@ notrace long system_call_exception(long r3, long r4, long r5,
> unsigned long ti_flags;
> syscall_fn f;
>
> +#ifdef CONFIG_PPC64
> if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
> BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
> +#endif
>
> trace_hardirqs_off(); /* finish reconciling */
>
> - if (IS_ENABLED(CONFIG_PPC_BOOK3S))
> + if (!IS_ENABLED(CONFIG_PPC_BOOK3E))
> BUG_ON(!(regs->msr & MSR_RI));
> BUG_ON(!(regs->msr & MSR_PR));
> BUG_ON(!FULL_REGS(regs));
> +#ifdef CONFIG_PPC64
> BUG_ON(regs->softe != IRQS_ENABLED);
> +#endif
>
> account_cpu_user_entry();
>
> @@ -48,7 +53,7 @@ notrace long system_call_exception(long r3, long r4, long r5,
> }
> #endif
>
> - kuap_check_amr();
> + kuap_check();
>
> /*
> * This is not required for the syscall exit path, but makes the
> @@ -56,7 +61,9 @@ notrace long system_call_exception(long r3, long r4, long r5,
> * frame, or if the unwinder was taught the first stack frame always
> * returns to user with IRQS_ENABLED, this store could be avoided!
> */
> +#ifdef CONFIG_PPC64
> regs->softe = IRQS_ENABLED;
> +#endif
>
> local_irq_enable();
>
> @@ -86,7 +93,7 @@ notrace long system_call_exception(long r3, long r4, long r5,
> /* May be faster to do array_index_nospec? */
> barrier_nospec();
>
> - if (unlikely(ti_flags & _TIF_32BIT)) {
> + if (is_compat_task()) {
> f = (void *)compat_sys_call_table[r0];
>
> r3 &= 0x00000000ffffffffULL;
> @@ -148,7 +155,9 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
> ret |= _TIF_RESTOREALL;
> }
>
> +#ifdef CONFIG_PPC64
> again:
> +#endif
> local_irq_disable();
> ti_flags = READ_ONCE(*ti_flagsp);
> while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
> @@ -191,6 +200,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
>
> /* This pattern matches prep_irq_for_idle */
> __hard_EE_RI_disable();
> +#ifdef CONFIG_PPC64
> if (unlikely(lazy_irq_pending())) {
> __hard_RI_enable();
> trace_hardirqs_off();
> @@ -201,12 +211,13 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
> }
> local_paca->irq_happened = 0;
> irq_soft_mask_set(IRQS_ENABLED);
> +#endif
>
> #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
> local_paca->tm_scratch = regs->msr;
> #endif
>
> - kuap_check_amr();
> + kuap_check();
>
> account_cpu_user_exit();
>
> @@ -294,7 +305,7 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned
> local_paca->tm_scratch = regs->msr;
> #endif
>
> - kuap_check_amr();
> + kuap_check();
>
> account_cpu_user_exit();
>
> @@ -372,7 +383,7 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign
> * We don't need to restore AMR on the way back to userspace for KUAP.
> * The value of AMR only matters while we're in the kernel.
> */
> - kuap_restore_amr(regs);
> + kuap_restore(regs);
>
> return ret;
> }
>

2020-04-03 07:52:54

by Nicholas Piggin

[permalink] [raw]
Subject: Re: [RFC WIP PATCH] powerpc/32: system call implement entry/exit logic in C

Christophe Leroy's on April 1, 2020 9:48 pm:
>
>
> Le 31/03/2020 à 17:22, Christophe Leroy a écrit :
>> That's first try to port PPC64 syscall entry/exit logic in C to PPC32.
>> I've do the minimum to get it work. I have not reworked calls
>> to sys_fork() and friends for instance.
>>
>> For the time being, it seems to work more or less but:
>> - ping reports EINVAL on recvfrom
>> - strace shows NULL instead of strings in call like open() for instance.
>
> For the two above problems, that's because system_call_exception()
> doesn't set orig_gpr3 whereas DoSycall() does in entry_32.S . Is that
> only done on PPC32 ?
>
> With the following line at the begining of system_call_exception(), it
> works perfectly:
>
> regs->orig_gpr3 = r3;

Oh great, nice work. We should be able to make some simple helpers or
move some things a bit to reduce the amount of ifdefs in the C code.
It doesn't look too bad though.

> I will now focus on performance to see if we can do something about it.

What's the performance difference between current asm code just with
always saving NVGPRS vs C?

Thanks,
Nick

2020-04-05 19:02:47

by Christophe Leroy

[permalink] [raw]
Subject: Re: [RFC WIP PATCH] powerpc/32: system call implement entry/exit logic in C



Le 03/04/2020 à 09:33, Nicholas Piggin a écrit :
> Christophe Leroy's on April 1, 2020 9:48 pm:
>>
>>
>> Le 31/03/2020 à 17:22, Christophe Leroy a écrit :
>>> That's first try to port PPC64 syscall entry/exit logic in C to PPC32.
>>> I've do the minimum to get it work. I have not reworked calls
>>> to sys_fork() and friends for instance.
>>>
>>> For the time being, it seems to work more or less but:
>>> - ping reports EINVAL on recvfrom
>>> - strace shows NULL instead of strings in call like open() for instance.
>>
>> For the two above problems, that's because system_call_exception()
>> doesn't set orig_gpr3 whereas DoSycall() does in entry_32.S . Is that
>> only done on PPC32 ?
>>
>> With the following line at the begining of system_call_exception(), it
>> works perfectly:
>>
>> regs->orig_gpr3 = r3;
>
> Oh great, nice work. We should be able to make some simple helpers or
> move some things a bit to reduce the amount of ifdefs in the C code.
> It doesn't look too bad though.
>
>> I will now focus on performance to see if we can do something about it.
>
> What's the performance difference between current asm code just with
> always saving NVGPRS vs C?

Done new measurement and sent a series. lower values now because the
time accounting was done twice as it was still in the ASM part.

Before the series, 311 cycles for a null_syscall
If adding SAVE_NVGPRS to the entry macro, 335 cycles

First patch: 353 cycles ie +13,5%

After a few changes, including conditional saving of non volatile
registers, I get 325 cycles that is only +4,5%. I thing that's acceptable.

Do you see a problem with still saving non volatile registers only when
required ?

Christophe