Hello,
This is a merge of https://patchwork.ozlabs.org/cover/1162376/ (except the
last two experimental patches) and
https://patchwork.ozlabs.org/patch/1162079/, rebased on top of master.
There was a minor conflict in the Makefile in the latter series.
Refreshed the patchset to fix build errors on ppc32 and ppc64e.
Rebased on top of powerpc/merge.
Thanks
Michal
Michal Suchanek (9):
powerpc/64: system call: Fix sparse warning about missing declaration
powerpc: Add back __ARCH_WANT_SYS_LLSEEK macro
powerpc: move common register copy functions from signal_32.c to
signal.c
powerpc/perf: consolidate read_user_stack_32
powerpc/perf: consolidate valid_user_sp
powerpc/64: make buildable without CONFIG_COMPAT
powerpc/64: Make COMPAT user-selectable disabled on littleendian by
default.
powerpc/perf: split callchain.c by bitness
MAINTAINERS: perf: Add pattern that matches ppc perf to the perf
entry.
Nicholas Piggin (25):
powerpc/64s/exception: Introduce INT_DEFINE parameter block for code
generation
powerpc/64s/exception: Add GEN_COMMON macro that uses INT_DEFINE
parameters
powerpc/64s/exception: Add GEN_KVM macro that uses INT_DEFINE
parameters
powerpc/64s/exception: Expand EXC_COMMON and EXC_COMMON_ASYNC macros
powerpc/64s/exception: Move all interrupt handlers to new style code
gen macros
powerpc/64s/exception: Remove old INT_ENTRY macro
powerpc/64s/exception: Remove old INT_COMMON macro
powerpc/64s/exception: Remove old INT_KVM_HANDLER
powerpc/64s/exception: Add ISIDE option
powerpc/64s/exception: move real->virt switch into the common handler
powerpc/64s/exception: move soft-mask test to common code
powerpc/64s/exception: move KVM test to common code
powerpc/64s/exception: remove confusing IEARLY option
powerpc/64s/exception: remove the SPR saving patch code macros
powerpc/64s/exception: trim unused arguments from KVMTEST macro
powerpc/64s/exception: hdecrementer avoid touching the stack
powerpc/64s/exception: re-inline some handlers
powerpc/64s/exception: Clean up SRR specifiers
powerpc/64s/exception: add more comments for interrupt handlers
powerpc/64s/exception: only test KVM in SRR interrupts when PR KVM is
supported
powerpc/64s/exception: soft nmi interrupt should not use
ret_from_except
powerpc/64: system call remove non-volatile GPR save optimisation
powerpc/64: system call implement the bulk of the logic in C
powerpc/64s: interrupt return in C
powerpc/64s/exception: remove lite interrupt return
MAINTAINERS | 2 +
arch/powerpc/Kconfig | 5 +-
arch/powerpc/include/asm/asm-prototypes.h | 17 +-
.../powerpc/include/asm/book3s/64/kup-radix.h | 24 +-
arch/powerpc/include/asm/cputime.h | 24 +
arch/powerpc/include/asm/exception-64s.h | 4 -
arch/powerpc/include/asm/hw_irq.h | 4 +
arch/powerpc/include/asm/ptrace.h | 3 +
arch/powerpc/include/asm/signal.h | 3 +
arch/powerpc/include/asm/switch_to.h | 11 +
arch/powerpc/include/asm/thread_info.h | 4 +-
arch/powerpc/include/asm/time.h | 4 +-
arch/powerpc/include/asm/unistd.h | 1 +
arch/powerpc/kernel/Makefile | 9 +-
arch/powerpc/kernel/entry_64.S | 880 ++------
arch/powerpc/kernel/exceptions-64e.S | 255 ++-
arch/powerpc/kernel/exceptions-64s.S | 1937 ++++++++++++-----
arch/powerpc/kernel/process.c | 89 +-
arch/powerpc/kernel/signal.c | 144 +-
arch/powerpc/kernel/signal.h | 2 -
arch/powerpc/kernel/signal_32.c | 140 --
arch/powerpc/kernel/syscall_64.c | 349 +++
arch/powerpc/kernel/syscalls/syscall.tbl | 22 +-
arch/powerpc/kernel/systbl.S | 9 +-
arch/powerpc/kernel/time.c | 9 -
arch/powerpc/kernel/vdso.c | 3 +-
arch/powerpc/kernel/vector.S | 2 +-
arch/powerpc/kvm/book3s_hv_rmhandlers.S | 11 -
arch/powerpc/kvm/book3s_segment.S | 7 -
arch/powerpc/perf/Makefile | 5 +-
arch/powerpc/perf/callchain.c | 370 +---
arch/powerpc/perf/callchain.h | 20 +
arch/powerpc/perf/callchain_32.c | 197 ++
arch/powerpc/perf/callchain_64.c | 178 ++
fs/read_write.c | 3 +-
35 files changed, 2798 insertions(+), 1949 deletions(-)
create mode 100644 arch/powerpc/kernel/syscall_64.c
create mode 100644 arch/powerpc/perf/callchain.h
create mode 100644 arch/powerpc/perf/callchain_32.c
create mode 100644 arch/powerpc/perf/callchain_64.c
--
2.23.0
From: Nicholas Piggin <[email protected]>
Implement the bulk of interrupt return logic in C. The asm return code
must still handle a few cases: restoring full GPRs and emulating the
stack store.
The asm return code is moved into 64e for now. The new logic makes
allowances for 64e, but I don't have a full environment that works well
for testing it, and even booting in emulated qemu is not great for stress
testing. 64e shouldn't be too far off working with this, given a bit more
testing and auditing of the logic.
This is slightly faster on a POWER9 (page fault speed increases by about
1.1%), probably due to fewer mtmsrd instructions.
Signed-off-by: Nicholas Piggin <[email protected]>
[ms: Move the FP restore functions to restore_math. They are not used
anywhere else, and when restore_math is not built gcc warns about them
being unused.
Add an asm/context_tracking.h include to exceptions-64e.S for the
SCHEDULE_USER definition.]
Signed-off-by: Michal Suchanek <[email protected]>
---
.../powerpc/include/asm/book3s/64/kup-radix.h | 10 +
arch/powerpc/include/asm/switch_to.h | 6 +
arch/powerpc/kernel/entry_64.S | 475 ++++--------------
arch/powerpc/kernel/exceptions-64e.S | 255 +++++++++-
arch/powerpc/kernel/exceptions-64s.S | 119 ++---
arch/powerpc/kernel/process.c | 89 ++--
arch/powerpc/kernel/syscall_64.c | 157 +++++-
arch/powerpc/kernel/vector.S | 2 +-
8 files changed, 623 insertions(+), 490 deletions(-)
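For reference, the core of the new C exit path (interrupt_exit_user_prepare
in the diff below) reduces to the following sketch. This is condensed from
the patch itself, with the BOOK3E debug, TM and KUAP details omitted, so it
is illustrative rather than the exact code:

notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs,
						  unsigned long msr)
{
	unsigned long *ti_flagsp = &current_thread_info()->flags;
	unsigned long ti_flags, flags, ret = 0;

	local_irq_save(flags);
again:
	/* Run signal/reschedule work with IRQs enabled, then re-check. */
	ti_flags = READ_ONCE(*ti_flagsp);
	while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
		local_irq_enable();
		if (ti_flags & _TIF_NEED_RESCHED)
			schedule();
		else
			do_notify_resume(regs, ti_flags);
		local_irq_disable();
		ti_flags = READ_ONCE(*ti_flagsp);
	}

	/*
	 * Hard-disable EE/RI; if an interrupt became pending in the
	 * meantime, replay it with IRQs enabled and start over.
	 */
	trace_hardirqs_on();
	__hard_EE_RI_disable();
	if (unlikely(lazy_irq_pending())) {
		__hard_RI_enable();
		trace_hardirqs_off();
		local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
		local_irq_enable();
		local_irq_disable();
		goto again;
	}
	local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
	irq_soft_mask_set(IRQS_ENABLED);

	account_cpu_user_exit();

	return ret;	/* non-zero tells the asm caller to restore NVGPRs */
}

The asm stub only has to test the return value (cmpdi r3,0) to decide
whether the NVGPRs need restoring before the final RFI, which is what
keeps the fast path small.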
diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h
index 07058edc5970..762afbed4762 100644
--- a/arch/powerpc/include/asm/book3s/64/kup-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h
@@ -60,6 +60,12 @@
#include <asm/mmu.h>
#include <asm/ptrace.h>
+static inline void kuap_restore_amr(struct pt_regs *regs)
+{
+ if (mmu_has_feature(MMU_FTR_RADIX_KUAP))
+ mtspr(SPRN_AMR, regs->kuap);
+}
+
static inline void kuap_check_amr(void)
{
if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && mmu_has_feature(MMU_FTR_RADIX_KUAP))
@@ -110,6 +116,10 @@ static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write)
"Bug: %s fault blocked by AMR!", is_write ? "Write" : "Read");
}
#else /* CONFIG_PPC_KUAP */
+static inline void kuap_restore_amr(struct pt_regs *regs)
+{
+}
+
static inline void kuap_check_amr(void)
{
}
diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h
index 476008bc3d08..b867b58b1093 100644
--- a/arch/powerpc/include/asm/switch_to.h
+++ b/arch/powerpc/include/asm/switch_to.h
@@ -23,7 +23,13 @@ extern void switch_booke_debug_regs(struct debug_reg *new_debug);
extern int emulate_altivec(struct pt_regs *);
+#ifdef CONFIG_PPC_BOOK3S_64
void restore_math(struct pt_regs *regs);
+#else
+static inline void restore_math(struct pt_regs *regs)
+{
+}
+#endif
void restore_tm_state(struct pt_regs *regs);
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 15bc2a872a76..b2e68f5ca8f7 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -16,6 +16,7 @@
#include <linux/errno.h>
#include <linux/err.h>
+#include <asm/cache.h>
#include <asm/unistd.h>
#include <asm/processor.h>
#include <asm/page.h>
@@ -279,7 +280,7 @@ flush_count_cache:
* state of one is saved on its kernel stack. Then the state
* of the other is restored from its kernel stack. The memory
* management hardware is updated to the second process's state.
- * Finally, we can return to the second process, via ret_from_except.
+ * Finally, we can return to the second process, via interrupt_return.
* On entry, r3 points to the THREAD for the current task, r4
* points to the THREAD for the new task.
*
@@ -433,408 +434,150 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
addi r1,r1,SWITCH_FRAME_SIZE
blr
- .align 7
-_GLOBAL(ret_from_except)
- ld r11,_TRAP(r1)
- andi. r0,r11,1
- bne ret_from_except_lite
- REST_NVGPRS(r1)
-
-_GLOBAL(ret_from_except_lite)
+#ifdef CONFIG_PPC_BOOK3S
/*
- * Disable interrupts so that current_thread_info()->flags
- * can't change between when we test it and when we return
- * from the interrupt.
- */
-#ifdef CONFIG_PPC_BOOK3E
- wrteei 0
-#else
- li r10,MSR_RI
- mtmsrd r10,1 /* Update machine state */
-#endif /* CONFIG_PPC_BOOK3E */
+ * If MSR EE/RI was never enabled, IRQs not reconciled, NVGPRs not
+ * touched, AMR not set, no exit work created, then this can be used.
+ */
+ .balign IFETCH_ALIGN_BYTES
+_GLOBAL(fast_interrupt_return)
+ ld r4,_MSR(r1)
+ andi. r0,r4,MSR_PR
+ bne .Lfast_user_interrupt_return
+ andi. r0,r4,MSR_RI
+ bne+ .Lfast_kernel_interrupt_return
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl unrecoverable_exception
+ b . /* should not get here */
- ld r9, PACA_THREAD_INFO(r13)
- ld r3,_MSR(r1)
-#ifdef CONFIG_PPC_BOOK3E
- ld r10,PACACURRENT(r13)
-#endif /* CONFIG_PPC_BOOK3E */
- ld r4,TI_FLAGS(r9)
- andi. r3,r3,MSR_PR
- beq resume_kernel
-#ifdef CONFIG_PPC_BOOK3E
- lwz r3,(THREAD+THREAD_DBCR0)(r10)
-#endif /* CONFIG_PPC_BOOK3E */
+ .balign IFETCH_ALIGN_BYTES
+_GLOBAL(interrupt_return)
+ REST_NVGPRS(r1)
- /* Check current_thread_info()->flags */
- andi. r0,r4,_TIF_USER_WORK_MASK
- bne 1f
-#ifdef CONFIG_PPC_BOOK3E
- /*
- * Check to see if the dbcr0 register is set up to debug.
- * Use the internal debug mode bit to do this.
- */
- andis. r0,r3,DBCR0_IDM@h
- beq restore
- mfmsr r0
- rlwinm r0,r0,0,~MSR_DE /* Clear MSR.DE */
- mtmsr r0
- mtspr SPRN_DBCR0,r3
- li r10, -1
- mtspr SPRN_DBSR,r10
- b restore
-#else
+ .balign IFETCH_ALIGN_BYTES
+_GLOBAL(interrupt_return_lite)
+ ld r4,_MSR(r1)
+ andi. r0,r4,MSR_PR
+ beq kernel_interrupt_return
+user_interrupt_return:
addi r3,r1,STACK_FRAME_OVERHEAD
- bl restore_math
- b restore
-#endif
-1: andi. r0,r4,_TIF_NEED_RESCHED
- beq 2f
- bl restore_interrupts
- SCHEDULE_USER
- b ret_from_except_lite
-2:
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- andi. r0,r4,_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM
- bne 3f /* only restore TM if nothing else to do */
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl restore_tm_state
- b restore
-3:
-#endif
- bl save_nvgprs
- /*
- * Use a non volatile GPR to save and restore our thread_info flags
- * across the call to restore_interrupts.
- */
- mr r30,r4
- bl restore_interrupts
- mr r4,r30
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl do_notify_resume
- b ret_from_except
-
-resume_kernel:
- /* check current_thread_info, _TIF_EMULATE_STACK_STORE */
- andis. r8,r4,_TIF_EMULATE_STACK_STORE@h
- beq+ 1f
+ bl interrupt_exit_user_prepare
+ cmpdi r3,0
+ bne- .Lrestore_nvgprs
- addi r8,r1,INT_FRAME_SIZE /* Get the kprobed function entry */
+.Lfast_user_interrupt_return:
+ ld r11,_NIP(r1)
+ ld r12,_MSR(r1)
+BEGIN_FTR_SECTION
+ ld r10,_PPR(r1)
+ mtspr SPRN_PPR,r10
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+ mtspr SPRN_SRR0,r11
+ mtspr SPRN_SRR1,r12
- ld r3,GPR1(r1)
- subi r3,r3,INT_FRAME_SIZE /* dst: Allocate a trampoline exception frame */
- mr r4,r1 /* src: current exception frame */
- mr r1,r3 /* Reroute the trampoline frame to r1 */
+BEGIN_FTR_SECTION
+ stdcx. r0,0,r1 /* to clear the reservation */
+FTR_SECTION_ELSE
+ ldarx r0,0,r1
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
- /* Copy from the original to the trampoline. */
- li r5,INT_FRAME_SIZE/8 /* size: INT_FRAME_SIZE */
- li r6,0 /* start offset: 0 */
- mtctr r5
-2: ldx r0,r6,r4
- stdx r0,r6,r3
- addi r6,r6,8
- bdnz 2b
-
- /* Do real store operation to complete stdu */
- ld r5,GPR1(r1)
- std r8,0(r5)
-
- /* Clear _TIF_EMULATE_STACK_STORE flag */
- lis r11,_TIF_EMULATE_STACK_STORE@h
- addi r5,r9,TI_FLAGS
-0: ldarx r4,0,r5
- andc r4,r4,r11
- stdcx. r4,0,r5
- bne- 0b
-1:
-
-#ifdef CONFIG_PREEMPT
- /* Check if we need to preempt */
- andi. r0,r4,_TIF_NEED_RESCHED
- beq+ restore
- /* Check that preempt_count() == 0 and interrupts are enabled */
- lwz r8,TI_PREEMPT(r9)
- cmpwi cr0,r8,0
- bne restore
- ld r0,SOFTE(r1)
- andi. r0,r0,IRQS_DISABLED
- bne restore
+ ld r3,_CCR(r1)
+ ld r4,_LINK(r1)
+ ld r5,_CTR(r1)
+ ld r6,_XER(r1)
+ li r0,0
- /*
- * Here we are preempting the current task. We want to make
- * sure we are soft-disabled first and reconcile irq state.
- */
- RECONCILE_IRQ_STATE(r3,r4)
- bl preempt_schedule_irq
+ REST_4GPRS(7, r1)
+ REST_2GPRS(11, r1)
+ REST_GPR(13, r1)
- /*
- * arch_local_irq_restore() from preempt_schedule_irq above may
- * enable hard interrupt but we really should disable interrupts
- * when we return from the interrupt, and so that we don't get
- * interrupted after loading SRR0/1.
- */
-#ifdef CONFIG_PPC_BOOK3E
- wrteei 0
-#else
- li r10,MSR_RI
- mtmsrd r10,1 /* Update machine state */
-#endif /* CONFIG_PPC_BOOK3E */
-#endif /* CONFIG_PREEMPT */
+ mtcr r3
+ mtlr r4
+ mtctr r5
+ mtspr SPRN_XER,r6
- .globl fast_exc_return_irq
-fast_exc_return_irq:
-restore:
- /*
- * This is the main kernel exit path. First we check if we
- * are about to re-enable interrupts
- */
- ld r5,SOFTE(r1)
- lbz r6,PACAIRQSOFTMASK(r13)
- andi. r5,r5,IRQS_DISABLED
- bne .Lrestore_irq_off
+ REST_4GPRS(2, r1)
+ REST_GPR(6, r1)
+ REST_GPR(0, r1)
+ REST_GPR(1, r1)
+ RFI_TO_USER
+ b . /* prevent speculative execution */
- /* We are enabling, were we already enabled ? Yes, just return */
- andi. r6,r6,IRQS_DISABLED
- beq cr0,.Ldo_restore
+.Lrestore_nvgprs:
+ REST_NVGPRS(r1)
+ b .Lfast_user_interrupt_return
- /*
- * We are about to soft-enable interrupts (we are hard disabled
- * at this point). We check if there's anything that needs to
- * be replayed first.
- */
- lbz r0,PACAIRQHAPPENED(r13)
- cmpwi cr0,r0,0
- bne- .Lrestore_check_irq_replay
+ .balign IFETCH_ALIGN_BYTES
+kernel_interrupt_return:
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl interrupt_exit_kernel_prepare
+ cmpdi cr1,r3,0
- /*
- * Get here when nothing happened while soft-disabled, just
- * soft-enable and move-on. We will hard-enable as a side
- * effect of rfi
- */
-.Lrestore_no_replay:
- TRACE_ENABLE_INTS
- li r0,IRQS_ENABLED
- stb r0,PACAIRQSOFTMASK(r13);
+.Lfast_kernel_interrupt_return:
+ ld r11,_NIP(r1)
+ ld r12,_MSR(r1)
+ mtspr SPRN_SRR0,r11
+ mtspr SPRN_SRR1,r12
- /*
- * Final return path. BookE is handled in a different file
- */
-.Ldo_restore:
-#ifdef CONFIG_PPC_BOOK3E
- b exception_return_book3e
-#else
- /*
- * Clear the reservation. If we know the CPU tracks the address of
- * the reservation then we can potentially save some cycles and use
- * a larx. On POWER6 and POWER7 this is significantly faster.
- */
BEGIN_FTR_SECTION
stdcx. r0,0,r1 /* to clear the reservation */
FTR_SECTION_ELSE
- ldarx r4,0,r1
+ ldarx r0,0,r1
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
- /*
- * Some code path such as load_up_fpu or altivec return directly
- * here. They run entirely hard disabled and do not alter the
- * interrupt state. They also don't use lwarx/stwcx. and thus
- * are known not to leave dangling reservations.
- */
- .globl fast_exception_return
-fast_exception_return:
- ld r3,_MSR(r1)
- ld r4,_CTR(r1)
- ld r0,_LINK(r1)
- mtctr r4
- mtlr r0
- ld r4,_XER(r1)
- mtspr SPRN_XER,r4
-
- kuap_check_amr r5, r6
-
- REST_8GPRS(5, r1)
+ ld r3,_CCR(r1)
+ ld r4,_LINK(r1)
+ ld r5,_CTR(r1)
+ ld r6,_XER(r1)
+ li r0,0
- andi. r0,r3,MSR_RI
- beq- .Lunrecov_restore
+ REST_4GPRS(7, r1)
+ REST_2GPRS(11, r1)
- /*
- * Clear RI before restoring r13. If we are returning to
- * userspace and we take an exception after restoring r13,
- * we end up corrupting the userspace r13 value.
- */
- li r4,0
- mtmsrd r4,1
-
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- /* TM debug */
- std r3, PACATMSCRATCH(r13) /* Stash returned-to MSR */
-#endif
- /*
- * r13 is our per cpu area, only restore it if we are returning to
- * userspace the value stored in the stack frame may belong to
- * another CPU.
- */
- andi. r0,r3,MSR_PR
- beq 1f
-BEGIN_FTR_SECTION
- /* Restore PPR */
- ld r2,_PPR(r1)
- mtspr SPRN_PPR,r2
-END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
- ACCOUNT_CPU_USER_EXIT(r13, r2, r4)
- REST_GPR(13, r1)
+ bne- cr1,1f /* emulate stack store */
+ mtcr r3
+ mtlr r4
+ mtctr r5
+ mtspr SPRN_XER,r6
/*
- * We don't need to restore AMR on the way back to userspace for KUAP.
- * The value of AMR only matters while we're in the kernel.
+ * Leaving a stale exception_marker on the stack can confuse
+ * the reliable stack unwinder later on. Clear it.
*/
- mtspr SPRN_SRR1,r3
-
- ld r2,_CCR(r1)
- mtcrf 0xFF,r2
- ld r2,_NIP(r1)
- mtspr SPRN_SRR0,r2
+ std r0,STACK_FRAME_OVERHEAD-16(r1)
- ld r0,GPR0(r1)
- ld r2,GPR2(r1)
- ld r3,GPR3(r1)
- ld r4,GPR4(r1)
- ld r1,GPR1(r1)
- RFI_TO_USER
+ REST_4GPRS(2, r1)
+ REST_GPR(6, r1)
+ REST_GPR(0, r1)
+ REST_GPR(1, r1)
+ RFI_TO_KERNEL
b . /* prevent speculative execution */
-1: mtspr SPRN_SRR1,r3
-
- ld r2,_CCR(r1)
- mtcrf 0xFF,r2
- ld r2,_NIP(r1)
- mtspr SPRN_SRR0,r2
+1: mtcr r3
+ mtlr r4
+ mtctr r5
+ mtspr SPRN_XER,r6
/*
* Leaving a stale exception_marker on the stack can confuse
* the reliable stack unwinder later on. Clear it.
*/
- li r2,0
- std r2,STACK_FRAME_OVERHEAD-16(r1)
+ std r0,STACK_FRAME_OVERHEAD-16(r1)
- ld r0,GPR0(r1)
- ld r2,GPR2(r1)
- ld r3,GPR3(r1)
+ REST_4GPRS(2, r1)
+ REST_GPR(6, r1)
+ REST_GPR(0, r1)
- kuap_restore_amr r4
+ /* Nasty emulate stack store case. */
+ std r9,PACA_EXGEN+0(r13)
+ addi r9,r1,INT_FRAME_SIZE /* get original r1 */
+ REST_GPR(1, r1)
+ std r9,0(r1)
+ ld r9,PACA_EXGEN+0(r13)
- ld r4,GPR4(r1)
- ld r1,GPR1(r1)
RFI_TO_KERNEL
b . /* prevent speculative execution */
-
-#endif /* CONFIG_PPC_BOOK3E */
-
- /*
- * We are returning to a context with interrupts soft disabled.
- *
- * However, we may also about to hard enable, so we need to
- * make sure that in this case, we also clear PACA_IRQ_HARD_DIS
- * or that bit can get out of sync and bad things will happen
- */
-.Lrestore_irq_off:
- ld r3,_MSR(r1)
- lbz r7,PACAIRQHAPPENED(r13)
- andi. r0,r3,MSR_EE
- beq 1f
- rlwinm r7,r7,0,~PACA_IRQ_HARD_DIS
- stb r7,PACAIRQHAPPENED(r13)
-1:
-#if defined(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && defined(CONFIG_BUG)
- /* The interrupt should not have soft enabled. */
- lbz r7,PACAIRQSOFTMASK(r13)
-1: tdeqi r7,IRQS_ENABLED
- EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING
-#endif
- b .Ldo_restore
-
- /*
- * Something did happen, check if a re-emit is needed
- * (this also clears paca->irq_happened)
- */
-.Lrestore_check_irq_replay:
- /* XXX: We could implement a fast path here where we check
- * for irq_happened being just 0x01, in which case we can
- * clear it and return. That means that we would potentially
- * miss a decrementer having wrapped all the way around.
- *
- * Still, this might be useful for things like hash_page
- */
- bl __check_irq_replay
- cmpwi cr0,r3,0
- beq .Lrestore_no_replay
-
- /*
- * We need to re-emit an interrupt. We do so by re-using our
- * existing exception frame. We first change the trap value,
- * but we need to ensure we preserve the low nibble of it
- */
- ld r4,_TRAP(r1)
- clrldi r4,r4,60
- or r4,r4,r3
- std r4,_TRAP(r1)
-
- /*
- * PACA_IRQ_HARD_DIS won't always be set here, so set it now
- * to reconcile the IRQ state. Tracing is already accounted for.
- */
- lbz r4,PACAIRQHAPPENED(r13)
- ori r4,r4,PACA_IRQ_HARD_DIS
- stb r4,PACAIRQHAPPENED(r13)
-
- /*
- * Then find the right handler and call it. Interrupts are
- * still soft-disabled and we keep them that way.
- */
- cmpwi cr0,r3,0x500
- bne 1f
- addi r3,r1,STACK_FRAME_OVERHEAD;
- bl do_IRQ
- b ret_from_except
-1: cmpwi cr0,r3,0xf00
- bne 1f
- addi r3,r1,STACK_FRAME_OVERHEAD;
- bl performance_monitor_exception
- b ret_from_except
-1: cmpwi cr0,r3,0xe60
- bne 1f
- addi r3,r1,STACK_FRAME_OVERHEAD;
- bl handle_hmi_exception
- b ret_from_except
-1: cmpwi cr0,r3,0x900
- bne 1f
- addi r3,r1,STACK_FRAME_OVERHEAD;
- bl timer_interrupt
- b ret_from_except
-#ifdef CONFIG_PPC_DOORBELL
-1:
-#ifdef CONFIG_PPC_BOOK3E
- cmpwi cr0,r3,0x280
-#else
- cmpwi cr0,r3,0xa00
-#endif /* CONFIG_PPC_BOOK3E */
- bne 1f
- addi r3,r1,STACK_FRAME_OVERHEAD;
- bl doorbell_exception
-#endif /* CONFIG_PPC_DOORBELL */
-1: b ret_from_except /* What else to do here ? */
-
-.Lunrecov_restore:
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl unrecoverable_exception
- b .Lunrecov_restore
-
-_ASM_NOKPROBE_SYMBOL(ret_from_except);
-_ASM_NOKPROBE_SYMBOL(ret_from_except_lite);
-_ASM_NOKPROBE_SYMBOL(resume_kernel);
-_ASM_NOKPROBE_SYMBOL(fast_exc_return_irq);
-_ASM_NOKPROBE_SYMBOL(restore);
-_ASM_NOKPROBE_SYMBOL(fast_exception_return);
-
+#endif /* CONFIG_PPC_BOOK3S */
#ifdef CONFIG_PPC_RTAS
/*
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index e4076e3c072d..f7ca46f2a38e 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -24,6 +24,7 @@
#include <asm/kvm_asm.h>
#include <asm/kvm_booke_hv_asm.h>
#include <asm/feature-fixups.h>
+#include <asm/context_tracking.h>
/* XXX This will ultimately add space for a special exception save
* structure used to save things like SRR0/SRR1, SPRGs, MAS, etc...
@@ -1073,17 +1074,161 @@ alignment_more:
bl alignment_exception
b ret_from_except
-/*
- * We branch here from entry_64.S for the last stage of the exception
- * return code path. MSR:EE is expected to be off at that point
- */
-_GLOBAL(exception_return_book3e)
- b 1f
+ .align 7
+_GLOBAL(ret_from_except)
+ ld r11,_TRAP(r1)
+ andi. r0,r11,1
+ bne ret_from_except_lite
+ REST_NVGPRS(r1)
+
+_GLOBAL(ret_from_except_lite)
+ /*
+ * Disable interrupts so that current_thread_info()->flags
+ * can't change between when we test it and when we return
+ * from the interrupt.
+ */
+ wrteei 0
+
+ ld r9, PACA_THREAD_INFO(r13)
+ ld r3,_MSR(r1)
+ ld r10,PACACURRENT(r13)
+ ld r4,TI_FLAGS(r9)
+ andi. r3,r3,MSR_PR
+ beq resume_kernel
+ lwz r3,(THREAD+THREAD_DBCR0)(r10)
+
+ /* Check current_thread_info()->flags */
+ andi. r0,r4,_TIF_USER_WORK_MASK
+ bne 1f
+ /*
+ * Check to see if the dbcr0 register is set up to debug.
+ * Use the internal debug mode bit to do this.
+ */
+ andis. r0,r3,DBCR0_IDM@h
+ beq restore
+ mfmsr r0
+ rlwinm r0,r0,0,~MSR_DE /* Clear MSR.DE */
+ mtmsr r0
+ mtspr SPRN_DBCR0,r3
+ li r10, -1
+ mtspr SPRN_DBSR,r10
+ b restore
+1: andi. r0,r4,_TIF_NEED_RESCHED
+ beq 2f
+ bl restore_interrupts
+ SCHEDULE_USER
+ b ret_from_except_lite
+2:
+ bl save_nvgprs
+ /*
+ * Use a non volatile GPR to save and restore our thread_info flags
+ * across the call to restore_interrupts.
+ */
+ mr r30,r4
+ bl restore_interrupts
+ mr r4,r30
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl do_notify_resume
+ b ret_from_except
+
+resume_kernel:
+ /* check current_thread_info, _TIF_EMULATE_STACK_STORE */
+ andis. r8,r4,_TIF_EMULATE_STACK_STORE@h
+ beq+ 1f
+
+ addi r8,r1,INT_FRAME_SIZE /* Get the kprobed function entry */
+
+ ld r3,GPR1(r1)
+ subi r3,r3,INT_FRAME_SIZE /* dst: Allocate a trampoline exception frame */
+ mr r4,r1 /* src: current exception frame */
+ mr r1,r3 /* Reroute the trampoline frame to r1 */
+
+ /* Copy from the original to the trampoline. */
+ li r5,INT_FRAME_SIZE/8 /* size: INT_FRAME_SIZE */
+ li r6,0 /* start offset: 0 */
+ mtctr r5
+2: ldx r0,r6,r4
+ stdx r0,r6,r3
+ addi r6,r6,8
+ bdnz 2b
+
+ /* Do real store operation to complete stdu */
+ ld r5,GPR1(r1)
+ std r8,0(r5)
+
+ /* Clear _TIF_EMULATE_STACK_STORE flag */
+ lis r11,_TIF_EMULATE_STACK_STORE@h
+ addi r5,r9,TI_FLAGS
+0: ldarx r4,0,r5
+ andc r4,r4,r11
+ stdcx. r4,0,r5
+ bne- 0b
+1:
+
+#ifdef CONFIG_PREEMPT
+ /* Check if we need to preempt */
+ andi. r0,r4,_TIF_NEED_RESCHED
+ beq+ restore
+ /* Check that preempt_count() == 0 and interrupts are enabled */
+ lwz r8,TI_PREEMPT(r9)
+ cmpwi cr0,r8,0
+ bne restore
+ ld r0,SOFTE(r1)
+ andi. r0,r0,IRQS_DISABLED
+ bne restore
+
+ /*
+ * Here we are preempting the current task. We want to make
+ * sure we are soft-disabled first and reconcile irq state.
+ */
+ RECONCILE_IRQ_STATE(r3,r4)
+ bl preempt_schedule_irq
+
+ /*
+ * arch_local_irq_restore() from preempt_schedule_irq above may
+ * enable hard interrupt but we really should disable interrupts
+ * when we return from the interrupt, and so that we don't get
+ * interrupted after loading SRR0/1.
+ */
+ wrteei 0
+#endif /* CONFIG_PREEMPT */
+
+restore:
+ /*
+ * This is the main kernel exit path. First we check if we
+ * are about to re-enable interrupts
+ */
+ ld r5,SOFTE(r1)
+ lbz r6,PACAIRQSOFTMASK(r13)
+ andi. r5,r5,IRQS_DISABLED
+ bne .Lrestore_irq_off
+
+ /* We are enabling, were we already enabled ? Yes, just return */
+ andi. r6,r6,IRQS_DISABLED
+ beq cr0,fast_exception_return
+
+ /*
+ * We are about to soft-enable interrupts (we are hard disabled
+ * at this point). We check if there's anything that needs to
+ * be replayed first.
+ */
+ lbz r0,PACAIRQHAPPENED(r13)
+ cmpwi cr0,r0,0
+ bne- .Lrestore_check_irq_replay
+
+ /*
+ * Get here when nothing happened while soft-disabled, just
+ * soft-enable and move-on. We will hard-enable as a side
+ * effect of rfi
+ */
+.Lrestore_no_replay:
+ TRACE_ENABLE_INTS
+ li r0,IRQS_ENABLED
+ stb r0,PACAIRQSOFTMASK(r13);
/* This is the return from load_up_fpu fast path which could do with
* less GPR restores in fact, but for now we have a single return path
*/
- .globl fast_exception_return
fast_exception_return:
wrteei 0
1: mr r0,r13
@@ -1124,6 +1269,102 @@ fast_exception_return:
mfspr r13,SPRN_SPRG_GEN_SCRATCH
rfi
+ /*
+ * We are returning to a context with interrupts soft disabled.
+ *
+ * However, we may also be about to hard enable, so we need to
+ * make sure that in this case, we also clear PACA_IRQ_HARD_DIS
+ * or that bit can get out of sync and bad things will happen
+ */
+.Lrestore_irq_off:
+ ld r3,_MSR(r1)
+ lbz r7,PACAIRQHAPPENED(r13)
+ andi. r0,r3,MSR_EE
+ beq 1f
+ rlwinm r7,r7,0,~PACA_IRQ_HARD_DIS
+ stb r7,PACAIRQHAPPENED(r13)
+1:
+#if defined(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && defined(CONFIG_BUG)
+ /* The interrupt should not have soft enabled. */
+ lbz r7,PACAIRQSOFTMASK(r13)
+1: tdeqi r7,IRQS_ENABLED
+ EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING
+#endif
+ b fast_exception_return
+
+ /*
+ * Something did happen, check if a re-emit is needed
+ * (this also clears paca->irq_happened)
+ */
+.Lrestore_check_irq_replay:
+ /* XXX: We could implement a fast path here where we check
+ * for irq_happened being just 0x01, in which case we can
+ * clear it and return. That means that we would potentially
+ * miss a decrementer having wrapped all the way around.
+ *
+ * Still, this might be useful for things like hash_page
+ */
+ bl __check_irq_replay
+ cmpwi cr0,r3,0
+ beq .Lrestore_no_replay
+
+ /*
+ * We need to re-emit an interrupt. We do so by re-using our
+ * existing exception frame. We first change the trap value,
+ * but we need to ensure we preserve the low nibble of it
+ */
+ ld r4,_TRAP(r1)
+ clrldi r4,r4,60
+ or r4,r4,r3
+ std r4,_TRAP(r1)
+
+ /*
+ * PACA_IRQ_HARD_DIS won't always be set here, so set it now
+ * to reconcile the IRQ state. Tracing is already accounted for.
+ */
+ lbz r4,PACAIRQHAPPENED(r13)
+ ori r4,r4,PACA_IRQ_HARD_DIS
+ stb r4,PACAIRQHAPPENED(r13)
+
+ /*
+ * Then find the right handler and call it. Interrupts are
+ * still soft-disabled and we keep them that way.
+ */
+ cmpwi cr0,r3,0x500
+ bne 1f
+ addi r3,r1,STACK_FRAME_OVERHEAD;
+ bl do_IRQ
+ b ret_from_except
+1: cmpwi cr0,r3,0xf00
+ bne 1f
+ addi r3,r1,STACK_FRAME_OVERHEAD;
+ bl performance_monitor_exception
+ b ret_from_except
+1: cmpwi cr0,r3,0xe60
+ bne 1f
+ addi r3,r1,STACK_FRAME_OVERHEAD;
+ bl handle_hmi_exception
+ b ret_from_except
+1: cmpwi cr0,r3,0x900
+ bne 1f
+ addi r3,r1,STACK_FRAME_OVERHEAD;
+ bl timer_interrupt
+ b ret_from_except
+#ifdef CONFIG_PPC_DOORBELL
+1:
+ cmpwi cr0,r3,0x280
+ bne 1f
+ addi r3,r1,STACK_FRAME_OVERHEAD;
+ bl doorbell_exception
+#endif /* CONFIG_PPC_DOORBELL */
+1: b ret_from_except /* What else to do here ? */
+
+_ASM_NOKPROBE_SYMBOL(ret_from_except);
+_ASM_NOKPROBE_SYMBOL(ret_from_except_lite);
+_ASM_NOKPROBE_SYMBOL(resume_kernel);
+_ASM_NOKPROBE_SYMBOL(restore);
+_ASM_NOKPROBE_SYMBOL(fast_exception_return);
+
/*
* Trampolines used when spotting a bad kernel stack pointer in
* the exception entry code.
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index af1264cd005f..269edd1460be 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -575,6 +575,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
std r10,GPR12(r1)
std r11,GPR13(r1)
+ SAVE_NVGPRS(r1)
+
.if IDAR
.if IISIDE
ld r10,_NIP(r1)
@@ -611,7 +613,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
mfspr r11,SPRN_XER /* save XER in stackframe */
std r10,SOFTE(r1)
std r11,_XER(r1)
- li r9,(IVEC)+1
+ li r9,IVEC
std r9,_TRAP(r1) /* set trap number */
li r10,0
ld r11,exception_marker@toc(r2)
@@ -918,7 +920,6 @@ EXC_COMMON_BEGIN(system_reset_common)
ld r1,PACA_NMI_EMERG_SP(r13)
subi r1,r1,INT_FRAME_SIZE
__GEN_COMMON_BODY system_reset
- bl save_nvgprs
/*
* Set IRQS_ALL_DISABLED unconditionally so arch_irqs_disabled does
* the right thing. We do not want to reconcile because that goes
@@ -1093,7 +1094,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
li r10,MSR_RI
mtmsrd r10,1
- bl save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
bl machine_check_early
std r3,RESULT(r1) /* Save result */
@@ -1186,10 +1186,9 @@ EXC_COMMON_BEGIN(machine_check_common)
/* Enable MSR_RI when finished with PACA_EXMC */
li r10,MSR_RI
mtmsrd r10,1
- bl save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
bl machine_check_exception
- b ret_from_except
+ b interrupt_return
GEN_KVM machine_check
@@ -1356,20 +1355,19 @@ BEGIN_MMU_FTR_SECTION
bl do_slb_fault
cmpdi r3,0
bne- 1f
- b fast_exception_return
+ b fast_interrupt_return
1: /* Error case */
MMU_FTR_SECTION_ELSE
/* Radix case, access is outside page table range */
li r3,-EFAULT
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
std r3,RESULT(r1)
- bl save_nvgprs
RECONCILE_IRQ_STATE(r10, r11)
ld r4,_DAR(r1)
ld r5,RESULT(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_bad_slb_fault
- b ret_from_except
+ b interrupt_return
GEN_KVM data_access_slb
@@ -1449,20 +1447,19 @@ BEGIN_MMU_FTR_SECTION
bl do_slb_fault
cmpdi r3,0
bne- 1f
- b fast_exception_return
+ b fast_interrupt_return
1: /* Error case */
MMU_FTR_SECTION_ELSE
/* Radix case, access is outside page table range */
li r3,-EFAULT
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
std r3,RESULT(r1)
- bl save_nvgprs
RECONCILE_IRQ_STATE(r10, r11)
ld r4,_DAR(r1)
ld r5,RESULT(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_bad_slb_fault
- b ret_from_except
+ b interrupt_return
GEN_KVM instruction_access_slb
@@ -1510,7 +1507,7 @@ EXC_COMMON_BEGIN(hardware_interrupt_common)
RUNLATCH_ON
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_IRQ
- b ret_from_except_lite
+ b interrupt_return_lite
GEN_KVM hardware_interrupt
@@ -1536,10 +1533,9 @@ EXC_VIRT_BEGIN(alignment, 0x4600, 0x100)
EXC_VIRT_END(alignment, 0x4600, 0x100)
EXC_COMMON_BEGIN(alignment_common)
GEN_COMMON alignment
- bl save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
bl alignment_exception
- b ret_from_except
+ b interrupt_return
GEN_KVM alignment
@@ -1600,10 +1596,9 @@ EXC_COMMON_BEGIN(program_check_common)
__ISTACK(program_check)=1
__GEN_COMMON_BODY program_check
3:
- bl save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
bl program_check_exception
- b ret_from_except
+ b interrupt_return
GEN_KVM program_check
@@ -1634,7 +1629,6 @@ EXC_VIRT_END(fp_unavailable, 0x4800, 0x100)
EXC_COMMON_BEGIN(fp_unavailable_common)
GEN_COMMON fp_unavailable
bne 1f /* if from user, just load it up */
- bl save_nvgprs
RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl kernel_fp_unavailable_exception
@@ -1651,14 +1645,13 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFSET(CPU_FTR_TM)
#endif
bl load_up_fpu
- b fast_exception_return
+ b fast_interrupt_return
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
2: /* User process was in a transaction */
- bl save_nvgprs
RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl fp_unavailable_tm
- b ret_from_except
+ b interrupt_return
#endif
GEN_KVM fp_unavailable
@@ -1701,7 +1694,7 @@ EXC_COMMON_BEGIN(decrementer_common)
RUNLATCH_ON
addi r3,r1,STACK_FRAME_OVERHEAD
bl timer_interrupt
- b ret_from_except_lite
+ b interrupt_return_lite
GEN_KVM decrementer
@@ -1792,7 +1785,7 @@ EXC_COMMON_BEGIN(doorbell_super_common)
#else
bl unknown_exception
#endif
- b ret_from_except_lite
+ b interrupt_return_lite
GEN_KVM doorbell_super
@@ -1977,10 +1970,9 @@ EXC_VIRT_BEGIN(single_step, 0x4d00, 0x100)
EXC_VIRT_END(single_step, 0x4d00, 0x100)
EXC_COMMON_BEGIN(single_step_common)
GEN_COMMON single_step
- bl save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
bl single_step_exception
- b ret_from_except
+ b interrupt_return
GEN_KVM single_step
@@ -2015,7 +2007,6 @@ EXC_VIRT_BEGIN(h_data_storage, 0x4e00, 0x20)
EXC_VIRT_END(h_data_storage, 0x4e00, 0x20)
EXC_COMMON_BEGIN(h_data_storage_common)
GEN_COMMON h_data_storage
- bl save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
BEGIN_MMU_FTR_SECTION
ld r4,_DAR(r1)
@@ -2024,7 +2015,7 @@ BEGIN_MMU_FTR_SECTION
MMU_FTR_SECTION_ELSE
bl unknown_exception
ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_TYPE_RADIX)
- b ret_from_except
+ b interrupt_return
GEN_KVM h_data_storage
@@ -2049,10 +2040,9 @@ EXC_VIRT_BEGIN(h_instr_storage, 0x4e20, 0x20)
EXC_VIRT_END(h_instr_storage, 0x4e20, 0x20)
EXC_COMMON_BEGIN(h_instr_storage_common)
GEN_COMMON h_instr_storage
- bl save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
bl unknown_exception
- b ret_from_except
+ b interrupt_return
GEN_KVM h_instr_storage
@@ -2075,10 +2065,9 @@ EXC_VIRT_BEGIN(emulation_assist, 0x4e40, 0x20)
EXC_VIRT_END(emulation_assist, 0x4e40, 0x20)
EXC_COMMON_BEGIN(emulation_assist_common)
GEN_COMMON emulation_assist
- bl save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
bl emulation_assist_interrupt
- b ret_from_except
+ b interrupt_return
GEN_KVM emulation_assist
@@ -2158,10 +2147,9 @@ EXC_COMMON_BEGIN(hmi_exception_common)
GEN_COMMON hmi_exception
FINISH_NAP
RUNLATCH_ON
- bl save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
bl handle_hmi_exception
- b ret_from_except
+ b interrupt_return
GEN_KVM hmi_exception
@@ -2195,7 +2183,7 @@ EXC_COMMON_BEGIN(h_doorbell_common)
#else
bl unknown_exception
#endif
- b ret_from_except_lite
+ b interrupt_return_lite
GEN_KVM h_doorbell
@@ -2225,7 +2213,7 @@ EXC_COMMON_BEGIN(h_virt_irq_common)
RUNLATCH_ON
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_IRQ
- b ret_from_except_lite
+ b interrupt_return_lite
GEN_KVM h_virt_irq
@@ -2272,7 +2260,7 @@ EXC_COMMON_BEGIN(performance_monitor_common)
RUNLATCH_ON
addi r3,r1,STACK_FRAME_OVERHEAD
bl performance_monitor_exception
- b ret_from_except_lite
+ b interrupt_return_lite
GEN_KVM performance_monitor
@@ -2312,23 +2300,21 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_NESTED(CPU_FTR_TM, CPU_FTR_TM, 69)
#endif
bl load_up_altivec
- b fast_exception_return
+ b fast_interrupt_return
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
2: /* User process was in a transaction */
- bl save_nvgprs
RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl altivec_unavailable_tm
- b ret_from_except
+ b interrupt_return
#endif
1:
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif
- bl save_nvgprs
RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl altivec_unavailable_exception
- b ret_from_except
+ b interrupt_return
GEN_KVM altivec_unavailable
@@ -2370,20 +2356,18 @@ BEGIN_FTR_SECTION
b load_up_vsx
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
2: /* User process was in a transaction */
- bl save_nvgprs
RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl vsx_unavailable_tm
- b ret_from_except
+ b interrupt_return
#endif
1:
END_FTR_SECTION_IFSET(CPU_FTR_VSX)
#endif
- bl save_nvgprs
RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl vsx_unavailable_exception
- b ret_from_except
+ b interrupt_return
GEN_KVM vsx_unavailable
@@ -2410,10 +2394,9 @@ EXC_VIRT_BEGIN(facility_unavailable, 0x4f60, 0x20)
EXC_VIRT_END(facility_unavailable, 0x4f60, 0x20)
EXC_COMMON_BEGIN(facility_unavailable_common)
GEN_COMMON facility_unavailable
- bl save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
bl facility_unavailable_exception
- b ret_from_except
+ b interrupt_return
GEN_KVM facility_unavailable
@@ -2440,10 +2423,9 @@ EXC_VIRT_BEGIN(h_facility_unavailable, 0x4f80, 0x20)
EXC_VIRT_END(h_facility_unavailable, 0x4f80, 0x20)
EXC_COMMON_BEGIN(h_facility_unavailable_common)
GEN_COMMON h_facility_unavailable
- bl save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
bl facility_unavailable_exception
- b ret_from_except
+ b interrupt_return
GEN_KVM h_facility_unavailable
@@ -2474,10 +2456,9 @@ EXC_REAL_END(cbe_system_error, 0x1200, 0x100)
EXC_VIRT_NONE(0x5200, 0x100)
EXC_COMMON_BEGIN(cbe_system_error_common)
GEN_COMMON cbe_system_error
- bl save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
bl cbe_system_error_exception
- b ret_from_except
+ b interrupt_return
GEN_KVM cbe_system_error
@@ -2503,10 +2484,9 @@ EXC_VIRT_BEGIN(instruction_breakpoint, 0x5300, 0x100)
EXC_VIRT_END(instruction_breakpoint, 0x5300, 0x100)
EXC_COMMON_BEGIN(instruction_breakpoint_common)
GEN_COMMON instruction_breakpoint
- bl save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
bl instruction_breakpoint_exception
- b ret_from_except
+ b interrupt_return
GEN_KVM instruction_breakpoint
@@ -2626,10 +2606,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
EXC_COMMON_BEGIN(denorm_exception_common)
GEN_COMMON denorm_exception
- bl save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
bl unknown_exception
- b ret_from_except
+ b interrupt_return
GEN_KVM denorm_exception
@@ -2648,10 +2627,9 @@ EXC_REAL_END(cbe_maintenance, 0x1600, 0x100)
EXC_VIRT_NONE(0x5600, 0x100)
EXC_COMMON_BEGIN(cbe_maintenance_common)
GEN_COMMON cbe_maintenance
- bl save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
bl cbe_maintenance_exception
- b ret_from_except
+ b interrupt_return
GEN_KVM cbe_maintenance
@@ -2676,14 +2654,13 @@ EXC_VIRT_BEGIN(altivec_assist, 0x5700, 0x100)
EXC_VIRT_END(altivec_assist, 0x5700, 0x100)
EXC_COMMON_BEGIN(altivec_assist_common)
GEN_COMMON altivec_assist
- bl save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
#ifdef CONFIG_ALTIVEC
bl altivec_assist_exception
#else
bl unknown_exception
#endif
- b ret_from_except
+ b interrupt_return
GEN_KVM altivec_assist
@@ -2702,10 +2679,9 @@ EXC_REAL_END(cbe_thermal, 0x1800, 0x100)
EXC_VIRT_NONE(0x5800, 0x100)
EXC_COMMON_BEGIN(cbe_thermal_common)
GEN_COMMON cbe_thermal
- bl save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
bl cbe_thermal_exception
- b ret_from_except
+ b interrupt_return
GEN_KVM cbe_thermal
@@ -2737,7 +2713,6 @@ EXC_COMMON_BEGIN(soft_nmi_common)
ld r1,PACAEMERGSP(r13)
subi r1,r1,INT_FRAME_SIZE
__GEN_COMMON_BODY soft_nmi
- bl save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
bl soft_nmi_interrupt
/* Clear MSR_RI before setting SRR0 and SRR1. */
@@ -3038,7 +3013,7 @@ do_hash_page:
cmpdi r3,0 /* see if __hash_page succeeded */
/* Success */
- beq fast_exc_return_irq /* Return from exception on success */
+ beq interrupt_return_lite /* Return from exception on success */
/* Error */
blt- 13f
@@ -3055,17 +3030,15 @@ handle_page_fault:
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_page_fault
cmpdi r3,0
- beq+ ret_from_except_lite
- bl save_nvgprs
+ beq+ interrupt_return_lite
mr r5,r3
addi r3,r1,STACK_FRAME_OVERHEAD
ld r4,_DAR(r1)
bl bad_page_fault
- b ret_from_except
+ b interrupt_return
/* We have a data breakpoint exception - handle it */
handle_dabr_fault:
- bl save_nvgprs
ld r4,_DAR(r1)
ld r5,_DSISR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
@@ -3073,21 +3046,20 @@ handle_dabr_fault:
/*
* do_break() may have changed the NV GPRS while handling a breakpoint.
* If so, we need to restore them with their updated values. Don't use
- * ret_from_except_lite here.
+ * interrupt_return_lite here.
*/
- b ret_from_except
+ b interrupt_return
#ifdef CONFIG_PPC_BOOK3S_64
/* We have a page fault that hash_page could handle but HV refused
* the PTE insertion
*/
-13: bl save_nvgprs
- mr r5,r3
+13: mr r5,r3
addi r3,r1,STACK_FRAME_OVERHEAD
ld r4,_DAR(r1)
bl low_hash_fault
- b ret_from_except
+ b interrupt_return
#endif
/*
@@ -3097,11 +3069,10 @@ handle_dabr_fault:
* were soft-disabled. We want to invoke the exception handler for
* the access, or panic if there isn't a handler.
*/
-77: bl save_nvgprs
- addi r3,r1,STACK_FRAME_OVERHEAD
+77: addi r3,r1,STACK_FRAME_OVERHEAD
li r5,SIGSEGV
bl bad_page_fault
- b ret_from_except
+ b interrupt_return
/*
* When doorbell is triggered from system reset wakeup, the message is
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 4df94b6e2f32..b5f8433f8b46 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -236,23 +236,9 @@ void enable_kernel_fp(void)
}
}
EXPORT_SYMBOL(enable_kernel_fp);
-
-static int restore_fp(struct task_struct *tsk)
-{
- if (tsk->thread.load_fp) {
- load_fp_state(&current->thread.fp_state);
- current->thread.load_fp++;
- return 1;
- }
- return 0;
-}
-#else
-static int restore_fp(struct task_struct *tsk) { return 0; }
#endif /* CONFIG_PPC_FPU */
#ifdef CONFIG_ALTIVEC
-#define loadvec(thr) ((thr).load_vec)
-
static void __giveup_altivec(struct task_struct *tsk)
{
unsigned long msr;
@@ -318,21 +304,6 @@ void flush_altivec_to_thread(struct task_struct *tsk)
}
}
EXPORT_SYMBOL_GPL(flush_altivec_to_thread);
-
-static int restore_altivec(struct task_struct *tsk)
-{
- if (cpu_has_feature(CPU_FTR_ALTIVEC) && (tsk->thread.load_vec)) {
- load_vr_state(&tsk->thread.vr_state);
- tsk->thread.used_vr = 1;
- tsk->thread.load_vec++;
-
- return 1;
- }
- return 0;
-}
-#else
-#define loadvec(thr) 0
-static inline int restore_altivec(struct task_struct *tsk) { return 0; }
#endif /* CONFIG_ALTIVEC */
#ifdef CONFIG_VSX
@@ -400,18 +371,6 @@ void flush_vsx_to_thread(struct task_struct *tsk)
}
}
EXPORT_SYMBOL_GPL(flush_vsx_to_thread);
-
-static int restore_vsx(struct task_struct *tsk)
-{
- if (cpu_has_feature(CPU_FTR_VSX)) {
- tsk->thread.used_vsr = 1;
- return 1;
- }
-
- return 0;
-}
-#else
-static inline int restore_vsx(struct task_struct *tsk) { return 0; }
#endif /* CONFIG_VSX */
#ifdef CONFIG_SPE
@@ -511,6 +470,53 @@ void giveup_all(struct task_struct *tsk)
}
EXPORT_SYMBOL(giveup_all);
+#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_FPU
+static int restore_fp(struct task_struct *tsk)
+{
+ if (tsk->thread.load_fp) {
+ load_fp_state(&current->thread.fp_state);
+ current->thread.load_fp++;
+ return 1;
+ }
+ return 0;
+}
+#else
+static int restore_fp(struct task_struct *tsk) { return 0; }
+#endif /* CONFIG_PPC_FPU */
+
+#ifdef CONFIG_ALTIVEC
+#define loadvec(thr) ((thr).load_vec)
+static int restore_altivec(struct task_struct *tsk)
+{
+ if (cpu_has_feature(CPU_FTR_ALTIVEC) && (tsk->thread.load_vec)) {
+ load_vr_state(&tsk->thread.vr_state);
+ tsk->thread.used_vr = 1;
+ tsk->thread.load_vec++;
+
+ return 1;
+ }
+ return 0;
+}
+#else
+#define loadvec(thr) 0
+static inline int restore_altivec(struct task_struct *tsk) { return 0; }
+#endif /* CONFIG_ALTIVEC */
+
+#ifdef CONFIG_VSX
+static int restore_vsx(struct task_struct *tsk)
+{
+ if (cpu_has_feature(CPU_FTR_VSX)) {
+ tsk->thread.used_vsr = 1;
+ return 1;
+ }
+
+ return 0;
+}
+#else
+static inline int restore_vsx(struct task_struct *tsk) { return 0; }
+#endif /* CONFIG_VSX */
+
/*
* The exception exit path calls restore_math() with interrupts hard disabled
* but the soft irq state not "reconciled". ftrace code that calls
@@ -551,6 +557,7 @@ void notrace restore_math(struct pt_regs *regs)
regs->msr = msr;
}
+#endif
static void save_all(struct task_struct *tsk)
{
diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c
index ce4e1b5a9c27..d00cfc4a39a9 100644
--- a/arch/powerpc/kernel/syscall_64.c
+++ b/arch/powerpc/kernel/syscall_64.c
@@ -20,7 +20,11 @@ long system_call_exception(long r3, long r4, long r5, long r6, long r7, long r8,
unsigned long ti_flags;
syscall_fn f;
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S))
+ BUG_ON(!(regs->msr & MSR_RI));
BUG_ON(!(regs->msr & MSR_PR));
+ BUG_ON(!FULL_REGS(regs));
+ BUG_ON(regs->softe != IRQS_ENABLED);
if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
unlikely(regs->msr & MSR_TS_T))
@@ -177,7 +181,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3, struct pt_regs *reg
trace_hardirqs_off();
local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
local_irq_enable();
- /* Took an interrupt which may have more exit work to do. */
+ /* Took an interrupt, may have more exit work to do. */
goto again;
}
local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
@@ -193,3 +197,154 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3, struct pt_regs *reg
return ret;
}
+
+#ifdef CONFIG_PPC_BOOK3S /* BOOK3E not yet using this */
+notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned long msr)
+{
+#ifdef CONFIG_PPC_BOOK3E
+ struct thread_struct *ts = &current->thread;
+#endif
+ unsigned long *ti_flagsp = &current_thread_info()->flags;
+ unsigned long ti_flags;
+ unsigned long flags;
+ unsigned long ret = 0;
+
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S))
+ BUG_ON(!(regs->msr & MSR_RI));
+ BUG_ON(!(regs->msr & MSR_PR));
+ BUG_ON(!FULL_REGS(regs));
+ BUG_ON(regs->softe != IRQS_ENABLED);
+
+ local_irq_save(flags);
+
+again:
+ ti_flags = READ_ONCE(*ti_flagsp);
+ while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
+ local_irq_enable(); /* returning to user: may enable */
+ if (ti_flags & _TIF_NEED_RESCHED) {
+ schedule();
+ } else {
+ if (ti_flags & _TIF_SIGPENDING)
+ ret |= _TIF_RESTOREALL;
+ do_notify_resume(regs, ti_flags);
+ }
+ local_irq_disable();
+ ti_flags = READ_ONCE(*ti_flagsp);
+ }
+
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S)) {
+ unsigned long mathflags = 0;
+
+ if (IS_ENABLED(CONFIG_PPC_FPU))
+ mathflags |= MSR_FP;
+ if (IS_ENABLED(CONFIG_ALTIVEC))
+ mathflags |= MSR_VEC;
+
+ if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
+ (ti_flags & _TIF_RESTORE_TM))
+ restore_tm_state(regs);
+ else if ((regs->msr & mathflags) != mathflags)
+ restore_math(regs);
+ }
+
+ trace_hardirqs_on();
+ __hard_EE_RI_disable();
+ if (unlikely(lazy_irq_pending())) {
+ __hard_RI_enable();
+ trace_hardirqs_off();
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+ local_irq_enable();
+ local_irq_disable();
+ /* Took an interrupt, may have more exit work to do. */
+ goto again;
+ }
+ local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
+ irq_soft_mask_set(IRQS_ENABLED);
+
+#ifdef CONFIG_PPC_BOOK3E
+ if (unlikely(ts->debug.dbcr0 & DBCR0_IDM)) {
+ /*
+ * Check to see if the dbcr0 register is set up to debug.
+ * Use the internal debug mode bit to do this.
+ */
+ mtmsr(mfmsr() & ~MSR_DE);
+ mtspr(SPRN_DBCR0, ts->debug.dbcr0);
+ mtspr(SPRN_DBSR, -1);
+ }
+#endif
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ local_paca->tm_scratch = regs->msr;
+#endif
+
+ kuap_check_amr();
+
+ account_cpu_user_exit();
+
+ return ret;
+}
+
+void unrecoverable_exception(struct pt_regs *regs);
+void preempt_schedule_irq(void);
+
+notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsigned long msr)
+{
+ unsigned long *ti_flagsp = &current_thread_info()->flags;
+ unsigned long flags;
+
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S) && unlikely(!(regs->msr & MSR_RI)))
+ unrecoverable_exception(regs);
+ BUG_ON(regs->msr & MSR_PR);
+ BUG_ON(!FULL_REGS(regs));
+
+ local_irq_save(flags);
+
+ if (regs->softe == IRQS_ENABLED) {
+ /* Returning to a kernel context with local irqs enabled. */
+again:
+ if (IS_ENABLED(CONFIG_PREEMPT)) {
+ /* Return to preemptible kernel context */
+ if (unlikely(*ti_flagsp & _TIF_NEED_RESCHED)) {
+ if (preempt_count() == 0)
+ preempt_schedule_irq();
+ }
+ }
+
+ trace_hardirqs_on();
+ __hard_EE_RI_disable();
+ if (unlikely(lazy_irq_pending())) {
+ __hard_RI_enable();
+ trace_hardirqs_off();
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+ local_irq_enable();
+ local_irq_disable();
+ /* Took an interrupt, may have more exit work to do. */
+ goto again;
+ }
+ irq_soft_mask_set(IRQS_ENABLED);
+ } else {
+ /* Returning to a kernel context with local irqs disabled. */
+ trace_hardirqs_on();
+ __hard_EE_RI_disable();
+ }
+
+ if (regs->msr & MSR_EE)
+ local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ local_paca->tm_scratch = regs->msr;
+#endif
+
+ /*
+ * We don't need to restore AMR on the way back to userspace for KUAP.
+ * The value of AMR only matters while we're in the kernel.
+ */
+ kuap_restore_amr(regs);
+
+ if (unlikely(*ti_flagsp & _TIF_EMULATE_STACK_STORE)) {
+ clear_bits(_TIF_EMULATE_STACK_STORE, ti_flagsp);
+ return 1;
+ }
+ return 0;
+}
+#endif
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index 8eb867dbad5f..44e7a776e56f 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -131,7 +131,7 @@ _GLOBAL(load_up_vsx)
/* enable use of VSX after return */
oris r12,r12,MSR_VSX@h
std r12,_MSR(r1)
- b fast_exception_return
+ b fast_interrupt_return
#endif /* CONFIG_VSX */
--
2.23.0
Building callchain.c with !COMPAT proved quite ugly with all the
defines. Splitting out the 32-bit and 64-bit parts looks better.
No functional change intended.
Signed-off-by: Michal Suchanek <[email protected]>
---
arch/powerpc/perf/Makefile | 5 +-
arch/powerpc/perf/callchain.c | 362 +------------------------------
arch/powerpc/perf/callchain.h | 20 ++
arch/powerpc/perf/callchain_32.c | 197 +++++++++++++++++
arch/powerpc/perf/callchain_64.c | 178 +++++++++++++++
5 files changed, 400 insertions(+), 362 deletions(-)
create mode 100644 arch/powerpc/perf/callchain.h
create mode 100644 arch/powerpc/perf/callchain_32.c
create mode 100644 arch/powerpc/perf/callchain_64.c
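With the split, callchain.c keeps only the common kernel unwinder and the
user-mode dispatch. That dispatch is expected to reduce to roughly the
sketch below (a hypothetical simplification, not quoted from the patch):
on 64-bit with CONFIG_COMPAT=n, is_32bit_task() is constant-false after
the earlier "make buildable without CONFIG_COMPAT" patch, so the call to
perf_callchain_user_32() is eliminated at compile time and callchain_32.o
need not be built at all, matching the Makefile change below:

void
perf_callchain_user(struct perf_callchain_entry_ctx *entry,
		    struct pt_regs *regs)
{
	/* 32-bit path compiles away when COMPAT is off on 64-bit. */
	if (!is_32bit_task())
		perf_callchain_user_64(entry, regs);
	else
		perf_callchain_user_32(entry, regs);
}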
diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile
index c155dcbb8691..53d614e98537 100644
--- a/arch/powerpc/perf/Makefile
+++ b/arch/powerpc/perf/Makefile
@@ -1,6 +1,9 @@
# SPDX-License-Identifier: GPL-2.0
-obj-$(CONFIG_PERF_EVENTS) += callchain.o perf_regs.o
+obj-$(CONFIG_PERF_EVENTS) += callchain.o callchain_$(BITS).o perf_regs.o
+ifdef CONFIG_COMPAT
+obj-$(CONFIG_PERF_EVENTS) += callchain_32.o
+endif
obj-$(CONFIG_PPC_PERF_CTRS) += core-book3s.o bhrb.o
obj64-$(CONFIG_PPC_PERF_CTRS) += ppc970-pmu.o power5-pmu.o \
diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c
index b9fc2f297f30..dd5051015008 100644
--- a/arch/powerpc/perf/callchain.c
+++ b/arch/powerpc/perf/callchain.c
@@ -15,11 +15,9 @@
#include <asm/sigcontext.h>
#include <asm/ucontext.h>
#include <asm/vdso.h>
-#ifdef CONFIG_COMPAT
-#include "../kernel/ppc32.h"
-#endif
#include <asm/pte-walk.h>
+#include "callchain.h"
/*
* Is sp valid as the address of the next kernel stack frame after prev_sp?
@@ -102,364 +100,6 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re
}
}
-static inline int valid_user_sp(unsigned long sp)
-{
- bool is_64 = !is_32bit_task();
-
- if (!sp || (sp & (is_64 ? 7 : 3)) || sp > STACK_TOP - (is_64 ? 32 : 16))
- return 0;
- return 1;
-}
-
-#ifdef CONFIG_PPC64
-/*
- * On 64-bit we don't want to invoke hash_page on user addresses from
- * interrupt context, so if the access faults, we read the page tables
- * to find which page (if any) is mapped and access it directly.
- */
-static int read_user_stack_slow(void __user *ptr, void *buf, int nb)
-{
- int ret = -EFAULT;
- pgd_t *pgdir;
- pte_t *ptep, pte;
- unsigned shift;
- unsigned long addr = (unsigned long) ptr;
- unsigned long offset;
- unsigned long pfn, flags;
- void *kaddr;
-
- pgdir = current->mm->pgd;
- if (!pgdir)
- return -EFAULT;
-
- local_irq_save(flags);
- ptep = find_current_mm_pte(pgdir, addr, NULL, &shift);
- if (!ptep)
- goto err_out;
- if (!shift)
- shift = PAGE_SHIFT;
-
- /* align address to page boundary */
- offset = addr & ((1UL << shift) - 1);
-
- pte = READ_ONCE(*ptep);
- if (!pte_present(pte) || !pte_user(pte))
- goto err_out;
- pfn = pte_pfn(pte);
- if (!page_is_ram(pfn))
- goto err_out;
-
- /* no highmem to worry about here */
- kaddr = pfn_to_kaddr(pfn);
- memcpy(buf, kaddr + offset, nb);
- ret = 0;
-err_out:
- local_irq_restore(flags);
- return ret;
-}
-
-static int read_user_stack_64(unsigned long __user *ptr, unsigned long *ret)
-{
- if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned long) ||
- ((unsigned long)ptr & 7))
- return -EFAULT;
-
- pagefault_disable();
- if (!__get_user_inatomic(*ret, ptr)) {
- pagefault_enable();
- return 0;
- }
- pagefault_enable();
-
- return read_user_stack_slow(ptr, ret, 8);
-}
-
-/*
- * 64-bit user processes use the same stack frame for RT and non-RT signals.
- */
-struct signal_frame_64 {
- char dummy[__SIGNAL_FRAMESIZE];
- struct ucontext uc;
- unsigned long unused[2];
- unsigned int tramp[6];
- struct siginfo *pinfo;
- void *puc;
- struct siginfo info;
- char abigap[288];
-};
-
-static int is_sigreturn_64_address(unsigned long nip, unsigned long fp)
-{
- if (nip == fp + offsetof(struct signal_frame_64, tramp))
- return 1;
- if (vdso64_rt_sigtramp && current->mm->context.vdso_base &&
- nip == current->mm->context.vdso_base + vdso64_rt_sigtramp)
- return 1;
- return 0;
-}
-
-/*
- * Do some sanity checking on the signal frame pointed to by sp.
- * We check the pinfo and puc pointers in the frame.
- */
-static int sane_signal_64_frame(unsigned long sp)
-{
- struct signal_frame_64 __user *sf;
- unsigned long pinfo, puc;
-
- sf = (struct signal_frame_64 __user *) sp;
- if (read_user_stack_64((unsigned long __user *) &sf->pinfo, &pinfo) ||
- read_user_stack_64((unsigned long __user *) &sf->puc, &puc))
- return 0;
- return pinfo == (unsigned long) &sf->info &&
- puc == (unsigned long) &sf->uc;
-}
-
-static void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry,
- struct pt_regs *regs)
-{
- unsigned long sp, next_sp;
- unsigned long next_ip;
- unsigned long lr;
- long level = 0;
- struct signal_frame_64 __user *sigframe;
- unsigned long __user *fp, *uregs;
-
- next_ip = perf_instruction_pointer(regs);
- lr = regs->link;
- sp = regs->gpr[1];
- perf_callchain_store(entry, next_ip);
-
- while (entry->nr < entry->max_stack) {
- fp = (unsigned long __user *) sp;
- if (!valid_user_sp(sp) || read_user_stack_64(fp, &next_sp))
- return;
- if (level > 0 && read_user_stack_64(&fp[2], &next_ip))
- return;
-
- /*
- * Note: the next_sp - sp >= signal frame size check
- * is true when next_sp < sp, which can happen when
- * transitioning from an alternate signal stack to the
- * normal stack.
- */
- if (next_sp - sp >= sizeof(struct signal_frame_64) &&
- (is_sigreturn_64_address(next_ip, sp) ||
- (level <= 1 && is_sigreturn_64_address(lr, sp))) &&
- sane_signal_64_frame(sp)) {
- /*
- * This looks like an signal frame
- */
- sigframe = (struct signal_frame_64 __user *) sp;
- uregs = sigframe->uc.uc_mcontext.gp_regs;
- if (read_user_stack_64(&uregs[PT_NIP], &next_ip) ||
- read_user_stack_64(&uregs[PT_LNK], &lr) ||
- read_user_stack_64(&uregs[PT_R1], &sp))
- return;
- level = 0;
- perf_callchain_store_context(entry, PERF_CONTEXT_USER);
- perf_callchain_store(entry, next_ip);
- continue;
- }
-
- if (level == 0)
- next_ip = lr;
- perf_callchain_store(entry, next_ip);
- ++level;
- sp = next_sp;
- }
-}
-
-#else /* CONFIG_PPC64 */
-static int read_user_stack_slow(void __user *ptr, void *buf, int nb)
-{
- return 0;
-}
-
-static inline void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry,
- struct pt_regs *regs)
-{
-}
-
-#define __SIGNAL_FRAMESIZE32 __SIGNAL_FRAMESIZE
-#define sigcontext32 sigcontext
-#define mcontext32 mcontext
-#define ucontext32 ucontext
-#define compat_siginfo_t struct siginfo
-
-#endif /* CONFIG_PPC64 */
-
-#if defined(CONFIG_PPC32) || defined(CONFIG_COMPAT)
-/*
- * On 32-bit we just access the address and let hash_page create a
- * HPTE if necessary, so there is no need to fall back to reading
- * the page tables. Since this is called at interrupt level,
- * do_page_fault() won't treat a DSI as a page fault.
- */
-static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret)
-{
- int rc;
-
- if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) ||
- ((unsigned long)ptr & 3))
- return -EFAULT;
-
- pagefault_disable();
- rc = __get_user_inatomic(*ret, ptr);
- pagefault_enable();
-
- if (IS_ENABLED(CONFIG_PPC64) && rc)
- return read_user_stack_slow(ptr, ret, 4);
- return rc;
-}
-
-/*
- * Layout for non-RT signal frames
- */
-struct signal_frame_32 {
- char dummy[__SIGNAL_FRAMESIZE32];
- struct sigcontext32 sctx;
- struct mcontext32 mctx;
- int abigap[56];
-};
-
-/*
- * Layout for RT signal frames
- */
-struct rt_signal_frame_32 {
- char dummy[__SIGNAL_FRAMESIZE32 + 16];
- compat_siginfo_t info;
- struct ucontext32 uc;
- int abigap[56];
-};
-
-static int is_sigreturn_32_address(unsigned int nip, unsigned int fp)
-{
- if (nip == fp + offsetof(struct signal_frame_32, mctx.mc_pad))
- return 1;
- if (vdso32_sigtramp && current->mm->context.vdso_base &&
- nip == current->mm->context.vdso_base + vdso32_sigtramp)
- return 1;
- return 0;
-}
-
-static int is_rt_sigreturn_32_address(unsigned int nip, unsigned int fp)
-{
- if (nip == fp + offsetof(struct rt_signal_frame_32,
- uc.uc_mcontext.mc_pad))
- return 1;
- if (vdso32_rt_sigtramp && current->mm->context.vdso_base &&
- nip == current->mm->context.vdso_base + vdso32_rt_sigtramp)
- return 1;
- return 0;
-}
-
-static int sane_signal_32_frame(unsigned int sp)
-{
- struct signal_frame_32 __user *sf;
- unsigned int regs;
-
- sf = (struct signal_frame_32 __user *) (unsigned long) sp;
- if (read_user_stack_32((unsigned int __user *) &sf->sctx.regs, &regs))
- return 0;
- return regs == (unsigned long) &sf->mctx;
-}
-
-static int sane_rt_signal_32_frame(unsigned int sp)
-{
- struct rt_signal_frame_32 __user *sf;
- unsigned int regs;
-
- sf = (struct rt_signal_frame_32 __user *) (unsigned long) sp;
- if (read_user_stack_32((unsigned int __user *) &sf->uc.uc_regs, &regs))
- return 0;
- return regs == (unsigned long) &sf->uc.uc_mcontext;
-}
-
-static unsigned int __user *signal_frame_32_regs(unsigned int sp,
- unsigned int next_sp, unsigned int next_ip)
-{
- struct mcontext32 __user *mctx = NULL;
- struct signal_frame_32 __user *sf;
- struct rt_signal_frame_32 __user *rt_sf;
-
- /*
- * Note: the next_sp - sp >= signal frame size check
- * is true when next_sp < sp, for example, when
- * transitioning from an alternate signal stack to the
- * normal stack.
- */
- if (next_sp - sp >= sizeof(struct signal_frame_32) &&
- is_sigreturn_32_address(next_ip, sp) &&
- sane_signal_32_frame(sp)) {
- sf = (struct signal_frame_32 __user *) (unsigned long) sp;
- mctx = &sf->mctx;
- }
-
- if (!mctx && next_sp - sp >= sizeof(struct rt_signal_frame_32) &&
- is_rt_sigreturn_32_address(next_ip, sp) &&
- sane_rt_signal_32_frame(sp)) {
- rt_sf = (struct rt_signal_frame_32 __user *) (unsigned long) sp;
- mctx = &rt_sf->uc.uc_mcontext;
- }
-
- if (!mctx)
- return NULL;
- return mctx->mc_gregs;
-}
-
-static void perf_callchain_user_32(struct perf_callchain_entry_ctx *entry,
- struct pt_regs *regs)
-{
- unsigned int sp, next_sp;
- unsigned int next_ip;
- unsigned int lr;
- long level = 0;
- unsigned int __user *fp, *uregs;
-
- next_ip = perf_instruction_pointer(regs);
- lr = regs->link;
- sp = regs->gpr[1];
- perf_callchain_store(entry, next_ip);
-
- while (entry->nr < entry->max_stack) {
- fp = (unsigned int __user *) (unsigned long) sp;
- if (!valid_user_sp(sp) || read_user_stack_32(fp, &next_sp))
- return;
- if (level > 0 && read_user_stack_32(&fp[1], &next_ip))
- return;
-
- uregs = signal_frame_32_regs(sp, next_sp, next_ip);
- if (!uregs && level <= 1)
- uregs = signal_frame_32_regs(sp, next_sp, lr);
- if (uregs) {
- /*
- * This looks like an signal frame, so restart
- * the stack trace with the values in it.
- */
- if (read_user_stack_32(&uregs[PT_NIP], &next_ip) ||
- read_user_stack_32(&uregs[PT_LNK], &lr) ||
- read_user_stack_32(&uregs[PT_R1], &sp))
- return;
- level = 0;
- perf_callchain_store_context(entry, PERF_CONTEXT_USER);
- perf_callchain_store(entry, next_ip);
- continue;
- }
-
- if (level == 0)
- next_ip = lr;
- perf_callchain_store(entry, next_ip);
- ++level;
- sp = next_sp;
- }
-}
-#else /* 32bit */
-static void perf_callchain_user_32(struct perf_callchain_entry_ctx *entry,
- struct pt_regs *regs)
-{}
-#endif /* 32bit */
-
void
perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
{
diff --git a/arch/powerpc/perf/callchain.h b/arch/powerpc/perf/callchain.h
new file mode 100644
index 000000000000..8631a96d627d
--- /dev/null
+++ b/arch/powerpc/perf/callchain.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _POWERPC_PERF_CALLCHAIN_H
+#define _POWERPC_PERF_CALLCHAIN_H
+
+int read_user_stack_slow(void __user *ptr, void *buf, int nb);
+void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry,
+ struct pt_regs *regs);
+void perf_callchain_user_32(struct perf_callchain_entry_ctx *entry,
+ struct pt_regs *regs);
+
+static inline int valid_user_sp(unsigned long sp)
+{
+ bool is_64 = !is_32bit_task();
+
+ if (!sp || (sp & (is_64 ? 7 : 3)) || sp > STACK_TOP - (is_64 ? 32 : 16))
+ return 0;
+ return 1;
+}
+
+#endif /* _POWERPC_PERF_CALLCHAIN_H */
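For anyone reading along, the header's valid_user_sp() rules can be tried
out in a stand-alone sketch; FAKE_STACK_TOP below is a made-up placeholder,
not the kernel's STACK_TOP:

#include <stdbool.h>
#include <stdio.h>

#define FAKE_STACK_TOP 0x7fffffffffffUL /* assumed value, for illustration */

static bool sketch_valid_user_sp(unsigned long sp, bool is_64)
{
    unsigned long align = is_64 ? 7 : 3;   /* ABI minimum alignment - 1 */
    unsigned long frame = is_64 ? 32 : 16; /* smallest back-chain frame */

    return sp && !(sp & align) && sp <= FAKE_STACK_TOP - frame;
}

int main(void)
{
    printf("%d\n", sketch_valid_user_sp(0x7ffff0000000UL, true)); /* 1 */
    printf("%d\n", sketch_valid_user_sp(0x7ffff0000004UL, true)); /* 0: misaligned */
    printf("%d\n", sketch_valid_user_sp(0, true));                /* 0: NULL sp */
    return 0;
}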
diff --git a/arch/powerpc/perf/callchain_32.c b/arch/powerpc/perf/callchain_32.c
new file mode 100644
index 000000000000..eb3bcb696fcc
--- /dev/null
+++ b/arch/powerpc/perf/callchain_32.c
@@ -0,0 +1,197 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Performance counter callchain support - powerpc architecture code
+ *
+ * Copyright © 2009 Paul Mackerras, IBM Corporation.
+ */
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/perf_event.h>
+#include <linux/percpu.h>
+#include <linux/uaccess.h>
+#include <linux/mm.h>
+#include <asm/ptrace.h>
+#include <asm/pgtable.h>
+#include <asm/sigcontext.h>
+#include <asm/ucontext.h>
+#include <asm/vdso.h>
+#include <asm/pte-walk.h>
+
+#include "callchain.h"
+
+#ifdef CONFIG_PPC64
+#include "../kernel/ppc32.h"
+#else /* CONFIG_PPC64 */
+
+#define __SIGNAL_FRAMESIZE32 __SIGNAL_FRAMESIZE
+#define sigcontext32 sigcontext
+#define mcontext32 mcontext
+#define ucontext32 ucontext
+#define compat_siginfo_t struct siginfo
+
+#endif /* CONFIG_PPC64 */
+
+/*
+ * On 32-bit we just access the address and let hash_page create a
+ * HPTE if necessary, so there is no need to fall back to reading
+ * the page tables. Since this is called at interrupt level,
+ * do_page_fault() won't treat a DSI as a page fault.
+ */
+static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret)
+{
+ int rc;
+
+ if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) ||
+ ((unsigned long)ptr & 3))
+ return -EFAULT;
+
+ pagefault_disable();
+ rc = __get_user_inatomic(*ret, ptr);
+ pagefault_enable();
+
+ if (IS_ENABLED(CONFIG_PPC64) && rc)
+ return read_user_stack_slow(ptr, ret, 4);
+ return rc;
+}
+
+/*
+ * Layout for non-RT signal frames
+ */
+struct signal_frame_32 {
+ char dummy[__SIGNAL_FRAMESIZE32];
+ struct sigcontext32 sctx;
+ struct mcontext32 mctx;
+ int abigap[56];
+};
+
+/*
+ * Layout for RT signal frames
+ */
+struct rt_signal_frame_32 {
+ char dummy[__SIGNAL_FRAMESIZE32 + 16];
+ compat_siginfo_t info;
+ struct ucontext32 uc;
+ int abigap[56];
+};
+
+static int is_sigreturn_32_address(unsigned int nip, unsigned int fp)
+{
+ if (nip == fp + offsetof(struct signal_frame_32, mctx.mc_pad))
+ return 1;
+ if (vdso32_sigtramp && current->mm->context.vdso_base &&
+ nip == current->mm->context.vdso_base + vdso32_sigtramp)
+ return 1;
+ return 0;
+}
+
+static int is_rt_sigreturn_32_address(unsigned int nip, unsigned int fp)
+{
+ if (nip == fp + offsetof(struct rt_signal_frame_32,
+ uc.uc_mcontext.mc_pad))
+ return 1;
+ if (vdso32_rt_sigtramp && current->mm->context.vdso_base &&
+ nip == current->mm->context.vdso_base + vdso32_rt_sigtramp)
+ return 1;
+ return 0;
+}
+
+static int sane_signal_32_frame(unsigned int sp)
+{
+ struct signal_frame_32 __user *sf;
+ unsigned int regs;
+
+ sf = (struct signal_frame_32 __user *) (unsigned long) sp;
+ if (read_user_stack_32((unsigned int __user *) &sf->sctx.regs, &regs))
+ return 0;
+ return regs == (unsigned long) &sf->mctx;
+}
+
+static int sane_rt_signal_32_frame(unsigned int sp)
+{
+ struct rt_signal_frame_32 __user *sf;
+ unsigned int regs;
+
+ sf = (struct rt_signal_frame_32 __user *) (unsigned long) sp;
+ if (read_user_stack_32((unsigned int __user *) &sf->uc.uc_regs, &regs))
+ return 0;
+ return regs == (unsigned long) &sf->uc.uc_mcontext;
+}
+
+static unsigned int __user *signal_frame_32_regs(unsigned int sp,
+ unsigned int next_sp, unsigned int next_ip)
+{
+ struct mcontext32 __user *mctx = NULL;
+ struct signal_frame_32 __user *sf;
+ struct rt_signal_frame_32 __user *rt_sf;
+
+ /*
+ * Note: the next_sp - sp >= signal frame size check
+ * is true when next_sp < sp, for example, when
+ * transitioning from an alternate signal stack to the
+ * normal stack.
+ */
+ if (next_sp - sp >= sizeof(struct signal_frame_32) &&
+ is_sigreturn_32_address(next_ip, sp) &&
+ sane_signal_32_frame(sp)) {
+ sf = (struct signal_frame_32 __user *) (unsigned long) sp;
+ mctx = &sf->mctx;
+ }
+
+ if (!mctx && next_sp - sp >= sizeof(struct rt_signal_frame_32) &&
+ is_rt_sigreturn_32_address(next_ip, sp) &&
+ sane_rt_signal_32_frame(sp)) {
+ rt_sf = (struct rt_signal_frame_32 __user *) (unsigned long) sp;
+ mctx = &rt_sf->uc.uc_mcontext;
+ }
+
+ if (!mctx)
+ return NULL;
+ return mctx->mc_gregs;
+}
+
+void perf_callchain_user_32(struct perf_callchain_entry_ctx *entry,
+ struct pt_regs *regs)
+{
+ unsigned int sp, next_sp;
+ unsigned int next_ip;
+ unsigned int lr;
+ long level = 0;
+ unsigned int __user *fp, *uregs;
+
+ next_ip = perf_instruction_pointer(regs);
+ lr = regs->link;
+ sp = regs->gpr[1];
+ perf_callchain_store(entry, next_ip);
+
+ while (entry->nr < entry->max_stack) {
+ fp = (unsigned int __user *) (unsigned long) sp;
+ if (!valid_user_sp(sp) || read_user_stack_32(fp, &next_sp))
+ return;
+ if (level > 0 && read_user_stack_32(&fp[1], &next_ip))
+ return;
+
+ uregs = signal_frame_32_regs(sp, next_sp, next_ip);
+ if (!uregs && level <= 1)
+ uregs = signal_frame_32_regs(sp, next_sp, lr);
+ if (uregs) {
+ /*
+ * This looks like a signal frame, so restart
+ * the stack trace with the values in it.
+ */
+ if (read_user_stack_32(&uregs[PT_NIP], &next_ip) ||
+ read_user_stack_32(&uregs[PT_LNK], &lr) ||
+ read_user_stack_32(&uregs[PT_R1], &sp))
+ return;
+ level = 0;
+ perf_callchain_store_context(entry, PERF_CONTEXT_USER);
+ perf_callchain_store(entry, next_ip);
+ continue;
+ }
+
+ if (level == 0)
+ next_ip = lr;
+ perf_callchain_store(entry, next_ip);
+ ++level;
+ sp = next_sp;
+ }
+}
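An aside on the loop above: fp[0] holds the back chain (the caller's SP) and
fp[1] the LR save word in the 32-bit ABI, which is why the walker reads
&fp[1] for return addresses of all but the newest frame (whose return
address comes from regs->link). A host-side toy walk over that layout, with
made-up frames and addresses:

#include <stdio.h>

struct frame32 {
    struct frame32 *back_chain; /* fp[0]: caller's stack pointer */
    unsigned int lr_save;       /* fp[1]: saved return address */
};

int main(void)
{
    struct frame32 outer = { NULL, 0x10001000 };   /* outermost frame */
    struct frame32 inner = { &outer, 0x10000500 }; /* newest frame */
    const struct frame32 *fp;

    for (fp = &inner; fp; fp = fp->back_chain)
        printf("return address: 0x%x\n", fp->lr_save);
    return 0;
}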
diff --git a/arch/powerpc/perf/callchain_64.c b/arch/powerpc/perf/callchain_64.c
new file mode 100644
index 000000000000..a0ec88d3439f
--- /dev/null
+++ b/arch/powerpc/perf/callchain_64.c
@@ -0,0 +1,178 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Performance counter callchain support - powerpc architecture code
+ *
+ * Copyright © 2009 Paul Mackerras, IBM Corporation.
+ */
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/perf_event.h>
+#include <linux/percpu.h>
+#include <linux/uaccess.h>
+#include <linux/mm.h>
+#include <asm/ptrace.h>
+#include <asm/pgtable.h>
+#include <asm/sigcontext.h>
+#include <asm/ucontext.h>
+#include <asm/vdso.h>
+#include <asm/pte-walk.h>
+
+#include "callchain.h"
+
+/*
+ * On 64-bit we don't want to invoke hash_page on user addresses from
+ * interrupt context, so if the access faults, we read the page tables
+ * to find which page (if any) is mapped and access it directly.
+ */
+int read_user_stack_slow(void __user *ptr, void *buf, int nb)
+{
+ int ret = -EFAULT;
+ pgd_t *pgdir;
+ pte_t *ptep, pte;
+ unsigned int shift;
+ unsigned long addr = (unsigned long) ptr;
+ unsigned long offset;
+ unsigned long pfn, flags;
+ void *kaddr;
+
+ pgdir = current->mm->pgd;
+ if (!pgdir)
+ return -EFAULT;
+
+ local_irq_save(flags);
+ ptep = find_current_mm_pte(pgdir, addr, NULL, &shift);
+ if (!ptep)
+ goto err_out;
+ if (!shift)
+ shift = PAGE_SHIFT;
+
+ /* align address to page boundary */
+ offset = addr & ((1UL << shift) - 1);
+
+ pte = READ_ONCE(*ptep);
+ if (!pte_present(pte) || !pte_user(pte))
+ goto err_out;
+ pfn = pte_pfn(pte);
+ if (!page_is_ram(pfn))
+ goto err_out;
+
+ /* no highmem to worry about here */
+ kaddr = pfn_to_kaddr(pfn);
+ memcpy(buf, kaddr + offset, nb);
+ ret = 0;
+err_out:
+ local_irq_restore(flags);
+ return ret;
+}
+
+static int read_user_stack_64(unsigned long __user *ptr, unsigned long *ret)
+{
+ if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned long) ||
+ ((unsigned long)ptr & 7))
+ return -EFAULT;
+
+ pagefault_disable();
+ if (!__get_user_inatomic(*ret, ptr)) {
+ pagefault_enable();
+ return 0;
+ }
+ pagefault_enable();
+
+ return read_user_stack_slow(ptr, ret, 8);
+}
+
+/*
+ * 64-bit user processes use the same stack frame for RT and non-RT signals.
+ */
+struct signal_frame_64 {
+ char dummy[__SIGNAL_FRAMESIZE];
+ struct ucontext uc;
+ unsigned long unused[2];
+ unsigned int tramp[6];
+ struct siginfo *pinfo;
+ void *puc;
+ struct siginfo info;
+ char abigap[288];
+};
+
+static int is_sigreturn_64_address(unsigned long nip, unsigned long fp)
+{
+ if (nip == fp + offsetof(struct signal_frame_64, tramp))
+ return 1;
+ if (vdso64_rt_sigtramp && current->mm->context.vdso_base &&
+ nip == current->mm->context.vdso_base + vdso64_rt_sigtramp)
+ return 1;
+ return 0;
+}
+
+/*
+ * Do some sanity checking on the signal frame pointed to by sp.
+ * We check the pinfo and puc pointers in the frame.
+ */
+static int sane_signal_64_frame(unsigned long sp)
+{
+ struct signal_frame_64 __user *sf;
+ unsigned long pinfo, puc;
+
+ sf = (struct signal_frame_64 __user *) sp;
+ if (read_user_stack_64((unsigned long __user *) &sf->pinfo, &pinfo) ||
+ read_user_stack_64((unsigned long __user *) &sf->puc, &puc))
+ return 0;
+ return pinfo == (unsigned long) &sf->info &&
+ puc == (unsigned long) &sf->uc;
+}
+
+void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry,
+ struct pt_regs *regs)
+{
+ unsigned long sp, next_sp;
+ unsigned long next_ip;
+ unsigned long lr;
+ long level = 0;
+ struct signal_frame_64 __user *sigframe;
+ unsigned long __user *fp, *uregs;
+
+ next_ip = perf_instruction_pointer(regs);
+ lr = regs->link;
+ sp = regs->gpr[1];
+ perf_callchain_store(entry, next_ip);
+
+ while (entry->nr < entry->max_stack) {
+ fp = (unsigned long __user *) sp;
+ if (!valid_user_sp(sp) || read_user_stack_64(fp, &next_sp))
+ return;
+ if (level > 0 && read_user_stack_64(&fp[2], &next_ip))
+ return;
+
+ /*
+ * Note: the next_sp - sp >= signal frame size check
+ * is true when next_sp < sp, which can happen when
+ * transitioning from an alternate signal stack to the
+ * normal stack.
+ */
+ if (next_sp - sp >= sizeof(struct signal_frame_64) &&
+ (is_sigreturn_64_address(next_ip, sp) ||
+ (level <= 1 && is_sigreturn_64_address(lr, sp))) &&
+ sane_signal_64_frame(sp)) {
+ /*
+ * This looks like a signal frame
+ */
+ sigframe = (struct signal_frame_64 __user *) sp;
+ uregs = sigframe->uc.uc_mcontext.gp_regs;
+ if (read_user_stack_64(&uregs[PT_NIP], &next_ip) ||
+ read_user_stack_64(&uregs[PT_LNK], &lr) ||
+ read_user_stack_64(&uregs[PT_R1], &sp))
+ return;
+ level = 0;
+ perf_callchain_store_context(entry, PERF_CONTEXT_USER);
+ perf_callchain_store(entry, next_ip);
+ continue;
+ }
+
+ if (level == 0)
+ next_ip = lr;
+ perf_callchain_store(entry, next_ip);
+ ++level;
+ sp = next_sp;
+ }
+}
--
2.23.0
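A note on the &fp[2] read in perf_callchain_user_64() above: the 64-bit ELF
ABI frame header is the back chain at offset 0, the CR save doubleword at
offset 8 and the LR save doubleword at offset 16, so the saved return
address is the third doubleword of the frame. A stand-alone sketch:

#include <stddef.h>
#include <stdio.h>

struct frame64 {
    unsigned long back_chain; /* fp[0]: caller's stack pointer */
    unsigned long cr_save;    /* fp[1]: saved CR */
    unsigned long lr_save;    /* fp[2]: saved return address */
};

int main(void)
{
    /* prints 16 on an LP64 host, matching the &fp[2] access */
    printf("LR save offset: %zu\n", offsetof(struct frame64, lr_save));
    return 0;
}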
From: Nicholas Piggin <[email protected]>
A few of the non-standard handlers are left uncommented, and some of the
existing descriptions could be expanded.
Signed-off-by: Nicholas Piggin <[email protected]>
---
arch/powerpc/kernel/exceptions-64s.S | 391 ++++++++++++++++++++++++---
1 file changed, 353 insertions(+), 38 deletions(-)
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index ef37d0ab6594..2f50587392aa 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -121,26 +121,26 @@ name:
/*
* Interrupt code generation macros
*/
-#define IVEC .L_IVEC_\name\()
-#define IHSRR .L_IHSRR_\name\()
-#define IHSRR_IF_HVMODE .L_IHSRR_IF_HVMODE_\name\()
-#define IAREA .L_IAREA_\name\()
-#define IVIRT .L_IVIRT_\name\()
-#define IISIDE .L_IISIDE_\name\()
-#define IDAR .L_IDAR_\name\()
-#define IDSISR .L_IDSISR_\name\()
-#define ISET_RI .L_ISET_RI_\name\()
-#define IBRANCH_TO_COMMON .L_IBRANCH_TO_COMMON_\name\()
-#define IREALMODE_COMMON .L_IREALMODE_COMMON_\name\()
-#define IMASK .L_IMASK_\name\()
-#define IKVM_SKIP .L_IKVM_SKIP_\name\()
-#define IKVM_REAL .L_IKVM_REAL_\name\()
+#define IVEC .L_IVEC_\name\() /* Interrupt vector address */
+#define IHSRR .L_IHSRR_\name\() /* Sets SRR or HSRR registers */
+#define IHSRR_IF_HVMODE .L_IHSRR_IF_HVMODE_\name\() /* HSRR if HV else SRR */
+#define IAREA .L_IAREA_\name\() /* PACA save area */
+#define IVIRT .L_IVIRT_\name\() /* Has virt mode entry point */
+#define IISIDE .L_IISIDE_\name\() /* Uses SRR0/1 not DAR/DSISR */
+#define IDAR .L_IDAR_\name\() /* Uses DAR (or SRR0) */
+#define IDSISR .L_IDSISR_\name\() /* Uses DSISR (or SRR1) */
+#define ISET_RI .L_ISET_RI_\name\() /* Run common code w/ MSR[RI]=1 */
+#define IBRANCH_TO_COMMON .L_IBRANCH_TO_COMMON_\name\() /* ENTRY branch to common */
+#define IREALMODE_COMMON .L_IREALMODE_COMMON_\name\() /* Common runs in realmode */
+#define IMASK .L_IMASK_\name\() /* IRQ soft-mask bit */
+#define IKVM_SKIP .L_IKVM_SKIP_\name\() /* Generate KVM skip handler */
+#define IKVM_REAL .L_IKVM_REAL_\name\() /* Real entry tests KVM */
#define __IKVM_REAL(name) .L_IKVM_REAL_ ## name
-#define IKVM_VIRT .L_IKVM_VIRT_\name\()
-#define ISTACK .L_ISTACK_\name\()
+#define IKVM_VIRT .L_IKVM_VIRT_\name\() /* Virt entry tests KVM */
+#define ISTACK .L_ISTACK_\name\() /* Set regular kernel stack */
#define __ISTACK(name) .L_ISTACK_ ## name
-#define IRECONCILE .L_IRECONCILE_\name\()
-#define IKUAP .L_IKUAP_\name\()
+#define IRECONCILE .L_IRECONCILE_\name\() /* Do RECONCILE_IRQ_STATE */
+#define IKUAP .L_IKUAP_\name\() /* Do KUAP lock */
#define INT_DEFINE_BEGIN(n) \
.macro int_define_ ## n name
@@ -759,6 +759,39 @@ __start_interrupts:
EXC_VIRT_NONE(0x4000, 0x100)
+/**
+ * Interrupt 0x100 - System Reset Interrupt (SRESET aka NMI).
+ * This is a non-maskable, asynchronous interrupt always taken in real-mode.
+ * It is caused by:
+ * - Wake from power-saving state, on powernv.
+ * - An NMI from another CPU, triggered by firmware or hypercall.
+ * - As crash/debug signal injected from BMC, firmware or hypervisor.
+ *
+ * Handling:
+ * Power-save wakeup is the only performance-critical path, so it is
+ * determined as quickly as possible first. In this case volatile registers
+ * can be discarded and SPRs like CFAR don't need to be read.
+ *
+ * If not a powersave wakeup, then it's run as a regular interrupt; however,
+ * it uses its own stack and PACA save area to preserve the regular kernel
+ * environment for debugging.
+ *
+ * This interrupt is not maskable, so triggering it when MSR[RI] is clear,
+ * or SCRATCH0 is in use, etc. may cause a crash. It's also not entirely
+ * correct to switch to virtual mode to run the regular interrupt handler
+ * because it might be interrupted when the MMU is in a bad state (e.g., SLB
+ * is clear).
+ *
+ * FWNMI:
+ * PAPR specifies a "fwnmi" facility which sends the sreset to a different
+ * entry point with a different register set up. Some hypervisors will
+ * send the sreset to 0x100 in the guest if it is not fwnmi capable.
+ *
+ * KVM:
+ * Unlike most SRR interrupts, this may be taken by the host while executing
+ * in a guest, so a KVM test is required. KVM will pull the CPU out of guest
+ * mode and then raise the sreset.
+ */
INT_DEFINE_BEGIN(system_reset)
IVEC=0x100
IAREA=PACA_EXNMI
@@ -834,6 +867,7 @@ TRAMP_REAL_BEGIN(system_reset_idle_wake)
* Vectors for the FWNMI option. Share common code.
*/
TRAMP_REAL_BEGIN(system_reset_fwnmi)
+ /* XXX: fwnmi guest could run a nested/PR guest, so why no test? */
__IKVM_REAL(system_reset)=0
GEN_INT_ENTRY system_reset, virt=0
@@ -900,6 +934,44 @@ EXC_COMMON_BEGIN(system_reset_common)
GEN_KVM system_reset
+/**
+ * Interrupt 0x200 - Machine Check Interrupt (MCE).
+ * This is a non-maskable interrupt always taken in real-mode. It can be
+ * synchronous or asynchronous, caused by hardware or software, and it may be
+ * taken in a power-saving state.
+ *
+ * Handling:
+ * Similarly to system reset, this uses its own stack and PACA save area;
+ * the difference is that re-entrancy is allowed on the machine check stack.
+ *
+ * machine_check_early is run in real mode, and carefully decodes the
+ * machine check and tries to handle it (e.g., flush the SLB if there was an
+ * error detected there), determines if it was recoverable and logs the
+ * event.
+ *
+ * Then, depending on the execution context when the interrupt is taken, there
+ * are 3 main actions:
+ * - Executing in kernel mode. The event is queued with irq_work, which means
+ * it is handled when it is next safe to do so (i.e., the kernel has enabled
+ * interrupts), which could be immediately when the interrupt returns. This
+ * avoids nasty issues like switching to virtual mode when the MMU is in a
+ * bad state, or when executing OPAL code. (SRESET is exposed to such issues,
+ * but it has different priorities). Check to see if the CPU was in power
+ * save, and return via the wake up code if it was.
+ *
+ * - Executing in user mode. machine_check_exception is run like a normal
+ * interrupt handler, which processes the data generated by the early handler.
+ *
+ * - Executing in guest mode. The interrupt is run with its KVM test, and
+ * branches to KVM to deal with it. KVM may queue the event for the host
+ * to report later.
+ *
+ * This interrupt is not maskable, so if it triggers when MSR[RI] is clear,
+ * or SCRATCH0 is in use, it may cause a crash.
+ *
+ * KVM:
+ * See SRESET.
+ */
INT_DEFINE_BEGIN(machine_check_early)
IVEC=0x200
IAREA=PACA_EXMC
@@ -1159,19 +1231,28 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
/**
- * 0x300 - Data Storage Interrupt (DSI)
- * This interrupt is generated due to a data access which does not have a valid
- * page table entry with permissions to allow the data access to be performed.
- * DAWR matches also fault here, as do RC updates, and minor misc errors e.g.,
- * copy/paste, AMO, certain invalid CI accesses, etc.
+ * Interrupt 0x300 - Data Storage Interrupt (DSI).
+ * This is a synchronous interrupt generated due to a data access exception,
+ * e.g., a load or store which does not have a valid page table entry with
+ * permissions. DAWR matches also fault here, as do RC updates, and minor misc
+ * errors e.g., copy/paste, AMO, certain invalid CI accesses, etc.
+ *
+ * Handling:
+ * - Hash MMU
+ * Go to do_hash_page first to see if the HPT can be filled from an entry in
+ * the Linux page table. Hash faults can hit in kernel mode in a fairly
+ * arbitrary state (e.g., interrupts disabled, locks held) when accessing
+ * "non-bolted" regions, e.g., vmalloc space. However these should always be
+ * backed by Linux page tables.
*
- * This interrupt is delivered to the guest (HV bit unchanged).
+ * If none is found, do a Linux page fault. Linux page faults can happen in
+ * kernel mode due to user copy operations of course.
*
- * Linux HPT responds by first attempting to refill the hash table from the
- * Linux page table, then going to a full page fault if the Linux page table
- * entry was insufficient. RPT goes straight to full page fault.
+ * - Radix MMU
+ * The hardware loads from the Linux page table directly, so a fault goes
+ * immediately to Linux page fault.
*
- * PR KVM ...?
+ * Conditions like DAWR match are handled on the way in to Linux page fault.
*/
INT_DEFINE_BEGIN(data_access)
IVEC=0x300
@@ -1202,6 +1283,24 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
GEN_KVM data_access
+/**
+ * Interrupt 0x380 - Data Segment Interrupt (DSLB).
+ * This is a synchronous interrupt in response to an MMU fault missing SLB
+ * entry for HPT, or an address outside RPT translation range.
+ *
+ * Handling:
+ * - HPT:
+ * This refills the SLB, or reports an access fault similarly to a bad page
+ * fault. When coming from user-mode, the SLB handler may access any kernel
+ * data, though it may itself take a DSLB. When coming from kernel mode,
+ * recursive faults must be avoided so access is restricted to the kernel
+ * image text/data, kernel stack, and any data allocated below
+ * ppc64_bolted_size (first segment). The kernel handler must avoid stomping
+ * on user-handler data structures.
+ *
+ * A dedicated save area EXSLB is used (XXX: but it actually need not be
+ * these days, we could use EXGEN).
+ */
INT_DEFINE_BEGIN(data_access_slb)
IVEC=0x380
IAREA=PACA_EXSLB
@@ -1244,6 +1343,15 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
GEN_KVM data_access_slb
+/**
+ * Interrupt 0x400 - Instruction Storage Interrupt (ISI).
+ * This is a synchronous interrupt in response to an MMU fault due to an
+ * instruction fetch.
+ *
+ * Handling:
+ * Similar to DSI, though in response to fetch. The faulting address is found
+ * in SRR0 (rather than DAR), and status in SRR1 (rather than DSISR).
+ */
INT_DEFINE_BEGIN(instruction_access)
IVEC=0x400
IISIDE=1
@@ -1273,6 +1381,15 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
GEN_KVM instruction_access
+/**
+ * Interrupt 0x480 - Instruction Segment Interrupt (ISLB).
+ * This is a synchronous interrupt in response to an MMU fault due to an
+ * instruction fetch.
+ *
+ * Handling:
+ * Similar to DSLB, though in response to fetch. The faulting address is found
+ * in SRR0 (rather than DAR).
+ */
INT_DEFINE_BEGIN(instruction_access_slb)
IVEC=0x480
IAREA=PACA_EXSLB
@@ -1315,6 +1432,29 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
GEN_KVM instruction_access_slb
+/**
+ * Interrupt 0x500 - External Interrupt.
+ * This is an asynchronous maskable interrupt in response to an "external
+ * exception" from the interrupt controller or hypervisor (e.g., device
+ * interrupt). It is maskable in hardware by clearing MSR[EE], and
+ * soft-maskable with IRQS_DISABLED mask (i.e., local_irq_disable()).
+ *
+ * When running in HV mode, Linux sets up the LPCR[LPES] bit such that
+ * interrupts are delivered with HSRR registers while guests use SRRs, which
+ * requires IHSRR_IF_HVMODE.
+ *
+ * On bare metal POWER9 and later, Linux sets the LPCR[HVICE] bit such that
+ * external interrupts are delivered as Hypervisor Virtualization Interrupts
+ * rather than External Interrupts.
+ *
+ * Handling:
+ * This calls into the Linux IRQ handler. NVGPRs are not saved to reduce
+ * overhead; because the interrupt is asynchronous, the register state at the
+ * time it is taken is not significant to the handler.
+ *
+ * If soft masked, the masked handler will note the pending interrupt for
+ * replay, and clear MSR[EE] in the interrupted context.
+ */
INT_DEFINE_BEGIN(hardware_interrupt)
IVEC=0x500
IHSRR_IF_HVMODE=1
@@ -1340,6 +1480,10 @@ EXC_COMMON_BEGIN(hardware_interrupt_common)
GEN_KVM hardware_interrupt
+/**
+ * Interrupt 0x600 - Alignment Interrupt
+ * This is a synchronous interrupt in response to data alignment fault.
+ */
INT_DEFINE_BEGIN(alignment)
IVEC=0x600
IDAR=1
@@ -1363,6 +1507,15 @@ EXC_COMMON_BEGIN(alignment_common)
GEN_KVM alignment
+/**
+ * Interrupt 0x700 - Program Interrupt (program check).
+ * This is a synchronous interrupt in response to various instruction faults:
+ * traps, privilege errors, TM errors, floating point exceptions.
+ *
+ * Handling:
+ * This interrupt may use the "emergency stack" in some cases when being taken
+ * from kernel context, which complicates handling.
+ */
INT_DEFINE_BEGIN(program_check)
IVEC=0x700
IKVM_REAL=1
@@ -1416,6 +1569,15 @@ EXC_COMMON_BEGIN(program_check_common)
GEN_KVM program_check
+/**
+ * Interrupt 0x800 - Floating-Point Unavailable Interrupt.
+ * This is a synchronous interrupt in response to executing an fp instruction
+ * with MSR[FP]=0.
+ *
+ * Handling:
+ * This will load FP registers and enable the FP bit if coming from userspace,
+ * otherwise report a bad kernel use of FP.
+ */
INT_DEFINE_BEGIN(fp_unavailable)
IVEC=0x800
IRECONCILE=0
@@ -1461,6 +1623,23 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
GEN_KVM fp_unavailable
+/**
+ * Interrupt 0x900 - Decrementer Interrupt.
+ * This is an asynchronous interrupt in response to a decrementer exception
+ * (e.g., DEC has wrapped below zero). It is maskable in hardware by clearing
+ * MSR[EE], and soft-maskable with IRQS_DISABLED mask (i.e.,
+ * local_irq_disable()).
+ *
+ * Handling:
+ * This calls into the Linux timer handler. NVGPRs are not saved (see 0x500).
+ *
+ * If soft masked, the masked handler will note the pending interrupt for
+ * replay, and bump the decrementer to a high value, leaving MSR[EE] enabled
+ * in the interrupted context.
+ * If PPC_WATCHDOG is configured, the soft masked handler will actually set
+ * things back up to run soft_nmi_interrupt as a regular interrupt handler
+ * on the emergency stack.
+ */
INT_DEFINE_BEGIN(decrementer)
IVEC=0x900
IMASK=IRQS_DISABLED
@@ -1484,6 +1663,16 @@ EXC_COMMON_BEGIN(decrementer_common)
GEN_KVM decrementer
+/**
+ * Interrupt 0x980 - Hypervisor Decrementer Interrupt.
+ * This is an asynchronous interrupt, similar to 0x900 but for the HDEC
+ * register.
+ *
+ * Handling:
+ * Linux does not use this outside KVM where it's used to keep a host timer
+ * while the guest is given control of DEC. It should normally be caught by
+ * the KVM test and routed there.
+ */
INT_DEFINE_BEGIN(hdecrementer)
IVEC=0x980
IHSRR=1
@@ -1522,6 +1711,20 @@ EXC_COMMON_BEGIN(hdecrementer_common)
GEN_KVM hdecrementer
+/**
+ * Interrupt 0xa00 - Directed Privileged Doorbell Interrupt.
+ * This is an asynchronous interrupt in response to a msgsndp doorbell.
+ * It is maskable in hardware by clearing MSR[EE], and soft-maskable with
+ * IRQS_DISABLED mask (i.e., local_irq_disable()).
+ *
+ * Handling:
+ * Guests may use this for IPIs between threads in a core if the
+ * hypervisor supports it. NVGPRs are not saved (see 0x500).
+ *
+ * If soft masked, the masked handler will note the pending interrupt for
+ * replay, leaving MSR[EE] enabled in the interrupted context because the
+ * doorbells are edge triggered.
+ */
INT_DEFINE_BEGIN(doorbell_super)
IVEC=0xa00
IMASK=IRQS_DISABLED
@@ -1552,16 +1755,20 @@ EXC_COMMON_BEGIN(doorbell_super_common)
EXC_REAL_NONE(0xb00, 0x100)
EXC_VIRT_NONE(0x4b00, 0x100)
-/*
- * system call / hypercall (0xc00, 0x4c00)
- *
- * The system call exception is invoked with "sc 0" and does not alter HV bit.
- *
- * The hypercall is invoked with "sc 1" and sets HV=1.
+/**
+ * Interrupt 0xc00 - System Call Interrupt (syscall, hcall).
+ * This is a synchronous interrupt invoked with the "sc" instruction. The
+ * system call is invoked with "sc 0" and does not alter the HV bit, so it
+ * is directed to the currently running OS. The hypercall is invoked with
+ * "sc 1" and it sets HV=1, so it elevates to hypervisor.
*
* In HPT, sc 1 always goes to 0xc00 real mode. In RADIX, sc 1 can go to
* 0x4c00 virtual mode.
*
+ * Handling:
+ * If the KVM test fires then it was due to a hypercall and is accordingly
+ * routed to KVM. Otherwise this executes a normal Linux system call.
+ *
* Call convention:
*
* syscall register convention is in Documentation/powerpc/syscall64-abi.rst
@@ -1705,6 +1912,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
#endif
+/**
+ * Interrupt 0xd00 - Trace Interrupt.
+ * This is a synchronous interrupt in response to instruction step or
+ * breakpoint faults.
+ */
INT_DEFINE_BEGIN(single_step)
IVEC=0xd00
IKVM_REAL=1
@@ -1726,6 +1938,18 @@ EXC_COMMON_BEGIN(single_step_common)
GEN_KVM single_step
+/**
+ * Interrupt 0xe00 - Hypervisor Data Storage Interrupt (HDSI).
+ * This is a synchronous interrupt in response to an MMU fault caused by a
+ * guest data access.
+ *
+ * Handling:
+ * This should always get routed to KVM. In radix MMU mode, this is caused
+ * by a guest nested radix access that can't be performed due to the
+ * partition scope page table. In hash mode, this can be caused by guests
+ * running with translation disabled (virtual real mode) or with VPM enabled.
+ * KVM will update the page table structures or disallow the access.
+ */
INT_DEFINE_BEGIN(h_data_storage)
IVEC=0xe00
IHSRR=1
@@ -1758,6 +1982,11 @@ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_TYPE_RADIX)
GEN_KVM h_data_storage
+/**
+ * Interrupt 0xe20 - Hypervisor Instruction Storage Interrupt (HISI).
+ * This is a synchronous interrupt in response to an MMU fault caused by a
+ * guest instruction fetch, similar to HDSI.
+ */
INT_DEFINE_BEGIN(h_instr_storage)
IVEC=0xe20
IHSRR=1
@@ -1781,6 +2010,9 @@ EXC_COMMON_BEGIN(h_instr_storage_common)
GEN_KVM h_instr_storage
+/**
+ * Interrupt 0xe40 - Hypervisor Emulation Assistance Interrupt.
+ */
INT_DEFINE_BEGIN(emulation_assist)
IVEC=0xe40
IHSRR=1
@@ -1804,10 +2036,29 @@ EXC_COMMON_BEGIN(emulation_assist_common)
GEN_KVM emulation_assist
-/*
- * hmi_exception trampoline is a special case. It jumps to hmi_exception_early
- * first, and then eventaully from there to the trampoline to get into virtual
- * mode.
+/**
+ * Interrupt 0xe60 - Hypervisor Maintenance Interrupt (HMI).
+ * This is an asynchronous interrupt caused by a Hypervisor Maintenance
+ * Exception. It is always taken in real mode but uses HSRR registers
+ * unlike SRESET and MCE.
+ *
+ * It is maskable in hardware by clearing MSR[EE], and partially soft-maskable
+ * with IRQS_DISABLED mask (i.e., local_irq_disable()).
+ *
+ * Handling:
+ * This is a special case: it is handled similarly to machine checks, with an
+ * initial real-mode handler that is not soft-masked and attempts to fix the
+ * problem, followed by a regular soft-maskable handler that reports the
+ * problem.
+ *
+ * The emergency stack is used for the early real mode handler.
+ *
+ * XXX: unclear why MCE and HMI schemes could not be made common, e.g.,
+ * either use soft-masking for the MCE, or use irq_work for the HMI.
+ *
+ * KVM:
+ * Unlike MCE, this calls into KVM without calling the real mode handler
+ * first.
*/
INT_DEFINE_BEGIN(hmi_exception_early)
IVEC=0xe60
@@ -1868,6 +2119,11 @@ EXC_COMMON_BEGIN(hmi_exception_common)
GEN_KVM hmi_exception
+/**
+ * Interrupt 0xe80 - Directed Hypervisor Doorbell Interrupt.
+ * This is an asynchronous interrupt in response to a msgsnd doorbell.
+ * Similar to the 0xa00 doorbell but for host rather than guest.
+ */
INT_DEFINE_BEGIN(h_doorbell)
IVEC=0xe80
IHSRR=1
@@ -1897,6 +2153,11 @@ EXC_COMMON_BEGIN(h_doorbell_common)
GEN_KVM h_doorbell
+/**
+ * Interrupt 0xea0 - Hypervisor Virtualization Interrupt.
+ * This is an asynchronous interrupt in response to an "external exception".
+ * Similar to 0x500 but for host only.
+ */
INT_DEFINE_BEGIN(h_virt_irq)
IVEC=0xea0
IHSRR=1
@@ -1928,6 +2189,22 @@ EXC_REAL_NONE(0xee0, 0x20)
EXC_VIRT_NONE(0x4ee0, 0x20)
+/**
+ * Interrupt 0xf00 - Performance Monitor Interrupt (PMI, PMU).
+ * This is an asynchronous interrupt in response to a PMU exception.
+ * It is maskable in hardware by clearing MSR[EE], and soft-maskable with
+ * IRQS_PMI_DISABLED mask (NOTE: NOT local_irq_disable()).
+ *
+ * Handling:
+ * This calls into the perf subsystem.
+ *
+ * Like the watchdog soft-nmi, it appears as an NMI interrupt to Linux, in
+ * that it runs under local_irq_disable. However it may be soft-masked in
+ * powerpc-specific code.
+ *
+ * If soft masked, the masked handler will note the pending interrupt for
+ * replay, and clear MSR[EE] in the interrupted context.
+ */
INT_DEFINE_BEGIN(performance_monitor)
IVEC=0xf00
IMASK=IRQS_PMI_DISABLED
@@ -1951,6 +2228,12 @@ EXC_COMMON_BEGIN(performance_monitor_common)
GEN_KVM performance_monitor
+/**
+ * Interrupt 0xf20 - Vector Unavailable Interrupt.
+ * This is a synchronous interrupt in response to
+ * executing a vector (or altivec) instruction with MSR[VEC]=0.
+ * Similar to FP unavailable.
+ */
INT_DEFINE_BEGIN(altivec_unavailable)
IVEC=0xf20
IRECONCILE=0
@@ -1999,6 +2282,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
GEN_KVM altivec_unavailable
+/**
+ * Interrupt 0xf40 - VSX Unavailable Interrupt.
+ * This is a synchronous interrupt in response to
+ * executing a VSX instruction with MSR[VSX]=0.
+ * Similar to FP unavailable.
+ */
INT_DEFINE_BEGIN(vsx_unavailable)
IVEC=0xf40
IRECONCILE=0
@@ -2046,6 +2335,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
GEN_KVM vsx_unavailable
+/**
+ * Interrupt 0xf60 - Facility Unavailable Interrupt.
+ * This is a synchronous interrupt in response to
+ * executing an instruction without access to the facility that can be
+ * resolved by the OS (e.g., FSCR, MSR).
+ * Similar to FP unavailable.
+ */
INT_DEFINE_BEGIN(facility_unavailable)
IVEC=0xf60
IKVM_REAL=1
@@ -2067,6 +2363,13 @@ EXC_COMMON_BEGIN(facility_unavailable_common)
GEN_KVM facility_unavailable
+/**
+ * Interrupt 0xf80 - Hypervisor Facility Unavailable Interrupt.
+ * This is a synchronous interrupt in response to
+ * executing an instruction without access to the facility that can only
+ * be resolved in HV mode (e.g., HFSCR).
+ * Similar to FP unavailable.
+ */
INT_DEFINE_BEGIN(h_facility_unavailable)
IVEC=0xf80
IHSRR=1
@@ -2154,6 +2457,18 @@ EXC_COMMON_BEGIN(instruction_breakpoint_common)
EXC_REAL_NONE(0x1400, 0x100)
EXC_VIRT_NONE(0x5400, 0x100)
+/**
+ * Interrupt 0x1500 - Soft Patch Interrupt
+ *
+ * Handling:
+ * This is an implementation-specific interrupt which can be used for a
+ * range of exceptions.
+ *
+ * This interrupt handler is unique in that it runs the denormal assist
+ * code even for guests (and even in guest context) without going to KVM,
+ * for speed. POWER9 does not raise denorm exceptions, so this special case
+ * could be phased out in future to reduce special cases.
+ */
INT_DEFINE_BEGIN(denorm_exception)
IVEC=0x1500
IHSRR=1
--
2.23.0
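The soft-mask and replay behaviour these comments keep referring to (note
the pending interrupt, return, replay it when interrupts are re-enabled)
can be modelled roughly in C. This is a toy sketch of the idea only, not
the real implementation in the powerpc irq code:

#include <stdbool.h>
#include <stdio.h>

static bool irqs_soft_disabled;
static bool irq_pending;

/* what the masked handler does when hardware raises an interrupt */
static void hard_irq(void)
{
    if (irqs_soft_disabled) {
        irq_pending = true; /* note it for replay and return */
        return;
    }
    printf("handle interrupt now\n");
}

/* replay anything that arrived while soft-masked */
static void local_irq_enable_sketch(void)
{
    irqs_soft_disabled = false;
    if (irq_pending) {
        irq_pending = false;
        printf("replay pending interrupt\n");
    }
}

int main(void)
{
    irqs_soft_disabled = true;
    hard_irq();                /* arrives while masked, gets noted */
    local_irq_enable_sketch(); /* replayed here */
    return 0;
}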
From: Nicholas Piggin <[email protected]>
These are used infrequently enough that they don't provide much help, so
inline them.
Signed-off-by: Nicholas Piggin <[email protected]>
---
arch/powerpc/kernel/exceptions-64s.S | 82 ++++++++++------------------
1 file changed, 28 insertions(+), 54 deletions(-)
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 716a95ba814f..abf26db36427 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -110,46 +110,6 @@ name:
#define EXC_HV 1
#define EXC_STD 0
-/*
- * PPR save/restore macros used in exceptions-64s.S
- * Used for P7 or later processors
- */
-#define SAVE_PPR(area, ra) \
-BEGIN_FTR_SECTION_NESTED(940) \
- ld ra,area+EX_PPR(r13); /* Read PPR from paca */ \
- std ra,_PPR(r1); \
-END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,940)
-
-#define RESTORE_PPR_PACA(area, ra) \
-BEGIN_FTR_SECTION_NESTED(941) \
- ld ra,area+EX_PPR(r13); \
- mtspr SPRN_PPR,ra; \
-END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,941)
-
-/*
- * Get an SPR into a register if the CPU has the given feature
- */
-#define OPT_GET_SPR(ra, spr, ftr) \
-BEGIN_FTR_SECTION_NESTED(943) \
- mfspr ra,spr; \
-END_FTR_SECTION_NESTED(ftr,ftr,943)
-
-/*
- * Set an SPR from a register if the CPU has the given feature
- */
-#define OPT_SET_SPR(ra, spr, ftr) \
-BEGIN_FTR_SECTION_NESTED(943) \
- mtspr spr,ra; \
-END_FTR_SECTION_NESTED(ftr,ftr,943)
-
-/*
- * Save a register to the PACA if the CPU has the given feature
- */
-#define OPT_SAVE_REG_TO_PACA(offset, ra, ftr) \
-BEGIN_FTR_SECTION_NESTED(943) \
- std ra,offset(r13); \
-END_FTR_SECTION_NESTED(ftr,ftr,943)
-
/*
* Branch to label using its 0xC000 address. This results in instruction
* address suitable for MSR[IR]=0 or 1, which allows relocation to be turned
@@ -278,18 +238,18 @@ do_define_int n
cmpwi r10,KVM_GUEST_MODE_SKIP
beq 89f
.else
-BEGIN_FTR_SECTION_NESTED(947)
+BEGIN_FTR_SECTION
ld r10,IAREA+EX_CFAR(r13)
std r10,HSTATE_CFAR(r13)
-END_FTR_SECTION_NESTED(CPU_FTR_CFAR,CPU_FTR_CFAR,947)
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
.endif
ld r10,PACA_EXGEN+EX_CTR(r13)
mtctr r10
-BEGIN_FTR_SECTION_NESTED(948)
+BEGIN_FTR_SECTION
ld r10,IAREA+EX_PPR(r13)
std r10,HSTATE_PPR(r13)
-END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
ld r11,IAREA+EX_R11(r13)
ld r12,IAREA+EX_R12(r13)
std r12,HSTATE_SCRATCH0(r13)
@@ -386,10 +346,14 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
SET_SCRATCH0(r13) /* save r13 */
GET_PACA(r13)
std r9,IAREA+EX_R9(r13) /* save r9 */
- OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR)
+BEGIN_FTR_SECTION
+ mfspr r9,SPRN_PPR
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
HMT_MEDIUM
std r10,IAREA+EX_R10(r13) /* save r10 - r12 */
- OPT_GET_SPR(r10, SPRN_CFAR, CPU_FTR_CFAR)
+BEGIN_FTR_SECTION
+ mfspr r10,SPRN_CFAR
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
.if \ool
.if !\virt
b tramp_real_\name
@@ -402,8 +366,12 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
.endif
.endif
- OPT_SAVE_REG_TO_PACA(IAREA+EX_PPR, r9, CPU_FTR_HAS_PPR)
- OPT_SAVE_REG_TO_PACA(IAREA+EX_CFAR, r10, CPU_FTR_CFAR)
+BEGIN_FTR_SECTION
+ std r9,IAREA+EX_PPR(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+BEGIN_FTR_SECTION
+ std r10,IAREA+EX_CFAR(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
INTERRUPT_TO_KERNEL
mfctr r10
std r10,IAREA+EX_CTR(r13)
@@ -558,7 +526,10 @@ DEFINE_FIXED_SYMBOL(\name\()_common_virt)
.endif
beq 101f /* if from kernel mode */
ACCOUNT_CPU_USER_ENTRY(r13, r9, r10)
- SAVE_PPR(IAREA, r9)
+BEGIN_FTR_SECTION
+ ld r9,IAREA+EX_PPR(r13) /* Read PPR from paca */
+ std r9,_PPR(r1)
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
101:
.else
.if IKUAP
@@ -598,10 +569,10 @@ DEFINE_FIXED_SYMBOL(\name\()_common_virt)
std r10,_DSISR(r1)
.endif
-BEGIN_FTR_SECTION_NESTED(66)
+BEGIN_FTR_SECTION
ld r10,IAREA+EX_CFAR(r13)
std r10,ORIG_GPR3(r1)
-END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66)
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
ld r10,IAREA+EX_CTR(r13)
std r10,_CTR(r1)
std r2,GPR2(r1) /* save r2 in stackframe */
@@ -1696,10 +1667,10 @@ TRAMP_REAL_BEGIN(system_call_kvm)
* HMT_MEDIUM. That allows the KVM code to save that value into the
* guest state (it is the guest's PPR value).
*/
-BEGIN_FTR_SECTION_NESTED(948)
+BEGIN_FTR_SECTION
mfspr r10,SPRN_PPR
std r10,HSTATE_PPR(r13)
-END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
HMT_MEDIUM
mfctr r10
SET_SCRATCH0(r10)
@@ -2254,7 +2225,10 @@ denorm_done:
mtspr SPRN_HSRR0,r11
mtcrf 0x80,r9
ld r9,PACA_EXGEN+EX_R9(r13)
- RESTORE_PPR_PACA(PACA_EXGEN, r10)
+BEGIN_FTR_SECTION
+ ld r10,PACA_EXGEN+EX_PPR(r13)
+ mtspr SPRN_PPR,r10
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
BEGIN_FTR_SECTION
ld r10,PACA_EXGEN+EX_CFAR(r13)
mtspr SPRN_CFAR,r10
--
2.23.0
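For readers unfamiliar with the feature sections being open-coded here:
BEGIN_FTR_SECTION/END_FTR_SECTION_IFSET bracket instructions that are
patched out at boot when the CPU lacks the named feature. A rough
runtime-flag model of the effect (the real mechanism rewrites the
instructions, it does not test a flag):

#include <stdbool.h>
#include <stdio.h>

static bool cpu_has_ppr = true; /* stand-in for CPU_FTR_HAS_PPR */
static unsigned long saved_ppr;

static void save_ppr_sketch(unsigned long ppr)
{
    if (cpu_has_ppr)     /* BEGIN_FTR_SECTION ... */
        saved_ppr = ppr; /* END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) */
}

int main(void)
{
    save_ppr_sketch(0x4);
    printf("saved PPR: %#lx\n", saved_ppr);
    return 0;
}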
From: Nicholas Piggin <[email protected]>
Signed-off-by: Nicholas Piggin <[email protected]>
---
arch/powerpc/kernel/exceptions-64s.S | 55 +++++++++++++---------------
1 file changed, 26 insertions(+), 29 deletions(-)
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index f318869607db..bef0c2eee7dc 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -266,15 +266,6 @@ do_define_int n
.endif
.endm
-.macro INT_KVM_HANDLER name, vec, hsrr, area, skip
- TRAMP_KVM_BEGIN(\name\()_kvm)
- KVM_HANDLER \vec, \hsrr, \area, \skip
-.endm
-
-.macro GEN_KVM name
- KVM_HANDLER IVEC, IHSRR, IAREA, IKVM_SKIP
-.endm
-
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
/*
@@ -293,35 +284,35 @@ do_define_int n
bne \name\()_kvm
.endm
-.macro KVM_HANDLER vec, hsrr, area, skip
- .if \skip
+.macro GEN_KVM name
+ .if IKVM_SKIP
cmpwi r10,KVM_GUEST_MODE_SKIP
beq 89f
.else
BEGIN_FTR_SECTION_NESTED(947)
- ld r10,\area+EX_CFAR(r13)
+ ld r10,IAREA+EX_CFAR(r13)
std r10,HSTATE_CFAR(r13)
END_FTR_SECTION_NESTED(CPU_FTR_CFAR,CPU_FTR_CFAR,947)
.endif
BEGIN_FTR_SECTION_NESTED(948)
- ld r10,\area+EX_PPR(r13)
+ ld r10,IAREA+EX_PPR(r13)
std r10,HSTATE_PPR(r13)
END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
- ld r10,\area+EX_R10(r13)
+ ld r10,IAREA+EX_R10(r13)
std r12,HSTATE_SCRATCH0(r13)
sldi r12,r9,32
/* HSRR variants have the 0x2 bit added to their trap number */
- .if \hsrr == EXC_HV_OR_STD
+ .if IHSRR == EXC_HV_OR_STD
BEGIN_FTR_SECTION
- ori r12,r12,(\vec + 0x2)
+ ori r12,r12,(IVEC + 0x2)
FTR_SECTION_ELSE
- ori r12,r12,(\vec)
+ ori r12,r12,(IVEC)
ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
- .elseif \hsrr
- ori r12,r12,(\vec + 0x2)
+ .elseif IHSRR
+ ori r12,r12,(IVEC + 0x2)
.else
- ori r12,r12,(\vec)
+ ori r12,r12,(IVEC)
.endif
#ifdef CONFIG_RELOCATABLE
@@ -334,25 +325,25 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
std r9,HSTATE_SCRATCH1(r13)
__LOAD_FAR_HANDLER(r9, kvmppc_interrupt)
mtctr r9
- ld r9,\area+EX_R9(r13)
+ ld r9,IAREA+EX_R9(r13)
bctr
#else
- ld r9,\area+EX_R9(r13)
+ ld r9,IAREA+EX_R9(r13)
b kvmppc_interrupt
#endif
- .if \skip
+ .if IKVM_SKIP
89: mtocrf 0x80,r9
- ld r9,\area+EX_R9(r13)
- ld r10,\area+EX_R10(r13)
- .if \hsrr == EXC_HV_OR_STD
+ ld r9,IAREA+EX_R9(r13)
+ ld r10,IAREA+EX_R10(r13)
+ .if IHSRR == EXC_HV_OR_STD
BEGIN_FTR_SECTION
b kvmppc_skip_Hinterrupt
FTR_SECTION_ELSE
b kvmppc_skip_interrupt
ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
- .elseif \hsrr
+ .elseif IHSRR
b kvmppc_skip_Hinterrupt
.else
b kvmppc_skip_interrupt
@@ -363,7 +354,7 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
#else
.macro KVMTEST name, hsrr, n
.endm
-.macro KVM_HANDLER name, vec, hsrr, area, skip
+.macro GEN_KVM name
.endm
#endif
@@ -1640,6 +1631,12 @@ EXC_VIRT_NONE(0x4b00, 0x100)
* without saving, though xer is not a good idea to use, as hardware may
* interpret some bits so it may be costly to change them.
*/
+INT_DEFINE_BEGIN(system_call)
+ IVEC=0xc00
+ IKVM_REAL=1
+ IKVM_VIRT=1
+INT_DEFINE_END(system_call)
+
.macro SYSTEM_CALL virt
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
/*
@@ -1733,7 +1730,7 @@ TRAMP_KVM_BEGIN(system_call_kvm)
SET_SCRATCH0(r10)
std r9,PACA_EXGEN+EX_R9(r13)
mfcr r9
- KVM_HANDLER 0xc00, EXC_STD, PACA_EXGEN, 0
+ GEN_KVM system_call
#endif
--
2.23.0
From: Nicholas Piggin <[email protected]>
The code generation macro arguments are difficult to read, and
defaults can't easily be used.
This introduces a block where parameters can be set for interrupt
handler code generation by the subsequent macros, and adds the first
generation macro for interrupt entry.
One interrupt handler is converted to the new macros to demonstrate
the change; the rest will be converted all at once.
No generated code change.
Signed-off-by: Nicholas Piggin <[email protected]>
---
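As a rough analogy for the parameter block introduced below (illustrative
only; the real mechanism is assembler .macro blocks whose defaults come
from .ifndef, not C), designated initializers give the same "set what you
need, default the rest" shape:

#include <stdio.h>

struct int_params {
    unsigned int ivec; /* required: vector address */
    int ihsrr;         /* default 0: use SRR registers */
    int imask;         /* default 0: no soft-mask bit */
};

int main(void)
{
    /* like IVEC=0x300 with the other parameters left at their defaults */
    struct int_params data_access = { .ivec = 0x300 };

    printf("vec=%#x hsrr=%d mask=%d\n",
           data_access.ivec, data_access.ihsrr, data_access.imask);
    return 0;
}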
arch/powerpc/kernel/exceptions-64s.S | 77 ++++++++++++++++++++++++++--
1 file changed, 73 insertions(+), 4 deletions(-)
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 46508b148e16..0be6d8c34536 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -193,6 +193,61 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
mtctr reg; \
bctr
+/*
+ * Interrupt code generation macros
+ */
+#define IVEC .L_IVEC_\name\()
+#define IHSRR .L_IHSRR_\name\()
+#define IAREA .L_IAREA_\name\()
+#define IDAR .L_IDAR_\name\()
+#define IDSISR .L_IDSISR_\name\()
+#define ISET_RI .L_ISET_RI_\name\()
+#define IEARLY .L_IEARLY_\name\()
+#define IMASK .L_IMASK_\name\()
+#define IKVM_REAL .L_IKVM_REAL_\name\()
+#define IKVM_VIRT .L_IKVM_VIRT_\name\()
+
+#define INT_DEFINE_BEGIN(n) \
+.macro int_define_ ## n name
+
+#define INT_DEFINE_END(n) \
+.endm ; \
+int_define_ ## n n ; \
+do_define_int n
+
+.macro do_define_int name
+ .ifndef IVEC
+ .error "IVEC not defined"
+ .endif
+ .ifndef IHSRR
+ IHSRR=EXC_STD
+ .endif
+ .ifndef IAREA
+ IAREA=PACA_EXGEN
+ .endif
+ .ifndef IDAR
+ IDAR=0
+ .endif
+ .ifndef IDSISR
+ IDSISR=0
+ .endif
+ .ifndef ISET_RI
+ ISET_RI=1
+ .endif
+ .ifndef IEARLY
+ IEARLY=0
+ .endif
+ .ifndef IMASK
+ IMASK=0
+ .endif
+ .ifndef IKVM_REAL
+ IKVM_REAL=0
+ .endif
+ .ifndef IKVM_VIRT
+ IKVM_VIRT=0
+ .endif
+.endm
+
.macro INT_KVM_HANDLER name, vec, hsrr, area, skip
TRAMP_KVM_BEGIN(\name\()_kvm)
KVM_HANDLER \vec, \hsrr, \area, \skip
@@ -474,7 +529,7 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
*/
GET_SCRATCH0(r10)
std r10,\area\()+EX_R13(r13)
- .if \dar
+ .if \dar == 1
.if \hsrr
mfspr r10,SPRN_HDAR
.else
@@ -482,7 +537,7 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
.endif
std r10,\area\()+EX_DAR(r13)
.endif
- .if \dsisr
+ .if \dsisr == 1
.if \hsrr
mfspr r10,SPRN_HDSISR
.else
@@ -506,6 +561,14 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
.endif
.endm
+.macro GEN_INT_ENTRY name, virt, ool=0
+ .if ! \virt
+ INT_HANDLER \name, IVEC, \ool, IEARLY, \virt, IHSRR, IAREA, ISET_RI, IDAR, IDSISR, IMASK, IKVM_REAL
+ .else
+ INT_HANDLER \name, IVEC, \ool, IEARLY, \virt, IHSRR, IAREA, ISET_RI, IDAR, IDSISR, IMASK, IKVM_VIRT
+ .endif
+.endm
+
/*
* On entry r13 points to the paca, r9-r13 are saved in the paca,
* r9 contains the saved CR, r11 and r12 contain the saved SRR0 and
@@ -1143,12 +1206,18 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
bl unrecoverable_exception
b .
+INT_DEFINE_BEGIN(data_access)
+ IVEC=0x300
+ IDAR=1
+ IDSISR=1
+ IKVM_REAL=1
+INT_DEFINE_END(data_access)
EXC_REAL_BEGIN(data_access, 0x300, 0x80)
- INT_HANDLER data_access, 0x300, ool=1, dar=1, dsisr=1, kvm=1
+ GEN_INT_ENTRY data_access, virt=0, ool=1
EXC_REAL_END(data_access, 0x300, 0x80)
EXC_VIRT_BEGIN(data_access, 0x4300, 0x80)
- INT_HANDLER data_access, 0x300, virt=1, dar=1, dsisr=1
+ GEN_INT_ENTRY data_access, virt=1
EXC_VIRT_END(data_access, 0x4300, 0x80)
INT_KVM_HANDLER data_access, 0x300, EXC_STD, PACA_EXGEN, 1
EXC_COMMON_BEGIN(data_access_common)
--
2.23.0