This series adds a separate stack for each CPU wihin the system to use
when handling IRQs. Previously IRQs were handled on the kernel stack of
the current task. If that task was deep down a call stack at the point
of the interrupt, and handling the interrupt required a deep IRQ stack,
then there was a likelihood of stack overflow. Since the kernel stack
is in normal unmapped memory, overflowing it can lead to silent
corruption of other kernel data, with weird and wonderful results.
Before this patch series, ftracing the maximum stack size of a v4.9
kernel running on a Ci40 board gave:
4996
And with this series:
4084
Handling interrupts on a separate stack reduces the maximum kernel stack
usage in this configuration by ~900 bytes.
Since do_IRQ is now invoked on a separate stack, we select
HAVE_IRQ_EXIT_ON_IRQ_STACK so that softirqs will also be executed on the
irq stack rather than attempting to switch with do_softirq_own_stack().
This series has been tested on MIPS Boston, Malta and SEAD3 platforms,
Pistachio on the Creator Ci40 board and Cavium Octeon III.
Changes in v3:
Drop superfluous nop that would have been in delay slot with .set
noreorder but is no longer required now that the code is .set reorder.
Changes in v2:
Drop .set reorder/noreorder when updating $28
Matt Redfearn (5):
MIPS: Introduce irq_stack
MIPS: Stack unwinding while on IRQ stack
MIPS: Only change $28 to thread_info if coming from user mode
MIPS: Switch to the irq_stack in interrupts
MIPS: Select HAVE_IRQ_EXIT_ON_IRQ_STACK
arch/mips/Kconfig | 1 +
arch/mips/include/asm/irq.h | 12 ++++++
arch/mips/include/asm/stackframe.h | 7 ++++
arch/mips/kernel/asm-offsets.c | 1 +
arch/mips/kernel/genex.S | 81 +++++++++++++++++++++++++++++++++++---
arch/mips/kernel/irq.c | 11 ++++++
arch/mips/kernel/process.c | 15 ++++++-
7 files changed, 122 insertions(+), 6 deletions(-)
--
2.7.4
Allocate a per-cpu irq stack for use within interrupt handlers.
Also add a utility function on_irq_stack to determine if a given stack
pointer is within the irq stack for that cpu.
Signed-off-by: Matt Redfearn <[email protected]>
---
Changes in v3: None
Changes in v2: None
arch/mips/include/asm/irq.h | 12 ++++++++++++
arch/mips/kernel/asm-offsets.c | 1 +
arch/mips/kernel/irq.c | 11 +++++++++++
3 files changed, 24 insertions(+)
diff --git a/arch/mips/include/asm/irq.h b/arch/mips/include/asm/irq.h
index 6bf10e796553..956db6e201d1 100644
--- a/arch/mips/include/asm/irq.h
+++ b/arch/mips/include/asm/irq.h
@@ -17,6 +17,18 @@
#include <irq.h>
+#define IRQ_STACK_SIZE THREAD_SIZE
+
+extern void *irq_stack[NR_CPUS];
+
+static inline bool on_irq_stack(int cpu, unsigned long sp)
+{
+ unsigned long low = (unsigned long)irq_stack[cpu];
+ unsigned long high = low + IRQ_STACK_SIZE;
+
+ return (low <= sp && sp <= high);
+}
+
#ifdef CONFIG_I8259
static inline int irq_canonicalize(int irq)
{
diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c
index fae2f9447792..4be2763f835d 100644
--- a/arch/mips/kernel/asm-offsets.c
+++ b/arch/mips/kernel/asm-offsets.c
@@ -102,6 +102,7 @@ void output_thread_info_defines(void)
OFFSET(TI_REGS, thread_info, regs);
DEFINE(_THREAD_SIZE, THREAD_SIZE);
DEFINE(_THREAD_MASK, THREAD_MASK);
+ DEFINE(_IRQ_STACK_SIZE, IRQ_STACK_SIZE);
BLANK();
}
diff --git a/arch/mips/kernel/irq.c b/arch/mips/kernel/irq.c
index f25f7eab7307..2b0a371b42af 100644
--- a/arch/mips/kernel/irq.c
+++ b/arch/mips/kernel/irq.c
@@ -25,6 +25,8 @@
#include <linux/atomic.h>
#include <asm/uaccess.h>
+void *irq_stack[NR_CPUS];
+
/*
* 'what should we do if we get a hw irq event on an illegal vector'.
* each architecture has to answer this themselves.
@@ -58,6 +60,15 @@ void __init init_IRQ(void)
clear_c0_status(ST0_IM);
arch_init_irq();
+
+ for_each_possible_cpu(i) {
+ int irq_pages = IRQ_STACK_SIZE / PAGE_SIZE;
+ void *s = (void *)__get_free_pages(GFP_KERNEL, irq_pages);
+
+ irq_stack[i] = s;
+ pr_debug("CPU%d IRQ stack at 0x%p - 0x%p\n", i,
+ irq_stack[i], irq_stack[i] + IRQ_STACK_SIZE);
+ }
}
#ifdef CONFIG_DEBUG_STACKOVERFLOW
--
2.7.4
Within unwind stack, check if the stack pointer being unwound is within
the CPU's irq_stack and if so use that page rather than the task's stack
page.
Signed-off-by: Matt Redfearn <[email protected]>
---
Changes in v3: None
Changes in v2: None
arch/mips/kernel/process.c | 15 ++++++++++++++-
1 file changed, 14 insertions(+), 1 deletion(-)
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index 9514e5f2209f..4fdbf7078071 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -33,6 +33,7 @@
#include <asm/dsemul.h>
#include <asm/dsp.h>
#include <asm/fpu.h>
+#include <asm/irq.h>
#include <asm/msa.h>
#include <asm/pgtable.h>
#include <asm/mipsregs.h>
@@ -511,7 +512,19 @@ EXPORT_SYMBOL(unwind_stack_by_address);
unsigned long unwind_stack(struct task_struct *task, unsigned long *sp,
unsigned long pc, unsigned long *ra)
{
- unsigned long stack_page = (unsigned long)task_stack_page(task);
+ unsigned long stack_page = 0;
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ if (on_irq_stack(cpu, *sp)) {
+ stack_page = (unsigned long)irq_stack[cpu];
+ break;
+ }
+ }
+
+ if (!stack_page)
+ stack_page = (unsigned long)task_stack_page(task);
+
return unwind_stack_by_address(stack_page, sp, pc, ra);
}
#endif
--
2.7.4
The SAVE_SOME macro is used to save the execution context on all
exceptions.
If an exception occurs while executing user code, the stack is switched
to the kernel's stack for the current task, and register $28 is switched
to point to the current_thread_info, which is at the bottom of the stack
region.
If the exception occurs while executing kernel code, the stack is left,
and this change ensures that register $28 is not updated. This is the
correct behaviour when the kernel can be executing on the separate irq
stack, because the thread_info will not be at the base of it.
With this change, register $28 is only switched to it's kernel
conventional usage of the currrent thread info pointer at the point at
which execution enters kernel space. Doing it on every exception was
redundant, but OK without an IRQ stack, but will be erroneous once that
is introduced.
Signed-off-by: Matt Redfearn <[email protected]>
---
Changes in v3:
Drop superfluous nop that would have been in delay slot with .set
noreorder but is no longer required now that the code is .set reorder.
Changes in v2:
Drop .set reorder/noreorder when updating $28
arch/mips/include/asm/stackframe.h | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/arch/mips/include/asm/stackframe.h b/arch/mips/include/asm/stackframe.h
index eebf39549606..2f182bdf024f 100644
--- a/arch/mips/include/asm/stackframe.h
+++ b/arch/mips/include/asm/stackframe.h
@@ -216,12 +216,19 @@
LONG_S $25, PT_R25(sp)
LONG_S $28, PT_R28(sp)
LONG_S $31, PT_R31(sp)
+
+ /* Set thread_info if we're coming from user mode */
+ mfc0 k0, CP0_STATUS
+ sll k0, 3 /* extract cu0 bit */
+ bltz k0, 9f
+
ori $28, sp, _THREAD_MASK
xori $28, _THREAD_MASK
#ifdef CONFIG_CPU_CAVIUM_OCTEON
.set mips64
pref 0, 0($28) /* Prefetch the current pointer */
#endif
+9:
.set pop
.endm
--
2.7.4
When enterring interrupt context via handle_int or except_vec_vi, switch
to the irq_stack of the current CPU if it is not already in use.
The current stack pointer is masked with the thread size and compared to
the base or the irq stack. If it does not match then the stack pointer
is set to the top of that stack, otherwise this is a nested irq being
handled on the irq stack so the stack pointer should be left as it was.
The in-use stack pointer is placed in the callee saved register s1. It
will be saved to the stack when plat_irq_dispatch is invoked and can be
restored once control returns here.
Signed-off-by: Matt Redfearn <[email protected]>
---
Changes in v3: None
Changes in v2: None
arch/mips/kernel/genex.S | 81 +++++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 76 insertions(+), 5 deletions(-)
diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
index dc0b29612891..0a7ba4b2f687 100644
--- a/arch/mips/kernel/genex.S
+++ b/arch/mips/kernel/genex.S
@@ -187,9 +187,44 @@ NESTED(handle_int, PT_SIZE, sp)
LONG_L s0, TI_REGS($28)
LONG_S sp, TI_REGS($28)
- PTR_LA ra, ret_from_irq
- PTR_LA v0, plat_irq_dispatch
- jr v0
+
+ /*
+ * SAVE_ALL ensures we are using a valid kernel stack for the thread.
+ * Check if we are already using the IRQ stack.
+ */
+ move s1, sp # Preserve the sp
+
+ /* Get IRQ stack for this CPU */
+ ASM_CPUID_MFC0 k0, ASM_SMP_CPUID_REG
+#if defined(CONFIG_32BIT) || defined(KBUILD_64BIT_SYM32)
+ lui k1, %hi(irq_stack)
+#else
+ lui k1, %highest(irq_stack)
+ daddiu k1, %higher(irq_stack)
+ dsll k1, 16
+ daddiu k1, %hi(irq_stack)
+ dsll k1, 16
+#endif
+ LONG_SRL k0, SMP_CPUID_PTRSHIFT
+ LONG_ADDU k1, k0
+ LONG_L t0, %lo(irq_stack)(k1)
+
+ # Check if already on IRQ stack
+ PTR_LI t1, ~(_THREAD_SIZE-1)
+ and t1, t1, sp
+ beq t0, t1, 2f
+
+ /* Switch to IRQ stack */
+ li t1, _IRQ_STACK_SIZE
+ PTR_ADD sp, t0, t1
+
+2:
+ jal plat_irq_dispatch
+
+ /* Restore sp */
+ move sp, s1
+
+ j ret_from_irq
#ifdef CONFIG_CPU_MICROMIPS
nop
#endif
@@ -262,8 +297,44 @@ NESTED(except_vec_vi_handler, 0, sp)
LONG_L s0, TI_REGS($28)
LONG_S sp, TI_REGS($28)
- PTR_LA ra, ret_from_irq
- jr v0
+
+ /*
+ * SAVE_ALL ensures we are using a valid kernel stack for the thread.
+ * Check if we are already using the IRQ stack.
+ */
+ move s1, sp # Preserve the sp
+
+ /* Get IRQ stack for this CPU */
+ ASM_CPUID_MFC0 k0, ASM_SMP_CPUID_REG
+#if defined(CONFIG_32BIT) || defined(KBUILD_64BIT_SYM32)
+ lui k1, %hi(irq_stack)
+#else
+ lui k1, %highest(irq_stack)
+ daddiu k1, %higher(irq_stack)
+ dsll k1, 16
+ daddiu k1, %hi(irq_stack)
+ dsll k1, 16
+#endif
+ LONG_SRL k0, SMP_CPUID_PTRSHIFT
+ LONG_ADDU k1, k0
+ LONG_L t0, %lo(irq_stack)(k1)
+
+ # Check if already on IRQ stack
+ PTR_LI t1, ~(_THREAD_SIZE-1)
+ and t1, t1, sp
+ beq t0, t1, 2f
+
+ /* Switch to IRQ stack */
+ li t1, _IRQ_STACK_SIZE
+ PTR_ADD sp, t0, t1
+
+2:
+ jal plat_irq_dispatch
+
+ /* Restore sp */
+ move sp, s1
+
+ j ret_from_irq
END(except_vec_vi_handler)
/*
--
2.7.4
Since do_IRQ is now invoked on a separate IRQ stack, we select
HAVE_IRQ_EXIT_ON_IRQ_STACK so that softirq's may be invoked directly
from irq_exit(), rather than requiring do_softirq_own_stack.
Signed-off-by: Matt Redfearn <[email protected]>
---
Changes in v3: None
Changes in v2: None
arch/mips/Kconfig | 1 +
1 file changed, 1 insertion(+)
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index b3c5bde43d34..80832aa8e4fb 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -9,6 +9,7 @@ config MIPS
select HAVE_CONTEXT_TRACKING
select HAVE_GENERIC_DMA_COHERENT
select HAVE_IDE
+ select HAVE_IRQ_EXIT_ON_IRQ_STACK
select HAVE_OPROFILE
select HAVE_PERF_EVENTS
select PERF_USE_VMALLOC
--
2.7.4
Hi Matt,
Thanks for the v3. For the whole series:
Acked-by: Jason A. Donenfeld <[email protected]>
Jason
On Mon, 19 Dec 2016, Matt Redfearn wrote:
> Changes in v3:
> Drop superfluous nop that would have been in delay slot with .set
> noreorder but is no longer required now that the code is .set reorder.
LGTM
Reviewed-by: Maciej W. Rozycki <[email protected]>
Maciej