From: Guo Ren <[email protected]>
Add independent per-CPU irq stacks to prevent kernel stack overflows.
It is also compatible with VMAP_STACK by using arch_alloc_vmap_stack.
Tested-by: Jisheng Zhang <[email protected]>
Signed-off-by: Guo Ren <[email protected]>
Signed-off-by: Guo Ren <[email protected]>
Cc: Clément Léger <[email protected]>
---
arch/riscv/Kconfig | 7 ++++++
arch/riscv/include/asm/irq_stack.h | 30 ++++++++++++++++++++++++
arch/riscv/include/asm/thread_info.h | 2 ++
arch/riscv/kernel/irq.c | 33 ++++++++++++++++++++++++++
arch/riscv/kernel/traps.c | 35 ++++++++++++++++++++++++++--
5 files changed, 105 insertions(+), 2 deletions(-)
create mode 100644 arch/riscv/include/asm/irq_stack.h
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index a3d54cd14fca..a8368fe7be14 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -590,6 +590,13 @@ config FPU
If you don't know what to do here, say Y.
+config IRQ_STACKS
+ bool "Independent irq stacks" if EXPERT
+ default y
+ select HAVE_IRQ_EXIT_ON_IRQ_STACK
+ help
+ Add independent irq stacks for percpu to prevent kernel stack overflows.
+
endmenu # "Platform type"
menu "Kernel features"
diff --git a/arch/riscv/include/asm/irq_stack.h b/arch/riscv/include/asm/irq_stack.h
new file mode 100644
index 000000000000..e4042d297580
--- /dev/null
+++ b/arch/riscv/include/asm/irq_stack.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_RISCV_IRQ_STACK_H
+#define _ASM_RISCV_IRQ_STACK_H
+
+#include <linux/bug.h>
+#include <linux/gfp.h>
+#include <linux/kconfig.h>
+#include <linux/vmalloc.h>
+#include <linux/pgtable.h>
+#include <asm/thread_info.h>
+
+DECLARE_PER_CPU(ulong *, irq_stack_ptr);
+
+#ifdef CONFIG_VMAP_STACK
+/*
+ * To ensure that VMAP'd stack overflow detection works correctly, all VMAP'd
+ * stacks need to have the same alignment.
+ */
+static inline unsigned long *arch_alloc_vmap_stack(size_t stack_size, int node)
+{
+ void *p;
+
+ p = __vmalloc_node(stack_size, THREAD_ALIGN, THREADINFO_GFP, node,
+ __builtin_return_address(0));
+ return kasan_reset_tag(p);
+}
+#endif /* CONFIG_VMAP_STACK */
+
+#endif /* _ASM_RISCV_IRQ_STACK_H */
diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h
index 97e6f65ec176..2f32875276b0 100644
--- a/arch/riscv/include/asm/thread_info.h
+++ b/arch/riscv/include/asm/thread_info.h
@@ -40,6 +40,8 @@
#define OVERFLOW_STACK_SIZE SZ_4K
#define SHADOW_OVERFLOW_STACK_SIZE (1024)
+#define IRQ_STACK_SIZE THREAD_SIZE
+
#ifndef __ASSEMBLY__
extern long shadow_stack[SHADOW_OVERFLOW_STACK_SIZE / sizeof(long)];
diff --git a/arch/riscv/kernel/irq.c b/arch/riscv/kernel/irq.c
index eb9a68a539e6..a1dcf8e43b3c 100644
--- a/arch/riscv/kernel/irq.c
+++ b/arch/riscv/kernel/irq.c
@@ -28,6 +28,38 @@ struct fwnode_handle *riscv_get_intc_hwnode(void)
}
EXPORT_SYMBOL_GPL(riscv_get_intc_hwnode);
+#ifdef CONFIG_IRQ_STACKS
+#include <asm/irq_stack.h>
+
+DEFINE_PER_CPU(ulong *, irq_stack_ptr);
+
+#ifdef CONFIG_VMAP_STACK
+static void init_irq_stacks(void)
+{
+ int cpu;
+ ulong *p;
+
+ for_each_possible_cpu(cpu) {
+ p = arch_alloc_vmap_stack(IRQ_STACK_SIZE, cpu_to_node(cpu));
+ per_cpu(irq_stack_ptr, cpu) = p;
+ }
+}
+#else
+/* irq stack only needs to be 16 byte aligned - not IRQ_STACK_SIZE aligned. */
+DEFINE_PER_CPU_ALIGNED(ulong [IRQ_STACK_SIZE/sizeof(ulong)], irq_stack);
+
+static void init_irq_stacks(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ per_cpu(irq_stack_ptr, cpu) = per_cpu(irq_stack, cpu);
+}
+#endif /* CONFIG_VMAP_STACK */
+#else
+static void init_irq_stacks(void) {}
+#endif /* CONFIG_IRQ_STACKS */
+
int arch_show_interrupts(struct seq_file *p, int prec)
{
show_ipi_stats(p, prec);
@@ -36,6 +68,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)
void __init init_IRQ(void)
{
+ init_irq_stacks();
irqchip_init();
if (!handle_arch_irq)
panic("No interrupt controller found.");
diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index 05ffdcd1424e..5158961ea977 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -27,6 +27,7 @@
#include <asm/syscall.h>
#include <asm/thread_info.h>
#include <asm/vector.h>
+#include <asm/irq_stack.h>
int show_unhandled_signals = 1;
@@ -327,16 +328,46 @@ asmlinkage __visible noinstr void do_page_fault(struct pt_regs *regs)
}
#endif
-asmlinkage __visible noinstr void do_irq(struct pt_regs *regs)
+static void noinstr handle_riscv_irq(struct pt_regs *regs)
{
struct pt_regs *old_regs;
- irqentry_state_t state = irqentry_enter(regs);
irq_enter_rcu();
old_regs = set_irq_regs(regs);
handle_arch_irq(regs);
set_irq_regs(old_regs);
irq_exit_rcu();
+}
+
+asmlinkage void noinstr do_irq(struct pt_regs *regs)
+{
+ irqentry_state_t state = irqentry_enter(regs);
+#ifdef CONFIG_IRQ_STACKS
+ if (on_thread_stack()) {
+ ulong *sp = per_cpu(irq_stack_ptr, smp_processor_id())
+ + IRQ_STACK_SIZE/sizeof(ulong);
+ __asm__ __volatile(
+ "addi sp, sp, -"RISCV_SZPTR "\n"
+ REG_S" ra, (sp) \n"
+ "addi sp, sp, -"RISCV_SZPTR "\n"
+ REG_S" s0, (sp) \n"
+ "addi s0, sp, 2*"RISCV_SZPTR "\n"
+ "move sp, %[sp] \n"
+ "move a0, %[regs] \n"
+ "call handle_riscv_irq \n"
+ "addi sp, s0, -2*"RISCV_SZPTR"\n"
+ REG_L" s0, (sp) \n"
+ "addi sp, sp, "RISCV_SZPTR "\n"
+ REG_L" ra, (sp) \n"
+ "addi sp, sp, "RISCV_SZPTR "\n"
+ :
+ : [sp] "r" (sp), [regs] "r" (regs)
+ : "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7",
+ "t0", "t1", "t2", "t3", "t4", "t5", "t6",
+ "memory");
+ } else
+#endif
+ handle_riscv_irq(regs);
irqentry_exit(regs, state);
}
--
2.36.1
On Fri, Aug 4, 2023 at 11:28 PM Jason A. Donenfeld <[email protected]> wrote:
>
> Hi Guo,
>
> On Tue, Jun 13, 2023 at 09:30:16PM -0400, [email protected] wrote:
> > From: Guo Ren <[email protected]>
> >
> > Add independent irq stacks for percpu to prevent kernel stack overflows.
> > It is also compatible with VMAP_STACK by arch_alloc_vmap_stack.
> >
> > Tested-by: Jisheng Zhang <[email protected]>
> > Signed-off-by: Guo Ren <[email protected]>
> > Signed-off-by: Guo Ren <[email protected]>
> > Cc: Clément Léger <[email protected]>
>
> This patch broke the WireGuard test suite. I've attached the .config
> file that it uses. I'm able to fix it by setting CONFIG_EXPERT=y and
> CONFIG_IRQ_STACKS=n to essentially reverse the effect of this patch. But
> I'd rather not do that.
>
> Any idea what's up?
>
> Thanks,
> Jason
And, err, I guess I failed to describe what's broken exactly. Here's
what happens:
timeout --foreground 20m qemu-system-riscv64 \
-nodefaults \
-nographic \
-smp 4 \
-cpu rv64 -machine virt \
-m 256M \
-serial stdio \
-chardev file,path=/home/zx2c4/Projects/wireguard-linux/tools/testing/selftests/wireguard/qemu/build/riscv64/result,id=result \
-device virtio-serial-device -device virtserialport,chardev=result \
-no-reboot \
-monitor none \
-kernel /home/zx2c4/Projects/wireguard-linux/tools/testing/selftests/wireguard/qemu/build/riscv64/kernel/arch/riscv/boot/Image
OpenSBI v1.2
____ _____ ____ _____
/ __ \ / ____| _ \_ _|
| | | |_ __ ___ _ __ | (___ | |_) || |
| | | | '_ \ / _ \ '_ \ \___ \| _ < | |
| |__| | |_) | __/ | | |____) | |_) || |_
\____/| .__/ \___|_| |_|_____/|____/_____|
| |
|_|
Platform Name : riscv-virtio,qemu
Platform Features : medeleg
Platform HART Count : 4
Platform IPI Device : aclint-mswi
Platform Timer Device : aclint-mtimer @ 10000000Hz
Platform Console Device : uart8250
Platform HSM Device : ---
Platform PMU Device : ---
Platform Reboot Device : sifive_test
Platform Shutdown Device : sifive_test
Firmware Base : 0x80000000
Firmware Size : 236 KB
Runtime SBI Version : 1.0
Domain0 Name : root
Domain0 Boot HART : 0
Domain0 HARTs : 0*,1*,2*,3*
Domain0 Region00 : 0x0000000002000000-0x000000000200ffff (I)
Domain0 Region01 : 0x0000000080000000-0x000000008003ffff ()
Domain0 Region02 : 0x0000000000000000-0xffffffffffffffff (R,W,X)
Domain0 Next Address : 0x0000000080200000
Domain0 Next Arg1 : 0x000000008fe00000
Domain0 Next Mode : S-mode
Domain0 SysReset : yes
Boot HART ID : 0
Boot HART Domain : root
Boot HART Priv Version : v1.12
Boot HART Base ISA : rv64imafdch
Boot HART ISA Extensions : time,sstc
Boot HART PMP Count : 16
Boot HART PMP Granularity : 4
Boot HART PMP Address Bits: 54
Boot HART MHPM Count : 16
Boot HART MIDELEG : 0x0000000000001666
Boot HART MEDELEG : 0x0000000000f0b509
[terminates/hangs here]
On Fri, Aug 04, 2023 at 11:28:17PM +0200, Jason A. Donenfeld wrote:
> On Fri, Aug 4, 2023 at 11:28 PM Jason A. Donenfeld <[email protected]> wrote:
> >
> > Hi Guo,
> >
> > On Tue, Jun 13, 2023 at 09:30:16PM -0400, [email protected] wrote:
> > > From: Guo Ren <[email protected]>
> > >
> > > Add independent irq stacks for percpu to prevent kernel stack overflows.
> > > It is also compatible with VMAP_STACK by arch_alloc_vmap_stack.
> > >
> > > Tested-by: Jisheng Zhang <[email protected]>
> > > Signed-off-by: Guo Ren <[email protected]>
> > > Signed-off-by: Guo Ren <[email protected]>
> > > Cc: Clément Léger <[email protected]>
> >
> > This patch broke the WireGuard test suite. I've attached the .config
> > file that it uses. I'm able to fix it by setting CONFIG_EXPERT=y and
> > CONFIG_IRQ_STACKS=n to essentially reverse the effect of this patch. But
> > I'd rather not do that.
> >
> > Any idea what's up?
Given your config, I suspect you're hitting the issue that is resolved
by Guo Ren's series:
https://lore.kernel.org/linux-riscv/[email protected]/
Hopefully that's it,
Conor.
> >
> > Thanks,
> > Jason
>
> And, err, I guess I failed to describe what's broken exactly. Here's
> what happens:
>
> timeout --foreground 20m qemu-system-riscv64 \
> -nodefaults \
> -nographic \
> -smp 4 \
> -cpu rv64 -machine virt \
> -m 256M \
> -serial stdio \
> -chardev
> file,path=/home/zx2c4/Projects/wireguard-linux/tools/testing/selftests/wireguard/qemu/build/riscv64/result,id=result
> \
> -device virtio-serial-device -device virtserialport,chardev=result \
> -no-reboot \
> -monitor none \
> -kernel /home/zx2c4/Projects/wireguard-linux/tools/testing/selftests/wireguard/qemu/build/riscv64/kernel/arch/riscv/boot/Image
>
> OpenSBI v1.2
> ____ _____ ____ _____
> / __ \ / ____| _ \_ _|
> | | | |_ __ ___ _ __ | (___ | |_) || |
> | | | | '_ \ / _ \ '_ \ \___ \| _ < | |
> | |__| | |_) | __/ | | |____) | |_) || |_
> \____/| .__/ \___|_| |_|_____/|____/_____|
> | |
> |_|
>
> Platform Name : riscv-virtio,qemu
> Platform Features : medeleg
> Platform HART Count : 4
> Platform IPI Device : aclint-mswi
> Platform Timer Device : aclint-mtimer @ 10000000Hz
> Platform Console Device : uart8250
> Platform HSM Device : ---
> Platform PMU Device : ---
> Platform Reboot Device : sifive_test
> Platform Shutdown Device : sifive_test
> Firmware Base : 0x80000000
> Firmware Size : 236 KB
> Runtime SBI Version : 1.0
>
> Domain0 Name : root
> Domain0 Boot HART : 0
> Domain0 HARTs : 0*,1*,2*,3*
> Domain0 Region00 : 0x0000000002000000-0x000000000200ffff (I)
> Domain0 Region01 : 0x0000000080000000-0x000000008003ffff ()
> Domain0 Region02 : 0x0000000000000000-0xffffffffffffffff (R,W,X)
> Domain0 Next Address : 0x0000000080200000
> Domain0 Next Arg1 : 0x000000008fe00000
> Domain0 Next Mode : S-mode
> Domain0 SysReset : yes
>
> Boot HART ID : 0
> Boot HART Domain : root
> Boot HART Priv Version : v1.12
> Boot HART Base ISA : rv64imafdch
> Boot HART ISA Extensions : time,sstc
> Boot HART PMP Count : 16
> Boot HART PMP Granularity : 4
> Boot HART PMP Address Bits: 54
> Boot HART MHPM Count : 16
> Boot HART MIDELEG : 0x0000000000001666
> Boot HART MEDELEG : 0x0000000000f0b509
> [terminates/hangs here]
>
> _______________________________________________
> linux-riscv mailing list
> [email protected]
> http://lists.infradead.org/mailman/listinfo/linux-riscv
Hi Guo,
On Tue, Jun 13, 2023 at 09:30:16PM -0400, [email protected] wrote:
> From: Guo Ren <[email protected]>
>
> Add independent irq stacks for percpu to prevent kernel stack overflows.
> It is also compatible with VMAP_STACK by arch_alloc_vmap_stack.
>
> Tested-by: Jisheng Zhang <[email protected]>
> Signed-off-by: Guo Ren <[email protected]>
> Signed-off-by: Guo Ren <[email protected]>
> Cc: Clément Léger <[email protected]>
This patch broke the WireGuard test suite. I've attached the .config
file that it uses. I'm able to fix it by setting CONFIG_EXPERT=y and
CONFIG_IRQ_STACKS=n to essentially reverse the effect of this patch. But
I'd rather not do that.
Any idea what's up?
Thanks,
Jason
Hi Conor,
On Fri, Aug 4, 2023 at 11:41 PM Conor Dooley <[email protected]> wrote:
>
> On Fri, Aug 04, 2023 at 11:28:17PM +0200, Jason A. Donenfeld wrote:
> > On Fri, Aug 4, 2023 at 11:28 PM Jason A. Donenfeld <[email protected]> wrote:
> > >
> > > Hi Guo,
> > >
> > > On Tue, Jun 13, 2023 at 09:30:16PM -0400, [email protected] wrote:
> > > > From: Guo Ren <[email protected]>
> > > >
> > > > Add independent irq stacks for percpu to prevent kernel stack overflows.
> > > > It is also compatible with VMAP_STACK by arch_alloc_vmap_stack.
> > > >
> > > > Tested-by: Jisheng Zhang <[email protected]>
> > > > Signed-off-by: Guo Ren <[email protected]>
> > > > Signed-off-by: Guo Ren <[email protected]>
> > > > Cc: Clément Léger <[email protected]>
> > >
> > > This patch broke the WireGuard test suite. I've attached the .config
> > > file that it uses. I'm able to fix it by setting CONFIG_EXPERT=y and
> > > CONFIG_IRQ_STACKS=n to essentially reverse the effect of this patch. But
> > > I'd rather not do that.
> > >
> > > Any idea what's up?
>
> Given your config, I suspect you're hitting the issue that is resolved
> by Guo Ren's series:
> https://lore.kernel.org/linux-riscv/[email protected]/
>
> Hopefully that's it,
> Conor.
Thanks! That did the trick. I suppose this will be in the next 6.5 rc.
Jason
On Fri, Aug 4, 2023 at 5:41 PM Conor Dooley <[email protected]> wrote:
>
> On Fri, Aug 04, 2023 at 11:28:17PM +0200, Jason A. Donenfeld wrote:
> > On Fri, Aug 4, 2023 at 11:28 PM Jason A. Donenfeld <[email protected]> wrote:
> > >
> > > Hi Guo,
> > >
> > > On Tue, Jun 13, 2023 at 09:30:16PM -0400, [email protected] wrote:
> > > > From: Guo Ren <[email protected]>
> > > >
> > > > Add independent irq stacks for percpu to prevent kernel stack overflows.
> > > > It is also compatible with VMAP_STACK by arch_alloc_vmap_stack.
> > > >
> > > > Tested-by: Jisheng Zhang <[email protected]>
> > > > Signed-off-by: Guo Ren <[email protected]>
> > > > Signed-off-by: Guo Ren <[email protected]>
> > > > Cc: Clément Léger <[email protected]>
> > >
> > > This patch broke the WireGuard test suite. I've attached the .config
> > > file that it uses. I'm able to fix it by setting CONFIG_EXPERT=y and
> > > CONFIG_IRQ_STACKS=n to essentially reverse the effect of this patch. But
> > > I'd rather not do that.
> > >
> > > Any idea what's up?
>
> Given your config, I suspect you're hitting the issue that is resolved
> by Guo Ren's series:
> https://lore.kernel.org/linux-riscv/[email protected]/
Thanks to Conor for the reply.
>
> Hopefully that's it,
> Conor.
>
> > >
> > > Thanks,
> > > Jason
> >
> > And, err, I guess I failed to describe what's broken exactly. Here's
> > what happens:
> >
> > timeout --foreground 20m qemu-system-riscv64 \
> > -nodefaults \
> > -nographic \
> > -smp 4 \
> > -cpu rv64 -machine virt \
> > -m 256M \
> > -serial stdio \
> > -chardev
> > file,path=/home/zx2c4/Projects/wireguard-linux/tools/testing/selftests/wireguard/qemu/build/riscv64/result,id=result
> > \
> > -device virtio-serial-device -device virtserialport,chardev=result \
> > -no-reboot \
> > -monitor none \
> > -kernel /home/zx2c4/Projects/wireguard-linux/tools/testing/selftests/wireguard/qemu/build/riscv64/kernel/arch/riscv/boot/Image
> >
> > OpenSBI v1.2
> > ____ _____ ____ _____
> > / __ \ / ____| _ \_ _|
> > | | | |_ __ ___ _ __ | (___ | |_) || |
> > | | | | '_ \ / _ \ '_ \ \___ \| _ < | |
> > | |__| | |_) | __/ | | |____) | |_) || |_
> > \____/| .__/ \___|_| |_|_____/|____/_____|
> > | |
> > |_|
> >
> > Platform Name : riscv-virtio,qemu
> > Platform Features : medeleg
> > Platform HART Count : 4
> > Platform IPI Device : aclint-mswi
> > Platform Timer Device : aclint-mtimer @ 10000000Hz
> > Platform Console Device : uart8250
> > Platform HSM Device : ---
> > Platform PMU Device : ---
> > Platform Reboot Device : sifive_test
> > Platform Shutdown Device : sifive_test
> > Firmware Base : 0x80000000
> > Firmware Size : 236 KB
> > Runtime SBI Version : 1.0
> >
> > Domain0 Name : root
> > Domain0 Boot HART : 0
> > Domain0 HARTs : 0*,1*,2*,3*
> > Domain0 Region00 : 0x0000000002000000-0x000000000200ffff (I)
> > Domain0 Region01 : 0x0000000080000000-0x000000008003ffff ()
> > Domain0 Region02 : 0x0000000000000000-0xffffffffffffffff (R,W,X)
> > Domain0 Next Address : 0x0000000080200000
> > Domain0 Next Arg1 : 0x000000008fe00000
> > Domain0 Next Mode : S-mode
> > Domain0 SysReset : yes
> >
> > Boot HART ID : 0
> > Boot HART Domain : root
> > Boot HART Priv Version : v1.12
> > Boot HART Base ISA : rv64imafdch
> > Boot HART ISA Extensions : time,sstc
> > Boot HART PMP Count : 16
> > Boot HART PMP Granularity : 4
> > Boot HART PMP Address Bits: 54
> > Boot HART MHPM Count : 16
> > Boot HART MIDELEG : 0x0000000000001666
> > Boot HART MEDELEG : 0x0000000000f0b509
> > [terminates/hangs here]
> >
> > _______________________________________________
> > linux-riscv mailing list
> > [email protected]
> > http://lists.infradead.org/mailman/listinfo/linux-riscv
--
Best Regards
Guo Ren