2020-05-05 14:21:26

by Thomas Gleixner

Subject: [patch V4 part 5 02/31] x86/entry: Provide helpers for execute on irqstack

Device interrupt handlers and system vector handlers are executed on the
interrupt stack. The stack switch happens in the low level assembly entry
code. This conflicts with the efforts to consolidate the exit code in C to
ensure correctness vs. RCU and tracing.

As there is no way to move #DB away from IST due to the MOV SS issue, the
requirements vs. #DB and NMI for switching to the interrupt stack do not
exist anymore. The only requirement is that interrupts are disabled.

This allows moving the stack switching to C code, which simplifies the
entry/exit handling further: the stack can be switched after handling the
entry, and on exit before handling RCU, return to user mode and kernel
preemption in the same way as for regular exceptions.

This also allows moving the extra magic Xen hypercall code and the softirq
stack switching into C.

The mechanism is straightforward:

1) Store the current stack pointer on top of the interrupt stack. That's
required for the unwinder.

2) Switch the stack pointer

3) Call the function

4) Restore the stack pointer

The full code sequence to make the unwinder happy is:

pushq %rbp
movq %rsp, %rbp
movq $(top_of_hardirq_stack - 8), %reg
movq %rsp, (%reg)
movq %reg, %rsp
call function
popq %rsp
leaveq

While the following sequence would spare the 'popq %rsp':

pushq %rbp
movq $(top_of_hardirq_stack - 8), %rbp
movq %rsp, (%rrbp)
xchgq %rbp, %rsp
call function
movq %rbp, %rsp
leaveq

but that requires further changes to objtool so that the unwinder works
correctly. Can be done on top and is not critical for now.

Provide helper functions to check whether the interrupt stack is already
active and whether stack switching is required.
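
For illustration only (not part of this patch; the actual usage sites are
added in subsequent patches, and handle_irq_fn is just a placeholder for a
parameterless handler function), a caller would look roughly like this:

	/* @regs is the pt_regs of the interrupt at hand */
	if (irq_needs_irq_stack(regs))
		RUN_ON_IRQSTACK(handle_irq_fn);
	else
		handle_irq_fn();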

64-bit only for now. 32-bit already has a variant of this. Once this is
cleaned up, the two implementations might be consolidated as a cleanup on
top.

Signed-off-by: Thomas Gleixner <[email protected]>
---
arch/x86/include/asm/irq_stack.h | 61 +++++++++++++++++++++++++++++++++++++++
1 file changed, 61 insertions(+)

--- /dev/null
+++ b/arch/x86/include/asm/irq_stack.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_IRQ_STACK_H
+#define _ASM_X86_IRQ_STACK_H
+
+#include <linux/ptrace.h>
+
+#include <asm/processor.h>
+
+#ifdef CONFIG_X86_64
+static __always_inline bool irqstack_active(void)
+{
+ return __this_cpu_read(irq_count) != -1;
+}
+
+/*
+ * Macro to emit code for running @func on the irq stack.
+ */
+#define RUN_ON_IRQSTACK(func) { \
+ unsigned long tos; \
+ \
+ lockdep_assert_irqs_disabled(); \
+ \
+ tos = ((unsigned long)__this_cpu_read(hardirq_stack_ptr)) - 8; \
+ \
+ __this_cpu_add(irq_count, 1); \
+ asm volatile( \
+ "pushq %%rbp \n" \
+ "movq %%rsp, %%rbp \n" \
+ "movq %%rsp, (%[ts]) \n" \
+ "movq %[ts], %%rsp \n" \
+ "1: \n" \
+ " .pushsection .discard.instr_begin \n" \
+ " .long 1b - . \n" \
+ " .popsection \n" \
+ "call " __ASM_FORM(func) " \n" \
+ "2: \n" \
+ " .pushsection .discard.instr_end \n" \
+ " .long 2b - . \n" \
+ " .popsection \n" \
+ "popq %%rsp \n" \
+ "leaveq \n" \
+ : \
+ : [ts] "r" (tos) \
+ : "memory" \
+ ); \
+ __this_cpu_sub(irq_count, 1); \
+}
+
+#else /* CONFIG_X86_64 */
+static __always_inline bool irqstack_active(void) { return false; }
+#define RUN_ON_IRQSTACK(func) do { } while (0)
+#endif /* !CONFIG_X86_64 */
+
+static __always_inline bool irq_needs_irq_stack(struct pt_regs *regs)
+{
+ if (IS_ENABLED(CONFIG_X86_32))
+ return false;
+ return !user_mode(regs) && !irqstack_active();
+}
+
+#endif


2020-05-06 08:25:50

by Thomas Gleixner

Subject: Re: [patch V4 part 5 02/31] x86/entry: Provide helpers for execute on irqstack

Thomas Gleixner <[email protected]> writes:
> This also allows moving the extra magic Xen hypercall code and the softirq
> stack switching into C.
>
> The mechanism is straightforward:
>
> 1) Store the current stack pointer on top of the interrupt stack. That's
> required for the unwinder.
>
> 2) Switch the stack pointer
>
> 3) Call the function
>
> 4) Restore the stack pointer
>
> The full code sequence to make the unwinder happy is:
>
> pushq %rbp
> movq %rsp, %rbp
> movq $(top_of_hardirq_stack - 8), %reg
> movq %rsp, (%reg)
> movq %reg, %rsp
> call function
> popq %rsp
> leaveq
>
> While the following sequence would spare the 'popq %rsp':
>
> pushq %rbp
> movq $(top_of_hardirq_stack - 8), %rbp
> movq %rsp, (%rrbp)
> xchgq %rbp, %rsp
> call function
> movq %rbp, %rsp
> leaveq

So I stared at this some more.

The push rbp is wrong for the frame unwinder case. That one is happy
(except for objtool) with the most minimalistic variant:

movq %%rsp, (%[tos])
movq %[tos], %%rsp
call function
popq %%rsp

which is not surprising because for the frame unwinder this is similar
to the 'gcc aligns stack in the middle of the function' handling. BP
still has to point to the previous frame. Adjustment of BP must only
happen on function entry.

The stack border convention of having the pointer to the previous stack
in the top word is sufficient for this.
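
IOW, a frame unwinder merely has to follow the back link in the top word
when the walk reaches the border of the irq stack. A minimal sketch
(made-up helper, not the in-tree unwinder code):

	static bool cross_irqstack_border(unsigned long *sp, unsigned long **prev_sp)
	{
		unsigned long tos = (unsigned long)__this_cpu_read(hardirq_stack_ptr);

		/* The top word of the irq stack holds the previous stack pointer */
		if ((unsigned long)sp != tos - 8)
			return false;

		*prev_sp = (unsigned long *)*sp;
		return true;
	}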

objtool complains though:

warning: objtool: do_softirq_own_stack()+0x67: return with modified stack frame

That obviously also makes the ORC unwinder unhappy, as objtool fails to
provide the right hint. But this construct should be completely sufficient
for ORC as well.

I'm exploring another idea right now, but wanted to share the info.

Thanks,

tglx

2020-05-10 04:35:39

by Lai Jiangshan

Subject: Re: [patch V4 part 5 02/31] x86/entry: Provide helpers for execute on irqstack

On Tue, May 5, 2020 at 10:19 PM Thomas Gleixner <[email protected]> wrote:
>
> Device interrupt handlers and system vector handlers are executed on the
> interrupt stack. The stack switch happens in the low level assembly entry
> code. This conflicts with the efforts to consolidate the exit code in C to
> ensure correctness vs. RCU and tracing.
>
> As there is no way to move #DB away from IST due to the MOV SS issue, the
> requirements vs. #DB and NMI for switching to the interrupt stack do not
> exist anymore. The only requirement is that interrupts are disabled.

Hi, tglx and Andy Lutomirski,

Is there any information about "no way to move #DB away from IST
due to the MOV SS issue"? IST-based #DB results in ist_shift (for
nested #DB) and debug_idt (for #NMI vs. #DB), which are somewhat
ugly. If IST-less #DB is to work, the debug stack would have to be
switched in software, like the interrupt stack.

There was a "POP/MOV SS" CVE/issue about #BP which led to
moving #BP off the IST in commit d8ba61ba58c8
("x86/entry/64: Don't use IST entry for #BP stack").

#DB and #BP are treated like #NMI due to their ability to interrupt
almost any context. But the kernel has much more control over #DB and
#BP, which can be kept out of critical code by putting that code into
non-instrumentable sections like __entry, noinstr etc.
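
For illustration only (a sketch of the idea, with a made-up function
name, not a proposal):

	/*
	 * Sketch: noinstr places the function into a non-instrumentable
	 * section, so tracing and kprobes (and thus #BP) are kept out
	 * of it.
	 */
	noinstr void sketch_switch_debug_stack(void)
	{
		/* No instrumentable code allowed in here */
	}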

Is it possible to implement IST-less #DB?

Thanks,
Lai

2020-05-11 09:14:13

by Alexandre Chartre

Subject: Re: [patch V4 part 5 02/31] x86/entry: Provide helpers for execute on irqstack


On 5/5/20 3:53 PM, Thomas Gleixner wrote:
> Device interrupt handlers and system vector handlers are executed on the
> interrupt stack. The stack switch happens in the low level assembly entry
> code. This conflicts with the efforts to consolidate the exit code in C to
> ensure correctness vs. RCU and tracing.
>
> As there is no way to move #DB away from IST due to the MOV SS issue, the
> requirements vs. #DB and NMI for switching to the interrupt stack do not
> exist anymore. The only requirement is that interrupts are disabled.
>
> This allows moving the stack switching to C code, which simplifies the
> entry/exit handling further: the stack can be switched after handling the
> entry, and on exit before handling RCU, return to user mode and kernel
> preemption in the same way as for regular exceptions.
>
> This also allows moving the extra magic Xen hypercall code and the softirq
> stack switching into C.
>
> The mechanism is straightforward:
>
> 1) Store the current stack pointer on top of the interrupt stack. That's
> required for the unwinder.
>
> 2) Switch the stack pointer
>
> 3) Call the function
>
> 4) Restore the stack pointer
>
> The full code sequence to make the unwinder happy is:
>
> pushq %rbp
> movq %rsp, %rbp
> movq $(top_of_hardirq_stack - 8), %reg
> movq %rsp, (%reg)
> movq %reg, %rsp
> call function
> popq %rsp
> leaveq
>
> While the following sequence would spare the 'popq %rsp':
>
> pushq %rbp
> movq $(top_of_hardirq_stack - 8), %rbp
> movq %rsp, (%rrbp)

Should be (%rbp) instead of (%rrbp).


> xchgq %rbp, %rsp
> call function
> movq %rbp, %rsp
> leaveq
>
> but that requires further changes to objtool so that the unwinder works
> correctly. Can be done on top and is not critical for now.
>
> Provide helper functions to check whether the interrupt stack is already
> active and whether stack switching is required.
>
> 64-bit only for now. 32-bit already has a variant of this. Once this is
> cleaned up, the two implementations might be consolidated as a cleanup on
> top.
>
> Signed-off-by: Thomas Gleixner <[email protected]>
> ---
> arch/x86/include/asm/irq_stack.h | 61 +++++++++++++++++++++++++++++++++++++++
> 1 file changed, 61 insertions(+)
>
> --- /dev/null
> +++ b/arch/x86/include/asm/irq_stack.h
...
> +/*
> + * Macro to emit code for running @func on the irq stack.
> + */
> +#define RUN_ON_IRQSTACK(func) { \
> + unsigned long tos; \
> + \
> + lockdep_assert_irqs_disabled(); \
> + \
> + tos = ((unsigned long)__this_cpu_read(hardirq_stack_ptr)) - 8; \
> + \
> + __this_cpu_add(irq_count, 1); \
> + asm volatile( \
> + "pushq %%rbp \n" \
> + "movq %%rsp, %%rbp \n" \
> + "movq %%rsp, (%[ts]) \n" \
> + "movq %[ts], %%rsp \n" \
> + "1: \n" \
> + " .pushsection .discard.instr_begin \n" \
> + " .long 1b - . \n" \
> + " .popsection \n" \
> + "call " __ASM_FORM(func) " \n" \
> + "2: \n" \
> + " .pushsection .discard.instr_end \n" \
> + " .long 2b - . \n" \
> + " .popsection \n" \
> + "popq %%rsp \n" \
> + "leaveq \n" \
> + : \
> + : [ts] "r" (tos) \
> + : "memory" \
> + ); \
> + __this_cpu_sub(irq_count, 1); \
> +}

The pushsection/popsection discard.instr_begin/end sequences are used several
times in asm() statements at different places, so I wonder if it might be
worth having a macro.

In part 1, patch 20/36 adds instr_begin()/end(): they provide the sequence,
but already encapsulated into an asm() statement. Instead, we could do
something like this:

/* Begin/end of an instrumentation safe region */
#define instr_begin_insn(label) \
__stringify(label) ":\n\t" \
".pushsection .discard.instr_begin\n\t" \
".long " __stringify(label) "b - .\n\t" \
".popsection\n\t"

#define instr_end_insn(label) \
__stringify(label) ":\n\t" \
".pushsection .discard.instr_end\n\t" \
".long " __stringify(label) "b - .\n\t" \
".popsection\n\t"

#define instr_begin() ({asm volatile(instr_begin_insn(__COUNTER__));})
#define instr_end() ({asm volatile(instr_end_insn(__COUNTER__));})

And the RUN_ON_IRQSTACK macro would become:

#define RUN_ON_IRQSTACK(func) { \
unsigned long tos; \
\
lockdep_assert_irqs_disabled(); \
\
tos = ((unsigned long)__this_cpu_read(hardirq_stack_ptr)) - 8; \
\
__this_cpu_add(irq_count, 1); \
asm volatile( \
"pushq %%rbp \n" \
"movq %%rsp, %%rbp \n" \
"movq %%rsp, (%[ts]) \n" \
"movq %[ts], %%rsp \n" \
instr_begin_insn(1) \
"call " __ASM_FORM(func) " \n" \
instr_end_insn(2) \
"popq %%rsp \n" \
"leaveq \n" \
: \
: [ts] "r" (tos) \
: "memory" \
); \
__this_cpu_sub(irq_count, 1); \
}

alex.

2020-05-11 11:59:31

by Thomas Gleixner

Subject: Re: [patch V4 part 5 02/31] x86/entry: Provide helpers for execute on irqstack

Alexandre Chartre <[email protected]> writes:
> On 5/5/20 3:53 PM, Thomas Gleixner wrote:
>> + __this_cpu_add(irq_count, 1); \
>> + asm volatile( \
>> + "pushq %%rbp \n" \
>> + "movq %%rsp, %%rbp \n" \
>> + "movq %%rsp, (%[ts]) \n" \
>> + "movq %[ts], %%rsp \n" \
>> + "1: \n" \
>> + " .pushsection .discard.instr_begin \n" \
>> + " .long 1b - . \n" \
>> + " .popsection \n" \
>> + "call " __ASM_FORM(func) " \n" \
>> + "2: \n" \
>> + " .pushsection .discard.instr_end \n" \
>> + " .long 2b - . \n" \
>> + " .popsection \n" \
>> + "popq %%rsp \n" \
>> + "leaveq \n" \
>> + : \
>> + : [ts] "r" (tos) \
>> + : "memory" \
>> + ); \
>> + __this_cpu_sub(irq_count, 1); \
>> +}
>
> The pushsection/popsection discard.instr_begin/end sequences are used several
> times in asm() statement at different places, so I wonder if it might be worth
> having a macro.

As discussed elsewhere, this is partially going to move to ASM and the
various variants are no longer necessary.

Thanks,

tglx