This patchset applies on top of the two other tip/pti-related patches
I sent out moments ago,[*] and tries to implement what Linus suggested a
few days ago[+].
[+] http://lkml.kernel.org/r/[email protected]
[*] http://lkml.kernel.org/r/CA+55aFwLTF3EtaQ4OpDv2UM41J=EU7gfemv=eVq+uQi31-usSg@mail.gmail.com
Overall, these patches provide for a sizeable cutting of up to 4.35k:
text data bss dec hex filename
20987 0 0 20987 51fb entry_64.o-orig
16621 0 0 16621 40ed entry_64.o
They are split up into four small steps to ease the review. Another
advantage is that we can decide whether each additional step is really
worth it in relation to an increase in code complexity.
NOTE / WARNING: As usual, please be extremely stringent in reviewing these
patches.
Thanks,
Dominik
Dominik Brodowski (4):
x86/entry/64: move PUSH_AND_CLEAR_REGS from interrupt macro to helper
function
x86/entry/64: move ENTER_IRQ_STACK from interrupt macro to helper
function
x86/entry/64: move switch_to_thread_stack to interrupt helper function
x86/entry/64: remove interrupt macro
arch/x86/entry/entry_64.S | 99 +++++++++++++++++++++++++++++------------------
1 file changed, 62 insertions(+), 37 deletions(-)
--
2.16.1
We can also move the SWAPGS and the switch_to_thread_stack to the
interrupt helper function. As we do not want call depths of two,
convert switch_to_thread_stack to a macro. However, as entry_64_compat.S
expects switch_to_thread_stack to be a function, provide a wrapper for
that, which leads to some code duplication if CONFIG_IA32_EMULATION is
enabled. Therefore, the size reduction differs slightly:
With CONFIG_IA32_EMULATION enabled (-0.13k):
text data bss dec hex filename
16897 0 0 16897 4201 entry_64.o-orig
16767 0 0 16767 417f entry_64.o
With CONFIG_IA32_EMULATION disabled (-0.27k):
text data bss dec hex filename
16897 0 0 16897 4201 entry_64.o-orig
16622 0 0 16622 40ee entry_64.o
Signed-off-by: Dominik Brodowski <[email protected]>
---
arch/x86/entry/entry_64.S | 65 ++++++++++++++++++++++++++---------------------
1 file changed, 36 insertions(+), 29 deletions(-)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 3046b12a1acb..b60a3b692ca9 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -536,6 +536,31 @@ END(irq_entries_start)
decl PER_CPU_VAR(irq_count)
.endm
+/*
+ * Switch to the thread stack. This is called with the IRET frame and
+ * orig_ax on the stack. (That is, RDI..R12 are not on the stack and
+ * space has not been allocated for them.)
+ */
+.macro DO_SWITCH_TO_THREAD_STACK
+ pushq %rdi
+ /* Need to switch before accessing the thread stack. */
+ SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
+ movq %rsp, %rdi
+ movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+ UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
+
+ pushq 7*8(%rdi) /* regs->ss */
+ pushq 6*8(%rdi) /* regs->rsp */
+ pushq 5*8(%rdi) /* regs->eflags */
+ pushq 4*8(%rdi) /* regs->cs */
+ pushq 3*8(%rdi) /* regs->ip */
+ pushq 2*8(%rdi) /* regs->orig_ax */
+ pushq 8(%rdi) /* return address */
+ UNWIND_HINT_FUNC
+
+ movq (%rdi), %rdi
+.endm
+
/*
* Interrupt entry/exit.
*
@@ -543,10 +568,17 @@ END(irq_entries_start)
*
* Entry runs with interrupts off.
*/
+/* 8(%rsp): ~(interrupt number) */
ENTRY(interrupt_helper)
UNWIND_HINT_FUNC
cld
+ testb $3, CS-ORIG_RAX+8(%rsp)
+ jz 1f
+ SWAPGS
+ DO_SWITCH_TO_THREAD_STACK
+1:
+
PUSH_AND_CLEAR_REGS save_ret=1
ENCODE_FRAME_POINTER 8
@@ -579,12 +611,6 @@ END(interrupt_helper)
.macro interrupt func
cld
- testb $3, CS-ORIG_RAX(%rsp)
- jz 1f
- SWAPGS
- call switch_to_thread_stack
-1:
-
call interrupt_helper
call \func /* rdi points to pt_regs */
@@ -853,33 +879,14 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
*/
#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8)
-/*
- * Switch to the thread stack. This is called with the IRET frame and
- * orig_ax on the stack. (That is, RDI..R12 are not on the stack and
- * space has not been allocated for them.)
- */
+#if defined(CONFIG_IA32_EMULATION)
+/* entry_64_compat.S::entry_INT80_compat expects this to be an ASM function */
ENTRY(switch_to_thread_stack)
UNWIND_HINT_FUNC
-
- pushq %rdi
- /* Need to switch before accessing the thread stack. */
- SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
- movq %rsp, %rdi
- movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
- UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
-
- pushq 7*8(%rdi) /* regs->ss */
- pushq 6*8(%rdi) /* regs->rsp */
- pushq 5*8(%rdi) /* regs->eflags */
- pushq 4*8(%rdi) /* regs->cs */
- pushq 3*8(%rdi) /* regs->ip */
- pushq 2*8(%rdi) /* regs->orig_ax */
- pushq 8(%rdi) /* return address */
- UNWIND_HINT_FUNC
-
- movq (%rdi), %rdi
+ DO_SWITCH_TO_THREAD_STACK
ret
END(switch_to_thread_stack)
+#endif
.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
ENTRY(\sym)
--
2.16.1
It is now trivial to call the interrupt helper function and then the
actual worker. Therefore, remove the interrupt macro.
Suggested-by: Linus Torvalds <[email protected]>
Signed-off-by: Dominik Brodowski <[email protected]>
---
arch/x86/entry/entry_64.S | 15 ++++-----------
1 file changed, 4 insertions(+), 11 deletions(-)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index b60a3b692ca9..09205da68764 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -607,15 +607,6 @@ ENTRY(interrupt_helper)
ret
END(interrupt_helper)
-/* 0(%rsp): ~(interrupt number) */
- .macro interrupt func
- cld
-
- call interrupt_helper
-
- call \func /* rdi points to pt_regs */
- .endm
-
/*
* The interrupt stubs push (~vector+0x80) onto the stack and
* then jump to common_interrupt.
@@ -624,7 +615,8 @@ END(interrupt_helper)
common_interrupt:
ASM_CLAC
addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */
- interrupt do_IRQ
+ call interrupt_helper
+ call do_IRQ /* rdi points to pt_regs */
/* 0(%rsp): old RSP */
ret_from_intr:
DISABLE_INTERRUPTS(CLBR_ANY)
@@ -816,7 +808,8 @@ ENTRY(\sym)
ASM_CLAC
pushq $~(\num)
.Lcommon_\sym:
- interrupt \do_sym
+ call interrupt_helper
+ call \do_sym /* rdi points to pt_regs */
jmp ret_from_intr
END(\sym)
.endm
--
2.16.1
Moving the switch to IRQ stack from the interrupt macro to the helper
function requires some trickery: All ENTER_IRQ_STACK really cares about
is where the "original" stack -- meaning the GP registers etc. -- is
stored. Therefore, we need to offset the stored RSP value by 8 whenever
ENTER_IRQ_STACK is called from within a function. In such cases, and
after switching to the IRQ stack, we need to push the "original" return
address (i.e. the return address from the call to the interrupt entry
function) to the IRQ stack.
This trickery allows us to carve another 1k from the text size:
text data bss dec hex filename
17905 0 0 17905 45f1 entry_64.o-orig
16897 0 0 16897 4201 entry_64.o
Signed-off-by: Dominik Brodowski <[email protected]>
---
arch/x86/entry/entry_64.S | 53 +++++++++++++++++++++++++++++++----------------
1 file changed, 35 insertions(+), 18 deletions(-)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index de8a0da0d347..3046b12a1acb 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -449,10 +449,18 @@ END(irq_entries_start)
*
* The invariant is that, if irq_count != -1, then the IRQ stack is in use.
*/
-.macro ENTER_IRQ_STACK regs=1 old_rsp
+.macro ENTER_IRQ_STACK regs=1 old_rsp save_ret=0
DEBUG_ENTRY_ASSERT_IRQS_OFF
movq %rsp, \old_rsp
+ .if \save_ret
+ /*
+ * If save_ret is set, the original stack contains one additional
+ * entry -- the return address.
+ */
+ addq $8, \old_rsp
+ .endif
+
.if \regs
UNWIND_HINT_REGS base=\old_rsp
.endif
@@ -497,6 +505,15 @@ END(irq_entries_start)
.if \regs
UNWIND_HINT_REGS indirect=1
.endif
+
+ .if \save_ret
+ /*
+ * Push the return address to the stack. This return address can
+ * be found at the "real" original RSP, which was offset by 8 at
+ * the beginning of this macro.
+ */
+ pushq -8(\old_rsp)
+ .endif
.endm
/*
@@ -533,22 +550,7 @@ ENTRY(interrupt_helper)
PUSH_AND_CLEAR_REGS save_ret=1
ENCODE_FRAME_POINTER 8
- ret
-END(interrupt_helper)
-
-/* 0(%rsp): ~(interrupt number) */
- .macro interrupt func
- cld
-
- testb $3, CS-ORIG_RAX(%rsp)
- jz 1f
- SWAPGS
- call switch_to_thread_stack
-1:
-
- call interrupt_helper
-
- testb $3, CS(%rsp)
+ testb $3, CS+8(%rsp)
jz 1f
/*
@@ -566,10 +568,25 @@ END(interrupt_helper)
CALL_enter_from_user_mode
1:
- ENTER_IRQ_STACK old_rsp=%rdi
+ ENTER_IRQ_STACK old_rsp=%rdi save_ret=1
/* We entered an interrupt context - irqs are off: */
TRACE_IRQS_OFF
+ ret
+END(interrupt_helper)
+
+/* 0(%rsp): ~(interrupt number) */
+ .macro interrupt func
+ cld
+
+ testb $3, CS-ORIG_RAX(%rsp)
+ jz 1f
+ SWAPGS
+ call switch_to_thread_stack
+1:
+
+ call interrupt_helper
+
call \func /* rdi points to pt_regs */
.endm
--
2.16.1
The PUSH_AND_CLEAR_REGS macro is able to insert the GP registers
"above" the original return address. This allows us to move a sizeable
part of the interrupt entry macro to an interrupt entry helper function:
text data bss dec hex filename
20987 0 0 20987 51fb entry_64.o-orig
17905 0 0 17905 45f1 entry_64.o
Suggested-by: Linus Torvalds <[email protected]>
Signed-off-by: Dominik Brodowski <[email protected]>
---
arch/x86/entry/entry_64.S | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 5d9cb0f037e4..de8a0da0d347 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -526,6 +526,15 @@ END(irq_entries_start)
*
* Entry runs with interrupts off.
*/
+ENTRY(interrupt_helper)
+ UNWIND_HINT_FUNC
+ cld
+
+ PUSH_AND_CLEAR_REGS save_ret=1
+ ENCODE_FRAME_POINTER 8
+
+ ret
+END(interrupt_helper)
/* 0(%rsp): ~(interrupt number) */
.macro interrupt func
@@ -537,8 +546,7 @@ END(irq_entries_start)
call switch_to_thread_stack
1:
- PUSH_AND_CLEAR_REGS
- ENCODE_FRAME_POINTER
+ call interrupt_helper
testb $3, CS(%rsp)
jz 1f
--
2.16.1
On Wed, Feb 14, 2018 at 1:21 PM, Dominik Brodowski
<[email protected]> wrote:
> Moving the switch to IRQ stack from the interrupt macro to the helper
> function requires some trickery: All ENTER_IRQ_STACK really cares about
> is where the "original" stack -- meaning the GP registers etc. -- is
> stored. Therefore, we need to offset the stored RSP value by 8 whenever
> ENTER_IRQ_STACK is called from within a function. In such cases, and
> after switching to the IRQ stack, we need to push the "original" return
> address (i.e. the return address from the call to the interrupt entry
> function) to the IRQ stack.
>
> This trickery allows us to carve another 1k from the text size:
>
> text data bss dec hex filename
> 17905 0 0 17905 45f1 entry_64.o-orig
> 16897 0 0 16897 4201 entry_64.o
>
> Signed-off-by: Dominik Brodowski <[email protected]>
> ---
> arch/x86/entry/entry_64.S | 53 +++++++++++++++++++++++++++++++----------------
> 1 file changed, 35 insertions(+), 18 deletions(-)
>
> diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
> index de8a0da0d347..3046b12a1acb 100644
> --- a/arch/x86/entry/entry_64.S
> +++ b/arch/x86/entry/entry_64.S
> @@ -449,10 +449,18 @@ END(irq_entries_start)
> *
> * The invariant is that, if irq_count != -1, then the IRQ stack is in use.
> */
> -.macro ENTER_IRQ_STACK regs=1 old_rsp
> +.macro ENTER_IRQ_STACK regs=1 old_rsp save_ret=0
> DEBUG_ENTRY_ASSERT_IRQS_OFF
> movq %rsp, \old_rsp
>
> + .if \save_ret
> + /*
> + * If save_ret is set, the original stack contains one additional
> + * entry -- the return address.
> + */
> + addq $8, \old_rsp
> + .endif
Combine the mov and add into leaq 8(%rsp), \old_rsp.
--
Brian Gerst
On Wed, Feb 14, 2018 at 1:21 PM, Dominik Brodowski
<[email protected]> wrote:
> We can also move the SWAPGS and the switch_to_thread_stack to the
> interrupt helper function. As we do not want call depths of two,
> convert switch_to_thread_stack to a macro. However, as entry_64_compat.S
> expects switch_to_thread_stack to be a function, provide a wrapper for
> that, which leads to some code duplication if CONFIG_IA32_EMULATION is
> enabled. Therefore, the size reduction differs slightly:
>
> With CONFIG_IA32_EMULATION enabled (-0.13k):
> text data bss dec hex filename
> 16897 0 0 16897 4201 entry_64.o-orig
> 16767 0 0 16767 417f entry_64.o
>
> With CONFIG_IA32_EMULATION disabled (-0.27k):
> text data bss dec hex filename
> 16897 0 0 16897 4201 entry_64.o-orig
> 16622 0 0 16622 40ee entry_64.o
>
> Signed-off-by: Dominik Brodowski <[email protected]>
> ---
> arch/x86/entry/entry_64.S | 65 ++++++++++++++++++++++++++---------------------
> 1 file changed, 36 insertions(+), 29 deletions(-)
>
> diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
> index 3046b12a1acb..b60a3b692ca9 100644
> --- a/arch/x86/entry/entry_64.S
> +++ b/arch/x86/entry/entry_64.S
> @@ -536,6 +536,31 @@ END(irq_entries_start)
> decl PER_CPU_VAR(irq_count)
> .endm
>
> +/*
> + * Switch to the thread stack. This is called with the IRET frame and
> + * orig_ax on the stack. (That is, RDI..R12 are not on the stack and
> + * space has not been allocated for them.)
> + */
> +.macro DO_SWITCH_TO_THREAD_STACK
> + pushq %rdi
> + /* Need to switch before accessing the thread stack. */
> + SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
> + movq %rsp, %rdi
> + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
> + UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
> +
> + pushq 7*8(%rdi) /* regs->ss */
> + pushq 6*8(%rdi) /* regs->rsp */
> + pushq 5*8(%rdi) /* regs->eflags */
> + pushq 4*8(%rdi) /* regs->cs */
> + pushq 3*8(%rdi) /* regs->ip */
> + pushq 2*8(%rdi) /* regs->orig_ax */
> + pushq 8(%rdi) /* return address */
> + UNWIND_HINT_FUNC
> +
> + movq (%rdi), %rdi
> +.endm
> +
> /*
> * Interrupt entry/exit.
> *
> @@ -543,10 +568,17 @@ END(irq_entries_start)
> *
> * Entry runs with interrupts off.
> */
> +/* 8(%rsp): ~(interrupt number) */
> ENTRY(interrupt_helper)
> UNWIND_HINT_FUNC
> cld
>
> + testb $3, CS-ORIG_RAX+8(%rsp)
> + jz 1f
> + SWAPGS
> + DO_SWITCH_TO_THREAD_STACK
> +1:
> +
> PUSH_AND_CLEAR_REGS save_ret=1
> ENCODE_FRAME_POINTER 8
>
> @@ -579,12 +611,6 @@ END(interrupt_helper)
> .macro interrupt func
> cld
>
> - testb $3, CS-ORIG_RAX(%rsp)
> - jz 1f
> - SWAPGS
> - call switch_to_thread_stack
> -1:
> -
> call interrupt_helper
>
> call \func /* rdi points to pt_regs */
> @@ -853,33 +879,14 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
> */
> #define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8)
>
> -/*
> - * Switch to the thread stack. This is called with the IRET frame and
> - * orig_ax on the stack. (That is, RDI..R12 are not on the stack and
> - * space has not been allocated for them.)
> - */
> +#if defined(CONFIG_IA32_EMULATION)
> +/* entry_64_compat.S::entry_INT80_compat expects this to be an ASM function */
> ENTRY(switch_to_thread_stack)
> UNWIND_HINT_FUNC
> -
> - pushq %rdi
> - /* Need to switch before accessing the thread stack. */
> - SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
> - movq %rsp, %rdi
> - movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
> - UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
> -
> - pushq 7*8(%rdi) /* regs->ss */
> - pushq 6*8(%rdi) /* regs->rsp */
> - pushq 5*8(%rdi) /* regs->eflags */
> - pushq 4*8(%rdi) /* regs->cs */
> - pushq 3*8(%rdi) /* regs->ip */
> - pushq 2*8(%rdi) /* regs->orig_ax */
> - pushq 8(%rdi) /* return address */
> - UNWIND_HINT_FUNC
> -
> - movq (%rdi), %rdi
> + DO_SWITCH_TO_THREAD_STACK
> ret
> END(switch_to_thread_stack)
> +#endif
>
> .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
> ENTRY(\sym)
> --
> 2.16.1
>
Move the macro to calling.h, and inline it into the compat entry.
--
Brian Gerst
On Wed, Feb 14, 2018 at 01:57:15PM -0500, Brian Gerst wrote:
> On Wed, Feb 14, 2018 at 1:21 PM, Dominik Brodowski
> <[email protected]> wrote:
> > We can also move the SWAPGS and the switch_to_thread_stack to the
> > interrupt helper function. As we do not want call depths of two,
> > convert switch_to_thread_stack to a macro. However, as entry_64_compat.S
> > expects switch_to_thread_stack to be a function, provide a wrapper for
> > that, which leads to some code duplication if CONFIG_IA32_EMULATION is
> > enabled. Therefore, the size reduction differs slightly:
> >
> > With CONFIG_IA32_EMULATION enabled (-0.13k):
> > text data bss dec hex filename
> > 16897 0 0 16897 4201 entry_64.o-orig
> > 16767 0 0 16767 417f entry_64.o
> >
> > With CONFIG_IA32_EMULATION disabled (-0.27k):
> > text data bss dec hex filename
> > 16897 0 0 16897 4201 entry_64.o-orig
> > 16622 0 0 16622 40ee entry_64.o
> >
> > Signed-off-by: Dominik Brodowski <[email protected]>
> > ---
> > arch/x86/entry/entry_64.S | 65 ++++++++++++++++++++++++++---------------------
> > 1 file changed, 36 insertions(+), 29 deletions(-)
> >
> > diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
> > index 3046b12a1acb..b60a3b692ca9 100644
> > --- a/arch/x86/entry/entry_64.S
> > +++ b/arch/x86/entry/entry_64.S
> > @@ -536,6 +536,31 @@ END(irq_entries_start)
> > decl PER_CPU_VAR(irq_count)
> > .endm
> >
> > +/*
> > + * Switch to the thread stack. This is called with the IRET frame and
> > + * orig_ax on the stack. (That is, RDI..R12 are not on the stack and
> > + * space has not been allocated for them.)
> > + */
> > +.macro DO_SWITCH_TO_THREAD_STACK
> > + pushq %rdi
> > + /* Need to switch before accessing the thread stack. */
> > + SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
> > + movq %rsp, %rdi
> > + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
> > + UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
> > +
> > + pushq 7*8(%rdi) /* regs->ss */
> > + pushq 6*8(%rdi) /* regs->rsp */
> > + pushq 5*8(%rdi) /* regs->eflags */
> > + pushq 4*8(%rdi) /* regs->cs */
> > + pushq 3*8(%rdi) /* regs->ip */
> > + pushq 2*8(%rdi) /* regs->orig_ax */
> > + pushq 8(%rdi) /* return address */
> > + UNWIND_HINT_FUNC
> > +
> > + movq (%rdi), %rdi
> > +.endm
> > +
> > /*
> > * Interrupt entry/exit.
> > *
> > @@ -543,10 +568,17 @@ END(irq_entries_start)
> > *
> > * Entry runs with interrupts off.
> > */
> > +/* 8(%rsp): ~(interrupt number) */
> > ENTRY(interrupt_helper)
> > UNWIND_HINT_FUNC
> > cld
> >
> > + testb $3, CS-ORIG_RAX+8(%rsp)
> > + jz 1f
> > + SWAPGS
> > + DO_SWITCH_TO_THREAD_STACK
> > +1:
> > +
> > PUSH_AND_CLEAR_REGS save_ret=1
> > ENCODE_FRAME_POINTER 8
> >
> > @@ -579,12 +611,6 @@ END(interrupt_helper)
> > .macro interrupt func
> > cld
> >
> > - testb $3, CS-ORIG_RAX(%rsp)
> > - jz 1f
> > - SWAPGS
> > - call switch_to_thread_stack
> > -1:
> > -
> > call interrupt_helper
> >
> > call \func /* rdi points to pt_regs */
> > @@ -853,33 +879,14 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
> > */
> > #define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8)
> >
> > -/*
> > - * Switch to the thread stack. This is called with the IRET frame and
> > - * orig_ax on the stack. (That is, RDI..R12 are not on the stack and
> > - * space has not been allocated for them.)
> > - */
> > +#if defined(CONFIG_IA32_EMULATION)
> > +/* entry_64_compat.S::entry_INT80_compat expects this to be an ASM function */
> > ENTRY(switch_to_thread_stack)
> > UNWIND_HINT_FUNC
> > -
> > - pushq %rdi
> > - /* Need to switch before accessing the thread stack. */
> > - SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
> > - movq %rsp, %rdi
> > - movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
> > - UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
> > -
> > - pushq 7*8(%rdi) /* regs->ss */
> > - pushq 6*8(%rdi) /* regs->rsp */
> > - pushq 5*8(%rdi) /* regs->eflags */
> > - pushq 4*8(%rdi) /* regs->cs */
> > - pushq 3*8(%rdi) /* regs->ip */
> > - pushq 2*8(%rdi) /* regs->orig_ax */
> > - pushq 8(%rdi) /* return address */
> > - UNWIND_HINT_FUNC
> > -
> > - movq (%rdi), %rdi
> > + DO_SWITCH_TO_THREAD_STACK
> > ret
> > END(switch_to_thread_stack)
> > +#endif
> >
> > .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
> > ENTRY(\sym)
> > --
> > 2.16.1
> >
>
> Move the macro to calling.h, and inline it into the compat entry.
That certainly sounds possible, but makes the macro more complex: Inlining
means that the offsets need to be reduced by 8. But we need the current
offset for the call from interrupt_helper. So such a change might make the
code less readable.
Thanks,
Dominik
On Wed, Feb 14, 2018 at 2:06 PM, Dominik Brodowski
<[email protected]> wrote:
> On Wed, Feb 14, 2018 at 01:57:15PM -0500, Brian Gerst wrote:
>> On Wed, Feb 14, 2018 at 1:21 PM, Dominik Brodowski
>> <[email protected]> wrote:
>> > We can also move the SWAPGS and the switch_to_thread_stack to the
>> > interrupt helper function. As we do not want call depths of two,
>> > convert switch_to_thread_stack to a macro. However, as entry_64_compat.S
>> > expects switch_to_thread_stack to be a function, provide a wrapper for
>> > that, which leads to some code duplication if CONFIG_IA32_EMULATION is
>> > enabled. Therefore, the size reduction differs slightly:
>> >
>> > With CONFIG_IA32_EMULATION enabled (-0.13k):
>> > text data bss dec hex filename
>> > 16897 0 0 16897 4201 entry_64.o-orig
>> > 16767 0 0 16767 417f entry_64.o
>> >
>> > With CONFIG_IA32_EMULATION disabled (-0.27k):
>> > text data bss dec hex filename
>> > 16897 0 0 16897 4201 entry_64.o-orig
>> > 16622 0 0 16622 40ee entry_64.o
>> >
>> > Signed-off-by: Dominik Brodowski <[email protected]>
>> > ---
>> > arch/x86/entry/entry_64.S | 65 ++++++++++++++++++++++++++---------------------
>> > 1 file changed, 36 insertions(+), 29 deletions(-)
>> >
>> > diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
>> > index 3046b12a1acb..b60a3b692ca9 100644
>> > --- a/arch/x86/entry/entry_64.S
>> > +++ b/arch/x86/entry/entry_64.S
>> > @@ -536,6 +536,31 @@ END(irq_entries_start)
>> > decl PER_CPU_VAR(irq_count)
>> > .endm
>> >
>> > +/*
>> > + * Switch to the thread stack. This is called with the IRET frame and
>> > + * orig_ax on the stack. (That is, RDI..R12 are not on the stack and
>> > + * space has not been allocated for them.)
>> > + */
>> > +.macro DO_SWITCH_TO_THREAD_STACK
>> > + pushq %rdi
>> > + /* Need to switch before accessing the thread stack. */
>> > + SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
>> > + movq %rsp, %rdi
>> > + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
>> > + UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
>> > +
>> > + pushq 7*8(%rdi) /* regs->ss */
>> > + pushq 6*8(%rdi) /* regs->rsp */
>> > + pushq 5*8(%rdi) /* regs->eflags */
>> > + pushq 4*8(%rdi) /* regs->cs */
>> > + pushq 3*8(%rdi) /* regs->ip */
>> > + pushq 2*8(%rdi) /* regs->orig_ax */
>> > + pushq 8(%rdi) /* return address */
>> > + UNWIND_HINT_FUNC
>> > +
>> > + movq (%rdi), %rdi
>> > +.endm
>> > +
>> > /*
>> > * Interrupt entry/exit.
>> > *
>> > @@ -543,10 +568,17 @@ END(irq_entries_start)
>> > *
>> > * Entry runs with interrupts off.
>> > */
>> > +/* 8(%rsp): ~(interrupt number) */
>> > ENTRY(interrupt_helper)
>> > UNWIND_HINT_FUNC
>> > cld
>> >
>> > + testb $3, CS-ORIG_RAX+8(%rsp)
>> > + jz 1f
>> > + SWAPGS
>> > + DO_SWITCH_TO_THREAD_STACK
>> > +1:
>> > +
>> > PUSH_AND_CLEAR_REGS save_ret=1
>> > ENCODE_FRAME_POINTER 8
>> >
>> > @@ -579,12 +611,6 @@ END(interrupt_helper)
>> > .macro interrupt func
>> > cld
>> >
>> > - testb $3, CS-ORIG_RAX(%rsp)
>> > - jz 1f
>> > - SWAPGS
>> > - call switch_to_thread_stack
>> > -1:
>> > -
>> > call interrupt_helper
>> >
>> > call \func /* rdi points to pt_regs */
>> > @@ -853,33 +879,14 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
>> > */
>> > #define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8)
>> >
>> > -/*
>> > - * Switch to the thread stack. This is called with the IRET frame and
>> > - * orig_ax on the stack. (That is, RDI..R12 are not on the stack and
>> > - * space has not been allocated for them.)
>> > - */
>> > +#if defined(CONFIG_IA32_EMULATION)
>> > +/* entry_64_compat.S::entry_INT80_compat expects this to be an ASM function */
>> > ENTRY(switch_to_thread_stack)
>> > UNWIND_HINT_FUNC
>> > -
>> > - pushq %rdi
>> > - /* Need to switch before accessing the thread stack. */
>> > - SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
>> > - movq %rsp, %rdi
>> > - movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
>> > - UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
>> > -
>> > - pushq 7*8(%rdi) /* regs->ss */
>> > - pushq 6*8(%rdi) /* regs->rsp */
>> > - pushq 5*8(%rdi) /* regs->eflags */
>> > - pushq 4*8(%rdi) /* regs->cs */
>> > - pushq 3*8(%rdi) /* regs->ip */
>> > - pushq 2*8(%rdi) /* regs->orig_ax */
>> > - pushq 8(%rdi) /* return address */
>> > - UNWIND_HINT_FUNC
>> > -
>> > - movq (%rdi), %rdi
>> > + DO_SWITCH_TO_THREAD_STACK
>> > ret
>> > END(switch_to_thread_stack)
>> > +#endif
>> >
>> > .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
>> > ENTRY(\sym)
>> > --
>> > 2.16.1
>> >
>>
>> Move the macro to calling.h, and inline it into the compat entry.
>
> That certainly sounds possible, but makes the macro more complex: Inlining
> means that the offsets need to be reduced by -8. But we need the current
> offset for the call from interrupt_helper. So such a change might make the
> code less readable.
>
> Thanks,
> Dominik
It would probably be better to just open code it then, without the
return address handling.
--
Brian Gerst
On Wed, Feb 14, 2018 at 6:21 PM, Dominik Brodowski
<[email protected]> wrote:
> Moving the switch to IRQ stack from the interrupt macro to the helper
> function requires some trickery: All ENTER_IRQ_STACK really cares about
> is where the "original" stack -- meaning the GP registers etc. -- is
> stored. Therefore, we need to offset the stored RSP value by 8 whenever
> ENTER_IRQ_STACK is called from within a function. In such cases, and
> after switching to the IRQ stack, we need to push the "original" return
> address (i.e. the return address from the call to the interrupt entry
> function) to the IRQ stack.
>
> This trickery allows us to carve another 1k from the text size:
>
> text data bss dec hex filename
> 17905 0 0 17905 45f1 entry_64.o-orig
> 16897 0 0 16897 4201 entry_64.o
>
> Signed-off-by: Dominik Brodowski <[email protected]>
> ---
> arch/x86/entry/entry_64.S | 53 +++++++++++++++++++++++++++++++----------------
> 1 file changed, 35 insertions(+), 18 deletions(-)
>
> diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
> index de8a0da0d347..3046b12a1acb 100644
> --- a/arch/x86/entry/entry_64.S
> +++ b/arch/x86/entry/entry_64.S
> @@ -449,10 +449,18 @@ END(irq_entries_start)
> *
> * The invariant is that, if irq_count != -1, then the IRQ stack is in use.
> */
> -.macro ENTER_IRQ_STACK regs=1 old_rsp
> +.macro ENTER_IRQ_STACK regs=1 old_rsp save_ret=0
> DEBUG_ENTRY_ASSERT_IRQS_OFF
> movq %rsp, \old_rsp
>
> + .if \save_ret
> + /*
> + * If save_ret is set, the original stack contains one additional
> + * entry -- the return address.
> + */
> + addq $8, \old_rsp
> + .endif
> +
This is a bit alarming in that you now have live data below RSP. For
x86_32, this would be a big no-no due to NMI. For x86_64, it might
still be bad if there are code paths where NMI is switched to non-IST
temporarily, which was the case at some point and might still be the
case. (I think it is.) Remember that the x86_64 *kernel* ABI has no
red zone.
It also means that, if you manage to hit vmalloc_fault() in here when
you touch the IRQ stack, you're dead. IOW you hit:
movq \old_rsp, PER_CPU_VAR(irq_stack_union + IRQ_STACK_SIZE - 8)
which gets #PF and eats your return pointer. Debugging this will be
quite nasty because you'll only hit it on really huge systems after a
thread gets migrated, and even then only if you get unlucky on your
stack alignment.
So can you find another way to do this?
--Andy
On Wed, Feb 14, 2018 at 7:17 PM, Andy Lutomirski <[email protected]> wrote:
> On Wed, Feb 14, 2018 at 6:21 PM, Dominik Brodowski
> <[email protected]> wrote:
>> Moving the switch to IRQ stack from the interrupt macro to the helper
>> function requires some trickery: All ENTER_IRQ_STACK really cares about
>> is where the "original" stack -- meaning the GP registers etc. -- is
>> stored. Therefore, we need to offset the stored RSP value by 8 whenever
>> ENTER_IRQ_STACK is called from within a function. In such cases, and
>> after switching to the IRQ stack, we need to push the "original" return
>> address (i.e. the return address from the call to the interrupt entry
>> function) to the IRQ stack.
>>
>> This trickery allows us to carve another 1k from the text size:
>>
>> text data bss dec hex filename
>> 17905 0 0 17905 45f1 entry_64.o-orig
>> 16897 0 0 16897 4201 entry_64.o
>>
>> Signed-off-by: Dominik Brodowski <[email protected]>
>> ---
>> arch/x86/entry/entry_64.S | 53 +++++++++++++++++++++++++++++++----------------
>> 1 file changed, 35 insertions(+), 18 deletions(-)
>>
>> diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
>> index de8a0da0d347..3046b12a1acb 100644
>> --- a/arch/x86/entry/entry_64.S
>> +++ b/arch/x86/entry/entry_64.S
>> @@ -449,10 +449,18 @@ END(irq_entries_start)
>> *
>> * The invariant is that, if irq_count != -1, then the IRQ stack is in use.
>> */
>> -.macro ENTER_IRQ_STACK regs=1 old_rsp
>> +.macro ENTER_IRQ_STACK regs=1 old_rsp save_ret=0
>> DEBUG_ENTRY_ASSERT_IRQS_OFF
>> movq %rsp, \old_rsp
>>
>> + .if \save_ret
>> + /*
>> + * If save_ret is set, the original stack contains one additional
>> + * entry -- the return address.
>> + */
>> + addq $8, \old_rsp
>> + .endif
>> +
>
> This is a bit alarming in that you now have live data below RSP. For
> x86_32, this would be a big no-no due to NMI. For x86_64, it might
> still be bad if there are code paths where NMI is switched to non-IST
> temporarily, which was the case at some point and might still be the
> case. (I think it is.) Remember that the x86_64 *kernel* ABI has no
> red zone.
>
> It also means that, if you manage to hit vmalloc_fault() in here when
> you touch the IRQ stack, you're dead. IOW you hit:
>
> movq \old_rsp, PER_CPU_VAR(irq_stack_union + IRQ_STACK_SIZE - 8)
>
> which gets #PF and eats your return pointer. Debugging this will be
> quite nasty because you'll only hit it on really huge systems after a
> thread gets migrated, and even then only if you get unlucky on your
> stack alignment.
>
> So can you find another way to do this?
It's adding 8 to the temp register, not %rsp.
--
Brian Gerst
On Thu, Feb 15, 2018 at 12:48 AM, Brian Gerst <[email protected]> wrote:
> On Wed, Feb 14, 2018 at 7:17 PM, Andy Lutomirski <[email protected]> wrote:
>> On Wed, Feb 14, 2018 at 6:21 PM, Dominik Brodowski
>> <[email protected]> wrote:
>>> Moving the switch to IRQ stack from the interrupt macro to the helper
>>> function requires some trickery: All ENTER_IRQ_STACK really cares about
>>> is where the "original" stack -- meaning the GP registers etc. -- is
>>> stored. Therefore, we need to offset the stored RSP value by 8 whenever
>>> ENTER_IRQ_STACK is called from within a function. In such cases, and
>>> after switching to the IRQ stack, we need to push the "original" return
>>> address (i.e. the return address from the call to the interrupt entry
>>> function) to the IRQ stack.
>>>
>>> This trickery allows us to carve another 1k from the text size:
>>>
>>> text data bss dec hex filename
>>> 17905 0 0 17905 45f1 entry_64.o-orig
>>> 16897 0 0 16897 4201 entry_64.o
>>>
>>> Signed-off-by: Dominik Brodowski <[email protected]>
>>> ---
>>> arch/x86/entry/entry_64.S | 53 +++++++++++++++++++++++++++++++----------------
>>> 1 file changed, 35 insertions(+), 18 deletions(-)
>>>
>>> diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
>>> index de8a0da0d347..3046b12a1acb 100644
>>> --- a/arch/x86/entry/entry_64.S
>>> +++ b/arch/x86/entry/entry_64.S
>>> @@ -449,10 +449,18 @@ END(irq_entries_start)
>>> *
>>> * The invariant is that, if irq_count != -1, then the IRQ stack is in use.
>>> */
>>> -.macro ENTER_IRQ_STACK regs=1 old_rsp
>>> +.macro ENTER_IRQ_STACK regs=1 old_rsp save_ret=0
>>> DEBUG_ENTRY_ASSERT_IRQS_OFF
>>> movq %rsp, \old_rsp
>>>
>>> + .if \save_ret
>>> + /*
>>> + * If save_ret is set, the original stack contains one additional
>>> + * entry -- the return address.
>>> + */
>>> + addq $8, \old_rsp
>>> + .endif
>>> +
>>
>> This is a bit alarming in that you now have live data below RSP. For
>> x86_32, this would be a big no-no due to NMI. For x86_64, it might
>> still be bad if there are code paths where NMI is switched to non-IST
>> temporarily, which was the case at some point and might still be the
>> case. (I think it is.) Remember that the x86_64 *kernel* ABI has no
>> red zone.
>>
>> It also means that, if you manage to hit vmalloc_fault() in here when
>> you touch the IRQ stack, you're dead. IOW you hit:
>>
>> movq \old_rsp, PER_CPU_VAR(irq_stack_union + IRQ_STACK_SIZE - 8)
>>
>> which gets #PF and eats your return pointer. Debugging this will be
>> quite nasty because you'll only hit it on really huge systems after a
>> thread gets migrated, and even then only if you get unlucky on your
>> stack alignment.
>>
>> So can you find another way to do this?
>
> It's adding 8 to the temp register, not %rsp.
Duh.
On Wed, Feb 14, 2018 at 10:11 PM, Andy Lutomirski <[email protected]> wrote:
> On Thu, Feb 15, 2018 at 12:48 AM, Brian Gerst <[email protected]> wrote:
>> On Wed, Feb 14, 2018 at 7:17 PM, Andy Lutomirski <[email protected]> wrote:
>>> On Wed, Feb 14, 2018 at 6:21 PM, Dominik Brodowski
>>> <[email protected]> wrote:
>>>> Moving the switch to IRQ stack from the interrupt macro to the helper
>>>> function requires some trickery: All ENTER_IRQ_STACK really cares about
>>>> is where the "original" stack -- meaning the GP registers etc. -- is
>>>> stored. Therefore, we need to offset the stored RSP value by 8 whenever
>>>> ENTER_IRQ_STACK is called from within a function. In such cases, and
>>>> after switching to the IRQ stack, we need to push the "original" return
>>>> address (i.e. the return address from the call to the interrupt entry
>>>> function) to the IRQ stack.
>>>>
>>>> This trickery allows us to carve another 1k from the text size:
>>>>
>>>> text data bss dec hex filename
>>>> 17905 0 0 17905 45f1 entry_64.o-orig
>>>> 16897 0 0 16897 4201 entry_64.o
>>>>
>>>> Signed-off-by: Dominik Brodowski <[email protected]>
>>>> ---
>>>> arch/x86/entry/entry_64.S | 53 +++++++++++++++++++++++++++++++----------------
>>>> 1 file changed, 35 insertions(+), 18 deletions(-)
>>>>
>>>> diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
>>>> index de8a0da0d347..3046b12a1acb 100644
>>>> --- a/arch/x86/entry/entry_64.S
>>>> +++ b/arch/x86/entry/entry_64.S
>>>> @@ -449,10 +449,18 @@ END(irq_entries_start)
>>>> *
>>>> * The invariant is that, if irq_count != -1, then the IRQ stack is in use.
>>>> */
>>>> -.macro ENTER_IRQ_STACK regs=1 old_rsp
>>>> +.macro ENTER_IRQ_STACK regs=1 old_rsp save_ret=0
>>>> DEBUG_ENTRY_ASSERT_IRQS_OFF
>>>> movq %rsp, \old_rsp
>>>>
>>>> + .if \save_ret
>>>> + /*
>>>> + * If save_ret is set, the original stack contains one additional
>>>> + * entry -- the return address.
>>>> + */
>>>> + addq $8, \old_rsp
>>>> + .endif
>>>> +
>>>
>>> This is a bit alarming in that you now have live data below RSP. For
>>> x86_32, this would be a big no-no due to NMI. For x86_64, it might
>>> still be bad if there are code paths where NMI is switched to non-IST
>>> temporarily, which was the case at some point and might still be the
>>> case. (I think it is.) Remember that the x86_64 *kernel* ABI has no
>>> red zone.
>>>
>>> It also means that, if you manage to hit vmalloc_fault() in here when
>>> you touch the IRQ stack, you're dead. IOW you hit:
>>>
>>> movq \old_rsp, PER_CPU_VAR(irq_stack_union + IRQ_STACK_SIZE - 8)
>>>
>>> which gets #PF and eats your return pointer. Debugging this will be
>>> quite nasty because you'll only hit it on really huge systems after a
>>> thread gets migrated, and even then only if you get unlucky on your
>>> stack alignment.
>>>
>>> So can you find another way to do this?
>>
>> It's adding 8 to the temp register, not %rsp.
>
> Duh.
Even if you get a #PF when writing to the IRQ stack (which should
never happen in the first place since per-cpu memory is mapped at boot
time), RSP would still be below the return address and the fault won't
overwrite it.
That said, the word it writes to the top of the IRQ stack is
vulnerable for a short window between when it switches RSP to the IRQ
stack and when it pushes old_rsp. That would only affect the unwinder
during an NMI in that window, though, since it pushes old_rsp again. So
I'd say commit 29955909 doesn't work exactly as advertised. But that
has nothing to do with this patch.
--
Brian Gerst