Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1753346AbbBRUXS (ORCPT ); Wed, 18 Feb 2015 15:23:18 -0500 Received: from mail-la0-f44.google.com ([209.85.215.44]:39452 "EHLO mail-la0-f44.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752668AbbBRUXQ (ORCPT ); Wed, 18 Feb 2015 15:23:16 -0500 MIME-Version: 1.0 In-Reply-To: <1423778052-21038-2-git-send-email-dvlasenk@redhat.com> References: <1423778052-21038-1-git-send-email-dvlasenk@redhat.com> <1423778052-21038-2-git-send-email-dvlasenk@redhat.com> From: Andy Lutomirski Date: Wed, 18 Feb 2015 12:22:54 -0800 Message-ID: Subject: Re: [PATCH 2/3 v3] x86: entry_64.S: always allocate complete "struct pt_regs" To: Denys Vlasenko Cc: Linus Torvalds , Oleg Nesterov , Borislav Petkov , "H. Peter Anvin" , Frederic Weisbecker , X86 ML , Alexei Starovoitov , Will Drewry , Kees Cook , "linux-kernel@vger.kernel.org" Content-Type: text/plain; charset=UTF-8 Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 33644 Lines: 988 On Thu, Feb 12, 2015 at 1:54 PM, Denys Vlasenko wrote: > 64-bit code was using six stack slots less by not saving/restoring > registers which are callee-preserved according to C ABI, > and not allocating space for them. > Only when syscall needed a complete "struct pt_regs", > the complete area was allocated and filled in. > As an additional twist, on interrupt entry a "slightly less truncated pt_regs" > trick is used, to make nested interrupt stacks easier to unwind. > > This proved to be a source of significant obfuscation and subtle bugs. > For example, stub_fork had to pop the return address, > extend the struct, save registers, and push return address back. Ugly. > ia32_ptregs_common pops return address and "returns" via jmp insn, > throwing a wrench into CPU return stack cache. > > This patch changes code to always allocate a complete "struct pt_regs". > The saving of registers is still done lazily. > > "Partial pt_regs" trick on interrupt stack is retained. > > Macros which manipulate "struct pt_regs" on stack are reworked: > ALLOC_PT_GPREGS_ON_STACK allocates the structure. > SAVE_C_REGS saves to it those registers which are clobbered by C code. > SAVE_EXTRA_REGS saves to it all other registers. > Corresponding RESTORE_* and REMOVE_PT_GPREGS_FROM_STACK macros reverse it. > > ia32_ptregs_common, stub_fork and friends lost their ugly dance with > return pointer. > > LOAD_ARGS32 in ia32entry.S now uses symbolic stack offsets > instead of magic numbers. > > error_entry and save_paranoid now use SAVE_C_REGS + SAVE_EXTRA_REGS > instead of having it open-coded yet again. > > Patch was run-tested: 64-bit executables, 32-bit executables, > strace works. > Timing tests did not show measurable difference in 32-bit > and 64-bit syscalls. This patch scares me, because it changes a lot of hairy code. That being said, I don't see anything wrong with it, and the end result is much nicer than the status quo. So I applied it, and I'll let the kbuild bot have fun with it. I confirmed that I can boot a 64-bit and a 32-bit system with it, at least in my configuration. Further reviews are encouraged :) --Andy > > Signed-off-by: Denys Vlasenko > CC: Linus Torvalds > CC: Oleg Nesterov > CC: Borislav Petkov > CC: "H. Peter Anvin" > CC: Andy Lutomirski > CC: Frederic Weisbecker > CC: X86 ML > CC: Alexei Starovoitov > CC: Will Drewry > CC: Kees Cook > CC: linux-kernel@vger.kernel.org > --- > arch/x86/ia32/ia32entry.S | 47 +++---- > arch/x86/include/asm/calling.h | 222 ++++++++++++++++----------------- > arch/x86/include/asm/irqflags.h | 4 +- > arch/x86/include/uapi/asm/ptrace-abi.h | 1 - > arch/x86/kernel/entry_64.S | 195 +++++++++++------------------ > 5 files changed, 209 insertions(+), 260 deletions(-) > > diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S > index 156ebca..f4bed49 100644 > --- a/arch/x86/ia32/ia32entry.S > +++ b/arch/x86/ia32/ia32entry.S > @@ -62,12 +62,12 @@ > */ > .macro LOAD_ARGS32 offset, _r9=0 > .if \_r9 > - movl \offset+16(%rsp),%r9d > + movl \offset+R9(%rsp),%r9d > .endif > - movl \offset+40(%rsp),%ecx > - movl \offset+48(%rsp),%edx > - movl \offset+56(%rsp),%esi > - movl \offset+64(%rsp),%edi > + movl \offset+RCX(%rsp),%ecx > + movl \offset+RDX(%rsp),%edx > + movl \offset+RSI(%rsp),%esi > + movl \offset+RDI(%rsp),%edi > movl %eax,%eax /* zero extension */ > .endm > > @@ -144,7 +144,8 @@ ENTRY(ia32_sysenter_target) > CFI_REL_OFFSET rip,0 > pushq_cfi %rax > cld > - SAVE_ARGS 0,1,0 > + ALLOC_PT_GPREGS_ON_STACK > + SAVE_C_REGS_EXCEPT_R891011 > /* no need to do an access_ok check here because rbp has been > 32bit zero extended */ > ASM_STAC > @@ -182,7 +183,8 @@ sysexit_from_sys_call: > andl $~0x200,EFLAGS-ARGOFFSET(%rsp) > movl RIP-ARGOFFSET(%rsp),%edx /* User %eip */ > CFI_REGISTER rip,rdx > - RESTORE_ARGS 0,24,0,0,0,0 > + RESTORE_RSI_RDI > + REMOVE_PT_GPREGS_FROM_STACK 3*8 > xorq %r8,%r8 > xorq %r9,%r9 > xorq %r10,%r10 > @@ -256,13 +258,13 @@ sysenter_tracesys: > testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) > jz sysenter_auditsys > #endif > - SAVE_REST > + SAVE_EXTRA_REGS > CLEAR_RREGS > movq $-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */ > movq %rsp,%rdi /* &pt_regs -> arg1 */ > call syscall_trace_enter > LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ > - RESTORE_REST > + RESTORE_EXTRA_REGS > cmpq $(IA32_NR_syscalls-1),%rax > ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */ > jmp sysenter_do_call > @@ -304,7 +306,8 @@ ENTRY(ia32_cstar_target) > * disabled irqs and here we enable it straight after entry: > */ > ENABLE_INTERRUPTS(CLBR_NONE) > - SAVE_ARGS 8,0,0 > + ALLOC_PT_GPREGS_ON_STACK 8 > + SAVE_C_REGS_EXCEPT_RCX_R891011 > movl %eax,%eax /* zero extension */ > movq %rax,ORIG_RAX-ARGOFFSET(%rsp) > movq %rcx,RIP-ARGOFFSET(%rsp) > @@ -341,7 +344,7 @@ cstar_dispatch: > jnz sysretl_audit > sysretl_from_sys_call: > andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) > - RESTORE_ARGS 0,-ARG_SKIP,0,0,0 > + RESTORE_RSI_RDI_RDX > movl RIP-ARGOFFSET(%rsp),%ecx > CFI_REGISTER rip,rcx > movl EFLAGS-ARGOFFSET(%rsp),%r11d > @@ -372,13 +375,13 @@ cstar_tracesys: > jz cstar_auditsys > #endif > xchgl %r9d,%ebp > - SAVE_REST > + SAVE_EXTRA_REGS > CLEAR_RREGS 0, r9 > movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ > movq %rsp,%rdi /* &pt_regs -> arg1 */ > call syscall_trace_enter > LOAD_ARGS32 ARGOFFSET, 1 /* reload args from stack in case ptrace changed it */ > - RESTORE_REST > + RESTORE_EXTRA_REGS > xchgl %ebp,%r9d > cmpq $(IA32_NR_syscalls-1),%rax > ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */ > @@ -433,7 +436,8 @@ ENTRY(ia32_syscall) > cld > /* note the registers are not zero extended to the sf. > this could be a problem. */ > - SAVE_ARGS 0,1,0 > + ALLOC_PT_GPREGS_ON_STACK > + SAVE_C_REGS_EXCEPT_R891011 > orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) > testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) > jnz ia32_tracesys > @@ -446,16 +450,16 @@ ia32_sysret: > movq %rax,RAX-ARGOFFSET(%rsp) > ia32_ret_from_sys_call: > CLEAR_RREGS -ARGOFFSET > - jmp int_ret_from_sys_call > + jmp int_ret_from_sys_call > > -ia32_tracesys: > - SAVE_REST > +ia32_tracesys: > + SAVE_EXTRA_REGS > CLEAR_RREGS > movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ > movq %rsp,%rdi /* &pt_regs -> arg1 */ > call syscall_trace_enter > LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ > - RESTORE_REST > + RESTORE_EXTRA_REGS > cmpq $(IA32_NR_syscalls-1),%rax > ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */ > jmp ia32_do_call > @@ -492,7 +496,6 @@ GLOBAL(stub32_clone) > > ALIGN > ia32_ptregs_common: > - popq %r11 > CFI_ENDPROC > CFI_STARTPROC32 simple > CFI_SIGNAL_FRAME > @@ -507,9 +510,9 @@ ia32_ptregs_common: > /* CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/ > CFI_REL_OFFSET rsp,RSP-ARGOFFSET > /* CFI_REL_OFFSET ss,SS-ARGOFFSET*/ > - SAVE_REST > + SAVE_EXTRA_REGS 8 > call *%rax > - RESTORE_REST > - jmp ia32_sysret /* misbalances the return cache */ > + RESTORE_EXTRA_REGS 8 > + ret > CFI_ENDPROC > END(ia32_ptregs_common) > diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h > index 3c711f2a..3835647 100644 > --- a/arch/x86/include/asm/calling.h > +++ b/arch/x86/include/asm/calling.h > @@ -55,143 +55,137 @@ For 32-bit we have the following conventions - kernel is built with > * for assembly code: > */ > > -#define R15 0 > -#define R14 8 > -#define R13 16 > -#define R12 24 > -#define RBP 32 > -#define RBX 40 > - > -/* arguments: interrupts/non tracing syscalls only save up to here: */ > -#define R11 48 > -#define R10 56 > -#define R9 64 > -#define R8 72 > -#define RAX 80 > -#define RCX 88 > -#define RDX 96 > -#define RSI 104 > -#define RDI 112 > -#define ORIG_RAX 120 /* + error_code */ > -/* end of arguments */ > - > -/* cpu exception frame or undefined in case of fast syscall: */ > -#define RIP 128 > -#define CS 136 > -#define EFLAGS 144 > -#define RSP 152 > -#define SS 160 > - > -#define ARGOFFSET R11 > - > - .macro SAVE_ARGS addskip=0, save_rcx=1, save_r891011=1, rax_enosys=0 > - subq $9*8+\addskip, %rsp > - CFI_ADJUST_CFA_OFFSET 9*8+\addskip > - movq_cfi rdi, 8*8 > - movq_cfi rsi, 7*8 > - movq_cfi rdx, 6*8 > - > - .if \save_rcx > - movq_cfi rcx, 5*8 > - .endif > +/* The layout forms the "struct pt_regs" on the stack: */ > +/* > + * C ABI says these regs are callee-preserved. They aren't saved on kernel entry > + * unless syscall needs a complete, fully filled "struct pt_regs". > + */ > +#define R15 0*8 > +#define R14 1*8 > +#define R13 2*8 > +#define R12 3*8 > +#define RBP 4*8 > +#define RBX 5*8 > +/* These regs are callee-clobbered. Always saved on kernel entry. */ > +#define R11 6*8 > +#define R10 7*8 > +#define R9 8*8 > +#define R8 9*8 > +#define RAX 10*8 > +#define RCX 11*8 > +#define RDX 12*8 > +#define RSI 13*8 > +#define RDI 14*8 > +/* > + * On syscall entry, this is syscall#. On CPU exception, this is error code. > + * On hw interrupt, it's IRQ number: > + */ > +#define ORIG_RAX 15*8 > +/* Return frame for iretq */ > +#define RIP 16*8 > +#define CS 17*8 > +#define EFLAGS 18*8 > +#define RSP 19*8 > +#define SS 20*8 > + > +#define ARGOFFSET 0 > + > + .macro ALLOC_PT_GPREGS_ON_STACK addskip=0 > + subq $15*8+\addskip, %rsp > + CFI_ADJUST_CFA_OFFSET 15*8+\addskip > + .endm > > - .if \rax_enosys > - movq $-ENOSYS, 4*8(%rsp) > - .else > - movq_cfi rax, 4*8 > + .macro SAVE_C_REGS_HELPER offset=0 rax=1 rcx=1 r8plus=1 > + .if \r8plus > + movq_cfi r11, 6*8+\offset > + movq_cfi r10, 7*8+\offset > + movq_cfi r9, 8*8+\offset > + movq_cfi r8, 9*8+\offset > .endif > - > - .if \save_r891011 > - movq_cfi r8, 3*8 > - movq_cfi r9, 2*8 > - movq_cfi r10, 1*8 > - movq_cfi r11, 0*8 > + .if \rax > + movq_cfi rax, 10*8+\offset > + .endif > + .if \rcx > + movq_cfi rcx, 11*8+\offset > .endif > + movq_cfi rdx, 12*8+\offset > + movq_cfi rsi, 13*8+\offset > + movq_cfi rdi, 14*8+\offset > + .endm > + .macro SAVE_C_REGS offset=0 > + SAVE_C_REGS_HELPER \offset, 1, 1, 1 > + .endm > + .macro SAVE_C_REGS_EXCEPT_RAX_RCX offset=0 > + SAVE_C_REGS_HELPER \offset, 0, 0, 1 > + .endm > + .macro SAVE_C_REGS_EXCEPT_R891011 > + SAVE_C_REGS_HELPER 0, 1, 1, 0 > + .endm > + .macro SAVE_C_REGS_EXCEPT_RCX_R891011 > + SAVE_C_REGS_HELPER 0, 1, 0, 0 > + .endm > > + .macro SAVE_EXTRA_REGS offset=0 > + movq_cfi r15, 0*8+\offset > + movq_cfi r14, 1*8+\offset > + movq_cfi r13, 2*8+\offset > + movq_cfi r12, 3*8+\offset > + movq_cfi rbp, 4*8+\offset > + movq_cfi rbx, 5*8+\offset > + .endm > + .macro SAVE_EXTRA_REGS_RBP offset=0 > + movq_cfi rbp, 4*8+\offset > .endm > > -#define ARG_SKIP (9*8) > + .macro RESTORE_EXTRA_REGS offset=0 > + movq_cfi_restore 0*8+\offset, r15 > + movq_cfi_restore 1*8+\offset, r14 > + movq_cfi_restore 2*8+\offset, r13 > + movq_cfi_restore 3*8+\offset, r12 > + movq_cfi_restore 4*8+\offset, rbp > + movq_cfi_restore 5*8+\offset, rbx > + .endm > > - .macro RESTORE_ARGS rstor_rax=1, addskip=0, rstor_rcx=1, rstor_r11=1, \ > - rstor_r8910=1, rstor_rdx=1 > + .macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1 > .if \rstor_r11 > - movq_cfi_restore 0*8, r11 > + movq_cfi_restore 6*8, r11 > .endif > - > .if \rstor_r8910 > - movq_cfi_restore 1*8, r10 > - movq_cfi_restore 2*8, r9 > - movq_cfi_restore 3*8, r8 > + movq_cfi_restore 7*8, r10 > + movq_cfi_restore 8*8, r9 > + movq_cfi_restore 9*8, r8 > .endif > - > .if \rstor_rax > - movq_cfi_restore 4*8, rax > + movq_cfi_restore 10*8, rax > .endif > - > .if \rstor_rcx > - movq_cfi_restore 5*8, rcx > + movq_cfi_restore 11*8, rcx > .endif > - > .if \rstor_rdx > - movq_cfi_restore 6*8, rdx > - .endif > - > - movq_cfi_restore 7*8, rsi > - movq_cfi_restore 8*8, rdi > - > - .if ARG_SKIP+\addskip > 0 > - addq $ARG_SKIP+\addskip, %rsp > - CFI_ADJUST_CFA_OFFSET -(ARG_SKIP+\addskip) > + movq_cfi_restore 12*8, rdx > .endif > + movq_cfi_restore 13*8, rsi > + movq_cfi_restore 14*8, rdi > .endm > - > - .macro LOAD_ARGS offset, skiprax=0 > - movq \offset(%rsp), %r11 > - movq \offset+8(%rsp), %r10 > - movq \offset+16(%rsp), %r9 > - movq \offset+24(%rsp), %r8 > - movq \offset+40(%rsp), %rcx > - movq \offset+48(%rsp), %rdx > - movq \offset+56(%rsp), %rsi > - movq \offset+64(%rsp), %rdi > - .if \skiprax > - .else > - movq \offset+72(%rsp), %rax > - .endif > + .macro RESTORE_C_REGS > + RESTORE_C_REGS_HELPER 1,1,1,1,1 > .endm > - > -#define REST_SKIP (6*8) > - > - .macro SAVE_REST > - subq $REST_SKIP, %rsp > - CFI_ADJUST_CFA_OFFSET REST_SKIP > - movq_cfi rbx, 5*8 > - movq_cfi rbp, 4*8 > - movq_cfi r12, 3*8 > - movq_cfi r13, 2*8 > - movq_cfi r14, 1*8 > - movq_cfi r15, 0*8 > + .macro RESTORE_C_REGS_EXCEPT_RAX > + RESTORE_C_REGS_HELPER 0,1,1,1,1 > .endm > - > - .macro RESTORE_REST > - movq_cfi_restore 0*8, r15 > - movq_cfi_restore 1*8, r14 > - movq_cfi_restore 2*8, r13 > - movq_cfi_restore 3*8, r12 > - movq_cfi_restore 4*8, rbp > - movq_cfi_restore 5*8, rbx > - addq $REST_SKIP, %rsp > - CFI_ADJUST_CFA_OFFSET -(REST_SKIP) > + .macro RESTORE_C_REGS_EXCEPT_RCX > + RESTORE_C_REGS_HELPER 1,0,1,1,1 > .endm > - > - .macro SAVE_ALL > - SAVE_ARGS > - SAVE_REST > + .macro RESTORE_RSI_RDI > + RESTORE_C_REGS_HELPER 0,0,0,0,0 > + .endm > + .macro RESTORE_RSI_RDI_RDX > + RESTORE_C_REGS_HELPER 0,0,0,0,1 > .endm > > - .macro RESTORE_ALL addskip=0 > - RESTORE_REST > - RESTORE_ARGS 1, \addskip > + .macro REMOVE_PT_GPREGS_FROM_STACK addskip=0 > + addq $15*8+\addskip, %rsp > + CFI_ADJUST_CFA_OFFSET -(15*8+\addskip) > .endm > > .macro icebp > diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h > index 0a8b519..021bee9 100644 > --- a/arch/x86/include/asm/irqflags.h > +++ b/arch/x86/include/asm/irqflags.h > @@ -171,9 +171,9 @@ static inline int arch_irqs_disabled(void) > #define ARCH_LOCKDEP_SYS_EXIT_IRQ \ > TRACE_IRQS_ON; \ > sti; \ > - SAVE_REST; \ > + SAVE_EXTRA_REGS; \ > LOCKDEP_SYS_EXIT; \ > - RESTORE_REST; \ > + RESTORE_EXTRA_REGS; \ > cli; \ > TRACE_IRQS_OFF; > > diff --git a/arch/x86/include/uapi/asm/ptrace-abi.h b/arch/x86/include/uapi/asm/ptrace-abi.h > index 7b0a55a..ad115bf 100644 > --- a/arch/x86/include/uapi/asm/ptrace-abi.h > +++ b/arch/x86/include/uapi/asm/ptrace-abi.h > @@ -49,7 +49,6 @@ > #define EFLAGS 144 > #define RSP 152 > #define SS 160 > -#define ARGOFFSET R11 > #endif /* __ASSEMBLY__ */ > > /* top of stack page */ > diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S > index ac542ac..45bdd26 100644 > --- a/arch/x86/kernel/entry_64.S > +++ b/arch/x86/kernel/entry_64.S > @@ -26,12 +26,6 @@ > * Some macro usage: > * - CFI macros are used to generate dwarf2 unwind information for better > * backtraces. They don't change any code. > - * - SAVE_ALL/RESTORE_ALL - Save/restore all registers > - * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify. > - * There are unfortunately lots of special cases where some registers > - * not touched. The macro is a big mess that should be cleaned up. > - * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS. > - * Gives a full stack frame. > * - ENTRY/END Define functions in the symbol table. > * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack > * frame that is otherwise undefined after a SYSCALL > @@ -190,9 +184,9 @@ ENDPROC(native_usergs_sysret64) > .endm > > /* > - * frame that enables calling into C. > + * frame that enables passing a complete pt_regs to a C function. > */ > - .macro PARTIAL_FRAME start=1 offset=0 > + .macro DEFAULT_FRAME start=1 offset=0 > XCPT_FRAME \start, ORIG_RAX+\offset-ARGOFFSET > CFI_REL_OFFSET rdi, RDI+\offset-ARGOFFSET > CFI_REL_OFFSET rsi, RSI+\offset-ARGOFFSET > @@ -203,13 +197,6 @@ ENDPROC(native_usergs_sysret64) > CFI_REL_OFFSET r9, R9+\offset-ARGOFFSET > CFI_REL_OFFSET r10, R10+\offset-ARGOFFSET > CFI_REL_OFFSET r11, R11+\offset-ARGOFFSET > - .endm > - > -/* > - * frame that enables passing a complete pt_regs to a C function. > - */ > - .macro DEFAULT_FRAME start=1 offset=0 > - PARTIAL_FRAME \start, R11+\offset-R15 > CFI_REL_OFFSET rbx, RBX+\offset > CFI_REL_OFFSET rbp, RBP+\offset > CFI_REL_OFFSET r12, R12+\offset > @@ -221,21 +208,8 @@ ENDPROC(native_usergs_sysret64) > ENTRY(save_paranoid) > XCPT_FRAME 1 RDI+8 > cld > - movq %rdi, RDI+8(%rsp) > - movq %rsi, RSI+8(%rsp) > - movq_cfi rdx, RDX+8 > - movq_cfi rcx, RCX+8 > - movq_cfi rax, RAX+8 > - movq %r8, R8+8(%rsp) > - movq %r9, R9+8(%rsp) > - movq %r10, R10+8(%rsp) > - movq %r11, R11+8(%rsp) > - movq_cfi rbx, RBX+8 > - movq %rbp, RBP+8(%rsp) > - movq %r12, R12+8(%rsp) > - movq %r13, R13+8(%rsp) > - movq %r14, R14+8(%rsp) > - movq %r15, R15+8(%rsp) > + SAVE_C_REGS 8 > + SAVE_EXTRA_REGS 8 > movl $1,%ebx > movl $MSR_GS_BASE,%ecx > rdmsr > @@ -264,7 +238,7 @@ ENTRY(ret_from_fork) > > GET_THREAD_INFO(%rcx) > > - RESTORE_REST > + RESTORE_EXTRA_REGS > > testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread? > jz 1f > @@ -276,12 +250,10 @@ ENTRY(ret_from_fork) > jmp ret_from_sys_call # go to the SYSRET fastpath > > 1: > - subq $REST_SKIP, %rsp # leave space for volatiles > - CFI_ADJUST_CFA_OFFSET REST_SKIP > movq %rbp, %rdi > call *%rbx > movl $0, RAX(%rsp) > - RESTORE_REST > + RESTORE_EXTRA_REGS > jmp int_ret_from_sys_call > CFI_ENDPROC > END(ret_from_fork) > @@ -339,9 +311,11 @@ GLOBAL(system_call_after_swapgs) > * and short: > */ > ENABLE_INTERRUPTS(CLBR_NONE) > - SAVE_ARGS 8, 0, rax_enosys=1 > + ALLOC_PT_GPREGS_ON_STACK 8 > + SAVE_C_REGS_EXCEPT_RAX_RCX > + movq $-ENOSYS,RAX-ARGOFFSET(%rsp) > movq_cfi rax,(ORIG_RAX-ARGOFFSET) > - movq %rcx,RIP-ARGOFFSET(%rsp) > + movq %rcx,RIP-ARGOFFSET(%rsp) > CFI_REL_OFFSET rip,RIP-ARGOFFSET > testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) > jnz tracesys > @@ -372,9 +346,9 @@ ret_from_sys_call: > * sysretq will re-enable interrupts: > */ > TRACE_IRQS_ON > + RESTORE_C_REGS_EXCEPT_RCX > movq RIP-ARGOFFSET(%rsp),%rcx > CFI_REGISTER rip,rcx > - RESTORE_ARGS 1,-ARG_SKIP,0 > /*CFI_REGISTER rflags,r11*/ > movq PER_CPU_VAR(old_rsp), %rsp > USERGS_SYSRET64 > @@ -387,16 +361,16 @@ int_ret_from_sys_call_fixup: > > /* Do syscall tracing */ > tracesys: > - leaq -REST_SKIP(%rsp), %rdi > + movq %rsp, %rdi > movq $AUDIT_ARCH_X86_64, %rsi > call syscall_trace_enter_phase1 > test %rax, %rax > jnz tracesys_phase2 /* if needed, run the slow path */ > - LOAD_ARGS 0 /* else restore clobbered regs */ > + RESTORE_C_REGS /* else restore clobbered regs */ > jmp system_call_fastpath /* and return to the fast path */ > > tracesys_phase2: > - SAVE_REST > + SAVE_EXTRA_REGS > FIXUP_TOP_OF_STACK %rdi > movq %rsp, %rdi > movq $AUDIT_ARCH_X86_64, %rsi > @@ -408,8 +382,8 @@ tracesys_phase2: > * We don't reload %rax because syscall_trace_entry_phase2() returned > * the value it wants us to use in the table lookup. > */ > - LOAD_ARGS ARGOFFSET, 1 > - RESTORE_REST > + RESTORE_C_REGS_EXCEPT_RAX > + RESTORE_EXTRA_REGS > #if __SYSCALL_MASK == ~0 > cmpq $__NR_syscall_max,%rax > #else > @@ -460,7 +434,7 @@ int_very_careful: > TRACE_IRQS_ON > ENABLE_INTERRUPTS(CLBR_NONE) > int_check_syscall_exit_work: > - SAVE_REST > + SAVE_EXTRA_REGS > /* Check for syscall exit trace */ > testl $_TIF_WORK_SYSCALL_EXIT,%edx > jz int_signal > @@ -479,7 +453,7 @@ int_signal: > call do_notify_resume > 1: movl $_TIF_WORK_MASK,%edi > int_restore_rest: > - RESTORE_REST > + RESTORE_EXTRA_REGS > DISABLE_INTERRUPTS(CLBR_NONE) > TRACE_IRQS_OFF > jmp int_with_check > @@ -489,15 +463,12 @@ END(system_call) > .macro FORK_LIKE func > ENTRY(stub_\func) > CFI_STARTPROC > - popq %r11 /* save return address */ > - PARTIAL_FRAME 0 > - SAVE_REST > - pushq %r11 /* put it back on stack */ > + DEFAULT_FRAME 0, 8 /* offset 8: return address */ > + SAVE_EXTRA_REGS 8 > FIXUP_TOP_OF_STACK %r11, 8 > - DEFAULT_FRAME 0 8 /* offset 8: return address */ > call sys_\func > RESTORE_TOP_OF_STACK %r11, 8 > - ret $REST_SKIP /* pop extended registers */ > + ret > CFI_ENDPROC > END(stub_\func) > .endm > @@ -505,7 +476,7 @@ END(stub_\func) > .macro FIXED_FRAME label,func > ENTRY(\label) > CFI_STARTPROC > - PARTIAL_FRAME 0 8 /* offset 8: return address */ > + DEFAULT_FRAME 0, 8 /* offset 8: return address */ > FIXUP_TOP_OF_STACK %r11, 8-ARGOFFSET > call \func > RESTORE_TOP_OF_STACK %r11, 8-ARGOFFSET > @@ -522,12 +493,12 @@ END(\label) > ENTRY(stub_execve) > CFI_STARTPROC > addq $8, %rsp > - PARTIAL_FRAME 0 > - SAVE_REST > + DEFAULT_FRAME 0 > + SAVE_EXTRA_REGS > FIXUP_TOP_OF_STACK %r11 > call sys_execve > movq %rax,RAX(%rsp) > - RESTORE_REST > + RESTORE_EXTRA_REGS > jmp int_ret_from_sys_call > CFI_ENDPROC > END(stub_execve) > @@ -535,13 +506,13 @@ END(stub_execve) > ENTRY(stub_execveat) > CFI_STARTPROC > addq $8, %rsp > - PARTIAL_FRAME 0 > - SAVE_REST > + DEFAULT_FRAME 0 > + SAVE_EXTRA_REGS > FIXUP_TOP_OF_STACK %r11 > call sys_execveat > RESTORE_TOP_OF_STACK %r11 > movq %rax,RAX(%rsp) > - RESTORE_REST > + RESTORE_EXTRA_REGS > jmp int_ret_from_sys_call > CFI_ENDPROC > END(stub_execveat) > @@ -553,12 +524,12 @@ END(stub_execveat) > ENTRY(stub_rt_sigreturn) > CFI_STARTPROC > addq $8, %rsp > - PARTIAL_FRAME 0 > - SAVE_REST > + DEFAULT_FRAME 0 > + SAVE_EXTRA_REGS > FIXUP_TOP_OF_STACK %r11 > call sys_rt_sigreturn > movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer > - RESTORE_REST > + RESTORE_EXTRA_REGS > jmp int_ret_from_sys_call > CFI_ENDPROC > END(stub_rt_sigreturn) > @@ -567,12 +538,12 @@ END(stub_rt_sigreturn) > ENTRY(stub_x32_rt_sigreturn) > CFI_STARTPROC > addq $8, %rsp > - PARTIAL_FRAME 0 > - SAVE_REST > + DEFAULT_FRAME 0 > + SAVE_EXTRA_REGS > FIXUP_TOP_OF_STACK %r11 > call sys32_x32_rt_sigreturn > movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer > - RESTORE_REST > + RESTORE_EXTRA_REGS > jmp int_ret_from_sys_call > CFI_ENDPROC > END(stub_x32_rt_sigreturn) > @@ -580,13 +551,13 @@ END(stub_x32_rt_sigreturn) > ENTRY(stub_x32_execve) > CFI_STARTPROC > addq $8, %rsp > - PARTIAL_FRAME 0 > - SAVE_REST > + DEFAULT_FRAME 0 > + SAVE_EXTRA_REGS > FIXUP_TOP_OF_STACK %r11 > call compat_sys_execve > RESTORE_TOP_OF_STACK %r11 > movq %rax,RAX(%rsp) > - RESTORE_REST > + RESTORE_EXTRA_REGS > jmp int_ret_from_sys_call > CFI_ENDPROC > END(stub_x32_execve) > @@ -594,13 +565,13 @@ END(stub_x32_execve) > ENTRY(stub_x32_execveat) > CFI_STARTPROC > addq $8, %rsp > - PARTIAL_FRAME 0 > - SAVE_REST > + DEFAULT_FRAME 0 > + SAVE_EXTRA_REGS > FIXUP_TOP_OF_STACK %r11 > call compat_sys_execveat > RESTORE_TOP_OF_STACK %r11 > movq %rax,RAX(%rsp) > - RESTORE_REST > + RESTORE_EXTRA_REGS > jmp int_ret_from_sys_call > CFI_ENDPROC > END(stub_x32_execveat) > @@ -656,42 +627,28 @@ END(interrupt) > > /* 0(%rsp): ~(interrupt number) */ > .macro interrupt func > - /* reserve pt_regs for scratch regs and rbp */ > - subq $ORIG_RAX-RBP, %rsp > - CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP > cld > - /* start from rbp in pt_regs and jump over */ > - movq_cfi rdi, (RDI-RBP) > - movq_cfi rsi, (RSI-RBP) > - movq_cfi rdx, (RDX-RBP) > - movq_cfi rcx, (RCX-RBP) > - movq_cfi rax, (RAX-RBP) > - movq_cfi r8, (R8-RBP) > - movq_cfi r9, (R9-RBP) > - movq_cfi r10, (R10-RBP) > - movq_cfi r11, (R11-RBP) > - > - /* Save rbp so that we can unwind from get_irq_regs() */ > - movq_cfi rbp, 0 > - > - /* Save previous stack value */ > - movq %rsp, %rsi > + ALLOC_PT_GPREGS_ON_STACK -RBP > + SAVE_C_REGS -RBP > + /* this goes to 0(%rsp) for unwinder, not for saving the value: */ > + SAVE_EXTRA_REGS_RBP -RBP > + > + leaq -RBP(%rsp),%rdi /* arg1 for \func (pointer to pt_regs) */ > > - leaq -RBP(%rsp),%rdi /* arg1 for handler */ > - testl $3, CS-RBP(%rsi) > + testl $3, CS-RBP(%rsp) > je 1f > SWAPGS > +1: > /* > * irq_count is used to check if a CPU is already on an interrupt stack > * or not. While this is essentially redundant with preempt_count it is > * a little cheaper to use a separate counter in the PDA (short of > * moving irq_enter into assembly, which would be too much work) > */ > -1: incl PER_CPU_VAR(irq_count) > + movq %rsp, %rsi > + incl PER_CPU_VAR(irq_count) > cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp > CFI_DEF_CFA_REGISTER rsi > - > - /* Store previous stack value */ > pushq %rsi > CFI_ESCAPE 0x0f /* DW_CFA_def_cfa_expression */, 6, \ > 0x77 /* DW_OP_breg7 */, 0, \ > @@ -800,7 +757,8 @@ retint_swapgs: /* return to user-space */ > */ > irq_return_via_sysret: > CFI_REMEMBER_STATE > - RESTORE_ARGS 1,8,1 > + RESTORE_C_REGS > + REMOVE_PT_GPREGS_FROM_STACK 8 > movq (RSP-RIP)(%rsp),%rsp > USERGS_SYSRET64 > CFI_RESTORE_STATE > @@ -816,7 +774,8 @@ retint_restore_args: /* return to kernel space */ > */ > TRACE_IRQS_IRETQ > restore_args: > - RESTORE_ARGS 1,8,1 > + RESTORE_C_REGS > + REMOVE_PT_GPREGS_FROM_STACK 8 > > irq_return: > INTERRUPT_RETURN > @@ -887,12 +846,12 @@ retint_signal: > jz retint_swapgs > TRACE_IRQS_ON > ENABLE_INTERRUPTS(CLBR_NONE) > - SAVE_REST > + SAVE_EXTRA_REGS > movq $-1,ORIG_RAX(%rsp) > xorl %esi,%esi # oldset > movq %rsp,%rdi # &pt_regs > call do_notify_resume > - RESTORE_REST > + RESTORE_EXTRA_REGS > DISABLE_INTERRUPTS(CLBR_NONE) > TRACE_IRQS_OFF > GET_THREAD_INFO(%rcx) > @@ -1019,8 +978,7 @@ ENTRY(\sym) > pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ > .endif > > - subq $ORIG_RAX-R15, %rsp > - CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 > + ALLOC_PT_GPREGS_ON_STACK > > .if \paranoid > .if \paranoid == 1 > @@ -1266,7 +1224,9 @@ ENTRY(xen_failsafe_callback) > addq $0x30,%rsp > CFI_ADJUST_CFA_OFFSET -0x30 > pushq_cfi $-1 /* orig_ax = -1 => not a system call */ > - SAVE_ALL > + ALLOC_PT_GPREGS_ON_STACK > + SAVE_C_REGS > + SAVE_EXTRA_REGS > jmp error_exit > CFI_ENDPROC > END(xen_failsafe_callback) > @@ -1318,11 +1278,15 @@ ENTRY(paranoid_exit) > jnz paranoid_restore > TRACE_IRQS_IRETQ 0 > SWAPGS_UNSAFE_STACK > - RESTORE_ALL 8 > + RESTORE_EXTRA_REGS > + RESTORE_C_REGS > + REMOVE_PT_GPREGS_FROM_STACK 8 > INTERRUPT_RETURN > paranoid_restore: > TRACE_IRQS_IRETQ_DEBUG 0 > - RESTORE_ALL 8 > + RESTORE_EXTRA_REGS > + RESTORE_C_REGS > + REMOVE_PT_GPREGS_FROM_STACK 8 > INTERRUPT_RETURN > CFI_ENDPROC > END(paranoid_exit) > @@ -1336,21 +1300,8 @@ ENTRY(error_entry) > CFI_ADJUST_CFA_OFFSET 15*8 > /* oldrax contains error code */ > cld > - movq %rdi, RDI+8(%rsp) > - movq %rsi, RSI+8(%rsp) > - movq %rdx, RDX+8(%rsp) > - movq %rcx, RCX+8(%rsp) > - movq %rax, RAX+8(%rsp) > - movq %r8, R8+8(%rsp) > - movq %r9, R9+8(%rsp) > - movq %r10, R10+8(%rsp) > - movq %r11, R11+8(%rsp) > - movq_cfi rbx, RBX+8 > - movq %rbp, RBP+8(%rsp) > - movq %r12, R12+8(%rsp) > - movq %r13, R13+8(%rsp) > - movq %r14, R14+8(%rsp) > - movq %r15, R15+8(%rsp) > + SAVE_C_REGS 8 > + SAVE_EXTRA_REGS 8 > xorl %ebx,%ebx > testl $3,CS+8(%rsp) > je error_kernelspace > @@ -1399,7 +1350,7 @@ END(error_entry) > ENTRY(error_exit) > DEFAULT_FRAME > movl %ebx,%eax > - RESTORE_REST > + RESTORE_EXTRA_REGS > DISABLE_INTERRUPTS(CLBR_NONE) > TRACE_IRQS_OFF > GET_THREAD_INFO(%rcx) > @@ -1618,8 +1569,8 @@ end_repeat_nmi: > * so that we repeat another NMI. > */ > pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ > - subq $ORIG_RAX-R15, %rsp > - CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 > + ALLOC_PT_GPREGS_ON_STACK > + > /* > * Use save_paranoid to handle SWAPGS, but no need to use paranoid_exit > * as we should not be calling schedule in NMI context. > @@ -1658,8 +1609,10 @@ end_repeat_nmi: > nmi_swapgs: > SWAPGS_UNSAFE_STACK > nmi_restore: > + RESTORE_EXTRA_REGS > + RESTORE_C_REGS > /* Pop the extra iret frame at once */ > - RESTORE_ALL 6*8 > + REMOVE_PT_GPREGS_FROM_STACK 6*8 > > /* Clear the NMI executing stack variable */ > movq $0, 5*8(%rsp) > -- > 1.8.1.4 > -- Andy Lutomirski AMA Capital Management, LLC -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/