Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1753200AbeAKAQb (ORCPT + 1 other); Wed, 10 Jan 2018 19:16:31 -0500 Received: from one.firstfloor.org ([193.170.194.197]:54302 "EHLO one.firstfloor.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752473AbeAKAQa (ORCPT ); Wed, 10 Jan 2018 19:16:30 -0500 Date: Wed, 10 Jan 2018 16:16:28 -0800 From: Andi Kleen To: Brian Gerst Cc: Andi Kleen , Thomas Gleixner , the arch/x86 maintainers , Linux Kernel Mailing List , Linus Torvalds , David Woodhouse , Paul Turner , Andy Lutomirski , Peter Zijlstra , Tom Lendacky , Tim Chen , Greg Kroah-Hartman , Dave Hansen , Jiri Kosina , Andi Kleen Subject: Re: [PATCH v1 1/8] x86/entry/clearregs: Remove partial stack frame in fast system call Message-ID: <20180111001626.m5cgtngkmoskuhyh@two.firstfloor.org> References: <20180110010328.22163-1-andi@firstfloor.org> <20180110010328.22163-2-andi@firstfloor.org> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: User-Agent: NeoMutt/20170113 (1.7.2) Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Return-Path: On Tue, Jan 09, 2018 at 09:46:16PM -0500, Brian Gerst wrote: > On Tue, Jan 9, 2018 at 8:03 PM, Andi Kleen wrote: > > From: Andi Kleen > > > > Remove the partial stack frame in the 64bit syscall fast path. > > In the next patch we want to clear the extra registers, which requires > > to always save all registers. So remove the partial stack frame > > in the syscall fast path and always save everything. > > > > This actually simplifies the code because the ptregs stubs > > are not needed anymore. 
> > > > arch/x86/entry/entry_64.S | 57 ++++----------------------------------------------------- > > arch/x86/entry/syscall_64.c | 2 +- > > > > Signed-off-by: Andi Kleen > > --- > > arch/x86/entry/entry_64.S | 57 ++++----------------------------------------- > > arch/x86/entry/syscall_64.c | 2 +- > > 2 files changed, 5 insertions(+), 54 deletions(-) > > > > diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S > > index 58dbf7a12a05..bbdfbdd817d6 100644 > > --- a/arch/x86/entry/entry_64.S > > +++ b/arch/x86/entry/entry_64.S > > @@ -234,7 +234,9 @@ GLOBAL(entry_SYSCALL_64_after_hwframe) > > pushq %r9 /* pt_regs->r9 */ > > pushq %r10 /* pt_regs->r10 */ > > pushq %r11 /* pt_regs->r11 */ > > - sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */ > > + sub $(6*8), %rsp > > + SAVE_EXTRA_REGS > > + > > Continue using pushes here > > > UNWIND_HINT_REGS extra=0 > > > > /* > > @@ -262,11 +264,6 @@ entry_SYSCALL_64_fastpath: > > ja 1f /* return -ENOSYS (already in pt_regs->ax) */ > > movq %r10, %rcx > > > > - /* > > - * This call instruction is handled specially in stub_ptregs_64. > > - * It might end up jumping to the slow path. If it jumps, RAX > > - * and all argument registers are clobbered. > > - */ > > #ifdef CONFIG_RETPOLINE > > movq sys_call_table(, %rax, 8), %rax > > call __x86_indirect_thunk_rax > > @@ -293,9 +290,7 @@ entry_SYSCALL_64_fastpath: > > TRACE_IRQS_ON /* user mode is traced as IRQs on */ > > movq RIP(%rsp), %rcx > > movq EFLAGS(%rsp), %r11 > > - addq $6*8, %rsp /* skip extra regs -- they were preserved */ > > - UNWIND_HINT_EMPTY > > - jmp .Lpop_c_regs_except_rcx_r11_and_sysret > > + jmp syscall_return_via_sysret > > > > 1: > > /* > > @@ -305,14 +300,12 @@ entry_SYSCALL_64_fastpath: > > */ > > TRACE_IRQS_ON > > ENABLE_INTERRUPTS(CLBR_ANY) > > - SAVE_EXTRA_REGS > > movq %rsp, %rdi > > call syscall_return_slowpath /* returns with IRQs disabled */ > > jmp return_from_SYSCALL_64 > > > > entry_SYSCALL64_slow_path: > > /* IRQs are off. 
*/ > > - SAVE_EXTRA_REGS > > movq %rsp, %rdi > > call do_syscall_64 /* returns with IRQs disabled */ > > > > @@ -389,7 +382,6 @@ syscall_return_via_sysret: > > /* rcx and r11 are already restored (see code above) */ > > UNWIND_HINT_EMPTY > > POP_EXTRA_REGS > > -.Lpop_c_regs_except_rcx_r11_and_sysret: > > popq %rsi /* skip r11 */ > > popq %r10 > > popq %r9 > > @@ -420,47 +412,6 @@ syscall_return_via_sysret: > > USERGS_SYSRET64 > > END(entry_SYSCALL_64) > > > > -ENTRY(stub_ptregs_64) > > - /* > > - * Syscalls marked as needing ptregs land here. > > - * If we are on the fast path, we need to save the extra regs, > > - * which we achieve by trying again on the slow path. If we are on > > - * the slow path, the extra regs are already saved. > > - * > > - * RAX stores a pointer to the C function implementing the syscall. > > - * IRQs are on. > > - */ > > - cmpq $.Lentry_SYSCALL_64_after_fastpath_call, (%rsp) > > - jne 1f > > - > > - /* > > - * Called from fast path -- disable IRQs again, pop return address > > - * and jump to slow path > > - */ > > - DISABLE_INTERRUPTS(CLBR_ANY) > > - TRACE_IRQS_OFF > > - popq %rax > > - UNWIND_HINT_REGS extra=0 > > - jmp entry_SYSCALL64_slow_path > > - > > -1: > > - JMP_NOSPEC %rax /* Called from C */ > > -END(stub_ptregs_64) > > - > > -.macro ptregs_stub func > > -ENTRY(ptregs_\func) > > - UNWIND_HINT_FUNC > > - leaq \func(%rip), %rax > > - jmp stub_ptregs_64 > > -END(ptregs_\func) > > -.endm > > - > > -/* Instantiate ptregs_stub for each ptregs-using syscall */ > > -#define __SYSCALL_64_QUAL_(sym) > > -#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_stub sym > > -#define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(sym) > > -#include > > - > > You can't just blindly remove this. We need to make sure that > syscalls that modify registers take the slow path exit, because they > may change the registers to be incompatible with SYSRET. That's a good point. 
I checked the ptregs calls: iopl: should be fine; we will be restoring the correct IOPL through SYSRET. clone/fork: fine too; the original return is fine, and ret_from_fork takes care of the child. execve et al.: we will be leaking r11 (rflags) and rcx (original return address) into the new process, but that seems acceptable. rt_sigreturn: that's the only one that has problems. I added a new TIF_FULL_RESTORE to force it into the slow path. -Andi