From: Brian Gerst
Date: Wed, 10 Jan 2018 19:54:24 -0500
Subject: Re: [PATCH v1 1/8] x86/entry/clearregs: Remove partial stack frame in fast system call
To: Andi Kleen
Cc: Thomas Gleixner, the arch/x86 maintainers, Linux Kernel Mailing List,
 Linus Torvalds, David Woodhouse, Paul Turner, Andy Lutomirski,
 Peter Zijlstra, Tom Lendacky, Tim Chen, Greg Kroah-Hartman,
 Dave Hansen, Jiri Kosina, Andi Kleen
In-Reply-To: <20180111001626.m5cgtngkmoskuhyh@two.firstfloor.org>
References: <20180110010328.22163-1-andi@firstfloor.org>
 <20180110010328.22163-2-andi@firstfloor.org>
 <20180111001626.m5cgtngkmoskuhyh@two.firstfloor.org>

On Wed, Jan 10, 2018 at 7:16 PM, Andi Kleen wrote:
> On Tue, Jan 09, 2018 at 09:46:16PM -0500, Brian Gerst wrote:
>> On Tue, Jan 9, 2018 at 8:03 PM, Andi Kleen wrote:
>> > From: Andi Kleen
>> >
>> > Remove the partial stack frame in the 64-bit syscall fast path.
>> > In the next patch we want to clear the extra registers, which
>> > requires always saving all registers. So remove the partial stack
>> > frame in the syscall fast path and always save everything.
>> >
>> > This actually simplifies the code because the ptregs stubs
>> > are no longer needed.
>> >
>> > Signed-off-by: Andi Kleen
>> > ---
>> >  arch/x86/entry/entry_64.S   | 57 ++++-----------------------------------
>> >  arch/x86/entry/syscall_64.c |  2 +-
>> >  2 files changed, 5 insertions(+), 54 deletions(-)
>> >
>> > diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
>> > index 58dbf7a12a05..bbdfbdd817d6 100644
>> > --- a/arch/x86/entry/entry_64.S
>> > +++ b/arch/x86/entry/entry_64.S
>> > @@ -234,7 +234,9 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
>> >  	pushq	%r9			/* pt_regs->r9 */
>> >  	pushq	%r10			/* pt_regs->r10 */
>> >  	pushq	%r11			/* pt_regs->r11 */
>> > -	sub	$(6*8), %rsp		/* pt_regs->bp, bx, r12-15 not saved */
>> > +	sub	$(6*8), %rsp
>> > +	SAVE_EXTRA_REGS
>> > +
>>
>> Continue using pushes here.
>>
>> >  	UNWIND_HINT_REGS extra=0
>> >
>> >  	/*
>> > @@ -262,11 +264,6 @@ entry_SYSCALL_64_fastpath:
>> >  	ja	1f			/* return -ENOSYS (already in pt_regs->ax) */
>> >  	movq	%r10, %rcx
>> >
>> > -	/*
>> > -	 * This call instruction is handled specially in stub_ptregs_64.
>> > -	 * It might end up jumping to the slow path. If it jumps, RAX
>> > -	 * and all argument registers are clobbered.
>> > -	 */
>> >  #ifdef CONFIG_RETPOLINE
>> >  	movq	sys_call_table(, %rax, 8), %rax
>> >  	call	__x86_indirect_thunk_rax
>> > @@ -293,9 +290,7 @@ entry_SYSCALL_64_fastpath:
>> >  	TRACE_IRQS_ON			/* user mode is traced as IRQs on */
>> >  	movq	RIP(%rsp), %rcx
>> >  	movq	EFLAGS(%rsp), %r11
>> > -	addq	$6*8, %rsp		/* skip extra regs -- they were preserved */
>> > -	UNWIND_HINT_EMPTY
>> > -	jmp	.Lpop_c_regs_except_rcx_r11_and_sysret
>> > +	jmp	syscall_return_via_sysret
>> >
>> >  1:
>> >  	/*
>> > @@ -305,14 +300,12 @@ entry_SYSCALL_64_fastpath:
>> >  	 */
>> >  	TRACE_IRQS_ON
>> >  	ENABLE_INTERRUPTS(CLBR_ANY)
>> > -	SAVE_EXTRA_REGS
>> >  	movq	%rsp, %rdi
>> >  	call	syscall_return_slowpath	/* returns with IRQs disabled */
>> >  	jmp	return_from_SYSCALL_64
>> >
>> >  entry_SYSCALL64_slow_path:
>> >  	/* IRQs are off. */
>> > -	SAVE_EXTRA_REGS
>> >  	movq	%rsp, %rdi
>> >  	call	do_syscall_64		/* returns with IRQs disabled */
>> >
>> > @@ -389,7 +382,6 @@ syscall_return_via_sysret:
>> >  	/* rcx and r11 are already restored (see code above) */
>> >  	UNWIND_HINT_EMPTY
>> >  	POP_EXTRA_REGS
>> > -.Lpop_c_regs_except_rcx_r11_and_sysret:
>> >  	popq	%rsi			/* skip r11 */
>> >  	popq	%r10
>> >  	popq	%r9
>> > @@ -420,47 +412,6 @@ syscall_return_via_sysret:
>> >  	USERGS_SYSRET64
>> >  END(entry_SYSCALL_64)
>> >
>> > -ENTRY(stub_ptregs_64)
>> > -	/*
>> > -	 * Syscalls marked as needing ptregs land here.
>> > -	 * If we are on the fast path, we need to save the extra regs,
>> > -	 * which we achieve by trying again on the slow path. If we are on
>> > -	 * the slow path, the extra regs are already saved.
>> > -	 *
>> > -	 * RAX stores a pointer to the C function implementing the syscall.
>> > -	 * IRQs are on.
>> > -	 */
>> > -	cmpq	$.Lentry_SYSCALL_64_after_fastpath_call, (%rsp)
>> > -	jne	1f
>> > -
>> > -	/*
>> > -	 * Called from fast path -- disable IRQs again, pop return address
>> > -	 * and jump to slow path
>> > -	 */
>> > -	DISABLE_INTERRUPTS(CLBR_ANY)
>> > -	TRACE_IRQS_OFF
>> > -	popq	%rax
>> > -	UNWIND_HINT_REGS extra=0
>> > -	jmp	entry_SYSCALL64_slow_path
>> > -
>> > -1:
>> > -	JMP_NOSPEC %rax			/* Called from C */
>> > -END(stub_ptregs_64)
>> > -
>> > -.macro ptregs_stub func
>> > -ENTRY(ptregs_\func)
>> > -	UNWIND_HINT_FUNC
>> > -	leaq	\func(%rip), %rax
>> > -	jmp	stub_ptregs_64
>> > -END(ptregs_\func)
>> > -.endm
>> > -
>> > -/* Instantiate ptregs_stub for each ptregs-using syscall */
>> > -#define __SYSCALL_64_QUAL_(sym)
>> > -#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_stub sym
>> > -#define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(sym)
>> > -#include <asm/syscalls_64.h>
>> > -
>>
>> You can't just blindly remove this. We need to make sure that
>> syscalls that modify registers take the slow path exit, because they
>> may change the registers to be incompatible with SYSRET.
>
> That's a good point. I checked the ptregs calls:
>
> iopl: should be fine, we will be restoring the correct IOPL through
> SYSRET.
>
> clone/fork: fine too, the original return is fine and ret_from_fork
> takes care of the child.
>
> execve et al.: we will be leaking r11 (rflags) and rcx (the original
> return address) into the new process, but that seems acceptable.

We still need to check whether we are loading a 32-bit binary. That
case must return with IRET.

> rt_sigreturn: that's the only one that has problems. I added a new
> TIF_FULL_RESTORE flag to force it into the slow path.

--
Brian Gerst
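
For context, a minimal sketch of the TIF_FULL_RESTORE idea Andi
describes above. The flag name comes from his mail; the hook points,
helper name, and exit-path shape below are illustrative assumptions,
not the actual patch:

/*
 * Sketch only -- not the actual patch.  rt_sigreturn rewrites the
 * whole pt_regs from the user signal frame, so RCX/R11 no longer
 * mirror RIP/RFLAGS, and a SYSRET exit (which reloads RIP from RCX
 * and RFLAGS from R11) would resume in the wrong state.  A per-task
 * flag lets such a syscall demand the full IRET exit, which restores
 * every register from pt_regs.
 */

#include <linux/ptrace.h>	/* current_pt_regs() */
#include <linux/thread_info.h>	/* set_thread_flag() etc. */

/* Hypothetical new bit, would live in
   arch/x86/include/asm/thread_info.h alongside the other TIF_* bits:
   #define TIF_FULL_RESTORE 30 */

asmlinkage long sys_rt_sigreturn(void)
{
	struct pt_regs *regs = current_pt_regs();

	/* ... existing restore_sigcontext() work happens here ... */

	/* pt_regs now reflects the signal frame, not the syscall
	   entry, so the SYSRET fast exit is no longer safe. */
	set_thread_flag(TIF_FULL_RESTORE);
	return regs->ax;
}

/* In the C exit path (syscall_return_slowpath() or similar), the
   flag would steer the return toward the IRET path: */
static bool syscall_needs_iret(void)
{
	return test_and_clear_thread_flag(TIF_FULL_RESTORE);
}

The same mechanism could cover the execve concern raised above:
loading a 32-bit binary would set the flag so the mode change returns
through IRET rather than SYSRET64.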