From: Andy Lutomirski
To: Brian Gerst
Cc: x86@kernel.org, Linux Kernel Mailing List, Borislav Petkov,
    Frédéric Weisbecker, Denys Vlasenko, Linus Torvalds, Andy Lutomirski
Subject: [PATCH] x86/entry/64: Remove duplicate syscall table for fast path
Date: Wed, 9 Dec 2015 11:30:45 -0800
Message-Id: <5c485c72545e8a3f803bd65238560026f794e80a.1449689202.git.luto@kernel.org>
In-Reply-To: <1449666173-15366-1-git-send-email-brgerst@gmail.com>
References: <1449666173-15366-1-git-send-email-brgerst@gmail.com>

Instead of using a duplicate syscall table for the fast path, create
stubs for the syscalls that need pt_regs; the stubs dispatch based on
the call site.

This is very likely to introduce a mispredicted branch in all such
syscalls, but I think that's fine -- all of them are already very slow.

Heavily based on a patch from Brian Gerst [1].

[1] http://lkml.kernel.org/g/1449666173-15366-1-git-send-email-brgerst@gmail.com

Signed-off-by: Brian Gerst
Cc: the arch/x86 maintainers
Cc: Linux Kernel Mailing List
Cc: Borislav Petkov
Cc: Frédéric Weisbecker
Cc: Denys Vlasenko
Cc: Linus Torvalds
Signed-off-by: Andy Lutomirski
---

Brian, here's a counter-proposal.  It's derived from your patch, but it
works differently.  If people like this, I'll send a new version of the
whole series that includes it at the end.
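(Aside, in case the token pasting in the diff below is hard to read: the
"qual" column of the syscall table selects either the bare symbol or its
ptregs_ stub.  Here's a stand-alone userspace sketch of the same trick --
toy syscall numbers, with plain C functions standing in for the asm
stubs, purely illustrative:

#include <stdio.h>

typedef long (*sys_call_ptr_t)(void);

/* Stand-ins: sys_read needs no pt_regs; sys_execve does, so the real
 * patch routes it through an asm stub named ptregs_sys_execve. */
static long sys_read(void)          { return 0; }
static long ptregs_sys_execve(void) { return 1; }

/* Same macros as the patch: an empty qual picks the bare symbol, and a
 * ptregs qual pastes the ptregs_ prefix onto it. */
#define __SYSCALL_64_QUAL_(sym) sym
#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_##sym
#define __SYSCALL_64(nr, sym, qual) [nr] = __SYSCALL_64_QUAL_##qual(sym),

static const sys_call_ptr_t table[] = {
	__SYSCALL_64(0, sys_read, )		/* -> [0] = sys_read, */
	__SYSCALL_64(1, sys_execve, ptregs)	/* -> [1] = ptregs_sys_execve, */
};

int main(void)
{
	printf("%ld %ld\n", table[0](), table[1]());	/* prints "0 1" */
	return 0;
}

That way a single sys_call_table works from both the fast path and the
slow path; only the stubs care where they were called from.)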
 arch/x86/entry/entry_64.S   | 49 ++++++++++++++++++++++++++++++++++++++-------
 arch/x86/entry/syscall_64.c | 25 +++++------------------
 2 files changed, 47 insertions(+), 27 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 1ab5362f241d..16779b52419e 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -188,7 +188,15 @@ entry_SYSCALL_64_fastpath:
 #endif
 	ja	1f				/* return -ENOSYS (already in pt_regs->ax) */
 	movq	%r10, %rcx
-	call	*sys_call_table_fastpath_64(, %rax, 8)
+
+	/*
+	 * This call instruction is handled specially in stub_ptregs_64.
+	 * It might end up jumping to the slow path.  If it jumps, RAX
+	 * and R11 are clobbered.
+	 */
+	call	*sys_call_table(, %rax, 8)
+.Lentry_SYSCALL_64_after_fastpath_call:
+
 	movq	%rax, RAX(%rsp)
 1:
 
@@ -306,15 +314,42 @@ END(entry_SYSCALL_64)
 
 ENTRY(stub_ptregs_64)
 	/*
-	 * Syscalls marked as needing ptregs that go through the fast path
-	 * land here.  We transfer to the slow path.
+	 * Syscalls marked as needing ptregs land here.
+	 * If we are on the fast path, we need to save the extra regs.
+	 * If we are on the slow path, the extra regs are already saved.
+	 *
+	 * RAX stores a pointer to the C function implementing the syscall.
+	 *
+	 * Either way, we can safely clobber RAX (the return value will
+	 * overwrite it anyway) and R11 (callee-clobbered and never used
+	 * to pass an argument).
 	 */
-	DISABLE_INTERRUPTS(CLBR_NONE)
-	TRACE_IRQS_OFF
-	addq	$8, %rsp
-	jmp	entry_SYSCALL64_slow_path
+	leaq	.Lentry_SYSCALL_64_after_fastpath_call(%rip), %r11
+	cmpq	%r11, (%rsp)
+	jne	1f
+
+	/* Called from fast path -- pop return address and jump to slow path */
+	popq	%rax
+	jmp	entry_SYSCALL64_slow_path
+
+1:
+	/* Called from C -- tail-call the real syscall implementation */
+	jmp	*%rax
 END(stub_ptregs_64)
 
+.macro ptregs_stub func
+ENTRY(ptregs_\func)
+	leaq	\func(%rip), %rax
+	jmp	stub_ptregs_64
+END(ptregs_\func)
+.endm
+
+/* Instantiate ptregs_stub for each ptregs-using syscall */
+#define __SYSCALL_64_QUAL_(sym)
+#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_stub sym
+#define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(sym)
+#include <asm/syscalls_64.h>
+
 /*
  * A newly forked process directly context switches into this address.
  *
diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
index 601745c667ce..9dbc5abb6162 100644
--- a/arch/x86/entry/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
@@ -6,11 +6,14 @@
 #include <asm/asm-offsets.h>
 #include <asm/syscall.h>
 
-#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
+#define __SYSCALL_64_QUAL_(sym) sym
+#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_##sym
+
+#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long __SYSCALL_64_QUAL_##qual(sym)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
 #include <asm/syscalls_64.h>
 #undef __SYSCALL_64
 
-#define __SYSCALL_64(nr, sym, qual) [nr] = sym,
+#define __SYSCALL_64(nr, sym, qual) [nr] = __SYSCALL_64_QUAL_##qual(sym),
 
 extern long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
 
@@ -22,21 +25,3 @@ asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
 	[0 ... __NR_syscall_max] = &sys_ni_syscall,
 #include <asm/syscalls_64.h>
 };
-
-#undef __SYSCALL_64
-
-extern long stub_ptregs_64(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
-
-#define __SYSCALL_64_QUAL_(nr, sym) [nr] = sym,
-#define __SYSCALL_64_QUAL_ptregs(nr, sym) [nr] = stub_ptregs_64,
-
-#define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(nr, sym)
-
-asmlinkage const sys_call_ptr_t sys_call_table_fastpath_64[__NR_syscall_max+1] = {
-	/*
-	 * Smells like a compiler bug -- it doesn't work
-	 * when the & below is removed.
-	 */
-	[0 ... __NR_syscall_max] = &sys_ni_syscall,
-#include <asm/syscalls_64.h>
-};
-- 
2.5.0
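
P.S. For anyone who hasn't seen it before: the
"[0 ... __NR_syscall_max] = &sys_ni_syscall," line that survives in
syscall_64.c uses GCC's range-designator extension, and the designated
initializers pulled in by the subsequent #include override that default.
That's why the table only has to spell out the syscalls that exist.  A
minimal stand-alone illustration (made-up table size and functions,
GCC/Clang only):

#include <stdio.h>

typedef long (*sys_call_ptr_t)(void);

static long sys_ni_syscall(void) { return -38; }	/* -ENOSYS */
static long sys_foo(void)        { return 0; }		/* made-up "real" entry */

#define NR_MAX 7

static const sys_call_ptr_t table[NR_MAX + 1] = {
	[0 ... NR_MAX] = &sys_ni_syscall,	/* default every slot */
	[3] = &sys_foo,				/* a later designator overrides */
};

int main(void)
{
	printf("%ld %ld\n", table[3](), table[5]());	/* prints "0 -38" */
	return 0;
}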