From: Andy Lutomirski
To: Brian Gerst
Cc: x86@kernel.org, Linux Kernel Mailing List, Borislav Petkov,
    Frédéric Weisbecker, Denys Vlasenko, Linus Torvalds, Andy Lutomirski
Subject: [PATCH] x86/entry/64: Remove duplicate syscall table for fast path
Date: Wed, 9 Dec 2015 11:30:45 -0800
Message-Id: <5c485c72545e8a3f803bd65238560026f794e80a.1449689202.git.luto@kernel.org>
In-Reply-To: <1449666173-15366-1-git-send-email-brgerst@gmail.com>
References: <1449666173-15366-1-git-send-email-brgerst@gmail.com>

Instead of using a duplicate syscall table for the fast path, create
stubs for the syscalls that need pt_regs; the stubs dispatch based on
the call site.

This is very likely to introduce a mispredicted branch in all such
syscalls, but I think that's fine -- all of them are already very slow.

Heavily based on a patch from Brian Gerst [1].

[1] http://lkml.kernel.org/g/1449666173-15366-1-git-send-email-brgerst@gmail.com

Signed-off-by: Brian Gerst
Cc: the arch/x86 maintainers
Cc: Linux Kernel Mailing List
Cc: Borislav Petkov
Cc: Frédéric Weisbecker
Cc: Denys Vlasenko
Cc: Linus Torvalds
Signed-off-by: Andy Lutomirski
---

Brian, here's a counter-proposal.  It's derived from your patch, but it
works differently.  If people like this, I'll send a new version of the
whole series that includes it at the end.
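(Aside, in case the token pasting in the diff below is hard to read: the
"qual" column of the syscall table selects either the bare symbol or its
ptregs_ stub.  Here's a stand-alone userspace sketch of the same trick --
toy syscall numbers, with plain C functions standing in for the asm
stubs, purely illustrative:

#include <stdio.h>

typedef long (*sys_call_ptr_t)(void);

/* Stand-ins: sys_read needs no pt_regs; sys_execve does, so the real
 * patch routes it through an asm stub named ptregs_sys_execve. */
static long sys_read(void)          { return 0; }
static long ptregs_sys_execve(void) { return 1; }

/* Same macros as the patch: an empty qual picks the bare symbol, and a
 * ptregs qual pastes the ptregs_ prefix onto it. */
#define __SYSCALL_64_QUAL_(sym) sym
#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_##sym
#define __SYSCALL_64(nr, sym, qual) [nr] = __SYSCALL_64_QUAL_##qual(sym),

static const sys_call_ptr_t table[] = {
	__SYSCALL_64(0, sys_read, )		/* -> [0] = sys_read, */
	__SYSCALL_64(1, sys_execve, ptregs)	/* -> [1] = ptregs_sys_execve, */
};

int main(void)
{
	printf("%ld %ld\n", table[0](), table[1]());	/* prints "0 1" */
	return 0;
}

That way a single sys_call_table works from both the fast path and the
slow path; only the stubs care where they were called from.)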
 arch/x86/entry/entry_64.S   | 49 ++++++++++++++++++++++++++++++++++++++-------
 arch/x86/entry/syscall_64.c | 25 +++++------------------
 2 files changed, 47 insertions(+), 27 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 1ab5362f241d..16779b52419e 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -188,7 +188,15 @@ entry_SYSCALL_64_fastpath:
 #endif
 	ja	1f				/* return -ENOSYS (already in pt_regs->ax) */
 	movq	%r10, %rcx
-	call	*sys_call_table_fastpath_64(, %rax, 8)
+
+	/*
+	 * This call instruction is handled specially in stub_ptregs_64.
+	 * It might end up jumping to the slow path.  If it jumps, RAX
+	 * and R11 are clobbered.
+	 */
+	call	*sys_call_table(, %rax, 8)
+.Lentry_SYSCALL_64_after_fastpath_call:
+
 	movq	%rax, RAX(%rsp)
 1:
 
@@ -306,15 +314,42 @@ END(entry_SYSCALL_64)
 
 ENTRY(stub_ptregs_64)
 	/*
-	 * Syscalls marked as needing ptregs that go through the fast path
-	 * land here.  We transfer to the slow path.
+	 * Syscalls marked as needing ptregs land here.
+	 * If we are on the fast path, we need to save the extra regs.
+	 * If we are on the slow path, the extra regs are already saved.
+	 *
+	 * RAX stores a pointer to the C function implementing the syscall.
+	 *
+	 * Either way, we can safely clobber RAX (the return value will
+	 * overwrite it anyway) and R11 (callee-clobbered and never used
+	 * to pass an argument).
 	 */
-	DISABLE_INTERRUPTS(CLBR_NONE)
-	TRACE_IRQS_OFF
-	addq	$8, %rsp
-	jmp	entry_SYSCALL64_slow_path
+	leaq	.Lentry_SYSCALL_64_after_fastpath_call(%rip), %r11
+	cmpq	%r11, (%rsp)
+	jne	1f
+
+	/* Called from fast path -- pop return address and jump to slow path */
+	popq	%rax
+	jmp	entry_SYSCALL64_slow_path
+
+1:
+	/* Called from C -- tail-call the real syscall implementation */
+	jmp	*%rax
 END(stub_ptregs_64)
 
+.macro ptregs_stub func
+ENTRY(ptregs_\func)
+	leaq	\func(%rip), %rax
+	jmp	stub_ptregs_64
+END(ptregs_\func)
+.endm
+
+/* Instantiate ptregs_stub for each ptregs-using syscall */
+#define __SYSCALL_64_QUAL_(sym)
+#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_stub sym
+#define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(sym)
+#include <asm/syscalls_64.h>
+
 /*
  * A newly forked process directly context switches into this address.
  *
diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
index 601745c667ce..9dbc5abb6162 100644
--- a/arch/x86/entry/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
@@ -6,11 +6,14 @@
 #include <asm/asm-offsets.h>
 #include <asm/syscall.h>
 
-#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
+#define __SYSCALL_64_QUAL_(sym) sym
+#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_##sym
+
+#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long __SYSCALL_64_QUAL_##qual(sym)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
 #include <asm/syscalls_64.h>
 #undef __SYSCALL_64
 
-#define __SYSCALL_64(nr, sym, qual) [nr] = sym,
+#define __SYSCALL_64(nr, sym, qual) [nr] = __SYSCALL_64_QUAL_##qual(sym),
 
 extern long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
 
@@ -22,21 +25,3 @@ asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
 	[0 ... __NR_syscall_max] = &sys_ni_syscall,
 #include <asm/syscalls_64.h>
 };
-
-#undef __SYSCALL_64
-
-extern long stub_ptregs_64(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
-
-#define __SYSCALL_64_QUAL_(nr, sym) [nr] = sym,
-#define __SYSCALL_64_QUAL_ptregs(nr, sym) [nr] = stub_ptregs_64,
-
-#define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(nr, sym)
-
-asmlinkage const sys_call_ptr_t sys_call_table_fastpath_64[__NR_syscall_max+1] = {
-	/*
-	 * Smells like a compiler bug -- it doesn't work
-	 * when the & below is removed.
-	 */
-	[0 ... __NR_syscall_max] = &sys_ni_syscall,
-#include <asm/syscalls_64.h>
-};
-- 
2.5.0
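
P.S. For anyone who hasn't seen it before: the
"[0 ... __NR_syscall_max] = &sys_ni_syscall," line that survives in
syscall_64.c uses GCC's range-designator extension, and the designated
initializers pulled in by the subsequent #include override that default.
That's why the table only has to spell out the syscalls that exist.  A
minimal stand-alone illustration (made-up table size and functions,
GCC/Clang only):

#include <stdio.h>

typedef long (*sys_call_ptr_t)(void);

static long sys_ni_syscall(void) { return -38; }	/* -ENOSYS */
static long sys_foo(void)        { return 0; }		/* made-up "real" entry */

#define NR_MAX 7

static const sys_call_ptr_t table[NR_MAX + 1] = {
	[0 ... NR_MAX] = &sys_ni_syscall,	/* default every slot */
	[3] = &sys_foo,				/* a later designator overrides */
};

int main(void)
{
	printf("%ld %ld\n", table[3](), table[5]());	/* prints "0 -38" */
	return 0;
}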