Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752334AbdHNCob (ORCPT ); Sun, 13 Aug 2017 22:44:31 -0400 Received: from mail-it0-f67.google.com ([209.85.214.67]:35016 "EHLO mail-it0-f67.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752263AbdHNCo3 (ORCPT ); Sun, 13 Aug 2017 22:44:29 -0400 MIME-Version: 1.0 In-Reply-To: <7c88ed36805d36841ab03ec3b48b4122c4418d71.1502164668.git.luto@kernel.org> References: <7c88ed36805d36841ab03ec3b48b4122c4418d71.1502164668.git.luto@kernel.org> From: Brian Gerst Date: Sun, 13 Aug 2017 22:44:28 -0400 Message-ID: Subject: Re: [PATCH v2] x86/xen/64: Rearrange the SYSCALL entries To: Andy Lutomirski Cc: X86 ML , Juergen Gross , Borislav Petkov , Linux Kernel Mailing List , "xen-devel@lists.xenproject.org" , Boris Ostrovsky , "H. Peter Anvin" Content-Type: text/plain; charset="UTF-8" Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 5272 Lines: 137 On Mon, Aug 7, 2017 at 11:59 PM, Andy Lutomirski wrote: > Xen's raw SYSCALL entries are much less weird than native. Rather > than fudging them to look like native entries, use the Xen-provided > stack frame directly. > > This lets us eliminate entry_SYSCALL_64_after_swapgs and two uses of > the SWAPGS_UNSAFE_STACK paravirt hook. The SYSENTER code would > benefit from similar treatment. > > This makes one change to the native code path: the compat > instruction that clears the high 32 bits of %rax is moved slightly > later. I'd be surprised if this affects performance at all. > > Signed-off-by: Andy Lutomirski > --- > > Changes from v1 (which I never actually emailed): > - Fix zero-extension in the compat case. 
> > arch/x86/entry/entry_64.S | 9 ++------- > arch/x86/entry/entry_64_compat.S | 7 +++---- > arch/x86/xen/xen-asm_64.S | 23 +++++++++-------------- > 3 files changed, 14 insertions(+), 25 deletions(-) > > diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S > index aa58155187c5..7cee92cf807f 100644 > --- a/arch/x86/entry/entry_64.S > +++ b/arch/x86/entry/entry_64.S > @@ -142,14 +142,8 @@ ENTRY(entry_SYSCALL_64) > * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON, > * it is too small to ever cause noticeable irq latency. > */ > - SWAPGS_UNSAFE_STACK > - /* > - * A hypervisor implementation might want to use a label > - * after the swapgs, so that it can do the swapgs > - * for the guest and jump here on syscall. > - */ > -GLOBAL(entry_SYSCALL_64_after_swapgs) > > + swapgs > movq %rsp, PER_CPU_VAR(rsp_scratch) > movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp > > @@ -161,6 +155,7 @@ GLOBAL(entry_SYSCALL_64_after_swapgs) > pushq %r11 /* pt_regs->flags */ > pushq $__USER_CS /* pt_regs->cs */ > pushq %rcx /* pt_regs->ip */ > +GLOBAL(entry_SYSCALL_64_after_hwframe) > pushq %rax /* pt_regs->orig_ax */ > pushq %rdi /* pt_regs->di */ > pushq %rsi /* pt_regs->si */ > diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S > index e1721dafbcb1..5314d7b8e5ad 100644 > --- a/arch/x86/entry/entry_64_compat.S > +++ b/arch/x86/entry/entry_64_compat.S > @@ -183,21 +183,20 @@ ENDPROC(entry_SYSENTER_compat) > */ > ENTRY(entry_SYSCALL_compat) > /* Interrupts are off on entry. */ > - SWAPGS_UNSAFE_STACK > + swapgs > > /* Stash user ESP and switch to the kernel stack. 
*/ > movl %esp, %r8d > movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp > > - /* Zero-extending 32-bit regs, do not remove */ > - movl %eax, %eax > - > /* Construct struct pt_regs on stack */ > pushq $__USER32_DS /* pt_regs->ss */ > pushq %r8 /* pt_regs->sp */ > pushq %r11 /* pt_regs->flags */ > pushq $__USER32_CS /* pt_regs->cs */ > pushq %rcx /* pt_regs->ip */ > +GLOBAL(entry_SYSCALL_compat_after_hwframe) > + movl %eax, %eax /* discard orig_ax high bits */ > pushq %rax /* pt_regs->orig_ax */ > pushq %rdi /* pt_regs->di */ > pushq %rsi /* pt_regs->si */ > diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S > index c3df43141e70..a8a4f4c460a6 100644 > --- a/arch/x86/xen/xen-asm_64.S > +++ b/arch/x86/xen/xen-asm_64.S > @@ -82,34 +82,29 @@ RELOC(xen_sysret64, 1b+1) > * rip > * r11 > * rsp->rcx > - * > - * In all the entrypoints, we undo all that to make it look like a > - * CPU-generated syscall/sysenter and jump to the normal entrypoint. > */ > > -.macro undo_xen_syscall > - mov 0*8(%rsp), %rcx > - mov 1*8(%rsp), %r11 > - mov 5*8(%rsp), %rsp > -.endm > - > /* Normal 64-bit system call target */ > ENTRY(xen_syscall_target) > - undo_xen_syscall > - jmp entry_SYSCALL_64_after_swapgs > + popq %rcx > + popq %r11 > + jmp entry_SYSCALL_64_after_hwframe > ENDPROC(xen_syscall_target) > > #ifdef CONFIG_IA32_EMULATION > > /* 32-bit compat syscall target */ > ENTRY(xen_syscall32_target) > - undo_xen_syscall > - jmp entry_SYSCALL_compat > + popq %rcx > + popq %r11 > + jmp entry_SYSCALL_compat_after_hwframe > ENDPROC(xen_syscall32_target) > > /* 32-bit compat sysenter target */ > ENTRY(xen_sysenter_target) > - undo_xen_syscall > + mov 0*8(%rsp), %rcx > + mov 1*8(%rsp), %r11 > + mov 5*8(%rsp), %rsp > jmp entry_SYSENTER_compat > ENDPROC(xen_sysenter_target) This patch causes the iopl_32 and ioperm_32 self-tests to fail on a 64-bit PV kernel. The 64-bit versions pass. 
The failing test gets a segmentation fault after "parent: write to 0x80 (should fail)", and the fault isn't caught by the signal handler; the test just exits back to the shell. The tests pass after reverting this patch. -- Brian Gerst