Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S933565AbbDJPpQ (ORCPT ); Fri, 10 Apr 2015 11:45:16 -0400 Received: from mail-lb0-f170.google.com ([209.85.217.170]:32903 "EHLO mail-lb0-f170.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S933321AbbDJPpO (ORCPT ); Fri, 10 Apr 2015 11:45:14 -0400 MIME-Version: 1.0 In-Reply-To: <1428679988-9592-2-git-send-email-dvlasenk@redhat.com> References: <1428679988-9592-1-git-send-email-dvlasenk@redhat.com> <1428679988-9592-2-git-send-email-dvlasenk@redhat.com> From: Andy Lutomirski Date: Fri, 10 Apr 2015 08:44:52 -0700 Message-ID: Subject: Re: [PATCH] x86/asm/entry/32: Update ENOSYS handling to match 64-bit logic To: Denys Vlasenko Cc: Ingo Molnar , Linus Torvalds , Steven Rostedt , Borislav Petkov , "H. Peter Anvin" , Oleg Nesterov , Frederic Weisbecker , Alexei Starovoitov , Will Drewry , Kees Cook , X86 ML , "linux-kernel@vger.kernel.org" Content-Type: text/plain; charset=UTF-8 Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 9382 Lines: 229 On Fri, Apr 10, 2015 at 8:33 AM, Denys Vlasenko wrote: > Sometime ago Andy changed 64-bit syscall logic so that pt_regs->ax is > initially set to -ENOSYS, and on exit from syscall, it is updated with > actual return value. This simplified logic there. > > This patch does the same for 32-bit syscall entry points. > > The check for %rax being too big is moved to be just before > the call insn which dispatches execution through syscall table. > There is no way to accidentally skip this check now by jumping > to a label after it. This allows to remove redundant checks > after e.g. ptrace. > > If %rax is too big, we just skip over the (call, write %rax to pt_regs->ax) > insn pair. pt_regs->ax remains set to -ENOSYS, and it gets returned > to userspace. This looks okay, but I'll read it again later today. At the very least, though, this should be tested against the seccomp test suite. --Andy > > Similar to 64-bit code, this eliminates "ia32_badsys" code path. > > Run-tested. > > Signed-off-by: Denys Vlasenko > CC: Linus Torvalds > CC: Steven Rostedt > CC: Ingo Molnar > CC: Borislav Petkov > CC: "H. Peter Anvin" > CC: Andy Lutomirski > CC: Oleg Nesterov > CC: Frederic Weisbecker > CC: Alexei Starovoitov > CC: Will Drewry > CC: Kees Cook > CC: x86@kernel.org > CC: linux-kernel@vger.kernel.org > --- > arch/x86/ia32/ia32entry.S | 44 +++++++++++++++----------------------------- > 1 file changed, 15 insertions(+), 29 deletions(-) > > diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S > index a821b1c..29ab1c2 100644 > --- a/arch/x86/ia32/ia32entry.S > +++ b/arch/x86/ia32/ia32entry.S > @@ -142,7 +142,7 @@ ENTRY(ia32_sysenter_target) > pushq_cfi_reg rsi /* pt_regs->si */ > pushq_cfi_reg rdx /* pt_regs->dx */ > pushq_cfi_reg rcx /* pt_regs->cx */ > - pushq_cfi_reg rax /* pt_regs->ax */ > + pushq_cfi $-ENOSYS /* pt_regs->ax */ > cld > sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */ > CFI_ADJUST_CFA_OFFSET 10*8 > @@ -169,8 +169,6 @@ sysenter_flags_fixed: > testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) > CFI_REMEMBER_STATE > jnz sysenter_tracesys > - cmpq $(IA32_NR_syscalls-1),%rax > - ja ia32_badsys > sysenter_do_call: > /* 32bit syscall -> 64bit C ABI argument conversion */ > movl %edi,%r8d /* arg5 */ > @@ -179,8 +177,11 @@ sysenter_do_call: > movl %ebx,%edi /* arg1 */ > movl %edx,%edx /* arg3 (zero extension) */ > sysenter_dispatch: > + cmpq $(IA32_NR_syscalls-1),%rax > + ja 1f > call *ia32_sys_call_table(,%rax,8) > movq %rax,RAX(%rsp) > +1: > DISABLE_INTERRUPTS(CLBR_NONE) > TRACE_IRQS_OFF > testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) > @@ -247,9 +248,7 @@ sysexit_from_sys_call: > movl %ebx,%esi /* 2nd arg: 1st syscall arg */ > movl %eax,%edi /* 1st arg: syscall number */ > call __audit_syscall_entry > - movl RAX(%rsp),%eax /* reload syscall number */ > - cmpq $(IA32_NR_syscalls-1),%rax > - ja ia32_badsys > + movl ORIG_RAX(%rsp),%eax /* reload syscall number */ > movl %ebx,%edi /* reload 1st syscall arg */ > movl RCX(%rsp),%esi /* reload 2nd syscall arg */ > movl RDX(%rsp),%edx /* reload 3rd syscall arg */ > @@ -269,7 +268,7 @@ sysexit_from_sys_call: > 1: setbe %al /* 1 if error, 0 if not */ > movzbl %al,%edi /* zero-extend that into %edi */ > call __audit_syscall_exit > - movq RAX(%rsp),%rax /* reload syscall return value */ > + movq ORIG_RAX(%rsp),%rax /* reload syscall return value */ > movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi > DISABLE_INTERRUPTS(CLBR_NONE) > TRACE_IRQS_OFF > @@ -300,13 +299,10 @@ sysenter_tracesys: > #endif > SAVE_EXTRA_REGS > CLEAR_RREGS > - movq $-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */ > movq %rsp,%rdi /* &pt_regs -> arg1 */ > call syscall_trace_enter > LOAD_ARGS32 /* reload args from stack in case ptrace changed it */ > RESTORE_EXTRA_REGS > - cmpq $(IA32_NR_syscalls-1),%rax > - ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */ > jmp sysenter_do_call > CFI_ENDPROC > ENDPROC(ia32_sysenter_target) > @@ -376,7 +372,7 @@ ENTRY(ia32_cstar_target) > pushq_cfi_reg rdx /* pt_regs->dx */ > pushq_cfi_reg rbp /* pt_regs->cx */ > movl %ebp,%ecx > - pushq_cfi_reg rax /* pt_regs->ax */ > + pushq_cfi $-ENOSYS /* pt_regs->ax */ > sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */ > CFI_ADJUST_CFA_OFFSET 10*8 > > @@ -392,8 +388,6 @@ ENTRY(ia32_cstar_target) > testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) > CFI_REMEMBER_STATE > jnz cstar_tracesys > - cmpq $IA32_NR_syscalls-1,%rax > - ja ia32_badsys > cstar_do_call: > /* 32bit syscall -> 64bit C ABI argument conversion */ > movl %edi,%r8d /* arg5 */ > @@ -402,8 +396,11 @@ cstar_do_call: > movl %ebx,%edi /* arg1 */ > movl %edx,%edx /* arg3 (zero extension) */ > cstar_dispatch: > + cmpq $(IA32_NR_syscalls-1),%rax > + ja 1f > call *ia32_sys_call_table(,%rax,8) > movq %rax,RAX(%rsp) > +1: > DISABLE_INTERRUPTS(CLBR_NONE) > TRACE_IRQS_OFF > testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) > @@ -450,14 +447,11 @@ cstar_tracesys: > xchgl %r9d,%ebp > SAVE_EXTRA_REGS > CLEAR_RREGS r9 > - movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ > movq %rsp,%rdi /* &pt_regs -> arg1 */ > call syscall_trace_enter > LOAD_ARGS32 1 /* reload args from stack in case ptrace changed it */ > RESTORE_EXTRA_REGS > xchgl %ebp,%r9d > - cmpq $(IA32_NR_syscalls-1),%rax > - ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */ > jmp cstar_do_call > END(ia32_cstar_target) > > @@ -516,7 +510,7 @@ ENTRY(ia32_syscall) > pushq_cfi_reg rsi /* pt_regs->si */ > pushq_cfi_reg rdx /* pt_regs->dx */ > pushq_cfi_reg rcx /* pt_regs->cx */ > - pushq_cfi_reg rax /* pt_regs->ax */ > + pushq_cfi $-ENOSYS /* pt_regs->ax */ > cld > sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */ > CFI_ADJUST_CFA_OFFSET 10*8 > @@ -524,8 +518,6 @@ ENTRY(ia32_syscall) > orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) > testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) > jnz ia32_tracesys > - cmpq $(IA32_NR_syscalls-1),%rax > - ja ia32_badsys > ia32_do_call: > /* 32bit syscall -> 64bit C ABI argument conversion */ > movl %edi,%r8d /* arg5 */ > @@ -533,9 +525,12 @@ ia32_do_call: > xchg %ecx,%esi /* rsi:arg2, rcx:arg4 */ > movl %ebx,%edi /* arg1 */ > movl %edx,%edx /* arg3 (zero extension) */ > + cmpq $(IA32_NR_syscalls-1),%rax > + ja 1f > call *ia32_sys_call_table(,%rax,8) # xxx: rip relative > ia32_sysret: > movq %rax,RAX(%rsp) > +1: > ia32_ret_from_sys_call: > CLEAR_RREGS > jmp int_ret_from_sys_call > @@ -543,23 +538,14 @@ ia32_ret_from_sys_call: > ia32_tracesys: > SAVE_EXTRA_REGS > CLEAR_RREGS > - movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ > movq %rsp,%rdi /* &pt_regs -> arg1 */ > call syscall_trace_enter > LOAD_ARGS32 /* reload args from stack in case ptrace changed it */ > RESTORE_EXTRA_REGS > - cmpq $(IA32_NR_syscalls-1),%rax > - ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */ > jmp ia32_do_call > + CFI_ENDPROC > END(ia32_syscall) > > -ia32_badsys: > - movq $0,ORIG_RAX(%rsp) > - movq $-ENOSYS,%rax > - jmp ia32_sysret > - > - CFI_ENDPROC > - > .macro PTREGSCALL label, func > ALIGN > GLOBAL(\label) > -- > 1.8.1.4 > -- Andy Lutomirski AMA Capital Management, LLC -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/