2015-04-10 15:33:16

by Denys Vlasenko

[permalink] [raw]
Subject: [PATCH] x86/asm/entry/64: 32-bit execve stubs are identical to x32 ones, merge them.

Suggested by Borislav Petkov.

Run-tested.

Signed-off-by: Denys Vlasenko <[email protected]>
CC: Linus Torvalds <[email protected]>
CC: Steven Rostedt <[email protected]>
CC: Ingo Molnar <[email protected]>
CC: Borislav Petkov <[email protected]>
CC: "H. Peter Anvin" <[email protected]>
CC: Andy Lutomirski <[email protected]>
CC: Oleg Nesterov <[email protected]>
CC: Frederic Weisbecker <[email protected]>
CC: Alexei Starovoitov <[email protected]>
CC: Will Drewry <[email protected]>
CC: Kees Cook <[email protected]>
CC: [email protected]
CC: [email protected]
---
arch/x86/kernel/entry_64.S | 23 +++++------------------
1 file changed, 5 insertions(+), 18 deletions(-)

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index c7b2384..3bdfdcd 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -524,40 +524,27 @@ GLOBAL(stub_execveat)
CFI_ENDPROC
END(stub_execveat)

-#ifdef CONFIG_X86_X32_ABI
+#if defined(CONFIG_X86_X32_ABI) || defined(CONFIG_IA32_EMULATION)
.align 8
GLOBAL(stub_x32_execve)
+GLOBAL(stub32_execve)
CFI_STARTPROC
DEFAULT_FRAME 0, 8
call compat_sys_execve
jmp return_from_execve
CFI_ENDPROC
+END(stub32_execve)
END(stub_x32_execve)
.align 8
GLOBAL(stub_x32_execveat)
- CFI_STARTPROC
- DEFAULT_FRAME 0, 8
- call compat_sys_execveat
- jmp return_from_execve
- CFI_ENDPROC
-END(stub_x32_execveat)
-#endif
-
-#ifdef CONFIG_IA32_EMULATION
- .align 8
-GLOBAL(stub32_execve)
- CFI_STARTPROC
- call compat_sys_execve
- jmp return_from_execve
- CFI_ENDPROC
-END(stub32_execve)
- .align 8
GLOBAL(stub32_execveat)
CFI_STARTPROC
+ DEFAULT_FRAME 0, 8
call compat_sys_execveat
jmp return_from_execve
CFI_ENDPROC
END(stub32_execveat)
+END(stub_x32_execveat)
#endif

/*
--
1.8.1.4


2015-04-10 15:33:47

by Denys Vlasenko

[permalink] [raw]
Subject: [PATCH] x86/asm/entry/32: Update ENOSYS handling to match 64-bit logic

Some time ago Andy changed 64-bit syscall logic so that pt_regs->ax is
initially set to -ENOSYS, and on exit from syscall, it is updated with
actual return value. This simplified logic there.

This patch does the same for 32-bit syscall entry points.

The check for %rax being too big is moved to be just before
the call insn which dispatches execution through syscall table.
There is no way to accidentally skip this check now by jumping
to a label after it. This allows us to remove redundant checks
after e.g. ptrace.

If %rax is too big, we just skip over the (call, write %rax to pt_regs->ax)
insn pair. pt_regs->ax remains set to -ENOSYS, and it gets returned
to userspace.

Similar to 64-bit code, this eliminates "ia32_badsys" code path.

Run-tested.

Signed-off-by: Denys Vlasenko <[email protected]>
CC: Linus Torvalds <[email protected]>
CC: Steven Rostedt <[email protected]>
CC: Ingo Molnar <[email protected]>
CC: Borislav Petkov <[email protected]>
CC: "H. Peter Anvin" <[email protected]>
CC: Andy Lutomirski <[email protected]>
CC: Oleg Nesterov <[email protected]>
CC: Frederic Weisbecker <[email protected]>
CC: Alexei Starovoitov <[email protected]>
CC: Will Drewry <[email protected]>
CC: Kees Cook <[email protected]>
CC: [email protected]
CC: [email protected]
---
arch/x86/ia32/ia32entry.S | 44 +++++++++++++++-----------------------------
1 file changed, 15 insertions(+), 29 deletions(-)

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index a821b1c..29ab1c2 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -142,7 +142,7 @@ ENTRY(ia32_sysenter_target)
pushq_cfi_reg rsi /* pt_regs->si */
pushq_cfi_reg rdx /* pt_regs->dx */
pushq_cfi_reg rcx /* pt_regs->cx */
- pushq_cfi_reg rax /* pt_regs->ax */
+ pushq_cfi $-ENOSYS /* pt_regs->ax */
cld
sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */
CFI_ADJUST_CFA_OFFSET 10*8
@@ -169,8 +169,6 @@ sysenter_flags_fixed:
testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
CFI_REMEMBER_STATE
jnz sysenter_tracesys
- cmpq $(IA32_NR_syscalls-1),%rax
- ja ia32_badsys
sysenter_do_call:
/* 32bit syscall -> 64bit C ABI argument conversion */
movl %edi,%r8d /* arg5 */
@@ -179,8 +177,11 @@ sysenter_do_call:
movl %ebx,%edi /* arg1 */
movl %edx,%edx /* arg3 (zero extension) */
sysenter_dispatch:
+ cmpq $(IA32_NR_syscalls-1),%rax
+ ja 1f
call *ia32_sys_call_table(,%rax,8)
movq %rax,RAX(%rsp)
+1:
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
@@ -247,9 +248,7 @@ sysexit_from_sys_call:
movl %ebx,%esi /* 2nd arg: 1st syscall arg */
movl %eax,%edi /* 1st arg: syscall number */
call __audit_syscall_entry
- movl RAX(%rsp),%eax /* reload syscall number */
- cmpq $(IA32_NR_syscalls-1),%rax
- ja ia32_badsys
+ movl ORIG_RAX(%rsp),%eax /* reload syscall number */
movl %ebx,%edi /* reload 1st syscall arg */
movl RCX(%rsp),%esi /* reload 2nd syscall arg */
movl RDX(%rsp),%edx /* reload 3rd syscall arg */
@@ -269,7 +268,7 @@ sysexit_from_sys_call:
1: setbe %al /* 1 if error, 0 if not */
movzbl %al,%edi /* zero-extend that into %edi */
call __audit_syscall_exit
- movq RAX(%rsp),%rax /* reload syscall return value */
+ movq ORIG_RAX(%rsp),%rax /* reload syscall return value */
movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
@@ -300,13 +299,10 @@ sysenter_tracesys:
#endif
SAVE_EXTRA_REGS
CLEAR_RREGS
- movq $-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */
movq %rsp,%rdi /* &pt_regs -> arg1 */
call syscall_trace_enter
LOAD_ARGS32 /* reload args from stack in case ptrace changed it */
RESTORE_EXTRA_REGS
- cmpq $(IA32_NR_syscalls-1),%rax
- ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */
jmp sysenter_do_call
CFI_ENDPROC
ENDPROC(ia32_sysenter_target)
@@ -376,7 +372,7 @@ ENTRY(ia32_cstar_target)
pushq_cfi_reg rdx /* pt_regs->dx */
pushq_cfi_reg rbp /* pt_regs->cx */
movl %ebp,%ecx
- pushq_cfi_reg rax /* pt_regs->ax */
+ pushq_cfi $-ENOSYS /* pt_regs->ax */
sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */
CFI_ADJUST_CFA_OFFSET 10*8

@@ -392,8 +388,6 @@ ENTRY(ia32_cstar_target)
testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
CFI_REMEMBER_STATE
jnz cstar_tracesys
- cmpq $IA32_NR_syscalls-1,%rax
- ja ia32_badsys
cstar_do_call:
/* 32bit syscall -> 64bit C ABI argument conversion */
movl %edi,%r8d /* arg5 */
@@ -402,8 +396,11 @@ cstar_do_call:
movl %ebx,%edi /* arg1 */
movl %edx,%edx /* arg3 (zero extension) */
cstar_dispatch:
+ cmpq $(IA32_NR_syscalls-1),%rax
+ ja 1f
call *ia32_sys_call_table(,%rax,8)
movq %rax,RAX(%rsp)
+1:
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
@@ -450,14 +447,11 @@ cstar_tracesys:
xchgl %r9d,%ebp
SAVE_EXTRA_REGS
CLEAR_RREGS r9
- movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
movq %rsp,%rdi /* &pt_regs -> arg1 */
call syscall_trace_enter
LOAD_ARGS32 1 /* reload args from stack in case ptrace changed it */
RESTORE_EXTRA_REGS
xchgl %ebp,%r9d
- cmpq $(IA32_NR_syscalls-1),%rax
- ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */
jmp cstar_do_call
END(ia32_cstar_target)

@@ -516,7 +510,7 @@ ENTRY(ia32_syscall)
pushq_cfi_reg rsi /* pt_regs->si */
pushq_cfi_reg rdx /* pt_regs->dx */
pushq_cfi_reg rcx /* pt_regs->cx */
- pushq_cfi_reg rax /* pt_regs->ax */
+ pushq_cfi $-ENOSYS /* pt_regs->ax */
cld
sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */
CFI_ADJUST_CFA_OFFSET 10*8
@@ -524,8 +518,6 @@ ENTRY(ia32_syscall)
orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
jnz ia32_tracesys
- cmpq $(IA32_NR_syscalls-1),%rax
- ja ia32_badsys
ia32_do_call:
/* 32bit syscall -> 64bit C ABI argument conversion */
movl %edi,%r8d /* arg5 */
@@ -533,9 +525,12 @@ ia32_do_call:
xchg %ecx,%esi /* rsi:arg2, rcx:arg4 */
movl %ebx,%edi /* arg1 */
movl %edx,%edx /* arg3 (zero extension) */
+ cmpq $(IA32_NR_syscalls-1),%rax
+ ja 1f
call *ia32_sys_call_table(,%rax,8) # xxx: rip relative
ia32_sysret:
movq %rax,RAX(%rsp)
+1:
ia32_ret_from_sys_call:
CLEAR_RREGS
jmp int_ret_from_sys_call
@@ -543,23 +538,14 @@ ia32_ret_from_sys_call:
ia32_tracesys:
SAVE_EXTRA_REGS
CLEAR_RREGS
- movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
movq %rsp,%rdi /* &pt_regs -> arg1 */
call syscall_trace_enter
LOAD_ARGS32 /* reload args from stack in case ptrace changed it */
RESTORE_EXTRA_REGS
- cmpq $(IA32_NR_syscalls-1),%rax
- ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */
jmp ia32_do_call
+ CFI_ENDPROC
END(ia32_syscall)

-ia32_badsys:
- movq $0,ORIG_RAX(%rsp)
- movq $-ENOSYS,%rax
- jmp ia32_sysret
-
- CFI_ENDPROC
-
.macro PTREGSCALL label, func
ALIGN
GLOBAL(\label)
--
1.8.1.4

2015-04-10 15:41:31

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH] x86/asm/entry/64: 32-bit execve stubs are identical to x32 ones, merge them.

On Fri, Apr 10, 2015 at 05:33:07PM +0200, Denys Vlasenko wrote:
> Siggested by Borislav Petkov.

I think you mean Brian Gerst here. :)

--
Regards/Gruss,
Boris.

ECO tip #101: Trim your mails when you reply.
--

2015-04-10 15:45:16

by Andy Lutomirski

[permalink] [raw]
Subject: Re: [PATCH] x86/asm/entry/32: Update ENOSYS handling to match 64-bit logic

On Fri, Apr 10, 2015 at 8:33 AM, Denys Vlasenko <[email protected]> wrote:
> Sometime ago Andy changed 64-bit syscall logic so that pt_regs->ax is
> initially set to -ENOSYS, and on exit from syscall, it is updated with
> actual return value. This simplified logic there.
>
> This patch does the same for 32-bit syscall entry points.
>
> The check for %rax being too big is moved to be just before
> the call insn which dispatches execution through syscall table.
> There is no way to accidentally skip this check now by jumping
> to a label after it. This allows to remove redundant checks
> after e.g. ptrace.
>
> If %rax is too big, we just skip over the (call, write %rax to pt_regs->ax)
> insn pair. pt_regs->ax remains set to -ENOSYS, and it gets returned
> to userspace.

This looks okay, but I'll read it again later today.

At the very least, though, this should be tested against the seccomp test suite.

--Andy

>
> Similar to 64-bit code, this eliminates "ia32_badsys" code path.
>
> Run-tested.
>
> Signed-off-by: Denys Vlasenko <[email protected]>
> CC: Linus Torvalds <[email protected]>
> CC: Steven Rostedt <[email protected]>
> CC: Ingo Molnar <[email protected]>
> CC: Borislav Petkov <[email protected]>
> CC: "H. Peter Anvin" <[email protected]>
> CC: Andy Lutomirski <[email protected]>
> CC: Oleg Nesterov <[email protected]>
> CC: Frederic Weisbecker <[email protected]>
> CC: Alexei Starovoitov <[email protected]>
> CC: Will Drewry <[email protected]>
> CC: Kees Cook <[email protected]>
> CC: [email protected]
> CC: [email protected]
> ---
> arch/x86/ia32/ia32entry.S | 44 +++++++++++++++-----------------------------
> 1 file changed, 15 insertions(+), 29 deletions(-)
>
> diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
> index a821b1c..29ab1c2 100644
> --- a/arch/x86/ia32/ia32entry.S
> +++ b/arch/x86/ia32/ia32entry.S
> @@ -142,7 +142,7 @@ ENTRY(ia32_sysenter_target)
> pushq_cfi_reg rsi /* pt_regs->si */
> pushq_cfi_reg rdx /* pt_regs->dx */
> pushq_cfi_reg rcx /* pt_regs->cx */
> - pushq_cfi_reg rax /* pt_regs->ax */
> + pushq_cfi $-ENOSYS /* pt_regs->ax */
> cld
> sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */
> CFI_ADJUST_CFA_OFFSET 10*8
> @@ -169,8 +169,6 @@ sysenter_flags_fixed:
> testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
> CFI_REMEMBER_STATE
> jnz sysenter_tracesys
> - cmpq $(IA32_NR_syscalls-1),%rax
> - ja ia32_badsys
> sysenter_do_call:
> /* 32bit syscall -> 64bit C ABI argument conversion */
> movl %edi,%r8d /* arg5 */
> @@ -179,8 +177,11 @@ sysenter_do_call:
> movl %ebx,%edi /* arg1 */
> movl %edx,%edx /* arg3 (zero extension) */
> sysenter_dispatch:
> + cmpq $(IA32_NR_syscalls-1),%rax
> + ja 1f
> call *ia32_sys_call_table(,%rax,8)
> movq %rax,RAX(%rsp)
> +1:
> DISABLE_INTERRUPTS(CLBR_NONE)
> TRACE_IRQS_OFF
> testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
> @@ -247,9 +248,7 @@ sysexit_from_sys_call:
> movl %ebx,%esi /* 2nd arg: 1st syscall arg */
> movl %eax,%edi /* 1st arg: syscall number */
> call __audit_syscall_entry
> - movl RAX(%rsp),%eax /* reload syscall number */
> - cmpq $(IA32_NR_syscalls-1),%rax
> - ja ia32_badsys
> + movl ORIG_RAX(%rsp),%eax /* reload syscall number */
> movl %ebx,%edi /* reload 1st syscall arg */
> movl RCX(%rsp),%esi /* reload 2nd syscall arg */
> movl RDX(%rsp),%edx /* reload 3rd syscall arg */
> @@ -269,7 +268,7 @@ sysexit_from_sys_call:
> 1: setbe %al /* 1 if error, 0 if not */
> movzbl %al,%edi /* zero-extend that into %edi */
> call __audit_syscall_exit
> - movq RAX(%rsp),%rax /* reload syscall return value */
> + movq ORIG_RAX(%rsp),%rax /* reload syscall return value */
> movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
> DISABLE_INTERRUPTS(CLBR_NONE)
> TRACE_IRQS_OFF
> @@ -300,13 +299,10 @@ sysenter_tracesys:
> #endif
> SAVE_EXTRA_REGS
> CLEAR_RREGS
> - movq $-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */
> movq %rsp,%rdi /* &pt_regs -> arg1 */
> call syscall_trace_enter
> LOAD_ARGS32 /* reload args from stack in case ptrace changed it */
> RESTORE_EXTRA_REGS
> - cmpq $(IA32_NR_syscalls-1),%rax
> - ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */
> jmp sysenter_do_call
> CFI_ENDPROC
> ENDPROC(ia32_sysenter_target)
> @@ -376,7 +372,7 @@ ENTRY(ia32_cstar_target)
> pushq_cfi_reg rdx /* pt_regs->dx */
> pushq_cfi_reg rbp /* pt_regs->cx */
> movl %ebp,%ecx
> - pushq_cfi_reg rax /* pt_regs->ax */
> + pushq_cfi $-ENOSYS /* pt_regs->ax */
> sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */
> CFI_ADJUST_CFA_OFFSET 10*8
>
> @@ -392,8 +388,6 @@ ENTRY(ia32_cstar_target)
> testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
> CFI_REMEMBER_STATE
> jnz cstar_tracesys
> - cmpq $IA32_NR_syscalls-1,%rax
> - ja ia32_badsys
> cstar_do_call:
> /* 32bit syscall -> 64bit C ABI argument conversion */
> movl %edi,%r8d /* arg5 */
> @@ -402,8 +396,11 @@ cstar_do_call:
> movl %ebx,%edi /* arg1 */
> movl %edx,%edx /* arg3 (zero extension) */
> cstar_dispatch:
> + cmpq $(IA32_NR_syscalls-1),%rax
> + ja 1f
> call *ia32_sys_call_table(,%rax,8)
> movq %rax,RAX(%rsp)
> +1:
> DISABLE_INTERRUPTS(CLBR_NONE)
> TRACE_IRQS_OFF
> testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
> @@ -450,14 +447,11 @@ cstar_tracesys:
> xchgl %r9d,%ebp
> SAVE_EXTRA_REGS
> CLEAR_RREGS r9
> - movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
> movq %rsp,%rdi /* &pt_regs -> arg1 */
> call syscall_trace_enter
> LOAD_ARGS32 1 /* reload args from stack in case ptrace changed it */
> RESTORE_EXTRA_REGS
> xchgl %ebp,%r9d
> - cmpq $(IA32_NR_syscalls-1),%rax
> - ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */
> jmp cstar_do_call
> END(ia32_cstar_target)
>
> @@ -516,7 +510,7 @@ ENTRY(ia32_syscall)
> pushq_cfi_reg rsi /* pt_regs->si */
> pushq_cfi_reg rdx /* pt_regs->dx */
> pushq_cfi_reg rcx /* pt_regs->cx */
> - pushq_cfi_reg rax /* pt_regs->ax */
> + pushq_cfi $-ENOSYS /* pt_regs->ax */
> cld
> sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */
> CFI_ADJUST_CFA_OFFSET 10*8
> @@ -524,8 +518,6 @@ ENTRY(ia32_syscall)
> orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
> testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
> jnz ia32_tracesys
> - cmpq $(IA32_NR_syscalls-1),%rax
> - ja ia32_badsys
> ia32_do_call:
> /* 32bit syscall -> 64bit C ABI argument conversion */
> movl %edi,%r8d /* arg5 */
> @@ -533,9 +525,12 @@ ia32_do_call:
> xchg %ecx,%esi /* rsi:arg2, rcx:arg4 */
> movl %ebx,%edi /* arg1 */
> movl %edx,%edx /* arg3 (zero extension) */
> + cmpq $(IA32_NR_syscalls-1),%rax
> + ja 1f
> call *ia32_sys_call_table(,%rax,8) # xxx: rip relative
> ia32_sysret:
> movq %rax,RAX(%rsp)
> +1:
> ia32_ret_from_sys_call:
> CLEAR_RREGS
> jmp int_ret_from_sys_call
> @@ -543,23 +538,14 @@ ia32_ret_from_sys_call:
> ia32_tracesys:
> SAVE_EXTRA_REGS
> CLEAR_RREGS
> - movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
> movq %rsp,%rdi /* &pt_regs -> arg1 */
> call syscall_trace_enter
> LOAD_ARGS32 /* reload args from stack in case ptrace changed it */
> RESTORE_EXTRA_REGS
> - cmpq $(IA32_NR_syscalls-1),%rax
> - ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */
> jmp ia32_do_call
> + CFI_ENDPROC
> END(ia32_syscall)
>
> -ia32_badsys:
> - movq $0,ORIG_RAX(%rsp)
> - movq $-ENOSYS,%rax
> - jmp ia32_sysret
> -
> - CFI_ENDPROC
> -
> .macro PTREGSCALL label, func
> ALIGN
> GLOBAL(\label)
> --
> 1.8.1.4
>



--
Andy Lutomirski
AMA Capital Management, LLC

2015-04-10 16:08:54

by Denys Vlasenko

[permalink] [raw]
Subject: Re: [PATCH] x86/asm/entry/32: Update ENOSYS handling to match 64-bit logic

On 04/10/2015 05:44 PM, Andy Lutomirski wrote:
> On Fri, Apr 10, 2015 at 8:33 AM, Denys Vlasenko <[email protected]> wrote:
>> Sometime ago Andy changed 64-bit syscall logic so that pt_regs->ax is
>> initially set to -ENOSYS, and on exit from syscall, it is updated with
>> actual return value. This simplified logic there.
>>
>> This patch does the same for 32-bit syscall entry points.
>>
>> The check for %rax being too big is moved to be just before
>> the call insn which dispatches execution through syscall table.
>> There is no way to accidentally skip this check now by jumping
>> to a label after it. This allows to remove redundant checks
>> after e.g. ptrace.
>>
>> If %rax is too big, we just skip over the (call, write %rax to pt_regs->ax)
>> insn pair. pt_regs->ax remains set to -ENOSYS, and it gets returned
>> to userspace.
>
> This looks okay, but I'll read it again later today.

Unfortunately, there is a mistake.

>> @@ -247,9 +248,7 @@ sysexit_from_sys_call:
>> movl %ebx,%esi /* 2nd arg: 1st syscall arg */
>> movl %eax,%edi /* 1st arg: syscall number */
>> call __audit_syscall_entry
>> - movl RAX(%rsp),%eax /* reload syscall number */
>> - cmpq $(IA32_NR_syscalls-1),%rax
>> - ja ia32_badsys
>> + movl ORIG_RAX(%rsp),%eax /* reload syscall number */

this is correct, now syscall# is only in orig_ax, not in ax...

>> call __audit_syscall_exit
>> - movq RAX(%rsp),%rax /* reload syscall return value */
>> + movq ORIG_RAX(%rsp),%rax /* reload syscall return value */

but here I'm wrong, this line should not be changed.
Will send v2 now.

2015-04-10 16:23:23

by Denys Vlasenko

[permalink] [raw]
Subject: [PATCH v2] x86/asm/entry/32: Update ENOSYS handling to match 64-bit logic

Some time ago Andy changed 64-bit syscall logic so that pt_regs->ax is
initially set to -ENOSYS, and on exit from syscall, it is updated with
actual return value. This simplified logic there.

This patch does the same for 32-bit syscall entry points.

The check for %rax being too big is moved to be just before
the call insn which dispatches execution through syscall table.
There is no way to accidentally skip this check now by jumping
to a label after it. This allows us to remove redundant checks
after e.g. ptrace.

If %rax is too big, we just skip over the (call, write %rax to pt_regs->ax)
insn pair. pt_regs->ax remains set to -ENOSYS, and it gets returned
to userspace.

Similar to 64-bit code, this eliminates "ia32_badsys" code path.

Run-tested.

Signed-off-by: Denys Vlasenko <[email protected]>
CC: Linus Torvalds <[email protected]>
CC: Steven Rostedt <[email protected]>
CC: Ingo Molnar <[email protected]>
CC: Borislav Petkov <[email protected]>
CC: "H. Peter Anvin" <[email protected]>
CC: Andy Lutomirski <[email protected]>
CC: Oleg Nesterov <[email protected]>
CC: Frederic Weisbecker <[email protected]>
CC: Alexei Starovoitov <[email protected]>
CC: Will Drewry <[email protected]>
CC: Kees Cook <[email protected]>
CC: [email protected]
CC: [email protected]
---

Changes in v2: fixed an error: changing RAX to ORIG_RAX in
movq ORIG_RAX(%rsp),%rax /* reload syscall return value */
was wrong.

arch/x86/ia32/ia32entry.S | 44 +++++++++++++++-----------------------------
1 file changed, 15 insertions(+), 29 deletions(-)

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index a821b1c..29ab1c2 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -142,7 +142,7 @@ ENTRY(ia32_sysenter_target)
pushq_cfi_reg rsi /* pt_regs->si */
pushq_cfi_reg rdx /* pt_regs->dx */
pushq_cfi_reg rcx /* pt_regs->cx */
- pushq_cfi_reg rax /* pt_regs->ax */
+ pushq_cfi $-ENOSYS /* pt_regs->ax */
cld
sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */
CFI_ADJUST_CFA_OFFSET 10*8
@@ -169,8 +169,6 @@ sysenter_flags_fixed:
testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
CFI_REMEMBER_STATE
jnz sysenter_tracesys
- cmpq $(IA32_NR_syscalls-1),%rax
- ja ia32_badsys
sysenter_do_call:
/* 32bit syscall -> 64bit C ABI argument conversion */
movl %edi,%r8d /* arg5 */
@@ -179,8 +177,11 @@ sysenter_do_call:
movl %ebx,%edi /* arg1 */
movl %edx,%edx /* arg3 (zero extension) */
sysenter_dispatch:
+ cmpq $(IA32_NR_syscalls-1),%rax
+ ja 1f
call *ia32_sys_call_table(,%rax,8)
movq %rax,RAX(%rsp)
+1:
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
@@ -247,9 +248,7 @@ sysexit_from_sys_call:
movl %ebx,%esi /* 2nd arg: 1st syscall arg */
movl %eax,%edi /* 1st arg: syscall number */
call __audit_syscall_entry
- movl RAX(%rsp),%eax /* reload syscall number */
- cmpq $(IA32_NR_syscalls-1),%rax
- ja ia32_badsys
+ movl ORIG_RAX(%rsp),%eax /* reload syscall number */
movl %ebx,%edi /* reload 1st syscall arg */
movl RCX(%rsp),%esi /* reload 2nd syscall arg */
movl RDX(%rsp),%edx /* reload 3rd syscall arg */
@@ -300,13 +299,10 @@ sysenter_tracesys:
#endif
SAVE_EXTRA_REGS
CLEAR_RREGS
- movq $-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */
movq %rsp,%rdi /* &pt_regs -> arg1 */
call syscall_trace_enter
LOAD_ARGS32 /* reload args from stack in case ptrace changed it */
RESTORE_EXTRA_REGS
- cmpq $(IA32_NR_syscalls-1),%rax
- ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */
jmp sysenter_do_call
CFI_ENDPROC
ENDPROC(ia32_sysenter_target)
@@ -376,7 +372,7 @@ ENTRY(ia32_cstar_target)
pushq_cfi_reg rdx /* pt_regs->dx */
pushq_cfi_reg rbp /* pt_regs->cx */
movl %ebp,%ecx
- pushq_cfi_reg rax /* pt_regs->ax */
+ pushq_cfi $-ENOSYS /* pt_regs->ax */
sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */
CFI_ADJUST_CFA_OFFSET 10*8

@@ -392,8 +388,6 @@ ENTRY(ia32_cstar_target)
testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
CFI_REMEMBER_STATE
jnz cstar_tracesys
- cmpq $IA32_NR_syscalls-1,%rax
- ja ia32_badsys
cstar_do_call:
/* 32bit syscall -> 64bit C ABI argument conversion */
movl %edi,%r8d /* arg5 */
@@ -402,8 +396,11 @@ cstar_do_call:
movl %ebx,%edi /* arg1 */
movl %edx,%edx /* arg3 (zero extension) */
cstar_dispatch:
+ cmpq $(IA32_NR_syscalls-1),%rax
+ ja 1f
call *ia32_sys_call_table(,%rax,8)
movq %rax,RAX(%rsp)
+1:
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
@@ -450,14 +447,11 @@ cstar_tracesys:
xchgl %r9d,%ebp
SAVE_EXTRA_REGS
CLEAR_RREGS r9
- movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
movq %rsp,%rdi /* &pt_regs -> arg1 */
call syscall_trace_enter
LOAD_ARGS32 1 /* reload args from stack in case ptrace changed it */
RESTORE_EXTRA_REGS
xchgl %ebp,%r9d
- cmpq $(IA32_NR_syscalls-1),%rax
- ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */
jmp cstar_do_call
END(ia32_cstar_target)

@@ -516,7 +510,7 @@ ENTRY(ia32_syscall)
pushq_cfi_reg rsi /* pt_regs->si */
pushq_cfi_reg rdx /* pt_regs->dx */
pushq_cfi_reg rcx /* pt_regs->cx */
- pushq_cfi_reg rax /* pt_regs->ax */
+ pushq_cfi $-ENOSYS /* pt_regs->ax */
cld
sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */
CFI_ADJUST_CFA_OFFSET 10*8
@@ -524,8 +518,6 @@ ENTRY(ia32_syscall)
orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
jnz ia32_tracesys
- cmpq $(IA32_NR_syscalls-1),%rax
- ja ia32_badsys
ia32_do_call:
/* 32bit syscall -> 64bit C ABI argument conversion */
movl %edi,%r8d /* arg5 */
@@ -533,9 +525,12 @@ ia32_do_call:
xchg %ecx,%esi /* rsi:arg2, rcx:arg4 */
movl %ebx,%edi /* arg1 */
movl %edx,%edx /* arg3 (zero extension) */
+ cmpq $(IA32_NR_syscalls-1),%rax
+ ja 1f
call *ia32_sys_call_table(,%rax,8) # xxx: rip relative
ia32_sysret:
movq %rax,RAX(%rsp)
+1:
ia32_ret_from_sys_call:
CLEAR_RREGS
jmp int_ret_from_sys_call
@@ -543,23 +538,14 @@ ia32_ret_from_sys_call:
ia32_tracesys:
SAVE_EXTRA_REGS
CLEAR_RREGS
- movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
movq %rsp,%rdi /* &pt_regs -> arg1 */
call syscall_trace_enter
LOAD_ARGS32 /* reload args from stack in case ptrace changed it */
RESTORE_EXTRA_REGS
- cmpq $(IA32_NR_syscalls-1),%rax
- ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */
jmp ia32_do_call
+ CFI_ENDPROC
END(ia32_syscall)

-ia32_badsys:
- movq $0,ORIG_RAX(%rsp)
- movq $-ENOSYS,%rax
- jmp ia32_sysret
-
- CFI_ENDPROC
-
.macro PTREGSCALL label, func
ALIGN
GLOBAL(\label)
--
1.8.1.4

2015-04-10 16:24:10

by Denys Vlasenko

[permalink] [raw]
Subject: Re: [PATCH] x86/asm/entry/64: 32-bit execve stubs are identical to x32 ones, merge them.

On 04/10/2015 05:39 PM, Borislav Petkov wrote:
> On Fri, Apr 10, 2015 at 05:33:07PM +0200, Denys Vlasenko wrote:
>> Siggested by Borislav Petkov.
>
> I think you mean Brian Gerst here. :)

Oops.... you're right