2015-07-27 20:33:56

by Denys Vlasenko

[permalink] [raw]
Subject: [PATCH 1/5] x86/asm/entry/32: Massage SYSENTER32 fast path to be nearly identical to SYSCALL32

This change swaps a few instructions in final register restoring/zeroing
section of SYSENTER fast path, and adds/deletes a few empty lines.

After this, the difference between SYSENTER and SYCALL fast paths
(after the prologue which saved pt_regs) is very small:
they differ merely in the choice of register to hold arg6 (EBP or R9)
and in the value of EDX on exit: SYSENTER ABI doesn't need to preserve it,
so it is zeroed. SYSCALL preserves it:

|(prologue is different)
| orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
| testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
|- jnz sysenter_tracesys
|+ jnz cstar_tracesys
|
|-sysenter_do_call:
|+cstar_do_call:
| /* 32-bit syscall -> 64-bit C ABI argument conversion */
| movl %edi, %r8d /* arg5 */
|- movl %ebp, %r9d /* arg6 */
|+ /* r9 already loaded */ /* arg6 */
| xchg %ecx, %esi /* rsi:arg2, rcx:arg4 */
| movl %ebx, %edi /* arg1 */
| movl %edx, %edx /* arg3 (zero extension) */
|
|-sysenter_dispatch:
|+cstar_dispatch:
| cmpq $(IA32_NR_syscalls-1), %rax
| ja 1f
| call *ia32_sys_call_table(, %rax, 8)
|@@ -19,15 +19,15 @@
| DISABLE_INTERRUPTS(CLBR_NONE)
| TRACE_IRQS_OFF
| testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
|- jnz sysexit_audit
|+ jnz sysretl_audit
|
|-sysexit_from_sys_call:
|+sysretl_from_sys_call:
| andl $~TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
|+ movl RDX(%rsp), %edx
| movl RSI(%rsp), %esi
| movl RDI(%rsp), %edi
| movl RIP(%rsp), %ecx
| movl EFLAGS(%rsp), %r11d
|- xorl %edx, %edx
| xorq %r10, %r10
| xorq %r9, %r9
| xorq %r8, %r8
|(the rest of fast path, up to final SYSRET32, is identical)

This is a preparatory change which allows to drop most of SYSENTER machinery
and make SYSENTER reuse SYSCALL code.

Signed-off-by: Denys Vlasenko <[email protected]>
CC: Ingo Molnar <[email protected]>
CC: Linus Torvalds <[email protected]>
CC: Krzysztof A. Sobiecki <[email protected]>
CC: Steven Rostedt <[email protected]>
CC: Borislav Petkov <[email protected]>
CC: "H. Peter Anvin" <[email protected]>
CC: Andy Lutomirski <[email protected]>
CC: Oleg Nesterov <[email protected]>
CC: Frederic Weisbecker <[email protected]>
CC: Alexei Starovoitov <[email protected]>
CC: Will Drewry <[email protected]>
CC: Kees Cook <[email protected]>
CC: [email protected]
CC: [email protected]
---
arch/x86/entry/entry_64_compat.S | 12 +++++++-----
1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 8997383..9f9dfa5 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -117,6 +117,7 @@ sysenter_do_call:
xchg %ecx, %esi /* rsi:arg2, rcx:arg4 */
movl %ebx, %edi /* arg1 */
movl %edx, %edx /* arg3 (zero extension) */
+
sysenter_dispatch:
cmpq $(IA32_NR_syscalls-1), %rax
ja 1f
@@ -127,6 +128,7 @@ sysenter_dispatch:
TRACE_IRQS_OFF
testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
jnz sysexit_audit
+
sysexit_from_sys_call:
/*
* NB: SYSEXIT is not obviously safe for 64-bit kernels -- an
@@ -139,14 +141,14 @@ sysexit_from_sys_call:
* with 'sysenter' and it uses the SYSENTER calling convention.
*/
andl $~TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
- movl RIP(%rsp), %ecx /* User %eip */
movl RSI(%rsp), %esi
movl RDI(%rsp), %edi
+ movl RIP(%rsp), %ecx /* User %eip */
+ movl EFLAGS(%rsp), %r11d /* User eflags */
xorl %edx, %edx /* Do not leak kernel information */
- xorq %r8, %r8
- xorq %r9, %r9
xorq %r10, %r10
- movl EFLAGS(%rsp), %r11d /* User eflags */
+ xorq %r9, %r9
+ xorq %r8, %r8
TRACE_IRQS_ON

/*
@@ -340,6 +342,7 @@ ENTRY(entry_SYSCALL_compat)
1: movl (%r8), %r9d
_ASM_EXTABLE(1b, ia32_badarg)
ASM_CLAC
+
orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
jnz cstar_tracesys
@@ -355,7 +358,6 @@ cstar_do_call:
cstar_dispatch:
cmpq $(IA32_NR_syscalls-1), %rax
ja 1f
-
call *ia32_sys_call_table(, %rax, 8)
movq %rax, RAX(%rsp)
1:
--
1.8.1.4


2015-07-27 20:34:01

by Denys Vlasenko

[permalink] [raw]
Subject: [PATCH 2/5] x86/asm/entry/32: Expand auditsys_FOO macros in SYSCALL32 code path

This is a preparatory change which allows to drop most of SYSENTER machinery
and make SYSENTER reuse SYSCALL code: we will be deleting entire
SYSENTER code block, including auditsys_entry_common and
auditsys_exit macros.

Signed-off-by: Denys Vlasenko <[email protected]>
CC: Ingo Molnar <[email protected]>
CC: Linus Torvalds <[email protected]>
CC: Krzysztof A. Sobiecki <[email protected]>
CC: Steven Rostedt <[email protected]>
CC: Borislav Petkov <[email protected]>
CC: "H. Peter Anvin" <[email protected]>
CC: Andy Lutomirski <[email protected]>
CC: Oleg Nesterov <[email protected]>
CC: Frederic Weisbecker <[email protected]>
CC: Alexei Starovoitov <[email protected]>
CC: Will Drewry <[email protected]>
CC: Kees Cook <[email protected]>
CC: [email protected]
CC: [email protected]
---
arch/x86/entry/entry_64_compat.S | 57 +++++++++++++++++++++++++++++++++++++---
1 file changed, 53 insertions(+), 4 deletions(-)

diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 9f9dfa5..df102e8 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -396,13 +396,62 @@ sysretl_from_sys_call:

#ifdef CONFIG_AUDITSYSCALL
cstar_auditsys:
- movl %r9d, R9(%rsp) /* register to be clobbered by call */
- auditsys_entry_common
- movl R9(%rsp), %r9d /* reload 6th syscall arg */
+ movl %r9d, R9(%rsp) /* R9 is callee-clobbered, save it */
+ /*
+ * At this point, registers hold syscall args in the 32-bit syscall ABI:
+ * EAX is syscall number, the 6 args are in EBX,ECX,EDX,ESI,EDI,
+ * 6th arg is in R9.
+ *
+ * We want to pass them to __audit_syscall_entry(), which is a 64-bit
+ * C function with 5 parameters, so shuffle them to match what
+ * the function expects: RDI,RSI,RDX,RCX,R8.
+ */
+ movl %esi, %r8d /* arg5 (R8 ) <= 4th syscall arg (ESI) */
+ xchg %ecx, %edx /* arg4 (RCX) <= 3rd syscall arg (EDX) */
+ /* arg3 (RDX) <= 2nd syscall arg (ECX) */
+ movl %ebx, %esi /* arg2 (RSI) <= 1st syscall arg (EBX) */
+ movl %eax, %edi /* arg1 (RDI) <= syscall number (EAX) */
+ call __audit_syscall_entry
+
+ /*
+ * We are going to jump back to the syscall dispatch code.
+ * Prepare syscall args as required by the 64-bit C ABI.
+ * Registers clobbered by __audit_syscall_entry() are
+ * loaded from pt_regs on stack:
+ */
+ movl ORIG_RAX(%rsp), %eax /* syscall number */
+ movl %ebx, %edi /* arg1 */
+ movl RCX(%rsp), %esi /* arg2 */
+ movl RDX(%rsp), %edx /* arg3 */
+ movl RSI(%rsp), %ecx /* arg4 */
+ movl RDI(%rsp), %r8d /* arg5 */
+ movl R9(%rsp), %r9d /* arg6 */
jmp cstar_dispatch

sysretl_audit:
- auditsys_exit sysretl_from_sys_call
+ TRACE_IRQS_ON
+ ENABLE_INTERRUPTS(CLBR_NONE)
+ testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
+ jnz ia32_ret_from_sys_call
+ movl %eax, %esi /* second arg, syscall return value */
+ cmpl $-MAX_ERRNO, %eax /* is it an error ? */
+ jbe 1f
+ movslq %eax, %rsi /* if error sign extend to 64 bits */
+1: setbe %al /* 1 if error, 0 if not */
+ movzbl %al, %edi /* zero-extend that into %edi */
+ call __audit_syscall_exit
+ movq RAX(%rsp), %rax /* reload syscall return value */
+ movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %edi
+ DISABLE_INTERRUPTS(CLBR_NONE)
+ TRACE_IRQS_OFF
+ testl %edi, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
+ jz sysretl_from_sys_call
+ xorl %eax, %eax /* Do not leak kernel information */
+ movq %rax, R11(%rsp)
+ movq %rax, R10(%rsp)
+ movq %rax, R9(%rsp)
+ movq %rax, R8(%rsp)
+ jmp int_ret_from_sys_call_irqs_off
#endif

cstar_tracesys:
--
1.8.1.4

2015-07-27 20:34:09

by Denys Vlasenko

[permalink] [raw]
Subject: [PATCH 3/5] x86/asm/entry/32: Jump from SYSENTER32 to SYSCALL32 code path

In 32-bit SYSENTER code, load arg6 into R9 instead of EBP.
Jump to SYSCALL code path after we finish setting up pt_regs
and clearing FLAGS_NT.

This leaves most of SYSENTER32 code path inaccessible.

Signed-off-by: Denys Vlasenko <[email protected]>
CC: Ingo Molnar <[email protected]>
CC: Linus Torvalds <[email protected]>
CC: Krzysztof A. Sobiecki <[email protected]>
CC: Steven Rostedt <[email protected]>
CC: Borislav Petkov <[email protected]>
CC: "H. Peter Anvin" <[email protected]>
CC: Andy Lutomirski <[email protected]>
CC: Oleg Nesterov <[email protected]>
CC: Frederic Weisbecker <[email protected]>
CC: Alexei Starovoitov <[email protected]>
CC: Will Drewry <[email protected]>
CC: Kees Cook <[email protected]>
CC: [email protected]
CC: [email protected]
---
arch/x86/entry/entry_64_compat.S | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index df102e8..d74745a 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -93,7 +93,7 @@ ENTRY(entry_SYSENTER_compat)
* 32-bit zero extended
*/
ASM_STAC
-1: movl (%rbp), %ebp
+1: movl (%rbp), %r9d
_ASM_EXTABLE(1b, ia32_badarg)
ASM_CLAC

@@ -105,6 +105,7 @@ ENTRY(entry_SYSENTER_compat)
testl $X86_EFLAGS_NT, EFLAGS(%rsp)
jnz sysenter_fix_flags
sysenter_flags_fixed:
+ jmp sysenter_jumps_here

orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
@@ -343,6 +344,7 @@ ENTRY(entry_SYSCALL_compat)
_ASM_EXTABLE(1b, ia32_badarg)
ASM_CLAC

+sysenter_jumps_here:
orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
jnz cstar_tracesys
--
1.8.1.4

2015-07-27 20:34:15

by Denys Vlasenko

[permalink] [raw]
Subject: [PATCH 4/5] x86/asm/entry/32: Delete most of SYSENTER32 code.

Deleting dead code.

Only code which performs pt_regs setup, reads arg6 and clears FLAGS_NT,
remains.

Signed-off-by: Denys Vlasenko <[email protected]>
CC: Ingo Molnar <[email protected]>
CC: Linus Torvalds <[email protected]>
CC: Krzysztof A. Sobiecki <[email protected]>
CC: Steven Rostedt <[email protected]>
CC: Borislav Petkov <[email protected]>
CC: "H. Peter Anvin" <[email protected]>
CC: Andy Lutomirski <[email protected]>
CC: Oleg Nesterov <[email protected]>
CC: Frederic Weisbecker <[email protected]>
CC: Alexei Starovoitov <[email protected]>
CC: Will Drewry <[email protected]>
CC: Kees Cook <[email protected]>
CC: [email protected]
CC: [email protected]
---
arch/x86/entry/entry_64_compat.S | 162 ---------------------------------------
1 file changed, 162 deletions(-)

diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index d74745a..73b56a5 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -107,173 +107,11 @@ ENTRY(entry_SYSENTER_compat)
sysenter_flags_fixed:
jmp sysenter_jumps_here

- orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
- testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
- jnz sysenter_tracesys
-
-sysenter_do_call:
- /* 32-bit syscall -> 64-bit C ABI argument conversion */
- movl %edi, %r8d /* arg5 */
- movl %ebp, %r9d /* arg6 */
- xchg %ecx, %esi /* rsi:arg2, rcx:arg4 */
- movl %ebx, %edi /* arg1 */
- movl %edx, %edx /* arg3 (zero extension) */
-
-sysenter_dispatch:
- cmpq $(IA32_NR_syscalls-1), %rax
- ja 1f
- call *ia32_sys_call_table(, %rax, 8)
- movq %rax, RAX(%rsp)
-1:
- DISABLE_INTERRUPTS(CLBR_NONE)
- TRACE_IRQS_OFF
- testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
- jnz sysexit_audit
-
-sysexit_from_sys_call:
- /*
- * NB: SYSEXIT is not obviously safe for 64-bit kernels -- an
- * NMI between STI and SYSEXIT has poorly specified behavior,
- * and and NMI followed by an IRQ with usergs is fatal. So
- * we just pretend we're using SYSEXIT but we really use
- * SYSRETL instead.
- *
- * This code path is still called 'sysexit' because it pairs
- * with 'sysenter' and it uses the SYSENTER calling convention.
- */
- andl $~TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
- movl RSI(%rsp), %esi
- movl RDI(%rsp), %edi
- movl RIP(%rsp), %ecx /* User %eip */
- movl EFLAGS(%rsp), %r11d /* User eflags */
- xorl %edx, %edx /* Do not leak kernel information */
- xorq %r10, %r10
- xorq %r9, %r9
- xorq %r8, %r8
- TRACE_IRQS_ON
-
- /*
- * SYSRETL works even on Intel CPUs. Use it in preference to SYSEXIT,
- * since it avoids a dicey window with interrupts enabled.
- */
- movl RSP(%rsp), %esp
-
- /*
- * USERGS_SYSRET32 does:
- * gsbase = user's gs base
- * eip = ecx
- * rflags = r11
- * cs = __USER32_CS
- * ss = __USER_DS
- *
- * The prologue set RIP(%rsp) to VDSO32_SYSENTER_RETURN, which does:
- *
- * pop %ebp
- * pop %edx
- * pop %ecx
- *
- * Therefore, we invoke SYSRETL with EDX and R8-R10 zeroed to
- * avoid info leaks. R11 ends up with VDSO32_SYSENTER_RETURN's
- * address (already known to user code), and R12-R15 are
- * callee-saved and therefore don't contain any interesting
- * kernel data.
- */
- USERGS_SYSRET32
-
-#ifdef CONFIG_AUDITSYSCALL
- .macro auditsys_entry_common
- /*
- * At this point, registers hold syscall args in the 32-bit syscall ABI:
- * EAX is syscall number, the 6 args are in EBX,ECX,EDX,ESI,EDI,EBP.
- *
- * We want to pass them to __audit_syscall_entry(), which is a 64-bit
- * C function with 5 parameters, so shuffle them to match what
- * the function expects: RDI,RSI,RDX,RCX,R8.
- */
- movl %esi, %r8d /* arg5 (R8 ) <= 4th syscall arg (ESI) */
- xchg %ecx, %edx /* arg4 (RCX) <= 3rd syscall arg (EDX) */
- /* arg3 (RDX) <= 2nd syscall arg (ECX) */
- movl %ebx, %esi /* arg2 (RSI) <= 1st syscall arg (EBX) */
- movl %eax, %edi /* arg1 (RDI) <= syscall number (EAX) */
- call __audit_syscall_entry
-
- /*
- * We are going to jump back to the syscall dispatch code.
- * Prepare syscall args as required by the 64-bit C ABI.
- * Registers clobbered by __audit_syscall_entry() are
- * loaded from pt_regs on stack:
- */
- movl ORIG_RAX(%rsp), %eax /* syscall number */
- movl %ebx, %edi /* arg1 */
- movl RCX(%rsp), %esi /* arg2 */
- movl RDX(%rsp), %edx /* arg3 */
- movl RSI(%rsp), %ecx /* arg4 */
- movl RDI(%rsp), %r8d /* arg5 */
- .endm
-
- .macro auditsys_exit exit
- TRACE_IRQS_ON
- ENABLE_INTERRUPTS(CLBR_NONE)
- testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
- jnz ia32_ret_from_sys_call
- movl %eax, %esi /* second arg, syscall return value */
- cmpl $-MAX_ERRNO, %eax /* is it an error ? */
- jbe 1f
- movslq %eax, %rsi /* if error sign extend to 64 bits */
-1: setbe %al /* 1 if error, 0 if not */
- movzbl %al, %edi /* zero-extend that into %edi */
- call __audit_syscall_exit
- movq RAX(%rsp), %rax /* reload syscall return value */
- movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %edi
- DISABLE_INTERRUPTS(CLBR_NONE)
- TRACE_IRQS_OFF
- testl %edi, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
- jz \exit
- xorl %eax, %eax /* Do not leak kernel information */
- movq %rax, R11(%rsp)
- movq %rax, R10(%rsp)
- movq %rax, R9(%rsp)
- movq %rax, R8(%rsp)
- jmp int_ret_from_sys_call_irqs_off
- .endm
-
-sysenter_auditsys:
- auditsys_entry_common
- movl %ebp, %r9d /* reload 6th syscall arg */
- jmp sysenter_dispatch
-
-sysexit_audit:
- auditsys_exit sysexit_from_sys_call
-#endif
-
sysenter_fix_flags:
pushq $(X86_EFLAGS_IF|X86_EFLAGS_FIXED)
popfq
jmp sysenter_flags_fixed

-sysenter_tracesys:
-#ifdef CONFIG_AUDITSYSCALL
- testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
- jz sysenter_auditsys
-#endif
- SAVE_EXTRA_REGS
- xorl %eax, %eax /* Do not leak kernel information */
- movq %rax, R11(%rsp)
- movq %rax, R10(%rsp)
- movq %rax, R9(%rsp)
- movq %rax, R8(%rsp)
- movq %rsp, %rdi /* &pt_regs -> arg1 */
- call syscall_trace_enter
-
- /* Reload arg registers from stack. (see sysenter_tracesys) */
- movl RCX(%rsp), %ecx
- movl RDX(%rsp), %edx
- movl RSI(%rsp), %esi
- movl RDI(%rsp), %edi
- movl %eax, %eax /* zero extension */
-
- RESTORE_EXTRA_REGS
- jmp sysenter_do_call
ENDPROC(entry_SYSENTER_compat)

/*
--
1.8.1.4

2015-07-27 20:34:22

by Denys Vlasenko

[permalink] [raw]
Subject: [PATCH 5/5] x86/asm/entry/32: Simplify FLAGS_NT clearing in SYSENTER32 code.

"sysenter_fix_flags" detour does not need to be convoluted anymore,
straigten it up. However, we still use this:

jnz 2f
jmp sysenter_jumps_here
2: ...

instead of this:

jz sysenter_jumps_here
...

because "cold" conditional forward branch is predicted not taken
by most CPUs - exactly what we want. Latter version would get it wrong.

Reinstate "why we use SYSRETL instead of SYSEXIT" comment.

Signed-off-by: Denys Vlasenko <[email protected]>
CC: Ingo Molnar <[email protected]>
CC: Linus Torvalds <[email protected]>
CC: Krzysztof A. Sobiecki <[email protected]>
CC: Steven Rostedt <[email protected]>
CC: Borislav Petkov <[email protected]>
CC: "H. Peter Anvin" <[email protected]>
CC: Andy Lutomirski <[email protected]>
CC: Oleg Nesterov <[email protected]>
CC: Frederic Weisbecker <[email protected]>
CC: Alexei Starovoitov <[email protected]>
CC: Will Drewry <[email protected]>
CC: Kees Cook <[email protected]>
CC: [email protected]
CC: [email protected]
---
arch/x86/entry/entry_64_compat.S | 26 +++++++++++++++++---------
1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 73b56a5..bd3664f 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -98,20 +98,28 @@ ENTRY(entry_SYSENTER_compat)
ASM_CLAC

/*
- * Sysenter doesn't filter flags, so we need to clear NT
- * ourselves. To save a few cycles, we can check whether
- * NT was set instead of doing an unconditional popfq.
+ * Sysenter doesn't filter flags, so we need to clear NT ourselves.
*/
testl $X86_EFLAGS_NT, EFLAGS(%rsp)
- jnz sysenter_fix_flags
-sysenter_flags_fixed:
+ jnz 2f
jmp sysenter_jumps_here
-
-sysenter_fix_flags:
+2:
pushq $(X86_EFLAGS_IF|X86_EFLAGS_FIXED)
popfq
- jmp sysenter_flags_fixed
-
+ jmp sysenter_jumps_here
+ /*
+ * SYSEXIT insn is not obviously safe for 64-bit kernels --
+ * an NMI between STI and SYSEXIT has poorly specified behavior,
+ * and NMI followed by an IRQ with usergs is fatal.
+ * So we just pretend we're using SYSEXIT but we really use
+ * SYSRETL instead. (Yes, SYSRETL works even on Intel CPUs.)
+ * We do that by reusing the entire SYSCALL code path:
+ * the jump above takes us there.
+ *
+ * The difference of SYSENTER 32-bit ABI versus SYSCALL
+ * is that SYSENTER ABI does not promise to preserve EDX and EBP,
+ * SYSCALL does.
+ */
ENDPROC(entry_SYSENTER_compat)

/*
--
1.8.1.4

2015-07-27 20:50:14

by Andy Lutomirski

[permalink] [raw]
Subject: Re: [PATCH 1/5] x86/asm/entry/32: Massage SYSENTER32 fast path to be nearly identical to SYSCALL32

On Mon, Jul 27, 2015 at 1:33 PM, Denys Vlasenko <[email protected]> wrote:
> This change swaps a few instructions in final register restoring/zeroing
> section of SYSENTER fast path, and adds/deletes a few empty lines.
>
> After this, the difference between SYSENTER and SYCALL fast paths
> (after the prologue which saved pt_regs) is very small:
> they differ merely in the choice of register to hold arg6 (EBP or R9)
> and in the value of EDX on exit: SYSENTER ABI doesn't need to preserve it,
> so it is zeroed. SYSCALL preserves it:

Acked-by: Andy Lutomirski <[email protected]>

(I haven't tested it, but this looks Obviously Correct (tm) and
there's no funny stack manipulation in the middle of the changes that
could break things.)

--Andy

2015-07-27 20:51:45

by Andy Lutomirski

[permalink] [raw]
Subject: Re: [PATCH 2/5] x86/asm/entry/32: Expand auditsys_FOO macros in SYSCALL32 code path

On Mon, Jul 27, 2015 at 1:33 PM, Denys Vlasenko <[email protected]> wrote:
> This is a preparatory change which allows to drop most of SYSENTER machinery
> and make SYSENTER reuse SYSCALL code: we will be deleting entire
> SYSENTER code block, including auditsys_entry_common and
> auditsys_exit macros.

Assuming the object file with CONFIG_AUDITSYSCALL is identical before
and after, then:

Acked-by: Andy Lutomirski <[email protected]>

(Except that you forgot to accidentally delete all the audit code in
this patch.)

--Andy

2015-07-27 22:37:57

by Andy Lutomirski

[permalink] [raw]
Subject: Re: [PATCH 3/5] x86/asm/entry/32: Jump from SYSENTER32 to SYSCALL32 code path

> Subject: [PATCH 3/5] x86/asm/entry/32: Jump from SYSENTER32 to SYSCALL32 code path

Shouldn't that be /64, not /32, or maybe /64/compat?

On Mon, Jul 27, 2015 at 1:33 PM, Denys Vlasenko <[email protected]> wrote:
> In 32-bit SYSENTER code, load arg6 into R9 instead of EBP.
> Jump to SYSCALL code path after we finish setting up pt_regs
> and clearing FLAGS_NT.
>
> This leaves most of SYSENTER32 code path inaccessible.
>
> Signed-off-by: Denys Vlasenko <[email protected]>
> CC: Ingo Molnar <[email protected]>
> CC: Linus Torvalds <[email protected]>
> CC: Krzysztof A. Sobiecki <[email protected]>
> CC: Steven Rostedt <[email protected]>
> CC: Borislav Petkov <[email protected]>
> CC: "H. Peter Anvin" <[email protected]>
> CC: Andy Lutomirski <[email protected]>
> CC: Oleg Nesterov <[email protected]>
> CC: Frederic Weisbecker <[email protected]>
> CC: Alexei Starovoitov <[email protected]>
> CC: Will Drewry <[email protected]>
> CC: Kees Cook <[email protected]>
> CC: [email protected]
> CC: [email protected]
> ---
> arch/x86/entry/entry_64_compat.S | 4 +++-
> 1 file changed, 3 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
> index df102e8..d74745a 100644
> --- a/arch/x86/entry/entry_64_compat.S
> +++ b/arch/x86/entry/entry_64_compat.S
> @@ -93,7 +93,7 @@ ENTRY(entry_SYSENTER_compat)
> * 32-bit zero extended
> */
> ASM_STAC
> -1: movl (%rbp), %ebp
> +1: movl (%rbp), %r9d

You're sticking arg6 into r9d here, I think, and then:


>
> orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
> testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
> @@ -343,6 +344,7 @@ ENTRY(entry_SYSCALL_compat)
> _ASM_EXTABLE(1b, ia32_badarg)
> ASM_CLAC
>
> +sysenter_jumps_here:
> orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
> testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
> jnz cstar_tracesys

you land here, which eventually does:

movl %ebp, %r9d /* arg6 */

What am I missing?

--Andy

2015-07-27 22:38:51

by Andy Lutomirski

[permalink] [raw]
Subject: Re: [PATCH 4/5] x86/asm/entry/32: Delete most of SYSENTER32 code.

On Mon, Jul 27, 2015 at 1:33 PM, Denys Vlasenko <[email protected]> wrote:
> Deleting dead code.
>
> Only code which performs pt_regs setup, reads arg6 and clears FLAGS_NT,
> remains.
>

Acked-by: Andy Lutomirski <[email protected]>

2015-07-27 22:40:22

by Andy Lutomirski

[permalink] [raw]
Subject: Re: [PATCH 5/5] x86/asm/entry/32: Simplify FLAGS_NT clearing in SYSENTER32 code.

On Mon, Jul 27, 2015 at 1:33 PM, Denys Vlasenko <[email protected]> wrote:
> "sysenter_fix_flags" detour does not need to be convoluted anymore,
> straigten it up. However, we still use this:
>
> jnz 2f
> jmp sysenter_jumps_here
> 2: ...
>
> instead of this:
>
> jz sysenter_jumps_here
> ...
>
> because "cold" conditional forward branch is predicted not taken
> by most CPUs - exactly what we want. Latter version would get it wrong.
>
> Reinstate "why we use SYSRETL instead of SYSEXIT" comment.
>

Acked-by: Andy Lutomirski <[email protected]>

2015-07-28 11:13:12

by Denys Vlasenko

[permalink] [raw]
Subject: Re: [PATCH 3/5] x86/asm/entry/32: Jump from SYSENTER32 to SYSCALL32 code path

On 07/28/2015 12:37 AM, Andy Lutomirski wrote:
>> Subject: [PATCH 3/5] x86/asm/entry/32: Jump from SYSENTER32 to SYSCALL32 code path
>
> Shouldn't that be /64, not /32, or maybe /64/compat?
>
> On Mon, Jul 27, 2015 at 1:33 PM, Denys Vlasenko <[email protected]> wrote:
>> In 32-bit SYSENTER code, load arg6 into R9 instead of EBP.
>> Jump to SYSCALL code path after we finish setting up pt_regs
>> and clearing FLAGS_NT.
>>
>> This leaves most of SYSENTER32 code path inaccessible.
>>
>> Signed-off-by: Denys Vlasenko <[email protected]>
>> CC: Ingo Molnar <[email protected]>
>> CC: Linus Torvalds <[email protected]>
>> CC: Krzysztof A. Sobiecki <[email protected]>
>> CC: Steven Rostedt <[email protected]>
>> CC: Borislav Petkov <[email protected]>
>> CC: "H. Peter Anvin" <[email protected]>
>> CC: Andy Lutomirski <[email protected]>
>> CC: Oleg Nesterov <[email protected]>
>> CC: Frederic Weisbecker <[email protected]>
>> CC: Alexei Starovoitov <[email protected]>
>> CC: Will Drewry <[email protected]>
>> CC: Kees Cook <[email protected]>
>> CC: [email protected]
>> CC: [email protected]
>> ---
>> arch/x86/entry/entry_64_compat.S | 4 +++-
>> 1 file changed, 3 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
>> index df102e8..d74745a 100644
>> --- a/arch/x86/entry/entry_64_compat.S
>> +++ b/arch/x86/entry/entry_64_compat.S
>> @@ -93,7 +93,7 @@ ENTRY(entry_SYSENTER_compat)
>> * 32-bit zero extended
>> */
>> ASM_STAC
>> -1: movl (%rbp), %ebp
>> +1: movl (%rbp), %r9d
>
> You're sticking arg6 into r9d here, I think, and then:
>
>
>>
>> orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
>> testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
>> @@ -343,6 +344,7 @@ ENTRY(entry_SYSCALL_compat)
>> _ASM_EXTABLE(1b, ia32_badarg)
>> ASM_CLAC
>>
>> +sysenter_jumps_here:
>> orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
>> testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
>> jnz cstar_tracesys
>
> you land here, which eventually does:
>
> movl %ebp, %r9d /* arg6 */
>
> What am I missing?

Please "git pull" from Ingo's tree. There was a revert,
arg6 is no longer held in EBP in SYSCALL code:

cstar_do_call:
/* 32-bit syscall -> 64-bit C ABI argument conversion */
movl %edi, %r8d /* arg5 */
/* r9 already loaded */ /* arg6 */
xchg %ecx, %esi /* rsi:arg2, rcx:arg4 */
movl %ebx, %edi /* arg1 */
movl %edx, %edx /* arg3 (zero extension) */