2023-07-18 14:01:17

by Brian Gerst

[permalink] [raw]
Subject: [PATCH 0/6] x86: Clean up fast syscall return validation

This patch set cleans up the tests done to determine if a fast syscall
return instruction can be used to return to userspace. It converts the
code to C, and refactors existing code to be more readable.

Brian Gerst (6):
x86/entry/64: Remove obsolete comment on tracing vs. SYSRET
x86/entry/64: Convert SYSRET validation tests to C
x86/entry/compat: Combine return value test from syscall handler
x86/entry/32: Convert do_fast_syscall_32() to bool return type
x86/entry/32: Remove SEP test for SYSEXIT
x86/entry/32: Clean up syscall fast exit tests

arch/x86/entry/common.c | 109 +++++++++++++++++++++----------
arch/x86/entry/entry_32.S | 2 +-
arch/x86/entry/entry_64.S | 68 +------------------
arch/x86/entry/entry_64_compat.S | 12 ++--
arch/x86/include/asm/syscall.h | 6 +-
5 files changed, 87 insertions(+), 110 deletions(-)

--
2.41.0



2023-07-18 14:24:58

by Brian Gerst

[permalink] [raw]
Subject: [PATCH 1/6] x86/entry/64: Remove obsolete comment on tracing vs. SYSRET

This comment comes from a time when the kernel attempted to use SYSRET
on all returns to userspace, including interrupts and exceptions. Ever
since commit fffbb5dc ("Move opportunistic sysret code to syscall code
path"), SYSRET is only used for returning from system calls. The
specific tracing issue listed in this comment is not possible anymore.

Signed-off-by: Brian Gerst <[email protected]>
---
arch/x86/entry/entry_64.S | 19 +++----------------
1 file changed, 3 insertions(+), 16 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 91f6818884fa..c01776a51545 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -166,22 +166,9 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
jne swapgs_restore_regs_and_return_to_usermode

/*
- * SYSCALL clears RF when it saves RFLAGS in R11 and SYSRET cannot
- * restore RF properly. If the slowpath sets it for whatever reason, we
- * need to restore it correctly.
- *
- * SYSRET can restore TF, but unlike IRET, restoring TF results in a
- * trap from userspace immediately after SYSRET. This would cause an
- * infinite loop whenever #DB happens with register state that satisfies
- * the opportunistic SYSRET conditions. For example, single-stepping
- * this user code:
- *
- * movq $stuck_here, %rcx
- * pushfq
- * popq %r11
- * stuck_here:
- *
- * would never get past 'stuck_here'.
+ * SYSRET cannot restore RF. It can restore TF, but unlike IRET,
+ * restoring TF results in a trap from userspace immediately after
+ * SYSRET.
*/
testq $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11
jnz swapgs_restore_regs_and_return_to_usermode
--
2.41.0


2023-07-18 14:57:45

by Brian Gerst

[permalink] [raw]
Subject: [PATCH 6/6] x86/entry/32: Clean up syscall fast exit tests

Merge compat and native code and clarify comments.

Signed-off-by: Brian Gerst <[email protected]>
---
arch/x86/entry/common.c | 48 +++++++++++++++-----------------
arch/x86/entry/entry_64_compat.S | 5 ++--
2 files changed, 24 insertions(+), 29 deletions(-)

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index fca6f2b7daf3..b975dc1d0812 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -251,34 +251,30 @@ __visible noinstr bool do_fast_syscall_32(struct pt_regs *regs)
if (!__do_fast_syscall_32(regs))
return false;

-#ifdef CONFIG_X86_64
/*
- * Opportunistic SYSRETL: if possible, try to return using SYSRETL.
- * SYSRETL is available on all 64-bit CPUs, so we don't need to
- * bother with SYSEXIT.
- *
- * Unlike 64-bit opportunistic SYSRET, we can't check that CX == IP,
- * because the ECX fixup above will ensure that this is essentially
- * never the case.
+ * Check that the register state is valid for using SYSRETL/SYSEXIT
+ * to exit to userspace. Otherwise use the slower but fully capable
+ * IRET exit path.
*/
- return regs->cs == __USER32_CS && regs->ss == __USER_DS &&
- regs->ip == landing_pad &&
- (regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF)) == 0;
-#else
- /*
- * Opportunistic SYSEXIT: if possible, try to return using SYSEXIT.
- *
- * Unlike 64-bit opportunistic SYSRET, we can't check that CX == IP,
- * because the ECX fixup above will ensure that this is essentially
- * never the case.
- *
- * We don't allow syscalls at all from VM86 mode, but we still
- * need to check VM, because we might be returning from sys_vm86.
- */
- return regs->cs == __USER_CS && regs->ss == __USER_DS &&
- regs->ip == landing_pad &&
- (regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF | X86_EFLAGS_VM)) == 0;
-#endif
+
+ /* XEN PV guests always use IRET path */
+ if (cpu_feature_enabled(X86_FEATURE_XENPV))
+ return false;
+
+ /* EIP must point to the VDSO landing pad */
+ if (unlikely(regs->ip != landing_pad))
+ return false;
+
+ /* CS and SS must match the values set in MSR_STAR */
+ if (unlikely(regs->cs != __USER32_CS || regs->ss != __USER_DS))
+ return false;
+
+ /* If the TF, RF, or VM flags are set, use IRET */
+ if (unlikely(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF | X86_EFLAGS_VM)))
+ return false;
+
+ /* Use SYSRETL/SYSEXIT to exit to userspace */
+ return true;
}

/* Returns true to return using SYSEXIT/SYSRETL, or false to use IRET */
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 27c05d08558a..84e21d1ebf10 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -211,9 +211,8 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL)
call do_fast_syscall_32

sysret32_from_system_call:
- /* XEN PV guests always use IRET path */
- ALTERNATIVE "testb %al, %al; jz swapgs_restore_regs_and_return_to_usermode", \
- "jmp swapgs_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV
+ testb %al, %al /* Is SYSRET allowed? */
+ jz swapgs_restore_regs_and_return_to_usermode

/*
* Opportunistic SYSRET
--
2.41.0