Signed-off-by: Brian Gerst <[email protected]>
---
arch/x86/entry/common.c | 40 ++++++++++++++++++++++++-
arch/x86/entry/entry_64.S | 55 ++--------------------------------
arch/x86/include/asm/syscall.h | 2 +-
3 files changed, 42 insertions(+), 55 deletions(-)
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 6c2826417b33..cccdd18c8304 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -70,7 +70,8 @@ static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)
return false;
}
-__visible noinstr void do_syscall_64(struct pt_regs *regs, int nr)
+/* Returns true to return using SYSRET, or false to use IRET */
+__visible noinstr bool do_syscall_64(struct pt_regs *regs, int nr)
{
add_random_kstack_offset();
nr = syscall_enter_from_user_mode(regs, nr);
@@ -84,6 +85,43 @@ __visible noinstr void do_syscall_64(struct pt_regs *regs, int nr)
instrumentation_end();
syscall_exit_to_user_mode(regs);
+
+ /*
+ * Check that the register state is valid for using SYSRET to exit
+ * to userspace. Otherwise use the slower but fully capable IRET
+ * exit path.
+ */
+
+ /* XEN PV guests always use IRET path */
+ if (cpu_feature_enabled(X86_FEATURE_XENPV))
+ return false;
+
+ /* SYSRET requires RCX == RIP and R11 == EFLAGS */
+ if (unlikely(regs->cx != regs->ip || regs->r11 != regs->flags))
+ return false;
+
+ /* CS and SS must match the values set in MSR_STAR */
+ if (unlikely(regs->cs != __USER_CS || regs->ss != __USER_DS))
+ return false;
+
+ /*
+ * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
+ * in kernel space. This essentially lets the user take over
+ * the kernel, since userspace controls RSP.
+ */
+ if (unlikely(!__is_canonical_address(regs->ip, __VIRTUAL_MASK_SHIFT + 1)))
+ return false;
+
+ /*
+ * SYSRET cannot restore RF. It can restore TF, but unlike IRET,
+ * restoring TF results in a trap from userspace immediately after
+ * SYSRET.
+ */
+ if (unlikely(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF)))
+ return false;
+
+ /* Use SYSRET to exit to userspace */
+ return true;
}
#endif
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index c01776a51545..b1288e22cae8 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -123,60 +123,9 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
* Try to use SYSRET instead of IRET if we're returning to
* a completely clean 64-bit userspace context. If we're not,
* go to the slow exit path.
- * In the Xen PV case we must use iret anyway.
*/
-
- ALTERNATIVE "", "jmp swapgs_restore_regs_and_return_to_usermode", \
- X86_FEATURE_XENPV
-
- movq RCX(%rsp), %rcx
- movq RIP(%rsp), %r11
-
- cmpq %rcx, %r11 /* SYSRET requires RCX == RIP */
- jne swapgs_restore_regs_and_return_to_usermode
-
- /*
- * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
- * in kernel space. This essentially lets the user take over
- * the kernel, since userspace controls RSP.
- *
- * If width of "canonical tail" ever becomes variable, this will need
- * to be updated to remain correct on both old and new CPUs.
- *
- * Change top bits to match most significant bit (47th or 56th bit
- * depending on paging mode) in the address.
- */
-#ifdef CONFIG_X86_5LEVEL
- ALTERNATIVE "shl $(64 - 48), %rcx; sar $(64 - 48), %rcx", \
- "shl $(64 - 57), %rcx; sar $(64 - 57), %rcx", X86_FEATURE_LA57
-#else
- shl $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
- sar $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
-#endif
-
- /* If this changed %rcx, it was not canonical */
- cmpq %rcx, %r11
- jne swapgs_restore_regs_and_return_to_usermode
-
- cmpq $__USER_CS, CS(%rsp) /* CS must match SYSRET */
- jne swapgs_restore_regs_and_return_to_usermode
-
- movq R11(%rsp), %r11
- cmpq %r11, EFLAGS(%rsp) /* R11 == RFLAGS */
- jne swapgs_restore_regs_and_return_to_usermode
-
- /*
- * SYSRET cannot restore RF. It can restore TF, but unlike IRET,
- * restoring TF results in a trap from userspace immediately after
- * SYSRET.
- */
- testq $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11
- jnz swapgs_restore_regs_and_return_to_usermode
-
- /* nothing to check for RSP */
-
- cmpq $__USER_DS, SS(%rsp) /* SS must match SYSRET */
- jne swapgs_restore_regs_and_return_to_usermode
+ testb %al, %al
+ jz swapgs_restore_regs_and_return_to_usermode
/*
* We win! This label is here just for ease of understanding
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index 4fb36fba4b5a..be6c5515e0b9 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -126,7 +126,7 @@ static inline int syscall_get_arch(struct task_struct *task)
? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
}
-void do_syscall_64(struct pt_regs *regs, int nr);
+bool do_syscall_64(struct pt_regs *regs, int nr);
#endif /* CONFIG_X86_32 */
--
2.41.0
> @@ -84,6 +85,43 @@ __visible noinstr void do_syscall_64(struct pt_regs *regs, int
> nr)
>
> instrumentation_end();
> syscall_exit_to_user_mode(regs);
Would it be better to make the following code a new function?
And then the similar changes in patch 6 could be merged into the new
function with #ifdef CONFIG_X86_64.
> +
> + /*
> + * Check that the register state is valid for using SYSRET to exit
> + * to userspace. Otherwise use the slower but fully capable IRET
> + * exit path.
> + */
> +
> + /* XEN PV guests always use IRET path */
> + if (cpu_feature_enabled(X86_FEATURE_XENPV))
> + return false;
> +
> + /* SYSRET requires RCX == RIP and R11 == EFLAGS */
> + if (unlikely(regs->cx != regs->ip || regs->r11 != regs->flags))
> + return false;
> +
> + /* CS and SS must match the values set in MSR_STAR */
> + if (unlikely(regs->cs != __USER_CS || regs->ss != __USER_DS))
> + return false;
> +
> + /*
> + * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
> + * in kernel space. This essentially lets the user take over
> + * the kernel, since userspace controls RSP.
> + */
> + if (unlikely(!__is_canonical_address(regs->ip, __VIRTUAL_MASK_SHIFT +
> 1)))
> + return false;
> +
> + /*
> + * SYSRET cannot restore RF. It can restore TF, but unlike IRET,
> + * restoring TF results in a trap from userspace immediately after
> + * SYSRET.
> + */
> + if (unlikely(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF)))
> + return false;
> +
> + /* Use SYSRET to exit to userspace */
> + return true;
> }
> #endif
>
On Sun, Jul 23, 2023 at 5:53 AM Li, Xin3 <[email protected]> wrote:
>
>
> > @@ -84,6 +85,43 @@ __visible noinstr void do_syscall_64(struct pt_regs *regs, int
> > nr)
> >
> > instrumentation_end();
> > syscall_exit_to_user_mode(regs);
>
> Would it be better to make the following code a new function?
>
> And then the similar changes in patch 6 could be merged into the new
> function with #ifdef CONFIG_X86_64.
>
> > +
> > + /*
> > + * Check that the register state is valid for using SYSRET to exit
> > + * to userspace. Otherwise use the slower but fully capable IRET
> > + * exit path.
> > + */
> > +
> > + /* XEN PV guests always use IRET path */
> > + if (cpu_feature_enabled(X86_FEATURE_XENPV))
> > + return false;
> > +
> > + /* SYSRET requires RCX == RIP and R11 == EFLAGS */
> > + if (unlikely(regs->cx != regs->ip || regs->r11 != regs->flags))
> > + return false;
> > +
> > + /* CS and SS must match the values set in MSR_STAR */
> > + if (unlikely(regs->cs != __USER_CS || regs->ss != __USER_DS))
> > + return false;
> > +
> > + /*
> > + * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
> > + * in kernel space. This essentially lets the user take over
> > + * the kernel, since userspace controls RSP.
> > + */
> > + if (unlikely(!__is_canonical_address(regs->ip, __VIRTUAL_MASK_SHIFT +
> > 1)))
> > + return false;
> > +
> > + /*
> > + * SYSRET cannot restore RF. It can restore TF, but unlike IRET,
> > + * restoring TF results in a trap from userspace immediately after
> > + * SYSRET.
> > + */
> > + if (unlikely(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF)))
> > + return false;
> > +
> > + /* Use SYSRET to exit to userspace */
> > + return true;
> > }
> > #endif
> >
The tests are similar, but not similar enough to combine them. If
IA32_EMULATION is enabled, both versions are needed, so a single copy of
the function with #ifdefs won't work.
Brian Gerst