Don't do SYSRET validation tests when FRED is enabled, since ERETU is
the only legit instruction to return to user level.
Signed-off-by: Xin Li (Intel) <[email protected]>
---
arch/x86/entry/common.c | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 6356060caaf3..1c3944eb9901 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -72,7 +72,7 @@ static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)
return false;
}
-/* Returns true to return using SYSRET, or false to use IRET */
+/* Returns true to return using SYSRET, or false to use IRET/ERETU */
__visible noinstr bool do_syscall_64(struct pt_regs *regs, int nr)
{
add_random_kstack_offset();
@@ -88,6 +88,10 @@ __visible noinstr bool do_syscall_64(struct pt_regs *regs, int nr)
instrumentation_end();
syscall_exit_to_user_mode(regs);
+ /* No test for FRED, which returns to user level with ERETU only */
+ if (cpu_feature_enabled(X86_FEATURE_FRED))
+ return false;
+
/*
* Check that the register state is valid for using SYSRET to exit
* to userspace. Otherwise use the slower but fully capable IRET
@@ -325,7 +329,7 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
return true;
}
-/* Returns true to return using SYSEXIT/SYSRETL, or false to use IRET */
+/* Returns true to return using SYSEXIT/SYSRETL, or false to use IRET/ERETU */
__visible noinstr bool do_fast_syscall_32(struct pt_regs *regs)
{
/*
@@ -346,6 +350,10 @@ __visible noinstr bool do_fast_syscall_32(struct pt_regs *regs)
if (!__do_fast_syscall_32(regs))
return false;
+ /* No test for FRED, which returns to user level with ERETU only */
+ if (cpu_feature_enabled(X86_FEATURE_FRED))
+ return false;
+
/*
* Check that the register state is valid for using SYSRETL/SYSEXIT
* to exit to userspace. Otherwise use the slower but fully capable
base-commit: 65d1240b6728b38e4d2068d6738a17e4ee4351f5
--
2.44.0
Now, for a FRED system the return value is ignored anyway, so the only
benefit of this is skipping the test, correct?
I do observe that what is left of do_fast_syscall_32 is a single
assignment followed by __do_fast_syscall_32(), which could be turned
into a tailcall.
Another thing that has been added lately is a bunch of tests on the int
$0x80 path. This is a slow path *on legacy hardware*, but on FRED
systems it is the *fast* path for IA32 emulation. These tests are also
totally unnecessary *AND IN FACT, WRONG* on a FRED system, as FRED
distinguishes external interrupts from software interrupts.
Furthermore, under FRED interrupt 0x80 is available as a hardware
interrupt (since there is no reason to block it out.) Therefore, going
and poking the APIC as in int80_is_external() is INCORRECT and possibly
fatal.
Again, the easiest way to fix that is to follow what XenPV does in
int80_is_external(), but there is more unnecessary stuff:
!user_mode(regs) cannot happen on FRED, and the stuff in the
int80_emulation assembly function, if it is needed at all on any
FRED-compatible hardware (I don't believe so), should be done in the FRED
user mode assembly entry stub.
Thus, it might be better to strip down do_int80_emulation() to a lean
fred_int80_emulation().
-hpa
On 4/2/24 23:24, Xin Li (Intel) wrote:
> Don't do SYSRET validation tests when FRED is enabled, since ERETU is
> the only legit instruction to return to user level.
>
> Signed-off-by: Xin Li (Intel) <[email protected]>
> ---
> arch/x86/entry/common.c | 12 ++++++++++--
> 1 file changed, 10 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
> index 6356060caaf3..1c3944eb9901 100644
> --- a/arch/x86/entry/common.c
> +++ b/arch/x86/entry/common.c
> @@ -72,7 +72,7 @@ static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)
> return false;
> }
>
> -/* Returns true to return using SYSRET, or false to use IRET */
> +/* Returns true to return using SYSRET, or false to use IRET/ERETU */
> __visible noinstr bool do_syscall_64(struct pt_regs *regs, int nr)
> {
> add_random_kstack_offset();
> @@ -88,6 +88,10 @@ __visible noinstr bool do_syscall_64(struct pt_regs *regs, int nr)
> instrumentation_end();
> syscall_exit_to_user_mode(regs);
>
> + /* No test for FRED, which returns to user level with ERETU only */
> + if (cpu_feature_enabled(X86_FEATURE_FRED))
> + return false;
> +
> /*
> * Check that the register state is valid for using SYSRET to exit
> * to userspace. Otherwise use the slower but fully capable IRET
> @@ -325,7 +329,7 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
> return true;
> }
>
> -/* Returns true to return using SYSEXIT/SYSRETL, or false to use IRET */
> +/* Returns true to return using SYSEXIT/SYSRETL, or false to use IRET/ERETU */
> __visible noinstr bool do_fast_syscall_32(struct pt_regs *regs)
> {
> /*
> @@ -346,6 +350,10 @@ __visible noinstr bool do_fast_syscall_32(struct pt_regs *regs)
> if (!__do_fast_syscall_32(regs))
> return false;
>
> + /* No test for FRED, which returns to user level with ERETU only */
> + if (cpu_feature_enabled(X86_FEATURE_FRED))
> + return false;
> +
> /*
> * Check that the register state is valid for using SYSRETL/SYSEXIT
> * to exit to userspace. Otherwise use the slower but fully capable
>
> base-commit: 65d1240b6728b38e4d2068d6738a17e4ee4351f5
On 4/9/2024 4:35 PM, H. Peter Anvin wrote:
> Now, for a FRED system the return value is ignored anyway, so the only
> benefit of this is skipping the test, correct?
Yes, and as syscalls are hot paths, we should save some cycles.
>
> I do observe than what is left of do_fast_syscall_32 is a single
> assignment followed by __do_fast_syscall_32(), which could be turned
> into a tailcall.
Ah, I didn't realize this is a better way, will change.
>
> Another thing that has been added lately is a bunch of tests on the int
> $0x80 path. This is a slow path *on legacy hardware*, but on FRED
> systems it is the *fast* path for IA32 emulation. These tests are also
> totally unnecessary *AND IN FACT, WRONG* on a FRED system, as FRED
> distinguishes external interrupts from software interrupts.
>
> Furthermore, under FRED interrupt 0x80 is available as a hardware
> interrupt (since there is no reason to block it out.) Therefore, going
> and poking the APIC as in int80_is_external() is INCORRECT and possibly
> fatal.
>
> Again, the easiest way to fix that is to follow what XenPV does in
> int80_is_external(), but there is more unnecessary stuff:
> !user_mode(regs) cannot happen on FRED, and the stuff in the
> int80_emulation assembly function should, if it is needed at all on any
> FRED-compatible hardware (I don't believe so) should be done in the FRED
> user mode assembly entry stub.
>
> Thus, it might be better to strip down do_int80_emulation() to a lean
> fred_int80_emulation().
Yeah, FRED is fundamentally better by its nature. Will do!
Thanks!
Xin
>
> -hpa
>
> On 4/2/24 23:24, Xin Li (Intel) wrote:
>> Don't do SYSRET validation tests when FRED is enabled, since ERETU is
>> the only legit instruction to return to user level.
>>
>> Signed-off-by: Xin Li (Intel) <[email protected]>
>> ---
>> arch/x86/entry/common.c | 12 ++++++++++--
>> 1 file changed, 10 insertions(+), 2 deletions(-)