2023-07-18 14:00:37

by Brian Gerst

[permalink] [raw]
Subject: [PATCH 2/6] x86/entry/64: Convert SYSRET validation tests to C

Signed-off-by: Brian Gerst <[email protected]>
---
arch/x86/entry/common.c | 50 ++++++++++++++++++++++++++++++-
arch/x86/entry/entry_64.S | 55 ++--------------------------------
arch/x86/include/asm/syscall.h | 2 +-
3 files changed, 52 insertions(+), 55 deletions(-)

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 6c2826417b33..afe79c3f1c5b 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -70,8 +70,12 @@ static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)
return false;
}

-__visible noinstr void do_syscall_64(struct pt_regs *regs, int nr)
+/* Returns true to return using SYSRET, or false to use IRET */
+__visible noinstr bool do_syscall_64(struct pt_regs *regs, int nr)
{
+ long rip;
+ unsigned int shift_rip;
+
add_random_kstack_offset();
nr = syscall_enter_from_user_mode(regs, nr);

@@ -84,6 +88,50 @@ __visible noinstr void do_syscall_64(struct pt_regs *regs, int nr)

instrumentation_end();
syscall_exit_to_user_mode(regs);
+
+ /*
+ * Check that the register state is valid for using SYSRET to exit
+ * to userspace. Otherwise use the slower but fully capable IRET
+ * exit path.
+ */
+
+ /* XEN PV guests always use IRET path */
+ if (cpu_feature_enabled(X86_FEATURE_XENPV))
+ return false;
+
+ /* SYSRET requires RCX == RIP and R11 == EFLAGS */
+ if (unlikely(regs->cx != regs->ip || regs->r11 != regs->flags))
+ return false;
+
+ /* CS and SS must match the values set in MSR_STAR */
+ if (unlikely(regs->cs != __USER_CS || regs->ss != __USER_DS))
+ return false;
+
+ /*
+ * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
+ * in kernel space. This essentially lets the user take over
+ * the kernel, since userspace controls RSP.
+ *
+ * Change top bits to match most significant bit (47th or 56th bit
+ * depending on paging mode) in the address.
+ */
+ shift_rip = (64 - __VIRTUAL_MASK_SHIFT + 1);
+ rip = (long) regs->ip;
+ rip <<= shift_rip;
+ rip >>= shift_rip;
+ if (unlikely((unsigned long) rip != regs->ip))
+ return false;
+
+ /*
+ * SYSRET cannot restore RF. It can restore TF, but unlike IRET,
+ * restoring TF results in a trap from userspace immediately after
+ * SYSRET.
+ */
+ if (unlikely(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF)))
+ return false;
+
+ /* Use SYSRET to exit to userspace */
+ return true;
}
#endif

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index c01776a51545..b1288e22cae8 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -123,60 +123,9 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
* Try to use SYSRET instead of IRET if we're returning to
* a completely clean 64-bit userspace context. If we're not,
* go to the slow exit path.
- * In the Xen PV case we must use iret anyway.
*/
-
- ALTERNATIVE "", "jmp swapgs_restore_regs_and_return_to_usermode", \
- X86_FEATURE_XENPV
-
- movq RCX(%rsp), %rcx
- movq RIP(%rsp), %r11
-
- cmpq %rcx, %r11 /* SYSRET requires RCX == RIP */
- jne swapgs_restore_regs_and_return_to_usermode
-
- /*
- * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
- * in kernel space. This essentially lets the user take over
- * the kernel, since userspace controls RSP.
- *
- * If width of "canonical tail" ever becomes variable, this will need
- * to be updated to remain correct on both old and new CPUs.
- *
- * Change top bits to match most significant bit (47th or 56th bit
- * depending on paging mode) in the address.
- */
-#ifdef CONFIG_X86_5LEVEL
- ALTERNATIVE "shl $(64 - 48), %rcx; sar $(64 - 48), %rcx", \
- "shl $(64 - 57), %rcx; sar $(64 - 57), %rcx", X86_FEATURE_LA57
-#else
- shl $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
- sar $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
-#endif
-
- /* If this changed %rcx, it was not canonical */
- cmpq %rcx, %r11
- jne swapgs_restore_regs_and_return_to_usermode
-
- cmpq $__USER_CS, CS(%rsp) /* CS must match SYSRET */
- jne swapgs_restore_regs_and_return_to_usermode
-
- movq R11(%rsp), %r11
- cmpq %r11, EFLAGS(%rsp) /* R11 == RFLAGS */
- jne swapgs_restore_regs_and_return_to_usermode
-
- /*
- * SYSRET cannot restore RF. It can restore TF, but unlike IRET,
- * restoring TF results in a trap from userspace immediately after
- * SYSRET.
- */
- testq $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11
- jnz swapgs_restore_regs_and_return_to_usermode
-
- /* nothing to check for RSP */
-
- cmpq $__USER_DS, SS(%rsp) /* SS must match SYSRET */
- jne swapgs_restore_regs_and_return_to_usermode
+ testb %al, %al
+ jz swapgs_restore_regs_and_return_to_usermode

/*
* We win! This label is here just for ease of understanding
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index 4fb36fba4b5a..be6c5515e0b9 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -126,7 +126,7 @@ static inline int syscall_get_arch(struct task_struct *task)
? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
}

-void do_syscall_64(struct pt_regs *regs, int nr);
+bool do_syscall_64(struct pt_regs *regs, int nr);

#endif /* CONFIG_X86_32 */

--
2.41.0



2023-07-18 14:34:42

by Mika Penttilä

[permalink] [raw]
Subject: Re: [PATCH 2/6] x86/entry/64: Convert SYSRET validation tests to C

Hi,


On 18.7.2023 16.44, Brian Gerst wrote:
> Signed-off-by: Brian Gerst <[email protected]>
> ---
> arch/x86/entry/common.c | 50 ++++++++++++++++++++++++++++++-
> arch/x86/entry/entry_64.S | 55 ++--------------------------------
> arch/x86/include/asm/syscall.h | 2 +-
> 3 files changed, 52 insertions(+), 55 deletions(-)
>
> diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
> index 6c2826417b33..afe79c3f1c5b 100644
> --- a/arch/x86/entry/common.c
> +++ b/arch/x86/entry/common.c
> @@ -70,8 +70,12 @@ static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)
> return false;
> }
>
> -__visible noinstr void do_syscall_64(struct pt_regs *regs, int nr)
> +/* Returns true to return using SYSRET, or false to use IRET */
> +__visible noinstr bool do_syscall_64(struct pt_regs *regs, int nr)
> {
> + long rip;
> + unsigned int shift_rip;
> +
> add_random_kstack_offset();
> nr = syscall_enter_from_user_mode(regs, nr);
>
> @@ -84,6 +88,50 @@ __visible noinstr void do_syscall_64(struct pt_regs *regs, int nr)
>
> instrumentation_end();
> syscall_exit_to_user_mode(regs);
> +
> + /*
> + * Check that the register state is valid for using SYSRET to exit
> + * to userspace. Otherwise use the slower but fully capable IRET
> + * exit path.
> + */
> +
> + /* XEN PV guests always use IRET path */
> + if (cpu_feature_enabled(X86_FEATURE_XENPV))
> + return false;
> +
> + /* SYSRET requires RCX == RIP and R11 == EFLAGS */
> + if (unlikely(regs->cx != regs->ip || regs->r11 != regs->flags))
> + return false;
> +
> + /* CS and SS must match the values set in MSR_STAR */
> + if (unlikely(regs->cs != __USER_CS || regs->ss != __USER_DS))
> + return false;
> +
> + /*
> + * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
> + * in kernel space. This essentially lets the user take over
> + * the kernel, since userspace controls RSP.
> + *
> + * Change top bits to match most significant bit (47th or 56th bit
> + * depending on paging mode) in the address.
> + */
> + shift_rip = (64 - __VIRTUAL_MASK_SHIFT + 1);

Should this be:

shift_rip = (64 - __VIRTUAL_MASK_SHIFT - 1);
?

> + rip = (long) regs->ip;
> + rip <<= shift_rip;
> + rip >>= shift_rip;
> + if (unlikely((unsigned long) rip != regs->ip))
> + return false;
> +
> + /*
> + * SYSRET cannot restore RF. It can restore TF, but unlike IRET,
> + * restoring TF results in a trap from userspace immediately after
> + * SYSRET.
> + */
> + if (unlikely(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF)))
> + return false;
> +
> + /* Use SYSRET to exit to userspace */
> + return true;
> }
> #endif
>
> diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
> index c01776a51545..b1288e22cae8 100644
> --- a/arch/x86/entry/entry_64.S
> +++ b/arch/x86/entry/entry_64.S
> @@ -123,60 +123,9 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
> * Try to use SYSRET instead of IRET if we're returning to
> * a completely clean 64-bit userspace context. If we're not,
> * go to the slow exit path.
> - * In the Xen PV case we must use iret anyway.
> */
> -
> - ALTERNATIVE "", "jmp swapgs_restore_regs_and_return_to_usermode", \
> - X86_FEATURE_XENPV
> -
> - movq RCX(%rsp), %rcx
> - movq RIP(%rsp), %r11
> -
> - cmpq %rcx, %r11 /* SYSRET requires RCX == RIP */
> - jne swapgs_restore_regs_and_return_to_usermode
> -
> - /*
> - * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
> - * in kernel space. This essentially lets the user take over
> - * the kernel, since userspace controls RSP.
> - *
> - * If width of "canonical tail" ever becomes variable, this will need
> - * to be updated to remain correct on both old and new CPUs.
> - *
> - * Change top bits to match most significant bit (47th or 56th bit
> - * depending on paging mode) in the address.
> - */
> -#ifdef CONFIG_X86_5LEVEL
> - ALTERNATIVE "shl $(64 - 48), %rcx; sar $(64 - 48), %rcx", \
> - "shl $(64 - 57), %rcx; sar $(64 - 57), %rcx", X86_FEATURE_LA57
> -#else
> - shl $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
> - sar $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
> -#endif
> -
> - /* If this changed %rcx, it was not canonical */
> - cmpq %rcx, %r11
> - jne swapgs_restore_regs_and_return_to_usermode
> -
> - cmpq $__USER_CS, CS(%rsp) /* CS must match SYSRET */
> - jne swapgs_restore_regs_and_return_to_usermode
> -
> - movq R11(%rsp), %r11
> - cmpq %r11, EFLAGS(%rsp) /* R11 == RFLAGS */
> - jne swapgs_restore_regs_and_return_to_usermode
> -
> - /*
> - * SYSRET cannot restore RF. It can restore TF, but unlike IRET,
> - * restoring TF results in a trap from userspace immediately after
> - * SYSRET.
> - */
> - testq $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11
> - jnz swapgs_restore_regs_and_return_to_usermode
> -
> - /* nothing to check for RSP */
> -
> - cmpq $__USER_DS, SS(%rsp) /* SS must match SYSRET */
> - jne swapgs_restore_regs_and_return_to_usermode
> + testb %al, %al
> + jz swapgs_restore_regs_and_return_to_usermode
>
> /*
> * We win! This label is here just for ease of understanding
> diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
> index 4fb36fba4b5a..be6c5515e0b9 100644
> --- a/arch/x86/include/asm/syscall.h
> +++ b/arch/x86/include/asm/syscall.h
> @@ -126,7 +126,7 @@ static inline int syscall_get_arch(struct task_struct *task)
> ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
> }
>
> -void do_syscall_64(struct pt_regs *regs, int nr);
> +bool do_syscall_64(struct pt_regs *regs, int nr);
>
> #endif /* CONFIG_X86_32 */
>


--Mika


2023-07-18 15:08:32

by Brian Gerst

[permalink] [raw]
Subject: Re: [PATCH 2/6] x86/entry/64: Convert SYSRET validation tests to C

On Tue, Jul 18, 2023 at 10:17 AM Mika Penttilä <[email protected]> wrote:
>
> Hi,
>
>
> On 18.7.2023 16.44, Brian Gerst wrote:
> > Signed-off-by: Brian Gerst <[email protected]>
> > ---
> > arch/x86/entry/common.c | 50 ++++++++++++++++++++++++++++++-
> > arch/x86/entry/entry_64.S | 55 ++--------------------------------
> > arch/x86/include/asm/syscall.h | 2 +-
> > 3 files changed, 52 insertions(+), 55 deletions(-)
> >
> > diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
> > index 6c2826417b33..afe79c3f1c5b 100644
> > --- a/arch/x86/entry/common.c
> > +++ b/arch/x86/entry/common.c
> > @@ -70,8 +70,12 @@ static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)
> > return false;
> > }
> >
> > -__visible noinstr void do_syscall_64(struct pt_regs *regs, int nr)
> > +/* Returns true to return using SYSRET, or false to use IRET */
> > +__visible noinstr bool do_syscall_64(struct pt_regs *regs, int nr)
> > {
> > + long rip;
> > + unsigned int shift_rip;
> > +
> > add_random_kstack_offset();
> > nr = syscall_enter_from_user_mode(regs, nr);
> >
> > @@ -84,6 +88,50 @@ __visible noinstr void do_syscall_64(struct pt_regs *regs, int nr)
> >
> > instrumentation_end();
> > syscall_exit_to_user_mode(regs);
> > +
> > + /*
> > + * Check that the register state is valid for using SYSRET to exit
> > + * to userspace. Otherwise use the slower but fully capable IRET
> > + * exit path.
> > + */
> > +
> > + /* XEN PV guests always use IRET path */
> > + if (cpu_feature_enabled(X86_FEATURE_XENPV))
> > + return false;
> > +
> > + /* SYSRET requires RCX == RIP and R11 == EFLAGS */
> > + if (unlikely(regs->cx != regs->ip || regs->r11 != regs->flags))
> > + return false;
> > +
> > + /* CS and SS must match the values set in MSR_STAR */
> > + if (unlikely(regs->cs != __USER_CS || regs->ss != __USER_DS))
> > + return false;
> > +
> > + /*
> > + * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
> > + * in kernel space. This essentially lets the user take over
> > + * the kernel, since userspace controls RSP.
> > + *
> > + * Change top bits to match most significant bit (47th or 56th bit
> > + * depending on paging mode) in the address.
> > + */
> > + shift_rip = (64 - __VIRTUAL_MASK_SHIFT + 1);
>
> Should this be:
>
> shift_rip = (64 - __VIRTUAL_MASK_SHIFT - 1);
> ?

I removed a set of parentheses, which switched the sign from -1 to +1.
I could put it back if that's less confusing.

Brian Gerst

2023-07-18 15:13:37

by Mika Penttilä

[permalink] [raw]
Subject: Re: [PATCH 2/6] x86/entry/64: Convert SYSRET validation tests to C



On 18.7.2023 17.25, Brian Gerst wrote:
> On Tue, Jul 18, 2023 at 10:17 AM Mika Penttilä <[email protected]> wrote:
>>
>> Hi,
>>
>>
>> On 18.7.2023 16.44, Brian Gerst wrote:
>>> Signed-off-by: Brian Gerst <[email protected]>
>>> ---
>>> arch/x86/entry/common.c | 50 ++++++++++++++++++++++++++++++-
>>> arch/x86/entry/entry_64.S | 55 ++--------------------------------
>>> arch/x86/include/asm/syscall.h | 2 +-
>>> 3 files changed, 52 insertions(+), 55 deletions(-)
>>>
>>> diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
>>> index 6c2826417b33..afe79c3f1c5b 100644
>>> --- a/arch/x86/entry/common.c
>>> +++ b/arch/x86/entry/common.c
>>> @@ -70,8 +70,12 @@ static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)
>>> return false;
>>> }
>>>
>>> -__visible noinstr void do_syscall_64(struct pt_regs *regs, int nr)
>>> +/* Returns true to return using SYSRET, or false to use IRET */
>>> +__visible noinstr bool do_syscall_64(struct pt_regs *regs, int nr)
>>> {
>>> + long rip;
>>> + unsigned int shift_rip;
>>> +
>>> add_random_kstack_offset();
>>> nr = syscall_enter_from_user_mode(regs, nr);
>>>
>>> @@ -84,6 +88,50 @@ __visible noinstr void do_syscall_64(struct pt_regs *regs, int nr)
>>>
>>> instrumentation_end();
>>> syscall_exit_to_user_mode(regs);
>>> +
>>> + /*
>>> + * Check that the register state is valid for using SYSRET to exit
>>> + * to userspace. Otherwise use the slower but fully capable IRET
>>> + * exit path.
>>> + */
>>> +
>>> + /* XEN PV guests always use IRET path */
>>> + if (cpu_feature_enabled(X86_FEATURE_XENPV))
>>> + return false;
>>> +
>>> + /* SYSRET requires RCX == RIP and R11 == EFLAGS */
>>> + if (unlikely(regs->cx != regs->ip || regs->r11 != regs->flags))
>>> + return false;
>>> +
>>> + /* CS and SS must match the values set in MSR_STAR */
>>> + if (unlikely(regs->cs != __USER_CS || regs->ss != __USER_DS))
>>> + return false;
>>> +
>>> + /*
>>> + * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
>>> + * in kernel space. This essentially lets the user take over
>>> + * the kernel, since userspace controls RSP.
>>> + *
>>> + * Change top bits to match most significant bit (47th or 56th bit
>>> + * depending on paging mode) in the address.
>>> + */
>>> + shift_rip = (64 - __VIRTUAL_MASK_SHIFT + 1);
>>
>> Should this be:
>>
>> shift_rip = (64 - __VIRTUAL_MASK_SHIFT - 1);
>> ?
>
> I removed a set of parentheses, which switched the sign from -1 to +1.
> I could put it back if that's less confusing.
>

I mean isn't it supposed to be:
shift_rip = (64 - 48) for 4 level, now it's
shift_rip = (64 - 46)

__VIRTUAL_MASK_SHIFT == 47


> Brian Gerst
>


2023-07-18 15:43:46

by Brian Gerst

[permalink] [raw]
Subject: Re: [PATCH 2/6] x86/entry/64: Convert SYSRET validation tests to C

On Tue, Jul 18, 2023 at 10:49 AM Mika Penttilä <[email protected]> wrote:
>
>
>
> On 18.7.2023 17.25, Brian Gerst wrote:
> > On Tue, Jul 18, 2023 at 10:17 AM Mika Penttilä <[email protected]> wrote:
> >>
> >> Hi,
> >>
> >>
> >> On 18.7.2023 16.44, Brian Gerst wrote:
> >>> Signed-off-by: Brian Gerst <[email protected]>
> >>> ---
> >>> arch/x86/entry/common.c | 50 ++++++++++++++++++++++++++++++-
> >>> arch/x86/entry/entry_64.S | 55 ++--------------------------------
> >>> arch/x86/include/asm/syscall.h | 2 +-
> >>> 3 files changed, 52 insertions(+), 55 deletions(-)
> >>>
> >>> diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
> >>> index 6c2826417b33..afe79c3f1c5b 100644
> >>> --- a/arch/x86/entry/common.c
> >>> +++ b/arch/x86/entry/common.c
> >>> @@ -70,8 +70,12 @@ static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)
> >>> return false;
> >>> }
> >>>
> >>> -__visible noinstr void do_syscall_64(struct pt_regs *regs, int nr)
> >>> +/* Returns true to return using SYSRET, or false to use IRET */
> >>> +__visible noinstr bool do_syscall_64(struct pt_regs *regs, int nr)
> >>> {
> >>> + long rip;
> >>> + unsigned int shift_rip;
> >>> +
> >>> add_random_kstack_offset();
> >>> nr = syscall_enter_from_user_mode(regs, nr);
> >>>
> >>> @@ -84,6 +88,50 @@ __visible noinstr void do_syscall_64(struct pt_regs *regs, int nr)
> >>>
> >>> instrumentation_end();
> >>> syscall_exit_to_user_mode(regs);
> >>> +
> >>> + /*
> >>> + * Check that the register state is valid for using SYSRET to exit
> >>> + * to userspace. Otherwise use the slower but fully capable IRET
> >>> + * exit path.
> >>> + */
> >>> +
> >>> + /* XEN PV guests always use IRET path */
> >>> + if (cpu_feature_enabled(X86_FEATURE_XENPV))
> >>> + return false;
> >>> +
> >>> + /* SYSRET requires RCX == RIP and R11 == EFLAGS */
> >>> + if (unlikely(regs->cx != regs->ip || regs->r11 != regs->flags))
> >>> + return false;
> >>> +
> >>> + /* CS and SS must match the values set in MSR_STAR */
> >>> + if (unlikely(regs->cs != __USER_CS || regs->ss != __USER_DS))
> >>> + return false;
> >>> +
> >>> + /*
> >>> + * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
> >>> + * in kernel space. This essentially lets the user take over
> >>> + * the kernel, since userspace controls RSP.
> >>> + *
> >>> + * Change top bits to match most significant bit (47th or 56th bit
> >>> + * depending on paging mode) in the address.
> >>> + */
> >>> + shift_rip = (64 - __VIRTUAL_MASK_SHIFT + 1);
> >>
> >> Should this be:
> >>
> >> shift_rip = (64 - __VIRTUAL_MASK_SHIFT - 1);
> >> ?
> >
> > I removed a set of parentheses, which switched the sign from -1 to +1.
> > I could put it back if that's less confusing.
> >
>
> I mean isn't it supposed to be:
> shift_rip = (64 - 48) for 4 level, now it's
> shift_rip = (64 - 46)
>
> __VIRTUAL_MASK_SHIFT == 47

Original:
(64 - (47 + 1)) = (64 - 48) = 16

c5: 48 c1 e1 10 shl $0x10,%rcx
c9: 48 c1 f9 10 sar $0x10,%rcx

New:
(64 - 47 - 1) = (17 - 1) = 16

18b: b9 10 00 00 00 mov $0x10,%ecx
193: 48 d3 e2 shl %cl,%rdx
196: 48 d3 fa sar %cl,%rdx

Anyways, I'll switch it back to the original formula. I'm not going
to argue any more about basic math.

Brian Gerst

2023-07-18 16:17:11

by Brian Gerst

[permalink] [raw]
Subject: Re: [PATCH 2/6] x86/entry/64: Convert SYSRET validation tests to C

On Tue, Jul 18, 2023 at 11:21 AM Brian Gerst <[email protected]> wrote:
>
> On Tue, Jul 18, 2023 at 10:49 AM Mika Penttilä <[email protected]> wrote:
> >
> >
> >
> > On 18.7.2023 17.25, Brian Gerst wrote:
> > > On Tue, Jul 18, 2023 at 10:17 AM Mika Penttilä <[email protected]> wrote:
> > >>
> > >> Hi,
> > >>
> > >>
> > >> On 18.7.2023 16.44, Brian Gerst wrote:
> > >>> Signed-off-by: Brian Gerst <[email protected]>
> > >>> ---
> > >>> arch/x86/entry/common.c | 50 ++++++++++++++++++++++++++++++-
> > >>> arch/x86/entry/entry_64.S | 55 ++--------------------------------
> > >>> arch/x86/include/asm/syscall.h | 2 +-
> > >>> 3 files changed, 52 insertions(+), 55 deletions(-)
> > >>>
> > >>> diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
> > >>> index 6c2826417b33..afe79c3f1c5b 100644
> > >>> --- a/arch/x86/entry/common.c
> > >>> +++ b/arch/x86/entry/common.c
> > >>> @@ -70,8 +70,12 @@ static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)
> > >>> return false;
> > >>> }
> > >>>
> > >>> -__visible noinstr void do_syscall_64(struct pt_regs *regs, int nr)
> > >>> +/* Returns true to return using SYSRET, or false to use IRET */
> > >>> +__visible noinstr bool do_syscall_64(struct pt_regs *regs, int nr)
> > >>> {
> > >>> + long rip;
> > >>> + unsigned int shift_rip;
> > >>> +
> > >>> add_random_kstack_offset();
> > >>> nr = syscall_enter_from_user_mode(regs, nr);
> > >>>
> > >>> @@ -84,6 +88,50 @@ __visible noinstr void do_syscall_64(struct pt_regs *regs, int nr)
> > >>>
> > >>> instrumentation_end();
> > >>> syscall_exit_to_user_mode(regs);
> > >>> +
> > >>> + /*
> > >>> + * Check that the register state is valid for using SYSRET to exit
> > >>> + * to userspace. Otherwise use the slower but fully capable IRET
> > >>> + * exit path.
> > >>> + */
> > >>> +
> > >>> + /* XEN PV guests always use IRET path */
> > >>> + if (cpu_feature_enabled(X86_FEATURE_XENPV))
> > >>> + return false;
> > >>> +
> > >>> + /* SYSRET requires RCX == RIP and R11 == EFLAGS */
> > >>> + if (unlikely(regs->cx != regs->ip || regs->r11 != regs->flags))
> > >>> + return false;
> > >>> +
> > >>> + /* CS and SS must match the values set in MSR_STAR */
> > >>> + if (unlikely(regs->cs != __USER_CS || regs->ss != __USER_DS))
> > >>> + return false;
> > >>> +
> > >>> + /*
> > >>> + * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
> > >>> + * in kernel space. This essentially lets the user take over
> > >>> + * the kernel, since userspace controls RSP.
> > >>> + *
> > >>> + * Change top bits to match most significant bit (47th or 56th bit
> > >>> + * depending on paging mode) in the address.
> > >>> + */
> > >>> + shift_rip = (64 - __VIRTUAL_MASK_SHIFT + 1);
> > >>
> > >> Should this be:
> > >>
> > >> shift_rip = (64 - __VIRTUAL_MASK_SHIFT - 1);
> > >> ?
> > >
> > > I removed a set of parentheses, which switched the sign from -1 to +1.
> > > I could put it back if that's less confusing.
> > >
> >
> > I mean isn't it supposed to be:
> > shift_rip = (64 - 48) for 4 level, now it's
> > shift_rip = (64 - 46)
> >
> > __VIRTUAL_MASK_SHIFT == 47

My apologies, you were right. I've been sitting on this series for a
while and finally got around to posting it and didn't catch that
error.

>
> Original:
> (64 - (47 + 1)) = (64 - 48) = 16
>
> c5: 48 c1 e1 10 shl $0x10,%rcx
> c9: 48 c1 f9 10 sar $0x10,%rcx

This was wrong. I hastily compiled this after I had reverted to the
original formula.

> New:
> (64 - 47 - 1) = (17 - 1) = 16
>
> 18b: b9 10 00 00 00 mov $0x10,%ecx
> 193: 48 d3 e2 shl %cl,%rdx
> 196: 48 d3 fa sar %cl,%rdx
>
> Anyways, I'll switch it back to the original formula. I'm not going
> to argue any more about basic math.

I'll send a v2 later after any more feedback. Thanks.

Brian Gerst