In __fpu__restore_sig(), 'init_fpstate.xsave' and part of 'fpu->state.xsave'
are restored separately to xregs. However, as stated in
__cpu_invalidate_fpregs_state():

  Any code that clobbers the FPU registers or updates the in-memory
  FPU state for a task MUST let the rest of the kernel know that the
  FPU registers are no longer valid for this task.

This code violates that rule. Should the restoration fail, the other
task's context is corrupted.
This problem does not occur very often because copy_*_to_xregs() succeeds
most of the time. It occurs, for instance, in copy_user_to_fpregs_zeroing()
when the first half of the restoration succeeds and the other half fails.
This can be triggered by running glibc tests, where a non-present user
stack page causes the XRSTOR to fail.
The introduction of supervisor xstates and CET, while not contributing to
the problem, makes it more detectable. After init_fpstate and the Shadow
Stack pointer have been restored to xregs, the XRSTOR from the user stack
fails and fpu_fpregs_owner_ctx is not updated. The task currently owning
fpregs then uses the corrupted Shadow Stack pointer and triggers a
control-protection fault.
Fix it by adding __cpu_invalidate_fpregs_state() to the functions that copy
fpstate to fpregs: copy_*_to_xregs_*(), copy_*_to_fxregs_*(), and
copy_*_to_fregs_*(). The alternative would be to fix each of the call sites
individually. __cpu_invalidate_fpregs_state() is chosen over
fpregs_deactivate() because these functions are called under fpregs_lock()
protection.
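For reference, the chosen helper simply clears the per-CPU owner pointer; a
sketch of its definition in arch/x86/include/asm/fpu/internal.h, assuming
the v5.4-era code:

  /* Clear this CPU's fpregs ownership; callers must have preemption off. */
  static inline void __cpu_invalidate_fpregs_state(void)
  {
          __this_cpu_write(fpu_fpregs_owner_ctx, NULL);
  }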
In addition to sigreturn, all call sites of these functions were also
checked:
- copy_init_fpstate_to_fpregs();
- copy_kernel_to_fpregs();
- ex_handler_fprestore();
- fpu__save(); and
- fpu__copy().
In fpu__save() and fpu__copy(), fpregs are re-activated because they are
considered valid in both cases.
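For context, activation and invalidation just re-point or clear the per-CPU
owner; a sketch of the helpers as they looked around v5.4 (assumption: they
were unchanged at the time of this patch; the tracepoints are the main
difference from the bare invalidator above):

  static inline void fpregs_deactivate(struct fpu *fpu)
  {
          this_cpu_write(fpu_fpregs_owner_ctx, NULL);
          trace_x86_fpu_regs_deactivated(fpu);
  }

  static inline void fpregs_activate(struct fpu *fpu)
  {
          this_cpu_write(fpu_fpregs_owner_ctx, fpu);
          trace_x86_fpu_regs_activated(fpu);
  }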
v2:
Add the missing EXPORT_SYMBOL_GPL(fpu_fpregs_owner_ctx).
Signed-off-by: Yu-cheng Yu <[email protected]>
Acked-by: Dave Hansen <[email protected]>
---
arch/x86/include/asm/fpu/internal.h | 14 ++++++++++++++
arch/x86/kernel/fpu/core.c | 16 ++++++++++++++--
2 files changed, 28 insertions(+), 2 deletions(-)
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 44c48e34d799..f317da2c5ca5 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -142,6 +142,8 @@ extern void fpstate_sanitize_xstate(struct fpu *fpu);
_ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_fprestore) \
: output : input)
+static inline void __cpu_invalidate_fpregs_state(void);
+
static inline int copy_fregs_to_user(struct fregs_state __user *fx)
{
return user_insn(fnsave %[fx]; fwait, [fx] "=m" (*fx), "m" (*fx));
@@ -158,6 +160,8 @@ static inline int copy_fxregs_to_user(struct fxregs_state __user *fx)
static inline void copy_kernel_to_fxregs(struct fxregs_state *fx)
{
+ __cpu_invalidate_fpregs_state();
+
if (IS_ENABLED(CONFIG_X86_32))
kernel_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
else
@@ -166,6 +170,8 @@ static inline void copy_kernel_to_fxregs(struct fxregs_state *fx)
static inline int copy_kernel_to_fxregs_err(struct fxregs_state *fx)
{
+ __cpu_invalidate_fpregs_state();
+
if (IS_ENABLED(CONFIG_X86_32))
return kernel_insn_err(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
else
@@ -174,6 +180,8 @@ static inline int copy_kernel_to_fxregs_err(struct fxregs_state *fx)
static inline int copy_user_to_fxregs(struct fxregs_state __user *fx)
{
+ __cpu_invalidate_fpregs_state();
+
if (IS_ENABLED(CONFIG_X86_32))
return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
else
@@ -182,16 +190,19 @@ static inline int copy_user_to_fxregs(struct fxregs_state __user *fx)
static inline void copy_kernel_to_fregs(struct fregs_state *fx)
{
+ __cpu_invalidate_fpregs_state();
kernel_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
}
static inline int copy_kernel_to_fregs_err(struct fregs_state *fx)
{
+ __cpu_invalidate_fpregs_state();
return kernel_insn_err(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
}
static inline int copy_user_to_fregs(struct fregs_state __user *fx)
{
+ __cpu_invalidate_fpregs_state();
return user_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
}
@@ -340,6 +351,7 @@ static inline void copy_kernel_to_xregs(struct xregs_state *xstate, u64 mask)
u32 lmask = mask;
u32 hmask = mask >> 32;
+ __cpu_invalidate_fpregs_state();
XSTATE_XRESTORE(xstate, lmask, hmask);
}
@@ -382,6 +394,7 @@ static inline int copy_user_to_xregs(struct xregs_state __user *buf, u64 mask)
u32 hmask = mask >> 32;
int err;
+ __cpu_invalidate_fpregs_state();
stac();
XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
clac();
@@ -399,6 +412,7 @@ static inline int copy_kernel_to_xregs_err(struct xregs_state *xstate, u64 mask)
u32 hmask = mask >> 32;
int err;
+ __cpu_invalidate_fpregs_state();
XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
return err;
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 12c70840980e..4e5151e43a2c 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -42,6 +42,7 @@ static DEFINE_PER_CPU(bool, in_kernel_fpu);
* Track which context is using the FPU on the CPU:
*/
DEFINE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);
+EXPORT_SYMBOL_GPL(fpu_fpregs_owner_ctx);
static bool kernel_fpu_disabled(void)
{
@@ -127,7 +128,12 @@ void fpu__save(struct fpu *fpu)
if (!test_thread_flag(TIF_NEED_FPU_LOAD)) {
if (!copy_fpregs_to_fpstate(fpu)) {
+ /*
+ * copy_kernel_to_fpregs deactivates fpregs;
+ * re-activate fpregs after that.
+ */
copy_kernel_to_fpregs(&fpu->state);
+ fpregs_activate(fpu);
}
}
@@ -191,11 +197,17 @@ int fpu__copy(struct task_struct *dst, struct task_struct *src)
* register contents so we have to load them back. )
*/
fpregs_lock();
- if (test_thread_flag(TIF_NEED_FPU_LOAD))
+ if (test_thread_flag(TIF_NEED_FPU_LOAD)) {
memcpy(&dst_fpu->state, &src_fpu->state, fpu_kernel_xstate_size);
- else if (!copy_fpregs_to_fpstate(dst_fpu))
+ } else if (!copy_fpregs_to_fpstate(dst_fpu)) {
+ /*
+ * copy_kernel_to_fpregs deactivates fpregs;
+ * re-activate fpregs after that.
+ */
copy_kernel_to_fpregs(&dst_fpu->state);
+ fpregs_activate(src_fpu);
+ }
fpregs_unlock();
--
2.17.1
On 2019-12-18 12:53:59 [-0800], Yu-cheng Yu wrote:
> I could have explained this better, sorry! I will explain the first
> case below; other cases are similar.
>
> In copy_user_to_fpregs_zeroing(), we have:
>
> if (user_xsave()) {
> ...
> if (unlikely(init_bv))
> copy_kernel_to_xregs(&init_fpstate.xsave, init_bv);
> return copy_user_to_xregs(buf, xbv);
> ...
> }
>
> The copy_user_to_xregs() may fail, and when that happens, before going to
> the slow path, there is fpregs_unlock() and context switches may happen.
The context switch may only happen after fpregs_unlock().
> However, at this point, fpu_fpregs_owner_ctx has not been changed; it could
> still be another task's FPU.
TIF_NEED_FPU_LOAD is set for the task in __fpu__restore_sig() and its
context has been invalidated (via __fpu_invalidate_fpregs_state()). So the
FPU registers may contain another task's content and
fpu_fpregs_owner_ctx points to another context.
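The danger is in how the register load gets skipped later: a task's state
is considered live on the CPU when it is still the registered owner. A
sketch of the check, as it looked around v5.4:

  /*
   * If this returns true, the kernel skips restoring the FPU state from
   * memory on context switch / return to userland.
   */
  static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu)
  {
          return fpu == this_cpu_read(fpu_fpregs_owner_ctx) &&
                 cpu == fpu->last_cpu;
  }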
> For this to happen and to be detected, the user
> stack page needs to be non-present, fpu_fpregs_owner_ctx need to be another task,
> and that other task needs to be able to detect its registers are modified.
> The last factor is not easy to reproduce, and a CET control-protection fault
> helps.
So far everything is legal. However, if there is a context switch before
fpregs_lock(), then this is bad because we don't account for that.
So, something like this:
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -352,6 +352,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
fpregs_unlock();
return 0;
}
+ fpregs_deactivate(fpu);
fpregs_unlock();
}
@@ -403,6 +404,8 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
}
if (!ret)
fpregs_mark_activate();
+ else
+ fpregs_deactivate(fpu);
fpregs_unlock();
err_out:
Should be enough.
> > Can you tell me which glibc test? I would like to reproduce this.
> >
> > > The introduction of supervisor xstates and CET, while not contributing to
> > > the problem, makes it more detectable. After init_fpstate and the Shadow
> > > Stack pointer have been restored to xregs, the XRSTOR from user stack
> > > fails and fpu_fpregs_owner_ctx is not updated. The task currently owning
> > > fpregs then uses the corrupted Shadow Stack pointer and triggers a control-
> > > protection fault.
> >
> > So I don't need new HW with supervisor xstates and CET? A plain KVM box
> > with SSE2 and so on should be enough?
>
> What I do is clone the whole glibc source and run multiple copies of
> "make check". In about 40 minutes or so, there are unexplained seg faults,
> or a few control-protection faults (if you enable CET). Please let me
> know if more clarification is needed.
Okay. Can you please try the above, and if that does not fix it, I will try
that glibc thing myself.
> Thanks,
> Yu-cheng
Sebastian
On Thu, 2019-12-19 at 15:22 +0100, Sebastian Andrzej Siewior wrote:
> [...]
>
> Should be enough.
Yes, this works. But then everywhere that calls copy_*_to_xregs_*() etc. needs to be checked.
Are there other alternatives?
Yu-cheng
On 2019-12-19 08:44:08 [-0800], Yu-cheng Yu wrote:
> Yes, this works. But then everywhere that calls copy_*_to_xregs_*() etc. needs to be checked.
> Are there other alternatives?
I don't like the big hammer approach of yours very much. It might make it
all "correct", but then it might lead to more "invalids" than needed.
It also requires exporting the symbol, which I would like to avoid.
So if this patch works for you and you don't find anything else where it
falls apart, then I will audit tomorrow all the callers which got the
"invalidator" added and check from that angle.
Unless someone here complains big time and wants this instead…
> Yu-cheng
Sebastian
On Thu, 2019-12-19 at 18:16 +0100, Sebastian Andrzej Siewior wrote:
> On 2019-12-19 08:44:08 [-0800], Yu-cheng Yu wrote:
> > Yes, this works. But then everywhere that calls copy_*_to_xregs_*() etc. needs to be checked.
> > Are there other alternatives?
>
> I don't like the big hammer approach of yours very much. It might make it
> all "correct", but then it might lead to more "invalids" than needed.
> It also requires exporting the symbol, which I would like to avoid.
Copying to registers invalidates the current fpregs context. It might not
cause extra register loading, because the registers are in fact already
invalidated and any task owning the context needs to reload anyway. Setting
fpu_fpregs_owner_ctx only lets the rest of the kernel know about what has
already happened.
But I agree with you that the patch does look biggish.
>
> So if this patch works for you and you don't find anything else where it
> falls apart, then I will audit tomorrow all the callers which got the
> "invalidator" added and check from that angle.
Yes, that works for me. Also, most of these call sites are under
fpregs_lock(), and we could use __cpu_invalidate_fpregs_state().
I was also thinking of adding warnings for when any new code re-introduces
the issue, but I am not sure where to add that. Do you think that is needed?
Thanks,
Yu-cheng
In __fpu__restore_sig() we need to reset `fpu_fpregs_owner_ctx' if the
FPU state was not fully restored. Otherwise the following may happen
(on the same CPU):
  Task A                     Task B                fpu_fpregs_owner_ctx
  *active*                                         A.fpu
  __fpu__restore_sig()
                             ctx switch            load B.fpu
                             *active*              B.fpu
  fpregs_lock()
  copy_user_to_fpregs_zeroing()
    copy_kernel_to_xregs() *modify*
    copy_user_to_xregs() *fails*
  fpregs_unlock()
                             ctx switch            skip loading B.fpu,
                                                   *active*  B.fpu
In the success case we set `fpu_fpregs_owner_ctx' to the current task.
In the failure case we *might* have modified the FPU state because we
loaded the init state. In this case we need to reset
`fpu_fpregs_owner_ctx' to ensure that the FPU state of the following
task is loaded from saved state (and not skipped because it was the
previous state).
Reset `fpu_fpregs_owner_ctx' after a failure during restore occurred to
ensure that the FPU state for the next task is always loaded.
Fixes: 5f409e20b7945 ("x86/fpu: Defer FPU state load until return to userspace")
Reported-by: Yu-cheng Yu <[email protected]>
Debugged-by: Yu-cheng Yu <[email protected]>
Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
---
arch/x86/kernel/fpu/signal.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 0071b794ed193..400a05e1c1c51 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -352,6 +352,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
fpregs_unlock();
return 0;
}
+ fpregs_deactivate(fpu);
fpregs_unlock();
}
@@ -403,6 +404,8 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
}
if (!ret)
fpregs_mark_activate();
+ else
+ fpregs_deactivate(fpu);
fpregs_unlock();
err_out:
--
2.24.1
On 2019-12-19 09:40:06 [-0800], Yu-cheng Yu wrote:
> On Thu, 2019-12-19 at 18:16 +0100, Sebastian Andrzej Siewior wrote:
> > On 2019-12-19 08:44:08 [-0800], Yu-cheng Yu wrote:
> > > Yes, this works. But then everywhere that calls copy_*_to_xregs_*() etc. needs to be checked.
> > > Are there other alternatives?
> >
> > I don't like the big hammer approach of yours very much. It might make it
> > all "correct", but then it might lead to more "invalids" than needed.
> > It also requires exporting the symbol, which I would like to avoid.
>
> Copying to registers invalidates the current fpregs context. It might not
> cause extra register loading, because the registers are in fact already
> invalidated and any task owning the context needs to reload anyway. Setting
> fpu_fpregs_owner_ctx only lets the rest of the kernel know about what has
> already happened.
>
> But I agree with you that the patch does look biggish.
Now that I have looked at it:
Kernel loads don't fail. If they do fail, we end up in the handler and
restore the init state, so there is no need to reset `fpu_fpregs_owner_ctx'
in this case. The variable is actually set to the task's FPU state, so
resetting is not required.
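(The handler referred to is ex_handler_fprestore(); a sketch of it from
arch/x86/mm/extable.c, assuming the v5.4-era signature:)

  __visible bool ex_handler_fprestore(const struct exception_table_entry *fixup,
                                      struct pt_regs *regs, int trapnr,
                                      unsigned long error_code,
                                      unsigned long fault_addr)
  {
          regs->ip = ex_fixup_addr(fixup);

          WARN_ONCE(1, "Bad FPU state detected at %pB, reinitializing FPU registers.",
                    (void *)instruction_pointer(regs));

          /* Wipe the registers with a known-good (init) state. */
          __copy_kernel_to_fpregs(&init_fpstate, -1);
          return true;
  }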
fpu__save() invokes copy_kernel_to_fpregs() (on older boxes) and by
resetting `fpu_fpregs_owner_ctx' we would load it twice (in fpu__save()
and on return to userland).
As far as I can tell, the only problematic case is the signal code, because
here the state restore *may* fail and we *may* do it in two steps. The
error happens only if both `may's are true.
> > So if this patch works for you and you don't find anything else where it
> > falls apart, then I will audit tomorrow all the callers which got the
> > "invalidator" added and check from that angle.
>
> Yes, that works for me. Also, most of these call sites are under
> fpregs_lock(), and we could use __cpu_invalidate_fpregs_state().
> I was also thinking of adding warnings for when any new code re-introduces
> the issue, but I am not sure where to add that. Do you think that is needed?
I was thinking about it. So the `read-FPU-state' function must be
invoked within the fpregs_lock() section. This could be easily
enforced. At fpregs_unlock() time, `fpu_fpregs_owner_ctx' must be NULL or
pointing to the task's FPU.
My brain is fried for today, so I'm not sure if this is a sane approach.
But it might be a start.
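A rough sketch of what such a check could look like (hypothetical, not part
of any posted patch; the helper name is made up):

  /*
   * Hypothetical debug check: at fpregs_unlock() time the owner must
   * either have been invalidated (NULL) or be the current task's FPU.
   */
  static inline void fpregs_assert_owner_sane(void)
  {
          struct fpu *owner = this_cpu_read(fpu_fpregs_owner_ctx);

          WARN_ON_FPU(owner && owner != &current->thread.fpu);
  }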
> Thanks,
> Yu-cheng
Sebastian
On Fri, 2019-12-20 at 21:16 +0100, Sebastian Andrzej Siewior wrote:
> [...]
> Now that I have looked at it:
> Kernel loads don't fail. If they do fail, we end up in the handler and
> restore the init state, so there is no need to reset `fpu_fpregs_owner_ctx'
> in this case. The variable is actually set to the task's FPU state, so
> resetting is not required.
Agree.
> fpu__save() invokes copy_kernel_to_fpregs() (on older boxes) and by
> resetting `fpu_fpregs_owner_ctx' we would load it twice (in fpu__save()
> and on return to userland).
That is true.
> As far as I can tell, the only problematic case is the signal code, because
> here the state restore *may* fail and we *may* do it in two steps. The
> error happens only if both `may's are true.
>
> > > So if this patch works for you and you don't find anything else where it
> > > falls apart, then I will audit tomorrow all the callers which got the
> > > "invalidator" added and check from that angle.
> >
> > Yes, that works for me. Also, most of these call sites are under
> > fpregs_lock(), and we could use __cpu_invalidate_fpregs_state().
> > I was also thinking of adding warnings for when any new code re-introduces
> > the issue, but I am not sure where to add that. Do you think that is needed?
>
> I was thinking about it. So the `read-FPU-state' function must be
> invoked within the fpregs_lock() section. This could be easily
> enforced. At fpregs_unlock() time, `fpu_fpregs_owner_ctx' must be NULL or
> pointing to the task's FPU.
> My brain is fried for today, so I'm not sure if this is a sane approach.
> But it might be a start.
I will also think about it. Thanks!
Yu-cheng
The following commit has been merged into the x86/fpu branch of tip:
Commit-ID: bbc55341b9c67645d1a5471506370caf7dd4a203
Gitweb: https://git.kernel.org/tip/bbc55341b9c67645d1a5471506370caf7dd4a203
Author: Sebastian Andrzej Siewior <[email protected]>
AuthorDate: Fri, 20 Dec 2019 20:59:06 +01:00
Committer: Borislav Petkov <[email protected]>
CommitterDate: Tue, 07 Jan 2020 13:44:42 +01:00
x86/fpu: Deactivate FPU state after failure during state load
In __fpu__restore_sig(), fpu_fpregs_owner_ctx needs to be reset if the
FPU state was not fully restored. Otherwise the following may happen (on
the same CPU):
  Task A                     Task B                fpu_fpregs_owner_ctx
  *active*                                         A.fpu
  __fpu__restore_sig()
                             ctx switch            load B.fpu
                             *active*              B.fpu
  fpregs_lock()
  copy_user_to_fpregs_zeroing()
    copy_kernel_to_xregs() *modify*
    copy_user_to_xregs() *fails*
  fpregs_unlock()
                             ctx switch            skip loading B.fpu,
                                                   *active*  B.fpu
In the success case, fpu_fpregs_owner_ctx is set to the current task.
In the failure case, the FPU state might have been modified by loading
the init state.
In this case, fpu_fpregs_owner_ctx needs to be reset in order to ensure
that the FPU state of the following task is loaded from saved state (and
not skipped because it was the previous state).
Reset fpu_fpregs_owner_ctx after a failure during restore occurred, to
ensure that the FPU state for the next task is always loaded.
The problem was debugged-by Yu-cheng Yu <[email protected]>.
[ bp: Massage commit message. ]
Fixes: 5f409e20b7945 ("x86/fpu: Defer FPU state load until return to userspace")
Reported-by: Yu-cheng Yu <[email protected]>
Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
Signed-off-by: Borislav Petkov <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: Fenghua Yu <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Jann Horn <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: "Ravi V. Shankar" <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Tony Luck <[email protected]>
Cc: x86-ml <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
---
arch/x86/kernel/fpu/signal.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 0071b79..400a05e 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -352,6 +352,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
fpregs_unlock();
return 0;
}
+ fpregs_deactivate(fpu);
fpregs_unlock();
}
@@ -403,6 +404,8 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
}
if (!ret)
fpregs_mark_activate();
+ else
+ fpregs_deactivate(fpu);
fpregs_unlock();
err_out:
> On Jan 7, 2020, at 2:52 AM, tip-bot2 for Sebastian Andrzej Siewior <[email protected]> wrote:
>
> [...]
>
Wow, __fpu__restore_sig is a mess. We have __copy_from... that is
Obviously Incorrect (tm) even though it's not obviously exploitable.
(It's wrong because the *wrong pointer* is checked with access_ok().)
We have a fast path that will execute just enough of the time to make
debugging the slow path really annoying. (We should probably delete
the fast path.) There are pagefault_disable() calls in there mostly to
confuse people. (So we take a fault and sleep - big deal. We have
temporarily corrupt state, but no one will ever read it. The retry
after sleeping will clobber xstate, but lazy save is long gone and
this should be fine now. The real issue is that, if we're preempted
after a successful restore, then the new state will get lost.)
So either we should delete the fast path or we should make it work reliably and delete the slow path. And we should get rid of the __copy. And we should have some test cases.
BTW, how was the bug in here discovered? It looks like it only affects signal restore failure, which is usually not survivable unless the user program is really trying.
On Tue, 2020-01-07 at 10:41 -1000, Andy Lutomirski wrote:
> > On Jan 7, 2020, at 2:52 AM, tip-bot2 for Sebastian Andrzej Siewior <[email protected]> wrote:
> > [...]
>
> [...]
>
> BTW, how was the bug in here discovered? It looks like it only affects signal restore failure, which is usually not survivable unless the user program is really trying.
It causes corruption in other tasks, e.g. a non-CET task gets a control-protection fault.
Yu-cheng
On 2020-01-07 10:41:52 [-1000], Andy Lutomirski wrote:
> Wow, __fpu__restore_sig is a mess. We have __copy_from... that is
> Obviously Incorrect (tm) even though it's not obviously exploitable.
> (It's wrong because the *wrong pointer* is checked with access_ok().)
> We have a fast path that will execute just enough of the time to make
> debugging the slow path really annoying. (We should probably delete
> the fast path.) There are pagefault_disable() calls in there mostly to
> confuse people. (So we take a fault and sleep - big deal. We have
> temporarily corrupt state, but no one will ever read it. The retry
> after sleeping will clobber xstate, but lazy save is long gone and
> this should be fine now. The real issue is that, if we're preempted
> after a successful restore, then the new state will get lost.)
There is preempt_disable() as part of the FPU locking, since we can't
change the content of the FPU registers (the CPU's or those within the
task's state) and get interrupted by task preemption. With preemption
disabled we can't take a page fault.
We need to load the page from userland, which may fault. The context
switch saves the _current_ FPU state only if TIF_NEED_FPU_LOAD is cleared.
This needs to happen atomically.
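(For reference, a sketch of the locking helpers as they looked around v5.4,
before the PREEMPT_RT rework:)

  static inline void fpregs_lock(void)
  {
          preempt_disable();
          local_bh_disable();
  }

  static inline void fpregs_unlock(void)
  {
          local_bh_enable();
          preempt_enable();
  }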
The fast path may fail if the stack is not faulted in (custom stack,
madvise(,, MADV_DONTNEED)).
> So either we should delete the fast path or we should make it work
> reliably and delete the slow path. And we should get rid of the
> __copy. And we should have some test cases.
Without the fastpath the average case is too slow; people complained about
that. That is why we ended up with the fastpath in the last revision of
the series.
The Go people contributed a testcase. Maybe I should hack it up so that we
trigger each path, and post it, since that obviously did not happen.
Boris, do you remember why we did not include their testcase yet?
> BTW, how was the bug in here discovered? It looks like it only
> affects signal restore failure, which is usually not survivable unless
> the user program is really trying.
The glibc test suite.
Sebastian
On Tue, Jan 07, 2020 at 10:11:34PM +0100, Sebastian Andrzej Siewior wrote:
> The Go people contributed a testcase. Maybe I should hack it up so that we
> trigger each path, and post it, since that obviously did not happen.
Yes, please do.
> Boris, do you remember why we did not include their testcase yet?
It is in my ever-growing TODO list so if you could find some time to add
it to selftests, I'd appreciate it a lot:
https://lkml.kernel.org/r/[email protected]
And Andy wants it in the slow category (see reply), and that's fine with
me - I'd rather have some tests than none at all.
Thx.
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
On Tue, Jan 07, 2020 at 10:41:52AM -1000, Andy Lutomirski wrote:
> Wow, __fpu__restore_sig is a mess.
FWIW, I share your sentiment and I'd very much take patches which
simplify that flow. It is causing me headaches each time I have to look
at it.
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette