Greetings,
FYI, we noticed the following commit (built with gcc-11):
commit: 9c5743dff415a7384669229d327702ea9bd45560 ("x86/uaccess: fix code generation in put_user()")
https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git master
in testcase: boot
on test machine: qemu-system-x86_64 -enable-kvm -cpu SandyBridge -smp 2 -m 16G
caused below changes (please refer to attached dmesg/kmsg for entire log/backtrace):
3cb12d27ff655e57 9c5743dff415a7384669229d327
---------------- ---------------------------
fail:runs %reproduction fail:runs
| | |
:50 82% 41:52 dmesg.Kernel_panic-not_syncing:Attempted_to_kill_init!exitcode=
:50 82% 41:52 dmesg.RIP:copy_kernel_to_fpregs
:50 82% 41:52 dmesg.RIP:ex_handler_fprestore
:50 82% 41:52 dmesg.WARNING:at_arch/x86/mm/extable.c:#ex_handler_fprestore
If you fix the issue, kindly add the following tag
Reported-by: kernel test robot <[email protected]>
[ 266.823123][ T1] WARNING: CPU: 0 PID: 1 at arch/x86/mm/extable.c:65 ex_handler_fprestore (??:?)
[ 266.827557][ T1] Modules linked in:
[ 266.828722][ T1] CPU: 0 PID: 1 Comm: init Not tainted 5.9.0-13419-g9c5743dff415 #1
[ 266.830775][ T1] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.0-debian-1.16.0-4 04/01/2014
[ 266.833337][ T1] RIP: 0010:ex_handler_fprestore (??:?)
[ 266.834643][ T1] Code: c7 c7 d0 73 75 89 48 83 05 e2 8b 19 0b 01 48 83 05 e2 8b 19 0b 01 c6 05 85 89 e9 09 01 e8 c0 97 0e 07 48 83 05 d6 8b 19 0b 01 <0f> 0b 48 83 05 d4 8b 19 0b 01 48 83 05 d4 8b 19 0b 01 eb a5 66 90
All code
========
0: c7 c7 d0 73 75 89 mov $0x897573d0,%edi
6: 48 83 05 e2 8b 19 0b addq $0x1,0xb198be2(%rip) # 0xb198bf0
d: 01
e: 48 83 05 e2 8b 19 0b addq $0x1,0xb198be2(%rip) # 0xb198bf8
15: 01
16: c6 05 85 89 e9 09 01 movb $0x1,0x9e98985(%rip) # 0x9e989a2
1d: e8 c0 97 0e 07 callq 0x70e97e2
22: 48 83 05 d6 8b 19 0b addq $0x1,0xb198bd6(%rip) # 0xb198c00
29: 01
2a:* 0f 0b ud2 <-- trapping instruction
2c: 48 83 05 d4 8b 19 0b addq $0x1,0xb198bd4(%rip) # 0xb198c08
33: 01
34: 48 83 05 d4 8b 19 0b addq $0x1,0xb198bd4(%rip) # 0xb198c10
3b: 01
3c: eb a5 jmp 0xffffffffffffffe3
3e: 66 90 xchg %ax,%ax
Code starting with the faulting instruction
===========================================
0: 0f 0b ud2
2: 48 83 05 d4 8b 19 0b addq $0x1,0xb198bd4(%rip) # 0xb198bde
9: 01
a: 48 83 05 d4 8b 19 0b addq $0x1,0xb198bd4(%rip) # 0xb198be6
11: 01
12: eb a5 jmp 0xffffffffffffffb9
14: 66 90 xchg %ax,%ax
[ 266.838800][ T1] RSP: 0018:ffffc90000013b90 EFLAGS: 00010202
[ 266.840342][ T1] RAX: 0000000000000000 RBX: ffffc90000013c68 RCX: ffffffff8cfb0490
[ 266.842462][ T1] RDX: 0000000000000049 RSI: 0000ffff8c2fbc60 RDI: ffffffff8c2fbc60
[ 266.844350][ T1] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000ffff8c2fbc60
[ 266.846289][ T1] R10: 0000ffffffffffff R11: 0000ffff8c2fbc67 R12: 0000000000000000
[ 266.848067][ T1] R13: 000000000000000d R14: 0000000000000000 R15: 0000000000000000
[ 266.849937][ T1] FS: 00007f32496c4700(0000) GS:ffffffff89ce0000(0000) knlGS:0000000000000000
[ 266.852121][ T1] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 266.853840][ T1] CR2: 00007f32489102c0 CR3: 0000000112b48000 CR4: 00000000000406b0
[ 266.855611][ T1] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 266.857677][ T1] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 266.859403][ T1] Call Trace:
[ 266.875421][ T1] fixup_exception (??:?)
[ 266.876756][ T1] exc_general_protection (??:?)
[ 266.878040][ T1] asm_exc_general_protection (??:?)
[ 266.879246][ T1] RIP: 0010:copy_kernel_to_fpregs (core.c:?)
[ 266.880748][ T1] Code: 05 31 84 1e 0b 48 c7 c7 50 47 2b 8c 48 8d 58 01 e8 c1 80 5c 00 b8 ff ff ff ff 48 89 1d 15 84 1e 0b 4c 89 e7 89 c2 48 0f ae 2f <48> c7 c7 58 47 2b 8c e8 60 82 5c 00 48 8b 05 01 84 1e 0b 48 c7 c7
All code
========
0: 05 31 84 1e 0b add $0xb1e8431,%eax
5: 48 c7 c7 50 47 2b 8c mov $0xffffffff8c2b4750,%rdi
c: 48 8d 58 01 lea 0x1(%rax),%rbx
10: e8 c1 80 5c 00 callq 0x5c80d6
15: b8 ff ff ff ff mov $0xffffffff,%eax
1a: 48 89 1d 15 84 1e 0b mov %rbx,0xb1e8415(%rip) # 0xb1e8436
21: 4c 89 e7 mov %r12,%rdi
24: 89 c2 mov %eax,%edx
26: 48 0f ae 2f xrstor64 (%rdi)
2a:* 48 c7 c7 58 47 2b 8c mov $0xffffffff8c2b4758,%rdi <-- trapping instruction
31: e8 60 82 5c 00 callq 0x5c8296
36: 48 8b 05 01 84 1e 0b mov 0xb1e8401(%rip),%rax # 0xb1e843e
3d: 48 rex.W
3e: c7 .byte 0xc7
3f: c7 .byte 0xc7
Code starting with the faulting instruction
===========================================
0: 48 c7 c7 58 47 2b 8c mov $0xffffffff8c2b4758,%rdi
7: e8 60 82 5c 00 callq 0x5c826c
c: 48 8b 05 01 84 1e 0b mov 0xb1e8401(%rip),%rax # 0xb1e8414
13: 48 rex.W
14: c7 .byte 0xc7
15: c7 .byte 0xc7
[ 266.884719][ T1] RSP: 0018:ffffc90000013d10 EFLAGS: 00010246
[ 266.886191][ T1] RAX: 00000000ffffffff RBX: 0000000000000001 RCX: ffffffff8cfb0458
[ 266.887841][ T1] RDX: 00000000ffffffff RSI: 0000ffff8c2b4750 RDI: ffff888100b42600
[ 266.893767][ T1] RBP: ffffffff8b042900 R08: 0000000000000000 R09: 0000ffff8c2b4750
[ 266.895513][ T1] R10: 0000ffffffffffff R11: 0000ffff8c2b4757 R12: ffff888100b42600
[ 266.897361][ T1] R13: ffff8881129e65c0 R14: ffff888100b424d0 R15: 00007ffcca9e0e00
[ 266.899210][ T1] __fpregs_load_activate (core.c:?)
[ 266.900418][ T1] copy_fpstate_to_sigframe (??:?)
[ 266.901947][ T1] get_sigframe+0x196/0x360
[ 266.903138][ T1] __setup_rt_frame (signal.c:?)
[ 266.904162][ T1] setup_rt_frame (signal.c:?)
[ 266.905386][ T1] handle_signal (signal.c:?)
[ 266.906423][ T1] arch_do_signal (??:?)
[ 266.907548][ T1] exit_to_user_mode_loop (common.c:?)
[ 266.908932][ T1] exit_to_user_mode_prepare (common.c:?)
[ 266.910354][ T1] syscall_exit_to_user_mode (??:?)
[ 266.911655][ T1] do_syscall_64 (??:?)
[ 266.912727][ T1] entry_SYSCALL_64_after_hwframe (??:?)
[ 266.914026][ T1] RIP: 0033:0x7f32488b5700
[ 266.915046][ T1] Code: 76 05 e9 f3 fd ff ff 48 8b 05 3c f7 37 00 64 c7 00 16 00 00 00 83 c8 ff c3 90 41 ba 08 00 00 00 48 63 ff b8 0e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 02 f3 c3 48 8b 15 0f f7 37 00 f7 d8 64 89 02
All code
========
0: 76 05 jbe 0x7
2: e9 f3 fd ff ff jmpq 0xfffffffffffffdfa
7: 48 8b 05 3c f7 37 00 mov 0x37f73c(%rip),%rax # 0x37f74a
e: 64 c7 00 16 00 00 00 movl $0x16,%fs:(%rax)
15: 83 c8 ff or $0xffffffff,%eax
18: c3 retq
19: 90 nop
1a: 41 ba 08 00 00 00 mov $0x8,%r10d
20: 48 63 ff movslq %edi,%rdi
23: b8 0e 00 00 00 mov $0xe,%eax
28: 0f 05 syscall
2a:* 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax <-- trapping instruction
30: 77 02 ja 0x34
32: f3 c3 repz retq
34: 48 8b 15 0f f7 37 00 mov 0x37f70f(%rip),%rdx # 0x37f74a
3b: f7 d8 neg %eax
3d: 64 89 02 mov %eax,%fs:(%rdx)
Code starting with the faulting instruction
===========================================
0: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax
6: 77 02 ja 0xa
8: f3 c3 repz retq
a: 48 8b 15 0f f7 37 00 mov 0x37f70f(%rip),%rdx # 0x37f720
11: f7 d8 neg %eax
13: 64 89 02 mov %eax,%fs:(%rdx)
To reproduce:
# build kernel
cd linux
cp config-5.9.0-13419-g9c5743dff415 .config
make HOSTCC=gcc-11 CC=gcc-11 ARCH=x86_64 olddefconfig prepare modules_prepare bzImage modules
make HOSTCC=gcc-11 CC=gcc-11 ARCH=x86_64 INSTALL_MOD_PATH=<mod-install-dir> modules_install
cd <mod-install-dir>
find lib/ | cpio -o -H newc --quiet | gzip > modules.cgz
git clone https://github.com/intel/lkp-tests.git
cd lkp-tests
bin/lkp qemu -k <bzImage> -m modules.cgz job-script # job-script is attached in this email
# if you come across any failure that blocks the test,
# please remove ~/.lkp and /lkp dir to run from a clean state.
--
0-DAY CI Kernel Test Service
https://01.org/lkp
On Fri, May 13, 2022 at 1:55 AM kernel test robot <[email protected]> wrote:
>
> FYI, we noticed the following commit (built with gcc-11): commit
> 9c5743dff415 ("x86/uaccess: fix code generation in put_user()")
> https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git master
>
> in testcase: boot
>
> on test machine: qemu-system-x86_64 -enable-kvm -cpu SandyBridge -smp 2 -m 16G
>
> caused below changes (please refer to attached dmesg/kmsg for entire log/backtrace):
Hmm. It sounds unlikely that _that_ commit caused the problem,
although tweaks to generate different code can obviously always expose
anything..
But considering that the fail:runs thing is 41:52, I suspect it's
something very timing-dependent and who knows how reliable the
bisection has been.
That commit did have some discussion about how to possibly do it more
nicely without the "register asm" thing, but I'm not finding anything
else about it, so I don't think it caused any actual real code
generation problems.
As such, it seems unlikely to then cause this FP state restore issue..
> [ 266.823123][ T1] WARNING: CPU: 0 PID: 1 at arch/x86/mm/extable.c:65 ex_handler_fprestore (??:?)
This is just
65 WARN_ONCE(1, "Bad FPU state detected at %pB,
reinitializing FPU registers.",
66 (void *)instruction_pointer(regs));
which isn't great, in that it implies that there was bad fp state to
restore in the first place.
But that can technically happen when user space does something bad
too, notably when it has used ptrace to change the FP state.
See commit d5c8028b4788 ("x86/fpu: Reinitialize FPU registers if
restoring FPU state fails") for more details.
And *this* part:
> [ 266.879246][ T1] RIP: 0010:copy_kernel_to_fpregs (core.c:?)
> [ 266.880748][ T1] Code: 05 31 84 1e 0b 48 c7 c7 50 47 2b 8c 48 8d 58 01 e8 c1 80 5c 00 b8 ff ff ff ff 48 89 1d 15 84 1e 0b 4c 89 e7 89 c2 48 0f ae 2f <48> c7 c7 58 47 2b 8c e8 60 82 5c 00 48 8b 05 01 84 1e 0b 48 c7 c7
> All code
> ========
> 0: 05 31 84 1e 0b add $0xb1e8431,%eax
> 5: 48 c7 c7 50 47 2b 8c mov $0xffffffff8c2b4750,%rdi
> c: 48 8d 58 01 lea 0x1(%rax),%rbx
> 10: e8 c1 80 5c 00 callq 0x5c80d6
> 15: b8 ff ff ff ff mov $0xffffffff,%eax
> 1a: 48 89 1d 15 84 1e 0b mov %rbx,0xb1e8415(%rip) # 0xb1e8436
> 21: 4c 89 e7 mov %r12,%rdi
> 24: 89 c2 mov %eax,%edx
> 26: 48 0f ae 2f xrstor64 (%rdi)
> 2a:* 48 c7 c7 58 47 2b 8c mov $0xffffffff8c2b4758,%rdi <-- trapping instruction
Seems to be just the exception stack chain (ie notice how it's
pointing to the instruction after the xrstor64, it's not that the
immediate register move really trapped).
> [ 266.899210][ T1] __fpregs_load_activate (core.c:?)
> [ 266.900418][ T1] copy_fpstate_to_sigframe (??:?)
> [ 266.901947][ T1] get_sigframe+0x196/0x360
> [ 266.903138][ T1] __setup_rt_frame (signal.c:?)
> [ 266.904162][ T1] setup_rt_frame (signal.c:?)
> [ 266.905386][ T1] handle_signal (signal.c:?)
> [ 266.906423][ T1] arch_do_signal (??:?)
.. and it is in the signal handling path when returning to user space. Hmm.
And then again, we have the exception stack entry all the way to user space:
> [ 266.914026][ T1] RIP: 0033:0x7f32488b5700
> [ 266.915046][ T1] Code: 76 05 e9 f3 fd ff ff 48 8b 05 3c f7 37 00 64 c7 00 16 00 00 00 83 c8 ff c3 90 41 ba 08 00 00 00 48 63 ff b8 0e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 02 f3 c3 48 8b 15 0f f7 37 00 f7 d8 64 89 02
> All code
> ========
> 0: 76 05 jbe 0x7
> 2: e9 f3 fd ff ff jmpq 0xfffffffffffffdfa
> 7: 48 8b 05 3c f7 37 00 mov 0x37f73c(%rip),%rax # 0x37f74a
> e: 64 c7 00 16 00 00 00 movl $0x16,%fs:(%rax)
> 15: 83 c8 ff or $0xffffffff,%eax
> 18: c3 retq
> 19: 90 nop
> 1a: 41 ba 08 00 00 00 mov $0x8,%r10d
> 20: 48 63 ff movslq %edi,%rdi
> 23: b8 0e 00 00 00 mov $0xe,%eax
> 28: 0f 05 syscall
> 2a:* 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax <-- trapping instruction
and again, it's just pointing back to after the 'syscall' instruction
that caused this whole chain of events.
Anyway, I *think* that what may be going on is some ptrace thing, but
let's bring in other people. Because I don't think that "x86/uaccess:
fix code generation in put_user()" commit is what triggered this, but
who knows.. The x86 FP code can be very grotty.
On Fri, May 13 2022 at 09:52, Linus Torvalds wrote:
> On Fri, May 13, 2022 at 1:55 AM kernel test robot <[email protected]> wrote:
> But considering that the fail:runs thing is 41:52, I suspect it's
> something very timing-dependent and who knows how reliable the
> bisection has been.
This smells very much like the issue which got fixed with
59f5ede3bc0f ("x86/fpu: Prevent FPU state corruption")
which resulted in the very same stack trace pattern because the restore
detects the fpstate corruption. The sigframe setup does:
if (TIF_NEED_FPU_LOAD)
restore();
save_to_sigframe();
But yes, in theory it might be caused by ptrace as well. See below.
>> 24: 89 c2 mov %eax,%edx
>> 26: 48 0f ae 2f xrstor64 (%rdi)
>> 2a:* 48 c7 c7 58 47 2b 8c mov $0xffffffff8c2b4758,%rdi <-- trapping instruction
>
> Seems to be just the exception stack chain (ie notice how it's
> pointing to the instruction after the xrstor64, it's not that the
> immediate register move really trapped).
which is caused by ex_handler_fprestore() itself because it stupidly
fixes up regs->ip _before_ the warning. This should obviously be done
afterwards. Without that fixup it would point at xrstor64.
>> 28: 0f 05 syscall
>> 2a:* 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax <-- trapping instruction
>
> and again, it's just pointing back to after the 'syscall' instruction
> that caused this whole chain of events.
>
> Anyway, I *think* that what may be going on is some ptrace thing, but
> let's bring in other people. Because I don't think that "x86/uaccess:
> fix code generation in put_user()" commit is what triggered this, but
> who knows.. The x86 FP code can be very grotty.
Courtesy of the corresponding hardware...
The code which copies the ptrace supplied state has a pile of sanity
checks to catch invalid state, but I wouldn't bet my hat on it that it's
100% complete. We can be more defensive here, but I would be surprised.
Something like the untested patch below. I'll expose it to some testing to see
what explodes.
Thanks,
tglx
---
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 39e1c8626ab9..c1228d6391c8 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1248,7 +1248,48 @@ static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf,
*/
int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf)
{
- return copy_uabi_to_xstate(fpstate, kbuf, NULL);
+ struct fpstate *tmpfps;
+ unsigned int fpsize;
+ int ret;
+
+ /* This cannot operate on current's fpstate */
+ if (WARN_ON_ONCE(fpstate == current->thread.fpu.fpstate))
+ return -EPERM;
+
+ /* Use a temporary fpstate for the xrstor validation below */
+ fpsize = fpstate->size + ALIGN(offsetof(struct fpstate, regs), 64);
+ tmpfps = vmalloc(fpsize);
+ if (!tmpfps)
+ return -ENOMEM;
+ memcpy(tmpfps, fpstate, fpstate->size);
+
+ ret = copy_uabi_to_xstate(tmpfps, kbuf, NULL);
+ if (ret)
+ goto out;
+ /*
+ * Ensure right here that the user space provided xstate content is
+ * correct. Save current's fpstate and invalidate the per-CPU FPU
+ * state.
+ */
+ kernel_fpu_begin_mask(0);
+ /*
+ * Limit the restore attempt to the user features as fpstate
+ * is not current's fpstate. So current's supervisor state
+ * has to be preserved and the target's supervisor state was
+ * not touched in copy_uabi_to_xstate().
+ */
+ ret = os_xrstor_safe(tmpfps, tmpfps->user_xfeatures);
+ kernel_fpu_end();
+ /*
+ * If the restore succeeded, copy the state. Otherwise
+ * keep the previous content.
+ */
+ if (!ret)
+ memcpy(fpstate, tmpfps, fpstate->size);
+
+out:
+ vfree(tmpfps);
+ return ret;
}
/*
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index dba2197c05c3..c0d852998d18 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -68,11 +68,10 @@ static bool ex_handler_sgx(const struct exception_table_entry *fixup,
static bool ex_handler_fprestore(const struct exception_table_entry *fixup,
struct pt_regs *regs)
{
- regs->ip = ex_fixup_addr(fixup);
-
WARN_ONCE(1, "Bad FPU state detected at %pB, reinitializing FPU registers.",
(void *)instruction_pointer(regs));
+ regs->ip = ex_fixup_addr(fixup);
fpu_reset_from_exception_fixup();
return true;
}