On Tue, Apr 19, 2022 at 6:52 PM Pawan Gupta
<[email protected]> wrote:
>
> From: Borislav Petkov <[email protected]>
>
> When resuming from a system sleep state, restore_processor_state()
> restores the boot CPU MSRs. These MSRs could be emulated by
> microcode. If the microcode is not loaded yet, writing to emulated MSRs
> leads to an unchecked MSR access error:
>
> [ 28.702947] PM: Calling lapic_suspend+0x0/0x210
> [ 28.703345] unchecked MSR access error: WRMSR to 0x10f (tried to write 0x0000000000000000) at rIP: 0xffffffff9b2819e4 (native_write_msr+0x4/0x20)
> [ 28.703357] Call Trace:
> [ 28.703359] <TASK>
> [ 28.703361] ? restore_processor_state+0x255/0x2d0
> [ 28.703369] x86_acpi_suspend_lowlevel+0x11f/0x170
> [ 28.703374] acpi_suspend_enter+0x4f/0x1f0
> [ 28.703379] suspend_devices_and_enter+0x6e0/0x7d0
> [ 28.703384] pm_suspend.cold+0x35c/0x3a7
> [ 28.703388] state_store+0x81/0xe0
> [ 28.703392] kobj_attr_store+0x12/0x20
> [ 28.703396] sysfs_kf_write+0x3f/0x50
> [ 28.703399] kernfs_fop_write_iter+0x13b/0x1d0
> [ 28.703403] new_sync_write+0x101/0x180
> [ 28.703408] vfs_write+0x217/0x2a0
> [ 28.703413] ksys_write+0x67/0xe0
> [ 28.703417] __x64_sys_write+0x1a/0x20
> [ 28.703421] do_syscall_64+0x3b/0x90
> [ 28.703426] entry_SYSCALL_64_after_hwframe+0x44/0xae
> [ 28.703429] RIP: 0033:0x7fda13c260a7
> [ 28.703434] Code: 64 89 02 48 c7 c0 ff ff ff ff eb bb 0f 1f 80 00 00 00 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 48 89 54 24 18 48 89 74 24
> [ 28.703437] RSP: 002b:00007fffa4060268 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
> [ 28.703441] RAX: ffffffffffffffda RBX: 0000000000000004 RCX: 00007fda13c260a7
> [ 28.703443] RDX: 0000000000000004 RSI: 000055a41f65a570 RDI: 0000000000000004
> [ 28.703444] RBP: 000055a41f65a570 R08: 0000000000000000 R09: 0000000000000004
> [ 28.703446] R10: 000055a41f0cc2a6 R11: 0000000000000246 R12: 0000000000000004
> [ 28.703447] R13: 000055a41f657510 R14: 00007fda13d014a0 R15: 00007fda13d008a0
__restore_processor_state() is used during resume from both
suspend-to-RAM and hibernation, but I don't think that the latter is
affected by the issue at hand, because microcode should be loaded by
the restore kernel in that case. However, loading the boot processor
microcode in __restore_processor_state() during resume from
hibernation still works, although it is redundant in that case.
It would be good to acknowledge the above in the changelog and also
mention in it that the issue is specific to suspend-to-RAM (ACPI S3 on
x86 PC systems).
> To ensure that microcode-emulated MSRs are available for restoration, load
> the microcode on the boot CPU before restoring these MSRs.
>
> Reported-by: Kyle D. Pelton <[email protected]>
> Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=215841
> Fixes: e2a1256b17b1 ("x86/speculation: Restore speculation related MSRs during S3 resume")
> Signed-off-by: Borislav Petkov <[email protected]>
> Signed-off-by: Pawan Gupta <[email protected]>
> Tested-by: Kyle D. Pelton <[email protected]>
> Cc: [email protected]
> ---
> arch/x86/include/asm/microcode.h | 2 ++
> arch/x86/kernel/cpu/microcode/core.c | 6 +++---
> arch/x86/power/cpu.c | 10 +++++++++-
> 3 files changed, 14 insertions(+), 4 deletions(-)
>
> diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
> index d6bfdfb0f0af..0c3d3440fe27 100644
> --- a/arch/x86/include/asm/microcode.h
> +++ b/arch/x86/include/asm/microcode.h
> @@ -131,10 +131,12 @@ extern void __init load_ucode_bsp(void);
> extern void load_ucode_ap(void);
> void reload_early_microcode(void);
> extern bool initrd_gone;
> +void microcode_bsp_resume(void);
> #else
> static inline void __init load_ucode_bsp(void) { }
> static inline void load_ucode_ap(void) { }
> static inline void reload_early_microcode(void) { }
> +static inline void microcode_bsp_resume(void) { }
> #endif
>
> #endif /* _ASM_X86_MICROCODE_H */
> diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
> index f955d25076ba..239ff5fcec6a 100644
> --- a/arch/x86/kernel/cpu/microcode/core.c
> +++ b/arch/x86/kernel/cpu/microcode/core.c
> @@ -758,9 +758,9 @@ static struct subsys_interface mc_cpu_interface = {
> };
>
> /**
> - * mc_bp_resume - Update boot CPU microcode during resume.
> + * microcode_bsp_resume - Update boot CPU microcode during resume.
> */
> -static void mc_bp_resume(void)
> +void microcode_bsp_resume(void)
> {
> int cpu = smp_processor_id();
> struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
> @@ -772,7 +772,7 @@ static void mc_bp_resume(void)
> }
>
> static struct syscore_ops mc_syscore_ops = {
> - .resume = mc_bp_resume,
> + .resume = microcode_bsp_resume,
> };
>
> static int mc_cpu_starting(unsigned int cpu)
> diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
> index 3822666fb73d..bb176c72891c 100644
> --- a/arch/x86/power/cpu.c
> +++ b/arch/x86/power/cpu.c
> @@ -25,6 +25,7 @@
> #include <asm/cpu.h>
> #include <asm/mmu_context.h>
> #include <asm/cpu_device_id.h>
> +#include <asm/microcode.h>
>
> #ifdef CONFIG_X86_32
> __visible unsigned long saved_context_ebx;
> @@ -262,11 +263,18 @@ static void notrace __restore_processor_state(struct saved_context *ctxt)
> x86_platform.restore_sched_clock_state();
> mtrr_bp_restore();
> perf_restore_debug_store();
> - msr_restore_context(ctxt);
>
> c = &cpu_data(smp_processor_id());
> if (cpu_has(c, X86_FEATURE_MSR_IA32_FEAT_CTL))
> init_ia32_feat_ctl(c);
The change of the ordering between the above and msr_restore_context()
needs to be explained in the changelog too.
> +
> + microcode_bsp_resume();
> +
> + /*
> + * This needs to happen after the microcode has been updated upon resume
> + * because some of the MSRs are "emulated" in microcode.
> + */
> + msr_restore_context(ctxt);
> }
>
> /* Needed by apm.c */
> --
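
For context, the warning quoted above is produced by the non-checking MSR
write path: msr_restore_context() writes back the MSRs captured at suspend
time with plain wrmsrl(), and a WRMSR to an MSR that only exists once the
microcode is loaded faults and lands in the exception fixup that prints
"unchecked MSR access error". The helper itself is not quoted in this
thread; the sketch below is reconstructed from memory of the mainline
arch/x86/power/cpu.c of that era, so treat the exact field and struct names
as approximate:

        /* Sketch (from memory) of msr_restore_context() in arch/x86/power/cpu.c. */
        static void msr_restore_context(struct saved_context *ctxt)
        {
                struct saved_msrs *saved_msrs = &ctxt->saved_msrs;
                struct saved_msr *msr = saved_msrs->array;
                struct saved_msr *end = msr + saved_msrs->num;

                while (msr < end) {
                        /* Plain, non-checking write: a #GP here triggers the splat. */
                        if (msr->valid)
                                wrmsrl(msr->info.msr_no, msr->info.reg.q);
                        msr++;
                }
        }

This is why the patch moves the call after microcode_bsp_resume(): once the
microcode is loaded, the emulated MSRs exist and the plain writes succeed.
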
On Fri, Apr 22, 2022 at 04:09:57PM +0200, Rafael J. Wysocki wrote:
>On Tue, Apr 19, 2022 at 6:52 PM Pawan Gupta
><[email protected]> wrote:
>>
>> From: Borislav Petkov <[email protected]>
>>
>> When resuming from a system sleep state, restore_processor_state()
>> restores the boot CPU MSRs. These MSRs could be emulated by
>> microcode. If the microcode is not loaded yet, writing to emulated MSRs
>> leads to an unchecked MSR access error:
>>
>> [ 28.702947] PM: Calling lapic_suspend+0x0/0x210
>> [ 28.703345] unchecked MSR access error: WRMSR to 0x10f (tried to write 0x0000000000000000) at rIP: 0xffffffff9b2819e4 (native_write_msr+0x4/0x20)
>> [ 28.703357] Call Trace:
>> [ 28.703359] <TASK>
>> [ 28.703361] ? restore_processor_state+0x255/0x2d0
>> [ 28.703369] x86_acpi_suspend_lowlevel+0x11f/0x170
>> [ 28.703374] acpi_suspend_enter+0x4f/0x1f0
>> [ 28.703379] suspend_devices_and_enter+0x6e0/0x7d0
>> [ 28.703384] pm_suspend.cold+0x35c/0x3a7
>> [ 28.703388] state_store+0x81/0xe0
>> [ 28.703392] kobj_attr_store+0x12/0x20
>> [ 28.703396] sysfs_kf_write+0x3f/0x50
>> [ 28.703399] kernfs_fop_write_iter+0x13b/0x1d0
>> [ 28.703403] new_sync_write+0x101/0x180
>> [ 28.703408] vfs_write+0x217/0x2a0
>> [ 28.703413] ksys_write+0x67/0xe0
>> [ 28.703417] __x64_sys_write+0x1a/0x20
>> [ 28.703421] do_syscall_64+0x3b/0x90
>> [ 28.703426] entry_SYSCALL_64_after_hwframe+0x44/0xae
>> [ 28.703429] RIP: 0033:0x7fda13c260a7
>> [ 28.703434] Code: 64 89 02 48 c7 c0 ff ff ff ff eb bb 0f 1f 80 00 00 00 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 48 89 54 24 18 48 89 74 24
>> [ 28.703437] RSP: 002b:00007fffa4060268 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
>> [ 28.703441] RAX: ffffffffffffffda RBX: 0000000000000004 RCX: 00007fda13c260a7
>> [ 28.703443] RDX: 0000000000000004 RSI: 000055a41f65a570 RDI: 0000000000000004
>> [ 28.703444] RBP: 000055a41f65a570 R08: 0000000000000000 R09: 0000000000000004
>> [ 28.703446] R10: 000055a41f0cc2a6 R11: 0000000000000246 R12: 0000000000000004
>> [ 28.703447] R13: 000055a41f657510 R14: 00007fda13d014a0 R15: 00007fda13d008a0
>
>__restore_processor_state() is used during resume from both
>suspend-to-RAM and hibernation, but I don't think that the latter is
>affected by the issue at hand, because microcode should be loaded by
>the restore kernel in that case. However, loading the boot processor
>microcode in __restore_processor_state() during resume from
>hibernation still works, although it is redundant in that case.
>
>It would be good to acknowledge the above in the changelog and also
>mention in it that the issue is specific to suspend-to-RAM (ACPI S3 on
>x86 PC systems).
Yes, it will be good to add this.
[...]
>> @@ -262,11 +263,18 @@ static void notrace __restore_processor_state(struct saved_context *ctxt)
>> x86_platform.restore_sched_clock_state();
>> mtrr_bp_restore();
>> perf_restore_debug_store();
>> - msr_restore_context(ctxt);
>>
>> c = &cpu_data(smp_processor_id());
>> if (cpu_has(c, X86_FEATURE_MSR_IA32_FEAT_CTL))
>> init_ia32_feat_ctl(c);
>
>The change of the ordering between the above and msr_restore_context()
>needs to be explained in the changelog too.
This patch is already queued in the tip tree. Can we still update the
commit message there?
Thanks,
Pawan
>> +
>> + microcode_bsp_resume();
>> +
>> + /*
>> + * This needs to happen after the microcode has been updated upon resume
>> + * because some of the MSRs are "emulated" in microcode.
>> + */
>> + msr_restore_context(ctxt);
>> }
>>
>> /* Needed by apm.c */
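
A checking write would also avoid the splat, but that is not what the patch
does, and it would leave the speculation-related MSRs silently unrestored;
loading the BSP microcode first keeps the restore meaningful. Purely as an
illustration of the difference between the checking and non-checking write
paths (restore_one_msr() is a made-up helper for this example; wrmsrl_safe()
is the existing checking variant):

        #include <linux/printk.h>
        #include <asm/msr.h>

        /* Hypothetical helper, only to show the checking write path. */
        static void restore_one_msr(u32 msr_no, u64 val)
        {
                /* wrmsrl_safe() returns an error instead of warning on a faulting WRMSR. */
                if (wrmsrl_safe(msr_no, val))
                        pr_warn("MSR 0x%x not available yet, value not restored\n", msr_no);
        }
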
On Fri, Apr 22, 2022 at 11:51 PM Pawan Gupta
<[email protected]> wrote:
>
> On Fri, Apr 22, 2022 at 04:09:57PM +0200, Rafael J. Wysocki wrote:
> >On Tue, Apr 19, 2022 at 6:52 PM Pawan Gupta
> ><[email protected]> wrote:
> >>
> >> From: Borislav Petkov <[email protected]>
> >>
> >> When resuming from a system sleep state, restore_processor_state()
> >> restores the boot CPU MSRs. These MSRs could be emulated by
> >> microcode. If the microcode is not loaded yet, writing to emulated MSRs
> >> leads to an unchecked MSR access error:
> >>
> >> [ 28.702947] PM: Calling lapic_suspend+0x0/0x210
> >> [ 28.703345] unchecked MSR access error: WRMSR to 0x10f (tried to write 0x0000000000000000) at rIP: 0xffffffff9b2819e4 (native_write_msr+0x4/0x20)
> >> [ 28.703357] Call Trace:
> >> [ 28.703359] <TASK>
> >> [ 28.703361] ? restore_processor_state+0x255/0x2d0
> >> [ 28.703369] x86_acpi_suspend_lowlevel+0x11f/0x170
> >> [ 28.703374] acpi_suspend_enter+0x4f/0x1f0
> >> [ 28.703379] suspend_devices_and_enter+0x6e0/0x7d0
> >> [ 28.703384] pm_suspend.cold+0x35c/0x3a7
> >> [ 28.703388] state_store+0x81/0xe0
> >> [ 28.703392] kobj_attr_store+0x12/0x20
> >> [ 28.703396] sysfs_kf_write+0x3f/0x50
> >> [ 28.703399] kernfs_fop_write_iter+0x13b/0x1d0
> >> [ 28.703403] new_sync_write+0x101/0x180
> >> [ 28.703408] vfs_write+0x217/0x2a0
> >> [ 28.703413] ksys_write+0x67/0xe0
> >> [ 28.703417] __x64_sys_write+0x1a/0x20
> >> [ 28.703421] do_syscall_64+0x3b/0x90
> >> [ 28.703426] entry_SYSCALL_64_after_hwframe+0x44/0xae
> >> [ 28.703429] RIP: 0033:0x7fda13c260a7
> >> [ 28.703434] Code: 64 89 02 48 c7 c0 ff ff ff ff eb bb 0f 1f 80 00 00 00 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 48 89 54 24 18 48 89 74 24
> >> [ 28.703437] RSP: 002b:00007fffa4060268 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
> >> [ 28.703441] RAX: ffffffffffffffda RBX: 0000000000000004 RCX: 00007fda13c260a7
> >> [ 28.703443] RDX: 0000000000000004 RSI: 000055a41f65a570 RDI: 0000000000000004
> >> [ 28.703444] RBP: 000055a41f65a570 R08: 0000000000000000 R09: 0000000000000004
> >> [ 28.703446] R10: 000055a41f0cc2a6 R11: 0000000000000246 R12: 0000000000000004
> >> [ 28.703447] R13: 000055a41f657510 R14: 00007fda13d014a0 R15: 00007fda13d008a0
> >
> >__restore_processor_state() is used during resume from both
> >suspend-to-RAM and hibernation, but I don't think that the latter is
> >affected by the issue at hand, because microcode should be loaded by
> >the restore kernel in that case. However, loading the boot processor
> >microcode in __restore_processor_state() during resume from
> >hibernation still works, although it is redundant in that case.
> >
> >It would be good to acknowledge the above in the changelog and also
> >mention in it that the issue is specific to suspend-to-RAM (ACPI S3 on
> >x86 PC systems).
>
> Yes, it will be good to add this.
>
> [...]
> >> @@ -262,11 +263,18 @@ static void notrace __restore_processor_state(struct saved_context *ctxt)
> >> x86_platform.restore_sched_clock_state();
> >> mtrr_bp_restore();
> >> perf_restore_debug_store();
> >> - msr_restore_context(ctxt);
> >>
> >> c = &cpu_data(smp_processor_id());
> >> if (cpu_has(c, X86_FEATURE_MSR_IA32_FEAT_CTL))
> >> init_ia32_feat_ctl(c);
> >
> >The change of the ordering between the above and msr_restore_context()
> >needs to be explained in the changelog too.
>
> This patch is already queued in the tip tree. Can we still update the
> commit message there?
That depends on the maintainer who has applied the patch.