__restore_processor_state() was spaghetti code, made no sense, and
had bugs. And I broke resume on 32-bit systems. This series cleans
it up and fixes it (hopefully!).
Andy Lutomirski (3):
x86/power/64: Use struct desc_ptr for the IDT in struct saved_context
x86/power/32: Move SYSENTER MSR restoration to fix_processor_context()
x86/power: Make restore_processor_context() sane
arch/x86/include/asm/suspend_32.h | 8 +++-
arch/x86/include/asm/suspend_64.h | 19 ++++++--
arch/x86/power/cpu.c | 99 ++++++++++++++++++---------------------
3 files changed, 67 insertions(+), 59 deletions(-)
--
2.13.6
x86_64's saved_context nonsensically used separate idt_limit and
idt_base fields and then cast &idt_limit to struct desc_ptr *. This
was correct (with -fno-strict-aliasing), but it was confusing, served
no purpose, and required ifdeffery. Just use struct desc_ptr
directly.
Tested-by: Jarkko Nikula <[email protected]>
Signed-off-by: Andy Lutomirski <[email protected]>
---
arch/x86/include/asm/suspend_64.h | 3 +--
arch/x86/power/cpu.c | 11 +----------
2 files changed, 2 insertions(+), 12 deletions(-)
diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h
index 7306e911faee..600e9e0aea51 100644
--- a/arch/x86/include/asm/suspend_64.h
+++ b/arch/x86/include/asm/suspend_64.h
@@ -30,8 +30,7 @@ struct saved_context {
u16 gdt_pad; /* Unused */
struct desc_ptr gdt_desc;
u16 idt_pad;
- u16 idt_limit;
- unsigned long idt_base;
+ struct desc_ptr idt;
u16 ldt;
u16 tss;
unsigned long tr;
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 5191de14f4df..472bc8c8212b 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -82,12 +82,8 @@ static void __save_processor_state(struct saved_context *ctxt)
/*
* descriptor tables
*/
-#ifdef CONFIG_X86_32
store_idt(&ctxt->idt);
-#else
-/* CONFIG_X86_64 */
- store_idt((struct desc_ptr *)&ctxt->idt_limit);
-#endif
+
/*
* We save it here, but restore it only in the hibernate case.
* For ACPI S3 resume, this is loaded via 'early_gdt_desc' in 64-bit
@@ -219,12 +215,7 @@ static void notrace __restore_processor_state(struct saved_context *ctxt)
* now restore the descriptor tables to their proper values
* ltr is done i fix_processor_context().
*/
-#ifdef CONFIG_X86_32
load_idt(&ctxt->idt);
-#else
-/* CONFIG_X86_64 */
- load_idt((const struct desc_ptr *)&ctxt->idt_limit);
-#endif
#ifdef CONFIG_X86_64
/*
--
2.13.6
x86_64 restores system call MSRs in fix_processor_context(), and
x86_32 restored them along with segment registers. The 64-bit
variant makes more sense, so move the 32-bit code to match the
64-bit code.
Tested-by: Jarkko Nikula <[email protected]>
Signed-off-by: Andy Lutomirski <[email protected]>
---
arch/x86/power/cpu.c | 9 +++------
1 file changed, 3 insertions(+), 6 deletions(-)
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 472bc8c8212b..033c61e6891b 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -174,6 +174,9 @@ static void fix_processor_context(void)
write_gdt_entry(desc, GDT_ENTRY_TSS, &tss, DESC_TSS);
syscall_init(); /* This sets MSR_*STAR and related */
+#else
+ if (boot_cpu_has(X86_FEATURE_SEP))
+ enable_sep_cpu();
#endif
load_TR_desc(); /* This does ltr */
load_mm_ldt(current->active_mm); /* This does lldt */
@@ -237,12 +240,6 @@ static void notrace __restore_processor_state(struct saved_context *ctxt)
loadsegment(fs, ctxt->fs);
loadsegment(gs, ctxt->gs);
loadsegment(ss, ctxt->ss);
-
- /*
- * sysenter MSRs
- */
- if (boot_cpu_has(X86_FEATURE_SEP))
- enable_sep_cpu();
#else
/* CONFIG_X86_64 */
asm volatile ("movw %0, %%ds" :: "r" (ctxt->ds));
--
2.13.6
My previous attempt to fix a couple of bugs in
__restore_processor_state() introduced yet another bug. Rather
than trying to come up with a minimal fix, let's try to clean it up
for real. This patch fixes quite a few things:
- The old code saved a nonsensical subset of segment registers.
The only registers that need to be saved are those that contain
userspace state or those that can't be trivially restored without
percpu access working. (On x86_32, we can restore percpu access
by writing __KERNEL_PERCPU to %fs. On x86_64, it's easier to
save and restore the kernel's GSBASE.) With this patch, we
restore hardcoded values to the kernel state where applicable and
explicitly restore the user state after fixing all the descriptor
tables.
- We used to use an unholy mix of inline asm and C helpers for
segment register access. Let's get rid of the inline asm.
Tested-by: Jarkko Nikula <[email protected]>
Fixes: 5b06bbcfc2c6 ("x86/power: Fix some ordering bugs in __restore_processor_context()")
Signed-off-by: Andy Lutomirski <[email protected]>
---
arch/x86/include/asm/suspend_32.h | 8 +++-
arch/x86/include/asm/suspend_64.h | 16 +++++++-
arch/x86/power/cpu.c | 79 ++++++++++++++++++++-------------------
3 files changed, 62 insertions(+), 41 deletions(-)
diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h
index 982c325dad33..8be6afb58471 100644
--- a/arch/x86/include/asm/suspend_32.h
+++ b/arch/x86/include/asm/suspend_32.h
@@ -12,7 +12,13 @@
/* image of the saved processor state */
struct saved_context {
- u16 es, fs, gs, ss;
+ /*
+ * On x86_32, all segment registers, with the possible exception of
+ * gs, are saved at kernel entry in pt_regs.
+ */
+#ifdef CONFIG_X86_32_LAZY_GS
+ u16 gs;
+#endif
unsigned long cr0, cr2, cr3, cr4;
u64 misc_enable;
bool misc_enable_saved;
diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h
index 600e9e0aea51..a7af9f53c0cb 100644
--- a/arch/x86/include/asm/suspend_64.h
+++ b/arch/x86/include/asm/suspend_64.h
@@ -20,8 +20,20 @@
*/
struct saved_context {
struct pt_regs regs;
- u16 ds, es, fs, gs, ss;
- unsigned long gs_base, gs_kernel_base, fs_base;
+
+ /*
+ * User CS and SS are saved in current_pt_regs(). The rest of the
+ * segment selectors need to be saved and restored here.
+ */
+ u16 ds, es, fs, gs;
+
+ /*
+ * Usermode FSBASE and GSBASE may not match the fs and gs selectors,
+ * so we save them separately. We save the kernelmode GSBASE to
+ * restore percpu access after resume.
+ */
+ unsigned long kernelmode_gs_base, usermode_gs_base, fs_base;
+
unsigned long cr0, cr2, cr3, cr4, cr8;
u64 misc_enable;
bool misc_enable_saved;
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 033c61e6891b..36a28eddb435 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -99,22 +99,18 @@ static void __save_processor_state(struct saved_context *ctxt)
/*
* segment registers
*/
-#ifdef CONFIG_X86_32
- savesegment(es, ctxt->es);
- savesegment(fs, ctxt->fs);
+#ifdef CONFIG_X86_32_LAZY_GS
savesegment(gs, ctxt->gs);
- savesegment(ss, ctxt->ss);
-#else
-/* CONFIG_X86_64 */
- asm volatile ("movw %%ds, %0" : "=m" (ctxt->ds));
- asm volatile ("movw %%es, %0" : "=m" (ctxt->es));
- asm volatile ("movw %%fs, %0" : "=m" (ctxt->fs));
- asm volatile ("movw %%gs, %0" : "=m" (ctxt->gs));
- asm volatile ("movw %%ss, %0" : "=m" (ctxt->ss));
+#endif
+#ifdef CONFIG_X86_64
+ savesegment(gs, ctxt->gs);
+ savesegment(fs, ctxt->fs);
+ savesegment(ds, ctxt->ds);
+ savesegment(es, ctxt->es);
rdmsrl(MSR_FS_BASE, ctxt->fs_base);
- rdmsrl(MSR_GS_BASE, ctxt->gs_base);
- rdmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base);
+ rdmsrl(MSR_GS_BASE, ctxt->kernelmode_gs_base);
+ rdmsrl(MSR_KERNEL_GS_BASE, ctxt->usermode_gs_base);
mtrr_save_fixed_ranges(NULL);
rdmsrl(MSR_EFER, ctxt->efer);
@@ -189,9 +185,12 @@ static void fix_processor_context(void)
}
/**
- * __restore_processor_state - restore the contents of CPU registers saved
- * by __save_processor_state()
- * @ctxt - structure to load the registers contents from
+ * __restore_processor_state - restore the contents of CPU registers saved
+ * by __save_processor_state()
+ * @ctxt - structure to load the registers contents from
+ *
+ * The asm code that gets us here will have restored a usable GDT, although
+ * it will be pointing to the wrong alias.
*/
static void notrace __restore_processor_state(struct saved_context *ctxt)
{
@@ -214,46 +213,50 @@ static void notrace __restore_processor_state(struct saved_context *ctxt)
write_cr2(ctxt->cr2);
write_cr0(ctxt->cr0);
+ /* Restore the IDT. */
+ load_idt(&ctxt->idt);
+
/*
- * now restore the descriptor tables to their proper values
- * ltr is done i fix_processor_context().
+ * Just in case the asm code got us here with the SS, DS, or ES
+ * out of sync with the GDT, update them.
*/
- load_idt(&ctxt->idt);
+ loadsegment(ss, __KERNEL_DS);
+ loadsegment(ds, __USER_DS);
+ loadsegment(es, __USER_DS);
-#ifdef CONFIG_X86_64
/*
- * We need GSBASE restored before percpu access can work.
- * percpu access can happen in exception handlers or in complicated
- * helpers like load_gs_index().
+ * Restore percpu access. Percpu access can happen in exception
+ * handlers or in complicated helpers like load_gs_index().
*/
- wrmsrl(MSR_GS_BASE, ctxt->gs_base);
+#ifdef CONFIG_X86_64
+ wrmsrl(MSR_GS_BASE, ctxt->kernelmode_gs_base);
+#else
+ loadsegment(fs, __KERNEL_PERCPU);
+ loadsegment(gs, __KERNEL_STACK_CANARY);
#endif
+ /* Restore the TSS, RO GDT, LDT, and usermode-relevant MSRs. */
fix_processor_context();
/*
- * Restore segment registers. This happens after restoring the GDT
- * and LDT, which happen in fix_processor_context().
+ * Now that we have descriptor tables fully restored and working
+ * exception handling, restore the usermode segments.
*/
-#ifdef CONFIG_X86_32
+#ifdef CONFIG_X86_64
+ loadsegment(ds, ctxt->es);
loadsegment(es, ctxt->es);
loadsegment(fs, ctxt->fs);
- loadsegment(gs, ctxt->gs);
- loadsegment(ss, ctxt->ss);
-#else
-/* CONFIG_X86_64 */
- asm volatile ("movw %0, %%ds" :: "r" (ctxt->ds));
- asm volatile ("movw %0, %%es" :: "r" (ctxt->es));
- asm volatile ("movw %0, %%fs" :: "r" (ctxt->fs));
load_gs_index(ctxt->gs);
- asm volatile ("movw %0, %%ss" :: "r" (ctxt->ss));
/*
- * Restore FSBASE and user GSBASE after reloading the respective
- * segment selectors.
+ * Restore FSBASE and GSBASE after restoring the selectors, since
+ * restoring the selectors clobbers the bases. Keep in mind
+ * that MSR_KERNEL_GS_BASE is horribly misnamed.
*/
wrmsrl(MSR_FS_BASE, ctxt->fs_base);
- wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base);
+ wrmsrl(MSR_KERNEL_GS_BASE, ctxt->usermode_gs_base);
+#elif defined(CONFIG_X86_32_LAZY_GS)
+ loadsegment(gs, ctxt->gs);
#endif
do_fpu_end();
--
2.13.6
Hi!
> My previous attempt to fix a couple of bugs in
> __restore_processor_context() introduced yet another bug. Rather
> than trying to come up with a minimal fix, let's try to clean it up
> for real. This patch fixes quite a few things:
>
> - The old code saved a nonsensical subset of segment registers.
> The only registers that need to be saved are those that contain
> userspace state or those that can't be trivially restored without
> percpu access working. (On x86_32, we can restore percpu access
> by writing __KERNEL_PERCPU to %fs. On x86_64, it's easier to
> save and restore the kernel's GSBASE.) With this patch, we
> restore hardcoded values to the kernel state where applicable and
> explicitly restore the user state after fixing all the descriptor
> tables.
>
> - We used to use an unholy mix of inline asm and C helpers for
> segment register access. Let's get rid of the inline asm.
>
> Tested-by: Jarkko Nikula <[email protected]>
> Fixes: 5b06bbcfc2c6 ("x86/power: Fix some ordering bugs in __restore_processor_context()")
> Signed-off-by: Andy Lutomirski <[email protected]>
Thanks!
Tested-by: Pavel Machek <[email protected]>
Reported-by: Pavel Machek <[email protected]>
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
On Thursday, December 14, 2017 10:19:04 PM CET Andy Lutomirski wrote:
> __restore_processor_state() was spaghetti code, made no sense, and
> had bugs. And I broke resume on 32-bit systems. This series cleans
> it up and fixes it (hopefully!).
>
> Andy Lutomirski (3):
> x86/power/64: Use struct desc_ptr for the IDT in struct saved_context
> x86/power/32: Move SYSENTER MSR restoration to fix_processor_context()
> x86/power: Make restore_processor_context() sane
For all three:
Acked-by: Rafael J. Wysocki <[email protected]>
Thanks!
On Thu, Dec 14, 2017 at 5:28 PM, Rafael J. Wysocki <[email protected]> wrote:
> On Thursday, December 14, 2017 10:19:04 PM CET Andy Lutomirski wrote:
>> __restore_processor_state() was spaghetti code, made no sense, and
>> had bugs. And I broke resume on 32-bit systems. This series cleans
>> it up and fixes it (hopefully!).
>
> Acked-by: Rafael J. Wysocki <[email protected]>
And I'm assuming I'll get these from the x86 -tip tree.
If not, somebody please holler, and I'll pick them up directly.
Linus
On 12/15/2017 03:28 AM, Rafael J. Wysocki wrote:
> On Thursday, December 14, 2017 10:19:04 PM CET Andy Lutomirski wrote:
>> __restore_processor_state() was spaghetti code, made no sense, and
>> had bugs. And I broke resume on 32-bit systems. This series cleans
>> it up and fixes it (hopefully!).
>>
>> Andy Lutomirski (3):
>> x86/power/64: Use struct desc_ptr for the IDT in struct saved_context
>> x86/power/32: Move SYSENTER MSR restoration to fix_processor_context()
>> x86/power: Make restore_processor_context() sane
>
> For all three:
>
> Acked-by: Rafael J. Wysocki <[email protected]>
>
Still works after the split-up, so my Tested-by stands. Please note that
Pavel gave his Reported-by/Tested-by in 3/3.
--
Jarkko
* Linus Torvalds <[email protected]> wrote:
> On Thu, Dec 14, 2017 at 5:28 PM, Rafael J. Wysocki <[email protected]> wrote:
> > On Thursday, December 14, 2017 10:19:04 PM CET Andy Lutomirski wrote:
> >> __restore_processor_state() was spaghetti code, made no sense, and
> >> had bugs. And I broke resume on 32-bit systems. This series cleans
> >> it up and fixes it (hopefully!).
> >
> > Acked-by: Rafael J. Wysocki <[email protected]>
>
> And I'm assuming I'll get these from the x86 -tip tree.
>
> If not, somebody please holler, and I'll pick them up directly.
I have applied them to tip:x86/urgent with small changes to the changelogs, and
will send them to you in a couple of hours.
These fixes are really nice, thanks guys for all the effort!
Thanks,
Ingo
* Jarkko Nikula <[email protected]> wrote:
> On 12/15/2017 03:28 AM, Rafael J. Wysocki wrote:
> > On Thursday, December 14, 2017 10:19:04 PM CET Andy Lutomirski wrote:
> > > __restore_processor_state() was spaghetti code, made no sense, and
> > > had bugs. And I broke resume on 32-bit systems. This series cleans
> > > it up and fixes it (hopefully!).
> > >
> > > Andy Lutomirski (3):
> > > x86/power/64: Use struct desc_ptr for the IDT in struct saved_context
> > > x86/power/32: Move SYSENTER MSR restoration to fix_processor_context()
> > > x86/power: Make restore_processor_context() sane
> >
> > For all three:
> >
> > Acked-by: Rafael J. Wysocki <[email protected]>
> >
>
> Works still after split up so my tested by remains. Please note Pavel gave
> his reported/tested-by in 3/3.
Thanks, I've updated all the changelogs - and I've also added your Reported-by tag
to patch #3.
Thanks,
Ingo
Commit-ID: 090edbe23ff57940fca7f57d9165ce57a826bd7a
Gitweb: https://git.kernel.org/tip/090edbe23ff57940fca7f57d9165ce57a826bd7a
Author: Andy Lutomirski <[email protected]>
AuthorDate: Thu, 14 Dec 2017 13:19:05 -0800
Committer: Ingo Molnar <[email protected]>
CommitDate: Fri, 15 Dec 2017 12:18:29 +0100
x86/power/64: Use struct desc_ptr for the IDT in struct saved_context
x86_64's saved_context nonsensically used separate idt_limit and
idt_base fields and then cast &idt_limit to struct desc_ptr *.
This was correct (with -fno-strict-aliasing), but it was confusing,
served no purpose, and required #ifdeffery. Simplify this by
using struct desc_ptr directly.
No change in functionality.
Tested-by: Jarkko Nikula <[email protected]>
Signed-off-by: Andy Lutomirski <[email protected]>
Acked-by: Rafael J. Wysocki <[email protected]>
Acked-by: Thomas Gleixner <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Josh Poimboeuf <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Pavel Machek <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Rafael J. Wysocki <[email protected]>
Cc: Zhang Rui <[email protected]>
Link: http://lkml.kernel.org/r/967909ce38d341b01d45eff53e278e2728a3a93a.1513286253.git.luto@kernel.org
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/include/asm/suspend_64.h | 3 +--
arch/x86/power/cpu.c | 11 +----------
2 files changed, 2 insertions(+), 12 deletions(-)
diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h
index 7306e91..600e9e0 100644
--- a/arch/x86/include/asm/suspend_64.h
+++ b/arch/x86/include/asm/suspend_64.h
@@ -30,8 +30,7 @@ struct saved_context {
u16 gdt_pad; /* Unused */
struct desc_ptr gdt_desc;
u16 idt_pad;
- u16 idt_limit;
- unsigned long idt_base;
+ struct desc_ptr idt;
u16 ldt;
u16 tss;
unsigned long tr;
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 5191de1..472bc8c 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -82,12 +82,8 @@ static void __save_processor_state(struct saved_context *ctxt)
/*
* descriptor tables
*/
-#ifdef CONFIG_X86_32
store_idt(&ctxt->idt);
-#else
-/* CONFIG_X86_64 */
- store_idt((struct desc_ptr *)&ctxt->idt_limit);
-#endif
+
/*
* We save it here, but restore it only in the hibernate case.
* For ACPI S3 resume, this is loaded via 'early_gdt_desc' in 64-bit
@@ -219,12 +215,7 @@ static void notrace __restore_processor_state(struct saved_context *ctxt)
* now restore the descriptor tables to their proper values
* ltr is done i fix_processor_context().
*/
-#ifdef CONFIG_X86_32
load_idt(&ctxt->idt);
-#else
-/* CONFIG_X86_64 */
- load_idt((const struct desc_ptr *)&ctxt->idt_limit);
-#endif
#ifdef CONFIG_X86_64
/*
Commit-ID: 896c80bef4d3b357814a476663158aaf669d0fb3
Gitweb: https://git.kernel.org/tip/896c80bef4d3b357814a476663158aaf669d0fb3
Author: Andy Lutomirski <[email protected]>
AuthorDate: Thu, 14 Dec 2017 13:19:06 -0800
Committer: Ingo Molnar <[email protected]>
CommitDate: Fri, 15 Dec 2017 12:18:29 +0100
x86/power/32: Move SYSENTER MSR restoration to fix_processor_context()
x86_64 restores system call MSRs in fix_processor_context(), and
x86_32 restored them along with segment registers. The 64-bit
variant makes more sense, so move the 32-bit code to match the
64-bit code.
No change in runtime behavior is expected.
Tested-by: Jarkko Nikula <[email protected]>
Signed-off-by: Andy Lutomirski <[email protected]>
Acked-by: Rafael J. Wysocki <[email protected]>
Acked-by: Thomas Gleixner <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Josh Poimboeuf <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Pavel Machek <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Rafael J. Wysocki <[email protected]>
Cc: Zhang Rui <[email protected]>
Link: http://lkml.kernel.org/r/65158f8d7ee64dd6bbc6c1c83b3b34aaa854e3ae.1513286253.git.luto@kernel.org
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/power/cpu.c | 9 +++------
1 file changed, 3 insertions(+), 6 deletions(-)
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 472bc8c..033c61e 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -174,6 +174,9 @@ static void fix_processor_context(void)
write_gdt_entry(desc, GDT_ENTRY_TSS, &tss, DESC_TSS);
syscall_init(); /* This sets MSR_*STAR and related */
+#else
+ if (boot_cpu_has(X86_FEATURE_SEP))
+ enable_sep_cpu();
#endif
load_TR_desc(); /* This does ltr */
load_mm_ldt(current->active_mm); /* This does lldt */
@@ -237,12 +240,6 @@ static void notrace __restore_processor_state(struct saved_context *ctxt)
loadsegment(fs, ctxt->fs);
loadsegment(gs, ctxt->gs);
loadsegment(ss, ctxt->ss);
-
- /*
- * sysenter MSRs
- */
- if (boot_cpu_has(X86_FEATURE_SEP))
- enable_sep_cpu();
#else
/* CONFIG_X86_64 */
asm volatile ("movw %0, %%ds" :: "r" (ctxt->ds));
Commit-ID: 7ee18d677989e99635027cee04c878950e0752b9
Gitweb: https://git.kernel.org/tip/7ee18d677989e99635027cee04c878950e0752b9
Author: Andy Lutomirski <[email protected]>
AuthorDate: Thu, 14 Dec 2017 13:19:07 -0800
Committer: Ingo Molnar <[email protected]>
CommitDate: Fri, 15 Dec 2017 12:21:38 +0100
x86/power: Make restore_processor_context() sane
My previous attempt to fix a couple of bugs in __restore_processor_state():
5b06bbcfc2c6 ("x86/power: Fix some ordering bugs in __restore_processor_context()")
... introduced yet another bug, breaking suspend-resume.
Rather than trying to come up with a minimal fix, let's try to clean it up
for real. This patch fixes quite a few things:
- The old code saved a nonsensical subset of segment registers.
The only registers that need to be saved are those that contain
userspace state or those that can't be trivially restored without
percpu access working. (On x86_32, we can restore percpu access
by writing __KERNEL_PERCPU to %fs. On x86_64, it's easier to
save and restore the kernel's GSBASE.) With this patch, we
restore hardcoded values to the kernel state where applicable and
explicitly restore the user state after fixing all the descriptor
tables.
- We used to use an unholy mix of inline asm and C helpers for
segment register access. Let's get rid of the inline asm.
This fixes the reported s2ram hangs and makes the code all around
more logical.
Analyzed-by: Linus Torvalds <[email protected]>
Reported-by: Jarkko Nikula <[email protected]>
Reported-by: Pavel Machek <[email protected]>
Tested-by: Jarkko Nikula <[email protected]>
Tested-by: Pavel Machek <[email protected]>
Signed-off-by: Andy Lutomirski <[email protected]>
Acked-by: Rafael J. Wysocki <[email protected]>
Acked-by: Thomas Gleixner <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Josh Poimboeuf <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Rafael J. Wysocki <[email protected]>
Cc: Zhang Rui <[email protected]>
Fixes: 5b06bbcfc2c6 ("x86/power: Fix some ordering bugs in __restore_processor_context()")
Link: http://lkml.kernel.org/r/398ee68e5c0f766425a7b746becfc810840770ff.1513286253.git.luto@kernel.org
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/include/asm/suspend_32.h | 8 +++-
arch/x86/include/asm/suspend_64.h | 16 +++++++-
arch/x86/power/cpu.c | 79 ++++++++++++++++++++-------------------
3 files changed, 62 insertions(+), 41 deletions(-)
diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h
index 982c325..8be6afb 100644
--- a/arch/x86/include/asm/suspend_32.h
+++ b/arch/x86/include/asm/suspend_32.h
@@ -12,7 +12,13 @@
/* image of the saved processor state */
struct saved_context {
- u16 es, fs, gs, ss;
+ /*
+ * On x86_32, all segment registers, with the possible exception of
+ * gs, are saved at kernel entry in pt_regs.
+ */
+#ifdef CONFIG_X86_32_LAZY_GS
+ u16 gs;
+#endif
unsigned long cr0, cr2, cr3, cr4;
u64 misc_enable;
bool misc_enable_saved;
diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h
index 600e9e0..a7af9f5 100644
--- a/arch/x86/include/asm/suspend_64.h
+++ b/arch/x86/include/asm/suspend_64.h
@@ -20,8 +20,20 @@
*/
struct saved_context {
struct pt_regs regs;
- u16 ds, es, fs, gs, ss;
- unsigned long gs_base, gs_kernel_base, fs_base;
+
+ /*
+ * User CS and SS are saved in current_pt_regs(). The rest of the
+ * segment selectors need to be saved and restored here.
+ */
+ u16 ds, es, fs, gs;
+
+ /*
+ * Usermode FSBASE and GSBASE may not match the fs and gs selectors,
+ * so we save them separately. We save the kernelmode GSBASE to
+ * restore percpu access after resume.
+ */
+ unsigned long kernelmode_gs_base, usermode_gs_base, fs_base;
+
unsigned long cr0, cr2, cr3, cr4, cr8;
u64 misc_enable;
bool misc_enable_saved;
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 033c61e..36a28ed 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -99,22 +99,18 @@ static void __save_processor_state(struct saved_context *ctxt)
/*
* segment registers
*/
-#ifdef CONFIG_X86_32
- savesegment(es, ctxt->es);
- savesegment(fs, ctxt->fs);
+#ifdef CONFIG_X86_32_LAZY_GS
savesegment(gs, ctxt->gs);
- savesegment(ss, ctxt->ss);
-#else
-/* CONFIG_X86_64 */
- asm volatile ("movw %%ds, %0" : "=m" (ctxt->ds));
- asm volatile ("movw %%es, %0" : "=m" (ctxt->es));
- asm volatile ("movw %%fs, %0" : "=m" (ctxt->fs));
- asm volatile ("movw %%gs, %0" : "=m" (ctxt->gs));
- asm volatile ("movw %%ss, %0" : "=m" (ctxt->ss));
+#endif
+#ifdef CONFIG_X86_64
+ savesegment(gs, ctxt->gs);
+ savesegment(fs, ctxt->fs);
+ savesegment(ds, ctxt->ds);
+ savesegment(es, ctxt->es);
rdmsrl(MSR_FS_BASE, ctxt->fs_base);
- rdmsrl(MSR_GS_BASE, ctxt->gs_base);
- rdmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base);
+ rdmsrl(MSR_GS_BASE, ctxt->kernelmode_gs_base);
+ rdmsrl(MSR_KERNEL_GS_BASE, ctxt->usermode_gs_base);
mtrr_save_fixed_ranges(NULL);
rdmsrl(MSR_EFER, ctxt->efer);
@@ -189,9 +185,12 @@ static void fix_processor_context(void)
}
/**
- * __restore_processor_state - restore the contents of CPU registers saved
- * by __save_processor_state()
- * @ctxt - structure to load the registers contents from
+ * __restore_processor_state - restore the contents of CPU registers saved
+ * by __save_processor_state()
+ * @ctxt - structure to load the registers contents from
+ *
+ * The asm code that gets us here will have restored a usable GDT, although
+ * it will be pointing to the wrong alias.
*/
static void notrace __restore_processor_state(struct saved_context *ctxt)
{
@@ -214,46 +213,50 @@ static void notrace __restore_processor_state(struct saved_context *ctxt)
write_cr2(ctxt->cr2);
write_cr0(ctxt->cr0);
+ /* Restore the IDT. */
+ load_idt(&ctxt->idt);
+
/*
- * now restore the descriptor tables to their proper values
- * ltr is done i fix_processor_context().
+ * Just in case the asm code got us here with the SS, DS, or ES
+ * out of sync with the GDT, update them.
*/
- load_idt(&ctxt->idt);
+ loadsegment(ss, __KERNEL_DS);
+ loadsegment(ds, __USER_DS);
+ loadsegment(es, __USER_DS);
-#ifdef CONFIG_X86_64
/*
- * We need GSBASE restored before percpu access can work.
- * percpu access can happen in exception handlers or in complicated
- * helpers like load_gs_index().
+ * Restore percpu access. Percpu access can happen in exception
+ * handlers or in complicated helpers like load_gs_index().
*/
- wrmsrl(MSR_GS_BASE, ctxt->gs_base);
+#ifdef CONFIG_X86_64
+ wrmsrl(MSR_GS_BASE, ctxt->kernelmode_gs_base);
+#else
+ loadsegment(fs, __KERNEL_PERCPU);
+ loadsegment(gs, __KERNEL_STACK_CANARY);
#endif
+ /* Restore the TSS, RO GDT, LDT, and usermode-relevant MSRs. */
fix_processor_context();
/*
- * Restore segment registers. This happens after restoring the GDT
- * and LDT, which happen in fix_processor_context().
+ * Now that we have descriptor tables fully restored and working
+ * exception handling, restore the usermode segments.
*/
-#ifdef CONFIG_X86_32
+#ifdef CONFIG_X86_64
+ loadsegment(ds, ctxt->es);
loadsegment(es, ctxt->es);
loadsegment(fs, ctxt->fs);
- loadsegment(gs, ctxt->gs);
- loadsegment(ss, ctxt->ss);
-#else
-/* CONFIG_X86_64 */
- asm volatile ("movw %0, %%ds" :: "r" (ctxt->ds));
- asm volatile ("movw %0, %%es" :: "r" (ctxt->es));
- asm volatile ("movw %0, %%fs" :: "r" (ctxt->fs));
load_gs_index(ctxt->gs);
- asm volatile ("movw %0, %%ss" :: "r" (ctxt->ss));
/*
- * Restore FSBASE and user GSBASE after reloading the respective
- * segment selectors.
+ * Restore FSBASE and GSBASE after restoring the selectors, since
+ * restoring the selectors clobbers the bases. Keep in mind
+ * that MSR_KERNEL_GS_BASE is horribly misnamed.
*/
wrmsrl(MSR_FS_BASE, ctxt->fs_base);
- wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base);
+ wrmsrl(MSR_KERNEL_GS_BASE, ctxt->usermode_gs_base);
+#elif defined(CONFIG_X86_32_LAZY_GS)
+ loadsegment(gs, ctxt->gs);
#endif
do_fpu_end();