2006-10-22 13:49:54

by Rafael J. Wysocki

[permalink] [raw]
Subject: [RFC][PATCH -mm] Make swsusp work on i386 with PAE

Hi,

The purpose of the appended patch is to make swsusp work on i386 with PAE,
but it should also allow i386 systems without PSE to use swsusp.

The patch creates temporary page tables located in resume-safe page frames
during the resume and uses them for restoring the suspend image (the same
approach is used on x86-64).

It has been tested on an i386 system with PAE and survived several
suspend-resume cycles in a row, but I have no systems without PSE, so that
requires some testing.

Comments welcome.

Greetings,
Rafael


Signed-off-by: Rafael J. Wysocki <[email protected]>
---
arch/i386/power/Makefile | 2
arch/i386/power/suspend.c | 153 ++++++++++++++++++++++++++++++++++++++++++++++
arch/i386/power/swsusp.S | 9 ++
kernel/power/Kconfig | 2
4 files changed, 162 insertions(+), 4 deletions(-)

Index: linux-2.6.19-rc2-mm2/arch/i386/power/swsusp.S
===================================================================
--- linux-2.6.19-rc2-mm2.orig/arch/i386/power/swsusp.S
+++ linux-2.6.19-rc2-mm2/arch/i386/power/swsusp.S
@@ -28,8 +28,9 @@ ENTRY(swsusp_arch_suspend)
call swsusp_save
ret

-ENTRY(swsusp_arch_resume)
- movl $swsusp_pg_dir-__PAGE_OFFSET, %ecx
+ENTRY(restore_image)
+ movl resume_pg_dir, %ecx
+ subl $__PAGE_OFFSET, %ecx
movl %ecx, %cr3

movl restore_pblist, %edx
@@ -51,6 +52,10 @@ copy_loop:
.p2align 4,,7

done:
+ /* go back to the original page tables */
+ movl $swapper_pg_dir, %ecx
+ subl $__PAGE_OFFSET, %ecx
+ movl %ecx, %cr3
/* Flush TLB, including "global" things (vmalloc) */
movl mmu_cr4_features, %eax
movl %eax, %edx
Index: linux-2.6.19-rc2-mm2/arch/i386/power/suspend.c
===================================================================
--- /dev/null
+++ linux-2.6.19-rc2-mm2/arch/i386/power/suspend.c
@@ -0,0 +1,153 @@
+/*
+ * Suspend support specific for i386 - temporary page tables
+ *
+ * Distribute under GPLv2
+ *
+ * Copyright (c) 2006 Rafael J. Wysocki <[email protected]>
+ */
+
+#include <linux/suspend.h>
+#include <linux/bootmem.h>
+
+#include <asm/system.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+
+/* Defined in arch/i386/power/swsusp.S */
+extern int restore_image(void);
+
+/* Pointer to the temporary resume page tables */
+pgd_t *resume_pg_dir;
+
+/* The following three functions are based on the analogous code in
+ * arch/i386/mm/init.c
+ */
+
+/*
+ * Create a middle page table on a resume-safe page and put a pointer to it in
+ * the given global directory entry. This only returns the gd entry
+ * in non-PAE compilation mode, since the middle layer is folded.
+ */
+static pmd_t *resume_one_md_table_init(pgd_t *pgd)
+{
+ pud_t *pud;
+ pmd_t *pmd_table;
+
+#ifdef CONFIG_X86_PAE
+ pmd_table = (pmd_t *)get_safe_page(GFP_ATOMIC);
+ if (!pmd_table)
+ return pmd_table;
+
+ set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
+ pud = pud_offset(pgd, 0);
+
+ BUG_ON(pmd_table != pmd_offset(pud, 0));
+#else
+ pud = pud_offset(pgd, 0);
+ pmd_table = pmd_offset(pud, 0);
+#endif
+
+ return pmd_table;
+}
+
+/*
+ * Create a page table on a resume-safe page and place a pointer to it in
+ * a middle page directory entry.
+ */
+static pte_t *resume_one_page_table_init(pmd_t *pmd)
+{
+ if (pmd_none(*pmd)) {
+ pte_t *page_table = (pte_t *)get_safe_page(GFP_ATOMIC);
+ if (!page_table)
+ return page_table;
+
+ set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
+
+ BUG_ON(page_table != pte_offset_kernel(pmd, 0));
+
+ return page_table;
+ }
+
+ return pte_offset_kernel(pmd, 0);
+}
+
+/*
+ * This maps the physical memory to kernel virtual address space, a total
+ * of max_low_pfn pages, by creating page tables starting from address
+ * PAGE_OFFSET. The page tables are allocated out of resume-safe pages.
+ */
+static int resume_physical_mapping_init(pgd_t *pgd_base)
+{
+ unsigned long pfn;
+ pgd_t *pgd;
+ pmd_t *pmd;
+ pte_t *pte;
+ int pgd_idx, pmd_idx;
+
+ pgd_idx = pgd_index(PAGE_OFFSET);
+ pgd = pgd_base + pgd_idx;
+ pfn = 0;
+
+ for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) {
+ pmd = resume_one_md_table_init(pgd);
+ if (!pmd)
+ return -ENOMEM;
+
+ if (pfn >= max_low_pfn)
+ continue;
+
+ for (pmd_idx = 0; pmd_idx < PTRS_PER_PMD; pmd++, pmd_idx++) {
+ if (pfn >= max_low_pfn)
+ break;
+
+ /* Map with big pages if possible, otherwise create
+ * normal page tables.
+ * NOTE: We can mark everything as executable here
+ */
+ if (cpu_has_pse) {
+ set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC));
+ pfn += PTRS_PER_PTE;
+ } else {
+ pte_t *max_pte;
+
+ pte = resume_one_page_table_init(pmd);
+ if (!pte)
+ return -ENOMEM;
+
+ max_pte = pte + PTRS_PER_PTE;
+ for (; pte < max_pte; pte++, pfn++) {
+ if (pfn >= max_low_pfn)
+ break;
+
+ set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
+ }
+ }
+ }
+ }
+ return 0;
+}
+
+int swsusp_arch_resume(void)
+{
+ int error;
+
+ resume_pg_dir = (pgd_t *)get_safe_page(GFP_ATOMIC);
+ if (!resume_pg_dir)
+ return -ENOMEM;
+
+#ifdef CONFIG_X86_PAE
+ int i;
+ /* Init entries of the first-level page table to the zero page */
+ for (i = 0; i < PTRS_PER_PGD; i++)
+ set_pgd(resume_pg_dir + i,
+ __pgd(__pa(empty_zero_page) | _PAGE_PRESENT));
+#endif
+
+ error = resume_physical_mapping_init(resume_pg_dir);
+ if (error)
+ return error;
+
+ /* We have got enough memory and from now on we cannot recover */
+ restore_image();
+ return 0;
+}
Index: linux-2.6.19-rc2-mm2/arch/i386/power/Makefile
===================================================================
--- linux-2.6.19-rc2-mm2.orig/arch/i386/power/Makefile
+++ linux-2.6.19-rc2-mm2/arch/i386/power/Makefile
@@ -1,2 +1,2 @@
obj-$(CONFIG_PM) += cpu.o
-obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o
+obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o suspend.o
Index: linux-2.6.19-rc2-mm2/kernel/power/Kconfig
===================================================================
--- linux-2.6.19-rc2-mm2.orig/kernel/power/Kconfig
+++ linux-2.6.19-rc2-mm2/kernel/power/Kconfig
@@ -78,7 +78,7 @@ config PM_SYSFS_DEPRECATED

config SOFTWARE_SUSPEND
bool "Software Suspend"
- depends on PM && SWAP && ((X86 && (!SMP || SUSPEND_SMP) && !X86_PAE) || ((FRV || PPC32) && !SMP))
+ depends on PM && SWAP && ((X86 && (!SMP || SUSPEND_SMP)) || ((FRV || PPC32) && !SMP))
---help---
Enable the possibility of suspending the machine.
It doesn't need ACPI or APM.


2006-10-23 14:51:00

by Pavel Machek

[permalink] [raw]
Subject: Re: [RFC][PATCH -mm] Make swsusp work on i386 with PAE

Hi!

> The purpose of the appended patch is to make swsusp work on i386 with PAE,
> but it should also allow i386 systems without PSE to use swsusp.
>
> The patch creates temporary page tables located in resume-safe page frames
> during the resume and uses them for restoring the suspend image (the same
> approach is used on x86-64).
>
> It has been tested on an i386 system with PAE and survived several
> suspend-resume cycles in a row, but I have no systems without PSE, so that
> requires some testing.

Thanks, looks okay to me. I guess Andi Kleen would be right person to
review it in detail?

Lack of assembly modifications is good.

I guess this should be now removed? (include/asm-i386/suspend.h)

arch_prepare_suspend(void)
{
/* If you want to make non-PSE machine work, turn off paging
in swsusp_arch_suspend. swsusp_pg_dir should have identity mapping, so
it could work... */
if (!cpu_has_pse) {
printk(KERN_ERR "PSE is required for swsusp.\n");
return -EPERM;
}

> +/*
> + * Create a middle page table on a resume-safe page and put a pointer to it in
> + * the given global directory entry. This only returns the gd entry
> + * in non-PAE compilation mode, since the middle layer is folded.
> + */
> +static pmd_t *resume_one_md_table_init(pgd_t *pgd)
> +{
> + pud_t *pud;
> + pmd_t *pmd_table;
> +
> +#ifdef CONFIG_X86_PAE
> + pmd_table = (pmd_t *)get_safe_page(GFP_ATOMIC);
> + if (!pmd_table)
> + return pmd_table;

I'd do plain old return NULL; here.

> + /* Map with big pages if possible, otherwise create
> + * normal page tables.
> + * NOTE: We can mark everything as executable here
> + */
> + if (cpu_has_pse) {
> + set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC));
> + pfn += PTRS_PER_PTE;

Perhaps disabling PSE here can help getting some testing?

Okay, I guess I should really test this one... Seems good enough for
-mm to me, but it should preferably stay there for a _long_ time.
Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

2006-10-23 15:29:48

by Rafael J. Wysocki

[permalink] [raw]
Subject: Re: [RFC][PATCH -mm] Make swsusp work on i386 with PAE

Hi,

On Monday, 23 October 2006 16:50, Pavel Machek wrote:
> Hi!
>
> > The purpose of the appended patch is to make swsusp work on i386 with PAE,
> > but it should also allow i386 systems without PSE to use swsusp.
> >
> > The patch creates temporary page tables located in resume-safe page frames
> > during the resume and uses them for restoring the suspend image (the same
> > approach is used on x86-64).
> >
> > It has been tested on an i386 system with PAE and survived several
> > suspend-resume cycles in a row, but I have no systems without PSE, so that
> > requires some testing.
>
> Thanks, looks okay to me. I guess Andi Kleen would be right person to
> review it in detail?

Yes, I think so.

> Lack of assembly modifications is good.
>
> I guess this should be now removed? (include/asm-i386/suspend.h)
>
> arch_prepare_suspend(void)
> {
> /* If you want to make non-PSE machine work, turn off paging
> in swsusp_arch_suspend. swsusp_pg_dir should have identity mapping, so
> it could work... */
> if (!cpu_has_pse) {
> printk(KERN_ERR "PSE is required for swsusp.\n");
> return -EPERM;
> }

Yes, it should. I thought it went away when the Kconfig was changed ...

> > +/*
> > + * Create a middle page table on a resume-safe page and put a pointer to it in
> > + * the given global directory entry. This only returns the gd entry
> > + * in non-PAE compilation mode, since the middle layer is folded.
> > + */
> > +static pmd_t *resume_one_md_table_init(pgd_t *pgd)
> > +{
> > + pud_t *pud;
> > + pmd_t *pmd_table;
> > +
> > +#ifdef CONFIG_X86_PAE
> > + pmd_table = (pmd_t *)get_safe_page(GFP_ATOMIC);
> > + if (!pmd_table)
> > + return pmd_table;
>
> I'd do plain old return NULL; here.

OK

> > + /* Map with big pages if possible, otherwise create
> > + * normal page tables.
> > + * NOTE: We can mark everything as executable here
> > + */
> > + if (cpu_has_pse) {
> > + set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC));
> > + pfn += PTRS_PER_PTE;
>
> Perhaps disabling PSE here can help getting some testing?

Well, I don't really want to make everyone test the !PSE scenario. ;-)

> Okay, I guess I should really test this one... Seems good enough for
> -mm to me, but it should preferably stay there for a _long_ time.

I think so too.

Greetings,
Rafael


--
You never change things by fighting the existing reality.
R. Buckminster Fuller

2006-10-25 08:05:42

by Dave Jones

[permalink] [raw]
Subject: Re: [RFC][PATCH -mm] Make swsusp work on i386 with PAE

On Mon, Oct 23, 2006 at 04:50:33PM +0200, Pavel Machek wrote:
> Hi!
>
> > The purpose of the appended patch is to make swsusp work on i386 with PAE,
> > but it should also allow i386 systems without PSE to use swsusp.
> >
> > The patch creates temporary page tables located in resume-safe page frames
> > during the resume and uses them for restoring the suspend image (the same
> > approach is used on x86-64).
> >
> > It has been tested on an i386 system with PAE and survived several
> > suspend-resume cycles in a row, but I have no systems without PSE, so that
> > requires some testing.
>
> Thanks, looks okay to me. I guess Andi Kleen would be right person to
> review it in detail?

I gave it a quick skim, and saw nothing obviously broken fwiw.
Thanks for doing this work, it's definitely something that's needed.

Dave

--
http://www.codemonkey.org.uk

2006-11-02 10:31:31

by Pavel Machek

[permalink] [raw]
Subject: Re: [RFC][PATCH -mm] Make swsusp work on i386 with PAE

Hi!

> The purpose of the appended patch is to make swsusp work on i386 with PAE,
> but it should also allow i386 systems without PSE to use swsusp.
>
> The patch creates temporary page tables located in resume-safe page frames
> during the resume and uses them for restoring the suspend image (the same
> approach is used on x86-64).
>
> It has been tested on an i386 system with PAE and survived several
> suspend-resume cycles in a row, but I have no systems without PSE, so that
> requires some testing.
>
> Comments welcome.

Just a short update. I actually tried it, and it seems to work
ok. Thanks!
Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html