2005-01-30 23:20:00

by Rafael J. Wysocki

[permalink] [raw]
Subject: [RFC][PATCH] swsusp: do not use higher order memory allocations on resume

Hi,

The following patch is (yet) an(other) attempt to eliminate the need for using higher
order memory allocations on resume. ?It accomplishes this by replacing the array
of page backup entries with a list, so it is only necessary to allocate individual
memory pages. This approach makes it possible to avoid relocating many memory
pages on resume (as a result, much less memory is used) and to simplify
the assembly code that restores the image.

The patch is a complement to the patch that I sent some time ago as "swsusp: do not
use higher order memory allocations on suspend". It is against 2.6.11-rc2 - on top
of the previous patch and on top of the "x86_64: Speed up suspend" patch which are
availble at:
http://www.sisk.pl/kernel/patches/2.6.11-rc2/swsusp-use-list-suspend-v2.patch
and at:
http://www.sisk.pl/kernel/patches/2.6.11-rc2/x86_64-Speed-up-suspend.patch
respectively. The patch itself is available at:
http://www.sisk.pl/kernel/patches/2.6.11-rc2/swsusp-use-list-resume-v1.patch
and there is a consolidated patch against 2.6.11-rc2 at:
http://www.sisk.pl/kernel/patches/2.6.11-rc2/2.6.11-rc2-swsusp-use-list.patch

The patch includes the modifications to the assembly code for x86-64 and i386,
but it does not modify the ppc code (I have no experience with the ppc architecture
and no hardware; if anyone could help, I'd really appreciate it). It has been tested
(extensively) on x86-64 and (lightly) on i386. Please let me know if you find any bugs
in it. Of course, any comments, suggestions, etc. are welcome.

Greets,
RJW


Signed-off-by: Rafael J. Wysocki <[email protected]>

diff -Nru linux-2.6.11-rc2-prev/arch/i386/power/swsusp.S linux-2.6.11-rc2-new/arch/i386/power/swsusp.S
--- linux-2.6.11-rc2-prev/arch/i386/power/swsusp.S 2004-12-24 22:34:31.000000000 +0100
+++ linux-2.6.11-rc2-new/arch/i386/power/swsusp.S 2005-01-30 23:12:22.000000000 +0100
@@ -28,28 +28,28 @@
ret

ENTRY(swsusp_arch_resume)
- movl $swsusp_pg_dir-__PAGE_OFFSET,%ecx
- movl %ecx,%cr3
+ movl $swsusp_pg_dir-__PAGE_OFFSET,%ecx
+ movl %ecx,%cr3

- movl pagedir_nosave, %ebx
- xorl %eax, %eax
- xorl %edx, %edx
+ movl pagedir_nosave, %edx
+ testl %edx, %edx
+ jz done
.p2align 4,,7

copy_loop:
- movl 4(%ebx,%edx),%edi
- movl (%ebx,%edx),%esi
+ movl (%edx), %esi
+ movl 4(%edx), %edi

movl $1024, %ecx
rep
movsl

- incl %eax
- addl $16, %edx
- cmpl nr_copy_pages,%eax
- jb copy_loop
+ movl 12(%edx), %edx
+ testl %edx, %edx
+ jnz copy_loop
.p2align 4,,7

+done:
movl saved_context_esp, %esp
movl saved_context_ebp, %ebp
movl saved_context_ebx, %ebx
diff -Nru linux-2.6.11-rc2-prev/arch/x86_64/kernel/asm-offsets.c linux-2.6.11-rc2-new/arch/x86_64/kernel/asm-offsets.c
--- linux-2.6.11-rc2-prev/arch/x86_64/kernel/asm-offsets.c 2005-01-30 21:57:58.000000000 +0100
+++ linux-2.6.11-rc2-new/arch/x86_64/kernel/asm-offsets.c 2005-01-30 23:19:15.000000000 +0100
@@ -62,8 +62,8 @@
offsetof (struct rt_sigframe32, uc.uc_mcontext));
BLANK();
#endif
- DEFINE(SIZEOF_PBE, sizeof(struct pbe));
DEFINE(pbe_address, offsetof(struct pbe, address));
DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address));
+ DEFINE(pbe_next, offsetof(struct pbe, next));
return 0;
}
diff -Nru linux-2.6.11-rc2-prev/arch/x86_64/kernel/suspend_asm.S linux-2.6.11-rc2-new/arch/x86_64/kernel/suspend_asm.S
--- linux-2.6.11-rc2-prev/arch/x86_64/kernel/suspend_asm.S 2005-01-30 21:57:58.000000000 +0100
+++ linux-2.6.11-rc2-new/arch/x86_64/kernel/suspend_asm.S 2005-01-30 14:36:17.000000000 +0100
@@ -1,6 +1,9 @@
-/* Originally gcc generated, modified by hand
+/* Copyright 2004,2005 Pavel Machek <[email protected]>, Andi Kleen <[email protected]>, Rafael J. Wysocki <[email protected]>
*
- * This may not use any stack, nor any variable that is not "NoSave":
+ * Distribute under GPLv2.
+ *
+ * swsusp_arch_resume may not use any stack, nor any variable that is
+ * not "NoSave" during copying pages:
*
* Its rewriting one kernel image with another. What is stack in "old"
* image could very well be data page in "new" image, and overwriting
@@ -51,15 +54,8 @@
movq %rax, %cr4; # turn PGE back on

movq pagedir_nosave(%rip), %rdx
- /* compute the limit */
- movl nr_copy_pages(%rip), %eax
- testl %eax, %eax
+ testq %rdx, %rdx
jz done
- movq %rdx,%r8
- movl $SIZEOF_PBE,%r9d
- mul %r9 # with rax, clobbers rdx
- movq %r8, %rdx
- addq %r8, %rax
loop:
/* get addresses from the pbe and copy the page */
movq pbe_address(%rdx), %rsi
@@ -69,9 +65,9 @@
movsq

/* progress to the next pbe */
- addq $SIZEOF_PBE, %rdx
- cmpq %rax, %rdx
- jb loop
+ movq pbe_next(%rdx), %rdx
+ testq %rdx, %rdx
+ jnz loop
done:
movl $24, %eax
movl %eax, %ds
diff -Nru linux-2.6.11-rc2-prev/kernel/power/swsusp.c linux-2.6.11-rc2-new/kernel/power/swsusp.c
--- linux-2.6.11-rc2-prev/kernel/power/swsusp.c 2005-01-28 19:07:22.000000000 +0100
+++ linux-2.6.11-rc2-new/kernel/power/swsusp.c 2005-01-30 22:10:44.000000000 +0100
@@ -938,44 +938,96 @@
return 0;
}

-/*
- * We check here that pagedir & pages it points to won't collide with pages
- * where we're going to restore from the loaded pages later
+/**
+ * On resume, for storing the PBE list and the image,
+ * we can only use memory pages that do not conflict with the pages
+ * which had been used before suspend.
+ *
+ * We don't know which pages are usable until we allocate them.
+ *
+ * Allocated but unusable (ie eaten) memory pages are linked together
+ * to create a list, so that we can free them easily
+ *
+ * We could have used a type other than (void *)
+ * for this purpose, but ...
*/
-static int __init check_pagedir(void)
-{
- int i;
+static void **eaten_memory = NULL;

- for(i=0; i < nr_copy_pages; i++) {
- unsigned long addr;
+static unsigned long __init get_usable_page(unsigned gfp_mask) {
+ unsigned long m;
+ void **c = eaten_memory;
+
+ m = get_zeroed_page(gfp_mask);
+ while (does_collide_order(m, 0)) {
+ eaten_memory = (void *)m;
+ *eaten_memory = c;
+ c = eaten_memory;
+ m = get_zeroed_page(gfp_mask);
+ if (!m)
+ break;
+ }
+ return m;
+}

- do {
- addr = get_zeroed_page(GFP_ATOMIC);
- if(!addr)
- return -ENOMEM;
- } while (does_collide_order(addr, 0));
+static void __init free_eaten_memory(void) {
+ unsigned long m;
+ void **c;
+ int i = 0;

- (pagedir_nosave+i)->address = addr;
+ c = eaten_memory;
+ while (c) {
+ m = (unsigned long)c;
+ c = *c;
+ free_page(m);
+ i++;
}
- return 0;
+ eaten_memory = NULL;
+ pr_debug("swsusp: %d unused pages freed\n", i);
}

-static int __init swsusp_pagedir_relocate(void)
+/**
+ * check_pagedir - We ensure here that pages that the PBEs point to
+ * won't collide with pages where we're going to restore from the loaded
+ * pages later
+ */
+
+static int __init check_pagedir(struct pbe *pblist)
{
- /*
- * We have to avoid recursion (not to overflow kernel stack),
- * and that's why code looks pretty cryptic
+ struct pbe *p;
+
+ /* This is necessary, so that we can free allocated pages
+ * in case of failure
*/
- suspend_pagedir_t *old_pagedir = pagedir_nosave;
- void **eaten_memory = NULL;
- void **c = eaten_memory, *m, *f;
- int ret = 0;
+ for_each_pbe(p, pblist)
+ p->address = 0UL;
+
+ for_each_pbe(p, pblist) {
+ p->address = get_usable_page(GFP_ATOMIC);
+ if(!p->address)
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+/**
+ * swsusp_pagedir_relocate - It is possible, that some memory pages
+ * occupied by the list of PBEs collide with pages where we're going to
+ * restore from the loaded pages later. We relocate them here.
+ */
+
+static struct pbe * __init swsusp_pagedir_relocate(struct pbe *pblist)
+{
struct zone *zone;
- int i;
- struct pbe *p;
unsigned long zone_pfn;
+ struct pbe *pbpage, *tail, *p;
+ void *m;
+ int rel = 0, error = 0;
+
+ if (!pblist) /* a sanity check */
+ return NULL;

- printk("Relocating pagedir ");
+ pr_debug("swsusp: Relocating pagedir (%lu pages to check)\n",
+ swsusp_info.pagedir_pages);

/* Set page flags */

@@ -985,45 +1037,52 @@
zone->zone_start_pfn));
}

- /* Clear orig address */
+ /* Clear orig addresses */

- for(i = 0, p = pagedir_nosave; i < nr_copy_pages; i++, p++) {
+ for_each_pbe(p, pblist)
ClearPageNosaveFree(virt_to_page(p->orig_address));
- }

- if (!does_collide_order((unsigned long)old_pagedir, pagedir_order)) {
- printk("not necessary\n");
- return check_pagedir();
- }
+ tail = pblist + PB_PAGE_SKIP;

- while ((m = (void *) __get_free_pages(GFP_ATOMIC, pagedir_order)) != NULL) {
- if (!does_collide_order((unsigned long)m, pagedir_order))
- break;
- eaten_memory = m;
- printk( "." );
- *eaten_memory = c;
- c = eaten_memory;
- }
+ /* Relocate colliding pages */

- if (!m) {
- printk("out of memory\n");
- ret = -ENOMEM;
- } else {
- pagedir_nosave =
- memcpy(m, old_pagedir, PAGE_SIZE << pagedir_order);
+ for_each_pb_page(pbpage, pblist) {
+ if (does_collide_order((unsigned long)pbpage, 0)) {
+ m = (void *)get_usable_page(GFP_ATOMIC | __GFP_COLD);
+ if (!m) {
+ error = -ENOMEM;
+ break;
+ }
+ memcpy(m, (void *)pbpage, PAGE_SIZE);
+ if (pbpage == pblist)
+ pblist = (struct pbe *)m;
+ else
+ tail->next = (struct pbe *)m;
+
+ free_page((unsigned long)pbpage);
+ pbpage = (struct pbe *)m;
+
+ /* We have to link the PBEs again */
+
+ for (p = pbpage ; p < pbpage + PB_PAGE_SKIP ; p++)
+ if (p->next) /* needed to save the end */
+ p->next = p + 1;
+
+ rel++;
+ }
+ tail = pbpage + PB_PAGE_SKIP;
+ }
+
+ if (error) {
+ printk("\nswsusp: Out of memory\n\n");
+ free_pagedir(pblist);
+ free_eaten_memory();
+ pblist = NULL;
}
+ else
+ printk("swsusp: Relocated %d pages\n", rel);

- c = eaten_memory;
- while (c) {
- printk(":");
- f = c;
- c = *c;
- free_pages((unsigned long)f, pagedir_order);
- }
- if (ret)
- return ret;
- printk("|\n");
- return check_pagedir();
+ return pblist;
}

/**
@@ -1168,77 +1227,150 @@
}

/**
- * swsusp_read_data - Read image pages from swap.
+ * data_read - Read image pages from swap.
*
* You do not need to check for overlaps, check_pagedir()
* already did that.
*/

-static int __init data_read(void)
+static int __init data_read(struct pbe *pblist)
{
struct pbe * p;
- int error;
- int i;
- int mod = nr_copy_pages / 100;
+ int error = 0;
+ int i = 0;
+ int mod = swsusp_info.image_pages / 100;

if (!mod)
mod = 1;

- if ((error = swsusp_pagedir_relocate()))
- return error;
+ printk("swsusp: Reading image data (%lu pages): ",
+ swsusp_info.image_pages);
+
+ for_each_pbe(p, pblist) {
+ if (!(i % mod))
+ printk("\b\b\b\b%3d%%", i / mod);

- printk( "Reading image data (%d pages): ", nr_copy_pages );
- for(i = 0, p = pagedir_nosave; i < nr_copy_pages && !error; i++, p++) {
- if (!(i%mod))
- printk( "\b\b\b\b%3d%%", i / mod );
error = bio_read_page(swp_offset(p->swap_address),
(void *)p->address);
+ if (error)
+ return error;
+
+ i++;
}
- printk(" %d done.\n",i);
+ printk("\b\b\b\bdone\n");
return error;
-
}

extern dev_t __init name_to_dev_t(const char *line);

-static int __init read_pagedir(void)
+/**
+ * read_pagedir - Read page backup list pages from swap, allocate
+ * memory for them and set up the list
+ *
+ * The list of PBEs that is created by this function may be freed
+ * using free_pagedir()
+ */
+
+static struct pbe * __init read_pagedir(void)
{
- unsigned long addr;
- int i, n = swsusp_info.pagedir_pages;
- int error = 0;
+ unsigned nr_pages = swsusp_info.image_pages;
+ unsigned num, i = 0;
+ struct pbe *pblist, *pbe, *p;
+ unsigned long offset;
+ int error;

- addr = __get_free_pages(GFP_ATOMIC, pagedir_order);
- if (!addr)
- return -ENOMEM;
- pagedir_nosave = (struct pbe *)addr;
+ if (!nr_pages)
+ return NULL;

- pr_debug("swsusp: Reading pagedir (%d Pages)\n",n);
+ printk("swsusp: Reading pagedir (%lu pages)\n",
+ swsusp_info.pagedir_pages);
+
+ pblist = (struct pbe *)get_zeroed_page(GFP_ATOMIC | __GFP_COLD);
+ if (!pblist)
+ return NULL;
+
+ error = -EFAULT;
+ offset = swp_offset(swsusp_info.pagedir[i++]);
+ if (offset)
+ error = bio_read_page(offset, (void *)pblist);
+
+ if (error) {
+ free_page((unsigned long)pblist);
+ pblist = NULL;
+ }
+
+ for (pbe = pblist, num = PBES_PER_PAGE ; pbe && num < nr_pages ;
+ pbe = pbe->next, num += PBES_PER_PAGE) {
+ p = pbe;
+ pbe += PB_PAGE_SKIP;
+ do
+ p->next = p + 1;
+ while (p++ < pbe);
+ pbe->next = NULL;
+ error = -EFAULT;
+ p = (struct pbe *)get_zeroed_page(GFP_ATOMIC | __GFP_COLD);
+ if (p)
+ offset = swp_offset(swsusp_info.pagedir[i++]);

- for (i = 0; i < n && !error; i++, addr += PAGE_SIZE) {
- unsigned long offset = swp_offset(swsusp_info.pagedir[i]);
if (offset)
- error = bio_read_page(offset, (void *)addr);
+ error = bio_read_page(offset, (void *)p);
+
+ if (error) {
+ if (p)
+ free_page((unsigned long)p);
+ }
else
- error = -EFAULT;
+ pbe->next = p;
}
- if (error)
- free_pages((unsigned long)pagedir_nosave, pagedir_order);
-
- return error;
+ if (pbe) {
+ for (num -= PBES_PER_PAGE - 1, p = pbe ; num < nr_pages ; p++, num++)
+ p->next = p + 1;
+
+ p->next = NULL;
+ pr_debug("swsusp: Read %d pages, allocated %d PBEs\n", i, num);
+ error = (i != swsusp_info.pagedir_pages); /* a sanity check */
+ }
+ if (error) { /* if pbe is zero, error must be non-zero */
+ free_pagedir(pblist);
+ pblist = NULL;
+ pr_debug("swsusp: Reading pagedir failed\n");
+ }
+ return pblist;
}

+
static int __init read_suspend_image(void)
{
int error = 0;
+ struct pbe *p;

if ((error = check_sig()))
return error;
+
if ((error = check_header()))
return error;
- if ((error = read_pagedir()))
- return error;
- if ((error = data_read()))
- free_pages((unsigned long)pagedir_nosave, pagedir_order);
+
+ if(!(p = read_pagedir()))
+ return -EFAULT;
+
+ if(!(pagedir_nosave = swsusp_pagedir_relocate(p)))
+ return -ENOMEM;
+
+ /* Allocate memory for the image and read the data from swap */
+
+ error = check_pagedir(pagedir_nosave);
+ free_eaten_memory();
+ if (!error)
+ error = data_read(pagedir_nosave);
+
+ if (error) { /* We fail cleanly */
+ for_each_pbe(p, pagedir_nosave)
+ if (p->address) {
+ free_page(p->address);
+ p->address = 0UL;
+ }
+ free_pagedir(pagedir_nosave);
+ }
return error;
}


--
- Would you tell me, please, which way I ought to go from here?
- That depends a good deal on where you want to get to.
-- Lewis Carroll "Alice's Adventures in Wonderland"


2005-01-31 23:32:00

by Rafael J. Wysocki

[permalink] [raw]
Subject: Re: [RFC][PATCH] swsusp: do not use higher order memory allocations [update]

Hi,

On Monday, 31 of January 2005 00:19, Rafael J. Wysocki wrote:
> Hi,
>
> The following patch is (yet) an(other) attempt to eliminate the need for using higher
> order memory allocations on resume. ?It accomplishes this by replacing the array
> of page backup entries with a list, so it is only necessary to allocate individual
> memory pages. This approach makes it possible to avoid relocating many memory
> pages on resume (as a result, much less memory is used) and to simplify
> the assembly code that restores the image.
>
> The patch is a complement to the patch that I sent some time ago as "swsusp: do not
> use higher order memory allocations on suspend". It is against 2.6.11-rc2 - on top
> of the previous patch and on top of the "x86_64: Speed up suspend" patch which are
> availble at:
> http://www.sisk.pl/kernel/patches/2.6.11-rc2/swsusp-use-list-suspend-v2.patch
> and at:
> http://www.sisk.pl/kernel/patches/2.6.11-rc2/x86_64-Speed-up-suspend.patch
> respectively. The patch itself is available at:
> http://www.sisk.pl/kernel/patches/2.6.11-rc2/swsusp-use-list-resume-v1.patch
> and there is a consolidated patch against 2.6.11-rc2 at:
> http://www.sisk.pl/kernel/patches/2.6.11-rc2/2.6.11-rc2-swsusp-use-list.patch

I have updated the patches to include a bugfix from Pavel Machek (thanks, Pavel!).
Affected are the "suspend" patch and the "consolidated" patch. The updated patches
are available at:
http://www.sisk.pl/kernel/patches/2.6.11-rc2/swsusp-use-list-suspend-v3.patch
http://www.sisk.pl/kernel/patches/2.6.11-rc2/2.6.11-rc2-swsusp-use-list-v2.patch
respectively. The other patches remain unchanged.

Greets,
Rafael


--
- Would you tell me, please, which way I ought to go from here?
- That depends a good deal on where you want to get to.
-- Lewis Carroll "Alice's Adventures in Wonderland"

2005-02-07 11:08:29

by Rafael J. Wysocki

[permalink] [raw]
Subject: Re: [RFC][PATCH] swsusp: do not use higher order allocations on resume [update 2]

Hi,

On Monday, 31 of January 2005 00:19, Rafael J. Wysocki wrote:
> Hi,
>
> The following patch is (yet) an(other) attempt to eliminate the need for using higher
> order memory allocations on resume. ?It accomplishes this by replacing the array
> of page backup entries with a list, so it is only necessary to allocate individual
> memory pages. This approach makes it possible to avoid relocating many memory
> pages on resume (as a result, much less memory is used) and to simplify
> the assembly code that restores the image.

I have updated the resume patch to apply to the 2.6.11-rc3-mm1 kernel that
contains the suspend part and the x86_64-Speed-up-suspend patch. The patch
is only for x86-64 and i386.

[Note: without this patch the resume process fails on my box ("out of memory")
during every 7th - 8th suspend/resume cycle, on the average.]

I'm going to maintain this patch so that it's available for the next -mm
kernels (it will be available under http://www.sisk.pl/kernel/patches/).

The (updated) patch follows.

Greets,
Rafael


Signed-off-by: Rafael J. Wysocki <[email protected]>

diff -Nru linux-2.6.11-rc3-mm1-orig/arch/i386/power/swsusp.S linux-2.6.11-rc3-mm1/arch/i386/power/swsusp.S
--- linux-2.6.11-rc3-mm1-orig/arch/i386/power/swsusp.S 2004-12-24 22:34:31.000000000 +0100
+++ linux-2.6.11-rc3-mm1/arch/i386/power/swsusp.S 2005-02-05 20:57:03.000000000 +0100
@@ -28,28 +28,28 @@
ret

ENTRY(swsusp_arch_resume)
- movl $swsusp_pg_dir-__PAGE_OFFSET,%ecx
- movl %ecx,%cr3
+ movl $swsusp_pg_dir-__PAGE_OFFSET,%ecx
+ movl %ecx,%cr3

- movl pagedir_nosave, %ebx
- xorl %eax, %eax
- xorl %edx, %edx
+ movl pagedir_nosave, %edx
+ testl %edx, %edx
+ jz done
.p2align 4,,7

copy_loop:
- movl 4(%ebx,%edx),%edi
- movl (%ebx,%edx),%esi
+ movl (%edx), %esi
+ movl 4(%edx), %edi

movl $1024, %ecx
rep
movsl

- incl %eax
- addl $16, %edx
- cmpl nr_copy_pages,%eax
- jb copy_loop
+ movl 12(%edx), %edx
+ testl %edx, %edx
+ jnz copy_loop
.p2align 4,,7

+done:
movl saved_context_esp, %esp
movl saved_context_ebp, %ebp
movl saved_context_ebx, %ebx
diff -Nru linux-2.6.11-rc3-mm1-orig/arch/x86_64/kernel/asm-offsets.c linux-2.6.11-rc3-mm1/arch/x86_64/kernel/asm-offsets.c
--- linux-2.6.11-rc3-mm1-orig/arch/x86_64/kernel/asm-offsets.c 2005-02-05 20:49:04.000000000 +0100
+++ linux-2.6.11-rc3-mm1/arch/x86_64/kernel/asm-offsets.c 2005-02-05 20:57:03.000000000 +0100
@@ -62,8 +62,8 @@
offsetof (struct rt_sigframe32, uc.uc_mcontext));
BLANK();
#endif
- DEFINE(SIZEOF_PBE, sizeof(struct pbe));
DEFINE(pbe_address, offsetof(struct pbe, address));
DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address));
+ DEFINE(pbe_next, offsetof(struct pbe, next));
return 0;
}
diff -Nru linux-2.6.11-rc3-mm1-orig/arch/x86_64/kernel/suspend_asm.S linux-2.6.11-rc3-mm1/arch/x86_64/kernel/suspend_asm.S
--- linux-2.6.11-rc3-mm1-orig/arch/x86_64/kernel/suspend_asm.S 2005-02-05 20:49:04.000000000 +0100
+++ linux-2.6.11-rc3-mm1/arch/x86_64/kernel/suspend_asm.S 2005-02-05 20:57:03.000000000 +0100
@@ -54,15 +54,8 @@
movq %rax, %cr4; # turn PGE back on

movq pagedir_nosave(%rip), %rdx
- /* compute the limit */
- movl nr_copy_pages(%rip), %eax
- testl %eax, %eax
+ testq %rdx, %rdx
jz done
- movq %rdx,%r8
- movl $SIZEOF_PBE,%r9d
- mul %r9 # with rax, clobbers rdx
- movq %r8, %rdx
- addq %r8, %rax
loop:
/* get addresses from the pbe and copy the page */
movq pbe_address(%rdx), %rsi
@@ -72,9 +65,9 @@
movsq

/* progress to the next pbe */
- addq $SIZEOF_PBE, %rdx
- cmpq %rax, %rdx
- jb loop
+ movq pbe_next(%rdx), %rdx
+ testq %rdx, %rdx
+ jnz loop
done:
movl $24, %eax
movl %eax, %ds
diff -Nru linux-2.6.11-rc3-mm1-orig/kernel/power/swsusp.c linux-2.6.11-rc3-mm1/kernel/power/swsusp.c
--- linux-2.6.11-rc3-mm1-orig/kernel/power/swsusp.c 2005-02-05 20:49:33.000000000 +0100
+++ linux-2.6.11-rc3-mm1/kernel/power/swsusp.c 2005-02-05 20:57:03.000000000 +0100
@@ -919,44 +919,96 @@
return 0;
}

-/*
- * We check here that pagedir & pages it points to won't collide with pages
- * where we're going to restore from the loaded pages later
+/**
+ * On resume, for storing the PBE list and the image,
+ * we can only use memory pages that do not conflict with the pages
+ * which had been used before suspend.
+ *
+ * We don't know which pages are usable until we allocate them.
+ *
+ * Allocated but unusable (ie eaten) memory pages are linked together
+ * to create a list, so that we can free them easily
+ *
+ * We could have used a type other than (void *)
+ * for this purpose, but ...
*/
-static int __init check_pagedir(void)
-{
- int i;
+static void **eaten_memory = NULL;

- for(i=0; i < nr_copy_pages; i++) {
- unsigned long addr;
+static unsigned long __init get_usable_page(unsigned gfp_mask) {
+ unsigned long m;
+ void **c = eaten_memory;
+
+ m = get_zeroed_page(gfp_mask);
+ while (does_collide_order(m, 0)) {
+ eaten_memory = (void *)m;
+ *eaten_memory = c;
+ c = eaten_memory;
+ m = get_zeroed_page(gfp_mask);
+ if (!m)
+ break;
+ }
+ return m;
+}

- do {
- addr = get_zeroed_page(GFP_ATOMIC);
- if(!addr)
- return -ENOMEM;
- } while (does_collide_order(addr, 0));
+static void __init free_eaten_memory(void) {
+ unsigned long m;
+ void **c;
+ int i = 0;

- (pagedir_nosave+i)->address = addr;
+ c = eaten_memory;
+ while (c) {
+ m = (unsigned long)c;
+ c = *c;
+ free_page(m);
+ i++;
}
- return 0;
+ eaten_memory = NULL;
+ pr_debug("swsusp: %d unused pages freed\n", i);
}

-static int __init swsusp_pagedir_relocate(void)
+/**
+ * check_pagedir - We ensure here that pages that the PBEs point to
+ * won't collide with pages where we're going to restore from the loaded
+ * pages later
+ */
+
+static int __init check_pagedir(struct pbe *pblist)
{
- /*
- * We have to avoid recursion (not to overflow kernel stack),
- * and that's why code looks pretty cryptic
+ struct pbe *p;
+
+ /* This is necessary, so that we can free allocated pages
+ * in case of failure
*/
- suspend_pagedir_t *old_pagedir = pagedir_nosave;
- void **eaten_memory = NULL;
- void **c = eaten_memory, *m, *f;
- int ret = 0;
+ for_each_pbe(p, pblist)
+ p->address = 0UL;
+
+ for_each_pbe(p, pblist) {
+ p->address = get_usable_page(GFP_ATOMIC);
+ if(!p->address)
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+/**
+ * swsusp_pagedir_relocate - It is possible, that some memory pages
+ * occupied by the list of PBEs collide with pages where we're going to
+ * restore from the loaded pages later. We relocate them here.
+ */
+
+static struct pbe * __init swsusp_pagedir_relocate(struct pbe *pblist)
+{
struct zone *zone;
- int i;
- struct pbe *p;
unsigned long zone_pfn;
+ struct pbe *pbpage, *tail, *p;
+ void *m;
+ int rel = 0, error = 0;

- printk("Relocating pagedir ");
+ if (!pblist) /* a sanity check */
+ return NULL;
+
+ pr_debug("swsusp: Relocating pagedir (%lu pages to check)\n",
+ swsusp_info.pagedir_pages);

/* Set page flags */

@@ -966,45 +1018,52 @@
zone->zone_start_pfn));
}

- /* Clear orig address */
+ /* Clear orig addresses */

- for(i = 0, p = pagedir_nosave; i < nr_copy_pages; i++, p++) {
+ for_each_pbe(p, pblist)
ClearPageNosaveFree(virt_to_page(p->orig_address));
- }

- if (!does_collide_order((unsigned long)old_pagedir, pagedir_order)) {
- printk("not necessary\n");
- return check_pagedir();
- }
+ tail = pblist + PB_PAGE_SKIP;

- while ((m = (void *) __get_free_pages(GFP_ATOMIC, pagedir_order)) != NULL) {
- if (!does_collide_order((unsigned long)m, pagedir_order))
- break;
- eaten_memory = m;
- printk( "." );
- *eaten_memory = c;
- c = eaten_memory;
- }
+ /* Relocate colliding pages */

- if (!m) {
- printk("out of memory\n");
- ret = -ENOMEM;
- } else {
- pagedir_nosave =
- memcpy(m, old_pagedir, PAGE_SIZE << pagedir_order);
+ for_each_pb_page(pbpage, pblist) {
+ if (does_collide_order((unsigned long)pbpage, 0)) {
+ m = (void *)get_usable_page(GFP_ATOMIC | __GFP_COLD);
+ if (!m) {
+ error = -ENOMEM;
+ break;
+ }
+ memcpy(m, (void *)pbpage, PAGE_SIZE);
+ if (pbpage == pblist)
+ pblist = (struct pbe *)m;
+ else
+ tail->next = (struct pbe *)m;
+
+ free_page((unsigned long)pbpage);
+ pbpage = (struct pbe *)m;
+
+ /* We have to link the PBEs again */
+
+ for (p = pbpage ; p < pbpage + PB_PAGE_SKIP ; p++)
+ if (p->next) /* needed to save the end */
+ p->next = p + 1;
+
+ rel++;
+ }
+ tail = pbpage + PB_PAGE_SKIP;
+ }
+
+ if (error) {
+ printk("\nswsusp: Out of memory\n\n");
+ free_pagedir(pblist);
+ free_eaten_memory();
+ pblist = NULL;
}
+ else
+ printk("swsusp: Relocated %d pages\n", rel);

- c = eaten_memory;
- while (c) {
- printk(":");
- f = c;
- c = *c;
- free_pages((unsigned long)f, pagedir_order);
- }
- if (ret)
- return ret;
- printk("|\n");
- return check_pagedir();
+ return pblist;
}

/**
@@ -1149,76 +1208,150 @@
}

/**
- * swsusp_read_data - Read image pages from swap.
+ * data_read - Read image pages from swap.
*
* You do not need to check for overlaps, check_pagedir()
* already did that.
*/

-static int __init data_read(void)
+static int __init data_read(struct pbe *pblist)
{
struct pbe * p;
- int error;
- int i;
- int mod = nr_copy_pages / 100;
+ int error = 0;
+ int i = 0;
+ int mod = swsusp_info.image_pages / 100;

if (!mod)
mod = 1;

- if ((error = swsusp_pagedir_relocate()))
- return error;
+ printk("swsusp: Reading image data (%lu pages): ",
+ swsusp_info.image_pages);
+
+ for_each_pbe(p, pblist) {
+ if (!(i % mod))
+ printk("\b\b\b\b%3d%%", i / mod);

- printk( "Reading image data (%d pages): ", nr_copy_pages );
- for(i = 0, p = pagedir_nosave; i < nr_copy_pages && !error; i++, p++) {
- if (!(i%mod))
- printk( "\b\b\b\b%3d%%", i / mod );
error = bio_read_page(swp_offset(p->swap_address),
(void *)p->address);
+ if (error)
+ return error;
+
+ i++;
}
- printk(" %d done.\n",i);
+ printk("\b\b\b\bdone\n");
return error;
-
}

extern dev_t __init name_to_dev_t(const char *line);

-static int __init read_pagedir(void)
+/**
+ * read_pagedir - Read page backup list pages from swap, allocate
+ * memory for them and set up the list
+ *
+ * The list of PBEs that is created by this function may be freed
+ * using free_pagedir()
+ */
+
+static struct pbe * __init read_pagedir(void)
{
- unsigned long addr;
- int i, n = swsusp_info.pagedir_pages;
- int error = 0;
+ unsigned nr_pages = swsusp_info.image_pages;
+ unsigned num, i = 0;
+ struct pbe *pblist, *pbe, *p;
+ unsigned long offset;
+ int error;

- addr = __get_free_pages(GFP_ATOMIC, pagedir_order);
- if (!addr)
- return -ENOMEM;
- pagedir_nosave = (struct pbe *)addr;
+ if (!nr_pages)
+ return NULL;
+
+ printk("swsusp: Reading pagedir (%lu pages)\n",
+ swsusp_info.pagedir_pages);

- pr_debug("swsusp: Reading pagedir (%d Pages)\n",n);
+ pblist = (struct pbe *)get_zeroed_page(GFP_ATOMIC | __GFP_COLD);
+ if (!pblist)
+ return NULL;
+
+ error = -EFAULT;
+ offset = swp_offset(swsusp_info.pagedir[i++]);
+ if (offset)
+ error = bio_read_page(offset, (void *)pblist);
+
+ if (error) {
+ free_page((unsigned long)pblist);
+ pblist = NULL;
+ }
+
+ for (pbe = pblist, num = PBES_PER_PAGE ; pbe && num < nr_pages ;
+ pbe = pbe->next, num += PBES_PER_PAGE) {
+ p = pbe;
+ pbe += PB_PAGE_SKIP;
+ do
+ p->next = p + 1;
+ while (p++ < pbe);
+ pbe->next = NULL;
+ error = -EFAULT;
+ p = (struct pbe *)get_zeroed_page(GFP_ATOMIC | __GFP_COLD);
+ if (p)
+ offset = swp_offset(swsusp_info.pagedir[i++]);

- for (i = 0; i < n && !error; i++, addr += PAGE_SIZE) {
- unsigned long offset = swp_offset(swsusp_info.pagedir[i]);
if (offset)
- error = bio_read_page(offset, (void *)addr);
+ error = bio_read_page(offset, (void *)p);
+
+ if (error) {
+ if (p)
+ free_page((unsigned long)p);
+ }
else
- error = -EFAULT;
+ pbe->next = p;
}
- if (error)
- free_pages((unsigned long)pagedir_nosave, pagedir_order);
- return error;
+ if (pbe) {
+ for (num -= PBES_PER_PAGE - 1, p = pbe ; num < nr_pages ; p++, num++)
+ p->next = p + 1;
+
+ p->next = NULL;
+ pr_debug("swsusp: Read %d pages, allocated %d PBEs\n", i, num);
+ error = (i != swsusp_info.pagedir_pages); /* a sanity check */
+ }
+ if (error) { /* if pbe is zero, error must be non-zero */
+ free_pagedir(pblist);
+ pblist = NULL;
+ pr_debug("swsusp: Reading pagedir failed\n");
+ }
+ return pblist;
}

+
static int __init read_suspend_image(void)
{
int error = 0;
+ struct pbe *p;

if ((error = check_sig()))
return error;
+
if ((error = check_header()))
return error;
- if ((error = read_pagedir()))
- return error;
- if ((error = data_read()))
- free_pages((unsigned long)pagedir_nosave, pagedir_order);
+
+ if(!(p = read_pagedir()))
+ return -EFAULT;
+
+ if(!(pagedir_nosave = swsusp_pagedir_relocate(p)))
+ return -ENOMEM;
+
+ /* Allocate memory for the image and read the data from swap */
+
+ error = check_pagedir(pagedir_nosave);
+ free_eaten_memory();
+ if (!error)
+ error = data_read(pagedir_nosave);
+
+ if (error) { /* We fail cleanly */
+ for_each_pbe(p, pagedir_nosave)
+ if (p->address) {
+ free_page(p->address);
+ p->address = 0UL;
+ }
+ free_pagedir(pagedir_nosave);
+ }
return error;
}


--
- Would you tell me, please, which way I ought to go from here?
- That depends a good deal on where you want to get to.
-- Lewis Carroll "Alice's Adventures in Wonderland"

2005-02-07 14:30:10

by Pavel Machek

[permalink] [raw]
Subject: Re: [RFC][PATCH] swsusp: do not use higher order allocations on resume [update 2]

Hi!

> > The following patch is (yet) an(other) attempt to eliminate the need for using higher
> > order memory allocations on resume. ?It accomplishes this by replacing the array
> > of page backup entries with a list, so it is only necessary to allocate individual
> > memory pages. This approach makes it possible to avoid relocating many memory
> > pages on resume (as a result, much less memory is used) and to simplify
> > the assembly code that restores the image.
>
> I have updated the resume patch to apply to the 2.6.11-rc3-mm1 kernel that
> contains the suspend part and the x86_64-Speed-up-suspend patch. The patch
> is only for x86-64 and i386.
>
> [Note: without this patch the resume process fails on my box ("out of memory")
> during every 7th - 8th suspend/resume cycle, on the average.]

Pssst. At this point, solution would be to revert the first part,
too. 2.6.11 is too near to do anything else.
Pavel
--
People were complaining that M$ turns users into beta-testers...
...jr ghea gurz vagb qrirybcref, naq gurl frrz gb yvxr vg gung jnl!

2005-02-07 14:51:35

by Rafael J. Wysocki

[permalink] [raw]
Subject: Re: [RFC][PATCH] swsusp: do not use higher order allocations on resume [update 2]

Hi,

On Monday, 7 of February 2005 15:27, Pavel Machek wrote:
> Hi!
>
> > > The following patch is (yet) an(other) attempt to eliminate the need for using higher
> > > order memory allocations on resume. ?It accomplishes this by replacing the array
> > > of page backup entries with a list, so it is only necessary to allocate individual
> > > memory pages. This approach makes it possible to avoid relocating many memory
> > > pages on resume (as a result, much less memory is used) and to simplify
> > > the assembly code that restores the image.
> >
> > I have updated the resume patch to apply to the 2.6.11-rc3-mm1 kernel that
> > contains the suspend part and the x86_64-Speed-up-suspend patch. The patch
> > is only for x86-64 and i386.
> >
> > [Note: without this patch the resume process fails on my box ("out of memory")
> > during every 7th - 8th suspend/resume cycle, on the average.]

Well, this doesn't depend on the previous patch, apparently. ;-)

> Pssst. At this point, solution would be to revert the first part,
> too. 2.6.11 is too near to do anything else.

Oh, I didn't mean changing anything now (eg because of the missing ppc
assembly part). However, the patch is useful for me so I thought I'd post it
in case someone else (using the -mm kernels) needed it.

Greets,
Rafael


--
- Would you tell me, please, which way I ought to go from here?
- That depends a good deal on where you want to get to.
-- Lewis Carroll "Alice's Adventures in Wonderland"

2005-02-07 16:24:15

by Pavel Machek

[permalink] [raw]
Subject: Re: [RFC][PATCH] swsusp: do not use higher order allocations on resume [update 2]

Hi!

> The (updated) patch follows.

Okay, few comments...


> Signed-off-by: Rafael J. Wysocki <[email protected]>
>
> diff -Nru linux-2.6.11-rc3-mm1-orig/arch/i386/power/swsusp.S linux-2.6.11-rc3-mm1/arch/i386/power/swsusp.S
> --- linux-2.6.11-rc3-mm1-orig/arch/i386/power/swsusp.S 2004-12-24 22:34:31.000000000 +0100
> +++ linux-2.6.11-rc3-mm1/arch/i386/power/swsusp.S 2005-02-05 20:57:03.000000000 +0100
> @@ -28,28 +28,28 @@
> ret
>
> ENTRY(swsusp_arch_resume)
> - movl $swsusp_pg_dir-__PAGE_OFFSET,%ecx
> - movl %ecx,%cr3
> + movl $swsusp_pg_dir-__PAGE_OFFSET,%ecx
> + movl %ecx,%cr3
>
> - movl pagedir_nosave, %ebx
> - xorl %eax, %eax
> - xorl %edx, %edx
> + movl pagedir_nosave, %edx

move copy_loop: here

> + testl %edx, %edx
> + jz done
> .p2align 4,,7
>
> copy_loop:
> - movl 4(%ebx,%edx),%edi
> - movl (%ebx,%edx),%esi
> + movl (%edx), %esi
> + movl 4(%edx), %edi
>
> movl $1024, %ecx
> rep
> movsl
>
> - incl %eax
> - addl $16, %edx
> - cmpl nr_copy_pages,%eax
> - jb copy_loop
> + movl 12(%edx), %edx
> + testl %edx, %edx
> + jnz copy_loop

And do unconditional jump here? Also, 12(%edx)... Could it be handled
using asm-offsets, like on x86-64?

> +static void __init free_eaten_memory(void) {

Please put { at new line.

> + for_each_pbe(p, pblist)
> + p->address = 0UL;
> +
> + for_each_pbe(p, pblist) {
> + p->address = get_usable_page(GFP_ATOMIC);
> + if(!p->address)

I'd put space between if and (. And probably do the same for
for_each_pbe... it behaves like a while.

> @@ -966,45 +1018,52 @@
> zone->zone_start_pfn));
> }
>
> - /* Clear orig address */
> + /* Clear orig addresses */
>
> - for(i = 0, p = pagedir_nosave; i < nr_copy_pages; i++, p++) {
> + for_each_pbe(p, pblist)
> ClearPageNosaveFree(virt_to_page(p->orig_address));
> - }
>
> - if (!does_collide_order((unsigned long)old_pagedir, pagedir_order)) {
> - printk("not necessary\n");
> - return check_pagedir();
> - }
> + tail = pblist + PB_PAGE_SKIP;
>
> - while ((m = (void *) __get_free_pages(GFP_ATOMIC, pagedir_order)) != NULL) {
> - if (!does_collide_order((unsigned long)m, pagedir_order))
> - break;
> - eaten_memory = m;
> - printk( "." );
> - *eaten_memory = c;
> - c = eaten_memory;
> - }
> + /* Relocate colliding pages */
>
> - if (!m) {
> - printk("out of memory\n");
> - ret = -ENOMEM;
> - } else {
> - pagedir_nosave =
> - memcpy(m, old_pagedir, PAGE_SIZE << pagedir_order);
> + for_each_pb_page(pbpage, pblist) {
> + if (does_collide_order((unsigned long)pbpage, 0)) {
> + m = (void *)get_usable_page(GFP_ATOMIC | __GFP_COLD);
> + if (!m) {
> + error = -ENOMEM;
> + break;
> + }
> + memcpy(m, (void *)pbpage, PAGE_SIZE);
> + if (pbpage == pblist)
> + pblist = (struct pbe *)m;
> + else
> + tail->next = (struct pbe *)m;
> +
> + free_page((unsigned long)pbpage);

Uh, you free it so that you can allocate it again, and again find out
that it is unusable? It will probably end up on list of unusable
pages, so it is okay, but...

> + pbpage = (struct pbe *)m;
> +
> + /* We have to link the PBEs again */
> +
> + for (p = pbpage ; p < pbpage + PB_PAGE_SKIP ; p++)

I'd avoid " " before ;.

> + p = pbe;
> + pbe += PB_PAGE_SKIP;
> + do
> + p->next = p + 1;
> + while (p++ < pbe);

I've already seen this code somewhere around in different
variant... Perhaps you want to make it inline function?

> + p->next = NULL;
> + pr_debug("swsusp: Read %d pages, allocated %d PBEs\n", i, num);
> + error = (i != swsusp_info.pagedir_pages); /* a sanity check */

If it is sanity check, do BUG_ON().


> + if(!(p = read_pagedir()))
> + return -EFAULT;

Is the value used? By using pointers instead of normal ints, you kill
possibility of meaningfull error reporting...

> + if(!(pagedir_nosave = swsusp_pagedir_relocate(p)))
> + return -ENOMEM;

Same here.
Pavel
--
People were complaining that M$ turns users into beta-testers...
...jr ghea gurz vagb qrirybcref, naq gurl frrz gb yvxr vg gung jnl!

2005-02-08 18:29:34

by Rafael J. Wysocki

[permalink] [raw]
Subject: Re: [RFC][PATCH] swsusp: do not use higher order allocations on resume [update 2]

Hi,

On Monday, 7 of February 2005 17:23, Pavel Machek wrote:
> Hi!
>
> > The (updated) patch follows.
>
> Okay, few comments...
>
>
> > Signed-off-by: Rafael J. Wysocki <[email protected]>
> >
> > diff -Nru linux-2.6.11-rc3-mm1-orig/arch/i386/power/swsusp.S linux-2.6.11-rc3-mm1/arch/i386/power/swsusp.S
> > --- linux-2.6.11-rc3-mm1-orig/arch/i386/power/swsusp.S 2004-12-24 22:34:31.000000000 +0100
> > +++ linux-2.6.11-rc3-mm1/arch/i386/power/swsusp.S 2005-02-05 20:57:03.000000000 +0100
> > @@ -28,28 +28,28 @@
> > ret
> >
> > ENTRY(swsusp_arch_resume)
> > - movl $swsusp_pg_dir-__PAGE_OFFSET,%ecx
> > - movl %ecx,%cr3
> > + movl $swsusp_pg_dir-__PAGE_OFFSET,%ecx
> > + movl %ecx,%cr3
> >
> > - movl pagedir_nosave, %ebx
> > - xorl %eax, %eax
> > - xorl %edx, %edx
> > + movl pagedir_nosave, %edx
>
> move copy_loop: here

OK


> > + testl %edx, %edx
> > + jz done
> > .p2align 4,,7
> >
> > copy_loop:
> > - movl 4(%ebx,%edx),%edi
> > - movl (%ebx,%edx),%esi
> > + movl (%edx), %esi
> > + movl 4(%edx), %edi
> >
> > movl $1024, %ecx
> > rep
> > movsl
> >
> > - incl %eax
> > - addl $16, %edx
> > - cmpl nr_copy_pages,%eax
> > - jb copy_loop
> > + movl 12(%edx), %edx
> > + testl %edx, %edx
> > + jnz copy_loop
>
> And do unconditional jump here?

OK (I did the same for x86-64).


> Also, 12(%edx)... Could it be handled using asm-offsets, like on x86-64?

Yes, and so I did.


> > +static void __init free_eaten_memory(void) {
>
> Please put { at new line.
>
> > + for_each_pbe(p, pblist)
> > + p->address = 0UL;
> > +
> > + for_each_pbe(p, pblist) {
> > + p->address = get_usable_page(GFP_ATOMIC);
> > + if(!p->address)
>
> I'd put space between if and (. And probably do the same for
> for_each_pbe... it behaves like a while.

OK


[-- snip --]
> > + for_each_pb_page(pbpage, pblist) {
> > + if (does_collide_order((unsigned long)pbpage, 0)) {
> > + m = (void *)get_usable_page(GFP_ATOMIC | __GFP_COLD);
> > + if (!m) {
> > + error = -ENOMEM;
> > + break;
> > + }
> > + memcpy(m, (void *)pbpage, PAGE_SIZE);
> > + if (pbpage == pblist)
> > + pblist = (struct pbe *)m;
> > + else
> > + tail->next = (struct pbe *)m;
> > +
> > + free_page((unsigned long)pbpage);
>
> Uh, you free it so that you can allocate it again, and again find out
> that it is unusable? It will probably end up on list of unusable
> pages,

That's because I wanted the page to end up on this list.

> so it is okay, but...

... I could have done it more elegantly. You're right, I've now introduced
a function eat_page() that adds a page to the list of unusable pages and
used it instead of the free_page() here.


> > + pbpage = (struct pbe *)m;
> > +
> > + /* We have to link the PBEs again */
> > +
> > + for (p = pbpage ; p < pbpage + PB_PAGE_SKIP ; p++)
>
> I'd avoid " " before ;.

OK


> > + p = pbe;
> > + pbe += PB_PAGE_SKIP;
> > + do
> > + p->next = p + 1;
> > + while (p++ < pbe);
>
> I've already seen this code somewhere around in different
> variant... Perhaps you want to make it inline function?

I tried to avoid modifying the suspend part, but if it's not a problem,
why don't we go farther and reuse alloc_pagedir() in the resume code?

It has the advantage that read_pagedir() is then much simpler, and it
returns an integer. However, for this purpose, it's better to split
alloc_pagedir() into two functions, one of which allocates memory pages,
and the second puts the list structure on them.


> > + p->next = NULL;
> > + pr_debug("swsusp: Read %d pages, allocated %d PBEs\n", i, num);
> > + error = (i != swsusp_info.pagedir_pages); /* a sanity check */
>
> If it is sanity check, do BUG_ON().

OK


> > + if(!(p = read_pagedir()))
> > + return -EFAULT;
>
> Is the value used? By using pointers instead of normal ints, you kill
> possibility of meaningfull error reporting...

Yes, but it is fixed easily if alloc_pagedir() is reused in the resume code.


> > + if(!(pagedir_nosave = swsusp_pagedir_relocate(p)))
> > + return -ENOMEM;
>
> Same here.

The value is used in error reporting and the only reason why this function
may fail is the lack of memory (the same applies to alloc_pagedir()).

The revised (not as thoroughly tested as the previous one, but hopefully
nicer) patch follows.

Greets,
Rafael


Signed-off-by: Rafael J. Wysocki <[email protected]>

diff -Nru linux-2.6.11-rc3-mm1-orig/arch/i386/kernel/asm-offsets.c linux-2.6.11-rc3-mm1/arch/i386/kernel/asm-offsets.c
--- linux-2.6.11-rc3-mm1-orig/arch/i386/kernel/asm-offsets.c 2004-12-24 22:34:31.000000000 +0100
+++ linux-2.6.11-rc3-mm1/arch/i386/kernel/asm-offsets.c 2005-02-08 18:14:27.000000000 +0100
@@ -7,6 +7,7 @@
#include <linux/sched.h>
#include <linux/signal.h>
#include <linux/personality.h>
+#include <linux/suspend.h>
#include <asm/ucontext.h>
#include "sigframe.h"
#include <asm/fixmap.h>
@@ -56,6 +57,11 @@

OFFSET(EXEC_DOMAIN_handler, exec_domain, handler);
OFFSET(RT_SIGFRAME_sigcontext, rt_sigframe, uc.uc_mcontext);
+ BLANK();
+
+ OFFSET(pbe_address, struct pbe, address);
+ OFFSET(pbe_orig_address, struct pbe, orig_address);
+ OFFSET(pbe_next, struct pbe, next);

/* Offset from the sysenter stack to tss.esp0 */
DEFINE(TSS_sysenter_esp0, offsetof(struct tss_struct, esp0) -
diff -Nru linux-2.6.11-rc3-mm1-orig/arch/i386/power/swsusp.S linux-2.6.11-rc3-mm1/arch/i386/power/swsusp.S
--- linux-2.6.11-rc3-mm1-orig/arch/i386/power/swsusp.S 2004-12-24 22:34:31.000000000 +0100
+++ linux-2.6.11-rc3-mm1/arch/i386/power/swsusp.S 2005-02-08 18:14:10.000000000 +0100
@@ -12,6 +12,7 @@
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/page.h>
+#include <asm/asm_offsets.h>

.text

@@ -28,28 +29,28 @@
ret

ENTRY(swsusp_arch_resume)
- movl $swsusp_pg_dir-__PAGE_OFFSET,%ecx
- movl %ecx,%cr3
+ movl $swsusp_pg_dir-__PAGE_OFFSET,%ecx
+ movl %ecx,%cr3

- movl pagedir_nosave, %ebx
- xorl %eax, %eax
- xorl %edx, %edx
+ movl pagedir_nosave, %edx
.p2align 4,,7

copy_loop:
- movl 4(%ebx,%edx),%edi
- movl (%ebx,%edx),%esi
+ testl %edx, %edx
+ jz done
+
+ movl pbe_address(%edx), %esi
+ movl pbe_orig_address(%edx), %edi

movl $1024, %ecx
rep
movsl

- incl %eax
- addl $16, %edx
- cmpl nr_copy_pages,%eax
- jb copy_loop
+ movl pbe_next(%edx), %edx
+ jmp copy_loop
.p2align 4,,7

+done:
movl saved_context_esp, %esp
movl saved_context_ebp, %ebp
movl saved_context_ebx, %ebx
diff -Nru linux-2.6.11-rc3-mm1-orig/arch/x86_64/kernel/asm-offsets.c linux-2.6.11-rc3-mm1/arch/x86_64/kernel/asm-offsets.c
--- linux-2.6.11-rc3-mm1-orig/arch/x86_64/kernel/asm-offsets.c 2005-02-05 20:49:04.000000000 +0100
+++ linux-2.6.11-rc3-mm1/arch/x86_64/kernel/asm-offsets.c 2005-02-08 18:13:36.000000000 +0100
@@ -62,8 +62,8 @@
offsetof (struct rt_sigframe32, uc.uc_mcontext));
BLANK();
#endif
- DEFINE(SIZEOF_PBE, sizeof(struct pbe));
DEFINE(pbe_address, offsetof(struct pbe, address));
DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address));
+ DEFINE(pbe_next, offsetof(struct pbe, next));
return 0;
}
diff -Nru linux-2.6.11-rc3-mm1-orig/arch/x86_64/kernel/suspend_asm.S linux-2.6.11-rc3-mm1/arch/x86_64/kernel/suspend_asm.S
--- linux-2.6.11-rc3-mm1-orig/arch/x86_64/kernel/suspend_asm.S 2005-02-05 20:49:04.000000000 +0100
+++ linux-2.6.11-rc3-mm1/arch/x86_64/kernel/suspend_asm.S 2005-02-08 18:13:51.000000000 +0100
@@ -54,16 +54,10 @@
movq %rax, %cr4; # turn PGE back on

movq pagedir_nosave(%rip), %rdx
- /* compute the limit */
- movl nr_copy_pages(%rip), %eax
- testl %eax, %eax
- jz done
- movq %rdx,%r8
- movl $SIZEOF_PBE,%r9d
- mul %r9 # with rax, clobbers rdx
- movq %r8, %rdx
- addq %r8, %rax
loop:
+ testq %rdx, %rdx
+ jz done
+
/* get addresses from the pbe and copy the page */
movq pbe_address(%rdx), %rsi
movq pbe_orig_address(%rdx), %rdi
@@ -72,9 +66,8 @@
movsq

/* progress to the next pbe */
- addq $SIZEOF_PBE, %rdx
- cmpq %rax, %rdx
- jb loop
+ movq pbe_next(%rdx), %rdx
+ jmp loop
done:
movl $24, %eax
movl %eax, %ds
diff -Nru linux-2.6.11-rc3-mm1-orig/kernel/power/swsusp.c linux-2.6.11-rc3-mm1/kernel/power/swsusp.c
--- linux-2.6.11-rc3-mm1-orig/kernel/power/swsusp.c 2005-02-08 18:16:34.000000000 +0100
+++ linux-2.6.11-rc3-mm1/kernel/power/swsusp.c 2005-02-08 18:16:13.000000000 +0100
@@ -608,6 +608,46 @@
static inline void free_pagedir(struct pbe *pblist);

/**
+ * fill_pb_page - Create a list of PBEs on a given memory page
+ */
+
+static inline void fill_pb_page(struct pbe *pbpage)
+{
+ struct pbe *p;
+
+ p = pbpage;
+ pbpage += PB_PAGE_SKIP;
+ do
+ p->next = p + 1;
+ while (++p < pbpage);
+}
+
+/**
+ * create_pbe_list - Create a list of PBEs on top of a given chain
+ * of memory pages allocated with alloc_pagedir()
+ */
+
+static void create_pbe_list(struct pbe *pblist, unsigned nr_pages)
+{
+ struct pbe *pbpage, *p;
+ unsigned num = PBES_PER_PAGE;
+
+ for_each_pb_page (pbpage, pblist) {
+ if (num >= nr_pages)
+ break;
+
+ fill_pb_page(pbpage);
+ num += PBES_PER_PAGE;
+ }
+ if (pbpage) {
+ for (num -= PBES_PER_PAGE - 1, p = pbpage; num < nr_pages; p++, num++)
+ p->next = p + 1;
+ p->next = NULL;
+ }
+ pr_debug("create_pbe_list(): initialized %d PBEs\n", num);
+}
+
+/**
* alloc_pagedir - Allocate the page directory.
*
* First, determine exactly how many pages we need and
@@ -623,7 +663,7 @@
static struct pbe * alloc_pagedir(unsigned nr_pages)
{
unsigned num;
- struct pbe *pblist, *pbe, *p;
+ struct pbe *pblist, *pbe;

if (!nr_pages)
return NULL;
@@ -632,21 +672,13 @@
pblist = (struct pbe *)get_zeroed_page(GFP_ATOMIC | __GFP_COLD);
for (pbe = pblist, num = PBES_PER_PAGE; pbe && num < nr_pages;
pbe = pbe->next, num += PBES_PER_PAGE) {
- p = pbe;
pbe += PB_PAGE_SKIP;
- do
- p->next = p + 1;
- while (p++ < pbe);
pbe->next = (struct pbe *)get_zeroed_page(GFP_ATOMIC | __GFP_COLD);
}
- if (pbe) {
- for (num -= PBES_PER_PAGE - 1, p = pbe; num < nr_pages; p++, num++)
- p->next = p + 1;
- } else { /* get_zeroed_page() failed */
+ if (!pbe) { /* get_zeroed_page() failed */
free_pagedir(pblist);
pblist = NULL;
}
- pr_debug("alloc_pagedir(): allocated %d PBEs\n", num);
return pblist;
}

@@ -768,6 +800,7 @@
printk(KERN_ERR "suspend: Allocating pagedir failed.\n");
return -ENOMEM;
}
+ create_pbe_list(pagedir_save, nr_copy_pages);
pagedir_nosave = pagedir_save;
if ((error = alloc_image_pages())) {
printk(KERN_ERR "suspend: Allocating image pages failed.\n");
@@ -919,44 +952,103 @@
return 0;
}

-/*
- * We check here that pagedir & pages it points to won't collide with pages
- * where we're going to restore from the loaded pages later
+/**
+ * On resume, for storing the PBE list and the image,
+ * we can only use memory pages that do not conflict with the pages
+ * which had been used before suspend.
+ *
+ * We don't know which pages are usable until we allocate them.
+ *
+ * Allocated but unusable (ie eaten) memory pages are linked together
+ * to create a list, so that we can free them easily
+ *
+ * We could have used a type other than (void *)
+ * for this purpose, but ...
*/
-static int __init check_pagedir(void)
+static void **eaten_memory = NULL;
+
+static inline void eat_page(void *page) {
+ void **c;
+
+ c = eaten_memory;
+ eaten_memory = page;
+ *eaten_memory = c;
+}
+
+static unsigned long __init get_usable_page(unsigned gfp_mask)
{
- int i;
+ unsigned long m;

- for(i=0; i < nr_copy_pages; i++) {
- unsigned long addr;
+ m = get_zeroed_page(gfp_mask);
+ while (does_collide_order(m, 0)) {
+ eat_page((void *)m);
+ m = get_zeroed_page(gfp_mask);
+ if (!m)
+ break;
+ }
+ return m;
+}

- do {
- addr = get_zeroed_page(GFP_ATOMIC);
- if(!addr)
- return -ENOMEM;
- } while (does_collide_order(addr, 0));
+static void __init free_eaten_memory(void)
+{
+ unsigned long m;
+ void **c;
+ int i = 0;

- (pagedir_nosave+i)->address = addr;
+ c = eaten_memory;
+ while (c) {
+ m = (unsigned long)c;
+ c = *c;
+ free_page(m);
+ i++;
}
- return 0;
+ eaten_memory = NULL;
+ pr_debug("swsusp: %d unused pages freed\n", i);
}

-static int __init swsusp_pagedir_relocate(void)
+/**
+ * check_pagedir - We ensure here that pages that the PBEs point to
+ * won't collide with pages where we're going to restore from the loaded
+ * pages later
+ */
+
+static int __init check_pagedir(struct pbe *pblist)
{
- /*
- * We have to avoid recursion (not to overflow kernel stack),
- * and that's why code looks pretty cryptic
+ struct pbe *p;
+
+ /* This is necessary, so that we can free allocated pages
+ * in case of failure
*/
- suspend_pagedir_t *old_pagedir = pagedir_nosave;
- void **eaten_memory = NULL;
- void **c = eaten_memory, *m, *f;
- int ret = 0;
+ for_each_pbe (p, pblist)
+ p->address = 0UL;
+
+ for_each_pbe (p, pblist) {
+ p->address = get_usable_page(GFP_ATOMIC);
+ if (!p->address)
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+/**
+ * swsusp_pagedir_relocate - It is possible, that some memory pages
+ * occupied by the list of PBEs collide with pages where we're going to
+ * restore from the loaded pages later. We relocate them here.
+ */
+
+static struct pbe * __init swsusp_pagedir_relocate(struct pbe *pblist)
+{
struct zone *zone;
- int i;
- struct pbe *p;
unsigned long zone_pfn;
+ struct pbe *pbpage, *tail, *p;
+ void *m;
+ int rel = 0, error = 0;

- printk("Relocating pagedir ");
+ if (!pblist) /* a sanity check */
+ return NULL;
+
+ pr_debug("swsusp: Relocating pagedir (%lu pages to check)\n",
+ swsusp_info.pagedir_pages);

/* Set page flags */

@@ -966,45 +1058,52 @@
zone->zone_start_pfn));
}

- /* Clear orig address */
+ /* Clear orig addresses */

- for(i = 0, p = pagedir_nosave; i < nr_copy_pages; i++, p++) {
+ for_each_pbe (p, pblist)
ClearPageNosaveFree(virt_to_page(p->orig_address));
- }

- if (!does_collide_order((unsigned long)old_pagedir, pagedir_order)) {
- printk("not necessary\n");
- return check_pagedir();
- }
+ tail = pblist + PB_PAGE_SKIP;

- while ((m = (void *) __get_free_pages(GFP_ATOMIC, pagedir_order)) != NULL) {
- if (!does_collide_order((unsigned long)m, pagedir_order))
- break;
- eaten_memory = m;
- printk( "." );
- *eaten_memory = c;
- c = eaten_memory;
- }
+ /* Relocate colliding pages */
+
+ for_each_pb_page (pbpage, pblist) {
+ if (does_collide_order((unsigned long)pbpage, 0)) {
+ m = (void *)get_usable_page(GFP_ATOMIC | __GFP_COLD);
+ if (!m) {
+ error = -ENOMEM;
+ break;
+ }
+ memcpy(m, (void *)pbpage, PAGE_SIZE);
+ if (pbpage == pblist)
+ pblist = (struct pbe *)m;
+ else
+ tail->next = (struct pbe *)m;
+
+ eat_page((void *)pbpage);
+ pbpage = (struct pbe *)m;
+
+ /* We have to link the PBEs again */
+
+ for (p = pbpage; p < pbpage + PB_PAGE_SKIP; p++)
+ if (p->next) /* needed to save the end */
+ p->next = p + 1;

- if (!m) {
- printk("out of memory\n");
- ret = -ENOMEM;
- } else {
- pagedir_nosave =
- memcpy(m, old_pagedir, PAGE_SIZE << pagedir_order);
+ rel++;
+ }
+ tail = pbpage + PB_PAGE_SKIP;
}

- c = eaten_memory;
- while (c) {
- printk(":");
- f = c;
- c = *c;
- free_pages((unsigned long)f, pagedir_order);
+ if (error) {
+ printk("\nswsusp: Out of memory\n\n");
+ free_pagedir(pblist);
+ free_eaten_memory();
+ pblist = NULL;
}
- if (ret)
- return ret;
- printk("|\n");
- return check_pagedir();
+ else
+ printk("swsusp: Relocated %d pages\n", rel);
+
+ return pblist;
}

/**
@@ -1149,76 +1248,117 @@
}

/**
- * swsusp_read_data - Read image pages from swap.
+ * data_read - Read image pages from swap.
*
* You do not need to check for overlaps, check_pagedir()
* already did that.
*/

-static int __init data_read(void)
+static int __init data_read(struct pbe *pblist)
{
struct pbe * p;
- int error;
- int i;
- int mod = nr_copy_pages / 100;
+ int error = 0;
+ int i = 0;
+ int mod = swsusp_info.image_pages / 100;

if (!mod)
mod = 1;

- if ((error = swsusp_pagedir_relocate()))
- return error;
+ printk("swsusp: Reading image data (%lu pages): ",
+ swsusp_info.image_pages);
+
+ for_each_pbe (p, pblist) {
+ if (!(i % mod))
+ printk("\b\b\b\b%3d%%", i / mod);

- printk( "Reading image data (%d pages): ", nr_copy_pages );
- for(i = 0, p = pagedir_nosave; i < nr_copy_pages && !error; i++, p++) {
- if (!(i%mod))
- printk( "\b\b\b\b%3d%%", i / mod );
error = bio_read_page(swp_offset(p->swap_address),
(void *)p->address);
+ if (error)
+ return error;
+
+ i++;
}
- printk(" %d done.\n",i);
+ printk("\b\b\b\bdone\n");
return error;
-
}

extern dev_t __init name_to_dev_t(const char *line);

-static int __init read_pagedir(void)
+/**
+ * read_pagedir - Read page backup list pages from swap
+ */
+
+static int __init read_pagedir(struct pbe *pblist)
{
- unsigned long addr;
- int i, n = swsusp_info.pagedir_pages;
- int error = 0;
+ struct pbe *pbpage, *p;
+ unsigned i = 0;
+ int error;

- addr = __get_free_pages(GFP_ATOMIC, pagedir_order);
- if (!addr)
- return -ENOMEM;
- pagedir_nosave = (struct pbe *)addr;
+ if (!pblist)
+ return -EFAULT;
+
+ printk("swsusp: Reading pagedir (%lu pages)\n",
+ swsusp_info.pagedir_pages);

- pr_debug("swsusp: Reading pagedir (%d Pages)\n",n);
+ for_each_pb_page (pbpage, pblist) {
+ unsigned long offset = swp_offset(swsusp_info.pagedir[i++]);

- for (i = 0; i < n && !error; i++, addr += PAGE_SIZE) {
- unsigned long offset = swp_offset(swsusp_info.pagedir[i]);
- if (offset)
- error = bio_read_page(offset, (void *)addr);
- else
- error = -EFAULT;
+ error = -EFAULT;
+ if (offset) {
+ p = (pbpage + PB_PAGE_SKIP)->next;
+ error = bio_read_page(offset, (void *)pbpage);
+ (pbpage + PB_PAGE_SKIP)->next = p;
+ }
+ if (error)
+ break;
}
+
if (error)
- free_pages((unsigned long)pagedir_nosave, pagedir_order);
+ free_page((unsigned long)pblist);
+
+ BUG_ON(i != swsusp_info.pagedir_pages);
+
return error;
}

+
static int __init read_suspend_image(void)
{
int error = 0;
+ struct pbe *p;

if ((error = check_sig()))
return error;
+
if ((error = check_header()))
return error;
- if ((error = read_pagedir()))
+
+ if (!(p = alloc_pagedir(nr_copy_pages)))
+ return -ENOMEM;
+
+ if ((error = read_pagedir(p)))
return error;
- if ((error = data_read()))
- free_pages((unsigned long)pagedir_nosave, pagedir_order);
+
+ create_pbe_list(p, nr_copy_pages);
+
+ if (!(pagedir_nosave = swsusp_pagedir_relocate(p)))
+ return -ENOMEM;
+
+ /* Allocate memory for the image and read the data from swap */
+
+ error = check_pagedir(pagedir_nosave);
+ free_eaten_memory();
+ if (!error)
+ error = data_read(pagedir_nosave);
+
+ if (error) { /* We fail cleanly */
+ for_each_pbe (p, pagedir_nosave)
+ if (p->address) {
+ free_page(p->address);
+ p->address = 0UL;
+ }
+ free_pagedir(pagedir_nosave);
+ }
return error;
}


--
- Would you tell me, please, which way I ought to go from here?
- That depends a good deal on where you want to get to.
-- Lewis Carroll "Alice's Adventures in Wonderland"

2005-02-08 19:10:28

by Pavel Machek

[permalink] [raw]
Subject: Re: [RFC][PATCH] swsusp: do not use higher order allocations on resume [update 2]

Hi!

> > so it is okay, but...
>
> ... I could have done it more elegantly. You're right, I've now introduced
> a function eat_page() that adds a page to the list of unusable pages and
> used it instead of the free_page() here.

Thanks.

> > > + p = pbe;
> > > + pbe += PB_PAGE_SKIP;
> > > + do
> > > + p->next = p + 1;
> > > + while (p++ < pbe);
> >
> > I've already seen this code somewhere around in different
> > variant... Perhaps you want to make it inline function?
>
> I tried to avoid modifying the suspend part, but if it's not a problem,
> why don't we go farther and reuse alloc_pagedir() in the resume code?
>
> It has the advantage that read_pagedir() is then much simpler, and it
> returns an integer. However, for this purpose, it's better to split
> alloc_pagedir() into two functions, one of which allocates memory pages,
> and the second puts the list structure on them.

I guess that modifying suspend part is okay. We do not want to have
two copies of similar code...

> > > + if(!(pagedir_nosave = swsusp_pagedir_relocate(p)))
> > > + return -ENOMEM;
> >
> > Same here.
>
> The value is used in error reporting and the only reason why this function
> may fail is the lack of memory (the same applies to alloc_pagedir()).
>
> The revised (not as thoroughly tested as the previous one, but hopefully
> nicer) patch follows.

I guess I'll wait for "reuse alloc_pagedir" version.
Pavel
--
People were complaining that M$ turns users into beta-testers...
...jr ghea gurz vagb qrirybcref, naq gurl frrz gb yvxr vg gung jnl!

2005-02-08 22:27:11

by Rafael J. Wysocki

[permalink] [raw]
Subject: Re: [RFC][PATCH] swsusp: do not use higher order allocations on resume [update 2]

On Tuesday, 8 of February 2005 20:10, Pavel Machek wrote:
> Hi!
>
> > > so it is okay, but...
> >
> > ... I could have done it more elegantly. You're right, I've now introduced
> > a function eat_page() that adds a page to the list of unusable pages and
> > used it instead of the free_page() here.
>
> Thanks.
>
> > > > + p = pbe;
> > > > + pbe += PB_PAGE_SKIP;
> > > > + do
> > > > + p->next = p + 1;
> > > > + while (p++ < pbe);
> > >
> > > I've already seen this code somewhere around in different
> > > variant... Perhaps you want to make it inline function?
> >
> > I tried to avoid modifying the suspend part, but if it's not a problem,
> > why don't we go farther and reuse alloc_pagedir() in the resume code?
> >
> > It has the advantage that read_pagedir() is then much simpler, and it
> > returns an integer. However, for this purpose, it's better to split
> > alloc_pagedir() into two functions, one of which allocates memory pages,
> > and the second puts the list structure on them.
>
> I guess that modifying suspend part is okay. We do not want to have
> two copies of similar code...
>
> > > > + if(!(pagedir_nosave = swsusp_pagedir_relocate(p)))
> > > > + return -ENOMEM;
> > >
> > > Same here.
> >
> > The value is used in error reporting and the only reason why this function
> > may fail is the lack of memory (the same applies to alloc_pagedir()).
> >
> > The revised (not as thoroughly tested as the previous one, but hopefully
> > nicer) patch follows.
>
> I guess I'll wait for "reuse alloc_pagedir" version.

It's this one. :-)

Greets,
Rafael


--
- Would you tell me, please, which way I ought to go from here?
- That depends a good deal on where you want to get to.
-- Lewis Carroll "Alice's Adventures in Wonderland"

2005-02-08 22:44:34

by Pavel Machek

[permalink] [raw]
Subject: Re: [RFC][PATCH] swsusp: do not use higher order allocations on resume [update 2]

Hi!

> +static inline void eat_page(void *page) {

Please put { on new line.

Okay, as you can see, I could find very little wrong with this
patch. That hopefully means it is okay ;-). I should still check error
handling, but I guess I'll do it when it is applied because it is hard
to do on a diff. I guess it should go into -mm just after 2.6.11 is
released...

Pavel
--
People were complaining that M$ turns users into beta-testers...
...jr ghea gurz vagb qrirybcref, naq gurl frrz gb yvxr vg gung jnl!

2005-02-08 23:22:02

by Rafael J. Wysocki

[permalink] [raw]
Subject: Re: [RFC][PATCH] swsusp: do not use higher order allocations on resume [update 2]

On Tuesday, 8 of February 2005 23:42, Pavel Machek wrote:
> Hi!
>
> > +static inline void eat_page(void *page) {
>
> Please put { on new line.

Oh, I still tend to forget about this. Corrected in the patch that is
available on the web
(http://www.sisk.pl/kernel/patches/2.6.11-rc3-mm1/swsusp-use-list-resume-v4.patch).


> Okay, as you can see, I could find very little wrong with this
> patch. That hopefully means it is okay ;-). I should still check error
> handling, but I guess I'll do it when it is applied because it is hard
> to do on a diff. I guess it should go into -mm just after 2.6.11 is
> released...

That would be great!

Greets,
Rafael


--
- Would you tell me, please, which way I ought to go from here?
- That depends a good deal on where you want to get to.
-- Lewis Carroll "Alice's Adventures in Wonderland"

2005-02-13 05:54:05

by Hu Gang

[permalink] [raw]
Subject: Re: [RFC][PATCH] swsusp: do not use higher order allocations on resume [update 2]

On Wed, Feb 09, 2005 at 12:22:52AM +0100, Rafael J. Wysocki wrote:
> On Tuesday, 8 of February 2005 23:42, Pavel Machek wrote:
> > Hi!
> >
> > > +static inline void eat_page(void *page) {
> >
> > Please put { on new line.
>
> Oh, I still tend to forget about this. Corrected in the patch that is
> available on the web
> (http://www.sisk.pl/kernel/patches/2.6.11-rc3-mm1/swsusp-use-list-resume-v4.patch).
>
>
> > Okay, as you can see, I could find very little wrong with this
> > patch. That hopefully means it is okay ;-). I should still check error
> > handling, but I guess I'll do it when it is applied because it is hard
> > to do on a diff. I guess it should go into -mm just after 2.6.11 is
> > released...
>
> That would be great!
>
> Greets,
> Rafael

Here is powerpc port support for that. Thanks for greate patch.

--- 2.6.11-rc3-mm2-use-list-resume-v4/arch/ppc/Kconfig 2005-02-13 12:13:31.000000000 +0800
+++ 2.6.11-rc3-mm2-use-list-resume-v4-ppc/arch/ppc/Kconfig 2005-02-13 12:22:32.000000000 +0800
@@ -1068,6 +1068,8 @@ config PROC_HARDWARE

source "drivers/zorro/Kconfig"

+source kernel/power/Kconfig
+
endmenu

menu "Bus options"
--- 2.6.11-rc3-mm2-use-list-resume-v4/arch/ppc/kernel/Makefile 2005-02-13 12:13:31.000000000 +0800
+++ 2.6.11-rc3-mm2-use-list-resume-v4-ppc/arch/ppc/kernel/Makefile 2005-02-13 12:22:06.000000000 +0800
@@ -16,6 +16,7 @@ obj-y := entry.o traps.o irq.o idle.o
semaphore.o syscalls.o setup.o \
cputable.o ppc_htab.o perfmon.o
obj-$(CONFIG_6xx) += l2cr.o cpu_setup_6xx.o
+obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o
obj-$(CONFIG_POWER4) += cpu_setup_power4.o
obj-$(CONFIG_MODULES) += module.o ppc_ksyms.o
obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-mapping.o
--- 2.6.11-rc3-mm2-use-list-resume-v4/arch/ppc/kernel/signal.c 2005-02-13 12:11:50.000000000 +0800
+++ 2.6.11-rc3-mm2-use-list-resume-v4-ppc/arch/ppc/kernel/signal.c 2005-02-13 12:22:06.000000000 +0800
@@ -28,6 +28,7 @@
#include <linux/elf.h>
#include <linux/tty.h>
#include <linux/binfmts.h>
+#include <linux/suspend.h>
#include <asm/ucontext.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -704,6 +705,14 @@ int do_signal(sigset_t *oldset, struct p
unsigned long frame, newsp;
int signr, ret;

+ if (current->flags & PF_FREEZE) {
+ refrigerator(PF_FREEZE);
+ signr = 0;
+ ret = regs->gpr[3];
+ if (!signal_pending(current))
+ goto no_signal;
+ }
+
if (!oldset)
oldset = &current->blocked;

@@ -726,6 +735,7 @@ int do_signal(sigset_t *oldset, struct p
regs->gpr[3] = EINTR;
/* note that the cr0.SO bit is already set */
} else {
+no_signal:
regs->nip -= 4; /* Back up & retry system call */
regs->result = 0;
regs->trap = 0;
--- /dev/null 2004-06-07 18:45:47.000000000 +0800
+++ 2.6.11-rc3-mm2-use-list-resume-v4-ppc/arch/ppc/kernel/swsusp.S 1904-01-01 00:05:22.000000000 +0706
@@ -0,0 +1,349 @@
+#include <linux/config.h>
+#include <linux/threads.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/offsets.h>
+
+
+/*
+ * Structure for storing CPU registers on the save area.
+ */
+#define SL_SP 0
+#define SL_PC 4
+#define SL_MSR 8
+#define SL_SDR1 0xc
+#define SL_SPRG0 0x10 /* 4 sprg's */
+#define SL_DBAT0 0x20
+#define SL_IBAT0 0x28
+#define SL_DBAT1 0x30
+#define SL_IBAT1 0x38
+#define SL_DBAT2 0x40
+#define SL_IBAT2 0x48
+#define SL_DBAT3 0x50
+#define SL_IBAT3 0x58
+#define SL_TB 0x60
+#define SL_R2 0x68
+#define SL_CR 0x6c
+#define SL_LR 0x70
+#define SL_R12 0x74 /* r12 to r31 */
+#define SL_SIZE (SL_R12 + 80)
+
+ .section .data
+ .align 5
+
+_GLOBAL(swsusp_save_area)
+ .space SL_SIZE
+
+
+ .section .text
+ .align 5
+
+_GLOBAL(swsusp_arch_suspend)
+
+ lis r11,swsusp_save_area@h
+ ori r11,r11,swsusp_save_area@l
+
+ mflr r0
+ stw r0,SL_LR(r11)
+ mfcr r0
+ stw r0,SL_CR(r11)
+ stw r1,SL_SP(r11)
+ stw r2,SL_R2(r11)
+ stmw r12,SL_R12(r11)
+
+ /* Save MSR & SDR1 */
+ mfmsr r4
+ stw r4,SL_MSR(r11)
+ mfsdr1 r4
+ stw r4,SL_SDR1(r11)
+
+ /* Get a stable timebase and save it */
+1: mftbu r4
+ stw r4,SL_TB(r11)
+ mftb r5
+ stw r5,SL_TB+4(r11)
+ mftbu r3
+ cmpw r3,r4
+ bne 1b
+
+ /* Save SPRGs */
+ mfsprg r4,0
+ stw r4,SL_SPRG0(r11)
+ mfsprg r4,1
+ stw r4,SL_SPRG0+4(r11)
+ mfsprg r4,2
+ stw r4,SL_SPRG0+8(r11)
+ mfsprg r4,3
+ stw r4,SL_SPRG0+12(r11)
+
+ /* Save BATs */
+ mfdbatu r4,0
+ stw r4,SL_DBAT0(r11)
+ mfdbatl r4,0
+ stw r4,SL_DBAT0+4(r11)
+ mfdbatu r4,1
+ stw r4,SL_DBAT1(r11)
+ mfdbatl r4,1
+ stw r4,SL_DBAT1+4(r11)
+ mfdbatu r4,2
+ stw r4,SL_DBAT2(r11)
+ mfdbatl r4,2
+ stw r4,SL_DBAT2+4(r11)
+ mfdbatu r4,3
+ stw r4,SL_DBAT3(r11)
+ mfdbatl r4,3
+ stw r4,SL_DBAT3+4(r11)
+ mfibatu r4,0
+ stw r4,SL_IBAT0(r11)
+ mfibatl r4,0
+ stw r4,SL_IBAT0+4(r11)
+ mfibatu r4,1
+ stw r4,SL_IBAT1(r11)
+ mfibatl r4,1
+ stw r4,SL_IBAT1+4(r11)
+ mfibatu r4,2
+ stw r4,SL_IBAT2(r11)
+ mfibatl r4,2
+ stw r4,SL_IBAT2+4(r11)
+ mfibatu r4,3
+ stw r4,SL_IBAT3(r11)
+ mfibatl r4,3
+ stw r4,SL_IBAT3+4(r11)
+
+#if 0
+ /* Backup various CPU config stuffs */
+ bl __save_cpu_setup
+#endif
+ /* Call the low level suspend stuff (we should probably have made
+ * a stackframe...
+ */
+ bl swsusp_save
+
+ /* Restore LR from the save area */
+ lis r11,swsusp_save_area@h
+ ori r11,r11,swsusp_save_area@l
+ lwz r0,SL_LR(r11)
+ mtlr r0
+
+ blr
+
+
+/* Resume code */
+_GLOBAL(swsusp_arch_resume)
+
+ /* Stop pending alitvec streams and memory accesses */
+BEGIN_FTR_SECTION
+ DSSALL
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+ sync
+
+ /* Disable MSR:DR to make sure we don't take a TLB or
+ * hash miss during the copy, as our hash table will
+ * for a while be unuseable. For .text, we assume we are
+ * covered by a BAT. This works only for non-G5 at this
+ * point. G5 will need a better approach, possibly using
+ * a small temporary hash table filled with large mappings,
+ * disabling the MMU completely isn't a good option for
+ * performance reasons.
+ * (Note that 750's may have the same performance issue as
+ * the G5 in this case, we should investigate using moving
+ * BATs for these CPUs)
+ */
+ mfmsr r0
+ sync
+ rlwinm r0,r0,0,28,26 /* clear MSR_DR */
+ mtmsr r0
+ sync
+ isync
+
+ /* Load ptr the list of pages to copy in r3 */
+ lis r11,(pagedir_nosave - KERNELBASE)@h
+ ori r11,r11,pagedir_nosave@l
+ lwz r10,0(r11)
+
+ /* Copy the pages. This is a very basic implementation, to
+ * be replaced by something more cache efficient */
+1:
+ tophys(r3,r10)
+ li r0,256
+ mtctr r0
+ lwz r11,pbe_address(r3) /* source */
+ tophys(r5,r11)
+ lwz r10,pbe_orig_address(r3) /* destination */
+ tophys(r6,r10)
+2:
+ lwz r8,0(r5)
+ lwz r9,4(r5)
+ lwz r10,8(r5)
+ lwz r11,12(r5)
+ addi r5,r5,16
+ stw r8,0(r6)
+ stw r9,4(r6)
+ stw r10,8(r6)
+ stw r11,12(r6)
+ addi r6,r6,16
+ bdnz 2b
+ lwz r10,pbe_next(r3)
+ cmpwi 0,r10,0
+ bne 1b
+
+ /* Do a very simple cache flush/inval of the L1 to ensure
+ * coherency of the icache
+ */
+ lis r3,0x0002
+ mtctr r3
+ li r3, 0
+1:
+ lwz r0,0(r3)
+ addi r3,r3,0x0020
+ bdnz 1b
+ isync
+ sync
+
+ /* Now flush those cache lines */
+ lis r3,0x0002
+ mtctr r3
+ li r3, 0
+1:
+ dcbf 0,r3
+ addi r3,r3,0x0020
+ bdnz 1b
+ sync
+
+ /* Ok, we are now running with the kernel data of the old
+ * kernel fully restored. We can get to the save area
+ * easily now. As for the rest of the code, it assumes the
+ * loader kernel and the booted one are exactly identical
+ */
+ lis r11,swsusp_save_area@h
+ ori r11,r11,swsusp_save_area@l
+ tophys(r11,r11)
+
+#if 0
+ /* Restore various CPU config stuffs */
+ bl __restore_cpu_setup
+#endif
+ /* Restore the BATs, and SDR1. Then we can turn on the MMU.
+ * This is a bit hairy as we are running out of those BATs,
+ * but first, our code is probably in the icache, and we are
+ * writing the same value to the BAT, so that should be fine,
+ * though a better solution will have to be found long-term
+ */
+ lwz r4,SL_SDR1(r11)
+ mtsdr1 r4
+ lwz r4,SL_SPRG0(r11)
+ mtsprg 0,r4
+ lwz r4,SL_SPRG0+4(r11)
+ mtsprg 1,r4
+ lwz r4,SL_SPRG0+8(r11)
+ mtsprg 2,r4
+ lwz r4,SL_SPRG0+12(r11)
+ mtsprg 3,r4
+
+#if 0
+ lwz r4,SL_DBAT0(r11)
+ mtdbatu 0,r4
+ lwz r4,SL_DBAT0+4(r11)
+ mtdbatl 0,r4
+ lwz r4,SL_DBAT1(r11)
+ mtdbatu 1,r4
+ lwz r4,SL_DBAT1+4(r11)
+ mtdbatl 1,r4
+ lwz r4,SL_DBAT2(r11)
+ mtdbatu 2,r4
+ lwz r4,SL_DBAT2+4(r11)
+ mtdbatl 2,r4
+ lwz r4,SL_DBAT3(r11)
+ mtdbatu 3,r4
+ lwz r4,SL_DBAT3+4(r11)
+ mtdbatl 3,r4
+ lwz r4,SL_IBAT0(r11)
+ mtibatu 0,r4
+ lwz r4,SL_IBAT0+4(r11)
+ mtibatl 0,r4
+ lwz r4,SL_IBAT1(r11)
+ mtibatu 1,r4
+ lwz r4,SL_IBAT1+4(r11)
+ mtibatl 1,r4
+ lwz r4,SL_IBAT2(r11)
+ mtibatu 2,r4
+ lwz r4,SL_IBAT2+4(r11)
+ mtibatl 2,r4
+ lwz r4,SL_IBAT3(r11)
+ mtibatu 3,r4
+ lwz r4,SL_IBAT3+4(r11)
+ mtibatl 3,r4
+#endif
+
+BEGIN_FTR_SECTION
+ li r4,0
+ mtspr SPRN_DBAT4U,r4
+ mtspr SPRN_DBAT4L,r4
+ mtspr SPRN_DBAT5U,r4
+ mtspr SPRN_DBAT5L,r4
+ mtspr SPRN_DBAT6U,r4
+ mtspr SPRN_DBAT6L,r4
+ mtspr SPRN_DBAT7U,r4
+ mtspr SPRN_DBAT7L,r4
+ mtspr SPRN_IBAT4U,r4
+ mtspr SPRN_IBAT4L,r4
+ mtspr SPRN_IBAT5U,r4
+ mtspr SPRN_IBAT5L,r4
+ mtspr SPRN_IBAT6U,r4
+ mtspr SPRN_IBAT6L,r4
+ mtspr SPRN_IBAT7U,r4
+ mtspr SPRN_IBAT7L,r4
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_HIGH_BATS)
+
+ /* Flush all TLBs */
+ lis r4,0x1000
+1: addic. r4,r4,-0x1000
+ tlbie r4
+ blt 1b
+ sync
+
+ /* restore the MSR and turn on the MMU */
+ lwz r3,SL_MSR(r11)
+ bl turn_on_mmu
+ tovirt(r11,r11)
+
+ /* Restore TB */
+ li r3,0
+ mttbl r3
+ lwz r3,SL_TB(r11)
+ lwz r4,SL_TB+4(r11)
+ mttbu r3
+ mttbl r4
+
+ /* Kick decrementer */
+ li r0,1
+ mtdec r0
+
+ /* Restore the callee-saved registers and return */
+ lwz r0,SL_CR(r11)
+ mtcr r0
+ lwz r2,SL_R2(r11)
+ lmw r12,SL_R12(r11)
+ lwz r1,SL_SP(r11)
+ lwz r0,SL_LR(r11)
+ mtlr r0
+
+ // XXX Note: we don't really need to call swsusp_resume
+
+ li r3,0
+ blr
+
+/* FIXME:This construct is actually not useful since we don't shut
+ * down the instruction MMU, we could just flip back MSR-DR on.
+ */
+turn_on_mmu:
+ mflr r4
+ mtsrr0 r4
+ mtsrr1 r3
+ sync
+ isync
+ rfi
+
--- 2.6.11-rc3-mm2-use-list-resume-v4/arch/ppc/kernel/vmlinux.lds.S 2004-12-30 14:55:39.000000000 +0800
+++ 2.6.11-rc3-mm2-use-list-resume-v4-ppc/arch/ppc/kernel/vmlinux.lds.S 2005-02-13 12:22:06.000000000 +0800
@@ -74,6 +74,12 @@ SECTIONS
CONSTRUCTORS
}

+ . = ALIGN(4096);
+ __nosave_begin = .;
+ .data_nosave : { *(.data.nosave) }
+ . = ALIGN(4096);
+ __nosave_end = .;
+
. = ALIGN(32);
.data.cacheline_aligned : { *(.data.cacheline_aligned) }

--- 2.6.11-rc3-mm2-use-list-resume-v4/drivers/macintosh/via-pmu.c 2005-02-13 12:13:37.000000000 +0800
+++ 2.6.11-rc3-mm2-use-list-resume-v4-ppc/drivers/macintosh/via-pmu.c 2005-02-13 12:22:06.000000000 +0800
@@ -43,6 +43,7 @@
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/device.h>
+#include <linux/sysdev.h>
#include <linux/suspend.h>
#include <linux/syscalls.h>
#include <linux/cpu.h>
@@ -2326,7 +2327,7 @@ pmac_suspend_devices(void)
/* Sync the disks. */
/* XXX It would be nice to have some way to ensure that
* nobody is dirtying any new buffers while we wait. That
- * could be acheived using the refrigerator for processes
+ * could be achieved using the refrigerator for processes
* that swsusp uses
*/
sys_sync();
@@ -2379,7 +2380,6 @@ pmac_suspend_devices(void)

/* Wait for completion of async backlight requests */
while (!bright_req_1.complete || !bright_req_2.complete ||
-
!batt_req.complete)
pmu_poll();

@@ -3040,6 +3040,88 @@ pmu_polled_request(struct adb_request *r
}
#endif /* DEBUG_SLEEP */

+
+/* FIXME: This is a temporary set of callbacks to enable us
+ * to do suspend-to-disk.
+ */
+
+#ifdef CONFIG_PM
+
+static int pmu_sys_suspended = 0;
+
+static int pmu_sys_suspend(struct sys_device *sysdev, u32 state)
+{
+ if (state != PM_SUSPEND_DISK || pmu_sys_suspended)
+ return 0;
+
+ /* Suspend PMU event interrupts */
+ pmu_suspend();
+
+ pmu_sys_suspended = 1;
+ return 0;
+}
+
+static int pmu_sys_resume(struct sys_device *sysdev)
+{
+ struct adb_request req;
+
+ if (!pmu_sys_suspended)
+ return 0;
+
+ /* Tell PMU we are ready */
+ pmu_request(&req, NULL, 2, PMU_SYSTEM_READY, 2);
+ pmu_wait_complete(&req);
+
+ /* Resume PMU event interrupts */
+ pmu_resume();
+
+ pmu_sys_suspended = 0;
+
+ return 0;
+}
+
+#endif /* CONFIG_PM */
+
+static struct sysdev_class pmu_sysclass = {
+ set_kset_name("pmu"),
+};
+
+static struct sys_device device_pmu = {
+ .id = 0,
+ .cls = &pmu_sysclass,
+};
+
+static struct sysdev_driver driver_pmu = {
+#ifdef CONFIG_PM
+ .suspend = &pmu_sys_suspend,
+ .resume = &pmu_sys_resume,
+#endif /* CONFIG_PM */
+};
+
+static int __init init_pmu_sysfs(void)
+{
+ int rc;
+
+ rc = sysdev_class_register(&pmu_sysclass);
+ if (rc) {
+ printk(KERN_ERR "Failed registering PMU sys class\n");
+ return -ENODEV;
+ }
+ rc = sysdev_register(&device_pmu);
+ if (rc) {
+ printk(KERN_ERR "Failed registering PMU sys device\n");
+ return -ENODEV;
+ }
+ rc = sysdev_driver_register(&pmu_sysclass, &driver_pmu);
+ if (rc) {
+ printk(KERN_ERR "Failed registering PMU sys driver\n");
+ return -ENODEV;
+ }
+ return 0;
+}
+
+subsys_initcall(init_pmu_sysfs);
+
EXPORT_SYMBOL(pmu_request);
EXPORT_SYMBOL(pmu_poll);
EXPORT_SYMBOL(pmu_poll_adb);
--- 2.6.11-rc3-mm2-use-list-resume-v4/arch/ppc/platforms/pmac_setup.c 2005-02-13 12:13:31.000000000 +0800
+++ 2.6.11-rc3-mm2-use-list-resume-v4-ppc/arch/ppc/platforms/pmac_setup.c 2005-02-13 12:22:06.000000000 +0800
@@ -52,6 +52,7 @@
#include <linux/seq_file.h>
#include <linux/root_dev.h>
#include <linux/bitops.h>
+#include <linux/suspend.h>

#include <asm/reg.h>
#include <asm/sections.h>
@@ -70,6 +71,8 @@
#include <asm/pmac_feature.h>
#include <asm/time.h>
#include <asm/of_device.h>
+#include <asm/mmu_context.h>
+
#include "pmac_pic.h"
#include "mem_pieces.h"

@@ -421,10 +424,62 @@ find_boot_device(void)
}

static int initializing = 1;
+/* TODO: Merge the suspend-to-ram with the common code !!!
+ * currently, this is a stub implementation for suspend-to-disk
+ * only
+ */
+
+#ifdef CONFIG_SOFTWARE_SUSPEND
+
+static int pmac_pm_prepare(suspend_state_t state)
+{
+ printk(KERN_DEBUG "%s(%d)\n", __FUNCTION__, state);
+
+ return 0;
+}
+
+static int pmac_pm_enter(suspend_state_t state)
+{
+ printk(KERN_DEBUG "%s(%d)\n", __FUNCTION__, state);
+
+ /* Giveup the lazy FPU & vec so we don't have to back them
+ * up from the low level code
+ */
+ enable_kernel_fp();
+
+#ifdef CONFIG_ALTIVEC
+ if (cur_cpu_spec[0]->cpu_features & CPU_FTR_ALTIVEC)
+ enable_kernel_altivec();
+#endif /* CONFIG_ALTIVEC */
+
+ return 0;
+}
+
+static int pmac_pm_finish(suspend_state_t state)
+{
+ printk(KERN_DEBUG "%s(%d)\n", __FUNCTION__, state);
+
+ /* Restore userland MMU context */
+ set_context(current->active_mm->context, current->active_mm->pgd);
+
+ return 0;
+}
+
+static struct pm_ops pmac_pm_ops = {
+ .pm_disk_mode = PM_DISK_SHUTDOWN,
+ .prepare = pmac_pm_prepare,
+ .enter = pmac_pm_enter,
+ .finish = pmac_pm_finish,
+};
+
+#endif /* CONFIG_SOFTWARE_SUSPEND */

static int pmac_late_init(void)
{
initializing = 0;
+#ifdef CONFIG_SOFTWARE_SUSPEND
+ pm_set_ops(&pmac_pm_ops);
+#endif /* CONFIG_SOFTWARE_SUSPEND */
return 0;
}

--- /dev/null 2004-06-07 18:45:47.000000000 +0800
+++ 2.6.11-rc3-mm2-use-list-resume-v4-ppc/include/asm-ppc/suspend.h 2005-02-13 12:22:06.000000000 +0800
@@ -0,0 +1,12 @@
+static inline int arch_prepare_suspend(void)
+{
+ return 0;
+}
+
+static inline void save_processor_state(void)
+{
+}
+
+static inline void restore_processor_state(void)
+{
+}
--- 2.6.11-rc3-mm2-use-list-resume-v4/include/linux/suspend.h 2005-02-13 12:13:51.000000000 +0800
+++ 2.6.11-rc3-mm2-use-list-resume-v4-ppc/include/linux/suspend.h 2005-02-13 12:22:06.000000000 +0800
@@ -1,7 +1,7 @@
#ifndef _LINUX_SWSUSP_H
#define _LINUX_SWSUSP_H

-#if defined(CONFIG_X86) || defined(CONFIG_FRV)
+#if defined(CONFIG_X86) || defined(CONFIG_FRV) || defined(CONFIG_PPC32)
#include <asm/suspend.h>
#endif
#include <linux/swap.h>

--
Hu Gang .-.
/v\
// \\
Linux User /( )\ [204016]
GPG Key ID ^^-^^ http://soulinfo.com/~hugang/hugang.asc

2005-02-13 06:12:48

by Hu Gang

[permalink] [raw]
Subject: Re: [RFC][PATCH] swsusp: do not use higher order allocations on resume [update 2]

On Sun, Feb 13, 2005 at 01:04:56PM +0706, [email protected] wrote:
> On Wed, Feb 09, 2005 at 12:22:52AM +0100, Rafael J. Wysocki wrote:
> > On Tuesday, 8 of February 2005 23:42, Pavel Machek wrote:
> > > Hi!
> > >
> > > > +static inline void eat_page(void *page) {
> > >
> > > Please put { on new line.
> >
> > Oh, I still tend to forget about this. Corrected in the patch that is
> > available on the web
> > (http://www.sisk.pl/kernel/patches/2.6.11-rc3-mm1/swsusp-use-list-resume-v4.patch).
> >
> >
> > > Okay, as you can see, I could find very little wrong with this
> > > patch. That hopefully means it is okay ;-). I should still check error
> > > handling, but I guess I'll do it when it is applied because it is hard
> > > to do on a diff. I guess it should go into -mm just after 2.6.11 is
> > > released...
> >
> > That would be great!
> >
> > Greets,
> > Rafael
>
> Here is powerpc port support for that. Thanks for greate patch.
>

Sorry I forgot this one.

--- 2.6.11-rc3-mm2-use-list-resume-v4/arch/ppc/kernel/asm-offsets.c 2004-12-30 14:55:39.000000000 +0800
+++ 2.6.11-rc3-mm2-use-list-resume-v4-ppc/arch/ppc/kernel/asm-offsets.c 2005-02-13 12:30:59.000000000 +0800
@@ -16,6 +16,7 @@
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
+#include <linux/suspend.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <asm/io.h>
@@ -136,6 +137,10 @@ main(void)
DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));

+ DEFINE(pbe_address, offsetof(struct pbe, address));
+ DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address));
+ DEFINE(pbe_next, offsetof(struct pbe, next));
+
DEFINE(NUM_USER_SEGMENTS, TASK_SIZE>>28);
return 0;
}

--
Hu Gang .-.
/v\
// \\
Linux User /( )\ [204016]
GPG Key ID ^^-^^ http://soulinfo.com/~hugang/hugang.asc