2004-11-19 19:46:01

by Hu Gang

[permalink] [raw]
Subject: [PATCH] Software Suspend split to two stage V2.

Hi Pavel Machek:

This patch uses the pagemap for the PageSet2 bitmap. It increases suspend
speed: on my PowerPC, suspend needs only 5 secs, cool.

Tested successfully on my ppc and x86 laptops.

ppc swsusp patch for 2.6.9
http://honk.physik.uni-konstanz.de/~agx/linux-ppc/kernel/
Have fun.

diff -ur linux-2.6.9/kernel/power/disk.c linux-2.6.9-hg/kernel/power/disk.c
--- linux-2.6.9/kernel/power/disk.c 2004-10-20 16:00:53.000000000 +0800
+++ linux-2.6.9-hg/kernel/power/disk.c 2004-11-20 01:07:09.000000000 +0800
@@ -17,7 +17,6 @@
#include <linux/fs.h>
#include "power.h"

-
extern u32 pm_disk_mode;
extern struct pm_ops * pm_ops;

@@ -26,7 +25,7 @@
extern int swsusp_read(void);
extern int swsusp_resume(void);
extern int swsusp_free(void);
-
+extern int pcs_suspend(int resume);

static int noresume = 0;
char resume_file[256] = CONFIG_PM_STD_PARTITION;
@@ -73,7 +72,7 @@

static int in_suspend __nosavedata = 0;

-
+#if 0
/**
* free_some_memory - Try to free as much memory as possible
*
@@ -91,7 +90,7 @@
printk("|\n");
}

-
+#endif
static inline void platform_finish(void)
{
if (pm_disk_mode == PM_DISK_PLATFORM) {
@@ -104,13 +103,14 @@
{
device_resume();
platform_finish();
+ pcs_suspend(2);
enable_nonboot_cpus();
thaw_processes();
pm_restore_console();
}


-static int prepare(void)
+static int prepare(int resume)
{
int error;

@@ -130,9 +130,12 @@
}

/* Free memory before shutting down devices. */
- free_some_memory();
+ //free_some_memory();

disable_nonboot_cpus();
+ if ((error = pcs_suspend(resume))) {
+ goto Finish;
+ }
if ((error = device_suspend(PM_SUSPEND_DISK)))
goto Finish;

@@ -160,7 +163,7 @@
{
int error;

- if ((error = prepare()))
+ if ((error = prepare(0)))
return error;

pr_debug("PM: Attempting to suspend to disk.\n");
@@ -226,7 +229,7 @@

pr_debug("PM: Preparing system for restore.\n");

- if ((error = prepare()))
+ if ((error = prepare(1)))
goto Free;

barrier();
diff -ur linux-2.6.9/kernel/power/process.c linux-2.6.9-hg/kernel/power/process.c
--- linux-2.6.9/kernel/power/process.c 2004-10-20 16:00:53.000000000 +0800
+++ linux-2.6.9-hg/kernel/power/process.c 2004-11-20 01:20:31.000000000 +0800
@@ -4,8 +4,6 @@
*
* Originally from swsusp.
*/
-
-
#undef DEBUG

#include <linux/smp_lock.h>
diff -ur linux-2.6.9/kernel/power/swsusp.c linux-2.6.9-hg/kernel/power/swsusp.c
--- linux-2.6.9/kernel/power/swsusp.c 2004-10-20 16:00:53.000000000 +0800
+++ linux-2.6.9-hg/kernel/power/swsusp.c 2004-11-20 03:21:47.000000000 +0800
@@ -301,6 +301,12 @@
printk( "." );
error = write_page((pagedir_nosave+i)->address,
&((pagedir_nosave+i)->swap_address));
+#ifdef PCS_DEBUG
+ pr_debug("data_write: %p %p %u\n",
+ (void *)(pagedir_nosave+i)->address,
+ (void *)(pagedir_nosave+i)->orig_address,
+ (pagedir_nosave+i)->swap_address);
+#endif
}
printk(" %d Pages done.\n",i);
return error;
@@ -505,6 +511,316 @@
return 0;
}

+/**
+ * calc_order - Determine the order of allocation needed for pagedir_save.
+ *
+ * This looks tricky, but is just subtle. Please fix it some time.
+ * Since there are %nr_copy_pages worth of pages in the snapshot, we need
+ * to allocate enough contiguous space to hold
+ * (%nr_copy_pages * sizeof(struct pbe)),
+ * which has the saved/orig locations of the page..
+ *
+ * SUSPEND_PD_PAGES() tells us how many pages we need to hold those
+ * structures, then we call get_bitmask_order(), which will tell us the
+ * last bit set in the number, starting with 1. (If we need 30 pages, that
+ * is 0x0000001e in hex. The last bit is the 5th, which is the order we
+ * would use to allocate 32 contiguous pages).
+ *
+ * Since we also need to save those pages, we add the number of pages that
+ * we need to nr_copy_pages, and in case of an overflow, do the
+ * calculation again to update the number of pages needed.
+ *
+ * With this model, we will tend to waste a lot of memory if we just cross
+ * an order boundary. Plus, the higher the order of allocation that we try
+ * to do, the more likely we are to fail in a low-memory situtation
+ * (though we're unlikely to get this far in such a case, since swsusp
+ * requires half of memory to be free anyway).
+ */
+
+static void calc_order(int *po, int *nr)
+{
+ int diff = 0;
+ int order = 0;
+
+ do {
+ diff = get_bitmask_order(SUSPEND_PD_PAGES(*nr)) - order;
+ if (diff) {
+ order += diff;
+ *nr += 1 << diff;
+ }
+ } while(diff);
+ *po = order;
+}
+
+typedef int (*do_page_t)(struct page *page, void *p);
+
+static int foreach_zone_page(struct zone *zone, do_page_t fun, void *p)
+{
+ unsigned long flags;
+ int inactive = 0, active = 0;
+
+ spin_lock_irqsave(&zone->lru_lock, flags);
+ if (zone->nr_inactive) {
+ struct list_head * entry = zone->inactive_list.prev;
+ while (entry != &zone->inactive_list) {
+ if (fun) {
+ struct page * page = list_entry(entry, struct page, lru);
+ inactive += fun(page, p);
+ } else {
+ inactive ++;
+ }
+ entry = entry->prev;
+ }
+ }
+ if (zone->nr_active) {
+ struct list_head * entry = zone->active_list.prev;
+ while (entry != &zone->active_list) {
+ if (fun) {
+ struct page * page = list_entry(entry, struct page, lru);
+ active += fun(page, p);
+ } else {
+ active ++;
+ }
+ entry = entry->prev;
+ }
+ }
+ spin_unlock_irqrestore(&zone->lru_lock, flags);
+
+ return (active + inactive);
+}
+
+static unsigned long *pageset2map = NULL;
+
+#define PAGENUMBER(page) (page-mem_map)
+#define PAGEINDEX(page) ((PAGENUMBER(page))/(8*sizeof(unsigned long)))
+#define PAGEBIT(page) ((int) ((PAGENUMBER(page))%(8 * sizeof(unsigned long))))
+
+#define BITS_PER_PAGE (PAGE_SIZE * 8)
+#define PAGES_PER_BITMAP ((max_mapnr + BITS_PER_PAGE - 1) / BITS_PER_PAGE)
+#define BITMAP_ORDER (get_bitmask_order((PAGES_PER_BITMAP) - 1))
+
+#define PagePageset2(page) \
+ test_bit(PAGEBIT(page), &pageset2map[PAGEINDEX(page)])
+#define SetPagePageset2(page) \
+ set_bit(PAGEBIT(page), &pageset2map[PAGEINDEX(page)])
+
+static int setup_pcs_pe(struct page *page, void *p)
+{
+ suspend_pagedir_t **pe = p;
+ unsigned long pfn = page_to_pfn(page);
+
+ BUG_ON(PageReserved(page) && PageNosave(page));
+ if (!pfn_valid(pfn)) {
+ printk("not valid page\n");
+ return 0;
+ }
+ if (PageNosave(page)) {
+ printk("nosave\n");
+ return 0;
+ }
+ if (PageReserved(page) /*&& pfn_is_nosave(pfn)*/) {
+ printk("[nosave]\n");
+ return 0;
+ }
+ if (PageSlab(page)) {
+ printk("slab\n");
+ return (0);
+ }
+ if (pe && *pe) {
+ (*pe)->address = (long) page_address(page);
+ (*pe) ++;
+ }
+ SetPagePageset2(page);
+
+ return (1);
+}
+
+
+static int count_pcs(struct zone *zone, suspend_pagedir_t **pe)
+{
+ return foreach_zone_page(zone, setup_pcs_pe, pe);
+}
+#if 0
+static int comp_pcs_page(struct page *page, void *p)
+{
+ struct page *pg = p;
+
+ if (pg == page) return (1);
+ else return (0);
+}
+#endif
+
+static int find_pcs(struct zone *zone, struct page *pg)
+{
+ return PagePageset2(pg);
+ /* return foreach_zone_page(zone, comp_pcs_page, pg); */
+}
+
+static suspend_pagedir_t *pagedir_cache = NULL;
+static int nr_copy_pcs = 0;
+static int pcs_order = 0;
+
+static int alloc_pagedir_cache(void)
+{
+ int need_nr_copy_pcs = nr_copy_pcs;
+
+ calc_order(&pcs_order, &need_nr_copy_pcs);
+ pagedir_cache = (suspend_pagedir_t *)
+ __get_free_pages(GFP_ATOMIC | __GFP_COLD, pcs_order);
+ if (!pagedir_cache)
+ return -ENOMEM;
+ memset(pagedir_cache, 0, (1 << pcs_order) * PAGE_SIZE);
+
+ pr_debug("alloc pcs %p, %d\n", pagedir_cache, pcs_order);
+
+ return 0;
+}
+
+static int pcs_alloc_pagemap(void)
+{
+ pageset2map = (unsigned long *)
+ __get_free_pages(GFP_ATOMIC | __GFP_COLD, BITMAP_ORDER);
+ memset(pageset2map, 0, (1 << BITMAP_ORDER) * PAGE_SIZE);
+
+ return 0;
+}
+
+static int pcs_free_pagemap(void)
+{
+ if (pageset2map) {
+ free_pages((unsigned long) pageset2map, BITMAP_ORDER);
+ pageset2map = NULL;
+ }
+
+ return (0);
+}
+
+int bio_read_page(pgoff_t page_off, void * page);
+
+static int pcs_read(void)
+{
+ struct pbe * p;
+ int error = 0, i;
+ swp_entry_t entry;
+
+ printk( "Reading Page Caches (%d pages): ", nr_copy_pcs);
+ for(i = 0, p = pagedir_cache; i < nr_copy_pcs && !error; i++, p++) {
+ if (!(i%100))
+ printk( "." );
+ error = bio_read_page(swp_offset(p->swap_address),
+ (void *)p->address);
+#ifdef PCS_DEBUG
+ pr_debug("pcs_read: %p %p %u\n",
+ (void *)p->address, (void *)p->orig_address,
+ swp_offset(p->swap_address));
+#endif
+ }
+
+ for (i = 0; i < nr_copy_pcs; i++) {
+ entry = (pagedir_cache + i)->swap_address;
+ if (entry.val)
+ swap_free(entry);
+ }
+ free_pages((unsigned long)pagedir_cache, pcs_order);
+
+ printk(" %d done.\n",i);
+
+ return (0);
+}
+
+static int pcs_write(void)
+{
+ int error = 0;
+ int i;
+
+ printk( "Writing PageCaches to swap (%d pages): ", nr_copy_pcs);
+ for (i = 0; i < nr_copy_pcs && !error; i++) {
+ if (!(i%100))
+ printk( "." );
+ error = write_page((pagedir_cache+i)->address,
+ &((pagedir_cache+i)->swap_address));
+#ifdef PCS_DEBUG
+ pr_debug("pcs_write: %p %p %u\n",
+ (void *)(pagedir_cache+i)->address,
+ (void *)(pagedir_cache+i)->orig_address,
+ (pagedir_cache+i)->swap_address);
+#endif
+ }
+ printk(" %d Pages done.\n",i);
+
+ return error;
+}
+
+static void count_data_pages(void);
+static int swsusp_alloc(void);
+
+int pcs_suspend(int resume)
+{
+ struct zone *zone;
+ suspend_pagedir_t *pe = NULL;
+ int error;
+
+ if (resume == 1) {
+ return (0);
+ }
+ if (resume == 2) {
+ pcs_read();
+ pcs_free_pagemap();
+ return (0);
+ }
+
+ nr_copy_pcs = 0;
+
+ pcs_alloc_pagemap();
+
+ drain_local_pages();
+ for_each_zone(zone) {
+ if (!is_highmem(zone)) {
+ nr_copy_pcs += count_pcs(zone, NULL);
+ }
+ }
+
+ printk("swsusp: Need to copy %u pcs\n", nr_copy_pcs);
+
+ if (nr_copy_pcs == 0) {
+ return (0);
+ }
+
+ if ((error = swsusp_swap_check()))
+ return error;
+
+ if ((error = alloc_pagedir_cache())) {
+ return error;
+ }
+
+ drain_local_pages();
+ count_data_pages();
+ printk("swsusp(1/2): Need to copy %u pages, %u pcs\n",
+ nr_copy_pages, nr_copy_pcs);
+
+ error = swsusp_alloc();
+ if (error)
+ return error;
+
+ drain_local_pages();
+ count_data_pages();
+ printk("swsusp(2/2): Need to copy %u pages, %u pcs\n",
+ nr_copy_pages, nr_copy_pcs);
+
+ pe = pagedir_cache;
+
+ drain_local_pages();
+ for_each_zone(zone) {
+ if (!is_highmem(zone)) {
+ count_pcs(zone, &pe);
+ }
+ }
+ error = pcs_write();
+ if (error)
+ return error;
+
+ return (0);
+}

static int pfn_is_nosave(unsigned long pfn)
{
@@ -547,7 +863,10 @@
*zone_pfn += chunk_size - 1;
return 0;
}
-
+ if ((zone->nr_inactive || zone->nr_active) &&
+ find_pcs(zone, page)) {
+ return 0;
+ }
return 1;
}

@@ -557,9 +876,12 @@
unsigned long zone_pfn;

nr_copy_pages = 0;
+ nr_copy_pcs = 0;

for_each_zone(zone) {
if (!is_highmem(zone)) {
+ if (zone->nr_inactive || zone->nr_active)
+ nr_copy_pcs += count_pcs(zone, NULL);
for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn)
nr_copy_pages += saveable(zone, &zone_pfn);
}
@@ -621,47 +943,6 @@
}


-/**
- * calc_order - Determine the order of allocation needed for pagedir_save.
- *
- * This looks tricky, but is just subtle. Please fix it some time.
- * Since there are %nr_copy_pages worth of pages in the snapshot, we need
- * to allocate enough contiguous space to hold
- * (%nr_copy_pages * sizeof(struct pbe)),
- * which has the saved/orig locations of the page..
- *
- * SUSPEND_PD_PAGES() tells us how many pages we need to hold those
- * structures, then we call get_bitmask_order(), which will tell us the
- * last bit set in the number, starting with 1. (If we need 30 pages, that
- * is 0x0000001e in hex. The last bit is the 5th, which is the order we
- * would use to allocate 32 contiguous pages).
- *
- * Since we also need to save those pages, we add the number of pages that
- * we need to nr_copy_pages, and in case of an overflow, do the
- * calculation again to update the number of pages needed.
- *
- * With this model, we will tend to waste a lot of memory if we just cross
- * an order boundary. Plus, the higher the order of allocation that we try
- * to do, the more likely we are to fail in a low-memory situtation
- * (though we're unlikely to get this far in such a case, since swsusp
- * requires half of memory to be free anyway).
- */
-
-
-static void calc_order(void)
-{
- int diff = 0;
- int order = 0;
-
- do {
- diff = get_bitmask_order(SUSPEND_PD_PAGES(nr_copy_pages)) - order;
- if (diff) {
- order += diff;
- nr_copy_pages += 1 << diff;
- }
- } while(diff);
- pagedir_order = order;
-}


/**
@@ -673,13 +954,14 @@

static int alloc_pagedir(void)
{
- calc_order();
+ calc_order(&pagedir_order, &nr_copy_pages);
pagedir_save = (suspend_pagedir_t *)__get_free_pages(GFP_ATOMIC | __GFP_COLD,
pagedir_order);
if (!pagedir_save)
return -ENOMEM;
memset(pagedir_save, 0, (1 << pagedir_order) * PAGE_SIZE);
pagedir_nosave = pagedir_save;
+ pr_debug("pagedir %p, %d\n", pagedir_save, pagedir_order);
return 0;
}

@@ -783,7 +1065,6 @@
int suspend_prepare_image(void)
{
unsigned int nr_needed_pages = 0;
- int error;

pr_debug("swsusp: critical section: \n");
if (save_highmem()) {
@@ -791,15 +1072,8 @@
return -ENOMEM;
}

- drain_local_pages();
- count_data_pages();
- printk("swsusp: Need to copy %u pages\n",nr_copy_pages);
nr_needed_pages = nr_copy_pages + PAGES_FOR_IO;

- error = swsusp_alloc();
- if (error)
- return error;
-
/* During allocating of suspend pagedir, new cold pages may appear.
* Kill them.
*/
@@ -1011,7 +1285,7 @@
}


-static struct block_device * resume_bdev;
+static struct block_device * resume_bdev __nosavedata;

/**
* submit - submit BIO request.
@@ -1151,6 +1425,11 @@
printk( "." );
error = bio_read_page(swp_offset(p->swap_address),
(void *)p->address);
+#ifdef PCS_DEBUG
+ pr_debug("data_read: %p %p %u\n",
+ (void *)p->address, (void *)p->orig_address,
+ swp_offset(p->swap_address));
+#endif
}
printk(" %d done.\n",i);
return error;
@@ -1219,7 +1498,7 @@
if (!IS_ERR(resume_bdev)) {
set_blocksize(resume_bdev, PAGE_SIZE);
error = read_suspend_image();
- blkdev_put(resume_bdev);
+ /* blkdev_put(resume_bdev); */
} else
error = PTR_ERR(resume_bdev);


--
Hu Gang / Steve
Linux Registered User 204016
GPG Public Key: http://soulinfo.com/~hugang/hugang.asc


2004-11-20 00:50:20

by Pavel Machek

[permalink] [raw]
Subject: swsusp bigdiff [was Re: [PATCH] Software Suspend split to two stage V2.]

Hi!

> This patch using pagemap for PageSet2 bitmap, It increase suspend
> speed, In my PowerPC suspend only need 5 secs, cool.
>
> Test passed in my ppc and x86 laptop.
>
> ppc swsusp patch for 2.6.9
> http://honk.physik.uni-konstanz.de/~agx/linux-ppc/kernel/
> Have fun.

BTW here's my current bigdiff. It already has some rather nice
swsusp speedups. Please try it on your machine; if it works for you,
try to send your patches relative to this one. I hope to merge these
changes during 2.6.11.

Pavel


--- clean/Documentation/pm.txt 2001-12-19 22:38:12.000000000 +0100
+++ linux/Documentation/pm.txt 2004-10-26 12:44:23.000000000 +0200
@@ -36,8 +36,8 @@
apmd: http://worldvisions.ca/~apenwarr/apmd/
acpid: http://acpid.sf.net/

-Driver Interface
-----------------
+Driver Interface -- OBSOLETE, DO NOT USE!
+----------------*************************
If you are writing a new driver or maintaining an old driver, it
should include power management support. Without power management
support, a single driver may prevent a system with power management
@@ -91,54 +91,6 @@
void pm_unregister_all(pm_callback cback);

/*
- * Device idle/use detection
- *
- * In general, drivers for all devices should call "pm_access"
- * before accessing the hardware (ie. before reading or modifying
- * a hardware register). Request or packet-driven drivers should
- * additionally call "pm_dev_idle" when a device is not being used.
- *
- * Examples:
- * 1) A keyboard driver would call pm_access whenever a key is pressed
- * 2) A network driver would call pm_access before submitting
- * a packet for transmit or receive and pm_dev_idle when its
- * transfer and receive queues are empty.
- * 3) A VGA driver would call pm_access before it accesses any
- * of the video controller registers
- *
- * Ultimately, the PM policy manager uses the access and idle
- * information to decide when to suspend individual devices
- * or when to suspend the entire system
- */
-
-/*
- * Description: Update device access time and wake up device, if necessary
- *
- * Parameters:
- * dev - PM device previously returned from pm_register
- *
- * Details: If called from an interrupt handler pm_access updates
- * access time but should never need to wake up the device
- * (if device is generating interrupts, it should be awake
- * already) This is important as we can not wake up
- * devices from an interrupt handler.
- */
-void pm_access(struct pm_dev *dev);
-
-/*
- * Description: Identify device as currently being idle
- *
- * Parameters:
- * dev - PM device previously returned from pm_register
- *
- * Details: A call to pm_dev_idle might signal to the policy manager
- * to put a device to sleep. If a new device request arrives
- * between the call to pm_dev_idle and the pm_callback
- * callback, the driver should fail the pm_callback request.
- */
-void pm_dev_idle(struct pm_dev *dev);
-
-/*
* Power management request callback
*
* Parameters:
@@ -262,8 +214,8 @@

ACPI Development mailing list: [email protected]

-System Interface
-----------------
+System Interface -- OBSOLETE, DO NOT USE!
+----------------*************************
If you are providing new power management support to Linux (ie.
adding support for something like APM or ACPI), you should
communicate with drivers through the existing generic power
--- clean/Documentation/power/devices.txt 2004-11-03 01:23:03.000000000 +0100
+++ linux/Documentation/power/devices.txt 2004-11-03 02:16:40.000000000 +0100
@@ -141,3 +141,82 @@
The driver core will not call any extra functions when binding the
device to the driver.

+pm_message_t meaning
+
+pm_message_t has two fields. event ("major"), and flags. If driver
+does not know event code, it aborts the request, returning error. Some
+drivers may need to deal with special cases based on the actual type
+of suspend operation being done at the system level. This is why
+there are flags.
+
+Event codes are:
+
+ON -- no need to do anything except special cases like broken
+HW.
+
+FREEZE -- stop DMA and interrupts, and be prepared to reinit HW from
+scratch. That probably means stop accepting upstream requests, the
+actual policy of what to do with them being specific to a given
+driver. It's acceptable for a network driver to just drop packets
+while a block driver is expected to block the queue so no request is
+lost. (Use IDE as an example on how to do that). FREEZE requires no
+power state change, and it's expected for drivers to be able to
+quickly transition back to operating state.
+
+SUSPEND -- like FREEZE, but also put hardware into low-power state. If
+there's need to distinguish several levels of sleep, additional flag
+is probably best way to do that.
+
+All events are:
+
+#Prepare for suspend -- userland is still running but we are going to
+#enter suspend state. This gives drivers a chance to load firmware from
+#disk and store it in memory, or do other activities that require
+#operating userland, ability to kmalloc GFP_KERNEL, etc... All of these
+#are forbidden once the suspend dance is started.. event = ON, flags =
+#PREPARE_TO_SUSPEND
+
+Apm standby -- prepare for APM event. Quiesce devices to make life
+easier for APM BIOS. event = FREEZE, flags = APM_STANDBY
+
+Apm suspend -- same as APM_STANDBY, but we should probably avoid
+spinning down disks. event = FREEZE, flags = APM_SUSPEND
+
+System halt, reboot -- quiesce devices to make life easier for BIOS. event
+= FREEZE, flags = SYSTEM_HALT or SYSTEM_REBOOT
+
+System shutdown -- at least disks need to be spun down, or data may be
+lost. Quiesce devices, just to make life easier for BIOS. event =
+FREEZE, flags = SYSTEM_SHUTDOWN
+
+Kexec -- turn off DMAs and put hardware into some state where new
+kernel can take over. event = FREEZE, flags = KEXEC
+
+Powerdown at end of swsusp -- very similar to SYSTEM_SHUTDOWN, except wake
+may need to be enabled on some devices. This actually has at least 3
+subtypes, system can reboot, enter S4 and enter S5 at the end of
+swsusp. event = FREEZE, flags = SWSUSP and one of SYSTEM_REBOOT,
+SYSTEM_SHUTDOWN, SYSTEM_S4
+
+Suspend to ram -- put devices into low power state. event = SUSPEND,
+flags = SUSPEND_TO_RAM
+
+Freeze for swsusp snapshot -- stop DMA and interrupts. No need to put
+devices into low power mode, but you must be able to reinitialize
+device from scratch in resume method. This has two flavors, its done
+once on suspending kernel, once on resuming kernel. event = FREEZE,
+flags = DURING_SUSPEND or DURING_RESUME
+
+Device detach requested from /sys -- deinitialize device; probably same as
+SYSTEM_SHUTDOWN, I do not understand this one too much. probably event
+= FREEZE, flags = DEV_DETACH.
+
+#These are not really events sent:
+#
+#System fully on -- device is working normally; this is probably never
+#passed to suspend() method... event = ON, flags = 0
+#
+#Ready after resume -- userland is now running, again. Time to free any
+#memory you ate during prepare to suspend... event = ON, flags =
+#READY_AFTER_RESUME
+#
--- clean/Documentation/power/swsusp.txt 2004-10-01 00:29:56.000000000 +0200
+++ linux/Documentation/power/swsusp.txt 2004-11-14 23:36:46.000000000 +0100
@@ -15,10 +15,21 @@
* If you change kernel command line between suspend and resume...
* ...prepare for nasty fsck or worse.
*
- * (*) pm interface support is needed to make it safe.
+ * If you change your hardware while system is suspended...
+ * ...well, it was not a good idea.
+ *
+ * (*) suspend/resume support is needed to make it safe.

You need to append resume=/dev/your_swap_partition to kernel command
-line. Then you suspend by echo 4 > /proc/acpi/sleep.
+line. Then you suspend by
+
+echo shutdown > /sys/power/disk; echo disk > /sys/power/state
+
+. If you feel ACPI works pretty well on your system, you might try
+
+echo platform > /sys/power/disk; echo disk > /sys/power/state
+
+

Article about goals and implementation of Software Suspend for Linux
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -32,42 +43,24 @@
to standby mode. Later resuming the machine the saved state is loaded back to
ram and the machine can continue its work. It has two real benefits. First we
save ourselves the time machine goes down and later boots up, energy costs
-real high when running from batteries. The other gain is that we don't have to
+are real high when running from batteries. The other gain is that we don't have to
interrupt our programs so processes that are calculating something for a long
time shouldn't need to be written interruptible.

-Using the code
-
-You have two ways to use this code. The first one is is with a patched
-SysVinit (my patch is against 2.76 and available at my home page). You
-might call 'swsusp' or 'shutdown -z <time>'. Next way is to echo 4 >
-/proc/acpi/sleep.
-
-Either way it saves the state of the machine into active swaps and then
-reboots. You must explicitly specify the swap partition to resume from with
+swsusp saves the state of the machine into active swaps and then reboots or
+powerdowns. You must explicitly specify the swap partition to resume from with
``resume='' kernel option. If signature is found it loads and restores saved
state. If the option ``noresume'' is specified as a boot parameter, it skips
the resuming.

-In the meantime while the system is suspended you should not touch any of the
-hardware!
-
-About the code
-
-Things to implement
-- We should only make a copy of data related to kernel segment, since any
- process data won't be changed.
-- Should make more sanity checks. Or are these enough?
-
-Not so important ideas for implementing
+In the meantime while the system is suspended you should not add/remove any
+of the hardware, write to the filesystems, etc.

-- If a real time process is running then don't suspend the machine.
-- Support for adding/removing hardware while suspended?
-- We should not free pages at the beginning so aggressively, most of them
- go there anyway..
+Sleep states summary
+====================

-Sleep states summary (thanx, Ducrot)
-====================================
+There are three different interfaces you can use, /proc/acpi should
+work like this:

In a really perfect world:
echo 1 > /proc/acpi/sleep # for standby
@@ -79,7 +72,6 @@
and perhaps
echo 4b > /proc/acpi/sleep # for suspend to disk via s4bios

-
Frequently Asked Questions
==========================

@@ -123,27 +115,13 @@

Q: Does linux support ACPI S4?

-A: No.
-
-When swsusp was created, ACPI was not too widespread, so we tried to
-avoid using ACPI-specific stuff. ACPI also is/was notoriously
-buggy. These days swsusp works on APM-only i386 machines and even
-without any power managment at all. Some versions also work on PPC.
-
-That means that machine does not enter S4 on suspend-to-disk, but
-simply enters S5. That has few advantages, you can for example boot
-windows on next boot, and return to your Linux session later. You
-could even have few different Linuxes on your box (not sharing any
-partitions), and switch between them.
-
-It also has disadvantages. On HP nx5000, if you unplug power cord
-while machine is suspended-to-disk, Linux will fail to notice that.
+A: Yes. That's what echo platform > /sys/power/disk does.

Q: My machine doesn't work with ACPI. How can I use swsusp than ?

A: Do a reboot() syscall with right parameters. Warning: glibc gets in
its way, so check with strace:
-
+
reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, 0xd000fce2)

(Thanks to Peter Osterlund:)
@@ -162,6 +140,8 @@
return 0;
}

+Also /sys/ interface should be still present.
+
Q: What is 'suspend2'?

A: suspend2 is 'Software Suspend 2', a forked implementation of
@@ -175,17 +155,22 @@
website, and not to the Linux Kernel Mailing List. We are working
toward merging suspend2 into the mainline kernel.

-Q: Kernel thread must voluntarily freeze itself (call 'refrigerator'). But
-I found some kernel threads don't do it, and they don't freeze, and
+Q: A kernel thread must voluntarily freeze itself (call 'refrigerator').
+I found some kernel threads that don't do it, and they don't freeze
so the system can't sleep. Is this a known behavior?

-A: All such kernel threads need to be fixed, one by one. Select place
-where it is safe to be frozen (no kernel semaphores should be held at
-that point and it must be safe to sleep there), and add:
+A: All such kernel threads need to be fixed, one by one. Select the
+place where the thread is safe to be frozen (no kernel semaphores
+should be held at that point and it must be safe to sleep there), and
+add:

if (current->flags & PF_FREEZE)
refrigerator(PF_FREEZE);

+If the thread is needed for writing the image to storage, you should
+instead set the PF_NOFREEZE process flag when creating the thread.
+
+
Q: What is the difference between between "platform", "shutdown" and
"firmware" in /sys/power/disk?

@@ -201,3 +186,42 @@

"platform" is actually right thing to do, but "shutdown" is most
reliable.
+
+Q: I do not understand why you have such strong objections to idea of
+selective suspend.
+
+A: Do selective suspend during runtime power management, that's okay. But
+it's useless for suspend-to-disk. (And I do not see how you could use
+it for suspend-to-ram, I hope you do not want that).
+
+Lets see, so you suggest to
+
+* SUSPEND all but swap device and parents
+* Snapshot
+* Write image to disk
+* SUSPEND swap device and parents
+* Powerdown
+
+Oh no, that does not work, if swap device or its parents uses DMA,
+you've corrupted data. You'd have to do
+
+* SUSPEND all but swap device and parents
+* FREEZE swap device and parents
+* Snapshot
+* UNFREEZE swap device and parents
+* Write
+* SUSPEND swap device and parents
+
+Which means that you still need that FREEZE state, and you get more
+complicated code. (And I have not yet introduced details like system
+devices).
+
+Q: There don't seem to be any generally useful behavioral
+distinctions between SUSPEND and FREEZE.
+
+A: Doing SUSPEND when you are asked to do FREEZE is always correct,
+but it may be unnecessarily slow. If you want USB to stay simple,
+slowness may not matter to you. It can always be fixed later.
+
+For devices like disk it does matter, you do not want to spindown for
+FREEZE.
--- clean/Documentation/power/video.txt 2004-08-15 19:14:52.000000000 +0200
+++ linux/Documentation/power/video.txt 2004-10-29 11:56:46.000000000 +0200
@@ -17,15 +17,18 @@

* systems where video state is preserved over S3. (Athlon HP Omnibook xe3s)

-* systems that initialize video card into vga text mode and where BIOS
- works well enough to be able to set video mode. Use
- acpi_sleep=s3_mode on these. (Toshiba 4030cdt)
-
* systems where it is possible to call video bios during S3
resume. Unfortunately, it is not correct to call video BIOS at that
point, but it happens to work on some machines. Use
acpi_sleep=s3_bios (Athlon64 desktop system)

+* systems that initialize video card into vga text mode and where BIOS
+ works well enough to be able to set video mode. Use
+ acpi_sleep=s3_mode on these. (Toshiba 4030cdt)
+
+* on some systems s3_bios kicks video into text mode, and
+ acpi_sleep=s3_bios,s3_mode is needed (Toshiba Satellite P10-554)
+
* radeon systems, where X can soft-boot your video card. You'll need
patched X, and plain text console (no vesafb or radeonfb), see
http://www.doesi.gmxhome.de/linux/tm800s3/s3.html. (Acer TM 800)
--- clean/Documentation/sparse.txt 2004-10-16 23:48:08.000000000 +0200
+++ linux/Documentation/sparse.txt 2004-10-24 22:44:47.000000000 +0200
@@ -0,0 +1,72 @@
+Copyright 2004 Linus Torvalds
+Copyright 2004 Pavel Machek <[email protected]>
+
+Using sparse for typechecking
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+"__bitwise" is a type attribute, so you have to do something like this:
+
+ typedef int __bitwise pm_request_t;
+
+ enum pm_request {
+ PM_SUSPEND = (__force pm_request_t) 1,
+ PM_RESUME = (__force pm_request_t) 2
+ };
+
+which makes PM_SUSPEND and PM_RESUME "bitwise" integers (the "__force" is
+there because sparse will complain about casting to/from a bitwise type,
+but in this case we really _do_ want to force the conversion). And because
+the enum values are all the same type, now "enum pm_request" will be that
+type too.
+
+And with gcc, all the __bitwise/__force stuff goes away, and it all ends
+up looking just like integers to gcc.
+
+Quite frankly, you don't need the enum there. The above all really just
+boils down to one special "int __bitwise" type.
+
+So the simpler way is to just do
+
+ typedef int __bitwise pm_request_t;
+
+ #define PM_SUSPEND ((__force pm_request_t) 1)
+ #define PM_RESUME ((__force pm_request_t) 2)
+
+and you now have all the infrastructure needed for strict typechecking.
+
+One small note: the constant integer "0" is special. You can use a
+constant zero as a bitwise integer type without sparse ever complaining.
+This is because "bitwise" (as the name implies) was designed for making
+sure that bitwise types don't get mixed up (little-endian vs big-endian
+vs cpu-endian vs whatever), and there the constant "0" really _is_
+special.
+
+Modify top-level Makefile to say
+
+CHECK = sparse -Wbitwise
+
+or you don't get any checking at all.
+
+
+Where to get sparse
+~~~~~~~~~~~~~~~~~~~
+
+With BK, you can just get it from
+
+ bk://sparse.bkbits.net/sparse
+
+and DaveJ has tar-balls at
+
+ http://www.codemonkey.org.uk/projects/bitkeeper/sparse/
+
+
+Once you have it, just do
+
+ make
+ make install
+
+as your regular user, and it will install sparse in your ~/bin directory.
+After that, doing a kernel make with "make C=1" will run sparse on all the
+C files that get recompiled, or with "make C=2" will run sparse on the
+files whether they need to be recompiled or not (ie the latter is fast way
+to check the whole tree if you have already built it).
--- clean/Makefile 2004-10-19 14:16:26.000000000 +0200
+++ linux/Makefile 2004-10-29 11:56:48.000000000 +0200
@@ -325,7 +325,7 @@
DEPMOD = /sbin/depmod
KALLSYMS = scripts/kallsyms
PERL = perl
-CHECK = sparse
+CHECK = sparse -Wbitwise
CHECKFLAGS := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__
MODFLAGS = -DMODULE
CFLAGS_MODULE = $(MODFLAGS)
--- clean/arch/arm/mach-sa1100/pm.c 2004-10-19 14:16:27.000000000 +0200
+++ linux/arch/arm/mach-sa1100/pm.c 2004-10-26 00:27:12.000000000 +0200
@@ -57,7 +57,7 @@
};


-static int sa11x0_pm_enter(u32 state)
+static int sa11x0_pm_enter(suspend_state_t state)
{
unsigned long gpio, sleep_save[SLEEP_SAVE_SIZE];
struct timespec delta, rtc;
@@ -153,7 +153,7 @@
/*
* Called after processes are frozen, but before we shut down devices.
*/
-static int sa11x0_pm_prepare(u32 state)
+static int sa11x0_pm_prepare(suspend_state_t state)
{
return 0;
}
@@ -161,7 +161,7 @@
/*
* Called after devices are re-setup, but before processes are thawed.
*/
-static int sa11x0_pm_finish(u32 state)
+static int sa11x0_pm_finish(suspend_state_t state)
{
return 0;
}
--- clean/arch/i386/kernel/apm.c 2004-10-01 00:29:59.000000000 +0200
+++ linux/arch/i386/kernel/apm.c 2004-11-14 23:36:46.000000000 +0100
@@ -1201,8 +1201,8 @@
printk(KERN_CRIT "apm: suspend was vetoed, but suspending anyway.\n");
}

- device_suspend(3);
- device_power_down(3);
+ device_suspend(PMSG_SUSPEND);
+ device_power_down(PMSG_SUSPEND);

/* serialize with the timer interrupt */
write_seqlock_irq(&xtime_lock);
@@ -1255,7 +1255,7 @@
{
int err;

- device_power_down(3);
+ device_power_down(PMSG_SUSPEND);
/* serialize with the timer interrupt */
write_seqlock_irq(&xtime_lock);
/* If needed, notify drivers here */
--- clean/arch/i386/kernel/signal.c 2004-10-01 00:29:59.000000000 +0200
+++ linux/arch/i386/kernel/signal.c 2004-10-29 11:56:46.000000000 +0200
@@ -587,7 +587,8 @@

if (current->flags & PF_FREEZE) {
refrigerator(0);
- goto no_signal;
+ if (!signal_pending(current))
+ goto no_signal;
}

if (!oldset)
--- clean/arch/i386/mm/fault.c 2004-10-19 14:16:27.000000000 +0200
+++ linux/arch/i386/mm/fault.c 2004-10-29 11:56:48.000000000 +0200
@@ -21,6 +21,7 @@
#include <linux/vt_kern.h> /* For unblank_screen() */
#include <linux/highmem.h>
#include <linux/module.h>
+#include <linux/delay.h>

#include <asm/system.h>
#include <asm/uaccess.h>
@@ -451,6 +452,7 @@
asm("movl %%cr3,%0":"=r" (page));
page = ((unsigned long *) __va(page))[address >> 22];
printk(KERN_ALERT "*pde = %08lx\n", page);
+ mdelay(10000);
/*
* We must not directly access the pte in the highpte
* case, the page table might be allocated in highmem.
--- clean/arch/i386/power/cpu.c 2004-10-01 00:29:59.000000000 +0200
+++ linux/arch/i386/power/cpu.c 2004-10-29 11:56:46.000000000 +0200
@@ -148,6 +148,6 @@
__restore_processor_state(&saved_context);
}

-
+/* Needed by apm.c */
EXPORT_SYMBOL(save_processor_state);
EXPORT_SYMBOL(restore_processor_state);
--- clean/drivers/acpi/sleep/main.c 2004-10-01 00:30:09.000000000 +0200
+++ linux/drivers/acpi/sleep/main.c 2004-11-04 00:18:36.000000000 +0100
@@ -42,7 +42,7 @@
* wakeup code to the waking vector.
*/

-static int acpi_pm_prepare(u32 pm_state)
+static int acpi_pm_prepare(suspend_state_t pm_state)
{
u32 acpi_state = acpi_suspend_states[pm_state];

@@ -74,7 +74,7 @@
* It's unfortunate, but it works. Please fix if you're feeling frisky.
*/

-static int acpi_pm_enter(u32 pm_state)
+static int acpi_pm_enter(suspend_state_t pm_state)
{
acpi_status status = AE_OK;
unsigned long flags = 0;
@@ -136,7 +136,7 @@
* failed).
*/

-static int acpi_pm_finish(u32 pm_state)
+static int acpi_pm_finish(suspend_state_t pm_state)
{
u32 acpi_state = acpi_suspend_states[pm_state];

@@ -156,7 +156,7 @@

int acpi_suspend(u32 acpi_state)
{
- u32 states[] = {
+ suspend_state_t states[] = {
[1] = PM_SUSPEND_STANDBY,
[3] = PM_SUSPEND_MEM,
[4] = PM_SUSPEND_DISK,
--- clean/drivers/base/platform.c 2004-08-15 19:14:55.000000000 +0200
+++ linux/drivers/base/platform.c 2004-11-04 00:31:33.000000000 +0100
@@ -238,7 +238,7 @@
return (strncmp(pdev->name, drv->name, BUS_ID_SIZE) == 0);
}

-static int platform_suspend(struct device * dev, u32 state)
+static int platform_suspend(struct device * dev, pm_message_t state)
{
int ret = 0;

--- clean/drivers/base/power/power.h 2004-08-15 19:14:55.000000000 +0200
+++ linux/drivers/base/power/power.h 2004-11-14 23:36:46.000000000 +0100
@@ -66,14 +66,14 @@
/*
* suspend.c
*/
-extern int suspend_device(struct device *, u32);
+extern int suspend_device(struct device *, pm_message_t);


/*
* runtime.c
*/

-extern int dpm_runtime_suspend(struct device *, u32);
+extern int dpm_runtime_suspend(struct device *, pm_message_t);
extern void dpm_runtime_resume(struct device *);

#else /* CONFIG_PM */
@@ -88,7 +88,7 @@

}

-static inline int dpm_runtime_suspend(struct device * dev, u32 state)
+static inline int dpm_runtime_suspend(struct device * dev, pm_message_t state)
{
return 0;
}
--- clean/drivers/base/power/resume.c 2004-08-15 19:14:55.000000000 +0200
+++ linux/drivers/base/power/resume.c 2004-11-14 23:36:46.000000000 +0100
@@ -36,7 +36,7 @@
struct device * dev = to_device(entry);
list_del_init(entry);

- if (!dev->power.prev_state)
+ if (dev->power.prev_state == PMSG_ON)
resume_device(dev);

list_add_tail(entry, &dpm_active);
--- clean/drivers/base/power/runtime.c 2004-08-15 19:14:55.000000000 +0200
+++ linux/drivers/base/power/runtime.c 2004-11-14 23:36:46.000000000 +0100
@@ -13,10 +13,10 @@
static void runtime_resume(struct device * dev)
{
dev_dbg(dev, "resuming\n");
- if (!dev->power.power_state)
+ if (dev->power.power_state == PMSG_ON)
return;
if (!resume_device(dev))
- dev->power.power_state = 0;
+ dev->power.power_state = PMSG_ON;
}


@@ -44,7 +44,7 @@
* @state: State to enter.
*/

-int dpm_runtime_suspend(struct device * dev, u32 state)
+int dpm_runtime_suspend(struct device * dev, pm_message_t state)
{
int error = 0;

@@ -52,7 +52,7 @@
if (dev->power.power_state == state)
goto Done;

- if (dev->power.power_state)
+ if (dev->power.power_state != PMSG_ON)
runtime_resume(dev);

if (!(error = suspend_device(dev, state)))
@@ -73,7 +73,7 @@
* always be able to tell, but we need accurate information to
* work reliably.
*/
-void dpm_set_power_state(struct device * dev, u32 state)
+void dpm_set_power_state(struct device * dev, pm_message_t state)
{
down(&dpm_sem);
dev->power.power_state = state;
--- clean/drivers/base/power/shutdown.c 2004-08-15 19:14:55.000000000 +0200
+++ linux/drivers/base/power/shutdown.c 2004-11-14 23:36:46.000000000 +0100
@@ -29,7 +29,8 @@
dev->driver->shutdown(dev);
return 0;
}
- return dpm_runtime_suspend(dev, dev->detach_state);
+ /* FIXME */
+ return dpm_runtime_suspend(dev, PMSG_FREEZE);
}


--- clean/drivers/base/power/suspend.c 2004-08-15 19:14:55.000000000 +0200
+++ linux/drivers/base/power/suspend.c 2004-11-14 23:36:46.000000000 +0100
@@ -11,7 +11,7 @@
#include <linux/device.h>
#include "power.h"

-extern int sysdev_suspend(u32 state);
+extern int sysdev_suspend(pm_message_t state);

/*
* The entries in the dpm_active list are in a depth first order, simply
@@ -35,7 +35,7 @@
* @state: Power state device is entering.
*/

-int suspend_device(struct device * dev, u32 state)
+int suspend_device(struct device * dev, pm_message_t state)
{
int error = 0;

@@ -43,7 +43,7 @@

dev->power.prev_state = dev->power.power_state;

- if (dev->bus && dev->bus->suspend && !dev->power.power_state)
+ if (dev->bus && dev->bus->suspend && (dev->power.power_state == PMSG_ON))
error = dev->bus->suspend(dev, state);

return error;
@@ -70,7 +70,7 @@
*
*/

-int device_suspend(u32 state)
+int device_suspend(pm_message_t state)
{
int error = 0;

@@ -112,7 +112,7 @@
* done, power down system devices.
*/

-int device_power_down(u32 state)
+int device_power_down(pm_message_t state)
{
int error = 0;
struct device * dev;
--- clean/drivers/char/vt.c 2004-10-01 00:30:12.000000000 +0200
+++ linux/drivers/char/vt.c 2004-10-26 00:14:17.000000000 +0200
@@ -2186,8 +2186,6 @@
if (!printable || test_and_set_bit(0, &printing))
return;

- pm_access(pm_con);
-
if (kmsg_redirect && vc_cons_allocated(kmsg_redirect - 1))
currcons = kmsg_redirect - 1;

@@ -2387,7 +2385,6 @@
{
int retval;

- pm_access(pm_con);
retval = do_con_write(tty, from_user, buf, count);
con_flush_chars(tty);

@@ -2398,7 +2395,6 @@
{
if (in_interrupt())
return; /* n_r3964 calls put_char() from interrupt context */
- pm_access(pm_con);
do_con_write(tty, 0, &ch, 1);
}

@@ -2467,8 +2463,6 @@
if (in_interrupt()) /* from flush_to_ldisc */
return;

- pm_access(pm_con);
-
/* if we race with con_close(), vt may be null */
acquire_console_sem();
vt = tty->driver_data;
--- clean/drivers/ide/ide-disk.c 2004-10-01 00:30:12.000000000 +0200
+++ linux/drivers/ide/ide-disk.c 2004-10-29 11:56:48.000000000 +0200
@@ -1419,9 +1419,12 @@
{
switch (rq->pm->pm_step) {
case idedisk_pm_flush_cache: /* Suspend step 1 (flush cache) complete */
- if (rq->pm->pm_state == 4)
+#if 0
+ /* FIXME!! */
+ if (system_state == SYSTEM_SNAPSHOT)
rq->pm->pm_step = ide_pm_state_completed;
else
+#endif
rq->pm->pm_step = idedisk_pm_standby;
break;
case idedisk_pm_standby: /* Suspend step 2 (standby) complete */
@@ -1702,7 +1705,6 @@
return;
}

- printk("Shutdown: %s\n", drive->name);
dev->bus->suspend(dev, PM_SUSPEND_STANDBY);
}

--- clean/drivers/ide/ide.c 2004-10-01 00:30:12.000000000 +0200
+++ linux/drivers/ide/ide.c 2004-11-04 00:32:27.000000000 +0100
@@ -1499,7 +1499,7 @@
return 1;
}

-static int generic_ide_suspend(struct device *dev, u32 state)
+static int generic_ide_suspend(struct device *dev, pm_message_t state)
{
ide_drive_t *drive = dev->driver_data;
struct request rq;
--- clean/drivers/ieee1394/ieee1394_core.c 2004-06-22 12:36:07.000000000 +0200
+++ linux/drivers/ieee1394/ieee1394_core.c 2004-10-29 11:56:46.000000000 +0200
@@ -1039,6 +1039,11 @@
continue;
}

+ if (current->flags & PF_FREEZE) {
+ refrigerator(0);
+ continue;
+ }
+
while ((skb = skb_dequeue(&hpsbpkt_queue)) != NULL) {
packet = (struct hpsb_packet *)skb->data;

--- clean/drivers/input/input.c 2004-08-15 19:14:56.000000000 +0200
+++ linux/drivers/input/input.c 2004-10-25 23:54:57.000000000 +0200
@@ -67,9 +67,6 @@
{
struct input_handle *handle;

- if (dev->pm_dev)
- pm_access(dev->pm_dev);
-
if (type > EV_MAX || !test_bit(type, dev->evbit))
return;

@@ -230,8 +227,6 @@

int input_open_device(struct input_handle *handle)
{
- if (handle->dev->pm_dev)
- pm_access(handle->dev->pm_dev);
handle->open++;
if (handle->dev->open)
return handle->dev->open(handle->dev);
@@ -249,8 +244,6 @@
void input_close_device(struct input_handle *handle)
{
input_release_device(handle);
- if (handle->dev->pm_dev)
- pm_dev_idle(handle->dev->pm_dev);
if (handle->dev->close)
handle->dev->close(handle->dev);
handle->open--;
--- clean/drivers/pci/pci-driver.c 2004-10-01 00:30:16.000000000 +0200
+++ linux/drivers/pci/pci-driver.c 2004-11-04 00:26:44.000000000 +0100
@@ -295,7 +295,7 @@
return 0;
}

-static int pci_device_suspend(struct device * dev, u32 state)
+static int pci_device_suspend(struct device * dev, pm_message_t state)
{
struct pci_dev * pci_dev = to_pci_dev(dev);
struct pci_driver * drv = pci_dev->driver;
--- clean/drivers/pci/pci.c 2004-10-01 00:30:16.000000000 +0200
+++ linux/drivers/pci/pci.c 2004-11-14 23:36:46.000000000 +0100
@@ -229,7 +229,7 @@
/**
* pci_set_power_state - Set the power state of a PCI device
* @dev: PCI device to be suspended
- * @state: Power state we're entering
+ * @state: PCI power state (D0, D1, D2, D3hot, D3cold) we're entering
*
* Transition a device to a new power state, using the Power Management
* Capabilities in the device's config space.
@@ -242,7 +242,7 @@
*/

int
-pci_set_power_state(struct pci_dev *dev, int state)
+pci_set_power_state(struct pci_dev *dev, pci_power_t state)
{
int pm;
u16 pmcsr;
@@ -300,6 +300,30 @@
}

/**
+ * pci_choose_state - Choose the power state of a PCI device
+ * @dev: PCI device to be suspended
+ * @state: target sleep state for the whole system
+ *
+ * Returns PCI power state suitable for given device and given system
+ * message.
+ */
+
+pci_power_t pci_choose_state(struct pci_dev *dev, u32 state)
+{
+ if (!pci_find_capability(dev, PCI_CAP_ID_PM))
+ return PCI_D0;
+
+ switch (state) {
+ case 0: return PCI_D0;
+ case 2: return PCI_D2;
+ case 3: return PCI_D3hot;
+ default: BUG();
+ }
+}
+
+EXPORT_SYMBOL(pci_choose_state);
+
+/**
* pci_save_state - save the PCI configuration space of a device before suspending
* @dev: - PCI device that we're dealing with
* @buffer: - buffer to hold config space context
@@ -365,7 +389,7 @@
{
int err;

- pci_set_power_state(dev, 0);
+ pci_set_power_state(dev, PCI_D0);
if ((err = pcibios_enable_device(dev, bars)) < 0)
return err;
return 0;
@@ -422,7 +446,7 @@
* 0 if operation is successful.
*
*/
-int pci_enable_wake(struct pci_dev *dev, u32 state, int enable)
+int pci_enable_wake(struct pci_dev *dev, pci_power_t state, int enable)
{
int pm;
u16 value;
--- clean/drivers/usb/core/hcd-pci.c 2004-10-01 00:30:19.000000000 +0200
+++ linux/drivers/usb/core/hcd-pci.c 2004-11-14 23:36:46.000000000 +0100
@@ -355,8 +355,8 @@
hcd->state = USB_STATE_RESUMING;

if (has_pci_pm)
- pci_set_power_state (dev, 0);
- dev->dev.power.power_state = 0;
+ pci_set_power_state (dev, PCI_D0);
+ dev->dev.power.power_state = PMSG_ON;
retval = request_irq (dev->irq, usb_hcd_irq, SA_SHIRQ,
hcd->description, hcd);
if (retval < 0) {
--- clean/drivers/usb/host/ohci-hub.c 2004-10-19 14:16:28.000000000 +0200
+++ linux/drivers/usb/host/ohci-hub.c 2004-11-14 23:36:46.000000000 +0100
@@ -76,7 +76,7 @@
struct usb_device *root = hcd_to_bus (&ohci->hcd)->root_hub;
int status = 0;

- if (root->dev.power.power_state != 0)
+ if (root->dev.power.power_state != PMSG_ON)
return 0;
if (time_before (jiffies, ohci->next_statechange))
return -EAGAIN;
--- clean/include/asm-i386/suspend.h 2004-08-15 19:15:04.000000000 +0200
+++ linux/include/asm-i386/suspend.h 2004-10-29 11:56:46.000000000 +0200
@@ -9,6 +9,9 @@
static inline int
arch_prepare_suspend(void)
{
+ /* If you want to make a non-PSE machine work, turn off paging
+ in do_magic. swsusp_pg_dir should have an identity mapping, so
+ it could work... */
if (!cpu_has_pse)
return -EPERM;
return 0;
--- clean/include/linux/device.h 2004-10-01 00:30:29.000000000 +0200
+++ linux/include/linux/device.h 2004-11-04 00:25:52.000000000 +0100
@@ -61,7 +61,7 @@
int (*match)(struct device * dev, struct device_driver * drv);
int (*hotplug) (struct device *dev, char **envp,
int num_envp, char *buffer, int buffer_size);
- int (*suspend)(struct device * dev, u32 state);
+ int (*suspend)(struct device * dev, pm_message_t state);
int (*resume)(struct device * dev);
};

--- clean/include/linux/page-flags.h 2004-10-01 00:30:30.000000000 +0200
+++ linux/include/linux/page-flags.h 2004-10-19 16:38:18.000000000 +0200
@@ -74,7 +74,7 @@
#define PG_swapcache 16 /* Swap page: swp_entry_t in private */
#define PG_mappedtodisk 17 /* Has blocks allocated on-disk */
#define PG_reclaim 18 /* To be reclaimed asap */
-
+#define PG_nosave_free 19 /* Page is free and should not be written */

/*
* Global page accounting. One instance per CPU. Only unsigned longs are
@@ -277,6 +277,10 @@
#define ClearPageNosave(page) clear_bit(PG_nosave, &(page)->flags)
#define TestClearPageNosave(page) test_and_clear_bit(PG_nosave, &(page)->flags)

+#define PageNosaveFree(page) test_bit(PG_nosave_free, &(page)->flags)
+#define SetPageNosaveFree(page) set_bit(PG_nosave_free, &(page)->flags)
+#define ClearPageNosaveFree(page) clear_bit(PG_nosave_free, &(page)->flags)
+
#define PageMappedToDisk(page) test_bit(PG_mappedtodisk, &(page)->flags)
#define SetPageMappedToDisk(page) set_bit(PG_mappedtodisk, &(page)->flags)
#define ClearPageMappedToDisk(page) clear_bit(PG_mappedtodisk, &(page)->flags)
--- clean/include/linux/pci.h 2004-10-01 00:30:30.000000000 +0200
+++ linux/include/linux/pci.h 2004-11-14 23:36:46.000000000 +0100
@@ -480,6 +480,14 @@
#define DEVICE_COUNT_COMPATIBLE 4
#define DEVICE_COUNT_RESOURCE 12

+typedef int __bitwise pci_power_t;
+
+#define PCI_D0 ((pci_power_t __force) 0)
+#define PCI_D1 ((pci_power_t __force) 1)
+#define PCI_D2 ((pci_power_t __force) 2)
+#define PCI_D3hot ((pci_power_t __force) 3)
+#define PCI_D3cold ((pci_power_t __force) 4)
+
/*
* The pci_dev structure is used to describe PCI devices.
*/
@@ -508,7 +516,7 @@
this if your device has broken DMA
or supports 64-bit transfers. */

- u32 current_state; /* Current operating state. In ACPI-speak,
+ pci_power_t current_state; /* Current operating state. In ACPI-speak,
this is D0-D3, D0 being fully functional,
and D3 being off. */

@@ -645,7 +653,7 @@
struct pci_dynids dynids;
};

-#define to_pci_driver(drv) container_of(drv,struct pci_driver, driver)
+#define to_pci_driver(drv) container_of(drv, struct pci_driver, driver)

/**
* PCI_DEVICE - macro used to describe a specific pci device
@@ -781,8 +789,8 @@
/* Power management related routines */
int pci_save_state(struct pci_dev *dev, u32 *buffer);
int pci_restore_state(struct pci_dev *dev, u32 *buffer);
-int pci_set_power_state(struct pci_dev *dev, int state);
-int pci_enable_wake(struct pci_dev *dev, u32 state, int enable);
+int pci_set_power_state(struct pci_dev *dev, pci_power_t state);
+int pci_enable_wake(struct pci_dev *dev, pci_power_t state, int enable);

/* Helper functions for low-level code (drivers/pci/setup-[bus,res].c) */
void pci_bus_assign_resources(struct pci_bus *bus);
--- clean/include/linux/pm.h 2004-10-01 00:30:30.000000000 +0200
+++ linux/include/linux/pm.h 2004-11-14 23:36:46.000000000 +0100
@@ -28,44 +28,28 @@
#include <asm/atomic.h>

/*
- * Power management requests
+ * Power management requests... these are passed to pm_send_all() and friends.
+ *
+ * these functions are old and deprecated, see below.
*/
-enum
-{
- PM_SUSPEND, /* enter D1-D3 */
- PM_RESUME, /* enter D0 */
-
- PM_SAVE_STATE, /* save device's state */
+typedef int __bitwise pm_request_t;

- /* enable wake-on */
- PM_SET_WAKEUP,
-
- /* bus resource management */
- PM_GET_RESOURCES,
- PM_SET_RESOURCES,
-
- /* base station management */
- PM_EJECT,
- PM_LOCK,
-};
+#define PM_SUSPEND ((__force pm_request_t) 1) /* enter D1-D3 */
+#define PM_RESUME ((__force pm_request_t) 2) /* enter D0 */

-typedef int pm_request_t;

/*
- * Device types
+ * Device types... these are passed to pm_register
*/
-enum
-{
- PM_UNKNOWN_DEV = 0, /* generic */
- PM_SYS_DEV, /* system device (fan, KB controller, ...) */
- PM_PCI_DEV, /* PCI device */
- PM_USB_DEV, /* USB device */
- PM_SCSI_DEV, /* SCSI device */
- PM_ISA_DEV, /* ISA device */
- PM_MTD_DEV, /* Memory Technology Device */
-};
+typedef int __bitwise pm_dev_t;

-typedef int pm_dev_t;
+#define PM_UNKNOWN_DEV ((__force pm_request_t) 0) /* generic */
+#define PM_SYS_DEV ((__force pm_request_t) 1) /* system device (fan, KB controller, ...) */
+#define PM_PCI_DEV ((__force pm_request_t) 2) /* PCI device */
+#define PM_USB_DEV ((__force pm_request_t) 3) /* USB device */
+#define PM_SCSI_DEV ((__force pm_request_t) 4) /* SCSI device */
+#define PM_ISA_DEV ((__force pm_request_t) 5) /* ISA device */
+#define PM_MTD_DEV ((__force pm_request_t) 6) /* Memory Technology Device */

/*
* System device hardware ID (PnP) values
@@ -119,37 +103,27 @@
/*
* Register a device with power management
*/
-struct pm_dev *pm_register(pm_dev_t type,
- unsigned long id,
- pm_callback callback);
+struct pm_dev __deprecated *pm_register(pm_dev_t type, unsigned long id, pm_callback callback);

/*
* Unregister a device with power management
*/
-void pm_unregister(struct pm_dev *dev);
+void __deprecated pm_unregister(struct pm_dev *dev);

/*
* Unregister all devices with matching callback
*/
-void pm_unregister_all(pm_callback callback);
+void __deprecated pm_unregister_all(pm_callback callback);

/*
* Send a request to a single device
*/
-int pm_send(struct pm_dev *dev, pm_request_t rqst, void *data);
+int __deprecated pm_send(struct pm_dev *dev, pm_request_t rqst, void *data);

/*
* Send a request to all devices
*/
-int pm_send_all(pm_request_t rqst, void *data);
-
-/*
- * Find a device
- */
-struct pm_dev *pm_find(pm_dev_t type, struct pm_dev *from);
-
-static inline void pm_access(struct pm_dev *dev) {}
-static inline void pm_dev_idle(struct pm_dev *dev) {}
+int __deprecated pm_send_all(pm_request_t rqst, void *data);

#else /* CONFIG_PM */

@@ -176,16 +150,10 @@
return 0;
}

-static inline struct pm_dev *pm_find(pm_dev_t type, struct pm_dev *from)
-{
- return 0;
-}
-
-static inline void pm_access(struct pm_dev *dev) {}
-static inline void pm_dev_idle(struct pm_dev *dev) {}
-
#endif /* CONFIG_PM */

+/* Functions above this comment are list-based old-style power
+ * management. Please avoid using them. */

/*
* Callbacks for platform drivers to implement.
@@ -193,34 +161,32 @@
extern void (*pm_idle)(void);
extern void (*pm_power_off)(void);

-enum {
- PM_SUSPEND_ON = 0,
- PM_SUSPEND_STANDBY = 1,
- /* NOTE: PM_SUSPEND_MEM == PCI_D3hot */
- PM_SUSPEND_MEM = 3,
- PM_SUSPEND_DISK = 4,
- PM_SUSPEND_MAX = 5,
-};
-
-enum {
- PM_DISK_FIRMWARE = 1,
- PM_DISK_PLATFORM,
- PM_DISK_SHUTDOWN,
- PM_DISK_REBOOT,
- PM_DISK_MAX,
-};
+typedef int __bitwise suspend_state_t;

+#define PM_SUSPEND_ON ((__force suspend_state_t) 0)
+#define PM_SUSPEND_STANDBY ((__force suspend_state_t) 1)
+#define PM_SUSPEND_MEM ((__force suspend_state_t) 3)
+#define PM_SUSPEND_DISK ((__force suspend_state_t) 4)
+#define PM_SUSPEND_MAX ((__force suspend_state_t) 5)
+
+typedef int __bitwise suspend_disk_method_t;
+
+#define PM_DISK_FIRMWARE ((__force suspend_disk_method_t) 1)
+#define PM_DISK_PLATFORM ((__force suspend_disk_method_t) 2)
+#define PM_DISK_SHUTDOWN ((__force suspend_disk_method_t) 3)
+#define PM_DISK_REBOOT ((__force suspend_disk_method_t) 4)
+#define PM_DISK_MAX ((__force suspend_disk_method_t) 5)

struct pm_ops {
- u32 pm_disk_mode;
- int (*prepare)(u32 state);
- int (*enter)(u32 state);
- int (*finish)(u32 state);
+ suspend_disk_method_t pm_disk_mode;
+ int (*prepare)(suspend_state_t state);
+ int (*enter)(suspend_state_t state);
+ int (*finish)(suspend_state_t state);
};

extern void pm_set_ops(struct pm_ops *);

-extern int pm_suspend(u32 state);
+extern int pm_suspend(suspend_state_t state);


/*
@@ -229,10 +195,34 @@

struct device;

+typedef u32 __bitwise pm_message_t;
+
+/*
+ * There are 3 important states a driver can be in:
+ * ON -- driver is working
+ * FREEZE -- stop operations and apply whatever policy is applicable to a suspended driver
+ * of that class, freeze queues for block like IDE does, drop packets for
+ * ethernet, etc... stop DMA engine too etc... so a consistent image can be
+ * saved; but do not power any hardware down.
+ * SUSPEND - like FREEZE, but hardware is doing as much powersaving as possible. Roughly
+ * pci D3.
+ *
+ * Unfortunately, current drivers only recognize numeric values 0 (ON) and 3 (SUSPEND).
+ * We'll need to fix the drivers. So yes, putting 3 to all different defines is intentional,
+ * and will go away as soon as drivers are fixed. Also note that typedef is necessary,
+ * we'll probably want to switch to
+ * typedef struct pm_message_t { int event; int flags; } pm_message_t
+ * or something similar soon.
+ */
+
+#define PMSG_FREEZE ((__force pm_message_t) 3)
+#define PMSG_SUSPEND ((__force pm_message_t) 3)
+#define PMSG_ON ((__force pm_message_t) 0)
+
struct dev_pm_info {
- u32 power_state;
+ pm_message_t power_state;
#ifdef CONFIG_PM
- u32 prev_state;
+ pm_message_t prev_state;
u8 * saved_state;
atomic_t pm_users;
struct device * pm_parent;
@@ -242,8 +232,8 @@

extern void device_pm_set_parent(struct device * dev, struct device * parent);

-extern int device_suspend(u32 state);
-extern int device_power_down(u32 state);
+extern int device_suspend(pm_message_t state);
+extern int device_power_down(pm_message_t state);
extern void device_power_up(void);
extern void device_resume(void);

--- clean/include/linux/suspend.h 2004-10-01 00:30:31.000000000 +0200
+++ linux/include/linux/suspend.h 2004-10-29 11:56:46.000000000 +0200
@@ -31,6 +31,7 @@

/* mm/page_alloc.c */
extern void drain_local_pages(void);
+extern void mark_free_pages(struct zone *zone);

/* kernel/power/swsusp.c */
extern int software_suspend(void);
@@ -54,6 +55,8 @@

#else
static inline void refrigerator(unsigned long flag) {}
+static inline int freeze_processes(void) { BUG(); }
+static inline void thaw_processes(void) {}
#endif /* CONFIG_PM */

#ifdef CONFIG_SMP
--- clean/kernel/power/disk.c 2004-10-01 00:30:32.000000000 +0200
+++ linux/kernel/power/disk.c 2004-11-16 13:14:09.000000000 +0100
@@ -3,6 +3,7 @@
*
* Copyright (c) 2003 Patrick Mochel
* Copyright (c) 2003 Open Source Development Lab
+ * Copyright (c) 2004 Pavel Machek <[email protected]>
*
* This file is released under the GPLv2.
*
@@ -15,10 +16,11 @@
#include <linux/device.h>
#include <linux/delay.h>
#include <linux/fs.h>
+#include <linux/device.h>
#include "power.h"


-extern u32 pm_disk_mode;
+extern suspend_disk_method_t pm_disk_mode;
extern struct pm_ops * pm_ops;

extern int swsusp_suspend(void);
@@ -41,7 +43,7 @@
* there ain't no turning back.
*/

-static int power_down(u32 mode)
+static void power_down(suspend_disk_method_t mode)
{
unsigned long flags;
int error = 0;
@@ -49,7 +51,7 @@
local_irq_save(flags);
switch(mode) {
case PM_DISK_PLATFORM:
- device_power_down(PM_SUSPEND_DISK);
+ device_power_down(PMSG_SUSPEND);
error = pm_ops->enter(PM_SUSPEND_DISK);
break;
case PM_DISK_SHUTDOWN:
@@ -67,7 +69,6 @@
after resume. */
printk(KERN_CRIT "Please power me down manually\n");
while(1);
- return 0;
}


@@ -85,13 +86,26 @@

static void free_some_memory(void)
{
- printk("Freeing memory: ");
- while (shrink_all_memory(10000))
- printk(".");
- printk("|\n");
+ int i;
+ for (i=0; i<5; i++) {
+ int i = 0, tmp;
+ long pages = 0;
+ char *p = "-\\|/";
+
+ printk("Freeing memory... ");
+ while ((tmp = shrink_all_memory(10000))) {
+ pages += tmp;
+ printk("\b%c", p[i]);
+ i++;
+ if (i > 3)
+ i = 0;
+ }
+ printk("\bdone (%li pages freed)\n", pages);
+ current->state = TASK_INTERRUPTIBLE;
+ schedule_timeout(HZ/5);
+ }
}

-
static inline void platform_finish(void)
{
if (pm_disk_mode == PM_DISK_PLATFORM) {
@@ -133,8 +147,10 @@
free_some_memory();

disable_nonboot_cpus();
- if ((error = device_suspend(PM_SUSPEND_DISK)))
+ if ((error = device_suspend(PMSG_FREEZE))) {
+ printk("Some devices failed to suspend\n");
goto Finish;
+ }

return 0;
Finish:
@@ -152,7 +168,7 @@
*
* If we're going through the firmware, then get it over with quickly.
*
- * If not, then call pmdis to do it's thing, then figure out how
+ * If not, then call swsusp to do its thing, then figure out how
* to power down the system.
*/

@@ -174,18 +190,9 @@

if (in_suspend) {
pr_debug("PM: writing image.\n");
-
- /*
- * FIXME: Leftover from swsusp. Are they necessary?
- */
- mb();
- barrier();
-
error = swsusp_write();
- if (!error) {
- error = power_down(pm_disk_mode);
- pr_debug("PM: Power down failed.\n");
- }
+ if (!error)
+ power_down(pm_disk_mode);
} else
pr_debug("PM: Image restored successfully.\n");
swsusp_free();
@@ -282,7 +289,7 @@

static ssize_t disk_show(struct subsystem * subsys, char * buf)
{
- return sprintf(buf,"%s\n",pm_disk_modes[pm_disk_mode]);
+ return sprintf(buf, "%s\n", pm_disk_modes[pm_disk_mode]);
}


@@ -292,7 +299,7 @@
int i;
int len;
char *p;
- u32 mode = 0;
+ suspend_disk_method_t mode = 0;

p = memchr(buf, '\n', n);
len = p ? p - buf : n;
--- clean/kernel/power/main.c 2004-10-01 00:30:32.000000000 +0200
+++ linux/kernel/power/main.c 2004-11-14 23:36:46.000000000 +0100
@@ -22,7 +22,7 @@
DECLARE_MUTEX(pm_sem);

struct pm_ops * pm_ops = NULL;
-u32 pm_disk_mode = PM_DISK_SHUTDOWN;
+suspend_disk_method_t pm_disk_mode = PM_DISK_SHUTDOWN;

/**
* pm_set_ops - Set the global power method table.
@@ -46,7 +46,7 @@
* the platform can enter the requested state.
*/

-static int suspend_prepare(u32 state)
+static int suspend_prepare(suspend_state_t state)
{
int error = 0;

@@ -65,7 +65,7 @@
goto Thaw;
}

- if ((error = device_suspend(state)))
+ if ((error = device_suspend(PMSG_SUSPEND)))
goto Finish;
return 0;
Finish:
@@ -78,13 +78,14 @@
}


-static int suspend_enter(u32 state)
+static int suspend_enter(suspend_state_t state)
{
int error = 0;
unsigned long flags;

local_irq_save(flags);
- if ((error = device_power_down(state)))
+
+ if ((error = device_power_down(PMSG_SUSPEND)))
goto Done;
error = pm_ops->enter(state);
device_power_up();
@@ -99,10 +100,10 @@
* @state: State we're coming out of.
*
* Call platform code to clean up, restart processes, and free the
- * console that we've allocated.
+ * console that we've allocated. This is not called for suspend-to-disk.
*/

-static void suspend_finish(u32 state)
+static void suspend_finish(suspend_state_t state)
{
device_resume();
if (pm_ops && pm_ops->finish)
@@ -133,7 +134,7 @@
* we've woken up).
*/

-static int enter_state(u32 state)
+static int enter_state(suspend_state_t state)
{
int error;

@@ -183,7 +184,7 @@
* structure, and enter (above).
*/

-int pm_suspend(u32 state)
+int pm_suspend(suspend_state_t state)
{
if (state > PM_SUSPEND_ON && state < PM_SUSPEND_MAX)
return enter_state(state);
@@ -221,7 +222,7 @@

static ssize_t state_store(struct subsystem * subsys, const char * buf, size_t n)
{
- u32 state = PM_SUSPEND_STANDBY;
+ suspend_state_t state = PM_SUSPEND_STANDBY;
char ** s;
char *p;
int error;
@@ -230,8 +231,8 @@
p = memchr(buf, '\n', n);
len = p ? p - buf : n;

- for (s = &pm_states[state]; state < PM_SUSPEND_MAX; s++, state++) {
- if (*s && !strncmp(buf, *s, len))
+ for (s = &pm_states[state]; *s; s++, state++) {
+ if (!strncmp(buf, *s, len))
break;
}
if (*s)
--- clean/kernel/power/pm.c 2004-08-15 19:15:06.000000000 +0200
+++ linux/kernel/power/pm.c 2004-10-25 23:02:25.000000000 +0200
@@ -256,41 +256,10 @@
return 0;
}

-/**
- * pm_find - find a device
- * @type: type of device
- * @from: where to start looking
- *
- * Scan the power management list for devices of a specific type. The
- * return value for a matching device may be passed to further calls
- * to this function to find further matches. A %NULL indicates the end
- * of the list.
- *
- * To search from the beginning pass %NULL as the @from value.
- *
- * The caller MUST hold the pm_devs_lock lock when calling this
- * function. The instant that the lock is dropped all pointers returned
- * may become invalid.
- */
-
-struct pm_dev *pm_find(pm_dev_t type, struct pm_dev *from)
-{
- struct list_head *entry = from ? from->entry.next:pm_devs.next;
- while (entry != &pm_devs) {
- struct pm_dev *dev = list_entry(entry, struct pm_dev, entry);
- if (type == PM_UNKNOWN_DEV || dev->type == type)
- return dev;
- entry = entry->next;
- }
- return NULL;
-}
-
EXPORT_SYMBOL(pm_register);
EXPORT_SYMBOL(pm_unregister);
EXPORT_SYMBOL(pm_unregister_all);
-EXPORT_SYMBOL(pm_send);
EXPORT_SYMBOL(pm_send_all);
-EXPORT_SYMBOL(pm_find);
EXPORT_SYMBOL(pm_active);


--- clean/kernel/power/swsusp.c 2004-10-19 14:16:29.000000000 +0200
+++ linux/kernel/power/swsusp.c 2004-11-16 13:14:49.000000000 +0100
@@ -74,11 +74,8 @@
/* References to section boundaries */
extern char __nosave_begin, __nosave_end;

-extern int is_head_of_free_region(struct page *);
-
/* Variables to be preserved over suspend */
-int pagedir_order_check;
-int nr_copy_pages_check;
+static int pagedir_order_check;

extern char resume_file[];
static dev_t resume_device;
@@ -294,15 +291,19 @@
{
int error = 0;
int i;
+ unsigned int mod = nr_copy_pages / 100;
+
+ if (!mod)
+ mod = 1;

- printk( "Writing data to swap (%d pages): ", nr_copy_pages );
+ printk( "Writing data to swap (%d pages)... ", nr_copy_pages );
for (i = 0; i < nr_copy_pages && !error; i++) {
- if (!(i%100))
- printk( "." );
+ if (!(i%mod))
+ printk( "\b\b\b\b%3d%%", i / mod );
error = write_page((pagedir_nosave+i)->address,
&((pagedir_nosave+i)->swap_address));
}
- printk(" %d Pages done.\n",i);
+ printk("\b\b\b\bdone\n");
return error;
}

@@ -422,12 +423,12 @@
static int save_highmem_zone(struct zone *zone)
{
unsigned long zone_pfn;
+ mark_free_pages(zone);
for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) {
struct page *page;
struct highmem_page *save;
void *kaddr;
unsigned long pfn = zone_pfn + zone->zone_start_pfn;
- int chunk_size;

if (!(pfn%1000))
printk(".");
@@ -444,11 +445,9 @@
printk("highmem reserved page?!\n");
continue;
}
- if ((chunk_size = is_head_of_free_region(page))) {
- pfn += chunk_size - 1;
- zone_pfn += chunk_size - 1;
+ BUG_ON(PageNosave(page));
+ if (PageNosaveFree(page))
continue;
- }
save = kmalloc(sizeof(struct highmem_page), GFP_ATOMIC);
if (!save)
return -ENOMEM;
@@ -520,21 +519,16 @@
* We save a page if it's Reserved, and not in the range of pages
* statically defined as 'unsaveable', or if it isn't reserved, and
* isn't part of a free chunk of pages.
- * If it is part of a free chunk, we update @pfn to point to the last
- * page of the chunk.
*/

static int saveable(struct zone * zone, unsigned long * zone_pfn)
{
unsigned long pfn = *zone_pfn + zone->zone_start_pfn;
- unsigned long chunk_size;
struct page * page;

if (!pfn_valid(pfn))
return 0;

- if (!(pfn%1000))
- printk(".");
page = pfn_to_page(pfn);
BUG_ON(PageReserved(page) && PageNosave(page));
if (PageNosave(page))
@@ -543,10 +537,8 @@
pr_debug("[nosave pfn 0x%lx]", pfn);
return 0;
}
- if ((chunk_size = is_head_of_free_region(page))) {
- *zone_pfn += chunk_size - 1;
+ if (PageNosaveFree(page))
return 0;
- }

return 1;
}
@@ -559,10 +551,11 @@
nr_copy_pages = 0;

for_each_zone(zone) {
- if (!is_highmem(zone)) {
- for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn)
- nr_copy_pages += saveable(zone, &zone_pfn);
- }
+ if (is_highmem(zone))
+ continue;
+ mark_free_pages(zone);
+ for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn)
+ nr_copy_pages += saveable(zone, &zone_pfn);
}
}

@@ -572,52 +565,26 @@
struct zone *zone;
unsigned long zone_pfn;
struct pbe * pbe = pagedir_nosave;
+ int pages_copied = 0;

for_each_zone(zone) {
- if (!is_highmem(zone))
- for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) {
- if (saveable(zone, &zone_pfn)) {
- struct page * page;
- page = pfn_to_page(zone_pfn + zone->zone_start_pfn);
- pbe->orig_address = (long) page_address(page);
- /* copy_page is no usable for copying task structs. */
- memcpy((void *)pbe->address, (void *)pbe->orig_address, PAGE_SIZE);
- pbe++;
- }
- }
- }
-}
-
-
-static void free_suspend_pagedir_zone(struct zone *zone, unsigned long pagedir)
-{
- unsigned long zone_pfn, pagedir_end, pagedir_pfn, pagedir_end_pfn;
- pagedir_end = pagedir + (PAGE_SIZE << pagedir_order);
- pagedir_pfn = __pa(pagedir) >> PAGE_SHIFT;
- pagedir_end_pfn = __pa(pagedir_end) >> PAGE_SHIFT;
- for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) {
- struct page *page;
- unsigned long pfn = zone_pfn + zone->zone_start_pfn;
- if (!pfn_valid(pfn))
- continue;
- page = pfn_to_page(pfn);
- if (!TestClearPageNosave(page))
- continue;
- else if (pfn >= pagedir_pfn && pfn < pagedir_end_pfn)
+ if (is_highmem(zone))
continue;
- __free_page(page);
- }
-}
-
-void swsusp_free(void)
-{
- unsigned long p = (unsigned long)pagedir_save;
- struct zone *zone;
- for_each_zone(zone) {
- if (!is_highmem(zone))
- free_suspend_pagedir_zone(zone, p);
+ mark_free_pages(zone);
+ for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) {
+ if (saveable(zone, &zone_pfn)) {
+ struct page * page;
+ page = pfn_to_page(zone_pfn + zone->zone_start_pfn);
+ pbe->orig_address = (long) page_address(page);
+ /* copy_page is not usable for copying task structs. */
+ memcpy((void *)pbe->address, (void *)pbe->orig_address, PAGE_SIZE);
+ pbe++;
+ pages_copied++;
+ }
+ }
}
- free_pages(p, pagedir_order);
+ BUG_ON(pages_copied > nr_copy_pages);
+ nr_copy_pages = pages_copied;
}


@@ -683,6 +650,24 @@
return 0;
}

+/**
+ * free_image_pages - Free pages allocated for snapshot
+ */
+
+static void free_image_pages(void)
+{
+ struct pbe * p;
+ int i;
+
+ p = pagedir_save;
+ for (i = 0, p = pagedir_save; i < nr_copy_pages; i++, p++) {
+ if (p->address) {
+ ClearPageNosave(virt_to_page(p->address));
+ free_page(p->address);
+ p->address = 0;
+ }
+ }
+}

/**
* alloc_image_pages - Allocate pages for the snapshot.
@@ -696,18 +681,19 @@

for (i = 0, p = pagedir_save; i < nr_copy_pages; i++, p++) {
p->address = get_zeroed_page(GFP_ATOMIC | __GFP_COLD);
- if(!p->address)
- goto Error;
+ if (!p->address)
+ return -ENOMEM;
SetPageNosave(virt_to_page(p->address));
}
return 0;
- Error:
- do {
- if (p->address)
- free_page(p->address);
- p->address = 0;
- } while (p-- > pagedir_save);
- return -ENOMEM;
+}
+
+void swsusp_free(void)
+{
+ BUG_ON(PageNosave(virt_to_page(pagedir_save)));
+ BUG_ON(PageNosaveFree(virt_to_page(pagedir_save)));
+ free_image_pages();
+ free_pages((unsigned long) pagedir_save, pagedir_order);
}


@@ -775,19 +761,19 @@
return error;
}

- nr_copy_pages_check = nr_copy_pages;
pagedir_order_check = pagedir_order;
return 0;
}

int suspend_prepare_image(void)
{
- unsigned int nr_needed_pages = 0;
+ unsigned int nr_needed_pages;
int error;

pr_debug("swsusp: critical section: \n");
if (save_highmem()) {
printk(KERN_CRIT "Suspend machine: Not enough free pages for highmem\n");
+ restore_highmem();
return -ENOMEM;
}

@@ -854,11 +840,13 @@
if ((error = arch_prepare_suspend()))
return error;
local_irq_disable();
+ sysdev_suspend(PMSG_FREEZE);
save_processor_state();
error = swsusp_arch_suspend();
/* Restore control flow magically appears here */
restore_processor_state();
restore_highmem();
+ sysdev_resume();
local_irq_enable();
return error;
}
@@ -866,11 +854,11 @@

asmlinkage int swsusp_restore(void)
{
- BUG_ON (nr_copy_pages_check != nr_copy_pages);
BUG_ON (pagedir_order_check != pagedir_order);

/* Even mappings of "global" things (vmalloc) need to be fixed */
__flush_tlb_global();
+ wbinvd(); /* Nigel says wbinvd here is good idea... */
return 0;
}

@@ -878,6 +866,7 @@
{
int error;
local_irq_disable();
+ sysdev_suspend(PMSG_FREEZE);
/* We'll ignore saved state, but this gets preempt count (etc) right */
save_processor_state();
error = swsusp_arch_resume();
@@ -887,6 +876,7 @@
BUG_ON(!error);
restore_processor_state();
restore_highmem();
+ sysdev_resume();
local_irq_enable();
return error;
}
@@ -978,6 +968,8 @@
c = *c;
free_pages((unsigned long)f, pagedir_order);
}
+ if (ret)
+ return ret;
printk("|\n");
return check_pagedir();
}
@@ -993,24 +985,14 @@

static atomic_t io_done = ATOMIC_INIT(0);

-static void start_io(void)
-{
- atomic_set(&io_done,1);
-}
-
static int end_io(struct bio * bio, unsigned int num, int err)
{
- atomic_set(&io_done,0);
+ if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
+ panic("I/O error reading memory image");
+ atomic_set(&io_done, 0);
return 0;
}

-static void wait_io(void)
-{
- while(atomic_read(&io_done))
- io_schedule();
-}
-
-
static struct block_device * resume_bdev;

/**
@@ -1045,9 +1027,12 @@

if (rw == WRITE)
bio_set_pages_dirty(bio);
- start_io();
+
+ atomic_set(&io_done, 1);
submit_bio(rw | (1 << BIO_RW_SYNC), bio);
- wait_io();
+ while (atomic_read(&io_done))
+ yield();
+
Done:
bio_put(bio);
return error;
@@ -1103,6 +1088,7 @@
return -EPERM;
}
nr_copy_pages = swsusp_info.image_pages;
+ pagedir_order = get_bitmask_order(SUSPEND_PD_PAGES(nr_copy_pages));
return error;
}

@@ -1121,7 +1107,7 @@
*/
error = bio_write_page(0, &swsusp_header);
} else {
- pr_debug(KERN_ERR "swsusp: Invalid partition type.\n");
+ pr_debug(KERN_ERR "swsusp: Suspend partition has wrong signature?\n");
return -EINVAL;
}
if (!error)
@@ -1141,14 +1127,18 @@
struct pbe * p;
int error;
int i;
+ int mod = nr_copy_pages / 100;
+
+ if (!mod)
+ mod = 1;

if ((error = swsusp_pagedir_relocate()))
return error;

- printk( "Reading image data (%d pages): ", nr_copy_pages );
+ printk( "Reading image data (%d pages): ", nr_copy_pages );
for(i = 0, p = pagedir_nosave; i < nr_copy_pages && !error; i++, p++) {
- if (!(i%100))
- printk( "." );
+ if (!(i%mod))
+ printk( "\b\b\b\b%3d%%", i / mod );
error = bio_read_page(swp_offset(p->swap_address),
(void *)p->address);
}
@@ -1165,9 +1155,7 @@
int i, n = swsusp_info.pagedir_pages;
int error = 0;

- pagedir_order = get_bitmask_order(n);
-
- addr =__get_free_pages(GFP_ATOMIC, pagedir_order);
+ addr = __get_free_pages(GFP_ATOMIC, pagedir_order);
if (!addr)
return -ENOMEM;
pagedir_nosave = (struct pbe *)addr;
--- clean/kernel/signal.c 2004-10-01 00:30:32.000000000 +0200
+++ linux/kernel/signal.c 2004-10-29 11:56:46.000000000 +0200
@@ -21,6 +21,7 @@
#include <linux/binfmts.h>
#include <linux/security.h>
#include <linux/ptrace.h>
+#include <linux/suspend.h>
#include <asm/param.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
@@ -1483,8 +1484,7 @@
unsigned long flags;
struct sighand_struct *psig;

- if (sig == -1)
- BUG();
+ BUG_ON(sig == -1);

/* do_notify_parent_cldstop should have been called instead. */
BUG_ON(tsk->state & (TASK_STOPPED|TASK_TRACED));
@@ -2260,6 +2260,8 @@
ret = -EINTR;
}

+ if (current->flags & PF_FREEZE)
+ refrigerator(1);
return ret;
}

--- clean/kernel/sys.c 2004-10-01 00:30:32.000000000 +0200
+++ linux/kernel/sys.c 2004-11-14 23:36:46.000000000 +0100
@@ -471,6 +471,7 @@
case LINUX_REBOOT_CMD_HALT:
notifier_call_chain(&reboot_notifier_list, SYS_HALT, NULL);
system_state = SYSTEM_HALT;
+ device_suspend(PMSG_SUSPEND);
device_shutdown();
printk(KERN_EMERG "System halted.\n");
machine_halt();
@@ -481,6 +482,7 @@
case LINUX_REBOOT_CMD_POWER_OFF:
notifier_call_chain(&reboot_notifier_list, SYS_POWER_OFF, NULL);
system_state = SYSTEM_POWER_OFF;
+ device_suspend(PMSG_SUSPEND);
device_shutdown();
printk(KERN_EMERG "Power down.\n");
machine_power_off();
@@ -497,6 +499,7 @@

notifier_call_chain(&reboot_notifier_list, SYS_RESTART, buffer);
system_state = SYSTEM_RESTART;
+ device_suspend(PMSG_FREEZE);
device_shutdown();
printk(KERN_EMERG "Restarting system with command '%s'.\n", buffer);
machine_restart(buffer);
--- clean/mm/page_alloc.c 2004-10-01 00:30:32.000000000 +0200
+++ linux/mm/page_alloc.c 2004-10-29 11:56:47.000000000 +0200
@@ -434,26 +434,30 @@
#endif /* CONFIG_PM || CONFIG_HOTPLUG_CPU */

#ifdef CONFIG_PM
-int is_head_of_free_region(struct page *page)
+
+void mark_free_pages(struct zone *zone)
{
- struct zone *zone = page_zone(page);
- unsigned long flags;
+ unsigned long zone_pfn, flags;
int order;
struct list_head *curr;

- /*
- * Should not matter as we need quiescent system for
- * suspend anyway, but...
- */
+ if (!zone->spanned_pages)
+ return;
+
spin_lock_irqsave(&zone->lock, flags);
+ for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn)
+ ClearPageNosaveFree(pfn_to_page(zone_pfn + zone->zone_start_pfn));
+
for (order = MAX_ORDER - 1; order >= 0; --order)
- list_for_each(curr, &zone->free_area[order].free_list)
- if (page == list_entry(curr, struct page, lru)) {
- spin_unlock_irqrestore(&zone->lock, flags);
- return 1 << order;
- }
+ list_for_each(curr, &zone->free_area[order].free_list) {
+ unsigned long start_pfn, i;
+
+ start_pfn = page_to_pfn(list_entry(curr, struct page, lru));
+
+ for (i=0; i < (1<<order); i++)
+ SetPageNosaveFree(pfn_to_page(start_pfn+i));
+ }
spin_unlock_irqrestore(&zone->lock, flags);
- return 0;
}

/*
@@ -1568,7 +1572,7 @@
zone->zone_start_pfn = zone_start_pfn;

if ((zone_start_pfn) & (zone_required_alignment-1))
- printk("BUG: wrong zone alignment, it will crash\n");
+ printk(KERN_CRIT "BUG: wrong zone alignment, it will crash\n");

memmap_init(size, nid, j, zone_start_pfn);



--
People were complaining that M$ turns users into beta-testers...
...jr ghea gurz vagb qrirybcref, naq gurl frrz gb yvxr vg gung jnl!

2004-11-20 03:13:28

by Hu Gang

[permalink] [raw]
Subject: Re: swsusp bigdiff [was Re: [PATCH] Software Suspend split to two stage V2.]

On Sat, Nov 20, 2004 at 01:30:10AM +0100, Pavel Machek wrote:
> Hi!
>
> > This patch using pagemap for PageSet2 bitmap, It increase suspend
> > speed, In my PowerPC suspend only need 5 secs, cool.
> >
> > Test passed in my ppc and x86 laptop.
> >
> > ppc swsusp patch for 2.6.9
> > http://honk.physik.uni-konstanz.de/~agx/linux-ppc/kernel/
> > Have fun.
>
> BTW here's my current bigdiff. It already has some rather nice
> swsusp speedups. Please try it on your machine; if it works for you,
> try to send your patches relative to this one. I hope to merge these
> changes during 2.6.11.

Really big diff, I'll try it.

Here is my diff.

Changes:
* Change the pcs_ prefix to page_caches_.
* Hold lru_lock to make sure the data is not modified. I can't be sure
that this fully works, but it passed testing.
* Add new page flags; I'll move them to mm-flags once they do the right
thing.
* If there is not enough memory, use shrink_all_memory to free more.

diff -ur linux-2.6.9/kernel/power/disk.c linux-2.6.9-hg/kernel/power/disk.c
--- linux-2.6.9/kernel/power/disk.c 2004-10-20 16:00:53.000000000 +0800
+++ linux-2.6.9-hg/kernel/power/disk.c 2004-11-20 09:37:17.000000000 +0800
@@ -17,7 +17,6 @@
#include <linux/fs.h>
#include "power.h"

-
extern u32 pm_disk_mode;
extern struct pm_ops * pm_ops;

@@ -27,6 +26,8 @@
extern int swsusp_resume(void);
extern int swsusp_free(void);

+extern int write_page_caches(void);
+extern int read_page_caches(void);

static int noresume = 0;
char resume_file[256] = CONFIG_PM_STD_PARTITION;
@@ -73,7 +74,7 @@

static int in_suspend __nosavedata = 0;

-
+#if 0
/**
* free_some_memory - Try to free as much memory as possible
*
@@ -91,7 +92,7 @@
printk("|\n");
}

-
+#endif
static inline void platform_finish(void)
{
if (pm_disk_mode == PM_DISK_PLATFORM) {
@@ -104,13 +105,14 @@
{
device_resume();
platform_finish();
+ read_page_caches();
enable_nonboot_cpus();
thaw_processes();
pm_restore_console();
}


-static int prepare(void)
+static int prepare(int resume)
{
int error;

@@ -130,9 +132,14 @@
}

/* Free memory before shutting down devices. */
- free_some_memory();
+ //free_some_memory();

disable_nonboot_cpus();
+ if (!resume) {
+ if ((error = write_page_caches())) {
+ goto Finish;
+ }
+ }
if ((error = device_suspend(PM_SUSPEND_DISK)))
goto Finish;

@@ -160,7 +167,7 @@
{
int error;

- if ((error = prepare()))
+ if ((error = prepare(0)))
return error;

pr_debug("PM: Attempting to suspend to disk.\n");
@@ -226,7 +233,7 @@

pr_debug("PM: Preparing system for restore.\n");

- if ((error = prepare()))
+ if ((error = prepare(1)))
goto Free;

barrier();
diff -ur linux-2.6.9/kernel/power/process.c linux-2.6.9-hg/kernel/power/process.c
--- linux-2.6.9/kernel/power/process.c 2004-10-20 16:00:53.000000000 +0800
+++ linux-2.6.9-hg/kernel/power/process.c 2004-11-20 01:20:31.000000000 +0800
@@ -4,8 +4,6 @@
*
* Originally from swsusp.
*/
-
-
#undef DEBUG

#include <linux/smp_lock.h>
diff -ur linux-2.6.9/kernel/power/swsusp.c linux-2.6.9-hg/kernel/power/swsusp.c
--- linux-2.6.9/kernel/power/swsusp.c 2004-10-20 16:00:53.000000000 +0800
+++ linux-2.6.9-hg/kernel/power/swsusp.c 2004-11-20 10:45:13.000000000 +0800
@@ -301,6 +301,12 @@
printk( "." );
error = write_page((pagedir_nosave+i)->address,
&((pagedir_nosave+i)->swap_address));
+#ifdef PCS_DEBUG
+ pr_debug("data_write: %p %p %u\n",
+ (void *)(pagedir_nosave+i)->address,
+ (void *)(pagedir_nosave+i)->orig_address,
+ (pagedir_nosave+i)->swap_address);
+#endif
}
printk(" %d Pages done.\n",i);
return error;
@@ -505,6 +511,326 @@
return 0;
}

+/**
+ * calc_order - Determine the order of allocation needed for pagedir_save.
+ *
+ * This looks tricky, but is just subtle. Please fix it some time.
+ * Since there are %nr_copy_pages worth of pages in the snapshot, we need
+ * to allocate enough contiguous space to hold
+ * (%nr_copy_pages * sizeof(struct pbe)),
+ * which has the saved/orig locations of the page..
+ *
+ * SUSPEND_PD_PAGES() tells us how many pages we need to hold those
+ * structures, then we call get_bitmask_order(), which will tell us the
+ * last bit set in the number, starting with 1. (If we need 30 pages, that
+ * is 0x0000001e in hex. The last bit is the 5th, which is the order we
+ * would use to allocate 32 contiguous pages).
+ *
+ * Since we also need to save those pages, we add the number of pages that
+ * we need to nr_copy_pages, and in case of an overflow, do the
+ * calculation again to update the number of pages needed.
+ *
+ * With this model, we will tend to waste a lot of memory if we just cross
+ * an order boundary. Plus, the higher the order of allocation that we try
+ * to do, the more likely we are to fail in a low-memory situtation
+ * (though we're unlikely to get this far in such a case, since swsusp
+ * requires half of memory to be free anyway).
+ */
+
+static void calc_order(int *po, int *nr)
+{
+ int diff = 0;
+ int order = 0;
+
+ do {
+ diff = get_bitmask_order(SUSPEND_PD_PAGES(*nr)) - order;
+ if (diff) {
+ order += diff;
+ *nr += 1 << diff;
+ }
+ } while(diff);
+ *po = order;
+}
+
+typedef int (*do_page_t)(struct page *page, void *p);
+
+static int foreach_zone_page(struct zone *zone, do_page_t fun, void *p)
+{
+ int inactive = 0, active = 0;
+
+ /* spin_lock_irq(&zone->lru_lock); */
+ if (zone->nr_inactive) {
+ struct list_head * entry = zone->inactive_list.prev;
+ while (entry != &zone->inactive_list) {
+ if (fun) {
+ struct page * page = list_entry(entry, struct page, lru);
+ inactive += fun(page, p);
+ } else {
+ inactive ++;
+ }
+ entry = entry->prev;
+ }
+ }
+ if (zone->nr_active) {
+ struct list_head * entry = zone->active_list.prev;
+ while (entry != &zone->active_list) {
+ if (fun) {
+ struct page * page = list_entry(entry, struct page, lru);
+ active += fun(page, p);
+ } else {
+ active ++;
+ }
+ entry = entry->prev;
+ }
+ }
+ /* spin_unlock_irq(&zone->lru_lock); */
+
+ return (active + inactive);
+}
+
+/* I'll move this to include/linux/page-flags.h */
+#define PG_pcs (PG_reclaim + 1)
+
+#define SetPagePcs(page) set_bit(PG_pcs, &(page)->flags)
+#define ClearPagePcs(page) clear_bit(PG_pcs, &(page)->flags)
+#define PagePcs(page) test_bit(PG_pcs, &(page)->flags)
+
+static int setup_pcs_pe(struct page *page, void *p)
+{
+ suspend_pagedir_t **pe = p;
+ unsigned long pfn = page_to_pfn(page);
+
+ BUG_ON(PageReserved(page) && PageNosave(page));
+ if (!pfn_valid(pfn)) {
+ printk("not valid page\n");
+ return 0;
+ }
+ if (PageNosave(page)) {
+ printk("nosave\n");
+ return 0;
+ }
+ if (PageReserved(page) /*&& pfn_is_nosave(pfn)*/) {
+ printk("[nosave]\n");
+ return 0;
+ }
+ if (PageSlab(page)) {
+ printk("slab\n");
+ return (0);
+ }
+ if (pe && *pe) {
+ BUG_ON(!PagePcs(page));
+ (*pe)->address = (long) page_address(page);
+ (*pe) ++;
+ }
+ SetPagePcs(page);
+
+ return (1);
+}
+
+static int count_pcs(struct zone *zone, suspend_pagedir_t **pe)
+{
+ return foreach_zone_page(zone, setup_pcs_pe, pe);
+}
+
+static suspend_pagedir_t *pagedir_cache = NULL;
+static int nr_copy_pcs = 0;
+static int pcs_order = 0;
+
+static int alloc_pagedir_cache(void)
+{
+ int need_nr_copy_pcs = nr_copy_pcs;
+
+ calc_order(&pcs_order, &need_nr_copy_pcs);
+ pagedir_cache = (suspend_pagedir_t *)
+ __get_free_pages(GFP_ATOMIC | __GFP_COLD, pcs_order);
+ if (!pagedir_cache)
+ return -ENOMEM;
+ memset(pagedir_cache, 0, (1 << pcs_order) * PAGE_SIZE);
+
+ pr_debug("alloc pcs %p, %d\n", pagedir_cache, pcs_order);
+
+ return 0;
+}
+
+static void page_cache_unlock(void)
+{
+ struct zone *zone;
+
+ for_each_zone(zone) {
+ if (!is_highmem(zone)) {
+ spin_unlock_irq(&zone->lru_lock);
+ }
+ }
+}
+
+static void page_cache_lock(void)
+{
+ struct zone *zone;
+
+ for_each_zone(zone) {
+ if (!is_highmem(zone)) {
+ spin_lock_irq(&zone->lru_lock);
+ }
+ }
+}
+int bio_read_page(pgoff_t page_off, void * page);
+
+int read_page_caches(void)
+{
+ struct pbe * p;
+ int error = 0, i;
+ swp_entry_t entry;
+
+ printk( "Reading Page Caches (%d pages): ", nr_copy_pcs);
+ for(i = 0, p = pagedir_cache; i < nr_copy_pcs && !error; i++, p++) {
+ if (!(i%100))
+ printk( "." );
+ error = bio_read_page(swp_offset(p->swap_address),
+ (void *)p->address);
+#ifdef PCS_DEBUG
+ pr_debug("pcs_read: %p %p %u\n",
+ (void *)p->address, (void *)p->orig_address,
+ swp_offset(p->swap_address));
+#endif
+ }
+
+ for (i = 0; i < nr_copy_pcs; i++) {
+ entry = (pagedir_cache + i)->swap_address;
+ if (entry.val)
+ swap_free(entry);
+ }
+ free_pages((unsigned long)pagedir_cache, pcs_order);
+
+ printk(" %d done.\n",i);
+
+ page_cache_unlock();
+
+ return (0);
+}
+
+static int pcs_write(void)
+{
+ int error = 0;
+ int i;
+
+ printk( "Writing PageCaches to swap (%d pages): ", nr_copy_pcs);
+ for (i = 0; i < nr_copy_pcs && !error; i++) {
+ if (!(i%100))
+ printk( "." );
+ error = write_page((pagedir_cache+i)->address,
+ &((pagedir_cache+i)->swap_address));
+#ifdef PCS_DEBUG
+ pr_debug("pcs_write: %p %p %u\n",
+ (void *)(pagedir_cache+i)->address,
+ (void *)(pagedir_cache+i)->orig_address,
+ (pagedir_cache+i)->swap_address);
+#endif
+ }
+ printk(" %d Pages done.\n",i);
+
+ return error;
+}
+
+static void count_data_pages(void);
+static int swsusp_alloc(void);
+
+static void page_caches_recal(void)
+{
+ struct zone *zone;
+ int i;
+
+ for (i = 0; i < max_mapnr; i++)
+ ClearPagePcs(mem_map+i);
+
+ nr_copy_pcs = 0;
+ drain_local_pages();
+ for_each_zone(zone) {
+ if (!is_highmem(zone)) {
+ nr_copy_pcs += count_pcs(zone, NULL);
+ }
+ }
+}
+
+int write_page_caches(void)
+{
+ struct zone *zone;
+ suspend_pagedir_t *pe = NULL;
+ int error;
+ int recal = 0;
+
+ page_cache_lock();
+ page_caches_recal();
+
+ if (nr_copy_pcs == 0) {
+ page_cache_unlock();
+ return (0);
+ }
+ printk("swsusp: Need to copy %u pcs\n", nr_copy_pcs);
+
+ if ((error = swsusp_swap_check())) {
+ page_cache_unlock();
+ return error;
+ }
+
+ if ((error = alloc_pagedir_cache())) {
+ page_cache_unlock();
+ return error;
+ }
+
+ drain_local_pages();
+ count_data_pages();
+ printk("swsusp(1/2): Need to copy %u pages, %u pcs\n",
+ nr_copy_pages, nr_copy_pcs);
+
+ while (nr_free_pages() < nr_copy_pages + PAGES_FOR_IO) {
+ if (recal == 0) {
+ page_cache_unlock();
+ }
+ printk("#");
+ shrink_all_memory(nr_copy_pages + PAGES_FOR_IO);
+ recal ++;
+ }
+
+ if (recal) {
+ page_cache_lock();
+ page_caches_recal();
+ drain_local_pages();
+ count_data_pages();
+ printk("swsusp(1/2): Need to copy %u pages, %u pcs\n",
+ nr_copy_pages, nr_copy_pcs);
+ }
+
+ drain_local_pages();
+ count_data_pages();
+ printk("swsusp(2/2): Need to copy %u pages, %u pcs\n",
+ nr_copy_pages, nr_copy_pcs);
+
+ error = swsusp_alloc();
+ if (error) {
+ printk("swsusp_alloc failed, %d\n", error);
+ return error;
+ }
+
+ drain_local_pages();
+ count_data_pages();
+ printk("swsusp(final): Need to copy %u/%u pages, %u pcs\n",
+ nr_copy_pages, nr_copy_pages_check, nr_copy_pcs);
+ BUG_ON(nr_copy_pages_check != nr_copy_pages);
+
+ pe = pagedir_cache;
+
+ drain_local_pages();
+ for_each_zone(zone) {
+ if (!is_highmem(zone)) {
+ count_pcs(zone, &pe);
+ }
+ }
+ error = pcs_write();
+ if (error)
+ return error;
+
+ return (0);
+}

static int pfn_is_nosave(unsigned long pfn)
{
@@ -543,11 +869,14 @@
pr_debug("[nosave pfn 0x%lx]", pfn);
return 0;
}
+ if (PagePcs(page)) {
+ BUG_ON(zone->nr_inactive == 0 && zone->nr_active == 0) ;
+ return (0);
+ }
if ((chunk_size = is_head_of_free_region(page))) {
*zone_pfn += chunk_size - 1;
return 0;
}
-
return 1;
}

@@ -557,9 +886,11 @@
unsigned long zone_pfn;

nr_copy_pages = 0;
+ nr_copy_pcs = 0;

for_each_zone(zone) {
if (!is_highmem(zone)) {
+ nr_copy_pcs += count_pcs(zone, NULL);
for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn)
nr_copy_pages += saveable(zone, &zone_pfn);
}
@@ -621,47 +952,6 @@
}


-/**
- * calc_order - Determine the order of allocation needed for pagedir_save.
- *
- * This looks tricky, but is just subtle. Please fix it some time.
- * Since there are %nr_copy_pages worth of pages in the snapshot, we need
- * to allocate enough contiguous space to hold
- * (%nr_copy_pages * sizeof(struct pbe)),
- * which has the saved/orig locations of the page..
- *
- * SUSPEND_PD_PAGES() tells us how many pages we need to hold those
- * structures, then we call get_bitmask_order(), which will tell us the
- * last bit set in the number, starting with 1. (If we need 30 pages, that
- * is 0x0000001e in hex. The last bit is the 5th, which is the order we
- * would use to allocate 32 contiguous pages).
- *
- * Since we also need to save those pages, we add the number of pages that
- * we need to nr_copy_pages, and in case of an overflow, do the
- * calculation again to update the number of pages needed.
- *
- * With this model, we will tend to waste a lot of memory if we just cross
- * an order boundary. Plus, the higher the order of allocation that we try
- * to do, the more likely we are to fail in a low-memory situtation
- * (though we're unlikely to get this far in such a case, since swsusp
- * requires half of memory to be free anyway).
- */
-
-
-static void calc_order(void)
-{
- int diff = 0;
- int order = 0;
-
- do {
- diff = get_bitmask_order(SUSPEND_PD_PAGES(nr_copy_pages)) - order;
- if (diff) {
- order += diff;
- nr_copy_pages += 1 << diff;
- }
- } while(diff);
- pagedir_order = order;
-}


/**
@@ -673,13 +963,15 @@

static int alloc_pagedir(void)
{
- calc_order();
+ calc_order(&pagedir_order, &nr_copy_pages);
pagedir_save = (suspend_pagedir_t *)__get_free_pages(GFP_ATOMIC | __GFP_COLD,
pagedir_order);
if (!pagedir_save)
return -ENOMEM;
memset(pagedir_save, 0, (1 << pagedir_order) * PAGE_SIZE);
+
pagedir_nosave = pagedir_save;
+ pr_debug("pagedir %p, %d\n", pagedir_save, pagedir_order);
return 0;
}

@@ -766,11 +1058,11 @@
return -ENOSPC;

if ((error = alloc_pagedir())) {
- pr_debug("suspend: Allocating pagedir failed.\n");
+ printk("suspend: Allocating pagedir failed.\n");
return error;
}
if ((error = alloc_image_pages())) {
- pr_debug("suspend: Allocating image pages failed.\n");
+ printk("suspend: Allocating image pages failed.\n");
swsusp_free();
return error;
}
@@ -783,7 +1075,6 @@
int suspend_prepare_image(void)
{
unsigned int nr_needed_pages = 0;
- int error;

pr_debug("swsusp: critical section: \n");
if (save_highmem()) {
@@ -791,15 +1082,8 @@
return -ENOMEM;
}

- drain_local_pages();
- count_data_pages();
- printk("swsusp: Need to copy %u pages\n",nr_copy_pages);
nr_needed_pages = nr_copy_pages + PAGES_FOR_IO;

- error = swsusp_alloc();
- if (error)
- return error;
-
/* During allocating of suspend pagedir, new cold pages may appear.
* Kill them.
*/
@@ -1011,7 +1295,7 @@
}


-static struct block_device * resume_bdev;
+static struct block_device * resume_bdev __nosavedata;

/**
* submit - submit BIO request.
@@ -1151,6 +1435,11 @@
printk( "." );
error = bio_read_page(swp_offset(p->swap_address),
(void *)p->address);
+#ifdef PCS_DEBUG
+ pr_debug("data_read: %p %p %u\n",
+ (void *)p->address, (void *)p->orig_address,
+ swp_offset(p->swap_address));
+#endif
}
printk(" %d done.\n",i);
return error;
@@ -1219,7 +1508,7 @@
if (!IS_ERR(resume_bdev)) {
set_blocksize(resume_bdev, PAGE_SIZE);
error = read_suspend_image();
- blkdev_put(resume_bdev);
+ /* blkdev_put(resume_bdev); */
} else
error = PTR_ERR(resume_bdev);

Only in linux-2.6.9-hg/mm: .vmscan.c.swp
--
--
Hu Gang / Steve
Linux Registered User 204016
GPG Public Key: http://soulinfo.com/~hugang/hugang.asc

2004-11-20 05:35:37

by Pavel Machek

[permalink] [raw]
Subject: Re: [PATCH] Software Suspend split to two stage V2.

Hi!

> This patch using pagemap for PageSet2 bitmap, It increase suspend
> speed, In my PowerPC suspend only need 5 secs, cool.

Well, speed is nice but... I have an O(n^2) => O(n) patch in my tree that
provides some nice speedup too, and is less invasive. The two are
probably orthogonal.

* you'll have to explain (in Documentation/power/swsusp.txt) why this
is safe. Normal swsusp is safe because interrupts & DMAs are
disabled. You are doing writes, but the data in the page cache must not
be modified while you do so, right? What are the exact requirements for
this, and how is it guaranteed that the data are indeed not modified?

* what is pcs_ prefix? Page Cache Suspend?

(Ouch, and be warned that this will take quite a long time to get
in. There are patches in my queue I'd like to get in first, like the
O(n^2) => O(n) page marking.)

> +static void calc_order(int *po, int *nr)
> +{

"po" is bad name even for local variable.

> +static unsigned long *pageset2map = NULL;
> +
> +#define PAGENUMBER(page) (page-mem_map)
> +#define PAGEINDEX(page) ((PAGENUMBER(page))/(8*sizeof(unsigned long)))
> +#define PAGEBIT(page) ((int) ((PAGENUMBER(page))%(8 * sizeof(unsigned long))))
> +
> +#define BITS_PER_PAGE (PAGE_SIZE * 8)
> +#define PAGES_PER_BITMAP ((max_mapnr + BITS_PER_PAGE - 1) / BITS_PER_PAGE)
> +#define BITMAP_ORDER (get_bitmask_order((PAGES_PER_BITMAP) - 1))

> +#define PagePageset2(page) \
> + test_bit(PAGEBIT(page), &pageset2map[PAGEINDEX(page)])
> +#define SetPagePageset2(page) \
> + set_bit(PAGEBIT(page), &pageset2map[PAGEINDEX(page)])

Can't you just get another bit in page.h to avoid these arrays?

> +static int pcs_write(void)
> +{
> + int error = 0;
> + int i;
> +
> + printk( "Writing PageCaches to swap (%d pages): ", nr_copy_pcs);
> + for (i = 0; i < nr_copy_pcs && !error; i++) {
> + if (!(i%100))
> + printk( "." );

Please take the % progress display from the newer swsusp.

> +static void count_data_pages(void);
> +static int swsusp_alloc(void);
> +
> +int pcs_suspend(int resume)
> +{
> + struct zone *zone;
> + suspend_pagedir_t *pe = NULL;
> + int error;
> +
> + if (resume == 1) {
> + return (0);
> + }
> + if (resume == 2) {
> + pcs_read();
> + pcs_free_pagemap();
> + return (0);
> + }

I'd understand int resume taking 0 and 1, but what does 2 mean? Also
use return 0; not return (0);

Pavel
--
People were complaining that M$ turns users into beta-testers...
...jr ghea gurz vagb qrirybcref, naq gurl frrz gb yvxr vg gung jnl!

2004-11-20 08:15:58

by Hu Gang

[permalink] [raw]
Subject: Re: swsusp bigdiff [was Re: [PATCH] Software Suspend split to two stage V2.]

On Sat, Nov 20, 2004 at 01:30:10AM +0100, Pavel Machek wrote:
> Hi!
>
> > This patch using pagemap for PageSet2 bitmap, It increase suspend
> > speed, In my PowerPC suspend only need 5 secs, cool.
> >
> > Test passed in my ppc and x86 laptop.
> >
> > ppc swsusp patch for 2.6.9
> > http://honk.physik.uni-konstanz.de/~agx/linux-ppc/kernel/
> > Have fun.
>
> BTW here's my current bigdiff. It already has some rather nice
> swsusp speedups. Please try it on your machine; if it works for you,
> try to send your patches relative to this one. I hope to merge these
> changes during 2.6.11.
>

Here is the patch relative to your big diff. It passed testing on my x86
PC, but the sysfs interface doesn't work, so I used the reboot system call.

TODO:
Replace the pagedir used for the PageCache links with a range struct. The
current pagedir can't work with a large number of pages, because it needs
many contiguous physical pages.

diff -ur linux-2.6.9-peval/kernel/power/disk.c linux-2.6.9-peval-hg/kernel/power/disk.c
--- linux-2.6.9-peval/kernel/power/disk.c 2004-11-20 14:14:45.000000000 +0800
+++ linux-2.6.9-peval-hg/kernel/power/disk.c 2004-11-20 14:51:21.000000000 +0800
@@ -29,6 +29,8 @@
extern int swsusp_resume(void);
extern int swsusp_free(void);

+extern int write_page_caches(void);
+extern int read_page_caches(void);

static int noresume = 0;
char resume_file[256] = CONFIG_PM_STD_PARTITION;
@@ -106,6 +108,7 @@
}
}

+
static inline void platform_finish(void)
{
if (pm_disk_mode == PM_DISK_PLATFORM) {
@@ -118,13 +121,14 @@
{
device_resume();
platform_finish();
+ read_page_caches();
enable_nonboot_cpus();
thaw_processes();
pm_restore_console();
}


-static int prepare(void)
+static int prepare(int resume)
{
int error;

@@ -144,9 +148,13 @@
}

/* Free memory before shutting down devices. */
- free_some_memory();
+ /* free_some_memory(); */

disable_nonboot_cpus();
+ if (!resume)
+ if ((error = write_page_caches())) {
+ goto Finish;
+ }
if ((error = device_suspend(PMSG_FREEZE))) {
printk("Some devices failed to suspend\n");
goto Finish;
@@ -176,7 +184,7 @@
{
int error;

- if ((error = prepare()))
+ if ((error = prepare(0)))
return error;

pr_debug("PM: Attempting to suspend to disk.\n");
@@ -233,7 +241,7 @@

pr_debug("PM: Preparing system for restore.\n");

- if ((error = prepare()))
+ if ((error = prepare(1)))
goto Free;

barrier();
Only in linux-2.6.9-peval-hg/kernel/power: disk.c~
diff -ur linux-2.6.9-peval/kernel/power/process.c linux-2.6.9-peval-hg/kernel/power/process.c
--- linux-2.6.9-peval/kernel/power/process.c 2004-10-20 16:00:53.000000000 +0800
+++ linux-2.6.9-peval-hg/kernel/power/process.c 2004-11-20 14:47:40.000000000 +0800
@@ -4,8 +4,6 @@
*
* Originally from swsusp.
*/
-
-
#undef DEBUG

#include <linux/smp_lock.h>
diff -ur linux-2.6.9-peval/kernel/power/swsusp.c linux-2.6.9-peval-hg/kernel/power/swsusp.c
--- linux-2.6.9-peval/kernel/power/swsusp.c 2004-11-20 14:14:45.000000000 +0800
+++ linux-2.6.9-peval-hg/kernel/power/swsusp.c 2004-11-20 16:04:27.000000000 +0800
@@ -76,6 +76,7 @@

/* Variables to be preserved over suspend */
static int pagedir_order_check;
+static int nr_copy_pages_check;

extern char resume_file[];
static dev_t resume_device;
@@ -302,6 +303,12 @@
printk( "\b\b\b\b%3d%%", i / mod );
error = write_page((pagedir_nosave+i)->address,
&((pagedir_nosave+i)->swap_address));
+#ifdef PCS_DEBUG
+ pr_debug("data_write: %p %p %u\n",
+ (void *)(pagedir_nosave+i)->address,
+ (void *)(pagedir_nosave+i)->orig_address,
+ (pagedir_nosave+i)->swap_address);
+#endif
}
printk("\b\b\b\bdone\n");
return error;
@@ -504,6 +511,327 @@
return 0;
}

+/**
+ * calc_order - Determine the order of allocation needed for pagedir_save.
+ *
+ * This looks tricky, but is just subtle. Please fix it some time.
+ * Since there are %nr_copy_pages worth of pages in the snapshot, we need
+ * to allocate enough contiguous space to hold
+ * (%nr_copy_pages * sizeof(struct pbe)),
+ * which has the saved/orig locations of the page..
+ *
+ * SUSPEND_PD_PAGES() tells us how many pages we need to hold those
+ * structures, then we call get_bitmask_order(), which will tell us the
+ * last bit set in the number, starting with 1. (If we need 30 pages, that
+ * is 0x0000001e in hex. The last bit is the 5th, which is the order we
+ * would use to allocate 32 contiguous pages).
+ *
+ * Since we also need to save those pages, we add the number of pages that
+ * we need to nr_copy_pages, and in case of an overflow, do the
+ * calculation again to update the number of pages needed.
+ *
+ * With this model, we will tend to waste a lot of memory if we just cross
+ * an order boundary. Plus, the higher the order of allocation that we try
+ * to do, the more likely we are to fail in a low-memory situtation
+ * (though we're unlikely to get this far in such a case, since swsusp
+ * requires half of memory to be free anyway).
+ */
+
+static void calc_order(int *po, int *nr)
+{
+ int diff = 0;
+ int order = 0;
+
+ do {
+ diff = get_bitmask_order(SUSPEND_PD_PAGES(*nr)) - order;
+ if (diff) {
+ order += diff;
+ *nr += 1 << diff;
+ }
+ } while(diff);
+ *po = order;
+}
+
+typedef int (*do_page_t)(struct page *page, void *p);
+
+static int foreach_zone_page(struct zone *zone, do_page_t fun, void *p)
+{
+ int inactive = 0, active = 0;
+
+ /* spin_lock_irq(&zone->lru_lock); */
+ if (zone->nr_inactive) {
+ struct list_head * entry = zone->inactive_list.prev;
+ while (entry != &zone->inactive_list) {
+ if (fun) {
+ struct page * page = list_entry(entry, struct page, lru);
+ inactive += fun(page, p);
+ } else {
+ inactive ++;
+ }
+ entry = entry->prev;
+ }
+ }
+ if (zone->nr_active) {
+ struct list_head * entry = zone->active_list.prev;
+ while (entry != &zone->active_list) {
+ if (fun) {
+ struct page * page = list_entry(entry, struct page, lru);
+ active += fun(page, p);
+ } else {
+ active ++;
+ }
+ entry = entry->prev;
+ }
+ }
+ /* spin_unlock_irq(&zone->lru_lock); */
+
+ return (active + inactive);
+}
+
+/* I'll move this to include/linux/page-flags.h */
+#define PG_pcs (PG_nosave_free + 1)
+
+#define SetPagePcs(page) set_bit(PG_pcs, &(page)->flags)
+#define ClearPagePcs(page) clear_bit(PG_pcs, &(page)->flags)
+#define PagePcs(page) test_bit(PG_pcs, &(page)->flags)
+
+static int setup_pcs_pe(struct page *page, void *p)
+{
+ suspend_pagedir_t **pe = p;
+ unsigned long pfn = page_to_pfn(page);
+
+ BUG_ON(PageReserved(page) && PageNosave(page));
+ if (!pfn_valid(pfn)) {
+ printk("not valid page\n");
+ return 0;
+ }
+ if (PageNosave(page)) {
+ printk("nosave\n");
+ return 0;
+ }
+ if (PageReserved(page) /*&& pfn_is_nosave(pfn)*/) {
+ printk("[nosave]\n");
+ return 0;
+ }
+ if (PageSlab(page)) {
+ printk("slab\n");
+ return (0);
+ }
+ if (pe && *pe) {
+ BUG_ON(!PagePcs(page));
+ (*pe)->address = (long) page_address(page);
+ (*pe) ++;
+ }
+ SetPagePcs(page);
+
+ return (1);
+}
+
+static int count_pcs(struct zone *zone, suspend_pagedir_t **pe)
+{
+ return foreach_zone_page(zone, setup_pcs_pe, pe);
+}
+
+static suspend_pagedir_t *pagedir_cache = NULL;
+static int nr_copy_pcs = 0;
+static int pcs_order = 0;
+
+static int alloc_pagedir_cache(void)
+{
+ int need_nr_copy_pcs = nr_copy_pcs;
+
+ calc_order(&pcs_order, &need_nr_copy_pcs);
+ pagedir_cache = (suspend_pagedir_t *)
+ __get_free_pages(GFP_ATOMIC | __GFP_COLD, pcs_order);
+ if (!pagedir_cache)
+ return -ENOMEM;
+ memset(pagedir_cache, 0, (1 << pcs_order) * PAGE_SIZE);
+
+ pr_debug("alloc pcs %p, %d\n", pagedir_cache, pcs_order);
+
+ return 0;
+}
+
+static void page_cache_unlock(void)
+{
+ struct zone *zone;
+
+ for_each_zone(zone) {
+ if (!is_highmem(zone)) {
+ spin_unlock_irq(&zone->lru_lock);
+ }
+ }
+}
+
+static void page_cache_lock(void)
+{
+ struct zone *zone;
+
+ for_each_zone(zone) {
+ if (!is_highmem(zone)) {
+ spin_lock_irq(&zone->lru_lock);
+ }
+ }
+}
+int bio_read_page(pgoff_t page_off, void * page);
+
+int read_page_caches(void)
+{
+ struct pbe * p;
+ int error = 0, i;
+ swp_entry_t entry;
+ int mod = nr_copy_pcs / 100;
+
+ printk( "Reading PageCaches from swap (%d pages)... ", nr_copy_pcs);
+ for(i = 0, p = pagedir_cache; i < nr_copy_pcs && !error; i++, p++) {
+ if (!(i%100))
+ printk( "\b\b\b\b%3d%%", i / mod );
+ error = bio_read_page(swp_offset(p->swap_address),
+ (void *)p->address);
+#ifdef PCS_DEBUG
+ pr_debug("pcs_read: %p %p %u\n",
+ (void *)p->address, (void *)p->orig_address,
+ swp_offset(p->swap_address));
+#endif
+ }
+
+ for (i = 0; i < nr_copy_pcs; i++) {
+ entry = (pagedir_cache + i)->swap_address;
+ if (entry.val)
+ swap_free(entry);
+ }
+ free_pages((unsigned long)pagedir_cache, pcs_order);
+
+ printk("\b\b\b\bdone\n");
+
+ page_cache_unlock();
+
+ return (0);
+}
+
+static int pcs_write(void)
+{
+ int error = 0;
+ int i;
+ int mod = nr_copy_pcs / 100;
+
+ printk( "Writing PageCaches to swap (%d pages)... ", nr_copy_pcs);
+ for (i = 0; i < nr_copy_pcs && !error; i++) {
+ if (!(i%100))
+ printk( "\b\b\b\b%3d%%", i / mod );
+ error = write_page((pagedir_cache+i)->address,
+ &((pagedir_cache+i)->swap_address));
+#ifdef PCS_DEBUG
+ pr_debug("pcs_write: %p %p %u\n",
+ (void *)(pagedir_cache+i)->address,
+ (void *)(pagedir_cache+i)->orig_address,
+ (pagedir_cache+i)->swap_address);
+#endif
+ }
+ printk("\b\b\b\bdone\n");
+
+ return error;
+}
+
+static void count_data_pages(void);
+static int swsusp_alloc(void);
+
+static void page_caches_recal(void)
+{
+ struct zone *zone;
+ int i;
+
+ for (i = 0; i < max_mapnr; i++)
+ ClearPagePcs(mem_map+i);
+
+ nr_copy_pcs = 0;
+ drain_local_pages();
+ for_each_zone(zone) {
+ if (!is_highmem(zone)) {
+ nr_copy_pcs += count_pcs(zone, NULL);
+ }
+ }
+}
+
+int write_page_caches(void)
+{
+ struct zone *zone;
+ suspend_pagedir_t *pe = NULL;
+ int error;
+ int recal = 0;
+
+ page_cache_lock();
+ page_caches_recal();
+
+ if (nr_copy_pcs == 0) {
+ page_cache_unlock();
+ return (0);
+ }
+ printk("swsusp: Need to copy %u pcs\n", nr_copy_pcs);
+
+ if ((error = swsusp_swap_check())) {
+ page_cache_unlock();
+ return error;
+ }
+
+ if ((error = alloc_pagedir_cache())) {
+ page_cache_unlock();
+ return error;
+ }
+
+ drain_local_pages();
+ count_data_pages();
+ printk("swsusp(1/2): Need to copy %u pages, %u pcs\n",
+ nr_copy_pages, nr_copy_pcs);
+
+ while (nr_free_pages() < nr_copy_pages + PAGES_FOR_IO) {
+ if (recal == 0) {
+ page_cache_unlock();
+ }
+ printk("#");
+ shrink_all_memory(nr_copy_pages + PAGES_FOR_IO);
+ recal ++;
+ }
+
+ if (recal) {
+ page_cache_lock();
+ page_caches_recal();
+ drain_local_pages();
+ count_data_pages();
+ printk("swsusp(1/2): Need to copy %u pages, %u pcs\n",
+ nr_copy_pages, nr_copy_pcs);
+ }
+
+ drain_local_pages();
+ count_data_pages();
+ printk("swsusp(2/2): Need to copy %u pages, %u pcs\n",
+ nr_copy_pages, nr_copy_pcs);
+
+ error = swsusp_alloc();
+ if (error) {
+ printk("swsusp_alloc failed, %d\n", error);
+ return error;
+ }
+
+ drain_local_pages();
+ count_data_pages();
+ printk("swsusp(final): Need to copy %u/%u pages, %u pcs\n",
+ nr_copy_pages, nr_copy_pages_check, nr_copy_pcs);
+
+ pe = pagedir_cache;
+
+ drain_local_pages();
+ for_each_zone(zone) {
+ if (!is_highmem(zone)) {
+ count_pcs(zone, &pe);
+ }
+ }
+ error = pcs_write();
+ if (error)
+ return error;
+
+ return (0);
+}

static int pfn_is_nosave(unsigned long pfn)
{
@@ -539,7 +867,10 @@
}
if (PageNosaveFree(page))
return 0;
-
+ if (PagePcs(page)) {
+ BUG_ON(zone->nr_inactive == 0 && zone->nr_active == 0);
+ return 0;
+ }
return 1;
}

@@ -549,10 +880,12 @@
unsigned long zone_pfn;

nr_copy_pages = 0;
+ nr_copy_pcs = 0;

for_each_zone(zone) {
if (is_highmem(zone))
continue;
+ nr_copy_pcs += count_pcs(zone, NULL);
mark_free_pages(zone);
for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn)
nr_copy_pages += saveable(zone, &zone_pfn);
@@ -588,47 +921,6 @@
}


-/**
- * calc_order - Determine the order of allocation needed for pagedir_save.
- *
- * This looks tricky, but is just subtle. Please fix it some time.
- * Since there are %nr_copy_pages worth of pages in the snapshot, we need
- * to allocate enough contiguous space to hold
- * (%nr_copy_pages * sizeof(struct pbe)),
- * which has the saved/orig locations of the page..
- *
- * SUSPEND_PD_PAGES() tells us how many pages we need to hold those
- * structures, then we call get_bitmask_order(), which will tell us the
- * last bit set in the number, starting with 1. (If we need 30 pages, that
- * is 0x0000001e in hex. The last bit is the 5th, which is the order we
- * would use to allocate 32 contiguous pages).
- *
- * Since we also need to save those pages, we add the number of pages that
- * we need to nr_copy_pages, and in case of an overflow, do the
- * calculation again to update the number of pages needed.
- *
- * With this model, we will tend to waste a lot of memory if we just cross
- * an order boundary. Plus, the higher the order of allocation that we try
- * to do, the more likely we are to fail in a low-memory situtation
- * (though we're unlikely to get this far in such a case, since swsusp
- * requires half of memory to be free anyway).
- */
-
-
-static void calc_order(void)
-{
- int diff = 0;
- int order = 0;
-
- do {
- diff = get_bitmask_order(SUSPEND_PD_PAGES(nr_copy_pages)) - order;
- if (diff) {
- order += diff;
- nr_copy_pages += 1 << diff;
- }
- } while(diff);
- pagedir_order = order;
-}


/**
@@ -640,13 +932,15 @@

static int alloc_pagedir(void)
{
- calc_order();
+ calc_order(&pagedir_order, &nr_copy_pages);
pagedir_save = (suspend_pagedir_t *)__get_free_pages(GFP_ATOMIC | __GFP_COLD,
pagedir_order);
if (!pagedir_save)
return -ENOMEM;
memset(pagedir_save, 0, (1 << pagedir_order) * PAGE_SIZE);
+
pagedir_nosave = pagedir_save;
+ pr_debug("pagedir %p, %d\n", pagedir_save, pagedir_order);
return 0;
}

@@ -752,15 +1046,16 @@
return -ENOSPC;

if ((error = alloc_pagedir())) {
- pr_debug("suspend: Allocating pagedir failed.\n");
+ printk("suspend: Allocating pagedir failed.\n");
return error;
}
if ((error = alloc_image_pages())) {
- pr_debug("suspend: Allocating image pages failed.\n");
+ printk("suspend: Allocating image pages failed.\n");
swsusp_free();
return error;
}

+ nr_copy_pages_check = nr_copy_pages;
pagedir_order_check = pagedir_order;
return 0;
}
@@ -768,7 +1063,6 @@
int suspend_prepare_image(void)
{
unsigned int nr_needed_pages;
- int error;

pr_debug("swsusp: critical section: \n");
if (save_highmem()) {
@@ -777,15 +1071,8 @@
return -ENOMEM;
}

- drain_local_pages();
- count_data_pages();
- printk("swsusp: Need to copy %u pages\n",nr_copy_pages);
nr_needed_pages = nr_copy_pages + PAGES_FOR_IO;

- error = swsusp_alloc();
- if (error)
- return error;
-
/* During allocating of suspend pagedir, new cold pages may appear.
* Kill them.
*/
@@ -855,7 +1142,8 @@
asmlinkage int swsusp_restore(void)
{
BUG_ON (pagedir_order_check != pagedir_order);
-
+ BUG_ON (nr_copy_pages_check != nr_copy_pages);
+
/* Even mappings of "global" things (vmalloc) need to be fixed */
__flush_tlb_global();
wbinvd(); /* Nigel says wbinvd here is good idea... */
@@ -993,7 +1281,7 @@
return 0;
}

-static struct block_device * resume_bdev;
+static struct block_device * resume_bdev __nosavedata;

/**
* submit - submit BIO request.
@@ -1141,6 +1429,11 @@
printk( "\b\b\b\b%3d%%", i / mod );
error = bio_read_page(swp_offset(p->swap_address),
(void *)p->address);
+#ifdef PCS_DEBUG
+ pr_debug("data_read: %p %p %u\n",
+ (void *)p->address, (void *)p->orig_address,
+ swp_offset(p->swap_address));
+#endif
}
printk(" %d done.\n",i);
return error;
@@ -1207,7 +1500,7 @@
if (!IS_ERR(resume_bdev)) {
set_blocksize(resume_bdev, PAGE_SIZE);
error = read_suspend_image();
- blkdev_put(resume_bdev);
+ /* blkdev_put(resume_bdev); */
} else
error = PTR_ERR(resume_bdev);

Only in linux-2.6.9-peval-hg/kernel/power: swsusp.c~

--
Hu Gang / Steve
Linux Registered User 204016
GPG Public Key: http://soulinfo.com/~hugang/hugang.asc

2004-11-20 09:33:36

by Hu Gang

[permalink] [raw]
Subject: Re: swsusp bigdiff [was Re: [PATCH] Software Suspend split to two stage V2.]

On Sat, Nov 20, 2004 at 01:30:10AM +0100, Pavel Machek wrote:
> Hi!
>
> > This patch using pagemap for PageSet2 bitmap, It increase suspend
> > speed, In my PowerPC suspend only need 5 secs, cool.
> >
> > Test passed in my ppc and x86 laptop.
> >
> > ppc swsusp patch for 2.6.9
> > http://honk.physik.uni-konstanz.de/~agx/linux-ppc/kernel/
> > Have fun.
>
> BTW here's my curent bigdiff. It already has some rather nice
> swsusp speedups. Please try it on your machine; if it works for you,
> try to send your patches relative to this one. I hope to merge these
> changes during 2.6.11.
>
> Pavel
>

Here is my diff with PowerPC support. It passed testing and is much faster on
my PowerPC laptop.

First get a clean 2.6.9 kernel, then apply the big diff, then my diff, and
finally 2.6.9-oom-kill-fix.patch from ck1.

* The sysfs interface doesn't work; I am still using the reboot system call
(see Documentation/power/swsusp.txt).

Have fun.


diff -ur linux-2.6.9-peval-hg/arch/ppc/Kconfig linux-2.6.9-peval-hg-ppc.old/arch/ppc/Kconfig
--- linux-2.6.9-peval-hg/arch/ppc/Kconfig 2004-10-20 15:58:39.000000000 +0800
+++ linux-2.6.9-peval-hg-ppc.old/arch/ppc/Kconfig 2004-11-20 16:17:05.000000000 +0800
@@ -983,6 +983,8 @@

source "drivers/zorro/Kconfig"

+source kernel/power/Kconfig
+
endmenu

menu "Bus options"
Only in linux-2.6.9-peval-hg-ppc.old/arch/ppc: Kconfig~
diff -ur linux-2.6.9-peval-hg/arch/ppc/kernel/Makefile linux-2.6.9-peval-hg-ppc.old/arch/ppc/kernel/Makefile
--- linux-2.6.9-peval-hg/arch/ppc/kernel/Makefile 2004-10-20 15:58:40.000000000 +0800
+++ linux-2.6.9-peval-hg-ppc.old/arch/ppc/kernel/Makefile 2004-11-20 16:17:05.000000000 +0800
@@ -16,6 +16,7 @@
semaphore.o syscalls.o setup.o \
cputable.o ppc_htab.o
obj-$(CONFIG_6xx) += l2cr.o cpu_setup_6xx.o
+obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o
obj-$(CONFIG_POWER4) += cpu_setup_power4.o
obj-$(CONFIG_MODULES) += module.o ppc_ksyms.o
obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-mapping.o
diff -ur linux-2.6.9-peval-hg/arch/ppc/kernel/signal.c linux-2.6.9-peval-hg-ppc.old/arch/ppc/kernel/signal.c
--- linux-2.6.9-peval-hg/arch/ppc/kernel/signal.c 2004-10-20 15:58:41.000000000 +0800
+++ linux-2.6.9-peval-hg-ppc.old/arch/ppc/kernel/signal.c 2004-11-20 16:17:05.000000000 +0800
@@ -28,6 +28,7 @@
#include <linux/elf.h>
#include <linux/tty.h>
#include <linux/binfmts.h>
+#include <linux/suspend.h>
#include <asm/ucontext.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -604,6 +605,14 @@
unsigned long frame, newsp;
int signr, ret;

+ if (current->flags & PF_FREEZE) {
+ refrigerator(PF_FREEZE);
+ signr = 0;
+ ret = regs->gpr[3];
+ if (!signal_pending(current))
+ goto no_signal;
+ }
+
if (!oldset)
oldset = &current->blocked;

@@ -626,6 +635,7 @@
regs->gpr[3] = EINTR;
/* note that the cr0.SO bit is already set */
} else {
+no_signal:
regs->nip -= 4; /* Back up & retry system call */
regs->result = 0;
regs->trap = 0;
Only in linux-2.6.9-peval-hg-ppc.old/arch/ppc/kernel: swsusp.S
diff -ur linux-2.6.9-peval-hg/arch/ppc/kernel/vmlinux.lds.S linux-2.6.9-peval-hg-ppc.old/arch/ppc/kernel/vmlinux.lds.S
--- linux-2.6.9-peval-hg/arch/ppc/kernel/vmlinux.lds.S 2004-10-20 15:58:41.000000000 +0800
+++ linux-2.6.9-peval-hg-ppc.old/arch/ppc/kernel/vmlinux.lds.S 2004-11-20 16:17:05.000000000 +0800
@@ -74,6 +74,12 @@
CONSTRUCTORS
}

+ . = ALIGN(4096);
+ __nosave_begin = .;
+ .data_nosave : { *(.data.nosave) }
+ . = ALIGN(4096);
+ __nosave_end = .;
+
. = ALIGN(32);
.data.cacheline_aligned : { *(.data.cacheline_aligned) }

diff -ur linux-2.6.9-peval-hg/arch/ppc/platforms/pmac_setup.c linux-2.6.9-peval-hg-ppc.old/arch/ppc/platforms/pmac_setup.c
--- linux-2.6.9-peval-hg/arch/ppc/platforms/pmac_setup.c 2004-10-20 15:58:41.000000000 +0800
+++ linux-2.6.9-peval-hg-ppc.old/arch/ppc/platforms/pmac_setup.c 2004-11-20 16:43:16.000000000 +0800
@@ -51,6 +51,7 @@
#include <linux/irq.h>
#include <linux/seq_file.h>
#include <linux/root_dev.h>
+#include <linux/suspend.h>

#include <asm/reg.h>
#include <asm/sections.h>
@@ -70,6 +71,8 @@
#include <asm/pmac_feature.h>
#include <asm/time.h>
#include <asm/of_device.h>
+#include <asm/mmu_context.h>
+
#include "pmac_pic.h"
#include "mem_pieces.h"

@@ -420,11 +423,67 @@
#endif
}

+/* TODO: Merge the suspend-to-ram with the common code !!!
+ * currently, this is a stub implementation for suspend-to-disk
+ * only
+ */
+
+#ifdef CONFIG_PM
+
+extern void enable_kernel_altivec(void);
+
+static int pmac_pm_prepare(u32 state)
+{
+ printk(KERN_DEBUG "pmac_pm_prepare(%d)\n", state);
+
+ return 0;
+}
+
+static int pmac_pm_enter(u32 state)
+{
+ printk(KERN_DEBUG "pmac_pm_enter(%d)\n", state);
+
+ /* Giveup the lazy FPU & vec so we don't have to back them
+ * up from the low level code
+ */
+ enable_kernel_fp();
+
+#ifdef CONFIG_ALTIVEC
+ if (cur_cpu_spec[0]->cpu_features & CPU_FTR_ALTIVEC)
+ enable_kernel_altivec();
+#endif /* CONFIG_ALTIVEC */
+
+ return 0;
+}
+
+static int pmac_pm_finish(u32 state)
+{
+ printk(KERN_DEBUG "pmac_pm_finish(%d)\n", state);
+
+ /* Restore userland MMU context */
+ set_context(current->active_mm->context, current->active_mm->pgd);
+
+ return 0;
+}
+
+static struct pm_ops pmac_pm_ops = {
+ .pm_disk_mode = PM_DISK_SHUTDOWN,
+ .prepare = pmac_pm_prepare,
+ .enter = pmac_pm_enter,
+ .finish = pmac_pm_finish,
+};
+
+#endif /* CONFIG_PM */
+
static int initializing = 1;

static int pmac_late_init(void)
{
initializing = 0;
+
+#ifdef CONFIG_PM
+ pm_set_ops(&pmac_pm_ops);
+#endif /* CONFIG_PM */
return 0;
}

diff -ur linux-2.6.9-peval-hg/arch/ppc/syslib/open_pic.c linux-2.6.9-peval-hg-ppc.old/arch/ppc/syslib/open_pic.c
--- linux-2.6.9-peval-hg/arch/ppc/syslib/open_pic.c 2004-10-20 15:58:42.000000000 +0800
+++ linux-2.6.9-peval-hg-ppc.old/arch/ppc/syslib/open_pic.c 2004-11-20 16:25:10.000000000 +0800
@@ -776,7 +776,8 @@
if (ISR[irq] == 0)
return;
if (!cpus_empty(keepmask)) {
- cpumask_t irqdest = { .bits[0] = openpic_read(&ISR[irq]->Destination) };
+ cpumask_t irqdest;
+ irqdest.bits[0] = openpic_read(&ISR[irq]->Destination);
cpus_and(irqdest, irqdest, keepmask);
cpus_or(physmask, physmask, irqdest);
}
diff -ur linux-2.6.9-peval-hg/drivers/ide/ppc/pmac.c linux-2.6.9-peval-hg-ppc.old/drivers/ide/ppc/pmac.c
--- linux-2.6.9-peval-hg/drivers/ide/ppc/pmac.c 2004-10-20 15:59:12.000000000 +0800
+++ linux-2.6.9-peval-hg-ppc.old/drivers/ide/ppc/pmac.c 2004-11-20 16:17:05.000000000 +0800
@@ -32,6 +32,7 @@
#include <linux/notifier.h>
#include <linux/reboot.h>
#include <linux/pci.h>
+#include <linux/pm.h>
#include <linux/adb.h>
#include <linux/pmu.h>

@@ -1364,7 +1365,7 @@
ide_hwif_t *hwif = (ide_hwif_t *)dev_get_drvdata(&mdev->ofdev.dev);
int rc = 0;

- if (state != mdev->ofdev.dev.power_state && state >= 2) {
+ if (state != mdev->ofdev.dev.power_state && state == PM_SUSPEND_MEM) {
rc = pmac_ide_do_suspend(hwif);
if (rc == 0)
mdev->ofdev.dev.power_state = state;
@@ -1472,7 +1473,7 @@
ide_hwif_t *hwif = (ide_hwif_t *)pci_get_drvdata(pdev);
int rc = 0;

- if (state != pdev->dev.power_state && state >= 2) {
+ if (state != pdev->dev.power_state && state == PM_SUSPEND_MEM ) {
rc = pmac_ide_do_suspend(hwif);
if (rc == 0)
pdev->dev.power_state = state;
diff -ur linux-2.6.9-peval-hg/drivers/macintosh/Kconfig linux-2.6.9-peval-hg-ppc.old/drivers/macintosh/Kconfig
--- linux-2.6.9-peval-hg/drivers/macintosh/Kconfig 2004-10-20 15:53:31.000000000 +0800
+++ linux-2.6.9-peval-hg-ppc.old/drivers/macintosh/Kconfig 2004-11-20 16:17:05.000000000 +0800
@@ -80,7 +80,7 @@

config PMAC_PBOOK
bool "Power management support for PowerBooks"
- depends on ADB_PMU
+ depends on PM && ADB_PMU
---help---
This provides support for putting a PowerBook to sleep; it also
enables media bay support. Power management works on the
@@ -97,11 +97,6 @@
have it autoloaded. The act of removing the module shuts down the
sound hardware for more power savings.

-config PM
- bool
- depends on PPC_PMAC && ADB_PMU && PMAC_PBOOK
- default y
-
config PMAC_APM_EMU
tristate "APM emulation"
depends on PMAC_PBOOK
diff -ur linux-2.6.9-peval-hg/drivers/macintosh/mediabay.c linux-2.6.9-peval-hg-ppc.old/drivers/macintosh/mediabay.c
--- linux-2.6.9-peval-hg/drivers/macintosh/mediabay.c 2004-10-20 15:53:32.000000000 +0800
+++ linux-2.6.9-peval-hg-ppc.old/drivers/macintosh/mediabay.c 2004-11-20 16:17:05.000000000 +0800
@@ -713,7 +713,7 @@
{
struct media_bay_info *bay = macio_get_drvdata(mdev);

- if (state != mdev->ofdev.dev.power_state && state >= 2) {
+ if (state != mdev->ofdev.dev.power_state && state == PM_SUSPEND_MEM) {
down(&bay->lock);
bay->sleeping = 1;
set_mb_power(bay, 0);
diff -ur linux-2.6.9-peval-hg/drivers/macintosh/therm_adt746x.c linux-2.6.9-peval-hg-ppc.old/drivers/macintosh/therm_adt746x.c
--- linux-2.6.9-peval-hg/drivers/macintosh/therm_adt746x.c 2004-10-20 15:59:24.000000000 +0800
+++ linux-2.6.9-peval-hg-ppc.old/drivers/macintosh/therm_adt746x.c 2004-11-20 16:17:05.000000000 +0800
@@ -22,6 +22,7 @@
#include <linux/spinlock.h>
#include <linux/smp_lock.h>
#include <linux/wait.h>
+#include <linux/suspend.h>
#include <asm/prom.h>
#include <asm/machdep.h>
#include <asm/io.h>
@@ -238,6 +239,11 @@
#endif
while(!kthread_should_stop())
{
+ if (current->flags & PF_FREEZE) {
+ printk(KERN_INFO "therm_adt746x: freezing thermostat\n");
+ refrigerator(PF_FREEZE);
+ }
+
msleep_interruptible(2000);

/* Check status */
diff -ur linux-2.6.9-peval-hg/drivers/macintosh/therm_pm72.c linux-2.6.9-peval-hg-ppc.old/drivers/macintosh/therm_pm72.c
--- linux-2.6.9-peval-hg/drivers/macintosh/therm_pm72.c 2004-10-20 15:53:32.000000000 +0800
+++ linux-2.6.9-peval-hg-ppc.old/drivers/macintosh/therm_pm72.c 2004-11-20 16:17:05.000000000 +0800
@@ -88,6 +88,7 @@
#include <linux/spinlock.h>
#include <linux/smp_lock.h>
#include <linux/wait.h>
+#include <linux/suspend.h>
#include <linux/reboot.h>
#include <linux/kmod.h>
#include <linux/i2c.h>
@@ -1044,6 +1045,11 @@
while (state == state_attached) {
unsigned long elapsed, start;

+ if (current->flags & PF_FREEZE) {
+ printk(KERN_INFO "therm_pm72: freezing thermostat\n");
+ refrigerator(PF_FREEZE);
+ }
+
start = jiffies;

down(&driver_lock);
diff -ur linux-2.6.9-peval-hg/drivers/macintosh/via-pmu.c linux-2.6.9-peval-hg-ppc.old/drivers/macintosh/via-pmu.c
--- linux-2.6.9-peval-hg/drivers/macintosh/via-pmu.c 2004-10-20 15:59:24.000000000 +0800
+++ linux-2.6.9-peval-hg-ppc.old/drivers/macintosh/via-pmu.c 2004-11-20 16:23:11.000000000 +0800
@@ -43,6 +43,7 @@
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/device.h>
+#include <linux/sysdev.h>
#include <linux/suspend.h>
#include <linux/syscalls.h>
#include <asm/prom.h>
@@ -2326,7 +2327,7 @@
/* Sync the disks. */
/* XXX It would be nice to have some way to ensure that
* nobody is dirtying any new buffers while we wait. That
- * could be acheived using the refrigerator for processes
+ * could be achieved using the refrigerator for processes
* that swsusp uses
*/
sys_sync();
@@ -2379,7 +2380,6 @@

/* Wait for completion of async backlight requests */
while (!bright_req_1.complete || !bright_req_2.complete ||
-
!batt_req.complete)
pmu_poll();

@@ -3048,6 +3048,88 @@
}
#endif /* DEBUG_SLEEP */

+
+/* FIXME: This is a temporary set of callbacks to enable us
+ * to do suspend-to-disk.
+ */
+
+#ifdef CONFIG_PM
+
+static int pmu_sys_suspended = 0;
+
+static int pmu_sys_suspend(struct sys_device *sysdev, pm_message_t state)
+{
+ if (state != PMSG_FREEZE || pmu_sys_suspended)
+ return 0;
+
+ /* Suspend PMU event interrupts */
+ pmu_suspend();
+
+ pmu_sys_suspended = 1;
+ return 0;
+}
+
+static int pmu_sys_resume(struct sys_device *sysdev)
+{
+ struct adb_request req;
+
+ if (!pmu_sys_suspended)
+ return 0;
+
+ /* Tell PMU we are ready */
+ pmu_request(&req, NULL, 2, PMU_SYSTEM_READY, 2);
+ pmu_wait_complete(&req);
+
+ /* Resume PMU event interrupts */
+ pmu_resume();
+
+ pmu_sys_suspended = 0;
+
+ return 0;
+}
+
+#endif /* CONFIG_PM */
+
+static struct sysdev_class pmu_sysclass = {
+ set_kset_name("pmu"),
+};
+
+static struct sys_device device_pmu = {
+ .id = 0,
+ .cls = &pmu_sysclass,
+};
+
+static struct sysdev_driver driver_pmu = {
+#ifdef CONFIG_PM
+ .suspend = &pmu_sys_suspend,
+ .resume = &pmu_sys_resume,
+#endif /* CONFIG_PM */
+};
+
+static int __init init_pmu_sysfs(void)
+{
+ int rc;
+
+ rc = sysdev_class_register(&pmu_sysclass);
+ if (rc) {
+ printk(KERN_ERR "Failed registering PMU sys class\n");
+ return -ENODEV;
+ }
+ rc = sysdev_register(&device_pmu);
+ if (rc) {
+ printk(KERN_ERR "Failed registering PMU sys device\n");
+ return -ENODEV;
+ }
+ rc = sysdev_driver_register(&pmu_sysclass, &driver_pmu);
+ if (rc) {
+ printk(KERN_ERR "Failed registering PMU sys driver\n");
+ return -ENODEV;
+ }
+ return 0;
+}
+
+subsys_initcall(init_pmu_sysfs);
+
EXPORT_SYMBOL(pmu_request);
EXPORT_SYMBOL(pmu_poll);
EXPORT_SYMBOL(pmu_poll_adb);
diff -ur linux-2.6.9-peval-hg/drivers/video/aty/radeon_pm.c linux-2.6.9-peval-hg-ppc.old/drivers/video/aty/radeon_pm.c
--- linux-2.6.9-peval-hg/drivers/video/aty/radeon_pm.c 2004-10-20 15:55:34.000000000 +0800
+++ linux-2.6.9-peval-hg-ppc.old/drivers/video/aty/radeon_pm.c 2004-11-20 16:17:05.000000000 +0800
@@ -859,6 +859,10 @@
* know we'll be rebooted, ...
*/

+#if 0 /* this breaks suspend to ram until the dust settles... */
+ if (state != PM_SUSPEND_MEM)
+#endif
+ return 0;
printk(KERN_DEBUG "radeonfb: suspending to state: %d...\n", state);

acquire_console_sem();
Only in linux-2.6.9-peval-hg-ppc.old/include/asm-ppc: suspend.h
diff -ur linux-2.6.9-peval-hg/include/linux/reboot.h linux-2.6.9-peval-hg-ppc.old/include/linux/reboot.h
--- linux-2.6.9-peval-hg/include/linux/reboot.h 2004-06-16 13:20:26.000000000 +0800
+++ linux-2.6.9-peval-hg-ppc.old/include/linux/reboot.h 2004-11-20 16:17:05.000000000 +0800
@@ -42,6 +42,8 @@
extern int register_reboot_notifier(struct notifier_block *);
extern int unregister_reboot_notifier(struct notifier_block *);

+/* For use by swsusp only */
+extern struct notifier_block *reboot_notifier_list;

/*
* Architecture-specific implementations of sys_reboot commands.
diff -ur linux-2.6.9-peval-hg/include/linux/suspend.h linux-2.6.9-peval-hg-ppc.old/include/linux/suspend.h
--- linux-2.6.9-peval-hg/include/linux/suspend.h 2004-11-20 14:14:45.000000000 +0800
+++ linux-2.6.9-peval-hg-ppc.old/include/linux/suspend.h 2004-11-20 16:17:05.000000000 +0800
@@ -1,7 +1,7 @@
#ifndef _LINUX_SWSUSP_H
#define _LINUX_SWSUSP_H

-#ifdef CONFIG_X86
+#if (defined CONFIG_X86) || (defined CONFIG_PPC32)
#include <asm/suspend.h>
#endif
#include <linux/swap.h>
diff -ur linux-2.6.9-peval-hg/kernel/power/disk.c linux-2.6.9-peval-hg-ppc.old/kernel/power/disk.c
--- linux-2.6.9-peval-hg/kernel/power/disk.c 2004-11-20 14:51:21.000000000 +0800
+++ linux-2.6.9-peval-hg-ppc.old/kernel/power/disk.c 2004-11-20 16:19:03.000000000 +0800
@@ -16,6 +16,7 @@
#include <linux/device.h>
#include <linux/delay.h>
#include <linux/fs.h>
+#include <linux/reboot.h>
#include <linux/device.h>
#include "power.h"

@@ -50,14 +51,16 @@
unsigned long flags;
int error = 0;

- local_irq_save(flags);
switch(mode) {
case PM_DISK_PLATFORM:
- device_power_down(PMSG_SUSPEND);
+ /* device_power_down(PMSG_SUSPEND); */
+ local_irq_save(flags);
error = pm_ops->enter(PM_SUSPEND_DISK);
+ local_irq_restore(flags);
break;
case PM_DISK_SHUTDOWN:
printk("Powering off system\n");
+ notifier_call_chain(&reboot_notifier_list, SYS_POWER_OFF, NULL);
device_shutdown();
machine_power_off();
break;
diff -ur linux-2.6.9-peval-hg/kernel/power/disk.c~ linux-2.6.9-peval-hg-ppc.old/kernel/power/disk.c~
--- linux-2.6.9-peval-hg/kernel/power/disk.c~ 2004-11-20 14:14:45.000000000 +0800
+++ linux-2.6.9-peval-hg-ppc.old/kernel/power/disk.c~ 2004-11-20 14:51:21.000000000 +0800
@@ -29,6 +29,8 @@
extern int swsusp_resume(void);
extern int swsusp_free(void);

+extern int write_page_caches(void);
+extern int read_page_caches(void);

static int noresume = 0;
char resume_file[256] = CONFIG_PM_STD_PARTITION;
@@ -106,6 +108,7 @@
}
}

+
static inline void platform_finish(void)
{
if (pm_disk_mode == PM_DISK_PLATFORM) {
@@ -118,13 +121,14 @@
{
device_resume();
platform_finish();
+ read_page_caches();
enable_nonboot_cpus();
thaw_processes();
pm_restore_console();
}


-static int prepare(void)
+static int prepare(int resume)
{
int error;

@@ -144,9 +148,13 @@
}

/* Free memory before shutting down devices. */
- free_some_memory();
+ /* free_some_memory(); */

disable_nonboot_cpus();
+ if (!resume)
+ if ((error = write_page_caches())) {
+ goto Finish;
+ }
if ((error = device_suspend(PMSG_FREEZE))) {
printk("Some devices failed to suspend\n");
goto Finish;
@@ -176,7 +184,7 @@
{
int error;

- if ((error = prepare()))
+ if ((error = prepare(0)))
return error;

pr_debug("PM: Attempting to suspend to disk.\n");
@@ -233,7 +241,7 @@

pr_debug("PM: Preparing system for restore.\n");

- if ((error = prepare()))
+ if ((error = prepare(1)))
goto Free;

barrier();
Only in linux-2.6.9-peval-hg-ppc.old/kernel/power: disk.c.rej
diff -ur linux-2.6.9-peval-hg/kernel/power/main.c linux-2.6.9-peval-hg-ppc.old/kernel/power/main.c
--- linux-2.6.9-peval-hg/kernel/power/main.c 2004-11-20 14:14:45.000000000 +0800
+++ linux-2.6.9-peval-hg-ppc.old/kernel/power/main.c 2004-11-20 16:17:05.000000000 +0800
@@ -4,7 +4,7 @@
* Copyright (c) 2003 Patrick Mochel
* Copyright (c) 2003 Open Source Development Lab
*
- * This file is release under the GPLv2
+ * This file is released under the GPLv2
*
*/

diff -ur linux-2.6.9-peval-hg/kernel/power/swsusp.c linux-2.6.9-peval-hg-ppc.old/kernel/power/swsusp.c
--- linux-2.6.9-peval-hg/kernel/power/swsusp.c 2004-11-20 16:04:27.000000000 +0800
+++ linux-2.6.9-peval-hg-ppc.old/kernel/power/swsusp.c 2004-11-20 16:46:14.000000000 +0800
@@ -1138,7 +1138,7 @@
return error;
}

-
+#if defined(__i386__)
asmlinkage int swsusp_restore(void)
{
BUG_ON (pagedir_order_check != pagedir_order);
@@ -1149,6 +1149,7 @@
wbinvd(); /* Nigel says wbinvd here is good idea... */
return 0;
}
+#endif

int swsusp_resume(void)
{
@@ -1453,7 +1454,7 @@
return -ENOMEM;
pagedir_nosave = (struct pbe *)addr;

- pr_debug("pmdisk: Reading pagedir (%d Pages)\n",n);
+ pr_debug("swsusp: Reading pagedir (%d Pages)\n",n);

for (i = 0; i < n && !error; i++, addr += PAGE_SIZE) {
unsigned long offset = swp_offset(swsusp_info.pagedir[i]);
@@ -1483,7 +1484,7 @@
}

/**
- * pmdisk_read - Read saved image from swap.
+ * swsusp_read - Read saved image from swap.
*/

int __init swsusp_read(void)
@@ -1507,6 +1508,6 @@
if (!error)
pr_debug("Reading resume file was successful\n");
else
- pr_debug("pmdisk: Error %d resuming\n", error);
+ pr_debug("swsusp: Error %d resuming\n", error);
return error;
}
diff -ur linux-2.6.9-peval-hg/kernel/power/swsusp.c~ linux-2.6.9-peval-hg-ppc.old/kernel/power/swsusp.c~
--- linux-2.6.9-peval-hg/kernel/power/swsusp.c~ 2004-11-20 14:14:45.000000000 +0800
+++ linux-2.6.9-peval-hg-ppc.old/kernel/power/swsusp.c~ 2004-11-20 16:04:27.000000000 +0800
@@ -76,6 +76,7 @@

/* Variables to be preserved over suspend */
static int pagedir_order_check;
+static int nr_copy_pages_check;

extern char resume_file[];
static dev_t resume_device;
@@ -302,6 +303,12 @@
printk( "\b\b\b\b%3d%%", i / mod );
error = write_page((pagedir_nosave+i)->address,
&((pagedir_nosave+i)->swap_address));
+#ifdef PCS_DEBUG
+ pr_debug("data_write: %p %p %u\n",
+ (void *)(pagedir_nosave+i)->address,
+ (void *)(pagedir_nosave+i)->orig_address,
+ (pagedir_nosave+i)->swap_address);
+#endif
}
printk("\b\b\b\bdone\n");
return error;
@@ -504,6 +511,327 @@
return 0;
}

+/**
+ * calc_order - Determine the order of allocation needed for pagedir_save.
+ *
+ * This looks tricky, but is just subtle. Please fix it some time.
+ * Since there are %nr_copy_pages worth of pages in the snapshot, we need
+ * to allocate enough contiguous space to hold
+ * (%nr_copy_pages * sizeof(struct pbe)),
+ * which has the saved/orig locations of the page..
+ *
+ * SUSPEND_PD_PAGES() tells us how many pages we need to hold those
+ * structures, then we call get_bitmask_order(), which will tell us the
+ * last bit set in the number, starting with 1. (If we need 30 pages, that
+ * is 0x0000001e in hex. The last bit is the 5th, which is the order we
+ * would use to allocate 32 contiguous pages).
+ *
+ * Since we also need to save those pages, we add the number of pages that
+ * we need to nr_copy_pages, and in case of an overflow, do the
+ * calculation again to update the number of pages needed.
+ *
+ * With this model, we will tend to waste a lot of memory if we just cross
+ * an order boundary. Plus, the higher the order of allocation that we try
+ * to do, the more likely we are to fail in a low-memory situation
+ * (though we're unlikely to get this far in such a case, since swsusp
+ * requires half of memory to be free anyway).
+ */
+
+static void calc_order(int *po, int *nr)
+{
+ int diff = 0;
+ int order = 0;
+
+ do {
+ diff = get_bitmask_order(SUSPEND_PD_PAGES(*nr)) - order;
+ if (diff) {
+ order += diff;
+ *nr += 1 << diff;
+ }
+ } while(diff);
+ *po = order;
+}
+
+typedef int (*do_page_t)(struct page *page, void *p);
+
+static int foreach_zone_page(struct zone *zone, do_page_t fun, void *p)
+{
+ int inactive = 0, active = 0;
+
+ /* spin_lock_irq(&zone->lru_lock); */
+ if (zone->nr_inactive) {
+ struct list_head * entry = zone->inactive_list.prev;
+ while (entry != &zone->inactive_list) {
+ if (fun) {
+ struct page * page = list_entry(entry, struct page, lru);
+ inactive += fun(page, p);
+ } else {
+ inactive ++;
+ }
+ entry = entry->prev;
+ }
+ }
+ if (zone->nr_active) {
+ struct list_head * entry = zone->active_list.prev;
+ while (entry != &zone->active_list) {
+ if (fun) {
+ struct page * page = list_entry(entry, struct page, lru);
+ active += fun(page, p);
+ } else {
+ active ++;
+ }
+ entry = entry->prev;
+ }
+ }
+ /* spin_unlock_irq(&zone->lru_lock); */
+
+ return (active + inactive);
+}
+
+/* I'll move this to include/linux/page-flags.h */
+#define PG_pcs (PG_nosave_free + 1)
+
+#define SetPagePcs(page) set_bit(PG_pcs, &(page)->flags)
+#define ClearPagePcs(page) clear_bit(PG_pcs, &(page)->flags)
+#define PagePcs(page) test_bit(PG_pcs, &(page)->flags)
+
+static int setup_pcs_pe(struct page *page, void *p)
+{
+ suspend_pagedir_t **pe = p;
+ unsigned long pfn = page_to_pfn(page);
+
+ BUG_ON(PageReserved(page) && PageNosave(page));
+ if (!pfn_valid(pfn)) {
+ printk("not valid page\n");
+ return 0;
+ }
+ if (PageNosave(page)) {
+ printk("nosave\n");
+ return 0;
+ }
+ if (PageReserved(page) /*&& pfn_is_nosave(pfn)*/) {
+ printk("[nosave]\n");
+ return 0;
+ }
+ if (PageSlab(page)) {
+ printk("slab\n");
+ return (0);
+ }
+ if (pe && *pe) {
+ BUG_ON(!PagePcs(page));
+ (*pe)->address = (long) page_address(page);
+ (*pe) ++;
+ }
+ SetPagePcs(page);
+
+ return (1);
+}
+
+static int count_pcs(struct zone *zone, suspend_pagedir_t **pe)
+{
+ return foreach_zone_page(zone, setup_pcs_pe, pe);
+}
+
+static suspend_pagedir_t *pagedir_cache = NULL;
+static int nr_copy_pcs = 0;
+static int pcs_order = 0;
+
+static int alloc_pagedir_cache(void)
+{
+ int need_nr_copy_pcs = nr_copy_pcs;
+
+ calc_order(&pcs_order, &need_nr_copy_pcs);
+ pagedir_cache = (suspend_pagedir_t *)
+ __get_free_pages(GFP_ATOMIC | __GFP_COLD, pcs_order);
+ if (!pagedir_cache)
+ return -ENOMEM;
+ memset(pagedir_cache, 0, (1 << pcs_order) * PAGE_SIZE);
+
+ pr_debug("alloc pcs %p, %d\n", pagedir_cache, pcs_order);
+
+ return 0;
+}
+
+static void page_cache_unlock(void)
+{
+ struct zone *zone;
+
+ for_each_zone(zone) {
+ if (!is_highmem(zone)) {
+ spin_unlock_irq(&zone->lru_lock);
+ }
+ }
+}
+
+static void page_cache_lock(void)
+{
+ struct zone *zone;
+
+ for_each_zone(zone) {
+ if (!is_highmem(zone)) {
+ spin_lock_irq(&zone->lru_lock);
+ }
+ }
+}
+int bio_read_page(pgoff_t page_off, void * page);
+
+int read_page_caches(void)
+{
+ struct pbe * p;
+ int error = 0, i;
+ swp_entry_t entry;
+ int mod = nr_copy_pcs / 100;
+
+ printk( "Reading PageCaches from swap (%d pages)... ", nr_copy_pcs);
+ for(i = 0, p = pagedir_cache; i < nr_copy_pcs && !error; i++, p++) {
+ if (!(i%100))
+ printk( "\b\b\b\b%3d%%", i / mod );
+ error = bio_read_page(swp_offset(p->swap_address),
+ (void *)p->address);
+#ifdef PCS_DEBUG
+ pr_debug("pcs_read: %p %p %u\n",
+ (void *)p->address, (void *)p->orig_address,
+ swp_offset(p->swap_address));
+#endif
+ }
+
+ for (i = 0; i < nr_copy_pcs; i++) {
+ entry = (pagedir_cache + i)->swap_address;
+ if (entry.val)
+ swap_free(entry);
+ }
+ free_pages((unsigned long)pagedir_cache, pcs_order);
+
+ printk("\b\b\b\bdone\n");
+
+ page_cache_unlock();
+
+ return (0);
+}
+
+static int pcs_write(void)
+{
+ int error = 0;
+ int i;
+ int mod = nr_copy_pcs / 100;
+
+ printk( "Writing PageCaches to swap (%d pages)... ", nr_copy_pcs);
+ for (i = 0; i < nr_copy_pcs && !error; i++) {
+ if (!(i%100))
+ printk( "\b\b\b\b%3d%%", i / mod );
+ error = write_page((pagedir_cache+i)->address,
+ &((pagedir_cache+i)->swap_address));
+#ifdef PCS_DEBUG
+ pr_debug("pcs_write: %p %p %u\n",
+ (void *)(pagedir_cache+i)->address,
+ (void *)(pagedir_cache+i)->orig_address,
+ (pagedir_cache+i)->swap_address);
+#endif
+ }
+ printk("\b\b\b\bdone\n");
+
+ return error;
+}
+
+static void count_data_pages(void);
+static int swsusp_alloc(void);
+
+static void page_caches_recal(void)
+{
+ struct zone *zone;
+ int i;
+
+ for (i = 0; i < max_mapnr; i++)
+ ClearPagePcs(mem_map+i);
+
+ nr_copy_pcs = 0;
+ drain_local_pages();
+ for_each_zone(zone) {
+ if (!is_highmem(zone)) {
+ nr_copy_pcs += count_pcs(zone, NULL);
+ }
+ }
+}
+
+int write_page_caches(void)
+{
+ struct zone *zone;
+ suspend_pagedir_t *pe = NULL;
+ int error;
+ int recal = 0;
+
+ page_cache_lock();
+ page_caches_recal();
+
+ if (nr_copy_pcs == 0) {
+ page_cache_unlock();
+ return (0);
+ }
+ printk("swsusp: Need to copy %u pcs\n", nr_copy_pcs);
+
+ if ((error = swsusp_swap_check())) {
+ page_cache_unlock();
+ return error;
+ }
+
+ if ((error = alloc_pagedir_cache())) {
+ page_cache_unlock();
+ return error;
+ }
+
+ drain_local_pages();
+ count_data_pages();
+ printk("swsusp(1/2): Need to copy %u pages, %u pcs\n",
+ nr_copy_pages, nr_copy_pcs);
+
+ while (nr_free_pages() < nr_copy_pages + PAGES_FOR_IO) {
+ if (recal == 0) {
+ page_cache_unlock();
+ }
+ printk("#");
+ shrink_all_memory(nr_copy_pages + PAGES_FOR_IO);
+ recal ++;
+ }
+
+ if (recal) {
+ page_cache_lock();
+ page_caches_recal();
+ drain_local_pages();
+ count_data_pages();
+ printk("swsusp(1/2): Need to copy %u pages, %u pcs\n",
+ nr_copy_pages, nr_copy_pcs);
+ }
+
+ drain_local_pages();
+ count_data_pages();
+ printk("swsusp(2/2): Need to copy %u pages, %u pcs\n",
+ nr_copy_pages, nr_copy_pcs);
+
+ error = swsusp_alloc();
+ if (error) {
+ printk("swsusp_alloc failed, %d\n", error);
+ return error;
+ }
+
+ drain_local_pages();
+ count_data_pages();
+ printk("swsusp(final): Need to copy %u/%u pages, %u pcs\n",
+ nr_copy_pages, nr_copy_pages_check, nr_copy_pcs);
+
+ pe = pagedir_cache;
+
+ drain_local_pages();
+ for_each_zone(zone) {
+ if (!is_highmem(zone)) {
+ count_pcs(zone, &pe);
+ }
+ }
+ error = pcs_write();
+ if (error)
+ return error;
+
+ return (0);
+}

static int pfn_is_nosave(unsigned long pfn)
{
@@ -539,7 +867,10 @@
}
if (PageNosaveFree(page))
return 0;
-
+ if (PagePcs(page)) {
+ BUG_ON(zone->nr_inactive == 0 && zone->nr_active == 0);
+ return 0;
+ }
return 1;
}

@@ -549,10 +880,12 @@
unsigned long zone_pfn;

nr_copy_pages = 0;
+ nr_copy_pcs = 0;

for_each_zone(zone) {
if (is_highmem(zone))
continue;
+ nr_copy_pcs += count_pcs(zone, NULL);
mark_free_pages(zone);
for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn)
nr_copy_pages += saveable(zone, &zone_pfn);
@@ -588,47 +921,6 @@
}


-/**
- * calc_order - Determine the order of allocation needed for pagedir_save.
- *
- * This looks tricky, but is just subtle. Please fix it some time.
- * Since there are %nr_copy_pages worth of pages in the snapshot, we need
- * to allocate enough contiguous space to hold
- * (%nr_copy_pages * sizeof(struct pbe)),
- * which has the saved/orig locations of the page..
- *
- * SUSPEND_PD_PAGES() tells us how many pages we need to hold those
- * structures, then we call get_bitmask_order(), which will tell us the
- * last bit set in the number, starting with 1. (If we need 30 pages, that
- * is 0x0000001e in hex. The last bit is the 5th, which is the order we
- * would use to allocate 32 contiguous pages).
- *
- * Since we also need to save those pages, we add the number of pages that
- * we need to nr_copy_pages, and in case of an overflow, do the
- * calculation again to update the number of pages needed.
- *
- * With this model, we will tend to waste a lot of memory if we just cross
- * an order boundary. Plus, the higher the order of allocation that we try
- * to do, the more likely we are to fail in a low-memory situtation
- * (though we're unlikely to get this far in such a case, since swsusp
- * requires half of memory to be free anyway).
- */
-
-
-static void calc_order(void)
-{
- int diff = 0;
- int order = 0;
-
- do {
- diff = get_bitmask_order(SUSPEND_PD_PAGES(nr_copy_pages)) - order;
- if (diff) {
- order += diff;
- nr_copy_pages += 1 << diff;
- }
- } while(diff);
- pagedir_order = order;
-}


/**
@@ -640,13 +932,15 @@

static int alloc_pagedir(void)
{
- calc_order();
+ calc_order(&pagedir_order, &nr_copy_pages);
pagedir_save = (suspend_pagedir_t *)__get_free_pages(GFP_ATOMIC | __GFP_COLD,
pagedir_order);
if (!pagedir_save)
return -ENOMEM;
memset(pagedir_save, 0, (1 << pagedir_order) * PAGE_SIZE);
+
pagedir_nosave = pagedir_save;
+ pr_debug("pagedir %p, %d\n", pagedir_save, pagedir_order);
return 0;
}

@@ -752,15 +1046,16 @@
return -ENOSPC;

if ((error = alloc_pagedir())) {
- pr_debug("suspend: Allocating pagedir failed.\n");
+ printk("suspend: Allocating pagedir failed.\n");
return error;
}
if ((error = alloc_image_pages())) {
- pr_debug("suspend: Allocating image pages failed.\n");
+ printk("suspend: Allocating image pages failed.\n");
swsusp_free();
return error;
}

+ nr_copy_pages_check = nr_copy_pages;
pagedir_order_check = pagedir_order;
return 0;
}
@@ -768,7 +1063,6 @@
int suspend_prepare_image(void)
{
unsigned int nr_needed_pages;
- int error;

pr_debug("swsusp: critical section: \n");
if (save_highmem()) {
@@ -777,15 +1071,8 @@
return -ENOMEM;
}

- drain_local_pages();
- count_data_pages();
- printk("swsusp: Need to copy %u pages\n",nr_copy_pages);
nr_needed_pages = nr_copy_pages + PAGES_FOR_IO;

- error = swsusp_alloc();
- if (error)
- return error;
-
/* During allocating of suspend pagedir, new cold pages may appear.
* Kill them.
*/
@@ -855,7 +1142,8 @@
asmlinkage int swsusp_restore(void)
{
BUG_ON (pagedir_order_check != pagedir_order);
-
+ BUG_ON (nr_copy_pages_check != nr_copy_pages);
+
/* Even mappings of "global" things (vmalloc) need to be fixed */
__flush_tlb_global();
wbinvd(); /* Nigel says wbinvd here is good idea... */
@@ -993,7 +1281,7 @@
return 0;
}

-static struct block_device * resume_bdev;
+static struct block_device * resume_bdev __nosavedata;

/**
* submit - submit BIO request.
@@ -1141,6 +1429,11 @@
printk( "\b\b\b\b%3d%%", i / mod );
error = bio_read_page(swp_offset(p->swap_address),
(void *)p->address);
+#ifdef PCS_DEBUG
+ pr_debug("data_read: %p %p %u\n",
+ (void *)p->address, (void *)p->orig_address,
+ swp_offset(p->swap_address));
+#endif
}
printk(" %d done.\n",i);
return error;
@@ -1207,7 +1500,7 @@
if (!IS_ERR(resume_bdev)) {
set_blocksize(resume_bdev, PAGE_SIZE);
error = read_suspend_image();
- blkdev_put(resume_bdev);
+ /* blkdev_put(resume_bdev); */
} else
error = PTR_ERR(resume_bdev);

Only in linux-2.6.9-peval-hg-ppc.old/kernel/power: swsusp.c.rej
diff -ur linux-2.6.9-peval-hg/kernel/sys.c linux-2.6.9-peval-hg-ppc.old/kernel/sys.c
--- linux-2.6.9-peval-hg/kernel/sys.c 2004-11-20 14:14:45.000000000 +0800
+++ linux-2.6.9-peval-hg-ppc.old/kernel/sys.c 2004-11-20 16:17:05.000000000 +0800
@@ -84,7 +84,7 @@
* and the like.
*/

-static struct notifier_block *reboot_notifier_list;
+struct notifier_block *reboot_notifier_list;
rwlock_t notifier_lock = RW_LOCK_UNLOCKED;

/**

--
Hu Gang / Steve
Linux Registered User 204016
GPG Public Key: http://soulinfo.com/~hugang/hugang.asc

2004-11-20 10:16:47

by Pavel Machek

[permalink] [raw]
Subject: Re: swsusp bigdiff [was Re: [PATCH] Software Suspend split to two stage V2.]

Hi!

> > > This patch using pagemap for PageSet2 bitmap, It increase suspend
> > > speed, In my PowerPC suspend only need 5 secs, cool.
> > >
> > > Test passed in my ppc and x86 laptop.
> > >
> > > ppc swsusp patch for 2.6.9
> > > http://honk.physik.uni-konstanz.de/~agx/linux-ppc/kernel/
> > > Have fun.
> >
> > BTW here's my curent bigdiff. It already has some rather nice
> > swsusp speedups. Please try it on your machine; if it works for you,
> > try to send your patches relative to this one. I hope to merge these
> > changes during 2.6.11.
>
> Really big diff, I'll trying.
>
> Here is my diff.
>
> Changes:
> * Change pcs_ to page_cachs_
> * Hold lru_lock to sure data not modified, I can't sure that full
> works, but tested passed.

I'd really like to understand why it works (and have it documented
somewhere).

A good test to break swsusp is to run a kernel compilation in one window
and suspend every 30 seconds from another one.
Pavel
--
People were complaining that M$ turns users into beta-testers...
...jr ghea gurz vagb qrirybcref, naq gurl frrz gb yvxr vg gung jnl!

2004-11-20 22:36:23

by Pavel Machek

[permalink] [raw]
Subject: Re: swsusp bigdiff [was Re: [PATCH] Software Suspend split to two stage V2.]

Hi!

> > > This patch using pagemap for PageSet2 bitmap, It increase suspend
> > > speed, In my PowerPC suspend only need 5 secs, cool.
> > >
> > > Test passed in my ppc and x86 laptop.
> > >
> > > ppc swsusp patch for 2.6.9
> > > http://honk.physik.uni-konstanz.de/~agx/linux-ppc/kernel/
> > > Have fun.
> >
> > BTW here's my curent bigdiff. It already has some rather nice
> > swsusp speedups. Please try it on your machine; if it works for you,
> > try to send your patches relative to this one. I hope to merge these
> > changes during 2.6.11.
>
> Here is the patch relative to your big diff. It tested pass with my x86
> pc, But the sysfs interface can't works, I using reboot system call.

Even without CONFIG_PREEMPT, it does a NULL pointer dereference in
copy_data_pages.
Pavel
--
People were complaining that M$ turns users into beta-testers...
...jr ghea gurz vagb qrirybcref, naq gurl frrz gb yvxr vg gung jnl!

2004-11-20 22:37:59

by Pavel Machek

[permalink] [raw]
Subject: Re: swsusp bigdiff [was Re: [PATCH] Software Suspend split to two stage V2.]

Hi!

> > > This patch using pagemap for PageSet2 bitmap, It increase suspend
> > > speed, In my PowerPC suspend only need 5 secs, cool.
> > >
> > > Test passed in my ppc and x86 laptop.
> > >
> > > ppc swsusp patch for 2.6.9
> > > http://honk.physik.uni-konstanz.de/~agx/linux-ppc/kernel/
> > > Have fun.
> >
> > BTW here's my curent bigdiff. It already has some rather nice
> > swsusp speedups. Please try it on your machine; if it works for you,
> > try to send your patches relative to this one. I hope to merge these
> > changes during 2.6.11.
>
> Here is the patch relative to your big diff. It tested pass with my x86
> pc, But the sysfs interface can't works, I using reboot system call.

Try enabling CONFIG_PREEMPT and see how it prints about 1000 warnings
and then oopses. (Okay, perhaps oops is because of highmem? I'll
check.)

Pavel
--
People were complaining that M$ turns users into beta-testers...
...jr ghea gurz vagb qrirybcref, naq gurl frrz gb yvxr vg gung jnl!

2004-11-20 22:37:59

by Pavel Machek

[permalink] [raw]
Subject: Re: swsusp bigdiff [was Re: [PATCH] Software Suspend split to two stage V2.]

Hi!

> > > This patch using pagemap for PageSet2 bitmap, It increase suspend
> > > speed, In my PowerPC suspend only need 5 secs, cool.
> > >
> > > Test passed in my ppc and x86 laptop.
> > >
> > > ppc swsusp patch for 2.6.9
> > > http://honk.physik.uni-konstanz.de/~agx/linux-ppc/kernel/
> > > Have fun.
> >
> > BTW here's my curent bigdiff. It already has some rather nice
> > swsusp speedups. Please try it on your machine; if it works for you,
> > try to send your patches relative to this one. I hope to merge these
> > changes during 2.6.11.
>
> Here is the patch relative to your big diff. It tested pass with my x86
> pc, But the sysfs interface can't works, I using reboot system call.

Okay, I tried without PREEMPT and HIGHMEM, and it seemed to work
okay. [Well, it was somehow too fast ;-)].
Pavel
--
People were complaining that M$ turns users into beta-testers...
...jr ghea gurz vagb qrirybcref, naq gurl frrz gb yvxr vg gung jnl!

2004-11-20 22:54:40

by Pavel Machek

[permalink] [raw]
Subject: Re: swsusp bigdiff [was Re: [PATCH] Software Suspend split to two stage V2.]

Hi!

> > > This patch using pagemap for PageSet2 bitmap, It increase suspend
> > > speed, In my PowerPC suspend only need 5 secs, cool.
> > >
> > > Test passed in my ppc and x86 laptop.
> > >
> > > ppc swsusp patch for 2.6.9
> > > http://honk.physik.uni-konstanz.de/~agx/linux-ppc/kernel/
> > > Have fun.
> >
> > BTW here's my curent bigdiff. It already has some rather nice
> > swsusp speedups. Please try it on your machine; if it works for you,
> > try to send your patches relative to this one. I hope to merge these
> > changes during 2.6.11.
> >
>
> Here is the patch relative to your big diff. It tested pass with my x86
> pc, But the sysfs interface can't works, I using reboot system call.

Without PREEMPT and HIGHMEM it worked okay on an idle system. When I
started kernel compilation while trying to swsusp, it crashed on
resume.
Pavel
--
People were complaining that M$ turns users into beta-testers...
...jr ghea gurz vagb qrirybcref, naq gurl frrz gb yvxr vg gung jnl!

2004-11-21 07:54:51

by Hu Gang

[permalink] [raw]
Subject: Re: swsusp bigdiff [was Re: [PATCH] Software Suspend split to two stage V2.]

On Sat, Nov 20, 2004 at 11:49:37PM +0100, Pavel Machek wrote:
> Hi!
>
> > > > This patch using pagemap for PageSet2 bitmap, It increase suspend
> > > > speed, In my PowerPC suspend only need 5 secs, cool.
> > > >
> > > > Test passed in my ppc and x86 laptop.
> > > >
> > > > ppc swsusp patch for 2.6.9
> > > > http://honk.physik.uni-konstanz.de/~agx/linux-ppc/kernel/
> > > > Have fun.
> > >
> > > BTW here's my curent bigdiff. It already has some rather nice
> > > swsusp speedups. Please try it on your machine; if it works for you,
> > > try to send your patches relative to this one. I hope to merge these
> > > changes during 2.6.11.
> > >
> >
> > Here is the patch relative to your big diff. It tested pass with my x86
> > pc, But the sysfs interface can't works, I using reboot system call.
>
> Without PREEMPT and HIGHMEM it worked okay on an idle system. When I
> started kernel compilation while trying to swsusp, it crashed on
> resume.
> Pavel

Good, so it works not only for me. Here is the updated patch relative to
your diff. Now we no longer need contiguous pages to store the pagecache
pagedir. Have a look, please.

I don't want this patch to be merged into the mainline kernel yet; just
have a look, thanks.

TODO:
* I have to be sure the pagecache pages are not modified after they are
saved to the swap device and until the memory snapshot has finished. This
is why it currently only works on an idle system. But how can I be sure of that? :)
* Add comments to the source code. :)
* Clean up the ppc part, then send it to Ben.

diff -ur linux-2.6.9-peval/kernel/power/disk.c linux-2.6.9-peval-hg/kernel/power/disk.c
--- linux-2.6.9-peval/kernel/power/disk.c 2004-11-20 14:14:45.000000000 +0800
+++ linux-2.6.9-peval-hg/kernel/power/disk.c 2004-11-20 14:51:21.000000000 +0800
@@ -29,6 +29,8 @@
extern int swsusp_resume(void);
extern int swsusp_free(void);

+extern int write_page_caches(void);
+extern int read_page_caches(void);

static int noresume = 0;
char resume_file[256] = CONFIG_PM_STD_PARTITION;
@@ -106,6 +108,7 @@
}
}

+
static inline void platform_finish(void)
{
if (pm_disk_mode == PM_DISK_PLATFORM) {
@@ -118,13 +121,14 @@
{
device_resume();
platform_finish();
+ read_page_caches();
enable_nonboot_cpus();
thaw_processes();
pm_restore_console();
}


-static int prepare(void)
+static int prepare(int resume)
{
int error;

@@ -144,9 +148,13 @@
}

/* Free memory before shutting down devices. */
- free_some_memory();
+ /* free_some_memory(); */

disable_nonboot_cpus();
+ if (!resume)
+ if ((error = write_page_caches())) {
+ goto Finish;
+ }
if ((error = device_suspend(PMSG_FREEZE))) {
printk("Some devices failed to suspend\n");
goto Finish;
@@ -176,7 +184,7 @@
{
int error;

- if ((error = prepare()))
+ if ((error = prepare(0)))
return error;

pr_debug("PM: Attempting to suspend to disk.\n");
@@ -233,7 +241,7 @@

pr_debug("PM: Preparing system for restore.\n");

- if ((error = prepare()))
+ if ((error = prepare(1)))
goto Free;

barrier();
diff -ur linux-2.6.9-peval/kernel/power/swsusp.c linux-2.6.9-peval-hg/kernel/power/swsusp.c
--- linux-2.6.9-peval/kernel/power/swsusp.c 2004-11-20 14:14:45.000000000 +0800
+++ linux-2.6.9-peval-hg/kernel/power/swsusp.c 2004-11-20 23:52:26.000000000 +0800
@@ -76,6 +76,7 @@

/* Variables to be preserved over suspend */
static int pagedir_order_check;
+static int nr_copy_pages_check;

extern char resume_file[];
static dev_t resume_device;
@@ -302,6 +303,12 @@
printk( "\b\b\b\b%3d%%", i / mod );
error = write_page((pagedir_nosave+i)->address,
&((pagedir_nosave+i)->swap_address));
+#ifdef PCS_DEBUG
+ pr_debug("data_write: %p %p %u\n",
+ (void *)(pagedir_nosave+i)->address,
+ (void *)(pagedir_nosave+i)->orig_address,
+ (pagedir_nosave+i)->swap_address);
+#endif
}
printk("\b\b\b\bdone\n");
return error;
@@ -504,6 +511,452 @@
return 0;
}

+typedef int (*do_page_t)(struct page *page, int p);
+
+static int foreach_zone_page(struct zone *zone, do_page_t fun, int p)
+{
+ int inactive = 0, active = 0;
+
+ /* spin_lock_irq(&zone->lru_lock); */
+ if (zone->nr_inactive) {
+ struct list_head * entry = zone->inactive_list.prev;
+ while (entry != &zone->inactive_list) {
+ if (fun) {
+ struct page * page = list_entry(entry, struct page, lru);
+ inactive += fun(page, p);
+ } else {
+ inactive ++;
+ }
+ entry = entry->prev;
+ }
+ }
+ if (zone->nr_active) {
+ struct list_head * entry = zone->active_list.prev;
+ while (entry != &zone->active_list) {
+ if (fun) {
+ struct page * page = list_entry(entry, struct page, lru);
+ active += fun(page, p);
+ } else {
+ active ++;
+ }
+ entry = entry->prev;
+ }
+ }
+ /* spin_unlock_irq(&zone->lru_lock); */
+
+ return (active + inactive);
+}
+
+/* I'll move this to include/linux/page-flags.h */
+#define PG_pcs (PG_nosave_free + 1)
+
+#define SetPagePcs(page) set_bit(PG_pcs, &(page)->flags)
+#define ClearPagePcs(page) clear_bit(PG_pcs, &(page)->flags)
+#define PagePcs(page) test_bit(PG_pcs, &(page)->flags)
+
+/* #define PCS_DEBUG */
+
+static struct pbe *find_pbe_by_index(int index);
+
+static int nr_copy_pcs = 0;
+
+static int setup_pcs_pe(struct page *page, int setup)
+{
+ unsigned long pfn = page_to_pfn(page);
+
+ BUG_ON(PageReserved(page) && PageNosave(page));
+ if (!pfn_valid(pfn)) {
+ printk("not valid page\n");
+ return 0;
+ }
+ if (PageNosave(page)) {
+ printk("nosave\n");
+ return 0;
+ }
+ if (PageReserved(page) /*&& pfn_is_nosave(pfn)*/) {
+ printk("[nosave]\n");
+ return 0;
+ }
+ if (PageSlab(page)) {
+ printk("slab\n");
+ return (0);
+ }
+ if (setup) {
+ struct pbe *p = find_pbe_by_index(nr_copy_pcs);
+ p->address = (long)page_address(page);
+#ifdef PCS_DEBUG
+ printk("setup_pcs: cur %p, addr %p, next %p, nr%d\n",
+ p, p->address, p->orig_address, nr_copy_pcs);
+#endif
+ nr_copy_pcs ++;
+ }
+ SetPagePcs(page);
+
+ return (1);
+}
+
+static int count_pcs(struct zone *zone, int p)
+{
+ return foreach_zone_page(zone, setup_pcs_pe, p);
+}
+
+static suspend_pagedir_t *pagedir_cache = NULL;
+
+/*
+ * struct pbe fields as redefined for the PageCache pagedir.
+ *
+ * struct pbe {
+ * unsigned long address;
+ * unsigned long orig_address; pointer of next struct pbe
+ * swp_entry_t swap_address;
+ * swp_entry_t dummy; current index
+ * }
+ *
+ */
+static suspend_pagedir_t * alloc_one_pagedir(suspend_pagedir_t *prev,
+ unsigned int nums)
+{
+ suspend_pagedir_t *pgdir;
+ int i;
+
+ pgdir = (suspend_pagedir_t *)
+ __get_free_pages(GFP_ATOMIC | __GFP_COLD, 0);
+ if (!pgdir) {
+ return NULL;
+ }
+#ifdef PCS_DEBUG
+ printk("pgdir: %p, %p, %d\n", pgdir, prev, sizeof(suspend_pagedir_t));
+#endif
+ memset(pgdir, 0, PAGE_SIZE);
+ for (i = 0; i < nums; i++) {
+ pgdir[i].dummy.val = i;
+ pgdir[i].orig_address = (unsigned long)NULL;
+ if (prev == NULL) continue;
+ prev[i].orig_address = (unsigned long)pgdir;
+ }
+
+ return (pgdir);
+}
+
+/* for each pagedir */
+typedef int (*susp_pgdir_t)(suspend_pagedir_t *cur, void *fun, void *arg);
+
+static int for_each_pgdir(susp_pgdir_t fun, void *subfun, void *arg)
+{
+ suspend_pagedir_t *pgdir = pagedir_cache;
+ int error = 0;
+
+ while (pgdir != NULL) {
+ suspend_pagedir_t *next = (suspend_pagedir_t *)pgdir->orig_address;
+#ifdef PCS_DEBUG
+ printk("next %p, cur %p\n", next, pgdir);
+#endif
+ error = fun(pgdir, subfun, arg);
+ if (error) return error;
+ pgdir = next;
+ }
+
+ return (0);
+}
+
+/* free one pagedir */
+static int free_one_pagedir(suspend_pagedir_t *pgdir, void *fun, void *arg)
+{
+ free_page((unsigned long)pgdir);
+ return (0);
+}
+
+typedef int (*swsup_pbe_t)(struct pbe *bpe, void *p);
+
+static int for_pbe_one_pgdir(suspend_pagedir_t *pgdir, void *_fun, void *arg)
+{
+ unsigned int num_pbes = PAGE_SIZE / sizeof(suspend_pagedir_t) - 1, nums;
+ swsup_pbe_t fun = _fun;
+ int error = 0;
+
+#ifdef PCS_DEBUG
+ printk("for_pbe_one_pgdir: %p, %p, %p\n", pgdir, _fun, arg);
+#endif
+ for (nums = 0; nums < num_pbes; nums++) {
+ error = fun(pgdir, arg);
+ pgdir ++;
+ if (error) return error;
+ }
+
+ return (0);
+}
+
+static int for_each_pbe(swsup_pbe_t fun, void *p)
+{
+ return for_each_pgdir(for_pbe_one_pgdir, fun, p);
+}
+
+static struct pbe *find_pbe_by_index(int index)
+{
+ unsigned int num_pbes = PAGE_SIZE / sizeof(suspend_pagedir_t) - 1,
+ nums = num_pbes;
+ suspend_pagedir_t *pgdir = pagedir_cache, *next;
+
+ while (pgdir != NULL) {
+ if (index < nums)
+ return pgdir + (index % num_pbes);
+ next = (suspend_pagedir_t *)pgdir->orig_address;
+ nums += num_pbes;
+ pgdir = next;
+ }
+
+ return (NULL);
+}
+
+static int alloc_pagedir_cache(void)
+{
+ unsigned int num_pbes = PAGE_SIZE / sizeof(suspend_pagedir_t) - 1, nums = 0;
+ suspend_pagedir_t *prev, *cur = NULL;
+
+ /* make sure a whole number of suspend_pagedir_t entries fits into one page */
+ BUG_ON(PAGE_SIZE % sizeof(suspend_pagedir_t));
+
+ /* alloc pagedir head */
+ pagedir_cache = alloc_one_pagedir(NULL, num_pbes);
+ if (!pagedir_cache) {
+ return -ENOMEM;
+ }
+ prev = pagedir_cache;
+
+ for (nums = num_pbes; nums < nr_copy_pcs; nums += num_pbes) {
+ cur = alloc_one_pagedir(prev, num_pbes);
+ if (!cur) {
+ goto no_mem;
+ }
+ prev = cur;
+ }
+
+ pr_debug("swsusp: nums %d, nums_pbes %d \n", nums, num_pbes);
+ return 0;
+
+no_mem:
+ printk("swsusp: alloc_pages failed, %d\n", nr_copy_pcs);
+ for_each_pgdir(free_one_pagedir, NULL, NULL);
+
+ return (-ENOMEM);
+}
+
+/* pagedir lock and unlock function */
+static void page_cache_unlock(void)
+{
+ struct zone *zone;
+
+ for_each_zone(zone) {
+ if (!is_highmem(zone)) {
+ spin_unlock_irq(&zone->lru_lock);
+ }
+ }
+}
+
+static void page_cache_lock(void)
+{
+ struct zone *zone;
+
+ for_each_zone(zone) {
+ if (!is_highmem(zone)) {
+ spin_lock_irq(&zone->lru_lock);
+ }
+ }
+}
+
+int bio_read_page(pgoff_t page_off, void * page);
+
+static int mod = 1;
+
+static int pagecache_read_pbe(struct pbe *p, void *tmp)
+{
+ int error = 0, i = *(int*)tmp;
+ swp_entry_t entry;
+
+ if (!(i%100))
+ printk( "\b\b\b\b%3d%%", i / mod );
+
+ if (i == nr_copy_pcs) return -1;
+
+ (*(int*)tmp) ++;
+#ifdef PCS_DEBUG
+ printk("pagecache_read_pbe: %p %p %u\n",
+ (void *)p->address, (void *)p->orig_address,
+ swp_offset(p->swap_address));
+#endif
+
+ error = bio_read_page(swp_offset(p->swap_address), (void *)p->address);
+ if (error) return error;
+
+ entry = p->swap_address;
+ if (entry.val)
+ swap_free(entry);
+
+ return (0);
+}
+
+int read_page_caches(void)
+{
+ int error = 0, i = 0;
+
+ mod = nr_copy_pcs / 100;
+
+ printk( "Reading PageCaches from swap (%d pages)... ", nr_copy_pcs);
+ error = for_each_pbe(pagecache_read_pbe, &i);
+ printk("\b\b\b\bdone\n");
+
+ for_each_pgdir(free_one_pagedir, NULL, NULL);
+
+ page_cache_unlock();
+
+ if (i == nr_copy_pcs) return (0);
+
+ return error;
+}
+
+static int pagecache_write_pbe(struct pbe *p, void *tmp)
+{
+ int error = 0, i = *(int*)tmp;
+
+ if (!(i%100))
+ printk( "\b\b\b\b%3d%%", i / mod );
+
+ if (i == nr_copy_pcs) return -1;
+
+ (*(int*)tmp) ++;
+ error = write_page(p->address, &p->swap_address);
+ if (error) return error;
+
+#ifdef PCS_DEBUG
+ printk("pagecache_write_pbe: %p, %p %p %u\n",
+ p, (void *)p->address, (void *)p->orig_address, p->swap_address);
+#endif
+
+ return (0);
+}
+
+static int pcs_write(void)
+{
+ int i = 0, error;
+
+ page_cache_lock();
+
+ mod = nr_copy_pcs / 100;
+
+ printk( "Writing PageCaches to swap (%d pages)... ", nr_copy_pcs);
+ error = for_each_pbe(pagecache_write_pbe, &i);
+ printk("\b\b\b\bdone\n");
+
+ if (i == nr_copy_pcs) return (0);
+
+ return error;
+}
+
+static int setup_pagedir_pbe(void)
+{
+ struct zone *zone;
+
+ nr_copy_pcs = 0;
+ for_each_zone(zone) {
+ if (!is_highmem(zone)) {
+ count_pcs(zone, 1);
+ }
+ }
+
+ return (0);
+}
+
+static void count_data_pages(void);
+static int swsusp_alloc(void);
+
+static void page_caches_recal(void)
+{
+ struct zone *zone;
+ int i;
+
+ for (i = 0; i < max_mapnr; i++)
+ ClearPagePcs(mem_map+i);
+
+ nr_copy_pcs = 0;
+ drain_local_pages();
+ for_each_zone(zone) {
+ if (!is_highmem(zone)) {
+ nr_copy_pcs += count_pcs(zone, 0);
+ }
+ }
+}
+
+int write_page_caches(void)
+{
+ int error;
+ int recal = 0;
+
+ page_cache_lock();
+ page_caches_recal();
+
+ if (nr_copy_pcs == 0) {
+ page_cache_unlock();
+ return (0);
+ }
+ printk("swsusp: Need to copy %u pcs\n", nr_copy_pcs);
+
+ if ((error = swsusp_swap_check())) {
+ page_cache_unlock();
+ return error;
+ }
+
+ if ((error = alloc_pagedir_cache())) {
+ page_cache_unlock();
+ return error;
+ }
+
+ drain_local_pages();
+ count_data_pages();
+ printk("swsusp(1/2): Need to copy %u pages, %u pcs\n",
+ nr_copy_pages, nr_copy_pcs);
+
+ while (nr_free_pages() < nr_copy_pages + PAGES_FOR_IO) {
+ if (recal == 0) {
+ page_cache_unlock();
+ }
+ printk("#");
+ shrink_all_memory(nr_copy_pages + PAGES_FOR_IO);
+ recal ++;
+ }
+
+ if (recal) {
+ page_cache_lock();
+ page_caches_recal();
+ drain_local_pages();
+ count_data_pages();
+ printk("swsusp(1/2): Need to copy %u pages, %u pcs\n",
+ nr_copy_pages, nr_copy_pcs);
+ }
+
+ drain_local_pages();
+ count_data_pages();
+ printk("swsusp(2/2): Need to copy %u pages, %u pcs\n",
+ nr_copy_pages, nr_copy_pcs);
+
+ error = swsusp_alloc();
+ if (error) {
+ printk("swsusp_alloc failed, %d\n", error);
+ return error;
+ }
+
+ drain_local_pages();
+ count_data_pages();
+ printk("swsusp(final): Need to copy %u/%u pages, %u pcs\n",
+ nr_copy_pages, nr_copy_pages_check, nr_copy_pcs);
+
+ setup_pagedir_pbe();
+
+ error = pcs_write();
+ if (error)
+ return error;
+
+ return (0);
+}

static int pfn_is_nosave(unsigned long pfn)
{
@@ -539,7 +992,10 @@
}
if (PageNosaveFree(page))
return 0;
-
+ if (PagePcs(page)) {
+ BUG_ON(zone->nr_inactive == 0 && zone->nr_active == 0);
+ return 0;
+ }
return 1;
}

@@ -549,10 +1005,12 @@
unsigned long zone_pfn;

nr_copy_pages = 0;
+ nr_copy_pcs = 0;

for_each_zone(zone) {
if (is_highmem(zone))
continue;
+ nr_copy_pcs += count_pcs(zone, 0);
mark_free_pages(zone);
for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn)
nr_copy_pages += saveable(zone, &zone_pfn);
@@ -646,7 +1104,9 @@
if (!pagedir_save)
return -ENOMEM;
memset(pagedir_save, 0, (1 << pagedir_order) * PAGE_SIZE);
+
pagedir_nosave = pagedir_save;
+ pr_debug("pagedir %p, %d\n", pagedir_save, pagedir_order);
return 0;
}

@@ -752,15 +1212,16 @@
return -ENOSPC;

if ((error = alloc_pagedir())) {
- pr_debug("suspend: Allocating pagedir failed.\n");
+ printk("suspend: Allocating pagedir failed.\n");
return error;
}
if ((error = alloc_image_pages())) {
- pr_debug("suspend: Allocating image pages failed.\n");
+ printk("suspend: Allocating image pages failed.\n");
swsusp_free();
return error;
}

+ nr_copy_pages_check = nr_copy_pages;
pagedir_order_check = pagedir_order;
return 0;
}
@@ -768,7 +1229,6 @@
int suspend_prepare_image(void)
{
unsigned int nr_needed_pages;
- int error;

pr_debug("swsusp: critical section: \n");
if (save_highmem()) {
@@ -777,15 +1237,8 @@
return -ENOMEM;
}

- drain_local_pages();
- count_data_pages();
- printk("swsusp: Need to copy %u pages\n",nr_copy_pages);
nr_needed_pages = nr_copy_pages + PAGES_FOR_IO;

- error = swsusp_alloc();
- if (error)
- return error;
-
/* During allocating of suspend pagedir, new cold pages may appear.
* Kill them.
*/
@@ -855,10 +1308,12 @@
asmlinkage int swsusp_restore(void)
{
BUG_ON (pagedir_order_check != pagedir_order);
-
+ BUG_ON (nr_copy_pages_check != nr_copy_pages);
+#if defined(__i386__)
/* Even mappings of "global" things (vmalloc) need to be fixed */
__flush_tlb_global();
wbinvd(); /* Nigel says wbinvd here is good idea... */
+#endif
return 0;
}

@@ -993,7 +1448,7 @@
return 0;
}

-static struct block_device * resume_bdev;
+static struct block_device * resume_bdev __nosavedata;

/**
* submit - submit BIO request.
@@ -1141,6 +1596,11 @@
printk( "\b\b\b\b%3d%%", i / mod );
error = bio_read_page(swp_offset(p->swap_address),
(void *)p->address);
+#ifdef PCS_DEBUG
+ pr_debug("data_read: %p %p %u\n",
+ (void *)p->address, (void *)p->orig_address,
+ swp_offset(p->swap_address));
+#endif
}
printk(" %d done.\n",i);
return error;
@@ -1207,7 +1667,7 @@
if (!IS_ERR(resume_bdev)) {
set_blocksize(resume_bdev, PAGE_SIZE);
error = read_suspend_image();
- blkdev_put(resume_bdev);
+ /* blkdev_put(resume_bdev); */
} else
error = PTR_ERR(resume_bdev);

--
Hu Gang / Steve
Linux Registered User 204016
GPG Public Key: http://soulinfo.com/~hugang/hugang.asc

2004-11-22 07:56:40

by Hu Gang

[permalink] [raw]
Subject: Re: swsusp bigdiff [was Re: [PATCH] Software Suspend split to two stage V2.]

On Sat, Nov 20, 2004 at 11:49:37PM +0100, Pavel Machek wrote:
> Hi!
>
> > > > This patch using pagemap for PageSet2 bitmap, It increase suspend
> > > > speed, In my PowerPC suspend only need 5 secs, cool.
> > > >
> > > > Test passed in my ppc and x86 laptop.
> > > >
> > > > ppc swsusp patch for 2.6.9
> > > > http://honk.physik.uni-konstanz.de/~agx/linux-ppc/kernel/
> > > > Have fun.
> > >
> > > BTW here's my curent bigdiff. It already has some rather nice
> > > swsusp speedups. Please try it on your machine; if it works for you,
> > > try to send your patches relative to this one. I hope to merge these
> > > changes during 2.6.11.
> > >
> >
> > Here is the patch relative to your big diff. It tested pass with my x86
> > pc, But the sysfs interface can't works, I using reboot system call.
>
> Without PREEMPT and HIGHMEM it worked okay on an idle system. When I
> started kernel compilation while trying to swsusp, it crashed on
> resume.
> Pavel

Here is my big diff, relative to your big diff. :) It works.

- No longer needs contiguous pages for the pagedir.
Swsusp uses contiguous pages (the pagedir) to save the new address, old
address and swap offset. The current implementation treats those
contiguous pages as an array, so if there are many pages to save, we have to
allocate many (>5) contiguous pages, and most of the time that will fail.

I use a simple linked structure to solve it.

A PowerPC version will come soon.

Pavel, please have a look and comment. Thanks.

diff -ur linux-2.6.9-peval/arch/i386/power/swsusp.S linux-2.6.9-peval-hg/arch/i386/power/swsusp.S
--- linux-2.6.9-peval/arch/i386/power/swsusp.S 2004-10-20 15:58:34.000000000 +0800
+++ linux-2.6.9-peval-hg/arch/i386/power/swsusp.S 2004-11-22 15:05:06.000000000 +0800
@@ -31,25 +31,59 @@
movl $swsusp_pg_dir-__PAGE_OFFSET,%ecx
movl %ecx,%cr3

- movl pagedir_nosave, %ebx
- xorl %eax, %eax
- xorl %edx, %edx
+ mov pagedir_nosave, %edx
+ test %edx, %edx
+ mov %edx, swsusp_pbe_pgdir
+ je copy_loop_end
+
+copy_loop_start:
+ mov swsusp_pbe_pgdir, %edx
+ mov 0xc(%edx), %eax
+ mov %eax, swsusp_pbe_next
+ xor %eax, %eax
+ mov %eax, swsusp_pbe_nums
+
+ lea 0x0(%esi,1), %esi
+ lea 0x0(%edi,1), %edi
+ mov 0x4(%edx),%eax
+ test %eax, %eax
+ je copy_loop_end
.p2align 4,,7

-copy_loop:
- movl 4(%ebx,%edx),%edi
- movl (%ebx,%edx),%esi
-
- movl $1024, %ecx
- rep
- movsl
-
- incl %eax
- addl $16, %edx
- cmpl nr_copy_pages,%eax
- jb copy_loop
+copy_one_pgdir:
+ xor %ecx, %ecx
+ lea 0x0(%esi,1), %esi
.p2align 4,,7

+copy_one_page:
+ mov 0x4(%edx), %eax
+ mov (%edx), %edx
+ mov (%edx,%ecx,4), %edx
+ mov %edx,(%eax,%ecx,4)
+ inc %ecx
+ cmp $0x3ff, %ecx
+ ja copy_one_pgdir_end
+ mov swsusp_pbe_pgdir, %edx
+ jmp copy_one_page
+ .p2align 4,,7
+
+copy_one_pgdir_end:
+ mov swsusp_pbe_nums, %eax
+ mov swsusp_pbe_pgdir, %edx
+ inc %eax
+ mov %eax, swsusp_pbe_nums
+
+ add $0x10, %edx
+ cmp $0xfe, %eax
+ mov %edx, swsusp_pbe_pgdir
+
+ jbe copy_one_pgdir
+ mov swsusp_pbe_next, %eax
+ test %eax, %eax
+ mov %eax, swsusp_pbe_pgdir
+ jne copy_loop_start
+copy_loop_end:
+
movl saved_context_esp, %esp
movl saved_context_ebp, %ebp
movl saved_context_ebx, %ebx
Only in linux-2.6.9-peval-hg/arch/i386/power: .swsusp.S.swp
Only in linux-2.6.9-peval-hg: b-i386
Only in linux-2.6.9-peval-hg: b-ppc
diff -ur linux-2.6.9-peval/kernel/power/disk.c linux-2.6.9-peval-hg/kernel/power/disk.c
--- linux-2.6.9-peval/kernel/power/disk.c 2004-11-20 14:14:45.000000000 +0800
+++ linux-2.6.9-peval-hg/kernel/power/disk.c 2004-11-20 14:51:21.000000000 +0800
@@ -29,6 +29,8 @@
extern int swsusp_resume(void);
extern int swsusp_free(void);

+extern int write_page_caches(void);
+extern int read_page_caches(void);

static int noresume = 0;
char resume_file[256] = CONFIG_PM_STD_PARTITION;
@@ -106,6 +108,7 @@
}
}

+
static inline void platform_finish(void)
{
if (pm_disk_mode == PM_DISK_PLATFORM) {
@@ -118,13 +121,14 @@
{
device_resume();
platform_finish();
+ read_page_caches();
enable_nonboot_cpus();
thaw_processes();
pm_restore_console();
}


-static int prepare(void)
+static int prepare(int resume)
{
int error;

@@ -144,9 +148,13 @@
}

/* Free memory before shutting down devices. */
- free_some_memory();
+ /* free_some_memory(); */

disable_nonboot_cpus();
+ if (!resume)
+ if ((error = write_page_caches())) {
+ goto Finish;
+ }
if ((error = device_suspend(PMSG_FREEZE))) {
printk("Some devices failed to suspend\n");
goto Finish;
@@ -176,7 +184,7 @@
{
int error;

- if ((error = prepare()))
+ if ((error = prepare(0)))
return error;

pr_debug("PM: Attempting to suspend to disk.\n");
@@ -233,7 +241,7 @@

pr_debug("PM: Preparing system for restore.\n");

- if ((error = prepare()))
+ if ((error = prepare(1)))
goto Free;

barrier();
diff -ur linux-2.6.9-peval/kernel/power/swsusp.c linux-2.6.9-peval-hg/kernel/power/swsusp.c
--- linux-2.6.9-peval/kernel/power/swsusp.c 2004-11-20 14:14:45.000000000 +0800
+++ linux-2.6.9-peval-hg/kernel/power/swsusp.c 2004-11-22 15:11:18.000000000 +0800
@@ -75,7 +75,7 @@
extern char __nosave_begin, __nosave_end;

/* Variables to be preserved over suspend */
-static int pagedir_order_check;
+static int nr_copy_pages_check;

extern char resume_file[];
static dev_t resume_device;
@@ -97,7 +97,6 @@
*/
suspend_pagedir_t *pagedir_nosave __nosavedata = NULL;
static suspend_pagedir_t *pagedir_save;
-static int pagedir_order __nosavedata = 0;

#define SWSUSP_SIG "S1SUSPEND"

@@ -223,7 +222,110 @@
swap_list_unlock();
}

+#define ONE_PAGE_PBE_NUM ( PAGE_SIZE / sizeof(struct pbe) - 1)

+/* for each pagedir */
+typedef int (*susp_pgdir_t)(suspend_pagedir_t *cur, void *fun, void *arg);
+
+static int inline for_each_pgdir(struct pbe *pbe, susp_pgdir_t fun,
+ void *subfun, void *arg)
+{
+ suspend_pagedir_t *pgdir = pbe;
+ int error = 0;
+
+ while (pgdir != NULL) {
+ suspend_pagedir_t *next = (suspend_pagedir_t *)pgdir->dummy.val;
+ pr_debug("next %p, cur %p\n", next, pgdir);
+ error = fun(pgdir, subfun, arg);
+ if (error) return error;
+ pgdir = next;
+ }
+
+ return (0);
+}
+
+/* free one pagedir */
+static int free_one_pagedir(suspend_pagedir_t *pgdir, void *fun, void *arg)
+{
+ free_page((unsigned long)pgdir);
+ return (0);
+}
+
+typedef int (*swsup_pbe_t)(struct pbe *bpe, void *p);
+
+static int for_pbe_one_pgdir(suspend_pagedir_t *pgdir, void *_fun,
+ void *arg)
+{
+ unsigned int nums;
+ swsup_pbe_t fun = _fun;
+ int error = 0;
+
+ pr_debug("for_pbe_one_pgdir: %p, %p, %p\n", pgdir, _fun, arg);
+ for (nums = 0; nums < ONE_PAGE_PBE_NUM; nums++) {
+ error = fun(pgdir, arg);
+ pgdir ++;
+ if (error) return error;
+ }
+
+ return (0);
+}
+
+static int for_each_pbe(struct pbe *pbe, swsup_pbe_t fun, void *p)
+{
+ return for_each_pgdir(pbe, for_pbe_one_pgdir, fun, p);
+}
+
+unsigned long swsusp_pbe_nums __nosavedata;
+suspend_pagedir_t *swsusp_pbe_pgdir __nosavedata, *swsusp_pbe_next __nosavedata;
+
+/*
+ * for_each_pbe_copy_back
+ *
+ * This is useful for writing the code in assembly.
+ *
+ */
+/* #define CREATE_ASM_CODE */
+#ifdef CREATE_ASM_CODE
+asmlinkage void inline for_each_pbe_copy_back(void)
+{
+ swsusp_pbe_pgdir = pagedir_nosave;
+ while (swsusp_pbe_pgdir != NULL) {
+ swsusp_pbe_next = (suspend_pagedir_t *)swsusp_pbe_pgdir->dummy.val;
+ for (swsusp_pbe_nums = 0;
+ swsusp_pbe_nums < ONE_PAGE_PBE_NUM;
+ swsusp_pbe_nums++) {
+ register unsigned long i;
+ if (swsusp_pbe_pgdir->orig_address == 0) return;
+ for (i = 0; i < PAGE_SIZE / sizeof(unsigned long); i++) {
+ *(((unsigned long *)(swsusp_pbe_pgdir->orig_address) + i)) =
+ *(((unsigned long *)(swsusp_pbe_pgdir->address) + i));
+ }
+ swsusp_pbe_pgdir ++;
+ }
+ swsusp_pbe_pgdir = swsusp_pbe_next;
+ }
+}
+#endif
+
+static struct pbe *find_pbe_by_index(int index, struct pbe *pgdir)
+{
+ unsigned int nums = ONE_PAGE_PBE_NUM;
+ suspend_pagedir_t *next, *ret = NULL;
+
+ pr_debug("find_pbe_by_index %d, %p\n", index, pgdir);
+ while (pgdir != NULL) {
+ if (index < nums) {
+ ret = pgdir + (index % ONE_PAGE_PBE_NUM);
+ break;
+ }
+ next = (suspend_pagedir_t *)pgdir->dummy.val;
+ nums += ONE_PAGE_PBE_NUM;
+ pgdir = next;
+ }
+ pr_debug("find_pbe index %d -> %p\n", index, ret);
+
+ return (ret);
+}

/**
* write_swap_page - Write one page to a fresh swap location.
@@ -257,6 +359,20 @@
return error;
}

+static int data_free_pbe(struct pbe *p, void *tmp)
+{
+ swp_entry_t entry;
+
+ if (swp_offset(p->swap_address)== 0) return -1;
+
+ (*(int*)tmp) ++;
+ entry = p->swap_address;
+ if (entry.val)
+ swap_free(entry);
+ p->swap_address = (swp_entry_t){0};
+
+ return (0);
+}

/**
* data_free - Free the swap entries used by the saved image.
@@ -267,43 +383,56 @@

static void data_free(void)
{
- swp_entry_t entry;
- int i;
+ int i = 0;
+ for_each_pbe(pagedir_nosave, data_free_pbe, &i);
+ BUG_ON( i != nr_copy_pages);
+}

- for (i = 0; i < nr_copy_pages; i++) {
- entry = (pagedir_nosave + i)->swap_address;
- if (entry.val)
- swap_free(entry);
- else
- break;
- (pagedir_nosave + i)->swap_address = (swp_entry_t){0};
- }
+static int mod_progress = 1;
+
+static void inline mod_printk_progress(int i)
+{
+ if (mod_progress == 0) mod_progress = 1;
+ if (!(i%100))
+ printk( "\b\b\b\b%3d%%", i / mod_progress );
}

+static int write_one_pbe(struct pbe *p, void *tmp)
+{
+ int error = 0, i = *(int*)tmp;
+
+ mod_printk_progress(i);
+
+ pr_debug("write_one_pbe: %p, %p %p ",
+ p, (void *)p->address, (void *)p->orig_address);
+ if (p->orig_address == 0) return -1;
+
+ (*(int*)tmp) ++;
+ error = write_page(p->address, &p->swap_address);
+ if (error) return error;
+
+ pr_debug("%lu\n", swp_offset(p->swap_address));
+
+ return (0);
+}

/**
* data_write - Write saved image to swap.
*
* Walk the list of pages in the image and sync each one to swap.
*/
-
static int data_write(void)
{
- int error = 0;
- int i;
- unsigned int mod = nr_copy_pages / 100;
-
- if (!mod)
- mod = 1;
+ int i = 0, error;
+
+ mod_progress = nr_copy_pages / 100;

- printk( "Writing data to swap (%d pages)... ", nr_copy_pages );
- for (i = 0; i < nr_copy_pages && !error; i++) {
- if (!(i%mod))
- printk( "\b\b\b\b%3d%%", i / mod );
- error = write_page((pagedir_nosave+i)->address,
- &((pagedir_nosave+i)->swap_address));
- }
+ printk( "Writing data to swap (%d pages)... ", nr_copy_pages);
+ error = for_each_pbe(pagedir_nosave, write_one_pbe, &i);
printk("\b\b\b\bdone\n");
+
+ if (i == nr_copy_pages) return (0);
+
return error;
}

@@ -363,6 +492,15 @@
swap_free(swsusp_info.pagedir[i]);
}

+static int write_one_pagedir(suspend_pagedir_t *pgdir,
+ void *fun, void *arg)
+{
+ int i = *(int *)arg;
+
+ (*(int *)arg) ++;
+
+ return write_page((unsigned long)pgdir, &swsusp_info.pagedir[i]);
+}

/**
* write_pagedir - Write the array of pages holding the page directory.
@@ -371,15 +509,12 @@

static int write_pagedir(void)
{
- unsigned long addr = (unsigned long)pagedir_nosave;
- int error = 0;
- int n = SUSPEND_PD_PAGES(nr_copy_pages);
- int i;
+ int error = 0, n = 0;

- swsusp_info.pagedir_pages = n;
+ error = for_each_pgdir(pagedir_nosave, write_one_pagedir, NULL, &n);
printk( "Writing pagedir (%d pages)\n", n);
- for (i = 0; i < n && !error; i++, addr += PAGE_SIZE)
- error = write_page(addr, &swsusp_info.pagedir[i]);
+ swsusp_info.pagedir_pages = n;
+
return error;
}

@@ -504,6 +639,355 @@
return 0;
}

+typedef int (*do_page_t)(struct page *page, int p);
+
+static int foreach_zone_page(struct zone *zone, do_page_t fun, int p)
+{
+ int inactive = 0, active = 0;
+
+ spin_lock_irq(&zone->lru_lock);
+ if (zone->nr_inactive) {
+ struct list_head * entry = zone->inactive_list.prev;
+ while (entry != &zone->inactive_list) {
+ if (fun) {
+ struct page * page = list_entry(entry, struct page, lru);
+ inactive += fun(page, p);
+ } else {
+ inactive ++;
+ }
+ entry = entry->prev;
+ }
+ }
+ if (zone->nr_active) {
+ struct list_head * entry = zone->active_list.prev;
+ while (entry != &zone->active_list) {
+ if (fun) {
+ struct page * page = list_entry(entry, struct page, lru);
+ active += fun(page, p);
+ } else {
+ active ++;
+ }
+ entry = entry->prev;
+ }
+ }
+ spin_unlock_irq(&zone->lru_lock);
+
+ return (active + inactive);
+}
+
+/* I'll move this to include/linux/page-flags.h */
+#define PG_pcs (PG_nosave_free + 1)
+
+#define SetPagePcs(page) set_bit(PG_pcs, &(page)->flags)
+#define ClearPagePcs(page) clear_bit(PG_pcs, &(page)->flags)
+#define PagePcs(page) test_bit(PG_pcs, &(page)->flags)
+
+static suspend_pagedir_t *pagedir_cache = NULL;
+static int nr_copy_pcs = 0;
+
+static int setup_pcs_pe(struct page *page, int setup)
+{
+ unsigned long pfn = page_to_pfn(page);
+
+ BUG_ON(PageReserved(page) && PageNosave(page));
+ if (!pfn_valid(pfn)) {
+ printk("not valid page\n");
+ return 0;
+ }
+ if (PageNosave(page)) {
+ printk("nosave\n");
+ return 0;
+ }
+ if (PageReserved(page) /*&& pfn_is_nosave(pfn)*/) {
+ printk("[nosave]\n");
+ return 0;
+ }
+ if (PageSlab(page)) {
+ printk("slab\n");
+ return (0);
+ }
+ if (setup) {
+ struct pbe *p = find_pbe_by_index(nr_copy_pcs, pagedir_cache);
+ p->address = (long)page_address(page);
+ pr_debug("setup_pcs: cur %p, addr %p, next %p, nr %u\n",
+ (void*)p, (void*)p->address,
+ (void*)p->orig_address, nr_copy_pcs);
+ nr_copy_pcs ++;
+ }
+ SetPagePcs(page);
+
+ return (1);
+}
+
+static int count_pcs(struct zone *zone, int p)
+{
+ return foreach_zone_page(zone, setup_pcs_pe, p);
+}
+
+static int check_pbe_addr(struct pbe *p, void *addr)
+{
+ unsigned long addre = (unsigned long)addr + PAGE_SIZE;
+
+ if (p->orig_address == (unsigned long)0) return 0;
+ if (p->orig_address >= (unsigned long)addr && p->orig_address < addre)
+ return 1;
+ return 0;
+}
+
+
+/*
+ * Redefined in the PageCache pagedir.
+ *
+ * struct pbe {
+ * unsigned long address;
+ * unsigned long orig_address; pointer of next struct pbe
+ * swp_entry_t swap_address;
+ * swp_entry_t dummy; current index
+ * }
+ *
+ */
+static suspend_pagedir_t * alloc_one_pagedir(suspend_pagedir_t *prev,
+ suspend_pagedir_t *collide)
+{
+ suspend_pagedir_t *pgdir = NULL;
+ int i;
+
+ pgdir = (suspend_pagedir_t *)
+ __get_free_pages(GFP_ATOMIC | __GFP_COLD, 0);
+ if (!pgdir) {
+ return NULL;
+ }
+
+ if (collide) {
+ while (for_each_pbe((struct pbe *)collide, check_pbe_addr, pgdir)) {
+ free_page((unsigned long)pgdir);
+ pgdir = (suspend_pagedir_t *)
+ __get_free_pages(GFP_ATOMIC | __GFP_COLD, 0);
+ if (!pgdir) {
+ return NULL;
+ }
+ }
+ }
+
+ pr_debug("pgdir: %p, %p, %d\n", pgdir, prev, sizeof(suspend_pagedir_t));
+ memset(pgdir, 0, PAGE_SIZE);
+ for (i = 0; i < ONE_PAGE_PBE_NUM ; i++) {
+ pgdir[i].dummy.val = (unsigned long)NULL;
+ pgdir[i].address = 0;
+ pgdir[i].orig_address = 0;
+ if (prev == NULL) continue;
+ prev[i].dummy.val= (unsigned long)pgdir;
+ }
+
+ return (pgdir);
+}
+
+static int alloc_pagedir(struct pbe **pbe, int pbe_nums, struct pbe *collide)
+{
+ unsigned int nums = 0, alloc_nums = 1;
+ suspend_pagedir_t *prev, *cur = NULL;
+
+ /* alloc pagedir head */
+ prev = alloc_one_pagedir(NULL, collide);
+ if (!prev) {
+ return -ENOMEM;
+ }
+ *pbe = prev;
+
+ for (nums = ONE_PAGE_PBE_NUM; nums < pbe_nums; nums += ONE_PAGE_PBE_NUM) {
+ cur = alloc_one_pagedir(prev, collide);
+ if (!cur) {
+ goto no_mem;
+ }
+ prev = cur;
+ alloc_nums ++;
+ }
+ return alloc_nums;
+
+no_mem:
+ for_each_pgdir(*pbe, free_one_pagedir, NULL, NULL);
+ *pbe = NULL;
+
+ return (-ENOMEM);
+}
+
+int bio_read_page(pgoff_t page_off, void * page);
+
+static int pagecache_read_pbe(struct pbe *p, void *tmp)
+{
+ int error = 0, i = *(int*)tmp;
+ swp_entry_t entry;
+
+ mod_printk_progress(i);
+
+ if (swp_offset(p->swap_address)== 0) return -1;
+
+ (*(int*)tmp) ++;
+ pr_debug("pagecache_read_pbe: %p %p %lu\n",
+ (void *)p->address, (void *)p->orig_address,
+ swp_offset(p->swap_address));
+
+ error = bio_read_page(swp_offset(p->swap_address), (void *)p->address);
+ if (error) return error;
+
+ entry = p->swap_address;
+ if (entry.val)
+ swap_free(entry);
+
+ return (0);
+}
+
+int read_page_caches(void)
+{
+ int error = 0, i = 0;
+
+ mod_progress = nr_copy_pcs / 100;
+
+ printk( "Reading PageCaches from swap (%d pages)... ", nr_copy_pcs);
+ error = for_each_pbe(pagedir_cache, pagecache_read_pbe, &i);
+ printk("\b\b\b\bdone\n");
+
+ for_each_pgdir(pagedir_cache, free_one_pagedir, NULL, NULL);
+
+ if (i == nr_copy_pcs) return (0);
+
+ return error;
+}
+
+static int pagecache_write_pbe(struct pbe *p, void *tmp)
+{
+ int error = 0, i = *(int*)tmp;
+
+ mod_printk_progress(i);
+
+ pr_debug("pagecache_write_pbe: %p, %p %p ",
+ p, (void *)p->address, (void *)p->orig_address);
+ if (p->address == 0) return -1;
+
+ (*(int*)tmp) ++;
+ error = write_page(p->address, &p->swap_address);
+ if (error) return error;
+
+ pr_debug("%lu\n", swp_offset(p->swap_address));
+
+ return (0);
+}
+
+static int pcs_write(void)
+{
+ int i = 0, error;
+
+ mod_progress = nr_copy_pcs / 100;
+
+ printk( "Writing PageCaches to swap (%d pages)... ", nr_copy_pcs);
+ error = for_each_pbe(pagedir_cache, pagecache_write_pbe, &i);
+ printk("\b\b\b\bdone\n");
+
+ if (i == nr_copy_pcs) return (0);
+
+ return error;
+}
+
+static int setup_pagedir_pbe(void)
+{
+ struct zone *zone;
+
+ nr_copy_pcs = 0;
+ for_each_zone(zone) {
+ if (!is_highmem(zone)) {
+ count_pcs(zone, 1);
+ }
+ }
+
+ return (0);
+}
+
+static void count_data_pages(void);
+static int swsusp_alloc(void);
+
+static void page_caches_recal(void)
+{
+ struct zone *zone;
+ int i;
+
+ for (i = 0; i < max_mapnr; i++)
+ ClearPagePcs(mem_map+i);
+
+ nr_copy_pcs = 0;
+ drain_local_pages();
+ for_each_zone(zone) {
+ if (!is_highmem(zone)) {
+ nr_copy_pcs += count_pcs(zone, 0);
+ }
+ }
+}
+
+int write_page_caches(void)
+{
+ int error;
+ int recal = 0;
+
+ page_caches_recal();
+
+ if (nr_copy_pcs == 0) {
+ return (0);
+ }
+ printk("swsusp: Need to copy %u pcs\n", nr_copy_pcs);
+
+ if ((error = swsusp_swap_check())) {
+ return error;
+ }
+
+ if (alloc_pagedir(&pagedir_cache, nr_copy_pcs, NULL) < 0) {
+ return -ENOMEM;
+ }
+
+ drain_local_pages();
+ count_data_pages();
+ printk("swsusp(1/2): Need to copy %u pages, %u pcs\n",
+ nr_copy_pages, nr_copy_pcs);
+
+ while (nr_free_pages() < nr_copy_pages + PAGES_FOR_IO) {
+ if (recal == 0) {
+ }
+ printk("#");
+ shrink_all_memory(nr_copy_pages + PAGES_FOR_IO);
+ recal ++;
+ }
+
+ if (recal) {
+ page_caches_recal();
+ drain_local_pages();
+ count_data_pages();
+ printk("swsusp(1/2): Need to copy %u pages, %u pcs\n",
+ nr_copy_pages, nr_copy_pcs);
+ }
+
+ drain_local_pages();
+ count_data_pages();
+ printk("swsusp(2/2): Need to copy %u pages, %u pcs\n",
+ nr_copy_pages, nr_copy_pcs);
+
+ error = swsusp_alloc();
+ if (error) {
+ printk("swsusp_alloc failed, %d\n", error);
+ return error;
+ }
+
+ drain_local_pages();
+ count_data_pages();
+ printk("swsusp(final): Need to copy %u/%u pages, %u pcs\n",
+ nr_copy_pages, nr_copy_pages_check, nr_copy_pcs);
+
+ setup_pagedir_pbe();
+ pr_debug("after setup_pagedir_pbe \n");
+
+ error = pcs_write();
+ if (error)
+ return error;
+
+ return (0);
+}

static int pfn_is_nosave(unsigned long pfn)
{
@@ -539,7 +1023,10 @@
}
if (PageNosaveFree(page))
return 0;
-
+ if (PagePcs(page)) {
+ BUG_ON(zone->nr_inactive == 0 && zone->nr_active == 0);
+ return 0;
+ }
return 1;
}

@@ -549,10 +1036,12 @@
unsigned long zone_pfn;

nr_copy_pages = 0;
+ nr_copy_pcs = 0;

for_each_zone(zone) {
if (is_highmem(zone))
continue;
+ nr_copy_pcs += count_pcs(zone, 0);
mark_free_pages(zone);
for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn)
nr_copy_pages += saveable(zone, &zone_pfn);
@@ -564,7 +1053,6 @@
{
struct zone *zone;
unsigned long zone_pfn;
- struct pbe * pbe = pagedir_nosave;
int pages_copied = 0;

for_each_zone(zone) {
@@ -574,11 +1062,12 @@
for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) {
if (saveable(zone, &zone_pfn)) {
struct page * page;
+ struct pbe * pbe = find_pbe_by_index(pages_copied, pagedir_nosave);
+ BUG_ON(pbe == NULL);
page = pfn_to_page(zone_pfn + zone->zone_start_pfn);
pbe->orig_address = (long) page_address(page);
/* copy_page is not usable for copying task structs. */
memcpy((void *)pbe->address, (void *)pbe->orig_address, PAGE_SIZE);
- pbe++;
pages_copied++;
}
}
@@ -587,104 +1076,44 @@
nr_copy_pages = pages_copied;
}

-
-/**
- * calc_order - Determine the order of allocation needed for pagedir_save.
- *
- * This looks tricky, but is just subtle. Please fix it some time.
- * Since there are %nr_copy_pages worth of pages in the snapshot, we need
- * to allocate enough contiguous space to hold
- * (%nr_copy_pages * sizeof(struct pbe)),
- * which has the saved/orig locations of the page..
- *
- * SUSPEND_PD_PAGES() tells us how many pages we need to hold those
- * structures, then we call get_bitmask_order(), which will tell us the
- * last bit set in the number, starting with 1. (If we need 30 pages, that
- * is 0x0000001e in hex. The last bit is the 5th, which is the order we
- * would use to allocate 32 contiguous pages).
- *
- * Since we also need to save those pages, we add the number of pages that
- * we need to nr_copy_pages, and in case of an overflow, do the
- * calculation again to update the number of pages needed.
- *
- * With this model, we will tend to waste a lot of memory if we just cross
- * an order boundary. Plus, the higher the order of allocation that we try
- * to do, the more likely we are to fail in a low-memory situtation
- * (though we're unlikely to get this far in such a case, since swsusp
- * requires half of memory to be free anyway).
- */
-
-
-static void calc_order(void)
+static int free_one_snapshot_pbe(struct pbe *p, void *tmp)
{
- int diff = 0;
- int order = 0;
-
- do {
- diff = get_bitmask_order(SUSPEND_PD_PAGES(nr_copy_pages)) - order;
- if (diff) {
- order += diff;
- nr_copy_pages += 1 << diff;
- }
- } while(diff);
- pagedir_order = order;
+ if (p->address) {
+ ClearPageNosave(virt_to_page(p->address));
+ free_page(p->address);
+ p->address = 0;
+ }
+ return (0);
}

-
/**
- * alloc_pagedir - Allocate the page directory.
- *
- * First, determine exactly how many contiguous pages we need and
- * allocate them.
+ * free_image_pages - Free pages allocated for snapshot
*/
-
-static int alloc_pagedir(void)
+static void free_image_pages(void)
{
- calc_order();
- pagedir_save = (suspend_pagedir_t *)__get_free_pages(GFP_ATOMIC | __GFP_COLD,
- pagedir_order);
- if (!pagedir_save)
- return -ENOMEM;
- memset(pagedir_save, 0, (1 << pagedir_order) * PAGE_SIZE);
- pagedir_nosave = pagedir_save;
- return 0;
+ for_each_pbe(pagedir_save, free_one_snapshot_pbe, NULL);
}

-/**
- * free_image_pages - Free pages allocated for snapshot
- */
-
-static void free_image_pages(void)
+static int alloc_one_snapshot_pbe(struct pbe *p, void *tmp)
{
- struct pbe * p;
- int i;
+ (*(int *)tmp) ++;
+ p->address = get_zeroed_page(GFP_ATOMIC | __GFP_COLD);
+ if (!p->address)
+ return -ENOMEM;
+ SetPageNosave(virt_to_page(p->address));

- p = pagedir_save;
- for (i = 0, p = pagedir_save; i < nr_copy_pages; i++, p++) {
- if (p->address) {
- ClearPageNosave(virt_to_page(p->address));
- free_page(p->address);
- p->address = 0;
- }
- }
+ return (0);
}
-
/**
* alloc_image_pages - Allocate pages for the snapshot.
*
*/
-
static int alloc_image_pages(void)
{
- struct pbe * p;
- int i;
+ int i = 0;
+
+ for_each_pbe(pagedir_save, alloc_one_snapshot_pbe, &i);

- for (i = 0, p = pagedir_save; i < nr_copy_pages; i++, p++) {
- p->address = get_zeroed_page(GFP_ATOMIC | __GFP_COLD);
- if (!p->address)
- return -ENOMEM;
- SetPageNosave(virt_to_page(p->address));
- }
return 0;
}

@@ -693,7 +1122,7 @@
BUG_ON(PageNosave(virt_to_page(pagedir_save)));
BUG_ON(PageNosaveFree(virt_to_page(pagedir_save)));
free_image_pages();
- free_pages((unsigned long) pagedir_save, pagedir_order);
+ for_each_pgdir(pagedir_save, free_one_pagedir, NULL, NULL);
}


@@ -730,7 +1159,7 @@
struct sysinfo i;

si_swapinfo(&i);
- if (i.freeswap < (nr_copy_pages + PAGES_FOR_IO)) {
+ if (i.freeswap < (nr_copy_pages + nr_copy_pcs + PAGES_FOR_IO)) {
pr_debug("swsusp: Not enough swap. Need %ld\n",i.freeswap);
return 0;
}
@@ -750,25 +1179,26 @@

if (!enough_swap())
return -ENOSPC;
-
- if ((error = alloc_pagedir())) {
- pr_debug("suspend: Allocating pagedir failed.\n");
- return error;
+ error = alloc_pagedir(&pagedir_save, nr_copy_pages, NULL);
+ if (error < 0) {
+ printk("suspend: Allocating pagedir failed.\n");
+ return -ENOMEM;
}
+ nr_copy_pages += error;
if ((error = alloc_image_pages())) {
- pr_debug("suspend: Allocating image pages failed.\n");
+ printk("suspend: Allocating image pages failed.\n");
swsusp_free();
return error;
}
+ pagedir_nosave = pagedir_save;
+ nr_copy_pages_check = nr_copy_pages;

- pagedir_order_check = pagedir_order;
return 0;
}

int suspend_prepare_image(void)
{
unsigned int nr_needed_pages;
- int error;

pr_debug("swsusp: critical section: \n");
if (save_highmem()) {
@@ -777,15 +1207,8 @@
return -ENOMEM;
}

- drain_local_pages();
- count_data_pages();
- printk("swsusp: Need to copy %u pages\n",nr_copy_pages);
nr_needed_pages = nr_copy_pages + PAGES_FOR_IO;

- error = swsusp_alloc();
- if (error)
- return error;
-
/* During allocating of suspend pagedir, new cold pages may appear.
* Kill them.
*/
@@ -854,11 +1277,13 @@

asmlinkage int swsusp_restore(void)
{
- BUG_ON (pagedir_order_check != pagedir_order);
-
+ BUG_ON (nr_copy_pages_check != nr_copy_pages);
+
/* Even mappings of "global" things (vmalloc) need to be fixed */
+#if defined(CONFIG_X86) && defined(CONFIG_X86_64)
__flush_tlb_global();
wbinvd(); /* Nigel says wbinvd here is good idea... */
+#endif
return 0;
}

@@ -881,99 +1306,6 @@
return error;
}

-
-
-/* More restore stuff */
-
-#define does_collide(addr) does_collide_order(pagedir_nosave, addr, 0)
-
-/*
- * Returns true if given address/order collides with any orig_address
- */
-static int __init does_collide_order(suspend_pagedir_t *pagedir, unsigned long addr,
- int order)
-{
- int i;
- unsigned long addre = addr + (PAGE_SIZE<<order);
-
- for (i=0; i < nr_copy_pages; i++)
- if ((pagedir+i)->orig_address >= addr &&
- (pagedir+i)->orig_address < addre)
- return 1;
-
- return 0;
-}
-
-/*
- * We check here that pagedir & pages it points to won't collide with pages
- * where we're going to restore from the loaded pages later
- */
-static int __init check_pagedir(void)
-{
- int i;
-
- for(i=0; i < nr_copy_pages; i++) {
- unsigned long addr;
-
- do {
- addr = get_zeroed_page(GFP_ATOMIC);
- if(!addr)
- return -ENOMEM;
- } while (does_collide(addr));
-
- (pagedir_nosave+i)->address = addr;
- }
- return 0;
-}
-
-static int __init swsusp_pagedir_relocate(void)
-{
- /*
- * We have to avoid recursion (not to overflow kernel stack),
- * and that's why code looks pretty cryptic
- */
- suspend_pagedir_t *old_pagedir = pagedir_nosave;
- void **eaten_memory = NULL;
- void **c = eaten_memory, *m, *f;
- int ret = 0;
-
- printk("Relocating pagedir ");
-
- if (!does_collide_order(old_pagedir, (unsigned long)old_pagedir, pagedir_order)) {
- printk("not necessary\n");
- return check_pagedir();
- }
-
- while ((m = (void *) __get_free_pages(GFP_ATOMIC, pagedir_order)) != NULL) {
- if (!does_collide_order(old_pagedir, (unsigned long)m, pagedir_order))
- break;
- eaten_memory = m;
- printk( "." );
- *eaten_memory = c;
- c = eaten_memory;
- }
-
- if (!m) {
- printk("out of memory\n");
- ret = -ENOMEM;
- } else {
- pagedir_nosave =
- memcpy(m, old_pagedir, PAGE_SIZE << pagedir_order);
- }
-
- c = eaten_memory;
- while (c) {
- printk(":");
- f = c;
- c = *c;
- free_pages((unsigned long)f, pagedir_order);
- }
- if (ret)
- return ret;
- printk("|\n");
- return check_pagedir();
-}
-
/**
* Using bio to read from swap.
* This code requires a bit more work than just using buffer heads
@@ -993,7 +1325,7 @@
return 0;
}

-static struct block_device * resume_bdev;
+static struct block_device * resume_bdev __nosavedata;

/**
* submit - submit BIO request.
@@ -1088,7 +1420,6 @@
return -EPERM;
}
nr_copy_pages = swsusp_info.image_pages;
- pagedir_order = get_bitmask_order(SUSPEND_PD_PAGES(nr_copy_pages));
return error;
}

@@ -1115,62 +1446,124 @@
return error;
}

+static int __init check_one_pbe(struct pbe *p, void *collide)
+{
+ unsigned long addr = 0;
+ static int checked = 0;
+
+ if (p->orig_address == 0) return (checked);
+
+ do {
+ addr = get_zeroed_page(GFP_ATOMIC);
+ if(!addr)
+ return -ENOMEM;
+ pr_debug("check_one_pbe: %p %p %p ", p, (void*)addr, (void*)p->orig_address);
+ } while(for_each_pbe((struct pbe *)collide, check_pbe_addr, (void*)addr));
+ pr_debug("done\n");
+ p->address = addr;
+ checked ++;
+
+ return (0);
+}
+
+/*
+ * We check here that pagedir & pages it points to won't collide with pages
+ * where we're going to restore from the loaded pages later
+ */
+static int __init check_pagedir(void)
+{
+ int i;
+
+ i = for_each_pbe(pagedir_nosave, check_one_pbe, pagedir_nosave);
+ BUG_ON(i != nr_copy_pages);
+
+ return (0);
+}
+
+static int __init read_one_pbe(struct pbe *p, void *tmp)
+{
+ int error = 0, i = *(int*)tmp;
+
+ mod_printk_progress(i);
+
+ pr_debug("read_one_pbe: %p %p %p %lu, %d\n",
+ p, (void *)p->address, (void *)p->orig_address,
+ swp_offset(p->swap_address), i);
+ if (p->orig_address == 0) return -1;
+ (*(int*)tmp) ++;
+
+ error = bio_read_page(swp_offset(p->swap_address), (void *)p->address);
+ if (error) return error;
+
+ return (0);
+}
+
/**
* swsusp_read_data - Read image pages from swap.
*
- * You do not need to check for overlaps, check_pagedir()
- * already did that.
*/

static int __init data_read(void)
{
- struct pbe * p;
int error;
- int i;
- int mod = nr_copy_pages / 100;
-
- if (!mod)
- mod = 1;
+ int i = 0; /* number of pages read; advanced by read_one_pbe() through the pointer passed below */

- if ((error = swsusp_pagedir_relocate()))
- return error;
+ if (check_pagedir()) {
+ return -ENOMEM;
+ }
+ mod_progress = nr_copy_pages / 100; /* NOTE(review): the removed code clamped this value to at least 1; no clamp here -- confirm mod_printk_progress() copes with 0 */

printk( "Reading image data (%d pages): ", nr_copy_pages );
- for(i = 0, p = pagedir_nosave; i < nr_copy_pages && !error; i++, p++) {
- if (!(i%mod))
- printk( "\b\b\b\b%3d%%", i / mod );
- error = bio_read_page(swp_offset(p->swap_address),
- (void *)p->address);
- }
+ error = for_each_pbe(pagedir_nosave, read_one_pbe, &i); /* NOTE(review): error is never read after this line, so a read failure is not returned to the caller -- confirm this is intended */
printk(" %d done.\n",i);
- return error;

+ BUG_ON( i != nr_copy_pages ); /* the page count is the only check made on the walk */
+
+ return 0;
}

extern dev_t __init name_to_dev_t(const char *line);

+static int __init read_one_pagedir(suspend_pagedir_t *pgdir, /* for_each_pgdir() callback: read one pagedir page from swap */
+ void *fun, void *arg) /* fun carries the page limit as an integer; arg points to the int page index */
+{
+ int i = *(int *)arg;
+ int max = (int)(long)fun; /* go through long: a direct pointer-to-int cast is a size mismatch on 64-bit */
+ unsigned long offset = swp_offset(swsusp_info.pagedir[i]);
+ unsigned long next;
+ int error = 0;
+
+ (*(int *)arg) ++;
+ next = pgdir->dummy.val; /* save the link to the next pagedir page: the read below overwrites this page */
+ pr_debug("read_one_pagedir: %p, %d, %lu, %lu\n", pgdir, i, offset, next);
+ if (i == max) return 0; /* index at the limit: nothing is read for this page */
+ if (offset)
+ error = bio_read_page(offset, (void *)pgdir);
+ else
+ error = -EFAULT; /* a zero swap offset is reported as an error */
+ pgdir->dummy.val = next; /* restore the in-memory link after the read */
+
+ return (error);
+}
+
static int __init read_pagedir(void)
{
- unsigned long addr;
- int i, n = swsusp_info.pagedir_pages;
+ int i = 0, n = swsusp_info.pagedir_pages; /* i counts the pagedir pages visited by read_one_pagedir() */
int error = 0;

- addr = __get_free_pages(GFP_ATOMIC, pagedir_order);
- if (!addr)
+ error = alloc_pagedir(&pagedir_nosave, nr_copy_pages, NULL);
+ if (error < 0)
return -ENOMEM;
- pagedir_nosave = (struct pbe *)addr;

pr_debug("pmdisk: Reading pagedir (%d Pages)\n",n);

- for (i = 0; i < n && !error; i++, addr += PAGE_SIZE) {
- unsigned long offset = swp_offset(swsusp_info.pagedir[i]);
- if (offset)
- error = bio_read_page(offset, (void *)addr);
- else
- error = -EFAULT;
- }
+ error = for_each_pgdir(pagedir_nosave, read_one_pagedir, (void *)(long)n, &i); /* n travels in the pointer argument; widened via long first */
+
+ BUG_ON(!error && i != n); /* a failed read ends the walk early with i < n, so only assert the count on success and let the error path below run */
+
if (error)
- free_pages((unsigned long)pagedir_nosave, pagedir_order);
+ for_each_pgdir(pagedir_nosave, free_one_pagedir, NULL, NULL);
+
return error;
}

@@ -1185,7 +1578,7 @@
if ((error = read_pagedir()))
return error;
if ((error = data_read()))
- free_pages((unsigned long)pagedir_nosave, pagedir_order);
+ for_each_pgdir(pagedir_nosave, free_one_pagedir, NULL, NULL);
return error;
}

@@ -1207,7 +1600,7 @@
if (!IS_ERR(resume_bdev)) {
set_blocksize(resume_bdev, PAGE_SIZE);
error = read_suspend_image();
- blkdev_put(resume_bdev);
+ /* blkdev_put(resume_bdev); */
} else
error = PTR_ERR(resume_bdev);


--
--
Hu Gang / Steve
Linux Registered User 204016
GPG Public Key: http://soulinfo.com/~hugang/hugang.asc

2004-11-22 10:26:37

by Pavel Machek

[permalink] [raw]
Subject: Re: swsusp bigdiff [was Re: [PATCH] Software Suspend split to two stage V2.]

Hi!

> > > Here is the patch relative to your big diff. It tested pass with my x86
> > > pc, But the sysfs interface can't works, I using reboot system call.
> >
> > Without PREEMPT and HIGHMEM it worked okay on an idle system. When I
> > started kernel compilation while trying to swsusp, it crashed on
> > resume.
>
> Here is my big diff relative to your big diff. :), It works.
>
> - Not need continuous page for pagedir.
> Swsusp using continuous page (pagedir), to save the new address, old
> address and swap offset, but in current implemention, it using
> continuous page as array, so if has so many pages to save, we have to
> allocate many (>5) continuous pages, most it it will failed.
>
> I using a easy link struct to resolve it.

Yes, I'd like to get rid of "too many continuous pages" problem
before. Small problem is that it needs to update x86-64 too, but I
guess that's okay. I'd like that version to go in *before* that
page-cache stuff (it actually fails a lot in wild).

Could you possibly put page-cache stuff into separate file? It would
be even nicer to have it configurable (run-time or compile-time) so
that if swsusp fails, I can tell people "try again with page-cache
stuff turned off"...
Pavel
--
People were complaining that M$ turns users into beta-testers...
...jr ghea gurz vagb qrirybcref, naq gurl frrz gb yvxr vg gung jnl!

2004-11-22 11:06:17

by Pavel Machek

[permalink] [raw]
Subject: Re: swsusp bigdiff [was Re: [PATCH] Software Suspend split to two stage V2.]

Hi!

> > Yes, I'd like to get rid of "too many continuous pages" problem
> > before. Small problem is that it needs to update x86-64 too, but I
> I have not x86-64, so I have no chance to do it.

I have access to x86-64, so I can do it...
Pavel
--
People were complaining that M$ turns users into beta-testers...
...jr ghea gurz vagb qrirybcref, naq gurl frrz gb yvxr vg gung jnl!

2004-11-22 10:49:58

by Hu Gang

[permalink] [raw]
Subject: Re: swsusp bigdiff [was Re: [PATCH] Software Suspend split to two stage V2.]

On Mon, Nov 22, 2004 at 11:26:12AM +0100, Pavel Machek wrote:
> Hi!
>
> > > > Here is the patch relative to your big diff. It tested pass with my x86
> > > > pc, But the sysfs interface can't works, I using reboot system call.
> > >
> > > Without PREEMPT and HIGHMEM it worked okay on an idle system. When I
> > > started kernel compilation while trying to swsusp, it crashed on
> > > resume.
> >
> > Here is my big diff relative to your big diff. :), It works.
> >
> > - Not need continuous page for pagedir.
> > Swsusp using continuous page (pagedir), to save the new address, old
> > address and swap offset, but in current implemention, it using
> > continuous page as array, so if has so many pages to save, we have to
> > allocate many (>5) continuous pages, most it it will failed.
> >
> > I using a easy link struct to resolve it.
>
> Yes, I'd like to get rid of "too many continuous pages" problem
> before. Small problem is that it needs to update x86-64 too, but I
I have not x86-64, so I have no chance to do it.

> guess that's okay. I'd like that version to go in *before* that
> page-cache stuff (it actually fails a lot in wild).
Yes, I agree.

>
> Could you possibly put page-cache stuff into separate file? It would
> be even nicer to have it configurable (run-time or compile-time) so
> that if swsusp fails, I can tell people "try again with page-cache
> stuff turned off"...
> Pavel
I'll do that. :)

--
Hu Gang / Steve
Linux Registered User 204016
GPG Public Key: http://soulinfo.com/~hugang/hugang.asc

2004-11-22 12:13:51

by Rafael J. Wysocki

[permalink] [raw]
Subject: Re: swsusp bigdiff [was Re: [PATCH] Software Suspend split to two stage V2.]

On Monday 22 of November 2004 12:02, Pavel Machek wrote:
> Hi!
>
> > > Yes, I'd like to get rid of "too many continuous pages" problem
> > > before. Small problem is that it needs to update x86-64 too, but I
> > I have not x86-64, so I have no chance to do it.
>
> I have access to x86-64, so I can do it...

Please, let me know when you have it done. I'd like to test ... ;-)

RJW

--
- Would you tell me, please, which way I ought to go from here?
- That depends a good deal on where you want to get to.
-- Lewis Carroll "Alice's Adventures in Wonderland"

2004-11-22 17:05:37

by Hu Gang

[permalink] [raw]
Subject: [PATH] swsusp update 3/3

On Mon, Nov 22, 2004 at 12:02:47PM +0100, Pavel Machek wrote:
> Hi!
>
> > > Yes, I'd like to get rid of "too many continuous pages" problem
> > > before. Small problem is that it needs to update x86-64 too, but I
> > I have not x86-64, so I have no chance to do it.
>
> I have access to x86-64, so I can do it...
> Pavel
> --
> People were complaining that M$ turns users into beta-testers...
> ...jr ghea gurz vagb qrirybcref, naq gurl frrz gb yvxr vg gung jnl!

--- linux-2.6.9-ppc-g4-peval/arch/ppc/Kconfig 2004-10-20 15:58:39.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/arch/ppc/Kconfig 2004-11-22 17:16:58.000000000 +0800
@@ -983,6 +983,8 @@

source "drivers/zorro/Kconfig"

+source kernel/power/Kconfig
+
endmenu

menu "Bus options"
--- linux-2.6.9-ppc-g4-peval/arch/ppc/kernel/Makefile 2004-10-20 15:58:40.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/arch/ppc/kernel/Makefile 2004-11-22 17:16:58.000000000 +0800
@@ -16,6 +16,7 @@
semaphore.o syscalls.o setup.o \
cputable.o ppc_htab.o
obj-$(CONFIG_6xx) += l2cr.o cpu_setup_6xx.o
+obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o
obj-$(CONFIG_POWER4) += cpu_setup_power4.o
obj-$(CONFIG_MODULES) += module.o ppc_ksyms.o
obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-mapping.o
--- /dev/null 2004-06-07 18:45:47.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/arch/ppc/kernel/swsusp.S 1904-01-01 08:47:25.000000000 +0706
@@ -0,0 +1,366 @@
+#include <linux/config.h>
+#include <linux/threads.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/offsets.h>
+
+
+/*
+ * Structure for storing CPU registers on the save area.
+ */
+#define SL_SP 0
+#define SL_PC 4
+#define SL_MSR 8
+#define SL_SDR1 0xc
+#define SL_SPRG0 0x10 /* 4 sprg's */
+#define SL_DBAT0 0x20
+#define SL_IBAT0 0x28
+#define SL_DBAT1 0x30
+#define SL_IBAT1 0x38
+#define SL_DBAT2 0x40
+#define SL_IBAT2 0x48
+#define SL_DBAT3 0x50
+#define SL_IBAT3 0x58
+#define SL_TB 0x60
+#define SL_R2 0x68
+#define SL_CR 0x6c
+#define SL_LR 0x70
+#define SL_R12 0x74 /* r12 to r31 */
+#define SL_SIZE (SL_R12 + 80)
+
+ .section .data
+ .align 5
+
+_GLOBAL(swsusp_save_area)
+ .space SL_SIZE
+
+
+ .section .text
+ .align 5
+
+_GLOBAL(swsusp_arch_suspend)
+
+ lis r11,swsusp_save_area@h
+ ori r11,r11,swsusp_save_area@l
+
+ mflr r0
+ stw r0,SL_LR(r11)
+ mfcr r0
+ stw r0,SL_CR(r11)
+ stw r1,SL_SP(r11)
+ stw r2,SL_R2(r11)
+ stmw r12,SL_R12(r11)
+
+ /* Save MSR & SDR1 */
+ mfmsr r4
+ stw r4,SL_MSR(r11)
+ mfsdr1 r4
+ stw r4,SL_SDR1(r11)
+
+ /* Get a stable timebase and save it */
+1: mftbu r4
+ stw r4,SL_TB(r11)
+ mftb r5
+ stw r5,SL_TB+4(r11)
+ mftbu r3
+ cmpw r3,r4
+ bne 1b
+
+ /* Save SPRGs */
+ mfsprg r4,0
+ stw r4,SL_SPRG0(r11)
+ mfsprg r4,1
+ stw r4,SL_SPRG0+4(r11)
+ mfsprg r4,2
+ stw r4,SL_SPRG0+8(r11)
+ mfsprg r4,3
+ stw r4,SL_SPRG0+12(r11)
+
+ /* Save BATs */
+ mfdbatu r4,0
+ stw r4,SL_DBAT0(r11)
+ mfdbatl r4,0
+ stw r4,SL_DBAT0+4(r11)
+ mfdbatu r4,1
+ stw r4,SL_DBAT1(r11)
+ mfdbatl r4,1
+ stw r4,SL_DBAT1+4(r11)
+ mfdbatu r4,2
+ stw r4,SL_DBAT2(r11)
+ mfdbatl r4,2
+ stw r4,SL_DBAT2+4(r11)
+ mfdbatu r4,3
+ stw r4,SL_DBAT3(r11)
+ mfdbatl r4,3
+ stw r4,SL_DBAT3+4(r11)
+ mfibatu r4,0
+ stw r4,SL_IBAT0(r11)
+ mfibatl r4,0
+ stw r4,SL_IBAT0+4(r11)
+ mfibatu r4,1
+ stw r4,SL_IBAT1(r11)
+ mfibatl r4,1
+ stw r4,SL_IBAT1+4(r11)
+ mfibatu r4,2
+ stw r4,SL_IBAT2(r11)
+ mfibatl r4,2
+ stw r4,SL_IBAT2+4(r11)
+ mfibatu r4,3
+ stw r4,SL_IBAT3(r11)
+ mfibatl r4,3
+ stw r4,SL_IBAT3+4(r11)
+
+#if 0
+ /* Backup various CPU config stuffs */
+ bl __save_cpu_setup
+#endif
+ /* Call the low level suspend stuff (we should probably have made
+ * a stackframe...)
+ */
+ bl swsusp_save
+
+ /* Restore LR from the save area */
+ lis r11,swsusp_save_area@h
+ ori r11,r11,swsusp_save_area@l
+ lwz r0,SL_LR(r11)
+ mtlr r0
+
+ blr
+
+
+/* Resume code */
+_GLOBAL(swsusp_arch_resume)
+
+ /* Stop pending altivec streams and memory accesses */
+BEGIN_FTR_SECTION
+ DSSALL
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+ sync
+
+ /* Disable MSR:DR to make sure we don't take a TLB or
+ * hash miss during the copy, as our hash table will
+ * for a while be unusable. For .text, we assume we are
+ * covered by a BAT. This works only for non-G5 at this
+ * point. G5 will need a better approach, possibly using
+ * a small temporary hash table filled with large mappings,
+ * disabling the MMU completely isn't a good option for
+ * performance reasons.
+ * (Note that 750's may have the same performance issue as
+ * the G5 in this case, we should investigate using moving
+ * BATs for these CPUs)
+ */
+ mfmsr r0
+ sync
+ rlwinm r0,r0,0,28,26 /* clear MSR_DR */
+ mtmsr r0
+ sync
+ isync
+
+ /* Load the pointer to the list of pages to copy (pagedir_nosave) into r9 */
+ lis r9,pagedir_nosave@ha
+ addi r9,r9,pagedir_nosave@l
+ tophys(r9,r9)
+ lwz r9,0(r9)
+#if 0
+ twi 31,r0,0 /* trigger trap */
+#endif
+ cmpwi r9,0
+ beq copy_loop_end /* empty list: nothing to copy */
+copy_loop: /* once per pagedir page; r9 = its (non-zero) virtual address */
+ tophys(r9,r9)
+ lwz r6,12(r9) /* r6 = word at offset 12 of the first entry: link to the next pagedir page (presumably dummy.val) */
+ li r10,0 /* r10 = index of the entry within this page */
+copy_one_pgdir: /* once per 16-byte entry; r9 = physical address of the entry */
+ lwz r11,0(r9) /* r11 = source page address (entry offset 0) */
+ addi r8,r10,1 /* r8 = index of the next entry */
+ cmpwi r11,0
+ addi r7,r9,16 /* r7 = address of the next entry */
+ beq copy_loop_end /* a zero source address ends the whole copy */
+ li r0,256
+ mtctr r0 /* 256 iterations of 16 bytes each */
+ lwz r9,4(r9) /* r9 = destination page address (entry offset 4) */
+#if 0
+ twi 31,r0,0 /* trigger trap */
+#endif
+ tophys(r10,r11)
+ tophys(r11,r9)
+copy_one_page: /* copy 16 bytes from r10 to r11 per iteration */
+ lwz r0,0(r10)
+ stw r0,0(r11)
+ lwz r9,4(r10)
+ stw r9,4(r11)
+ lwz r0,8(r10)
+ stw r0,8(r11)
+ lwz r9,12(r10)
+ addi r10,r10,16
+ stw r9,12(r11)
+ addi r11,r11,16
+ bdnz copy_one_page
+ mr r10,r8
+ cmplwi r10,254
+ mr r9,r7
+ ble copy_one_pgdir /* entries 0..254 of this page */
+ mr. r9,r6 /* record form: set cr0 from the link so the branch below tests it, not the stale cmplwi result */
+ bne copy_loop /* follow the link only when it is non-zero */
+copy_loop_end:
+
+ /* Do a very simple cache flush/inval of the L1 to ensure
+ * coherency of the icache
+ */
+ lis r3,0x0002
+ mtctr r3
+ li r3, 0
+1:
+ lwz r0,0(r3)
+ addi r3,r3,0x0020
+ bdnz 1b
+ isync
+ sync
+
+ /* Now flush those cache lines */
+ lis r3,0x0002
+ mtctr r3
+ li r3, 0
+1:
+ dcbf 0,r3
+ addi r3,r3,0x0020
+ bdnz 1b
+ sync
+
+ /* Ok, we are now running with the kernel data of the old
+ * kernel fully restored. We can get to the save area
+ * easily now. As for the rest of the code, it assumes the
+ * loader kernel and the booted one are exactly identical
+ */
+ lis r11,swsusp_save_area@h
+ ori r11,r11,swsusp_save_area@l
+ tophys(r11,r11)
+
+#if 0
+ /* Restore various CPU config stuffs */
+ bl __restore_cpu_setup
+#endif
+ /* Restore the BATs, and SDR1. Then we can turn on the MMU.
+ * This is a bit hairy as we are running out of those BATs,
+ * but first, our code is probably in the icache, and we are
+ * writing the same value to the BAT, so that should be fine,
+ * though a better solution will have to be found long-term
+ */
+ lwz r4,SL_SDR1(r11)
+ mtsdr1 r4
+ lwz r4,SL_SPRG0(r11)
+ mtsprg 0,r4
+ lwz r4,SL_SPRG0+4(r11)
+ mtsprg 1,r4
+ lwz r4,SL_SPRG0+8(r11)
+ mtsprg 2,r4
+ lwz r4,SL_SPRG0+12(r11)
+ mtsprg 3,r4
+
+#if 0
+ lwz r4,SL_DBAT0(r11)
+ mtdbatu 0,r4
+ lwz r4,SL_DBAT0+4(r11)
+ mtdbatl 0,r4
+ lwz r4,SL_DBAT1(r11)
+ mtdbatu 1,r4
+ lwz r4,SL_DBAT1+4(r11)
+ mtdbatl 1,r4
+ lwz r4,SL_DBAT2(r11)
+ mtdbatu 2,r4
+ lwz r4,SL_DBAT2+4(r11)
+ mtdbatl 2,r4
+ lwz r4,SL_DBAT3(r11)
+ mtdbatu 3,r4
+ lwz r4,SL_DBAT3+4(r11)
+ mtdbatl 3,r4
+ lwz r4,SL_IBAT0(r11)
+ mtibatu 0,r4
+ lwz r4,SL_IBAT0+4(r11)
+ mtibatl 0,r4
+ lwz r4,SL_IBAT1(r11)
+ mtibatu 1,r4
+ lwz r4,SL_IBAT1+4(r11)
+ mtibatl 1,r4
+ lwz r4,SL_IBAT2(r11)
+ mtibatu 2,r4
+ lwz r4,SL_IBAT2+4(r11)
+ mtibatl 2,r4
+ lwz r4,SL_IBAT3(r11)
+ mtibatu 3,r4
+ lwz r4,SL_IBAT3+4(r11)
+ mtibatl 3,r4
+#endif
+
+BEGIN_FTR_SECTION
+ li r4,0
+ mtspr SPRN_DBAT4U,r4
+ mtspr SPRN_DBAT4L,r4
+ mtspr SPRN_DBAT5U,r4
+ mtspr SPRN_DBAT5L,r4
+ mtspr SPRN_DBAT6U,r4
+ mtspr SPRN_DBAT6L,r4
+ mtspr SPRN_DBAT7U,r4
+ mtspr SPRN_DBAT7L,r4
+ mtspr SPRN_IBAT4U,r4
+ mtspr SPRN_IBAT4L,r4
+ mtspr SPRN_IBAT5U,r4
+ mtspr SPRN_IBAT5L,r4
+ mtspr SPRN_IBAT6U,r4
+ mtspr SPRN_IBAT6L,r4
+ mtspr SPRN_IBAT7U,r4
+ mtspr SPRN_IBAT7L,r4
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_HIGH_BATS)
+
+ /* Flush all TLBs */
+ lis r4,0x1000
+1: addic. r4,r4,-0x1000
+ tlbie r4
+ blt 1b
+ sync
+
+ /* restore the MSR and turn on the MMU */
+ lwz r3,SL_MSR(r11)
+ bl turn_on_mmu
+ tovirt(r11,r11)
+
+ /* Restore TB */
+ li r3,0
+ mttbl r3
+ lwz r3,SL_TB(r11)
+ lwz r4,SL_TB+4(r11)
+ mttbu r3
+ mttbl r4
+
+ /* Kick decrementer */
+ li r0,1
+ mtdec r0
+
+ /* Restore the callee-saved registers and return */
+ lwz r0,SL_CR(r11)
+ mtcr r0
+ lwz r2,SL_R2(r11)
+ lmw r12,SL_R12(r11)
+ lwz r1,SL_SP(r11)
+ lwz r0,SL_LR(r11)
+ mtlr r0
+
+ // XXX Note: we don't really need to call swsusp_resume
+
+ li r3,0
+ blr
+
+/* FIXME:This construct is actually not useful since we don't shut
+ * down the instruction MMU, we could just flip back MSR-DR on.
+ */
+turn_on_mmu:
+ mflr r4
+ mtsrr0 r4
+ mtsrr1 r3
+ sync
+ isync
+ rfi
+
--- /dev/null 2004-06-07 18:45:47.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/include/asm-ppc/suspend.h 2004-11-22 17:40:42.000000000 +0800
@@ -0,0 +1,12 @@
+static inline int arch_prepare_suspend(void)
+{
+ return 0;
+}
+
+static inline void save_processor_state(void)
+{
+}
+
+static inline void restore_processor_state(void)
+{
+}
--- linux-2.6.9-ppc-g4-peval/arch/ppc/kernel/signal.c 2004-10-20 15:58:41.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/arch/ppc/kernel/signal.c 2004-11-22 17:16:58.000000000 +0800
@@ -28,6 +28,7 @@
#include <linux/elf.h>
#include <linux/tty.h>
#include <linux/binfmts.h>
+#include <linux/suspend.h>
#include <asm/ucontext.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -604,6 +605,14 @@
unsigned long frame, newsp;
int signr, ret;

+ if (current->flags & PF_FREEZE) {
+ refrigerator(PF_FREEZE);
+ signr = 0;
+ ret = regs->gpr[3];
+ if (!signal_pending(current))
+ goto no_signal;
+ }
+
if (!oldset)
oldset = &current->blocked;

@@ -626,6 +635,7 @@
regs->gpr[3] = EINTR;
/* note that the cr0.SO bit is already set */
} else {
+no_signal:
regs->nip -= 4; /* Back up & retry system call */
regs->result = 0;
regs->trap = 0;
--- linux-2.6.9-ppc-g4-peval/arch/ppc/kernel/vmlinux.lds.S 2004-10-20 15:58:41.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/arch/ppc/kernel/vmlinux.lds.S 2004-11-22 17:16:58.000000000 +0800
@@ -74,6 +74,12 @@
CONSTRUCTORS
}

+ . = ALIGN(4096);
+ __nosave_begin = .;
+ .data_nosave : { *(.data.nosave) }
+ . = ALIGN(4096);
+ __nosave_end = .;
+
. = ALIGN(32);
.data.cacheline_aligned : { *(.data.cacheline_aligned) }

--- linux-2.6.9-ppc-g4-peval/arch/ppc/platforms/pmac_setup.c 2004-10-20 15:58:41.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/arch/ppc/platforms/pmac_setup.c 2004-11-22 17:36:22.000000000 +0800
@@ -51,6 +51,7 @@
#include <linux/irq.h>
#include <linux/seq_file.h>
#include <linux/root_dev.h>
+#include <linux/suspend.h>

#include <asm/reg.h>
#include <asm/sections.h>
@@ -70,6 +71,8 @@
#include <asm/pmac_feature.h>
#include <asm/time.h>
#include <asm/of_device.h>
+#include <asm/mmu_context.h>
+
#include "pmac_pic.h"
#include "mem_pieces.h"

@@ -420,11 +423,67 @@
#endif
}

+/* TODO: Merge the suspend-to-ram with the common code !!!
+ * currently, this is a stub implementation for suspend-to-disk
+ * only
+ */
+
+#ifdef CONFIG_PM
+
+extern void enable_kernel_altivec(void);
+
+static int pmac_pm_prepare(suspend_state_t state)
+{
+ printk(KERN_DEBUG "pmac_pm_prepare(%d)\n", state);
+
+ return 0;
+}
+
+static int pmac_pm_enter(suspend_state_t state)
+{
+ printk(KERN_DEBUG "pmac_pm_enter(%d)\n", state);
+
+ /* Give up the lazy FPU & vec so we don't have to back them
+ * up from the low level code
+ */
+ enable_kernel_fp();
+
+#ifdef CONFIG_ALTIVEC
+ if (cur_cpu_spec[0]->cpu_features & CPU_FTR_ALTIVEC)
+ enable_kernel_altivec();
+#endif /* CONFIG_ALTIVEC */
+
+ return 0;
+}
+
+static int pmac_pm_finish(suspend_state_t state)
+{
+ printk(KERN_DEBUG "pmac_pm_finish(%d)\n", state);
+
+ /* Restore userland MMU context */
+ set_context(current->active_mm->context, current->active_mm->pgd);
+
+ return 0;
+}
+
+static struct pm_ops pmac_pm_ops = {
+ .pm_disk_mode = PM_DISK_SHUTDOWN,
+ .prepare = pmac_pm_prepare,
+ .enter = pmac_pm_enter,
+ .finish = pmac_pm_finish,
+};
+
+#endif /* CONFIG_PM */
+
static int initializing = 1;

static int pmac_late_init(void)
{
initializing = 0;
+
+#ifdef CONFIG_PM
+ pm_set_ops(&pmac_pm_ops);
+#endif /* CONFIG_PM */
return 0;
}

--- linux-2.6.9-ppc-g4-peval/arch/ppc/syslib/open_pic.c 2004-10-20 15:58:42.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/arch/ppc/syslib/open_pic.c 2004-11-22 17:16:58.000000000 +0800
@@ -776,7 +776,8 @@
if (ISR[irq] == 0)
return;
if (!cpus_empty(keepmask)) {
- cpumask_t irqdest = { .bits[0] = openpic_read(&ISR[irq]->Destination) };
+ cpumask_t irqdest;
+ irqdest.bits[0] = openpic_read(&ISR[irq]->Destination);
cpus_and(irqdest, irqdest, keepmask);
cpus_or(physmask, physmask, irqdest);
}
--- linux-2.6.9-ppc-g4-peval/drivers/ide/ppc/pmac.c 2004-10-20 15:59:12.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/drivers/ide/ppc/pmac.c 2004-11-22 17:16:58.000000000 +0800
@@ -32,6 +32,7 @@
#include <linux/notifier.h>
#include <linux/reboot.h>
#include <linux/pci.h>
+#include <linux/pm.h>
#include <linux/adb.h>
#include <linux/pmu.h>

@@ -1364,7 +1365,7 @@
ide_hwif_t *hwif = (ide_hwif_t *)dev_get_drvdata(&mdev->ofdev.dev);
int rc = 0;

- if (state != mdev->ofdev.dev.power_state && state >= 2) {
+ if (state != mdev->ofdev.dev.power_state && state == PM_SUSPEND_MEM) {
rc = pmac_ide_do_suspend(hwif);
if (rc == 0)
mdev->ofdev.dev.power_state = state;
@@ -1472,7 +1473,7 @@
ide_hwif_t *hwif = (ide_hwif_t *)pci_get_drvdata(pdev);
int rc = 0;

- if (state != pdev->dev.power_state && state >= 2) {
+ if (state != pdev->dev.power_state && state == PM_SUSPEND_MEM ) {
rc = pmac_ide_do_suspend(hwif);
if (rc == 0)
pdev->dev.power_state = state;
--- linux-2.6.9-ppc-g4-peval/drivers/macintosh/Kconfig 2004-10-20 15:53:31.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/drivers/macintosh/Kconfig 2004-11-22 17:16:58.000000000 +0800
@@ -80,7 +80,7 @@

config PMAC_PBOOK
bool "Power management support for PowerBooks"
- depends on ADB_PMU
+ depends on PM && ADB_PMU
---help---
This provides support for putting a PowerBook to sleep; it also
enables media bay support. Power management works on the
@@ -97,11 +97,6 @@
have it autoloaded. The act of removing the module shuts down the
sound hardware for more power savings.

-config PM
- bool
- depends on PPC_PMAC && ADB_PMU && PMAC_PBOOK
- default y
-
config PMAC_APM_EMU
tristate "APM emulation"
depends on PMAC_PBOOK
--- linux-2.6.9-ppc-g4-peval/drivers/macintosh/mediabay.c 2004-10-20 15:53:32.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/drivers/macintosh/mediabay.c 2004-11-22 17:16:58.000000000 +0800
@@ -713,7 +713,7 @@
{
struct media_bay_info *bay = macio_get_drvdata(mdev);

- if (state != mdev->ofdev.dev.power_state && state >= 2) {
+ if (state != mdev->ofdev.dev.power_state && state == PM_SUSPEND_MEM) {
down(&bay->lock);
bay->sleeping = 1;
set_mb_power(bay, 0);
--- linux-2.6.9-ppc-g4-peval/drivers/macintosh/therm_adt746x.c 2004-10-20 15:59:24.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/drivers/macintosh/therm_adt746x.c 2004-11-22 17:16:58.000000000 +0800
@@ -22,6 +22,7 @@
#include <linux/spinlock.h>
#include <linux/smp_lock.h>
#include <linux/wait.h>
+#include <linux/suspend.h>
#include <asm/prom.h>
#include <asm/machdep.h>
#include <asm/io.h>
@@ -238,6 +239,11 @@
#endif
while(!kthread_should_stop())
{
+ if (current->flags & PF_FREEZE) {
+ printk(KERN_INFO "therm_adt746x: freezing thermostat\n");
+ refrigerator(PF_FREEZE);
+ }
+
msleep_interruptible(2000);

/* Check status */
--- linux-2.6.9-ppc-g4-peval/drivers/macintosh/therm_pm72.c 2004-10-20 15:53:32.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/drivers/macintosh/therm_pm72.c 2004-11-22 17:16:58.000000000 +0800
@@ -88,6 +88,7 @@
#include <linux/spinlock.h>
#include <linux/smp_lock.h>
#include <linux/wait.h>
+#include <linux/suspend.h>
#include <linux/reboot.h>
#include <linux/kmod.h>
#include <linux/i2c.h>
@@ -1044,6 +1045,11 @@
while (state == state_attached) {
unsigned long elapsed, start;

+ if (current->flags & PF_FREEZE) {
+ printk(KERN_INFO "therm_pm72: freezing thermostat\n");
+ refrigerator(PF_FREEZE);
+ }
+
start = jiffies;

down(&driver_lock);
--- linux-2.6.9-ppc-g4-peval/drivers/macintosh/via-pmu.c 2004-10-20 15:59:24.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/drivers/macintosh/via-pmu.c 2004-11-22 17:16:58.000000000 +0800
@@ -43,6 +43,7 @@
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/device.h>
+#include <linux/sysdev.h>
#include <linux/suspend.h>
#include <linux/syscalls.h>
#include <asm/prom.h>
@@ -2326,7 +2327,7 @@
/* Sync the disks. */
/* XXX It would be nice to have some way to ensure that
* nobody is dirtying any new buffers while we wait. That
- * could be acheived using the refrigerator for processes
+ * could be achieved using the refrigerator for processes
* that swsusp uses
*/
sys_sync();
@@ -2379,7 +2380,6 @@

/* Wait for completion of async backlight requests */
while (!bright_req_1.complete || !bright_req_2.complete ||
-
!batt_req.complete)
pmu_poll();

@@ -3048,6 +3048,88 @@
}
#endif /* DEBUG_SLEEP */

+
+/* FIXME: This is a temporary set of callbacks to enable us
+ * to do suspend-to-disk.
+ */
+
+#ifdef CONFIG_PM
+
+static int pmu_sys_suspended = 0;
+
+static int pmu_sys_suspend(struct sys_device *sysdev, pm_message_t state)
+{
+ if (state != PMSG_FREEZE || pmu_sys_suspended)
+ return 0;
+
+ /* Suspend PMU event interrupts */
+ pmu_suspend();
+
+ pmu_sys_suspended = 1;
+ return 0;
+}
+
+static int pmu_sys_resume(struct sys_device *sysdev)
+{
+ struct adb_request req;
+
+ if (!pmu_sys_suspended)
+ return 0;
+
+ /* Tell PMU we are ready */
+ pmu_request(&req, NULL, 2, PMU_SYSTEM_READY, 2);
+ pmu_wait_complete(&req);
+
+ /* Resume PMU event interrupts */
+ pmu_resume();
+
+ pmu_sys_suspended = 0;
+
+ return 0;
+}
+
+#endif /* CONFIG_PM */
+
+static struct sysdev_class pmu_sysclass = {
+ set_kset_name("pmu"),
+};
+
+static struct sys_device device_pmu = {
+ .id = 0,
+ .cls = &pmu_sysclass,
+};
+
+static struct sysdev_driver driver_pmu = {
+#ifdef CONFIG_PM
+ .suspend = &pmu_sys_suspend,
+ .resume = &pmu_sys_resume,
+#endif /* CONFIG_PM */
+};
+
+static int __init init_pmu_sysfs(void)
+{
+ int rc;
+
+ rc = sysdev_class_register(&pmu_sysclass);
+ if (rc) {
+ printk(KERN_ERR "Failed registering PMU sys class\n");
+ return -ENODEV;
+ }
+ rc = sysdev_register(&device_pmu);
+ if (rc) {
+ printk(KERN_ERR "Failed registering PMU sys device\n");
+ return -ENODEV;
+ }
+ rc = sysdev_driver_register(&pmu_sysclass, &driver_pmu);
+ if (rc) {
+ printk(KERN_ERR "Failed registering PMU sys driver\n");
+ return -ENODEV;
+ }
+ return 0;
+}
+
+subsys_initcall(init_pmu_sysfs);
+
EXPORT_SYMBOL(pmu_request);
EXPORT_SYMBOL(pmu_poll);
EXPORT_SYMBOL(pmu_poll_adb);
--- linux-2.6.9-ppc-g4-peval/drivers/video/aty/radeon_pm.c 2004-10-20 15:55:34.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/drivers/video/aty/radeon_pm.c 2004-11-22 17:16:58.000000000 +0800
@@ -859,6 +859,10 @@
* know we'll be rebooted, ...
*/

+#if 0 /* this breaks suspend to ram until the dust settles... */
+ if (state != PM_SUSPEND_MEM)
+#endif
+ return 0;
printk(KERN_DEBUG "radeonfb: suspending to state: %d...\n", state);

acquire_console_sem();
--
--
Hu Gang / Steve
Linux Registered User 204016
GPG Public Key: http://soulinfo.com/~hugang/hugang.asc

2004-11-22 17:15:39

by Hu Gang

[permalink] [raw]
Subject: [PATH] swsusp update 2/3

On Mon, Nov 22, 2004 at 12:02:47PM +0100, Pavel Machek wrote:
> Hi!
>
> > > Yes, I'd like to get rid of "too many continuous pages" problem
> > > before. Small problem is that it needs to update x86-64 too, but I
> > I have not x86-64, so I have no chance to do it.
>
> I have access to x86-64, so I can do it...
> Pavel
> --
> People were complaining that M$ turns users into beta-testers...
> ...jr ghea gurz vagb qrirybcref, naq gurl frrz gb yvxr vg gung jnl!

--- linux-2.6.9-ppc-g4-peval/arch/i386/power/swsusp.S 2004-10-20 15:58:34.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/arch/i386/power/swsusp.S 2004-11-22 17:17:19.000000000 +0800
@@ -31,25 +31,59 @@
movl $swsusp_pg_dir-__PAGE_OFFSET,%ecx
movl %ecx,%cr3

- movl pagedir_nosave, %ebx
- xorl %eax, %eax
- xorl %edx, %edx
+ mov pagedir_nosave, %edx
+ test %edx, %edx
+ mov %edx, swsusp_pbe_pgdir
+ je copy_loop_end
+
+copy_loop_start:
+ mov swsusp_pbe_pgdir, %edx
+ mov 0xc(%edx), %eax
+ mov %eax, swsusp_pbe_next
+ xor %eax, %eax
+ mov %eax, swsusp_pbe_nums
+
+ lea 0x0(%esi,1), %esi
+ lea 0x0(%edi,1), %edi
+ mov 0x4(%edx),%eax
+ test %eax, %eax
+ je copy_loop_end
.p2align 4,,7

-copy_loop:
- movl 4(%ebx,%edx),%edi
- movl (%ebx,%edx),%esi
-
- movl $1024, %ecx
- rep
- movsl
-
- incl %eax
- addl $16, %edx
- cmpl nr_copy_pages,%eax
- jb copy_loop
+copy_one_pgdir:
+ xor %ecx, %ecx
+ lea 0x0(%esi,1), %esi
.p2align 4,,7

+copy_one_page:
+ mov 0x4(%edx), %eax
+ mov (%edx), %edx
+ mov (%edx,%ecx,4), %edx
+ mov %edx,(%eax,%ecx,4)
+ inc %ecx
+ cmp $0x3ff, %ecx
+ ja copy_one_pgdir_end
+ mov swsusp_pbe_pgdir, %edx
+ jmp copy_one_page
+ .p2align 4,,7
+
+copy_one_pgdir_end:
+ mov swsusp_pbe_nums, %eax
+ mov swsusp_pbe_pgdir, %edx
+ inc %eax
+ mov %eax, swsusp_pbe_nums
+
+ add $0x10, %edx
+ cmp $0xfe, %eax
+ mov %edx, swsusp_pbe_pgdir
+
+ jbe copy_one_pgdir
+ mov swsusp_pbe_next, %eax
+ test %eax, %eax
+ mov %eax, swsusp_pbe_pgdir
+ jne copy_loop_start
+copy_loop_end:
+
movl saved_context_esp, %esp
movl saved_context_ebp, %ebp
movl saved_context_ebx, %ebx

--
--
Hu Gang / Steve
Linux Registered User 204016
GPG Public Key: http://soulinfo.com/~hugang/hugang.asc

2004-11-22 17:23:25

by Hu Gang

[permalink] [raw]
Subject: [PATH] swsusp update 1/3

On Mon, Nov 22, 2004 at 12:02:47PM +0100, Pavel Machek wrote:
> Hi!
>
> > > Yes, I'd like to get rid of "too many continuous pages" problem
> > > before. Small problem is that it needs to update x86-64 too, but I
> > I have not x86-64, so I have no chance to do it.
>
> I have access to x86-64, so I can do it...
> Pavel

OK, now I have finished the ppc part; it works. :)

Here are all of the patches, relative to your big diff.
core.diff - swsusp core part.
i386.diff - i386 part.
ppc.diff - PowerPC part.

Now we have an option in /proc/sys/kernel/swsusp_pagecache: if it is
set, swsusp uses the page cache; otherwise it does not.

Please test and comment. Thanks.


--- linux-2.6.9-ppc-g4-peval/include/linux/reboot.h 2004-06-16 13:20:26.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/include/linux/reboot.h 2004-11-22 17:16:58.000000000 +0800
@@ -42,6 +42,8 @@
extern int register_reboot_notifier(struct notifier_block *);
extern int unregister_reboot_notifier(struct notifier_block *);

+/* For use by swsusp only */
+extern struct notifier_block *reboot_notifier_list;

/*
* Architecture-specific implementations of sys_reboot commands.
--- linux-2.6.9-ppc-g4-peval/include/linux/suspend.h 2004-11-22 17:11:35.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/include/linux/suspend.h 2004-11-22 17:16:58.000000000 +0800
@@ -1,7 +1,7 @@
#ifndef _LINUX_SWSUSP_H
#define _LINUX_SWSUSP_H

-#ifdef CONFIG_X86
+#if (defined CONFIG_X86) || (defined CONFIG_PPC32)
#include <asm/suspend.h>
#endif
#include <linux/swap.h>
--- linux-2.6.9-ppc-g4-peval/kernel/power/disk.c 2004-11-22 17:11:35.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/kernel/power/disk.c 2004-11-22 17:17:19.000000000 +0800
@@ -16,6 +16,7 @@
#include <linux/device.h>
#include <linux/delay.h>
#include <linux/fs.h>
+#include <linux/reboot.h>
#include <linux/device.h>
#include "power.h"

@@ -29,6 +30,8 @@
extern int swsusp_resume(void);
extern int swsusp_free(void);

+extern int write_page_caches(void);
+extern int read_page_caches(void);

static int noresume = 0;
char resume_file[256] = CONFIG_PM_STD_PARTITION;
@@ -48,14 +51,16 @@
unsigned long flags;
int error = 0;

- local_irq_save(flags);
switch(mode) {
case PM_DISK_PLATFORM:
- device_power_down(PMSG_SUSPEND);
+ /* device_power_down(PMSG_SUSPEND); */
+ local_irq_save(flags);
error = pm_ops->enter(PM_SUSPEND_DISK);
+ local_irq_restore(flags);
break;
case PM_DISK_SHUTDOWN:
printk("Powering off system\n");
+ notifier_call_chain(&reboot_notifier_list, SYS_POWER_OFF, NULL);
device_shutdown();
machine_power_off();
break;
@@ -106,6 +111,7 @@
}
}

+
static inline void platform_finish(void)
{
if (pm_disk_mode == PM_DISK_PLATFORM) {
@@ -118,13 +124,14 @@
{
device_resume();
platform_finish();
+ read_page_caches();
enable_nonboot_cpus();
thaw_processes();
pm_restore_console();
}


-static int prepare(void)
+static int prepare(int resume)
{
int error;

@@ -144,9 +151,13 @@
}

/* Free memory before shutting down devices. */
- free_some_memory();
+ /* free_some_memory(); */

disable_nonboot_cpus();
+ if (!resume)
+ if ((error = write_page_caches())) {
+ goto Finish;
+ }
if ((error = device_suspend(PMSG_FREEZE))) {
printk("Some devices failed to suspend\n");
goto Finish;
@@ -176,7 +187,7 @@
{
int error;

- if ((error = prepare()))
+ if ((error = prepare(0)))
return error;

pr_debug("PM: Attempting to suspend to disk.\n");
@@ -233,7 +244,7 @@

pr_debug("PM: Preparing system for restore.\n");

- if ((error = prepare()))
+ if ((error = prepare(1)))
goto Free;

barrier();
--- linux-2.6.9-ppc-g4-peval/kernel/power/main.c 2004-11-22 17:11:35.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/kernel/power/main.c 2004-11-22 17:16:58.000000000 +0800
@@ -4,7 +4,7 @@
* Copyright (c) 2003 Patrick Mochel
* Copyright (c) 2003 Open Source Development Lab
*
- * This file is release under the GPLv2
+ * This file is released under the GPLv2
*
*/

--- linux-2.6.9-ppc-g4-peval/kernel/power/swsusp.c 2004-11-22 17:11:35.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/kernel/power/swsusp.c 2004-11-23 00:44:27.000000000 +0800
@@ -74,9 +74,6 @@
/* References to section boundaries */
extern char __nosave_begin, __nosave_end;

-/* Variables to be preserved over suspend */
-static int pagedir_order_check;
-
extern char resume_file[];
static dev_t resume_device;
/* Local variables that should not be affected by save */
@@ -97,7 +94,6 @@
*/
suspend_pagedir_t *pagedir_nosave __nosavedata = NULL;
static suspend_pagedir_t *pagedir_save;
-static int pagedir_order __nosavedata = 0;

#define SWSUSP_SIG "S1SUSPEND"

@@ -223,7 +219,148 @@
swap_list_unlock();
}

+#define ONE_PAGE_PBE_NUM ( PAGE_SIZE / sizeof(struct pbe) - 1)
+
+/* callback invoked for each pagedir page */
+typedef int (*susp_pgdir_t)(suspend_pagedir_t *cur, void *fun, void *arg);
+
+static int inline for_each_pgdir(struct pbe *pbe, susp_pgdir_t fun,
+ void *subfun, void *arg)
+{
+ suspend_pagedir_t *pgdir = pbe;
+ int error = 0;
+
+ while (pgdir != NULL) {
+ suspend_pagedir_t *next = (suspend_pagedir_t *)pgdir->dummy.val;
+ pr_debug("next %p, cur %p\n", next, pgdir);
+ error = fun(pgdir, subfun, arg);
+ if (error) return error;
+ pgdir = next;
+ }
+
+ return (0);
+}
+
+/* free one pagedir */
+static int free_one_pagedir(suspend_pagedir_t *pgdir, void *fun, void *arg)
+{
+ free_page((unsigned long)pgdir);
+ return (0);
+}
+
+typedef int (*swsup_pbe_t)(struct pbe *bpe, void *p);
+
+static int for_pbe_one_pgdir(suspend_pagedir_t *pgdir, void *_fun,
+ void *arg)
+{
+ unsigned int nums;
+ swsup_pbe_t fun = _fun;
+ int error = 0;
+
+ pr_debug("for_pbe_one_pgdir: %p, %p, %p\n", pgdir, _fun, arg);
+ for (nums = 0; nums < ONE_PAGE_PBE_NUM; nums++) {
+ error = fun(pgdir, arg);
+ pgdir ++;
+ if (error) return error;
+ }
+
+ return (0);
+}
+
+static int for_each_pbe(struct pbe *pbe, swsup_pbe_t fun, void *p)
+{
+ return for_each_pgdir(pbe, for_pbe_one_pgdir, fun, p);
+}
+
+unsigned long swsusp_pbe_nums __nosavedata;
+suspend_pagedir_t *swsusp_pbe_pgdir __nosavedata, *swsusp_pbe_next __nosavedata;
+
+/*
+ * for_each_pbe_copy_back
+ *
+ * This is useful as a reference when writing the copy-back code in assembly.
+ *
+ */
+/* #define CREATE_ASM_CODE */
+#ifdef CREATE_ASM_CODE
+asmlinkage void for_each_pbe_copy_back_i386(void)
+{
+ swsusp_pbe_pgdir = pagedir_nosave;
+ while (swsusp_pbe_pgdir != NULL) {
+ swsusp_pbe_next = (suspend_pagedir_t *)swsusp_pbe_pgdir->dummy.val;
+ for (swsusp_pbe_nums = 0;
+ swsusp_pbe_nums < ONE_PAGE_PBE_NUM;
+ swsusp_pbe_nums++) {
+ register unsigned long i;
+ if (swsusp_pbe_pgdir->orig_address == 0) return;
+ for (i = 0; i < PAGE_SIZE / (sizeof(unsigned long)); i+=4) {
+ *(((unsigned long *)(swsusp_pbe_pgdir->orig_address) + i)) =
+ *(((unsigned long *)(swsusp_pbe_pgdir->address) + i));
+ *(((unsigned long *)(swsusp_pbe_pgdir->orig_address) + i+1)) =
+ *(((unsigned long *)(swsusp_pbe_pgdir->address) + i+1));
+ *(((unsigned long *)(swsusp_pbe_pgdir->orig_address) + i+2)) =
+ *(((unsigned long *)(swsusp_pbe_pgdir->address) + i+2));
+ *(((unsigned long *)(swsusp_pbe_pgdir->orig_address) + i+3)) =
+ *(((unsigned long *)(swsusp_pbe_pgdir->address) + i+3));
+ }
+ swsusp_pbe_pgdir ++;
+ }
+ swsusp_pbe_pgdir = swsusp_pbe_next;
+ }
+}
+/*
+ * PowerPC version
+ * which works in real mode and has lots of registers
+ */
+asmlinkage void for_each_pbe_copy_back_ppc(void)
+{
+ struct pbe *pgdir = pagedir_nosave, *next;
+
+ while (pgdir != NULL) {
+ unsigned int nums;
+ pgdir = (struct pbe *)__pa(pgdir);
+ next = (suspend_pagedir_t *)pgdir->dummy.val;
+ for (nums = 0; nums < ONE_PAGE_PBE_NUM; nums++) {
+ register unsigned long i;
+ unsigned long *orig, *copy;
+ copy = pgdir->address;
+ if (copy == 0) goto end;
+ copy = __pa(copy);
+ orig = __pa(pgdir->orig_address);
+ for (i = 0; i < PAGE_SIZE / (sizeof(unsigned long)); i += 4) {
+ *(orig + i) = *(copy + i);
+ *(orig + i+1) = *(copy + i+1);
+ *(orig + i+2) = *(copy + i+2);
+ *(orig + i+3) = *(copy + i+3);
+ }
+ pgdir ++;
+ }
+ pgdir = next;
+ }
+end:
+ panic("");
+}
+#endif
+
+static struct pbe *find_pbe_by_index(int index, struct pbe *pgdir)
+{
+ unsigned int nums = ONE_PAGE_PBE_NUM;
+ suspend_pagedir_t *next, *ret = NULL;
+
+ pr_debug("find_pbe_by_index %d, %p\n", index, pgdir);
+ while (pgdir != NULL) {
+ if (index < nums) {
+ ret = pgdir + (index % ONE_PAGE_PBE_NUM);
+ break;
+ }
+ next = (suspend_pagedir_t *)pgdir->dummy.val;
+ nums += ONE_PAGE_PBE_NUM;
+ pgdir = next;
+ }
+ pr_debug("find_pbe index %d -> %p\n", index, ret);

+ return (ret);
+}

/**
* write_swap_page - Write one page to a fresh swap location.
@@ -257,6 +394,20 @@
return error;
}

+static int data_free_pbe(struct pbe *p, void *tmp)
+{
+ swp_entry_t entry;
+
+ if (swp_offset(p->swap_address)== 0) return -1;
+
+ (*(int*)tmp) ++;
+ entry = p->swap_address;
+ if (entry.val)
+ swap_free(entry);
+ p->swap_address = (swp_entry_t){0};
+
+ return (0);
+}

/**
* data_free - Free the swap entries used by the saved image.
@@ -267,43 +418,56 @@

static void data_free(void)
{
- swp_entry_t entry;
- int i;
+ int i = 0;
+ for_each_pbe(pagedir_nosave, data_free_pbe, &i);
+ BUG_ON( i != nr_copy_pages);
+}

- for (i = 0; i < nr_copy_pages; i++) {
- entry = (pagedir_nosave + i)->swap_address;
- if (entry.val)
- swap_free(entry);
- else
- break;
- (pagedir_nosave + i)->swap_address = (swp_entry_t){0};
- }
+static int mod_progress = 1;
+
+static void inline mod_printk_progress(int i)
+{
+ if (mod_progress == 0) mod_progress = 1;
+ if (!(i%100))
+ printk( "\b\b\b\b%3d%%", i / mod_progress );
}

+static int write_one_pbe(struct pbe *p, void *tmp)
+{
+ int error = 0, i = *(int*)tmp;
+
+ mod_printk_progress(i);
+
+ pr_debug("write_one_pbe: %p, %p %p ",
+ p, (void *)p->address, (void *)p->orig_address);
+ if (p->orig_address == 0) return -1;
+
+ (*(int*)tmp) ++;
+ error = write_page(p->address, &p->swap_address);
+ if (error) return error;
+
+ pr_debug("%lu\n", swp_offset(p->swap_address));
+
+ return (0);
+}

/**
* data_write - Write saved image to swap.
*
* Walk the list of pages in the image and sync each one to swap.
*/
-
static int data_write(void)
{
- int error = 0;
- int i;
- unsigned int mod = nr_copy_pages / 100;
-
- if (!mod)
- mod = 1;
+ int i = 0, error;
+
+ mod_progress = nr_copy_pages / 100;

- printk( "Writing data to swap (%d pages)... ", nr_copy_pages );
- for (i = 0; i < nr_copy_pages && !error; i++) {
- if (!(i%mod))
- printk( "\b\b\b\b%3d%%", i / mod );
- error = write_page((pagedir_nosave+i)->address,
- &((pagedir_nosave+i)->swap_address));
- }
+ printk( "Writing data to swap (%d pages)... ", nr_copy_pages);
+ error = for_each_pbe(pagedir_nosave, write_one_pbe, &i);
printk("\b\b\b\bdone\n");
+
+ if (i == nr_copy_pages) return (0);
+
return error;
}

@@ -363,6 +527,15 @@
swap_free(swsusp_info.pagedir[i]);
}

+static int write_one_pagedir(suspend_pagedir_t *pgdir,
+ void *fun, void *arg)
+{
+ int i = *(int *)arg;
+
+ (*(int *)arg) ++;
+
+ return write_page((unsigned long)pgdir, &swsusp_info.pagedir[i]);
+}

/**
* write_pagedir - Write the array of pages holding the page directory.
@@ -371,15 +544,12 @@

static int write_pagedir(void)
{
- unsigned long addr = (unsigned long)pagedir_nosave;
- int error = 0;
- int n = SUSPEND_PD_PAGES(nr_copy_pages);
- int i;
+ int error = 0, n = 0;

- swsusp_info.pagedir_pages = n;
+ error = for_each_pgdir(pagedir_nosave, write_one_pagedir, NULL, &n);
printk( "Writing pagedir (%d pages)\n", n);
- for (i = 0; i < n && !error; i++, addr += PAGE_SIZE)
- error = write_page(addr, &swsusp_info.pagedir[i]);
+ swsusp_info.pagedir_pages = n;
+
return error;
}

@@ -504,6 +674,366 @@
return 0;
}

+typedef int (*do_page_t)(struct page *page, int p);
+
+static int foreach_zone_page(struct zone *zone, do_page_t fun, int p)
+{
+ int inactive = 0, active = 0;
+
+ spin_lock_irq(&zone->lru_lock);
+ if (zone->nr_inactive) {
+ struct list_head * entry = zone->inactive_list.prev;
+ while (entry != &zone->inactive_list) {
+ if (fun) {
+ struct page * page = list_entry(entry, struct page, lru);
+ inactive += fun(page, p);
+ } else {
+ inactive ++;
+ }
+ entry = entry->prev;
+ }
+ }
+ if (zone->nr_active) {
+ struct list_head * entry = zone->active_list.prev;
+ while (entry != &zone->active_list) {
+ if (fun) {
+ struct page * page = list_entry(entry, struct page, lru);
+ active += fun(page, p);
+ } else {
+ active ++;
+ }
+ entry = entry->prev;
+ }
+ }
+ spin_unlock_irq(&zone->lru_lock);
+
+ return (active + inactive);
+}
+
+/* enable/disable pagecache suspend */
+int swsusp_pagecache = 1;
+
+/* I'll move this to include/linux/page-flags.h */
+#define PG_pcs (PG_nosave_free + 1)
+
+#define SetPagePcs(page) set_bit(PG_pcs, &(page)->flags)
+#define ClearPagePcs(page) clear_bit(PG_pcs, &(page)->flags)
+#define PagePcs(page) test_bit(PG_pcs, &(page)->flags)
+
+static suspend_pagedir_t *pagedir_cache = NULL;
+static int nr_copy_pcs = 0;
+
+static int setup_pcs_pe(struct page *page, int setup)
+{
+ unsigned long pfn = page_to_pfn(page);
+
+ BUG_ON(PageReserved(page) && PageNosave(page));
+ if (!pfn_valid(pfn)) {
+ printk("not valid page\n");
+ return 0;
+ }
+ if (PageNosave(page)) {
+ printk("nosave\n");
+ return 0;
+ }
+ if (PageReserved(page) /*&& pfn_is_nosave(pfn)*/) {
+ printk("[nosave]\n");
+ return 0;
+ }
+ if (PageSlab(page)) {
+ printk("slab\n");
+ return (0);
+ }
+ if (setup) {
+ struct pbe *p = find_pbe_by_index(nr_copy_pcs, pagedir_cache);
+ p->address = (long)page_address(page);
+ pr_debug("setup_pcs: cur %p, addr %p, next %p, nr %u\n",
+ (void*)p, (void*)p->address,
+ (void*)p->orig_address, nr_copy_pcs);
+ nr_copy_pcs ++;
+ }
+ SetPagePcs(page);
+
+ return (1);
+}
+
+static int count_pcs(struct zone *zone, int p)
+{
+ if (swsusp_pagecache)
+ return foreach_zone_page(zone, setup_pcs_pe, p);
+ return (0);
+}
+
+static int check_pbe_addr(struct pbe *p, void *addr)
+{
+ unsigned long addre = (unsigned long)addr + PAGE_SIZE;
+
+ if (p->orig_address == (unsigned long)0) return 0;
+ if (p->orig_address >= (unsigned long)addr && p->orig_address < addre)
+ return 1;
+ return 0;
+}
+
+
+/*
+ * struct pbe fields as redefined for the PageCache pagedir.
+ *
+ * struct pbe {
+ * unsigned long address;
+ * unsigned long orig_address; pointer of next struct pbe
+ * swp_entry_t swap_address;
+ * swp_entry_t dummy; current index
+ * }
+ *
+ */
+static suspend_pagedir_t * alloc_one_pagedir(suspend_pagedir_t *prev,
+ suspend_pagedir_t *collide)
+{
+ suspend_pagedir_t *pgdir = NULL;
+ int i;
+
+ pgdir = (suspend_pagedir_t *)
+ __get_free_pages(GFP_ATOMIC | __GFP_COLD, 0);
+ if (!pgdir) {
+ return NULL;
+ }
+
+ if (collide) {
+ while (for_each_pbe((struct pbe *)collide, check_pbe_addr, pgdir)) {
+ free_page((unsigned long)pgdir);
+ pgdir = (suspend_pagedir_t *)
+ __get_free_pages(GFP_ATOMIC | __GFP_COLD, 0);
+ if (!pgdir) {
+ return NULL;
+ }
+ }
+ }
+
+ pr_debug("pgdir: %p, %p, %d\n", pgdir, prev, sizeof(suspend_pagedir_t));
+ memset(pgdir, 0, PAGE_SIZE);
+ for (i = 0; i < ONE_PAGE_PBE_NUM ; i++) {
+ pgdir[i].dummy.val = (unsigned long)NULL;
+ pgdir[i].address = 0;
+ pgdir[i].orig_address = 0;
+ if (prev == NULL) continue;
+ prev[i].dummy.val= (unsigned long)pgdir;
+ }
+
+ return (pgdir);
+}
+
+static int alloc_pagedir(struct pbe **pbe, int pbe_nums, struct pbe *collide)
+{
+ unsigned int nums = 0, alloc_nums = 1;
+ suspend_pagedir_t *prev, *cur = NULL;
+
+ /* alloc pagedir head */
+ prev = alloc_one_pagedir(NULL, collide);
+ if (!prev) {
+ return -ENOMEM;
+ }
+ *pbe = prev;
+
+ for (nums = ONE_PAGE_PBE_NUM; nums < pbe_nums; nums += ONE_PAGE_PBE_NUM) {
+ cur = alloc_one_pagedir(prev, collide);
+ if (!cur) {
+ goto no_mem;
+ }
+ prev = cur;
+ alloc_nums ++;
+ }
+ return alloc_nums;
+
+no_mem:
+ for_each_pgdir(*pbe, free_one_pagedir, NULL, NULL);
+ *pbe = NULL;
+
+ return (-ENOMEM);
+}
+
+int bio_read_page(pgoff_t page_off, void * page);
+
+static int pagecache_read_pbe(struct pbe *p, void *tmp)
+{
+ int error = 0, i = *(int*)tmp;
+ swp_entry_t entry;
+
+ mod_printk_progress(i);
+
+ if (swp_offset(p->swap_address)== 0) return -1;
+
+ (*(int*)tmp) ++;
+ pr_debug("pagecache_read_pbe: %p %p %lu\n",
+ (void *)p->address, (void *)p->orig_address,
+ swp_offset(p->swap_address));
+
+ error = bio_read_page(swp_offset(p->swap_address), (void *)p->address);
+ if (error) return error;
+
+ entry = p->swap_address;
+ if (entry.val)
+ swap_free(entry);
+
+ return (0);
+}
+
+int read_page_caches(void)
+{
+ int error = 0, i = 0;
+
+ if (swsusp_pagecache == 0) return (0);
+
+ mod_progress = nr_copy_pcs / 100;
+
+ printk( "Reading PageCaches from swap (%d pages)... ", nr_copy_pcs);
+ error = for_each_pbe(pagedir_cache, pagecache_read_pbe, &i);
+ printk("\b\b\b\bdone\n");
+
+ for_each_pgdir(pagedir_cache, free_one_pagedir, NULL, NULL);
+
+ if (i == nr_copy_pcs) return (0);
+
+ return error;
+}
+
+static int pagecache_write_pbe(struct pbe *p, void *tmp)
+{
+ int error = 0, i = *(int*)tmp;
+
+ mod_printk_progress(i);
+
+ pr_debug("pagecache_write_pbe: %p, %p %p ",
+ p, (void *)p->address, (void *)p->orig_address);
+ if (p->address == 0) return -1;
+
+ (*(int*)tmp) ++;
+ error = write_page(p->address, &p->swap_address);
+ if (error) return error;
+
+ pr_debug("%lu\n", swp_offset(p->swap_address));
+
+ return (0);
+}
+
+static int pcs_write(void)
+{
+ int i = 0, error;
+
+ mod_progress = nr_copy_pcs / 100;
+
+ printk( "Writing PageCaches to swap (%d pages)... ", nr_copy_pcs);
+ error = for_each_pbe(pagedir_cache, pagecache_write_pbe, &i);
+ printk("\b\b\b\bdone\n");
+
+ if (i == nr_copy_pcs) return (0);
+
+ return error;
+}
+
+static int setup_pagedir_pbe(void)
+{
+ struct zone *zone;
+
+ nr_copy_pcs = 0;
+ for_each_zone(zone) {
+ if (!is_highmem(zone)) {
+ count_pcs(zone, 1);
+ }
+ }
+
+ return (0);
+}
+
+static void count_data_pages(void);
+static int swsusp_alloc(void);
+
+static void page_caches_recal(void)
+{
+ struct zone *zone;
+ int i;
+
+ for (i = 0; i < max_mapnr; i++)
+ ClearPagePcs(mem_map+i);
+
+ nr_copy_pcs = 0;
+ drain_local_pages();
+ for_each_zone(zone) {
+ if (!is_highmem(zone)) {
+ nr_copy_pcs += count_pcs(zone, 0);
+ }
+ }
+}
+
+int write_page_caches(void)
+{
+ int error;
+ int recal = 0;
+
+ if (swsusp_pagecache) {
+ page_caches_recal();
+
+ if (nr_copy_pcs == 0) {
+ return (0);
+ }
+ printk("swsusp: Need to copy %u pcs\n", nr_copy_pcs);
+ if (alloc_pagedir(&pagedir_cache, nr_copy_pcs, NULL) < 0) {
+ return -ENOMEM;
+ }
+ }
+
+ if ((error = swsusp_swap_check())) {
+ return error;
+ }
+
+ drain_local_pages();
+ count_data_pages();
+ printk("swsusp(1/2): Need to copy %u pages, %u pcs\n",
+ nr_copy_pages, nr_copy_pcs);
+
+ while (nr_free_pages() < nr_copy_pages + PAGES_FOR_IO) {
+ if (recal == 0) {
+ printk("swsusp: try shrink memory ");
+ }
+ shrink_all_memory(nr_copy_pages + PAGES_FOR_IO + recal);
+ recal += PAGES_FOR_IO;
+ }
+
+ if (recal) {
+ printk("done\n");
+ page_caches_recal();
+ drain_local_pages();
+ count_data_pages();
+ printk("swsusp(1/2): Need to copy %u pages, %u pcs\n",
+ nr_copy_pages, nr_copy_pcs);
+ }
+
+ drain_local_pages();
+ count_data_pages();
+ printk("swsusp(2/2): Need to copy %u pages, %u pcs\n",
+ nr_copy_pages, nr_copy_pcs);
+
+ error = swsusp_alloc();
+ if (error) {
+ printk("swsusp_alloc failed, %d\n", error);
+ return error;
+ }
+
+ drain_local_pages();
+ count_data_pages();
+ printk("swsusp(final): Need to copy %u pages, %u pcs\n",
+ nr_copy_pages, nr_copy_pcs);
+
+ if (swsusp_pagecache) {
+ setup_pagedir_pbe();
+ pr_debug("after setup_pagedir_pbe \n");
+
+ error = pcs_write();
+ if (error)
+ return error;
+ }
+
+ return (0);
+}

static int pfn_is_nosave(unsigned long pfn)
{
@@ -539,7 +1069,10 @@
}
if (PageNosaveFree(page))
return 0;
-
+ if (PagePcs(page) && swsusp_pagecache) {
+ BUG_ON(zone->nr_inactive == 0 && zone->nr_active == 0);
+ return 0;
+ }
return 1;
}

@@ -549,10 +1082,12 @@
unsigned long zone_pfn;

nr_copy_pages = 0;
+ nr_copy_pcs = 0;

for_each_zone(zone) {
if (is_highmem(zone))
continue;
+ nr_copy_pcs += count_pcs(zone, 0);
mark_free_pages(zone);
for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn)
nr_copy_pages += saveable(zone, &zone_pfn);
@@ -564,7 +1099,6 @@
{
struct zone *zone;
unsigned long zone_pfn;
- struct pbe * pbe = pagedir_nosave;
int pages_copied = 0;

for_each_zone(zone) {
@@ -574,11 +1108,12 @@
for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) {
if (saveable(zone, &zone_pfn)) {
struct page * page;
+ struct pbe * pbe = find_pbe_by_index(pages_copied, pagedir_nosave);
+ BUG_ON(pbe == NULL);
page = pfn_to_page(zone_pfn + zone->zone_start_pfn);
pbe->orig_address = (long) page_address(page);
/* copy_page is not usable for copying task structs. */
memcpy((void *)pbe->address, (void *)pbe->orig_address, PAGE_SIZE);
- pbe++;
pages_copied++;
}
}
@@ -587,104 +1122,44 @@
nr_copy_pages = pages_copied;
}

-
-/**
- * calc_order - Determine the order of allocation needed for pagedir_save.
- *
- * This looks tricky, but is just subtle. Please fix it some time.
- * Since there are %nr_copy_pages worth of pages in the snapshot, we need
- * to allocate enough contiguous space to hold
- * (%nr_copy_pages * sizeof(struct pbe)),
- * which has the saved/orig locations of the page..
- *
- * SUSPEND_PD_PAGES() tells us how many pages we need to hold those
- * structures, then we call get_bitmask_order(), which will tell us the
- * last bit set in the number, starting with 1. (If we need 30 pages, that
- * is 0x0000001e in hex. The last bit is the 5th, which is the order we
- * would use to allocate 32 contiguous pages).
- *
- * Since we also need to save those pages, we add the number of pages that
- * we need to nr_copy_pages, and in case of an overflow, do the
- * calculation again to update the number of pages needed.
- *
- * With this model, we will tend to waste a lot of memory if we just cross
- * an order boundary. Plus, the higher the order of allocation that we try
- * to do, the more likely we are to fail in a low-memory situtation
- * (though we're unlikely to get this far in such a case, since swsusp
- * requires half of memory to be free anyway).
- */
-
-
-static void calc_order(void)
+static int free_one_snapshot_pbe(struct pbe *p, void *tmp)
{
- int diff = 0;
- int order = 0;
-
- do {
- diff = get_bitmask_order(SUSPEND_PD_PAGES(nr_copy_pages)) - order;
- if (diff) {
- order += diff;
- nr_copy_pages += 1 << diff;
- }
- } while(diff);
- pagedir_order = order;
+ if (p->address) {
+ ClearPageNosave(virt_to_page(p->address));
+ free_page(p->address);
+ p->address = 0;
+ }
+ return (0);
}

-
/**
- * alloc_pagedir - Allocate the page directory.
- *
- * First, determine exactly how many contiguous pages we need and
- * allocate them.
+ * free_image_pages - Free pages allocated for snapshot
*/
-
-static int alloc_pagedir(void)
+static void free_image_pages(void)
{
- calc_order();
- pagedir_save = (suspend_pagedir_t *)__get_free_pages(GFP_ATOMIC | __GFP_COLD,
- pagedir_order);
- if (!pagedir_save)
- return -ENOMEM;
- memset(pagedir_save, 0, (1 << pagedir_order) * PAGE_SIZE);
- pagedir_nosave = pagedir_save;
- return 0;
+ for_each_pbe(pagedir_save, free_one_snapshot_pbe, NULL);
}

-/**
- * free_image_pages - Free pages allocated for snapshot
- */
-
-static void free_image_pages(void)
+static int alloc_one_snapshot_pbe(struct pbe *p, void *tmp)
{
- struct pbe * p;
- int i;
+ (*(int *)tmp) ++;
+ p->address = get_zeroed_page(GFP_ATOMIC | __GFP_COLD);
+ if (!p->address)
+ return -ENOMEM;
+ SetPageNosave(virt_to_page(p->address));

- p = pagedir_save;
- for (i = 0, p = pagedir_save; i < nr_copy_pages; i++, p++) {
- if (p->address) {
- ClearPageNosave(virt_to_page(p->address));
- free_page(p->address);
- p->address = 0;
- }
- }
+ return (0);
}
-
/**
* alloc_image_pages - Allocate pages for the snapshot.
*
*/
-
static int alloc_image_pages(void)
{
- struct pbe * p;
- int i;
+ int i = 0;
+
+ for_each_pbe(pagedir_save, alloc_one_snapshot_pbe, &i);

- for (i = 0, p = pagedir_save; i < nr_copy_pages; i++, p++) {
- p->address = get_zeroed_page(GFP_ATOMIC | __GFP_COLD);
- if (!p->address)
- return -ENOMEM;
- SetPageNosave(virt_to_page(p->address));
- }
return 0;
}

@@ -693,7 +1168,7 @@
BUG_ON(PageNosave(virt_to_page(pagedir_save)));
BUG_ON(PageNosaveFree(virt_to_page(pagedir_save)));
free_image_pages();
- free_pages((unsigned long) pagedir_save, pagedir_order);
+ for_each_pgdir(pagedir_save, free_one_pagedir, NULL, NULL);
}


@@ -730,7 +1205,7 @@
struct sysinfo i;

si_swapinfo(&i);
- if (i.freeswap < (nr_copy_pages + PAGES_FOR_IO)) {
+ if (i.freeswap < (nr_copy_pages + nr_copy_pcs + PAGES_FOR_IO)) {
pr_debug("swsusp: Not enough swap. Need %ld\n",i.freeswap);
return 0;
}
@@ -750,25 +1225,24 @@

if (!enough_swap())
return -ENOSPC;
-
- if ((error = alloc_pagedir())) {
- pr_debug("suspend: Allocating pagedir failed.\n");
- return error;
+ error = alloc_pagedir(&pagedir_save, nr_copy_pages, NULL);
+ if (error < 0) {
+ printk("suspend: Allocating pagedir failed.\n");
+ return -ENOMEM;
}
if ((error = alloc_image_pages())) {
- pr_debug("suspend: Allocating image pages failed.\n");
+ printk("suspend: Allocating image pages failed.\n");
swsusp_free();
return error;
}
+ pagedir_nosave = pagedir_save;

- pagedir_order_check = pagedir_order;
return 0;
}

int suspend_prepare_image(void)
{
unsigned int nr_needed_pages;
- int error;

pr_debug("swsusp: critical section: \n");
if (save_highmem()) {
@@ -777,15 +1251,8 @@
return -ENOMEM;
}

- drain_local_pages();
- count_data_pages();
- printk("swsusp: Need to copy %u pages\n",nr_copy_pages);
nr_needed_pages = nr_copy_pages + PAGES_FOR_IO;

- error = swsusp_alloc();
- if (error)
- return error;
-
/* During allocating of suspend pagedir, new cold pages may appear.
* Kill them.
*/
@@ -854,11 +1321,11 @@

asmlinkage int swsusp_restore(void)
{
- BUG_ON (pagedir_order_check != pagedir_order);
-
/* Even mappings of "global" things (vmalloc) need to be fixed */
+#if defined(CONFIG_X86) && defined(CONFIG_X86_64)
__flush_tlb_global();
wbinvd(); /* Nigel says wbinvd here is good idea... */
+#endif
return 0;
}

@@ -881,99 +1348,6 @@
return error;
}

-
-
-/* More restore stuff */
-
-#define does_collide(addr) does_collide_order(pagedir_nosave, addr, 0)
-
-/*
- * Returns true if given address/order collides with any orig_address
- */
-static int __init does_collide_order(suspend_pagedir_t *pagedir, unsigned long addr,
- int order)
-{
- int i;
- unsigned long addre = addr + (PAGE_SIZE<<order);
-
- for (i=0; i < nr_copy_pages; i++)
- if ((pagedir+i)->orig_address >= addr &&
- (pagedir+i)->orig_address < addre)
- return 1;
-
- return 0;
-}
-
-/*
- * We check here that pagedir & pages it points to won't collide with pages
- * where we're going to restore from the loaded pages later
- */
-static int __init check_pagedir(void)
-{
- int i;
-
- for(i=0; i < nr_copy_pages; i++) {
- unsigned long addr;
-
- do {
- addr = get_zeroed_page(GFP_ATOMIC);
- if(!addr)
- return -ENOMEM;
- } while (does_collide(addr));
-
- (pagedir_nosave+i)->address = addr;
- }
- return 0;
-}
-
-static int __init swsusp_pagedir_relocate(void)
-{
- /*
- * We have to avoid recursion (not to overflow kernel stack),
- * and that's why code looks pretty cryptic
- */
- suspend_pagedir_t *old_pagedir = pagedir_nosave;
- void **eaten_memory = NULL;
- void **c = eaten_memory, *m, *f;
- int ret = 0;
-
- printk("Relocating pagedir ");
-
- if (!does_collide_order(old_pagedir, (unsigned long)old_pagedir, pagedir_order)) {
- printk("not necessary\n");
- return check_pagedir();
- }
-
- while ((m = (void *) __get_free_pages(GFP_ATOMIC, pagedir_order)) != NULL) {
- if (!does_collide_order(old_pagedir, (unsigned long)m, pagedir_order))
- break;
- eaten_memory = m;
- printk( "." );
- *eaten_memory = c;
- c = eaten_memory;
- }
-
- if (!m) {
- printk("out of memory\n");
- ret = -ENOMEM;
- } else {
- pagedir_nosave =
- memcpy(m, old_pagedir, PAGE_SIZE << pagedir_order);
- }
-
- c = eaten_memory;
- while (c) {
- printk(":");
- f = c;
- c = *c;
- free_pages((unsigned long)f, pagedir_order);
- }
- if (ret)
- return ret;
- printk("|\n");
- return check_pagedir();
-}
-
/**
* Using bio to read from swap.
* This code requires a bit more work than just using buffer heads
@@ -993,7 +1367,7 @@
return 0;
}

-static struct block_device * resume_bdev;
+static struct block_device * resume_bdev __nosavedata;

/**
* submit - submit BIO request.
@@ -1088,7 +1462,6 @@
return -EPERM;
}
nr_copy_pages = swsusp_info.image_pages;
- pagedir_order = get_bitmask_order(SUSPEND_PD_PAGES(nr_copy_pages));
return error;
}

@@ -1115,62 +1488,124 @@
return error;
}

+static int __init check_one_pbe(struct pbe *p, void *collide)
+{
+ unsigned long addr = 0;
+ static int checked = 0;
+
+ if (p->orig_address == 0) return (checked);
+
+ do {
+ addr = get_zeroed_page(GFP_ATOMIC);
+ if(!addr)
+ return -ENOMEM;
+ pr_debug("check_one_pbe: %p %p %p ", p, (void*)addr, (void*)p->orig_address);
+ } while(for_each_pbe((struct pbe *)collide, check_pbe_addr, (void*)addr));
+ pr_debug("done\n");
+ p->address = addr;
+ checked ++;
+
+ return (0);
+}
+
+/*
+ * We check here that pagedir & pages it points to won't collide with pages
+ * where we're going to restore from the loaded pages later
+ */
+static int __init check_pagedir(void)
+{
+ int i;
+
+ i = for_each_pbe(pagedir_nosave, check_one_pbe, pagedir_nosave);
+ BUG_ON(i != nr_copy_pages);
+
+ return (0);
+}
+
+static int __init read_one_pbe(struct pbe *p, void *tmp)
+{
+ int error = 0, i = *(int*)tmp;
+
+ mod_printk_progress(i);
+
+ pr_debug("read_one_pbe: %p %p %p %lu, %d\n",
+ p, (void *)p->address, (void *)p->orig_address,
+ swp_offset(p->swap_address), i);
+ if (p->orig_address == 0) return -1;
+ (*(int*)tmp) ++;
+
+ error = bio_read_page(swp_offset(p->swap_address), (void *)p->address);
+ if (error) return error;
+
+ return (0);
+}
+
/**
* swsusp_read_data - Read image pages from swap.
*
- * You do not need to check for overlaps, check_pagedir()
- * already did that.
*/

static int __init data_read(void)
{
- struct pbe * p;
int error;
- int i;
- int mod = nr_copy_pages / 100;
-
- if (!mod)
- mod = 1;
+ int i = 0;

- if ((error = swsusp_pagedir_relocate()))
- return error;
+ if (check_pagedir()) {
+ return -ENOMEM;
+ }
+ mod_progress = nr_copy_pages / 100;

printk( "Reading image data (%d pages): ", nr_copy_pages );
- for(i = 0, p = pagedir_nosave; i < nr_copy_pages && !error; i++, p++) {
- if (!(i%mod))
- printk( "\b\b\b\b%3d%%", i / mod );
- error = bio_read_page(swp_offset(p->swap_address),
- (void *)p->address);
- }
+ error = for_each_pbe(pagedir_nosave, read_one_pbe, &i);
printk(" %d done.\n",i);
- return error;

+ BUG_ON( i != nr_copy_pages );
+
+ return 0;
}

extern dev_t __init name_to_dev_t(const char *line);

+static int __init read_one_pagedir(suspend_pagedir_t *pgdir,
+ void *fun, void *arg)
+{
+ int i = *(int *)arg;
+ int max = (int)fun;
+ unsigned long offset = swp_offset(swsusp_info.pagedir[i]);
+ unsigned long next;
+ int error = 0;
+
+ (*(int *)arg) ++;
+ next = pgdir->dummy.val;
+ pr_debug("read_one_pagedir: %p, %d, %lu, %lu\n", pgdir, i, offset, next);
+ if (i == max) return 0;
+ if (offset)
+ error = bio_read_page(offset, (void *)pgdir);
+ else
+ error = -EFAULT;
+ pgdir->dummy.val = next;
+
+ return (error);
+}
+
static int __init read_pagedir(void)
{
- unsigned long addr;
- int i, n = swsusp_info.pagedir_pages;
+ int i = 0, n = swsusp_info.pagedir_pages;
int error = 0;

- addr = __get_free_pages(GFP_ATOMIC, pagedir_order);
- if (!addr)
+ error = alloc_pagedir(&pagedir_nosave, nr_copy_pages, NULL);
+ if (error < 0)
return -ENOMEM;
- pagedir_nosave = (struct pbe *)addr;

pr_debug("pmdisk: Reading pagedir (%d Pages)\n",n);

- for (i = 0; i < n && !error; i++, addr += PAGE_SIZE) {
- unsigned long offset = swp_offset(swsusp_info.pagedir[i]);
- if (offset)
- error = bio_read_page(offset, (void *)addr);
- else
- error = -EFAULT;
- }
+ error = for_each_pgdir(pagedir_nosave, read_one_pagedir, (void*)n, &i);
+
+ BUG_ON(i != n);
+
if (error)
- free_pages((unsigned long)pagedir_nosave, pagedir_order);
+ for_each_pgdir(pagedir_nosave, free_one_pagedir, NULL, NULL);
+
return error;
}

@@ -1185,7 +1620,7 @@
if ((error = read_pagedir()))
return error;
if ((error = data_read()))
- free_pages((unsigned long)pagedir_nosave, pagedir_order);
+ for_each_pgdir(pagedir_nosave, free_one_pagedir, NULL, NULL);
return error;
}

@@ -1207,7 +1642,7 @@
if (!IS_ERR(resume_bdev)) {
set_blocksize(resume_bdev, PAGE_SIZE);
error = read_suspend_image();
- blkdev_put(resume_bdev);
+ /* blkdev_put(resume_bdev); */
} else
error = PTR_ERR(resume_bdev);

--- linux-2.6.9-ppc-g4-peval/kernel/sys.c 2004-11-22 17:11:35.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/kernel/sys.c 2004-11-22 17:16:58.000000000 +0800
@@ -84,7 +84,7 @@
* and the like.
*/

-static struct notifier_block *reboot_notifier_list;
+struct notifier_block *reboot_notifier_list;
rwlock_t notifier_lock = RW_LOCK_UNLOCKED;

/**
--- linux-2.6.9-ppc-g4-peval/kernel/sysctl.c 2004-11-22 17:08:10.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/kernel/sysctl.c 2004-11-23 00:50:29.000000000 +0800
@@ -66,6 +66,10 @@
extern int printk_ratelimit_jiffies;
extern int printk_ratelimit_burst;

+#if defined(CONFIG_SOFTWARE_SUSPEND)
+extern int swsusp_pagecache;
+#endif
+
#if defined(CONFIG_X86_LOCAL_APIC) && defined(__i386__)
int unknown_nmi_panic;
extern int proc_unknown_nmi_panic(ctl_table *, int, struct file *,
@@ -792,6 +796,18 @@
.strategy = &sysctl_intvec,
.extra1 = &zero,
},
+#if defined(CONFIG_SOFTWARE_SUSPEND)
+ {
+ .ctl_name = VM_SWSUP_PAGECACHE,
+ .procname = "swsusp_pagecache",
+ .data = &swsusp_pagecache,
+ .maxlen = sizeof(swsusp_pagecache),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ .strategy = &sysctl_intvec,
+ .extra1 = &zero,
+ },
+#endif
{
.ctl_name = VM_BLOCK_DUMP,
.procname = "block_dump",
--- linux-2.6.9-ppc-g4-peval/include/linux/sysctl.h 2004-11-22 17:08:10.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/include/linux/sysctl.h 2004-11-23 00:48:54.000000000 +0800
@@ -170,6 +170,7 @@
VM_VFS_CACHE_PRESSURE=26, /* dcache/icache reclaim pressure */
VM_LEGACY_VA_LAYOUT=27, /* legacy/compatibility virtual address space layout */
VM_HARDMAPLIMIT=28, /* Make mapped a hard limit */
+ VM_SWSUP_PAGECACHE=29, /* Enable/Disable Suspend PageCaches */
};



--
Hu Gang / Steve
Linux Registered User 204016
GPG Public Key: http://soulinfo.com/~hugang/hugang.asc

2004-11-22 22:03:15

by Nigel Cunningham

[permalink] [raw]
Subject: Re: swsusp bigdiff [was Re: [PATCH] Software Suspend split to two stage V2.]

Hi.

You guys are slowly developing swsusp into suspend2 :>. Just in case
you're wondering, I haven't given up on merging; I've just been seeking
to get it as bug free as possible, do clean ups and documentation and so
on before getting stuck in to submitting it.

Regards,

Nigel

On Mon, 2004-11-22 at 22:54, Rafael J. Wysocki wrote:
> On Monday 22 of November 2004 12:02, Pavel Machek wrote:
> > Hi!
> >
> > > > Yes, I'd like to get rid of "too many continuous pages" problem
> > > > before. Small problem is that it needs to update x86-64 too, but I
> > > I have not x86-64, so I have no chance to do it.
> >
> > I have access to x86-64, so I can do it...
>
> Please, let me know when you have it done. I'd like to test ... ;-)
>
> RJW
--
Nigel Cunningham
Pastoral Worker
Christian Reformed Church of Tuggeranong
PO Box 1004, Tuggeranong, ACT 2901

You see, at just the right time, when we were still powerless, Christ
died for the ungodly. -- Romans 5:6

2004-11-23 21:57:18

by Pavel Machek

[permalink] [raw]
Subject: Re: swsusp bigdiff [was Re: [PATCH] Software Suspend split to two stage V2.]

Hi!

> You guys are slowly developing swsusp into suspend2 :>. Just in case
> you're wondering, I haven't given up on merging; I've just been seeking
> to get it as bug free as possible, do clean ups and documentation and so
> on before getting stuck in to submitting it.

:-) Well, see how he is producing relatively small patches ;-).

Pavel
--
People were complaining that M$ turns users into beta-testers...
...jr ghea gurz vagb qrirybcref, naq gurl frrz gb yvxr vg gung jnl!

2004-11-23 22:03:51

by Nigel Cunningham

[permalink] [raw]
Subject: Re: swsusp bigdiff [was Re: [PATCH] Software Suspend split to two stage V2.]

Hi.

On Wed, 2004-11-24 at 08:54, Pavel Machek wrote:
> Hi!
>
> > You guys are slowly developing swsusp into suspend2 :>. Just in case
> > you're wondering, I haven't given up on merging; I've just been seeking
> > to get it as bug free as possible, do clean ups and documentation and so
> > on before getting stuck in to submitting it.
>
> :-) Well, see how he is producing relatively small patches ;-).

Mmm. But I don't want to try to patch swsusp into suspend2. I just want
to merge - there are too many big differences between swsusp and
suspend2 for that (it's been redesigned from the ground up).

I'm thinking that rather than trying to get everything nice and tidy and
perfect before I submit it, I should just put it up for review now,
acknowledging that I still need to do more work on the docs and so on,
and see how I go. Sound feasible?

Regards,

Nigel
--
Nigel Cunningham
Pastoral Worker
Christian Reformed Church of Tuggeranong
PO Box 1004, Tuggeranong, ACT 2901

You see, at just the right time, when we were still powerless, Christ
died for the ungodly. -- Romans 5:6

2004-11-23 22:18:33

by Pavel Machek

[permalink] [raw]
Subject: Re: [PATH] swsusp update 1/3

Hi!

> > > > Yes, I'd like to get rid of "too many continuous pages" problem
> > > > before. Small problem is that it needs to update x86-64 too, but I
> > > I have not x86-64, so I have no chance to do it.
> >
> > I have access to x86-64, so I can do it...
> > Pavel
>
> Ok, Now I finised ppc part, it works. :)
>
> Here is all of the patch relative with your big diff.
> core.diff - swsusp core part.
> i386.diff - i386 part.
> ppc.diff - PowerPC part.
>
> Now we have a option in /proc/sys/kernel/swsusp_pagecache, if that is
> sure using swsusp pagecache, otherwise.

Hmm, okay, I guess a temporary sysctl is okay. [I'd probably just put
a variable there, and not export it to anyone. That way people will not
want us to retain it in the future.]

> --- linux-2.6.9-ppc-g4-peval/include/linux/suspend.h 2004-11-22 17:11:35.000000000 +0800
> +++ linux-2.6.9-ppc-g4-peval-hg/include/linux/suspend.h 2004-11-22 17:16:58.000000000 +0800
> @@ -1,7 +1,7 @@
> #ifndef _LINUX_SWSUSP_H
> #define _LINUX_SWSUSP_H
>
> -#ifdef CONFIG_X86
> +#if (defined CONFIG_X86) || (defined CONFIG_PPC32)
~
extra space.


> @@ -48,14 +51,16 @@
> unsigned long flags;
> int error = 0;
>
> - local_irq_save(flags);
> switch(mode) {
> case PM_DISK_PLATFORM:
> - device_power_down(PMSG_SUSPEND);
> + /* device_power_down(PMSG_SUSPEND); */
> + local_irq_save(flags);
> error = pm_ops->enter(PM_SUSPEND_DISK);
> + local_irq_restore(flags);
> break;
> case PM_DISK_SHUTDOWN:
> printk("Powering off system\n");
> + notifier_call_chain(&reboot_notifier_list, SYS_POWER_OFF, NULL);
> device_shutdown();
> machine_power_off();
> break;

Either drop this one or explain why it is a good idea. It seems to be
independent of the rest.

> @@ -144,9 +151,13 @@
> }
>
> /* Free memory before shutting down devices. */
> - free_some_memory();
> + /* free_some_memory(); */

Needs to be if (!swsusp_pagecache), right?

> --- linux-2.6.9-ppc-g4-peval/kernel/power/main.c 2004-11-22 17:11:35.000000000 +0800
> +++ linux-2.6.9-ppc-g4-peval-hg/kernel/power/main.c 2004-11-22 17:16:58.000000000 +0800
> @@ -4,7 +4,7 @@
> * Copyright (c) 2003 Patrick Mochel
> * Copyright (c) 2003 Open Source Development Lab
> *
> - * This file is release under the GPLv2
> + * This file is released under the GPLv2
> *
> */

Applied.

> @@ -223,7 +219,148 @@
> swap_list_unlock();
> }
>
> +#define ONE_PAGE_PBE_NUM ( PAGE_SIZE / sizeof(struct pbe) - 1)
> +
> +/* for each pagdir */
~ missing e

> +typedef int (*susp_pgdir_t)(suspend_pagedir_t *cur, void *fun, void *arg);
> +
> +static int inline for_each_pgdir(struct pbe *pbe, susp_pgdir_t fun,
> + void *subfun, void *arg)
> +{
> + suspend_pagedir_t *pgdir = pbe;
> + int error = 0;
> +
> + while (pgdir != NULL) {
> + suspend_pagedir_t *next = (suspend_pagedir_t *)pgdir->dummy.val;
> + pr_debug("next %p, cur %p\n", next, pgdir);
> + error = fun(pgdir, subfun, arg);
> + if (error) return error;
> + pgdir = next;
> + }
> +
> + return (0);
> +}

Perhaps this should be done as a macro to avoid casting fun forward
and back? See list_for_each for inspiration.

Also it would be nice to have this part of patch split out... I'd like
to merge it sooner than pagecache_write() and friends.

> +/*
> + * for_each_pbe_copy_back
> + *
> + * That usefuly for writing the code in assemble code.
> + *
> + */
> +/* #define CREATE_ASM_CODE */
> +#ifdef CREATE_ASM_CODE
> +asmlinkage void for_each_pbe_copy_back_i386(void)
> +{
> + swsusp_pbe_pgdir = pagedir_nosave;
> + while (swsusp_pbe_pgdir != NULL) {
> + swsusp_pbe_next = (suspend_pagedir_t *)swsusp_pbe_pgdir->dummy.val;
> + for (swsusp_pbe_nums = 0;
> + swsusp_pbe_nums < ONE_PAGE_PBE_NUM;
> + swsusp_pbe_nums++) {
> + register unsigned long i;
> + if (swsusp_pbe_pgdir->orig_address == 0) return;
> + for (i = 0; i < PAGE_SIZE / (sizeof(unsigned long)); i+=4) {
> + *(((unsigned long *)(swsusp_pbe_pgdir->orig_address) + i)) =
> + *(((unsigned long *)(swsusp_pbe_pgdir->address) + i));
> + *(((unsigned long *)(swsusp_pbe_pgdir->orig_address) + i+1)) =
> + *(((unsigned long *)(swsusp_pbe_pgdir->address) + i+1));
> + *(((unsigned long *)(swsusp_pbe_pgdir->orig_address) + i+2)) =
> + *(((unsigned long *)(swsusp_pbe_pgdir->address) + i+2));
> + *(((unsigned long *)(swsusp_pbe_pgdir->orig_address) + i+3)) =
> + *(((unsigned long *)(swsusp_pbe_pgdir->address) + i+3));

Do you really have to do manual loop unrolling? Why can't C code be
same for i386 and ppc?

> +static int mod_progress = 1;
> +
> +static void inline mod_printk_progress(int i)
> +{
> + if (mod_progress == 0) mod_progress = 1;
> + if (!(i%100))
> + printk( "\b\b\b\b%3d%%", i / mod_progress );
> }
>

Hmm, so you did cleanup to progress printing... Good, but it would be
nice to get it separately, too.

> @@ -730,7 +1205,7 @@
> struct sysinfo i;
>
> si_swapinfo(&i);
> - if (i.freeswap < (nr_copy_pages + PAGES_FOR_IO)) {
> + if (i.freeswap < (nr_copy_pages + nr_copy_pcs + PAGES_FOR_IO)) {
> pr_debug("swsusp: Not enough swap. Need %ld\n",i.freeswap);
> return 0;
> }
> @@ -750,25 +1225,24 @@
>
> if (!enough_swap())
> return -ENOSPC;
> -
> - if ((error = alloc_pagedir())) {
> - pr_debug("suspend: Allocating pagedir failed.\n");
> - return error;
> + error = alloc_pagedir(&pagedir_save, nr_copy_pages, NULL);
> + if (error < 0) {
> + printk("suspend: Allocating pagedir failed.\n");
> + return -ENOMEM;

Hmm, I liked previous code better. Plus you throw out error
information and just return -ENOMEM, always.

> if ((error = alloc_image_pages())) {
> - pr_debug("suspend: Allocating image pages failed.\n");
> + printk("suspend: Allocating image pages failed.\n");
> swsusp_free();
> return error;
> }

Applied.

> @@ -854,11 +1321,11 @@
>
> asmlinkage int swsusp_restore(void)
> {
> - BUG_ON (pagedir_order_check != pagedir_order);
> -
> /* Even mappings of "global" things (vmalloc) need to be fixed */
> +#if defined(CONFIG_X86) && defined(CONFIG_X86_64)
> __flush_tlb_global();
> wbinvd(); /* Nigel says wbinvd here is good idea... */
> +#endif

This is needed on i386, too... Okay, wbinvd probably can go... or do
we have some good arch-neutral wbinvd-like thing?
> @@ -993,7 +1367,7 @@
> return 0;
> }
>
> -static struct block_device * resume_bdev;
> +static struct block_device * resume_bdev __nosavedata;
>

Why?

> + return (0);

Please avoid "return (0);". Using "return 0;" will do just fine.

Pavel
--
People were complaining that M$ turns users into beta-testers...
...jr ghea gurz vagb qrirybcref, naq gurl frrz gb yvxr vg gung jnl!

2004-11-23 22:25:31

by Pavel Machek

[permalink] [raw]
Subject: Re: [PATH] swsusp update 2/3

Hi!

> > > > Yes, I'd like to get rid of "too many continuous pages" problem
> > > > before. Small problem is that it needs to update x86-64 too, but I
> > > I have not x86-64, so I have no chance to do it.

Looks okay to me...
Pavel

--
People were complaining that M$ turns users into beta-testers...
...jr ghea gurz vagb qrirybcref, naq gurl frrz gb yvxr vg gung jnl!

2004-11-23 22:33:34

by Pavel Machek

[permalink] [raw]
Subject: Re: [PATH] swsusp update 3/3

Hi!

Can you start pushing this through maintainers?

LINUX FOR POWERPC
P: Paul Mackerras
M: [email protected]
W: http://www.penguinppc.org/
L: [email protected]
S: Supported

LINUX FOR POWER MACINTOSH
P: Benjamin Herrenschmidt
M: [email protected]
W: http://www.penguinppc.org/
L: [email protected]
S: Maintained

> --- linux-2.6.9-ppc-g4-peval/arch/ppc/Kconfig 2004-10-20 15:58:39.000000000 +0800
> +++ linux-2.6.9-ppc-g4-peval-hg/arch/ppc/Kconfig 2004-11-22 17:16:58.000000000 +0800
> @@ -983,6 +983,8 @@
>
> source "drivers/zorro/Kconfig"
>
> +source kernel/power/Kconfig
> +
> endmenu
>
> menu "Bus options"
> --- linux-2.6.9-ppc-g4-peval/arch/ppc/kernel/Makefile 2004-10-20 15:58:40.000000000 +0800
> +++ linux-2.6.9-ppc-g4-peval-hg/arch/ppc/kernel/Makefile 2004-11-22 17:16:58.000000000 +0800
> @@ -16,6 +16,7 @@
> semaphore.o syscalls.o setup.o \
> cputable.o ppc_htab.o
> obj-$(CONFIG_6xx) += l2cr.o cpu_setup_6xx.o
> +obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o
> obj-$(CONFIG_POWER4) += cpu_setup_power4.o
> obj-$(CONFIG_MODULES) += module.o ppc_ksyms.o
> obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-mapping.o

Ok. Or perhaps you want Kconfig part to go in last...

> --- /dev/null 2004-06-07 18:45:47.000000000 +0800
> +++ linux-2.6.9-ppc-g4-peval-hg/arch/ppc/kernel/swsusp.S 1904-01-01 08:47:25.000000000 +0706
...
> + mfibatu r4,3
> + stw r4,SL_IBAT3(r11)
> + mfibatl r4,3
> + stw r4,SL_IBAT3+4(r11)
> +
> +#if 0
> + /* Backup various CPU config stuffs */
> + bl __save_cpu_setup
> +#endif
> + /* Call the low level suspend stuff (we should probably have made
> + * a stackframe...
> + */
> + bl swsusp_save

I can't really check ppc assembly, but you probably want to kill those
#if 0s.

> --- linux-2.6.9-ppc-g4-peval/arch/ppc/kernel/signal.c 2004-10-20 15:58:41.000000000 +0800
> +++ linux-2.6.9-ppc-g4-peval-hg/arch/ppc/kernel/signal.c 2004-11-22 17:16:58.000000000 +0800
> @@ -28,6 +28,7 @@
> #include <linux/elf.h>
> #include <linux/tty.h>
> #include <linux/binfmts.h>
> +#include <linux/suspend.h>
> #include <asm/ucontext.h>
> #include <asm/uaccess.h>
> #include <asm/pgtable.h>
> @@ -604,6 +605,14 @@
> unsigned long frame, newsp;
> int signr, ret;
>
> + if (current->flags & PF_FREEZE) {
> + refrigerator(PF_FREEZE);
> + signr = 0;
> + ret = regs->gpr[3];
> + if (!signal_pending(current))
> + goto no_signal;
> + }
> +
> if (!oldset)
> oldset = &current->blocked;
>
> @@ -626,6 +635,7 @@
> regs->gpr[3] = EINTR;
> /* note that the cr0.SO bit is already set */
> } else {
> +no_signal:
> regs->nip -= 4; /* Back up & retry system call */
> regs->result = 0;
> regs->trap = 0;

Ok.

> --- linux-2.6.9-ppc-g4-peval/arch/ppc/kernel/vmlinux.lds.S 2004-10-20 15:58:41.000000000 +0800
> +++ linux-2.6.9-ppc-g4-peval-hg/arch/ppc/kernel/vmlinux.lds.S 2004-11-22 17:16:58.000000000 +0800
> @@ -74,6 +74,12 @@
> CONSTRUCTORS
> }
>
> + . = ALIGN(4096);
> + __nosave_begin = .;
> + .data_nosave : { *(.data.nosave) }
> + . = ALIGN(4096);
> + __nosave_end = .;
> +
> . = ALIGN(32);
> .data.cacheline_aligned : { *(.data.cacheline_aligned) }
>

Ok.

> --- linux-2.6.9-ppc-g4-peval/arch/ppc/syslib/open_pic.c 2004-10-20 15:58:42.000000000 +0800
> +++ linux-2.6.9-ppc-g4-peval-hg/arch/ppc/syslib/open_pic.c 2004-11-22 17:16:58.000000000 +0800
> @@ -776,7 +776,8 @@
> if (ISR[irq] == 0)
> return;
> if (!cpus_empty(keepmask)) {
> - cpumask_t irqdest = { .bits[0] = openpic_read(&ISR[irq]->Destination) };
> + cpumask_t irqdest;
> + irqdest.bits[0] = openpic_read(&ISR[irq]->Destination);
> cpus_and(irqdest, irqdest, keepmask);
> cpus_or(physmask, physmask, irqdest);
> }

Why this?

> --- linux-2.6.9-ppc-g4-peval/drivers/ide/ppc/pmac.c 2004-10-20 15:59:12.000000000 +0800
> +++ linux-2.6.9-ppc-g4-peval-hg/drivers/ide/ppc/pmac.c 2004-11-22 17:16:58.000000000 +0800
> @@ -32,6 +32,7 @@
> #include <linux/notifier.h>
> #include <linux/reboot.h>
> #include <linux/pci.h>
> +#include <linux/pm.h>
> #include <linux/adb.h>
> #include <linux/pmu.h>
>
> @@ -1364,7 +1365,7 @@
> ide_hwif_t *hwif = (ide_hwif_t *)dev_get_drvdata(&mdev->ofdev.dev);
> int rc = 0;
>
> - if (state != mdev->ofdev.dev.power_state && state >= 2) {
> + if (state != mdev->ofdev.dev.power_state && state == PM_SUSPEND_MEM) {
> rc = pmac_ide_do_suspend(hwif);
> if (rc == 0)
> mdev->ofdev.dev.power_state = state;
> @@ -1472,7 +1473,7 @@
> ide_hwif_t *hwif = (ide_hwif_t *)pci_get_drvdata(pdev);
> int rc = 0;
>
> - if (state != pdev->dev.power_state && state >= 2) {
> + if (state != pdev->dev.power_state && state == PM_SUSPEND_MEM ) {
> rc = pmac_ide_do_suspend(hwif);
> if (rc == 0)
> pdev->dev.power_state = state;

Don't do this just yet. Big changes in this area are pending.

> --- linux-2.6.9-ppc-g4-peval/drivers/macintosh/mediabay.c 2004-10-20 15:53:32.000000000 +0800
> +++ linux-2.6.9-ppc-g4-peval-hg/drivers/macintosh/mediabay.c 2004-11-22 17:16:58.000000000 +0800
> @@ -713,7 +713,7 @@
> {
> struct media_bay_info *bay = macio_get_drvdata(mdev);
>
> - if (state != mdev->ofdev.dev.power_state && state >= 2) {
> + if (state != mdev->ofdev.dev.power_state && state == PM_SUSPEND_MEM) {
> down(&bay->lock);
> bay->sleeping = 1;
> set_mb_power(bay, 0);

Wait with this one, too.

> --- linux-2.6.9-ppc-g4-peval/drivers/macintosh/therm_adt746x.c 2004-10-20 15:59:24.000000000 +0800
> +++ linux-2.6.9-ppc-g4-peval-hg/drivers/macintosh/therm_adt746x.c 2004-11-22 17:16:58.000000000 +0800
> @@ -22,6 +22,7 @@
> #include <linux/spinlock.h>
> #include <linux/smp_lock.h>
> #include <linux/wait.h>
> +#include <linux/suspend.h>
> #include <asm/prom.h>
> #include <asm/machdep.h>
> #include <asm/io.h>
> @@ -238,6 +239,11 @@
> #endif
> while(!kthread_should_stop())
> {
> + if (current->flags & PF_FREEZE) {
> + printk(KERN_INFO "therm_adt746x: freezing thermostat\n");
> + refrigerator(PF_FREEZE);
> + }
> +
> msleep_interruptible(2000);
>
> /* Check status */

You probably want to avoid that printk. (And similar for
therm_pm72). Otherwise good.

> --- linux-2.6.9-ppc-g4-peval/drivers/macintosh/via-pmu.c 2004-10-20 15:59:24.000000000 +0800
> +++ linux-2.6.9-ppc-g4-peval-hg/drivers/macintosh/via-pmu.c 2004-11-22 17:16:58.000000000 +0800
> @@ -43,6 +43,7 @@
> #include <linux/init.h>
> #include <linux/interrupt.h>
> #include <linux/device.h>
> +#include <linux/sysdev.h>
> #include <linux/suspend.h>
> #include <linux/syscalls.h>
> #include <asm/prom.h>
> @@ -2326,7 +2327,7 @@
> /* Sync the disks. */
> /* XXX It would be nice to have some way to ensure that
> * nobody is dirtying any new buffers while we wait. That
> - * could be acheived using the refrigerator for processes
> + * could be achieved using the refrigerator for processes
> * that swsusp uses
> */
> sys_sync();
> @@ -2379,7 +2380,6 @@
>
> /* Wait for completion of async backlight requests */
> while (!bright_req_1.complete || !bright_req_2.complete ||
> -
> !batt_req.complete)
> pmu_poll();
>

Ok.

> --- linux-2.6.9-ppc-g4-peval/drivers/video/aty/radeon_pm.c 2004-10-20 15:55:34.000000000 +0800
> +++ linux-2.6.9-ppc-g4-peval-hg/drivers/video/aty/radeon_pm.c 2004-11-22 17:16:58.000000000 +0800
> @@ -859,6 +859,10 @@
> * know we'll be rebooted, ...
> */
>
> +#if 0 /* this breaks suspend to ram until the dust settles... */
> + if (state != PM_SUSPEND_MEM)
> +#endif
> + return 0;
> printk(KERN_DEBUG "radeonfb: suspending to state: %d...\n", state);
>
> acquire_console_sem();

Wait with this one. (And notice that this is not ppc-specific and
could do some damage...)
Pavel

--
People were complaining that M$ turns users into beta-testers...
...jr ghea gurz vagb qrirybcref, naq gurl frrz gb yvxr vg gung jnl!

2004-11-24 08:02:24

by Hu Gang

[permalink] [raw]
Subject: [PATH] 11-24 swsusp update 2/3

--i386.diff--

--- linux-2.6.9-ppc-g4-peval/arch/i386/power/swsusp.S 2004-10-20 15:58:34.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/arch/i386/power/swsusp.S 2004-11-24 14:08:31.000000000 +0800
@@ -31,24 +31,33 @@
movl $swsusp_pg_dir-__PAGE_OFFSET,%ecx
movl %ecx,%cr3

- movl pagedir_nosave, %ebx
- xorl %eax, %eax
- xorl %edx, %edx
- .p2align 4,,7
-
-copy_loop:
- movl 4(%ebx,%edx),%edi
- movl (%ebx,%edx),%esi
-
- movl $1024, %ecx
- rep
- movsl
-
- incl %eax
- addl $16, %edx
- cmpl nr_copy_pages,%eax
- jb copy_loop
- .p2align 4,,7
+ movl pagedir_nosave, %eax
+ test %eax, %eax
+ je copy_loop_end
+ movl $1024, %edx
+
+copy_loop_start:
+ movl 0xc(%eax), %ebp
+ xorl %ebx, %ebx
+ leal 0x0(%esi),%esi
+
+copy_one_pgdir:
+ movl 0x4(%eax),%edi
+ test %edi, %edi
+ je copy_loop_end
+
+ movl (%eax), %esi
+ movl %edx, %ecx
+ repz movsl %ds:(%esi),%es:(%edi)
+
+ incl %ebx
+ addl $0x10, %eax
+ cmpl $0xff, %ebx
+ jbe copy_one_pgdir
+ test %ebp, %ebp
+ movl %ebp, %eax
+ jne copy_loop_start
+copy_loop_end:

movl saved_context_esp, %esp
movl saved_context_ebp, %ebp

2004-11-24 08:11:11

by Hu Gang

[permalink] [raw]
Subject: [PATH] 11-24 swsusp update 1/3

On Tue, Nov 23, 2004 at 11:14:30PM +0100, Pavel Machek wrote:
> > @@ -48,14 +51,16 @@
> > unsigned long flags;
> > int error = 0;
> >
> > - local_irq_save(flags);
> > switch(mode) {
> > case PM_DISK_PLATFORM:
> > - device_power_down(PMSG_SUSPEND);
> > + /* device_power_down(PMSG_SUSPEND); */
> > + local_irq_save(flags);
> > error = pm_ops->enter(PM_SUSPEND_DISK);
> > + local_irq_restore(flags);
> > break;
> > case PM_DISK_SHUTDOWN:
> > printk("Powering off system\n");
> > + notifier_call_chain(&reboot_notifier_list, SYS_POWER_OFF, NULL);
> > device_shutdown();
> > machine_power_off();
> > break;
>
> Either drop this one or explain why it is good idea. It seems to be
> independend on the rest.
I just copied this code from the old ppc swsusp port; I don't know why it is there. :)

>
> > @@ -144,9 +151,13 @@
> > }
> >
> > /* Free memory before shutting down devices. */
> > - free_some_memory();
> > + /* free_some_memory(); */
>
> Needs to be if (!swsusp_pagecache), right?
I think we can drop this one; write_page_caches has the same code, and
does its best.

> + if (swsusp_pbe_pgdir->orig_address == 0) return;
> > + for (i = 0; i < PAGE_SIZE / (sizeof(unsigned long)); i+=4) {
> > + *(((unsigned long *)(swsusp_pbe_pgdir->orig_address) + i)) =
> > + *(((unsigned long *)(swsusp_pbe_pgdir->address) + i));
> > + *(((unsigned long *)(swsusp_pbe_pgdir->orig_address) + i+1)) =
> > + *(((unsigned long *)(swsusp_pbe_pgdir->address) + i+1));
> > + *(((unsigned long *)(swsusp_pbe_pgdir->orig_address) + i+2)) =
> > + *(((unsigned long *)(swsusp_pbe_pgdir->address) + i+2));
> > + *(((unsigned long *)(swsusp_pbe_pgdir->orig_address) + i+3)) =
> > + *(((unsigned long *)(swsusp_pbe_pgdir->address) + i+3));
>
> Do you really have to do manual loop unrolling? Why can't C code be
> same for i386 and ppc?
That was stupid code; it is updated in my new patch. I am using memcpy on
i386, which creates small assembly code.

> >
> > if (!enough_swap())
> > return -ENOSPC;
> > -
> > - if ((error = alloc_pagedir())) {
> > - pr_debug("suspend: Allocating pagedir failed.\n");
> > - return error;
> > + error = alloc_pagedir(&pagedir_save, nr_copy_pages, NULL);
> > + if (error < 0) {
> > + printk("suspend: Allocating pagedir failed.\n");
> > + return -ENOMEM;
>
> Hmm, I liked previous code better. Plus you throw out error
> information and just return -ENOMEM, always.
OK, I backed it out.

>
> > @@ -854,11 +1321,11 @@
> >
> > asmlinkage int swsusp_restore(void)
> > {
> > - BUG_ON (pagedir_order_check != pagedir_order);
> > -
> > /* Even mappings of "global" things (vmalloc) need to be fixed */
> > +#if defined(CONFIG_X86) && defined(CONFIG_X86_64)
> > __flush_tlb_global();
> > wbinvd(); /* Nigel says wbinvd here is good idea... */
> > +#endif
>
> This is needed on i386, too... Okay, wbinvd probably can go... or do
> we have some good arch-neutral wbinvd-like thing?
> > @@ -993,7 +1367,7 @@
> > return 0;
> > }
> >
> > -static struct block_device * resume_bdev;
> > +static struct block_device * resume_bdev __nosavedata;
> >
I'll rethink this.

>
> Why?
>
> > + return (0);
>
> Please avoid "return (0);". Using "return 0;" will do just fine.
fixed.

Here is my patch relative to your big diff; I hope it can be merged.

- correct relocating pages; that's very important, and the new swsusp now
seems stable for me.
- correct calc_num.
- add some comments (sorry for my poor English).
- improve the i386 copy-back code.

---core.diff--
--- linux-2.6.9-ppc-g4-peval/include/linux/reboot.h 2004-06-16 13:20:26.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/include/linux/reboot.h 2004-11-22 17:16:58.000000000 +0800
@@ -42,6 +42,8 @@
extern int register_reboot_notifier(struct notifier_block *);
extern int unregister_reboot_notifier(struct notifier_block *);

+/* For use by swsusp only */
+extern struct notifier_block *reboot_notifier_list;

/*
* Architecture-specific implementations of sys_reboot commands.
--- linux-2.6.9-ppc-g4-peval/include/linux/suspend.h 2004-11-22 17:11:35.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/include/linux/suspend.h 2004-11-24 15:48:05.000000000 +0800
@@ -1,7 +1,7 @@
#ifndef _LINUX_SWSUSP_H
#define _LINUX_SWSUSP_H

-#ifdef CONFIG_X86
+#if (defined CONFIG_X86) || (defined CONFIG_PPC32)
#include <asm/suspend.h>
#endif
#include <linux/swap.h>
--- linux-2.6.9-ppc-g4-peval/kernel/power/disk.c 2004-11-22 17:11:35.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/kernel/power/disk.c 2004-11-24 14:33:26.000000000 +0800
@@ -16,6 +16,7 @@
#include <linux/device.h>
#include <linux/delay.h>
#include <linux/fs.h>
+#include <linux/reboot.h>
#include <linux/device.h>
#include "power.h"

@@ -29,6 +30,8 @@
extern int swsusp_resume(void);
extern int swsusp_free(void);

+extern int write_page_caches(void);
+extern int read_page_caches(void);

static int noresume = 0;
char resume_file[256] = CONFIG_PM_STD_PARTITION;
@@ -48,14 +51,16 @@
unsigned long flags;
int error = 0;

- local_irq_save(flags);
switch(mode) {
case PM_DISK_PLATFORM:
- device_power_down(PMSG_SUSPEND);
+ /* device_power_down(PMSG_SUSPEND); */
+ local_irq_save(flags);
error = pm_ops->enter(PM_SUSPEND_DISK);
+ local_irq_restore(flags);
break;
case PM_DISK_SHUTDOWN:
printk("Powering off system\n");
+ notifier_call_chain(&reboot_notifier_list, SYS_POWER_OFF, NULL);
device_shutdown();
machine_power_off();
break;
@@ -106,6 +111,7 @@
}
}

+
static inline void platform_finish(void)
{
if (pm_disk_mode == PM_DISK_PLATFORM) {
@@ -117,6 +123,7 @@
static void finish(void)
{
device_resume();
+ read_page_caches();
platform_finish();
enable_nonboot_cpus();
thaw_processes();
@@ -124,7 +131,7 @@
}


-static int prepare(void)
+static int prepare(int resume)
{
int error;

@@ -144,9 +151,13 @@
}

/* Free memory before shutting down devices. */
- free_some_memory();
+ /* free_some_memory(); */

disable_nonboot_cpus();
+ if (!resume)
+ if ((error = write_page_caches())) {
+ goto Finish;
+ }
if ((error = device_suspend(PMSG_FREEZE))) {
printk("Some devices failed to suspend\n");
goto Finish;
@@ -176,7 +187,7 @@
{
int error;

- if ((error = prepare()))
+ if ((error = prepare(0)))
return error;

pr_debug("PM: Attempting to suspend to disk.\n");
@@ -233,7 +244,7 @@

pr_debug("PM: Preparing system for restore.\n");

- if ((error = prepare()))
+ if ((error = prepare(1)))
goto Free;

barrier();
--- linux-2.6.9-ppc-g4-peval/kernel/power/main.c 2004-11-22 17:11:35.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/kernel/power/main.c 2004-11-22 17:16:58.000000000 +0800
@@ -4,7 +4,7 @@
* Copyright (c) 2003 Patrick Mochel
* Copyright (c) 2003 Open Source Development Lab
*
- * This file is release under the GPLv2
+ * This file is released under the GPLv2
*
*/

--- linux-2.6.9-ppc-g4-peval/kernel/power/swsusp.c 2004-11-22 17:11:35.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/kernel/power/swsusp.c 2004-11-24 15:49:51.000000000 +0800
@@ -74,9 +74,6 @@
/* References to section boundaries */
extern char __nosave_begin, __nosave_end;

-/* Variables to be preserved over suspend */
-static int pagedir_order_check;
-
extern char resume_file[];
static dev_t resume_device;
/* Local variables that should not be affected by save */
@@ -97,7 +94,6 @@
*/
suspend_pagedir_t *pagedir_nosave __nosavedata = NULL;
static suspend_pagedir_t *pagedir_save;
-static int pagedir_order __nosavedata = 0;

#define SWSUSP_SIG "S1SUSPEND"

@@ -223,7 +219,146 @@
swap_list_unlock();
}

+#define ONE_PAGE_PBE_NUM (PAGE_SIZE/sizeof(struct pbe))
+
+/* for each pagedir */
+typedef int (*susp_pgdir_t)(suspend_pagedir_t *cur, void *fun, void *arg);
+
+static int inline for_each_pgdir(struct pbe *pbe, susp_pgdir_t fun,
+ void *subfun, void *arg)
+{
+ suspend_pagedir_t *pgdir = pbe;
+ int error = 0;
+
+ while (pgdir != NULL) {
+ suspend_pagedir_t *next = (suspend_pagedir_t *)pgdir->dummy.val;
+ /* pr_debug("for_each_pgdir: cur %p next %p\n", pgdir, next); */
+ error = fun(pgdir, subfun, arg);
+ if (error) return error;
+ pgdir = next;
+ }
+
+ return error;
+}

+/* free one pagedir */
+static int free_one_pagedir(suspend_pagedir_t *pgdir, void *fun, void *arg)
+{
+ free_page((unsigned long)pgdir);
+ return 0;
+}
+
+/*
+ * swsup_pbe_t
+ *
+ * a callback function in the for_each_pbe loop.
+ *
+ * @param pbe pointer of current pbe
+ * @param p private data
+ * @param cur current index
+ *
+ * @return 0 is ok, otherwise
+ */
+
+typedef int (*swsup_pbe_t)(struct pbe *bpe, void *p, int cur);
+
+/*
+ * for_each_pbe
+ *
+ * @param pbe pointer of the pbe head
+ * @param fun callback function
+ * @param p private data
+ * @param max maximum number of pbes to visit
+ *
+ * @return 0 is ok, otherwise
+ */
+static int for_each_pbe(struct pbe *pbe, swsup_pbe_t fun, void *p, int max)
+{
+ struct pbe *pgdir = pbe, *next = NULL;
+ unsigned long i = 0;
+ int error = 0;
+
+ while (pgdir != NULL) {
+ unsigned long nums;
+ next = (struct pbe*)pgdir->dummy.val;
+ for (nums = 0; nums < ONE_PAGE_PBE_NUM; nums++, pgdir++, i ++) {
+ if (i == max) { /* end */
+ return 0;
+ }
+ if((error = fun(pgdir, p, i))) { /* got error */
+ return error;
+ }
+ }
+ pgdir = next;
+ }
+ return (error);
+}
+/* for_each_pbe_copy_back
+ *
+ * This is useful for writing the code in assembly.
+ *
+ */
+/*#define CREATE_ASM_CODE */
+#ifdef CREATE_ASM_CODE
+#if 0
+#define GET_ADDRESS(x) __pa(x)
+#else
+#define GET_ADDRESS(x) (x)
+#endif
+asmlinkage void for_each_pbe_copy_back(void)
+{
+ struct pbe *pgdir, *next;
+
+ pgdir = pagedir_nosave;
+ while (pgdir != NULL) {
+ unsigned long nums, i;
+ pgdir = (struct pbe *)GET_ADDRESS(pgdir);
+ next = (struct pbe*)pgdir->dummy.val;
+ for (nums = 0; nums < ONE_PAGE_PBE_NUM; nums++) {
+ register unsigned long *orig, *copy;
+ orig = (unsigned long *)pgdir->orig_address;
+ if (orig == 0) goto end;
+ orig = (unsigned long *)GET_ADDRESS(orig);
+ copy = (unsigned long *)GET_ADDRESS(pgdir->address);
+#if 0
+ memcpy(orig, copy, PAGE_SIZE);
+#else
+ for (i = 0; i < PAGE_SIZE / sizeof(unsigned long); i+=4) {
+ *(orig + i) = *(copy + i);
+ *(orig + i+1) = *(copy + i+1);
+ *(orig + i+2) = *(copy + i+2);
+ *(orig + i+3) = *(copy + i+3);
+ }
+#endif
+ pgdir ++;
+ }
+ pgdir = next;
+ }
+end:
+ panic("just asm code");
+}
+#endif
+
+static int find_bpe_index(struct pbe *p, void *tmp, int cur)
+{
+ if (*(int *)tmp == cur) {
+ *(struct pbe **)tmp = p;
+ return (1);
+ }
+ return 0;
+}
+
+static struct pbe *find_pbe_by_index(struct pbe *pgdir, int index, int max)
+{
+ unsigned long p = index;
+
+ /* pr_debug("find_pbe_by_index: %p, %d, %d ", pgdir, index, max); */
+ if (for_each_pbe(pgdir, find_bpe_index, &p, max) == 1) {
+ /* pr_debug("%p\n", (void*)p); */
+ return ((struct pbe *)p);
+ }
+ return (NULL);
+}

/**
* write_swap_page - Write one page to a fresh swap location.
@@ -257,6 +392,17 @@
return error;
}

+static int data_free_pbe(struct pbe *p, void *tmp, int cur)
+{
+ swp_entry_t entry;
+
+ entry = p->swap_address;
+ if (entry.val)
+ swap_free(entry);
+ p->swap_address = (swp_entry_t){0};
+
+ return 0;
+}

/**
* data_free - Free the swap entries used by the saved image.
@@ -267,43 +413,50 @@

static void data_free(void)
{
- swp_entry_t entry;
- int i;
+ for_each_pbe(pagedir_nosave, data_free_pbe, NULL, nr_copy_pages);
+}

- for (i = 0; i < nr_copy_pages; i++) {
- entry = (pagedir_nosave + i)->swap_address;
- if (entry.val)
- swap_free(entry);
- else
- break;
- (pagedir_nosave + i)->swap_address = (swp_entry_t){0};
- }
+static int mod_progress = 1;
+
+static void inline mod_printk_progress(int i)
+{
+ if (mod_progress == 0) mod_progress = 1;
+ if (!(i%100))
+ printk( "\b\b\b\b%3d%%", i / mod_progress );
}

+static int write_one_pbe(struct pbe *p, void *tmp, int cur)
+{
+ int error = 0;
+
+ BUG_ON(p->address == 0);
+ BUG_ON(p->orig_address == 0);
+ if ((error = write_page(p->address, &p->swap_address))) {
+ return error;
+ }
+ mod_printk_progress(cur);
+ pr_debug("write_one_pbe: %p, o{%p} c{%p} %lu %d\n", p,
+ (void *)p->orig_address, (void *)p->address,
+ p->swap_address.val, cur);
+
+ return 0;
+}

/**
* data_write - Write saved image to swap.
*
* Walk the list of pages in the image and sync each one to swap.
*/
-
static int data_write(void)
{
- int error = 0;
- int i;
- unsigned int mod = nr_copy_pages / 100;
-
- if (!mod)
- mod = 1;
+ int error;
+
+ mod_progress = nr_copy_pages / 100;

- printk( "Writing data to swap (%d pages)... ", nr_copy_pages );
- for (i = 0; i < nr_copy_pages && !error; i++) {
- if (!(i%mod))
- printk( "\b\b\b\b%3d%%", i / mod );
- error = write_page((pagedir_nosave+i)->address,
- &((pagedir_nosave+i)->swap_address));
- }
+ printk( "Writing data to swap (%d pages)... ", nr_copy_pages);
+ error = for_each_pbe(pagedir_nosave, write_one_pbe, NULL, nr_copy_pages);
printk("\b\b\b\bdone\n");
+
return error;
}

@@ -363,6 +516,18 @@
swap_free(swsusp_info.pagedir[i]);
}

+static int write_one_pagedir(suspend_pagedir_t *pgdir, void *fun, void *arg)
+{
+ int i = *(int *)arg;
+ int error;
+
+ if ((error = write_page((unsigned long)pgdir, &swsusp_info.pagedir[i]))) {
+ return (error);
+ }
+ (*(int *)arg) ++;
+
+ return 0;
+}

/**
* write_pagedir - Write the array of pages holding the page directory.
@@ -371,15 +536,12 @@

static int write_pagedir(void)
{
- unsigned long addr = (unsigned long)pagedir_nosave;
- int error = 0;
- int n = SUSPEND_PD_PAGES(nr_copy_pages);
- int i;
+ int error = 0, n = 0;

- swsusp_info.pagedir_pages = n;
+ error = for_each_pgdir(pagedir_nosave, write_one_pagedir, NULL, &n);
printk( "Writing pagedir (%d pages)\n", n);
- for (i = 0; i < n && !error; i++, addr += PAGE_SIZE)
- error = write_page(addr, &swsusp_info.pagedir[i]);
+ swsusp_info.pagedir_pages = n;
+
return error;
}

@@ -504,6 +666,417 @@
return 0;
}

+typedef int (*do_page_t)(struct page *page, int p);
+
+static int foreach_zone_page(struct zone *zone, do_page_t fun, int p)
+{
+ int inactive = 0, active = 0;
+
+ spin_lock_irq(&zone->lru_lock);
+ if (zone->nr_inactive) {
+ struct list_head * entry = zone->inactive_list.prev;
+ while (entry != &zone->inactive_list) {
+ if (fun) {
+ struct page * page = list_entry(entry, struct page, lru);
+ inactive += fun(page, p);
+ } else {
+ inactive ++;
+ }
+ entry = entry->prev;
+ }
+ }
+ if (zone->nr_active) {
+ struct list_head * entry = zone->active_list.prev;
+ while (entry != &zone->active_list) {
+ if (fun) {
+ struct page * page = list_entry(entry, struct page, lru);
+ active += fun(page, p);
+ } else {
+ active ++;
+ }
+ entry = entry->prev;
+ }
+ }
+ spin_unlock_irq(&zone->lru_lock);
+
+ return (active + inactive);
+}
+
+/* enable/disable pagecache suspend */
+int swsusp_pagecache = 0;
+
+/* I'll move this to include/linux/page-flags.h */
+#define PG_pcs (PG_nosave_free + 1)
+
+#define SetPagePcs(page) set_bit(PG_pcs, &(page)->flags)
+#define ClearPagePcs(page) clear_bit(PG_pcs, &(page)->flags)
+#define PagePcs(page) test_bit(PG_pcs, &(page)->flags)
+
+static suspend_pagedir_t *pagedir_cache = NULL;
+static int nr_copy_pcs = 0;
+
+static void lock_pagecaches(void)
+{
+ struct zone *zone;
+ for_each_zone(zone) {
+ if (!is_highmem(zone)) {
+ spin_lock_irq(&zone->lru_lock);
+ }
+ }
+}
+
+static void unlock_pagecaches(void)
+{
+ struct zone *zone;
+ for_each_zone(zone) {
+ if (!is_highmem(zone)) {
+ spin_unlock_irq(&zone->lru_lock);
+ }
+ }
+}
+
+static int setup_pcs_pe(struct page *page, int setup)
+{
+ unsigned long pfn = page_to_pfn(page);
+
+ BUG_ON(PageReserved(page) && PageNosave(page));
+ if (!pfn_valid(pfn)) {
+ printk("not valid page\n");
+ return 0;
+ }
+ if (PageNosave(page)) {
+ printk("nosave\n");
+ return 0;
+ }
+ if (PageReserved(page) /*&& pfn_is_nosave(pfn)*/) {
+ printk("[nosave]\n");
+ return 0;
+ }
+ if (PageSlab(page)) {
+ printk("slab\n");
+ return 0;
+ }
+ if (setup) {
+ struct pbe *p = find_pbe_by_index(pagedir_cache, nr_copy_pcs, -1);
+ BUG_ON(p == NULL);
+ p->address = (long)page_address(page);
+ BUG_ON(p->address == 0);
+ /*pr_debug("setup_pcs: cur %p, o{%p}, d{%p}, nr %u\n",
+ (void*)p, (void*)p->orig_address,
+ (void*)p->address, nr_copy_pcs);*/
+ nr_copy_pcs ++;
+ }
+ SetPagePcs(page);
+
+ return (1);
+}
+
+static int count_pcs(struct zone *zone, int p)
+{
+ if (swsusp_pagecache)
+ return foreach_zone_page(zone, setup_pcs_pe, p);
+ return 0;
+}
+
+/*
+ * check the address in pbe list
+ */
+static int check_pbe_addr(struct pbe *p, void *addr, int cur)
+{
+ unsigned long addre = (unsigned long)addr + PAGE_SIZE;
+ BUG_ON(p->orig_address == 0);
+ if (p->orig_address >= (unsigned long)addr && p->orig_address < addre)
+ return 1;
+ return 0;
+}
+
+static int check_collide(struct pbe *old, int max, void *addr)
+{
+ return (for_each_pbe(old, check_pbe_addr, addr, max));
+}
+
+/*
+ * struct pbe as redefined for the PageCache pagedir.
+ *
+ * struct pbe {
+ * unsigned long address;
+ * unsigned long orig_address; pointer of next struct pbe
+ * swp_entry_t swap_address;
+ * swp_entry_t dummy; current index
+ * }
+ *
+ */
+static suspend_pagedir_t * alloc_one_pagedir(suspend_pagedir_t *prev,
+ suspend_pagedir_t *collide)
+{
+ suspend_pagedir_t *pgdir = NULL;
+ int i;
+
+ pgdir = (suspend_pagedir_t *)
+ __get_free_pages(GFP_ATOMIC | __GFP_COLD, 0);
+ if (!pgdir) {
+ return NULL;
+ }
+
+ if (collide) {
+ while (check_collide((struct pbe *)collide, nr_copy_pages, pgdir)) {
+ /* free_page((unsigned long)pgdir); */
+ pgdir = (suspend_pagedir_t *)
+ __get_free_pages(GFP_ATOMIC | __GFP_COLD, 0);
+ if (!pgdir) {
+ return NULL;
+ }
+ }
+ }
+
+ /*pr_debug("pgdir: %p, %p, %d\n",
+ pgdir, prev, sizeof(suspend_pagedir_t)); */
+ for (i = 0; i < ONE_PAGE_PBE_NUM; i++) {
+ pgdir[i].dummy.val = 0;
+ pgdir[i].address = 0;
+ pgdir[i].orig_address = 0;
+ if (prev)
+ prev[i].dummy.val= (unsigned long)pgdir;
+ }
+
+ return (pgdir);
+}
+
+/* calc_nums - Determine the number of allocations needed for pagedir_save. */
+static int calc_nums(int nr_copy)
+{
+ int diff = 0, ret = 0;
+ do {
+ diff = (nr_copy / ONE_PAGE_PBE_NUM) - ret + 1;
+ if (diff) {
+ ret += diff;
+ nr_copy += diff;
+ }
+ } while (diff);
+ return nr_copy;
+}
+
+/*
+ * alloc_pagedir
+ *
+ * @param pbe
+ * @param pbe_nums
+ * @param collide
+ * @param page_nums
+ *
+ */
+static int alloc_pagedir(struct pbe **pbe, int pbe_nums,
+ struct pbe *collide, int page_nums)
+{
+ unsigned int nums = 0;
+ unsigned int after_alloc = pbe_nums;
+ suspend_pagedir_t *prev = NULL, *cur = NULL;
+
+ if (page_nums)
+ after_alloc = ONE_PAGE_PBE_NUM * page_nums;
+ else
+ after_alloc = calc_nums(after_alloc);
+
+ pr_debug("alloc_pagedir: %d, %d\n", pbe_nums, after_alloc);
+ for (nums = 0 ; nums < after_alloc ; nums += ONE_PAGE_PBE_NUM) {
+ cur = alloc_one_pagedir(prev, collide);
+ pr_debug("alloc_one_pagedir: %p\n", cur);
+ if (!cur) { /* get page failed */
+ goto no_mem;
+ }
+ if (nums == 0) { /* setup the head */
+ *pbe = cur;
+ }
+ prev = cur;
+ }
+ return after_alloc - pbe_nums;
+
+no_mem:
+ for_each_pgdir(*pbe, free_one_pagedir, NULL, NULL);
+ *pbe = NULL;
+
+ return (-ENOMEM);
+}
+
+static int bio_read_page(pgoff_t page_off, void * page);
+
+static int pagecache_read_pbe(struct pbe *p, void *tmp, int cur)
+{
+ int error = 0;
+ swp_entry_t entry;
+
+ mod_printk_progress(cur);
+
+ pr_debug("pagecache_read_pbe: %p, o{%p} c{%p} %lu\n",
+ p, (void *)p->orig_address, (void *)p->address,
+ swp_offset(p->swap_address));
+
+ error = bio_read_page(swp_offset(p->swap_address), (void *)p->address);
+ if (error) return error;
+
+ entry = p->swap_address;
+ if (entry.val)
+ swap_free(entry);
+
+ return 0;
+}
+
+int read_page_caches(void)
+{
+ int error = 0;
+
+ if (swsusp_pagecache == 0) return 0;
+
+ mod_progress = nr_copy_pcs / 100;
+
+ printk( "Reading PageCaches from swap (%d pages)... ", nr_copy_pcs);
+ error = for_each_pbe(pagedir_cache, pagecache_read_pbe, NULL,
+ nr_copy_pcs);
+ printk("\b\b\b\bdone\n");
+
+ unlock_pagecaches();
+ for_each_pgdir(pagedir_cache, free_one_pagedir, NULL, NULL);
+
+ return error;
+}
+
+static int pagecache_write_pbe(struct pbe *p, void *tmp, int cur)
+{
+ int error = 0;
+
+ mod_printk_progress(cur);
+
+ pr_debug("pagecache_write_pbe: %p, o{%p} c{%p} %d ",
+ p, (void *)p->orig_address, (void *)p->address, cur);
+ BUG_ON(p->address == 0);
+ error = write_page(p->address, &p->swap_address);
+ if (error) return error;
+
+ pr_debug("%lu\n", swp_offset(p->swap_address));
+
+ return 0;
+}
+
+static int page_caches_write(void)
+{
+ int error;
+
+ mod_progress = nr_copy_pcs / 100;
+
+ lock_pagecaches();
+ printk( "Writing PageCaches to swap (%d pages)... ", nr_copy_pcs);
+ error = for_each_pbe(pagedir_cache, pagecache_write_pbe, NULL,
+ nr_copy_pcs);
+ printk("\b\b\b\bdone\n");
+
+ return error;
+}
+
+static int setup_pagedir_pbe(void)
+{
+ struct zone *zone;
+
+ nr_copy_pcs = 0;
+ for_each_zone(zone) {
+ if (!is_highmem(zone)) {
+ count_pcs(zone, 1);
+ }
+ }
+
+ return 0;
+}
+
+static void count_data_pages(void);
+static int swsusp_alloc(void);
+
+static void page_caches_recal(void)
+{
+ struct zone *zone;
+ int i;
+
+ for (i = 0; i < max_mapnr; i++)
+ ClearPagePcs(mem_map+i);
+
+ nr_copy_pcs = 0;
+ drain_local_pages();
+ for_each_zone(zone) {
+ if (!is_highmem(zone)) {
+ nr_copy_pcs += count_pcs(zone, 0);
+ }
+ }
+}
+
+int write_page_caches(void)
+{
+ int error;
+ int recal = 0;
+
+ if ((error = swsusp_swap_check())) {
+ /* FIXME free pagedir_cache */
+ return error;
+ }
+
+ if (swsusp_pagecache) {
+ page_caches_recal();
+
+ if (nr_copy_pcs == 0) {
+ return 0;
+ }
+ printk("swsusp: Need to copy %u pcs\n", nr_copy_pcs);
+ if (alloc_pagedir(&pagedir_cache, nr_copy_pcs, NULL, 0) < 0) {
+ return -ENOMEM;
+ }
+ }
+
+ drain_local_pages();
+ count_data_pages();
+ printk("swsusp(1/2): Need to copy %u pages, %u pcs\n",
+ nr_copy_pages, nr_copy_pcs);
+
+ while (nr_free_pages() < nr_copy_pages + PAGES_FOR_IO) {
+ if (recal == 0) {
+ printk("swsusp: try shrink memory ");
+ }
+ shrink_all_memory(nr_copy_pages + PAGES_FOR_IO + recal);
+ recal += PAGES_FOR_IO;
+ }
+
+ if (recal) {
+ printk("done\n");
+ page_caches_recal();
+ drain_local_pages();
+ count_data_pages();
+ printk("swsusp(1/2): Need to copy %u pages, %u pcs\n",
+ nr_copy_pages, nr_copy_pcs);
+ }
+
+ drain_local_pages();
+ count_data_pages();
+ printk("swsusp(2/2): Need to copy %u pages, %u pcs\n",
+ nr_copy_pages, nr_copy_pcs);
+
+ error = swsusp_alloc();
+ if (error) {
+ printk("swsusp_alloc failed, %d\n", error);
+ return error;
+ }
+
+ drain_local_pages();
+ count_data_pages();
+ printk("swsusp(final): Need to copy %u pages, %u pcs\n",
+ nr_copy_pages, nr_copy_pcs);
+
+ if (swsusp_pagecache) {
+ setup_pagedir_pbe();
+ pr_debug("after setup_pagedir_pbe \n");
+
+ error = page_caches_write();
+ if (error)
+ return error;
+ }
+
+ return 0;
+}

static int pfn_is_nosave(unsigned long pfn)
{
@@ -539,7 +1112,10 @@
}
if (PageNosaveFree(page))
return 0;
-
+ if (PagePcs(page) && swsusp_pagecache) {
+ BUG_ON(zone->nr_inactive == 0 && zone->nr_active == 0);
+ return 0;
+ }
return 1;
}

@@ -549,10 +1125,12 @@
unsigned long zone_pfn;

nr_copy_pages = 0;
+ nr_copy_pcs = 0;

for_each_zone(zone) {
if (is_highmem(zone))
continue;
+ nr_copy_pcs += count_pcs(zone, 0);
mark_free_pages(zone);
for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn)
nr_copy_pages += saveable(zone, &zone_pfn);
@@ -564,7 +1142,6 @@
{
struct zone *zone;
unsigned long zone_pfn;
- struct pbe * pbe = pagedir_nosave;
int pages_copied = 0;

for_each_zone(zone) {
@@ -574,11 +1151,14 @@
for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) {
if (saveable(zone, &zone_pfn)) {
struct page * page;
+ struct pbe * pbe = find_pbe_by_index(pagedir_nosave,
+ pages_copied, nr_copy_pages);
+ BUG_ON(pbe == NULL);
page = pfn_to_page(zone_pfn + zone->zone_start_pfn);
pbe->orig_address = (long) page_address(page);
+ BUG_ON(pbe->orig_address == 0);
/* copy_page is not usable for copying task structs. */
memcpy((void *)pbe->address, (void *)pbe->orig_address, PAGE_SIZE);
- pbe++;
pages_copied++;
}
}
@@ -587,105 +1167,38 @@
nr_copy_pages = pages_copied;
}

-
-/**
- * calc_order - Determine the order of allocation needed for pagedir_save.
- *
- * This looks tricky, but is just subtle. Please fix it some time.
- * Since there are %nr_copy_pages worth of pages in the snapshot, we need
- * to allocate enough contiguous space to hold
- * (%nr_copy_pages * sizeof(struct pbe)),
- * which has the saved/orig locations of the page..
- *
- * SUSPEND_PD_PAGES() tells us how many pages we need to hold those
- * structures, then we call get_bitmask_order(), which will tell us the
- * last bit set in the number, starting with 1. (If we need 30 pages, that
- * is 0x0000001e in hex. The last bit is the 5th, which is the order we
- * would use to allocate 32 contiguous pages).
- *
- * Since we also need to save those pages, we add the number of pages that
- * we need to nr_copy_pages, and in case of an overflow, do the
- * calculation again to update the number of pages needed.
- *
- * With this model, we will tend to waste a lot of memory if we just cross
- * an order boundary. Plus, the higher the order of allocation that we try
- * to do, the more likely we are to fail in a low-memory situtation
- * (though we're unlikely to get this far in such a case, since swsusp
- * requires half of memory to be free anyway).
- */
-
-
-static void calc_order(void)
+static int free_one_snapshot_pbe(struct pbe *p, void *tmp, int cur)
{
- int diff = 0;
- int order = 0;
-
- do {
- diff = get_bitmask_order(SUSPEND_PD_PAGES(nr_copy_pages)) - order;
- if (diff) {
- order += diff;
- nr_copy_pages += 1 << diff;
- }
- } while(diff);
- pagedir_order = order;
-}
-
-
-/**
- * alloc_pagedir - Allocate the page directory.
- *
- * First, determine exactly how many contiguous pages we need and
- * allocate them.
- */
-
-static int alloc_pagedir(void)
-{
- calc_order();
- pagedir_save = (suspend_pagedir_t *)__get_free_pages(GFP_ATOMIC | __GFP_COLD,
- pagedir_order);
- if (!pagedir_save)
- return -ENOMEM;
- memset(pagedir_save, 0, (1 << pagedir_order) * PAGE_SIZE);
- pagedir_nosave = pagedir_save;
+ ClearPageNosave(virt_to_page(p->address));
+ free_page(p->address);
+ p->address = 0;
return 0;
}

/**
* free_image_pages - Free pages allocated for snapshot
*/
-
static void free_image_pages(void)
{
- struct pbe * p;
- int i;
-
- p = pagedir_save;
- for (i = 0, p = pagedir_save; i < nr_copy_pages; i++, p++) {
- if (p->address) {
- ClearPageNosave(virt_to_page(p->address));
- free_page(p->address);
- p->address = 0;
- }
- }
+ for_each_pbe(pagedir_save, free_one_snapshot_pbe, NULL, nr_copy_pages);
}

+static int alloc_one_snapshot_pbe(struct pbe *p, void *tmp, int cur)
+{
+ p->address = get_zeroed_page(GFP_ATOMIC | __GFP_COLD);
+ if (!p->address)
+ return -ENOMEM;
+ SetPageNosave(virt_to_page(p->address));
+ return 0;
+}
/**
* alloc_image_pages - Allocate pages for the snapshot.
*
*/
-
static int alloc_image_pages(void)
{
- struct pbe * p;
- int i;
-
- for (i = 0, p = pagedir_save; i < nr_copy_pages; i++, p++) {
- p->address = get_zeroed_page(GFP_ATOMIC | __GFP_COLD);
- if (!p->address)
- return -ENOMEM;
- SetPageNosave(virt_to_page(p->address));
- }
- return 0;
+ return for_each_pbe(pagedir_save, alloc_one_snapshot_pbe, NULL,
+ nr_copy_pages);
}

void swsusp_free(void)
@@ -693,7 +1206,7 @@
BUG_ON(PageNosave(virt_to_page(pagedir_save)));
BUG_ON(PageNosaveFree(virt_to_page(pagedir_save)));
free_image_pages();
- free_pages((unsigned long) pagedir_save, pagedir_order);
+ for_each_pgdir(pagedir_save, free_one_pagedir, NULL, NULL);
}


@@ -730,7 +1243,7 @@
struct sysinfo i;

si_swapinfo(&i);
- if (i.freeswap < (nr_copy_pages + PAGES_FOR_IO)) {
+ if (i.freeswap < (nr_copy_pages + nr_copy_pcs + PAGES_FOR_IO)) {
pr_debug("swsusp: Not enough swap. Need %ld\n",i.freeswap);
return 0;
}
@@ -750,25 +1263,26 @@

if (!enough_swap())
return -ENOSPC;
-
- if ((error = alloc_pagedir())) {
- pr_debug("suspend: Allocating pagedir failed.\n");
- return error;
+ error = alloc_pagedir(&pagedir_save, nr_copy_pages, NULL, 0);
+ if (error < 0) {
+ printk("suspend: Allocating pagedir failed.\n");
+ return -ENOMEM;
}
+ pr_debug("alloc_pagedir: addon %d\n", error);
+ nr_copy_pages += error;
if ((error = alloc_image_pages())) {
- pr_debug("suspend: Allocating image pages failed.\n");
+ printk("suspend: Allocating image pages failed.\n");
swsusp_free();
return error;
}
+ pagedir_nosave = pagedir_save;

- pagedir_order_check = pagedir_order;
return 0;
}

int suspend_prepare_image(void)
{
unsigned int nr_needed_pages;
- int error;

pr_debug("swsusp: critical section: \n");
if (save_highmem()) {
@@ -777,15 +1291,8 @@
return -ENOMEM;
}

- drain_local_pages();
- count_data_pages();
- printk("swsusp: Need to copy %u pages\n",nr_copy_pages);
nr_needed_pages = nr_copy_pages + PAGES_FOR_IO;

- error = swsusp_alloc();
- if (error)
- return error;
-
/* During allocating of suspend pagedir, new cold pages may appear.
* Kill them.
*/
@@ -827,10 +1334,10 @@

asmlinkage int swsusp_save(void)
{
- int error = 0;
+/* int error = 0;

if ((error = swsusp_swap_check()))
- return error;
+ return error; */
return suspend_prepare_image();
}

@@ -854,11 +1361,11 @@

asmlinkage int swsusp_restore(void)
{
- BUG_ON (pagedir_order_check != pagedir_order);
-
/* Even mappings of "global" things (vmalloc) need to be fixed */
+#if defined(CONFIG_X86) || defined(CONFIG_X86_64)
__flush_tlb_global();
wbinvd(); /* Nigel says wbinvd here is good idea... */
+#endif
return 0;
}

@@ -881,99 +1388,6 @@
return error;
}

-
-
-/* More restore stuff */
-
-#define does_collide(addr) does_collide_order(pagedir_nosave, addr, 0)
-
-/*
- * Returns true if given address/order collides with any orig_address
- */
-static int __init does_collide_order(suspend_pagedir_t *pagedir, unsigned long addr,
- int order)
-{
- int i;
- unsigned long addre = addr + (PAGE_SIZE<<order);
-
- for (i=0; i < nr_copy_pages; i++)
- if ((pagedir+i)->orig_address >= addr &&
- (pagedir+i)->orig_address < addre)
- return 1;
-
- return 0;
-}
-
-/*
- * We check here that pagedir & pages it points to won't collide with pages
- * where we're going to restore from the loaded pages later
- */
-static int __init check_pagedir(void)
-{
- int i;
-
- for(i=0; i < nr_copy_pages; i++) {
- unsigned long addr;
-
- do {
- addr = get_zeroed_page(GFP_ATOMIC);
- if(!addr)
- return -ENOMEM;
- } while (does_collide(addr));
-
- (pagedir_nosave+i)->address = addr;
- }
- return 0;
-}
-
-static int __init swsusp_pagedir_relocate(void)
-{
- /*
- * We have to avoid recursion (not to overflow kernel stack),
- * and that's why code looks pretty cryptic
- */
- suspend_pagedir_t *old_pagedir = pagedir_nosave;
- void **eaten_memory = NULL;
- void **c = eaten_memory, *m, *f;
- int ret = 0;
-
- printk("Relocating pagedir ");
-
- if (!does_collide_order(old_pagedir, (unsigned long)old_pagedir, pagedir_order)) {
- printk("not necessary\n");
- return check_pagedir();
- }
-
- while ((m = (void *) __get_free_pages(GFP_ATOMIC, pagedir_order)) != NULL) {
- if (!does_collide_order(old_pagedir, (unsigned long)m, pagedir_order))
- break;
- eaten_memory = m;
- printk( "." );
- *eaten_memory = c;
- c = eaten_memory;
- }
-
- if (!m) {
- printk("out of memory\n");
- ret = -ENOMEM;
- } else {
- pagedir_nosave =
- memcpy(m, old_pagedir, PAGE_SIZE << pagedir_order);
- }
-
- c = eaten_memory;
- while (c) {
- printk(":");
- f = c;
- c = *c;
- free_pages((unsigned long)f, pagedir_order);
- }
- if (ret)
- return ret;
- printk("|\n");
- return check_pagedir();
-}
-
/**
* Using bio to read from swap.
* This code requires a bit more work than just using buffer heads
@@ -993,7 +1407,7 @@
return 0;
}

-static struct block_device * resume_bdev;
+static struct block_device * resume_bdev __nosavedata;

/**
* submit - submit BIO request.
@@ -1038,12 +1452,12 @@
return error;
}

-int bio_read_page(pgoff_t page_off, void * page)
+static int bio_read_page(pgoff_t page_off, void * page)
{
return submit(READ, page_off, page);
}

-int bio_write_page(pgoff_t page_off, void * page)
+static int bio_write_page(pgoff_t page_off, void * page)
{
return submit(WRITE, page_off, page);
}
@@ -1088,7 +1502,6 @@
return -EPERM;
}
nr_copy_pages = swsusp_info.image_pages;
- pagedir_order = get_bitmask_order(SUSPEND_PD_PAGES(nr_copy_pages));
return error;
}

@@ -1115,62 +1528,141 @@
return error;
}

+static int __init check_one_pbe(struct pbe *p, void *collide, int cur)
+{
+ unsigned long addr = 0;
+
+ pr_debug("check_one_pbe: %p %p o{%p} ",
+ p, (void*)addr, (void*)p->orig_address);
+ do {
+ if (addr) {
+ /*free_page(addr);*/
+ addr = 0;
+ }
+ addr = get_zeroed_page(GFP_ATOMIC);
+ if(!addr)
+ return -ENOMEM;
+ } while(check_collide((struct pbe *)collide, nr_copy_pages, (void*)addr));
+ pr_debug("c{%p} done\n", (void*)addr);
+ p->address = addr;
+
+ return 0;
+}
+
+/*
+ * We check here that pagedir & pages it points to won't collide with pages
+ * where we're going to restore from the loaded pages later
+ */
+static int __init check_pagedir(struct pbe *pbe)
+{
+ return for_each_pbe(pbe, check_one_pbe, pagedir_nosave, nr_copy_pages);
+}
+
+static int __init read_one_pbe(struct pbe *p, void *tmp, int cur)
+{
+ int error = 0;
+
+ mod_printk_progress(cur);
+
+ pr_debug("read_one_pbe: %p o{%p} c{%p} %lu, %d\n",
+ p, (void *)p->orig_address, (void *)p->address,
+ swp_offset(p->swap_address), cur);
+ error = bio_read_page(swp_offset(p->swap_address), (void *)p->address);
+
+ return error;
+}
+
/**
* swsusp_read_data - Read image pages from swap.
*
- * You do not need to check for overlaps, check_pagedir()
- * already did that.
*/
+static void __init swsusp_copy_pagedir(suspend_pagedir_t *d_pgdir,
+ suspend_pagedir_t *s_pgdir)
+{
+ int i = 0;
+
+ while (s_pgdir != NULL) {
+ suspend_pagedir_t *s_next = (suspend_pagedir_t *)s_pgdir->dummy.val;
+ suspend_pagedir_t *d_next = (suspend_pagedir_t *)d_pgdir->dummy.val;
+ for (i = 0; i < ONE_PAGE_PBE_NUM; i++) {
+ d_pgdir->address = s_pgdir->address;
+ d_pgdir->orig_address = s_pgdir->orig_address;
+ d_pgdir->swap_address = s_pgdir->swap_address;
+ s_pgdir ++; d_pgdir ++;
+ }
+ d_pgdir = d_next;
+ s_pgdir = s_next;
+ };
+}

static int __init data_read(void)
{
- struct pbe * p;
int error;
- int i;
- int mod = nr_copy_pages / 100;
+ suspend_pagedir_t * addr = NULL;

- if (!mod)
- mod = 1;
-
- if ((error = swsusp_pagedir_relocate()))
+ printk("Relocating pagedir ");
+ error = alloc_pagedir(&addr, nr_copy_pages, pagedir_nosave,
+ swsusp_info.pagedir_pages);
+ if (error < 0) {
return error;
+ }
+ swsusp_copy_pagedir(addr, pagedir_nosave);
+ if (check_pagedir(addr)) {
+ return -ENOMEM;
+ }
+ for_each_pgdir(pagedir_nosave, free_one_pagedir, NULL, NULL);
+ printk("done\n");
+
+ pagedir_nosave = addr;
+
+ mod_progress = nr_copy_pages / 100;

printk( "Reading image data (%d pages): ", nr_copy_pages );
- for(i = 0, p = pagedir_nosave; i < nr_copy_pages && !error; i++, p++) {
- if (!(i%mod))
- printk( "\b\b\b\b%3d%%", i / mod );
- error = bio_read_page(swp_offset(p->swap_address),
- (void *)p->address);
- }
- printk(" %d done.\n",i);
- return error;
+ error = for_each_pbe(pagedir_nosave, read_one_pbe, NULL, nr_copy_pages);
+ printk(" %d done.\n", nr_copy_pages);

+ return error;
}

extern dev_t __init name_to_dev_t(const char *line);

+static int __init read_one_pagedir(suspend_pagedir_t *pgdir,
+ void *fun, void *arg)
+{
+ int i = *(int *)arg;
+ unsigned long offset = swp_offset(swsusp_info.pagedir[i]);
+ unsigned long next;
+ int error = 0;
+
+ next = pgdir->dummy.val;
+ pr_debug("read_one_pagedir: %p, %d, %lu, %p\n",
+ pgdir, i, offset, (void*)next);
+ if ((error = bio_read_page(offset, (void *)pgdir))) {
+ return error;
+ }
+ (*(int *)arg) ++;
+ pgdir->dummy.val = next;
+
+ return error;
+}
+
static int __init read_pagedir(void)
{
- unsigned long addr;
- int i, n = swsusp_info.pagedir_pages;
+ int i = 0, n = swsusp_info.pagedir_pages;
int error = 0;

- addr = __get_free_pages(GFP_ATOMIC, pagedir_order);
- if (!addr)
+ error = alloc_pagedir(&pagedir_nosave, nr_copy_pages, NULL, n);
+ if (error < 0)
return -ENOMEM;
- pagedir_nosave = (struct pbe *)addr;

- pr_debug("pmdisk: Reading pagedir (%d Pages)\n",n);
+ printk("pmdisk: Reading pagedir (%d Pages)\n",n);
+
+ error = for_each_pgdir(pagedir_nosave, read_one_pagedir, NULL, &i);
+ BUG_ON(i != n);

- for (i = 0; i < n && !error; i++, addr += PAGE_SIZE) {
- unsigned long offset = swp_offset(swsusp_info.pagedir[i]);
- if (offset)
- error = bio_read_page(offset, (void *)addr);
- else
- error = -EFAULT;
- }
if (error)
- free_pages((unsigned long)pagedir_nosave, pagedir_order);
+ for_each_pgdir(pagedir_nosave, free_one_pagedir, NULL, NULL);
+
return error;
}

@@ -1185,7 +1677,7 @@
if ((error = read_pagedir()))
return error;
if ((error = data_read()))
- free_pages((unsigned long)pagedir_nosave, pagedir_order);
+ for_each_pgdir(pagedir_nosave, free_one_pagedir, NULL, NULL);
return error;
}

@@ -1207,7 +1699,7 @@
if (!IS_ERR(resume_bdev)) {
set_blocksize(resume_bdev, PAGE_SIZE);
error = read_suspend_image();
- blkdev_put(resume_bdev);
+ /* blkdev_put(resume_bdev); */
} else
error = PTR_ERR(resume_bdev);

--- linux-2.6.9-ppc-g4-peval/kernel/sys.c 2004-11-22 17:11:35.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/kernel/sys.c 2004-11-22 17:16:58.000000000 +0800
@@ -84,7 +84,7 @@
* and the like.
*/

-static struct notifier_block *reboot_notifier_list;
+struct notifier_block *reboot_notifier_list;
rwlock_t notifier_lock = RW_LOCK_UNLOCKED;

/**
--- linux-2.6.9-ppc-g4-peval/kernel/sysctl.c 2004-11-22 17:08:10.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/kernel/sysctl.c 2004-11-24 14:12:57.000000000 +0800
@@ -66,6 +66,10 @@
extern int printk_ratelimit_jiffies;
extern int printk_ratelimit_burst;

+#if defined(CONFIG_SOFTWARE_SUSPEND)
+extern int swsusp_pagecache;
+#endif
+
#if defined(CONFIG_X86_LOCAL_APIC) && defined(__i386__)
int unknown_nmi_panic;
extern int proc_unknown_nmi_panic(ctl_table *, int, struct file *,
@@ -792,6 +796,18 @@
.strategy = &sysctl_intvec,
.extra1 = &zero,
},
+#if defined(CONFIG_SOFTWARE_SUSPEND)
+ {
+ .ctl_name = VM_SWSUSP_PAGECACHE,
+ .procname = "swsusp_pagecache",
+ .data = &swsusp_pagecache,
+ .maxlen = sizeof(swsusp_pagecache),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ .strategy = &sysctl_intvec,
+ .extra1 = &zero,
+ },
+#endif
{
.ctl_name = VM_BLOCK_DUMP,
.procname = "block_dump",
--- linux-2.6.9-ppc-g4-peval/include/linux/sysctl.h 2004-11-22 17:08:10.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/include/linux/sysctl.h 2004-11-24 14:13:08.000000000 +0800
@@ -170,6 +170,7 @@
VM_VFS_CACHE_PRESSURE=26, /* dcache/icache reclaim pressure */
VM_LEGACY_VA_LAYOUT=27, /* legacy/compatibility virtual address space layout */
VM_HARDMAPLIMIT=28, /* Make mapped a hard limit */
+ VM_SWSUSP_PAGECACHE=29, /* Enable/Disable Suspend PageCaches */
};


--
Hu Gang / Steve
Linux Registered User 204016
GPG Public Key: http://soulinfo.com/~hugang/hugang.asc

2004-11-24 08:13:54

by Hu Gang

[permalink] [raw]
Subject: [PATH] 11-24 swsusp update 3/3

--ppc.diff--

--- linux-2.6.9-ppc-g4-peval/arch/ppc/Kconfig 2004-10-20 15:58:39.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/arch/ppc/Kconfig 2004-11-22 17:16:58.000000000 +0800
@@ -983,6 +983,8 @@

source "drivers/zorro/Kconfig"

+source kernel/power/Kconfig
+
endmenu

menu "Bus options"
--- linux-2.6.9-ppc-g4-peval/arch/ppc/kernel/Makefile 2004-10-20 15:58:40.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/arch/ppc/kernel/Makefile 2004-11-22 17:16:58.000000000 +0800
@@ -16,6 +16,7 @@
semaphore.o syscalls.o setup.o \
cputable.o ppc_htab.o
obj-$(CONFIG_6xx) += l2cr.o cpu_setup_6xx.o
+obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o
obj-$(CONFIG_POWER4) += cpu_setup_power4.o
obj-$(CONFIG_MODULES) += module.o ppc_ksyms.o
obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-mapping.o
--- /dev/null 2004-06-07 18:45:47.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/arch/ppc/kernel/swsusp.S 2004-11-24 15:36:21.000000000 +0800
@@ -0,0 +1,366 @@
+#include <linux/config.h>
+#include <linux/threads.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/offsets.h>
+
+
+/*
+ * Structure for storing CPU registers on the save area.
+ */
+#define SL_SP 0
+#define SL_PC 4
+#define SL_MSR 8
+#define SL_SDR1 0xc
+#define SL_SPRG0 0x10 /* 4 sprg's */
+#define SL_DBAT0 0x20
+#define SL_IBAT0 0x28
+#define SL_DBAT1 0x30
+#define SL_IBAT1 0x38
+#define SL_DBAT2 0x40
+#define SL_IBAT2 0x48
+#define SL_DBAT3 0x50
+#define SL_IBAT3 0x58
+#define SL_TB 0x60
+#define SL_R2 0x68
+#define SL_CR 0x6c
+#define SL_LR 0x70
+#define SL_R12 0x74 /* r12 to r31 */
+#define SL_SIZE (SL_R12 + 80)
+
+ .section .data
+ .align 5
+
+_GLOBAL(swsusp_save_area)
+ .space SL_SIZE
+
+
+ .section .text
+ .align 5
+
+_GLOBAL(swsusp_arch_suspend)
+
+ lis r11,swsusp_save_area@h
+ ori r11,r11,swsusp_save_area@l
+
+ mflr r0
+ stw r0,SL_LR(r11)
+ mfcr r0
+ stw r0,SL_CR(r11)
+ stw r1,SL_SP(r11)
+ stw r2,SL_R2(r11)
+ stmw r12,SL_R12(r11)
+
+ /* Save MSR & SDR1 */
+ mfmsr r4
+ stw r4,SL_MSR(r11)
+ mfsdr1 r4
+ stw r4,SL_SDR1(r11)
+
+ /* Get a stable timebase and save it */
+1: mftbu r4
+ stw r4,SL_TB(r11)
+ mftb r5
+ stw r5,SL_TB+4(r11)
+ mftbu r3
+ cmpw r3,r4
+ bne 1b
+
+ /* Save SPRGs */
+ mfsprg r4,0
+ stw r4,SL_SPRG0(r11)
+ mfsprg r4,1
+ stw r4,SL_SPRG0+4(r11)
+ mfsprg r4,2
+ stw r4,SL_SPRG0+8(r11)
+ mfsprg r4,3
+ stw r4,SL_SPRG0+12(r11)
+
+ /* Save BATs */
+ mfdbatu r4,0
+ stw r4,SL_DBAT0(r11)
+ mfdbatl r4,0
+ stw r4,SL_DBAT0+4(r11)
+ mfdbatu r4,1
+ stw r4,SL_DBAT1(r11)
+ mfdbatl r4,1
+ stw r4,SL_DBAT1+4(r11)
+ mfdbatu r4,2
+ stw r4,SL_DBAT2(r11)
+ mfdbatl r4,2
+ stw r4,SL_DBAT2+4(r11)
+ mfdbatu r4,3
+ stw r4,SL_DBAT3(r11)
+ mfdbatl r4,3
+ stw r4,SL_DBAT3+4(r11)
+ mfibatu r4,0
+ stw r4,SL_IBAT0(r11)
+ mfibatl r4,0
+ stw r4,SL_IBAT0+4(r11)
+ mfibatu r4,1
+ stw r4,SL_IBAT1(r11)
+ mfibatl r4,1
+ stw r4,SL_IBAT1+4(r11)
+ mfibatu r4,2
+ stw r4,SL_IBAT2(r11)
+ mfibatl r4,2
+ stw r4,SL_IBAT2+4(r11)
+ mfibatu r4,3
+ stw r4,SL_IBAT3(r11)
+ mfibatl r4,3
+ stw r4,SL_IBAT3+4(r11)
+
+#if 0
+ /* Backup various CPU config stuffs */
+ bl __save_cpu_setup
+#endif
+ /* Call the low level suspend stuff (we should probably have made
+ * a stackframe...)
+ */
+ bl swsusp_save
+
+ /* Restore LR from the save area */
+ lis r11,swsusp_save_area@h
+ ori r11,r11,swsusp_save_area@l
+ lwz r0,SL_LR(r11)
+ mtlr r0
+
+ blr
+
+
+/* Resume code */
+_GLOBAL(swsusp_arch_resume)
+
+ /* Stop pending AltiVec streams and memory accesses */
+BEGIN_FTR_SECTION
+ DSSALL
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+ sync
+
+ /* Disable MSR:DR to make sure we don't take a TLB or
+ * hash miss during the copy, as our hash table will
+ * for a while be unusable. For .text, we assume we are
+ * covered by a BAT. This works only for non-G5 at this
+ * point. G5 will need a better approach, possibly using
+ * a small temporary hash table filled with large mappings,
+ * disabling the MMU completely isn't a good option for
+ * performance reasons.
+ * (Note that 750's may have the same performance issue as
+ * the G5 in this case, we should investigate using moving
+ * BATs for these CPUs)
+ */
+ mfmsr r0
+ sync
+ rlwinm r0,r0,0,28,26 /* clear MSR_DR */
+ mtmsr r0
+ sync
+ isync
+
+ /* Load pointer to the list of pages to copy into r9 */
+ lis r9,pagedir_nosave@ha
+ addi r9,r9,pagedir_nosave@l
+ tophys(r9,r9)
+ lwz r9, 0(r9)
+#if 0
+ twi 31,r0,0 /* trigger trap */
+#endif
+ cmpwi r9, 0
+ beq copy_loop_end
+copy_loop:
+ tophys(r9,r9)
+ lwz r6, 12(r9)
+ li r10, 0
+copy_one_pgdir:
+ lwz r11, 4(r9)
+ addi r8,r10,1
+ cmpwi r11, 0
+ addi r7,r9,16
+ beq copy_loop_end
+ li r0, 256
+ mtctr r0
+ lwz r9,0(r9)
+#if 0
+ twi 31,r0,0 /* trigger trap */
+#endif
+ tophys(r10,r11)
+ tophys(r11,r9)
+copy_one_page:
+ lwz r0, 0(r11)
+ stw r0, 0(r10)
+ lwz r9, 4(r11)
+ stw r9, 4(r10)
+ lwz r0, 8(r11)
+ stw r0, 8(r10)
+ lwz r9, 12(r11)
+ addi r11,r11,16
+ stw r9, 12(r10)
+ addi r10,r10,16
+ bdnz copy_one_page
+ mr r10, r8
+ cmplwi r10, 255
+ mr r9, r7
+ ble copy_one_pgdir
+ mr r9, r6
+ bne copy_loop
+copy_loop_end:
+
+ /* Do a very simple cache flush/inval of the L1 to ensure
+ * coherency of the icache
+ */
+ lis r3,0x0002
+ mtctr r3
+ li r3, 0
+1:
+ lwz r0,0(r3)
+ addi r3,r3,0x0020
+ bdnz 1b
+ isync
+ sync
+
+ /* Now flush those cache lines */
+ lis r3,0x0002
+ mtctr r3
+ li r3, 0
+1:
+ dcbf 0,r3
+ addi r3,r3,0x0020
+ bdnz 1b
+ sync
+
+ /* Ok, we are now running with the kernel data of the old
+ * kernel fully restored. We can get to the save area
+ * easily now. As for the rest of the code, it assumes the
+ * loader kernel and the booted one are exactly identical
+ */
+ lis r11,swsusp_save_area@h
+ ori r11,r11,swsusp_save_area@l
+ tophys(r11,r11)
+
+#if 0
+ /* Restore various CPU config stuffs */
+ bl __restore_cpu_setup
+#endif
+ /* Restore the BATs, and SDR1. Then we can turn on the MMU.
+ * This is a bit hairy as we are running out of those BATs,
+ * but first, our code is probably in the icache, and we are
+ * writing the same value to the BAT, so that should be fine,
+ * though a better solution will have to be found long-term
+ */
+ lwz r4,SL_SDR1(r11)
+ mtsdr1 r4
+ lwz r4,SL_SPRG0(r11)
+ mtsprg 0,r4
+ lwz r4,SL_SPRG0+4(r11)
+ mtsprg 1,r4
+ lwz r4,SL_SPRG0+8(r11)
+ mtsprg 2,r4
+ lwz r4,SL_SPRG0+12(r11)
+ mtsprg 3,r4
+
+#if 0
+ lwz r4,SL_DBAT0(r11)
+ mtdbatu 0,r4
+ lwz r4,SL_DBAT0+4(r11)
+ mtdbatl 0,r4
+ lwz r4,SL_DBAT1(r11)
+ mtdbatu 1,r4
+ lwz r4,SL_DBAT1+4(r11)
+ mtdbatl 1,r4
+ lwz r4,SL_DBAT2(r11)
+ mtdbatu 2,r4
+ lwz r4,SL_DBAT2+4(r11)
+ mtdbatl 2,r4
+ lwz r4,SL_DBAT3(r11)
+ mtdbatu 3,r4
+ lwz r4,SL_DBAT3+4(r11)
+ mtdbatl 3,r4
+ lwz r4,SL_IBAT0(r11)
+ mtibatu 0,r4
+ lwz r4,SL_IBAT0+4(r11)
+ mtibatl 0,r4
+ lwz r4,SL_IBAT1(r11)
+ mtibatu 1,r4
+ lwz r4,SL_IBAT1+4(r11)
+ mtibatl 1,r4
+ lwz r4,SL_IBAT2(r11)
+ mtibatu 2,r4
+ lwz r4,SL_IBAT2+4(r11)
+ mtibatl 2,r4
+ lwz r4,SL_IBAT3(r11)
+ mtibatu 3,r4
+ lwz r4,SL_IBAT3+4(r11)
+ mtibatl 3,r4
+#endif
+
+BEGIN_FTR_SECTION
+ li r4,0
+ mtspr SPRN_DBAT4U,r4
+ mtspr SPRN_DBAT4L,r4
+ mtspr SPRN_DBAT5U,r4
+ mtspr SPRN_DBAT5L,r4
+ mtspr SPRN_DBAT6U,r4
+ mtspr SPRN_DBAT6L,r4
+ mtspr SPRN_DBAT7U,r4
+ mtspr SPRN_DBAT7L,r4
+ mtspr SPRN_IBAT4U,r4
+ mtspr SPRN_IBAT4L,r4
+ mtspr SPRN_IBAT5U,r4
+ mtspr SPRN_IBAT5L,r4
+ mtspr SPRN_IBAT6U,r4
+ mtspr SPRN_IBAT6L,r4
+ mtspr SPRN_IBAT7U,r4
+ mtspr SPRN_IBAT7L,r4
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_HIGH_BATS)
+
+ /* Flush all TLBs */
+ lis r4,0x1000
+1: addic. r4,r4,-0x1000
+ tlbie r4
+ blt 1b
+ sync
+
+ /* restore the MSR and turn on the MMU */
+ lwz r3,SL_MSR(r11)
+ bl turn_on_mmu
+ tovirt(r11,r11)
+
+ /* Restore TB */
+ li r3,0
+ mttbl r3
+ lwz r3,SL_TB(r11)
+ lwz r4,SL_TB+4(r11)
+ mttbu r3
+ mttbl r4
+
+ /* Kick decrementer */
+ li r0,1
+ mtdec r0
+
+ /* Restore the callee-saved registers and return */
+ lwz r0,SL_CR(r11)
+ mtcr r0
+ lwz r2,SL_R2(r11)
+ lmw r12,SL_R12(r11)
+ lwz r1,SL_SP(r11)
+ lwz r0,SL_LR(r11)
+ mtlr r0
+
+ // XXX Note: we don't really need to call swsusp_resume
+
+ li r3,0
+ blr
+
+/* FIXME: This construct is actually not useful since we don't shut
+ * down the instruction MMU, we could just flip back MSR-DR on.
+ */
+turn_on_mmu:
+ mflr r4
+ mtsrr0 r4
+ mtsrr1 r3
+ sync
+ isync
+ rfi
+
--- /dev/null 2004-06-07 18:45:47.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/include/asm-ppc/suspend.h 2004-11-22 17:40:42.000000000 +0800
@@ -0,0 +1,12 @@
+static inline int arch_prepare_suspend(void)
+{
+ return 0;
+}
+
+static inline void save_processor_state(void)
+{
+}
+
+static inline void restore_processor_state(void)
+{
+}
--- linux-2.6.9-ppc-g4-peval/arch/ppc/kernel/signal.c 2004-10-20 15:58:41.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/arch/ppc/kernel/signal.c 2004-11-22 17:16:58.000000000 +0800
@@ -28,6 +28,7 @@
#include <linux/elf.h>
#include <linux/tty.h>
#include <linux/binfmts.h>
+#include <linux/suspend.h>
#include <asm/ucontext.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -604,6 +605,14 @@
unsigned long frame, newsp;
int signr, ret;

+ if (current->flags & PF_FREEZE) {
+ refrigerator(PF_FREEZE);
+ signr = 0;
+ ret = regs->gpr[3];
+ if (!signal_pending(current))
+ goto no_signal;
+ }
+
if (!oldset)
oldset = &current->blocked;

@@ -626,6 +635,7 @@
regs->gpr[3] = EINTR;
/* note that the cr0.SO bit is already set */
} else {
+no_signal:
regs->nip -= 4; /* Back up & retry system call */
regs->result = 0;
regs->trap = 0;
--- linux-2.6.9-ppc-g4-peval/arch/ppc/kernel/vmlinux.lds.S 2004-10-20 15:58:41.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/arch/ppc/kernel/vmlinux.lds.S 2004-11-22 17:16:58.000000000 +0800
@@ -74,6 +74,12 @@
CONSTRUCTORS
}

+ . = ALIGN(4096);
+ __nosave_begin = .;
+ .data_nosave : { *(.data.nosave) }
+ . = ALIGN(4096);
+ __nosave_end = .;
+
. = ALIGN(32);
.data.cacheline_aligned : { *(.data.cacheline_aligned) }

--- linux-2.6.9-ppc-g4-peval/arch/ppc/platforms/pmac_setup.c 2004-10-20 15:58:41.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/arch/ppc/platforms/pmac_setup.c 2004-11-22 17:36:22.000000000 +0800
@@ -51,6 +51,7 @@
#include <linux/irq.h>
#include <linux/seq_file.h>
#include <linux/root_dev.h>
+#include <linux/suspend.h>

#include <asm/reg.h>
#include <asm/sections.h>
@@ -70,6 +71,8 @@
#include <asm/pmac_feature.h>
#include <asm/time.h>
#include <asm/of_device.h>
+#include <asm/mmu_context.h>
+
#include "pmac_pic.h"
#include "mem_pieces.h"

@@ -420,11 +423,67 @@
#endif
}

+/* TODO: Merge the suspend-to-ram with the common code !!!
+ * currently, this is a stub implementation for suspend-to-disk
+ * only
+ */
+
+#ifdef CONFIG_PM
+
+extern void enable_kernel_altivec(void);
+
+static int pmac_pm_prepare(suspend_state_t state)
+{
+ printk(KERN_DEBUG "pmac_pm_prepare(%d)\n", state);
+
+ return 0;
+}
+
+static int pmac_pm_enter(suspend_state_t state)
+{
+ printk(KERN_DEBUG "pmac_pm_enter(%d)\n", state);
+
+ /* Giveup the lazy FPU & vec so we don't have to back them
+ * up from the low level code
+ */
+ enable_kernel_fp();
+
+#ifdef CONFIG_ALTIVEC
+ if (cur_cpu_spec[0]->cpu_features & CPU_FTR_ALTIVEC)
+ enable_kernel_altivec();
+#endif /* CONFIG_ALTIVEC */
+
+ return 0;
+}
+
+static int pmac_pm_finish(suspend_state_t state)
+{
+ printk(KERN_DEBUG "pmac_pm_finish(%d)\n", state);
+
+ /* Restore userland MMU context */
+ set_context(current->active_mm->context, current->active_mm->pgd);
+
+ return 0;
+}
+
+static struct pm_ops pmac_pm_ops = {
+ .pm_disk_mode = PM_DISK_SHUTDOWN,
+ .prepare = pmac_pm_prepare,
+ .enter = pmac_pm_enter,
+ .finish = pmac_pm_finish,
+};
+
+#endif /* CONFIG_PM */
+
static int initializing = 1;

static int pmac_late_init(void)
{
initializing = 0;
+
+#ifdef CONFIG_PM
+ pm_set_ops(&pmac_pm_ops);
+#endif /* CONFIG_PM */
return 0;
}

--- linux-2.6.9-ppc-g4-peval/arch/ppc/syslib/open_pic.c 2004-10-20 15:58:42.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/arch/ppc/syslib/open_pic.c 2004-11-22 17:16:58.000000000 +0800
@@ -776,7 +776,8 @@
if (ISR[irq] == 0)
return;
if (!cpus_empty(keepmask)) {
- cpumask_t irqdest = { .bits[0] = openpic_read(&ISR[irq]->Destination) };
+ cpumask_t irqdest;
+ irqdest.bits[0] = openpic_read(&ISR[irq]->Destination);
cpus_and(irqdest, irqdest, keepmask);
cpus_or(physmask, physmask, irqdest);
}
--- linux-2.6.9-ppc-g4-peval/drivers/ide/ppc/pmac.c 2004-10-20 15:59:12.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/drivers/ide/ppc/pmac.c 2004-11-22 17:16:58.000000000 +0800
@@ -32,6 +32,7 @@
#include <linux/notifier.h>
#include <linux/reboot.h>
#include <linux/pci.h>
+#include <linux/pm.h>
#include <linux/adb.h>
#include <linux/pmu.h>

@@ -1364,7 +1365,7 @@
ide_hwif_t *hwif = (ide_hwif_t *)dev_get_drvdata(&mdev->ofdev.dev);
int rc = 0;

- if (state != mdev->ofdev.dev.power_state && state >= 2) {
+ if (state != mdev->ofdev.dev.power_state && state == PM_SUSPEND_MEM) {
rc = pmac_ide_do_suspend(hwif);
if (rc == 0)
mdev->ofdev.dev.power_state = state;
@@ -1472,7 +1473,7 @@
ide_hwif_t *hwif = (ide_hwif_t *)pci_get_drvdata(pdev);
int rc = 0;

- if (state != pdev->dev.power_state && state >= 2) {
+ if (state != pdev->dev.power_state && state == PM_SUSPEND_MEM ) {
rc = pmac_ide_do_suspend(hwif);
if (rc == 0)
pdev->dev.power_state = state;
--- linux-2.6.9-ppc-g4-peval/drivers/macintosh/Kconfig 2004-10-20 15:53:31.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/drivers/macintosh/Kconfig 2004-11-22 17:16:58.000000000 +0800
@@ -80,7 +80,7 @@

config PMAC_PBOOK
bool "Power management support for PowerBooks"
- depends on ADB_PMU
+ depends on PM && ADB_PMU
---help---
This provides support for putting a PowerBook to sleep; it also
enables media bay support. Power management works on the
@@ -97,11 +97,6 @@
have it autoloaded. The act of removing the module shuts down the
sound hardware for more power savings.

-config PM
- bool
- depends on PPC_PMAC && ADB_PMU && PMAC_PBOOK
- default y
-
config PMAC_APM_EMU
tristate "APM emulation"
depends on PMAC_PBOOK
--- linux-2.6.9-ppc-g4-peval/drivers/macintosh/mediabay.c 2004-10-20 15:53:32.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/drivers/macintosh/mediabay.c 2004-11-22 17:16:58.000000000 +0800
@@ -713,7 +713,7 @@
{
struct media_bay_info *bay = macio_get_drvdata(mdev);

- if (state != mdev->ofdev.dev.power_state && state >= 2) {
+ if (state != mdev->ofdev.dev.power_state && state == PM_SUSPEND_MEM) {
down(&bay->lock);
bay->sleeping = 1;
set_mb_power(bay, 0);
--- linux-2.6.9-ppc-g4-peval/drivers/macintosh/therm_adt746x.c 2004-10-20 15:59:24.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/drivers/macintosh/therm_adt746x.c 2004-11-22 17:16:58.000000000 +0800
@@ -22,6 +22,7 @@
#include <linux/spinlock.h>
#include <linux/smp_lock.h>
#include <linux/wait.h>
+#include <linux/suspend.h>
#include <asm/prom.h>
#include <asm/machdep.h>
#include <asm/io.h>
@@ -238,6 +239,11 @@
#endif
while(!kthread_should_stop())
{
+ if (current->flags & PF_FREEZE) {
+ printk(KERN_INFO "therm_adt746x: freezing thermostat\n");
+ refrigerator(PF_FREEZE);
+ }
+
msleep_interruptible(2000);

/* Check status */
--- linux-2.6.9-ppc-g4-peval/drivers/macintosh/therm_pm72.c 2004-10-20 15:53:32.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/drivers/macintosh/therm_pm72.c 2004-11-22 17:16:58.000000000 +0800
@@ -88,6 +88,7 @@
#include <linux/spinlock.h>
#include <linux/smp_lock.h>
#include <linux/wait.h>
+#include <linux/suspend.h>
#include <linux/reboot.h>
#include <linux/kmod.h>
#include <linux/i2c.h>
@@ -1044,6 +1045,11 @@
while (state == state_attached) {
unsigned long elapsed, start;

+ if (current->flags & PF_FREEZE) {
+ printk(KERN_INFO "therm_pm72: freezing thermostat\n");
+ refrigerator(PF_FREEZE);
+ }
+
start = jiffies;

down(&driver_lock);
--- linux-2.6.9-ppc-g4-peval/drivers/macintosh/via-pmu.c 2004-10-20 15:59:24.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/drivers/macintosh/via-pmu.c 2004-11-22 17:16:58.000000000 +0800
@@ -43,6 +43,7 @@
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/device.h>
+#include <linux/sysdev.h>
#include <linux/suspend.h>
#include <linux/syscalls.h>
#include <asm/prom.h>
@@ -2326,7 +2327,7 @@
/* Sync the disks. */
/* XXX It would be nice to have some way to ensure that
* nobody is dirtying any new buffers while we wait. That
- * could be acheived using the refrigerator for processes
+ * could be achieved using the refrigerator for processes
* that swsusp uses
*/
sys_sync();
@@ -2379,7 +2380,6 @@

/* Wait for completion of async backlight requests */
while (!bright_req_1.complete || !bright_req_2.complete ||
-
!batt_req.complete)
pmu_poll();

@@ -3048,6 +3048,88 @@
}
#endif /* DEBUG_SLEEP */

+
+/* FIXME: This is a temporary set of callbacks to enable us
+ * to do suspend-to-disk.
+ */
+
+#ifdef CONFIG_PM
+
+static int pmu_sys_suspended = 0;
+
+static int pmu_sys_suspend(struct sys_device *sysdev, pm_message_t state)
+{
+ if (state != PMSG_FREEZE || pmu_sys_suspended)
+ return 0;
+
+ /* Suspend PMU event interrupts */
+ pmu_suspend();
+
+ pmu_sys_suspended = 1;
+ return 0;
+}
+
+static int pmu_sys_resume(struct sys_device *sysdev)
+{
+ struct adb_request req;
+
+ if (!pmu_sys_suspended)
+ return 0;
+
+ /* Tell PMU we are ready */
+ pmu_request(&req, NULL, 2, PMU_SYSTEM_READY, 2);
+ pmu_wait_complete(&req);
+
+ /* Resume PMU event interrupts */
+ pmu_resume();
+
+ pmu_sys_suspended = 0;
+
+ return 0;
+}
+
+#endif /* CONFIG_PM */
+
+static struct sysdev_class pmu_sysclass = {
+ set_kset_name("pmu"),
+};
+
+static struct sys_device device_pmu = {
+ .id = 0,
+ .cls = &pmu_sysclass,
+};
+
+static struct sysdev_driver driver_pmu = {
+#ifdef CONFIG_PM
+ .suspend = &pmu_sys_suspend,
+ .resume = &pmu_sys_resume,
+#endif /* CONFIG_PM */
+};
+
+static int __init init_pmu_sysfs(void)
+{
+ int rc;
+
+ rc = sysdev_class_register(&pmu_sysclass);
+ if (rc) {
+ printk(KERN_ERR "Failed registering PMU sys class\n");
+ return -ENODEV;
+ }
+ rc = sysdev_register(&device_pmu);
+ if (rc) {
+ printk(KERN_ERR "Failed registering PMU sys device\n");
+ return -ENODEV;
+ }
+ rc = sysdev_driver_register(&pmu_sysclass, &driver_pmu);
+ if (rc) {
+ printk(KERN_ERR "Failed registering PMU sys driver\n");
+ return -ENODEV;
+ }
+ return 0;
+}
+
+subsys_initcall(init_pmu_sysfs);
+
EXPORT_SYMBOL(pmu_request);
EXPORT_SYMBOL(pmu_poll);
EXPORT_SYMBOL(pmu_poll_adb);
--- linux-2.6.9-ppc-g4-peval/drivers/video/aty/radeon_pm.c 2004-10-20 15:55:34.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/drivers/video/aty/radeon_pm.c 2004-11-22 17:16:58.000000000 +0800
@@ -859,6 +859,10 @@
* know we'll be rebooted, ...
*/

+#if 0 /* this breaks suspend to ram until the dust settles... */
+ if (state != PM_SUSPEND_MEM)
+#endif
+ return 0;
printk(KERN_DEBUG "radeonfb: suspending to state: %d...\n", state);

acquire_console_sem();

2004-11-24 09:21:17

by Hu Gang

[permalink] [raw]
Subject: Re: [PATH] 11-24 swsusp update 3/3

On Wed, Nov 24, 2004 at 04:04:59PM +0800, [email protected] wrote:
> --ppc.diff--
>

Silence the warning message when writing the page cache to the swap device, and fix a bug.


diff -ur linux-2.6.9-ppc-g4-peval-11-24/include/linux/suspend.h linux-2.6.9-ppc-g4-peval-hg/include/linux/suspend.h
--- linux-2.6.9-ppc-g4-peval-11-24/include/linux/suspend.h 2004-11-24 16:12:29.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/include/linux/suspend.h 2004-11-24 16:51:48.000000000 +0800
@@ -1,7 +1,7 @@
#ifndef _LINUX_SWSUSP_H
#define _LINUX_SWSUSP_H

-#if (definedCONFIG_X86) || (defined CONFIG_PPC32)
+#if (defined(CONFIG_X86)) || (defined (CONFIG_PPC32))
#include <asm/suspend.h>
#endif
#include <linux/swap.h>
diff -ur linux-2.6.9-ppc-g4-peval-11-24/kernel/power/disk.c linux-2.6.9-ppc-g4-peval-hg/kernel/power/disk.c
--- linux-2.6.9-ppc-g4-peval-11-24/kernel/power/disk.c 2004-11-24 16:12:29.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/kernel/power/disk.c 2004-11-24 15:59:56.000000000 +0800
@@ -123,8 +123,8 @@
static void finish(void)
{
device_resume();
- read_page_caches();
platform_finish();
+ read_page_caches();
enable_nonboot_cpus();
thaw_processes();
pm_restore_console();
diff -ur linux-2.6.9-ppc-g4-peval-11-24/kernel/power/swsusp.c linux-2.6.9-ppc-g4-peval-hg/kernel/power/swsusp.c
--- linux-2.6.9-ppc-g4-peval-11-24/kernel/power/swsusp.c 2004-11-24 16:12:29.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/kernel/power/swsusp.c 2004-11-24 17:11:35.000000000 +0800
@@ -221,7 +221,7 @@

#define ONE_PAGE_PBE_NUM (PAGE_SIZE/sizeof(struct pbe))

-/* for each pagdir */
+/* for each pagedir */
typedef int (*susp_pgdir_t)(suspend_pagedir_t *cur, void *fun, void *arg);

static int inline for_each_pgdir(struct pbe *pbe, susp_pgdir_t fun,
@@ -706,14 +706,14 @@
int swsusp_pagecache = 0;

/* I'll move this to include/linux/page-flags.h */
-#define PG_pcs (PG_nosave_free + 1)
+#define PG_page_caches (PG_nosave_free + 1)

-#define SetPagePcs(page) set_bit(PG_pcs, &(page)->flags)
-#define ClearPagePcs(page) clear_bit(PG_pcs, &(page)->flags)
-#define PagePcs(page) test_bit(PG_pcs, &(page)->flags)
+#define SetPagePcs(page) set_bit(PG_page_caches, &(page)->flags)
+#define ClearPagePcs(page) clear_bit(PG_page_caches, &(page)->flags)
+#define PagePcs(page) test_bit(PG_page_caches, &(page)->flags)

static suspend_pagedir_t *pagedir_cache = NULL;
-static int nr_copy_pcs = 0;
+static int nr_copy_page_caches = 0;

static void lock_pagecaches(void)
{
@@ -735,7 +735,7 @@
}
}

-static int setup_pcs_pe(struct page *page, int setup)
+static int setup_page_caches_pe(struct page *page, int setup)
{
unsigned long pfn = page_to_pfn(page);

@@ -757,24 +757,24 @@
return 0;
}
if (setup) {
- struct pbe *p = find_pbe_by_index(pagedir_cache, nr_copy_pcs, -1);
+ struct pbe *p = find_pbe_by_index(pagedir_cache, nr_copy_page_caches, -1);
BUG_ON(p == NULL);
p->address = (long)page_address(page);
BUG_ON(p->address == 0);
- /*pr_debug("setup_pcs: cur %p, o{%p}, d{%p}, nr %u\n",
+ /*pr_debug("setup_page_caches: cur %p, o{%p}, d{%p}, nr %u\n",
(void*)p, (void*)p->orig_address,
- (void*)p->address, nr_copy_pcs);*/
- nr_copy_pcs ++;
+ (void*)p->address, nr_copy_page_caches);*/
+ nr_copy_page_caches ++;
}
SetPagePcs(page);

return (1);
}

-static int count_pcs(struct zone *zone, int p)
+static int count_page_caches(struct zone *zone, int p)
{
if (swsusp_pagecache)
- return foreach_zone_page(zone, setup_pcs_pe, p);
+ return foreach_zone_page(zone, setup_page_caches_pe, p);
return 0;
}

@@ -898,6 +898,8 @@
return (-ENOMEM);
}

+static char *page_cache_buf = NULL;
+
static int bio_read_page(pgoff_t page_off, void * page);

static int pagecache_read_pbe(struct pbe *p, void *tmp, int cur)
@@ -911,8 +913,9 @@
p, (void *)p->orig_address, (void *)p->address,
swp_offset(p->swap_address));

- error = bio_read_page(swp_offset(p->swap_address), (void *)p->address);
+ error = bio_read_page(swp_offset(p->swap_address), page_cache_buf);
if (error) return error;
+ memcpy((void*)p->address, (void*)page_cache_buf, PAGE_SIZE);

entry = p->swap_address;
if (entry.val)
@@ -927,15 +930,16 @@

if (swsusp_pagecache == 0) return 0;

- mod_progress = nr_copy_pcs / 100;
+ mod_progress = nr_copy_page_caches / 100;

- printk( "Reading PageCaches from swap (%d pages)... ", nr_copy_pcs);
+ printk( "Reading PageCaches from swap (%d pages)... ", nr_copy_page_caches);
error = for_each_pbe(pagedir_cache, pagecache_read_pbe, NULL,
- nr_copy_pcs);
+ nr_copy_page_caches);
printk("\b\b\b\bdone\n");

unlock_pagecaches();
for_each_pgdir(pagedir_cache, free_one_pagedir, NULL, NULL);
+ free_page((unsigned long)page_cache_buf);

return error;
}
@@ -949,7 +953,8 @@
pr_debug("pagecache_write_pbe: %p, o{%p} c{%p} %d ",
p, (void *)p->orig_address, (void *)p->address, cur);
BUG_ON(p->address == 0);
- error = write_page(p->address, &p->swap_address);
+ memcpy((void *)page_cache_buf, (void*)p->address, PAGE_SIZE);
+ error = write_page((unsigned long)page_cache_buf, &p->swap_address);
if (error) return error;

pr_debug("%lu\n", swp_offset(p->swap_address));
@@ -961,12 +966,12 @@
{
int error;

- mod_progress = nr_copy_pcs / 100;
+ mod_progress = nr_copy_page_caches / 100;

lock_pagecaches();
- printk( "Writing PageCaches to swap (%d pages)... ", nr_copy_pcs);
+ printk( "Writing PageCaches to swap (%d pages)... ", nr_copy_page_caches);
error = for_each_pbe(pagedir_cache, pagecache_write_pbe, NULL,
- nr_copy_pcs);
+ nr_copy_page_caches);
printk("\b\b\b\bdone\n");

return error;
@@ -976,10 +981,10 @@
{
struct zone *zone;

- nr_copy_pcs = 0;
+ nr_copy_page_caches = 0;
for_each_zone(zone) {
if (!is_highmem(zone)) {
- count_pcs(zone, 1);
+ count_page_caches(zone, 1);
}
}

@@ -997,11 +1002,11 @@
for (i = 0; i < max_mapnr; i++)
ClearPagePcs(mem_map+i);

- nr_copy_pcs = 0;
+ nr_copy_page_caches = 0;
drain_local_pages();
for_each_zone(zone) {
if (!is_highmem(zone)) {
- nr_copy_pcs += count_pcs(zone, 0);
+ nr_copy_page_caches += count_page_caches(zone, 0);
}
}
}
@@ -1017,43 +1022,36 @@
}

if (swsusp_pagecache) {
+ page_cache_buf = (char *)__get_free_pages(GFP_ATOMIC | __GFP_COLD, 0);
+ if (!page_cache_buf) {
+ return -ENOMEM;
+ }
+
page_caches_recal();

- if (nr_copy_pcs == 0) {
+ if (nr_copy_page_caches == 0) {
return 0;
}
- printk("swsusp: Need to copy %u pcs\n", nr_copy_pcs);
- if (alloc_pagedir(&pagedir_cache, nr_copy_pcs, NULL, 0) < 0) {
+ if (alloc_pagedir(&pagedir_cache, nr_copy_page_caches, NULL, 0) < 0) {
return -ENOMEM;
}
}

drain_local_pages();
count_data_pages();
- printk("swsusp(1/2): Need to copy %u pages, %u pcs\n",
- nr_copy_pages, nr_copy_pcs);

- while (nr_free_pages() < nr_copy_pages + PAGES_FOR_IO) {
- if (recal == 0) {
- printk("swsusp: try shrink memory ");
+ if (nr_free_pages() < nr_copy_pages + PAGES_FOR_IO) {
+ printk("swsusp: shrink memory:... ");
+ while (nr_free_pages() < nr_copy_pages + PAGES_FOR_IO) {
+ shrink_all_memory(nr_copy_pages + PAGES_FOR_IO);
+ recal ++;
+ printk("\b\b\b\b%4d", recal);
}
- shrink_all_memory(nr_copy_pages + PAGES_FOR_IO + recal);
- recal += PAGES_FOR_IO;
- }
-
- if (recal) {
- printk("done\n");
+ printk(" done\n");
page_caches_recal();
drain_local_pages();
count_data_pages();
- printk("swsusp(1/2): Need to copy %u pages, %u pcs\n",
- nr_copy_pages, nr_copy_pcs);
}
-
- drain_local_pages();
- count_data_pages();
- printk("swsusp(2/2): Need to copy %u pages, %u pcs\n",
- nr_copy_pages, nr_copy_pcs);

error = swsusp_alloc();
if (error) {
@@ -1063,8 +1061,8 @@

drain_local_pages();
count_data_pages();
- printk("swsusp(final): Need to copy %u pages, %u pcs\n",
- nr_copy_pages, nr_copy_pcs);
+ printk("swsusp: Need to copy %u pages, %u page_caches, %d freed\n",
+ nr_copy_pages, nr_copy_page_caches, nr_free_pages());

if (swsusp_pagecache) {
setup_pagedir_pbe();
@@ -1125,12 +1123,12 @@
unsigned long zone_pfn;

nr_copy_pages = 0;
- nr_copy_pcs = 0;
+ nr_copy_page_caches = 0;

for_each_zone(zone) {
if (is_highmem(zone))
continue;
- nr_copy_pcs += count_pcs(zone, 0);
+ nr_copy_page_caches += count_page_caches(zone, 0);
mark_free_pages(zone);
for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn)
nr_copy_pages += saveable(zone, &zone_pfn);
@@ -1243,7 +1241,7 @@
struct sysinfo i;

si_swapinfo(&i);
- if (i.freeswap < (nr_copy_pages + nr_copy_pcs + PAGES_FOR_IO)) {
+ if (i.freeswap < (nr_copy_pages + nr_copy_page_caches + PAGES_FOR_IO)) {
pr_debug("swsusp: Not enough swap. Need %ld\n",i.freeswap);
return 0;
}

--
--
Hu Gang / Steve
Linux Registered User 204016
GPG Public Key: http://soulinfo.com/~hugang/hugang.asc

2004-11-24 10:32:09

by Guido Günther

[permalink] [raw]
Subject: Re: [PATH] swsusp update 3/3

On Tue, Nov 23, 2004 at 12:58:58AM +0800, [email protected] wrote:
> --- linux-2.6.9-ppc-g4-peval/drivers/video/aty/radeon_pm.c 2004-10-20 15:55:34.000000000 +0800
> +++ linux-2.6.9-ppc-g4-peval-hg/drivers/video/aty/radeon_pm.c 2004-11-22 17:16:58.000000000 +0800
> @@ -859,6 +859,10 @@
> * know we'll be rebooted, ...
> */
>
> +#if 0 /* this breaks suspend to ram until the dust settles... */
> + if (state != PM_SUSPEND_MEM)
> +#endif
> + return 0;
> printk(KERN_DEBUG "radeonfb: suspending to state: %d...\n", state);
>
> acquire_console_sem();
Please don't. I only added this to my ppc swsusp patches as a temporary
hack. It should use "flags = SUSPEND_TO_RAM" from Pavel's bigdiff.
I submitted other parts to BenH a while ago, I'm currently working on
cleaning some parts up and make it work with suspend-to-ram.
-- Guido

2004-11-24 10:56:59

by Pavel Machek

[permalink] [raw]
Subject: Re: [PATH] 11-24 swsusp update 1/3

Hi!

> > Either drop this one or explain why it is good idea. It seems to be
> > independend on the rest.
> > This code I just copied from the old ppc swsusp port; I don't know why, :).

So drop the patch...

> >
> > > @@ -144,9 +151,13 @@
> > > }
> > >
> > > /* Free memory before shutting down devices. */
> > > - free_some_memory();
> > > + /* free_some_memory(); */
> >
> > Needs to be if (!swsusp_pagecache), right?
> I think we can drop this one; write_page_caches already has the same code
> and does a better job.

So at least delete it properly; no need to comment it out.

> > + if (swsusp_pbe_pgdir->orig_address == 0) return;
> > > + for (i = 0; i < PAGE_SIZE / (sizeof(unsigned long)); i+=4) {
> > > + *(((unsigned long *)(swsusp_pbe_pgdir->orig_address) + i)) =
> > > + *(((unsigned long *)(swsusp_pbe_pgdir->address) + i));
> > > + *(((unsigned long *)(swsusp_pbe_pgdir->orig_address) + i+1)) =
> > > + *(((unsigned long *)(swsusp_pbe_pgdir->address) + i+1));
> > > + *(((unsigned long *)(swsusp_pbe_pgdir->orig_address) + i+2)) =
> > > + *(((unsigned long *)(swsusp_pbe_pgdir->address) + i+2));
> > > + *(((unsigned long *)(swsusp_pbe_pgdir->orig_address) + i+3)) =
> > > + *(((unsigned long *)(swsusp_pbe_pgdir->address) + i+3));
> >
> > Do you really have to do manual loop unrolling? Why can't C code be
> > same for i386 and ppc?
> That is stupid code; it is updated in my new patch. I am using memcpy on i386,
> which generates smaller assembly code.

Warning: memcpy() may use MMX or SSE or something on some cpus....

> > Please avoid "return (0);". Using "return 0;" will do just fine.
> fixed.
>
> Here is my patch relative to your big diff; I hope it can be merged.

I have already too big difference against mainline, so I can only
merge trivial patches at this point. When 2.6.10 comes out, I'd like
to merge "no-high-order-allocation" patch, and "pagecache writer"
sometime after that...
Pavel
--
People were complaining that M$ turns users into beta-testers...
...jr ghea gurz vagb qrirybcref, naq gurl frrz gb yvxr vg gung jnl!

2004-11-24 11:28:50

by Pavel Machek

[permalink] [raw]
Subject: Re: [PATH] swsusp update 1/3

Hi!

> > OK, now I have finished the ppc part; it works. :)
> >
> > Here are all of the patches, relative to your big diff.
> > core.diff - swsusp core part.
> > i386.diff - i386 part.
> > ppc.diff - PowerPC part.
> >
> > Now we have an option in /proc/sys/kernel/swsusp_pagecache: if it is set,
> > swsusp uses the pagecache writer; otherwise it does not.
>
> Hmm, okay, I guess temporary sysctl is okay. [I'd probably just put
> there variable, and not export it to anyone. That way people will not
> want us to retain that in future.]

I've tried 11-24 version here, and it killed my machine during
suspend. (While radeonfb was suspended -> no useful output). Can you
enable CONFIG_PREEMPT and CONFIG_HIGHMEM and get it to work?

Pavel
--
People were complaining that M$ turns users into beta-testers...
...jr ghea gurz vagb qrirybcref, naq gurl frrz gb yvxr vg gung jnl!

2004-11-24 17:42:45

by Colin Leroy

[permalink] [raw]
Subject: Re: [PATH] 11-24 swsusp update 3/3

On 24 Nov 2004 at 16h11, [email protected] wrote:

Hi,

> +++ linux-2.6.9-ppc-g4-peval-hg/drivers/macintosh/therm_adt746x.c
> @@ -238,6 +239,11 @@
> #endif
> while(!kthread_should_stop())
> {
> + if (current->flags & PF_FREEZE) {
> + printk(KERN_INFO "therm_adt746x: freezing thermostat\n");
> + refrigerator(PF_FREEZE);
> + }
> +

It's already in BK:
http://linux.bkbits.net:8080/linux-2.5/cset@4174ae53kZONcTQPizEVPMKvSrJB6g
--
Colin

2004-11-24 18:49:21

by Hu Gang

[permalink] [raw]
Subject: Re: [PATH] swsusp update 1/3

On Wed, Nov 24, 2004 at 12:28:34PM +0100, Pavel Machek wrote:
> > Hmm, okay, I guess temporary sysctl is okay. [I'd probably just put
> > there variable, and not export it to anyone. That way people will not
> > want us to retain that in future.]
>
> I've tried 11-24 version here, and it killed my machine during
> suspend. (While radeonfb was suspended -> no useful output). Can you
> enable CONFIG_PREEMPT and CONFIG_HIGHMEM and get it to work?
>
Yes, it passed on my two computers, a PC and a PowerBook G4, with
CONFIG_HIGHMEM and CONFIG_PREEMPT enabled.

Here is a new patch relative to your big diff.

- Use a bitmap for the collision check; this is faster than before.
I am not sure that four pages for the bitmap are enough.
- Change pgdir_for_each to the list_for.. style.

- I'm using qemu as an i386 suspend testing platform; it is perfect for
kernel-level debugging. If someone wants to use qemu as a Linux suspend
testing platform, please apply this patch to let qemu support the IDE idle
command. http://soulinfo.com/~hugang/swsusp/qemu/ide.patch

Only core.diff is attached; the other parts are unchanged.

--- linux-2.6.9-ppc-g4-peval/include/linux/reboot.h 2004-06-16 13:20:26.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/include/linux/reboot.h 2004-11-22 17:16:58.000000000 +0800
@@ -42,6 +42,8 @@
extern int register_reboot_notifier(struct notifier_block *);
extern int unregister_reboot_notifier(struct notifier_block *);

+/* For use by swsusp only */
+extern struct notifier_block *reboot_notifier_list;

/*
* Architecture-specific implementations of sys_reboot commands.
--- linux-2.6.9-ppc-g4-peval/include/linux/suspend.h 2004-11-22 17:11:35.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/include/linux/suspend.h 2004-11-24 16:51:48.000000000 +0800
@@ -1,7 +1,7 @@
#ifndef _LINUX_SWSUSP_H
#define _LINUX_SWSUSP_H

-#ifdef CONFIG_X86
+#if (defined(CONFIG_X86)) || (defined (CONFIG_PPC32))
#include <asm/suspend.h>
#endif
#include <linux/swap.h>
--- linux-2.6.9-ppc-g4-peval/kernel/power/disk.c 2004-11-22 17:11:35.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/kernel/power/disk.c 2004-11-24 15:59:56.000000000 +0800
@@ -16,6 +16,7 @@
#include <linux/device.h>
#include <linux/delay.h>
#include <linux/fs.h>
+#include <linux/reboot.h>
#include <linux/device.h>
#include "power.h"

@@ -29,6 +30,8 @@
extern int swsusp_resume(void);
extern int swsusp_free(void);

+extern int write_page_caches(void);
+extern int read_page_caches(void);

static int noresume = 0;
char resume_file[256] = CONFIG_PM_STD_PARTITION;
@@ -48,14 +51,16 @@
unsigned long flags;
int error = 0;

- local_irq_save(flags);
switch(mode) {
case PM_DISK_PLATFORM:
- device_power_down(PMSG_SUSPEND);
+ /* device_power_down(PMSG_SUSPEND); */
+ local_irq_save(flags);
error = pm_ops->enter(PM_SUSPEND_DISK);
+ local_irq_restore(flags);
break;
case PM_DISK_SHUTDOWN:
printk("Powering off system\n");
+ notifier_call_chain(&reboot_notifier_list, SYS_POWER_OFF, NULL);
device_shutdown();
machine_power_off();
break;
@@ -106,6 +111,7 @@
}
}

+
static inline void platform_finish(void)
{
if (pm_disk_mode == PM_DISK_PLATFORM) {
@@ -118,13 +124,14 @@
{
device_resume();
platform_finish();
+ read_page_caches();
enable_nonboot_cpus();
thaw_processes();
pm_restore_console();
}


-static int prepare(void)
+static int prepare(int resume)
{
int error;

@@ -144,9 +151,13 @@
}

/* Free memory before shutting down devices. */
- free_some_memory();
+ /* free_some_memory(); */

disable_nonboot_cpus();
+ if (!resume)
+ if ((error = write_page_caches())) {
+ goto Finish;
+ }
if ((error = device_suspend(PMSG_FREEZE))) {
printk("Some devices failed to suspend\n");
goto Finish;
@@ -176,7 +187,7 @@
{
int error;

- if ((error = prepare()))
+ if ((error = prepare(0)))
return error;

pr_debug("PM: Attempting to suspend to disk.\n");
@@ -233,7 +244,7 @@

pr_debug("PM: Preparing system for restore.\n");

- if ((error = prepare()))
+ if ((error = prepare(1)))
goto Free;

barrier();
--- linux-2.6.9-ppc-g4-peval/kernel/power/main.c 2004-11-22 17:11:35.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/kernel/power/main.c 2004-11-22 17:16:58.000000000 +0800
@@ -4,7 +4,7 @@
* Copyright (c) 2003 Patrick Mochel
* Copyright (c) 2003 Open Source Development Lab
*
- * This file is release under the GPLv2
+ * This file is released under the GPLv2
*
*/

--- linux-2.6.9-ppc-g4-peval/kernel/power/swsusp.c 2004-11-22 17:11:35.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/kernel/power/swsusp.c 2004-11-25 01:46:52.000000000 +0800
@@ -74,9 +74,6 @@
/* References to section boundaries */
extern char __nosave_begin, __nosave_end;

-/* Variables to be preserved over suspend */
-static int pagedir_order_check;
-
extern char resume_file[];
static dev_t resume_device;
/* Local variables that should not be affected by save */
@@ -97,7 +94,6 @@
*/
suspend_pagedir_t *pagedir_nosave __nosavedata = NULL;
static suspend_pagedir_t *pagedir_save;
-static int pagedir_order __nosavedata = 0;

#define SWSUSP_SIG "S1SUSPEND"

@@ -222,8 +218,137 @@
}
swap_list_unlock();
}
+
+#define ONE_PAGE_PBE_NUM (PAGE_SIZE/sizeof(struct pbe))
+
+/* for each pagedir */
+typedef int (*susp_pgdir_t)(suspend_pagedir_t *cur, void *fun, void *arg);
+
+#define pgdir_for_each_safe(pos, n, head) \
+ for(pos = head, n = pos ? (suspend_pagedir_t*)pos->dummy.val : NULL; \
+ pos != NULL; \
+ pos = n, n = pos ? (suspend_pagedir_t *)pos->dummy.val : NULL)
+
+/* free pagedir */
+static void pagedir_free(suspend_pagedir_t *head)
+{
+ suspend_pagedir_t *next, *cur;
+ pgdir_for_each_safe(cur, next, head) {
+ free_page((unsigned long)cur);
+ }
+}
+
+/*
+ * swsup_pbe_t
+ *
+ * a callback function in the for-each-pbe loop.
+ *
+ * @param pbe pointer of current pbe
+ * @param p private data
+ * @param cur current index
+ *
+ * @return 0 if ok, non-zero otherwise
+ */
+
+typedef int (*swsup_pbe_t)(struct pbe *pbe, void *p, int cur);
+
+/*
+ * for_each_pbe
+ *
+ * @param pbe pointer of the pbe head
+ * @param fun callback function
+ * @param p private data
+ * @param max maximum number of pbes to visit
+ *
+ * @return 0 if ok, non-zero otherwise
+ */
+static int for_each_pbe(struct pbe *pbe, swsup_pbe_t fun, void *p, int max)
+{
+ struct pbe *pgdir = pbe, *next = NULL;
+ unsigned long i = 0;
+ int error = 0;
+
+ while (pgdir != NULL) {
+ unsigned long nums;
+ next = (struct pbe*)pgdir->dummy.val;
+ for (nums = 0; nums < ONE_PAGE_PBE_NUM; nums++, pgdir++, i ++) {
+ if (i == max) { /* end */
+ return 0;
+ }
+ if((error = fun(pgdir, p, i))) { /* got error */
+ return error;
+ }
+ }
+ pgdir = next;
+ }
+ return (error);
+}
+/* for_each_pbe_copy_back
+ *
+ * This is useful for writing the code in assembly.
+ *
+ */
+/*#define CREATE_ASM_CODE */
+#ifdef CREATE_ASM_CODE
+#if 0
+#define GET_ADDRESS(x) __pa(x)
+#else
+#define GET_ADDRESS(x) (x)
+#endif
+asmlinkage void for_each_pbe_copy_back(void)
+{
+ struct pbe *pgdir, *next;
+
+ pgdir = pagedir_nosave;
+ while (pgdir != NULL) {
+ unsigned long nums, i;
+ pgdir = (struct pbe *)GET_ADDRESS(pgdir);
+ next = (struct pbe*)pgdir->dummy.val;
+ for (nums = 0; nums < ONE_PAGE_PBE_NUM; nums++) {
+ register unsigned long *orig, *copy;
+ orig = (unsigned long *)pgdir->orig_address;
+ if (orig == 0) goto end;
+ orig = (unsigned long *)GET_ADDRESS(orig);
+ copy = (unsigned long *)GET_ADDRESS(pgdir->address);
+#if 0
+ memcpy(orig, copy, PAGE_SIZE);
+#else
+ for (i = 0; i < PAGE_SIZE / sizeof(unsigned long); i+=4) {
+ *(orig + i) = *(copy + i);
+ *(orig + i+1) = *(copy + i+1);
+ *(orig + i+2) = *(copy + i+2);
+ *(orig + i+3) = *(copy + i+3);
+ }
+#endif
+ pgdir ++;
+ }
+ pgdir = next;
+ }
+end:
+ panic("just asm code");
+}
+#endif
+
+static int find_bpe_index(struct pbe *p, void *tmp, int cur)
+{
+ if (*(int *)tmp == cur) {
+ *(struct pbe **)tmp = p;
+ return (1);
+ }
+ return 0;
+}

+static struct pbe *find_pbe_by_index(struct pbe *pgdir, int index, int max)
+{
+ unsigned long p = index;

+ /* pr_debug("find_pbe_by_index: %p, %d, %d ", pgdir, index, max); */
+ if (for_each_pbe(pgdir, find_bpe_index, &p, max) == 1) {
+ /* pr_debug("%p\n", (void*)p); */
+ return ((struct pbe *)p);
+ }
+ return (NULL);
+}

/**
* write_swap_page - Write one page to a fresh swap location.
@@ -257,6 +382,17 @@
return error;
}

+static int data_free_pbe(struct pbe *p, void *tmp, int cur)
+{
+ swp_entry_t entry;
+
+ entry = p->swap_address;
+ if (entry.val)
+ swap_free(entry);
+ p->swap_address = (swp_entry_t){0};
+
+ return 0;
+}

/**
* data_free - Free the swap entries used by the saved image.
@@ -267,43 +403,49 @@

static void data_free(void)
{
- swp_entry_t entry;
- int i;
+ for_each_pbe(pagedir_nosave, data_free_pbe, NULL, nr_copy_pages);
+}

- for (i = 0; i < nr_copy_pages; i++) {
- entry = (pagedir_nosave + i)->swap_address;
- if (entry.val)
- swap_free(entry);
- else
- break;
- (pagedir_nosave + i)->swap_address = (swp_entry_t){0};
- }
+static int mod_progress = 1;
+
+static void inline mod_printk_progress(int i)
+{
+ if (mod_progress == 0) mod_progress = 1;
+ if (!(i%100))
+ printk( "\b\b\b\b%3d%%", i / mod_progress );
}

+static int write_one_pbe(struct pbe *p, void *tmp, int cur)
+{
+ int error = 0;
+
+ BUG_ON(p->address == 0);
+ BUG_ON(p->orig_address == 0);
+ if ((error = write_page(p->address, &p->swap_address))) {
+ return error;
+ }
+ mod_printk_progress(cur);
+ pr_debug("write_one_pbe: %p, o{%p} c{%p} %lu %d\n", p,
+ (void *)p->orig_address, (void *)p->address,
+ p->swap_address.val, cur);
+ return 0;
+}

/**
* data_write - Write saved image to swap.
*
* Walk the list of pages in the image and sync each one to swap.
*/
-
static int data_write(void)
{
- int error = 0;
- int i;
- unsigned int mod = nr_copy_pages / 100;
-
- if (!mod)
- mod = 1;
+ int error;
+
+ mod_progress = nr_copy_pages / 100;

- printk( "Writing data to swap (%d pages)... ", nr_copy_pages );
- for (i = 0; i < nr_copy_pages && !error; i++) {
- if (!(i%mod))
- printk( "\b\b\b\b%3d%%", i / mod );
- error = write_page((pagedir_nosave+i)->address,
- &((pagedir_nosave+i)->swap_address));
- }
+ printk( "Writing data to swap (%d pages)... ", nr_copy_pages);
+ error = for_each_pbe(pagedir_nosave, write_one_pbe, NULL, nr_copy_pages);
printk("\b\b\b\bdone\n");
+
return error;
}

@@ -363,7 +505,6 @@
swap_free(swsusp_info.pagedir[i]);
}

-
/**
* write_pagedir - Write the array of pages holding the page directory.
* @last: Last swap entry we write (needed for header).
@@ -371,15 +512,19 @@

static int write_pagedir(void)
{
- unsigned long addr = (unsigned long)pagedir_nosave;
- int error = 0;
- int n = SUSPEND_PD_PAGES(nr_copy_pages);
- int i;
+ int error = 0, n = 0;
+ suspend_pagedir_t *pgdir, *next;

- swsusp_info.pagedir_pages = n;
+ pgdir_for_each_safe(pgdir, next, pagedir_nosave) {
+ error = write_page((unsigned long)pgdir, &swsusp_info.pagedir[n]);
+ if (error) {
+ break;
+ }
+ n++;
+ }
printk( "Writing pagedir (%d pages)\n", n);
- for (i = 0; i < n && !error; i++, addr += PAGE_SIZE)
- error = write_page(addr, &swsusp_info.pagedir[i]);
+ swsusp_info.pagedir_pages = n;
+
return error;
}

@@ -504,6 +649,464 @@
return 0;
}

+typedef int (*do_page_t)(struct page *page, int p);
+
+static int foreach_zone_page(struct zone *zone, do_page_t fun, int p)
+{
+ int inactive = 0, active = 0;
+
+ spin_lock_irq(&zone->lru_lock);
+ if (zone->nr_inactive) {
+ struct list_head * entry = zone->inactive_list.prev;
+ while (entry != &zone->inactive_list) {
+ if (fun) {
+ struct page * page = list_entry(entry, struct page, lru);
+ inactive += fun(page, p);
+ } else {
+ inactive ++;
+ }
+ entry = entry->prev;
+ }
+ }
+ if (zone->nr_active) {
+ struct list_head * entry = zone->active_list.prev;
+ while (entry != &zone->active_list) {
+ if (fun) {
+ struct page * page = list_entry(entry, struct page, lru);
+ active += fun(page, p);
+ } else {
+ active ++;
+ }
+ entry = entry->prev;
+ }
+ }
+ spin_unlock_irq(&zone->lru_lock);
+
+ return (active + inactive);
+}
+
+/* enable/disable pagecache suspend */
+int swsusp_pagecache = 0;
+
+/* I'll move this to include/linux/page-flags.h */
+#define PG_page_caches (PG_nosave_free + 1)
+
+#define SetPagePcs(page) set_bit(PG_page_caches, &(page)->flags)
+#define ClearPagePcs(page) clear_bit(PG_page_caches, &(page)->flags)
+#define PagePcs(page) test_bit(PG_page_caches, &(page)->flags)
+
+static suspend_pagedir_t *pagedir_cache = NULL;
+static int nr_copy_page_caches = 0;
+
+static void lock_pagecaches(void)
+{
+ struct zone *zone;
+ for_each_zone(zone) {
+ if (!is_highmem(zone)) {
+ spin_lock_irq(&zone->lru_lock);
+ }
+ }
+}
+
+static void unlock_pagecaches(void)
+{
+ struct zone *zone;
+ for_each_zone(zone) {
+ if (!is_highmem(zone)) {
+ spin_unlock_irq(&zone->lru_lock);
+ }
+ }
+}
+
+static int setup_page_caches_pe(struct page *page, int setup)
+{
+ unsigned long pfn = page_to_pfn(page);
+
+ BUG_ON(PageReserved(page) && PageNosave(page));
+ if (!pfn_valid(pfn)) {
+ printk("not valid page\n");
+ return 0;
+ }
+ if (PageNosave(page)) {
+ printk("nosave\n");
+ return 0;
+ }
+ if (PageReserved(page) /*&& pfn_is_nosave(pfn)*/) {
+ printk("[nosave]\n");
+ return 0;
+ }
+ if (PageSlab(page)) {
+ printk("slab\n");
+ return 0;
+ }
+ if (setup) {
+ struct pbe *p = find_pbe_by_index(pagedir_cache, nr_copy_page_caches, -1);
+ BUG_ON(p == NULL);
+ p->address = (long)page_address(page);
+ BUG_ON(p->address == 0);
+ /*pr_debug("setup_page_caches: cur %p, o{%p}, d{%p}, nr %u\n",
+ (void*)p, (void*)p->orig_address,
+ (void*)p->address, nr_copy_page_caches);*/
+ nr_copy_page_caches ++;
+ }
+ SetPagePcs(page);
+
+ return (1);
+}
+
+static int count_page_caches(struct zone *zone, int p)
+{
+ if (swsusp_pagecache)
+ return foreach_zone_page(zone, setup_page_caches_pe, p);
+ return 0;
+}
+
+ /* a bitmap-based collision check */
+static inline void collide_set_bit(unsigned char *bitmap,
+ unsigned long bitnum)
+{
+ bitnum -= 0xc0000000;
+ bitnum = bitnum >> 12;
+ bitmap[bitnum / 8] |= (1 << (bitnum%8));
+}
+
+static inline int collide_is_bit_set(unsigned char *bitmap,
+ unsigned long bitnum)
+{
+ bitnum -= 0xc0000000;
+ bitnum = bitnum >> 12;
+ return !!(bitmap[bitnum / 8] & (1 << (bitnum%8)));
+}
+
+static void collide_bitmap_free(unsigned char *bitmap)
+{
+ free_pages((unsigned long)bitmap, 2);
+}
+
+/*
+ * four pages are enough for bitmap
+ *
+ */
+static unsigned char *collide_bitmap_init(struct pbe *pgdir)
+{
+ unsigned char *bitmap =
+ (unsigned char *)__get_free_pages(GFP_ATOMIC | __GFP_COLD, 2);
+ struct pbe *next;
+
+ if (bitmap == NULL) {
+ return NULL;
+ }
+ memset(bitmap, 0, 4 * PAGE_SIZE);
+
+ /* do base check */
+ BUG_ON(collide_is_bit_set(bitmap, (unsigned long)bitmap) == 1);
+ collide_set_bit(bitmap, (unsigned long)bitmap);
+ BUG_ON(collide_is_bit_set(bitmap, (unsigned long)bitmap) == 0);
+
+ while (pgdir != NULL) {
+ unsigned long nums;
+ next = (struct pbe*)pgdir->dummy.val;
+ for (nums = 0; nums < ONE_PAGE_PBE_NUM; nums++) {
+ collide_set_bit(bitmap, (unsigned long)pgdir);
+ collide_set_bit(bitmap, (unsigned long)pgdir->orig_address);
+ pgdir ++;
+ }
+ pgdir = next;
+ }
+
+ return bitmap;
+}
+
+/*
+ * struct pbe fields as redefined for the PageCache pagedir.
+ *
+ * struct pbe {
+ * unsigned long address;
+ * unsigned long orig_address; pointer of next struct pbe
+ * swp_entry_t swap_address;
+ * swp_entry_t dummy; current index
+ * }
+ *
+ */
+static suspend_pagedir_t * alloc_one_pagedir(suspend_pagedir_t *prev,
+ unsigned char *collide)
+{
+ suspend_pagedir_t *pgdir = NULL;
+ int i;
+
+ pgdir = (suspend_pagedir_t *)
+ __get_free_pages(GFP_ATOMIC | __GFP_COLD, 0);
+ if (!pgdir) {
+ return NULL;
+ }
+
+ if (collide) {
+ while (collide_is_bit_set(collide, (unsigned long)pgdir)) {
+ pgdir = (suspend_pagedir_t *)
+ __get_free_pages(GFP_ATOMIC | __GFP_COLD, 0);
+ if (!pgdir) {
+ return NULL;
+ }
+ }
+ }
+
+ /*pr_debug("pgdir: %p, %p, %d\n",
+ pgdir, prev, sizeof(suspend_pagedir_t)); */
+ for (i = 0; i < ONE_PAGE_PBE_NUM; i++) {
+ pgdir[i].dummy.val = 0;
+ pgdir[i].address = 0;
+ pgdir[i].orig_address = 0;
+ if (prev)
+ prev[i].dummy.val= (unsigned long)pgdir;
+ }
+
+ return (pgdir);
+}
+
+/* calc_nums - Determine the number of entries to allocate for pagedir_save. */
+static int calc_nums(int nr_copy)
+{
+ int diff = 0, ret = 0;
+ do {
+ diff = (nr_copy / ONE_PAGE_PBE_NUM) - ret + 1;
+ if (diff) {
+ ret += diff;
+ nr_copy += diff;
+ }
+ } while (diff);
+ return nr_copy;
+}
+
+
+/*
+ * alloc_pagedir
+ *
+ * @param pbe
+ * @param pbe_nums
+ * @param collide
+ * @param page_nums
+ *
+ */
+static int alloc_pagedir(struct pbe **pbe, int pbe_nums,
+ unsigned char *collide, int page_nums)
+{
+ unsigned int nums = 0;
+ unsigned int after_alloc = pbe_nums;
+ suspend_pagedir_t *prev = NULL, *cur = NULL;
+
+ if (page_nums)
+ after_alloc = ONE_PAGE_PBE_NUM * page_nums;
+ else
+ after_alloc = calc_nums(after_alloc);
+
+ pr_debug("alloc_pagedir: %d, %d\n", pbe_nums, after_alloc);
+ for (nums = 0 ; nums < after_alloc ; nums += ONE_PAGE_PBE_NUM) {
+ cur = alloc_one_pagedir(prev, collide);
+ pr_debug("alloc_one_pagedir: %p\n", cur);
+ if (!cur) { /* get page failed */
+ goto no_mem;
+ }
+ if (nums == 0) { /* setup the head */
+ *pbe = cur;
+ }
+ prev = cur;
+ }
+ return after_alloc - pbe_nums;
+
+no_mem:
+ pagedir_free(*pbe);
+ *pbe = NULL;
+
+ return (-ENOMEM);
+}
+
+static char *page_cache_buf = NULL;
+
+static int bio_read_page(pgoff_t page_off, void * page);
+
+static int pagecache_read_pbe(struct pbe *p, void *tmp, int cur)
+{
+ int error = 0;
+ swp_entry_t entry;
+
+ mod_printk_progress(cur);
+
+ pr_debug("pagecache_read_pbe: %p, o{%p} c{%p} %lu\n",
+ p, (void *)p->orig_address, (void *)p->address,
+ swp_offset(p->swap_address));
+
+ error = bio_read_page(swp_offset(p->swap_address), page_cache_buf);
+ if (error) return error;
+ memcpy((void*)p->address, (void*)page_cache_buf, PAGE_SIZE);
+
+ entry = p->swap_address;
+ if (entry.val)
+ swap_free(entry);
+
+ return 0;
+}
+
+int read_page_caches(void)
+{
+ int error = 0;
+
+ if (swsusp_pagecache == 0) return 0;
+
+ mod_progress = nr_copy_page_caches / 100;
+
+ printk( "Reading PageCaches from swap (%d pages)... ", nr_copy_page_caches);
+ error = for_each_pbe(pagedir_cache, pagecache_read_pbe, NULL,
+ nr_copy_page_caches);
+ printk("\b\b\b\bdone\n");
+
+ unlock_pagecaches();
+ pagedir_free(pagedir_cache);
+ free_page((unsigned long)page_cache_buf);
+
+ return error;
+}
+
+static int pagecache_write_pbe(struct pbe *p, void *tmp, int cur)
+{
+ int error = 0;
+
+ mod_printk_progress(cur);
+
+ pr_debug("pagecache_write_pbe: %p, o{%p} c{%p} %d ",
+ p, (void *)p->orig_address, (void *)p->address, cur);
+ BUG_ON(p->address == 0);
+ memcpy((void *)page_cache_buf, (void*)p->address, PAGE_SIZE);
+ error = write_page((unsigned long)page_cache_buf, &p->swap_address);
+ if (error) return error;
+
+ pr_debug("%lu\n", swp_offset(p->swap_address));
+
+ return 0;
+}
+
+static int page_caches_write(void)
+{
+ int error;
+
+ mod_progress = nr_copy_page_caches / 100;
+
+ lock_pagecaches();
+ printk( "Writing PageCaches to swap (%d pages)... ",
+ nr_copy_page_caches);
+ error = for_each_pbe(pagedir_cache, pagecache_write_pbe, NULL,
+ nr_copy_page_caches);
+ printk("\b\b\b\bdone\n");
+
+ return error;
+}
+
+static int setup_pagedir_pbe(void)
+{
+ struct zone *zone;
+
+ nr_copy_page_caches = 0;
+ for_each_zone(zone) {
+ if (!is_highmem(zone)) {
+ count_page_caches(zone, 1);
+ }
+ }
+
+ return 0;
+}
+
+static void count_data_pages(void);
+static int swsusp_alloc(void);
+
+static void page_caches_recal(void)
+{
+ struct zone *zone;
+ int i;
+
+ for (i = 0; i < max_mapnr; i++)
+ ClearPagePcs(mem_map+i);
+
+ nr_copy_page_caches = 0;
+ drain_local_pages();
+ for_each_zone(zone) {
+ if (!is_highmem(zone)) {
+ nr_copy_page_caches += count_page_caches(zone, 0);
+ }
+ }
+}
+
+int write_page_caches(void)
+{
+ int error;
+ int recal = 0;
+
+ if ((error = swsusp_swap_check())) {
+ /* FIXME free pagedir_cache */
+ return error;
+ }
+
+ if (swsusp_pagecache) {
+ page_cache_buf = (char *)__get_free_pages(GFP_ATOMIC | __GFP_COLD, 0);
+ if (!page_cache_buf) {
+ /* FIXME try shrink memory */
+ return -ENOMEM;
+ }
+
+ page_caches_recal();
+
+ if (nr_copy_page_caches == 0) {
+ return 0;
+ }
+ if (alloc_pagedir(&pagedir_cache, nr_copy_page_caches, NULL, 0) < 0) {
+ /* FIXME try shrink memory */
+ return -ENOMEM;
+ }
+ }
+
+ drain_local_pages();
+ count_data_pages();
+
+ if (nr_free_pages() < nr_copy_pages + PAGES_FOR_IO) {
+ printk("swsusp: need %d pages, free %d pages\n",
+ nr_copy_pages, nr_free_pages());
+ printk("swsusp: Freeing memory:... ");
+ while (shrink_all_memory(nr_copy_pages * 2)) {
+ current->state = TASK_INTERRUPTIBLE;
+ schedule_timeout(HZ/5);
+ drain_local_pages();
+ count_data_pages();
+ if (nr_free_pages() > nr_copy_pages + PAGES_FOR_IO)
+ break;
+ recal ++;
+ printk("\b\b\b\b\b%5d", nr_free_pages());
+ }
+ printk(" done\n");
+ page_caches_recal();
+ drain_local_pages();
+ count_data_pages();
+ }
+
+ error = swsusp_alloc();
+ if (error) {
+ printk("swsusp_alloc failed, %d\n", error);
+ return error;
+ }
+
+ drain_local_pages();
+ count_data_pages();
+ printk("swsusp: Need to copy %u pages, %u page_caches\n",
+ nr_copy_pages, nr_copy_page_caches);
+
+ if (swsusp_pagecache) {
+ setup_pagedir_pbe();
+ pr_debug("after setup_pagedir_pbe \n");
+
+ error = page_caches_write();
+ if (error)
+ return error;
+ }
+
+ return 0;
+}

static int pfn_is_nosave(unsigned long pfn)
{
@@ -539,7 +1142,10 @@
}
if (PageNosaveFree(page))
return 0;
-
+ if (PagePcs(page) && swsusp_pagecache) {
+ BUG_ON(zone->nr_inactive == 0 && zone->nr_active == 0);
+ return 0;
+ }
return 1;
}

@@ -549,10 +1155,12 @@
unsigned long zone_pfn;

nr_copy_pages = 0;
+ nr_copy_page_caches = 0;

for_each_zone(zone) {
if (is_highmem(zone))
continue;
+ nr_copy_page_caches += count_page_caches(zone, 0);
mark_free_pages(zone);
for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn)
nr_copy_pages += saveable(zone, &zone_pfn);
@@ -564,7 +1172,6 @@
{
struct zone *zone;
unsigned long zone_pfn;
- struct pbe * pbe = pagedir_nosave;
int pages_copied = 0;

for_each_zone(zone) {
@@ -574,11 +1181,14 @@
for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) {
if (saveable(zone, &zone_pfn)) {
struct page * page;
+ struct pbe * pbe = find_pbe_by_index(pagedir_nosave,
+ pages_copied, nr_copy_pages);
+ BUG_ON(pbe == NULL);
page = pfn_to_page(zone_pfn + zone->zone_start_pfn);
pbe->orig_address = (long) page_address(page);
+ BUG_ON(pbe->orig_address == 0);
/* copy_page is not usable for copying task structs. */
memcpy((void *)pbe->address, (void *)pbe->orig_address, PAGE_SIZE);
- pbe++;
pages_copied++;
}
}
@@ -587,105 +1197,38 @@
nr_copy_pages = pages_copied;
}

-
-/**
- * calc_order - Determine the order of allocation needed for pagedir_save.
- *
- * This looks tricky, but is just subtle. Please fix it some time.
- * Since there are %nr_copy_pages worth of pages in the snapshot, we need
- * to allocate enough contiguous space to hold
- * (%nr_copy_pages * sizeof(struct pbe)),
- * which has the saved/orig locations of the page..
- *
- * SUSPEND_PD_PAGES() tells us how many pages we need to hold those
- * structures, then we call get_bitmask_order(), which will tell us the
- * last bit set in the number, starting with 1. (If we need 30 pages, that
- * is 0x0000001e in hex. The last bit is the 5th, which is the order we
- * would use to allocate 32 contiguous pages).
- *
- * Since we also need to save those pages, we add the number of pages that
- * we need to nr_copy_pages, and in case of an overflow, do the
- * calculation again to update the number of pages needed.
- *
- * With this model, we will tend to waste a lot of memory if we just cross
- * an order boundary. Plus, the higher the order of allocation that we try
- * to do, the more likely we are to fail in a low-memory situtation
- * (though we're unlikely to get this far in such a case, since swsusp
- * requires half of memory to be free anyway).
- */
-
-
-static void calc_order(void)
-{
- int diff = 0;
- int order = 0;
-
- do {
- diff = get_bitmask_order(SUSPEND_PD_PAGES(nr_copy_pages)) - order;
- if (diff) {
- order += diff;
- nr_copy_pages += 1 << diff;
- }
- } while(diff);
- pagedir_order = order;
-}
-
-
-/**
- * alloc_pagedir - Allocate the page directory.
- *
- * First, determine exactly how many contiguous pages we need and
- * allocate them.
- */
-
-static int alloc_pagedir(void)
+static int free_one_snapshot_pbe(struct pbe *p, void *tmp, int cur)
{
- calc_order();
- pagedir_save = (suspend_pagedir_t *)__get_free_pages(GFP_ATOMIC | __GFP_COLD,
- pagedir_order);
- if (!pagedir_save)
- return -ENOMEM;
- memset(pagedir_save, 0, (1 << pagedir_order) * PAGE_SIZE);
- pagedir_nosave = pagedir_save;
+ ClearPageNosave(virt_to_page(p->address));
+ free_page(p->address);
+ p->address = 0;
return 0;
}

/**
* free_image_pages - Free pages allocated for snapshot
*/
-
static void free_image_pages(void)
{
- struct pbe * p;
- int i;
-
- p = pagedir_save;
- for (i = 0, p = pagedir_save; i < nr_copy_pages; i++, p++) {
- if (p->address) {
- ClearPageNosave(virt_to_page(p->address));
- free_page(p->address);
- p->address = 0;
- }
- }
+ for_each_pbe(pagedir_save, free_one_snapshot_pbe, NULL, nr_copy_pages);
}

+static int alloc_one_snapshot_pbe(struct pbe *p, void *tmp, int cur)
+{
+ p->address = get_zeroed_page(GFP_ATOMIC | __GFP_COLD);
+ if (!p->address)
+ return -ENOMEM;
+ SetPageNosave(virt_to_page(p->address));
+ return 0;
+}
/**
* alloc_image_pages - Allocate pages for the snapshot.
*
*/
-
static int alloc_image_pages(void)
{
- struct pbe * p;
- int i;
-
- for (i = 0, p = pagedir_save; i < nr_copy_pages; i++, p++) {
- p->address = get_zeroed_page(GFP_ATOMIC | __GFP_COLD);
- if (!p->address)
- return -ENOMEM;
- SetPageNosave(virt_to_page(p->address));
- }
- return 0;
+ return for_each_pbe(pagedir_save, alloc_one_snapshot_pbe, NULL,
+ nr_copy_pages);
}

void swsusp_free(void)
@@ -693,7 +1236,7 @@
BUG_ON(PageNosave(virt_to_page(pagedir_save)));
BUG_ON(PageNosaveFree(virt_to_page(pagedir_save)));
free_image_pages();
- free_pages((unsigned long) pagedir_save, pagedir_order);
+ pagedir_free(pagedir_save);
}


@@ -730,7 +1273,7 @@
struct sysinfo i;

si_swapinfo(&i);
- if (i.freeswap < (nr_copy_pages + PAGES_FOR_IO)) {
+ if (i.freeswap < (nr_copy_pages + nr_copy_page_caches + PAGES_FOR_IO)) {
pr_debug("swsusp: Not enough swap. Need %ld\n",i.freeswap);
return 0;
}
@@ -750,25 +1293,26 @@

if (!enough_swap())
return -ENOSPC;
-
- if ((error = alloc_pagedir())) {
- pr_debug("suspend: Allocating pagedir failed.\n");
- return error;
+ error = alloc_pagedir(&pagedir_save, nr_copy_pages, NULL, 0);
+ if (error < 0) {
+ printk("suspend: Allocating pagedir failed.\n");
+ return -ENOMEM;
}
+ pr_debug("alloc_pagedir: addon %d\n", error);
+ nr_copy_pages += error;
if ((error = alloc_image_pages())) {
- pr_debug("suspend: Allocating image pages failed.\n");
+ printk("suspend: Allocating image pages failed.\n");
swsusp_free();
return error;
}
+ pagedir_nosave = pagedir_save;

- pagedir_order_check = pagedir_order;
return 0;
}

int suspend_prepare_image(void)
{
unsigned int nr_needed_pages;
- int error;

pr_debug("swsusp: critical section: \n");
if (save_highmem()) {
@@ -777,15 +1321,8 @@
return -ENOMEM;
}

- drain_local_pages();
- count_data_pages();
- printk("swsusp: Need to copy %u pages\n",nr_copy_pages);
nr_needed_pages = nr_copy_pages + PAGES_FOR_IO;

- error = swsusp_alloc();
- if (error)
- return error;
-
/* During allocating of suspend pagedir, new cold pages may appear.
* Kill them.
*/
@@ -827,10 +1364,6 @@

asmlinkage int swsusp_save(void)
{
- int error = 0;
-
- if ((error = swsusp_swap_check()))
- return error;
return suspend_prepare_image();
}

@@ -854,11 +1387,11 @@

asmlinkage int swsusp_restore(void)
{
- BUG_ON (pagedir_order_check != pagedir_order);
-
/* Even mappings of "global" things (vmalloc) need to be fixed */
+#if defined(CONFIG_X86) || defined(CONFIG_X86_64)
__flush_tlb_global();
wbinvd(); /* Nigel says wbinvd here is good idea... */
+#endif
return 0;
}

@@ -881,99 +1414,6 @@
return error;
}

-
-
-/* More restore stuff */
-
-#define does_collide(addr) does_collide_order(pagedir_nosave, addr, 0)
-
-/*
- * Returns true if given address/order collides with any orig_address
- */
-static int __init does_collide_order(suspend_pagedir_t *pagedir, unsigned long addr,
- int order)
-{
- int i;
- unsigned long addre = addr + (PAGE_SIZE<<order);
-
- for (i=0; i < nr_copy_pages; i++)
- if ((pagedir+i)->orig_address >= addr &&
- (pagedir+i)->orig_address < addre)
- return 1;
-
- return 0;
-}
-
-/*
- * We check here that pagedir & pages it points to won't collide with pages
- * where we're going to restore from the loaded pages later
- */
-static int __init check_pagedir(void)
-{
- int i;
-
- for(i=0; i < nr_copy_pages; i++) {
- unsigned long addr;
-
- do {
- addr = get_zeroed_page(GFP_ATOMIC);
- if(!addr)
- return -ENOMEM;
- } while (does_collide(addr));
-
- (pagedir_nosave+i)->address = addr;
- }
- return 0;
-}
-
-static int __init swsusp_pagedir_relocate(void)
-{
- /*
- * We have to avoid recursion (not to overflow kernel stack),
- * and that's why code looks pretty cryptic
- */
- suspend_pagedir_t *old_pagedir = pagedir_nosave;
- void **eaten_memory = NULL;
- void **c = eaten_memory, *m, *f;
- int ret = 0;
-
- printk("Relocating pagedir ");
-
- if (!does_collide_order(old_pagedir, (unsigned long)old_pagedir, pagedir_order)) {
- printk("not necessary\n");
- return check_pagedir();
- }
-
- while ((m = (void *) __get_free_pages(GFP_ATOMIC, pagedir_order)) != NULL) {
- if (!does_collide_order(old_pagedir, (unsigned long)m, pagedir_order))
- break;
- eaten_memory = m;
- printk( "." );
- *eaten_memory = c;
- c = eaten_memory;
- }
-
- if (!m) {
- printk("out of memory\n");
- ret = -ENOMEM;
- } else {
- pagedir_nosave =
- memcpy(m, old_pagedir, PAGE_SIZE << pagedir_order);
- }
-
- c = eaten_memory;
- while (c) {
- printk(":");
- f = c;
- c = *c;
- free_pages((unsigned long)f, pagedir_order);
- }
- if (ret)
- return ret;
- printk("|\n");
- return check_pagedir();
-}
-
/**
* Using bio to read from swap.
* This code requires a bit more work than just using buffer heads
@@ -993,7 +1433,7 @@
return 0;
}

-static struct block_device * resume_bdev;
+static struct block_device * resume_bdev __nosavedata;

/**
* submit - submit BIO request.
@@ -1038,12 +1478,12 @@
return error;
}

-int bio_read_page(pgoff_t page_off, void * page)
+static int bio_read_page(pgoff_t page_off, void * page)
{
return submit(READ, page_off, page);
}

-int bio_write_page(pgoff_t page_off, void * page)
+static int bio_write_page(pgoff_t page_off, void * page)
{
return submit(WRITE, page_off, page);
}
@@ -1088,7 +1528,6 @@
return -EPERM;
}
nr_copy_pages = swsusp_info.image_pages;
- pagedir_order = get_bitmask_order(SUSPEND_PD_PAGES(nr_copy_pages));
return error;
}

@@ -1115,62 +1554,171 @@
return error;
}

+static void **eaten_memory = NULL;
+
+static void __init eat_progress(void)
+{
+ char *eaten_progess = "-\\|/";
+ static int eaten_i = 0;
+
+ printk("\b%c", eaten_progess[eaten_i]);
+ eaten_i ++;
+ if (eaten_i > 3) eaten_i = 0;
+}
+
+static int __init check_one_pbe(struct pbe *p, void *collide, int cur)
+{
+ unsigned long addr = 0;
+ void **c = eaten_memory;
+
+ pr_debug("check_one_pbe: %p %p o{%p} ",
+ p, (void*)addr, (void*)p->orig_address);
+ do {
+ if (addr) {
+ eat_progress();
+ eaten_memory = (void**)addr;
+ *eaten_memory = c;
+ c = eaten_memory;
+ }
+ addr = get_zeroed_page(GFP_ATOMIC);
+ if(!addr)
+ return -ENOMEM;
+ } while(collide_is_bit_set(collide, addr));
+ pr_debug("c{%p} done\n", (void*)addr);
+ p->address = addr;
+
+ return 0;
+}
+
+/*
+ * We check here that pagedir & pages it points to won't collide with pages
+ * where we're going to restore from the loaded pages later
+ */
+static int __init check_pagedir(struct pbe *pbe, unsigned char *collide)
+{
+ void **c, *f;
+ int ret = for_each_pbe(pbe, check_one_pbe, collide, nr_copy_pages);
+ c = eaten_memory;
+ while (c) {
+ eat_progress();
+ f = c;
+ c = *c;
+ free_pages((unsigned long)f, 0);
+ }
+ return (ret);
+}
+
+static int __init read_one_pbe(struct pbe *p, void *tmp, int cur)
+{
+ int error = 0;
+
+ mod_printk_progress(cur);
+
+ pr_debug("read_one_pbe: %p o{%p} c{%p} %lu, %d\n",
+ p, (void *)p->orig_address, (void *)p->address,
+ swp_offset(p->swap_address), cur);
+ error = bio_read_page(swp_offset(p->swap_address), (void *)p->address);
+
+ return error;
+}
/**
* swsusp_read_data - Read image pages from swap.
*
- * You do not need to check for overlaps, check_pagedir()
- * already did that.
*/
+static void __init swsusp_copy_pagedir(suspend_pagedir_t *d_pgdir,
+ suspend_pagedir_t *s_pgdir)
+{
+ int i = 0;
+
+ while (s_pgdir != NULL) {
+ suspend_pagedir_t *s_next = (suspend_pagedir_t *)s_pgdir->dummy.val;
+ suspend_pagedir_t *d_next = (suspend_pagedir_t *)d_pgdir->dummy.val;
+ for (i = 0; i < ONE_PAGE_PBE_NUM; i++) {
+ d_pgdir->address = s_pgdir->address;
+ d_pgdir->orig_address = s_pgdir->orig_address;
+ d_pgdir->swap_address = s_pgdir->swap_address;
+ s_pgdir ++; d_pgdir ++;
+ }
+ d_pgdir = d_next;
+ s_pgdir = s_next;
+ };
+}

static int __init data_read(void)
{
- struct pbe * p;
int error;
- int i;
- int mod = nr_copy_pages / 100;
+ suspend_pagedir_t * addr = NULL;
+ unsigned char *bitmap = collide_bitmap_init(pagedir_nosave);

- if (!mod)
- mod = 1;
+ BUG_ON(bitmap == NULL);

- if ((error = swsusp_pagedir_relocate()))
+ printk("Relocating pagedir ...");
+ error = alloc_pagedir(&addr, nr_copy_pages, bitmap,
+ swsusp_info.pagedir_pages);
+ if (error < 0) {
return error;
+ }
+ swsusp_copy_pagedir(addr, pagedir_nosave);
+ if (check_pagedir(addr, bitmap)) {
+ return -ENOMEM;
+ }
+ collide_bitmap_free(bitmap);
+ pagedir_free(pagedir_nosave);
+ printk(" done\n");
+
+ pagedir_nosave = addr;
+
+ mod_progress = nr_copy_pages / 100;

printk( "Reading image data (%d pages): ", nr_copy_pages );
- for(i = 0, p = pagedir_nosave; i < nr_copy_pages && !error; i++, p++) {
- if (!(i%mod))
- printk( "\b\b\b\b%3d%%", i / mod );
- error = bio_read_page(swp_offset(p->swap_address),
- (void *)p->address);
- }
- printk(" %d done.\n",i);
- return error;
+ error = for_each_pbe(pagedir_nosave, read_one_pbe, NULL, nr_copy_pages);
+ printk(" %d done.\n", nr_copy_pages);

+ return error;
}

extern dev_t __init name_to_dev_t(const char *line);

-static int __init read_pagedir(void)
+static int __init read_one_pagedir(suspend_pagedir_t *pgdir, int i)
{
- unsigned long addr;
- int i, n = swsusp_info.pagedir_pages;
+ unsigned long offset = swp_offset(swsusp_info.pagedir[i]);
+ unsigned long next;
int error = 0;

- addr = __get_free_pages(GFP_ATOMIC, pagedir_order);
- if (!addr)
- return -ENOMEM;
- pagedir_nosave = (struct pbe *)addr;
+ next = pgdir->dummy.val;
+ pr_debug("read_one_pagedir: %p, %d, %lu, %p\n",
+ pgdir, i, offset, (void*)next);
+ if ((error = bio_read_page(offset, (void *)pgdir))) {
+ return error;
+ }
+ pgdir->dummy.val = next;

- pr_debug("pmdisk: Reading pagedir (%d Pages)\n",n);
+ return error;
+}

- for (i = 0; i < n && !error; i++, addr += PAGE_SIZE) {
- unsigned long offset = swp_offset(swsusp_info.pagedir[i]);
- if (offset)
- error = bio_read_page(offset, (void *)addr);
- else
- error = -EFAULT;
- }
- if (error)
- free_pages((unsigned long)pagedir_nosave, pagedir_order);
+/*
+ * reading pagedir from swap device
+ */
+static int __init read_pagedir(void)
+{
+ int i = 0, n = swsusp_info.pagedir_pages;
+ int error = 0;
+ suspend_pagedir_t *pgdir, *next;
+
+ error = alloc_pagedir(&pagedir_nosave, nr_copy_pages, NULL, n);
+ if (error < 0)
+ return -ENOMEM;
+
+ printk("pmdisk: Reading pagedir (%d Pages)\n",n);
+ pgdir_for_each_safe(pgdir, next, pagedir_nosave) {
+ error = read_one_pagedir(pgdir, i);
+ if (error) break;
+ i++;
+ }
+ BUG_ON(i != n);
+ if (error)
+ pagedir_free(pagedir_nosave);
+
return error;
}

@@ -1185,7 +1733,7 @@
if ((error = read_pagedir()))
return error;
if ((error = data_read()))
- free_pages((unsigned long)pagedir_nosave, pagedir_order);
+ pagedir_free(pagedir_nosave);
return error;
}

@@ -1207,7 +1755,7 @@
if (!IS_ERR(resume_bdev)) {
set_blocksize(resume_bdev, PAGE_SIZE);
error = read_suspend_image();
- blkdev_put(resume_bdev);
+ /* blkdev_put(resume_bdev); */
} else
error = PTR_ERR(resume_bdev);

--- linux-2.6.9-ppc-g4-peval/kernel/sys.c 2004-11-22 17:11:35.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/kernel/sys.c 2004-11-22 17:16:58.000000000 +0800
@@ -84,7 +84,7 @@
* and the like.
*/

-static struct notifier_block *reboot_notifier_list;
+struct notifier_block *reboot_notifier_list;
rwlock_t notifier_lock = RW_LOCK_UNLOCKED;

/**
--- linux-2.6.9-ppc-g4-peval/kernel/sysctl.c 2004-11-22 17:08:10.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/kernel/sysctl.c 2004-11-24 14:12:57.000000000 +0800
@@ -66,6 +66,10 @@
extern int printk_ratelimit_jiffies;
extern int printk_ratelimit_burst;

+#if defined(CONFIG_SOFTWARE_SUSPEND)
+extern int swsusp_pagecache;
+#endif
+
#if defined(CONFIG_X86_LOCAL_APIC) && defined(__i386__)
int unknown_nmi_panic;
extern int proc_unknown_nmi_panic(ctl_table *, int, struct file *,
@@ -792,6 +796,18 @@
.strategy = &sysctl_intvec,
.extra1 = &zero,
},
+#if defined(CONFIG_SOFTWARE_SUSPEND)
+ {
+ .ctl_name = VM_SWSUSP_PAGECACHE,
+ .procname = "swsusp_pagecache",
+ .data = &swsusp_pagecache,
+ .maxlen = sizeof(swsusp_pagecache),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ .strategy = &sysctl_intvec,
+ .extra1 = &zero,
+ },
+#endif
{
.ctl_name = VM_BLOCK_DUMP,
.procname = "block_dump",
--- linux-2.6.9-ppc-g4-peval/include/linux/sysctl.h 2004-11-22 17:08:10.000000000 +0800
+++ linux-2.6.9-ppc-g4-peval-hg/include/linux/sysctl.h 2004-11-24 14:13:08.000000000 +0800
@@ -170,6 +170,7 @@
VM_VFS_CACHE_PRESSURE=26, /* dcache/icache reclaim pressure */
VM_LEGACY_VA_LAYOUT=27, /* legacy/compatibility virtual address space layout */
VM_HARDMAPLIMIT=28, /* Make mapped a hard limit */
+ VM_SWSUSP_PAGECACHE=29, /* Enable/Disable Suspend PageCaches */
};

--
--
Hu Gang / Steve
Linux Registered User 204016
GPG Public Key: http://soulinfo.com/~hugang/hugang.asc