2006-03-19 15:32:36

by Con Kolivas

Subject: [PATCH][1/3] mm: swsusp shrink_all_memory tweaks

This patch is a rewrite of the shrink_all_memory function used by swsusp
prior to suspending to disk.

The special hooks into balance_pgdat for shrink_all_memory have been removed,
simplifying that function significantly.

Some code will now be compiled out in the !CONFIG_PM case.

shrink_all_memory now uses shrink_zone and shrink_slab directly, driven by a
new entry in struct scan_control, suspend_pass. This field alters which lists
shrink_zone will shrink on successive passes. The aim is to change the reclaim
logic to choose the best pages to keep on resume, to free the minimum amount
of memory required to suspend, and to free that memory faster.
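
In outline, the passes map onto the existing reclaim knobs as follows (an
illustrative sketch of the intent, not code from the patch):

	/*
	 * suspend_pass 4:    suspend_scan_active() is false, so shrink_zone()
	 *                    scans the inactive list only.
	 * suspend_pass 3, 2: the active list is scanned too, but mapped pages
	 *                    are in general still left alone.
	 * suspend_pass 1, 0: zone->prev_priority is forced to 0 before
	 *                    shrink_zone() runs, so shrink_active_list() will
	 *                    reclaim mapped pages as well (normal reclaim).
	 */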

Signed-off-by: Con Kolivas <[email protected]>

kernel/power/swsusp.c | 10 ---
mm/vmscan.c | 165 ++++++++++++++++++++++++++++++--------------------
2 files changed, 105 insertions(+), 70 deletions(-)

Index: linux-2.6.16-rc6-mm2/mm/vmscan.c
===================================================================
--- linux-2.6.16-rc6-mm2.orig/mm/vmscan.c 2006-03-19 15:40:41.000000000 +1100
+++ linux-2.6.16-rc6-mm2/mm/vmscan.c 2006-03-20 02:17:01.000000000 +1100
@@ -62,8 +62,33 @@ struct scan_control {
* In this context, it doesn't matter that we scan the
* whole list at once. */
int swap_cluster_max;
+
+#ifdef CONFIG_PM
+ /*
+ * If we're doing suspend to disk, this tracks which pass we are on.
+ * We decrement to allow code to transparently do normal reclaim
+ * without explicitly setting it to 0.
+ *
+ * 4 = Reclaim from inactive_list only
+ * 3 = Reclaim from active list but don't reclaim mapped
+ * 2 = 2nd pass of type 3
+ * 1 = Reclaim mapped (normal reclaim)
+ * 0 = 2nd pass of type 1
+ */
+ int suspend_pass;
+#endif
};

+/*
+ * When scanning for the swsusp function shrink_all_memory, we only shrink
+ * the active lists from the second pass onwards.
+ */
+#ifdef CONFIG_PM
+#define suspend_scan_active(sc) ((sc)->suspend_pass < 4)
+#else
+#define suspend_scan_active(sc) 1
+#endif
+
#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))

#ifdef ARCH_HAS_PREFETCH
@@ -840,7 +865,8 @@ static void shrink_active_list(unsigned
}

/*
- * This is a basic per-zone page freer. Used by both kswapd and direct reclaim.
+ * This is a basic per-zone page freer. Used by kswapd, direct reclaim and
+ * the swsusp-specific shrink_all_memory function.
*/
static unsigned long shrink_zone(int priority, struct zone *zone,
struct scan_control *sc)
@@ -858,7 +884,7 @@ static unsigned long shrink_zone(int pri
*/
zone->nr_scan_active += (zone->nr_active >> priority) + 1;
nr_active = zone->nr_scan_active;
- if (nr_active >= sc->swap_cluster_max)
+ if (nr_active >= sc->swap_cluster_max && suspend_scan_active(sc))
zone->nr_scan_active = 0;
else
nr_active = 0;
@@ -935,7 +961,12 @@ static unsigned long shrink_zones(int pr
}
return nr_reclaimed;
}
-
+
+#define for_each_priority_reverse(priority) \
+ for (priority = DEF_PRIORITY; \
+ priority >= 0; \
+ priority--)
+
/*
* This is the main entry point to direct page reclaim.
*
@@ -979,7 +1010,7 @@ unsigned long try_to_free_pages(struct z
lru_pages += zone->nr_active + zone->nr_inactive;
}

- for (priority = DEF_PRIORITY; priority >= 0; priority--) {
+ for_each_priority_reverse(priority) {
sc.nr_mapped = read_page_state(nr_mapped);
sc.nr_scanned = 0;
if (!priority)
@@ -1029,10 +1060,6 @@ out:
* For kswapd, balance_pgdat() will work across all this node's zones until
* they are all at pages_high.
*
- * If `nr_pages' is non-zero then it is the number of pages which are to be
- * reclaimed, regardless of the zone occupancies. This is a software suspend
- * special.
- *
* Returns the number of pages which were actually freed.
*
* There is special handling here for zones which are full of pinned pages.
@@ -1050,10 +1077,8 @@ out:
* the page allocator fallback scheme to ensure that aging of pages is balanced
* across the zones.
*/
-static unsigned long balance_pgdat(pg_data_t *pgdat, unsigned long nr_pages,
- int order)
+static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
{
- unsigned long to_free = nr_pages;
int all_zones_ok;
int priority;
int i;
@@ -1063,7 +1088,7 @@ static unsigned long balance_pgdat(pg_da
struct scan_control sc = {
.gfp_mask = GFP_KERNEL,
.may_swap = 1,
- .swap_cluster_max = nr_pages ? nr_pages : SWAP_CLUSTER_MAX,
+ .swap_cluster_max = SWAP_CLUSTER_MAX,
};

loop_again:
@@ -1080,7 +1105,7 @@ loop_again:
zone->temp_priority = DEF_PRIORITY;
}

- for (priority = DEF_PRIORITY; priority >= 0; priority--) {
+ for_each_priority_reverse(priority) {
int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */
unsigned long lru_pages = 0;

@@ -1090,31 +1115,27 @@ loop_again:

all_zones_ok = 1;

- if (nr_pages == 0) {
- /*
- * Scan in the highmem->dma direction for the highest
- * zone which needs scanning
- */
- for (i = pgdat->nr_zones - 1; i >= 0; i--) {
- struct zone *zone = pgdat->node_zones + i;
+ /*
+ * Scan in the highmem->dma direction for the highest
+ * zone which needs scanning
+ */
+ for (i = pgdat->nr_zones - 1; i >= 0; i--) {
+ struct zone *zone = pgdat->node_zones + i;

- if (!populated_zone(zone))
- continue;
+ if (!populated_zone(zone))
+ continue;

- if (zone->all_unreclaimable &&
- priority != DEF_PRIORITY)
- continue;
+ if (zone->all_unreclaimable &&
+ priority != DEF_PRIORITY)
+ continue;

- if (!zone_watermark_ok(zone, order,
- zone->pages_high, 0, 0)) {
- end_zone = i;
- goto scan;
- }
+ if (!zone_watermark_ok(zone, order, zone->pages_high,
+ 0, 0)) {
+ end_zone = i;
+ goto scan;
}
- goto out;
- } else {
- end_zone = pgdat->nr_zones - 1;
}
+ goto out;
scan:
for (i = 0; i <= end_zone; i++) {
struct zone *zone = pgdat->node_zones + i;
@@ -1141,11 +1162,9 @@ scan:
if (zone->all_unreclaimable && priority != DEF_PRIORITY)
continue;

- if (nr_pages == 0) { /* Not software suspend */
- if (!zone_watermark_ok(zone, order,
- zone->pages_high, end_zone, 0))
- all_zones_ok = 0;
- }
+ if (!zone_watermark_ok(zone, order, zone->pages_high,
+ end_zone, 0))
+ all_zones_ok = 0;
zone->temp_priority = priority;
if (zone->prev_priority > priority)
zone->prev_priority = priority;
@@ -1170,8 +1189,6 @@ scan:
total_scanned > nr_reclaimed + nr_reclaimed / 2)
sc.may_writepage = 1;
}
- if (nr_pages && to_free > nr_reclaimed)
- continue; /* swsusp: need to do more work */
if (all_zones_ok)
break; /* kswapd: all done */
/*
@@ -1187,7 +1204,7 @@ scan:
* matches the direct reclaim path behaviour in terms of impact
* on zone->*_priority.
*/
- if ((nr_reclaimed >= SWAP_CLUSTER_MAX) && !nr_pages)
+ if (nr_reclaimed >= SWAP_CLUSTER_MAX)
break;
}
out:
@@ -1269,7 +1286,7 @@ static int kswapd(void *p)
}
finish_wait(&pgdat->kswapd_wait, &wait);

- balance_pgdat(pgdat, 0, order);
+ balance_pgdat(pgdat, order);
}
return 0;
}
@@ -1299,36 +1316,58 @@ void wakeup_kswapd(struct zone *zone, in
#ifdef CONFIG_PM
/*
* Try to free `nr_pages' of memory, system-wide. Returns the number of freed
- * pages.
+ * pages. It does this via shrink_zone in passes. Rather than trying to age
+ * the LRUs, the aim is to preserve overall LRU order by reclaiming
+ * preferentially: inactive > active > active referenced > active mapped.
*/
unsigned long shrink_all_memory(unsigned long nr_pages)
{
- pg_data_t *pgdat;
- unsigned long nr_to_free = nr_pages;
unsigned long ret = 0;
- unsigned retry = 2;
- struct reclaim_state reclaim_state = {
- .reclaimed_slab = 0,
+ struct scan_control sc = {
+ .gfp_mask = GFP_KERNEL,
+ .may_swap = 1,
+ .swap_cluster_max = nr_pages,
+ .suspend_pass = 4,
+ .may_writepage = 1,
};

delay_swap_prefetch();

- current->reclaim_state = &reclaim_state;
-repeat:
- for_each_online_pgdat(pgdat) {
- unsigned long freed;
+ do {
+ int priority;

- freed = balance_pgdat(pgdat, nr_to_free, 0);
- ret += freed;
- nr_to_free -= freed;
- if ((long)nr_to_free <= 0)
- break;
- }
- if (retry-- && ret < nr_pages) {
- blk_congestion_wait(WRITE, HZ/5);
- goto repeat;
- }
- current->reclaim_state = NULL;
+ for_each_priority_reverse(priority) {
+ struct zone *zone;
+ unsigned long lru_pages = 0;
+
+ for_each_zone(zone) {
+ unsigned long freed;
+
+ if (!populated_zone(zone))
+ continue;
+
+ if (zone->all_unreclaimable &&
+ priority != DEF_PRIORITY)
+ continue;
+
+ lru_pages += zone->nr_active +
+ zone->nr_inactive;
+ /*
+ * shrink_active_list needs this to reclaim
+ * mapped pages
+ */
+ if (sc.suspend_pass < 2)
+ zone->prev_priority = 0;
+ freed = shrink_zone(priority, zone, &sc);
+ ret += freed;
+ if (ret > nr_pages)
+ goto out;
+ }
+ shrink_slab(0, sc.gfp_mask, lru_pages);
+ }
+ blk_congestion_wait(WRITE, HZ / 5);
+ } while (--sc.suspend_pass >= 0);
+out:
return ret;
}
#endif
Index: linux-2.6.16-rc6-mm2/kernel/power/swsusp.c
===================================================================
--- linux-2.6.16-rc6-mm2.orig/kernel/power/swsusp.c 2006-03-19 15:40:42.000000000 +1100
+++ linux-2.6.16-rc6-mm2/kernel/power/swsusp.c 2006-03-20 02:15:47.000000000 +1100
@@ -173,9 +173,6 @@ void free_all_swap_pages(int swap, struc
* Notice: all userland should be stopped before it is called, or
* livelock is possible.
*/
-
-#define SHRINK_BITE 10000
-
int swsusp_shrink_memory(void)
{
long size, tmp;
@@ -194,14 +191,13 @@ int swsusp_shrink_memory(void)
for_each_zone (zone)
if (!is_highmem(zone))
tmp -= zone->free_pages;
+ if (tmp <= 0)
+ tmp = size - image_size / PAGE_SIZE;
if (tmp > 0) {
- tmp = shrink_all_memory(SHRINK_BITE);
+ tmp = shrink_all_memory(tmp);
if (!tmp)
return -ENOMEM;
pages += tmp;
- } else if (size > image_size / PAGE_SIZE) {
- tmp = shrink_all_memory(SHRINK_BITE);
- pages += tmp;
}
printk("\b%c", p[i++%4]);
} while (tmp > 0);


2006-03-20 11:41:52

by Nick Piggin

Subject: Re: [PATCH][1/3] mm: swsusp shrink_all_memory tweaks

Con Kolivas wrote:

> -
> +
> +#define for_each_priority_reverse(priority) \
> + for (priority = DEF_PRIORITY; \
> + priority >= 0; \
> + priority--)
> +
> /*
> * This is the main entry point to direct page reclaim.
> *
> @@ -979,7 +1010,7 @@ unsigned long try_to_free_pages(struct z
> lru_pages += zone->nr_active + zone->nr_inactive;
> }
>
> - for (priority = DEF_PRIORITY; priority >= 0; priority--) {
> + for_each_priority_reverse(priority) {
> sc.nr_mapped = read_page_state(nr_mapped);
> sc.nr_scanned = 0;
> if (!priority)

I still don't like this change. Apart from being harder to read in
my opinion, I don't believe there is a precedent for "consolidating"
simple for loops in the kernel, is there?

More complex loops get helpers, but they're made part of the wider
well-known kernel API.

Why does for_each_priority_reverse blow up when you pass it an unsigned
argument? What range does priority cover? What direction does the loop go in?
(The _reverse suffix doesn't tell me, because the loop goes from low to high
priority, so I would have thought of that as going forward, or up.)

You had to look in two places each time you wanted to know the answers.
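
For illustration, the unsigned case never terminates (a sketch, not code from
the patch; shrink_some() is a hypothetical stand-in):

	unsigned int priority;	/* unsigned, as in the question above */

	for_each_priority_reverse(priority)
		shrink_some(priority);

	/*
	 * The macro expands to:
	 *	for (priority = DEF_PRIORITY; priority >= 0; priority--)
	 * With an unsigned type, priority >= 0 is always true, so once
	 * priority reaches 0 the decrement wraps it to UINT_MAX and the
	 * loop never ends.
	 */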

--
SUSE Labs, Novell Inc.

2006-03-20 11:52:05

by Con Kolivas

Subject: Re: [PATCH][1/3] mm: swsusp shrink_all_memory tweaks

On Monday 20 March 2006 22:41, Nick Piggin wrote:
> I still don't like this change.

Fine.

Respin.

Cheers,
Con
---
This patch is a rewrite of the shrink_all_memory function used by swsusp
prior to suspending to disk.

The special hooks into balance_pgdat for shrink_all_memory have been removed,
simplifying that function significantly.

Some code will now be compiled out in the !CONFIG_PM case.

shrink_all_memory now uses shrink_zone and shrink_slab directly, driven by a
new entry in struct scan_control, suspend_pass. This field alters which lists
shrink_zone will shrink on successive passes. The aim is to change the reclaim
logic to choose the best pages to keep on resume, to free the minimum amount
of memory required to suspend, and to free that memory faster.

Signed-off-by: Con Kolivas <[email protected]>

kernel/power/swsusp.c | 10 ---
mm/vmscan.c | 154 ++++++++++++++++++++++++++++++--------------------
2 files changed, 97 insertions(+), 67 deletions(-)

Index: linux-2.6.16-rc6-mm2/mm/vmscan.c
===================================================================
--- linux-2.6.16-rc6-mm2.orig/mm/vmscan.c 2006-03-20 22:44:10.000000000 +1100
+++ linux-2.6.16-rc6-mm2/mm/vmscan.c 2006-03-20 22:46:54.000000000 +1100
@@ -62,8 +62,33 @@ struct scan_control {
* In this context, it doesn't matter that we scan the
* whole list at once. */
int swap_cluster_max;
+
+#ifdef CONFIG_PM
+ /*
+ * If we're doing suspend to disk, this tracks which pass we are on.
+ * We decrement to allow code to transparently do normal reclaim
+ * without explicitly setting it to 0.
+ *
+ * 4 = Reclaim from inactive_list only
+ * 3 = Reclaim from active list but don't reclaim mapped
+ * 2 = 2nd pass of type 3
+ * 1 = Reclaim mapped (normal reclaim)
+ * 0 = 2nd pass of type 1
+ */
+ int suspend_pass;
+#endif
};

+/*
+ * When scanning for the swsusp function shrink_all_memory, we only shrink
+ * the active lists from the second pass onwards.
+ */
+#ifdef CONFIG_PM
+#define suspend_scan_active(sc) ((sc)->suspend_pass < 4)
+#else
+#define suspend_scan_active(sc) 1
+#endif
+
#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))

#ifdef ARCH_HAS_PREFETCH
@@ -840,7 +865,8 @@ static void shrink_active_list(unsigned
}

/*
- * This is a basic per-zone page freer. Used by both kswapd and direct reclaim.
+ * This is a basic per-zone page freer. Used by kswapd, direct reclaim and
+ * the swsusp-specific shrink_all_memory function.
*/
static unsigned long shrink_zone(int priority, struct zone *zone,
struct scan_control *sc)
@@ -858,7 +884,7 @@ static unsigned long shrink_zone(int pri
*/
zone->nr_scan_active += (zone->nr_active >> priority) + 1;
nr_active = zone->nr_scan_active;
- if (nr_active >= sc->swap_cluster_max)
+ if (nr_active >= sc->swap_cluster_max && suspend_scan_active(sc))
zone->nr_scan_active = 0;
else
nr_active = 0;
@@ -1029,10 +1055,6 @@ out:
* For kswapd, balance_pgdat() will work across all this node's zones until
* they are all at pages_high.
*
- * If `nr_pages' is non-zero then it is the number of pages which are to be
- * reclaimed, regardless of the zone occupancies. This is a software suspend
- * special.
- *
* Returns the number of pages which were actually freed.
*
* There is special handling here for zones which are full of pinned pages.
@@ -1050,10 +1072,8 @@ out:
* the page allocator fallback scheme to ensure that aging of pages is balanced
* across the zones.
*/
-static unsigned long balance_pgdat(pg_data_t *pgdat, unsigned long nr_pages,
- int order)
+static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
{
- unsigned long to_free = nr_pages;
int all_zones_ok;
int priority;
int i;
@@ -1063,7 +1083,7 @@ static unsigned long balance_pgdat(pg_da
struct scan_control sc = {
.gfp_mask = GFP_KERNEL,
.may_swap = 1,
- .swap_cluster_max = nr_pages ? nr_pages : SWAP_CLUSTER_MAX,
+ .swap_cluster_max = SWAP_CLUSTER_MAX,
};

loop_again:
@@ -1090,31 +1110,27 @@ loop_again:

all_zones_ok = 1;

- if (nr_pages == 0) {
- /*
- * Scan in the highmem->dma direction for the highest
- * zone which needs scanning
- */
- for (i = pgdat->nr_zones - 1; i >= 0; i--) {
- struct zone *zone = pgdat->node_zones + i;
+ /*
+ * Scan in the highmem->dma direction for the highest
+ * zone which needs scanning
+ */
+ for (i = pgdat->nr_zones - 1; i >= 0; i--) {
+ struct zone *zone = pgdat->node_zones + i;

- if (!populated_zone(zone))
- continue;
+ if (!populated_zone(zone))
+ continue;

- if (zone->all_unreclaimable &&
- priority != DEF_PRIORITY)
- continue;
+ if (zone->all_unreclaimable &&
+ priority != DEF_PRIORITY)
+ continue;

- if (!zone_watermark_ok(zone, order,
- zone->pages_high, 0, 0)) {
- end_zone = i;
- goto scan;
- }
+ if (!zone_watermark_ok(zone, order, zone->pages_high,
+ 0, 0)) {
+ end_zone = i;
+ goto scan;
}
- goto out;
- } else {
- end_zone = pgdat->nr_zones - 1;
}
+ goto out;
scan:
for (i = 0; i <= end_zone; i++) {
struct zone *zone = pgdat->node_zones + i;
@@ -1141,11 +1157,9 @@ scan:
if (zone->all_unreclaimable && priority != DEF_PRIORITY)
continue;

- if (nr_pages == 0) { /* Not software suspend */
- if (!zone_watermark_ok(zone, order,
- zone->pages_high, end_zone, 0))
- all_zones_ok = 0;
- }
+ if (!zone_watermark_ok(zone, order, zone->pages_high,
+ end_zone, 0))
+ all_zones_ok = 0;
zone->temp_priority = priority;
if (zone->prev_priority > priority)
zone->prev_priority = priority;
@@ -1170,8 +1184,6 @@ scan:
total_scanned > nr_reclaimed + nr_reclaimed / 2)
sc.may_writepage = 1;
}
- if (nr_pages && to_free > nr_reclaimed)
- continue; /* swsusp: need to do more work */
if (all_zones_ok)
break; /* kswapd: all done */
/*
@@ -1187,7 +1199,7 @@ scan:
* matches the direct reclaim path behaviour in terms of impact
* on zone->*_priority.
*/
- if ((nr_reclaimed >= SWAP_CLUSTER_MAX) && !nr_pages)
+ if (nr_reclaimed >= SWAP_CLUSTER_MAX)
break;
}
out:
@@ -1269,7 +1281,7 @@ static int kswapd(void *p)
}
finish_wait(&pgdat->kswapd_wait, &wait);

- balance_pgdat(pgdat, 0, order);
+ balance_pgdat(pgdat, order);
}
return 0;
}
@@ -1299,36 +1311,58 @@ void wakeup_kswapd(struct zone *zone, in
#ifdef CONFIG_PM
/*
* Try to free `nr_pages' of memory, system-wide. Returns the number of freed
- * pages.
+ * pages. It does this via shrink_zone in passes. Rather than trying to age
+ * the LRUs, the aim is to preserve overall LRU order by reclaiming
+ * preferentially: inactive > active > active referenced > active mapped.
*/
unsigned long shrink_all_memory(unsigned long nr_pages)
{
- pg_data_t *pgdat;
- unsigned long nr_to_free = nr_pages;
unsigned long ret = 0;
- unsigned retry = 2;
- struct reclaim_state reclaim_state = {
- .reclaimed_slab = 0,
+ struct scan_control sc = {
+ .gfp_mask = GFP_KERNEL,
+ .may_swap = 1,
+ .swap_cluster_max = nr_pages,
+ .suspend_pass = 4,
+ .may_writepage = 1,
};

delay_swap_prefetch();

- current->reclaim_state = &reclaim_state;
-repeat:
- for_each_online_pgdat(pgdat) {
- unsigned long freed;
+ do {
+ int priority;

- freed = balance_pgdat(pgdat, nr_to_free, 0);
- ret += freed;
- nr_to_free -= freed;
- if ((long)nr_to_free <= 0)
- break;
- }
- if (retry-- && ret < nr_pages) {
- blk_congestion_wait(WRITE, HZ/5);
- goto repeat;
- }
- current->reclaim_state = NULL;
+ for (priority = DEF_PRIORITY; priority >= 0; priority--) {
+ struct zone *zone;
+ unsigned long lru_pages = 0;
+
+ for_each_zone(zone) {
+ unsigned long freed;
+
+ if (!populated_zone(zone))
+ continue;
+
+ if (zone->all_unreclaimable &&
+ priority != DEF_PRIORITY)
+ continue;
+
+ lru_pages += zone->nr_active +
+ zone->nr_inactive;
+ /*
+ * shrink_active_list needs this to reclaim
+ * mapped pages
+ */
+ if (sc.suspend_pass < 2)
+ zone->prev_priority = 0;
+ freed = shrink_zone(priority, zone, &sc);
+ ret += freed;
+ if (ret > nr_pages)
+ goto out;
+ }
+ shrink_slab(0, sc.gfp_mask, lru_pages);
+ }
+ blk_congestion_wait(WRITE, HZ / 5);
+ } while (--sc.suspend_pass >= 0);
+out:
return ret;
}
#endif
Index: linux-2.6.16-rc6-mm2/kernel/power/swsusp.c
===================================================================
--- linux-2.6.16-rc6-mm2.orig/kernel/power/swsusp.c 2006-03-20 22:44:10.000000000 +1100
+++ linux-2.6.16-rc6-mm2/kernel/power/swsusp.c 2006-03-20 22:46:12.000000000 +1100
@@ -173,9 +173,6 @@ void free_all_swap_pages(int swap, struc
* Notice: all userland should be stopped before it is called, or
* livelock is possible.
*/
-
-#define SHRINK_BITE 10000
-
int swsusp_shrink_memory(void)
{
long size, tmp;
@@ -194,14 +191,13 @@ int swsusp_shrink_memory(void)
for_each_zone (zone)
if (!is_highmem(zone))
tmp -= zone->free_pages;
+ if (tmp <= 0)
+ tmp = size - image_size / PAGE_SIZE;
if (tmp > 0) {
- tmp = shrink_all_memory(SHRINK_BITE);
+ tmp = shrink_all_memory(tmp);
if (!tmp)
return -ENOMEM;
pages += tmp;
- } else if (size > image_size / PAGE_SIZE) {
- tmp = shrink_all_memory(SHRINK_BITE);
- pages += tmp;
}
printk("\b%c", p[i++%4]);
} while (tmp > 0);

2006-03-20 18:47:54

by Rafael J. Wysocki

Subject: Re: [PATCH][1/3] mm: swsusp shrink_all_memory tweaks

On Monday 20 March 2006 12:50, Con Kolivas wrote:
> On Monday 20 March 2006 22:41, Nick Piggin wrote:
> > I still don't like this change.
>
> Fine.
>
> Respin.
}-- snip --{
> Index: linux-2.6.16-rc6-mm2/kernel/power/swsusp.c
> ===================================================================
> --- linux-2.6.16-rc6-mm2.orig/kernel/power/swsusp.c 2006-03-20 22:44:10.000000000 +1100
> +++ linux-2.6.16-rc6-mm2/kernel/power/swsusp.c 2006-03-20 22:46:12.000000000 +1100
> @@ -173,9 +173,6 @@ void free_all_swap_pages(int swap, struc
> * Notice: all userland should be stopped before it is called, or
> * livelock is possible.
> */
> -
> -#define SHRINK_BITE 10000
> -
> int swsusp_shrink_memory(void)
> {
> long size, tmp;
> @@ -194,14 +191,13 @@ int swsusp_shrink_memory(void)
> for_each_zone (zone)
> if (!is_highmem(zone))
> tmp -= zone->free_pages;
> + if (tmp <= 0)
> + tmp = size - image_size / PAGE_SIZE;
> if (tmp > 0) {
> - tmp = shrink_all_memory(SHRINK_BITE);
> + tmp = shrink_all_memory(tmp);
> if (!tmp)
> return -ENOMEM;
> pages += tmp;
> - } else if (size > image_size / PAGE_SIZE) {
> - tmp = shrink_all_memory(SHRINK_BITE);
> - pages += tmp;
> }
> printk("\b%c", p[i++%4]);
> } while (tmp > 0);
>

swsusp_shrink_memory() is still wrong, because it will always fail for
image_size = 0: the fallback target size - image_size / PAGE_SIZE then always
stays positive, so shrink_all_memory() keeps being asked to free more until
it returns 0, at which point we return -ENOMEM. My bad, sorry.

The appended patch (on top of yours) should fix that (hope I did it right
this time).

kernel/power/swsusp.c | 8 ++++++--
1 files changed, 6 insertions(+), 2 deletions(-)

Index: linux-2.6.16-rc6-mm2/kernel/power/swsusp.c
===================================================================
--- linux-2.6.16-rc6-mm2.orig/kernel/power/swsusp.c
+++ linux-2.6.16-rc6-mm2/kernel/power/swsusp.c
@@ -192,13 +192,17 @@ int swsusp_shrink_memory(void)
for_each_zone (zone)
if (!is_highmem(zone))
tmp -= zone->free_pages;
- if (tmp <= 0)
- tmp = size - image_size / PAGE_SIZE;
if (tmp > 0) {
tmp = shrink_all_memory(tmp);
if (!tmp)
return -ENOMEM;
pages += tmp;
+ } else {
+ size -= image_size / PAGE_SIZE;
+ if (size > 0) {
+ tmp = shrink_all_memory(size);
+ pages += tmp;
+ }
}
printk("\b%c", p[i++%4]);
} while (tmp > 0);
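
As a worked example of the fixed flow (all numbers invented): suppose the
image currently needs size = 120000 pages, image_size / PAGE_SIZE = 100000,
and the non-highmem zones have 30000 pages free. Then tmp = 120000 - 30000 =
90000 > 0, so shrink_all_memory(90000) is asked to free exactly the
shortfall, and a zero return really does mean -ENOMEM. Once enough memory is
free (tmp <= 0), the else branch computes size - image_size / PAGE_SIZE =
20000 and keeps shrinking until the image fits under image_size; with
image_size = 0 it simply continues until shrink_all_memory() returns 0, which
now ends the loop instead of failing.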

2006-03-24 07:08:06

by Con Kolivas

Subject: [PATCH] mm: swsusp shrink_all_memory tweaks

On Tuesday 21 March 2006 05:46, Rafael J. Wysocki wrote:
> swsusp_shrink_memory() is still wrong, because it will always fail for
> image_size = 0. My bad, sorry.
>
> The appended patch (on top of yours) should fix that (hope I did it right
> this time).

Well I discovered that if all the necessary memory is freed in one call to
shrink_all_memory we don't get the nice updating printout from
swsusp_shrink_memory telling us we're making progress. So instead of
modifying the function to call shrink_all_memory with the full amount (and
since we've botched swsusp_shrink_memory a few times between us), we should
limit it to a max of SHRINK_BITEs instead.

This patch is fine standalone.

Rafael, Pavel what do you think of this one?

Cheers,
Con
---
This patch is a rewrite of the shrink_all_memory function used by swsusp
prior to suspending to disk.

The special hooks into balance_pgdat for shrink_all_memory have been removed,
simplifying that function significantly.

Some code will now be compiled out in the !CONFIG_PM case.

shrink_all_memory now uses shrink_zone and shrink_slab directly, driven by a
new entry in struct scan_control, suspend_pass. This field alters which lists
shrink_zone will shrink on successive passes. The aim is to change the reclaim
logic to choose the best pages to keep on resume, to free the minimum amount
of memory required to suspend, and to free that memory faster.

Signed-off-by: Con Kolivas <[email protected]>

kernel/power/swsusp.c | 10 ++-
mm/vmscan.c | 154 ++++++++++++++++++++++++++++++--------------------
2 files changed, 102 insertions(+), 62 deletions(-)

Index: linux-2.6.16-mm1/mm/vmscan.c
===================================================================
--- linux-2.6.16-mm1.orig/mm/vmscan.c 2006-03-24 15:04:13.000000000 +1100
+++ linux-2.6.16-mm1/mm/vmscan.c 2006-03-24 15:06:16.000000000 +1100
@@ -62,8 +62,33 @@ struct scan_control {
* In this context, it doesn't matter that we scan the
* whole list at once. */
int swap_cluster_max;
+
+#ifdef CONFIG_PM
+ /*
+ * If we're doing suspend to disk, this tracks which pass we are on.
+ * We decrement to allow code to transparently do normal reclaim
+ * without explicitly setting it to 0.
+ *
+ * 4 = Reclaim from inactive_list only
+ * 3 = Reclaim from active list but don't reclaim mapped
+ * 2 = 2nd pass of type 3
+ * 1 = Reclaim mapped (normal reclaim)
+ * 0 = 2nd pass of type 1
+ */
+ int suspend_pass;
+#endif
};

+/*
+ * When scanning for the swsusp function shrink_all_memory, we only shrink
+ * the active lists from the second pass onwards.
+ */
+#ifdef CONFIG_PM
+#define suspend_scan_active(sc) ((sc)->suspend_pass < 4)
+#else
+#define suspend_scan_active(sc) 1
+#endif
+
#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))

#ifdef ARCH_HAS_PREFETCH
@@ -840,7 +865,8 @@ static void shrink_active_list(unsigned
}

/*
- * This is a basic per-zone page freer. Used by both kswapd and direct reclaim.
+ * This is a basic per-zone page freer. Used by kswapd, direct reclaim and
+ * the swsusp-specific shrink_all_memory function.
*/
static unsigned long shrink_zone(int priority, struct zone *zone,
struct scan_control *sc)
@@ -858,7 +884,7 @@ static unsigned long shrink_zone(int pri
*/
zone->nr_scan_active += (zone->nr_active >> priority) + 1;
nr_active = zone->nr_scan_active;
- if (nr_active >= sc->swap_cluster_max)
+ if (nr_active >= sc->swap_cluster_max && suspend_scan_active(sc))
zone->nr_scan_active = 0;
else
nr_active = 0;
@@ -1029,10 +1055,6 @@ out:
* For kswapd, balance_pgdat() will work across all this node's zones until
* they are all at pages_high.
*
- * If `nr_pages' is non-zero then it is the number of pages which are to be
- * reclaimed, regardless of the zone occupancies. This is a software suspend
- * special.
- *
* Returns the number of pages which were actually freed.
*
* There is special handling here for zones which are full of pinned pages.
@@ -1050,10 +1072,8 @@ out:
* the page allocator fallback scheme to ensure that aging of pages is balanced
* across the zones.
*/
-static unsigned long balance_pgdat(pg_data_t *pgdat, unsigned long nr_pages,
- int order)
+static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
{
- unsigned long to_free = nr_pages;
int all_zones_ok;
int priority;
int i;
@@ -1063,7 +1083,7 @@ static unsigned long balance_pgdat(pg_da
struct scan_control sc = {
.gfp_mask = GFP_KERNEL,
.may_swap = 1,
- .swap_cluster_max = nr_pages ? nr_pages : SWAP_CLUSTER_MAX,
+ .swap_cluster_max = SWAP_CLUSTER_MAX,
};

loop_again:
@@ -1090,31 +1110,27 @@ loop_again:

all_zones_ok = 1;

- if (nr_pages == 0) {
- /*
- * Scan in the highmem->dma direction for the highest
- * zone which needs scanning
- */
- for (i = pgdat->nr_zones - 1; i >= 0; i--) {
- struct zone *zone = pgdat->node_zones + i;
+ /*
+ * Scan in the highmem->dma direction for the highest
+ * zone which needs scanning
+ */
+ for (i = pgdat->nr_zones - 1; i >= 0; i--) {
+ struct zone *zone = pgdat->node_zones + i;

- if (!populated_zone(zone))
- continue;
+ if (!populated_zone(zone))
+ continue;

- if (zone->all_unreclaimable &&
- priority != DEF_PRIORITY)
- continue;
+ if (zone->all_unreclaimable &&
+ priority != DEF_PRIORITY)
+ continue;

- if (!zone_watermark_ok(zone, order,
- zone->pages_high, 0, 0)) {
- end_zone = i;
- goto scan;
- }
+ if (!zone_watermark_ok(zone, order, zone->pages_high,
+ 0, 0)) {
+ end_zone = i;
+ goto scan;
}
- goto out;
- } else {
- end_zone = pgdat->nr_zones - 1;
}
+ goto out;
scan:
for (i = 0; i <= end_zone; i++) {
struct zone *zone = pgdat->node_zones + i;
@@ -1141,11 +1157,9 @@ scan:
if (zone->all_unreclaimable && priority != DEF_PRIORITY)
continue;

- if (nr_pages == 0) { /* Not software suspend */
- if (!zone_watermark_ok(zone, order,
- zone->pages_high, end_zone, 0))
- all_zones_ok = 0;
- }
+ if (!zone_watermark_ok(zone, order, zone->pages_high,
+ end_zone, 0))
+ all_zones_ok = 0;
zone->temp_priority = priority;
if (zone->prev_priority > priority)
zone->prev_priority = priority;
@@ -1170,8 +1184,6 @@ scan:
total_scanned > nr_reclaimed + nr_reclaimed / 2)
sc.may_writepage = 1;
}
- if (nr_pages && to_free > nr_reclaimed)
- continue; /* swsusp: need to do more work */
if (all_zones_ok)
break; /* kswapd: all done */
/*
@@ -1187,7 +1199,7 @@ scan:
* matches the direct reclaim path behaviour in terms of impact
* on zone->*_priority.
*/
- if ((nr_reclaimed >= SWAP_CLUSTER_MAX) && !nr_pages)
+ if (nr_reclaimed >= SWAP_CLUSTER_MAX)
break;
}
out:
@@ -1269,7 +1281,7 @@ static int kswapd(void *p)
}
finish_wait(&pgdat->kswapd_wait, &wait);

- balance_pgdat(pgdat, 0, order);
+ balance_pgdat(pgdat, order);
}
return 0;
}
@@ -1299,36 +1311,58 @@ void wakeup_kswapd(struct zone *zone, in
#ifdef CONFIG_PM
/*
* Try to free `nr_pages' of memory, system-wide. Returns the number of freed
- * pages.
+ * pages. It does this via shrink_zone in passes. Rather than trying to age
+ * the LRUs, the aim is to preserve overall LRU order by reclaiming
+ * preferentially: inactive > active > active referenced > active mapped.
*/
unsigned long shrink_all_memory(unsigned long nr_pages)
{
- pg_data_t *pgdat;
- unsigned long nr_to_free = nr_pages;
unsigned long ret = 0;
- unsigned retry = 2;
- struct reclaim_state reclaim_state = {
- .reclaimed_slab = 0,
+ struct scan_control sc = {
+ .gfp_mask = GFP_KERNEL,
+ .may_swap = 1,
+ .swap_cluster_max = nr_pages,
+ .suspend_pass = 4,
+ .may_writepage = 1,
};

delay_swap_prefetch();

- current->reclaim_state = &reclaim_state;
-repeat:
- for_each_online_pgdat(pgdat) {
- unsigned long freed;
+ do {
+ int priority;

- freed = balance_pgdat(pgdat, nr_to_free, 0);
- ret += freed;
- nr_to_free -= freed;
- if ((long)nr_to_free <= 0)
- break;
- }
- if (retry-- && ret < nr_pages) {
- blk_congestion_wait(WRITE, HZ/5);
- goto repeat;
- }
- current->reclaim_state = NULL;
+ for (priority = DEF_PRIORITY; priority >= 0; priority--) {
+ struct zone *zone;
+ unsigned long lru_pages = 0;
+
+ for_each_zone(zone) {
+ unsigned long freed;
+
+ if (!populated_zone(zone))
+ continue;
+
+ if (zone->all_unreclaimable &&
+ priority != DEF_PRIORITY)
+ continue;
+
+ lru_pages += zone->nr_active +
+ zone->nr_inactive;
+ /*
+ * shrink_active_list needs this to reclaim
+ * mapped pages
+ */
+ if (sc.suspend_pass < 2)
+ zone->prev_priority = 0;
+ freed = shrink_zone(priority, zone, &sc);
+ ret += freed;
+ if (ret > nr_pages)
+ goto out;
+ }
+ shrink_slab(0, sc.gfp_mask, lru_pages);
+ }
+ blk_congestion_wait(WRITE, HZ / 5);
+ } while (--sc.suspend_pass >= 0);
+out:
return ret;
}
#endif
Index: linux-2.6.16-mm1/kernel/power/swsusp.c
===================================================================
--- linux-2.6.16-mm1.orig/kernel/power/swsusp.c 2006-03-24 15:04:13.000000000 +1100
+++ linux-2.6.16-mm1/kernel/power/swsusp.c 2006-03-24 17:48:55.000000000 +1100
@@ -175,6 +175,12 @@ void free_all_swap_pages(int swap, struc
*/

#define SHRINK_BITE 10000
+static unsigned long shrink_bites(long tmp)
+{
+ if (tmp > SHRINK_BITE)
+ tmp = SHRINK_BITE;
+ return shrink_all_memory(tmp);
+}

int swsusp_shrink_memory(void)
{
@@ -195,12 +201,12 @@ int swsusp_shrink_memory(void)
if (!is_highmem(zone))
tmp -= zone->free_pages;
if (tmp > 0) {
- tmp = shrink_all_memory(SHRINK_BITE);
+ tmp = shrink_bites(tmp);
if (!tmp)
return -ENOMEM;
pages += tmp;
} else if (size > image_size / PAGE_SIZE) {
- tmp = shrink_all_memory(SHRINK_BITE);
+ tmp = shrink_bites(size - (image_size / PAGE_SIZE));
pages += tmp;
}
printk("\b%c", p[i++%4]);

2006-03-24 15:17:24

by Rafael J. Wysocki

Subject: Re: [PATCH] mm: swsusp shrink_all_memory tweaks

On Friday 24 March 2006 08:07, Con Kolivas wrote:
> On Tuesday 21 March 2006 05:46, Rafael J. Wysocki wrote:
> > swsusp_shrink_memory() is still wrong, because it will always fail for
> > image_size = 0. My bad, sorry.
> >
> > The appended patch (on top of yours) should fix that (hope I did it right
> > this time).
>
> Well I discovered that if all the necessary memory is freed in one call to
> shrink_all_memory we don't get the nice updating printout from
> swsusp_shrink_memory telling us we're making progress. So instead of
> modifying the function to call shrink_all_memory with the full amount (and
> since we've botched swsusp_shrink_memory a few times between us), we should
> limit it to a max of SHRINK_BITEs instead.
>
> This patch is fine standalone.
>
> Rafael, Pavel what do you think of this one?

In principle it looks good to me, but when I tested the previous one I noticed
shrink_all_memory() tended to return 0 prematurely (ie. when it was possible
to free some more pages). It only happened if more than 50% of memory was
occupied by application data.

Unfortunately I couldn't find the reason.

Greetings,
Rafael

2006-03-24 15:30:52

by Con Kolivas

Subject: Re: [PATCH] mm: swsusp shrink_all_memory tweaks

On Saturday 25 March 2006 02:16, Rafael J. Wysocki wrote:
}-- snip --{
>
> In principle it looks good to me, but when I tested the previous one I
> noticed shrink_all_memory() tended to return 0 prematurely (ie. when it was
> possible to free some more pages). It only happened if more than 50% of
> memory was occupied by application data.
>
> Unfortunately I couldn't find the reason.

Perhaps it was just trying to free up too much in one go. There are a number
of steps a mapped page needs to go through before being finally swapped and
there are a limited number of iterations over it. Limiting it to SHRINK_BITEs
at a time will probably improve that.
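
Schematically (a gloss on the above, not code from the patch):

	/*
	 * Rough life cycle of a mapped page on its way out; each
	 * shrink_zone() scan usually advances a page by only one step,
	 * and there are just DEF_PRIORITY + 1 scans per pass:
	 *
	 *   mapped page --(try_to_unmap)--> in swap cache, dirty
	 *               --(pageout, async write)--> under writeback
	 *               --(writeback completes)--> clean, freeable
	 */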

Cheers,
Con

2006-03-24 16:16:07

by Rafael J. Wysocki

Subject: Re: [PATCH] mm: swsusp shrink_all_memory tweaks

On Friday 24 March 2006 16:30, Con Kolivas wrote:
}-- snip --{
> >
> > In principle it looks good to me, but when I tested the previous one I
> > noticed shrink_all_memory() tended to return 0 prematurely (ie. when it was
> > possible to free some more pages). It only happened if more than 50% of
> > memory was occupied by application data.
> >
> > Unfortunately I couldn't find the reason.
>
> Perhaps it was just trying to free up too much in one go. There are a number
> of steps a mapped page needs to go through before being finally swapped and
> there are a limited number of iterations over it. Limiting it to SHRINK_BITEs
> at a time will probably improve that.

OK [I'll be testing it for the next couple of days.]

Greetings,
Rafael

2006-03-27 12:24:47

by Pavel Machek

Subject: Re: [PATCH] mm: swsusp shrink_all_memory tweaks

Hi!

> > swsusp_shrink_memory() is still wrong, because it will always fail for
> > image_size = 0. My bad, sorry.
> >
> > The appended patch (on top of yours) should fix that (hope I did it right
> > this time).
>
> Well I discovered that if all the necessary memory is freed in one call to
> shrink_all_memory we don't get the nice updating printout from
> swsusp_shrink_memory telling us we're making progress. So instead of
> modifying the function to call shrink_all_memory with the full amount (and
> since we've botched swsusp_shrink_memory a few times between us), we should
> limit it to a max of SHRINK_BITEs instead.
>
> This patch is fine standalone.
>
> Rafael, Pavel what do you think of this one?

Looks good to me (but I'm not a mm expert).
Pavel

--
Picture of sleeping (Linux) penguin wanted...

2006-03-30 17:13:41

by Rafael J. Wysocki

Subject: Re: [PATCH] mm: swsusp shrink_all_memory tweaks

Hi,

On Friday 24 March 2006 17:14, Rafael J. Wysocki wrote:
}-- snip --{
>
> OK [I'll be testing it for the next couple of days.]

OK, I have the following observations:

1) The patch generally causes more memory to be freed during suspend than
the unpatched code (good).
2) However, if more than 50% of RAM is used by application data, it causes
the swap prefetch to trigger during resume (that's an impression; anyway
the system swaps in a lot at that time), which takes some time (generally
it makes resume 5-10s longer on my box).
3) The problem with returning zero prematurely has not been entirely
eliminated. It's happened for me only once, though.

Greetings,
Rafael

2006-03-30 18:38:33

by Rafael J. Wysocki

Subject: Re: [PATCH] mm: swsusp shrink_all_memory tweaks

[update]

On Thursday 30 March 2006 19:12, Rafael J. Wysocki wrote:
}-- snip --{
>
> OK, I have the following observations:
>
> 1) The patch generally causes more memory to be freed during suspend than
> the unpatched code (good).

Oops. s/more/less/

By which I mean that, with the patch applied, the actual image size is usually
closer to the value of image_size.

> 2) However, if more than 50% of RAM is used by application data, it causes
> the swap prefetch to trigger during resume (that's an impression; anyway
> the system swaps in a lot at that time), which takes some time (generally
> it makes resume 5-10s longer on my box).
> 3) The problem with returning zero prematurely has not been entirely
> eliminated. It's happened for me only once, though.

Greetings,
Rafael

2006-03-30 20:39:14

by Con Kolivas

Subject: Re: [PATCH] mm: swsusp shrink_all_memory tweaks

On Friday 31 March 2006 03:12, Rafael J. Wysocki wrote:
> OK, I have the following observations:

Thanks.
>
> 1) The patch generally causes more memory to be freed during suspend than
> the unpatched code (good).

Yes I know you meant less, that's good.

> 2) However, if more than 50% of RAM is used by application data, it causes
> the swap prefetch to trigger during resume (that's an impression; anyway
> the system swaps in a lot at that time), which takes some time (generally
> it makes resume 5-10s longer on my box).

Is that with this "swsusp shrink_all_memory tweaks" patch alone? It doesn't
touch swap prefetch.

> 3) The problem with returning zero prematurely has not been entirely
> eliminated. It's happened for me only once, though.

Probably hard to say, but is the system in any better state after resume has
completed? That was one of the aims. Also a major part of this patch is a
cleanup of the hot balance_pgdat function as well, which suspend no longer
touches with this patch.

Cheers,
Con

2006-03-30 20:58:50

by Rafael J. Wysocki

Subject: Re: [PATCH] mm: swsusp shrink_all_memory tweaks

On Thursday 30 March 2006 22:38, Con Kolivas wrote:
> On Friday 31 March 2006 03:12, Rafael J. Wysocki wrote:
> > OK, I have the following observations:
>
> Thanks.
> >
> > 1) The patch generally causes more memory to be freed during suspend than
> > the unpatched code (good).
>
> Yes I know you meant less, that's good.
>
> > 2) However, if more than 50% of RAM is used by application data, it causes
> > the swap prefetch to trigger during resume (that's an impression; anyway
> > the system swaps in a lot at that time), which takes some time (generally
> > it makes resume 5-10s longer on my box).
>
> Is that with this "swsusp shrink_all_memory tweaks" patch alone? It doesn't
> touch swap prefetch.

Still, swap prefetch is present in -mm, so I think it can be triggered
incidentally.

> > 3) The problem with returning zero prematurely has not been entirely
> > eliminated. It's happened for me only once, though.
>
> Probably hard to say, but is the system in any better state after resume has
> completed?

It seems so, but it also depends on the (actual) image size, memory usage
before suspend etc. Well ...

> That was one of the aims. Also a major part of this patch is a cleanup of
> the hot balance_pgdat function as well, which suspend no longer touches with
> this patch.

I think the patch is a good idea overall, but it needs some more testing.
I'll try to figure out a way to measure its performance, so we have some
hard data to discuss.

Greetings,
Rafael