2014-06-13 04:38:10

by Chen Yucong

[permalink] [raw]
Subject: [PATCH v2] mm/vmscan.c: wrap five parameters into shrink_result for reducing the stack consumption

shrink_page_list() has too many arguments — it has already reached ten.
Some of those arguments and temporary variables introduce an extra 80 bytes
on the stack. This patch wraps five parameters into a shrink_result structure
and removes some temporary variables, thus making the affected functions
consume less stack space.

Before mm/vmscan.c is changed:
text data bss dec hex filename
6876698 957224 966656 8800578 864942 vmlinux-3.15

After mm/vmscan.c is changed:
text data bss dec hex filename
6876506 957224 966656 8800386 864882 vmlinux-3.15


scripts/checkstack.pl can be used for checking the change of the target function stack.

Before mm/vmscan.c is changed:

0xffffffff810af103 shrink_inactive_list []: 152
0xffffffff810af43d shrink_inactive_list []: 152
-------------------------------------------------------------
0xffffffff810aede8 reclaim_clean_pages_from_list []: 184
0xffffffff810aeef8 reclaim_clean_pages_from_list []: 184
-------------------------------------------------------------
0xffffffff810ae582 shrink_page_list []: 232
0xffffffff810aedb5 shrink_page_list []: 232

After mm/vmscan.c is changed::

0xffffffff810af078 shrink_inactive_list []: 120
0xffffffff810af36d shrink_inactive_list []: 120
-------------------------------------------------------------
0xffffffff810aed6c reclaim_clean_pages_from_list []: 152
0xffffffff810aee68 reclaim_clean_pages_from_list []: 152
--------------------------------------------------------------------------------------
0xffffffff810ae586 shrink_page_list []: 184 ---> sub $0xb8,%rsp
0xffffffff810aed36 shrink_page_list []: 184 ---> add $0xb8,%rsp

From the above figures, we can see that the stack saving is 32 bytes for
shrink_inactive_list and for reclaim_clean_pages_from_list, and 48 bytes (232-184)
for shrink_page_list. Given the call hierarchy of these functions, the total
saving is 80 bytes (32+48) for this change.

Changes since v1: https://lkml.org/lkml/2014/6/12/159
* Rename arg_container to shrink_result
* Change the way the shrink_result object is initialized.

Signed-off-by: Chen Yucong <[email protected]>
---
mm/vmscan.c | 62 ++++++++++++++++++++++++++---------------------------------
1 file changed, 27 insertions(+), 35 deletions(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index a8ffe4e..3f28e39 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -791,28 +791,31 @@ static void page_check_dirty_writeback(struct page *page,
}

/*
+ * Callers pass a prezeroed shrink_result into the shrink functions to gather
+ * statistics about how many pages of particular states were processed
+ */
+struct shrink_result {
+ unsigned long nr_dirty;
+ unsigned long nr_unqueued_dirty;
+ unsigned long nr_congested;
+ unsigned long nr_writeback;
+ unsigned long nr_immediate;
+};
+
+/*
* shrink_page_list() returns the number of reclaimed pages
*/
static unsigned long shrink_page_list(struct list_head *page_list,
struct zone *zone,
struct scan_control *sc,
enum ttu_flags ttu_flags,
- unsigned long *ret_nr_dirty,
- unsigned long *ret_nr_unqueued_dirty,
- unsigned long *ret_nr_congested,
- unsigned long *ret_nr_writeback,
- unsigned long *ret_nr_immediate,
+ struct shrink_result *sr,
bool force_reclaim)
{
LIST_HEAD(ret_pages);
LIST_HEAD(free_pages);
int pgactivate = 0;
- unsigned long nr_unqueued_dirty = 0;
- unsigned long nr_dirty = 0;
- unsigned long nr_congested = 0;
unsigned long nr_reclaimed = 0;
- unsigned long nr_writeback = 0;
- unsigned long nr_immediate = 0;

cond_resched();

@@ -858,10 +861,10 @@ static unsigned long shrink_page_list(struct list_head *page_list,
*/
page_check_dirty_writeback(page, &dirty, &writeback);
if (dirty || writeback)
- nr_dirty++;
+ sr->nr_dirty++;

if (dirty && !writeback)
- nr_unqueued_dirty++;
+ sr->nr_unqueued_dirty++;

/*
* Treat this page as congested if the underlying BDI is or if
@@ -872,7 +875,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
mapping = page_mapping(page);
if ((mapping && bdi_write_congested(mapping->backing_dev_info)) ||
(writeback && PageReclaim(page)))
- nr_congested++;
+ sr->nr_congested++;

/*
* If a page at the tail of the LRU is under writeback, there
@@ -916,7 +919,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
if (current_is_kswapd() &&
PageReclaim(page) &&
zone_is_reclaim_writeback(zone)) {
- nr_immediate++;
+ sr->nr_immediate++;
goto keep_locked;

/* Case 2 above */
@@ -934,7 +937,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
* and it's also appropriate in global reclaim.
*/
SetPageReclaim(page);
- nr_writeback++;
+ sr->nr_writeback++;

goto keep_locked;

@@ -1132,11 +1135,6 @@ keep:
list_splice(&ret_pages, page_list);
count_vm_events(PGACTIVATE, pgactivate);
mem_cgroup_uncharge_end();
- *ret_nr_dirty += nr_dirty;
- *ret_nr_congested += nr_congested;
- *ret_nr_unqueued_dirty += nr_unqueued_dirty;
- *ret_nr_writeback += nr_writeback;
- *ret_nr_immediate += nr_immediate;
return nr_reclaimed;
}

@@ -1148,7 +1146,8 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
.priority = DEF_PRIORITY,
.may_unmap = 1,
};
- unsigned long ret, dummy1, dummy2, dummy3, dummy4, dummy5;
+ unsigned long ret;
+ struct shrink_result dummy = { };
struct page *page, *next;
LIST_HEAD(clean_pages);

@@ -1161,8 +1160,7 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
}

ret = shrink_page_list(&clean_pages, zone, &sc,
- TTU_UNMAP|TTU_IGNORE_ACCESS,
- &dummy1, &dummy2, &dummy3, &dummy4, &dummy5, true);
+ TTU_UNMAP|TTU_IGNORE_ACCESS, &dummy, true);
list_splice(&clean_pages, page_list);
mod_zone_page_state(zone, NR_ISOLATED_FILE, -ret);
return ret;
@@ -1469,11 +1467,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
unsigned long nr_scanned;
unsigned long nr_reclaimed = 0;
unsigned long nr_taken;
- unsigned long nr_dirty = 0;
- unsigned long nr_congested = 0;
- unsigned long nr_unqueued_dirty = 0;
- unsigned long nr_writeback = 0;
- unsigned long nr_immediate = 0;
+ struct shrink_result sr = { };
isolate_mode_t isolate_mode = 0;
int file = is_file_lru(lru);
struct zone *zone = lruvec_zone(lruvec);
@@ -1515,9 +1509,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
return 0;

nr_reclaimed = shrink_page_list(&page_list, zone, sc, TTU_UNMAP,
- &nr_dirty, &nr_unqueued_dirty, &nr_congested,
- &nr_writeback, &nr_immediate,
- false);
+ &sr, false);

spin_lock_irq(&zone->lru_lock);

@@ -1554,7 +1546,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
* of pages under pages flagged for immediate reclaim and stall if any
* are encountered in the nr_immediate check below.
*/
- if (nr_writeback && nr_writeback == nr_taken)
+ if (sr.nr_writeback && sr.nr_writeback == nr_taken)
zone_set_flag(zone, ZONE_WRITEBACK);

/*
@@ -1566,7 +1558,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
* Tag a zone as congested if all the dirty pages scanned were
* backed by a congested BDI and wait_iff_congested will stall.
*/
- if (nr_dirty && nr_dirty == nr_congested)
+ if (sr.nr_dirty && sr.nr_dirty == sr.nr_congested)
zone_set_flag(zone, ZONE_CONGESTED);

/*
@@ -1576,7 +1568,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
* pages from reclaim context. It will forcibly stall in the
* next check.
*/
- if (nr_unqueued_dirty == nr_taken)
+ if (sr.nr_unqueued_dirty == nr_taken)
zone_set_flag(zone, ZONE_TAIL_LRU_DIRTY);

/*
@@ -1585,7 +1577,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
* implies that pages are cycling through the LRU faster than
* they are written so also forcibly stall.
*/
- if ((nr_unqueued_dirty == nr_taken || nr_immediate) &&
+ if ((sr.nr_unqueued_dirty == nr_taken || sr.nr_immediate) &&
current_may_throttle())
congestion_wait(BLK_RW_ASYNC, HZ/10);
}
--
1.7.10.4


2014-06-13 04:42:29

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH v2] mm/vmscan.c: wrap five parameters into shrink_result for reducing the stack consumption

On Fri, 13 Jun 2014 12:36:31 +0800 Chen Yucong <[email protected]> wrote:

> @@ -1148,7 +1146,8 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
> .priority = DEF_PRIORITY,
> .may_unmap = 1,
> };
> - unsigned long ret, dummy1, dummy2, dummy3, dummy4, dummy5;
> + unsigned long ret;
> + struct shrink_result dummy = { };

You didn't like the idea of making this static?

2014-06-13 04:52:24

by Konstantin Khlebnikov

[permalink] [raw]
Subject: Re: [PATCH v2] mm/vmscan.c: wrap five parameters into shrink_result for reducing the stack consumption

On Fri, Jun 13, 2014 at 8:36 AM, Chen Yucong <[email protected]> wrote:
> shrink_page_list() has too many arguments that have already reached ten.
> Some of those arguments and temporary variables introduces extra 80 bytes
> on the stack. This patch wraps five parameters into shrink_result and removes
> some temporary variables, thus making the relative functions to consume fewer
> stack space.

I think it's better to put them into struct scan_control.
Reset them before calling shrinker or take a snapshot to get delta.

>
> Before mm/vmscan.c is changed:
> text data bss dec hex filename
> 6876698 957224 966656 8800578 864942 vmlinux-3.15
>
> After mm/vmscan.c is changed:
> text data bss dec hex filename
> 6876506 957224 966656 8800386 864882 vmlinux-3.15
>
>
> scripts/checkstack.pl can be used for checking the change of the target function stack.
>
> Before mm/vmscan.c is changed:
>
> 0xffffffff810af103 shrink_inactive_list []: 152
> 0xffffffff810af43d shrink_inactive_list []: 152
> -------------------------------------------------------------
> 0xffffffff810aede8 reclaim_clean_pages_from_list []: 184
> 0xffffffff810aeef8 reclaim_clean_pages_from_list []: 184
> -------------------------------------------------------------
> 0xffffffff810ae582 shrink_page_list []: 232
> 0xffffffff810aedb5 shrink_page_list []: 232
>
> After mm/vmscan.c is changed::
>
> 0xffffffff810af078 shrink_inactive_list []: 120
> 0xffffffff810af36d shrink_inactive_list []: 120
> -------------------------------------------------------------
> 0xffffffff810aed6c reclaim_clean_pages_from_list []: 152
> 0xffffffff810aee68 reclaim_clean_pages_from_list []: 152
> --------------------------------------------------------------------------------------
> 0xffffffff810ae586 shrink_page_list []: 184 ---> sub $0xb8,%rsp
> 0xffffffff810aed36 shrink_page_list []: 184 ---> add $0xb8,%rsp
>
> Via the above figures, we can find that the difference value of the stack is 32 for
> shrink_inactive_list and reclaim_clean_pages_from_list, and this value is 48(232-184)
> for shrink_page_list. From the hierarchy of functions called, the total difference
> value is 80(32+48) for this change.
>
> Changes since v1: https://lkml.org/lkml/2014/6/12/159
> * Rename arg_container to shrink_result
> * Change the the way of initializing shrink_result object.
>
> Signed-off-by: Chen Yucong <[email protected]>
> ---
> mm/vmscan.c | 62 ++++++++++++++++++++++++++---------------------------------
> 1 file changed, 27 insertions(+), 35 deletions(-)
>
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index a8ffe4e..3f28e39 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -791,28 +791,31 @@ static void page_check_dirty_writeback(struct page *page,
> }
>
> /*
> + * Callers pass a prezeroed shrink_result into the shrink functions to gather
> + * statistics about how many pages of particular states were processed
> + */
> +struct shrink_result {
> + unsigned long nr_dirty;
> + unsigned long nr_unqueued_dirty;
> + unsigned long nr_congested;
> + unsigned long nr_writeback;
> + unsigned long nr_immediate;
> +};
> +
> +/*
> * shrink_page_list() returns the number of reclaimed pages
> */
> static unsigned long shrink_page_list(struct list_head *page_list,
> struct zone *zone,
> struct scan_control *sc,
> enum ttu_flags ttu_flags,
> - unsigned long *ret_nr_dirty,
> - unsigned long *ret_nr_unqueued_dirty,
> - unsigned long *ret_nr_congested,
> - unsigned long *ret_nr_writeback,
> - unsigned long *ret_nr_immediate,
> + struct shrink_result *sr,
> bool force_reclaim)
> {
> LIST_HEAD(ret_pages);
> LIST_HEAD(free_pages);
> int pgactivate = 0;
> - unsigned long nr_unqueued_dirty = 0;
> - unsigned long nr_dirty = 0;
> - unsigned long nr_congested = 0;
> unsigned long nr_reclaimed = 0;
> - unsigned long nr_writeback = 0;
> - unsigned long nr_immediate = 0;
>
> cond_resched();
>
> @@ -858,10 +861,10 @@ static unsigned long shrink_page_list(struct list_head *page_list,
> */
> page_check_dirty_writeback(page, &dirty, &writeback);
> if (dirty || writeback)
> - nr_dirty++;
> + sr->nr_dirty++;
>
> if (dirty && !writeback)
> - nr_unqueued_dirty++;
> + sr->nr_unqueued_dirty++;
>
> /*
> * Treat this page as congested if the underlying BDI is or if
> @@ -872,7 +875,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
> mapping = page_mapping(page);
> if ((mapping && bdi_write_congested(mapping->backing_dev_info)) ||
> (writeback && PageReclaim(page)))
> - nr_congested++;
> + sr->nr_congested++;
>
> /*
> * If a page at the tail of the LRU is under writeback, there
> @@ -916,7 +919,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
> if (current_is_kswapd() &&
> PageReclaim(page) &&
> zone_is_reclaim_writeback(zone)) {
> - nr_immediate++;
> + sr->nr_immediate++;
> goto keep_locked;
>
> /* Case 2 above */
> @@ -934,7 +937,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
> * and it's also appropriate in global reclaim.
> */
> SetPageReclaim(page);
> - nr_writeback++;
> + sr->nr_writeback++;
>
> goto keep_locked;
>
> @@ -1132,11 +1135,6 @@ keep:
> list_splice(&ret_pages, page_list);
> count_vm_events(PGACTIVATE, pgactivate);
> mem_cgroup_uncharge_end();
> - *ret_nr_dirty += nr_dirty;
> - *ret_nr_congested += nr_congested;
> - *ret_nr_unqueued_dirty += nr_unqueued_dirty;
> - *ret_nr_writeback += nr_writeback;
> - *ret_nr_immediate += nr_immediate;
> return nr_reclaimed;
> }
>
> @@ -1148,7 +1146,8 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
> .priority = DEF_PRIORITY,
> .may_unmap = 1,
> };
> - unsigned long ret, dummy1, dummy2, dummy3, dummy4, dummy5;
> + unsigned long ret;
> + struct shrink_result dummy = { };
> struct page *page, *next;
> LIST_HEAD(clean_pages);
>
> @@ -1161,8 +1160,7 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
> }
>
> ret = shrink_page_list(&clean_pages, zone, &sc,
> - TTU_UNMAP|TTU_IGNORE_ACCESS,
> - &dummy1, &dummy2, &dummy3, &dummy4, &dummy5, true);
> + TTU_UNMAP|TTU_IGNORE_ACCESS, &dummy, true);
> list_splice(&clean_pages, page_list);
> mod_zone_page_state(zone, NR_ISOLATED_FILE, -ret);
> return ret;
> @@ -1469,11 +1467,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
> unsigned long nr_scanned;
> unsigned long nr_reclaimed = 0;
> unsigned long nr_taken;
> - unsigned long nr_dirty = 0;
> - unsigned long nr_congested = 0;
> - unsigned long nr_unqueued_dirty = 0;
> - unsigned long nr_writeback = 0;
> - unsigned long nr_immediate = 0;
> + struct shrink_result sr = { };
> isolate_mode_t isolate_mode = 0;
> int file = is_file_lru(lru);
> struct zone *zone = lruvec_zone(lruvec);
> @@ -1515,9 +1509,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
> return 0;
>
> nr_reclaimed = shrink_page_list(&page_list, zone, sc, TTU_UNMAP,
> - &nr_dirty, &nr_unqueued_dirty, &nr_congested,
> - &nr_writeback, &nr_immediate,
> - false);
> + &sr, false);
>
> spin_lock_irq(&zone->lru_lock);
>
> @@ -1554,7 +1546,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
> * of pages under pages flagged for immediate reclaim and stall if any
> * are encountered in the nr_immediate check below.
> */
> - if (nr_writeback && nr_writeback == nr_taken)
> + if (sr.nr_writeback && sr.nr_writeback == nr_taken)
> zone_set_flag(zone, ZONE_WRITEBACK);
>
> /*
> @@ -1566,7 +1558,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
> * Tag a zone as congested if all the dirty pages scanned were
> * backed by a congested BDI and wait_iff_congested will stall.
> */
> - if (nr_dirty && nr_dirty == nr_congested)
> + if (sr.nr_dirty && sr.nr_dirty == sr.nr_congested)
> zone_set_flag(zone, ZONE_CONGESTED);
>
> /*
> @@ -1576,7 +1568,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
> * pages from reclaim context. It will forcibly stall in the
> * next check.
> */
> - if (nr_unqueued_dirty == nr_taken)
> + if (sr.nr_unqueued_dirty == nr_taken)
> zone_set_flag(zone, ZONE_TAIL_LRU_DIRTY);
>
> /*
> @@ -1585,7 +1577,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
> * implies that pages are cycling through the LRU faster than
> * they are written so also forcibly stall.
> */
> - if ((nr_unqueued_dirty == nr_taken || nr_immediate) &&
> + if ((sr.nr_unqueued_dirty == nr_taken || sr.nr_immediate) &&
> current_may_throttle())
> congestion_wait(BLK_RW_ASYNC, HZ/10);
> }
> --
> 1.7.10.4
>
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to [email protected]. For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: <a href=mailto:"[email protected]"> [email protected] </a>

2014-06-13 05:10:44

by Johannes Weiner

[permalink] [raw]
Subject: Re: [PATCH v2] mm/vmscan.c: wrap five parameters into shrink_result for reducing the stack consumption

On Fri, Jun 13, 2014 at 12:36:31PM +0800, Chen Yucong wrote:
> shrink_page_list() has too many arguments that have already reached ten.
> Some of those arguments and temporary variables introduces extra 80 bytes
> on the stack. This patch wraps five parameters into shrink_result and removes
> some temporary variables, thus making the relative functions to consume fewer
> stack space.
>
> Before mm/vmscan.c is changed:
> text data bss dec hex filename
> 6876698 957224 966656 8800578 864942 vmlinux-3.15
>
> After mm/vmscan.c is changed:
> text data bss dec hex filename
> 6876506 957224 966656 8800386 864882 vmlinux-3.15
>
>
> scripts/checkstack.pl can be used for checking the change of the target function stack.
>
> Before mm/vmscan.c is changed:
>
> 0xffffffff810af103 shrink_inactive_list []: 152
> 0xffffffff810af43d shrink_inactive_list []: 152
> -------------------------------------------------------------
> 0xffffffff810aede8 reclaim_clean_pages_from_list []: 184
> 0xffffffff810aeef8 reclaim_clean_pages_from_list []: 184
> -------------------------------------------------------------
> 0xffffffff810ae582 shrink_page_list []: 232
> 0xffffffff810aedb5 shrink_page_list []: 232
>
> After mm/vmscan.c is changed::
>
> 0xffffffff810af078 shrink_inactive_list []: 120
> 0xffffffff810af36d shrink_inactive_list []: 120
> -------------------------------------------------------------
> 0xffffffff810aed6c reclaim_clean_pages_from_list []: 152
> 0xffffffff810aee68 reclaim_clean_pages_from_list []: 152
> --------------------------------------------------------------------------------------
> 0xffffffff810ae586 shrink_page_list []: 184 ---> sub $0xb8,%rsp
> 0xffffffff810aed36 shrink_page_list []: 184 ---> add $0xb8,%rsp
>
> Via the above figures, we can find that the difference value of the stack is 32 for
> shrink_inactive_list and reclaim_clean_pages_from_list, and this value is 48(232-184)
> for shrink_page_list. From the hierarchy of functions called, the total difference
> value is 80(32+48) for this change.

We just increased the stack size by 8k. I'm not saying that we
shouldn't work on our stack footprint, but is this really worth it?
It doesn't make that code easier to follow exactly.

> Changes since v1: https://lkml.org/lkml/2014/6/12/159
> * Rename arg_container to shrink_result
> * Change the the way of initializing shrink_result object.
>
> Signed-off-by: Chen Yucong <[email protected]>
> ---
> mm/vmscan.c | 62 ++++++++++++++++++++++++++---------------------------------
> 1 file changed, 27 insertions(+), 35 deletions(-)
>
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index a8ffe4e..3f28e39 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -791,28 +791,31 @@ static void page_check_dirty_writeback(struct page *page,
> }
>
> /*
> + * Callers pass a prezeroed shrink_result into the shrink functions to gather
> + * statistics about how many pages of particular states were processed
> + */
> +struct shrink_result {
> + unsigned long nr_dirty;
> + unsigned long nr_unqueued_dirty;
> + unsigned long nr_congested;
> + unsigned long nr_writeback;
> + unsigned long nr_immediate;
> +};

This exclusively contains statistics on the writeback states of the
scanned pages. struct writeback_stats?

2014-06-13 05:21:46

by Johannes Weiner

[permalink] [raw]
Subject: Re: [PATCH v2] mm/vmscan.c: wrap five parameters into shrink_result for reducing the stack consumption

On Fri, Jun 13, 2014 at 08:52:22AM +0400, Konstantin Khlebnikov wrote:
> On Fri, Jun 13, 2014 at 8:36 AM, Chen Yucong <[email protected]> wrote:
> > shrink_page_list() has too many arguments that have already reached ten.
> > Some of those arguments and temporary variables introduces extra 80 bytes
> > on the stack. This patch wraps five parameters into shrink_result and removes
> > some temporary variables, thus making the relative functions to consume fewer
> > stack space.
>
> I think it's better to put them into struct scan_control.
> Reset them before calling shrinker or take a snapshot to get delta.

scan_control applies to the whole reclaim invocation*, it would be
confusing as hell to have things in there that only apply to certain
sublevels. Please don't do that.

If you on the other hand take snapshots and accumulate them over the
whole run, it might actually make sense to move sc->nr_scanned and
sc->nr_reclaimed into shrink_results instead. But I'm not sure it's
worth the extra snapshotting code, given that we don't actually need
the accumulated numbers at the outer levels right now.

* sc->swappiness being the recent exception, I'll send a fix for that.

2014-06-13 05:23:54

by Chen Yucong

[permalink] [raw]
Subject: Re: [PATCH v2] mm/vmscan.c: wrap five parameters into shrink_result for reducing the stack consumption

On Thu, 2014-06-12 at 21:40 -0700, Andrew Morton wrote:
> On Fri, 13 Jun 2014 12:36:31 +0800 Chen Yucong <[email protected]> wrote:
>
> > @@ -1148,7 +1146,8 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
> > .priority = DEF_PRIORITY,
> > .may_unmap = 1,
> > };
> > - unsigned long ret, dummy1, dummy2, dummy3, dummy4, dummy5;
> > + unsigned long ret;
> > + struct shrink_result dummy = { };
>
> You didn't like the idea of making this static?
Sorry! It's my negligence.
If we make dummy static, it can help us save more stack.

without change:
0xffffffff810aede8 reclaim_clean_pages_from_list []: 184
0xffffffff810aeef8 reclaim_clean_pages_from_list []: 184

with change: struct shrink_result dummy = {};
0xffffffff810aed6c reclaim_clean_pages_from_list []: 152
0xffffffff810aee68 reclaim_clean_pages_from_list []: 152

with change: static struct shrink_result dummy ={};
0xffffffff810aed69 reclaim_clean_pages_from_list []: 120
0xffffffff810aee4d reclaim_clean_pages_from_list []: 120

thx!
cyc

2014-06-13 10:21:37

by Konstantin Khlebnikov

[permalink] [raw]
Subject: Re: [PATCH v2] mm/vmscan.c: wrap five parameters into shrink_result for reducing the stack consumption

On Fri, Jun 13, 2014 at 9:21 AM, Johannes Weiner <[email protected]> wrote:
> On Fri, Jun 13, 2014 at 08:52:22AM +0400, Konstantin Khlebnikov wrote:
>> On Fri, Jun 13, 2014 at 8:36 AM, Chen Yucong <[email protected]> wrote:
>> > shrink_page_list() has too many arguments that have already reached ten.
>> > Some of those arguments and temporary variables introduces extra 80 bytes
>> > on the stack. This patch wraps five parameters into shrink_result and removes
>> > some temporary variables, thus making the relative functions to consume fewer
>> > stack space.
>>
>> I think it's better to put them into struct scan_control.
>> Reset them before calling shrinker or take a snapshot to get delta.
>
> scan_control applies to the whole reclaim invocation*, it would be
> confusing as hell to have things in there that only apply to certain
> sublevels. Please don't do that.

scan_control is internal private structure and reclaimer is small and
simple enough to hold whole state here.
For me it's easier to track state of single structure which is alive
during whole invocation,
than several smaller structures especially if some of them disappears
from time to time.

If it would be easier for you -- shrink_result might be embedded as
sub-structure.

>
> If you on the other hand take snapshots and accumulate them over the
> whole run, it might actually make sense to move sc->nr_scanned and
> sc->nr_reclaimed into shrink_results instead. But I'm not sure it's
> worth the extra snapshotting code, given that we don't actually need
> the accumulated numbers at the outer levels right now.
>
> * sc->swappiness being the recent exception, I'll send a fix for that.

2014-06-13 16:28:17

by Johannes Weiner

[permalink] [raw]
Subject: Re: [PATCH v2] mm/vmscan.c: wrap five parameters into shrink_result for reducing the stack consumption

On Fri, Jun 13, 2014 at 01:21:15PM +0800, Chen Yucong wrote:
> On Thu, 2014-06-12 at 21:40 -0700, Andrew Morton wrote:
> > On Fri, 13 Jun 2014 12:36:31 +0800 Chen Yucong <[email protected]> wrote:
> >
> > > @@ -1148,7 +1146,8 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
> > > .priority = DEF_PRIORITY,
> > > .may_unmap = 1,
> > > };
> > > - unsigned long ret, dummy1, dummy2, dummy3, dummy4, dummy5;
> > > + unsigned long ret;
> > > + struct shrink_result dummy = { };
> >
> > You didn't like the idea of making this static?
> Sorry! It's my negligence.
> If we make dummy static, it can help us save more stack.
>
> without change:
> 0xffffffff810aede8 reclaim_clean_pages_from_list []: 184
> 0xffffffff810aeef8 reclaim_clean_pages_from_list []: 184
>
> with change: struct shrink_result dummy = {};
> 0xffffffff810aed6c reclaim_clean_pages_from_list []: 152
> 0xffffffff810aee68 reclaim_clean_pages_from_list []: 152
>
> with change: static struct shrink_result dummy ={};
> 0xffffffff810aed69 reclaim_clean_pages_from_list []: 120
> 0xffffffff810aee4d reclaim_clean_pages_from_list []: 120

FWIW, I copied bloat-o-meter and hacked up a quick comparison tool
that you can feed two outputs of checkstack.pl for a whole vmlinux and
it shows you the delta.

The output for your patch (with the static dummy) looks like this:

+0/-240 -240
shrink_inactive_list 136 112 -24
shrink_page_list 208 160 -48
reclaim_clean_pages_from_list 168 - -168

(The stack footprint for reclaim_clean_pages_from_list is actually 96
after your patch, but checkstack.pl skips frames under 100)

---
#!/usr/bin/python
#
# Based on bloat-o-meter
#
# Compare two outputs of scripts/checkstack.pl and print per-function
# stack-size deltas, bloat-o-meter style:
#     checkstack-compare old.txt new.txt
#
# NOTE(review): indentation was lost in the mailing-list archive; the
# structure below is reconstructed to match the original control flow.

import sys
import re


def getsizes(filename):
    """Parse checkstack.pl output and return {symbol_name: stack_size}.

    Expected line format: "0xADDR symbol [object]: SIZE".
    A line that does not split into the five expected fields is printed
    and aborts the run (same behavior as the original script); a line
    whose SIZE field is not an integer is silently skipped.
    """
    sym = {}
    with open(filename) as f:
        for line in f:
            x = re.split('(0x.*) (.*) (.*):[ \t]*(.*)', line)
            try:
                foo, addr, name, src, size, bar = x
            except ValueError:
                # Unexpected line shape: show it, then bail out loudly.
                print(x)
                raise Exception
            try:
                sym[name] = int(size)
            except ValueError:
                continue
    return sym


def main():
    if len(sys.argv) != 3:
        print("usage: %s file1 file2" % sys.argv[0])
        sys.exit(1)

    old = getsizes(sys.argv[1])
    new = getsizes(sys.argv[2])

    inc = 0      # total bytes gained (new or grown frames)
    dec = 0      # total bytes saved (removed or shrunk frames)
    delta = []   # (delta, name) pairs for the report
    common = {}  # symbols present in both inputs

    for a in old:
        if a in new:
            common[a] = 1

    # Frames that disappeared entirely count fully toward the savings.
    for name in old:
        if name not in common:
            dec += old[name]
            delta.append((-old[name], name))

    # Frames that are new count fully toward the growth.
    for name in new:
        if name not in common:
            inc += new[name]
            delta.append((new[name], name))

    # Frames present in both: attribute the signed difference.
    for name in common:
        d = new.get(name, 0) - old.get(name, 0)
        if d > 0:
            inc += d
        if d < 0:
            dec -= d
        delta.append((d, name))

    delta.sort()
    delta.reverse()

    print("+%d/-%d %+d" % (inc, dec, inc - dec))
    for d, name in delta:
        if d:
            print("%-40s %7s %7s %+7d" %
                  (name, old.get(name, "-"), new.get(name, "-"), d))


if __name__ == "__main__":
    main()

2014-06-14 03:06:29

by Chen Yucong

[permalink] [raw]
Subject: Re: [PATCH v2] mm/vmscan.c: wrap five parameters into shrink_result for reducing the stack consumption

On Fri, 2014-06-13 at 12:28 -0400, Johannes Weiner wrote:
> On Fri, Jun 13, 2014 at 01:21:15PM +0800, Chen Yucong wrote:
> > On Thu, 2014-06-12 at 21:40 -0700, Andrew Morton wrote:
> > > On Fri, 13 Jun 2014 12:36:31 +0800 Chen Yucong <[email protected]> wrote:
> > >
> > > > @@ -1148,7 +1146,8 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
> > > > .priority = DEF_PRIORITY,
> > > > .may_unmap = 1,
> > > > };
> > > > - unsigned long ret, dummy1, dummy2, dummy3, dummy4, dummy5;
> > > > + unsigned long ret;
> > > > + struct shrink_result dummy = { };
> > >
> > > You didn't like the idea of making this static?
> > Sorry! It's my negligence.
> > If we make dummy static, it can help us save more stack.
> >
> > without change:
> > 0xffffffff810aede8 reclaim_clean_pages_from_list []: 184
> > 0xffffffff810aeef8 reclaim_clean_pages_from_list []: 184
> >
> > with change: struct shrink_result dummy = {};
> > 0xffffffff810aed6c reclaim_clean_pages_from_list []: 152
> > 0xffffffff810aee68 reclaim_clean_pages_from_list []: 152
> >
> > with change: static struct shrink_result dummy ={};
> > 0xffffffff810aed69 reclaim_clean_pages_from_list []: 120
> > 0xffffffff810aee4d reclaim_clean_pages_from_list []: 120
>
> FWIW, I copied bloat-o-meter and hacked up a quick comparison tool
> that you can feed two outputs of checkstack.pl for a whole vmlinux and
> it shows you the delta.
>
> The output for your patch (with the static dummy) looks like this:
>
> +0/-240 -240
> shrink_inactive_list 136 112 -24
> shrink_page_list 208 160 -48
> reclaim_clean_pages_from_list 168 - -168
>
> (The stack footprint for reclaim_clean_pages_from_list is actually 96
> after your patch, but checkstack.pl skips frames under 100)
>
Thanks very much for your comparison tool. Its output is more concise.

thx!
cyc

gcc version 4.7.3 (Gentoo 4.7.3-r1 p1.4, pie-0.5.5)
kernel version 3.15(stable)
Intel(R) Core(TM)2 Duo CPU T5670 @ 1.80GHz

The output for this patch (with the static dummy) is:

+0/-144 -144
shrink_inactive_list 152 120 -32
shrink_page_list 232 184 -48
reclaim_clean_pages_from_list 184 120 -64

-------
gcc version 4.7.2 (Debian 4.7.2-5)
kernel version 3.15(stable)
Intel(R) Core(TM) i5-2320 CPU @ 3.00GHz

The output for this patch (with the static dummy) is:

shrink_inactive_list 136 120 -16
shrink_page_list 216 168 -48
reclaim_clean_pages_from_list 184 120 -64