2014-06-12 09:38:53

by Chen Yucong

[permalink] [raw]
Subject: [PATCH] mm/vmscan.c: wrap five parameters into arg_container in shrink_page_list()

shrink_page_list() already takes ten arguments, which is too many.
Some of those arguments and temporary variables introduce an extra 80 bytes
on the stack.

This patch wraps five parameters into arg_container and removes some temporary
variables, thus making shrink_page_list() consume less stack space.

Before mm/vmscan.c is modified:
text data bss dec hex filename
6876698 957224 966656 8800578 864942 vmlinux-3.15

After mm/vmscan.c is changed:
text data bss dec hex filename
6876506 957224 966656 8800386 864882 vmlinux-3.15

Signed-off-by: Chen Yucong <[email protected]>
---
mm/vmscan.c | 64 +++++++++++++++++++++++++++--------------------------------
1 file changed, 29 insertions(+), 35 deletions(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index a8ffe4e..538cdcf 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -790,6 +790,14 @@ static void page_check_dirty_writeback(struct page *page,
mapping->a_ops->is_dirty_writeback(page, dirty, writeback);
}

+struct arg_container {
+ unsigned long nr_dirty;
+ unsigned long nr_unqueued_dirty;
+ unsigned long nr_congested;
+ unsigned long nr_writeback;
+ unsigned long nr_immediate;
+};
+
/*
* shrink_page_list() returns the number of reclaimed pages
*/
@@ -797,22 +805,13 @@ static unsigned long shrink_page_list(struct list_head *page_list,
struct zone *zone,
struct scan_control *sc,
enum ttu_flags ttu_flags,
- unsigned long *ret_nr_dirty,
- unsigned long *ret_nr_unqueued_dirty,
- unsigned long *ret_nr_congested,
- unsigned long *ret_nr_writeback,
- unsigned long *ret_nr_immediate,
+ struct arg_container *ac,
bool force_reclaim)
{
LIST_HEAD(ret_pages);
LIST_HEAD(free_pages);
int pgactivate = 0;
- unsigned long nr_unqueued_dirty = 0;
- unsigned long nr_dirty = 0;
- unsigned long nr_congested = 0;
unsigned long nr_reclaimed = 0;
- unsigned long nr_writeback = 0;
- unsigned long nr_immediate = 0;

cond_resched();

@@ -858,10 +857,10 @@ static unsigned long shrink_page_list(struct list_head *page_list,
*/
page_check_dirty_writeback(page, &dirty, &writeback);
if (dirty || writeback)
- nr_dirty++;
+ ac->nr_dirty++;

if (dirty && !writeback)
- nr_unqueued_dirty++;
+ ac->nr_unqueued_dirty++;

/*
* Treat this page as congested if the underlying BDI is or if
@@ -872,7 +871,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
mapping = page_mapping(page);
if ((mapping && bdi_write_congested(mapping->backing_dev_info)) ||
(writeback && PageReclaim(page)))
- nr_congested++;
+ ac->nr_congested++;

/*
* If a page at the tail of the LRU is under writeback, there
@@ -916,7 +915,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
if (current_is_kswapd() &&
PageReclaim(page) &&
zone_is_reclaim_writeback(zone)) {
- nr_immediate++;
+ ac->nr_immediate++;
goto keep_locked;

/* Case 2 above */
@@ -934,7 +933,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
* and it's also appropriate in global reclaim.
*/
SetPageReclaim(page);
- nr_writeback++;
+ ac->nr_writeback++;

goto keep_locked;

@@ -1132,11 +1131,6 @@ keep:
list_splice(&ret_pages, page_list);
count_vm_events(PGACTIVATE, pgactivate);
mem_cgroup_uncharge_end();
- *ret_nr_dirty += nr_dirty;
- *ret_nr_congested += nr_congested;
- *ret_nr_unqueued_dirty += nr_unqueued_dirty;
- *ret_nr_writeback += nr_writeback;
- *ret_nr_immediate += nr_immediate;
return nr_reclaimed;
}

@@ -1148,7 +1142,8 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
.priority = DEF_PRIORITY,
.may_unmap = 1,
};
- unsigned long ret, dummy1, dummy2, dummy3, dummy4, dummy5;
+ unsigned long ret;
+ struct arg_container dummy;
struct page *page, *next;
LIST_HEAD(clean_pages);

@@ -1161,8 +1156,7 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
}

ret = shrink_page_list(&clean_pages, zone, &sc,
- TTU_UNMAP|TTU_IGNORE_ACCESS,
- &dummy1, &dummy2, &dummy3, &dummy4, &dummy5, true);
+ TTU_UNMAP|TTU_IGNORE_ACCESS, &dummy, true);
list_splice(&clean_pages, page_list);
mod_zone_page_state(zone, NR_ISOLATED_FILE, -ret);
return ret;
@@ -1469,11 +1463,13 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
unsigned long nr_scanned;
unsigned long nr_reclaimed = 0;
unsigned long nr_taken;
- unsigned long nr_dirty = 0;
- unsigned long nr_congested = 0;
- unsigned long nr_unqueued_dirty = 0;
- unsigned long nr_writeback = 0;
- unsigned long nr_immediate = 0;
+ struct arg_container ac = {
+ .nr_dirty = 0,
+ .nr_congested = 0,
+ .nr_unqueued_dirty = 0,
+ .nr_writeback = 0,
+ .nr_immediate = 0,
+ };
isolate_mode_t isolate_mode = 0;
int file = is_file_lru(lru);
struct zone *zone = lruvec_zone(lruvec);
@@ -1515,9 +1511,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
return 0;

nr_reclaimed = shrink_page_list(&page_list, zone, sc, TTU_UNMAP,
- &nr_dirty, &nr_unqueued_dirty, &nr_congested,
- &nr_writeback, &nr_immediate,
- false);
+ &ac, false);

spin_lock_irq(&zone->lru_lock);

@@ -1554,7 +1548,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
* of pages under pages flagged for immediate reclaim and stall if any
* are encountered in the nr_immediate check below.
*/
- if (nr_writeback && nr_writeback == nr_taken)
+ if (ac.nr_writeback && ac.nr_writeback == nr_taken)
zone_set_flag(zone, ZONE_WRITEBACK);

/*
@@ -1566,7 +1560,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
* Tag a zone as congested if all the dirty pages scanned were
* backed by a congested BDI and wait_iff_congested will stall.
*/
- if (nr_dirty && nr_dirty == nr_congested)
+ if (ac.nr_dirty && ac.nr_dirty == ac.nr_congested)
zone_set_flag(zone, ZONE_CONGESTED);

/*
@@ -1576,7 +1570,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
* pages from reclaim context. It will forcibly stall in the
* next check.
*/
- if (nr_unqueued_dirty == nr_taken)
+ if (ac.nr_unqueued_dirty == nr_taken)
zone_set_flag(zone, ZONE_TAIL_LRU_DIRTY);

/*
@@ -1585,7 +1579,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
* implies that pages are cycling through the LRU faster than
* they are written so also forcibly stall.
*/
- if ((nr_unqueued_dirty == nr_taken || nr_immediate) &&
+ if ((ac.nr_unqueued_dirty == nr_taken || ac.nr_immediate) &&
current_may_throttle())
congestion_wait(BLK_RW_ASYNC, HZ/10);
}
--
1.7.10.4


2014-06-12 12:45:55

by Rik van Riel

[permalink] [raw]
Subject: Re: [PATCH] mm/vmscan.c: wrap five parameters into arg_container in shrink_page_list()

On 06/12/2014 05:36 AM, Chen Yucong wrote:
> shrink_page_list() has too many arguments that have already reached ten.
> Some of those arguments and temporary variables introduces extra 80 bytes
> on the stack.
>
> This patch wraps five parameters into arg_container and removes some temporary
> variables, thus making shrink_page_list() to consume fewer stack space.

Won't the container with those arguments now live on the stack,
using up the same space that the variables used to take?

> Before mm/vmscan.c is modified:
> text data bss dec hex filename
> 6876698 957224 966656 8800578 864942 vmlinux-3.15
>
> After mm/vmscan.c is changed:
> text data bss dec hex filename
> 6876506 957224 966656 8800386 864882 vmlinux-3.15
>
> Signed-off-by: Chen Yucong <[email protected]>
> ---
> mm/vmscan.c | 64 +++++++++++++++++++++++++++--------------------------------
> 1 file changed, 29 insertions(+), 35 deletions(-)
>
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index a8ffe4e..538cdcf 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -790,6 +790,14 @@ static void page_check_dirty_writeback(struct page *page,
> mapping->a_ops->is_dirty_writeback(page, dirty, writeback);
> }
>
> +struct arg_container {
> + unsigned long nr_dirty;
> + unsigned long nr_unqueued_dirty;
> + unsigned long nr_congested;
> + unsigned long nr_writeback;
> + unsigned long nr_immediate;
> +};
> +
> /*
> * shrink_page_list() returns the number of reclaimed pages
> */
> @@ -797,22 +805,13 @@ static unsigned long shrink_page_list(struct list_head *page_list,
> struct zone *zone,
> struct scan_control *sc,
> enum ttu_flags ttu_flags,
> - unsigned long *ret_nr_dirty,
> - unsigned long *ret_nr_unqueued_dirty,
> - unsigned long *ret_nr_congested,
> - unsigned long *ret_nr_writeback,
> - unsigned long *ret_nr_immediate,
> + struct arg_container *ac,
> bool force_reclaim)
> {
> LIST_HEAD(ret_pages);
> LIST_HEAD(free_pages);
> int pgactivate = 0;
> - unsigned long nr_unqueued_dirty = 0;
> - unsigned long nr_dirty = 0;
> - unsigned long nr_congested = 0;
> unsigned long nr_reclaimed = 0;
> - unsigned long nr_writeback = 0;
> - unsigned long nr_immediate = 0;
>
> cond_resched();
>
> @@ -858,10 +857,10 @@ static unsigned long shrink_page_list(struct list_head *page_list,
> */
> page_check_dirty_writeback(page, &dirty, &writeback);
> if (dirty || writeback)
> - nr_dirty++;
> + ac->nr_dirty++;
>
> if (dirty && !writeback)
> - nr_unqueued_dirty++;
> + ac->nr_unqueued_dirty++;
>
> /*
> * Treat this page as congested if the underlying BDI is or if
> @@ -872,7 +871,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
> mapping = page_mapping(page);
> if ((mapping && bdi_write_congested(mapping->backing_dev_info)) ||
> (writeback && PageReclaim(page)))
> - nr_congested++;
> + ac->nr_congested++;
>
> /*
> * If a page at the tail of the LRU is under writeback, there
> @@ -916,7 +915,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
> if (current_is_kswapd() &&
> PageReclaim(page) &&
> zone_is_reclaim_writeback(zone)) {
> - nr_immediate++;
> + ac->nr_immediate++;
> goto keep_locked;
>
> /* Case 2 above */
> @@ -934,7 +933,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
> * and it's also appropriate in global reclaim.
> */
> SetPageReclaim(page);
> - nr_writeback++;
> + ac->nr_writeback++;
>
> goto keep_locked;
>
> @@ -1132,11 +1131,6 @@ keep:
> list_splice(&ret_pages, page_list);
> count_vm_events(PGACTIVATE, pgactivate);
> mem_cgroup_uncharge_end();
> - *ret_nr_dirty += nr_dirty;
> - *ret_nr_congested += nr_congested;
> - *ret_nr_unqueued_dirty += nr_unqueued_dirty;
> - *ret_nr_writeback += nr_writeback;
> - *ret_nr_immediate += nr_immediate;
> return nr_reclaimed;
> }
>
> @@ -1148,7 +1142,8 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
> .priority = DEF_PRIORITY,
> .may_unmap = 1,
> };
> - unsigned long ret, dummy1, dummy2, dummy3, dummy4, dummy5;
> + unsigned long ret;
> + struct arg_container dummy;
> struct page *page, *next;
> LIST_HEAD(clean_pages);
>
> @@ -1161,8 +1156,7 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
> }
>
> ret = shrink_page_list(&clean_pages, zone, &sc,
> - TTU_UNMAP|TTU_IGNORE_ACCESS,
> - &dummy1, &dummy2, &dummy3, &dummy4, &dummy5, true);
> + TTU_UNMAP|TTU_IGNORE_ACCESS, &dummy, true);
> list_splice(&clean_pages, page_list);
> mod_zone_page_state(zone, NR_ISOLATED_FILE, -ret);
> return ret;
> @@ -1469,11 +1463,13 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
> unsigned long nr_scanned;
> unsigned long nr_reclaimed = 0;
> unsigned long nr_taken;
> - unsigned long nr_dirty = 0;
> - unsigned long nr_congested = 0;
> - unsigned long nr_unqueued_dirty = 0;
> - unsigned long nr_writeback = 0;
> - unsigned long nr_immediate = 0;
> + struct arg_container ac = {
> + .nr_dirty = 0,
> + .nr_congested = 0,
> + .nr_unqueued_dirty = 0,
> + .nr_writeback = 0,
> + .nr_immediate = 0,
> + };
> isolate_mode_t isolate_mode = 0;
> int file = is_file_lru(lru);
> struct zone *zone = lruvec_zone(lruvec);
> @@ -1515,9 +1511,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
> return 0;
>
> nr_reclaimed = shrink_page_list(&page_list, zone, sc, TTU_UNMAP,
> - &nr_dirty, &nr_unqueued_dirty, &nr_congested,
> - &nr_writeback, &nr_immediate,
> - false);
> + &ac, false);
>
> spin_lock_irq(&zone->lru_lock);
>
> @@ -1554,7 +1548,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
> * of pages under pages flagged for immediate reclaim and stall if any
> * are encountered in the nr_immediate check below.
> */
> - if (nr_writeback && nr_writeback == nr_taken)
> + if (ac.nr_writeback && ac.nr_writeback == nr_taken)
> zone_set_flag(zone, ZONE_WRITEBACK);
>
> /*
> @@ -1566,7 +1560,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
> * Tag a zone as congested if all the dirty pages scanned were
> * backed by a congested BDI and wait_iff_congested will stall.
> */
> - if (nr_dirty && nr_dirty == nr_congested)
> + if (ac.nr_dirty && ac.nr_dirty == ac.nr_congested)
> zone_set_flag(zone, ZONE_CONGESTED);
>
> /*
> @@ -1576,7 +1570,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
> * pages from reclaim context. It will forcibly stall in the
> * next check.
> */
> - if (nr_unqueued_dirty == nr_taken)
> + if (ac.nr_unqueued_dirty == nr_taken)
> zone_set_flag(zone, ZONE_TAIL_LRU_DIRTY);
>
> /*
> @@ -1585,7 +1579,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
> * implies that pages are cycling through the LRU faster than
> * they are written so also forcibly stall.
> */
> - if ((nr_unqueued_dirty == nr_taken || nr_immediate) &&
> + if ((ac.nr_unqueued_dirty == nr_taken || ac.nr_immediate) &&
> current_may_throttle())
> congestion_wait(BLK_RW_ASYNC, HZ/10);
> }
>


--
All rights reversed

2014-06-12 13:27:20

by Chen Yucong

[permalink] [raw]
Subject: Re: [PATCH] mm/vmscan.c: wrap five parameters into arg_container in shrink_page_list()

On Thu, 2014-06-12 at 08:45 -0400, Rik van Riel wrote:
> > shrink_page_list() has too many arguments that have already reached
> ten.
> > Some of those arguments and temporary variables introduces extra 80
> bytes
> > on the stack.
> >
> > This patch wraps five parameters into arg_container and removes some
> temporary
> > variables, thus making shrink_page_list() to consume fewer stack
> space.
>
> Won't the container with those arguments now live on the stack,
> using up the same space that the variables used to take?
>
Of course, the container with those arguments lives on the stack.

One of the key reasons for introducing this patch is to avoid passing
five pointer arguments to shrink_page_list().

The arg_container also uses up the same space that the variables used to
take.
If those arguments are wrapped into arg_container, we just need to pass
one pointer to shrink_page_list() instead of five.

thx!
cyc

2014-06-12 20:00:20

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH] mm/vmscan.c: wrap five parameters into arg_container in shrink_page_list()

On Thu, 12 Jun 2014 17:36:35 +0800 Chen Yucong <[email protected]> wrote:

> shrink_page_list() has too many arguments that have already reached ten.
> Some of those arguments and temporary variables introduces extra 80 bytes
> on the stack.
>
> This patch wraps five parameters into arg_container and removes some temporary
> variables, thus making shrink_page_list() to consume fewer stack space.
>
> Before mm/vmscan.c is modified:
> text data bss dec hex filename
> 6876698 957224 966656 8800578 864942 vmlinux-3.15
>
> After mm/vmscan.c is changed:
> text data bss dec hex filename
> 6876506 957224 966656 8800386 864882 vmlinux-3.15

Code size reduction is a good sign.

> 1 file changed, 29 insertions(+), 35 deletions(-)

We can look at the frame pointer alterations. Requires
CONFIG_FRAME_POINTER. There's also scripts/checkstack.pl.

Without:

shrink_page_list:
pushq %rbp #
movq %rsp, %rbp #,
pushq %r15 #
pushq %r14 #
pushq %r13 #
pushq %r12 #
pushq %rbx #
subq $184, %rsp #,

With:

shrink_page_list:
pushq %rbp #
movq %rsp, %rbp #,
pushq %r15 #
pushq %r14 #
pushq %r13 #
pushq %r12 #
pushq %rbx #
subq $136, %rsp #,

So we've saved approx 184-136=48 bytes of stack in shrink_page_list().
shrink_inactive_list() stack space is unchanged.

Please do this sort of analysis yourself and include it in the changelogs.

> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -790,6 +790,14 @@ static void page_check_dirty_writeback(struct page *page,
> mapping->a_ops->is_dirty_writeback(page, dirty, writeback);
> }
>
> +struct arg_container {
> + unsigned long nr_dirty;
> + unsigned long nr_unqueued_dirty;
> + unsigned long nr_congested;
> + unsigned long nr_writeback;
> + unsigned long nr_immediate;
> +};

This name is dreadful. Let's give it a nice, meaningful name and
document it appropriately. So it all looks like a part of the vmscan
code and not some hack which was bolted onto the side to save a bit of
stack.

Something like

/*
* Callers pass a prezeroed shrink_result into the shrink functions to gather
* statistics about how many pages of particular states were processed
*/
struct shrink_result {
...


> /*
> * shrink_page_list() returns the number of reclaimed pages
> */
>
> ...
>
> @@ -1148,7 +1142,8 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
> .priority = DEF_PRIORITY,
> .may_unmap = 1,
> };
> - unsigned long ret, dummy1, dummy2, dummy3, dummy4, dummy5;
> + unsigned long ret;
> + struct arg_container dummy;

If we're not going to use this then we can make it static and save more
stack. That will have some runtime cost as different CPUs fight over
ownership of cachelines but I doubt if it will be significant.

If we leave it on the stack then this code will send kmemcheck berserk
with all the used-uninitialized errors. Presumably that is already the
case. Perhaps `dummy' should be initialized if kmemcheck is in
operation, dunno.


> struct page *page, *next;
> LIST_HEAD(clean_pages);
>
>
> ...
>
> @@ -1469,11 +1463,13 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
> unsigned long nr_scanned;
> unsigned long nr_reclaimed = 0;
> unsigned long nr_taken;
> - unsigned long nr_dirty = 0;
> - unsigned long nr_congested = 0;
> - unsigned long nr_unqueued_dirty = 0;
> - unsigned long nr_writeback = 0;
> - unsigned long nr_immediate = 0;
> + struct arg_container ac = {
> + .nr_dirty = 0,
> + .nr_congested = 0,
> + .nr_unqueued_dirty = 0,
> + .nr_writeback = 0,
> + .nr_immediate = 0,
> + };

This:

struct arg_container ac = { };

> isolate_mode_t isolate_mode = 0;
> int file = is_file_lru(lru);
> struct zone *zone = lruvec_zone(lruvec);
>
> ...
>