Hi
This patch series contains the following three patches:
[1/3] Rename pgmoved variable in shrink_active_list()
[2/3] mm: shrink_inactive_list() nr_scan accounting fix fix
[3/3] add isolate pages vmstat
[1/3] and [2/3] are trivial code cleanups. [3/3] is the heart of this fix.
Subject: [PATCH] Rename pgmoved variable in shrink_active_list()
Currently, the pgmoved variable has two meanings, which makes review a bit harder.
This patch separates them.
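To illustrate (a condensed sketch of the two uses in the diff below, not
part of the patch itself):

	nr_taken = sc->isolate_pages(...);	/* 1st meaning: pages taken off the LRU */
	...
	nr_rotated++;				/* 2nd meaning: referenced pages to rotate */

Before this patch both counts lived in the single pgmoved variable, which
had to be reset to zero between the two uses.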
Signed-off-by: KOSAKI Motohiro <[email protected]>
---
mm/vmscan.c | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
Index: b/mm/vmscan.c
===================================================================
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1239,7 +1239,7 @@ static void move_active_pages_to_lru(str
static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
struct scan_control *sc, int priority, int file)
{
- unsigned long pgmoved;
+ unsigned long nr_taken;
unsigned long pgscanned;
unsigned long vm_flags;
LIST_HEAD(l_hold); /* The pages which were snipped off */
@@ -1247,10 +1247,11 @@ static void shrink_active_list(unsigned
LIST_HEAD(l_inactive);
struct page *page;
struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
+ unsigned long nr_rotated = 0;
lru_add_drain();
spin_lock_irq(&zone->lru_lock);
- pgmoved = sc->isolate_pages(nr_pages, &l_hold, &pgscanned, sc->order,
+ nr_taken = sc->isolate_pages(nr_pages, &l_hold, &pgscanned, sc->order,
ISOLATE_ACTIVE, zone,
sc->mem_cgroup, 1, file);
/*
@@ -1260,16 +1261,15 @@ static void shrink_active_list(unsigned
if (scanning_global_lru(sc)) {
zone->pages_scanned += pgscanned;
}
- reclaim_stat->recent_scanned[!!file] += pgmoved;
+ reclaim_stat->recent_scanned[!!file] += nr_taken;
__count_zone_vm_events(PGREFILL, zone, pgscanned);
if (file)
- __mod_zone_page_state(zone, NR_ACTIVE_FILE, -pgmoved);
+ __mod_zone_page_state(zone, NR_ACTIVE_FILE, -nr_taken);
else
- __mod_zone_page_state(zone, NR_ACTIVE_ANON, -pgmoved);
+ __mod_zone_page_state(zone, NR_ACTIVE_ANON, -nr_taken);
spin_unlock_irq(&zone->lru_lock);
- pgmoved = 0; /* count referenced (mapping) mapped pages */
while (!list_empty(&l_hold)) {
cond_resched();
page = lru_to_page(&l_hold);
@@ -1283,7 +1283,7 @@ static void shrink_active_list(unsigned
/* page_referenced clears PageReferenced */
if (page_mapping_inuse(page) &&
page_referenced(page, 0, sc->mem_cgroup, &vm_flags)) {
- pgmoved++;
+ nr_rotated++;
/*
* Identify referenced, file-backed active pages and
* give them one more trip around the active list. So
@@ -1312,7 +1312,7 @@ static void shrink_active_list(unsigned
* helps balance scan pressure between file and anonymous pages in
* get_scan_ratio.
*/
- reclaim_stat->recent_rotated[!!file] += pgmoved;
+ reclaim_stat->recent_rotated[!!file] += nr_rotated;
move_active_pages_to_lru(zone, &l_active,
LRU_ACTIVE + file * LRU_FILE);
Subject: [PATCH] mm: shrink_inactive_list() nr_scan accounting fix fix
If sc->isolate_pages() returns 0, we don't need to call shrink_page_list().
shrink_inactive_list() used to handle this properly.
But commit fb8d14e1 (a three-year-old commit!) broke it; the current
shrink_inactive_list() always calls shrink_page_list() even when
isolate_pages() returns 0.
This patch restores the proper return-value check.
Requirements:
o "nr_taken == 0" condition should stay before calling shrink_page_list().
o "nr_taken == 0" condition should stay after nr_scan related statistics
modification.
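A condensed sketch of the resulting control flow (lifted from the diff
below, simplified):

	nr_taken = sc->isolate_pages(sc->swap_cluster_max, &page_list,
				     &nr_scan, sc->order, mode,
				     zone, sc->mem_cgroup, 0, file);

	/* nr_scan statistics are accounted even when nothing was taken */
	if (scanning_global_lru(sc)) {
		zone->pages_scanned += nr_scan;
		if (current_is_kswapd())
			__count_zone_vm_events(PGSCAN_KSWAPD, zone, nr_scan);
		else
			__count_zone_vm_events(PGSCAN_DIRECT, zone, nr_scan);
	}

	/* only after that is it safe to skip shrink_page_list() */
	if (nr_taken == 0)
		goto done;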
Cc: Wu Fengguang <[email protected]>
Signed-off-by: KOSAKI Motohiro <[email protected]>
---
mm/vmscan.c | 30 ++++++++++++++++++------------
1 file changed, 18 insertions(+), 12 deletions(-)
Index: b/mm/vmscan.c
===================================================================
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1071,6 +1071,20 @@ static unsigned long shrink_inactive_lis
nr_taken = sc->isolate_pages(sc->swap_cluster_max,
&page_list, &nr_scan, sc->order, mode,
zone, sc->mem_cgroup, 0, file);
+
+ if (scanning_global_lru(sc)) {
+ zone->pages_scanned += nr_scan;
+ if (current_is_kswapd())
+ __count_zone_vm_events(PGSCAN_KSWAPD, zone,
+ nr_scan);
+ else
+ __count_zone_vm_events(PGSCAN_DIRECT, zone,
+ nr_scan);
+ }
+
+ if (nr_taken == 0)
+ goto done;
+
nr_active = clear_active_flags(&page_list, count);
__count_vm_events(PGDEACTIVATE, nr_active);
@@ -1083,8 +1097,6 @@ static unsigned long shrink_inactive_lis
__mod_zone_page_state(zone, NR_INACTIVE_ANON,
-count[LRU_INACTIVE_ANON]);
- if (scanning_global_lru(sc))
- zone->pages_scanned += nr_scan;
reclaim_stat->recent_scanned[0] += count[LRU_INACTIVE_ANON];
reclaim_stat->recent_scanned[0] += count[LRU_ACTIVE_ANON];
@@ -1118,18 +1130,12 @@ static unsigned long shrink_inactive_lis
}
nr_reclaimed += nr_freed;
+
local_irq_disable();
- if (current_is_kswapd()) {
- __count_zone_vm_events(PGSCAN_KSWAPD, zone, nr_scan);
+ if (current_is_kswapd())
__count_vm_events(KSWAPD_STEAL, nr_freed);
- } else if (scanning_global_lru(sc))
- __count_zone_vm_events(PGSCAN_DIRECT, zone, nr_scan);
-
__count_zone_vm_events(PGSTEAL, zone, nr_freed);
- if (nr_taken == 0)
- goto done;
-
spin_lock(&zone->lru_lock);
/*
* Put back any unfreeable pages.
@@ -1159,9 +1165,9 @@ static unsigned long shrink_inactive_lis
}
}
} while (nr_scanned < max_scan);
- spin_unlock(&zone->lru_lock);
+
done:
- local_irq_enable();
+ spin_unlock_irq(&zone->lru_lock);
pagevec_release(&pvec);
return nr_reclaimed;
}
ChangeLog
Since v5
- Rewrote the description
- Treat page migration
Since v4
- Changed display order in show_free_areas() (as Wu suggested)
Since v3
- Fixed a page misaccounting bug when lumpy reclaim occurs
Since v2
- Separated IsolateLRU field to Isolated(anon) and Isolated(file)
Since v1
- Renamed IsolatePages to IsolatedLRU
==================================
Subject: [PATCH] add isolate pages vmstat
If the system is running a heavy load of processes then concurrent reclaim
can isolate a large number of pages from the LRU. /proc/meminfo and the
output generated for an OOM do not show how many pages were isolated.
This patch exposes information about isolated pages.
How to reproduce:
-----------------------
% ./hackbench 140 process 1000
=> OOM occurs
active_anon:146 inactive_anon:0 isolated_anon:49245
active_file:79 inactive_file:18 isolated_file:113
unevictable:0 dirty:0 writeback:0 unstable:0 buffer:39
free:370 slab_reclaimable:309 slab_unreclaimable:5492
mapped:53 shmem:15 pagetables:28140 bounce:0
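The accounting is symmetric (a condensed sketch of the shrink_active_list
hunks below; file is 0 or 1 here, and NR_ISOLATED_FILE directly follows
NR_ISOLATED_ANON in enum zone_stat_item, so NR_ISOLATED_ANON + file picks
the right counter):

	/* when pages are taken off the LRU (zone->lru_lock held): */
	__mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken);

	/* when they are put back (or freed): */
	__mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken);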
Signed-off-by: KOSAKI Motohiro <[email protected]>
Acked-by: Rik van Riel <[email protected]>
Acked-by: Wu Fengguang <[email protected]>
Reviewed-by: Minchan Kim <[email protected]>
---
drivers/base/node.c | 4 ++++
fs/proc/meminfo.c | 4 ++++
include/linux/mmzone.h | 2 ++
mm/migrate.c | 11 +++++++++++
mm/page_alloc.c | 12 +++++++++---
mm/vmscan.c | 12 +++++++++++-
mm/vmstat.c | 2 ++
7 files changed, 43 insertions(+), 4 deletions(-)
Index: b/fs/proc/meminfo.c
===================================================================
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -65,6 +65,8 @@ static int meminfo_proc_show(struct seq_
"Active(file): %8lu kB\n"
"Inactive(file): %8lu kB\n"
"Unevictable: %8lu kB\n"
+ "Isolated(anon): %8lu kB\n"
+ "Isolated(file): %8lu kB\n"
"Mlocked: %8lu kB\n"
#ifdef CONFIG_HIGHMEM
"HighTotal: %8lu kB\n"
@@ -110,6 +112,8 @@ static int meminfo_proc_show(struct seq_
K(pages[LRU_ACTIVE_FILE]),
K(pages[LRU_INACTIVE_FILE]),
K(pages[LRU_UNEVICTABLE]),
+ K(global_page_state(NR_ISOLATED_ANON)),
+ K(global_page_state(NR_ISOLATED_FILE)),
K(global_page_state(NR_MLOCK)),
#ifdef CONFIG_HIGHMEM
K(i.totalhigh),
Index: b/include/linux/mmzone.h
===================================================================
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -100,6 +100,8 @@ enum zone_stat_item {
NR_BOUNCE,
NR_VMSCAN_WRITE,
NR_WRITEBACK_TEMP, /* Writeback using temporary buffers */
+ NR_ISOLATED_ANON, /* Temporary isolated pages from anon lru */
+ NR_ISOLATED_FILE, /* Temporary isolated pages from file lru */
NR_SHMEM, /* shmem pages (included tmpfs/GEM pages) */
#ifdef CONFIG_NUMA
NUMA_HIT, /* allocated in intended node */
Index: b/mm/page_alloc.c
===================================================================
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2115,16 +2115,18 @@ void show_free_areas(void)
}
}
- printk("Active_anon:%lu active_file:%lu inactive_anon:%lu\n"
- " inactive_file:%lu"
+ printk("active_anon:%lu inactive_anon:%lu isolated_anon:%lu\n"
+ " active_file:%lu inactive_file:%lu isolated_file:%lu\n"
" unevictable:%lu"
" dirty:%lu writeback:%lu unstable:%lu buffer:%lu\n"
" free:%lu slab_reclaimable:%lu slab_unreclaimable:%lu\n"
" mapped:%lu shmem:%lu pagetables:%lu bounce:%lu\n",
global_page_state(NR_ACTIVE_ANON),
- global_page_state(NR_ACTIVE_FILE),
global_page_state(NR_INACTIVE_ANON),
+ global_page_state(NR_ISOLATED_ANON),
+ global_page_state(NR_ACTIVE_FILE),
global_page_state(NR_INACTIVE_FILE),
+ global_page_state(NR_ISOLATED_FILE),
global_page_state(NR_UNEVICTABLE),
global_page_state(NR_FILE_DIRTY),
global_page_state(NR_WRITEBACK),
@@ -2152,6 +2154,8 @@ void show_free_areas(void)
" active_file:%lukB"
" inactive_file:%lukB"
" unevictable:%lukB"
+ " isolated(anon):%lukB"
+ " isolated(file):%lukB"
" present:%lukB"
" mlocked:%lukB"
" dirty:%lukB"
@@ -2178,6 +2182,8 @@ void show_free_areas(void)
K(zone_page_state(zone, NR_ACTIVE_FILE)),
K(zone_page_state(zone, NR_INACTIVE_FILE)),
K(zone_page_state(zone, NR_UNEVICTABLE)),
+ K(zone_page_state(zone, NR_ISOLATED_ANON)),
+ K(zone_page_state(zone, NR_ISOLATED_FILE)),
K(zone->present_pages),
K(zone_page_state(zone, NR_MLOCK)),
K(zone_page_state(zone, NR_FILE_DIRTY)),
Index: b/mm/vmscan.c
===================================================================
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1067,6 +1067,8 @@ static unsigned long shrink_inactive_lis
unsigned long nr_active;
unsigned int count[NR_LRU_LISTS] = { 0, };
int mode = lumpy_reclaim ? ISOLATE_BOTH : ISOLATE_INACTIVE;
+ unsigned long nr_anon;
+ unsigned long nr_file;
nr_taken = sc->isolate_pages(sc->swap_cluster_max,
&page_list, &nr_scan, sc->order, mode,
@@ -1097,6 +1099,10 @@ static unsigned long shrink_inactive_lis
__mod_zone_page_state(zone, NR_INACTIVE_ANON,
-count[LRU_INACTIVE_ANON]);
+ nr_anon = count[LRU_ACTIVE_ANON] + count[LRU_INACTIVE_ANON];
+ nr_file = count[LRU_ACTIVE_FILE] + count[LRU_INACTIVE_FILE];
+ __mod_zone_page_state(zone, NR_ISOLATED_ANON, nr_anon);
+ __mod_zone_page_state(zone, NR_ISOLATED_FILE, nr_file);
reclaim_stat->recent_scanned[0] += count[LRU_INACTIVE_ANON];
reclaim_stat->recent_scanned[0] += count[LRU_ACTIVE_ANON];
@@ -1164,6 +1170,9 @@ static unsigned long shrink_inactive_lis
spin_lock_irq(&zone->lru_lock);
}
}
+ __mod_zone_page_state(zone, NR_ISOLATED_ANON, -nr_anon);
+ __mod_zone_page_state(zone, NR_ISOLATED_FILE, -nr_file);
+
} while (nr_scanned < max_scan);
done:
@@ -1274,6 +1283,7 @@ static void shrink_active_list(unsigned
__mod_zone_page_state(zone, NR_ACTIVE_FILE, -nr_taken);
else
__mod_zone_page_state(zone, NR_ACTIVE_ANON, -nr_taken);
+ __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken);
spin_unlock_irq(&zone->lru_lock);
while (!list_empty(&l_hold)) {
@@ -1324,7 +1334,7 @@ static void shrink_active_list(unsigned
LRU_ACTIVE + file * LRU_FILE);
move_active_pages_to_lru(zone, &l_inactive,
LRU_BASE + file * LRU_FILE);
-
+ __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken);
spin_unlock_irq(&zone->lru_lock);
}
Index: b/mm/vmstat.c
===================================================================
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -644,6 +644,8 @@ static const char * const vmstat_text[]
"nr_bounce",
"nr_vmscan_write",
"nr_writeback_temp",
+ "nr_isolated_anon",
+ "nr_isolated_file",
"nr_shmem",
#ifdef CONFIG_NUMA
"numa_hit",
Index: b/drivers/base/node.c
===================================================================
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -73,6 +73,8 @@ static ssize_t node_read_meminfo(struct
"Node %d Active(file): %8lu kB\n"
"Node %d Inactive(file): %8lu kB\n"
"Node %d Unevictable: %8lu kB\n"
+ "Node %d Isolated(anon): %8lu kB\n"
+ "Node %d Isolated(file): %8lu kB\n"
"Node %d Mlocked: %8lu kB\n"
#ifdef CONFIG_HIGHMEM
"Node %d HighTotal: %8lu kB\n"
@@ -106,6 +108,8 @@ static ssize_t node_read_meminfo(struct
nid, K(node_page_state(nid, NR_ACTIVE_FILE)),
nid, K(node_page_state(nid, NR_INACTIVE_FILE)),
nid, K(node_page_state(nid, NR_UNEVICTABLE)),
+ nid, K(node_page_state(nid, NR_ISOLATED_ANON)),
+ nid, K(node_page_state(nid, NR_ISOLATED_FILE)),
nid, K(node_page_state(nid, NR_MLOCK)),
#ifdef CONFIG_HIGHMEM
nid, K(i.totalhigh),
Index: b/mm/migrate.c
===================================================================
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -67,6 +67,8 @@ int putback_lru_pages(struct list_head *
list_for_each_entry_safe(page, page2, l, lru) {
list_del(&page->lru);
+ dec_zone_page_state(page, NR_ISOLATED_ANON +
+ !!page_is_file_cache(page));
putback_lru_page(page);
count++;
}
@@ -696,6 +698,8 @@ unlock:
* restored.
*/
list_del(&page->lru);
+ dec_zone_page_state(page, NR_ISOLATED_ANON +
+ !!page_is_file_cache(page));
putback_lru_page(page);
}
@@ -740,6 +744,13 @@ int migrate_pages(struct list_head *from
struct page *page2;
int swapwrite = current->flags & PF_SWAPWRITE;
int rc;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ list_for_each_entry(page, from, lru)
+ __inc_zone_page_state(page, NR_ISOLATED_ANON +
+ !!page_is_file_cache(page));
+ local_irq_restore(flags);
if (!swapwrite)
current->flags |= PF_SWAPWRITE;
KOSAKI Motohiro wrote:
> Subject: [PATCH] Rename pgmoved variable in shrink_active_list()
>
> Currently, the pgmoved variable has two meanings, which makes review a bit harder.
> This patch separates them.
>
>
> Signed-off-by: KOSAKI Motohiro <[email protected]>
Reviewed-by: Rik van Riel <[email protected]>
--
All rights reversed.
KOSAKI Motohiro wrote:
> Subject: [PATCH] mm: shrink_inactive_list() nr_scan accounting fix fix
>
> If sc->isolate_pages() returns 0, we don't need to call shrink_page_list().
> shrink_inactive_list() used to handle this properly.
>
> But commit fb8d14e1 (a three-year-old commit!) broke it; the current
> shrink_inactive_list() always calls shrink_page_list() even when
> isolate_pages() returns 0.
>
> This patch restores the proper return-value check.
>
>
> Requirements:
> o "nr_taken == 0" condition should stay before calling shrink_page_list().
> o "nr_taken == 0" condition should stay after nr_scan related statistics
> modification.
>
>
> Cc: Wu Fengguang <[email protected]>
> Signed-off-by: KOSAKI Motohiro <[email protected]>
Reviewed-by: Rik van Riel <[email protected]>
--
All rights reversed.
On Thu, 16 Jul 2009 09:52:34 +0900 (JST) KOSAKI Motohiro <[email protected]> wrote:
> if (file)
> - __mod_zone_page_state(zone, NR_ACTIVE_FILE, -pgmoved);
> + __mod_zone_page_state(zone, NR_ACTIVE_FILE, -nr_taken);
> else
> - __mod_zone_page_state(zone, NR_ACTIVE_ANON, -pgmoved);
> + __mod_zone_page_state(zone, NR_ACTIVE_ANON, -nr_taken);
we could have used __sub_zone_page_state() there.
On Thu, 16 Jul 2009 09:55:47 +0900 (JST) KOSAKI Motohiro <[email protected]> wrote:
> ChangeLog
> Since v5
> - Rewrote the description
> - Treat page migration
> Since v4
> - Changed display order in show_free_areas() (as Wu suggested)
> Since v3
> - Fixed a page misaccounting bug when lumpy reclaim occurs
> Since v2
> - Separated IsolateLRU field to Isolated(anon) and Isolated(file)
> Since v1
> - Renamed IsolatePages to IsolatedLRU
>
> ==================================
> Subject: [PATCH] add isolate pages vmstat
>
> If the system is running a heavy load of processes then concurrent reclaim
> can isolate a large number of pages from the LRU. /proc/meminfo and the
> output generated for an OOM do not show how many pages were isolated.
>
> This patch exposes information about isolated pages.
>
>
> How to reproduce:
> -----------------------
> % ./hackbench 140 process 1000
> => OOM occurs
>
> active_anon:146 inactive_anon:0 isolated_anon:49245
> active_file:79 inactive_file:18 isolated_file:113
> unevictable:0 dirty:0 writeback:0 unstable:0 buffer:39
> free:370 slab_reclaimable:309 slab_unreclaimable:5492
> mapped:53 shmem:15 pagetables:28140 bounce:0
>
>
> Signed-off-by: KOSAKI Motohiro <[email protected]>
> Acked-by: Rik van Riel <[email protected]>
> Acked-by: Wu Fengguang <[email protected]>
> Reviewed-by: Minchan Kim <[email protected]>
> ---
> drivers/base/node.c | 4 ++++
> fs/proc/meminfo.c | 4 ++++
> include/linux/mmzone.h | 2 ++
> mm/migrate.c | 11 +++++++++++
> mm/page_alloc.c | 12 +++++++++---
> mm/vmscan.c | 12 +++++++++++-
> mm/vmstat.c | 2 ++
> 7 files changed, 43 insertions(+), 4 deletions(-)
>
> Index: b/fs/proc/meminfo.c
> ===================================================================
> --- a/fs/proc/meminfo.c
> +++ b/fs/proc/meminfo.c
> @@ -65,6 +65,8 @@ static int meminfo_proc_show(struct seq_
> "Active(file): %8lu kB\n"
> "Inactive(file): %8lu kB\n"
> "Unevictable: %8lu kB\n"
> + "Isolated(anon): %8lu kB\n"
> + "Isolated(file): %8lu kB\n"
> "Mlocked: %8lu kB\n"
Are these counters really important enough to justify being present in
/proc/meminfo? They seem fairly low-level developer-only details.
Perhaps relegate them to /proc/vmstat?
> #ifdef CONFIG_HIGHMEM
> "HighTotal: %8lu kB\n"
> @@ -110,6 +112,8 @@ static int meminfo_proc_show(struct seq_
> K(pages[LRU_ACTIVE_FILE]),
> K(pages[LRU_INACTIVE_FILE]),
> K(pages[LRU_UNEVICTABLE]),
> + K(global_page_state(NR_ISOLATED_ANON)),
> + K(global_page_state(NR_ISOLATED_FILE)),
> K(global_page_state(NR_MLOCK)),
> #ifdef CONFIG_HIGHMEM
> K(i.totalhigh),
> Index: b/include/linux/mmzone.h
> ===================================================================
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -100,6 +100,8 @@ enum zone_stat_item {
> NR_BOUNCE,
> NR_VMSCAN_WRITE,
> NR_WRITEBACK_TEMP, /* Writeback using temporary buffers */
> + NR_ISOLATED_ANON, /* Temporary isolated pages from anon lru */
> + NR_ISOLATED_FILE, /* Temporary isolated pages from file lru */
> NR_SHMEM, /* shmem pages (included tmpfs/GEM pages) */
> #ifdef CONFIG_NUMA
> NUMA_HIT, /* allocated in intended node */
> Index: b/mm/page_alloc.c
> ===================================================================
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -2115,16 +2115,18 @@ void show_free_areas(void)
> }
> }
>
> - printk("Active_anon:%lu active_file:%lu inactive_anon:%lu\n"
> - " inactive_file:%lu"
> + printk("active_anon:%lu inactive_anon:%lu isolated_anon:%lu\n"
> + " active_file:%lu inactive_file:%lu isolated_file:%lu\n"
> " unevictable:%lu"
> " dirty:%lu writeback:%lu unstable:%lu buffer:%lu\n"
> " free:%lu slab_reclaimable:%lu slab_unreclaimable:%lu\n"
> " mapped:%lu shmem:%lu pagetables:%lu bounce:%lu\n",
> global_page_state(NR_ACTIVE_ANON),
> - global_page_state(NR_ACTIVE_FILE),
> global_page_state(NR_INACTIVE_ANON),
> + global_page_state(NR_ISOLATED_ANON),
> + global_page_state(NR_ACTIVE_FILE),
> global_page_state(NR_INACTIVE_FILE),
> + global_page_state(NR_ISOLATED_FILE),
> global_page_state(NR_UNEVICTABLE),
> global_page_state(NR_FILE_DIRTY),
> global_page_state(NR_WRITEBACK),
> @@ -2152,6 +2154,8 @@ void show_free_areas(void)
> " active_file:%lukB"
> " inactive_file:%lukB"
> " unevictable:%lukB"
> + " isolated(anon):%lukB"
> + " isolated(file):%lukB"
> " present:%lukB"
> " mlocked:%lukB"
> " dirty:%lukB"
> @@ -2178,6 +2182,8 @@ void show_free_areas(void)
> K(zone_page_state(zone, NR_ACTIVE_FILE)),
> K(zone_page_state(zone, NR_INACTIVE_FILE)),
> K(zone_page_state(zone, NR_UNEVICTABLE)),
> + K(zone_page_state(zone, NR_ISOLATED_ANON)),
> + K(zone_page_state(zone, NR_ISOLATED_FILE)),
> K(zone->present_pages),
> K(zone_page_state(zone, NR_MLOCK)),
> K(zone_page_state(zone, NR_FILE_DIRTY)),
> Index: b/mm/vmscan.c
> ===================================================================
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -1067,6 +1067,8 @@ static unsigned long shrink_inactive_lis
> unsigned long nr_active;
> unsigned int count[NR_LRU_LISTS] = { 0, };
> int mode = lumpy_reclaim ? ISOLATE_BOTH : ISOLATE_INACTIVE;
> + unsigned long nr_anon;
> + unsigned long nr_file;
>
> nr_taken = sc->isolate_pages(sc->swap_cluster_max,
> &page_list, &nr_scan, sc->order, mode,
> @@ -1097,6 +1099,10 @@ static unsigned long shrink_inactive_lis
> __mod_zone_page_state(zone, NR_INACTIVE_ANON,
> -count[LRU_INACTIVE_ANON]);
>
> + nr_anon = count[LRU_ACTIVE_ANON] + count[LRU_INACTIVE_ANON];
> + nr_file = count[LRU_ACTIVE_FILE] + count[LRU_INACTIVE_FILE];
> + __mod_zone_page_state(zone, NR_ISOLATED_ANON, nr_anon);
> + __mod_zone_page_state(zone, NR_ISOLATED_FILE, nr_file);
>
> reclaim_stat->recent_scanned[0] += count[LRU_INACTIVE_ANON];
> reclaim_stat->recent_scanned[0] += count[LRU_ACTIVE_ANON];
> @@ -1164,6 +1170,9 @@ static unsigned long shrink_inactive_lis
> spin_lock_irq(&zone->lru_lock);
> }
> }
> + __mod_zone_page_state(zone, NR_ISOLATED_ANON, -nr_anon);
> + __mod_zone_page_state(zone, NR_ISOLATED_FILE, -nr_file);
> +
> } while (nr_scanned < max_scan);
This is a non-trivial amount of extra stuff. Do we really need it?
Looks good to me.
On Thu, Jul 16, 2009 at 9:52 AM, KOSAKI
Motohiro<[email protected]> wrote:
> Subject: [PATCH] Rename pgmoved variable in shrink_active_list()
>
> Currently, the pgmoved variable has two meanings, which makes review a bit harder.
> This patch separates them.
>
>
> Signed-off-by: KOSAKI Motohiro <[email protected]>
Reviewed-by: Minchan Kim <[email protected]>
--
Kind regards,
Minchan Kim
On Thu, Jul 16, 2009 at 12:16 PM, Andrew
Morton<[email protected]> wrote:
> On Thu, 16 Jul 2009 09:52:34 +0900 (JST) KOSAKI Motohiro <[email protected]> wrote:
>
>> if (file)
>> - __mod_zone_page_state(zone, NR_ACTIVE_FILE, -pgmoved);
>> + __mod_zone_page_state(zone, NR_ACTIVE_FILE, -nr_taken);
>> else
>> - __mod_zone_page_state(zone, NR_ACTIVE_ANON, -pgmoved);
>> + __mod_zone_page_state(zone, NR_ACTIVE_ANON, -nr_taken);
>
> we could have used __sub_zone_page_state() there.
Yes. They can all be changed at once by separate patches. :)
--
Kind regards,
Minchan Kim
Good.
I had decided to make a patch like this when I learned of this problem.
You are very diligent :)
Thanks for your good attitude.
On Thu, Jul 16, 2009 at 9:53 AM, KOSAKI
Motohiro<[email protected]> wrote:
> Subject: [PATCH] mm: shrink_inactive_list() nr_scan accounting fix fix
>
> If sc->isolate_pages() returns 0, we don't need to call shrink_page_list().
> shrink_inactive_list() used to handle this properly.
>
> But commit fb8d14e1 (a three-year-old commit!) broke it; the current
> shrink_inactive_list() always calls shrink_page_list() even when
> isolate_pages() returns 0.
>
> This patch restores the proper return-value check.
>
>
> Requirements:
> o "nr_taken == 0" condition should stay before calling shrink_page_list().
> o "nr_taken == 0" condition should stay after nr_scan related statistics
> modification.
>
>
> Cc: Wu Fengguang <[email protected]>
> Signed-off-by: KOSAKI Motohiro <[email protected]>
Reviewed-by: Minchan Kim <[email protected]>
--
Kind regards,
Minchan Kim
On Thu, Jul 16, 2009 at 12:16 PM, Andrew Morton<[email protected]> wrote:
> On Thu, 16 Jul 2009 09:55:47 +0900 (JST) KOSAKI Motohiro <[email protected]> wrote:
>
> > @@ -1164,6 +1170,9 @@ static unsigned long shrink_inactive_lis
> > spin_lock_irq(&zone->lru_lock);
> > }
> > }
> > + __mod_zone_page_state(zone, NR_ISOLATED_ANON, -nr_anon);
> > + __mod_zone_page_state(zone, NR_ISOLATED_FILE, -nr_file);
> > +
> > } while (nr_scanned < max_scan);
>
> This is a non-trivial amount of extra stuff. Do we really need it?
I thought so. This patch resulted from a process fork bomb (e.g. msgctl11
in LTP), where the isolation counter showed that far too many pages were
isolated.
So I think we need this for now. If we can solve the problem with a
different method, then we can drop it.
--
Kind regards,
Minchan Kim
> On Thu, 16 Jul 2009 09:52:34 +0900 (JST) KOSAKI Motohiro <[email protected]> wrote:
>
> > if (file)
> > - __mod_zone_page_state(zone, NR_ACTIVE_FILE, -pgmoved);
> > + __mod_zone_page_state(zone, NR_ACTIVE_FILE, -nr_taken);
> > else
> > - __mod_zone_page_state(zone, NR_ACTIVE_ANON, -pgmoved);
> > + __mod_zone_page_state(zone, NR_ACTIVE_ANON, -nr_taken);
>
> we could have used __sub_zone_page_state() there.
__add_zone_page_state() and __sub_zone_page_state() have no users.
Can we remove them instead?
==============================================
Subject: Kill __{add,sub}_zone_page_state()
Currently, __add_zone_page_state() and __sub_zone_page_state() are unused.
This patch removes them.
Signed-off-by: KOSAKI Motohiro <[email protected]>
---
include/linux/vmstat.h | 5 -----
1 file changed, 5 deletions(-)
Index: b/include/linux/vmstat.h
===================================================================
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -210,11 +210,6 @@ extern void zone_statistics(struct zone
#endif /* CONFIG_NUMA */
-#define __add_zone_page_state(__z, __i, __d) \
- __mod_zone_page_state(__z, __i, __d)
-#define __sub_zone_page_state(__z, __i, __d) \
- __mod_zone_page_state(__z, __i,-(__d))
-
#define add_zone_page_state(__z, __i, __d) mod_zone_page_state(__z, __i, __d)
#define sub_zone_page_state(__z, __i, __d) mod_zone_page_state(__z, __i, -(__d))
> > How to reproduce:
> > -----------------------
> > % ./hackbench 140 process 1000
> > => OOM occurs
> >
> > active_anon:146 inactive_anon:0 isolated_anon:49245
> > active_file:79 inactive_file:18 isolated_file:113
> > unevictable:0 dirty:0 writeback:0 unstable:0 buffer:39
> > free:370 slab_reclaimable:309 slab_unreclaimable:5492
> > mapped:53 shmem:15 pagetables:28140 bounce:0
> >
> > @@ -1164,6 +1170,9 @@ static unsigned long shrink_inactive_lis
> > spin_lock_irq(&zone->lru_lock);
> > }
> > }
> > + __mod_zone_page_state(zone, NR_ISOLATED_ANON, -nr_anon);
> > + __mod_zone_page_state(zone, NR_ISOLATED_FILE, -nr_file);
> > +
> > } while (nr_scanned < max_scan);
>
> This is a non-trivial amount of extra stuff. Do we really need it?
In general, administrators really hate large amounts of unaccounted memory.
In the recent msgctl11 discussion, we saw isolated pages take about 1/3 of
system memory.
Although Rik's patch is applied, vmscan can still isolate >1GB of memory.
That's my point.
On Thu, 16 Jul 2009 13:22:30 +0900 (JST) KOSAKI Motohiro <[email protected]> wrote:
> -#define __add_zone_page_state(__z, __i, __d) \
> - __mod_zone_page_state(__z, __i, __d)
> -#define __sub_zone_page_state(__z, __i, __d) \
> - __mod_zone_page_state(__z, __i,-(__d))
> -
yeah, whatever, I don't think they add a lot of value personally.
I guess they're a _bit_ clearer than doing __mod_zone_page_state() with
a negated argument. Shrug.
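For illustration (both forms expand to the same call, per the macros quoted
above):

	/* with the wrapper: */
	__sub_zone_page_state(zone, NR_ACTIVE_FILE, nr_taken);

	/* open-coded, with a negated argument: */
	__mod_zone_page_state(zone, NR_ACTIVE_FILE, -(nr_taken));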
> On Thu, 16 Jul 2009 13:22:30 +0900 (JST) KOSAKI Motohiro <[email protected]> wrote:
>
> > -#define __add_zone_page_state(__z, __i, __d) \
> > - __mod_zone_page_state(__z, __i, __d)
> > -#define __sub_zone_page_state(__z, __i, __d) \
> > - __mod_zone_page_state(__z, __i,-(__d))
> > -
>
> yeah, whatever, I don't think they add a lot of value personally.
>
> I guess they're a _bit_ clearer than doing __mod_zone_page_state() with
> a negated argument. Shrug.
OK, I've caught your point.
I'll make patches replacing all the callers.
Thanks.
> > On Thu, 16 Jul 2009 13:22:30 +0900 (JST) KOSAKI Motohiro <[email protected]> wrote:
> >
> > > -#define __add_zone_page_state(__z, __i, __d) \
> > > - __mod_zone_page_state(__z, __i, __d)
> > > -#define __sub_zone_page_state(__z, __i, __d) \
> > > - __mod_zone_page_state(__z, __i,-(__d))
> > > -
> >
> > yeah, whatever, I don't think they add a lot of value personally.
> >
> > I guess they're a _bit_ clearer than doing __mod_zone_page_state() with
> > a negated argument. Shrug.
>
> OK, I've caught your point.
> I'll make patches replacing all the callers.
Please drop my last mail. I was confused ;-)
On Thu, 16 Jul 2009 13:38:21 +0900 (JST) KOSAKI Motohiro <[email protected]> wrote:
> > On Thu, 16 Jul 2009 13:22:30 +0900 (JST) KOSAKI Motohiro <[email protected]> wrote:
> >
> > > -#define __add_zone_page_state(__z, __i, __d) \
> > > - __mod_zone_page_state(__z, __i, __d)
> > > -#define __sub_zone_page_state(__z, __i, __d) \
> > > - __mod_zone_page_state(__z, __i,-(__d))
> > > -
> >
> > yeah, whatever, I don't think they add a lot of value personally.
> >
> > I guess they're a _bit_ clearer than doing __mod_zone_page_state() with
> > a negated argument. Shrug.
>
> OK, I've caught your point.
I don't think I have a point ;)
> I'll make patches replacing all the callers.
Well, if you guys think it's worth it, sure.
On Thu, Jul 16, 2009 at 08:52:34AM +0800, KOSAKI Motohiro wrote:
> Subject: [PATCH] Rename pgmoved variable in shrink_active_list()
>
> Currently, the pgmoved variable has two meanings, which makes review a bit harder.
> This patch separates them.
>
> Signed-off-by: KOSAKI Motohiro <[email protected]>
Reviewed-by: Wu Fengguang <[email protected]>
On Thu, Jul 16, 2009 at 08:53:42AM +0800, KOSAKI Motohiro wrote:
> Subject: [PATCH] mm: shrink_inactive_list() nr_scan accounting fix fix
>
> If sc->isolate_pages() returns 0, we don't need to call shrink_page_list().
> shrink_inactive_list() used to handle this properly.
>
> But commit fb8d14e1 (a three-year-old commit!) broke it; the current
> shrink_inactive_list() always calls shrink_page_list() even when
> isolate_pages() returns 0.
>
> This patch restores the proper return-value check.
Patch looks good, but there is another minor problem..
>
> Requirements:
> o "nr_taken == 0" condition should stay before calling shrink_page_list().
> o "nr_taken == 0" condition should stay after nr_scan related statistics
> modification.
>
>
> Cc: Wu Fengguang <[email protected]>
> Signed-off-by: KOSAKI Motohiro <[email protected]>
> ---
> mm/vmscan.c | 30 ++++++++++++++++++------------
> 1 file changed, 18 insertions(+), 12 deletions(-)
>
> Index: b/mm/vmscan.c
> ===================================================================
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -1071,6 +1071,20 @@ static unsigned long shrink_inactive_lis
> nr_taken = sc->isolate_pages(sc->swap_cluster_max,
> &page_list, &nr_scan, sc->order, mode,
> zone, sc->mem_cgroup, 0, file);
> +
> + if (scanning_global_lru(sc)) {
> + zone->pages_scanned += nr_scan;
> + if (current_is_kswapd())
> + __count_zone_vm_events(PGSCAN_KSWAPD, zone,
> + nr_scan);
> + else
> + __count_zone_vm_events(PGSCAN_DIRECT, zone,
> + nr_scan);
> + }
> +
> + if (nr_taken == 0)
> + goto done;
Not a newly introduced problem, but this early break might under-scan
the list if (max_scan > swap_cluster_max). Luckily the only two
callers both call with (max_scan <= swap_cluster_max).
What shall we do? The comprehensive solution may be to
- remove the big do-while loop
- replace sc->swap_cluster_max => max_scan
- take care in the callers to not passing small max_scan values
Or to simply make this function more robust like this?
---
mm/vmscan.c | 13 +++++++------
1 file changed, 7 insertions(+), 6 deletions(-)
--- linux.orig/mm/vmscan.c
+++ linux/mm/vmscan.c
@@ -1098,7 +1098,7 @@ static unsigned long shrink_inactive_lis
lru_add_drain();
spin_lock_irq(&zone->lru_lock);
- do {
+ while (nr_scanned < max_scan) {
struct page *page;
unsigned long nr_taken;
unsigned long nr_scan;
@@ -1112,6 +1112,7 @@ static unsigned long shrink_inactive_lis
nr_taken = sc->isolate_pages(sc->swap_cluster_max,
&page_list, &nr_scan, sc->order, mode,
zone, sc->mem_cgroup, 0, file);
+ nr_scanned += nr_scan;
if (scanning_global_lru(sc)) {
zone->pages_scanned += nr_scan;
@@ -1123,8 +1124,10 @@ static unsigned long shrink_inactive_lis
nr_scan);
}
- if (nr_taken == 0)
- goto done;
+ if (nr_taken == 0) {
+ cond_resched_lock(&zone->lru_lock);
+ continue;
+ }
nr_active = clear_active_flags(&page_list, count);
__count_vm_events(PGDEACTIVATE, nr_active);
@@ -1150,7 +1153,6 @@ static unsigned long shrink_inactive_lis
spin_unlock_irq(&zone->lru_lock);
- nr_scanned += nr_scan;
nr_freed = shrink_page_list(&page_list, sc, PAGEOUT_IO_ASYNC);
/*
@@ -1212,9 +1214,8 @@ static unsigned long shrink_inactive_lis
__mod_zone_page_state(zone, NR_ISOLATED_ANON, -nr_anon);
__mod_zone_page_state(zone, NR_ISOLATED_FILE, -nr_file);
- } while (nr_scanned < max_scan);
+ }
-done:
spin_unlock_irq(&zone->lru_lock);
pagevec_release(&pvec);
return nr_reclaimed;
Reviewed-by: Christoph Lameter <[email protected]>
On Thu, 16 Jul 2009, KOSAKI Motohiro wrote:
> Index: b/mm/migrate.c
> ===================================================================
> --- a/mm/migrate.c
> +++ b/mm/migrate.c
> @@ -67,6 +67,8 @@ int putback_lru_pages(struct list_head *
>
> list_for_each_entry_safe(page, page2, l, lru) {
> list_del(&page->lru);
> + dec_zone_page_state(page, NR_ISOLATED_ANON +
> + !!page_is_file_cache(page));
> putback_lru_page(page);
> count++;
> }
ok.
> @@ -696,6 +698,8 @@ unlock:
> * restored.
> */
> list_del(&page->lru);
> + dec_zone_page_state(page, NR_ISOLATED_ANON +
> + !!page_is_file_cache(page));
> putback_lru_page(page);
> }
>
ok.
> @@ -740,6 +744,13 @@ int migrate_pages(struct list_head *from
> struct page *page2;
> int swapwrite = current->flags & PF_SWAPWRITE;
> int rc;
> + unsigned long flags;
> +
> + local_irq_save(flags);
> + list_for_each_entry(page, from, lru)
> + __inc_zone_page_state(page, NR_ISOLATED_ANON +
> + !!page_is_file_cache(page));
> + local_irq_restore(flags);
>
Why do a separate pass over all the migrated pages? Can you add the
_inc_xx somewhere after the page was isolated from the lru by calling
try_to_unmap()?
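(For concreteness, a hypothetical sketch of that suggestion, not a tested
patch: the increment would sit next to the isolation itself in each caller,
e.g. wherever isolate_lru_page() succeeds, with pagelist being whatever
list the caller collects pages on:

	if (!isolate_lru_page(page)) {
		inc_zone_page_state(page, NR_ISOLATED_ANON +
				    !!page_is_file_cache(page));
		list_add_tail(&page->lru, &pagelist);
	}

The !! mirrors the patch above, since page_is_file_cache() does not return
a strict 0/1.)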
On Thu, Jul 16, 2009 at 09:52:34AM +0900, KOSAKI Motohiro wrote:
> Subject: [PATCH] Rename pgmoved variable in shrink_active_list()
>
> Currently, the pgmoved variable has two meanings, which makes review a bit harder.
> This patch separates them.
>
>
> Signed-off-by: KOSAKI Motohiro <[email protected]>
Reviewed-by: Johannes Weiner <[email protected]>
Below are just minor suggestions regarding the changed code.
> ---
> mm/vmscan.c | 16 ++++++++--------
> 1 file changed, 8 insertions(+), 8 deletions(-)
>
> Index: b/mm/vmscan.c
> ===================================================================
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -1239,7 +1239,7 @@ static void move_active_pages_to_lru(str
> static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
> struct scan_control *sc, int priority, int file)
> {
> - unsigned long pgmoved;
> + unsigned long nr_taken;
> unsigned long pgscanned;
> unsigned long vm_flags;
> LIST_HEAD(l_hold); /* The pages which were snipped off */
> @@ -1247,10 +1247,11 @@ static void shrink_active_list(unsigned
> LIST_HEAD(l_inactive);
> struct page *page;
> struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
> + unsigned long nr_rotated = 0;
>
> lru_add_drain();
> spin_lock_irq(&zone->lru_lock);
> - pgmoved = sc->isolate_pages(nr_pages, &l_hold, &pgscanned, sc->order,
> + nr_taken = sc->isolate_pages(nr_pages, &l_hold, &pgscanned, sc->order,
> ISOLATE_ACTIVE, zone,
> sc->mem_cgroup, 1, file);
> /*
> @@ -1260,16 +1261,15 @@ static void shrink_active_list(unsigned
> if (scanning_global_lru(sc)) {
> zone->pages_scanned += pgscanned;
> }
> - reclaim_stat->recent_scanned[!!file] += pgmoved;
> + reclaim_stat->recent_scanned[!!file] += nr_taken;
Hm, file is a boolean already, the double negation can probably be
dropped.
> __count_zone_vm_events(PGREFILL, zone, pgscanned);
> if (file)
> - __mod_zone_page_state(zone, NR_ACTIVE_FILE, -pgmoved);
> + __mod_zone_page_state(zone, NR_ACTIVE_FILE, -nr_taken);
> else
> - __mod_zone_page_state(zone, NR_ACTIVE_ANON, -pgmoved);
> + __mod_zone_page_state(zone, NR_ACTIVE_ANON, -nr_taken);
Should perhaps be in another patch, but we could use

	__mod_zone_page_state(zone, NR_LRU_BASE + LRU_ACTIVE +
			      file * LRU_FILE, -nr_taken);

like in the call to move_active_pages_to_lru().
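(Why the arithmetic works, going by the enum layout in mmzone.h: LRU_ACTIVE
is 1, LRU_FILE is 2, and the NR_INACTIVE_ANON/NR_ACTIVE_ANON/NR_INACTIVE_FILE/
NR_ACTIVE_FILE stat items follow the lru_list order starting at NR_LRU_BASE,
so the expression selects NR_ACTIVE_ANON for file == 0 and NR_ACTIVE_FILE for
file == 1.)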
> spin_unlock_irq(&zone->lru_lock);
>
> - pgmoved = 0; /* count referenced (mapping) mapped pages */
> while (!list_empty(&l_hold)) {
> cond_resched();
> page = lru_to_page(&l_hold);
> @@ -1283,7 +1283,7 @@ static void shrink_active_list(unsigned
> /* page_referenced clears PageReferenced */
> if (page_mapping_inuse(page) &&
> page_referenced(page, 0, sc->mem_cgroup, &vm_flags)) {
> - pgmoved++;
> + nr_rotated++;
> /*
> * Identify referenced, file-backed active pages and
> * give them one more trip around the active list. So
> @@ -1312,7 +1312,7 @@ static void shrink_active_list(unsigned
> * helps balance scan pressure between file and anonymous pages in
> * get_scan_ratio.
> */
> - reclaim_stat->recent_rotated[!!file] += pgmoved;
> + reclaim_stat->recent_rotated[!!file] += nr_rotated;
file is boolean.
There is one more double negation in isolate_pages_global() that can
be dropped as well. If you agree, I can submit all those changes in
separate patches.
Hannes
> Not a newly introduced problem, but this early break might under-scan
> the list if (max_scan > swap_cluster_max). Luckily the only two
> callers both call with (max_scan <= swap_cluster_max).
>
> What shall we do? The comprehensive solution may be to
> - remove the big do-while loop
> - replace sc->swap_cluster_max => max_scan
> - take care in the callers to not passing small max_scan values
>
> Or to simply make this function more robust like this?
Sorry, I haven't caught your point. Can you please tell me the scenario
you are worried about?
> On Thu, Jul 16, 2009 at 09:52:34AM +0900, KOSAKI Motohiro wrote:
> > Subject: [PATCH] Rename pgmoved variable in shrink_active_list()
> >
> > Currently, the pgmoved variable has two meanings, which makes review a bit harder.
> > This patch separates them.
> >
> >
> > Signed-off-by: KOSAKI Motohiro <[email protected]>
>
> Reviewed-by: Johannes Weiner <[email protected]>
>
> Below are just minor suggestions regarding the changed code.
>
> > ---
> > mm/vmscan.c | 16 ++++++++--------
> > 1 file changed, 8 insertions(+), 8 deletions(-)
> >
> > Index: b/mm/vmscan.c
> > ===================================================================
> > --- a/mm/vmscan.c
> > +++ b/mm/vmscan.c
> > @@ -1239,7 +1239,7 @@ static void move_active_pages_to_lru(str
> > static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
> > struct scan_control *sc, int priority, int file)
> > {
> > - unsigned long pgmoved;
> > + unsigned long nr_taken;
> > unsigned long pgscanned;
> > unsigned long vm_flags;
> > LIST_HEAD(l_hold); /* The pages which were snipped off */
> > @@ -1247,10 +1247,11 @@ static void shrink_active_list(unsigned
> > LIST_HEAD(l_inactive);
> > struct page *page;
> > struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
> > + unsigned long nr_rotated = 0;
> >
> > lru_add_drain();
> > spin_lock_irq(&zone->lru_lock);
> > - pgmoved = sc->isolate_pages(nr_pages, &l_hold, &pgscanned, sc->order,
> > + nr_taken = sc->isolate_pages(nr_pages, &l_hold, &pgscanned, sc->order,
> > ISOLATE_ACTIVE, zone,
> > sc->mem_cgroup, 1, file);
> > /*
> > @@ -1260,16 +1261,15 @@ static void shrink_active_list(unsigned
> > if (scanning_global_lru(sc)) {
> > zone->pages_scanned += pgscanned;
> > }
> > - reclaim_stat->recent_scanned[!!file] += pgmoved;
> > + reclaim_stat->recent_scanned[!!file] += nr_taken;
>
> Hm, file is a boolean already, the double negation can probably be
> dropped.
>
> > __count_zone_vm_events(PGREFILL, zone, pgscanned);
> > if (file)
> > - __mod_zone_page_state(zone, NR_ACTIVE_FILE, -pgmoved);
> > + __mod_zone_page_state(zone, NR_ACTIVE_FILE, -nr_taken);
> > else
> > - __mod_zone_page_state(zone, NR_ACTIVE_ANON, -pgmoved);
> > + __mod_zone_page_state(zone, NR_ACTIVE_ANON, -nr_taken);
>
> Should perhaps be in another patch, but we could use
>
> 	__mod_zone_page_state(zone, NR_LRU_BASE + LRU_ACTIVE +
> 			      file * LRU_FILE, -nr_taken);
>
> like in the call to move_active_pages_to_lru().
>
> > spin_unlock_irq(&zone->lru_lock);
> >
> > - pgmoved = 0; /* count referenced (mapping) mapped pages */
> > while (!list_empty(&l_hold)) {
> > cond_resched();
> > page = lru_to_page(&l_hold);
> > @@ -1283,7 +1283,7 @@ static void shrink_active_list(unsigned
> > /* page_referenced clears PageReferenced */
> > if (page_mapping_inuse(page) &&
> > page_referenced(page, 0, sc->mem_cgroup, &vm_flags)) {
> > - pgmoved++;
> > + nr_rotated++;
> > /*
> > * Identify referenced, file-backed active pages and
> > * give them one more trip around the active list. So
> > @@ -1312,7 +1312,7 @@ static void shrink_active_list(unsigned
> > * helps balance scan pressure between file and anonymous pages in
> > * get_scan_ratio.
> > */
> > - reclaim_stat->recent_rotated[!!file] += pgmoved;
> > + reclaim_stat->recent_rotated[!!file] += nr_rotated;
>
> file is boolean.
>
> There is one more double negation in isolate_pages_global() that can
> be dropped as well. If you agree, I can submit all those changes in
> separate patches.
Yeah, please do. Thanks.
> > @@ -740,6 +744,13 @@ int migrate_pages(struct list_head *from
> > struct page *page2;
> > int swapwrite = current->flags & PF_SWAPWRITE;
> > int rc;
> > + int flags;
> > +
> > + local_irq_save(flags);
> > + list_for_each_entry(page, from, lru)
> > + __inc_zone_page_state(page, NR_ISOLATED_ANON +
> > + !!page_is_file_cache(page));
> > + local_irq_restore(flags);
> >
>
> Why do a separate pass over all the migrates pages? Can you add the
> _inc_xx somewhere after the page was isolated from the lru by calling
> try_to_unmap()?
calling try_to_unmap()? The pages are isolated before migrate_pages() is
called. migrate_pages() has multiple callers, so I put this __inc_xx at the
top of migrate_pages().
On Fri, 17 Jul 2009, KOSAKI Motohiro wrote:
> > Why do a separate pass over all the migrated pages? Can you add the
> > _inc_xx somewhere after the page was isolated from the lru by calling
> > try_to_unmap()?
>
> calling try_to_unmap()? The pages are isolated before migrate_pages() is
> called. migrate_pages() has multiple callers, so I put this __inc_xx at the
> top of migrate_pages().
Then put the inc_xxx's where the pages are isolated.
> On Fri, 17 Jul 2009, KOSAKI Motohiro wrote:
>
> > > Why do a separate pass over all the migrated pages? Can you add the
> > > _inc_xx somewhere after the page was isolated from the lru by calling
> > > try_to_unmap()?
> >
> > calling try_to_unmap()? The pages are isolated before migrate_pages() is
> > called. migrate_pages() has multiple callers, so I put this __inc_xx at the
> > top of migrate_pages().
>
> Then put the inc_xxx's where the pages are isolated.
Is there any benefit? Why do we need to sprinkle __inc_xx in so many places?
On Mon, 20 Jul 2009, KOSAKI Motohiro wrote:
> > On Fri, 17 Jul 2009, KOSAKI Motohiro wrote:
> >
> > > > Why do a separate pass over all the migrated pages? Can you add the
> > > > _inc_xx somewhere after the page was isolated from the lru by calling
> > > > try_to_unmap()?
> > >
> > > calling try_to_unmap()? The pages are isolated before migrate_pages() is
> > > called. migrate_pages() has multiple callers, so I put this __inc_xx at the
> > > top of migrate_pages().
> >
> > Then put the inc_xxx's where the pages are isolated.
>
> Is there any benefit? Why do we need to sprinkle __inc_xx in so many places?
It's only needed in the one place where the pages are isolated.
> On Mon, 20 Jul 2009, KOSAKI Motohiro wrote:
>
> > > On Fri, 17 Jul 2009, KOSAKI Motohiro wrote:
> > >
> > > > > Why do a separate pass over all the migrated pages? Can you add the
> > > > > _inc_xx somewhere after the page was isolated from the lru by calling
> > > > > try_to_unmap()?
> > > >
> > > > calling try_to_unmap()? The pages are isolated before migrate_pages() is
> > > > called. migrate_pages() has multiple callers, so I put this __inc_xx at the
> > > > top of migrate_pages().
> > >
> > > Then put the inc_xxx's where the pages are isolated.
> >
> > Is there any benefit? Why do we need to sprinkle __inc_xx in so many places?
>
> It's only needed in the one place where the pages are isolated.
Umm.. I don't understand the benefit yet, but I guess I can do that.
I'll think about it more deeply later.