2023-10-20 08:44:50

by Huan Yang

Subject: [PATCH 0/2] check MGLRU promoted folios without holding page lock

This patchset adds a new reclaim_stat field, nr_promote, to count
folios that MGLRU promoted before shrink_folio_list() touched them, and
reports it in mm_vmscan_lru_shrink_inactive. It also fixes the MGLRU
path to report nr_taken (the folios actually isolated) instead of the
raw scanned count in that trace event. (patch 1)

Based on this trace, many folios turn out to be promoted before
shrink_folio_list() touches them. Given how frequently this happens, it
is better to do the check before taking the folio lock. (patch 2)

Since such folios are then never actually touched, the nr_promote trace
becomes unnecessary and is removed again.

This patchset is based on:
https://lore.kernel.org/all/[email protected]/

That earlier version had many mistakes pointed out by Yu Zhao (thanks).

Huan Yang (2):
tracing: mm: multigen-lru: fix mglru trace
mm: multi-gen LRU: move promoted folio out of lock

include/trace/events/vmscan.h | 3 ++-
mm/vmscan.c | 31 ++++++++++++++++++++++---------
2 files changed, 24 insertions(+), 10 deletions(-)

--
2.34.1


2023-10-20 08:44:57

by Huan Yang

Subject: [PATCH 2/2] mm: multi-gen LRU: move promoted folio out of lock

The nr_promote trace added in the previous patch shows that many folios
are promoted before shrink_folio_list() even checks them.

I tested with the commands below and grepped for nr_reclaimed=0:
```
trace-cmd record -e vmscan:mm_vmscan_lru_shrink_inactive \
stressapptest -M 8096 -s 120 -m 1 -W
trace-cmd report | grep "nr_reclaim\=0"
```
Many entries like the following show up:
```
<...>-9042 [001] 43.290759: mm_vmscan_lru_shrink_inactive:
nid=0 nr_scanned=64 nr_reclaimed=0 nr_dirty=0 nr_writeback=0
nr_congested=0 nr_immediate=0 nr_activate_anon=0 nr_activate_file=1
nr_ref_keep=0 nr_unmap_fail=0 nr_promote=63 flags=RECLAIM_WB_FILE|RECLAIM_WB_ASYNC
```

Many of the scanned folios had already been promoted, so it is better
to do the promotion check before folio_trylock().

Now that promoted folios are skipped before they are touched, the
nr_promote trace is no longer needed, so remove it.
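
As a minimal sketch, the resulting ordering in shrink_folio_list() looks
roughly like this (illustrative only; the actual change is in the diff
below):
```
	folio = lru_to_folio(folio_list);
	list_del(&folio->lru);

	/* cheap promotion check first, no folio lock needed */
	if (lru_gen_enabled() && !ignore_references &&
	    folio_mapped(folio) && folio_test_referenced(folio))
		goto keep;

	/* only lock folios we may actually reclaim */
	if (!folio_trylock(folio))
		goto keep;
```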

Signed-off-by: Huan Yang <[email protected]>
---
include/linux/vmstat.h | 1 -
include/trace/events/vmscan.h | 6 ++----
mm/vmscan.c | 12 +++++-------
3 files changed, 7 insertions(+), 12 deletions(-)

diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index a3710ce08434..fed855bae6d8 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -32,7 +32,6 @@ struct reclaim_stat {
unsigned nr_ref_keep;
unsigned nr_unmap_fail;
unsigned nr_lazyfree_fail;
- unsigned nr_promote;
};

enum writeback_stat_item {
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index ffcf288879e0..41964d6e8dd1 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -366,7 +366,6 @@ TRACE_EVENT(mm_vmscan_lru_shrink_inactive,
__field(unsigned int, nr_activate1)
__field(unsigned long, nr_ref_keep)
__field(unsigned long, nr_unmap_fail)
- __field(unsigned long, nr_promote)
__field(int, priority)
__field(int, reclaim_flags)
),
@@ -383,20 +382,19 @@ TRACE_EVENT(mm_vmscan_lru_shrink_inactive,
__entry->nr_activate1 = stat->nr_activate[1];
__entry->nr_ref_keep = stat->nr_ref_keep;
__entry->nr_unmap_fail = stat->nr_unmap_fail;
- __entry->nr_promote = stat->nr_promote;
__entry->priority = priority;
__entry->reclaim_flags = trace_reclaim_flags(file);
),

TP_printk("nid=%d nr_scanned=%ld nr_reclaimed=%ld nr_dirty=%ld nr_writeback=%ld nr_congested=%ld nr_immediate=%ld nr_activate_anon=%d"
- " nr_activate_file=%d nr_ref_keep=%ld nr_unmap_fail=%ld nr_promote=%ld priority=%d flags=%s",
+ " nr_activate_file=%d nr_ref_keep=%ld nr_unmap_fail=%ld priority=%d flags=%s",
__entry->nid,
__entry->nr_scanned, __entry->nr_reclaimed,
__entry->nr_dirty, __entry->nr_writeback,
__entry->nr_congested, __entry->nr_immediate,
__entry->nr_activate0, __entry->nr_activate1,
__entry->nr_ref_keep, __entry->nr_unmap_fail,
- __entry->nr_promote, __entry->priority,
+ __entry->priority,
show_reclaim_flags(__entry->reclaim_flags))
);

diff --git a/mm/vmscan.c b/mm/vmscan.c
index fb5df298c955..98a7b0f738bd 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1045,6 +1045,11 @@ static unsigned int shrink_folio_list(struct list_head *folio_list,
folio = lru_to_folio(folio_list);
list_del(&folio->lru);

+ /* folio_update_gen() tried to promote this page? */
+ if (lru_gen_enabled() && !ignore_references &&
+ folio_mapped(folio) && folio_test_referenced(folio))
+ goto keep;
+
if (!folio_trylock(folio))
goto keep;

@@ -1061,13 +1066,6 @@ static unsigned int shrink_folio_list(struct list_head *folio_list,
if (!sc->may_unmap && folio_mapped(folio))
goto keep_locked;

- /* folio_update_gen() tried to promote this page? */
- if (lru_gen_enabled() && !ignore_references &&
- folio_mapped(folio) && folio_test_referenced(folio)) {
- stat->nr_promote += nr_pages;
- goto keep_locked;
- }
-
/*
* The number of dirty pages determines if a node is marked
* reclaim_congested. kswapd will stall and start writing
--
2.34.1

2023-10-20 08:45:02

by Huan Yang

Subject: [PATCH 1/2] tracing: mm: multigen-lru: fix mglru trace

Add a new reclaim_stat counter, nr_promote: the number of pages that
shrink_folio_list() skipped because folio_update_gen() had promoted
them.

Also make evict_folios() pass the correct count, i.e. the folios
actually handed to shrink_folio_list(), into
trace_mm_vmscan_lru_shrink_inactive instead of the raw scanned count.

Incorrect output before this patch:
```
kswapd0-89 [000] 64.887613: mm_vmscan_lru_shrink_inactive:
nid=0 nr_scanned=64 nr_reclaimed=0 nr_dirty=0 nr_writeback=0
nr_congested=0 nr_immediate=0 nr_activate_anon=0 nr_activate_file=0
nr_ref_keep=0 nr_unmap_fail=0 priority=4
flags=RECLAIM_WB_FILE|RECLAIM_WB_ASYNC
```
Correct output with this patch:
```
<...>-9041 [006] 43.738481: mm_vmscan_lru_shrink_inactive:
nid=0 nr_scanned=13 nr_reclaimed=0 nr_dirty=0 nr_writeback=0
nr_congested=0 nr_immediate=0 nr_activate_anon=9 nr_activate_file=0
nr_ref_keep=0 nr_unmap_fail=0 nr_promote=4 priority=12
flags=RECLAIM_WB_ANON|RECLAIM_WB_ASYNC
```
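
The nr_taken computation boils down to this minimal sketch (mirroring
the diff below: shrink_folio_list() bumps sc->nr_scanned for every
folio it actually processes, so the delta across the call is the number
of folios taken):
```
	unsigned long nr_taken = sc->nr_scanned;	/* snapshot on entry */

	reclaimed = shrink_folio_list(&list, pgdat, sc, &stat, false);
	total_reclaimed += reclaimed;
	/* ... possibly retry shrink_folio_list() on the remaining folios ... */

	nr_taken = sc->nr_scanned - nr_taken;	/* folios actually processed */
	trace_mm_vmscan_lru_shrink_inactive(pgdat->node_id, nr_taken,
					    total_reclaimed, &stat,
					    sc->priority, type);
```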

Signed-off-by: Huan Yang <[email protected]>
---
include/linux/vmstat.h | 1 +
include/trace/events/vmscan.h | 7 +++++--
mm/vmscan.c | 25 ++++++++++++++++++++-----
3 files changed, 26 insertions(+), 7 deletions(-)

diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index fed855bae6d8..a3710ce08434 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -32,6 +32,7 @@ struct reclaim_stat {
unsigned nr_ref_keep;
unsigned nr_unmap_fail;
unsigned nr_lazyfree_fail;
+ unsigned nr_promote;
};

enum writeback_stat_item {
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index 1a488c30afa5..ffcf288879e0 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -366,6 +366,7 @@ TRACE_EVENT(mm_vmscan_lru_shrink_inactive,
__field(unsigned int, nr_activate1)
__field(unsigned long, nr_ref_keep)
__field(unsigned long, nr_unmap_fail)
+ __field(unsigned long, nr_promote)
__field(int, priority)
__field(int, reclaim_flags)
),
@@ -382,18 +383,20 @@ TRACE_EVENT(mm_vmscan_lru_shrink_inactive,
__entry->nr_activate1 = stat->nr_activate[1];
__entry->nr_ref_keep = stat->nr_ref_keep;
__entry->nr_unmap_fail = stat->nr_unmap_fail;
+ __entry->nr_promote = stat->nr_promote;
__entry->priority = priority;
__entry->reclaim_flags = trace_reclaim_flags(file);
),

- TP_printk("nid=%d nr_scanned=%ld nr_reclaimed=%ld nr_dirty=%ld nr_writeback=%ld nr_congested=%ld nr_immediate=%ld nr_activate_anon=%d nr_activate_file=%d nr_ref_keep=%ld nr_unmap_fail=%ld priority=%d flags=%s",
+ TP_printk("nid=%d nr_scanned=%ld nr_reclaimed=%ld nr_dirty=%ld nr_writeback=%ld nr_congested=%ld nr_immediate=%ld nr_activate_anon=%d"
+ " nr_activate_file=%d nr_ref_keep=%ld nr_unmap_fail=%ld nr_promote=%ld priority=%d flags=%s",
__entry->nid,
__entry->nr_scanned, __entry->nr_reclaimed,
__entry->nr_dirty, __entry->nr_writeback,
__entry->nr_congested, __entry->nr_immediate,
__entry->nr_activate0, __entry->nr_activate1,
__entry->nr_ref_keep, __entry->nr_unmap_fail,
- __entry->priority,
+ __entry->nr_promote, __entry->priority,
show_reclaim_flags(__entry->reclaim_flags))
);

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 2cc0cb41fb32..fb5df298c955 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1063,8 +1063,10 @@ static unsigned int shrink_folio_list(struct list_head *folio_list,

/* folio_update_gen() tried to promote this page? */
if (lru_gen_enabled() && !ignore_references &&
- folio_mapped(folio) && folio_test_referenced(folio))
+ folio_mapped(folio) && folio_test_referenced(folio)) {
+ stat->nr_promote += nr_pages;
goto keep_locked;
+ }

/*
* The number of dirty pages determines if a node is marked
@@ -4495,6 +4497,8 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap
int type;
int scanned;
int reclaimed;
+ unsigned long nr_taken = sc->nr_scanned;
+ unsigned int total_reclaimed = 0;
LIST_HEAD(list);
LIST_HEAD(clean);
struct folio *folio;
@@ -4521,10 +4525,7 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap
return scanned;
retry:
reclaimed = shrink_folio_list(&list, pgdat, sc, &stat, false);
- sc->nr_reclaimed += reclaimed;
- trace_mm_vmscan_lru_shrink_inactive(pgdat->node_id,
- scanned, reclaimed, &stat, sc->priority,
- type ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON);
+ total_reclaimed += reclaimed;

list_for_each_entry_safe_reverse(folio, next, &list, lru) {
if (!folio_evictable(folio)) {
@@ -4582,6 +4583,20 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap
goto retry;
}

+ /*
+ * The nr_scanned returned by scan_folios() also counts folios that
+ * were not isolated. shrink_folio_list() bumps sc->nr_scanned for
+ * each folio it actually processes, so snapshot sc->nr_scanned on
+ * entry and subtract it here to get the real nr_taken for the
+ * trace below.
+ */
+ nr_taken = sc->nr_scanned - nr_taken;
+
+ sc->nr_reclaimed += total_reclaimed;
+ trace_mm_vmscan_lru_shrink_inactive(pgdat->node_id, nr_taken,
+ total_reclaimed, &stat,
+ sc->priority, type);
+
return scanned;
}

--
2.34.1