2024-01-04 10:06:19

by Bixuan Cui

Subject: [PATCH -next v5 0/2] Make memory reclamation measurable

When system memory is low, kswapd reclaims memory. The key steps of
memory reclamation are:
1. shrink_lruvec
   * shrink_active_list, moves folios from the active LRU to the inactive LRU
   * shrink_inactive_list, reclaims folios from the inactive LRU list
2. shrink_slab
   * shrinker->count_objects(), calculates the freeable memory
   * shrinker->scan_objects(), reclaims the slab memory

The existing trace events in vmscan are as follows:

--do_try_to_free_pages
  --shrink_zones
    --trace_mm_vmscan_node_reclaim_begin (trace event)
    --shrink_node
      --shrink_node_memcgs
        --trace_mm_vmscan_memcg_shrink_begin (trace event)
        --shrink_lruvec
          --shrink_list
            --shrink_active_list
              --trace_mm_vmscan_lru_shrink_active (trace event)
            --shrink_inactive_list
              --trace_mm_vmscan_lru_shrink_inactive (trace event)
          --shrink_active_list
        --shrink_slab
          --do_shrink_slab
            --shrinker->count_objects()
            --trace_mm_shrink_slab_start (trace event)
            --shrinker->scan_objects()
            --trace_mm_shrink_slab_end (trace event)
        --trace_mm_vmscan_memcg_shrink_end (trace event)
    --trace_mm_vmscan_node_reclaim_end (trace event)

If we get the duration and the number of pages reclaimed for both the LRU
shrink and the slab shrink, we can measure memory reclamation, as follows:

Measuring memory reclamation with bpf:
LRU FILE:
  CPU  COMM     ShrinkActive(us)  ShrinkInactive(us)  Reclaim(page)
  7    kswapd0  26                51                  32
  7    kswapd0  52                47                  13
SLAB:
  CPU  COMM     OBJ_NAME                 Count_Dur(us)  Freeable(page)  Scan_Dur(us)  Reclaim(page)
  1    kswapd0  super_cache_scan.cfi_jt  2              341             3225          128
  7    kswapd0  super_cache_scan.cfi_jt  0              2247            8524          1024
  7    kswapd0  super_cache_scan.cfi_jt  2367           0               0             0

For this, add new trace events to shrink_active_list()/shrink_inactive_list()
and around shrinker->count_objects().
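
As a sketch of how such a measurement could be built (this is not the tool
used above; it assumes libbpf, and the ctx layout below mirrors the event's
TP_STRUCT__entry and must be checked against
/sys/kernel/tracing/events/vmscan/mm_shrink_count_end/format on the target
kernel), the Count_Dur column can be derived by pairing the new start/end
events per task:

/* count_dur.bpf.c -- pair mm_shrink_count_start/end per task (sketch) */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

/* Mirrors TP_STRUCT__entry() of mm_shrink_count_end; the first 8 bytes are
 * the common trace fields. Verify offsets against the tracefs 'format' file.
 */
struct mm_shrink_count_end_ctx {
	unsigned long long common;
	void *shr;
	void *shrink;
	long freeable;
	int nid;
};

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 8192);
	__type(key, __u64);		/* pid_tgid */
	__type(value, __u64);		/* start timestamp (ns) */
} start_ns SEC(".maps");

SEC("tracepoint/vmscan/mm_shrink_count_start")
int handle_count_start(void *ctx)
{
	__u64 id = bpf_get_current_pid_tgid();
	__u64 ts = bpf_ktime_get_ns();

	bpf_map_update_elem(&start_ns, &id, &ts, BPF_ANY);
	return 0;
}

SEC("tracepoint/vmscan/mm_shrink_count_end")
int handle_count_end(struct mm_shrink_count_end_ctx *ctx)
{
	__u64 id = bpf_get_current_pid_tgid();
	__u64 *tsp = bpf_map_lookup_elem(&start_ns, &id);

	if (tsp) {
		__u64 dur_us = (bpf_ktime_get_ns() - *tsp) / 1000;

		/* count_objects address, duration in us, freeable objects */
		bpf_printk("count %llx dur_us %llu freeable %ld",
			   (__u64)ctx->shrink, dur_us, ctx->freeable);
		bpf_map_delete_elem(&start_ns, &id);
	}
	return 0;
}

char LICENSE[] SEC("license") = "GPL";

The same per-task pairing works for the existing mm_shrink_slab_start/end
events (Scan_Dur) and for the LRU start/end events added in patch 2.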

Changes:
v5: * Use 'DECLARE_EVENT_CLASS(mm_vmscan_lru_shrink_start_template' to
      replace 'TRACE_EVENT(mm_vmscan_lru_shrink_inactive/active_start'
    * Add the explanation for adding the new shrink lru events to
      'mm: vmscan: add new event to trace shrink lru'
v4: Add Reviewed-by and Changelog to every patch.
v3: Swap the positions of 'nid' and 'freeable' to prevent a hole in the trace event.
v2: Modify trace_mm_vmscan_lru_shrink_inactive() in evict_folios() at the same time to fix a build error.

cuibixuan (2):
mm: shrinker: add new event to trace shrink count
mm: vmscan: add new event to trace shrink lru

include/trace/events/vmscan.h | 80 ++++++++++++++++++++++++++++++++++-
mm/shrinker.c | 4 ++
mm/vmscan.c | 11 +++--
3 files changed, 90 insertions(+), 5 deletions(-)

--
2.17.1



2024-01-04 10:06:30

by Bixuan Cui

Subject: [PATCH -next v5 1/2] mm: shrinker: add new event to trace shrink count

From: cuibixuan <[email protected]>

do_shrink_slab() calculates the freeable memory through shrinker->count_objects()
and then reclaims that memory through shrinker->scan_objects(). During memory
reclaim, shrinker->count_objects() itself can take a significant amount of time:

Function           spend(us)
ext4_es_count      4302
ext4_es_scan       12
super_cache_count  4195
super_cache_scan   2103

Therefore, adding trace events around count_objects() makes it possible to
measure the time spent in slab memory reclaim more accurately.

Example of output:
kswapd0-103 [003] ..... 1098.317942: mm_shrink_count_start: kfree_rcu_shrink_count.cfi_jt+0x0/0x8 00000000c540ff51: nid: 0
kswapd0-103 [003] ..... 1098.317951: mm_shrink_count_end: kfree_rcu_shrink_count.cfi_jt+0x0/0x8 00000000c540ff51: nid: 0 freeable:36
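
As an illustration only (a minimal, made-up shrinker, assuming the
shrinker_alloc()/shrinker_register() API in -next; "demo" names are
hypothetical), this is the callback pair being timed: the new
mm_shrink_count_start/end events bracket ->count_objects(), while the
existing mm_shrink_slab_start/end events bracket ->scan_objects():

#include <linux/module.h>
#include <linux/shrinker.h>
#include <linux/atomic.h>

static atomic_long_t demo_nr_cached = ATOMIC_LONG_INIT(128);
static struct shrinker *demo_shrinker;

/* Bracketed by the new mm_shrink_count_start/end events. */
static unsigned long demo_count(struct shrinker *s, struct shrink_control *sc)
{
	return atomic_long_read(&demo_nr_cached);
}

/* Bracketed by the existing mm_shrink_slab_start/end events. */
static unsigned long demo_scan(struct shrinker *s, struct shrink_control *sc)
{
	unsigned long freed = atomic_long_read(&demo_nr_cached);

	if (freed > sc->nr_to_scan)
		freed = sc->nr_to_scan;
	atomic_long_sub(freed, &demo_nr_cached);
	return freed;
}

static int __init demo_init(void)
{
	demo_shrinker = shrinker_alloc(0, "demo");
	if (!demo_shrinker)
		return -ENOMEM;
	demo_shrinker->count_objects = demo_count;
	demo_shrinker->scan_objects = demo_scan;
	shrinker_register(demo_shrinker);
	return 0;
}

static void __exit demo_exit(void)
{
	shrinker_free(demo_shrinker);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");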

Signed-off-by: Bixuan Cui <[email protected]>
Reviewed-by: Steven Rostedt <[email protected]>
---
Changes:
v5: * Use 'DECLARE_EVENT_CLASS(mm_vmscan_lru_shrink_start_template' to
      replace 'TRACE_EVENT(mm_vmscan_lru_shrink_inactive/active_start'
    * Add the explanation for adding the new shrink lru events to
      'mm: vmscan: add new event to trace shrink lru'
v4: * Add Reviewed-by and Changelog to every patch.
v3: * Swap the positions of 'nid' and 'freeable' to prevent a hole in the trace event.
v2: * Modify trace_mm_vmscan_lru_shrink_inactive() in evict_folios() at the same time to fix a build error (Andrew pointed out).

include/trace/events/vmscan.h | 49 +++++++++++++++++++++++++++++++++++
mm/shrinker.c | 4 +++
2 files changed, 53 insertions(+)

diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index 1a488c30afa5..b99cd28c9815 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -196,6 +196,55 @@ DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_memcg_softlimit_re
);
#endif /* CONFIG_MEMCG */

+TRACE_EVENT(mm_shrink_count_start,
+ TP_PROTO(struct shrinker *shr, struct shrink_control *sc),
+
+ TP_ARGS(shr, sc),
+
+ TP_STRUCT__entry(
+ __field(struct shrinker *, shr)
+ __field(void *, shrink)
+ __field(int, nid)
+ ),
+
+ TP_fast_assign(
+ __entry->shr = shr;
+ __entry->shrink = shr->count_objects;
+ __entry->nid = sc->nid;
+ ),
+
+ TP_printk("%pS %p: nid: %d",
+ __entry->shrink,
+ __entry->shr,
+ __entry->nid)
+);
+
+TRACE_EVENT(mm_shrink_count_end,
+ TP_PROTO(struct shrinker *shr, struct shrink_control *sc, long freeable),
+
+ TP_ARGS(shr, sc, freeable),
+
+ TP_STRUCT__entry(
+ __field(struct shrinker *, shr)
+ __field(void *, shrink)
+ __field(long, freeable)
+ __field(int, nid)
+ ),
+
+ TP_fast_assign(
+ __entry->shr = shr;
+ __entry->shrink = shr->count_objects;
+ __entry->freeable = freeable;
+ __entry->nid = sc->nid;
+ ),
+
+ TP_printk("%pS %p: nid: %d freeable:%ld",
+ __entry->shrink,
+ __entry->shr,
+ __entry->nid,
+ __entry->freeable)
+);
+
TRACE_EVENT(mm_shrink_slab_start,
TP_PROTO(struct shrinker *shr, struct shrink_control *sc,
long nr_objects_to_shrink, unsigned long cache_items,
diff --git a/mm/shrinker.c b/mm/shrinker.c
index dd91eab43ed3..d0c7bf61db61 100644
--- a/mm/shrinker.c
+++ b/mm/shrinker.c
@@ -379,7 +379,11 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
: SHRINK_BATCH;
long scanned = 0, next_deferred;

+ trace_mm_shrink_count_start(shrinker, shrinkctl);
+
freeable = shrinker->count_objects(shrinker, shrinkctl);
+
+ trace_mm_shrink_count_end(shrinker, shrinkctl, freeable);
if (freeable == 0 || freeable == SHRINK_EMPTY)
return freeable;

--
2.17.1


2024-01-04 10:07:09

by Bixuan Cui

Subject: [PATCH -next v5 2/2] mm: vmscan: add new event to trace shrink lru

From: cuibixuan <[email protected]>

Page reclaim is an important part of memory reclaim and includes:
* shrink_active_list(), moves folios from the active LRU to the inactive LRU
* shrink_inactive_list(), reclaims folios from the inactive LRU list

Add new start/end events so that the execution time of both functions can be
calculated, to better evaluate the whole memory reclaim path.

Example of output:
kswapd0-103 [007] ..... 1098.353020: mm_vmscan_lru_shrink_active_start: nid=0
kswapd0-103 [007] ..... 1098.353040: mm_vmscan_lru_shrink_active_end: nid=0 nr_taken=32 nr_active=0 nr_deactivated=32 nr_referenced=0 priority=6 flags=RECLAIM_WB_FILE|RECLAIM_WB_ASYNC
kswapd0-103 [007] ..... 1098.353040: mm_vmscan_lru_shrink_inactive_start: nid=0
kswapd0-103 [007] ..... 1098.353094: mm_vmscan_lru_shrink_inactive_end: nid=0 nr_scanned=32 nr_reclaimed=0 nr_dirty=0 nr_writeback=0 nr_congested=0 nr_immediate=0 nr_activate_anon=0 nr_activate_file=0 nr_ref_keep=32 nr_unmap_fail=0 priority=6 flags=RECLAIM_WB_ANON|RECLAIM_WB_ASYNC
kswapd0-103 [007] ..... 1098.353094: mm_vmscan_lru_shrink_inactive_start: nid=0
kswapd0-103 [007] ..... 1098.353162: mm_vmscan_lru_shrink_inactive_end: nid=0 nr_scanned=32 nr_reclaimed=21 nr_dirty=0 nr_writeback=0 nr_congested=0 nr_immediate=0 nr_activate_anon=0 nr_activate_file=0 nr_ref_keep=11 nr_unmap_fail=0 priority=6 flags=RECLAIM_WB_FILE|RECLAIM_WB_ASYNC
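
As a minimal sketch (assuming tracefs is mounted at /sys/kernel/tracing), the
four events can be enabled from user space like this; the durations are then
the end minus start timestamps for the same task in trace_pipe:

#include <stdio.h>

static const char * const events[] = {
	"/sys/kernel/tracing/events/vmscan/mm_vmscan_lru_shrink_active_start/enable",
	"/sys/kernel/tracing/events/vmscan/mm_vmscan_lru_shrink_active_end/enable",
	"/sys/kernel/tracing/events/vmscan/mm_vmscan_lru_shrink_inactive_start/enable",
	"/sys/kernel/tracing/events/vmscan/mm_vmscan_lru_shrink_inactive_end/enable",
};

int main(void)
{
	unsigned int i;

	for (i = 0; i < sizeof(events) / sizeof(events[0]); i++) {
		FILE *f = fopen(events[i], "w");

		if (!f) {
			perror(events[i]);
			return 1;
		}
		fputs("1\n", f);
		fclose(f);
	}
	return 0;
}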

Signed-off-by: Bixuan Cui <[email protected]>
Reviewed-by: Andrew Morton <[email protected]>
---
Changes:
v5: * Use 'DECLARE_EVENT_CLASS(mm_vmscan_lru_shrink_start_template' to
      replace 'TRACE_EVENT(mm_vmscan_lru_shrink_inactive/active_start'
    * Add the explanation for adding the new shrink lru events to
      'mm: vmscan: add new event to trace shrink lru'
v4: * Add Reviewed-by and Changelog to every patch.
v3: * Swap the positions of 'nid' and 'freeable' to prevent a hole in the trace event.
v2: * Modify trace_mm_vmscan_lru_shrink_inactive() in evict_folios() at the same time to fix a build error (Andrew pointed out).

include/trace/events/vmscan.h | 31 +++++++++++++++++++++++++++++--
mm/vmscan.c | 11 ++++++++---
2 files changed, 37 insertions(+), 5 deletions(-)

diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index b99cd28c9815..4793d952c248 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -395,7 +395,34 @@ TRACE_EVENT(mm_vmscan_write_folio,
show_reclaim_flags(__entry->reclaim_flags))
);

-TRACE_EVENT(mm_vmscan_lru_shrink_inactive,
+DECLARE_EVENT_CLASS(mm_vmscan_lru_shrink_start_template,
+
+ TP_PROTO(int nid),
+
+ TP_ARGS(nid),
+
+ TP_STRUCT__entry(
+ __field(int, nid)
+ ),
+
+ TP_fast_assign(
+ __entry->nid = nid;
+ ),
+
+ TP_printk("nid=%d", __entry->nid)
+);
+
+DEFINE_EVENT(mm_vmscan_lru_shrink_start_template, mm_vmscan_lru_shrink_inactive_start,
+ TP_PROTO(int nid),
+ TP_ARGS(nid)
+);
+
+DEFINE_EVENT(mm_vmscan_lru_shrink_start_template, mm_vmscan_lru_shrink_active_start,
+ TP_PROTO(int nid),
+ TP_ARGS(nid)
+);
+
+TRACE_EVENT(mm_vmscan_lru_shrink_inactive_end,

TP_PROTO(int nid,
unsigned long nr_scanned, unsigned long nr_reclaimed,
@@ -446,7 +473,7 @@ TRACE_EVENT(mm_vmscan_lru_shrink_inactive,
show_reclaim_flags(__entry->reclaim_flags))
);

-TRACE_EVENT(mm_vmscan_lru_shrink_active,
+TRACE_EVENT(mm_vmscan_lru_shrink_active_end,

TP_PROTO(int nid, unsigned long nr_taken,
unsigned long nr_active, unsigned long nr_deactivated,
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 4e3b835c6b4a..a44d9624d60f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1906,6 +1906,8 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan,
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
bool stalled = false;

+ trace_mm_vmscan_lru_shrink_inactive_start(pgdat->node_id);
+
while (unlikely(too_many_isolated(pgdat, file, sc))) {
if (stalled)
return 0;
@@ -1990,7 +1992,7 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan,
if (file)
sc->nr.file_taken += nr_taken;

- trace_mm_vmscan_lru_shrink_inactive(pgdat->node_id,
+ trace_mm_vmscan_lru_shrink_inactive_end(pgdat->node_id,
nr_scanned, nr_reclaimed, &stat, sc->priority, file);
return nr_reclaimed;
}
@@ -2028,6 +2030,8 @@ static void shrink_active_list(unsigned long nr_to_scan,
int file = is_file_lru(lru);
struct pglist_data *pgdat = lruvec_pgdat(lruvec);

+ trace_mm_vmscan_lru_shrink_active_start(pgdat->node_id);
+
lru_add_drain();

spin_lock_irq(&lruvec->lru_lock);
@@ -2107,7 +2111,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
lru_note_cost(lruvec, file, 0, nr_rotated);
mem_cgroup_uncharge_list(&l_active);
free_unref_page_list(&l_active);
- trace_mm_vmscan_lru_shrink_active(pgdat->node_id, nr_taken, nr_activate,
+ trace_mm_vmscan_lru_shrink_active_end(pgdat->node_id, nr_taken, nr_activate,
nr_deactivate, nr_rotated, sc->priority, file);
}

@@ -4524,9 +4528,10 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap
if (list_empty(&list))
return scanned;
retry:
+ trace_mm_vmscan_lru_shrink_inactive_start(pgdat->node_id);
reclaimed = shrink_folio_list(&list, pgdat, sc, &stat, false);
sc->nr_reclaimed += reclaimed;
- trace_mm_vmscan_lru_shrink_inactive(pgdat->node_id,
+ trace_mm_vmscan_lru_shrink_inactive_end(pgdat->node_id,
scanned, reclaimed, &stat, sc->priority,
type ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON);

--
2.17.1