2023-01-18 00:12:23

by Minchan Kim

[permalink] [raw]
Subject: [PATCH 1/3] mm: return the number of pages successfully paged out

The reclaim_pages MADV_PAGEOUT uses needs to return the number of
pages paged-out successfully, not only the number of reclaimed pages
in the operation because those pages paged-out successfully will be
reclaimed easily at the memory pressure due to asynchronous writeback
rotation(i.e., PG_reclaim with folio_rotate_reclaimable).

This patch renames the reclaim_pages with paging_out(with hope that
it's clear from operation point of view) and then adds a additional
stat in reclaim_stat to represent the number of paged-out but kept
in the memory for rotation on writeback completion.

With that stat, madvise_pageout can know how many pages were paged-out
successfully as well as reclaimed. The return value will be used for
statistics in next patch.

Signed-off-by: Minchan Kim <[email protected]>
---
include/linux/swap.h | 2 +-
include/linux/vmstat.h | 1 +
mm/damon/paddr.c | 2 +-
mm/madvise.c | 4 ++--
mm/vmscan.c | 31 ++++++++++++++++++++++---------
5 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/include/linux/swap.h b/include/linux/swap.h
index a18cf4b7c724..0ada46b595cd 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -435,7 +435,7 @@ extern unsigned long shrink_all_memory(unsigned long nr_pages);
extern int vm_swappiness;
long remove_mapping(struct address_space *mapping, struct folio *folio);

-extern unsigned long reclaim_pages(struct list_head *page_list);
+extern unsigned int paging_out(struct list_head *page_list);
#ifdef CONFIG_NUMA
extern int node_reclaim_mode;
extern int sysctl_min_unmapped_ratio;
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 19cf5b6892ce..cda903a8fa6e 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -28,6 +28,7 @@ struct reclaim_stat {
unsigned nr_writeback;
unsigned nr_immediate;
unsigned nr_pageout;
+ unsigned nr_pageout_keep;
unsigned nr_activate[ANON_AND_FILE];
unsigned nr_ref_keep;
unsigned nr_unmap_fail;
diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c
index e1a4315c4be6..be2a731d3459 100644
--- a/mm/damon/paddr.c
+++ b/mm/damon/paddr.c
@@ -226,7 +226,7 @@ static unsigned long damon_pa_pageout(struct damon_region *r)
put_page(page);
}
}
- applied = reclaim_pages(&page_list);
+ applied = paging_out(&page_list);
cond_resched();
return applied * PAGE_SIZE;
}
diff --git a/mm/madvise.c b/mm/madvise.c
index c7105ec6d08c..a4a03054ab6b 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -400,7 +400,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
huge_unlock:
spin_unlock(ptl);
if (pageout)
- reclaim_pages(&page_list);
+ paging_out(&page_list);
return 0;
}

@@ -491,7 +491,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
arch_leave_lazy_mmu_mode();
pte_unmap_unlock(orig_pte, ptl);
if (pageout)
- reclaim_pages(&page_list);
+ paging_out(&page_list);
cond_resched();

return 0;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 04d8b88e5216..579a7ebbe24a 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1933,6 +1933,7 @@ static unsigned int shrink_folio_list(struct list_head *folio_list,
goto activate_locked;
case PAGE_SUCCESS:
stat->nr_pageout += nr_pages;
+ stat->nr_pageout_keep += nr_pages;

if (folio_test_writeback(folio))
goto keep;
@@ -1948,6 +1949,8 @@ static unsigned int shrink_folio_list(struct list_head *folio_list,
if (folio_test_dirty(folio) ||
folio_test_writeback(folio))
goto keep_locked;
+
+ stat->nr_pageout_keep -= nr_pages;
mapping = folio_mapping(folio);
fallthrough;
case PAGE_CLEAN:
@@ -2646,9 +2649,9 @@ static void shrink_active_list(unsigned long nr_to_scan,
}

static unsigned int reclaim_folio_list(struct list_head *folio_list,
- struct pglist_data *pgdat)
+ struct pglist_data *pgdat,
+ struct reclaim_stat *stat)
{
- struct reclaim_stat dummy_stat;
unsigned int nr_reclaimed;
struct folio *folio;
struct scan_control sc = {
@@ -2659,7 +2662,7 @@ static unsigned int reclaim_folio_list(struct list_head *folio_list,
.no_demotion = 1,
};

- nr_reclaimed = shrink_folio_list(folio_list, pgdat, &sc, &dummy_stat, false);
+ nr_reclaimed = shrink_folio_list(folio_list, pgdat, &sc, stat, false);
while (!list_empty(folio_list)) {
folio = lru_to_folio(folio_list);
list_del(&folio->lru);
@@ -2669,15 +2672,23 @@ static unsigned int reclaim_folio_list(struct list_head *folio_list,
return nr_reclaimed;
}

-unsigned long reclaim_pages(struct list_head *folio_list)
+/*
+ * paging_out - reclaim clean pages and write dirty pages into storage
+ * @folio_list: pages for paging out
+ *
+ * paging_out() writes dirty pages to backing storage and/or reclaim
+ * clean pages from memory. Returns the number of written/reclaimed pages.
+ */
+unsigned int paging_out(struct list_head *folio_list)
{
int nid;
- unsigned int nr_reclaimed = 0;
+ unsigned int nr_pageout = 0;
LIST_HEAD(node_folio_list);
unsigned int noreclaim_flag;
+ struct reclaim_stat stat;

if (list_empty(folio_list))
- return nr_reclaimed;
+ return nr_pageout;

noreclaim_flag = memalloc_noreclaim_save();

@@ -2691,15 +2702,17 @@ unsigned long reclaim_pages(struct list_head *folio_list)
continue;
}

- nr_reclaimed += reclaim_folio_list(&node_folio_list, NODE_DATA(nid));
+ nr_pageout += reclaim_folio_list(&node_folio_list, NODE_DATA(nid), &stat);
+ nr_pageout += stat.nr_pageout_keep;
nid = folio_nid(lru_to_folio(folio_list));
} while (!list_empty(folio_list));

- nr_reclaimed += reclaim_folio_list(&node_folio_list, NODE_DATA(nid));
+ nr_pageout += reclaim_folio_list(&node_folio_list, NODE_DATA(nid), &stat);
+ nr_pageout += stat.nr_pageout_keep;

memalloc_noreclaim_restore(noreclaim_flag);

- return nr_reclaimed;
+ return nr_pageout;
}

static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
--
2.39.0.314.g84b9a713c41-goog


2023-01-18 00:13:30

by Minchan Kim

[permalink] [raw]
Subject: [PATCH 3/3] mm: add vmstat statistics for madvise_[cold|pageout]

madvise LRU manipulation APIs need to scan address ranges to find
present pages at page table and provides advice hints for them.

Likewise pg[scan/steal] count on vmstat, madvise_pg[scanned/hinted]
shows the proactive reclaim efficiency so this patch addes those
two statistics in vmstat.

Signed-off-by: Minchan Kim <[email protected]>
---
include/linux/vm_event_item.h | 2 ++
mm/madvise.c | 19 +++++++++++++++----
mm/vmstat.c | 2 ++
3 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 3518dba1e02f..8b9fb2e151eb 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -49,6 +49,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
PGSCAN_FILE,
PGSTEAL_ANON,
PGSTEAL_FILE,
+ MADVISE_PGSCANNED,
+ MADVISE_PGHINTED,
#ifdef CONFIG_NUMA
PGSCAN_ZONE_RECLAIM_FAILED,
#endif
diff --git a/mm/madvise.c b/mm/madvise.c
index a4a03054ab6b..0e58545ff6e9 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -334,6 +334,8 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
spinlock_t *ptl;
struct page *page = NULL;
LIST_HEAD(page_list);
+ unsigned int nr_scanned = 0;
+ unsigned int nr_hinted = 0;

if (fatal_signal_pending(current))
return -EINTR;
@@ -343,6 +345,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
pmd_t orig_pmd;
unsigned long next = pmd_addr_end(addr, end);

+ nr_scanned += HPAGE_PMD_NR
tlb_change_page_size(tlb, HPAGE_PMD_SIZE);
ptl = pmd_trans_huge_lock(pmd, vma);
if (!ptl)
@@ -396,11 +399,15 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
list_add(&page->lru, &page_list);
}
} else
- deactivate_page(page);
+ if (deactivate_page(page))
+ nr_hinted += HPAGE_PMD_NR;
huge_unlock:
spin_unlock(ptl);
if (pageout)
- paging_out(&page_list);
+ nr_hinted += paging_out(&page_list);
+
+ count_vm_events(MADVISE_PGSCANNED, nr_scanned);
+ count_vm_events(MADVISE_PGHINTED, nr_hinted);
return 0;
}

@@ -414,6 +421,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
arch_enter_lazy_mmu_mode();
for (; addr < end; pte++, addr += PAGE_SIZE) {
ptent = *pte;
+ nr_scanned++;

if (pte_none(ptent))
continue;
@@ -485,14 +493,17 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
list_add(&page->lru, &page_list);
}
} else
- deactivate_page(page);
+ if (deactivate_page(page))
+ nr_hinted++;
}

arch_leave_lazy_mmu_mode();
pte_unmap_unlock(orig_pte, ptl);
if (pageout)
- paging_out(&page_list);
+ nr_hinted += paging_out(&page_list);
cond_resched();
+ count_vm_events(MADVISE_PGSCANNED, nr_scanned);
+ count_vm_events(MADVISE_PGHINTED, nr_hinted);

return 0;
}
diff --git a/mm/vmstat.c b/mm/vmstat.c
index b2371d745e00..0139feade854 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1280,6 +1280,8 @@ const char * const vmstat_text[] = {
"pgscan_file",
"pgsteal_anon",
"pgsteal_file",
+ "madvise_pgscanned",
+ "madvise_pghinted",

#ifdef CONFIG_NUMA
"zone_reclaim_failed",
--
2.39.0.314.g84b9a713c41-goog

2023-01-18 00:51:36

by Minchan Kim

[permalink] [raw]
Subject: [PATCH 2/3] mm: return boolean for deactivate_page

Returns true if the page was successfully deactivated. The return value will
be used for statistics in next patch.

Signed-off-by: Minchan Kim <[email protected]>
---
include/linux/swap.h | 2 +-
mm/swap.c | 6 ++++--
2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 0ada46b595cd..803e5fa4cd86 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -409,7 +409,7 @@ extern void lru_add_drain(void);
extern void lru_add_drain_cpu(int cpu);
extern void lru_add_drain_cpu_zone(struct zone *zone);
extern void lru_add_drain_all(void);
-extern void deactivate_page(struct page *page);
+extern bool deactivate_page(struct page *page);
extern void mark_page_lazyfree(struct page *page);
extern void swap_setup(void);

diff --git a/mm/swap.c b/mm/swap.c
index 955930f41d20..52532859c05b 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -725,9 +725,9 @@ void deactivate_file_folio(struct folio *folio)
*
* deactivate_page() moves @page to the inactive list if @page was on the active
* list and was not an unevictable page. This is done to accelerate the reclaim
- * of @page.
+ * of @page. If page was deactivated successfully, returns true.
*/
-void deactivate_page(struct page *page)
+bool deactivate_page(struct page *page)
{
struct folio *folio = page_folio(page);

@@ -740,7 +740,9 @@ void deactivate_page(struct page *page)
fbatch = this_cpu_ptr(&cpu_fbatches.lru_deactivate);
folio_batch_add_and_move(fbatch, folio, lru_deactivate_fn);
local_unlock(&cpu_fbatches.lock);
+ return true;
}
+ return false;
}

/**
--
2.39.0.314.g84b9a713c41-goog

2023-01-18 00:54:44

by Minchan Kim

[permalink] [raw]
Subject: Re: [PATCH 1/3] mm: return the number of pages successfully paged out

On Tue, Jan 17, 2023 at 03:53:12PM -0800, Andrew Morton wrote:
>
> I'm all hung up on the naming of everything.
>
> > mm: return the number of pages successfully paged out
>
> This is a vague title - MM is a big place. Perhaps "mm/vmscan: ..."
>
> On Tue, 17 Jan 2023 15:16:30 -0800 Minchan Kim <[email protected]> wrote:
>
> > The reclaim_pages MADV_PAGEOUT uses needs to return the number of
> > pages paged-out successfully, not only the number of reclaimed pages
> > in the operation because those pages paged-out successfully will be
> > reclaimed easily at the memory pressure due to asynchronous writeback
> > rotation(i.e., PG_reclaim with folio_rotate_reclaimable).
>
> So... what does "paged out" actually mean? "writeback to backing
> store was initiated"? From an application's point of view it means "no
> longer in page tables needs a fault to get it back", no?

Yes, both are correct in my view since pageout is initiated after
unmapping the page from page table and think that's better wording
to be in description. Let me use the explanation in the description
at next spin. Thanks.

>
> > This patch renames the reclaim_pages with paging_out(with hope that
>
> "page_out" or "pageout" would be better than "paging_out".

pageout was taken from vmscan.c. Then I will use the page_out unless
some suggests better naming.

>
> > it's clear from operation point of view) and then adds a additional
> > stat in reclaim_stat to represent the number of paged-out but kept
> > in the memory for rotation on writeback completion.
>
> So it's the number of pages against which we have initiated writeback.
> Why not call it "nr_writeback" or similar?

Currently, nr_writeback is used to indicate how many times shrink_folio_list
encoutered PG_writeback in the page list.

TLDR: I need to distinguish syncronous writeback and asynchronous
writeback.

Actually, I wanted to use nr_pageout but it would make double counting
from madvise_pageout PoV depending on backing storage's speed.
(For example, madvise_pageout tried 32 pages and 12 pages were swapped
out very quickly so those 12 pages were reclaimed under shrink_folio_list
context so it returns 12. But the other 20 pages were swapped out slowly
due to the device was congested so they were rotated back when the write
was done. In the case, madvise_pageout want to get 32 as successful paging
out's result rather than only 12 pages)

Maybe, nr_pageout_async?

>
> > With that stat, madvise_pageout can know how many pages were paged-out
> > successfully as well as reclaimed. The return value will be used for
> > statistics in next patch.
> >
> > ...
> >
> > -unsigned long reclaim_pages(struct list_head *folio_list)
> > +/*
> > + * paging_out - reclaim clean pages and write dirty pages into storage
> > + * @folio_list: pages for paging out
> > + *
> > + * paging_out() writes dirty pages to backing storage and/or reclaim
> > + * clean pages from memory. Returns the number of written/reclaimed pages.
>
> s/reclaim/reclaims/
>
> "and/or" it vague - just "or", I think.

Since the page list could have immediate reclaimable pages(A) when backing storage
is enough fast and just written pages(B) when the backing storage is slowed
down, even A + B if the device is congested in the middle of doing
operation, I think "and/or" is right.

>
> "written/reclaimed" is vague. "number of reclaimed pages plus the
> number of pages against which writeback was initiated" is precise.

Sure, let me correct it.

Thanks, Andrew.

2023-01-18 01:06:38

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH 1/3] mm: return the number of pages successfully paged out


I'm all hung up on the naming of everything.

> mm: return the number of pages successfully paged out

This is a vague title - MM is a big place. Perhaps "mm/vmscan: ..."

On Tue, 17 Jan 2023 15:16:30 -0800 Minchan Kim <[email protected]> wrote:

> The reclaim_pages MADV_PAGEOUT uses needs to return the number of
> pages paged-out successfully, not only the number of reclaimed pages
> in the operation because those pages paged-out successfully will be
> reclaimed easily at the memory pressure due to asynchronous writeback
> rotation(i.e., PG_reclaim with folio_rotate_reclaimable).

So... what does "paged out" actually mean? "writeback to backing
store was initiated"? From an application's point of view it means "no
longer in page tables needs a fault to get it back", no?

> This patch renames the reclaim_pages with paging_out(with hope that

"page_out" or "pageout" would be better than "paging_out".

> it's clear from operation point of view) and then adds a additional
> stat in reclaim_stat to represent the number of paged-out but kept
> in the memory for rotation on writeback completion.

So it's the number of pages against which we have initiated writeback.
Why not call it "nr_writeback" or similar?

> With that stat, madvise_pageout can know how many pages were paged-out
> successfully as well as reclaimed. The return value will be used for
> statistics in next patch.
>
> ...
>
> -unsigned long reclaim_pages(struct list_head *folio_list)
> +/*
> + * paging_out - reclaim clean pages and write dirty pages into storage
> + * @folio_list: pages for paging out
> + *
> + * paging_out() writes dirty pages to backing storage and/or reclaim
> + * clean pages from memory. Returns the number of written/reclaimed pages.

s/reclaim/reclaims/

"and/or" it vague - just "or", I think.

"written/reclaimed" is vague. "number of reclaimed pages plus the
number of pages against which writeback was initiated" is precise.


2023-01-18 01:24:57

by Matthew Wilcox

[permalink] [raw]
Subject: Re: [PATCH 1/3] mm: return the number of pages successfully paged out

On Tue, Jan 17, 2023 at 04:35:00PM -0800, Minchan Kim wrote:
> Yes, both are correct in my view since pageout is initiated after
> unmapping the page from page table and think that's better wording
> to be in description. Let me use the explanation in the description
> at next spin. Thanks.

For the next spin, you'll want to do it against mm-unstable as
deactivate_page() is now folio_deactivate().

2023-01-18 02:52:37

by Minchan Kim

[permalink] [raw]
Subject: Re: [PATCH 1/3] mm: return the number of pages successfully paged out

On Wed, Jan 18, 2023 at 12:58:09AM +0000, Matthew Wilcox wrote:
> On Tue, Jan 17, 2023 at 04:35:00PM -0800, Minchan Kim wrote:
> > Yes, both are correct in my view since pageout is initiated after
> > unmapping the page from page table and think that's better wording
> > to be in description. Let me use the explanation in the description
> > at next spin. Thanks.
>
> For the next spin, you'll want to do it against mm-unstable as
> deactivate_page() is now folio_deactivate().

I was curious what branch I need to use baseline for creating a patch
since I saw multiple branches recent mm/

Thanks for the hint. Sure, will do.

2023-01-18 10:47:35

by Michal Hocko

[permalink] [raw]
Subject: Re: [PATCH 1/3] mm: return the number of pages successfully paged out

On Tue 17-01-23 15:16:30, Minchan Kim wrote:
> The reclaim_pages MADV_PAGEOUT uses needs to return the number of
> pages paged-out successfully, not only the number of reclaimed pages
> in the operation because those pages paged-out successfully will be
> reclaimed easily at the memory pressure due to asynchronous writeback
> rotation(i.e., PG_reclaim with folio_rotate_reclaimable).
>
> This patch renames the reclaim_pages with paging_out(with hope that
> it's clear from operation point of view) and then adds a additional
> stat in reclaim_stat to represent the number of paged-out but kept
> in the memory for rotation on writeback completion.
>
> With that stat, madvise_pageout can know how many pages were paged-out
> successfully as well as reclaimed. The return value will be used for
> statistics in next patch.

I really fail to see the reson for the rename and paging_out doesn't
even make much sense as a name TBH.
--
Michal Hocko
SUSE Labs

2023-01-18 11:11:56

by Michal Hocko

[permalink] [raw]
Subject: Re: [PATCH 3/3] mm: add vmstat statistics for madvise_[cold|pageout]

On Tue 17-01-23 15:16:32, Minchan Kim wrote:
> madvise LRU manipulation APIs need to scan address ranges to find
> present pages at page table and provides advice hints for them.
>
> Likewise pg[scan/steal] count on vmstat, madvise_pg[scanned/hinted]
> shows the proactive reclaim efficiency so this patch addes those
> two statistics in vmstat.

Please describe the usecase for those new counters.
--
Michal Hocko
SUSE Labs

2023-01-18 18:03:11

by Minchan Kim

[permalink] [raw]
Subject: Re: [PATCH 3/3] mm: add vmstat statistics for madvise_[cold|pageout]

On Wed, Jan 18, 2023 at 06:27:02PM +0100, Michal Hocko wrote:
> On Wed 18-01-23 09:15:34, Minchan Kim wrote:
> > On Wed, Jan 18, 2023 at 10:11:46AM +0100, Michal Hocko wrote:
> > > On Tue 17-01-23 15:16:32, Minchan Kim wrote:
> > > > madvise LRU manipulation APIs need to scan address ranges to find
> > > > present pages at page table and provides advice hints for them.
> > > >
> > > > Likewise pg[scan/steal] count on vmstat, madvise_pg[scanned/hinted]
> > > > shows the proactive reclaim efficiency so this patch addes those
> > > > two statistics in vmstat.
> > >
> > > Please describe the usecase for those new counters.
> >
> > I wanted to know the proactive reclaim efficieny using MADV_COLD/MDDV_PAGEOUT.
> > Userspace has several policy which when/which vmas need to be hinted by the call
> > and they are evolving. I needed to know how effectively their policy works since
> > the vma ranges are huge(i.e., nr_hinted/nr_scanned).
>
> I can see how that can be an interesting information but is there
> anything actionable about that beyond debugging purposes? In other words
> isn't this something that could be done by tracing instead?

That's the statictis for telemetry. With those stat, we are collecting
various vmstat fields(i.e., pgsteal/pgscan) from real field devices
and thought those two stats would be good fit along with other reclaim
statistics in vmstat since we can know how much proactive madvise policy
could make system healthier(e.g., less kswapd scan, less allocstall
and so on).

>
> Also how are you going to identify specific madvise calls when they can
> interleave arbitrarily?

I guess you are talking about how we could separate MADV_PAGEOUT and
MADV_COLD from vmstat. That's valid question. I thought for the start,
adds just umbrella stat like this and if we want to break down, we need
to introudce sysfs likewise slab.

2023-01-18 18:16:20

by Michal Hocko

[permalink] [raw]
Subject: Re: [PATCH 1/3] mm: return the number of pages successfully paged out

On Wed 18-01-23 09:09:36, Minchan Kim wrote:
> On Wed, Jan 18, 2023 at 10:10:44AM +0100, Michal Hocko wrote:
> > On Tue 17-01-23 15:16:30, Minchan Kim wrote:
> > > The reclaim_pages MADV_PAGEOUT uses needs to return the number of
> > > pages paged-out successfully, not only the number of reclaimed pages
> > > in the operation because those pages paged-out successfully will be
> > > reclaimed easily at the memory pressure due to asynchronous writeback
> > > rotation(i.e., PG_reclaim with folio_rotate_reclaimable).
> > >
> > > This patch renames the reclaim_pages with paging_out(with hope that
> > > it's clear from operation point of view) and then adds a additional
> > > stat in reclaim_stat to represent the number of paged-out but kept
> > > in the memory for rotation on writeback completion.
> > >
> > > With that stat, madvise_pageout can know how many pages were paged-out
> > > successfully as well as reclaimed. The return value will be used for
> > > statistics in next patch.
> >
> > I really fail to see the reson for the rename and paging_out doesn't
> > even make much sense as a name TBH.
>
> Currently, what we are doing to reclaim memory is
>
> reclaim_folio_list
> shrink_folio_list
> if (folio_mapped(folio))
> try_to_unmap(folio)
>
> if (folio_test_dirty(folio))
> pageout
>
> Based on the structure, pageout is just one of way to reclaim memory.
>
> With MADV_PAGEOUT, what user want to know how many pages
> were paged out as they requested(from userspace PoV, how many times
> pages fault happens in future accesses), not the number of reclaimed
> pages shrink_folio_list returns currently.
>
> In the sense, I wanted to distinguish between reclaim and pageout.

But MADV_PAGEOUT is documented to trigger memory reclaim in general
not a pageout. Let me quote from the man page
: Reclaim a given range of pages. This is done to free up memory occupied
: by these pages.

Sure anonymous pages can be paged out to the swap storage but with the
upcomming multi-tiering it can be also "paged out" to a lower tier. All
that leads to freeing up memory that is currently mapped by that address
range.

Anyway, what do you actually meen by distinguishing between reclaim and
pageout. Aren't those just two names for the same thing?
--
Michal Hocko
SUSE Labs

2023-01-18 18:20:23

by Minchan Kim

[permalink] [raw]
Subject: Re: [PATCH 1/3] mm: return the number of pages successfully paged out

On Wed, Jan 18, 2023 at 10:10:44AM +0100, Michal Hocko wrote:
> On Tue 17-01-23 15:16:30, Minchan Kim wrote:
> > The reclaim_pages MADV_PAGEOUT uses needs to return the number of
> > pages paged-out successfully, not only the number of reclaimed pages
> > in the operation because those pages paged-out successfully will be
> > reclaimed easily at the memory pressure due to asynchronous writeback
> > rotation(i.e., PG_reclaim with folio_rotate_reclaimable).
> >
> > This patch renames the reclaim_pages with paging_out(with hope that
> > it's clear from operation point of view) and then adds a additional
> > stat in reclaim_stat to represent the number of paged-out but kept
> > in the memory for rotation on writeback completion.
> >
> > With that stat, madvise_pageout can know how many pages were paged-out
> > successfully as well as reclaimed. The return value will be used for
> > statistics in next patch.
>
> I really fail to see the reson for the rename and paging_out doesn't
> even make much sense as a name TBH.

Currently, what we are doing to reclaim memory is

reclaim_folio_list
shrink_folio_list
if (folio_mapped(folio))
try_to_unmap(folio)

if (folio_test_dirty(folio))
pageout

Based on the structure, pageout is just one of way to reclaim memory.

With MADV_PAGEOUT, what user want to know how many pages
were paged out as they requested(from userspace PoV, how many times
pages fault happens in future accesses), not the number of reclaimed
pages shrink_folio_list returns currently.

In the sense, I wanted to distinguish between reclaim and pageout.

2023-01-18 18:37:00

by Michal Hocko

[permalink] [raw]
Subject: Re: [PATCH 3/3] mm: add vmstat statistics for madvise_[cold|pageout]

On Wed 18-01-23 09:15:34, Minchan Kim wrote:
> On Wed, Jan 18, 2023 at 10:11:46AM +0100, Michal Hocko wrote:
> > On Tue 17-01-23 15:16:32, Minchan Kim wrote:
> > > madvise LRU manipulation APIs need to scan address ranges to find
> > > present pages at page table and provides advice hints for them.
> > >
> > > Likewise pg[scan/steal] count on vmstat, madvise_pg[scanned/hinted]
> > > shows the proactive reclaim efficiency so this patch addes those
> > > two statistics in vmstat.
> >
> > Please describe the usecase for those new counters.
>
> I wanted to know the proactive reclaim efficieny using MADV_COLD/MDDV_PAGEOUT.
> Userspace has several policy which when/which vmas need to be hinted by the call
> and they are evolving. I needed to know how effectively their policy works since
> the vma ranges are huge(i.e., nr_hinted/nr_scanned).

I can see how that can be an interesting information but is there
anything actionable about that beyond debugging purposes? In other words
isn't this something that could be done by tracing instead?

Also how are you going to identify specific madvise calls when they can
interleave arbitrarily?

--
Michal Hocko
SUSE Labs

2023-01-18 18:46:44

by Minchan Kim

[permalink] [raw]
Subject: Re: [PATCH 1/3] mm: return the number of pages successfully paged out

On Wed, Jan 18, 2023 at 06:35:32PM +0100, Michal Hocko wrote:
> On Wed 18-01-23 09:09:36, Minchan Kim wrote:
> > On Wed, Jan 18, 2023 at 10:10:44AM +0100, Michal Hocko wrote:
> > > On Tue 17-01-23 15:16:30, Minchan Kim wrote:
> > > > The reclaim_pages MADV_PAGEOUT uses needs to return the number of
> > > > pages paged-out successfully, not only the number of reclaimed pages
> > > > in the operation because those pages paged-out successfully will be
> > > > reclaimed easily at the memory pressure due to asynchronous writeback
> > > > rotation(i.e., PG_reclaim with folio_rotate_reclaimable).
> > > >
> > > > This patch renames the reclaim_pages with paging_out(with hope that
> > > > it's clear from operation point of view) and then adds a additional
> > > > stat in reclaim_stat to represent the number of paged-out but kept
> > > > in the memory for rotation on writeback completion.
> > > >
> > > > With that stat, madvise_pageout can know how many pages were paged-out
> > > > successfully as well as reclaimed. The return value will be used for
> > > > statistics in next patch.
> > >
> > > I really fail to see the reson for the rename and paging_out doesn't
> > > even make much sense as a name TBH.
> >
> > Currently, what we are doing to reclaim memory is
> >
> > reclaim_folio_list
> > shrink_folio_list
> > if (folio_mapped(folio))
> > try_to_unmap(folio)
> >
> > if (folio_test_dirty(folio))
> > pageout
> >
> > Based on the structure, pageout is just one of way to reclaim memory.
> >
> > With MADV_PAGEOUT, what user want to know how many pages
> > were paged out as they requested(from userspace PoV, how many times
> > pages fault happens in future accesses), not the number of reclaimed
> > pages shrink_folio_list returns currently.
> >
> > In the sense, I wanted to distinguish between reclaim and pageout.
>
> But MADV_PAGEOUT is documented to trigger memory reclaim in general
> not a pageout. Let me quote from the man page
> : Reclaim a given range of pages. This is done to free up memory occupied
> : by these pages.

IMO, we need to change the documentation something like this.

: Try to reclaim a given range of pages. The reclaim carries on the
unmap pages from address space and then write them out to backing
storage. It could help to free up memory occupied by these pages
or improve memory reclaim efficiency.

>
> Sure anonymous pages can be paged out to the swap storage but with the
> upcomming multi-tiering it can be also "paged out" to a lower tier. All
> that leads to freeing up memory that is currently mapped by that address
> range.

I am not familiar with multi-tiering. However, thing is the operation
of pageout is synchronous or not. If it's synchronous(IOW, when the
pageout returns, the page was really written to the storage), yes,
it can reclaim memory. If the backing storage is asynchrnous device
(which is *major* these days), we cannot reclaim the memory but just
wrote the page to the storage with hope it could help reclaim speed
at next iteration of reclaim.

>
> Anyway, what do you actually meen by distinguishing between reclaim and
> pageout. Aren't those just two names for the same thing?

reclaim is realy memory freeing but pageout is just one of the way
to achieve the memory freeing, which is not guaranteed depending on
backing storage's speed.

2023-01-18 18:47:11

by Minchan Kim

[permalink] [raw]
Subject: Re: [PATCH 3/3] mm: add vmstat statistics for madvise_[cold|pageout]

On Wed, Jan 18, 2023 at 10:11:46AM +0100, Michal Hocko wrote:
> On Tue 17-01-23 15:16:32, Minchan Kim wrote:
> > madvise LRU manipulation APIs need to scan address ranges to find
> > present pages at page table and provides advice hints for them.
> >
> > Likewise pg[scan/steal] count on vmstat, madvise_pg[scanned/hinted]
> > shows the proactive reclaim efficiency so this patch addes those
> > two statistics in vmstat.
>
> Please describe the usecase for those new counters.

I wanted to know the proactive reclaim efficieny using MADV_COLD/MDDV_PAGEOUT.
Userspace has several policy which when/which vmas need to be hinted by the call
and they are evolving. I needed to know how effectively their policy works since
the vma ranges are huge(i.e., nr_hinted/nr_scanned).

2023-01-18 21:40:30

by Michal Hocko

[permalink] [raw]
Subject: Re: [PATCH 1/3] mm: return the number of pages successfully paged out

On Wed 18-01-23 10:07:17, Minchan Kim wrote:
> On Wed, Jan 18, 2023 at 06:35:32PM +0100, Michal Hocko wrote:
> > On Wed 18-01-23 09:09:36, Minchan Kim wrote:
> > > On Wed, Jan 18, 2023 at 10:10:44AM +0100, Michal Hocko wrote:
> > > > On Tue 17-01-23 15:16:30, Minchan Kim wrote:
> > > > > The reclaim_pages MADV_PAGEOUT uses needs to return the number of
> > > > > pages paged-out successfully, not only the number of reclaimed pages
> > > > > in the operation because those pages paged-out successfully will be
> > > > > reclaimed easily at the memory pressure due to asynchronous writeback
> > > > > rotation(i.e., PG_reclaim with folio_rotate_reclaimable).
> > > > >
> > > > > This patch renames the reclaim_pages with paging_out(with hope that
> > > > > it's clear from operation point of view) and then adds a additional
> > > > > stat in reclaim_stat to represent the number of paged-out but kept
> > > > > in the memory for rotation on writeback completion.
> > > > >
> > > > > With that stat, madvise_pageout can know how many pages were paged-out
> > > > > successfully as well as reclaimed. The return value will be used for
> > > > > statistics in next patch.
> > > >
> > > > I really fail to see the reson for the rename and paging_out doesn't
> > > > even make much sense as a name TBH.
> > >
> > > Currently, what we are doing to reclaim memory is
> > >
> > > reclaim_folio_list
> > > shrink_folio_list
> > > if (folio_mapped(folio))
> > > try_to_unmap(folio)
> > >
> > > if (folio_test_dirty(folio))
> > > pageout
> > >
> > > Based on the structure, pageout is just one of way to reclaim memory.
> > >
> > > With MADV_PAGEOUT, what user want to know how many pages
> > > were paged out as they requested(from userspace PoV, how many times
> > > pages fault happens in future accesses), not the number of reclaimed
> > > pages shrink_folio_list returns currently.
> > >
> > > In the sense, I wanted to distinguish between reclaim and pageout.
> >
> > But MADV_PAGEOUT is documented to trigger memory reclaim in general
> > not a pageout. Let me quote from the man page
> > : Reclaim a given range of pages. This is done to free up memory occupied
> > : by these pages.
>
> IMO, we need to change the documentation something like this.
>
> : Try to reclaim a given range of pages. The reclaim carries on the
> unmap pages from address space and then write them out to backing
> storage. It could help to free up memory occupied by these pages
> or improve memory reclaim efficiency.

But this is not what the implementation does nor should it be specific
about what reclaim actual can do. The specific implementation of the
reclaim is an implementation detail.

> > Sure anonymous pages can be paged out to the swap storage but with the
> > upcomming multi-tiering it can be also "paged out" to a lower tier. All
> > that leads to freeing up memory that is currently mapped by that address
> > range.
>
> I am not familiar with multi-tiering. However, thing is the operation
> of pageout is synchronous or not. If it's synchronous(IOW, when the
> pageout returns, the page was really written to the storage), yes,
> it can reclaim memory. If the backing storage is asynchrnous device
> (which is *major* these days), we cannot reclaim the memory but just
> wrote the page to the storage with hope it could help reclaim speed
> at next iteration of reclaim.

I am sorry but I do not follow. Synchronicity of the reclaim should be
completely irrelevant. Even swapout (pageout from your POV AFAIU) can be
async or sync.

> > Anyway, what do you actually meen by distinguishing between reclaim and
> > pageout. Aren't those just two names for the same thing?
>
> reclaim is realy memory freeing but pageout is just one of the way
> to achieve the memory freeing, which is not guaranteed depending on
> backing storage's speed.

Try to think about it some more. Do you really want the MADV_PAGEOUT to
be so specific about how the memory reclaim is achieved? How do you
reflect new ways of reclaiming memory - e.g. memory demotion when the
primary memory gets freed by migrating the content to a slower type of
memory yet not write it out to ultra slow swap storage (which is just
yet another tier that cannot be accessed directly without an explicit
IO)?

--
Michal Hocko
SUSE Labs

2023-01-18 22:04:55

by Minchan Kim

[permalink] [raw]
Subject: Re: [PATCH 3/3] mm: add vmstat statistics for madvise_[cold|pageout]

On Wed, Jan 18, 2023 at 10:13:38PM +0100, Michal Hocko wrote:
> On Wed 18-01-23 09:55:38, Minchan Kim wrote:
> > On Wed, Jan 18, 2023 at 06:27:02PM +0100, Michal Hocko wrote:
> > > On Wed 18-01-23 09:15:34, Minchan Kim wrote:
> > > > On Wed, Jan 18, 2023 at 10:11:46AM +0100, Michal Hocko wrote:
> > > > > On Tue 17-01-23 15:16:32, Minchan Kim wrote:
> > > > > > madvise LRU manipulation APIs need to scan address ranges to find
> > > > > > present pages at page table and provides advice hints for them.
> > > > > >
> > > > > > Likewise pg[scan/steal] count on vmstat, madvise_pg[scanned/hinted]
> > > > > > shows the proactive reclaim efficiency so this patch addes those
> > > > > > two statistics in vmstat.
> > > > >
> > > > > Please describe the usecase for those new counters.
> > > >
> > > > I wanted to know the proactive reclaim efficieny using MADV_COLD/MDDV_PAGEOUT.
> > > > Userspace has several policy which when/which vmas need to be hinted by the call
> > > > and they are evolving. I needed to know how effectively their policy works since
> > > > the vma ranges are huge(i.e., nr_hinted/nr_scanned).
> > >
> > > I can see how that can be an interesting information but is there
> > > anything actionable about that beyond debugging purposes? In other words
> > > isn't this something that could be done by tracing instead?
> >
> > That's the statictis for telemetry. With those stat, we are collecting
> > various vmstat fields(i.e., pgsteal/pgscan) from real field devices
> > and thought those two stats would be good fit along with other reclaim
> > statistics in vmstat since we can know how much proactive madvise policy
> > could make system healthier(e.g., less kswapd scan, less allocstall
> > and so on).
> >
> > >
> > > Also how are you going to identify specific madvise calls when they can
> > > interleave arbitrarily?
> >
> > I guess you are talking about how we could separate MADV_PAGEOUT and
> > MADV_COLD from vmstat. That's valid question. I thought for the start,
> > adds just umbrella stat like this and if we want to break down, we need
> > to introudce sysfs likewise slab.
>
> No, not really. MADV_COLD is about aging. There is no actual reclaim
> going on so pgscan/steal metrics do not make any sense. I am asking
> about potential different concurrent MADV_PAGEOUT happening. From what
> you've said earlier (how effectively policy works) I have understood you
> want to find out how a specific MADV_PAGEOUT effective is. But there

No, it 's not a specific MADV_PAGEOUT but system global policy.
Android has used the ActivityManagerService to control proactive
memory compaction from apps since it could control life of apps.
You can think it as userspace kswapd.

> maybe different callers of this applied to all sorts of different memory
> mappings and therefore the efficiency might be really different. As
> there is no clear way to tell one from the other I am really questioning
> whether this global stat is actually useful.

The purpose is global stat.

2023-01-18 22:25:03

by Michal Hocko

[permalink] [raw]
Subject: Re: [PATCH 3/3] mm: add vmstat statistics for madvise_[cold|pageout]

On Wed 18-01-23 09:55:38, Minchan Kim wrote:
> On Wed, Jan 18, 2023 at 06:27:02PM +0100, Michal Hocko wrote:
> > On Wed 18-01-23 09:15:34, Minchan Kim wrote:
> > > On Wed, Jan 18, 2023 at 10:11:46AM +0100, Michal Hocko wrote:
> > > > On Tue 17-01-23 15:16:32, Minchan Kim wrote:
> > > > > madvise LRU manipulation APIs need to scan address ranges to find
> > > > > present pages at page table and provides advice hints for them.
> > > > >
> > > > > Likewise pg[scan/steal] count on vmstat, madvise_pg[scanned/hinted]
> > > > > shows the proactive reclaim efficiency so this patch addes those
> > > > > two statistics in vmstat.
> > > >
> > > > Please describe the usecase for those new counters.
> > >
> > > I wanted to know the proactive reclaim efficieny using MADV_COLD/MDDV_PAGEOUT.
> > > Userspace has several policy which when/which vmas need to be hinted by the call
> > > and they are evolving. I needed to know how effectively their policy works since
> > > the vma ranges are huge(i.e., nr_hinted/nr_scanned).
> >
> > I can see how that can be an interesting information but is there
> > anything actionable about that beyond debugging purposes? In other words
> > isn't this something that could be done by tracing instead?
>
> That's the statictis for telemetry. With those stat, we are collecting
> various vmstat fields(i.e., pgsteal/pgscan) from real field devices
> and thought those two stats would be good fit along with other reclaim
> statistics in vmstat since we can know how much proactive madvise policy
> could make system healthier(e.g., less kswapd scan, less allocstall
> and so on).
>
> >
> > Also how are you going to identify specific madvise calls when they can
> > interleave arbitrarily?
>
> I guess you are talking about how we could separate MADV_PAGEOUT and
> MADV_COLD from vmstat. That's valid question. I thought for the start,
> adds just umbrella stat like this and if we want to break down, we need
> to introudce sysfs likewise slab.

No, not really. MADV_COLD is about aging. There is no actual reclaim
going on so pgscan/steal metrics do not make any sense. I am asking
about potential different concurrent MADV_PAGEOUT happening. From what
you've said earlier (how effectively policy works) I have understood you
want to find out how a specific MADV_PAGEOUT effective is. But there
maybe different callers of this applied to all sorts of different memory
mappings and therefore the efficiency might be really different. As
there is no clear way to tell one from the other I am really questioning
whether this global stat is actually useful.

--
Michal Hocko
SUSE Labs

2023-01-18 22:47:06

by Minchan Kim

[permalink] [raw]
Subject: Re: [PATCH 1/3] mm: return the number of pages successfully paged out

On Wed, Jan 18, 2023 at 10:23:02PM +0100, Michal Hocko wrote:
> On Wed 18-01-23 10:07:17, Minchan Kim wrote:
> > On Wed, Jan 18, 2023 at 06:35:32PM +0100, Michal Hocko wrote:
> > > On Wed 18-01-23 09:09:36, Minchan Kim wrote:
> > > > On Wed, Jan 18, 2023 at 10:10:44AM +0100, Michal Hocko wrote:
> > > > > On Tue 17-01-23 15:16:30, Minchan Kim wrote:
> > > > > > The reclaim_pages MADV_PAGEOUT uses needs to return the number of
> > > > > > pages paged-out successfully, not only the number of reclaimed pages
> > > > > > in the operation because those pages paged-out successfully will be
> > > > > > reclaimed easily at the memory pressure due to asynchronous writeback
> > > > > > rotation(i.e., PG_reclaim with folio_rotate_reclaimable).
> > > > > >
> > > > > > This patch renames the reclaim_pages with paging_out(with hope that
> > > > > > it's clear from operation point of view) and then adds a additional
> > > > > > stat in reclaim_stat to represent the number of paged-out but kept
> > > > > > in the memory for rotation on writeback completion.
> > > > > >
> > > > > > With that stat, madvise_pageout can know how many pages were paged-out
> > > > > > successfully as well as reclaimed. The return value will be used for
> > > > > > statistics in next patch.
> > > > >
> > > > > I really fail to see the reson for the rename and paging_out doesn't
> > > > > even make much sense as a name TBH.
> > > >
> > > > Currently, what we are doing to reclaim memory is
> > > >
> > > > reclaim_folio_list
> > > > shrink_folio_list
> > > > if (folio_mapped(folio))
> > > > try_to_unmap(folio)
> > > >
> > > > if (folio_test_dirty(folio))
> > > > pageout
> > > >
> > > > Based on the structure, pageout is just one of way to reclaim memory.
> > > >
> > > > With MADV_PAGEOUT, what user want to know how many pages
> > > > were paged out as they requested(from userspace PoV, how many times
> > > > pages fault happens in future accesses), not the number of reclaimed
> > > > pages shrink_folio_list returns currently.
> > > >
> > > > In the sense, I wanted to distinguish between reclaim and pageout.
> > >
> > > But MADV_PAGEOUT is documented to trigger memory reclaim in general
> > > not a pageout. Let me quote from the man page
> > > : Reclaim a given range of pages. This is done to free up memory occupied
> > > : by these pages.
> >
> > IMO, we need to change the documentation something like this.
> >
> > : Try to reclaim a given range of pages. The reclaim carries on the
> > unmap pages from address space and then write them out to backing
> > storage. It could help to free up memory occupied by these pages
> > or improve memory reclaim efficiency.
>
> But this is not what the implementation does nor should it be specific
> about what reclaim actual can do. The specific implementation of the
> reclaim is an implementation detail.
>
> > > Sure anonymous pages can be paged out to the swap storage but with the
> > > upcomming multi-tiering it can be also "paged out" to a lower tier. All
> > > that leads to freeing up memory that is currently mapped by that address
> > > range.
> >
> > I am not familiar with multi-tiering. However, thing is the operation
> > of pageout is synchronous or not. If it's synchronous(IOW, when the
> > pageout returns, the page was really written to the storage), yes,
> > it can reclaim memory. If the backing storage is asynchrnous device
> > (which is *major* these days), we cannot reclaim the memory but just
> > wrote the page to the storage with hope it could help reclaim speed
> > at next iteration of reclaim.
>
> I am sorry but I do not follow. Synchronicity of the reclaim should be
> completely irrelevant. Even swapout (pageout from your POV AFAIU) can be
> async or sync.
>
> > > Anyway, what do you actually meen by distinguishing between reclaim and
> > > pageout. Aren't those just two names for the same thing?
> >
> > reclaim is realy memory freeing but pageout is just one of the way
> > to achieve the memory freeing, which is not guaranteed depending on
> > backing storage's speed.
>
> Try to think about it some more. Do you really want the MADV_PAGEOUT to
> be so specific about how the memory reclaim is achieved? How do you
> reflect new ways of reclaiming memory - e.g. memory demotion when the
> primary memory gets freed by migrating the content to a slower type of
> memory yet not write it out to ultra slow swap storage (which is just
> yet another tier that cannot be accessed directly without an explicit
> IO)?

I understand your concern now and believe better implementation would
account the number of virtual address scanning and the number of page
*unmapped from page table* so we don't need to worry what types of
paging out happens(e.g., write it to slower storage or demote it to
lower tier. In the end, userspace will see the paging in, anyway.)

"Unmapped the page from page table and demotes the page to secondary
device. User would see page fault when the next access happen"

If you agree it, yeah, I don't need to change anything in vmscan.c.
Instead, I could do everything in madvise.c

Let me know if you have other concern or suggestion.

Thanks, Michal.

2023-01-19 09:36:44

by Michal Hocko

[permalink] [raw]
Subject: Re: [PATCH 1/3] mm: return the number of pages successfully paged out

On Wed 18-01-23 14:27:23, Minchan Kim wrote:
[...]
> Let me know if you have other concern or suggestion.

I would propose to use a tracepoint to track this on the madvise side.
This way you can both track a per-process effectivity as well a madvise
originator effectivity (if the policy is implemented by a global monitor
then it won't get interfering activity by other users of this
interface). Global counters cannot do neither of that.
--
Michal Hocko
SUSE Labs

2023-01-19 21:33:05

by Minchan Kim

[permalink] [raw]
Subject: Re: [PATCH 1/3] mm: return the number of pages successfully paged out

On Thu, Jan 19, 2023 at 10:07:23AM +0100, Michal Hocko wrote:
> On Wed 18-01-23 14:27:23, Minchan Kim wrote:
> [...]
> > Let me know if you have other concern or suggestion.
>
> I would propose to use a tracepoint to track this on the madvise side.
> This way you can both track a per-process effectivity as well a madvise
> originator effectivity (if the policy is implemented by a global monitor
> then it won't get interfering activity by other users of this
> interface). Global counters cannot do neither of that.

I don't think the tracepoint is right approach for the purpose.

I understand we could get the same result using tracepoint using bpf or
something so whenever event happens, a daemon get the result and
accumlate the number so totally same result with global stat. Yeah,
technically it's doable. With the claim, there is nothing we can do
with tracpoint. Checks existing vmstat fields, why do we have them
into vmstat instead of tracepoint? TP is much easiler/fleixible but
with vmstat, we can get the ballpark under fleet easier to sense
what's going on simply, and once we found something weird, we could
turn on the trace to know the detail and TP would work for it.

With process control using process_madvise in centralized controlled
system, I think those two stats are really worth along with other
memory reclaim statistics to be captured for memory health. If we
have needs per-process level tracking(Actually, not for our case),
we could add the tracepoint later.