As huge page usage in the page cache and for shmem files proliferates
in our production environment, the performance monitoring team has
asked for per-cgroup stats on those pages.
We already track and export anon_thp per cgroup. We already track file
THP and shmem THP per node, so making them per-cgroup is only a matter
of switching from node to lruvec counters. The lruvec helpers update
all layers, including the node, so the existing node-level numbers
keep being maintained. All callsites are in places where the pages are
charged and locked, so page->memcg is stable.
Signed-off-by: Johannes Weiner <[email protected]>
---
mm/filemap.c | 4 ++--
mm/huge_memory.c | 4 ++--
mm/khugepaged.c | 4 ++--
mm/memcontrol.c | 6 +++++-
mm/shmem.c | 2 +-
5 files changed, 12 insertions(+), 8 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index e80aa9d2db68..334ce608735c 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -204,9 +204,9 @@ static void unaccount_page_cache_page(struct address_space *mapping,
if (PageSwapBacked(page)) {
__mod_lruvec_page_state(page, NR_SHMEM, -nr);
if (PageTransHuge(page))
- __dec_node_page_state(page, NR_SHMEM_THPS);
+ __dec_lruvec_page_state(page, NR_SHMEM_THPS);
} else if (PageTransHuge(page)) {
- __dec_node_page_state(page, NR_FILE_THPS);
+ __dec_lruvec_page_state(page, NR_FILE_THPS);
filemap_nr_thps_dec(mapping);
}
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index cba3812a5c3e..5fe044e5dad5 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2707,9 +2707,9 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
spin_unlock(&ds_queue->split_queue_lock);
if (mapping) {
if (PageSwapBacked(head))
- __dec_node_page_state(head, NR_SHMEM_THPS);
+ __dec_lruvec_page_state(head, NR_SHMEM_THPS);
else
- __dec_node_page_state(head, NR_FILE_THPS);
+ __dec_lruvec_page_state(head, NR_FILE_THPS);
}
__split_huge_page(page, list, end, flags);
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index f1d5f6dde47c..04828e21f434 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1833,9 +1833,9 @@ static void collapse_file(struct mm_struct *mm,
}
if (is_shmem)
- __inc_node_page_state(new_page, NR_SHMEM_THPS);
+ __inc_lruvec_page_state(new_page, NR_SHMEM_THPS);
else {
- __inc_node_page_state(new_page, NR_FILE_THPS);
+ __inc_lruvec_page_state(new_page, NR_FILE_THPS);
filemap_nr_thps_inc(mapping);
}
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2636f8bad908..98177d5e8e03 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1507,6 +1507,8 @@ static struct memory_stat memory_stats[] = {
* constant(e.g. powerpc).
*/
{ "anon_thp", 0, NR_ANON_THPS },
+ { "file_thp", 0, NR_FILE_THPS },
+ { "shmem_thp", 0, NR_SHMEM_THPS },
#endif
{ "inactive_anon", PAGE_SIZE, NR_INACTIVE_ANON },
{ "active_anon", PAGE_SIZE, NR_ACTIVE_ANON },
@@ -1537,7 +1539,9 @@ static int __init memory_stats_init(void)
for (i = 0; i < ARRAY_SIZE(memory_stats); i++) {
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- if (memory_stats[i].idx == NR_ANON_THPS)
+ if (memory_stats[i].idx == NR_ANON_THPS ||
+ memory_stats[i].idx == NR_FILE_THPS ||
+ memory_stats[i].idx == NR_SHMEM_THPS)
memory_stats[i].ratio = HPAGE_PMD_SIZE;
#endif
VM_BUG_ON(!memory_stats[i].ratio);
diff --git a/mm/shmem.c b/mm/shmem.c
index 537c137698f8..5009d783d954 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -713,7 +713,7 @@ static int shmem_add_to_page_cache(struct page *page,
}
if (PageTransHuge(page)) {
count_vm_event(THP_FILE_ALLOC);
- __inc_node_page_state(page, NR_SHMEM_THPS);
+ __inc_lruvec_page_state(page, NR_SHMEM_THPS);
}
mapping->nrpages += nr;
__mod_lruvec_page_state(page, NR_FILE_PAGES, nr);
--
2.29.0
On Thu, 2020-10-22 at 11:18 -0400, Johannes Weiner wrote:
> index e80aa9d2db68..334ce608735c 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -204,9 +204,9 @@ static void unaccount_page_cache_page(struct
> address_space *mapping,
> if (PageSwapBacked(page)) {
> __mod_lruvec_page_state(page, NR_SHMEM, -nr);
> if (PageTransHuge(page))
> - __dec_node_page_state(page, NR_SHMEM_THPS);
> + __dec_lruvec_page_state(page, NR_SHMEM_THPS);
> } else if (PageTransHuge(page)) {
> - __dec_node_page_state(page, NR_FILE_THPS);
> + __dec_lruvec_page_state(page, NR_FILE_THPS);
> filemap_nr_thps_dec(mapping);
> }
This may be a dumb question, but does that mean the
NR_FILE_THPS number will no longer be visible in
/proc/vmstat, or is there some magic I overlooked in
a cursory look at the code?
On Thu, Oct 22, 2020 at 8:20 AM Johannes Weiner <[email protected]> wrote:
>
> As huge page usage in the page cache and for shmem files proliferates
> in our production environment, the performance monitoring team has
> asked for per-cgroup stats on those pages.
>
> We already track and export anon_thp per cgroup. We already track file
> THP and shmem THP per node, so making them per-cgroup is only a matter
> of switching from node to lruvec counters. All callsites are in places
> where the pages are charged and locked, so page->memcg is stable.
>
> Signed-off-by: Johannes Weiner <[email protected]>
Reviewed-by: Shakeel Butt <[email protected]>
On Thu, 2020-10-22 at 12:49 -0400, Rik van Riel wrote:
> On Thu, 2020-10-22 at 11:18 -0400, Johannes Weiner wrote:
>
> > index e80aa9d2db68..334ce608735c 100644
> > --- a/mm/filemap.c
> > +++ b/mm/filemap.c
> > @@ -204,9 +204,9 @@ static void unaccount_page_cache_page(struct
> > address_space *mapping,
> > if (PageSwapBacked(page)) {
> > __mod_lruvec_page_state(page, NR_SHMEM, -nr);
> > if (PageTransHuge(page))
> > - __dec_node_page_state(page, NR_SHMEM_THPS);
> > + __dec_lruvec_page_state(page, NR_SHMEM_THPS);
> > } else if (PageTransHuge(page)) {
> > - __dec_node_page_state(page, NR_FILE_THPS);
> > + __dec_lruvec_page_state(page, NR_FILE_THPS);
> > filemap_nr_thps_dec(mapping);
> > }
>
> This may be a dumb question, but does that mean the
> NR_FILE_THPS number will no longer be visible in
> /proc/vmstat or is there some magic I overlooked in
> a cursory look of the code?
Never mind, I found it a few levels deep in
__dec_lruvec_page_state.
Reviewed-by: Rik van Riel <[email protected]>
--
All Rights Reversed.
On Thu, 22 Oct 2020, Johannes Weiner wrote:
> As huge page usage in the page cache and for shmem files proliferates
> in our production environment, the performance monitoring team has
> asked for per-cgroup stats on those pages.
>
> We already track and export anon_thp per cgroup. We already track file
> THP and shmem THP per node, so making them per-cgroup is only a matter
> of switching from node to lruvec counters. All callsites are in places
> where the pages are charged and locked, so page->memcg is stable.
>
> Signed-off-by: Johannes Weiner <[email protected]>
Acked-by: David Rientjes <[email protected]>
Nice!
On Thu 22-10-20 11:18:44, Johannes Weiner wrote:
> As huge page usage in the page cache and for shmem files proliferates
> in our production environment, the performance monitoring team has
> asked for per-cgroup stats on those pages.
>
> We already track and export anon_thp per cgroup. We already track file
> THP and shmem THP per node, so making them per-cgroup is only a matter
> of switching from node to lruvec counters. All callsites are in places
> where the pages are charged and locked, so page->memcg is stable.
>
> Signed-off-by: Johannes Weiner <[email protected]>
Acked-by: Michal Hocko <[email protected]>
> ---
> mm/filemap.c | 4 ++--
> mm/huge_memory.c | 4 ++--
> mm/khugepaged.c | 4 ++--
> mm/memcontrol.c | 6 +++++-
> mm/shmem.c | 2 +-
> 5 files changed, 12 insertions(+), 8 deletions(-)
>
> diff --git a/mm/filemap.c b/mm/filemap.c
> index e80aa9d2db68..334ce608735c 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -204,9 +204,9 @@ static void unaccount_page_cache_page(struct address_space *mapping,
> if (PageSwapBacked(page)) {
> __mod_lruvec_page_state(page, NR_SHMEM, -nr);
> if (PageTransHuge(page))
> - __dec_node_page_state(page, NR_SHMEM_THPS);
> + __dec_lruvec_page_state(page, NR_SHMEM_THPS);
> } else if (PageTransHuge(page)) {
> - __dec_node_page_state(page, NR_FILE_THPS);
> + __dec_lruvec_page_state(page, NR_FILE_THPS);
> filemap_nr_thps_dec(mapping);
> }
>
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index cba3812a5c3e..5fe044e5dad5 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -2707,9 +2707,9 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
> spin_unlock(&ds_queue->split_queue_lock);
> if (mapping) {
> if (PageSwapBacked(head))
> - __dec_node_page_state(head, NR_SHMEM_THPS);
> + __dec_lruvec_page_state(head, NR_SHMEM_THPS);
> else
> - __dec_node_page_state(head, NR_FILE_THPS);
> + __dec_lruvec_page_state(head, NR_FILE_THPS);
> }
>
> __split_huge_page(page, list, end, flags);
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index f1d5f6dde47c..04828e21f434 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -1833,9 +1833,9 @@ static void collapse_file(struct mm_struct *mm,
> }
>
> if (is_shmem)
> - __inc_node_page_state(new_page, NR_SHMEM_THPS);
> + __inc_lruvec_page_state(new_page, NR_SHMEM_THPS);
> else {
> - __inc_node_page_state(new_page, NR_FILE_THPS);
> + __inc_lruvec_page_state(new_page, NR_FILE_THPS);
> filemap_nr_thps_inc(mapping);
> }
>
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 2636f8bad908..98177d5e8e03 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -1507,6 +1507,8 @@ static struct memory_stat memory_stats[] = {
> * constant(e.g. powerpc).
> */
> { "anon_thp", 0, NR_ANON_THPS },
> + { "file_thp", 0, NR_FILE_THPS },
> + { "shmem_thp", 0, NR_SHMEM_THPS },
> #endif
> { "inactive_anon", PAGE_SIZE, NR_INACTIVE_ANON },
> { "active_anon", PAGE_SIZE, NR_ACTIVE_ANON },
> @@ -1537,7 +1539,9 @@ static int __init memory_stats_init(void)
>
> for (i = 0; i < ARRAY_SIZE(memory_stats); i++) {
> #ifdef CONFIG_TRANSPARENT_HUGEPAGE
> - if (memory_stats[i].idx == NR_ANON_THPS)
> + if (memory_stats[i].idx == NR_ANON_THPS ||
> + memory_stats[i].idx == NR_FILE_THPS ||
> + memory_stats[i].idx == NR_SHMEM_THPS)
> memory_stats[i].ratio = HPAGE_PMD_SIZE;
> #endif
> VM_BUG_ON(!memory_stats[i].ratio);
> diff --git a/mm/shmem.c b/mm/shmem.c
> index 537c137698f8..5009d783d954 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -713,7 +713,7 @@ static int shmem_add_to_page_cache(struct page *page,
> }
> if (PageTransHuge(page)) {
> count_vm_event(THP_FILE_ALLOC);
> - __inc_node_page_state(page, NR_SHMEM_THPS);
> + __inc_lruvec_page_state(page, NR_SHMEM_THPS);
> }
> mapping->nrpages += nr;
> __mod_lruvec_page_state(page, NR_FILE_PAGES, nr);
> --
> 2.29.0
--
Michal Hocko
SUSE Labs
On Thu, Oct 22, 2020 at 12:57:55PM -0400, Rik van Riel wrote:
> On Thu, 2020-10-22 at 12:49 -0400, Rik van Riel wrote:
> > On Thu, 2020-10-22 at 11:18 -0400, Johannes Weiner wrote:
> >
> > > index e80aa9d2db68..334ce608735c 100644
> > > --- a/mm/filemap.c
> > > +++ b/mm/filemap.c
> > > @@ -204,9 +204,9 @@ static void unaccount_page_cache_page(struct
> > > address_space *mapping,
> > > if (PageSwapBacked(page)) {
> > > __mod_lruvec_page_state(page, NR_SHMEM, -nr);
> > > if (PageTransHuge(page))
> > > - __dec_node_page_state(page, NR_SHMEM_THPS);
> > > + __dec_lruvec_page_state(page, NR_SHMEM_THPS);
> > > } else if (PageTransHuge(page)) {
> > > - __dec_node_page_state(page, NR_FILE_THPS);
> > > + __dec_lruvec_page_state(page, NR_FILE_THPS);
> > > filemap_nr_thps_dec(mapping);
> > > }
> >
> > This may be a dumb question, but does that mean the
> > NR_FILE_THPS number will no longer be visible in
> > /proc/vmstat or is there some magic I overlooked in
> > a cursory look of the code?
>
> Never mind, I found it a few levels deep in
> __dec_lruvec_page_state.
No worries, it's a legit question.
lruvec is at the intersection of node and memcg, so I'm just moving
the accounting to a higher-granularity function that updates all
layers, including the node.
> Reviewed-by: Rik van Riel <[email protected]>
Thanks!
On Thu, 22 Oct 2020 11:18:44 -0400 Johannes Weiner <[email protected]> wrote:
> As huge page usage in the page cache and for shmem files proliferates
> in our production environment, the performance monitoring team has
> asked for per-cgroup stats on those pages.
>
> We already track and export anon_thp per cgroup. We already track file
> THP and shmem THP per node, so making them per-cgroup is only a matter
> of switching from node to lruvec counters. All callsites are in places
> where the pages are charged and locked, so page->memcg is stable.
>
> ...
>
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -1507,6 +1507,8 @@ static struct memory_stat memory_stats[] = {
> * constant(e.g. powerpc).
> */
> { "anon_thp", 0, NR_ANON_THPS },
> + { "file_thp", 0, NR_FILE_THPS },
> + { "shmem_thp", 0, NR_SHMEM_THPS },
Documentation/admin-guide/cgroup-v2.rst is owed an update?
On Sun, Oct 25, 2020 at 11:37:25AM -0700, Andrew Morton wrote:
> On Thu, 22 Oct 2020 11:18:44 -0400 Johannes Weiner <[email protected]> wrote:
>
> > As huge page usage in the page cache and for shmem files proliferates
> > in our production environment, the performance monitoring team has
> > asked for per-cgroup stats on those pages.
> >
> > We already track and export anon_thp per cgroup. We already track file
> > THP and shmem THP per node, so making them per-cgroup is only a matter
> > of switching from node to lruvec counters. All callsites are in places
> > where the pages are charged and locked, so page->memcg is stable.
> >
> > ...
> >
> > --- a/mm/memcontrol.c
> > +++ b/mm/memcontrol.c
> > @@ -1507,6 +1507,8 @@ static struct memory_stat memory_stats[] = {
> > * constant(e.g. powerpc).
> > */
> > { "anon_thp", 0, NR_ANON_THPS },
> > + { "file_thp", 0, NR_FILE_THPS },
> > + { "shmem_thp", 0, NR_SHMEM_THPS },
>
> Documentation/admin-guide/cgroup-v2.rst is owed an update?
Ah yes. This?
From 310c3e1714e1c093d4cd26dff38326fc348cdd31 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <[email protected]>
Date: Mon, 26 Oct 2020 13:39:19 -0400
Subject: [PATCH] mm: memcontrol: add file_thp, shmem_thp to memory.stat fix
Signed-off-by: Johannes Weiner <[email protected]>
---
Documentation/admin-guide/cgroup-v2.rst | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 608d7c279396..515bb13084a0 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1300,6 +1300,14 @@ PAGE_SIZE multiple when read back.
Amount of memory used in anonymous mappings backed by
transparent hugepages
+ file_thp
+ Amount of cached filesystem data backed by transparent
+ hugepages
+
+ shmem_thp
+ Amount of shm, tmpfs, and shared anonymous mmap()s backed by
+ transparent hugepages
+
inactive_anon, active_anon, inactive_file, active_file, unevictable
Amount of memory, swap-backed and filesystem-backed,
on the internal memory management lists used by the
--
2.29.0
> On Oct 22, 2020, at 8:18 AM, Johannes Weiner <[email protected]> wrote:
>
> As huge page usage in the page cache and for shmem files proliferates
> in our production environment, the performance monitoring team has
> asked for per-cgroup stats on those pages.
>
> We already track and export anon_thp per cgroup. We already track file
> THP and shmem THP per node, so making them per-cgroup is only a matter
> of switching from node to lruvec counters. All callsites are in places
> where the pages are charged and locked, so page->memcg is stable.
>
> Signed-off-by: Johannes Weiner <[email protected]>
Acked-by: Song Liu <[email protected]>
Thanks!