The PageKmemcg flag is currently defined as a page type (like buddy,
offline, table and guard). Semantically it means that the page
was accounted as kernel memory by the page allocator and has
to be uncharged on release.
As a side effect of defining the flag as a page type, the accounted
page can't be mapped to userspace (see page_has_type() and the
comments above it). In particular, this blocks the accounting of
vmalloc-backed memory used by some bpf maps, because these maps
do map the memory to userspace.
One option is to fix it by complicating the access to page->mapcount,
which provides some free bits for page->page_type.
But it's way better to move this flag into the page->memcg_data flags.
Indeed, the flag makes no sense unless memory cgroups are enabled
and, in particular, the memory cgroup pointer is set.
This commit replaces PageKmemcg() and __SetPageKmemcg() with
PageMemcgKmem() and __SetPageMemcgKmem(). __ClearPageKmemcg()
can simply be deleted because clear_page_memcg() already
does the job.
As a bonus, on !CONFIG_MEMCG builds the PageMemcgKmem() check will
be compiled out.
Signed-off-by: Roman Gushchin <[email protected]>
---
include/linux/memcontrol.h | 54 +++++++++++++++++++++++++++++++++++---
include/linux/page-flags.h | 11 ++------
mm/memcontrol.c | 14 +++-------
mm/page_alloc.c | 2 +-
4 files changed, 58 insertions(+), 23 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 35f846c6b89b..b20bb63bf2eb 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -346,6 +346,8 @@ extern struct mem_cgroup *root_mem_cgroup;
enum page_memcg_data_flags {
/* page->memcg_data is a pointer to an objcgs vector */
MEMCG_DATA_OBJCGS,
+ /* page has been accounted as a non-slab kernel page */
+ MEMCG_DATA_KMEM,
/* the next bit after the last actual flag */
__NR_MEMCG_DATA_FLAGS,
};
@@ -369,8 +371,12 @@ enum page_memcg_data_flags {
*/
static inline struct mem_cgroup *page_memcg(struct page *page)
{
+ unsigned long memcg_data = page->memcg_data;
+
VM_BUG_ON_PAGE(PageSlab(page), page);
- return (struct mem_cgroup *)page->memcg_data;
+ VM_BUG_ON_PAGE(memcg_data & (0x1UL << MEMCG_DATA_OBJCGS), page);
+
+ return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
}
/*
@@ -416,7 +422,7 @@ static inline struct mem_cgroup *page_memcg_check(struct page *page)
if (memcg_data & (0x1UL << MEMCG_DATA_OBJCGS))
return NULL;
- return (struct mem_cgroup *)memcg_data;
+ return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
}
/*
@@ -449,6 +455,36 @@ static inline void clear_page_memcg(struct page *page)
page->memcg_data = 0;
}
+/*
+ * PageMemcgKmem - check if the page has MemcgKmem flag set
+ * @page: a pointer to the page struct
+ *
+ * Checks if the page has MemcgKmem flag set. The caller must ensure that
+ * the page has an associated memory cgroup. It's not safe to call this function
+ * against some types of pages, e.g. slab pages.
+ */
+static inline bool PageMemcgKmem(struct page *page)
+{
+ VM_BUG_ON_PAGE(test_bit(MEMCG_DATA_OBJCGS, &page->memcg_data), page);
+ return test_bit(MEMCG_DATA_KMEM, &page->memcg_data);
+}
+
+/*
+ * __SetPageMemcgKmem - set the page's MemcgKmem flag
+ * @page: a pointer to the page struct
+ *
+ * Set the page's MemcgKmem flag. The caller must ensure that the page has
+ * an associated memory cgroup. It's not safe to call this function
+ * against some types of pages, e.g. slab pages.
+ */
+static inline void __SetPageMemcgKmem(struct page *page)
+{
+ VM_BUG_ON_PAGE(!page->memcg_data, page);
+ VM_BUG_ON_PAGE(test_bit(MEMCG_DATA_OBJCGS, &page->memcg_data), page);
+ __set_bit(MEMCG_DATA_KMEM, &page->memcg_data);
+}
+
+
#ifdef CONFIG_MEMCG_KMEM
/*
* page_objcgs - get the object cgroups vector associated with a page
@@ -466,6 +502,7 @@ static inline struct obj_cgroup **page_objcgs(struct page *page)
VM_BUG_ON_PAGE(memcg_data &&
!(memcg_data & (0x1UL << MEMCG_DATA_OBJCGS)), page);
+ VM_BUG_ON_PAGE(memcg_data & (0x1UL << MEMCG_DATA_KMEM), page);
return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
}
@@ -482,9 +519,11 @@ static inline struct obj_cgroup **page_objcgs_check(struct page *page)
{
unsigned long memcg_data = READ_ONCE(page->memcg_data);
- if (memcg_data && (memcg_data & (0x1UL << MEMCG_DATA_OBJCGS)))
+ if (memcg_data && (memcg_data & (0x1UL << MEMCG_DATA_OBJCGS))) {
+ VM_BUG_ON_PAGE(memcg_data & (0x1UL << MEMCG_DATA_KMEM), page);
return (struct obj_cgroup **)
(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
+ }
return NULL;
}
@@ -1169,6 +1208,15 @@ static inline void clear_page_memcg(struct page *page)
{
}
+static inline bool PageMemcgKmem(struct page *page)
+{
+ return false;
+}
+
+static inline void SetPageMemcgKmem(struct page *page)
+{
+}
+
static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
{
return true;
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index fbbb841a9346..a7ca01ae78d9 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -712,9 +712,8 @@ PAGEFLAG_FALSE(DoubleMap)
#define PAGE_MAPCOUNT_RESERVE -128
#define PG_buddy 0x00000080
#define PG_offline 0x00000100
-#define PG_kmemcg 0x00000200
-#define PG_table 0x00000400
-#define PG_guard 0x00000800
+#define PG_table 0x00000200
+#define PG_guard 0x00000400
#define PageType(page, flag) \
((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE)
@@ -765,12 +764,6 @@ PAGE_TYPE_OPS(Buddy, buddy)
*/
PAGE_TYPE_OPS(Offline, offline)
-/*
- * If kmemcg is enabled, the buddy allocator will set PageKmemcg() on
- * pages allocated with __GFP_ACCOUNT. It gets cleared on page free.
- */
-PAGE_TYPE_OPS(Kmemcg, kmemcg)
-
/*
* Marks pages in use as page tables.
*/
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f32b7f611045..a3c2749a36e9 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3087,7 +3087,7 @@ int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order)
ret = __memcg_kmem_charge(memcg, gfp, 1 << order);
if (!ret) {
set_page_memcg(page, memcg);
- __SetPageKmemcg(page);
+ __SetPageMemcgKmem(page);
return 0;
}
css_put(&memcg->css);
@@ -3112,10 +3112,6 @@ void __memcg_kmem_uncharge_page(struct page *page, int order)
__memcg_kmem_uncharge(memcg, nr_pages);
clear_page_memcg(page);
css_put(&memcg->css);
-
- /* slab pages do not have PageKmemcg flag set */
- if (PageKmemcg(page))
- __ClearPageKmemcg(page);
}
static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
@@ -6896,12 +6892,10 @@ static void uncharge_page(struct page *page, struct uncharge_gather *ug)
nr_pages = compound_nr(page);
ug->nr_pages += nr_pages;
- if (!PageKmemcg(page)) {
- ug->pgpgout++;
- } else {
+ if (PageMemcgKmem(page))
ug->nr_kmem += nr_pages;
- __ClearPageKmemcg(page);
- }
+ else
+ ug->pgpgout++;
ug->dummy_page = page;
clear_page_memcg(page);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 377d65f25d7d..b70911cb9879 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1197,7 +1197,7 @@ static __always_inline bool free_pages_prepare(struct page *page,
}
if (PageMappingFlags(page))
page->mapping = NULL;
- if (memcg_kmem_enabled() && PageKmemcg(page))
+ if (memcg_kmem_enabled() && PageMemcgKmem(page))
__memcg_kmem_uncharge_page(page, order);
if (check_free)
bad += check_free_page(page);
--
2.26.2
On Tue, Sep 29, 2020 at 04:59:20PM -0700, Roman Gushchin wrote:
> @@ -3087,7 +3087,7 @@ int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order)
> ret = __memcg_kmem_charge(memcg, gfp, 1 << order);
> if (!ret) {
> set_page_memcg(page, memcg);
> - __SetPageKmemcg(page);
> + __SetPageMemcgKmem(page);
> return 0;
> }
> css_put(&memcg->css);
> @@ -3112,10 +3112,6 @@ void __memcg_kmem_uncharge_page(struct page *page, int order)
> __memcg_kmem_uncharge(memcg, nr_pages);
> clear_page_memcg(page);
> css_put(&memcg->css);
> -
> - /* slab pages do not have PageKmemcg flag set */
> - if (PageKmemcg(page))
> - __ClearPageKmemcg(page);
Hm, the named flags are great, and the getter functions are complex
enough to justify helpers.
But both flags are set along with the object pointer, and cleared when
the pointer is cleared (which makes sense, because pages cannot
change their type while they're alive). page_clear_memcg() and
page_clear_objcgs() do the same thing.
page->memcg_data = (unsigned long)pointer | MEMCG_DATA_TYPE;
and
page->memcg_data = 0;
are straightforward and really don't require abstraction. Please
open-code these.
On Tue, Sep 29, 2020 at 04:59:20PM -0700, Roman Gushchin wrote:
> @@ -449,6 +455,36 @@ static inline void clear_page_memcg(struct page *page)
> page->memcg_data = 0;
> }
>
> +/*
> + * PageMemcgKmem - check if the page has MemcgKmem flag set
> + * @page: a pointer to the page struct
> + *
> + * Checks if the page has MemcgKmem flag set. The caller must ensure that
> + * the page has an associated memory cgroup. It's not safe to call this function
> + * against some types of pages, e.g. slab pages.
> + */
> +static inline bool PageMemcgKmem(struct page *page)
> +{
> + VM_BUG_ON_PAGE(test_bit(MEMCG_DATA_OBJCGS, &page->memcg_data), page);
> + return test_bit(MEMCG_DATA_KMEM, &page->memcg_data);
Most other places need the bit mask and have to do ad-hoc shifting,
which is verbose and causes awkward line wrapping in various places.
There are no atomic accesses here, so there is no need to use the
atomic bitop interface. I feel like I've mentioned this before.
Just make the MEMCG_DATA_ items bitfields directly, and do
return page->memcg_data & MEMCG_DATA_KMEM
here.
Thanks