2022-08-30 22:03:14

by Suren Baghdasaryan

Subject: [RFC PATCH 10/30] mm: enable page allocation tagging for __get_free_pages and alloc_pages

Redefine alloc_pages, __get_free_pages to record allocations done by
these functions. Instrument deallocation hooks to record object freeing.

Signed-off-by: Suren Baghdasaryan <[email protected]>
---
include/linux/gfp.h | 10 +++++++---
include/linux/page_ext.h | 3 ++-
include/linux/pgalloc_tag.h | 35 +++++++++++++++++++++++++++++++++++
mm/mempolicy.c | 4 ++--
mm/page_alloc.c | 13 ++++++++++---
5 files changed, 56 insertions(+), 9 deletions(-)

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index f314be58fa77..5cb950a49d40 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -6,6 +6,7 @@

#include <linux/mmzone.h>
#include <linux/topology.h>
+#include <linux/pgalloc_tag.h>

struct vm_area_struct;

@@ -267,12 +268,12 @@ static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask,
}

#ifdef CONFIG_NUMA
-struct page *alloc_pages(gfp_t gfp, unsigned int order);
+struct page *_alloc_pages(gfp_t gfp, unsigned int order);
struct folio *folio_alloc(gfp_t gfp, unsigned order);
struct folio *vma_alloc_folio(gfp_t gfp, int order, struct vm_area_struct *vma,
unsigned long addr, bool hugepage);
#else
-static inline struct page *alloc_pages(gfp_t gfp_mask, unsigned int order)
+static inline struct page *_alloc_pages(gfp_t gfp_mask, unsigned int order)
{
return alloc_pages_node(numa_node_id(), gfp_mask, order);
}
@@ -283,6 +284,7 @@ static inline struct folio *folio_alloc(gfp_t gfp, unsigned int order)
#define vma_alloc_folio(gfp, order, vma, addr, hugepage) \
folio_alloc(gfp, order)
#endif
+#define alloc_pages(gfp, order) pgtag_alloc_pages(gfp, order)
#define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
static inline struct page *alloc_page_vma(gfp_t gfp,
struct vm_area_struct *vma, unsigned long addr)
@@ -292,7 +294,9 @@ static inline struct page *alloc_page_vma(gfp_t gfp,
return &folio->page;
}

-extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
+extern unsigned long _get_free_pages(gfp_t gfp_mask, unsigned int order,
+ struct page **ppage);
+#define __get_free_pages(gfp_mask, order) pgtag_get_free_pages(gfp_mask, order)
extern unsigned long get_zeroed_page(gfp_t gfp_mask);

void *alloc_pages_exact(size_t size, gfp_t gfp_mask) __alloc_size(1);
diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h
index fabb2e1e087f..b26077110fb3 100644
--- a/include/linux/page_ext.h
+++ b/include/linux/page_ext.h
@@ -4,7 +4,6 @@

#include <linux/types.h>
#include <linux/stacktrace.h>
-#include <linux/stackdepot.h>

struct pglist_data;
struct page_ext_operations {
@@ -14,6 +13,8 @@ struct page_ext_operations {
void (*init)(void);
};

+#include <linux/stackdepot.h>
+
#ifdef CONFIG_PAGE_EXTENSION

enum page_ext_flags {
diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h
index f525abfe51d4..154ea7436fec 100644
--- a/include/linux/pgalloc_tag.h
+++ b/include/linux/pgalloc_tag.h
@@ -5,6 +5,8 @@
#ifndef _LINUX_PGALLOC_TAG_H
#define _LINUX_PGALLOC_TAG_H

+#ifdef CONFIG_PAGE_ALLOC_TAGGING
+
#include <linux/alloc_tag.h>
#include <linux/page_ext.h>

@@ -25,4 +27,37 @@ static inline void pgalloc_tag_dec(struct page *page, unsigned int order)
alloc_tag_sub(get_page_tag_ref(page), PAGE_SIZE << order);
}

+/*
+ * Redefinitions of the common page allocators/destructors
+ */
+#define pgtag_alloc_pages(gfp, order) \
+({ \
+ struct page *_page = _alloc_pages((gfp), (order)); \
+ \
+ if (_page) \
+ alloc_tag_add(get_page_tag_ref(_page), PAGE_SIZE << (order));\
+ _page; \
+})
+
+#define pgtag_get_free_pages(gfp_mask, order) \
+({ \
+ struct page *_page; \
+ unsigned long _res = _get_free_pages((gfp_mask), (order), &_page);\
+ \
+ if (_res) \
+ alloc_tag_add(get_page_tag_ref(_page), PAGE_SIZE << (order));\
+ _res; \
+})
+
+#else /* CONFIG_PAGE_ALLOC_TAGGING */
+
+#define pgtag_alloc_pages(gfp, order) _alloc_pages(gfp, order)
+
+#define pgtag_get_free_pages(gfp_mask, order) \
+ _get_free_pages((gfp_mask), (order), NULL)
+
+#define pgalloc_tag_dec(__page, __size) do {} while (0)
+
+#endif /* CONFIG_PAGE_ALLOC_TAGGING */
+
#endif /* _LINUX_PGALLOC_TAG_H */
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index b73d3248d976..f7e6d9564a49 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2249,7 +2249,7 @@ EXPORT_SYMBOL(vma_alloc_folio);
* flags are used.
* Return: The page on success or NULL if allocation fails.
*/
-struct page *alloc_pages(gfp_t gfp, unsigned order)
+struct page *_alloc_pages(gfp_t gfp, unsigned int order)
{
struct mempolicy *pol = &default_policy;
struct page *page;
@@ -2273,7 +2273,7 @@ struct page *alloc_pages(gfp_t gfp, unsigned order)

return page;
}
-EXPORT_SYMBOL(alloc_pages);
+EXPORT_SYMBOL(_alloc_pages);

struct folio *folio_alloc(gfp_t gfp, unsigned order)
{
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e5486d47406e..165daba19e2a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -763,6 +763,7 @@ static inline bool pcp_allowed_order(unsigned int order)

static inline void free_the_page(struct page *page, unsigned int order)
{
+
if (pcp_allowed_order(order)) /* Via pcp? */
free_unref_page(page, order);
else
@@ -1120,6 +1121,8 @@ static inline void __free_one_page(struct page *page,
VM_BUG_ON_PAGE(pfn & ((1 << order) - 1), page);
VM_BUG_ON_PAGE(bad_range(zone, page), page);

+ pgalloc_tag_dec(page, order);
+
while (order < MAX_ORDER - 1) {
if (compaction_capture(capc, page, order, migratetype)) {
__mod_zone_freepage_state(zone, -(1 << order),
@@ -3440,6 +3443,7 @@ static void free_unref_page_commit(struct zone *zone, struct per_cpu_pages *pcp,
int pindex;
bool free_high;

+ pgalloc_tag_dec(page, order);
__count_vm_event(PGFREE);
pindex = order_to_pindex(migratetype, order);
list_add(&page->pcp_list, &pcp->lists[pindex]);
@@ -5557,16 +5561,19 @@ EXPORT_SYMBOL(__folio_alloc);
* address cannot represent highmem pages. Use alloc_pages and then kmap if
* you need to access high mem.
*/
-unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order)
+unsigned long _get_free_pages(gfp_t gfp_mask, unsigned int order,
+ struct page **ppage)
{
struct page *page;

- page = alloc_pages(gfp_mask & ~__GFP_HIGHMEM, order);
+ page = _alloc_pages(gfp_mask & ~__GFP_HIGHMEM, order);
+ if (ppage)
+ *ppage = page;
if (!page)
return 0;
return (unsigned long) page_address(page);
}
-EXPORT_SYMBOL(__get_free_pages);
+EXPORT_SYMBOL(_get_free_pages);

unsigned long get_zeroed_page(gfp_t gfp_mask)
{
--
2.37.2.672.g94769d06f0-goog


2022-08-31 10:21:26

by Mel Gorman

Subject: Re: [RFC PATCH 10/30] mm: enable page allocation tagging for __get_free_pages and alloc_pages

On Tue, Aug 30, 2022 at 02:48:59PM -0700, Suren Baghdasaryan wrote:
> Redefine alloc_pages, __get_free_pages to record allocations done by
> these functions. Instrument deallocation hooks to record object freeing.
>
> Signed-off-by: Suren Baghdasaryan <[email protected]>
> +#ifdef CONFIG_PAGE_ALLOC_TAGGING
> +
> #include <linux/alloc_tag.h>
> #include <linux/page_ext.h>
>
> @@ -25,4 +27,37 @@ static inline void pgalloc_tag_dec(struct page *page, unsigned int order)
> alloc_tag_sub(get_page_tag_ref(page), PAGE_SIZE << order);
> }
>
> +/*
> + * Redefinitions of the common page allocators/destructors
> + */
> +#define pgtag_alloc_pages(gfp, order) \
> +({ \
> + struct page *_page = _alloc_pages((gfp), (order)); \
> + \
> + if (_page) \
> + alloc_tag_add(get_page_tag_ref(_page), PAGE_SIZE << (order));\
> + _page; \
> +})
> +

Instead of renaming alloc_pages, why is the tagging not done in
__alloc_pages()? At least __alloc_pages_bulk() is also missed. The branch
can be guarded with IS_ENABLED.
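For readers unfamiliar with the idiom: IS_ENABLED() turns a Kconfig
symbol into a compile-time 0/1 usable in a plain C if, so the disabled
branch is discarded by the compiler with no #ifdef in the function
body. A standalone sketch of the mechanism, simplified from
include/linux/kconfig.h (the real IS_ENABLED() also handles =m
options; the CONFIG_ define below stands in for Kconfig output):

#include <stdio.h>

/*
 * A symbol defined to 1 token-pastes into __ARG_PLACEHOLDER_1, which
 * expands to "0," and shifts the argument list so the second-argument
 * picker returns 1. An undefined symbol leaves a single argument, so
 * the picker returns the trailing 0 instead.
 */
#define __ARG_PLACEHOLDER_1 0,
#define __take_second_arg(__ignored, val, ...) val
#define ____is_defined(arg1_or_junk) __take_second_arg(arg1_or_junk 1, 0)
#define ___is_defined(val) ____is_defined(__ARG_PLACEHOLDER_##val)
#define __is_defined(x) ___is_defined(x)
#define IS_ENABLED(option) __is_defined(option)

#define CONFIG_PAGE_ALLOC_TAGGING 1     /* stand-in for Kconfig output */

int main(void)
{
        if (IS_ENABLED(CONFIG_PAGE_ALLOC_TAGGING))
                printf("tagging branch compiled in\n");
        if (!IS_ENABLED(CONFIG_NO_SUCH_OPTION))
                printf("disabled branch folds to a constant 0\n");
        return 0;
}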

> +#define pgtag_get_free_pages(gfp_mask, order) \
> +({ \
> + struct page *_page; \
> + unsigned long _res = _get_free_pages((gfp_mask), (order), &_page);\
> + \
> + if (_res) \
> + alloc_tag_add(get_page_tag_ref(_page), PAGE_SIZE << (order));\
> + _res; \
> +})
> +

Similar, the tagging could happen in a core function instead of a wrapper.

> +#else /* CONFIG_PAGE_ALLOC_TAGGING */
> +
> +#define pgtag_alloc_pages(gfp, order) _alloc_pages(gfp, order)
> +
> +#define pgtag_get_free_pages(gfp_mask, order) \
> + _get_free_pages((gfp_mask), (order), NULL)
> +
> +#define pgalloc_tag_dec(__page, __size) do {} while (0)
> +
> +#endif /* CONFIG_PAGE_ALLOC_TAGGING */
> +
> #endif /* _LINUX_PGALLOC_TAG_H */
> diff --git a/mm/mempolicy.c b/mm/mempolicy.c
> index b73d3248d976..f7e6d9564a49 100644
> --- a/mm/mempolicy.c
> +++ b/mm/mempolicy.c
> @@ -2249,7 +2249,7 @@ EXPORT_SYMBOL(vma_alloc_folio);
> * flags are used.
> * Return: The page on success or NULL if allocation fails.
> */
> -struct page *alloc_pages(gfp_t gfp, unsigned order)
> +struct page *_alloc_pages(gfp_t gfp, unsigned int order)
> {
> struct mempolicy *pol = &default_policy;
> struct page *page;
> @@ -2273,7 +2273,7 @@ struct page *alloc_pages(gfp_t gfp, unsigned order)
>
> return page;
> }
> -EXPORT_SYMBOL(alloc_pages);
> +EXPORT_SYMBOL(_alloc_pages);
>
> struct folio *folio_alloc(gfp_t gfp, unsigned order)
> {
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index e5486d47406e..165daba19e2a 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -763,6 +763,7 @@ static inline bool pcp_allowed_order(unsigned int order)
>
> static inline void free_the_page(struct page *page, unsigned int order)
> {
> +
> if (pcp_allowed_order(order)) /* Via pcp? */
> free_unref_page(page, order);
> else

Spurious whitespace change.

--
Mel Gorman
SUSE Labs

2022-08-31 16:03:23

by Suren Baghdasaryan

Subject: Re: [RFC PATCH 10/30] mm: enable page allocation tagging for __get_free_pages and alloc_pages

On Wed, Aug 31, 2022 at 8:45 AM Suren Baghdasaryan <[email protected]> wrote:
>
> On Wed, Aug 31, 2022 at 3:11 AM Mel Gorman <[email protected]> wrote:
> >
> > On Tue, Aug 30, 2022 at 02:48:59PM -0700, Suren Baghdasaryan wrote:
> > > Redefine alloc_pages, __get_free_pages to record allocations done by
> > > these functions. Instrument deallocation hooks to record object freeing.
> > >
> > > Signed-off-by: Suren Baghdasaryan <[email protected]>
> > > +#ifdef CONFIG_PAGE_ALLOC_TAGGING
> > > +
> > > #include <linux/alloc_tag.h>
> > > #include <linux/page_ext.h>
> > >
> > > @@ -25,4 +27,37 @@ static inline void pgalloc_tag_dec(struct page *page, unsigned int order)
> > > alloc_tag_sub(get_page_tag_ref(page), PAGE_SIZE << order);
> > > }
> > >
> > > +/*
> > > + * Redefinitions of the common page allocators/destructors
> > > + */
> > > +#define pgtag_alloc_pages(gfp, order) \
> > > +({ \
> > > + struct page *_page = _alloc_pages((gfp), (order)); \
> > > + \
> > > + if (_page) \
> > > + alloc_tag_add(get_page_tag_ref(_page), PAGE_SIZE << (order));\
> > > + _page; \
> > > +})
> > > +
> >
> > Instead of renaming alloc_pages, why is the tagging not done in
> > __alloc_pages()? At least __alloc_pages_bulk() is also missed. The branch
> > can be guarded with IS_ENABLED.
>
> Hmm. Assuming all the other allocators using __alloc_pages are inlined, that
> should work. I'll try that and, if it works, will incorporate it in the
> next respin.
> Thanks!
>
> I don't think IS_ENABLED is required because the tagging functions are
> already defined as empty when the appropriate configs are not enabled.
> Unless I misunderstood your note.
>
> >
> > > +#define pgtag_get_free_pages(gfp_mask, order) \
> > > +({ \
> > > + struct page *_page; \
> > > + unsigned long _res = _get_free_pages((gfp_mask), (order), &_page);\
> > > + \
> > > + if (_res) \
> > > + alloc_tag_add(get_page_tag_ref(_page), PAGE_SIZE << (order));\
> > > + _res; \
> > > +})
> > > +
> >
> > Similar, the tagging could happen in a core function instead of a wrapper.

Ack.

> >
> > > +#else /* CONFIG_PAGE_ALLOC_TAGGING */
> > > +
> > > +#define pgtag_alloc_pages(gfp, order) _alloc_pages(gfp, order)
> > > +
> > > +#define pgtag_get_free_pages(gfp_mask, order) \
> > > + _get_free_pages((gfp_mask), (order), NULL)
> > > +
> > > +#define pgalloc_tag_dec(__page, __size) do {} while (0)
> > > +
> > > +#endif /* CONFIG_PAGE_ALLOC_TAGGING */
> > > +
> > > #endif /* _LINUX_PGALLOC_TAG_H */
> > > diff --git a/mm/mempolicy.c b/mm/mempolicy.c
> > > index b73d3248d976..f7e6d9564a49 100644
> > > --- a/mm/mempolicy.c
> > > +++ b/mm/mempolicy.c
> > > @@ -2249,7 +2249,7 @@ EXPORT_SYMBOL(vma_alloc_folio);
> > > * flags are used.
> > > * Return: The page on success or NULL if allocation fails.
> > > */
> > > -struct page *alloc_pages(gfp_t gfp, unsigned order)
> > > +struct page *_alloc_pages(gfp_t gfp, unsigned int order)
> > > {
> > > struct mempolicy *pol = &default_policy;
> > > struct page *page;
> > > @@ -2273,7 +2273,7 @@ struct page *alloc_pages(gfp_t gfp, unsigned order)
> > >
> > > return page;
> > > }
> > > -EXPORT_SYMBOL(alloc_pages);
> > > +EXPORT_SYMBOL(_alloc_pages);
> > >
> > > struct folio *folio_alloc(gfp_t gfp, unsigned order)
> > > {
> > > diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> > > index e5486d47406e..165daba19e2a 100644
> > > --- a/mm/page_alloc.c
> > > +++ b/mm/page_alloc.c
> > > @@ -763,6 +763,7 @@ static inline bool pcp_allowed_order(unsigned int order)
> > >
> > > static inline void free_the_page(struct page *page, unsigned int order)
> > > {
> > > +
> > > if (pcp_allowed_order(order)) /* Via pcp? */
> > > free_unref_page(page, order);
> > > else
> >
> > Spurious whitespace change.

Ack.

> >
> > --
> > Mel Gorman
> > SUSE Labs

2022-08-31 16:27:55

by Suren Baghdasaryan

Subject: Re: [RFC PATCH 10/30] mm: enable page allocation tagging for __get_free_pages and alloc_pages

On Wed, Aug 31, 2022 at 3:11 AM Mel Gorman <[email protected]> wrote:
>
> On Tue, Aug 30, 2022 at 02:48:59PM -0700, Suren Baghdasaryan wrote:
> > Redefine alloc_pages, __get_free_pages to record allocations done by
> > these functions. Instrument deallocation hooks to record object freeing.
> >
> > Signed-off-by: Suren Baghdasaryan <[email protected]>
> > +#ifdef CONFIG_PAGE_ALLOC_TAGGING
> > +
> > #include <linux/alloc_tag.h>
> > #include <linux/page_ext.h>
> >
> > @@ -25,4 +27,37 @@ static inline void pgalloc_tag_dec(struct page *page, unsigned int order)
> > alloc_tag_sub(get_page_tag_ref(page), PAGE_SIZE << order);
> > }
> >
> > +/*
> > + * Redefinitions of the common page allocators/destructors
> > + */
> > +#define pgtag_alloc_pages(gfp, order) \
> > +({ \
> > + struct page *_page = _alloc_pages((gfp), (order)); \
> > + \
> > + if (_page) \
> > + alloc_tag_add(get_page_tag_ref(_page), PAGE_SIZE << (order));\
> > + _page; \
> > +})
> > +
>
> Instead of renaming alloc_pages, why is the tagging not done in
> __alloc_pages()? At least __alloc_pages_bulk() is also missed. The branch
> can be guarded with IS_ENABLED.

Hmm. Assuming all the other allocators using __alloc_pages are inlined, that
should work. I'll try that and, if it works, will incorporate it in the
next respin.
Thanks!

I don't think IS_ENABLED is required because the tagging functions are
already defined as empty when the appropriate configs are not enabled.
Unless I misunderstood your note.

>
> > +#define pgtag_get_free_pages(gfp_mask, order) \
> > +({ \
> > + struct page *_page; \
> > + unsigned long _res = _get_free_pages((gfp_mask), (order), &_page);\
> > + \
> > + if (_res) \
> > + alloc_tag_add(get_page_tag_ref(_page), PAGE_SIZE << (order));\
> > + _res; \
> > +})
> > +
>
> Similar, the tagging could happen in a core function instead of a wrapper.
>
> > +#else /* CONFIG_PAGE_ALLOC_TAGGING */
> > +
> > +#define pgtag_alloc_pages(gfp, order) _alloc_pages(gfp, order)
> > +
> > +#define pgtag_get_free_pages(gfp_mask, order) \
> > + _get_free_pages((gfp_mask), (order), NULL)
> > +
> > +#define pgalloc_tag_dec(__page, __size) do {} while (0)
> > +
> > +#endif /* CONFIG_PAGE_ALLOC_TAGGING */
> > +
> > #endif /* _LINUX_PGALLOC_TAG_H */
> > diff --git a/mm/mempolicy.c b/mm/mempolicy.c
> > index b73d3248d976..f7e6d9564a49 100644
> > --- a/mm/mempolicy.c
> > +++ b/mm/mempolicy.c
> > @@ -2249,7 +2249,7 @@ EXPORT_SYMBOL(vma_alloc_folio);
> > * flags are used.
> > * Return: The page on success or NULL if allocation fails.
> > */
> > -struct page *alloc_pages(gfp_t gfp, unsigned order)
> > +struct page *_alloc_pages(gfp_t gfp, unsigned int order)
> > {
> > struct mempolicy *pol = &default_policy;
> > struct page *page;
> > @@ -2273,7 +2273,7 @@ struct page *alloc_pages(gfp_t gfp, unsigned order)
> >
> > return page;
> > }
> > -EXPORT_SYMBOL(alloc_pages);
> > +EXPORT_SYMBOL(_alloc_pages);
> >
> > struct folio *folio_alloc(gfp_t gfp, unsigned order)
> > {
> > diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> > index e5486d47406e..165daba19e2a 100644
> > --- a/mm/page_alloc.c
> > +++ b/mm/page_alloc.c
> > @@ -763,6 +763,7 @@ static inline bool pcp_allowed_order(unsigned int order)
> >
> > static inline void free_the_page(struct page *page, unsigned int order)
> > {
> > +
> > if (pcp_allowed_order(order)) /* Via pcp? */
> > free_unref_page(page, order);
> > else
>
> Spurious whitespace change.
>
> --
> Mel Gorman
> SUSE Labs

2022-08-31 18:16:37

by Kent Overstreet

Subject: Re: [RFC PATCH 10/30] mm: enable page allocation tagging for __get_free_pages and alloc_pages

On Wed, Aug 31, 2022 at 11:11:03AM +0100, Mel Gorman wrote:
> On Tue, Aug 30, 2022 at 02:48:59PM -0700, Suren Baghdasaryan wrote:
> > Redefine alloc_pages, __get_free_pages to record allocations done by
> > these functions. Instrument deallocation hooks to record object freeing.
> >
> > Signed-off-by: Suren Baghdasaryan <[email protected]>
> > +#ifdef CONFIG_PAGE_ALLOC_TAGGING
> > +
> > #include <linux/alloc_tag.h>
> > #include <linux/page_ext.h>
> >
> > @@ -25,4 +27,37 @@ static inline void pgalloc_tag_dec(struct page *page, unsigned int order)
> > alloc_tag_sub(get_page_tag_ref(page), PAGE_SIZE << order);
> > }
> >
> > +/*
> > + * Redefinitions of the common page allocators/destructors
> > + */
> > +#define pgtag_alloc_pages(gfp, order) \
> > +({ \
> > + struct page *_page = _alloc_pages((gfp), (order)); \
> > + \
> > + if (_page) \
> > + alloc_tag_add(get_page_tag_ref(_page), PAGE_SIZE << (order));\
> > + _page; \
> > +})
> > +
>
> Instead of renaming alloc_pages, why is the tagging not done in
> __alloc_pages()? At least __alloc_pages_bulk() is also missed. The branch
> can be guarded with IS_ENABLED.

It can't be in a function; it has to be in a wrapper macro.

alloc_tag_add() is a macro that defines a static struct in a special elf
section. That struct holds the allocation counters, and putting it in a special
elf section is how the code to list it in debugfs finds it.

Look at the dynamic debug code for prior precedent for this trick in the kernel
- that's how it makes pr_debug() calls dynamically controllable at runtime, from
debugfs. We're taking that method and turning it into a proper library.
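
For the unfamiliar, a minimal userspace sketch of the section trick
follows; the struct layout and names are illustrative, not the actual
alloc_tag API:

#include <stdio.h>

struct alloc_tag {
        const char *file;
        int line;
        unsigned long bytes;
};

/*
 * Each expansion defines one static counter at the callsite and drops
 * it into the "alloc_tags" section; the hot path is a direct add with
 * no lookup or pointer chase.
 */
#define COUNT_HERE(sz) do {                                            \
        static struct alloc_tag _tag                                   \
                __attribute__((section("alloc_tags"), used)) =         \
                { __FILE__, __LINE__, 0 };                             \
        _tag.bytes += (sz);                                            \
} while (0)

/*
 * The linker generates these bounds automatically for any section
 * whose name is a valid C identifier, so a report (the debugfs
 * listing, in the series) can walk every callsite's counter.
 */
extern struct alloc_tag __start_alloc_tags[];
extern struct alloc_tag __stop_alloc_tags[];

int main(void)
{
        COUNT_HERE(4096);       /* this callsite gets its own counter */
        COUNT_HERE(8192);       /* and so does this one */

        for (struct alloc_tag *t = __start_alloc_tags;
             t < __stop_alloc_tags; t++)
                printf("%s:%d: %lu bytes\n", t->file, t->line, t->bytes);
        return 0;
}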

Because all the counters are statically allocated, without even a pointer deref
to get to them in the allocation path (one pointer deref to get to them in the
deallocate path), that makes this _much, much_ cheaper than anything that could
be done with tracing - cheap enough that I expect many users will want to enable
it in production.

2022-09-01 01:29:30

by Suren Baghdasaryan

Subject: Re: [RFC PATCH 10/30] mm: enable page allocation tagging for __get_free_pages and alloc_pages

On Wed, Aug 31, 2022 at 10:46 AM Kent Overstreet
<[email protected]> wrote:
>
> On Wed, Aug 31, 2022 at 11:11:03AM +0100, Mel Gorman wrote:
> > On Tue, Aug 30, 2022 at 02:48:59PM -0700, Suren Baghdasaryan wrote:
> > > Redefine alloc_pages, __get_free_pages to record allocations done by
> > > these functions. Instrument deallocation hooks to record object freeing.
> > >
> > > Signed-off-by: Suren Baghdasaryan <[email protected]>
> > > +#ifdef CONFIG_PAGE_ALLOC_TAGGING
> > > +
> > > #include <linux/alloc_tag.h>
> > > #include <linux/page_ext.h>
> > >
> > > @@ -25,4 +27,37 @@ static inline void pgalloc_tag_dec(struct page *page, unsigned int order)
> > > alloc_tag_sub(get_page_tag_ref(page), PAGE_SIZE << order);
> > > }
> > >
> > > +/*
> > > + * Redefinitions of the common page allocators/destructors
> > > + */
> > > +#define pgtag_alloc_pages(gfp, order) \
> > > +({ \
> > > + struct page *_page = _alloc_pages((gfp), (order)); \
> > > + \
> > > + if (_page) \
> > > + alloc_tag_add(get_page_tag_ref(_page), PAGE_SIZE << (order));\
> > > + _page; \
> > > +})
> > > +
> >
> > Instead of renaming alloc_pages, why is the tagging not done in
> > __alloc_pages()? At least __alloc_pages_bulk() is also missed. The branch
> > can be guarded with IS_ENABLED.
>
> It can't be in a function; it has to be in a wrapper macro.

Ah, right. __FILE__, __LINE__ and the others we use to record the call
location would point to include/linux/gfp.h instead of the location
where the allocation is performed.
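
A trivial standalone illustration of the difference, with hypothetical
names:

#include <stdio.h>

/* An inline function evaluates __FILE__/__LINE__ where it is defined;
 * if this lived in a header like gfp.h, every caller would report the
 * header's location. */
static inline void where_func(const char *what)
{
        printf("%s at %s:%d\n", what, __FILE__, __LINE__);
}

/* A macro expands __FILE__/__LINE__ at the callsite, which is what
 * per-callsite accounting needs. */
#define where_macro(what) \
        printf("%s at %s:%d\n", (what), __FILE__, __LINE__)

int main(void)
{
        where_func("function hook");    /* reports the definition line */
        where_macro("macro hook");      /* reports this exact line */
        return 0;
}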

>
> alloc_tag_add() is a macro that defines a static struct in a special elf
> section. That struct holds the allocation counters, and putting it in a special
> elf section is how the code to list it in debugfs finds it.
>
> Look at the dynamic debug code for prior precedent for this trick in the kernel
> - that's how it makes pr_debug() calls dynamically controllable at runtime, from
> debugfs. We're taking that method and turning it into a proper library.
>
> Because all the counters are statically allocated, without even a pointer deref
> to get to them in the allocation path (one pointer deref to get to them in the
> deallocate path), that makes this _much, much_ cheaper than anything that could
> be done with tracing - cheap enough that I expect many users will want to enable
> it in production.

2022-09-01 07:46:02

by Peter Zijlstra

Subject: Re: [RFC PATCH 10/30] mm: enable page allocation tagging for __get_free_pages and alloc_pages

On Wed, Aug 31, 2022 at 01:46:29PM -0400, Kent Overstreet wrote:

> Because all the counters are statically allocated, without even a pointer deref
> to get to them in the allocation path (one pointer deref to get to them in the
> deallocate path), that makes this _much, much_ cheaper than anything that could
> be done with tracing - cheap enough that I expect many users will want to enable
> it in production.

You're contributing to death-by-a-thousand-cuts here. By making all this
unconditional you're putting distros in a bind. Most of their users will
likely not care about this, but if they enable it, they'll still pay the
price for having it.

Even static counters will have cache misses etc.

So yes, for the few people who actually care about this stuff, this
might be a bit faster, but IMO it gets the economics all backwards:
you're making everybody pay the price instead of only those who care.

Also note that you can have your tracepoint-based handler use
statically allocated data just fine.
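
A rough sketch of that shape, hooking the existing mm_page_alloc
tracepoint with a statically allocated counter. The probe signature
mirrors that tracepoint's TP_PROTO plus the leading data cookie all
probes receive; treat the details as illustrative, not a tested patch:

#include <linux/atomic.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/tracepoint.h>
#include <trace/events/kmem.h>

/* Statically allocated state; nothing is allocated at runtime. */
static atomic_long_t page_alloc_bytes;

static void probe_mm_page_alloc(void *data, struct page *page,
                                unsigned int order, gfp_t gfp_flags,
                                int migratetype)
{
        if (page)
                atomic_long_add(PAGE_SIZE << order, &page_alloc_bytes);
}

static int __init page_alloc_probe_init(void)
{
        /* register_trace_<name>() is generated by the tracepoint
         * machinery; probes can be attached and detached at runtime. */
        return register_trace_mm_page_alloc(probe_mm_page_alloc, NULL);
}
late_initcall(page_alloc_probe_init);

The cost is an atomic add only while the probe is attached, which is
the economics trade-off being argued here.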