2022-09-05 03:35:17

by Oscar Salvador

[permalink] [raw]
Subject: [PATCH v2 1/3] lib/stackdepot: Add a refcount field in stack_record

We want to filter out page_owner output and print only those
stacks that have been repeated beyond a certain threshold.
This gives us the chance to get rid of a lot of noise.
In order to do that, we need to keep track of how many repeated stacks
(for allocation) we have, so we add a new refcount_t field
in the stack_record struct.

Note that this might increase the size of the struct for some
architectures.
E.g: x86_64 is not affected due to alignment, but x86 32bits might.

The alternative would be to have some kind of struct like this:

struct track_stacks {
struct stack_record *stack;
struct track_stacks *next;
refcount_t stack_count;

But that would imply performing more allocations and gluing everything
together, which would make the code more complex, so I think that
going with a new field in the struct stack_record is good enough.

Note that on __set_page_owner_handle(), page_owner->handle is set,
and on __reset_page_owner(), page_owner->free_handle is set.

We are interested in page_owner->handle, so when __set_page_owner()
gets called, we derive the stack_record struct from page_owner->handle,
and we increment its refcount_t field; and when __reset_page_owner()
gets called, we derive its stack_record from page_owner->handle
and we decrement its refcount_t field.

Signed-off-by: Oscar Salvador <[email protected]>
---
include/linux/stackdepot.h | 13 ++++++-
lib/stackdepot.c | 79 +++++++++++++++++++++++++++++++-------
mm/kasan/common.c | 3 +-
mm/page_owner.c | 14 +++++--
4 files changed, 89 insertions(+), 20 deletions(-)

diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h
index bc2797955de9..4e3a88f135ee 100644
--- a/include/linux/stackdepot.h
+++ b/include/linux/stackdepot.h
@@ -15,9 +15,16 @@

typedef u32 depot_stack_handle_t;

+enum stack_depot_action {
+ STACK_DEPOT_ACTION_NONE,
+ STACK_DEPOT_ACTION_COUNT,
+};
+
depot_stack_handle_t __stack_depot_save(unsigned long *entries,
unsigned int nr_entries,
- gfp_t gfp_flags, bool can_alloc);
+ gfp_t gfp_flags, bool can_alloc,
+ enum stack_depot_action action);
+void stack_depot_dec_count(depot_stack_handle_t handle);

/*
* Every user of stack depot has to call stack_depot_init() during its own init
@@ -55,6 +62,10 @@ static inline int stack_depot_early_init(void) { return 0; }

depot_stack_handle_t stack_depot_save(unsigned long *entries,
unsigned int nr_entries, gfp_t gfp_flags);
+depot_stack_handle_t stack_depot_save_action(unsigned long *entries,
+ unsigned int nr_entries,
+ gfp_t gfp_flags,
+ enum stack_depot_action action);

unsigned int stack_depot_fetch(depot_stack_handle_t handle,
unsigned long **entries);
diff --git a/lib/stackdepot.c b/lib/stackdepot.c
index e73fda23388d..a806ef58a385 100644
--- a/lib/stackdepot.c
+++ b/lib/stackdepot.c
@@ -64,6 +64,7 @@ struct stack_record {
u32 hash; /* Hash in the hastable */
u32 size; /* Number of frames in the stack */
union handle_parts handle;
+ refcount_t count; /* Number of the same repeated stacks */
unsigned long entries[]; /* Variable-sized array of entries. */
};

@@ -140,6 +141,7 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc)
stack->handle.slabindex = depot_index;
stack->handle.offset = depot_offset >> STACK_ALLOC_ALIGN;
stack->handle.valid = 1;
+ refcount_set(&stack->count, 1);
memcpy(stack->entries, entries, flex_array_size(stack, entries, size));
depot_offset += required_size;

@@ -341,6 +343,29 @@ void stack_depot_print(depot_stack_handle_t stack)
}
EXPORT_SYMBOL_GPL(stack_depot_print);

+static struct stack_record *stack_depot_getstack(depot_stack_handle_t handle)
+{
+ union handle_parts parts = { .handle = handle };
+ void *slab;
+ size_t offset = parts.offset << STACK_ALLOC_ALIGN;
+ struct stack_record *stack;
+
+ if(!handle)
+ return NULL;
+
+ if (parts.slabindex > depot_index) {
+ WARN(1, "slab index %d out of bounds (%d) for stack id %08x\n",
+ parts.slabindex, depot_index, handle);
+ return NULL;
+ }
+ slab = stack_slabs[parts.slabindex];
+ if (!slab)
+ return NULL;
+
+ stack = slab + offset;
+ return stack;
+}
+
/**
* stack_depot_fetch - Fetch stack entries from a depot
*
@@ -353,30 +378,42 @@ EXPORT_SYMBOL_GPL(stack_depot_print);
unsigned int stack_depot_fetch(depot_stack_handle_t handle,
unsigned long **entries)
{
- union handle_parts parts = { .handle = handle };
- void *slab;
- size_t offset = parts.offset << STACK_ALLOC_ALIGN;
struct stack_record *stack;

*entries = NULL;
if (!handle)
return 0;

- if (parts.slabindex > depot_index) {
- WARN(1, "slab index %d out of bounds (%d) for stack id %08x\n",
- parts.slabindex, depot_index, handle);
- return 0;
- }
- slab = stack_slabs[parts.slabindex];
- if (!slab)
+ stack = stack_depot_getstack(handle);
+ if (!stack)
return 0;
- stack = slab + offset;

*entries = stack->entries;
return stack->size;
}
EXPORT_SYMBOL_GPL(stack_depot_fetch);

+static void stack_depot_inc_count(struct stack_record *stack)
+{
+ refcount_inc(&stack->count);
+}
+
+void stack_depot_dec_count(depot_stack_handle_t handle)
+{
+ struct stack_record *stack = NULL;
+
+ stack = stack_depot_getstack(handle);
+ if (stack) {
+ /*
+ * page_owner creates some stacks via create_dummy_stack().
+ * We are not interested in those, so make sure we only decrement
+ * "valid" stacks.
+ */
+ if (refcount_read(&stack->count) > 1)
+ refcount_dec(&stack->count);
+ }
+}
+
/**
* __stack_depot_save - Save a stack trace from an array
*
@@ -402,7 +439,8 @@ EXPORT_SYMBOL_GPL(stack_depot_fetch);
*/
depot_stack_handle_t __stack_depot_save(unsigned long *entries,
unsigned int nr_entries,
- gfp_t alloc_flags, bool can_alloc)
+ gfp_t alloc_flags, bool can_alloc,
+ enum stack_depot_action action)
{
struct stack_record *found = NULL, **bucket;
depot_stack_handle_t retval = 0;
@@ -488,8 +526,11 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries,
/* Nobody used this memory, ok to free it. */
free_pages((unsigned long)prealloc, STACK_ALLOC_ORDER);
}
- if (found)
+ if (found) {
retval = found->handle.handle;
+ if (action == STACK_DEPOT_ACTION_COUNT)
+ stack_depot_inc_count(found);
+ }
fast_exit:
return retval;
}
@@ -511,6 +552,16 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries,
unsigned int nr_entries,
gfp_t alloc_flags)
{
- return __stack_depot_save(entries, nr_entries, alloc_flags, true);
+ return __stack_depot_save(entries, nr_entries, alloc_flags, true,
+ STACK_DEPOT_ACTION_NONE);
}
EXPORT_SYMBOL_GPL(stack_depot_save);
+
+depot_stack_handle_t stack_depot_save_action(unsigned long *entries,
+ unsigned int nr_entries,
+ gfp_t alloc_flags,
+ enum stack_depot_action action)
+{
+ return __stack_depot_save(entries, nr_entries, alloc_flags, true, action);
+}
+EXPORT_SYMBOL_GPL(stack_depot_save_action);
diff --git a/mm/kasan/common.c b/mm/kasan/common.c
index 69f583855c8b..8077c6e70815 100644
--- a/mm/kasan/common.c
+++ b/mm/kasan/common.c
@@ -36,7 +36,8 @@ depot_stack_handle_t kasan_save_stack(gfp_t flags, bool can_alloc)
unsigned int nr_entries;

nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
- return __stack_depot_save(entries, nr_entries, flags, can_alloc);
+ return __stack_depot_save(entries, nr_entries, flags, can_alloc,
+ STACK_DEPOT_ACTION_NONE);
}

void kasan_set_track(struct kasan_track *track, gfp_t flags)
diff --git a/mm/page_owner.c b/mm/page_owner.c
index e4c6f3f1695b..8730f377fa91 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -106,7 +106,8 @@ static inline struct page_owner *get_page_owner(struct page_ext *page_ext)
return (void *)page_ext + page_owner_ops.offset;
}

-static noinline depot_stack_handle_t save_stack(gfp_t flags)
+static noinline depot_stack_handle_t save_stack(gfp_t flags,
+ enum stack_depot_action action)
{
unsigned long entries[PAGE_OWNER_STACK_DEPTH];
depot_stack_handle_t handle;
@@ -125,7 +126,7 @@ static noinline depot_stack_handle_t save_stack(gfp_t flags)
current->in_page_owner = 1;

nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 2);
- handle = stack_depot_save(entries, nr_entries, flags);
+ handle = stack_depot_save_action(entries, nr_entries, flags, action);
if (!handle)
handle = failure_handle;

@@ -138,6 +139,7 @@ void __reset_page_owner(struct page *page, unsigned short order)
int i;
struct page_ext *page_ext;
depot_stack_handle_t handle;
+ depot_stack_handle_t alloc_handle;
struct page_owner *page_owner;
u64 free_ts_nsec = local_clock();

@@ -145,7 +147,10 @@ void __reset_page_owner(struct page *page, unsigned short order)
if (unlikely(!page_ext))
return;

- handle = save_stack(GFP_NOWAIT | __GFP_NOWARN);
+ page_owner = get_page_owner(page_ext);
+ alloc_handle = page_owner->handle;
+
+ handle = save_stack(GFP_NOWAIT | __GFP_NOWARN, STACK_DEPOT_ACTION_NONE);
for (i = 0; i < (1 << order); i++) {
__clear_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
page_owner = get_page_owner(page_ext);
@@ -153,6 +158,7 @@ void __reset_page_owner(struct page *page, unsigned short order)
page_owner->free_ts_nsec = free_ts_nsec;
page_ext = page_ext_next(page_ext);
}
+ stack_depot_dec_count(alloc_handle);
}

static inline void __set_page_owner_handle(struct page_ext *page_ext,
@@ -189,7 +195,7 @@ noinline void __set_page_owner(struct page *page, unsigned short order,
if (unlikely(!page_ext))
return;

- handle = save_stack(gfp_mask);
+ handle = save_stack(gfp_mask, STACK_DEPOT_ACTION_COUNT);
__set_page_owner_handle(page_ext, handle, order, gfp_mask);
}

--
2.35.3


2022-09-05 21:40:37

by Andrey Konovalov

[permalink] [raw]
Subject: Re: [PATCH v2 1/3] lib/stackdepot: Add a refcount field in stack_record

On Mon, Sep 5, 2022 at 5:10 AM Oscar Salvador <[email protected]> wrote:
>
> We want to filter out page_owner output and print only those
> stacks that have been repeated beyond a certain threshold.
> This gives us the chance to get rid of a lot of noise.
> In order to do that, we need to keep track of how many repeated stacks
> (for allocation) do we have, so we add a new refcount_t field
> in the stack_record struct.
>
> Note that this might increase the size of the struct for some
> architectures.
> E.g: x86_64 is not affected due to alignment, but x86 32bits might.
>
> The alternative would be to have some kind of struct like this:
>
> struct track_stacks {
> struct stack_record *stack;
> struct track_stacks *next;
> refcount_t stack_count;
>
> But ithat would imply to perform more allocations and glue everything
> together, which would make the code more complex, so I think that
> going with a new field in the struct stack_record is good enough.
>
> Note that on __set_page_owner_handle(), page_owner->handle is set,
> and on __reset_page_owner(), page_owner->free_handle is set.
>
> We are interested in page_owner->handle, so when __set_page_owner()
> gets called, we derive the stack_record struct from page_owner->handle,
> and we increment its refcount_t field; and when __reset_page_owner()
> gets called, we derive its stack_record from page_owner->handle()
> and we decrement its refcount_t field.
>
> Signed-off-by: Oscar Salvador <[email protected]>
> ---
> include/linux/stackdepot.h | 13 ++++++-
> lib/stackdepot.c | 79 +++++++++++++++++++++++++++++++-------
> mm/kasan/common.c | 3 +-
> mm/page_owner.c | 14 +++++--
> 4 files changed, 89 insertions(+), 20 deletions(-)
>
> diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h
> index bc2797955de9..4e3a88f135ee 100644
> --- a/include/linux/stackdepot.h
> +++ b/include/linux/stackdepot.h
> @@ -15,9 +15,16 @@
>
> typedef u32 depot_stack_handle_t;
>
> +enum stack_depot_action {
> + STACK_DEPOT_ACTION_NONE,
> + STACK_DEPOT_ACTION_COUNT,
> +};

Hi Oscar,

Why do we need these actions? Why not just increment the refcount on
each stack trace save?

> +
> depot_stack_handle_t __stack_depot_save(unsigned long *entries,
> unsigned int nr_entries,
> - gfp_t gfp_flags, bool can_alloc);
> + gfp_t gfp_flags, bool can_alloc,
> + enum stack_depot_action action);
> +void stack_depot_dec_count(depot_stack_handle_t handle);
>
> /*
> * Every user of stack depot has to call stack_depot_init() during its own init
> @@ -55,6 +62,10 @@ static inline int stack_depot_early_init(void) { return 0; }
>
> depot_stack_handle_t stack_depot_save(unsigned long *entries,
> unsigned int nr_entries, gfp_t gfp_flags);
> +depot_stack_handle_t stack_depot_save_action(unsigned long *entries,
> + unsigned int nr_entries,
> + gfp_t gfp_flags,
> + enum stack_depot_action action);
>
> unsigned int stack_depot_fetch(depot_stack_handle_t handle,
> unsigned long **entries);
> diff --git a/lib/stackdepot.c b/lib/stackdepot.c
> index e73fda23388d..a806ef58a385 100644
> --- a/lib/stackdepot.c
> +++ b/lib/stackdepot.c
> @@ -64,6 +64,7 @@ struct stack_record {
> u32 hash; /* Hash in the hastable */
> u32 size; /* Number of frames in the stack */
> union handle_parts handle;
> + refcount_t count; /* Number of the same repeated stacks */
> unsigned long entries[]; /* Variable-sized array of entries. */
> };
>
> @@ -140,6 +141,7 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc)
> stack->handle.slabindex = depot_index;
> stack->handle.offset = depot_offset >> STACK_ALLOC_ALIGN;
> stack->handle.valid = 1;
> + refcount_set(&stack->count, 1);
> memcpy(stack->entries, entries, flex_array_size(stack, entries, size));
> depot_offset += required_size;
>
> @@ -341,6 +343,29 @@ void stack_depot_print(depot_stack_handle_t stack)
> }
> EXPORT_SYMBOL_GPL(stack_depot_print);
>
> +static struct stack_record *stack_depot_getstack(depot_stack_handle_t handle)
> +{
> + union handle_parts parts = { .handle = handle };
> + void *slab;
> + size_t offset = parts.offset << STACK_ALLOC_ALIGN;
> + struct stack_record *stack;
> +
> + if(!handle)
> + return NULL;
> +
> + if (parts.slabindex > depot_index) {
> + WARN(1, "slab index %d out of bounds (%d) for stack id %08x\n",
> + parts.slabindex, depot_index, handle);
> + return NULL;
> + }
> + slab = stack_slabs[parts.slabindex];
> + if (!slab)
> + return NULL;
> +
> + stack = slab + offset;
> + return stack;
> +}
> +
> /**
> * stack_depot_fetch - Fetch stack entries from a depot
> *
> @@ -353,30 +378,42 @@ EXPORT_SYMBOL_GPL(stack_depot_print);
> unsigned int stack_depot_fetch(depot_stack_handle_t handle,
> unsigned long **entries)
> {
> - union handle_parts parts = { .handle = handle };
> - void *slab;
> - size_t offset = parts.offset << STACK_ALLOC_ALIGN;
> struct stack_record *stack;
>
> *entries = NULL;
> if (!handle)
> return 0;
>
> - if (parts.slabindex > depot_index) {
> - WARN(1, "slab index %d out of bounds (%d) for stack id %08x\n",
> - parts.slabindex, depot_index, handle);
> - return 0;
> - }
> - slab = stack_slabs[parts.slabindex];
> - if (!slab)
> + stack = stack_depot_getstack(handle);
> + if (!stack)
> return 0;
> - stack = slab + offset;
>
> *entries = stack->entries;
> return stack->size;
> }
> EXPORT_SYMBOL_GPL(stack_depot_fetch);
>
> +static void stack_depot_inc_count(struct stack_record *stack)
> +{
> + refcount_inc(&stack->count);
> +}
> +
> +void stack_depot_dec_count(depot_stack_handle_t handle)
> +{
> + struct stack_record *stack = NULL;
> +
> + stack = stack_depot_getstack(handle);
> + if (stack) {
> + /*
> + * page_owner creates some stacks via create_dummy_stack().
> + * We are not interested in those, so make sure we only decrement
> + * "valid" stacks.
> + */
> + if (refcount_read(&stack->count) > 1)
> + refcount_dec(&stack->count);
> + }
> +}
> +
> /**
> * __stack_depot_save - Save a stack trace from an array
> *
> @@ -402,7 +439,8 @@ EXPORT_SYMBOL_GPL(stack_depot_fetch);
> */
> depot_stack_handle_t __stack_depot_save(unsigned long *entries,
> unsigned int nr_entries,
> - gfp_t alloc_flags, bool can_alloc)
> + gfp_t alloc_flags, bool can_alloc,
> + enum stack_depot_action action)
> {
> struct stack_record *found = NULL, **bucket;
> depot_stack_handle_t retval = 0;
> @@ -488,8 +526,11 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries,
> /* Nobody used this memory, ok to free it. */
> free_pages((unsigned long)prealloc, STACK_ALLOC_ORDER);
> }
> - if (found)
> + if (found) {
> retval = found->handle.handle;
> + if (action == STACK_DEPOT_ACTION_COUNT)
> + stack_depot_inc_count(found);
> + }
> fast_exit:
> return retval;
> }
> @@ -511,6 +552,16 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries,
> unsigned int nr_entries,
> gfp_t alloc_flags)
> {
> - return __stack_depot_save(entries, nr_entries, alloc_flags, true);
> + return __stack_depot_save(entries, nr_entries, alloc_flags, true,
> + STACK_DEPOT_ACTION_NONE);
> }
> EXPORT_SYMBOL_GPL(stack_depot_save);
> +
> +depot_stack_handle_t stack_depot_save_action(unsigned long *entries,
> + unsigned int nr_entries,
> + gfp_t alloc_flags,
> + enum stack_depot_action action)
> +{
> + return __stack_depot_save(entries, nr_entries, alloc_flags, true, action);
> +}
> +EXPORT_SYMBOL_GPL(stack_depot_save_action);
> diff --git a/mm/kasan/common.c b/mm/kasan/common.c
> index 69f583855c8b..8077c6e70815 100644
> --- a/mm/kasan/common.c
> +++ b/mm/kasan/common.c
> @@ -36,7 +36,8 @@ depot_stack_handle_t kasan_save_stack(gfp_t flags, bool can_alloc)
> unsigned int nr_entries;
>
> nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
> - return __stack_depot_save(entries, nr_entries, flags, can_alloc);
> + return __stack_depot_save(entries, nr_entries, flags, can_alloc,
> + STACK_DEPOT_ACTION_NONE);
> }
>
> void kasan_set_track(struct kasan_track *track, gfp_t flags)
> diff --git a/mm/page_owner.c b/mm/page_owner.c
> index e4c6f3f1695b..8730f377fa91 100644
> --- a/mm/page_owner.c
> +++ b/mm/page_owner.c

Could you split out the stack depot and the page_owner changes into
separate patches?

> @@ -106,7 +106,8 @@ static inline struct page_owner *get_page_owner(struct page_ext *page_ext)
> return (void *)page_ext + page_owner_ops.offset;
> }
>
> -static noinline depot_stack_handle_t save_stack(gfp_t flags)
> +static noinline depot_stack_handle_t save_stack(gfp_t flags,
> + enum stack_depot_action action)
> {
> unsigned long entries[PAGE_OWNER_STACK_DEPTH];
> depot_stack_handle_t handle;
> @@ -125,7 +126,7 @@ static noinline depot_stack_handle_t save_stack(gfp_t flags)
> current->in_page_owner = 1;
>
> nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 2);
> - handle = stack_depot_save(entries, nr_entries, flags);
> + handle = stack_depot_save_action(entries, nr_entries, flags, action);
> if (!handle)
> handle = failure_handle;
>
> @@ -138,6 +139,7 @@ void __reset_page_owner(struct page *page, unsigned short order)
> int i;
> struct page_ext *page_ext;
> depot_stack_handle_t handle;
> + depot_stack_handle_t alloc_handle;
> struct page_owner *page_owner;
> u64 free_ts_nsec = local_clock();
>
> @@ -145,7 +147,10 @@ void __reset_page_owner(struct page *page, unsigned short order)
> if (unlikely(!page_ext))
> return;
>
> - handle = save_stack(GFP_NOWAIT | __GFP_NOWARN);
> + page_owner = get_page_owner(page_ext);
> + alloc_handle = page_owner->handle;
> +
> + handle = save_stack(GFP_NOWAIT | __GFP_NOWARN, STACK_DEPOT_ACTION_NONE);
> for (i = 0; i < (1 << order); i++) {
> __clear_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
> page_owner = get_page_owner(page_ext);
> @@ -153,6 +158,7 @@ void __reset_page_owner(struct page *page, unsigned short order)
> page_owner->free_ts_nsec = free_ts_nsec;
> page_ext = page_ext_next(page_ext);
> }
> + stack_depot_dec_count(alloc_handle);
> }
>
> static inline void __set_page_owner_handle(struct page_ext *page_ext,
> @@ -189,7 +195,7 @@ noinline void __set_page_owner(struct page *page, unsigned short order,
> if (unlikely(!page_ext))
> return;
>
> - handle = save_stack(gfp_mask);
> + handle = save_stack(gfp_mask, STACK_DEPOT_ACTION_COUNT);
> __set_page_owner_handle(page_ext, handle, order, gfp_mask);
> }
>
> --
> 2.35.3
>

2022-09-06 03:57:48

by Oscar Salvador

[permalink] [raw]
Subject: Re: [PATCH v2 1/3] lib/stackdepot: Add a refcount field in stack_record

On Mon, Sep 05, 2022 at 10:57:20PM +0200, Andrey Konovalov wrote:
> On Mon, Sep 5, 2022 at 5:10 AM Oscar Salvador <[email protected]> wrote:
> > +enum stack_depot_action {
> > + STACK_DEPOT_ACTION_NONE,
> > + STACK_DEPOT_ACTION_COUNT,
> > +};
>
> Hi Oscar,

Hi Andrey

> Why do we need these actions? Why not just increment the refcount on
> each stack trace save?

Let me try to explain it.

Back in RFC, there were no actions and the refcount
was incremented/decremented in __set_page_owner()
and __reset_page_owner() functions.

This led to a performance "problem", where you would
look for the stack twice: once when saving it
and once when incrementing it.

We figured we could do better and, at least, for the __set_page_owner()
we could look just once for the stacktrace when calling __stack_depot_save,
and increment it directly there.

We cannot do that for __reset_page_owner(), because the stack we are
saving is the freeing stacktrace, and we are not interested in that.
That is why __reset_page_owner() does:

<---
depot_stack_handle_t alloc_handle;

...
alloc_handle = page_owner->handle;
handle = save_stack(GFP_NOWAIT | __GFP_NOWARN, STACK_DEPOT_ACTION_NONE);
page_owner->free_handle = handle;
stack_depot_dec_count(alloc_handle);
--->

alloc_handle contains the handle for the allocation stacktrace, which was set
in __set_page_owner(), and page_owner->free_handle contains the handle for the
freeing stacktrace.
But we are only interested in the allocation stack and we only want to increment/decrement
that on allocation/free.


> Could you split out the stack depot and the page_owner changes into
> separate patches?

I could, I am not sure if it would make the review any easier though,
as you could not match stackdepot <-> page_owner changes.

And we should be adding a bunch of code that would not be used till later on.
But I can try it out if there is a strong opinion.

thanks for your time!


--
Oscar Salvador
SUSE Labs

2022-09-10 23:01:21

by Andrey Konovalov

[permalink] [raw]
Subject: Re: [PATCH v2 1/3] lib/stackdepot: Add a refcount field in stack_record

On Tue, Sep 6, 2022 at 5:54 AM Oscar Salvador <[email protected]> wrote:
>
> On Mon, Sep 05, 2022 at 10:57:20PM +0200, Andrey Konovalov wrote:
> > On Mon, Sep 5, 2022 at 5:10 AM Oscar Salvador <[email protected]> wrote:
> > > +enum stack_depot_action {
> > > + STACK_DEPOT_ACTION_NONE,
> > > + STACK_DEPOT_ACTION_COUNT,
> > > +};
> >
> > Hi Oscar,
>
> Hi Andrey
>
> > Why do we need these actions? Why not just increment the refcount on
> > each stack trace save?
>
> Let me try to explain it.
>
> Back in RFC, there were no actions and the refcount
> was incremented/decremented in __set_page_ownwer()
> and __reset_page_owner() functions.
>
> This lead to a performance "problem", where you would
> look for the stack twice, one when save it
> and one when increment it.

I don't get this. If you increment the refcount at the same moment
when you save a stack trace, why do you need to do the lookup twice?

> We figured we could do better and, at least, for the __set_page_owner()
> we could look just once for the stacktrace when calling __stack_depot_save,
> and increment it directly there.

Exactly.

> We cannot do that for __reset_page_owner(), because the stack we are
> saving is the freeing stacktrace, and we are not interested in that.
> That is why __reset_page_owner() does:
>
> <---
> depot_stack_handle_t alloc_handle;
>
> ...
> alloc_handle = page_owner->handle;
> handle = save_stack(GFP_NOWAIT | __GFP_NOWARN, STACK_DEPOT_ACTION_NONE);
> page_owner->free_handle = handle
> stack_depot_dec_count(alloc_handle);
> --->
>
> alloc_handle contains the handle for the allocation stacktrace, which was set
> in __set_page_owner(), and page_owner->free handle contains the handle for the
> freeing stacktrace.
> But we are only interested in the allocation stack and we only want to increment/decrement
> that on allocation/free.

But what is the problem with incrementing the refcount for free
stacktrace in __reset_page_owner? You save the stack there anyway.

Or is this because you don't want to see free stack traces when
filtering for bigger refcounts? I would argue that this is not a thing
stack depot should care about. If there's a refcount, it should
reflect the true number of references.

Perhaps, what you need is some kind of per-stack trace user metadata.
And the details of what format this metadata takes shouldn't be
present in the stack depot code.

> > Could you split out the stack depot and the page_owner changes into
> > separate patches?
>
> I could, I am not sure if it would make the review any easier though,
> as you could not match stackdepot <-> page_owner changes.
>
> And we should be adding a bunch of code that would not be used till later on.
> But I can try it out if there is a strong opinion.

Yes, splitting would be quite useful. It allows backporting stack
depot changes without having to backport any page_owner code. As long
as there are not breaking interface changes, of course.

Thanks!

2022-09-19 15:21:04

by Vlastimil Babka

[permalink] [raw]
Subject: Re: [PATCH v2 1/3] lib/stackdepot: Add a refcount field in stack_record

On 9/11/22 00:33, Andrey Konovalov wrote:
>> We cannot do that for __reset_page_owner(), because the stack we are
>> saving is the freeing stacktrace, and we are not interested in that.
>> That is why __reset_page_owner() does:
>>
>> <---
>> depot_stack_handle_t alloc_handle;
>>
>> ...
>> alloc_handle = page_owner->handle;
>> handle = save_stack(GFP_NOWAIT | __GFP_NOWARN, STACK_DEPOT_ACTION_NONE);
>> page_owner->free_handle = handle
>> stack_depot_dec_count(alloc_handle);
>> --->
>>
>> alloc_handle contains the handle for the allocation stacktrace, which was set
>> in __set_page_owner(), and page_owner->free handle contains the handle for the
>> freeing stacktrace.
>> But we are only interested in the allocation stack and we only want to increment/decrement
>> that on allocation/free.
>
> But what is the problem with incrementing the refcount for free
> stacktrace in __reset_page_owner? You save the stack there anyway.
>
> Or is this because you don't want to see free stack traces when
> filtering for bigger refcounts? I would argue that this is not a thing
> stack depot should care about. If there's a refcount, it should
> reflect the true number of references.

But to keep this really a "true number" we would have to now decrement the
counter when the page gets freed by a different stack trace, i.e. we replace
the previously stored free_handle with another one. Which is possible, but
seems like a waste of CPU?

> Perhaps, what you need is some kind of per-stack trace user metadata.
> And the details of what format this metadata takes shouldn't be
> present in the stack depot code.

I was thinking ideally we might want fully separate stackdepot storages
per-stack trace user, i.e. separate hashmaps and dynamically allocated
handles instead of the static "slabs" array. Different users tend to have
distinct stacks so that would make lookups more effective too. Only
CONFIG_STACKDEPOT_ALWAYS_INIT users (KASAN) would keep using the static
array due to their inherent limitations wrt dynamic allocations.

Then these different stackdepot instances could be optionally refcounted or
not, and if need be, have additional metadata as you suggest.

>> > Could you split out the stack depot and the page_owner changes into
>> > separate patches?
>>
>> I could, I am not sure if it would make the review any easier though,
>> as you could not match stackdepot <-> page_owner changes.
>>
>> And we should be adding a bunch of code that would not be used till later on.
>> But I can try it out if there is a strong opinion.
>
> Yes, splitting would be quite useful. It allows backporting stack
> depot changes without having to backport any page_owner code. As long
> as there are not breaking interface changes, of course.
>
> Thanks!
>