2022-11-04 10:03:15

by Oscar Salvador

[permalink] [raw]
Subject: [PATCH v3 0/3] page_owner: print stacks and their counter

Changes v2 -> v3:
- Replace interface in favor of seq operations (suggested by Vlastimil)
- Use debugfs interface to store/read values (suggested by Ammar)

Hi,

page_owner is a great debug functionality tool that gets us to know
about all pages that have been allocated/freed and their stacktrace.
This comes in very handy when e.g. debugging leaks, as with some scripting
we might be able to see those stacktraces that are allocating pages
but not freeing them.

In my experience, that is one of the most useful cases, but it can get
really tedious to screen through all pages and try to reconstruct the
stack <-> allocated/freed relationship. There is a lot of noise
to cancel out.

This patch aims to fix that by adding new functionality to page_owner.
It creates a new read-only file "page_owner_stacks", which prints only
the allocating stacktraces and their counts, where a count is the number
of times the stacktrace has allocated minus the number of times it has freed.

So we have a clear overview of stacks <-> allocated/freed relationship
without the need to fiddle with pages and trying to match free stacktraces
with allocated stacktraces.

This is achieved by adding a new refcount_t field in the stack_record struct,
incrementing that refcount_t every time the same stacktrace allocates,
and decrementing it when it frees a page. Details can be seen in the
respective patches.

We also create another file called "page_owner_threshold", which lets us
specify a threshold, so that when reading from "page_owner_stacks",
we will only see those stacktraces whose count exceeds the
threshold we specified.

One thing I am not completely happy about is polluting the lib/stackdepot.c
file with the stack_* functions.
We could sort that out if the stack_record struct definitions were in a header
file instead of stackdepot.c.
But I am not sure about that trade-off, so suggestions are accepted.

A PoC can be found below:

# cat /sys/kernel/debug/page_owner_threshold
0
# cat /sys/kernel/debug/page_owner_stacks > stacks_full.txt
# head -32 stacks_full.txt
prep_new_page+0x10d/0x180
get_page_from_freelist+0x1bd6/0x1e10
__alloc_pages+0x194/0x360
alloc_page_interleave+0x13/0x90
new_slab+0x31d/0x530
___slab_alloc+0x5d7/0x720
__slab_alloc.isra.85+0x4a/0x90
kmem_cache_alloc+0x455/0x4a0
acpi_ps_alloc_op+0x57/0x8f
acpi_ps_create_scope_op+0x12/0x23
acpi_ps_execute_method+0x102/0x2c1
acpi_ns_evaluate+0x343/0x4da
acpi_evaluate_object+0x1cb/0x392
acpi_run_osc+0x135/0x260
acpi_init+0x165/0x4ed
do_one_initcall+0x3e/0x200
stack count: 2

free_pcp_prepare+0x287/0x5c0
free_unref_page+0x1c/0xd0
__mmdrop+0x50/0x160
finish_task_switch+0x249/0x2b0
__schedule+0x2c3/0x960
schedule+0x44/0xb0
futex_wait_queue+0x70/0xd0
futex_wait+0x160/0x250
do_futex+0x11c/0x1b0
__x64_sys_futex+0x5e/0x1d0
do_syscall_64+0x37/0x90
entry_SYSCALL_64_after_hwframe+0x63/0xcd
stack count: 1



# echo 10000 > /sys/kernel/debug/page_owner_threshold
# cat /sys/kernel/debug/page_owner_stacks > stacks_10000.txt
# cat stacks_10000.txt
prep_new_page+0x10d/0x180
get_page_from_freelist+0x1bd6/0x1e10
__alloc_pages+0x194/0x360
folio_alloc+0x17/0x40
page_cache_ra_unbounded+0x96/0x170
filemap_get_pages+0x23d/0x5e0
filemap_read+0xbf/0x3a0
__kernel_read+0x136/0x2f0
kernel_read_file+0x197/0x2d0
kernel_read_file_from_fd+0x54/0x90
__do_sys_finit_module+0x89/0x120
do_syscall_64+0x37/0x90
entry_SYSCALL_64_after_hwframe+0x63/0xcd
stack count: 36195

prep_new_page+0x10d/0x180
get_page_from_freelist+0x1bd6/0x1e10
__alloc_pages+0x194/0x360
folio_alloc+0x17/0x40
page_cache_ra_unbounded+0x96/0x170
filemap_get_pages+0x23d/0x5e0
filemap_read+0xbf/0x3a0
new_sync_read+0x106/0x180
vfs_read+0x16f/0x190
ksys_read+0xa5/0xe0
do_syscall_64+0x37/0x90
entry_SYSCALL_64_after_hwframe+0x63/0xcd
stack count: 44484

prep_new_page+0x10d/0x180
get_page_from_freelist+0x1bd6/0x1e10
__alloc_pages+0x194/0x360
folio_alloc+0x17/0x40
page_cache_ra_unbounded+0x96/0x170
filemap_get_pages+0xdd/0x5e0
filemap_read+0xbf/0x3a0
new_sync_read+0x106/0x180
vfs_read+0x16f/0x190
ksys_read+0xa5/0xe0
do_syscall_64+0x37/0x90
entry_SYSCALL_64_after_hwframe+0x63/0xcd
stack count: 17874

Oscar Salvador (3):
lib/stackdepot: Add a refcount field in stack_record
mm, page_owner: Add page_owner_stacks file to print out only stacks
and their counter
mm,page_owner: Filter out stacks by a threshold counter

include/linux/stackdepot.h | 21 ++++-
lib/stackdepot.c | 167 +++++++++++++++++++++++++++++++++----
mm/kasan/common.c | 3 +-
mm/page_owner.c | 48 ++++++++++-
4 files changed, 219 insertions(+), 20 deletions(-)

--
2.35.3



2022-11-04 10:10:48

by Oscar Salvador

[permalink] [raw]
Subject: [PATCH v3 2/3] mm, page_owner: Add page_owner_stacks file to print out only stacks and their counter

We might be only interested in knowing about stacks <-> count
relationship, so instead of having to fiddle with page_owner
output and screen through pfns, let us add a new file called
'page_owner_stacks' that does just that.
By cat-ing that file, we will get all the stacktraces, each followed by
its counter, so we can have a more global view.

Signed-off-by: Oscar Salvador <[email protected]>
---
include/linux/stackdepot.h | 5 +++
lib/stackdepot.c | 73 ++++++++++++++++++++++++++++++++++++++
mm/page_owner.c | 29 +++++++++++++++
3 files changed, 107 insertions(+)

diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h
index 4e3a88f135ee..ca048a79bf7c 100644
--- a/include/linux/stackdepot.h
+++ b/include/linux/stackdepot.h
@@ -66,6 +66,11 @@ depot_stack_handle_t stack_depot_save_action(unsigned long *entries,
unsigned int nr_entries,
gfp_t gfp_flags,
enum stack_depot_action action);
+#ifdef CONFIG_PAGE_OWNER
+/*
+ * seq_file iterator callbacks (->start, ->next, ->show) that walk the stack
+ * depot table; they are wired into the "page_owner_stacks" debugfs file.
+ */
+void *stack_start(struct seq_file *m, loff_t *ppos);
+void *stack_next(struct seq_file *m, void *v, loff_t *ppos);
+int stack_print(struct seq_file *m, void *v);
+#endif

unsigned int stack_depot_fetch(depot_stack_handle_t handle,
unsigned long **entries);
diff --git a/lib/stackdepot.c b/lib/stackdepot.c
index a806ef58a385..97e5dce40f4b 100644
--- a/lib/stackdepot.c
+++ b/lib/stackdepot.c
@@ -33,6 +33,8 @@
#include <linux/types.h>
#include <linux/memblock.h>
#include <linux/kasan-enabled.h>
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>

#define DEPOT_STACK_BITS (sizeof(depot_stack_handle_t) * 8)

@@ -565,3 +567,74 @@ depot_stack_handle_t stack_depot_save_action(unsigned long *entries,
return __stack_depot_save(entries, nr_entries, alloc_flags, true, action);
}
EXPORT_SYMBOL_GPL(stack_depot_save_action);
+
+#ifdef CONFIG_PAGE_OWNER
+/*
+ * seq_file ->start() callback for the stack walk.
+ *
+ * m->private holds the index of the stack_table slot currently being
+ * walked. A read at offset 0 rewinds that index to the first slot; an
+ * offset of -1UL (stored by stack_next() once the table is exhausted)
+ * ends the iteration.
+ *
+ * Returns the address of the current stack_table slot, viewed as a
+ * struct stack_record pointer, or NULL when there is nothing left.
+ */
+void *stack_start(struct seq_file *m, loff_t *ppos)
+{
+	unsigned long *slot_idx = m->private;
+
+	/* A fresh read starts over from the first slot. */
+	if (*ppos == 0)
+		*slot_idx = 0;
+
+	/* End-of-table marker left behind by stack_next(). */
+	if (*ppos == -1UL)
+		return NULL;
+
+	return (struct stack_record *)&stack_table[*slot_idx];
+}
+
+/*
+ * seq_file ->next() callback for the stack walk.
+ *
+ * @v is the element returned by the previous ->start()/->next() call. Its
+ * ->next pointer is followed first. When @v is NULL, or its ->next is NULL,
+ * the walk advances to the following stack_table slot and returns the
+ * address of that slot cast to struct stack_record *.
+ *
+ * The slot index is written back to m->private. *ppos is incremented by one
+ * per step, or set to -1UL once the index reaches stack_hash_mask + 1.
+ *
+ * Returns the next element, or NULL when the table has been exhausted.
+ */
+void *stack_next(struct seq_file *m, void *v, loff_t *ppos)
+{
+ unsigned long *table = m->private;
+ unsigned long nr_table = *table;
+ struct stack_record *next = NULL, *stack = v, **stacks;
+ unsigned long stack_table_entries = stack_hash_mask + 1;
+
+ if (!stack) {
+new_table:
+ /* New table */
+ nr_table++;
+ /* Past the last slot: leave with next == NULL to end the walk. */
+ if (nr_table >= stack_table_entries)
+ goto out;
+ /* The slot address itself is handed out, so next is non-NULL here. */
+ stacks = &stack_table[nr_table];
+ stack = (struct stack_record *)stacks;
+ next = stack;
+ } else {
+ next = stack->next;
+ }
+
+ /* End of the current chain: jump back up and move to the next slot. */
+ if (!next)
+ goto new_table;
+
+out:
+ *table = nr_table;
+ *ppos = (nr_table >= stack_table_entries) ? -1UL : *ppos + 1;
+ return next;
+}
+
+/*
+ * seq_file ->show() callback: print one stack trace followed by its count.
+ *
+ * @v is the element handed out by stack_start()/stack_next(). Elements that
+ * fail the sanity checks below (zero or over-large size, handle not marked
+ * valid, count below 1) are skipped without producing any output.
+ *
+ * Returns 0 on success or when the element is skipped, -ENOMEM if the
+ * temporary formatting buffer cannot be allocated.
+ */
+int stack_print(struct seq_file *m, void *v)
+{
+	struct stack_record *stack = v;
+	char *buf;
+	int ret;
+
+	if (!stack->size || stack->size < 0 ||
+	    stack->size > PAGE_SIZE || stack->handle.valid != 1 ||
+	    refcount_read(&stack->count) < 1)
+		return 0;
+
+	buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	ret = stack_trace_snprint(buf, PAGE_SIZE, stack->entries, stack->size, 0);
+	scnprintf(buf + ret, PAGE_SIZE - ret, "stack count: %d\n\n",
+		  refcount_read(&stack->count));
+	/* buf is data, not a format string: a '%' in it must not be parsed. */
+	seq_puts(m, buf);
+	seq_puts(m, "\n\n");
+	kfree(buf);
+
+	return 0;
+}
+#endif
diff --git a/mm/page_owner.c b/mm/page_owner.c
index 8730f377fa91..3808c0985060 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -668,6 +668,32 @@ static const struct file_operations proc_page_owner_operations = {
.read = read_page_owner,
};

+/* seq_file ->stop() callback; intentionally does nothing. */
+static void stack_stop(struct seq_file *m, void *v)
+{
+}
+
+/* seq_file iterator callbacks backing the "page_owner_stacks" file. */
+static const struct seq_operations page_owner_stack_op = {
+	.start	= stack_start,
+	.stop	= stack_stop,
+	.next	= stack_next,
+	.show	= stack_print,
+};
+
+/*
+ * open() handler for "page_owner_stacks".
+ *
+ * Sets up the seq_file iterator and requests sizeof(unsigned long) bytes of
+ * private data, which stack_start()/stack_next() use through m->private as
+ * the index of the table slot being walked.
+ */
+static int page_owner_stack_open(struct inode *inode, struct file *file)
+{
+	const int cursor_size = sizeof(unsigned long);
+
+	return seq_open_private(file, &page_owner_stack_op, cursor_size);
+}
+
+
+/*
+ * File operations for the "page_owner_stacks" debugfs file. Reads go through
+ * the seq_file iterator set up by page_owner_stack_open().
+ */
+static const struct file_operations page_owner_stack_operations = {
+	.open		= page_owner_stack_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	/* Pairs with seq_open_private(): also frees the private data. */
+	.release	= seq_release_private,
+};
+
static int __init pageowner_init(void)
{
if (!static_branch_unlikely(&page_owner_inited)) {
@@ -678,6 +704,9 @@ static int __init pageowner_init(void)
debugfs_create_file("page_owner", 0400, NULL, NULL,
&proc_page_owner_operations);

+ debugfs_create_file("page_owner_stacks", S_IRUSR, NULL, NULL,
+ &page_owner_stack_operations);
+
return 0;
}
late_initcall(pageowner_init)
--
2.35.3