This series of patches proposes using STACKDEPOT to store the user
(SLAB_STORE_USER) tracking information of a slub object.
Since the stack trace for each unique allocation and freeing context can
be saved in and retrieved from STACKDEPOT, we can shrink each object's
debug metadata (by ~256 bytes) and hence save memory without losing any
information.
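For reference, here is where the ~256 bytes figure comes from (a rough
estimate, assuming a 64-bit kernel and the current TRACK_ADDRS_COUNT of 16):
each object carries two struct track copies (alloc + free), each embedding
a 16-entry trace array, i.e.

    2 tracks * 16 entries * sizeof(unsigned long) = 2 * 16 * 8 = 256 bytes

per object, whereas a depot_stack_handle_t costs only 4 bytes per track.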
*PATCH-1: Makes STACKDEPOT explicitly configurable, so that
it can be enabled for storing allocation/freeing stack traces.
*PATCH-2: Uses STACKDEPOT to store allocation/freeing context
for a slub object.
I have marked this series as RFC to gather feedback on the approach,
since the change only touches the debugging framework and does not add
any value to production scenarios.
Imran Khan (2):
lib, stackdepot: Add input prompt for STACKDEPOT option.
mm, slub: Use stackdepot to store user information for slub object.
lib/Kconfig | 3 +-
mm/slub.c | 87 +++++++++++++++++++++++++++++------------------------
2 files changed, 50 insertions(+), 40 deletions(-)
--
2.25.1
SLAB_STORE_USER causes information about the allocating and freeing
context of a slub object to be stored in the object's metadata area, in
a pair of struct track objects. Each of these stores the allocation or
freeing stack trace in an embedded array, so the same stack trace may
end up duplicated in the metadata area of many objects.
STACKDEPOT stores unique stack traces without any duplication, so use
STACKDEPOT for the allocation and freeing stack traces as well.
This results in a lower memory footprint, as we no longer store
multiple copies of the same stack trace for an allocation or free.
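To illustrate the deduplication property this relies on (a standalone
sketch, not part of this patch): saving the same set of entries twice
yields the same handle, so the trace data is kept only once no matter
how many objects reference it.

#include <linux/bug.h>
#include <linux/kernel.h>
#include <linux/stackdepot.h>
#include <linux/stacktrace.h>

static void __maybe_unused stackdepot_dedup_demo(gfp_t flags)
{
	unsigned long entries[16];
	unsigned int nr;
	depot_stack_handle_t h1, h2;

	nr = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
	h1 = stack_depot_save(entries, nr, flags);
	h2 = stack_depot_save(entries, nr, flags);

	/* Identical traces hash to the same depot record (0 means the save failed). */
	WARN_ON(h1 && h2 && h1 != h2);
}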
Signed-off-by: Imran Khan <[email protected]>
---
mm/slub.c | 87 ++++++++++++++++++++++++++++++-------------------------
1 file changed, 48 insertions(+), 39 deletions(-)
diff --git a/mm/slub.c b/mm/slub.c
index df1ac8aff86f..8e2a2b837106 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -264,8 +264,8 @@ static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
#define TRACK_ADDRS_COUNT 16
struct track {
unsigned long addr; /* Called from address */
-#ifdef CONFIG_STACKTRACE
- unsigned long addrs[TRACK_ADDRS_COUNT]; /* Called from address */
+#ifdef CONFIG_STACKDEPOT
+ depot_stack_handle_t stack;
#endif
int cpu; /* Was running on cpu */
int pid; /* Pid context */
@@ -668,6 +668,27 @@ static inline unsigned int get_info_end(struct kmem_cache *s)
return s->inuse;
}
+#ifdef CONFIG_STACKDEPOT
+static depot_stack_handle_t slub_debug_save_stack(gfp_t flags)
+{
+ unsigned long entries[TRACK_ADDRS_COUNT];
+ unsigned int nr_entries;
+
+ nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 4);
+ nr_entries = filter_irq_stacks(entries, nr_entries);
+ return stack_depot_save(entries, nr_entries, flags);
+}
+
+static void print_stack(depot_stack_handle_t stack)
+{
+ unsigned long *entries;
+ unsigned int nr_entries;
+
+ nr_entries = stack_depot_fetch(stack, &entries);
+ stack_trace_print(entries, nr_entries, 0);
+}
+#endif
+
static struct track *get_track(struct kmem_cache *s, void *object,
enum track_item alloc)
{
@@ -679,21 +700,13 @@ static struct track *get_track(struct kmem_cache *s, void *object,
}
static void set_track(struct kmem_cache *s, void *object,
- enum track_item alloc, unsigned long addr)
+ enum track_item alloc, unsigned long addr, gfp_t flags)
{
struct track *p = get_track(s, object, alloc);
if (addr) {
-#ifdef CONFIG_STACKTRACE
- unsigned int nr_entries;
-
- metadata_access_enable();
- nr_entries = stack_trace_save(kasan_reset_tag(p->addrs),
- TRACK_ADDRS_COUNT, 3);
- metadata_access_disable();
-
- if (nr_entries < TRACK_ADDRS_COUNT)
- p->addrs[nr_entries] = 0;
+#ifdef CONFIG_STACKDEPOT
+ p->stack = slub_debug_save_stack(flags);
#endif
p->addr = addr;
p->cpu = smp_processor_id();
@@ -709,10 +722,11 @@ static void init_tracking(struct kmem_cache *s, void *object)
if (!(s->flags & SLAB_STORE_USER))
return;
- set_track(s, object, TRACK_FREE, 0UL);
- set_track(s, object, TRACK_ALLOC, 0UL);
+ set_track(s, object, TRACK_FREE, 0UL, 0U);
+ set_track(s, object, TRACK_ALLOC, 0UL, 0U);
}
+
static void print_track(const char *s, struct track *t, unsigned long pr_time)
{
if (!t->addr)
@@ -720,15 +734,11 @@ static void print_track(const char *s, struct track *t, unsigned long pr_time)
pr_err("%s in %pS age=%lu cpu=%u pid=%d\n",
s, (void *)t->addr, pr_time - t->when, t->cpu, t->pid);
-#ifdef CONFIG_STACKTRACE
- {
- int i;
- for (i = 0; i < TRACK_ADDRS_COUNT; i++)
- if (t->addrs[i])
- pr_err("\t%pS\n", (void *)t->addrs[i]);
- else
- break;
- }
+#ifdef CONFIG_STACKDEPOT
+ if (t->stack)
+ print_stack(t->stack);
+ else
+ pr_err("(stack is not available)\n");
#endif
}
@@ -1261,7 +1271,8 @@ static inline int alloc_consistency_checks(struct kmem_cache *s,
static noinline int alloc_debug_processing(struct kmem_cache *s,
struct page *page,
- void *object, unsigned long addr)
+ void *object, unsigned long addr,
+ gfp_t flags)
{
if (s->flags & SLAB_CONSISTENCY_CHECKS) {
if (!alloc_consistency_checks(s, page, object))
@@ -1270,7 +1281,7 @@ static noinline int alloc_debug_processing(struct kmem_cache *s,
/* Success perform special debug activities for allocs */
if (s->flags & SLAB_STORE_USER)
- set_track(s, object, TRACK_ALLOC, addr);
+ set_track(s, object, TRACK_ALLOC, addr, flags);
trace(s, page, object, 1);
init_object(s, object, SLUB_RED_ACTIVE);
return 1;
@@ -1350,7 +1361,7 @@ static noinline int free_debug_processing(
}
if (s->flags & SLAB_STORE_USER)
- set_track(s, object, TRACK_FREE, addr);
+ set_track(s, object, TRACK_FREE, addr, GFP_NOWAIT);
trace(s, page, object, 0);
/* Freepointer not overwritten by init_object(), SLAB_POISON moved it */
init_object(s, object, SLUB_RED_INACTIVE);
@@ -2987,7 +2998,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
check_new_page:
if (kmem_cache_debug(s)) {
- if (!alloc_debug_processing(s, page, freelist, addr)) {
+ if (!alloc_debug_processing(s, page, freelist, addr, gfpflags)) {
/* Slab failed checks. Next slab needed */
goto new_slab;
} else {
@@ -4275,6 +4286,8 @@ void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page)
void *objp0;
struct kmem_cache *s = page->slab_cache;
struct track __maybe_unused *trackp;
+ unsigned long __maybe_unused *entries;
+ unsigned int __maybe_unused nr_entries;
kpp->kp_ptr = object;
kpp->kp_page = page;
@@ -4297,19 +4310,15 @@ void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page)
objp = fixup_red_left(s, objp);
trackp = get_track(s, objp, TRACK_ALLOC);
kpp->kp_ret = (void *)trackp->addr;
-#ifdef CONFIG_STACKTRACE
- for (i = 0; i < KS_ADDRS_COUNT && i < TRACK_ADDRS_COUNT; i++) {
- kpp->kp_stack[i] = (void *)trackp->addrs[i];
- if (!kpp->kp_stack[i])
- break;
- }
+#ifdef CONFIG_STACKDEPOT
+ nr_entries = stack_depot_fetch(trackp->stack, &entries);
+ for (i = 0; i < nr_entries; i++)
+ kpp->kp_stack[i] = (void *)entries[i];
trackp = get_track(s, objp, TRACK_FREE);
- for (i = 0; i < KS_ADDRS_COUNT && i < TRACK_ADDRS_COUNT; i++) {
- kpp->kp_free_stack[i] = (void *)trackp->addrs[i];
- if (!kpp->kp_free_stack[i])
- break;
- }
+ nr_entries = stack_depot_fetch(trackp->stack, &entries);
+ for (i = 0; i < nr_entries; i++)
+ kpp->kp_free_stack[i] = (void *)entries[i];
#endif
#endif
}
--
2.25.1
So far the CONFIG_STACKDEPOT option has only been selected by features
that need STACKDEPOT support for their operation, for example KASAN.
Since the next patch makes use of STACKDEPOT to store user tracking
information for the slub debugger, and since that user tracking may or
may not record stack traces for the allocating and freeing contexts,
make STACKDEPOT explicitly configurable.
Signed-off-by: Imran Khan <[email protected]>
---
lib/Kconfig | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/lib/Kconfig b/lib/Kconfig
index 6a6ae5312fa0..7e4b54f48af7 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -665,8 +665,9 @@ config ARCH_STACKWALK
bool
config STACKDEPOT
- bool
+ def_bool n
select STACKTRACE
+ prompt "Enable stackdepot support"
config STACK_HASH_ORDER
int "stack depot hash size (12 => 4KB, 20 => 1024KB)"
--
2.25.1
+CC rest of slab maintainers (please use get_maintainers.pl next time)
On 8/31/21 08:25, Imran Khan wrote:
> So far CONFIG_STACKDEPOT option was being selected by
> features that need STACKDEPOT support for their operations,
> for example KASAN.
> Since next patch makes use of STACKDEPOT to store user tracking
> information for slub debugger and since user tracking info may
> or may not store stack trace for allocating and freeing contexts,
> make STACKDEPOT explicitly configurable.
>
> Signed-off-by: Imran Khan <[email protected]>
Right, so you're probably not aware, but switching slub to stackdepot was
just recently attempted by commit 788691464c29 ("mm/slub: use stackdepot to
save stack trace in objects"), which Linus then reverted in ae14c63a9f20.
Enabling stackdepot unconditionally was one of the reasons, as it has a
memory overhead.
> ---
> lib/Kconfig | 3 ++-
> 1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/lib/Kconfig b/lib/Kconfig
> index 6a6ae5312fa0..7e4b54f48af7 100644
> --- a/lib/Kconfig
> +++ b/lib/Kconfig
> @@ -665,8 +665,9 @@ config ARCH_STACKWALK
> bool
>
> config STACKDEPOT
> - bool
> + def_bool n
> select STACKTRACE
> + prompt "Enable stackdepot support"
>
> config STACK_HASH_ORDER
> int "stack depot hash size (12 => 4KB, 20 => 1024KB)"
>
I'm not a big fan of a solution that throws another prompt at the person
configuring the kernel, especially if there's nothing to help decide how to
answer it.
Incidentally, just yesterday I was trying locally to resolve this in a
different way, where stackdepot would be enabled if both CONFIG_SLUB_DEBUG
and CONFIG_STACKTRACE are enabled. That could be enough, unless there are
systems that would like to have STACKTRACE enabled for other reasons,
SLUB_DEBUG also for other reasons but still not want to pay the memory
overhead of stackdepot. At that point it could be more useful to investigate
if it's possible to optimize stackdepot to not make memblock allocations
unconditionally on init, but normal buddy allocations only when actually used.
Thoughts on the below?
----8<----
commit d3582a7c54b26f6fd3a44f1327a26c383e6b951c
Author: Vlastimil Babka <[email protected]>
Date: Mon Aug 30 17:26:15 2021 +0200
Rework stackdepot deps
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 80b57e7f4947..8c25b27014ee 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -267,7 +267,6 @@ config UNWINDER_FRAME_POINTER
config UNWINDER_GUESS
bool "Guess unwinder"
depends on EXPERT
- depends on !STACKDEPOT
help
This option enables the "guess" unwinder for unwinding kernel stack
traces. It scans the stack and reports every kernel text address it
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 7ff89690a976..4638c4ece8f2 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -47,7 +47,8 @@ config DRM_DEBUG_MM
bool "Insert extra checks and debug info into the DRM range managers"
default n
depends on DRM=y
- depends on STACKTRACE_SUPPORT
+ depends on STACKDEPOT_SUPPORT
+ select STACKTRACE
select STACKDEPOT
help
Enable allocation tracking of memory manager and leak detection on
diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug
index 72a38f28393f..70794c6bb84f 100644
--- a/drivers/gpu/drm/i915/Kconfig.debug
+++ b/drivers/gpu/drm/i915/Kconfig.debug
@@ -21,9 +21,11 @@ config DRM_I915_DEBUG
depends on DRM_I915
depends on EXPERT # only for developers
depends on !COMPILE_TEST # never built by robots
+ depends on STACKDEPOT_SUPPORT
select DEBUG_FS
select PREEMPT_COUNT
select I2C_CHARDEV
+ select STACKTRACE
select STACKDEPOT
select DRM_DP_AUX_CHARDEV
select X86_MSR # used by igt/pm_rpm
diff --git a/init/Kconfig b/init/Kconfig
index 55f9f7738ebb..f9a4ec802516 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1847,6 +1847,7 @@ config SLUB_DEBUG
default y
bool "Enable SLUB debugging support" if EXPERT
depends on SLUB && SYSFS
+ select STACKDEPOT if STACKDEPOT_SUPPORT && STACKTRACE
help
SLUB has extensive debug support features. Disabling these can
result in significant savings in code size. This also disables
diff --git a/lib/Kconfig b/lib/Kconfig
index 5c9c0687f76d..67c388a09b7a 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -661,9 +661,37 @@ config ARCH_HAS_COPY_MC
config ARCH_STACKWALK
bool
+# encompasses STACKDEPOT dependencies
+config STACKDEPOT_SUPPORT
+ def_bool y
+ depends on STACKTRACE_SUPPORT && !UNWINDER_GUESS
+
+# STACKDEPOT consumes megabytes of memory when compiled in, so we want to make
+# sure it's only enabled when needed. But we don't want to burden the user with
+# another choice. STACKTRACE is already configurable (but often selected by
+# other configs).
+#
+# Due to how kconfig dependency resolution works, it's slightly complicated
+# as we cannot e.g. change STACKDEPOT to select STACKTRACE. Here are common
+# scenarios how to use STACKDEPOT with your config options:
+#
+# Scenario 1: hard dependency, will select also STACKTRACE, your config will
+# only be visible when STACKDEPOT_SUPPORT (and thus also STACKTRACE_SUPPORT) is
+# satisfied:
+#
+# depends on STACKDEPOT_SUPPORT
+# select STACKTRACE
+# select STACKDEPOT
+#
+# Scenario 2: soft dependency (e.g. for code with #ifdef CONFIG_STACKDEPOT
+# guards), will select STACKDEPOT only when STACKTRACE was configured by user
+# (or selected by some other config)
+#
+# select STACKDEPOT if STACKDEPOT_SUPPORT && STACKTRACE
+#
config STACKDEPOT
bool
- select STACKTRACE
+ depends on STACKDEPOT_SUPPORT
config STACK_HASH_ORDER
int "stack depot hash size (12 => 4KB, 20 => 1024KB)"
diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
index 1e73717802f8..56a248814788 100644
--- a/mm/Kconfig.debug
+++ b/mm/Kconfig.debug
@@ -47,7 +47,7 @@ config DEBUG_PAGEALLOC_ENABLE_DEFAULT
config PAGE_OWNER
bool "Track page owner"
- depends on DEBUG_KERNEL && STACKTRACE_SUPPORT
+ depends on DEBUG_KERNEL && STACKDEPOT_SUPPORT
select DEBUG_FS
select STACKTRACE
select STACKDEPOT
On 8/31/21 08:25, Imran Khan wrote:
> SLAB_STORE_USER causes information about allocating and freeing context
> of a slub object, to be stored in metadata area in a couple of struct
> track objects. These objects store allocation and/or freeing stack trace
> in an array. This may result in same stack trace getting stored in metadata
> area of multiple objects.
> STACKDEPOT can be used to store unique stack traces without any
> duplication,so use STACKDEPOT to store allocation and/or freeing stack
> traces as well.
> This results in low memory footprint, as we are not storing multiple
> copies of the same stack trace for an allocation or free.
>
> Signed-off-by: Imran Khan <[email protected]>
> ---
> mm/slub.c | 87 ++++++++++++++++++++++++++++++-------------------------
> 1 file changed, 48 insertions(+), 39 deletions(-)
>
> diff --git a/mm/slub.c b/mm/slub.c
> index df1ac8aff86f..8e2a2b837106 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -264,8 +264,8 @@ static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
> #define TRACK_ADDRS_COUNT 16
> struct track {
> unsigned long addr; /* Called from address */
> -#ifdef CONFIG_STACKTRACE
> - unsigned long addrs[TRACK_ADDRS_COUNT]; /* Called from address */
> +#ifdef CONFIG_STACKDEPOT
> + depot_stack_handle_t stack;
> #endif
> int cpu; /* Was running on cpu */
> int pid; /* Pid context */
> @@ -668,6 +668,27 @@ static inline unsigned int get_info_end(struct kmem_cache *s)
> return s->inuse;
> }
>
> +#ifdef CONFIG_STACKDEPOT
> +static depot_stack_handle_t slub_debug_save_stack(gfp_t flags)
> +{
> + unsigned long entries[TRACK_ADDRS_COUNT];
> + unsigned int nr_entries;
> +
> + nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 4);
> + nr_entries = filter_irq_stacks(entries, nr_entries);
> + return stack_depot_save(entries, nr_entries, flags);
> +}
> +
> +static void print_stack(depot_stack_handle_t stack)
> +{
> + unsigned long *entries;
> + unsigned int nr_entries;
> +
> + nr_entries = stack_depot_fetch(stack, &entries);
> + stack_trace_print(entries, nr_entries, 0);
> +}
This function could become part of stackdepot itself?
> +#endif
> +
> static struct track *get_track(struct kmem_cache *s, void *object,
> enum track_item alloc)
> {
> @@ -679,21 +700,13 @@ static struct track *get_track(struct kmem_cache *s, void *object,
> }
>
> static void set_track(struct kmem_cache *s, void *object,
> - enum track_item alloc, unsigned long addr)
> + enum track_item alloc, unsigned long addr, gfp_t flags)
> {
> struct track *p = get_track(s, object, alloc);
>
> if (addr) {
> -#ifdef CONFIG_STACKTRACE
> - unsigned int nr_entries;
> -
> - metadata_access_enable();
> - nr_entries = stack_trace_save(kasan_reset_tag(p->addrs),
> - TRACK_ADDRS_COUNT, 3);
> - metadata_access_disable();
> -
> - if (nr_entries < TRACK_ADDRS_COUNT)
> - p->addrs[nr_entries] = 0;
> +#ifdef CONFIG_STACKDEPOT
> + p->stack = slub_debug_save_stack(flags);
> #endif
> p->addr = addr;
> p->cpu = smp_processor_id();
> @@ -709,10 +722,11 @@ static void init_tracking(struct kmem_cache *s, void *object)
> if (!(s->flags & SLAB_STORE_USER))
> return;
>
> - set_track(s, object, TRACK_FREE, 0UL);
> - set_track(s, object, TRACK_ALLOC, 0UL);
> + set_track(s, object, TRACK_FREE, 0UL, 0U);
> + set_track(s, object, TRACK_ALLOC, 0UL, 0U);
> }
>
> +
> static void print_track(const char *s, struct track *t, unsigned long pr_time)
> {
> if (!t->addr)
> @@ -720,15 +734,11 @@ static void print_track(const char *s, struct track *t, unsigned long pr_time)
>
> pr_err("%s in %pS age=%lu cpu=%u pid=%d\n",
> s, (void *)t->addr, pr_time - t->when, t->cpu, t->pid);
> -#ifdef CONFIG_STACKTRACE
> - {
> - int i;
> - for (i = 0; i < TRACK_ADDRS_COUNT; i++)
> - if (t->addrs[i])
> - pr_err("\t%pS\n", (void *)t->addrs[i]);
> - else
> - break;
> - }
> +#ifdef CONFIG_STACKDEPOT
> + if (t->stack)
> + print_stack(t->stack);
> + else
> + pr_err("(stack is not available)\n");
> #endif
> }
>
> @@ -1261,7 +1271,8 @@ static inline int alloc_consistency_checks(struct kmem_cache *s,
>
> static noinline int alloc_debug_processing(struct kmem_cache *s,
> struct page *page,
> - void *object, unsigned long addr)
> + void *object, unsigned long addr,
> + gfp_t flags)
> {
> if (s->flags & SLAB_CONSISTENCY_CHECKS) {
> if (!alloc_consistency_checks(s, page, object))
> @@ -1270,7 +1281,7 @@ static noinline int alloc_debug_processing(struct kmem_cache *s,
>
> /* Success perform special debug activities for allocs */
> if (s->flags & SLAB_STORE_USER)
> - set_track(s, object, TRACK_ALLOC, addr);
> + set_track(s, object, TRACK_ALLOC, addr, flags);
> trace(s, page, object, 1);
> init_object(s, object, SLUB_RED_ACTIVE);
> return 1;
> @@ -1350,7 +1361,7 @@ static noinline int free_debug_processing(
> }
>
> if (s->flags & SLAB_STORE_USER)
> - set_track(s, object, TRACK_FREE, addr);
> + set_track(s, object, TRACK_FREE, addr, GFP_NOWAIT);
> trace(s, page, object, 0);
> /* Freepointer not overwritten by init_object(), SLAB_POISON moved it */
> init_object(s, object, SLUB_RED_INACTIVE);
> @@ -2987,7 +2998,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
> check_new_page:
>
> if (kmem_cache_debug(s)) {
> - if (!alloc_debug_processing(s, page, freelist, addr)) {
> + if (!alloc_debug_processing(s, page, freelist, addr, gfpflags)) {
> /* Slab failed checks. Next slab needed */
> goto new_slab;
> } else {
> @@ -4275,6 +4286,8 @@ void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page)
> void *objp0;
> struct kmem_cache *s = page->slab_cache;
> struct track __maybe_unused *trackp;
> + unsigned long __maybe_unused *entries;
> + unsigned int __maybe_unused nr_entries;
>
> kpp->kp_ptr = object;
> kpp->kp_page = page;
> @@ -4297,19 +4310,15 @@ void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page)
> objp = fixup_red_left(s, objp);
> trackp = get_track(s, objp, TRACK_ALLOC);
> kpp->kp_ret = (void *)trackp->addr;
> -#ifdef CONFIG_STACKTRACE
> - for (i = 0; i < KS_ADDRS_COUNT && i < TRACK_ADDRS_COUNT; i++) {
> - kpp->kp_stack[i] = (void *)trackp->addrs[i];
> - if (!kpp->kp_stack[i])
> - break;
> - }
> +#ifdef CONFIG_STACKDEPOT
> + nr_entries = stack_depot_fetch(trackp->stack, &entries);
> + for (i = 0; i < nr_entries; i++)
> + kpp->kp_stack[i] = (void *)entries[i];
Hmm, in case stack_depot_save() fails and returns a zero handle (e.g. due to
ENOMEM), this seems to rely on stack_depot_fetch() returning gracefully with
zero nr_entries for a zero handle. But I don't see such a guarantee?
stack_depot_init() isn't creating such an entry and stack_depot_save()
doesn't have such a check. So will it work accidentally, or return garbage?
Either way, it would IMHO be useful to add such a guarantee to stackdepot one
way or another.
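One way to make this robust regardless of what stackdepot guarantees would be
a small guard on the caller side, e.g. (sketch only, the helper name is
hypothetical):

#include <linux/stackdepot.h>

/* Treat a zero handle (failed or never-performed stack_depot_save()) as
 * "no trace" instead of passing it to stack_depot_fetch(). */
static unsigned int fetch_stack_or_empty(depot_stack_handle_t handle,
					 unsigned long **entries)
{
	*entries = NULL;
	if (!handle)
		return 0;
	return stack_depot_fetch(handle, entries);
}

though adding the check inside stack_depot_fetch() itself would cover all
callers at once.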
> trackp = get_track(s, objp, TRACK_FREE);
> - for (i = 0; i < KS_ADDRS_COUNT && i < TRACK_ADDRS_COUNT; i++) {
> - kpp->kp_free_stack[i] = (void *)trackp->addrs[i];
> - if (!kpp->kp_free_stack[i])
> - break;
> - }
> + nr_entries = stack_depot_fetch(trackp->stack, &entries);
> + for (i = 0; i < nr_entries; i++)
> + kpp->kp_free_stack[i] = (void *)entries[i];
> #endif
> #endif
> }
>
Hi Imran,
On Tue, Aug 31, 2021 at 8:28 AM Imran Khan <[email protected]> wrote:
> So far CONFIG_STACKDEPOT option was being selected by
> features that need STACKDEPOT support for their operations,
> for example KASAN.
> Since next patch makes use of STACKDEPOT to store user tracking
> information for slub debugger and since user tracking info may
> or may not store stack trace for allocating and freeing contexts,
> make STACKDEPOT explicitly configurable.
>
> Signed-off-by: Imran Khan <[email protected]>
Thanks for your patch!
> ---
> lib/Kconfig | 3 ++-
> 1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/lib/Kconfig b/lib/Kconfig
> index 6a6ae5312fa0..7e4b54f48af7 100644
> --- a/lib/Kconfig
> +++ b/lib/Kconfig
> @@ -665,8 +665,9 @@ config ARCH_STACKWALK
> bool
>
> config STACKDEPOT
> - bool
> + def_bool n
Why this change? "n" is the default anyway.
> select STACKTRACE
> + prompt "Enable stackdepot support"
>
> config STACK_HASH_ORDER
> int "stack depot hash size (12 => 4KB, 20 => 1024KB)"
Gr{oetje,eeting}s,
Geert
--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- [email protected]
In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
On 31/8/21 9:25 pm, Vlastimil Babka wrote:
>
> +CC rest of slab maintainers (please use get_maintainers.pl next time)
>
> On 8/31/21 08:25, Imran Khan wrote:
>> So far CONFIG_STACKDEPOT option was being selected by
>> features that need STACKDEPOT support for their operations,
[...]
>>
>> Signed-off-by: Imran Khan <[email protected]>
>
> Right so you're probably not aware, but switching slub to stackdepot was
> just recently attempted by 788691464c29 ("mm/slub: use stackdepot to save
> stack trace in objects"), but Linus reverted it in ae14c63a9f20. Enabling
> stackdepot unconditionally was one of the reasons as there's memory overhead.
>
Indeed, I was not aware of this earlier attempt. I checked the tip of
linux-next, saw that the stack trace array was still present in struct
track, and went ahead with my attempt.
>> ---
>> lib/Kconfig | 3 ++-
>> 1 file changed, 2 insertions(+), 1 deletion(-)
>>
>> diff --git a/lib/Kconfig b/lib/Kconfig
>> index 6a6ae5312fa0..7e4b54f48af7 100644
>> --- a/lib/Kconfig
>> +++ b/lib/Kconfig
>> @@ -665,8 +665,9 @@ config ARCH_STACKWALK
>> bool
>>
>> config STACKDEPOT
>> - bool
>> + def_bool n
>> select STACKTRACE
>> + prompt "Enable stackdepot support"
>>
>> config STACK_HASH_ORDER
>> int "stack depot hash size (12 => 4KB, 20 => 1024KB)"
>>
>
> I'm not a big fan of a solution that throws another prompt at the person
> configuring the kernel, especially if there's nothing to help decide how to
> answer it.
>
> Incidentally just yesterday I was trying locally to resolve this in a
> different way, where stackdepot would be enabled if both CONFIG_SLUB_DEBUG
> and CONFIG_STACKTRACE is enabled. That could be enough, unless there are
> systems that would like to have STACKTRACE enabled for other reasons,
> SLUB_DEBUG also for other reasons but still not want to pay the memory
> overhead of stackdepot. At that point it could be more useful to investigate
> if it's possible to optimize stackdepot to not make memblock allocations
> unconditionally on init, but normal buddy allocations only when actually used.
My main intention in making STACKDEPOT explicitly configurable was to
use it as a substitute for STACKTRACE as far as storing the user info is
concerned. But now I realize this approach was wrong, mainly because
STACKTRACE has no memory overhead of its own whereas STACKDEPOT does.
So if someone enables STACKDEPOT but never enables slub_debug=U, the
STACKDEPOT memory will just sit there unused.
Probably the most acceptable way forward in this scenario would be to
optimize STACKDEPOT's memory usage, as you have suggested.
I will do some more digging in this regard.
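For what it's worth, one shape that optimization could take (purely a
hypothetical sketch; neither stack_depot_lazy_init() nor stack_table exist in
the current stackdepot code) is to defer the hash table allocation from
boot-time memblock to the first user that actually needs it:

#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/slab.h>

/* Hypothetical: depot hash table allocated on demand rather than at boot. */
static void **stack_table;

static int stack_depot_lazy_init(void)
{
	if (stack_table)
		return 0;

	/* Real code would need to serialize against concurrent first users. */
	stack_table = kvcalloc(1UL << CONFIG_STACK_HASH_ORDER,
			       sizeof(*stack_table), GFP_KERNEL);
	return stack_table ? 0 : -ENOMEM;
}

so that e.g. only kernels booted with slub_debug=U (or with KASAN etc.
enabled) would pay the memory cost.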
>
> Thoughts on the below?
>
> ----8<----
> commit d3582a7c54b26f6fd3a44f1327a26c383e6b951c
> Author: Vlastimil Babka <[email protected]>
> Date: Mon Aug 30 17:26:15 2021 +0200
>
> Rework stackdepot deps
>
> diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
> index 80b57e7f4947..8c25b27014ee 100644
> --- a/arch/x86/Kconfig.debug
> +++ b/arch/x86/Kconfig.debug
> @@ -267,7 +267,6 @@ config UNWINDER_FRAME_POINTER
> config UNWINDER_GUESS
> bool "Guess unwinder"
> depends on EXPERT
> - depends on !STACKDEPOT
> help
> This option enables the "guess" unwinder for unwinding kernel stack
> traces. It scans the stack and reports every kernel text address it
> diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
> index 7ff89690a976..4638c4ece8f2 100644
> --- a/drivers/gpu/drm/Kconfig
> +++ b/drivers/gpu/drm/Kconfig
> @@ -47,7 +47,8 @@ config DRM_DEBUG_MM
> bool "Insert extra checks and debug info into the DRM range managers"
> default n
> depends on DRM=y
> - depends on STACKTRACE_SUPPORT
> + depends on STACKDEPOT_SUPPORT
> + select STACKTRACE
> select STACKDEPOT
> help
> Enable allocation tracking of memory manager and leak detection on
> diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug
> index 72a38f28393f..70794c6bb84f 100644
> --- a/drivers/gpu/drm/i915/Kconfig.debug
> +++ b/drivers/gpu/drm/i915/Kconfig.debug
> @@ -21,9 +21,11 @@ config DRM_I915_DEBUG
> depends on DRM_I915
> depends on EXPERT # only for developers
> depends on !COMPILE_TEST # never built by robots
> + depends on STACKDEPOT_SUPPORT
> select DEBUG_FS
> select PREEMPT_COUNT
> select I2C_CHARDEV
> + select STACKTRACE
> select STACKDEPOT
> select DRM_DP_AUX_CHARDEV
> select X86_MSR # used by igt/pm_rpm
> diff --git a/init/Kconfig b/init/Kconfig
> index 55f9f7738ebb..f9a4ec802516 100644
> --- a/init/Kconfig
> +++ b/init/Kconfig
> @@ -1847,6 +1847,7 @@ config SLUB_DEBUG
> default y
> bool "Enable SLUB debugging support" if EXPERT
> depends on SLUB && SYSFS
> + select STACKDEPOT if STACKDEPOT_SUPPORT && STACKTRACE
> help
> SLUB has extensive debug support features. Disabling these can
> result in significant savings in code size. This also disables
> diff --git a/lib/Kconfig b/lib/Kconfig
> index 5c9c0687f76d..67c388a09b7a 100644
> --- a/lib/Kconfig
> +++ b/lib/Kconfig
> @@ -661,9 +661,37 @@ config ARCH_HAS_COPY_MC
> config ARCH_STACKWALK
> bool
>
> +# encompasses STACKDEPOT dependencies
> +config STACKDEPOT_SUPPORT
> + def_bool y
> + depends on STACKTRACE_SUPPORT && !UNWINDER_GUESS
> +
> +# STACKDEPOT consumes megabytes of memory when compiled in, so we want to make
> +# sure it's only enabled when needed. But we don't want to burden the user with
> +# another choice. STACKTRACE is already configurable (but often selected by
> +# other configs).
> +#
> +# Due to how kconfig dependency resolution works, it's slightly complicated
> +# as we cannot e.g. change STACKDEPOT to select STACKTRACE. Here are common
> +# scenarios how to use STACKDEPOT with your config options:
> +#
> +# Scenario 1: hard dependency, will select also STACKTRACE, your config will
> +# only be visible when STACKDEPOT_SUPPORT (and thus also STACKTRACE_SUPPORT) is
> +# satisfied:
> +#
> +# depends on STACKDEPOT_SUPPORT
> +# select STACKTRACE
> +# select STACKDEPOT
> +#
> +# Scenario 2: soft dependency (e.g. for code with #ifdef CONFIG_STACKDEPOT
> +# guards), will select STACKDEPOT only when STACKTRACE was configured by user
> +# (or selected by some other config)
> +#
> +# select STACKDEPOT if STACKDEPOT_SUPPORT && STACKTRACE
> +#
> config STACKDEPOT
> bool
> - select STACKTRACE
> + depends on STACKDEPOT_SUPPORT
>
> config STACK_HASH_ORDER
> int "stack depot hash size (12 => 4KB, 20 => 1024KB)"
> diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
> index 1e73717802f8..56a248814788 100644
> --- a/mm/Kconfig.debug
> +++ b/mm/Kconfig.debug
> @@ -47,7 +47,7 @@ config DEBUG_PAGEALLOC_ENABLE_DEFAULT
>
> config PAGE_OWNER
> bool "Track page owner"
> - depends on DEBUG_KERNEL && STACKTRACE_SUPPORT
> + depends on DEBUG_KERNEL && STACKDEPOT_SUPPORT
> select DEBUG_FS
> select STACKTRACE
> select STACKDEPOT
>
This looks good to me, but I will wait for feedback from the other maintainers.
Thanks again for the review and feedback.
-- Imran
On 31/8/21 10:06 pm, Vlastimil Babka wrote:
> On 8/31/21 08:25, Imran Khan wrote:
>> SLAB_STORE_USER causes information about allocating and freeing context
>> of a slub object, to be stored in metadata area in a couple of struct
>> track objects. These objects store allocation and/or freeing stack trace
>> in an array. This may result in same stack trace getting stored in metadata
>> area of multiple objects.
>> STACKDEPOT can be used to store unique stack traces without any
>> duplication,so use STACKDEPOT to store allocation and/or freeing stack
>> traces as well.
>> This results in low memory footprint, as we are not storing multiple
>> copies of the same stack trace for an allocation or free.
>>
>> Signed-off-by: Imran Khan <[email protected]>
>> ---
>> mm/slub.c | 87 ++++++++++++++++++++++++++++++-------------------------
>> 1 file changed, 48 insertions(+), 39 deletions(-)
>>
[...]
>> +
>> +static void print_stack(depot_stack_handle_t stack)
>> +{
>> + unsigned long *entries;
>> + unsigned int nr_entries;
>> +
>> + nr_entries = stack_depot_fetch(stack, &entries);
>> + stack_trace_print(entries, nr_entries, 0);
>> +}
>
> This function could become part of stackdepot itself?
>
Okay. I have made this function part of stackdepot in my new patch set.
Please see [1].
>> +#endif
>> +
>> static struct track *get_track(struct kmem_cache *s, void *object,
>> enum track_item alloc)
[...]
>> @@ -4297,19 +4310,15 @@ void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page)
>> objp = fixup_red_left(s, objp);
>> trackp = get_track(s, objp, TRACK_ALLOC);
>> kpp->kp_ret = (void *)trackp->addr;
>> -#ifdef CONFIG_STACKTRACE
>> - for (i = 0; i < KS_ADDRS_COUNT && i < TRACK_ADDRS_COUNT; i++) {
>> - kpp->kp_stack[i] = (void *)trackp->addrs[i];
>> - if (!kpp->kp_stack[i])
>> - break;
>> - }
>> +#ifdef CONFIG_STACKDEPOT
>> + nr_entries = stack_depot_fetch(trackp->stack, &entries);
>> + for (i = 0; i < nr_entries; i++)
>> + kpp->kp_stack[i] = (void *)entries[i];
>
> Hmm, in case stack_depot_save() fails and returns a zero handle (e.g. due to
> enomem) this seems to rely on stack_depot_fetch() returning gracefully with
> zero nr_entries for a zero handle. But I don't see such guarantee?
> stack_depot_init() isn't creating such entry and stack_depot_save() doesn't
> have such check. So it will work accidentally, or return garbage? But it
> would be IMHO useful to add such guarantee to stackdepot one way or another.
>
I have addressed this scenario as well in my new patch set. Please see [1].
Since both of the changes suggested here pertain to stackdepot and are
unrelated to SLUB, I have posted those changes in a separate thread [1].
>> trackp = get_track(s, objp, TRACK_FREE);
>> - for (i = 0; i < KS_ADDRS_COUNT && i < TRACK_ADDRS_COUNT; i++) {
>> - kpp->kp_free_stack[i] = (void *)trackp->addrs[i];
>> - if (!kpp->kp_free_stack[i])
>> - break;
>> - }
>> + nr_entries = stack_depot_fetch(trackp->stack, &entries);
>> + for (i = 0; i < nr_entries; i++)
>> + kpp->kp_free_stack[i] = (void *)entries[i];
>> #endif
>> #endif
>> }
>>
>
[1]
https://lore.kernel.org/lkml/[email protected]/
Thanks for the review and feedback.
-- Imran