From: Andrey Konovalov <[email protected]>
Implement storing stack depot handles for alloc/free stack traces for
slab objects for the tag-based KASAN modes in a ring buffer.
This ring buffer is referred to as the stack ring.
On each alloc/free of a slab object, the tagged address of the object and
the current stack trace are recorded in the stack ring.
On each bug report, if the accessed address belongs to a slab object, the
stack ring is scanned for matching entries. The newest entries are used to
print the alloc/free stack traces in the report: one entry for alloc and
one for free.
The ring buffer is lock-free.
Signed-off-by: Andrey Konovalov <[email protected]>
---
The number of entries in the stack ring is fixed in this version of the
patch. We could either implement it as a config option or a command-line
argument. I tilt towards the latter option and will implement it in v2
unless there are objections.
---
mm/kasan/kasan.h | 20 ++++++++++++++
mm/kasan/report_tags.c | 61 ++++++++++++++++++++++++++++++++++++++++++
mm/kasan/tags.c | 30 +++++++++++++++++++++
3 files changed, 111 insertions(+)
diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
index c51cea31ced0..da9a3c56ef4b 100644
--- a/mm/kasan/kasan.h
+++ b/mm/kasan/kasan.h
@@ -2,6 +2,7 @@
#ifndef __MM_KASAN_KASAN_H
#define __MM_KASAN_KASAN_H
+#include <linux/atomic.h>
#include <linux/kasan.h>
#include <linux/kasan-tags.h>
#include <linux/kfence.h>
@@ -227,6 +228,25 @@ struct kasan_free_meta {
#endif /* CONFIG_KASAN_GENERIC */
+#if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS)
+
+struct kasan_stack_ring_entry {
+ atomic64_t ptr; /* void * */
+ atomic64_t size; /* size_t */
+ atomic_t pid; /* u32 */
+ atomic_t stack; /* depot_stack_handle_t */
+ atomic_t is_free; /* bool */
+};
+
+#define KASAN_STACK_RING_ENTRIES (32 << 10)
+
+struct kasan_stack_ring {
+ atomic64_t pos;
+ struct kasan_stack_ring_entry entries[KASAN_STACK_RING_ENTRIES];
+};
+
+#endif /* CONFIG_KASAN_SW_TAGS || CONFIG_KASAN_HW_TAGS */
+
#if IS_ENABLED(CONFIG_KASAN_KUNIT_TEST)
/* Used in KUnit-compatible KASAN tests. */
struct kunit_kasan_status {
diff --git a/mm/kasan/report_tags.c b/mm/kasan/report_tags.c
index 5cbac2cdb177..21911d1883d3 100644
--- a/mm/kasan/report_tags.c
+++ b/mm/kasan/report_tags.c
@@ -4,8 +4,12 @@
* Copyright (c) 2020 Google, Inc.
*/
+#include <linux/atomic.h>
+
#include "kasan.h"
+extern struct kasan_stack_ring stack_ring;
+
static const char *get_bug_type(struct kasan_report_info *info)
{
/*
@@ -24,5 +28,62 @@ static const char *get_bug_type(struct kasan_report_info *info)
void kasan_complete_mode_report_info(struct kasan_report_info *info)
{
+ u64 pos;
+ struct kasan_stack_ring_entry *entry;
+ void *object;
+ u32 pid;
+ depot_stack_handle_t stack;
+ bool is_free;
+ bool alloc_found = false, free_found = false;
+
info->bug_type = get_bug_type(info);
+
+ if (!info->cache || !info->object)
+ return;
+
+ pos = atomic64_read(&stack_ring.pos);
+
+ for (u64 i = pos - 1; i != pos - 1 - KASAN_STACK_RING_ENTRIES; i--) {
+ if (alloc_found && free_found)
+ break;
+
+ entry = &stack_ring.entries[i % KASAN_STACK_RING_ENTRIES];
+
+ /* Paired with atomic64_set_release() in save_stack_info(). */
+ object = (void *)atomic64_read_acquire(&entry->ptr);
+
+ if (kasan_reset_tag(object) != info->object ||
+ get_tag(object) != get_tag(info->access_addr))
+ continue;
+
+ pid = atomic_read(&entry->pid);
+ stack = atomic_read(&entry->stack);
+ is_free = atomic_read(&entry->is_free);
+
+ /* Try detecting if the entry was changed while being read. */
+ smp_mb();
+ if (object != (void *)atomic64_read(&entry->ptr))
+ continue;
+
+ if (is_free) {
+ /*
+ * Second free of the same object.
+ * Give up on trying to find the alloc entry.
+ */
+ if (free_found)
+ break;
+
+ info->free_track.pid = pid;
+ info->free_track.stack = stack;
+ free_found = true;
+ } else {
+ /* Second alloc of the same object. Give up. */
+ if (alloc_found)
+ break;
+
+ info->alloc_track.pid = pid;
+ info->alloc_track.stack = stack;
+ alloc_found = true;
+ }
+ }
}
diff --git a/mm/kasan/tags.c b/mm/kasan/tags.c
index 39a0481e5228..286011307695 100644
--- a/mm/kasan/tags.c
+++ b/mm/kasan/tags.c
@@ -6,6 +6,7 @@
* Copyright (c) 2020 Google, Inc.
*/
+#include <linux/atomic.h>
#include <linux/init.h>
#include <linux/kasan.h>
#include <linux/kernel.h>
@@ -16,11 +17,40 @@
#include <linux/types.h>
#include "kasan.h"
+#include "../slab.h"
+
+struct kasan_stack_ring stack_ring;
+
+void save_stack_info(struct kmem_cache *cache, void *object,
+ gfp_t flags, bool is_free)
+{
+ u64 pos;
+ struct kasan_stack_ring_entry *entry;
+ depot_stack_handle_t stack;
+
+ stack = kasan_save_stack(flags, true);
+
+ pos = atomic64_fetch_add(1, &stack_ring.pos);
+ entry = &stack_ring.entries[pos % KASAN_STACK_RING_ENTRIES];
+
+ atomic64_set(&entry->size, cache->object_size);
+ atomic_set(&entry->pid, current->pid);
+ atomic_set(&entry->stack, stack);
+ atomic_set(&entry->is_free, is_free);
+
+ /*
+ * Paired with atomic64_read_acquire() in
+ * kasan_complete_mode_report_info().
+ */
+ atomic64_set_release(&entry->ptr, (s64)object);
+}
void kasan_save_alloc_info(struct kmem_cache *cache, void *object, gfp_t flags)
{
+ save_stack_info(cache, object, flags, false);
}
void kasan_save_free_info(struct kmem_cache *cache, void *object)
{
+ save_stack_info(cache, object, GFP_NOWAIT, true);
}
--
2.25.1
On Mon, Jun 13, 2022 at 10:14PM +0200, [email protected] wrote:
> From: Andrey Konovalov <[email protected]>
>
> Implement storing stack depot handles for alloc/free stack traces for
> slab objects for the tag-based KASAN modes in a ring buffer.
>
> This ring buffer is referred to as the stack ring.
>
> On each alloc/free of a slab object, the tagged address of the object and
> the current stack trace are recorded in the stack ring.
>
> On each bug report, if the accessed address belongs to a slab object, the
> stack ring is scanned for matching entries. The newest entries are used to
> print the alloc/free stack traces in the report: one entry for alloc and
> one for free.
>
> The ring buffer is lock-free.
>
> Signed-off-by: Andrey Konovalov <[email protected]>
>
> ---
>
> The number of entries in the stack ring is fixed in this version of the
> patch. We could either implement it as a config option or a command-line
> argument. I tilt towards the latter option and will implement it in v2
> unless there are objections.
Yes, that'd be good, along with just not allocating if no stacktraces
are requested per kasan.stacktrace=.
> ---
> mm/kasan/kasan.h | 20 ++++++++++++++
> mm/kasan/report_tags.c | 61 ++++++++++++++++++++++++++++++++++++++++++
> mm/kasan/tags.c | 30 +++++++++++++++++++++
> 3 files changed, 111 insertions(+)
>
> diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
> index c51cea31ced0..da9a3c56ef4b 100644
> --- a/mm/kasan/kasan.h
> +++ b/mm/kasan/kasan.h
> @@ -2,6 +2,7 @@
> #ifndef __MM_KASAN_KASAN_H
> #define __MM_KASAN_KASAN_H
>
> +#include <linux/atomic.h>
> #include <linux/kasan.h>
> #include <linux/kasan-tags.h>
> #include <linux/kfence.h>
> @@ -227,6 +228,25 @@ struct kasan_free_meta {
>
> #endif /* CONFIG_KASAN_GENERIC */
>
> +#if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS)
> +
> +struct kasan_stack_ring_entry {
> + atomic64_t ptr; /* void * */
> + atomic64_t size; /* size_t */
> + atomic_t pid; /* u32 */
> + atomic_t stack; /* depot_stack_handle_t */
> + atomic_t is_free; /* bool */
Per comments below, consider making these non-atomic.
> +};
> +
> +#define KASAN_STACK_RING_ENTRIES (32 << 10)
> +
> +struct kasan_stack_ring {
> + atomic64_t pos;
> + struct kasan_stack_ring_entry entries[KASAN_STACK_RING_ENTRIES];
> +};
> +
> +#endif /* CONFIG_KASAN_SW_TAGS || CONFIG_KASAN_HW_TAGS */
> +
> #if IS_ENABLED(CONFIG_KASAN_KUNIT_TEST)
> /* Used in KUnit-compatible KASAN tests. */
> struct kunit_kasan_status {
> diff --git a/mm/kasan/report_tags.c b/mm/kasan/report_tags.c
> index 5cbac2cdb177..21911d1883d3 100644
> --- a/mm/kasan/report_tags.c
> +++ b/mm/kasan/report_tags.c
> @@ -4,8 +4,12 @@
> * Copyright (c) 2020 Google, Inc.
> */
>
> +#include <linux/atomic.h>
> +
> #include "kasan.h"
>
> +extern struct kasan_stack_ring stack_ring;
> +
> static const char *get_bug_type(struct kasan_report_info *info)
> {
> /*
> @@ -24,5 +28,62 @@ static const char *get_bug_type(struct kasan_report_info *info)
>
> void kasan_complete_mode_report_info(struct kasan_report_info *info)
> {
> + u64 pos;
> + struct kasan_stack_ring_entry *entry;
> + void *object;
> + u32 pid;
> + depot_stack_handle_t stack;
> + bool is_free;
If you switch away from atomic for kasan_stack_ring_entry members, you
can just replace the above with a 'struct kasan_stack_ring_entry' and
READ_ONCE() each entry into it below.
> + bool alloc_found = false, free_found = false;
> +
> info->bug_type = get_bug_type(info);
> +
> + if (!info->cache || !info->object)
> + return;
> +
> + pos = atomic64_read(&stack_ring.pos);
> +
> + for (u64 i = pos - 1; i != pos - 1 - KASAN_STACK_RING_ENTRIES; i--) {
> + if (alloc_found && free_found)
> + break;
> +
> + entry = &stack_ring.entries[i % KASAN_STACK_RING_ENTRIES];
> +
> + /* Paired with atomic64_set_release() in save_stack_info(). */
> + object = (void *)atomic64_read_acquire(&entry->ptr);
> +
> + if (kasan_reset_tag(object) != info->object ||
> + get_tag(object) != get_tag(info->access_addr))
> + continue;
> +
> + pid = atomic_read(&entry->pid);
> + stack = atomic_read(&entry->stack);
> + is_free = atomic_read(&entry->is_free);
> +
> + /* Try detecting if the entry was changed while being read. */
> + smp_mb();
> + if (object != (void *)atomic64_read(&entry->ptr))
> + continue;
What if the object was changed, but 'ptr' is the same? It might very
well be possible to then read half of the info of the previous object,
and half of the new object (e.g. pid is old, stack is new).
Is the assumption that it is extremely unlikely that this will happen
where 1) address is the same, and 2) tags are the same? And if it does
happen, it is unlikely that there'll be a bug on that address?
It might be worth stating this in comments.
Another thing is, if there's a bug, but concurrently you have tons of
allocations/frees that change the ring's entries at a very high rate,
how likely is it that the entire ring will have been wiped before the
entry of interest is found again?
One way to guard against this is to prevent modifications of the ring
while the ring is searched. This could be implemented with a
percpu-rwsem, which is almost free for read-lockers but very expensive
for write-lockers. Insertions only acquire a read-lock, but on a bug
when searching the ring, you have to acquire a write-lock. Although you
currently take the contention hit for incrementing 'pos', so a plain
rwlock might also be ok.
It would be good to understand the probabilities of these corner cases
with some average to worst case workloads, and optimize based on that.
> +
> + if (is_free) {
> + /*
> + * Second free of the same object.
> + * Give up on trying to find the alloc entry.
> + */
> + if (free_found)
> + break;
> +
> + info->free_track.pid = pid;
> + info->free_track.stack = stack;
> + free_found = true;
> + } else {
> + /* Second alloc of the same object. Give up. */
> + if (alloc_found)
> + break;
> +
> + info->alloc_track.pid = pid;
> + info->alloc_track.stack = stack;
> + alloc_found = true;
> + }
> + }
> }
> diff --git a/mm/kasan/tags.c b/mm/kasan/tags.c
> index 39a0481e5228..286011307695 100644
> --- a/mm/kasan/tags.c
> +++ b/mm/kasan/tags.c
> @@ -6,6 +6,7 @@
> * Copyright (c) 2020 Google, Inc.
> */
>
> +#include <linux/atomic.h>
> #include <linux/init.h>
> #include <linux/kasan.h>
> #include <linux/kernel.h>
> @@ -16,11 +17,40 @@
> #include <linux/types.h>
>
> #include "kasan.h"
> +#include "../slab.h"
> +
> +struct kasan_stack_ring stack_ring;
This is a very large struct. Can it be allocated by memblock_alloc()
very early on only if required (kasan.stacktrace= can still switch it
off, right?).
> +void save_stack_info(struct kmem_cache *cache, void *object,
> + gfp_t flags, bool is_free)
static void save_stack_info(...)
> +{
> + u64 pos;
> + struct kasan_stack_ring_entry *entry;
> + depot_stack_handle_t stack;
> +
> + stack = kasan_save_stack(flags, true);
> +
> + pos = atomic64_fetch_add(1, &stack_ring.pos);
> + entry = &stack_ring.entries[pos % KASAN_STACK_RING_ENTRIES];
> +
> + atomic64_set(&entry->size, cache->object_size);
> + atomic_set(&entry->pid, current->pid);
> + atomic_set(&entry->stack, stack);
> + atomic_set(&entry->is_free, is_free);
> +
I don't see the point of these being atomic. You can make them normal
variables with the proper types, and use READ_ONCE() / WRITE_ONCE().
The only one where you truly need the atomic type is 'pos'.
> + /*
> + * Paired with atomic64_read_acquire() in
> + * kasan_complete_mode_report_info().
> + */
> + atomic64_set_release(&entry->ptr, (s64)object);
This could be smp_store_release() and 'ptr' can be just a normal pointer.
One thing that is not entirely impossible though (vs. re-reading same
pointer but inconsistent fields I mentioned above), is if something
wants to write to the ring, but stalls for a very long time before the
release of 'ptr', giving 'pos' the chance to wrap around and another
writer writing the same entry. Something like:
T0 | T1
--------------------------------------+--------------------------------
WRITE_ONCE(entry->size, ..) |
WRITE_ONCE(entry->pid, ..) |
| WRITE_ONCE(entry->size, ..)
| WRITE_ONCE(entry->pid, ..)
| WRITE_ONCE(entry->stack, ..)
| WRITE_ONCE(entry->is_free, ..)
| smp_store_release(entry->ptr, ...)
WRITE_ONCE(entry->stack, ..) |
WRITE_ONCE(entry->is_free, ..) |
smp_store_release(entry->ptr, ...) |
Which results in some mix of T0's and T1's data.
The way to solve this is to implement a try-lock using 'ptr':
#define BUSY_PTR ((void*)1) // non-zero because initial values are 0
old_ptr = READ_ONCE(entry->ptr);
if (old_ptr == BUSY_PTR)
goto next; /* Busy slot. */
if (!try_cmpxchg(&entry->ptr, &old_ptr, BUSY_PTR))
goto next; /* Busy slot. */
... set fields as before ...
smp_store_release(&entry->ptr, object);
> +}
>
> void kasan_save_alloc_info(struct kmem_cache *cache, void *object, gfp_t flags)
> {
> + save_stack_info(cache, object, flags, false);
> }
>
> void kasan_save_free_info(struct kmem_cache *cache, void *object)
> {
> + save_stack_info(cache, object, GFP_NOWAIT, true);
> }
> --
> 2.25.1
On Mon, Jun 20, 2022 at 3:35 PM Marco Elver <[email protected]> wrote:
>
> > The number of entries in the stack ring is fixed in this version of the
> > patch. We could either implement it as a config option or a command-line
> > argument. I tilt towards the latter option and will implement it in v2
> > unless there are objections.
>
> Yes, that'd be good, along with just not allocating if no stacktraces
> are requested per kasan.stacktrace=.
Sounds good, will do in v2.
> > +struct kasan_stack_ring_entry {
> > + atomic64_t ptr; /* void * */
> > + atomic64_t size; /* size_t */
> > + atomic_t pid; /* u32 */
> > + atomic_t stack; /* depot_stack_handle_t */
> > + atomic_t is_free; /* bool */
>
> Per comments below, consider making these non-atomic.
Will do in v2.
> > void kasan_complete_mode_report_info(struct kasan_report_info *info)
> > {
> > + u64 pos;
> > + struct kasan_stack_ring_entry *entry;
> > + void *object;
> > + u32 pid;
> > + depot_stack_handle_t stack;
> > + bool is_free;
>
> If you switch away from atomic for kasan_stack_ring_entry members, you
> can just replace the above with a 'struct kasan_stack_ring_entry' and
> READ_ONCE() each entry into it below.
It would be a bit confusing to have two kasan_stack_ring_entry-based
variables in the function. I'll keep the current code if you don't
mind.
> > + bool alloc_found = false, free_found = false;
> > +
> > info->bug_type = get_bug_type(info);
> > +
> > + if (!info->cache || !info->object)
> > + return;
> > +
> > + pos = atomic64_read(&stack_ring.pos);
> > +
> > + for (u64 i = pos - 1; i != pos - 1 - KASAN_STACK_RING_ENTRIES; i--) {
> > + if (alloc_found && free_found)
> > + break;
> > +
> > + entry = &stack_ring.entries[i % KASAN_STACK_RING_ENTRIES];
> > +
> > + /* Paired with atomic64_set_release() in save_stack_info(). */
> > + object = (void *)atomic64_read_acquire(&entry->ptr);
> > +
> > + if (kasan_reset_tag(object) != info->object ||
> > + get_tag(object) != get_tag(info->access_addr))
> > + continue;
> > +
> > + pid = atomic_read(&entry->pid);
> > + stack = atomic_read(&entry->stack);
> > + is_free = atomic_read(&entry->is_free);
> > +
> > + /* Try detecting if the entry was changed while being read. */
> > + smp_mb();
> > + if (object != (void *)atomic64_read(&entry->ptr))
> > + continue;
>
> What if the object was changed, but 'ptr' is the same? It might very
> well be possible to then read half of the info of the previous object,
> and half of the new object (e.g. pid is old, stack is new).
>
> Is the assumption that it is extremely unlikely that this will happen
> where 1) address is the same, and 2) tags are the same? And if it does
> happen, it is unlikely that there'll be a bug on that address?
>
> It might be worth stating this in comments.
This part will be removed in v2 due to the addition of an rwlock, but
I'll add a comment about the stack ring being best-effort anyway.
> Another thing is, if there's a bug, but concurrently you have tons of
> allocations/frees that change the ring's entries at a very high rate,
> how likely is it that the entire ring will have been wiped before the
> entry of interest is found again?
>
> One way to guard against this is to prevent modifications of the ring
> while the ring is searched. This could be implemented with a
> percpu-rwsem, which is almost free for read-lockers but very expensive
> for write-lockers. Insertions only acquire a read-lock, but on a bug
> when searching the ring, you have to acquire a write-lock. Although you
> currently take the contention hit for incrementing 'pos', so a plain
> rwlock might also be ok.
Will add an rwlock in v2.
> It would be good to understand the probabilities of these corner cases
> with some average to worst case workloads, and optimize based on that.
With the new synchronization and checks added in v2, the only
problematic case is when the stack ring overflows. Please see my
response to your cover letter comment regarding this.
> > +struct kasan_stack_ring stack_ring;
>
> This is a very large struct. Can it be allocated by memblock_alloc()
> very early on only if required (kasan.stacktrace= can still switch it
> off, right?).
Will do in v2.
> > +void save_stack_info(struct kmem_cache *cache, void *object,
> > + gfp_t flags, bool is_free)
>
> static void save_stack_info(...)
Right, will do in v2.
> > +{
> > + u64 pos;
> > + struct kasan_stack_ring_entry *entry;
> > + depot_stack_handle_t stack;
> > +
> > + stack = kasan_save_stack(flags, true);
> > +
> > + pos = atomic64_fetch_add(1, &stack_ring.pos);
> > + entry = &stack_ring.entries[pos % KASAN_STACK_RING_ENTRIES];
> > +
> > + atomic64_set(&entry->size, cache->object_size);
> > + atomic_set(&entry->pid, current->pid);
> > + atomic_set(&entry->stack, stack);
> > + atomic_set(&entry->is_free, is_free);
> > +
>
> I don't see the point of these being atomic. You can make them normal
> variables with the proper types, and use READ_ONCE() / WRITE_ONCE().
>
> The only one where you truly need the atomic type is 'pos'.
Will do in v2.
> > + /*
> > + * Paired with atomic64_read_acquire() in
> > + * kasan_complete_mode_report_info().
> > + */
> > + atomic64_set_release(&entry->ptr, (s64)object);
>
> This could be smp_store_release() and 'ptr' can be just a normal pointer.
Will do in v2.
> One thing that is not entirely impossible though (vs. re-reading same
> pointer but inconsistent fields I mentioned above), is if something
> wants to write to the ring, but stalls for a very long time before the
> release of 'ptr', giving 'pos' the chance to wrap around and another
> writer writing the same entry. Something like:
>
> T0 | T1
> --------------------------------------+--------------------------------
> WRITE_ONCE(entry->size, ..) |
> WRITE_ONCE(entry->pid, ..) |
> | WRITE_ONCE(entry->size, ..)
> | WRITE_ONCE(entry->pid, ..)
> | WRITE_ONCE(entry->stack, ..)
> | WRITE_ONCE(entry->is_free, ..)
> | smp_store_release(entry->ptr, ...)
> WRITE_ONCE(entry->stack, ..) |
> WRITE_ONCE(entry->is_free, ..) |
> smp_store_release(entry->ptr, ...) |
>
> Which results in some mix of T0's and T1's data.
>
> The way to solve this is to implement a try-lock using 'ptr':
>
> #define BUSY_PTR ((void*)1) // non-zero because initial values are 0
> old_ptr = READ_ONCE(entry->ptr);
> if (old_ptr == BUSY_PTR)
> goto next; /* Busy slot. */
> if (!try_cmpxchg(&entry->ptr, &old_ptr, BUSY_PTR))
> goto next; /* Busy slot. */
> ... set fields as before ...
> smp_store_release(&entry->ptr, object);
Sounds good, will do in v2.
Thank you, Marco!