Per memcg slab accounting and kasan have a problem with kmem_cache
destruction.
- kmem_cache_create() allocates a kmem_cache, which is used for
allocations from processes running in root (top) memcg.
- Processes running in non root memcg and allocating with either
__GFP_ACCOUNT or from a SLAB_ACCOUNT cache use a per memcg kmem_cache.
- Kasan catches use-after-free by having kfree() and kmem_cache_free()
defer freeing of objects. Objects are placed in a quarantine.
- kmem_cache_destroy() destroys root and non root kmem_caches. It takes
care to drain the quarantine of objects from the root memcg's
kmem_cache, but ignores objects associated with non root memcg. This
causes leaks because quarantined per memcg objects refer to per memcg
kmem cache being destroyed.
To see the problem:
1) create a slab cache with kmem_cache_create(,,,SLAB_ACCOUNT,)
2) from non root memcg, allocate and free a few objects from cache
3) dispose of the cache with kmem_cache_destroy()
kmem_cache_destroy() will trigger a "Slab cache still has objects"
warning indicating that the per memcg kmem_cache structure was leaked.
Fix the leak by draining kasan quarantined objects allocated from non
root memcg.
Racing memcg deletion is tricky, but handled. kmem_cache_destroy() =>
shutdown_memcg_caches() => __shutdown_memcg_cache() => shutdown_cache()
flushes per memcg quarantined objects, even if that memcg has been
rmdir'd and gone through memcg_deactivate_kmem_caches().
This leak only affects destroyed SLAB_ACCOUNT kmem caches when kasan is
enabled. So I don't think it's worth patching stable kernels.
Signed-off-by: Greg Thelen <[email protected]>
---
include/linux/kasan.h | 4 ++--
mm/kasan/kasan.c | 2 +-
mm/kasan/quarantine.c | 1 +
mm/slab_common.c | 4 +++-
4 files changed, 7 insertions(+), 4 deletions(-)
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 820c0ad54a01..c908b25bf5a5 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -52,7 +52,7 @@ void kasan_free_pages(struct page *page, unsigned int order);
void kasan_cache_create(struct kmem_cache *cache, size_t *size,
unsigned long *flags);
void kasan_cache_shrink(struct kmem_cache *cache);
-void kasan_cache_destroy(struct kmem_cache *cache);
+void kasan_cache_shutdown(struct kmem_cache *cache);
void kasan_poison_slab(struct page *page);
void kasan_unpoison_object_data(struct kmem_cache *cache, void *object);
@@ -98,7 +98,7 @@ static inline void kasan_cache_create(struct kmem_cache *cache,
size_t *size,
unsigned long *flags) {}
static inline void kasan_cache_shrink(struct kmem_cache *cache) {}
-static inline void kasan_cache_destroy(struct kmem_cache *cache) {}
+static inline void kasan_cache_shutdown(struct kmem_cache *cache) {}
static inline void kasan_poison_slab(struct page *page) {}
static inline void kasan_unpoison_object_data(struct kmem_cache *cache,
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index 0e9505f66ec1..8d020ad5b74a 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c
@@ -428,7 +428,7 @@ void kasan_cache_shrink(struct kmem_cache *cache)
quarantine_remove_cache(cache);
}
-void kasan_cache_destroy(struct kmem_cache *cache)
+void kasan_cache_shutdown(struct kmem_cache *cache)
{
quarantine_remove_cache(cache);
}
diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c
index baabaad4a4aa..fb362cb19157 100644
--- a/mm/kasan/quarantine.c
+++ b/mm/kasan/quarantine.c
@@ -273,6 +273,7 @@ static void per_cpu_remove_cache(void *arg)
qlist_free_all(&to_free, cache);
}
+/* Free all quarantined objects belonging to cache. */
void quarantine_remove_cache(struct kmem_cache *cache)
{
unsigned long flags;
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 329b03843863..d3c8602dea5d 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -455,6 +455,9 @@ EXPORT_SYMBOL(kmem_cache_create);
static int shutdown_cache(struct kmem_cache *s,
struct list_head *release, bool *need_rcu_barrier)
{
+ /* free asan quarantined objects */
+ kasan_cache_shutdown(s);
+
if (__kmem_cache_shutdown(s) != 0)
return -EBUSY;
@@ -715,7 +718,6 @@ void kmem_cache_destroy(struct kmem_cache *s)
get_online_cpus();
get_online_mems();
- kasan_cache_destroy(s);
mutex_lock(&slab_mutex);
s->refcount--;
--
2.8.0.rc3.226.g39d4020
Make a kasan test which uses a SLAB_ACCOUNT slab cache. If the test is
run within a non default memcg, then it uncovers the bug fixed by
"kasan: drain quarantine of memcg slab objects"[1].
If run without fix [1] it shows "Slab cache still has objects", and the
kmem_cache structure is leaked.
Here's an unpatched kernel test:
$ dmesg -c > /dev/null
$ mkdir /sys/fs/cgroup/memory/test
$ echo $$ > /sys/fs/cgroup/memory/test/tasks
$ modprobe test_kasan 2> /dev/null
$ dmesg | grep -B1 still
[ 123.456789] kasan test: memcg_accounted_kmem_cache allocate memcg accounted object
[ 124.456789] kmem_cache_destroy test_cache: Slab cache still has objects
Kernels with fix [1] don't have the "Slab cache still has objects"
warning or the underlying leak.
The new test runs and passes in the default (root) memcg, though in the
root memcg it won't uncover the problem fixed by [1].
Signed-off-by: Greg Thelen <[email protected]>
---
lib/test_kasan.c | 34 ++++++++++++++++++++++++++++++++++
1 file changed, 34 insertions(+)
diff --git a/lib/test_kasan.c b/lib/test_kasan.c
index fbdf87920093..0b1d3140fbb8 100644
--- a/lib/test_kasan.c
+++ b/lib/test_kasan.c
@@ -11,6 +11,7 @@
#define pr_fmt(fmt) "kasan test: %s " fmt, __func__
+#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/mman.h>
#include <linux/mm.h>
@@ -331,6 +332,38 @@ static noinline void __init kmem_cache_oob(void)
kmem_cache_destroy(cache);
}
+static noinline void __init memcg_accounted_kmem_cache(void)
+{
+ int i;
+ char *p;
+ size_t size = 200;
+ struct kmem_cache *cache;
+
+ cache = kmem_cache_create("test_cache", size, 0, SLAB_ACCOUNT, NULL);
+ if (!cache) {
+ pr_err("Cache allocation failed\n");
+ return;
+ }
+
+ pr_info("allocate memcg accounted object\n");
+ /*
+ * Several allocations with a delay to allow for lazy per memcg kmem
+ * cache creation.
+ */
+ for (i = 0; i < 5; i++) {
+ p = kmem_cache_alloc(cache, GFP_KERNEL);
+ if (!p) {
+ pr_err("Allocation failed\n");
+ goto free_cache;
+ }
+ kmem_cache_free(cache, p);
+ msleep(100);
+ }
+
+free_cache:
+ kmem_cache_destroy(cache);
+}
+
static char global_array[10];
static noinline void __init kasan_global_oob(void)
@@ -460,6 +493,7 @@ static int __init kmalloc_tests_init(void)
kmalloc_uaf_memset();
kmalloc_uaf2();
kmem_cache_oob();
+ memcg_accounted_kmem_cache();
kasan_stack_oob();
kasan_global_oob();
ksize_unpoisons_memory();
--
2.8.0.rc3.226.g39d4020
On 12/20/2016 09:11 PM, Greg Thelen wrote:
> Per memcg slab accounting and kasan have a problem with kmem_cache
> destruction.
> - kmem_cache_create() allocates a kmem_cache, which is used for
> allocations from processes running in root (top) memcg.
> - Processes running in non root memcg and allocating with either
> __GFP_ACCOUNT or from a SLAB_ACCOUNT cache use a per memcg kmem_cache.
> - Kasan catches use-after-free by having kfree() and kmem_cache_free()
> defer freeing of objects. Objects are placed in a quarantine.
> - kmem_cache_destroy() destroys root and non root kmem_caches. It takes
> care to drain the quarantine of objects from the root memcg's
> kmem_cache, but ignores objects associated with non root memcg. This
> causes leaks because quarantined per memcg objects refer to per memcg
> kmem cache being destroyed.
>
> To see the problem:
> 1) create a slab cache with kmem_cache_create(,,,SLAB_ACCOUNT,)
> 2) from non root memcg, allocate and free a few objects from cache
> 3) dispose of the cache with kmem_cache_destroy()
> kmem_cache_destroy() will trigger a "Slab cache still has objects"
> warning indicating that the per memcg kmem_cache structure was leaked.
>
> Fix the leak by draining kasan quarantined objects allocated from non
> root memcg.
>
> Racing memcg deletion is tricky, but handled. kmem_cache_destroy() =>
> shutdown_memcg_caches() => __shutdown_memcg_cache() => shutdown_cache()
> flushes per memcg quarantined objects, even if that memcg has been
> rmdir'd and gone through memcg_deactivate_kmem_caches().
>
> This leak only affects destroyed SLAB_ACCOUNT kmem caches when kasan is
> enabled. So I don't think it's worth patching stable kernels.
>
> Signed-off-by: Greg Thelen <[email protected]>
>
Acked-by: Andrey Ryabinin <[email protected]>
On Tue, Dec 20, 2016 at 10:11:01AM -0800, Greg Thelen wrote:
> Per memcg slab accounting and kasan have a problem with kmem_cache
> destruction.
> - kmem_cache_create() allocates a kmem_cache, which is used for
> allocations from processes running in root (top) memcg.
> - Processes running in non root memcg and allocating with either
> __GFP_ACCOUNT or from a SLAB_ACCOUNT cache use a per memcg kmem_cache.
> - Kasan catches use-after-free by having kfree() and kmem_cache_free()
> defer freeing of objects. Objects are placed in a quarantine.
> - kmem_cache_destroy() destroys root and non root kmem_caches. It takes
> care to drain the quarantine of objects from the root memcg's
> kmem_cache, but ignores objects associated with non root memcg. This
> causes leaks because quarantined per memcg objects refer to per memcg
> kmem cache being destroyed.
>
> To see the problem:
> 1) create a slab cache with kmem_cache_create(,,,SLAB_ACCOUNT,)
> 2) from non root memcg, allocate and free a few objects from cache
> 3) dispose of the cache with kmem_cache_destroy()
> kmem_cache_destroy() will trigger a "Slab cache still has objects"
> warning indicating that the per memcg kmem_cache structure was leaked.
>
> Fix the leak by draining kasan quarantined objects allocated from non
> root memcg.
>
> Racing memcg deletion is tricky, but handled. kmem_cache_destroy() =>
> shutdown_memcg_caches() => __shutdown_memcg_cache() => shutdown_cache()
> flushes per memcg quarantined objects, even if that memcg has been
> rmdir'd and gone through memcg_deactivate_kmem_caches().
>
> This leak only affects destroyed SLAB_ACCOUNT kmem caches when kasan is
> enabled. So I don't think it's worth patching stable kernels.
>
> Signed-off-by: Greg Thelen <[email protected]>
Reviewed-by: Vladimir Davydov <[email protected]>
On Tue, Dec 20, 2016 at 10:11:02AM -0800, Greg Thelen wrote:
> Make a kasan test which uses a SLAB_ACCOUNT slab cache. If the test is
> run within a non default memcg, then it uncovers the bug fixed by
> "kasan: drain quarantine of memcg slab objects"[1].
>
> If run without fix [1] it shows "Slab cache still has objects", and the
> kmem_cache structure is leaked.
> Here's an unpatched kernel test:
> $ dmesg -c > /dev/null
> $ mkdir /sys/fs/cgroup/memory/test
> $ echo $$ > /sys/fs/cgroup/memory/test/tasks
> $ modprobe test_kasan 2> /dev/null
> $ dmesg | grep -B1 still
> [ 123.456789] kasan test: memcg_accounted_kmem_cache allocate memcg accounted object
> [ 124.456789] kmem_cache_destroy test_cache: Slab cache still has objects
>
> Kernels with fix [1] don't have the "Slab cache still has objects"
> warning or the underlying leak.
>
> The new test runs and passes in the default (root) memcg, though in the
> root memcg it won't uncover the problem fixed by [1].
>
> Signed-off-by: Greg Thelen <[email protected]>
Reviewed-by: Vladimir Davydov <[email protected]>