All slub debug features currently disable the fast path completely.
Some features such as consistency checks require this to allow taking of
locks. Poisoning and red zoning don't require this and can safely use
the per-cpu fast path. Introduce a Kconfig to continue to use the fast
path when 'fast' debugging options are enabled. The code will
automatically revert to always using the slow path when 'slow' options
are enabled.
Signed-off-by: Laura Abbott <[email protected]>
---
This is a follow-up to my previous proposal to add an alternate per-cpu
list. The feedback was to just add this to the fast path. With this version,
the hackbench penalty with slub_debug=P is only 3%. hackbench is too noisy to
give an idea of the change with just slub_debug=-, so I looked at some of the
bulk allocation benchmarks from https://github.com/netoptimizer/prototype-kernel .
With slab_bulk_test01, the penalty was between 4 and 7 cycles even with
slub_debug=-.
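To make the expected behavior concrete, here is a reading of the
SLAB_SLOW_FLAGS definition in the patch below (an illustration only, not
new functionality). With CONFIG_SLUB_FAST_POISON=y:

  slub_debug=P     poisoning only                  -> fast path kept
  slub_debug=ZP    red zoning + poisoning          -> fast path kept
  slub_debug=FP    consistency checks + poisoning  -> slow path
  slub_debug=U     user tracking                   -> slow path

The F/U/T letters correspond to SLAB_CONSISTENCY_CHECKS, SLAB_STORE_USER
and SLAB_TRACE, which are exactly the flags grouped into SLAB_SLOW_FLAGS.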
---
init/Kconfig | 10 ++++++++++
mm/slub.c | 50 +++++++++++++++++++++++++++++++++++++++++++++-----
2 files changed, 55 insertions(+), 5 deletions(-)
diff --git a/init/Kconfig b/init/Kconfig
index 8514b25db21c..aef7cc2bf275 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1582,6 +1582,16 @@ config SLUB_CPU_PARTIAL
which requires the taking of locks that may cause latency spikes.
Typically one would choose no for a realtime system.
+config SLUB_FAST_POISON
+ bool "Allow poisoning debug options to use the fast path"
+ depends on SLUB_CPU_PARTIAL
+ help
+ Some SLUB debugging options are safe to use without taking extra
+ locks and can use the per-cpu lists. Enable this option to let
+ poisoning and red zoning use the per-cpu lists. The trade-off is
+ a few extra checks in the fast path. You should select this option
+ if you intend to use poisoning for non-debugging uses.
+
config MMAP_ALLOW_UNINITIALIZED
bool "Allow mmapped anonymous memory to be uninitialized"
depends on EXPERT && !MMU
diff --git a/mm/slub.c b/mm/slub.c
index 1d3f9835f4ea..a296693ce907 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -124,6 +124,18 @@ static inline int kmem_cache_debug(struct kmem_cache *s)
#endif
}
+#define SLAB_SLOW_FLAGS (SLAB_CONSISTENCY_CHECKS | SLAB_STORE_USER | \
+ SLAB_TRACE)
+
+static inline int kmem_cache_slow_debug(struct kmem_cache *s)
+{
+#if defined(CONFIG_SLUB_FAST_POISON)
+ return s->flags & SLAB_SLOW_FLAGS;
+#else
+ return kmem_cache_debug(s);
+#endif
+}
+
void *fixup_red_left(struct kmem_cache *s, void *p)
{
if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE)
@@ -134,7 +146,9 @@ void *fixup_red_left(struct kmem_cache *s, void *p)
static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
{
-#ifdef CONFIG_SLUB_CPU_PARTIAL
+#if defined(CONFIG_SLUB_FAST_POISON)
+ return !kmem_cache_slow_debug(s);
+#elif defined(CONFIG_SLUB_CPU_PARTIAL)
return !kmem_cache_debug(s);
#else
return false;
@@ -2083,7 +2097,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
}
} else {
m = M_FULL;
- if (kmem_cache_debug(s) && !lock) {
+ if (kmem_cache_slow_debug(s) && !lock) {
lock = 1;
/*
* This also ensures that the scanning of full
@@ -2580,11 +2594,11 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
}
page = c->page;
- if (likely(!kmem_cache_debug(s) && pfmemalloc_match(page, gfpflags)))
+ if (likely(!kmem_cache_slow_debug(s) && pfmemalloc_match(page, gfpflags)))
goto load_freelist;
/* Only entered in the debug case */
- if (kmem_cache_debug(s) &&
+ if (kmem_cache_slow_debug(s) &&
!alloc_debug_processing(s, page, freelist, addr))
goto new_slab; /* Slab failed checks. Next slab needed */
@@ -2617,6 +2631,12 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
return p;
}
+static inline void alloc_sanitize(struct kmem_cache *s, void *object)
+{
+#ifdef CONFIG_SLUB_FAST_POISON
+ init_object(s, object, SLUB_RED_ACTIVE);
+#endif
+}
/*
* Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc)
* have the fastpath folded into their functions. So no function call
@@ -2706,6 +2726,8 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s,
stat(s, ALLOC_FASTPATH);
}
+ if (kmem_cache_debug(s))
+ alloc_sanitize(s, object);
if (unlikely(gfpflags & __GFP_ZERO) && object)
memset(object, 0, s->object_size);
@@ -2793,7 +2815,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
stat(s, FREE_SLOWPATH);
- if (kmem_cache_debug(s) &&
+ if (kmem_cache_slow_debug(s) &&
!free_debug_processing(s, page, head, tail, cnt, addr))
return;
@@ -2908,6 +2930,21 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
* same page) possible by specifying head and tail ptr, plus objects
* count (cnt). Bulk free indicated by tail pointer being set.
*/
+
+static inline void free_sanitize(struct kmem_cache *s, struct page *page, void *head, void *tail)
+{
+#ifdef CONFIG_SLUB_FAST_POISON
+ void *object = head;
+
+next_object:
+ init_object(s, object, SLUB_RED_INACTIVE);
+ if (object != tail) {
+ object = get_freepointer(s, object);
+ goto next_object;
+ }
+#endif
+}
+
static __always_inline void do_slab_free(struct kmem_cache *s,
struct page *page, void *head, void *tail,
int cnt, unsigned long addr)
@@ -2931,6 +2968,9 @@ static __always_inline void do_slab_free(struct kmem_cache *s,
/* Same with comment on barrier() in slab_alloc_node() */
barrier();
+ if (kmem_cache_debug(s))
+ free_sanitize(s, page, head, tail_obj);
+
if (likely(page == c->page)) {
set_freepointer(s, tail_obj, c->freelist);
--
2.13.0
On Fri, 4 Aug 2017, Laura Abbott wrote:
> All slub debug features currently disable the fast path completely.
> Some features such as consistency checks require this to allow taking of
> locks. Poisoning and red zoning don't require this and can safely use
> the per-cpu fast path. Introduce a Kconfig to continue to use the fast
> path when 'fast' debugging options are enabled. The code will
> automatically revert to always using the slow path when 'slow' options
> are enabled.
Ok I see that the objects are initialized with poisoning and redzoning but
I do not see that there is fastpath code to actually check the values
before the object is reinitialized. Is that intentional or am
I missing something?
On 08/07/2017 07:37 AM, Christopher Lameter wrote:
> On Fri, 4 Aug 2017, Laura Abbott wrote:
>
>> All slub debug features currently disable the fast path completely.
>> Some features such as consistency checks require this to allow taking of
>> locks. Poisoning and red zoning don't require this and can safely use
>> the per-cpu fast path. Introduce a Kconfig to continue to use the fast
>> path when 'fast' debugging options are enabled. The code will
>> automatically revert to always using the slow path when 'slow' options
>> are enabled.
>
> Ok I see that the objects are initialized with poisoning and redzoning but
> I do not see that there is fastpath code to actually check the values
> before the object is reinitialized. Is that intentional or am
> I missing something?
>
Yes, that's intentional here. I see the validation as a separate more
expensive feature. I had a crude patch to do some checks for testing
and I know Daniel Micay had an out of tree patch to do some checks
as well.
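For the curious, a rough sketch of what a fast-path verification check
could look like (hypothetical and untested; POISON_FREE, POISON_END,
memchr_inv() and pr_err() are existing kernel pieces, the function itself
is made up for illustration -- the real poison setup also keys off the
internal __OBJECT_POISON flag rather than SLAB_POISON directly):

/* Hypothetical: verify the poison pattern on the alloc fast path
 * before the object is reinitialized for the new allocation. */
static inline void verify_poison(struct kmem_cache *s, void *object)
{
	u8 *p = object;

	if (!(s->flags & SLAB_POISON))
		return;

	/* free left POISON_FREE over the object and POISON_END at the end */
	if (memchr_inv(p, POISON_FREE, s->object_size - 1) ||
	    p[s->object_size - 1] != POISON_END)
		pr_err("SLUB %s: poison overwritten in object %p\n",
		       s->name, object);
}

Even a check that naive has to walk the whole object on every allocation,
which is the kind of cost I'd rather keep behind a separate option.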
Thanks,
Laura
On Mon, 7 Aug 2017, Laura Abbott wrote:
> > Ok I see that the objects are initialized with poisoning and redzoning but
> > I do not see that there is fastpath code to actually check the values
> > before the object is reinitialized. Is that intentional or am
> > I missing something?
>
> Yes, that's intentional here. I see the validation as a separate more
> expensive feature. I had a crude patch to do some checks for testing
> and I know Daniel Micay had an out of tree patch to do some checks
> as well.
Ok then this patch does nothing? How does this help?
On 08/07/2017 11:03 AM, Christopher Lameter wrote:
> On Mon, 7 Aug 2017, Laura Abbott wrote:
>
>>> Ok I see that the objects are initialized with poisoning and redzoning but
>>> I do not see that there is fastpath code to actually check the values
>>> before the object is reinitialized. Is that intentional or am
>>> I missing something?
>>
>> Yes, that's intentional here. I see the validation as a separate more
>> expensive feature. I had a crude patch to do some checks for testing
>> and I know Daniel Micay had an out of tree patch to do some checks
>> as well.
>
> Ok then this patch does nothing? How does this help?
The purpose of this patch is to ensure the poisoning can happen without
too much penalty. Even if there aren't checks to abort/warn when there
is a problem, there's still value in ensuring objects are always poisoned.
Thanks,
Laura
On Mon, Aug 7, 2017 at 3:00 PM, Laura Abbott <[email protected]> wrote:
> On 08/07/2017 11:03 AM, Christopher Lameter wrote:
>> On Mon, 7 Aug 2017, Laura Abbott wrote:
>>
>>>> Ok I see that the objects are initialized with poisoning and redzoning but
>>>> I do not see that there is fastpath code to actually check the values
>>>> before the object is reinitialized. Is that intentional or am
>>>> I missing something?
>>>
>>> Yes, that's intentional here. I see the validation as a separate more
>>> expensive feature. I had a crude patch to do some checks for testing
>>> and I know Daniel Micay had an out of tree patch to do some checks
>>> as well.
>>
>> Ok then this patch does nothing? How does this help?
>
> The purpose of this patch is to ensure the poisoning can happen without
> too much penalty. Even if there aren't checks to abort/warn when there
> is a problem, there's still value in ensuring objects are always poisoned.
To clarify, this is desirable to kill exploitation of
exposure-after-free flaws and some classes of use-after-free flaws,
since the contents will have been wiped out after a free. (Verification
of poison is nice, but is expensive compared to the benefit against
these exploits -- and notably doesn't protect against the other
use-after-free attacks where the contents are changed after the next
allocation, which would have passed the poison verification.)
-Kees
--
Kees Cook
Pixel Security
On Mon, 7 Aug 2017, Kees Cook wrote:
>
> To clarify, this is desirable to kill exploitation of
> exposure-after-free flaws and some classes of use-after-free flaws,
> since the contents will have been wiped out after a free. (Verification
> of poison is nice, but is expensive compared to the benefit against
> these exploits -- and notably doesn't protect against the other
> use-after-free attacks where the contents are changed after the next
> allocation, which would have passed the poison verification.)
Well the only variable in the freed area that is in use by the allocator
is the free pointer. This ensures that the complete object is poisoned and the
free pointer has a separate storage area, right? So the size of the slab
objects increases. In addition to more hotpath processing we also have
increased object sizes.
I am not familiar with the terminology here.
So exposure-after-free means that the contents of the object can be used
after it was freed?
Contents are changed after allocation? Someone gets a pointer to the
object and then modifies it later?
On Tue, Aug 8, 2017 at 8:01 AM, Christopher Lameter <[email protected]> wrote:
>
> On Mon, 7 Aug 2017, Kees Cook wrote:
>>
>> To clarify, this is desirable to kill exploitation of
>> exposure-after-free flaws and some classes of use-after-free flaws,
>> since the contents will have been wiped out after a free. (Verification
>> of poison is nice, but is expensive compared to the benefit against
>> these exploits -- and notably doesn't protect against the other
>> use-after-free attacks where the contents are changed after the next
>> allocation, which would have passed the poison verification.)
>
> Well the only variable in the freed area that is in use by the allocator
> is the free pointer. This ensures that the complete object is poisoned and the
> free pointer has a separate storage area, right? So the size of the slab
> objects increases. In addition to more hotpath processing we also have
> increased object sizes.
I'll let Laura speak to that; this is mainly an implementation detail.
I think it would be fine to leave the free pointer written in-object
after poisoning.
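For reference, that matches the ordering in the patch as posted: on the
fast path, do_slab_free() poisons first and only then stores the free
pointer (abridged from the hunk above):

	if (kmem_cache_debug(s))
		free_sanitize(s, page, head, tail_obj);

	if (likely(page == c->page)) {
		/* free pointer written after the object has been poisoned */
		set_freepointer(s, tail_obj, c->freelist);
		...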
> I am not familiar with the terminology here.
Sorry, my fault for not being more clear! More below...
> So exposure-after-free means that the contents of the object can be used
> after it was freed?
There are a few things mixed together, but mainly this is about removing
the idea of "uninitialized" memory contents. One example is simply
a memory region getting reused immediately but never being properly
initialized, so the old contents are still there, and they
get exposed in some way (for a recent example, see [1]), leaking
sensitive kernel contents that an attacker can use to extend another
attack (e.g. leaking the precise location of some other target in
kernel memory). A simple example could look like this:
userspace makes syscall
... some function call path ...
kfree($location);
userspace makes syscall
... other function ...
ptr = kmalloc(...); // ptr is $location now
... buggy logic that never writes to ptr contents ...
copy_to_user(user, ptr, ...); // contents of $location copied to userspace
> Contents are changed after allocation? Someone gets a pointer to the
> object and then modifies it later?
The classic use-after-free attack isn't normally affected by cache
poisoning, since the attack pattern is:
userspace makes syscall
tracked_struct = kmalloc(...);
...
kfree(tracked_struct); // some bug causes an early free
userspace makes syscall
...
other_struct = kmalloc(...); // tracked_struct same as other_struct now
other_struct->fields = evil_from_userspace; // overwrite by attacker
userspace makes syscall
...
tracked_struct->function_pointer(...); // calls attacker-controlled function
In other words, between the kfree() and the use, it gets reallocated
and written to, but the old reference remains and operates on the
newly written contents (in this worst-case example, it's a function
pointer overwrite). What I meant by "some classes of use-after-free
flaws" was that in rare cases the "written to" step isn't needed,
since the existing contents can be used as-is (i.e. like the
"exposure-after-free" example I showed first). It differs, though, in
what primitives it provides to an attacker: it's not "just" an
exposure, but results in an attacker having control over kernel
behavior due to unexpected contents in memory.
Similar things happen to stack variables (there are lots of stack
info-leak examples, and see my presentation[2] for a direct execution
control example due to "uninitialized" variables), but that is being
worked on separately (forced stack variable init, and forced stack
clearing). The fast-path poisoning-on-free effort here is to protect
the slab cache from these classes of flaws and attacks.
-Kees
[1] http://seclists.org/oss-sec/2017/q2/455
[2] https://outflux.net/slides/2011/defcon/kernel-exploitation.pdf
--
Kees Cook
Pixel Security