Allocations can be a very hot path, and this out-of-line function
call is noticeable.
Signed-off-by: Jens Axboe <[email protected]>
---
diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h
index e525f6957c49..3128d2c8b3b4 100644
--- a/include/linux/fault-inject.h
+++ b/include/linux/fault-inject.h
@@ -64,8 +64,8 @@ static inline struct dentry *fault_create_debugfs_attr(const char *name,

struct kmem_cache;

-int should_failslab(struct kmem_cache *s, gfp_t gfpflags);
#ifdef CONFIG_FAILSLAB
+int should_failslab(struct kmem_cache *s, gfp_t gfpflags);
extern bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags);
#else
static inline bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
diff --git a/mm/slab.h b/mm/slab.h
index 58c01a34e5b8..92fd6fe01877 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -491,8 +491,10 @@ static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,

might_alloc(flags);

+#ifdef CONFIG_FAILSLAB
if (should_failslab(s, flags))
return NULL;
+#endif

if (!memcg_slab_pre_alloc_hook(s, objcgp, size, flags))
return NULL;
diff --git a/mm/slab_common.c b/mm/slab_common.c
index ec2bb0beed75..c21bd447f237 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -1323,6 +1323,7 @@ EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node);
EXPORT_TRACEPOINT_SYMBOL(kfree);
EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free);

+#ifdef CONFIG_FAILSLAB
int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
{
if (__should_failslab(s, gfpflags))
@@ -1330,3 +1331,4 @@ int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
return 0;
}
ALLOW_ERROR_INJECTION(should_failslab, ERRNO);
+#endif
--
Jens Axboe
On Tue, 5 Oct 2021 09:31:43 -0600 Jens Axboe <[email protected]> wrote:
> Allocations can be a very hot path, and this out-of-line function
> call is noticeable.
>
> --- a/include/linux/fault-inject.h
> +++ b/include/linux/fault-inject.h
> @@ -64,8 +64,8 @@ static inline struct dentry *fault_create_debugfs_attr(const char *name,
>
> struct kmem_cache;
>
> -int should_failslab(struct kmem_cache *s, gfp_t gfpflags);
> #ifdef CONFIG_FAILSLAB
> +int should_failslab(struct kmem_cache *s, gfp_t gfpflags);
> extern bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags);
> #else
> static inline bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
> diff --git a/mm/slab.h b/mm/slab.h
> index 58c01a34e5b8..92fd6fe01877 100644
> --- a/mm/slab.h
> +++ b/mm/slab.h
> @@ -491,8 +491,10 @@ static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
>
> might_alloc(flags);
>
> +#ifdef CONFIG_FAILSLAB
> if (should_failslab(s, flags))
> return NULL;
> +#endif
Can we avoid the ifdefs here?
>
> if (!memcg_slab_pre_alloc_hook(s, objcgp, size, flags))
> return NULL;
> diff --git a/mm/slab_common.c b/mm/slab_common.c
> index ec2bb0beed75..c21bd447f237 100644
> --- a/mm/slab_common.c
> +++ b/mm/slab_common.c
> @@ -1323,6 +1323,7 @@ EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node);
> EXPORT_TRACEPOINT_SYMBOL(kfree);
> EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free);
>
> +#ifdef CONFIG_FAILSLAB
> int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
> {
> if (__should_failslab(s, gfpflags))
> @@ -1330,3 +1331,4 @@ int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
> return 0;
> }
> ALLOW_ERROR_INJECTION(should_failslab, ERRNO);
> +#endif
Like,
--- a/include/linux/fault-inject.h~mm-dont-call-should_failslab-for-config_failslab-fix
+++ a/include/linux/fault-inject.h
@@ -68,6 +68,10 @@ struct kmem_cache;
int should_failslab(struct kmem_cache *s, gfp_t gfpflags);
extern bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags);
#else
+static inline int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
+{
+ return 0;
+}
static inline bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
{
return false;
--- a/mm/slab.h~mm-dont-call-should_failslab-for-config_failslab-fix
+++ a/mm/slab.h
@@ -491,10 +491,8 @@ static inline struct kmem_cache *slab_pr
might_alloc(flags);
-#ifdef CONFIG_FAILSLAB
if (should_failslab(s, flags))
return NULL;
-#endif
if (!memcg_slab_pre_alloc_hook(s, objcgp, size, flags))
return NULL;
_
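
For reference, with both changes applied, the declaration block in
include/linux/fault-inject.h would end up looking roughly like this (a
sketch reassembled from the two hunks above, not the committed result):

struct kmem_cache;

#ifdef CONFIG_FAILSLAB
int should_failslab(struct kmem_cache *s, gfp_t gfpflags);
extern bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags);
#else
static inline int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
{
	return 0;
}
static inline bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
{
	return false;
}
#endif

That way slab_pre_alloc_hook() can keep calling should_failslab()
unconditionally and the compiler folds the call away when CONFIG_FAILSLAB
is off.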
On 10/5/21 3:18 PM, Andrew Morton wrote:
> On Tue, 5 Oct 2021 09:31:43 -0600 Jens Axboe <[email protected]> wrote:
>
>> Allocations can be a very hot path, and this out-of-line function
>> call is noticeable.
>>
>> --- a/include/linux/fault-inject.h
>> +++ b/include/linux/fault-inject.h
>> @@ -64,8 +64,8 @@ static inline struct dentry *fault_create_debugfs_attr(const char *name,
>>
>> struct kmem_cache;
>>
>> -int should_failslab(struct kmem_cache *s, gfp_t gfpflags);
>> #ifdef CONFIG_FAILSLAB
>> +int should_failslab(struct kmem_cache *s, gfp_t gfpflags);
>> extern bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags);
>> #else
>> static inline bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
>> diff --git a/mm/slab.h b/mm/slab.h
>> index 58c01a34e5b8..92fd6fe01877 100644
>> --- a/mm/slab.h
>> +++ b/mm/slab.h
>> @@ -491,8 +491,10 @@ static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
>>
>> might_alloc(flags);
>>
>> +#ifdef CONFIG_FAILSLAB
>> if (should_failslab(s, flags))
>> return NULL;
>> +#endif
>
> Can we avoid the ifdefs here?
>
>>
>> if (!memcg_slab_pre_alloc_hook(s, objcgp, size, flags))
>> return NULL;
>> diff --git a/mm/slab_common.c b/mm/slab_common.c
>> index ec2bb0beed75..c21bd447f237 100644
>> --- a/mm/slab_common.c
>> +++ b/mm/slab_common.c
>> @@ -1323,6 +1323,7 @@ EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node);
>> EXPORT_TRACEPOINT_SYMBOL(kfree);
>> EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free);
>>
>> +#ifdef CONFIG_FAILSLAB
>> int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
>> {
>> if (__should_failslab(s, gfpflags))
>> @@ -1330,3 +1331,4 @@ int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
>> return 0;
>> }
>> ALLOW_ERROR_INJECTION(should_failslab, ERRNO);
>> +#endif
>
> Like,
>
> --- a/include/linux/fault-inject.h~mm-dont-call-should_failslab-for-config_failslab-fix
> +++ a/include/linux/fault-inject.h
> @@ -68,6 +68,10 @@ struct kmem_cache;
> int should_failslab(struct kmem_cache *s, gfp_t gfpflags);
> extern bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags);
> #else
> +static inline int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
> +{
> + return 0;
> +}
> static inline bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
> {
> return false;
> --- a/mm/slab.h~mm-dont-call-should_failslab-for-config_failslab-fix
> +++ a/mm/slab.h
> @@ -491,10 +491,8 @@ static inline struct kmem_cache *slab_pr
>
> might_alloc(flags);
>
> -#ifdef CONFIG_FAILSLAB
> if (should_failslab(s, flags))
> return NULL;
> -#endif
>
> if (!memcg_slab_pre_alloc_hook(s, objcgp, size, flags))
> return NULL;
> _
Yep, that'll work!
--
Jens Axboe
On Thu, Oct 07, 2021 at 05:32:52PM +0200, Vlastimil Babka wrote:
> On 10/5/21 17:31, Jens Axboe wrote:
> > Allocations can be a very hot path, and this out-of-line function
> > call is noticeable.
> >
> > Signed-off-by: Jens Axboe <[email protected]>
>
> It used to be inline b4 (hi, Konstantin!)
Congratulations, you made me look. :)
-K
On 10/7/21 9:32 AM, Vlastimil Babka wrote:
> On 10/5/21 17:31, Jens Axboe wrote:
>> Allocations can be a very hot path, and this out-of-line function
>> call is noticeable.
>>
>> Signed-off-by: Jens Axboe <[email protected]>
>
> It used to be inline b4 (hi, Konstantin!) and then was converted to be like
> this intentionally :/
>
> See 4f6923fbb352 ("mm: make should_failslab always available for fault
> injection")
>
> And now also kernel/bpf/verifier.c contains:
> BTF_ID(func, should_failslab)
>
> I think either your or Andrew's version will break this BTF_ID thing, at the
> very least.
>
> But I do strongly agree that putting unconditionally a non-inline call into
> slab allocator fastpath sucks. Can we make it so that bpf can only do these
> overrides when CONFIG_FAILSLAB is enabled?
> I don't know, perhaps putting this BTF_ID() in #ifdef as well, or providing
> a dummy that is always available (so that nothing breaks), but doesn't
> actually affect slab_pre_alloc_hook() unless CONFIG_FAILSLAB has been enabled?
That seems to be the right approach, limiting it to when the feature is
actually enabled and there's an actual function call to override.
--
Jens Axboe
On 10/5/21 17:31, Jens Axboe wrote:
> Allocations can be a very hot path, and this out-of-line function
> call is noticeable.
>
> Signed-off-by: Jens Axboe <[email protected]>
It used to be inline b4 (hi, Konstantin!) and then was converted to be like
this intentionally :/
See 4f6923fbb352 ("mm: make should_failslab always available for fault
injection")
And now also kernel/bpf/verifier.c contains:
BTF_ID(func, should_failslab)
I think either your or Andrew's version will break this BTF_ID thing, at the
very least.
But I do strongly agree that putting unconditionally a non-inline call into
slab allocator fastpath sucks. Can we make it so that bpf can only do these
overrides when CONFIG_FAILSLAB is enabled?
I don't know, perhaps putting this BTF_ID() in #ifdef as well, or providing
a dummy that is always available (so that nothing breaks), but doesn't
actually affect slab_pre_alloc_hook() unless CONFIG_FAILSLAB has been enabled?
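
To make the first option concrete, a minimal sketch of the
kernel/bpf/verifier.c side could be (illustrative only; whether the
surrounding BTF set is happy with a conditional entry, or would need a
placeholder instead, is an assumption I haven't verified):

#ifdef CONFIG_FAILSLAB
BTF_ID(func, should_failslab)
#endif

i.e. the override target is only advertised to bpf when the function it
points at actually exists as an out-of-line symbol.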
> ---
>
> diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h
> index e525f6957c49..3128d2c8b3b4 100644
> --- a/include/linux/fault-inject.h
> +++ b/include/linux/fault-inject.h
> @@ -64,8 +64,8 @@ static inline struct dentry *fault_create_debugfs_attr(const char *name,
>
> struct kmem_cache;
>
> -int should_failslab(struct kmem_cache *s, gfp_t gfpflags);
> #ifdef CONFIG_FAILSLAB
> +int should_failslab(struct kmem_cache *s, gfp_t gfpflags);
> extern bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags);
> #else
> static inline bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
> diff --git a/mm/slab.h b/mm/slab.h
> index 58c01a34e5b8..92fd6fe01877 100644
> --- a/mm/slab.h
> +++ b/mm/slab.h
> @@ -491,8 +491,10 @@ static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
>
> might_alloc(flags);
>
> +#ifdef CONFIG_FAILSLAB
> if (should_failslab(s, flags))
> return NULL;
> +#endif
>
> if (!memcg_slab_pre_alloc_hook(s, objcgp, size, flags))
> return NULL;
> diff --git a/mm/slab_common.c b/mm/slab_common.c
> index ec2bb0beed75..c21bd447f237 100644
> --- a/mm/slab_common.c
> +++ b/mm/slab_common.c
> @@ -1323,6 +1323,7 @@ EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node);
> EXPORT_TRACEPOINT_SYMBOL(kfree);
> EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free);
>
> +#ifdef CONFIG_FAILSLAB
> int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
> {
> if (__should_failslab(s, gfpflags))
> @@ -1330,3 +1331,4 @@ int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
> return 0;
> }
> ALLOW_ERROR_INJECTION(should_failslab, ERRNO);
> +#endif
>
+cc Linus
On Thu, Oct 07, 2021 at 05:32:52PM +0200, Vlastimil Babka wrote:
> On 10/5/21 17:31, Jens Axboe wrote:
> > Allocations can be a very hot path, and this out-of-line function
> > call is noticeable.
> >
> > Signed-off-by: Jens Axboe <[email protected]>
>
> It used to be inline b4 (hi, Konstantin!) and then was converted to be like
> this intentionally :/
>
> See 4f6923fbb352 ("mm: make should_failslab always available for fault
> injection")
>
> And now also kernel/bpf/verifier.c contains:
> BTF_ID(func, should_failslab)
>
> I think either your or Andrew's version will break this BTF_ID thing, at the
> very least.
>
> But I do strongly agree that putting unconditionally a non-inline call into
> slab allocator fastpath sucks. Can we make it so that bpf can only do these
> overrides when CONFIG_FAILSLAB is enabled?
> I don't know, perhaps putting this BTF_ID() in #ifdef as well, or providing
> a dummy that is always available (so that nothing breaks), but doesn't
> actually affect slab_pre_alloc_hook() unless CONFIG_FAILSLAB has been enabled?
>
I just ran into it while looking at kmalloc + kfree pair.
A toy test which calls this in a loop like so:
static long noinline custom_bench(void)
{
void *buf;

while (!signal_pending(current)) {
buf = kmalloc(16, GFP_KERNEL);
kfree(buf);
cond_resched();
}

return -EINTR;
}
... shows this with perf top:
57.88% [kernel] [k] kfree
31.38% [kernel] [k] kmalloc_trace_noprof
3.20% [kernel] [k] should_failslab.constprop.0
A side note is that I verified the majority of the time in kfree and
kmalloc_trace_noprof is spent in cmpxchg16b, which is both good and bad news.
As for should_failslab, it compiles to an empty func on production
kernels and is present even when there are no supported means of
instrumenting it. As in everyone pays for its existence, even if there
is no way to use it.
Also note there are 3 unrelated mechanisms to alter the return code,
which imo is 2 too many. But more importantly they are not even
coordinated.
A hard requirement for a long term solution is to not alter the fast
path beyond nops for hot patching.
So far I think implementing this in a clean manner would require
agreeing on some namespace for bpf ("failprobes"?) and coordinating
hotpatching between different mechanisms. Maybe there is a better way, I
don't know.
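
To illustrate the "nops only" requirement, something like a static key
could gate the call from slab_pre_alloc_hook(); the key name and helper
below are made up for the sake of the sketch, this is not a concrete
proposal:

#include <linux/jump_label.h>

/* Hypothetical key, armed only while failslab or a bpf override is in use. */
DEFINE_STATIC_KEY_FALSE(failslab_hook_active);

static __always_inline bool slab_should_fail(struct kmem_cache *s, gfp_t flags)
{
	/* A single nop in the fast path until static_branch_enable() flips it. */
	if (static_branch_unlikely(&failslab_hook_active))
		return should_failslab(s, flags);
	return false;
}

Whoever attaches fault injection or a bpf override would call
static_branch_enable(&failslab_hook_active) and disable it again on
detach, so nobody else pays for the hook.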
Here is the crux of my e-mail though:
1. turning should_failslab into a mandatory func call is an ok local
hack for the test farm, not a viable approach for production
2. as such it is up to the original submitter (or whoever else wants
to pick up the slack) to implement something which
hotpatches the callsite as opposed to inducing a function call for
everyone
In the meantime the routine should disappear unless explicitly included
in kernel config. The patch submitted here would be one way to do it.
On 5/27/24 11:34 AM, Mateusz Guzik wrote:
> +cc Linus
>
> On Thu, Oct 07, 2021 at 05:32:52PM +0200, Vlastimil Babka wrote:
>> On 10/5/21 17:31, Jens Axboe wrote:
>> > Allocations can be a very hot path, and this out-of-line function
>> > call is noticeable.
>> >
>> > Signed-off-by: Jens Axboe <[email protected]>
>>
>> It used to be inline b4 (hi, Konstantin!) and then was converted to be like
>> this intentionally :/
>>
>> See 4f6923fbb352 ("mm: make should_failslab always available for fault
>> injection")
>>
>> And now also kernel/bpf/verifier.c contains:
>> BTF_ID(func, should_failslab)
>>
>> I think either your or Andrew's version will break this BTF_ID thing, at the
>> very least.
>>
>> But I do strongly agree that putting unconditionally a non-inline call into
>> slab allocator fastpath sucks. Can we make it so that bpf can only do these
>> overrides when CONFIG_FAILSLAB is enabled?
>> I don't know, perhaps putting this BTF_ID() in #ifdef as well, or providing
>> a dummy that is always available (so that nothing breaks), but doesn't
>> actually affect slab_pre_alloc_hook() unless CONFIG_FAILSLAB has been enabled?
>>
>
> I just ran into it while looking at kmalloc + kfree pair.
>
> A toy test which calls this in a loop like so:
> static long noinline custom_bench(void)
> {
> void *buf;
>
> while (!signal_pending(current)) {
> buf = kmalloc(16, GFP_KERNEL);
> kfree(buf);
> cond_resched();
> }
>
> return -EINTR;
> }
>
> ... shows this with perf top:
> 57.88% [kernel] [k] kfree
> 31.38% [kernel] [k] kmalloc_trace_noprof
> 3.20% [kernel] [k] should_failslab.constprop.0
>
> A side note is that I verified the majority of the time in kfree and
> kmalloc_trace_noprof is spent in cmpxchg16b, which is both good and bad news.
>
> As for should_failslab, it compiles to an empty func on production
> kernels and is present even when there are no supported means of
> instrumenting it. As in everyone pays for its existence, even if there
> is no way to use it.
>
> Also note there are 3 unrelated mechanisms to alter the return code,
> which imo is 2 too many. But more importantly they are not even
> coordinated.
>
> A hard requirement for a long term solution is to not alter the fast
> path beyond nops for hot patching.
>
> So far I think implementing this in a clean manner would require
> agreeing on some namespace for bpf ("failprobes"?) and coordinating
> hotpatching between different mechanisms. Maybe there is a better way, I
> don't know.
I've attempted something (not complete yet) here:
https://lore.kernel.org/all/[email protected]/
> Here is the crux of my e-mail though:
> 1. turning should_failslab into a mandatory func call is an ok local
> hack for the test farm, not a viable approach for production
> 2. as such it is up to the original submitter (or whoever else wants
> to pick up the slack) to implement something which
> hotpatches the callsite as opposed to inducing a function call for
> everyone
>
> In the meantime the routine should disappear unless explicitly included
> in kernel config. The patch submitted here would be one way to do it.