2022-11-18 10:11:20

by Qi Zheng

[permalink] [raw]
Subject: [PATCH v3] mm: fix unexpected changes to {failslab|fail_page_alloc}.attr

When we specify __GFP_NOWARN, we only expect that no warnings
will be issued for the current caller. But in __should_failslab()
and __should_fail_alloc_page(), the local GFP flags alter the
global {failslab|fail_page_alloc}.attr, which is persistent and
shared by all tasks. This is not what we expect, so let's fix it.

Cc: [email protected]
Fixes: 3f913fc5f974 ("mm: fix missing handler for __GFP_NOWARN")
Reported-by: Dmitry Vyukov <[email protected]>
Signed-off-by: Qi Zheng <[email protected]>
Reviewed-by: Akinobu Mita <[email protected]>
---
v1: https://lore.kernel.org/lkml/[email protected]/
v2: https://lore.kernel.org/lkml/[email protected]/

Changelog in v2 -> v3:
- collect Reviewed-by
- rebase onto the next-20221118

Changelog in v1 -> v2:
- add comment for __should_failslab() and __should_fail_alloc_page()
(suggested by Jason)

include/linux/fault-inject.h | 7 +++++--
lib/fault-inject.c | 14 +++++++++-----
mm/failslab.c | 12 ++++++++++--
mm/page_alloc.c | 7 +++++--
4 files changed, 29 insertions(+), 11 deletions(-)

diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h
index 9f6e25467844..444236dadcf0 100644
--- a/include/linux/fault-inject.h
+++ b/include/linux/fault-inject.h
@@ -20,7 +20,6 @@ struct fault_attr {
atomic_t space;
unsigned long verbose;
bool task_filter;
- bool no_warn;
unsigned long stacktrace_depth;
unsigned long require_start;
unsigned long require_end;
@@ -32,6 +31,10 @@ struct fault_attr {
struct dentry *dname;
};

+enum fault_flags {
+ FAULT_NOWARN = 1 << 0,
+};
+
#define FAULT_ATTR_INITIALIZER { \
.interval = 1, \
.times = ATOMIC_INIT(1), \
@@ -40,11 +43,11 @@ struct fault_attr {
.ratelimit_state = RATELIMIT_STATE_INIT_DISABLED, \
.verbose = 2, \
.dname = NULL, \
- .no_warn = false, \
}

#define DECLARE_FAULT_ATTR(name) struct fault_attr name = FAULT_ATTR_INITIALIZER
int setup_fault_attr(struct fault_attr *attr, char *str);
+bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags);
bool should_fail(struct fault_attr *attr, ssize_t size);

#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
diff --git a/lib/fault-inject.c b/lib/fault-inject.c
index 4b8fafce415c..5971f7c3e49e 100644
--- a/lib/fault-inject.c
+++ b/lib/fault-inject.c
@@ -41,9 +41,6 @@ EXPORT_SYMBOL_GPL(setup_fault_attr);

static void fail_dump(struct fault_attr *attr)
{
- if (attr->no_warn)
- return;
-
if (attr->verbose > 0 && __ratelimit(&attr->ratelimit_state)) {
printk(KERN_NOTICE "FAULT_INJECTION: forcing a failure.\n"
"name %pd, interval %lu, probability %lu, "
@@ -103,7 +100,7 @@ static inline bool fail_stacktrace(struct fault_attr *attr)
* http://www.nongnu.org/failmalloc/
*/

-bool should_fail(struct fault_attr *attr, ssize_t size)
+bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags)
{
bool stack_checked = false;

@@ -152,13 +149,20 @@ bool should_fail(struct fault_attr *attr, ssize_t size)
return false;

fail:
- fail_dump(attr);
+ if (!(flags & FAULT_NOWARN))
+ fail_dump(attr);

if (atomic_read(&attr->times) != -1)
atomic_dec_not_zero(&attr->times);

return true;
}
+EXPORT_SYMBOL_GPL(should_fail_ex);
+
+bool should_fail(struct fault_attr *attr, ssize_t size)
+{
+ return should_fail_ex(attr, size, 0);
+}
EXPORT_SYMBOL_GPL(should_fail);

#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
diff --git a/mm/failslab.c b/mm/failslab.c
index 58df9789f1d2..ffc420c0e767 100644
--- a/mm/failslab.c
+++ b/mm/failslab.c
@@ -16,6 +16,8 @@ static struct {

bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
{
+ int flags = 0;
+
/* No fault-injection for bootstrap cache */
if (unlikely(s == kmem_cache))
return false;
@@ -30,10 +32,16 @@ bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
if (failslab.cache_filter && !(s->flags & SLAB_FAILSLAB))
return false;

+ /*
+ * In some cases, the caller specifies __GFP_NOWARN to
+ * avoid printing any information (not just warnings),
+ * thus avoiding deadlocks. See commit 6b9dbedbe349 for
+ * details.
+ */
if (gfpflags & __GFP_NOWARN)
- failslab.attr.no_warn = true;
+ flags |= FAULT_NOWARN;

- return should_fail(&failslab.attr, s->object_size);
+ return should_fail_ex(&failslab.attr, s->object_size, flags);
}

static int __init setup_failslab(char *str)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index f7a63684e6c4..baf97166172c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3909,6 +3909,8 @@ __setup("fail_page_alloc=", setup_fail_page_alloc);

static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
{
+ int flags = 0;
+
if (order < fail_page_alloc.min_order)
return false;
if (gfp_mask & __GFP_NOFAIL)
@@ -3919,10 +3921,11 @@ static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
(gfp_mask & __GFP_DIRECT_RECLAIM))
return false;

+ /* See comment in __should_failslab() */
if (gfp_mask & __GFP_NOWARN)
- fail_page_alloc.attr.no_warn = true;
+ flags |= FAULT_NOWARN;

- return should_fail(&fail_page_alloc.attr, 1 << order);
+ return should_fail_ex(&fail_page_alloc.attr, 1 << order, flags);
}

#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
--
2.20.1



2022-11-18 13:08:34

by Jason Gunthorpe

[permalink] [raw]
Subject: Re: [PATCH v3] mm: fix unexpected changes to {failslab|fail_page_alloc}.attr

On Fri, Nov 18, 2022 at 06:00:11PM +0800, Qi Zheng wrote:
> When we specify __GFP_NOWARN, we only expect that no warnings
> will be issued for current caller. But in the __should_failslab()
> and __should_fail_alloc_page(), the local GFP flags alter the
> global {failslab|fail_page_alloc}.attr, which is persistent and
> shared by all tasks. This is not what we expected, let's fix it.
>
> Cc: [email protected]
> Fixes: 3f913fc5f974 ("mm: fix missing handler for __GFP_NOWARN")
> Reported-by: Dmitry Vyukov <[email protected]>
> Signed-off-by: Qi Zheng <[email protected]>
> Reviewed-by: Akinobu Mita <[email protected]>
> ---
> v1: https://lore.kernel.org/lkml/[email protected]/
> v2: https://lore.kernel.org/lkml/[email protected]/

Reviewed-by: Jason Gunthorpe <[email protected]>

Jason

2022-11-18 22:04:38

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH v3] mm: fix unexpected changes to {failslab|fail_page_alloc}.attr

On Fri, 18 Nov 2022 18:00:11 +0800 Qi Zheng <[email protected]> wrote:

> When we specify __GFP_NOWARN, we only expect that no warnings
> will be issued for current caller. But in the __should_failslab()
> and __should_fail_alloc_page(), the local GFP flags alter the
> global {failslab|fail_page_alloc}.attr, which is persistent and
> shared by all tasks. This is not what we expected, let's fix it.
>
> Cc: [email protected]
> Fixes: 3f913fc5f974 ("mm: fix missing handler for __GFP_NOWARN")
> Reported-by: Dmitry Vyukov <[email protected]>
> Signed-off-by: Qi Zheng <[email protected]>
> Reviewed-by: Akinobu Mita <[email protected]>
>
> ...
>
> -bool should_fail(struct fault_attr *attr, ssize_t size)
> +bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags)
> {
> bool stack_checked = false;
>
> @@ -152,13 +149,20 @@ bool should_fail(struct fault_attr *attr, ssize_t size)
> return false;
>
> fail:
> - fail_dump(attr);
> + if (!(flags & FAULT_NOWARN))
> + fail_dump(attr);
>
> if (atomic_read(&attr->times) != -1)
> atomic_dec_not_zero(&attr->times);
>
> return true;
> }
> +EXPORT_SYMBOL_GPL(should_fail_ex);

I don't see a need to export this?



2022-11-19 01:48:10

by Qi Zheng

[permalink] [raw]
Subject: Re: [PATCH v3] mm: fix unexpected changes to {failslab|fail_page_alloc}.attr



On 2022/11/19 05:42, Andrew Morton wrote:
> On Fri, 18 Nov 2022 18:00:11 +0800 Qi Zheng <[email protected]> wrote:
>
>> When we specify __GFP_NOWARN, we only expect that no warnings
>> will be issued for current caller. But in the __should_failslab()
>> and __should_fail_alloc_page(), the local GFP flags alter the
>> global {failslab|fail_page_alloc}.attr, which is persistent and
>> shared by all tasks. This is not what we expected, let's fix it.
>>
>> Cc: [email protected]
>> Fixes: 3f913fc5f974 ("mm: fix missing handler for __GFP_NOWARN")
>> Reported-by: Dmitry Vyukov <[email protected]>
>> Signed-off-by: Qi Zheng <[email protected]>
>> Reviewed-by: Akinobu Mita <[email protected]>
>>
>> ...
>>
>> -bool should_fail(struct fault_attr *attr, ssize_t size)
>> +bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags)
>> {
>> bool stack_checked = false;
>>
>> @@ -152,13 +149,20 @@ bool should_fail(struct fault_attr *attr, ssize_t size)
>> return false;
>>
>> fail:
>> - fail_dump(attr);
>> + if (!(flags & FAULT_NOWARN))
>> + fail_dump(attr);
>>
>> if (atomic_read(&attr->times) != -1)
>> atomic_dec_not_zero(&attr->times);
>>
>> return true;
>> }
>> +EXPORT_SYMBOL_GPL(should_fail_ex);
>
> I don't see a need to export this?

Yes, my initial thought was that there might be a driver using this
function, but there really isn't one yet.

And I see you've already removed the export for me, thanks a lot. :)

>
>

--
Thanks,
Qi