2022-11-07 03:58:54

by Qi Zheng

[permalink] [raw]
Subject: [PATCH] mm: fix unexpected changes to {failslab|fail_page_alloc}.attr

When we specify __GFP_NOWARN, we only expect that no warnings
will be issued for the current caller. But in __should_failslab()
and __should_fail_alloc_page(), the local GFP flags alter the
global {failslab|fail_page_alloc}.attr, which is persistent and
shared by all tasks. This is not what we expect, so let's fix it.

Cc: [email protected]
Fixes: 3f913fc5f974 ("mm: fix missing handler for __GFP_NOWARN")
Reported-by: Dmitry Vyukov <[email protected]>
Signed-off-by: Qi Zheng <[email protected]>
---
include/linux/fault-inject.h | 7 +++++--
lib/fault-inject.c | 14 +++++++++-----
mm/failslab.c | 6 ++++--
mm/page_alloc.c | 6 ++++--
4 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h
index 9f6e25467844..444236dadcf0 100644
--- a/include/linux/fault-inject.h
+++ b/include/linux/fault-inject.h
@@ -20,7 +20,6 @@ struct fault_attr {
atomic_t space;
unsigned long verbose;
bool task_filter;
- bool no_warn;
unsigned long stacktrace_depth;
unsigned long require_start;
unsigned long require_end;
@@ -32,6 +31,10 @@ struct fault_attr {
struct dentry *dname;
};

+enum fault_flags {
+ FAULT_NOWARN = 1 << 0,
+};
+
#define FAULT_ATTR_INITIALIZER { \
.interval = 1, \
.times = ATOMIC_INIT(1), \
@@ -40,11 +43,11 @@ struct fault_attr {
.ratelimit_state = RATELIMIT_STATE_INIT_DISABLED, \
.verbose = 2, \
.dname = NULL, \
- .no_warn = false, \
}

#define DECLARE_FAULT_ATTR(name) struct fault_attr name = FAULT_ATTR_INITIALIZER
int setup_fault_attr(struct fault_attr *attr, char *str);
+bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags);
bool should_fail(struct fault_attr *attr, ssize_t size);

#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
diff --git a/lib/fault-inject.c b/lib/fault-inject.c
index 4b8fafce415c..5971f7c3e49e 100644
--- a/lib/fault-inject.c
+++ b/lib/fault-inject.c
@@ -41,9 +41,6 @@ EXPORT_SYMBOL_GPL(setup_fault_attr);

static void fail_dump(struct fault_attr *attr)
{
- if (attr->no_warn)
- return;
-
if (attr->verbose > 0 && __ratelimit(&attr->ratelimit_state)) {
printk(KERN_NOTICE "FAULT_INJECTION: forcing a failure.\n"
"name %pd, interval %lu, probability %lu, "
@@ -103,7 +100,7 @@ static inline bool fail_stacktrace(struct fault_attr *attr)
* http://www.nongnu.org/failmalloc/
*/

-bool should_fail(struct fault_attr *attr, ssize_t size)
+bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags)
{
bool stack_checked = false;

@@ -152,13 +149,20 @@ bool should_fail(struct fault_attr *attr, ssize_t size)
return false;

fail:
- fail_dump(attr);
+ if (!(flags & FAULT_NOWARN))
+ fail_dump(attr);

if (atomic_read(&attr->times) != -1)
atomic_dec_not_zero(&attr->times);

return true;
}
+EXPORT_SYMBOL_GPL(should_fail_ex);
+
+bool should_fail(struct fault_attr *attr, ssize_t size)
+{
+ return should_fail_ex(attr, size, 0);
+}
EXPORT_SYMBOL_GPL(should_fail);

#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
diff --git a/mm/failslab.c b/mm/failslab.c
index 58df9789f1d2..fc046f26606c 100644
--- a/mm/failslab.c
+++ b/mm/failslab.c
@@ -16,6 +16,8 @@ static struct {

bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
{
+ int flags = 0;
+
/* No fault-injection for bootstrap cache */
if (unlikely(s == kmem_cache))
return false;
@@ -31,9 +33,9 @@ bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
return false;

if (gfpflags & __GFP_NOWARN)
- failslab.attr.no_warn = true;
+ flags |= FAULT_NOWARN;

- return should_fail(&failslab.attr, s->object_size);
+ return should_fail_ex(&failslab.attr, s->object_size, flags);
}

static int __init setup_failslab(char *str)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 7192ded44ad0..e537d3a950a4 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3902,6 +3902,8 @@ __setup("fail_page_alloc=", setup_fail_page_alloc);

static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
{
+ int flags = 0;
+
if (order < fail_page_alloc.min_order)
return false;
if (gfp_mask & __GFP_NOFAIL)
@@ -3913,9 +3915,9 @@ static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
return false;

if (gfp_mask & __GFP_NOWARN)
- fail_page_alloc.attr.no_warn = true;
+ flags |= FAULT_NOWARN;

- return should_fail(&fail_page_alloc.attr, 1 << order);
+ return should_fail_ex(&fail_page_alloc.attr, 1 << order, flags);
}

#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
--
2.20.1



2022-11-07 12:52:49

by Jason Gunthorpe

[permalink] [raw]
Subject: Re: [PATCH] mm: fix unexpected changes to {failslab|fail_page_alloc}.attr

On Mon, Nov 07, 2022 at 11:31:09AM +0800, Qi Zheng wrote:

> @@ -31,9 +33,9 @@ bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
> return false;
>
> if (gfpflags & __GFP_NOWARN)
> - failslab.attr.no_warn = true;
> + flags |= FAULT_NOWARN;

You should add a comment here about why this is required, to avoid
deadlocking printk

Jason

2022-11-07 16:19:07

by Qi Zheng

[permalink] [raw]
Subject: Re: [PATCH] mm: fix unexpected changes to {failslab|fail_page_alloc}.attr



On 2022/11/7 20:42, Jason Gunthorpe wrote:
> On Mon, Nov 07, 2022 at 11:31:09AM +0800, Qi Zheng wrote:
>
>> @@ -31,9 +33,9 @@ bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
>> return false;
>>
>> if (gfpflags & __GFP_NOWARN)
>> - failslab.attr.no_warn = true;
>> + flags |= FAULT_NOWARN;
>
> You should add a comment here about why this is required, to avoid
> deadlocking printk

I think this comment should be placed where __GFP_NOWARN is specified
instead of here. What do you think? :)

Thanks,
Qi

>
> Jason

--
Thanks,
Qi

2022-11-07 17:07:08

by Jason Gunthorpe

[permalink] [raw]
Subject: Re: [PATCH] mm: fix unexpected changes to {failslab|fail_page_alloc}.attr

On Mon, Nov 07, 2022 at 11:05:42PM +0800, Qi Zheng wrote:
>
>
> On 2022/11/7 20:42, Jason Gunthorpe wrote:
> > On Mon, Nov 07, 2022 at 11:31:09AM +0800, Qi Zheng wrote:
> >
> > > @@ -31,9 +33,9 @@ bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
> > > return false;
> > > if (gfpflags & __GFP_NOWARN)
> > > - failslab.attr.no_warn = true;
> > > + flags |= FAULT_NOWARN;
> >
> > You should add a comment here about why this is required, to avoid
> > deadlocking printk
>
> I think this comment should be placed where __GFP_NOWARN is specified
> instead of here. What do you think? :)

NOWARN is clear what it does, it is this specifically that is very
subtle about avoiding deadlock against allocations triggered by
printk/etc code.

Jason

2022-11-08 02:55:45

by Qi Zheng

[permalink] [raw]
Subject: Re: [PATCH] mm: fix unexpected changes to {failslab|fail_page_alloc}.attr



On 2022/11/8 00:26, Jason Gunthorpe wrote:
> On Mon, Nov 07, 2022 at 11:05:42PM +0800, Qi Zheng wrote:
>>
>>
>> On 2022/11/7 20:42, Jason Gunthorpe wrote:
>>> On Mon, Nov 07, 2022 at 11:31:09AM +0800, Qi Zheng wrote:
>>>
>>>> @@ -31,9 +33,9 @@ bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
>>>> return false;
>>>> if (gfpflags & __GFP_NOWARN)
>>>> - failslab.attr.no_warn = true;
>>>> + flags |= FAULT_NOWARN;
>>>
>>> You should add a comment here about why this is required, to avoid
>>> deadlocking printk
>>
>> I think this comment should be placed where __GFP_NOWARN is specified
>> instead of here. What do you think? :)
>
> NOWARN is clear what it does, it is this specifically that is very
> subtle about avoiding deadlock against allocations triggered by
> printk/etc code.

Oh, maybe I understand your concern. Some people may think that this
is just a print of fault injection information, not a warning. I'll
add a comment explaining why in some cases there must be no printing.

Thanks,
Qi

>
> Jason

--
Thanks,
Qi

2022-11-08 04:14:18

by Qi Zheng

[permalink] [raw]
Subject: [PATCH v2] mm: fix unexpected changes to {failslab|fail_page_alloc}.attr

When we specify __GFP_NOWARN, we only expect that no warnings
will be issued for the current caller. But in __should_failslab()
and __should_fail_alloc_page(), the local GFP flags alter the
global {failslab|fail_page_alloc}.attr, which is persistent and
shared by all tasks. This is not what we expect, so let's fix it.

Cc: [email protected]
Fixes: 3f913fc5f974 ("mm: fix missing handler for __GFP_NOWARN")
Reported-by: Dmitry Vyukov <[email protected]>
Signed-off-by: Qi Zheng <[email protected]>
---
v1: https://lore.kernel.org/lkml/[email protected]/

Changelog in v1 -> v2:
- add comment for __should_failslab() and __should_fail_alloc_page()
(suggested by Jason)

include/linux/fault-inject.h | 7 +++++--
lib/fault-inject.c | 14 +++++++++-----
mm/failslab.c | 12 ++++++++++--
mm/page_alloc.c | 7 +++++--
4 files changed, 29 insertions(+), 11 deletions(-)

diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h
index 9f6e25467844..444236dadcf0 100644
--- a/include/linux/fault-inject.h
+++ b/include/linux/fault-inject.h
@@ -20,7 +20,6 @@ struct fault_attr {
atomic_t space;
unsigned long verbose;
bool task_filter;
- bool no_warn;
unsigned long stacktrace_depth;
unsigned long require_start;
unsigned long require_end;
@@ -32,6 +31,10 @@ struct fault_attr {
struct dentry *dname;
};

+enum fault_flags {
+ FAULT_NOWARN = 1 << 0,
+};
+
#define FAULT_ATTR_INITIALIZER { \
.interval = 1, \
.times = ATOMIC_INIT(1), \
@@ -40,11 +43,11 @@ struct fault_attr {
.ratelimit_state = RATELIMIT_STATE_INIT_DISABLED, \
.verbose = 2, \
.dname = NULL, \
- .no_warn = false, \
}

#define DECLARE_FAULT_ATTR(name) struct fault_attr name = FAULT_ATTR_INITIALIZER
int setup_fault_attr(struct fault_attr *attr, char *str);
+bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags);
bool should_fail(struct fault_attr *attr, ssize_t size);

#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
diff --git a/lib/fault-inject.c b/lib/fault-inject.c
index 4b8fafce415c..5971f7c3e49e 100644
--- a/lib/fault-inject.c
+++ b/lib/fault-inject.c
@@ -41,9 +41,6 @@ EXPORT_SYMBOL_GPL(setup_fault_attr);

static void fail_dump(struct fault_attr *attr)
{
- if (attr->no_warn)
- return;
-
if (attr->verbose > 0 && __ratelimit(&attr->ratelimit_state)) {
printk(KERN_NOTICE "FAULT_INJECTION: forcing a failure.\n"
"name %pd, interval %lu, probability %lu, "
@@ -103,7 +100,7 @@ static inline bool fail_stacktrace(struct fault_attr *attr)
* http://www.nongnu.org/failmalloc/
*/

-bool should_fail(struct fault_attr *attr, ssize_t size)
+bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags)
{
bool stack_checked = false;

@@ -152,13 +149,20 @@ bool should_fail(struct fault_attr *attr, ssize_t size)
return false;

fail:
- fail_dump(attr);
+ if (!(flags & FAULT_NOWARN))
+ fail_dump(attr);

if (atomic_read(&attr->times) != -1)
atomic_dec_not_zero(&attr->times);

return true;
}
+EXPORT_SYMBOL_GPL(should_fail_ex);
+
+bool should_fail(struct fault_attr *attr, ssize_t size)
+{
+ return should_fail_ex(attr, size, 0);
+}
EXPORT_SYMBOL_GPL(should_fail);

#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
diff --git a/mm/failslab.c b/mm/failslab.c
index 58df9789f1d2..ffc420c0e767 100644
--- a/mm/failslab.c
+++ b/mm/failslab.c
@@ -16,6 +16,8 @@ static struct {

bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
{
+ int flags = 0;
+
/* No fault-injection for bootstrap cache */
if (unlikely(s == kmem_cache))
return false;
@@ -30,10 +32,16 @@ bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
if (failslab.cache_filter && !(s->flags & SLAB_FAILSLAB))
return false;

+ /*
+ * In some cases, it expects to specify __GFP_NOWARN
+ * to avoid printing any information(not just a warning),
+ * thus avoiding deadlocks. See commit 6b9dbedbe349 for
+ * details.
+ */
if (gfpflags & __GFP_NOWARN)
- failslab.attr.no_warn = true;
+ flags |= FAULT_NOWARN;

- return should_fail(&failslab.attr, s->object_size);
+ return should_fail_ex(&failslab.attr, s->object_size, flags);
}

static int __init setup_failslab(char *str)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 7192ded44ad0..cb6fe715d983 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3902,6 +3902,8 @@ __setup("fail_page_alloc=", setup_fail_page_alloc);

static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
{
+ int flags = 0;
+
if (order < fail_page_alloc.min_order)
return false;
if (gfp_mask & __GFP_NOFAIL)
@@ -3912,10 +3914,11 @@ static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
(gfp_mask & __GFP_DIRECT_RECLAIM))
return false;

+ /* See comment in __should_failslab() */
if (gfp_mask & __GFP_NOWARN)
- fail_page_alloc.attr.no_warn = true;
+ flags |= FAULT_NOWARN;

- return should_fail(&fail_page_alloc.attr, 1 << order);
+ return should_fail_ex(&fail_page_alloc.attr, 1 << order, flags);
}

#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
--
2.20.1


2022-11-08 09:23:10

by Qi Zheng

[permalink] [raw]
Subject: Re: [PATCH v2] mm: fix unexpected changes to {failslab|fail_page_alloc}.attr



On 2022/11/8 16:44, Wei Yongjun wrote:
> Hi Zheng Qi,
>
> On 2022/11/8 11:52, Qi Zheng wrote:
>> When we specify __GFP_NOWARN, we only expect that no warnings
>> will be issued for current caller. But in the __should_failslab()
>> and __should_fail_alloc_page(), the local GFP flags alter the
>> global {failslab|fail_page_alloc}.attr, which is persistent and
>> shared by all tasks. This is not what we expected, let's fix it.
>>
>> Cc: [email protected]
>> Fixes: 3f913fc5f974 ("mm: fix missing handler for __GFP_NOWARN")
>> Reported-by: Dmitry Vyukov <[email protected]>
>> Signed-off-by: Qi Zheng <[email protected]>
>> ---
>> v1: https://lore.kernel.org/lkml/[email protected]/
>>
>> Changelog in v1 -> v2:
>> - add comment for __should_failslab() and __should_fail_alloc_page()
>> (suggested by Jason)
>>
>> include/linux/fault-inject.h | 7 +++++--
>> lib/fault-inject.c | 14 +++++++++-----
>> mm/failslab.c | 12 ++++++++++--
>> mm/page_alloc.c | 7 +++++--
>> 4 files changed, 29 insertions(+), 11 deletions(-)
>>
>> diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h
>> index 9f6e25467844..444236dadcf0 100644
>> --- a/include/linux/fault-inject.h
>> +++ b/include/linux/fault-inject.h
>> @@ -20,7 +20,6 @@ struct fault_attr {
>> atomic_t space;
>> unsigned long verbose;
>> bool task_filter;
>> - bool no_warn;
>> unsigned long stacktrace_depth;
>> unsigned long require_start;
>> unsigned long require_end;
>> @@ -32,6 +31,10 @@ struct fault_attr {
>> struct dentry *dname;
>> };
>>
>> +enum fault_flags {
>> + FAULT_NOWARN = 1 << 0,
>> +};
>> +
>> #define FAULT_ATTR_INITIALIZER { \
>> .interval = 1, \
>> .times = ATOMIC_INIT(1), \
>> @@ -40,11 +43,11 @@ struct fault_attr {
>> .ratelimit_state = RATELIMIT_STATE_INIT_DISABLED, \
>> .verbose = 2, \
>> .dname = NULL, \
>> - .no_warn = false, \
>
> How about keeping the no_warn attr as it is, and exporting it to users?
>
> When testing with fault injection, each fault will print a backtrace,
> but not every test suite can tell, as syzkaller does, which one is a
> fault injection message and which is a real warning/crash.
>
> In my case, to make things simple, we usually use a regex to detect whether
> a warning/error happened. So we disable the slab/page fault warning message by
> default, and only enable it when debugging a real issue.

So you want to set/clear this no_warn attr through the procfs or sysfs
interface, so that you can easily disable/enable the slab/page fault
warning message from the user mode. Right?

Seems reasonable to me. Does anyone else have an opinion on this? If it is
really needed, I can do it later.

Thanks,
Qi

>
> Regards,
>
>
>> }
>>
>> #define DECLARE_FAULT_ATTR(name) struct fault_attr name = FAULT_ATTR_INITIALIZER
>> int setup_fault_attr(struct fault_attr *attr, char *str);
>> +bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags);
>> bool should_fail(struct fault_attr *attr, ssize_t size);
>>
>> #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
>> diff --git a/lib/fault-inject.c b/lib/fault-inject.c
>> index 4b8fafce415c..5971f7c3e49e 100644
>> --- a/lib/fault-inject.c
>> +++ b/lib/fault-inject.c
>> @@ -41,9 +41,6 @@ EXPORT_SYMBOL_GPL(setup_fault_attr);
>>
>> static void fail_dump(struct fault_attr *attr)
>> {
>> - if (attr->no_warn)
>> - return;
>> -
>> if (attr->verbose > 0 && __ratelimit(&attr->ratelimit_state)) {
>> printk(KERN_NOTICE "FAULT_INJECTION: forcing a failure.\n"
>> "name %pd, interval %lu, probability %lu, "
>> @@ -103,7 +100,7 @@ static inline bool fail_stacktrace(struct fault_attr *attr)
>> * http://www.nongnu.org/failmalloc/
>> */
>>
>> -bool should_fail(struct fault_attr *attr, ssize_t size)
>> +bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags)
>> {
>> bool stack_checked = false;
>>
>> @@ -152,13 +149,20 @@ bool should_fail(struct fault_attr *attr, ssize_t size)
>> return false;
>>
>> fail:
>> - fail_dump(attr);
>> + if (!(flags & FAULT_NOWARN))
>> + fail_dump(attr);
>>
>> if (atomic_read(&attr->times) != -1)
>> atomic_dec_not_zero(&attr->times);
>>
>> return true;
>> }
>> +EXPORT_SYMBOL_GPL(should_fail_ex);
>> +
>> +bool should_fail(struct fault_attr *attr, ssize_t size)
>> +{
>> + return should_fail_ex(attr, size, 0);
>> +}
>> EXPORT_SYMBOL_GPL(should_fail);
>>
>> #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
>> diff --git a/mm/failslab.c b/mm/failslab.c
>> index 58df9789f1d2..ffc420c0e767 100644
>> --- a/mm/failslab.c
>> +++ b/mm/failslab.c
>> @@ -16,6 +16,8 @@ static struct {
>>
>> bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
>> {
>> + int flags = 0;
>> +
>> /* No fault-injection for bootstrap cache */
>> if (unlikely(s == kmem_cache))
>> return false;
>> @@ -30,10 +32,16 @@ bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
>> if (failslab.cache_filter && !(s->flags & SLAB_FAILSLAB))
>> return false;
>>
>> + /*
>> + * In some cases, it expects to specify __GFP_NOWARN
>> + * to avoid printing any information(not just a warning),
>> + * thus avoiding deadlocks. See commit 6b9dbedbe349 for
>> + * details.
>> + */
>> if (gfpflags & __GFP_NOWARN)
>> - failslab.attr.no_warn = true;
>> + flags |= FAULT_NOWARN;
>>
>> - return should_fail(&failslab.attr, s->object_size);
>> + return should_fail_ex(&failslab.attr, s->object_size, flags);
>> }
>>
>> static int __init setup_failslab(char *str)
>> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
>> index 7192ded44ad0..cb6fe715d983 100644
>> --- a/mm/page_alloc.c
>> +++ b/mm/page_alloc.c
>> @@ -3902,6 +3902,8 @@ __setup("fail_page_alloc=", setup_fail_page_alloc);
>>
>> static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
>> {
>> + int flags = 0;
>> +
>> if (order < fail_page_alloc.min_order)
>> return false;
>> if (gfp_mask & __GFP_NOFAIL)
>> @@ -3912,10 +3914,11 @@ static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
>> (gfp_mask & __GFP_DIRECT_RECLAIM))
>> return false;
>>
>> + /* See comment in __should_failslab() */
>> if (gfp_mask & __GFP_NOWARN)
>> - fail_page_alloc.attr.no_warn = true;
>> + flags |= FAULT_NOWARN;
>>
>> - return should_fail(&fail_page_alloc.attr, 1 << order);
>> + return should_fail_ex(&fail_page_alloc.attr, 1 << order, flags);
>> }
>>
>> #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS

--
Thanks,
Qi

2022-11-08 10:32:23

by Qi Zheng

[permalink] [raw]
Subject: Re: [PATCH v2] mm: fix unexpected changes to {failslab|fail_page_alloc}.attr



On 2022/11/8 17:32, Wei Yongjun wrote:
>
>
> On 2022/11/8 16:58, Qi Zheng wrote:
>>
>>
>> On 2022/11/8 16:44, Wei Yongjun wrote:
>>> Hi Zheng Qi,
>>>
>>> On 2022/11/8 11:52, Qi Zheng wrote:
>>>> When we specify __GFP_NOWARN, we only expect that no warnings
>>>> will be issued for current caller. But in the __should_failslab()
>>>> and __should_fail_alloc_page(), the local GFP flags alter the
>>>> global {failslab|fail_page_alloc}.attr, which is persistent and
>>>> shared by all tasks. This is not what we expected, let's fix it.
>>>>
>>>> Cc: [email protected]
>>>> Fixes: 3f913fc5f974 ("mm: fix missing handler for __GFP_NOWARN")
>>>> Reported-by: Dmitry Vyukov <[email protected]>
>>>> Signed-off-by: Qi Zheng <[email protected]>
>>>> ---
>>>>   v1: https://lore.kernel.org/lkml/[email protected]/
>>>>
>>>>   Changelog in v1 -> v2:
>>>>    - add comment for __should_failslab() and __should_fail_alloc_page()
>>>>      (suggested by Jason)
>>>>
>>>>   include/linux/fault-inject.h |  7 +++++--
>>>>   lib/fault-inject.c           | 14 +++++++++-----
>>>>   mm/failslab.c                | 12 ++++++++++--
>>>>   mm/page_alloc.c              |  7 +++++--
>>>>   4 files changed, 29 insertions(+), 11 deletions(-)
>>>>
>>>> diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h
>>>> index 9f6e25467844..444236dadcf0 100644
>>>> --- a/include/linux/fault-inject.h
>>>> +++ b/include/linux/fault-inject.h
>>>> @@ -20,7 +20,6 @@ struct fault_attr {
>>>>       atomic_t space;
>>>>       unsigned long verbose;
>>>>       bool task_filter;
>>>> -    bool no_warn;
>>>>       unsigned long stacktrace_depth;
>>>>       unsigned long require_start;
>>>>       unsigned long require_end;
>>>> @@ -32,6 +31,10 @@ struct fault_attr {
>>>>       struct dentry *dname;
>>>>   };
>>>>   +enum fault_flags {
>>>> +    FAULT_NOWARN =    1 << 0,
>>>> +};
>>>> +
>>>>   #define FAULT_ATTR_INITIALIZER {                    \
>>>>           .interval = 1,                        \
>>>>           .times = ATOMIC_INIT(1),                \
>>>> @@ -40,11 +43,11 @@ struct fault_attr {
>>>>           .ratelimit_state = RATELIMIT_STATE_INIT_DISABLED,    \
>>>>           .verbose = 2,                        \
>>>>           .dname = NULL,                        \
>>>> -        .no_warn = false,                    \
>>>
>>> How about keeping the no_warn attr as it is, and exporting it to users?
>>>
>>> When testing with fault injection, each fault will print a backtrace,
>>> but not every test suite can tell, as syzkaller does, which one is a
>>> fault injection message and which is a real warning/crash.
>>>
>>> In my case, to make things simple, we usually use a regex to detect whether
>>> a warning/error happened. So we disable the slab/page fault warning message by
>>> default, and only enable it when debugging a real issue.
>>
>> So you want to set/clear this no_warn attr through the procfs or sysfs
>> interface, so that you can easily disable/enable the slab/page fault
>> warning message from the user mode. Right?
>
> Yes, just like:
>
> echo 1 > /sys/kernel/debug/failslab/no_warn #disable message
> echo 0 > /sys/kernel/debug/failslab/no_warn #enable message

Got it. Let's wait for the other people's comments and suggestions. :)

>
> Regards
> Wei Yongjun
>
>>
>> Seems reasonable to me. Anyone else has an opinion on this? If it is
>> really needed, I can do it later.
>>
>> Thanks,
>> Qi
>>
>>>
>>> Regards,
>>>
>>>
>>>>       }
>>>>     #define DECLARE_FAULT_ATTR(name) struct fault_attr name = FAULT_ATTR_INITIALIZER
>>>>   int setup_fault_attr(struct fault_attr *attr, char *str);
>>>> +bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags);
>>>>   bool should_fail(struct fault_attr *attr, ssize_t size);
>>>>     #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
>>>> diff --git a/lib/fault-inject.c b/lib/fault-inject.c
>>>> index 4b8fafce415c..5971f7c3e49e 100644
>>>> --- a/lib/fault-inject.c
>>>> +++ b/lib/fault-inject.c
>>>> @@ -41,9 +41,6 @@ EXPORT_SYMBOL_GPL(setup_fault_attr);
>>>>     static void fail_dump(struct fault_attr *attr)
>>>>   {
>>>> -    if (attr->no_warn)
>>>> -        return;
>>>> -
>>>>       if (attr->verbose > 0 && __ratelimit(&attr->ratelimit_state)) {
>>>>           printk(KERN_NOTICE "FAULT_INJECTION: forcing a failure.\n"
>>>>                  "name %pd, interval %lu, probability %lu, "
>>>> @@ -103,7 +100,7 @@ static inline bool fail_stacktrace(struct fault_attr *attr)
>>>>    * http://www.nongnu.org/failmalloc/
>>>>    */
>>>>   -bool should_fail(struct fault_attr *attr, ssize_t size)
>>>> +bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags)
>>>>   {
>>>>       bool stack_checked = false;
>>>>   @@ -152,13 +149,20 @@ bool should_fail(struct fault_attr *attr, ssize_t size)
>>>>           return false;
>>>>     fail:
>>>> -    fail_dump(attr);
>>>> +    if (!(flags & FAULT_NOWARN))
>>>> +        fail_dump(attr);
>>>>         if (atomic_read(&attr->times) != -1)
>>>>           atomic_dec_not_zero(&attr->times);
>>>>         return true;
>>>>   }
>>>> +EXPORT_SYMBOL_GPL(should_fail_ex);
>>>> +
>>>> +bool should_fail(struct fault_attr *attr, ssize_t size)
>>>> +{
>>>> +    return should_fail_ex(attr, size, 0);
>>>> +}
>>>>   EXPORT_SYMBOL_GPL(should_fail);
>>>>     #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
>>>> diff --git a/mm/failslab.c b/mm/failslab.c
>>>> index 58df9789f1d2..ffc420c0e767 100644
>>>> --- a/mm/failslab.c
>>>> +++ b/mm/failslab.c
>>>> @@ -16,6 +16,8 @@ static struct {
>>>>     bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
>>>>   {
>>>> +    int flags = 0;
>>>> +
>>>>       /* No fault-injection for bootstrap cache */
>>>>       if (unlikely(s == kmem_cache))
>>>>           return false;
>>>> @@ -30,10 +32,16 @@ bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
>>>>       if (failslab.cache_filter && !(s->flags & SLAB_FAILSLAB))
>>>>           return false;
>>>>   +    /*
>>>> +     * In some cases, it expects to specify __GFP_NOWARN
>>>> +     * to avoid printing any information(not just a warning),
>>>> +     * thus avoiding deadlocks. See commit 6b9dbedbe349 for
>>>> +     * details.
>>>> +     */
>>>>       if (gfpflags & __GFP_NOWARN)
>>>> -        failslab.attr.no_warn = true;
>>>> +        flags |= FAULT_NOWARN;
>>>>   -    return should_fail(&failslab.attr, s->object_size);
>>>> +    return should_fail_ex(&failslab.attr, s->object_size, flags);
>>>>   }
>>>>     static int __init setup_failslab(char *str)
>>>> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
>>>> index 7192ded44ad0..cb6fe715d983 100644
>>>> --- a/mm/page_alloc.c
>>>> +++ b/mm/page_alloc.c
>>>> @@ -3902,6 +3902,8 @@ __setup("fail_page_alloc=", setup_fail_page_alloc);
>>>>     static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
>>>>   {
>>>> +    int flags = 0;
>>>> +
>>>>       if (order < fail_page_alloc.min_order)
>>>>           return false;
>>>>       if (gfp_mask & __GFP_NOFAIL)
>>>> @@ -3912,10 +3914,11 @@ static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
>>>>               (gfp_mask & __GFP_DIRECT_RECLAIM))
>>>>           return false;
>>>>   +    /* See comment in __should_failslab() */
>>>>       if (gfp_mask & __GFP_NOWARN)
>>>> -        fail_page_alloc.attr.no_warn = true;
>>>> +        flags |= FAULT_NOWARN;
>>>>   -    return should_fail(&fail_page_alloc.attr, 1 << order);
>>>> +    return should_fail_ex(&fail_page_alloc.attr, 1 << order, flags);
>>>>   }
>>>>     #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
>>

--
Thanks,
Qi

2022-11-08 13:29:36

by Jason Gunthorpe

[permalink] [raw]
Subject: Re: [PATCH v2] mm: fix unexpected changes to {failslab|fail_page_alloc}.attr

On Tue, Nov 08, 2022 at 05:32:52PM +0800, Wei Yongjun wrote:
> > So you want to set/clear this no_warn attr through the procfs or sysfs
> > interface, so that you can easily disable/enable the slab/page fault
> > warning message from the user mode. Right?
>
> Yes, just like:
>
> echo 1 > /sys/kernel/debug/failslab/no_warn #disable message
> echo 0 > /sys/kernel/debug/failslab/no_warn #enable message

You can already do that:

echo 0 > /sys/kernel/debug/failslab/verbose #disable message

Jason

2022-11-08 18:29:23

by Akinobu Mita

[permalink] [raw]
Subject: Re: [PATCH v2] mm: fix unexpected changes to {failslab|fail_page_alloc}.attr

2022年11月8日(火) 12:52 Qi Zheng <[email protected]>:
>
> When we specify __GFP_NOWARN, we only expect that no warnings
> will be issued for current caller. But in the __should_failslab()
> and __should_fail_alloc_page(), the local GFP flags alter the
> global {failslab|fail_page_alloc}.attr, which is persistent and
> shared by all tasks. This is not what we expected, let's fix it.
>
> Cc: [email protected]
> Fixes: 3f913fc5f974 ("mm: fix missing handler for __GFP_NOWARN")
> Reported-by: Dmitry Vyukov <[email protected]>
> Signed-off-by: Qi Zheng <[email protected]>
> ---
> v1: https://lore.kernel.org/lkml/[email protected]/
>
> Changelog in v1 -> v2:
> - add comment for __should_failslab() and __should_fail_alloc_page()
> (suggested by Jason)

Looks good.

Reviewed-by: Akinobu Mita <[email protected]>

2022-11-14 05:21:17

by Qi Zheng

[permalink] [raw]
Subject: Re: [PATCH v2] mm: fix unexpected changes to {failslab|fail_page_alloc}.attr



On 2022/11/9 01:36, Akinobu Mita wrote:
> 2022年11月8日(火) 12:52 Qi Zheng <[email protected]>:
>>
>> When we specify __GFP_NOWARN, we only expect that no warnings
>> will be issued for current caller. But in the __should_failslab()
>> and __should_fail_alloc_page(), the local GFP flags alter the
>> global {failslab|fail_page_alloc}.attr, which is persistent and
>> shared by all tasks. This is not what we expected, let's fix it.
>>
>> Cc: [email protected]
>> Fixes: 3f913fc5f974 ("mm: fix missing handler for __GFP_NOWARN")
>> Reported-by: Dmitry Vyukov <[email protected]>
>> Signed-off-by: Qi Zheng <[email protected]>
>> ---
>> v1: https://lore.kernel.org/lkml/[email protected]/
>>
>> Changelog in v1 -> v2:
>> - add comment for __should_failslab() and __should_fail_alloc_page()
>> (suggested by Jason)
>
> Looks good.
>
> Reviewed-by: Akinobu Mita <[email protected]>

Thanks. And hi Andrew, it seems there is no action left for me; can this
patch be applied to the mm-unstable tree for testing? :)

--
Thanks,
Qi