LinuxLists.cc - [PATCH 1/2] mm: fix missing handler for __GFP

2022-05-10 13:03:31

Subject: [PATCH 1/2] mm: fix missing handler for __GFP_NOWARN

We expect no warnings to be issued when we specify __GFP_NOWARN, but
currently in paths like alloc_pages() and kmalloc(), there are still
some warnings printed, fix it.

Signed-off-by: Qi Zheng <[email protected]>
---
include/linux/fault-inject.h | 2 ++
lib/fault-inject.c | 3 +++
mm/failslab.c | 3 +++
mm/internal.h | 11 +++++++++++
mm/page_alloc.c | 22 ++++++++++++----------
5 files changed, 31 insertions(+), 10 deletions(-)

diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h
index 2d04f6448cde..9f6e25467844 100644
--- a/include/linux/fault-inject.h
+++ b/include/linux/fault-inject.h
@@ -20,6 +20,7 @@ struct fault_attr {
atomic_t space;
unsigned long verbose;
bool task_filter;
+ bool no_warn;
unsigned long stacktrace_depth;
unsigned long require_start;
unsigned long require_end;
@@ -39,6 +40,7 @@ struct fault_attr {
.ratelimit_state = RATELIMIT_STATE_INIT_DISABLED, \
.verbose = 2, \
.dname = NULL, \
+ .no_warn = false, \
}

#define DECLARE_FAULT_ATTR(name) struct fault_attr name = FAULT_ATTR_INITIALIZER
diff --git a/lib/fault-inject.c b/lib/fault-inject.c
index ce12621b4275..423784d9c058 100644
--- a/lib/fault-inject.c
+++ b/lib/fault-inject.c
@@ -41,6 +41,9 @@ EXPORT_SYMBOL_GPL(setup_fault_attr);

static void fail_dump(struct fault_attr *attr)
{
+ if (attr->no_warn)
+ return;
+
if (attr->verbose > 0 && __ratelimit(&attr->ratelimit_state)) {
printk(KERN_NOTICE "FAULT_INJECTION: forcing a failure.\n"
"name %pd, interval %lu, probability %lu, "
diff --git a/mm/failslab.c b/mm/failslab.c
index f92fed91ac23..58df9789f1d2 100644
--- a/mm/failslab.c
+++ b/mm/failslab.c
@@ -30,6 +30,9 @@ bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
if (failslab.cache_filter && !(s->flags & SLAB_FAILSLAB))
return false;

+ if (gfpflags & __GFP_NOWARN)
+ failslab.attr.no_warn = true;
+
return should_fail(&failslab.attr, s->object_size);
}

diff --git a/mm/internal.h b/mm/internal.h
index cf16280ce132..7a268fac6559 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -35,6 +35,17 @@ struct folio_batch;
/* Do not use these with a slab allocator */
#define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)

+#define WARN_ON_ONCE_GFP(cond, gfp) ({ \
+ static bool __section(".data.once") __warned; \
+ int __ret_warn_once = !!(cond); \
+ \
+ if (unlikely(!(gfp & __GFP_NOWARN) && __ret_warn_once && !__warned)) { \
+ __warned = true; \
+ WARN_ON(1); \
+ } \
+ unlikely(__ret_warn_once); \
+})
+
void page_writeback_init(void);

static inline void *folio_raw_mapping(struct folio *folio)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 0e42038382c1..2bf4ce4d0e2f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3722,7 +3722,7 @@ struct page *rmqueue(struct zone *preferred_zone,
* We most definitely don't want callers attempting to
* allocate greater than order-1 page units with __GFP_NOFAIL.
*/
- WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1));
+ WARN_ON_ONCE_GFP((gfp_flags & __GFP_NOFAIL) && (order > 1), gfp_flags);

do {
page = NULL;
@@ -3799,6 +3799,9 @@ static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
(gfp_mask & __GFP_DIRECT_RECLAIM))
return false;

+ if (gfp_mask & __GFP_NOWARN)
+ fail_page_alloc.attr.no_warn = true;
+
return should_fail(&fail_page_alloc.attr, 1 << order);
}

@@ -4346,7 +4349,8 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
*/

/* Exhausted what can be done so it's blame time */
- if (out_of_memory(&oc) || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL)) {
+ if (out_of_memory(&oc) ||
+ WARN_ON_ONCE_GFP(gfp_mask & __GFP_NOFAIL, gfp_mask)) {
*did_some_progress = 1;

/*
@@ -4902,8 +4906,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
* We also sanity check to catch abuse of atomic reserves being used by
* callers that are not in atomic context.
*/
- if (WARN_ON_ONCE((gfp_mask & (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)) ==
- (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)))
+ if (WARN_ON_ONCE_GFP((gfp_mask & (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)) ==
+ (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM), gfp_mask))
gfp_mask &= ~__GFP_ATOMIC;

retry_cpuset:
@@ -5117,7 +5121,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
* All existing users of the __GFP_NOFAIL are blockable, so warn
* of any new users that actually require GFP_NOWAIT
*/
- if (WARN_ON_ONCE(!can_direct_reclaim))
+ if (WARN_ON_ONCE_GFP(!can_direct_reclaim, gfp_mask))
goto fail;

/*
@@ -5125,7 +5129,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
* because we cannot reclaim anything and only can loop waiting
* for somebody to do a work for us
*/
- WARN_ON_ONCE(current->flags & PF_MEMALLOC);
+ WARN_ON_ONCE_GFP(current->flags & PF_MEMALLOC, gfp_mask);

/*
* non failing costly orders are a hard requirement which we
@@ -5133,7 +5137,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
* so that we can identify them and convert them to something
* else.
*/
- WARN_ON_ONCE(order > PAGE_ALLOC_COSTLY_ORDER);
+ WARN_ON_ONCE_GFP(order > PAGE_ALLOC_COSTLY_ORDER, gfp_mask);

/*
* Help non-failing allocations by giving them access to memory
@@ -5379,10 +5383,8 @@ struct page *__alloc_pages(gfp_t gfp, unsigned int order, int preferred_nid,
* There are several places where we assume that the order value is sane
* so bail out early if the request is out of bound.
*/
- if (unlikely(order >= MAX_ORDER)) {
- WARN_ON_ONCE(!(gfp & __GFP_NOWARN));
+ if (WARN_ON_ONCE_GFP(order >= MAX_ORDER, gfp))
return NULL;
- }

gfp &= gfp_allowed_mask;
/*
--
2.20.1

2022-05-10 19:20:33

by Qi Zheng

[permalink] [raw]

Subject: [PATCH 2/2] tty: fix deadlock caused by calling printk() under tty_port->lock

The pty_write() invokes kmalloc() which may invoke a normal printk() to
print failure message. This can cause a deadlock in the scenario reported
by syz-bot below:

CPU0 CPU1 CPU2
---- ---- ----
lock(console_owner);
lock(&port_lock_key);
lock(&port->lock);
lock(&port_lock_key);
lock(&port->lock);
lock(console_owner);

As commit dbdda842fe96 ("printk: Add console owner and waiter logic to
load balance console writes") said, such deadlock can be prevented by
using printk_deferred() in kmalloc() (which is invoked in the section
guarded by the port->lock). But there are too many printk() on the
kmalloc() path, and kmalloc() can be called from anywhere, so changing
printk() to printk_deferred() is too complicated and inelegant.

Therefore, this patch chooses to specify __GFP_NOWARN to kmalloc(), so
that printk() will not be called, and this deadlock problem can be avoided.

Syz-bot reported the following lockdep error:

======================================================
WARNING: possible circular locking dependency detected
5.4.143-00237-g08ccc19a-dirty #10 Not tainted
------------------------------------------------------
syz-executor.4/29420 is trying to acquire lock:
ffffffff8aedb2a0 (console_owner){....}-{0:0}, at: console_trylock_spinning kernel/printk/printk.c:1752 [inline]
ffffffff8aedb2a0 (console_owner){....}-{0:0}, at: vprintk_emit+0x2ca/0x470 kernel/printk/printk.c:2023

but task is already holding lock:
ffff8880119c9158 (&port->lock){-.-.}-{2:2}, at: pty_write+0xf4/0x1f0 drivers/tty/pty.c:120

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #2 (&port->lock){-.-.}-{2:2}:
__raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline]
_raw_spin_lock_irqsave+0x35/0x50 kernel/locking/spinlock.c:159
tty_port_tty_get drivers/tty/tty_port.c:288 [inline] <-- lock(&port->lock);
tty_port_default_wakeup+0x1d/0xb0 drivers/tty/tty_port.c:47
serial8250_tx_chars+0x530/0xa80 drivers/tty/serial/8250/8250_port.c:1767
serial8250_handle_irq.part.0+0x31f/0x3d0 drivers/tty/serial/8250/8250_port.c:1854
serial8250_handle_irq drivers/tty/serial/8250/8250_port.c:1827 [inline] <-- lock(&port_lock_key);
serial8250_default_handle_irq+0xb2/0x220 drivers/tty/serial/8250/8250_port.c:1870
serial8250_interrupt+0xfd/0x200 drivers/tty/serial/8250/8250_core.c:126
__handle_irq_event_percpu+0x109/0xa50 kernel/irq/handle.c:156
handle_irq_event_percpu+0x76/0x170 kernel/irq/handle.c:196
handle_irq_event+0xa1/0x130 kernel/irq/handle.c:213
handle_edge_irq+0x261/0xd00 kernel/irq/chip.c:833
generic_handle_irq_desc include/linux/irqdesc.h:156 [inline]
do_IRQ+0xf2/0x2e0 arch/x86/kernel/irq.c:250
ret_from_intr+0x0/0x19
native_safe_halt arch/x86/include/asm/irqflags.h:60 [inline]
arch_safe_halt arch/x86/include/asm/irqflags.h:103 [inline]
default_idle+0x2c/0x1a0 arch/x86/kernel/process.c:572
cpuidle_idle_call kernel/sched/idle.c:184 [inline]
do_idle+0x44c/0x590 kernel/sched/idle.c:294
cpu_startup_entry+0x14/0x20 kernel/sched/idle.c:386
start_secondary+0x2d1/0x3e0 arch/x86/kernel/smpboot.c:264
secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:241

-> #1 (&port_lock_key){-.-.}-{2:2}:
__raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline]
_raw_spin_lock_irqsave+0x35/0x50 kernel/locking/spinlock.c:159
serial8250_console_write+0x184/0xa40 drivers/tty/serial/8250/8250_port.c:3198
<-- lock(&port_lock_key);
call_console_drivers kernel/printk/printk.c:1819 [inline]
console_unlock+0x8cb/0xd00 kernel/printk/printk.c:2504
vprintk_emit+0x1b5/0x470 kernel/printk/printk.c:2024 <-- lock(console_owner);
vprintk_func+0x8d/0x250 kernel/printk/printk_safe.c:394
printk+0xba/0xed kernel/printk/printk.c:2084
register_console+0x8b3/0xc10 kernel/printk/printk.c:2829
univ8250_console_init+0x3a/0x46 drivers/tty/serial/8250/8250_core.c:681
console_init+0x49d/0x6d3 kernel/printk/printk.c:2915
start_kernel+0x5e9/0x879 init/main.c:713
secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:241

-> #0 (console_owner){....}-{0:0}:
check_prev_add kernel/locking/lockdep.c:2600 [inline]
check_prevs_add kernel/locking/lockdep.c:2705 [inline]
validate_chain kernel/locking/lockdep.c:3095 [inline]
__lock_acquire+0x27e6/0x4cc0 kernel/locking/lockdep.c:4200
lock_acquire+0x127/0x340 kernel/locking/lockdep.c:4734
console_trylock_spinning kernel/printk/printk.c:1773 [inline] <-- lock(console_owner);
vprintk_emit+0x307/0x470 kernel/printk/printk.c:2023
vprintk_func+0x8d/0x250 kernel/printk/printk_safe.c:394
printk+0xba/0xed kernel/printk/printk.c:2084
fail_dump lib/fault-inject.c:45 [inline]
should_fail+0x67b/0x7c0 lib/fault-inject.c:144
__should_failslab+0x152/0x1c0 mm/failslab.c:33
should_failslab+0x5/0x10 mm/slab_common.c:1224
slab_pre_alloc_hook mm/slab.h:468 [inline]
slab_alloc_node mm/slub.c:2723 [inline]
slab_alloc mm/slub.c:2807 [inline]
__kmalloc+0x72/0x300 mm/slub.c:3871
kmalloc include/linux/slab.h:582 [inline]
tty_buffer_alloc+0x23f/0x2a0 drivers/tty/tty_buffer.c:175
__tty_buffer_request_room+0x156/0x2a0 drivers/tty/tty_buffer.c:273
tty_insert_flip_string_fixed_flag+0x93/0x250 drivers/tty/tty_buffer.c:318
tty_insert_flip_string include/linux/tty_flip.h:37 [inline]
pty_write+0x126/0x1f0 drivers/tty/pty.c:122 <-- lock(&port->lock);
n_tty_write+0xa7a/0xfc0 drivers/tty/n_tty.c:2356
do_tty_write drivers/tty/tty_io.c:961 [inline]
tty_write+0x512/0x930 drivers/tty/tty_io.c:1045
__vfs_write+0x76/0x100 fs/read_write.c:494
vfs_write+0x268/0x5c0 fs/read_write.c:558
ksys_write+0x12d/0x250 fs/read_write.c:611
do_syscall_64+0xd7/0x380 arch/x86/entry/common.c:291
entry_SYSCALL_64_after_hwframe+0x49/0xbe

other info that might help us debug this:

Chain exists of:
console_owner --> &port_lock_key --> &port->lock

Fixes: b6da31b2c07c ("tty: Fix data race in tty_insert_flip_string_fixed_flag")
Signed-off-by: Qi Zheng <[email protected]>
---
drivers/tty/tty_buffer.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c
index 646510476c30..bfa431a8e690 100644
--- a/drivers/tty/tty_buffer.c
+++ b/drivers/tty/tty_buffer.c
@@ -175,7 +175,8 @@ static struct tty_buffer *tty_buffer_alloc(struct tty_port *port, size_t size)
*/
if (atomic_read(&port->buf.mem_used) > port->buf.mem_limit)
return NULL;
- p = kmalloc(sizeof(struct tty_buffer) + 2 * size, GFP_ATOMIC);
+ p = kmalloc(sizeof(struct tty_buffer) + 2 * size,
+ GFP_ATOMIC | __GFP_NOWARN);
if (p == NULL)
return NULL;

--
2.20.1

2022-05-11 06:56:55

by Andrew Morton

[permalink] [raw]

Subject: Re: [PATCH 1/2] mm: fix missing handler for __GFP_NOWARN

On Tue, 10 May 2022 19:38:08 +0800 Qi Zheng <[email protected]> wrote:

> We expect no warnings to be issued when we specify __GFP_NOWARN, but
> currently in paths like alloc_pages() and kmalloc(), there are still
> some warnings printed, fix it.

Looks sane to me.

> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -35,6 +35,17 @@ struct folio_batch;
> /* Do not use these with a slab allocator */
> #define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)
>
> +#define WARN_ON_ONCE_GFP(cond, gfp) ({ \
> + static bool __section(".data.once") __warned; \
> + int __ret_warn_once = !!(cond); \
> + \
> + if (unlikely(!(gfp & __GFP_NOWARN) && __ret_warn_once && !__warned)) { \
> + __warned = true; \
> + WARN_ON(1); \
> + } \
> + unlikely(__ret_warn_once); \
> +})

I don't think WARN_ON_ONCE_GFP is a good name for this. But
WARN_ON_ONCE_IF_NOT_GFP_NOWARN is too long :(

WARN_ON_ONCE_NOWARN might be better. No strong opinion here, really.

> @@ -4902,8 +4906,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
> * We also sanity check to catch abuse of atomic reserves being used by
> * callers that are not in atomic context.
> */
> - if (WARN_ON_ONCE((gfp_mask & (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)) ==
> - (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)))
> + if (WARN_ON_ONCE_GFP((gfp_mask & (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)) ==
> + (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM), gfp_mask))
> gfp_mask &= ~__GFP_ATOMIC;
>
> retry_cpuset:

I dropped this hunk - Neil's "mm: discard __GFP_ATOMIC"
(https://lkml.kernel.org/r/[email protected])
deleted this code.

2022-05-11 07:48:24

by Qi Zheng

[permalink] [raw]

Subject: Re: [PATCH 1/2] mm: fix missing handler for __GFP_NOWARN

On 2022/5/11 1:12 PM, Qi Zheng wrote:
>
>
> On 2022/5/10 7:38 PM, Qi Zheng wrote:
>> We expect no warnings to be issued when we specify __GFP_NOWARN, but
>> currently in paths like alloc_pages() and kmalloc(), there are still
>> some warnings printed, fix it.
>
> Hi Andrew,
>
> Maybe we only need to deal with memory allocation failures, such as
> should_fail() (This leads to deadlock in PATCH[2/2]). These
> WARN_ON_ONCE()s report usage problems and should not be printed. If they

such as WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1),
gfp_flags).

> are printed, we should fix these usage problems.
>
> Thanks,
> Qi
>
>
>>
>> Signed-off-by: Qi Zheng <[email protected]>
>> ---
>> include/linux/fault-inject.h | 2 ++
>> lib/fault-inject.c           | 3 +++
>> mm/failslab.c                | 3 +++
>> mm/internal.h                | 11 +++++++++++
>> mm/page_alloc.c              | 22 ++++++++++++----------
>> 5 files changed, 31 insertions(+), 10 deletions(-)
>>
>> diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h
>> index 2d04f6448cde..9f6e25467844 100644
>> --- a/include/linux/fault-inject.h
>> +++ b/include/linux/fault-inject.h
>> @@ -20,6 +20,7 @@ struct fault_attr {
>>       atomic_t space;
>>       unsigned long verbose;
>>       bool task_filter;
>> +    bool no_warn;
>>       unsigned long stacktrace_depth;
>>       unsigned long require_start;
>>       unsigned long require_end;
>> @@ -39,6 +40,7 @@ struct fault_attr {
>>           .ratelimit_state = RATELIMIT_STATE_INIT_DISABLED,    \
>>           .verbose = 2,                        \
>>           .dname = NULL,                        \
>> +        .no_warn = false,                    \
>>       }
>> #define DECLARE_FAULT_ATTR(name) struct fault_attr name =
>> FAULT_ATTR_INITIALIZER
>> diff --git a/lib/fault-inject.c b/lib/fault-inject.c
>> index ce12621b4275..423784d9c058 100644
>> --- a/lib/fault-inject.c
>> +++ b/lib/fault-inject.c
>> @@ -41,6 +41,9 @@ EXPORT_SYMBOL_GPL(setup_fault_attr);
>> static void fail_dump(struct fault_attr *attr)
>> {
>> +    if (attr->no_warn)
>> +        return;
>> +
>>       if (attr->verbose > 0 && __ratelimit(&attr->ratelimit_state)) {
>>           printk(KERN_NOTICE "FAULT_INJECTION: forcing a failure.\n"
>>                  "name %pd, interval %lu, probability %lu, "
>> diff --git a/mm/failslab.c b/mm/failslab.c
>> index f92fed91ac23..58df9789f1d2 100644
>> --- a/mm/failslab.c
>> +++ b/mm/failslab.c
>> @@ -30,6 +30,9 @@ bool __should_failslab(struct kmem_cache *s, gfp_t
>> gfpflags)
>>       if (failslab.cache_filter && !(s->flags & SLAB_FAILSLAB))
>>           return false;
>> +    if (gfpflags & __GFP_NOWARN)
>> +        failslab.attr.no_warn = true;
>> +
>>       return should_fail(&failslab.attr, s->object_size);
>> }
>> diff --git a/mm/internal.h b/mm/internal.h
>> index cf16280ce132..7a268fac6559 100644
>> --- a/mm/internal.h
>> +++ b/mm/internal.h
>> @@ -35,6 +35,17 @@ struct folio_batch;
>> /* Do not use these with a slab allocator */
>> #define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)
>> +#define WARN_ON_ONCE_GFP(cond, gfp)    ({                \
>> +    static bool __section(".data.once") __warned;            \
>> +    int __ret_warn_once = !!(cond);                    \
>> +                                    \
>> +    if (unlikely(!(gfp & __GFP_NOWARN) && __ret_warn_once &&
>> !__warned)) { \
>> +        __warned = true;                    \
>> +        WARN_ON(1);                        \
>> +    }                                \
>> +    unlikely(__ret_warn_once);                    \
>> +})
>> +
>> void page_writeback_init(void);
>> static inline void *folio_raw_mapping(struct folio *folio)
>> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
>> index 0e42038382c1..2bf4ce4d0e2f 100644
>> --- a/mm/page_alloc.c
>> +++ b/mm/page_alloc.c
>> @@ -3722,7 +3722,7 @@ struct page *rmqueue(struct zone *preferred_zone,
>>        * We most definitely don't want callers attempting to
>>        * allocate greater than order-1 page units with __GFP_NOFAIL.
>>        */
>> -    WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1));
>> +    WARN_ON_ONCE_GFP((gfp_flags & __GFP_NOFAIL) && (order > 1),
>> gfp_flags);
>>       do {
>>           page = NULL;
>> @@ -3799,6 +3799,9 @@ static bool __should_fail_alloc_page(gfp_t
>> gfp_mask, unsigned int order)
>>               (gfp_mask & __GFP_DIRECT_RECLAIM))
>>           return false;
>> +    if (gfp_mask & __GFP_NOWARN)
>> +        fail_page_alloc.attr.no_warn = true;
>> +
>>       return should_fail(&fail_page_alloc.attr, 1 << order);
>> }
>> @@ -4346,7 +4349,8 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned
>> int order,
>>        */
>>       /* Exhausted what can be done so it's blame time */
>> -    if (out_of_memory(&oc) || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL)) {
>> +    if (out_of_memory(&oc) ||
>> +        WARN_ON_ONCE_GFP(gfp_mask & __GFP_NOFAIL, gfp_mask)) {
>>           *did_some_progress = 1;
>>           /*
>> @@ -4902,8 +4906,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned
>> int order,
>>        * We also sanity check to catch abuse of atomic reserves being
>> used by
>>        * callers that are not in atomic context.
>>        */
>> -    if (WARN_ON_ONCE((gfp_mask & (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)) ==
>> -                (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)))
>> +    if (WARN_ON_ONCE_GFP((gfp_mask &
>> (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)) ==
>> +                (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM), gfp_mask))
>>           gfp_mask &= ~__GFP_ATOMIC;
>> retry_cpuset:
>> @@ -5117,7 +5121,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned
>> int order,
>>            * All existing users of the __GFP_NOFAIL are blockable, so
>> warn
>>            * of any new users that actually require GFP_NOWAIT
>>            */
>> -        if (WARN_ON_ONCE(!can_direct_reclaim))
>> +        if (WARN_ON_ONCE_GFP(!can_direct_reclaim, gfp_mask))
>>               goto fail;
>>           /*
>> @@ -5125,7 +5129,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned
>> int order,
>>            * because we cannot reclaim anything and only can loop waiting
>>            * for somebody to do a work for us
>>            */
>> -        WARN_ON_ONCE(current->flags & PF_MEMALLOC);
>> +        WARN_ON_ONCE_GFP(current->flags & PF_MEMALLOC, gfp_mask);
>>           /*
>>            * non failing costly orders are a hard requirement which we
>> @@ -5133,7 +5137,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned
>> int order,
>>            * so that we can identify them and convert them to something
>>            * else.
>>            */
>> -        WARN_ON_ONCE(order > PAGE_ALLOC_COSTLY_ORDER);
>> +        WARN_ON_ONCE_GFP(order > PAGE_ALLOC_COSTLY_ORDER, gfp_mask);
>>           /*
>>            * Help non-failing allocations by giving them access to memory
>> @@ -5379,10 +5383,8 @@ struct page *__alloc_pages(gfp_t gfp, unsigned
>> int order, int preferred_nid,
>>        * There are several places where we assume that the order value
>> is sane
>>        * so bail out early if the request is out of bound.
>>        */
>> -    if (unlikely(order >= MAX_ORDER)) {
>> -        WARN_ON_ONCE(!(gfp & __GFP_NOWARN));
>> +    if (WARN_ON_ONCE_GFP(order >= MAX_ORDER, gfp))
>>           return NULL;
>> -    }
>>       gfp &= gfp_allowed_mask;
>>       /*
>

--
Thanks,
Qi

2022-05-11 08:50:23

by Qi Zheng

[permalink] [raw]

Subject: Re: [PATCH 2/2] tty: fix deadlock caused by calling printk() under tty_port->lock

On 2022/5/11 1:57 PM, Jiri Slaby wrote:
> Hi,
>
> LGTM:
> Acked-by: Jiri Slaby <[email protected]>

Thanks.

>
> Minor comments below.
>
> On 10. 05. 22, 13:38, Qi Zheng wrote:
>> The pty_write() invokes kmalloc() which may invoke a normal printk() to
>> print failure message. This can cause a deadlock in the scenario reported
>> by syz-bot below:
>>
>>         CPU0              CPU1                    CPU2
>>         ----              ----                    ----
>>                           lock(console_owner);
>>                                                   lock(&port_lock_key);
>>    lock(&port->lock);
>>                           lock(&port_lock_key);
>>                                                   lock(&port->lock);
>>    lock(console_owner);
>>
>> As commit dbdda842fe96 ("printk: Add console owner and waiter logic to
>> load balance console writes") said, such deadlock can be prevented by
>> using printk_deferred() in kmalloc() (which is invoked in the section
>> guarded by the port->lock). But there are too many printk() on the
>> kmalloc() path, and kmalloc() can be called from anywhere, so changing
>> printk() to printk_deferred() is too complicated and inelegant.
>>
>> Therefore, this patch chooses to specify __GFP_NOWARN to kmalloc(), so
>> that printk() will not be called, and this deadlock problem can be
>> avoided.
>>
>> Syz-bot reported the following lockdep error:
>>
>> ======================================================
>> WARNING: possible circular locking dependency detected
>> 5.4.143-00237-g08ccc19a-dirty #10 Not tainted
>> ------------------------------------------------------
>> syz-executor.4/29420 is trying to acquire lock:
>> ffffffff8aedb2a0 (console_owner){....}-{0:0}, at:
>> console_trylock_spinning kernel/printk/printk.c:1752 [inline]
>> ffffffff8aedb2a0 (console_owner){....}-{0:0}, at:
>> vprintk_emit+0x2ca/0x470 kernel/printk/printk.c:2023
>>
>> but task is already holding lock:
>> ffff8880119c9158 (&port->lock){-.-.}-{2:2}, at: pty_write+0xf4/0x1f0
>> drivers/tty/pty.c:120
>>
>> which lock already depends on the new lock.
>>
>> the existing dependency chain (in reverse order) is:
>
> Maybe trim the stack traces a bit, the commit message is unnecessarily
> long...

Agree, will do.

>
>> -> #2 (&port->lock){-.-.}-{2:2}:
>>         __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110
>> [inline]
>>         _raw_spin_lock_irqsave+0x35/0x50 kernel/locking/spinlock.c:159
>>         tty_port_tty_get drivers/tty/tty_port.c:288
>> [inline]                  <-- lock(&port->lock);
>>         tty_port_default_wakeup+0x1d/0xb0 drivers/tty/tty_port.c:47
>>         serial8250_tx_chars+0x530/0xa80
>> drivers/tty/serial/8250/8250_port.c:1767
>>         serial8250_handle_irq.part.0+0x31f/0x3d0
>> drivers/tty/serial/8250/8250_port.c:1854
>>         serial8250_handle_irq drivers/tty/serial/8250/8250_port.c:1827
>> [inline]     <-- lock(&port_lock_key);
>>         serial8250_default_handle_irq+0xb2/0x220
>> drivers/tty/serial/8250/8250_port.c:1870
>>         serial8250_interrupt+0xfd/0x200
>> drivers/tty/serial/8250/8250_core.c:126
>>         __handle_irq_event_percpu+0x109/0xa50 kernel/irq/handle.c:156
>
> Stop this trace here and trim the rest?

Will do.

>
>>         handle_irq_event_percpu+0x76/0x170 kernel/irq/handle.c:196
>>         handle_irq_event+0xa1/0x130 kernel/irq/handle.c:213
>>         handle_edge_irq+0x261/0xd00 kernel/irq/chip.c:833
>>         generic_handle_irq_desc include/linux/irqdesc.h:156 [inline]
>>         do_IRQ+0xf2/0x2e0 arch/x86/kernel/irq.c:250
>>         ret_from_intr+0x0/0x19
>>         native_safe_halt arch/x86/include/asm/irqflags.h:60 [inline]
>>         arch_safe_halt arch/x86/include/asm/irqflags.h:103 [inline]
>>         default_idle+0x2c/0x1a0 arch/x86/kernel/process.c:572
>>         cpuidle_idle_call kernel/sched/idle.c:184 [inline]
>>         do_idle+0x44c/0x590 kernel/sched/idle.c:294
>>         cpu_startup_entry+0x14/0x20 kernel/sched/idle.c:386
>>         start_secondary+0x2d1/0x3e0 arch/x86/kernel/smpboot.c:264
>>         secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:241
>>
>> -> #1 (&port_lock_key){-.-.}-{2:2}:
>>         __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110
>> [inline]
>>         _raw_spin_lock_irqsave+0x35/0x50 kernel/locking/spinlock.c:159
>>         serial8250_console_write+0x184/0xa40
>> drivers/tty/serial/8250/8250_port.c:3198
>>                                         <-- lock(&port_lock_key);
>>         call_console_drivers kernel/printk/printk.c:1819 [inline]
>>         console_unlock+0x8cb/0xd00 kernel/printk/printk.c:2504
>>         vprintk_emit+0x1b5/0x470
>> kernel/printk/printk.c:2024            <-- lock(console_owner);
>>         vprintk_func+0x8d/0x250 kernel/printk/printk_safe.c:394
>>         printk+0xba/0xed kernel/printk/printk.c:2084
>>         register_console+0x8b3/0xc10 kernel/printk/printk.c:2829
>>         univ8250_console_init+0x3a/0x46
>> drivers/tty/serial/8250/8250_core.c:681
>>         console_init+0x49d/0x6d3 kernel/printk/printk.c:2915
>>         start_kernel+0x5e9/0x879 init/main.c:713
>>         secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:241
>>
>> -> #0 (console_owner){....}-{0:0}:
>>         check_prev_add kernel/locking/lockdep.c:2600 [inline]
>>         check_prevs_add kernel/locking/lockdep.c:2705 [inline]
>>         validate_chain kernel/locking/lockdep.c:3095 [inline]
>>         __lock_acquire+0x27e6/0x4cc0 kernel/locking/lockdep.c:4200
>
> Delete the above 4 lines?

Will do.

>
>>         lock_acquire+0x127/0x340 kernel/locking/lockdep.c:4734
>>         console_trylock_spinning kernel/printk/printk.c:1773
>> [inline]        <-- lock(console_owner);
>>         vprintk_emit+0x307/0x470 kernel/printk/printk.c:2023
>>         vprintk_func+0x8d/0x250 kernel/printk/printk_safe.c:394
>>         printk+0xba/0xed kernel/printk/printk.c:2084
>>         fail_dump lib/fault-inject.c:45 [inline]
>>         should_fail+0x67b/0x7c0 lib/fault-inject.c:144
>>         __should_failslab+0x152/0x1c0 mm/failslab.c:33
>>         should_failslab+0x5/0x10 mm/slab_common.c:1224
>>         slab_pre_alloc_hook mm/slab.h:468 [inline]
>>         slab_alloc_node mm/slub.c:2723 [inline]
>>         slab_alloc mm/slub.c:2807 [inline]
>>         __kmalloc+0x72/0x300 mm/slub.c:3871
>>         kmalloc include/linux/slab.h:582 [inline]
>>         tty_buffer_alloc+0x23f/0x2a0 drivers/tty/tty_buffer.c:175
>>         __tty_buffer_request_room+0x156/0x2a0
>> drivers/tty/tty_buffer.c:273
>>         tty_insert_flip_string_fixed_flag+0x93/0x250
>> drivers/tty/tty_buffer.c:318
>>         tty_insert_flip_string include/linux/tty_flip.h:37 [inline]
>>         pty_write+0x126/0x1f0 drivers/tty/pty.c:122                <--
>> lock(&port->lock);
>>         n_tty_write+0xa7a/0xfc0 drivers/tty/n_tty.c:2356
>>         do_tty_write drivers/tty/tty_io.c:961 [inline]
>>         tty_write+0x512/0x930 drivers/tty/tty_io.c:1045
>>         __vfs_write+0x76/0x100 fs/read_write.c:494
>
> And stop here?

Will do.

>
>>         vfs_write+0x268/0x5c0 fs/read_write.c:558
>>         ksys_write+0x12d/0x250 fs/read_write.c:611
>>         do_syscall_64+0xd7/0x380 arch/x86/entry/common.c:291
>>         entry_SYSCALL_64_after_hwframe+0x49/0xbe
>>
>> other info that might help us debug this:
>>
>> Chain exists of:
>>    console_owner --> &port_lock_key --> &port->lock
>>
>> Fixes: b6da31b2c07c ("tty: Fix data race in
>> tty_insert_flip_string_fixed_flag")
>> Signed-off-by: Qi Zheng <[email protected]>
>> ---
>> drivers/tty/tty_buffer.c | 3 ++-
>> 1 file changed, 2 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c
>> index 646510476c30..bfa431a8e690 100644
>> --- a/drivers/tty/tty_buffer.c
>> +++ b/drivers/tty/tty_buffer.c
>> @@ -175,7 +175,8 @@ static struct tty_buffer *tty_buffer_alloc(struct
>> tty_port *port, size_t size)
>>        */
>>       if (atomic_read(&port->buf.mem_used) > port->buf.mem_limit)
>>           return NULL;
>> -    p = kmalloc(sizeof(struct tty_buffer) + 2 * size, GFP_ATOMIC);
>> +    p = kmalloc(sizeof(struct tty_buffer) + 2 * size,
>> +            GFP_ATOMIC | __GFP_NOWARN);
>>       if (p == NULL)
>>           return NULL;
>
>

--
Thanks,
Qi

2022-05-11 08:55:54

by Qi Zheng

[permalink] [raw]

Subject: Re: [PATCH 1/2] mm: fix missing handler for __GFP_NOWARN

On 2022/5/11 10:32 AM, Andrew Morton wrote:
> On Wed, 11 May 2022 10:19:48 +0800 Qi Zheng <[email protected]> wrote:
>
>>
>> ,,,
>>>> --- a/mm/internal.h
>>>> +++ b/mm/internal.h
>>>> @@ -35,6 +35,17 @@ struct folio_batch;
>>>> /* Do not use these with a slab allocator */
>>>> #define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)
>>>>
>>>> +#define WARN_ON_ONCE_GFP(cond, gfp) ({ \
>>>> + static bool __section(".data.once") __warned; \
>>>> + int __ret_warn_once = !!(cond); \
>>>> + \
>>>> + if (unlikely(!(gfp & __GFP_NOWARN) && __ret_warn_once && !__warned)) { \
>>>> + __warned = true; \
>>>> + WARN_ON(1); \
>>>> + } \
>>>> + unlikely(__ret_warn_once); \
>>>> +})
>>>
>>> I don't think WARN_ON_ONCE_GFP is a good name for this. But
>>> WARN_ON_ONCE_IF_NOT_GFP_NOWARN is too long :(
>>>
>>> WARN_ON_ONCE_NOWARN might be better. No strong opinion here, really.
>>
>> I've thought about WARN_ON_ONCE_NOWARN, but I feel a little weird
>> putting 'WARN' and 'NOWARN' together, how about WARN_ON_ONCE_IF_ALLOWED?
>
> I dunno. WARN_ON_ONCE_GFP isn't too bad I suppose. Add a comment over
> the definition explaining it?

OK, I will add a comment to it.

>
>>>
>>>> @@ -4902,8 +4906,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>>>> * We also sanity check to catch abuse of atomic reserves being used by
>>>> * callers that are not in atomic context.
>>>> */
>>>> - if (WARN_ON_ONCE((gfp_mask & (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)) ==
>>>> - (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)))
>>>> + if (WARN_ON_ONCE_GFP((gfp_mask & (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)) ==
>>>> + (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM), gfp_mask))
>>>> gfp_mask &= ~__GFP_ATOMIC;
>>>>
>>>> retry_cpuset:
>>>
>>> I dropped this hunk - Neil's "mm: discard __GFP_ATOMIC"
>>> (https://lkml.kernel.org/r/[email protected])
>>> deleted this code.
>>>
>>
>> This series is based on v5.18-rc5, I will rebase it to the latest next
>> branch and check if there are any missing WARN_ON_ONCEs that are not
>> being handled.
>
> Against git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm branch
> mm-unstable, please. That ends up in linux-next, with a delay.

OK, will do.

--
Thanks,
Qi

2022-05-11 09:01:14

by Qi Zheng

[permalink] [raw]

Subject: Re: [PATCH 1/2] mm: fix missing handler for __GFP_NOWARN

On 2022/5/11 2:59 AM, Andrew Morton wrote:
> On Tue, 10 May 2022 19:38:08 +0800 Qi Zheng <[email protected]> wrote:
>
>> We expect no warnings to be issued when we specify __GFP_NOWARN, but
>> currently in paths like alloc_pages() and kmalloc(), there are still
>> some warnings printed, fix it.
>
> Looks sane to me.
>
>> --- a/mm/internal.h
>> +++ b/mm/internal.h
>> @@ -35,6 +35,17 @@ struct folio_batch;
>> /* Do not use these with a slab allocator */
>> #define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)
>>
>> +#define WARN_ON_ONCE_GFP(cond, gfp) ({ \
>> + static bool __section(".data.once") __warned; \
>> + int __ret_warn_once = !!(cond); \
>> + \
>> + if (unlikely(!(gfp & __GFP_NOWARN) && __ret_warn_once && !__warned)) { \
>> + __warned = true; \
>> + WARN_ON(1); \
>> + } \
>> + unlikely(__ret_warn_once); \
>> +})
>
> I don't think WARN_ON_ONCE_GFP is a good name for this. But
> WARN_ON_ONCE_IF_NOT_GFP_NOWARN is too long :(
>
> WARN_ON_ONCE_NOWARN might be better. No strong opinion here, really.

I've thought about WARN_ON_ONCE_NOWARN, but I feel a little weird
putting 'WARN' and 'NOWARN' together, how about WARN_ON_ONCE_IF_ALLOWED?

>
>> @@ -4902,8 +4906,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>> * We also sanity check to catch abuse of atomic reserves being used by
>> * callers that are not in atomic context.
>> */
>> - if (WARN_ON_ONCE((gfp_mask & (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)) ==
>> - (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)))
>> + if (WARN_ON_ONCE_GFP((gfp_mask & (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)) ==
>> + (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM), gfp_mask))
>> gfp_mask &= ~__GFP_ATOMIC;
>>
>> retry_cpuset:
>
> I dropped this hunk - Neil's "mm: discard __GFP_ATOMIC"
> (https://lkml.kernel.org/r/[email protected])
> deleted this code.
>

This series is based on v5.18-rc5, I will rebase it to the latest next
branch and check if there are any missing WARN_ON_ONCEs that are not
being handled.

Thanks,
Qi

--
Thanks,
Qi

2022-05-11 09:35:20

by Andrew Morton

[permalink] [raw]

Subject: Re: [PATCH 1/2] mm: fix missing handler for __GFP_NOWARN

On Wed, 11 May 2022 10:19:48 +0800 Qi Zheng <[email protected]> wrote:

>
> ,,,
> >> --- a/mm/internal.h
> >> +++ b/mm/internal.h
> >> @@ -35,6 +35,17 @@ struct folio_batch;
> >> /* Do not use these with a slab allocator */
> >> #define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)
> >>
> >> +#define WARN_ON_ONCE_GFP(cond, gfp) ({ \
> >> + static bool __section(".data.once") __warned; \
> >> + int __ret_warn_once = !!(cond); \
> >> + \
> >> + if (unlikely(!(gfp & __GFP_NOWARN) && __ret_warn_once && !__warned)) { \
> >> + __warned = true; \
> >> + WARN_ON(1); \
> >> + } \
> >> + unlikely(__ret_warn_once); \
> >> +})
> >
> > I don't think WARN_ON_ONCE_GFP is a good name for this. But
> > WARN_ON_ONCE_IF_NOT_GFP_NOWARN is too long :(
> >
> > WARN_ON_ONCE_NOWARN might be better. No strong opinion here, really.
>
> I've thought about WARN_ON_ONCE_NOWARN, but I feel a little weird
> putting 'WARN' and 'NOWARN' together, how about WARN_ON_ONCE_IF_ALLOWED?

I dunno. WARN_ON_ONCE_GFP isn't too bad I suppose. Add a comment over
the definition explaining it?

> >
> >> @@ -4902,8 +4906,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
> >> * We also sanity check to catch abuse of atomic reserves being used by
> >> * callers that are not in atomic context.
> >> */
> >> - if (WARN_ON_ONCE((gfp_mask & (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)) ==
> >> - (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)))
> >> + if (WARN_ON_ONCE_GFP((gfp_mask & (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)) ==
> >> + (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM), gfp_mask))
> >> gfp_mask &= ~__GFP_ATOMIC;
> >>
> >> retry_cpuset:
> >
> > I dropped this hunk - Neil's "mm: discard __GFP_ATOMIC"
> > (https://lkml.kernel.org/r/[email protected])
> > deleted this code.
> >
>
> This series is based on v5.18-rc5, I will rebase it to the latest next
> branch and check if there are any missing WARN_ON_ONCEs that are not
> being handled.

Against git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm branch
mm-unstable, please. That ends up in linux-next, with a delay.

2022-05-11 09:37:43

by Qi Zheng

[permalink] [raw]

Subject: Re: [PATCH 1/2] mm: fix missing handler for __GFP_NOWARN

On 2022/5/10 7:38 PM, Qi Zheng wrote:
> We expect no warnings to be issued when we specify __GFP_NOWARN, but
> currently in paths like alloc_pages() and kmalloc(), there are still
> some warnings printed, fix it.

Hi Andrew,

Maybe we only need to deal with memory allocation failures, such as
should_fail() (This leads to deadlock in PATCH[2/2]). These
WARN_ON_ONCE()s report usage problems and should not be printed. If they
are printed, we should fix these usage problems.

Thanks,
Qi

>
> Signed-off-by: Qi Zheng <[email protected]>
> ---
> include/linux/fault-inject.h | 2 ++
> lib/fault-inject.c | 3 +++
> mm/failslab.c | 3 +++
> mm/internal.h | 11 +++++++++++
> mm/page_alloc.c | 22 ++++++++++++----------
> 5 files changed, 31 insertions(+), 10 deletions(-)
>
> diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h
> index 2d04f6448cde..9f6e25467844 100644
> --- a/include/linux/fault-inject.h
> +++ b/include/linux/fault-inject.h
> @@ -20,6 +20,7 @@ struct fault_attr {
> atomic_t space;
> unsigned long verbose;
> bool task_filter;
> + bool no_warn;
> unsigned long stacktrace_depth;
> unsigned long require_start;
> unsigned long require_end;
> @@ -39,6 +40,7 @@ struct fault_attr {
> .ratelimit_state = RATELIMIT_STATE_INIT_DISABLED, \
> .verbose = 2, \
> .dname = NULL, \
> + .no_warn = false, \
> }
>
> #define DECLARE_FAULT_ATTR(name) struct fault_attr name = FAULT_ATTR_INITIALIZER
> diff --git a/lib/fault-inject.c b/lib/fault-inject.c
> index ce12621b4275..423784d9c058 100644
> --- a/lib/fault-inject.c
> +++ b/lib/fault-inject.c
> @@ -41,6 +41,9 @@ EXPORT_SYMBOL_GPL(setup_fault_attr);
>
> static void fail_dump(struct fault_attr *attr)
> {
> + if (attr->no_warn)
> + return;
> +
> if (attr->verbose > 0 && __ratelimit(&attr->ratelimit_state)) {
> printk(KERN_NOTICE "FAULT_INJECTION: forcing a failure.\n"
> "name %pd, interval %lu, probability %lu, "
> diff --git a/mm/failslab.c b/mm/failslab.c
> index f92fed91ac23..58df9789f1d2 100644
> --- a/mm/failslab.c
> +++ b/mm/failslab.c
> @@ -30,6 +30,9 @@ bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
> if (failslab.cache_filter && !(s->flags & SLAB_FAILSLAB))
> return false;
>
> + if (gfpflags & __GFP_NOWARN)
> + failslab.attr.no_warn = true;
> +
> return should_fail(&failslab.attr, s->object_size);
> }
>
> diff --git a/mm/internal.h b/mm/internal.h
> index cf16280ce132..7a268fac6559 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -35,6 +35,17 @@ struct folio_batch;
> /* Do not use these with a slab allocator */
> #define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)
>
> +#define WARN_ON_ONCE_GFP(cond, gfp) ({ \
> + static bool __section(".data.once") __warned; \
> + int __ret_warn_once = !!(cond); \
> + \
> + if (unlikely(!(gfp & __GFP_NOWARN) && __ret_warn_once && !__warned)) { \
> + __warned = true; \
> + WARN_ON(1); \
> + } \
> + unlikely(__ret_warn_once); \
> +})
> +
> void page_writeback_init(void);
>
> static inline void *folio_raw_mapping(struct folio *folio)
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 0e42038382c1..2bf4ce4d0e2f 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -3722,7 +3722,7 @@ struct page *rmqueue(struct zone *preferred_zone,
> * We most definitely don't want callers attempting to
> * allocate greater than order-1 page units with __GFP_NOFAIL.
> */
> - WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1));
> + WARN_ON_ONCE_GFP((gfp_flags & __GFP_NOFAIL) && (order > 1), gfp_flags);
>
> do {
> page = NULL;
> @@ -3799,6 +3799,9 @@ static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
> (gfp_mask & __GFP_DIRECT_RECLAIM))
> return false;
>
> + if (gfp_mask & __GFP_NOWARN)
> + fail_page_alloc.attr.no_warn = true;
> +
> return should_fail(&fail_page_alloc.attr, 1 << order);
> }
>
> @@ -4346,7 +4349,8 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
> */
>
> /* Exhausted what can be done so it's blame time */
> - if (out_of_memory(&oc) || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL)) {
> + if (out_of_memory(&oc) ||
> + WARN_ON_ONCE_GFP(gfp_mask & __GFP_NOFAIL, gfp_mask)) {
> *did_some_progress = 1;
>
> /*
> @@ -4902,8 +4906,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
> * We also sanity check to catch abuse of atomic reserves being used by
> * callers that are not in atomic context.
> */
> - if (WARN_ON_ONCE((gfp_mask & (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)) ==
> - (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)))
> + if (WARN_ON_ONCE_GFP((gfp_mask & (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)) ==
> + (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM), gfp_mask))
> gfp_mask &= ~__GFP_ATOMIC;
>
> retry_cpuset:
> @@ -5117,7 +5121,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
> * All existing users of the __GFP_NOFAIL are blockable, so warn
> * of any new users that actually require GFP_NOWAIT
> */
> - if (WARN_ON_ONCE(!can_direct_reclaim))
> + if (WARN_ON_ONCE_GFP(!can_direct_reclaim, gfp_mask))
> goto fail;
>
> /*
> @@ -5125,7 +5129,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
> * because we cannot reclaim anything and only can loop waiting
> * for somebody to do a work for us
> */
> - WARN_ON_ONCE(current->flags & PF_MEMALLOC);
> + WARN_ON_ONCE_GFP(current->flags & PF_MEMALLOC, gfp_mask);
>
> /*
> * non failing costly orders are a hard requirement which we
> @@ -5133,7 +5137,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
> * so that we can identify them and convert them to something
> * else.
> */
> - WARN_ON_ONCE(order > PAGE_ALLOC_COSTLY_ORDER);
> + WARN_ON_ONCE_GFP(order > PAGE_ALLOC_COSTLY_ORDER, gfp_mask);
>
> /*
> * Help non-failing allocations by giving them access to memory
> @@ -5379,10 +5383,8 @@ struct page *__alloc_pages(gfp_t gfp, unsigned int order, int preferred_nid,
> * There are several places where we assume that the order value is sane
> * so bail out early if the request is out of bound.
> */
> - if (unlikely(order >= MAX_ORDER)) {
> - WARN_ON_ONCE(!(gfp & __GFP_NOWARN));
> + if (WARN_ON_ONCE_GFP(order >= MAX_ORDER, gfp))
> return NULL;
> - }
>
> gfp &= gfp_allowed_mask;
> /*

--
Thanks,
Qi

2022-05-11 11:58:43

by Jiri Slaby

[permalink] [raw]

Subject: Re: [PATCH 2/2] tty: fix deadlock caused by calling printk() under tty_port->lock

Hi,

LGTM:
Acked-by: Jiri Slaby <[email protected]>

Minor comments below.

On 10. 05. 22, 13:38, Qi Zheng wrote:
> The pty_write() invokes kmalloc() which may invoke a normal printk() to
> print failure message. This can cause a deadlock in the scenario reported
> by syz-bot below:
>
> CPU0 CPU1 CPU2
> ---- ---- ----
> lock(console_owner);
> lock(&port_lock_key);
> lock(&port->lock);
> lock(&port_lock_key);
> lock(&port->lock);
> lock(console_owner);
>
> As commit dbdda842fe96 ("printk: Add console owner and waiter logic to
> load balance console writes") said, such deadlock can be prevented by
> using printk_deferred() in kmalloc() (which is invoked in the section
> guarded by the port->lock). But there are too many printk() on the
> kmalloc() path, and kmalloc() can be called from anywhere, so changing
> printk() to printk_deferred() is too complicated and inelegant.
>
> Therefore, this patch chooses to specify __GFP_NOWARN to kmalloc(), so
> that printk() will not be called, and this deadlock problem can be avoided.
>
> Syz-bot reported the following lockdep error:
>
> ======================================================
> WARNING: possible circular locking dependency detected
> 5.4.143-00237-g08ccc19a-dirty #10 Not tainted
> ------------------------------------------------------
> syz-executor.4/29420 is trying to acquire lock:
> ffffffff8aedb2a0 (console_owner){....}-{0:0}, at: console_trylock_spinning kernel/printk/printk.c:1752 [inline]
> ffffffff8aedb2a0 (console_owner){....}-{0:0}, at: vprintk_emit+0x2ca/0x470 kernel/printk/printk.c:2023
>
> but task is already holding lock:
> ffff8880119c9158 (&port->lock){-.-.}-{2:2}, at: pty_write+0xf4/0x1f0 drivers/tty/pty.c:120
>
> which lock already depends on the new lock.
>
> the existing dependency chain (in reverse order) is:

Maybe trim the stack traces a bit, the commit message is unnecessarily
long...

> -> #2 (&port->lock){-.-.}-{2:2}:
> __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline]
> _raw_spin_lock_irqsave+0x35/0x50 kernel/locking/spinlock.c:159
> tty_port_tty_get drivers/tty/tty_port.c:288 [inline] <-- lock(&port->lock);
> tty_port_default_wakeup+0x1d/0xb0 drivers/tty/tty_port.c:47
> serial8250_tx_chars+0x530/0xa80 drivers/tty/serial/8250/8250_port.c:1767
> serial8250_handle_irq.part.0+0x31f/0x3d0 drivers/tty/serial/8250/8250_port.c:1854
> serial8250_handle_irq drivers/tty/serial/8250/8250_port.c:1827 [inline] <-- lock(&port_lock_key);
> serial8250_default_handle_irq+0xb2/0x220 drivers/tty/serial/8250/8250_port.c:1870
> serial8250_interrupt+0xfd/0x200 drivers/tty/serial/8250/8250_core.c:126
> __handle_irq_event_percpu+0x109/0xa50 kernel/irq/handle.c:156

Stop this trace here and trim the rest?

> handle_irq_event_percpu+0x76/0x170 kernel/irq/handle.c:196
> handle_irq_event+0xa1/0x130 kernel/irq/handle.c:213
> handle_edge_irq+0x261/0xd00 kernel/irq/chip.c:833
> generic_handle_irq_desc include/linux/irqdesc.h:156 [inline]
> do_IRQ+0xf2/0x2e0 arch/x86/kernel/irq.c:250
> ret_from_intr+0x0/0x19
> native_safe_halt arch/x86/include/asm/irqflags.h:60 [inline]
> arch_safe_halt arch/x86/include/asm/irqflags.h:103 [inline]
> default_idle+0x2c/0x1a0 arch/x86/kernel/process.c:572
> cpuidle_idle_call kernel/sched/idle.c:184 [inline]
> do_idle+0x44c/0x590 kernel/sched/idle.c:294
> cpu_startup_entry+0x14/0x20 kernel/sched/idle.c:386
> start_secondary+0x2d1/0x3e0 arch/x86/kernel/smpboot.c:264
> secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:241
>
> -> #1 (&port_lock_key){-.-.}-{2:2}:
> __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline]
> _raw_spin_lock_irqsave+0x35/0x50 kernel/locking/spinlock.c:159
> serial8250_console_write+0x184/0xa40 drivers/tty/serial/8250/8250_port.c:3198
> <-- lock(&port_lock_key);
> call_console_drivers kernel/printk/printk.c:1819 [inline]
> console_unlock+0x8cb/0xd00 kernel/printk/printk.c:2504
> vprintk_emit+0x1b5/0x470 kernel/printk/printk.c:2024 <-- lock(console_owner);
> vprintk_func+0x8d/0x250 kernel/printk/printk_safe.c:394
> printk+0xba/0xed kernel/printk/printk.c:2084
> register_console+0x8b3/0xc10 kernel/printk/printk.c:2829
> univ8250_console_init+0x3a/0x46 drivers/tty/serial/8250/8250_core.c:681
> console_init+0x49d/0x6d3 kernel/printk/printk.c:2915
> start_kernel+0x5e9/0x879 init/main.c:713
> secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:241
>
> -> #0 (console_owner){....}-{0:0}:
> check_prev_add kernel/locking/lockdep.c:2600 [inline]
> check_prevs_add kernel/locking/lockdep.c:2705 [inline]
> validate_chain kernel/locking/lockdep.c:3095 [inline]
> __lock_acquire+0x27e6/0x4cc0 kernel/locking/lockdep.c:4200

Delete the above 4 lines?

> lock_acquire+0x127/0x340 kernel/locking/lockdep.c:4734
> console_trylock_spinning kernel/printk/printk.c:1773 [inline] <-- lock(console_owner);
> vprintk_emit+0x307/0x470 kernel/printk/printk.c:2023
> vprintk_func+0x8d/0x250 kernel/printk/printk_safe.c:394
> printk+0xba/0xed kernel/printk/printk.c:2084
> fail_dump lib/fault-inject.c:45 [inline]
> should_fail+0x67b/0x7c0 lib/fault-inject.c:144
> __should_failslab+0x152/0x1c0 mm/failslab.c:33
> should_failslab+0x5/0x10 mm/slab_common.c:1224
> slab_pre_alloc_hook mm/slab.h:468 [inline]
> slab_alloc_node mm/slub.c:2723 [inline]
> slab_alloc mm/slub.c:2807 [inline]
> __kmalloc+0x72/0x300 mm/slub.c:3871
> kmalloc include/linux/slab.h:582 [inline]
> tty_buffer_alloc+0x23f/0x2a0 drivers/tty/tty_buffer.c:175
> __tty_buffer_request_room+0x156/0x2a0 drivers/tty/tty_buffer.c:273
> tty_insert_flip_string_fixed_flag+0x93/0x250 drivers/tty/tty_buffer.c:318
> tty_insert_flip_string include/linux/tty_flip.h:37 [inline]
> pty_write+0x126/0x1f0 drivers/tty/pty.c:122 <-- lock(&port->lock);
> n_tty_write+0xa7a/0xfc0 drivers/tty/n_tty.c:2356
> do_tty_write drivers/tty/tty_io.c:961 [inline]
> tty_write+0x512/0x930 drivers/tty/tty_io.c:1045
> __vfs_write+0x76/0x100 fs/read_write.c:494

And stop here?

> vfs_write+0x268/0x5c0 fs/read_write.c:558
> ksys_write+0x12d/0x250 fs/read_write.c:611
> do_syscall_64+0xd7/0x380 arch/x86/entry/common.c:291
> entry_SYSCALL_64_after_hwframe+0x49/0xbe
>
> other info that might help us debug this:
>
> Chain exists of:
> console_owner --> &port_lock_key --> &port->lock
>
> Fixes: b6da31b2c07c ("tty: Fix data race in tty_insert_flip_string_fixed_flag")
> Signed-off-by: Qi Zheng <[email protected]>
> ---
> drivers/tty/tty_buffer.c | 3 ++-
> 1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c
> index 646510476c30..bfa431a8e690 100644
> --- a/drivers/tty/tty_buffer.c
> +++ b/drivers/tty/tty_buffer.c
> @@ -175,7 +175,8 @@ static struct tty_buffer *tty_buffer_alloc(struct tty_port *port, size_t size)
> */
> if (atomic_read(&port->buf.mem_used) > port->buf.mem_limit)
> return NULL;
> - p = kmalloc(sizeof(struct tty_buffer) + 2 * size, GFP_ATOMIC);
> + p = kmalloc(sizeof(struct tty_buffer) + 2 * size,
> + GFP_ATOMIC | __GFP_NOWARN);
> if (p == NULL)
> return NULL;
>

--
js
suse labs