2023-07-18 10:11:20

by syzbot

Subject: [syzbot] [mm?] possible deadlock in shmem_uncharge (2)

Hello,

syzbot found the following issue on:

HEAD commit: 7c2878be5732 Add linux-next specific files for 20230714
git tree: linux-next
console+strace: https://syzkaller.appspot.com/x/log.txt?x=14b77fd8a80000
kernel config: https://syzkaller.appspot.com/x/.config?x=3baff2936ac3cefa
dashboard link: https://syzkaller.appspot.com/bug?extid=38ca19393fb3344f57e6
compiler: gcc (Debian 12.2.0-14) 12.2.0, GNU ld (GNU Binutils for Debian) 2.40
syz repro: https://syzkaller.appspot.com/x/repro.syz?x=153eea12a80000
C reproducer: https://syzkaller.appspot.com/x/repro.c?x=1169adeca80000

Downloadable assets:
disk image: https://storage.googleapis.com/syzbot-assets/bfdfa043f096/disk-7c2878be.raw.xz
vmlinux: https://storage.googleapis.com/syzbot-assets/cf7a97f69e2a/vmlinux-7c2878be.xz
kernel image: https://storage.googleapis.com/syzbot-assets/8366b63af2c6/bzImage-7c2878be.xz

The issue was bisected to:

commit 1a93dd24f1bee98ca121e68ce5c0de4a60a0a0b6
Author: Carlos Maiolino <[email protected]>
Date: Thu Jul 13 13:48:47 2023 +0000

shmem: quota support

bisection log: https://syzkaller.appspot.com/x/bisect.txt?x=11af3afaa80000
final oops: https://syzkaller.appspot.com/x/report.txt?x=13af3afaa80000
console output: https://syzkaller.appspot.com/x/log.txt?x=15af3afaa80000

IMPORTANT: if you fix the issue, please add the following tag to the commit:
Reported-by: [email protected]
Fixes: 1a93dd24f1be ("shmem: quota support")

======================================================
WARNING: possible circular locking dependency detected
6.5.0-rc1-next-20230714-syzkaller #0 Not tainted
------------------------------------------------------
/5027 is trying to acquire lock:
ffff88807dbd8758 (&info->lock){....}-{2:2}, at: shmem_uncharge+0x28/0x2b0 mm/shmem.c:450

but task is already holding lock:
ffff88801526c068 (&lruvec->lru_lock){....}-{2:2}, at: spin_lock include/linux/spinlock.h:351 [inline]
ffff88801526c068 (&lruvec->lru_lock){....}-{2:2}, at: folio_lruvec_lock+0x1ba/0x3b0 mm/memcontrol.c:1323

which lock already depends on the new lock.


the existing dependency chain (in reverse order) is:

-> #3 (&lruvec->lru_lock){....}-{2:2}:
__raw_spin_lock include/linux/spinlock_api_smp.h:133 [inline]
_raw_spin_lock+0x2e/0x40 kernel/locking/spinlock.c:154
spin_lock include/linux/spinlock.h:351 [inline]
folio_lruvec_lock+0x1ba/0x3b0 mm/memcontrol.c:1323
__split_huge_page mm/huge_memory.c:2538 [inline]
split_huge_page_to_list+0x103b/0x49e0 mm/huge_memory.c:2772
split_folio_to_list include/linux/huge_mm.h:400 [inline]
split_folio include/linux/huge_mm.h:405 [inline]
truncate_inode_partial_folio+0x544/0x760 mm/truncate.c:242
shmem_undo_range+0x723/0x1190 mm/shmem.c:1026
shmem_truncate_range mm/shmem.c:1120 [inline]
shmem_setattr+0xd43/0x1050 mm/shmem.c:1205
notify_change+0x742/0x11c0 fs/attr.c:485
do_truncate+0x15c/0x220 fs/open.c:66
do_sys_ftruncate+0x6a2/0x790 fs/open.c:194
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x63/0xcd

-> #2 (&xa->xa_lock#7){..-.}-{2:2}:
__raw_spin_lock_irq include/linux/spinlock_api_smp.h:119 [inline]
_raw_spin_lock_irq+0x36/0x50 kernel/locking/spinlock.c:170
spin_lock_irq include/linux/spinlock.h:376 [inline]
filemap_remove_folio+0xbf/0x250 mm/filemap.c:259
truncate_inode_folio+0x49/0x70 mm/truncate.c:195
shmem_undo_range+0x363/0x1190 mm/shmem.c:1004
shmem_truncate_range mm/shmem.c:1120 [inline]
shmem_evict_inode+0x334/0xb10 mm/shmem.c:1250
evict+0x2ed/0x6b0 fs/inode.c:665
iput_final fs/inode.c:1791 [inline]
iput.part.0+0x55e/0x7a0 fs/inode.c:1817
iput+0x5c/0x80 fs/inode.c:1807
dentry_unlink_inode+0x292/0x430 fs/dcache.c:401
__dentry_kill+0x3b8/0x640 fs/dcache.c:607
dentry_kill fs/dcache.c:745 [inline]
dput+0x703/0xfd0 fs/dcache.c:913
do_renameat2+0xc4c/0xdc0 fs/namei.c:5011
__do_sys_rename fs/namei.c:5055 [inline]
__se_sys_rename fs/namei.c:5053 [inline]
__x64_sys_rename+0x81/0xa0 fs/namei.c:5053
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x63/0xcd

-> #1 (&sb->s_type->i_lock_key){+.+.}-{2:2}:
__raw_spin_lock include/linux/spinlock_api_smp.h:133 [inline]
_raw_spin_lock+0x2e/0x40 kernel/locking/spinlock.c:154
spin_lock include/linux/spinlock.h:351 [inline]
inode_sub_bytes+0x28/0x100 fs/stat.c:816
__dquot_free_space+0x8f7/0xaf0 fs/quota/dquot.c:1881
dquot_free_space_nodirty include/linux/quotaops.h:379 [inline]
dquot_free_block_nodirty include/linux/quotaops.h:390 [inline]
shmem_inode_unacct_blocks mm/shmem.c:243 [inline]
shmem_recalc_inode+0x196/0x350 mm/shmem.c:420
shmem_undo_range+0x558/0x1190 mm/shmem.c:1114
shmem_truncate_range mm/shmem.c:1120 [inline]
shmem_evict_inode+0x334/0xb10 mm/shmem.c:1250
evict+0x2ed/0x6b0 fs/inode.c:665
iput_final fs/inode.c:1791 [inline]
iput.part.0+0x55e/0x7a0 fs/inode.c:1817
iput+0x5c/0x80 fs/inode.c:1807
dentry_unlink_inode+0x292/0x430 fs/dcache.c:401
__dentry_kill+0x3b8/0x640 fs/dcache.c:607
dentry_kill fs/dcache.c:745 [inline]
dput+0x703/0xfd0 fs/dcache.c:913
do_renameat2+0xc4c/0xdc0 fs/namei.c:5011
__do_sys_rename fs/namei.c:5055 [inline]
__se_sys_rename fs/namei.c:5053 [inline]
__x64_sys_rename+0x81/0xa0 fs/namei.c:5053
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x63/0xcd

-> #0 (&info->lock){....}-{2:2}:
check_prev_add kernel/locking/lockdep.c:3142 [inline]
check_prevs_add kernel/locking/lockdep.c:3261 [inline]
validate_chain kernel/locking/lockdep.c:3876 [inline]
__lock_acquire+0x2e3d/0x5de0 kernel/locking/lockdep.c:5144
lock_acquire kernel/locking/lockdep.c:5761 [inline]
lock_acquire+0x1ae/0x510 kernel/locking/lockdep.c:5726
__raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline]
_raw_spin_lock_irqsave+0x3a/0x50 kernel/locking/spinlock.c:162
shmem_uncharge+0x28/0x2b0 mm/shmem.c:450
__split_huge_page mm/huge_memory.c:2549 [inline]
split_huge_page_to_list+0x3832/0x49e0 mm/huge_memory.c:2772
split_folio_to_list include/linux/huge_mm.h:400 [inline]
split_folio include/linux/huge_mm.h:405 [inline]
truncate_inode_partial_folio+0x544/0x760 mm/truncate.c:242
shmem_undo_range+0x723/0x1190 mm/shmem.c:1026
shmem_truncate_range mm/shmem.c:1120 [inline]
shmem_setattr+0xd43/0x1050 mm/shmem.c:1205
notify_change+0x742/0x11c0 fs/attr.c:485
do_truncate+0x15c/0x220 fs/open.c:66
do_sys_ftruncate+0x6a2/0x790 fs/open.c:194
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x63/0xcd

other info that might help us debug this:

Chain exists of:
&info->lock --> &xa->xa_lock#7 --> &lruvec->lru_lock

Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(&lruvec->lru_lock);
                               lock(&xa->xa_lock#7);
                               lock(&lruvec->lru_lock);
  lock(&info->lock);

*** DEADLOCK ***

5 locks held by /5027:
#0: ffff8880762b4410 (sb_writers#5){.+.+}-{0:0}, at: do_syscall_x64 arch/x86/entry/common.c:50 [inline]
#0: ffff8880762b4410 (sb_writers#5){.+.+}-{0:0}, at: do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80
#1: ffff88807dbd8a50 (&sb->s_type->i_mutex_key#12){+.+.}-{3:3}, at: inode_lock include/linux/fs.h:771 [inline]
#1: ffff88807dbd8a50 (&sb->s_type->i_mutex_key#12){+.+.}-{3:3}, at: do_truncate+0x14b/0x220 fs/open.c:64
#2: ffff88807dbd8cf0 (&mapping->i_mmap_rwsem){++++}-{3:3}, at: i_mmap_lock_read include/linux/fs.h:501 [inline]
#2: ffff88807dbd8cf0 (&mapping->i_mmap_rwsem){++++}-{3:3}, at: split_huge_page_to_list+0x7d5/0x49e0 mm/huge_memory.c:2712
#3: ffff88807dbd8b60 (&xa->xa_lock#7){..-.}-{2:2}, at: spin_lock include/linux/spinlock.h:351 [inline]
#3: ffff88807dbd8b60 (&xa->xa_lock#7){..-.}-{2:2}, at: split_huge_page_to_list+0x980/0x49e0 mm/huge_memory.c:2744
#4: ffff88801526c068 (&lruvec->lru_lock){....}-{2:2}, at: spin_lock include/linux/spinlock.h:351 [inline]
#4: ffff88801526c068 (&lruvec->lru_lock){....}-{2:2}, at: folio_lruvec_lock+0x1ba/0x3b0 mm/memcontrol.c:1323

stack backtrace:
CPU: 0 PID: 5027 Comm: Not tainted 6.5.0-rc1-next-20230714-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/03/2023
Call Trace:
<TASK>
__dump_stack lib/dump_stack.c:88 [inline]
dump_stack_lvl+0xd9/0x1b0 lib/dump_stack.c:106
check_noncircular+0x311/0x3f0 kernel/locking/lockdep.c:2195
check_prev_add kernel/locking/lockdep.c:3142 [inline]
check_prevs_add kernel/locking/lockdep.c:3261 [inline]
validate_chain kernel/locking/lockdep.c:3876 [inline]
__lock_acquire+0x2e3d/0x5de0 kernel/locking/lockdep.c:5144
lock_acquire kernel/locking/lockdep.c:5761 [inline]
lock_acquire+0x1ae/0x510 kernel/locking/lockdep.c:5726
__raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline]
_raw_spin_lock_irqsave+0x3a/0x50 kernel/locking/spinlock.c:162
shmem_uncharge+0x28/0x2b0 mm/shmem.c:450
__split_huge_page mm/huge_memory.c:2549 [inline]
split_huge_page_to_list+0x3832/0x49e0 mm/huge_memory.c:2772
split_folio_to_list include/linux/huge_mm.h:400 [inline]
split_folio include/linux/huge_mm.h:405 [inline]
truncate_inode_partial_folio+0x544/0x760 mm/truncate.c:242
shmem_undo_range+0x723/0x1190 mm/shmem.c:1026
shmem_truncate_range mm/shmem.c:1120 [inline]
shmem_setattr+0xd43/0x1050 mm/shmem.c:1205
notify_change+0x742/0x11c0 fs/attr.c:485
do_truncate+0x15c/0x220 fs/open.c:66
do_sys_ftruncate+0x6a2/0x790 fs/open.c:194
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x63/0xcd
RIP: 0033:0x7fcc0ae38b99
Code: 48 83 c4 28 c3 e8 67 17 00 00 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007ffcd4272e58 EFLAGS: 00000246 ORIG_RAX: 000000000000004d
RAX: ffffffffffffffda RBX: 00007ffcd4272e60 RCX: 00007fcc0ae38b99
RDX: 00007fcc0ae38b99 RSI: 0000000000008979 RDI: 0000000000000003
RBP: 00007ffcd4272e68 R08: 00007fcc0ae05c10 R09: 00007fcc0ae05c10
R10: 0000000000000000 R11: 000000
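
To make the reported cycle concrete: the numbered stanzas above record an
existing ordering info->lock -> i_lock -> xa_lock (i_pages) -> lru_lock, each
edge coming from a different call path, while the new acquisition in
shmem_uncharge() adds lru_lock -> info->lock, which closes the cycle. Below is
only a minimal userspace sketch of those two orderings, with pthread mutexes
standing in for the kernel spinlocks; the names, and the collapsing of the
chain into a single function, are illustrative and not kernel code.

/*
 * Minimal userspace sketch of the lock-order inversion reported above.
 * pthread mutexes stand in for the kernel spinlocks; the "established"
 * chain below was really recorded across three separate call paths and
 * is collapsed into one function here only for brevity.
 *
 * Build with: cc -Wall -pthread lock_order_sketch.c
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t info_lock = PTHREAD_MUTEX_INITIALIZER; /* shmem info->lock */
static pthread_mutex_t i_lock    = PTHREAD_MUTEX_INITIALIZER; /* inode->i_lock */
static pthread_mutex_t xa_lock   = PTHREAD_MUTEX_INITIALIZER; /* i_pages xa_lock */
static pthread_mutex_t lru_lock  = PTHREAD_MUTEX_INITIALIZER; /* lruvec->lru_lock */

/* Ordering already known to lockdep (stanzas #1..#3 above). */
static void established_order(void)
{
        pthread_mutex_lock(&info_lock);  /* shmem_recalc_inode() runs under info->lock */
        pthread_mutex_lock(&i_lock);     /* dquot free -> inode_sub_bytes() */
        pthread_mutex_lock(&xa_lock);    /* filemap_remove_folio() nests i_pages inside i_lock */
        pthread_mutex_lock(&lru_lock);   /* __split_huge_page() takes lru_lock under xa_lock */
        pthread_mutex_unlock(&lru_lock);
        pthread_mutex_unlock(&xa_lock);
        pthread_mutex_unlock(&i_lock);
        pthread_mutex_unlock(&info_lock);
}

/* New ordering from stanza #0: shmem_uncharge() takes info->lock while
 * xa_lock and lru_lock are still held, inverting the chain above. */
static void offending_order(void)
{
        pthread_mutex_lock(&xa_lock);
        pthread_mutex_lock(&lru_lock);
        pthread_mutex_lock(&info_lock);
        pthread_mutex_unlock(&info_lock);
        pthread_mutex_unlock(&lru_lock);
        pthread_mutex_unlock(&xa_lock);
}

int main(void)
{
        /* Run sequentially so the demo terminates; two threads running these
         * concurrently could deadlock, which is exactly what lockdep warns of. */
        established_order();
        offending_order();
        puts("both orderings executed sequentially, so no deadlock here");
        return 0;
}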


---
This report is generated by a bot. It may contain errors.
See https://goo.gl/tpsmEJ for more information about syzbot.
syzbot engineers can be reached at [email protected].

syzbot will keep track of this issue. See:
https://goo.gl/tpsmEJ#status for how to communicate with syzbot.
For information about bisection process see: https://goo.gl/tpsmEJ#bisection

If the bug is already fixed, let syzbot know by replying with:
#syz fix: exact-commit-title

If you want syzbot to run the reproducer, reply with:
#syz test: git://repo/address.git branch-or-commit-hash
If you attach or paste a git patch, syzbot will apply it before testing.

If you want to change bug's subsystems, reply with:
#syz set subsystems: new-subsystem
(See the list of subsystem names on the web dashboard)

If the bug is a duplicate of another bug, reply with:
#syz dup: exact-subject-of-another-report

If you want to undo deduplication, reply with:
#syz undup


2023-07-18 18:56:05

by Hugh Dickins

Subject: Re: [syzbot] [mm?] possible deadlock in shmem_uncharge (2)

On Tue, 18 Jul 2023, syzbot wrote:

> Hello,
>
> syzbot found the following issue on:

Yes, this doesn't require any syzbot trickery, it showed up as soon as
I tried a shmem quota linux-next with lockdep and shmem huge last week.

There are some other things wrong with the accounting there (in the
non-quota case anyway): I have been working up a patch to fix them, but
need to consider what must go in quickly, and what should wait until later.

Carlos, in brief: don't worry about this syzbot report, I'm on it (but
there's a risk that any patch I send may turn out to break your quotas).

Hugh

>
> HEAD commit: 7c2878be5732 Add linux-next specific files for 20230714
> git tree: linux-next
> console+strace: https://syzkaller.appspot.com/x/log.txt?x=14b77fd8a80000
> kernel config: https://syzkaller.appspot.com/x/.config?x=3baff2936ac3cefa
> dashboard link: https://syzkaller.appspot.com/bug?extid=38ca19393fb3344f57e6
> compiler: gcc (Debian 12.2.0-14) 12.2.0, GNU ld (GNU Binutils for Debian) 2.40
> syz repro: https://syzkaller.appspot.com/x/repro.syz?x=153eea12a80000
> C reproducer: https://syzkaller.appspot.com/x/repro.c?x=1169adeca80000
>
> Downloadable assets:
> disk image: https://storage.googleapis.com/syzbot-assets/bfdfa043f096/disk-7c2878be.raw.xz
> vmlinux: https://storage.googleapis.com/syzbot-assets/cf7a97f69e2a/vmlinux-7c2878be.xz
> kernel image: https://storage.googleapis.com/syzbot-assets/8366b63af2c6/bzImage-7c2878be.xz
>
> The issue was bisected to:
>
> commit 1a93dd24f1bee98ca121e68ce5c0de4a60a0a0b6
> Author: Carlos Maiolino <[email protected]>
> Date: Thu Jul 13 13:48:47 2023 +0000
>
> shmem: quota support
>
> bisection log: https://syzkaller.appspot.com/x/bisect.txt?x=11af3afaa80000
> final oops: https://syzkaller.appspot.com/x/report.txt?x=13af3afaa80000
> console output: https://syzkaller.appspot.com/x/log.txt?x=15af3afaa80000
>
> IMPORTANT: if you fix the issue, please add the following tag to the commit:
> Reported-by: [email protected]
> Fixes: 1a93dd24f1be ("shmem: quota support")
>
> ======================================================
> WARNING: possible circular locking dependency detected
> 6.5.0-rc1-next-20230714-syzkaller #0 Not tainted
> ------------------------------------------------------
> /5027 is trying to acquire lock:
> ffff88807dbd8758 (&info->lock){....}-{2:2}, at: shmem_uncharge+0x28/0x2b0 mm/shmem.c:450
>
> but task is already holding lock:
> ffff88801526c068 (&lruvec->lru_lock){....}-{2:2}, at: spin_lock include/linux/spinlock.h:351 [inline]
> ffff88801526c068 (&lruvec->lru_lock){....}-{2:2}, at: folio_lruvec_lock+0x1ba/0x3b0 mm/memcontrol.c:1323
>
> which lock already depends on the new lock.
>
>
> the existing dependency chain (in reverse order) is:
>
> -> #3 (&lruvec->lru_lock){....}-{2:2}:
> __raw_spin_lock include/linux/spinlock_api_smp.h:133 [inline]
> _raw_spin_lock+0x2e/0x40 kernel/locking/spinlock.c:154
> spin_lock include/linux/spinlock.h:351 [inline]
> folio_lruvec_lock+0x1ba/0x3b0 mm/memcontrol.c:1323
> __split_huge_page mm/huge_memory.c:2538 [inline]
> split_huge_page_to_list+0x103b/0x49e0 mm/huge_memory.c:2772
> split_folio_to_list include/linux/huge_mm.h:400 [inline]
> split_folio include/linux/huge_mm.h:405 [inline]
> truncate_inode_partial_folio+0x544/0x760 mm/truncate.c:242
> shmem_undo_range+0x723/0x1190 mm/shmem.c:1026
> shmem_truncate_range mm/shmem.c:1120 [inline]
> shmem_setattr+0xd43/0x1050 mm/shmem.c:1205
> notify_change+0x742/0x11c0 fs/attr.c:485
> do_truncate+0x15c/0x220 fs/open.c:66
> do_sys_ftruncate+0x6a2/0x790 fs/open.c:194
> do_syscall_x64 arch/x86/entry/common.c:50 [inline]
> do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80
> entry_SYSCALL_64_after_hwframe+0x63/0xcd
>
> -> #2 (&xa->xa_lock#7){..-.}-{2:2}:
> __raw_spin_lock_irq include/linux/spinlock_api_smp.h:119 [inline]
> _raw_spin_lock_irq+0x36/0x50 kernel/locking/spinlock.c:170
> spin_lock_irq include/linux/spinlock.h:376 [inline]
> filemap_remove_folio+0xbf/0x250 mm/filemap.c:259
> truncate_inode_folio+0x49/0x70 mm/truncate.c:195
> shmem_undo_range+0x363/0x1190 mm/shmem.c:1004
> shmem_truncate_range mm/shmem.c:1120 [inline]
> shmem_evict_inode+0x334/0xb10 mm/shmem.c:1250
> evict+0x2ed/0x6b0 fs/inode.c:665
> iput_final fs/inode.c:1791 [inline]
> iput.part.0+0x55e/0x7a0 fs/inode.c:1817
> iput+0x5c/0x80 fs/inode.c:1807
> dentry_unlink_inode+0x292/0x430 fs/dcache.c:401
> __dentry_kill+0x3b8/0x640 fs/dcache.c:607
> dentry_kill fs/dcache.c:745 [inline]
> dput+0x703/0xfd0 fs/dcache.c:913
> do_renameat2+0xc4c/0xdc0 fs/namei.c:5011
> __do_sys_rename fs/namei.c:5055 [inline]
> __se_sys_rename fs/namei.c:5053 [inline]
> __x64_sys_rename+0x81/0xa0 fs/namei.c:5053
> do_syscall_x64 arch/x86/entry/common.c:50 [inline]
> do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80
> entry_SYSCALL_64_after_hwframe+0x63/0xcd
>
> -> #1 (&sb->s_type->i_lock_key){+.+.}-{2:2}:
> __raw_spin_lock include/linux/spinlock_api_smp.h:133 [inline]
> _raw_spin_lock+0x2e/0x40 kernel/locking/spinlock.c:154
> spin_lock include/linux/spinlock.h:351 [inline]
> inode_sub_bytes+0x28/0x100 fs/stat.c:816
> __dquot_free_space+0x8f7/0xaf0 fs/quota/dquot.c:1881
> dquot_free_space_nodirty include/linux/quotaops.h:379 [inline]
> dquot_free_block_nodirty include/linux/quotaops.h:390 [inline]
> shmem_inode_unacct_blocks mm/shmem.c:243 [inline]
> shmem_recalc_inode+0x196/0x350 mm/shmem.c:420
> shmem_undo_range+0x558/0x1190 mm/shmem.c:1114
> shmem_truncate_range mm/shmem.c:1120 [inline]
> shmem_evict_inode+0x334/0xb10 mm/shmem.c:1250
> evict+0x2ed/0x6b0 fs/inode.c:665
> iput_final fs/inode.c:1791 [inline]
> iput.part.0+0x55e/0x7a0 fs/inode.c:1817
> iput+0x5c/0x80 fs/inode.c:1807
> dentry_unlink_inode+0x292/0x430 fs/dcache.c:401
> __dentry_kill+0x3b8/0x640 fs/dcache.c:607
> dentry_kill fs/dcache.c:745 [inline]
> dput+0x703/0xfd0 fs/dcache.c:913
> do_renameat2+0xc4c/0xdc0 fs/namei.c:5011
> __do_sys_rename fs/namei.c:5055 [inline]
> __se_sys_rename fs/namei.c:5053 [inline]
> __x64_sys_rename+0x81/0xa0 fs/namei.c:5053
> do_syscall_x64 arch/x86/entry/common.c:50 [inline]
> do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80
> entry_SYSCALL_64_after_hwframe+0x63/0xcd
>
> -> #0 (&info->lock){....}-{2:2}:
> check_prev_add kernel/locking/lockdep.c:3142 [inline]
> check_prevs_add kernel/locking/lockdep.c:3261 [inline]
> validate_chain kernel/locking/lockdep.c:3876 [inline]
> __lock_acquire+0x2e3d/0x5de0 kernel/locking/lockdep.c:5144
> lock_acquire kernel/locking/lockdep.c:5761 [inline]
> lock_acquire+0x1ae/0x510 kernel/locking/lockdep.c:5726
> __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline]
> _raw_spin_lock_irqsave+0x3a/0x50 kernel/locking/spinlock.c:162
> shmem_uncharge+0x28/0x2b0 mm/shmem.c:450
> __split_huge_page mm/huge_memory.c:2549 [inline]
> split_huge_page_to_list+0x3832/0x49e0 mm/huge_memory.c:2772
> split_folio_to_list include/linux/huge_mm.h:400 [inline]
> split_folio include/linux/huge_mm.h:405 [inline]
> truncate_inode_partial_folio+0x544/0x760 mm/truncate.c:242
> shmem_undo_range+0x723/0x1190 mm/shmem.c:1026
> shmem_truncate_range mm/shmem.c:1120 [inline]
> shmem_setattr+0xd43/0x1050 mm/shmem.c:1205
> notify_change+0x742/0x11c0 fs/attr.c:485
> do_truncate+0x15c/0x220 fs/open.c:66
> do_sys_ftruncate+0x6a2/0x790 fs/open.c:194
> do_syscall_x64 arch/x86/entry/common.c:50 [inline]
> do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80
> entry_SYSCALL_64_after_hwframe+0x63/0xcd
>
> other info that might help us debug this:
>
> Chain exists of:
> &info->lock --> &xa->xa_lock#7 --> &lruvec->lru_lock
>
> Possible unsafe locking scenario:
>
>        CPU0                    CPU1
>        ----                    ----
>   lock(&lruvec->lru_lock);
>                                lock(&xa->xa_lock#7);
>                                lock(&lruvec->lru_lock);
>   lock(&info->lock);
>
> *** DEADLOCK ***
>
> 5 locks held by /5027:
> #0: ffff8880762b4410 (sb_writers#5){.+.+}-{0:0}, at: do_syscall_x64 arch/x86/entry/common.c:50 [inline]
> #0: ffff8880762b4410 (sb_writers#5){.+.+}-{0:0}, at: do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80
> #1: ffff88807dbd8a50 (&sb->s_type->i_mutex_key#12){+.+.}-{3:3}, at: inode_lock include/linux/fs.h:771 [inline]
> #1: ffff88807dbd8a50 (&sb->s_type->i_mutex_key#12){+.+.}-{3:3}, at: do_truncate+0x14b/0x220 fs/open.c:64
> #2: ffff88807dbd8cf0 (&mapping->i_mmap_rwsem){++++}-{3:3}, at: i_mmap_lock_read include/linux/fs.h:501 [inline]
> #2: ffff88807dbd8cf0 (&mapping->i_mmap_rwsem){++++}-{3:3}, at: split_huge_page_to_list+0x7d5/0x49e0 mm/huge_memory.c:2712
> #3: ffff88807dbd8b60 (&xa->xa_lock#7){..-.}-{2:2}, at: spin_lock include/linux/spinlock.h:351 [inline]
> #3: ffff88807dbd8b60 (&xa->xa_lock#7){..-.}-{2:2}, at: split_huge_page_to_list+0x980/0x49e0 mm/huge_memory.c:2744
> #4: ffff88801526c068 (&lruvec->lru_lock){....}-{2:2}, at: spin_lock include/linux/spinlock.h:351 [inline]
> #4: ffff88801526c068 (&lruvec->lru_lock){....}-{2:2}, at: folio_lruvec_lock+0x1ba/0x3b0 mm/memcontrol.c:1323
>
> stack backtrace:
> CPU: 0 PID: 5027 Comm: Not tainted 6.5.0-rc1-next-20230714-syzkaller #0
> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/03/2023
> Call Trace:
> <TASK>
> __dump_stack lib/dump_stack.c:88 [inline]
> dump_stack_lvl+0xd9/0x1b0 lib/dump_stack.c:106
> check_noncircular+0x311/0x3f0 kernel/locking/lockdep.c:2195
> check_prev_add kernel/locking/lockdep.c:3142 [inline]
> check_prevs_add kernel/locking/lockdep.c:3261 [inline]
> validate_chain kernel/locking/lockdep.c:3876 [inline]
> __lock_acquire+0x2e3d/0x5de0 kernel/locking/lockdep.c:5144
> lock_acquire kernel/locking/lockdep.c:5761 [inline]
> lock_acquire+0x1ae/0x510 kernel/locking/lockdep.c:5726
> __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline]
> _raw_spin_lock_irqsave+0x3a/0x50 kernel/locking/spinlock.c:162
> shmem_uncharge+0x28/0x2b0 mm/shmem.c:450
> __split_huge_page mm/huge_memory.c:2549 [inline]
> split_huge_page_to_list+0x3832/0x49e0 mm/huge_memory.c:2772
> split_folio_to_list include/linux/huge_mm.h:400 [inline]
> split_folio include/linux/huge_mm.h:405 [inline]
> truncate_inode_partial_folio+0x544/0x760 mm/truncate.c:242
> shmem_undo_range+0x723/0x1190 mm/shmem.c:1026
> shmem_truncate_range mm/shmem.c:1120 [inline]
> shmem_setattr+0xd43/0x1050 mm/shmem.c:1205
> notify_change+0x742/0x11c0 fs/attr.c:485
> do_truncate+0x15c/0x220 fs/open.c:66
> do_sys_ftruncate+0x6a2/0x790 fs/open.c:194
> do_syscall_x64 arch/x86/entry/common.c:50 [inline]
> do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80
> entry_SYSCALL_64_after_hwframe+0x63/0xcd
> RIP: 0033:0x7fcc0ae38b99
> Code: 48 83 c4 28 c3 e8 67 17 00 00 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48
> RSP: 002b:00007ffcd4272e58 EFLAGS: 00000246 ORIG_RAX: 000000000000004d
> RAX: ffffffffffffffda RBX: 00007ffcd4272e60 RCX: 00007fcc0ae38b99
> RDX: 00007fcc0ae38b99 RSI: 0000000000008979 RDI: 0000000000000003
> RBP: 00007ffcd4272e68 R08: 00007fcc0ae05c10 R09: 00007fcc0ae05c10
> R10: 0000000000000000 R11: 000000
>
>
> ---
> This report is generated by a bot. It may contain errors.
> See https://goo.gl/tpsmEJ for more information about syzbot.
> syzbot engineers can be reached at [email protected].
>
> syzbot will keep track of this issue. See:
> https://goo.gl/tpsmEJ#status for how to communicate with syzbot.
> For information about bisection process see: https://goo.gl/tpsmEJ#bisection
>
> If the bug is already fixed, let syzbot know by replying with:
> #syz fix: exact-commit-title
>
> If you want syzbot to run the reproducer, reply with:
> #syz test: git://repo/address.git branch-or-commit-hash
> If you attach or paste a git patch, syzbot will apply it before testing.
>
> If you want to change bug's subsystems, reply with:
> #syz set subsystems: new-subsystem
> (See the list of subsystem names on the web dashboard)
>
> If the bug is a duplicate of another bug, reply with:
> #syz dup: exact-subject-of-another-report
>
> If you want to undo deduplication, reply with:
> #syz undup

2023-07-18 19:41:57

by Carlos Maiolino

Subject: Re: [syzbot] [mm?] possible deadlock in shmem_uncharge (2)

On Tue, Jul 18, 2023 at 11:22:59AM -0700, Hugh Dickins wrote:
> On Tue, 18 Jul 2023, syzbot wrote:
>
> > Hello,
> >
> > syzbot found the following issue on:
>
> Yes, this doesn't require any syzbot trickery, it showed up as soon as
> I tried a shmem quota linux-next with lockdep and shmem huge last week.
>
> There's some other things wrong with the accounting there (in the non-
> quota case anyway): I have been working up a patch to fix them, but need
> to consider what must go in quickly, and what should wait until later.
>
> Carlos, in brief: don't worry about this syzbot report, I'm on it (but
> there's a risk that any patch I send may turn out to break your quotas).

Ok, thanks Hugh. I had the next version ready to go when I saw those syzkaller
reports.

I will send the new series tomorrow morning then.
Could you please keep me in the loop? I'm interested to see what's going on
there.

Carlos.


>
> Hugh
>
> >
> > HEAD commit: 7c2878be5732 Add linux-next specific files for 20230714
> > git tree: linux-next
> > console+strace: https://syzkaller.appspot.com/x/log.txt?x=14b77fd8a80000
> > kernel config: https://syzkaller.appspot.com/x/.config?x=3baff2936ac3cefa
> > dashboard link: https://syzkaller.appspot.com/bug?extid=38ca19393fb3344f57e6
> > compiler: gcc (Debian 12.2.0-14) 12.2.0, GNU ld (GNU Binutils for Debian) 2.40
> > syz repro: https://syzkaller.appspot.com/x/repro.syz?x=153eea12a80000
> > C reproducer: https://syzkaller.appspot.com/x/repro.c?x=1169adeca80000
> >
> > Downloadable assets:
> > disk image: https://storage.googleapis.com/syzbot-assets/bfdfa043f096/disk-7c2878be.raw.xz
> > vmlinux: https://storage.googleapis.com/syzbot-assets/cf7a97f69e2a/vmlinux-7c2878be.xz
> > kernel image: https://storage.googleapis.com/syzbot-assets/8366b63af2c6/bzImage-7c2878be.xz
> >
> > The issue was bisected to:
> >
> > commit 1a93dd24f1bee98ca121e68ce5c0de4a60a0a0b6
> > Author: Carlos Maiolino <[email protected]>
> > Date: Thu Jul 13 13:48:47 2023 +0000
> >
> > shmem: quota support
> >
> > bisection log: https://syzkaller.appspot.com/x/bisect.txt?x=11af3afaa80000
> > final oops: https://syzkaller.appspot.com/x/report.txt?x=13af3afaa80000
> > console output: https://syzkaller.appspot.com/x/log.txt?x=15af3afaa80000
> >
> > IMPORTANT: if you fix the issue, please add the following tag to the commit:
> > Reported-by: [email protected]
> > Fixes: 1a93dd24f1be ("shmem: quota support")
> >
> > ======================================================
> > WARNING: possible circular locking dependency detected
> > 6.5.0-rc1-next-20230714-syzkaller #0 Not tainted
> > ------------------------------------------------------
> > /5027 is trying to acquire lock:
> > ffff88807dbd8758 (&info->lock){....}-{2:2}, at: shmem_uncharge+0x28/0x2b0 mm/shmem.c:450
> >
> > but task is already holding lock:
> > ffff88801526c068 (&lruvec->lru_lock){....}-{2:2}, at: spin_lock include/linux/spinlock.h:351 [inline]
> > ffff88801526c068 (&lruvec->lru_lock){....}-{2:2}, at: folio_lruvec_lock+0x1ba/0x3b0 mm/memcontrol.c:1323
> >
> > which lock already depends on the new lock.
> >
> >
> > the existing dependency chain (in reverse order) is:
> >
> > -> #3 (&lruvec->lru_lock){....}-{2:2}:
> > __raw_spin_lock include/linux/spinlock_api_smp.h:133 [inline]
> > _raw_spin_lock+0x2e/0x40 kernel/locking/spinlock.c:154
> > spin_lock include/linux/spinlock.h:351 [inline]
> > folio_lruvec_lock+0x1ba/0x3b0 mm/memcontrol.c:1323
> > __split_huge_page mm/huge_memory.c:2538 [inline]
> > split_huge_page_to_list+0x103b/0x49e0 mm/huge_memory.c:2772
> > split_folio_to_list include/linux/huge_mm.h:400 [inline]
> > split_folio include/linux/huge_mm.h:405 [inline]
> > truncate_inode_partial_folio+0x544/0x760 mm/truncate.c:242
> > shmem_undo_range+0x723/0x1190 mm/shmem.c:1026
> > shmem_truncate_range mm/shmem.c:1120 [inline]
> > shmem_setattr+0xd43/0x1050 mm/shmem.c:1205
> > notify_change+0x742/0x11c0 fs/attr.c:485
> > do_truncate+0x15c/0x220 fs/open.c:66
> > do_sys_ftruncate+0x6a2/0x790 fs/open.c:194
> > do_syscall_x64 arch/x86/entry/common.c:50 [inline]
> > do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80
> > entry_SYSCALL_64_after_hwframe+0x63/0xcd
> >
> > -> #2 (&xa->xa_lock#7){..-.}-{2:2}:
> > __raw_spin_lock_irq include/linux/spinlock_api_smp.h:119 [inline]
> > _raw_spin_lock_irq+0x36/0x50 kernel/locking/spinlock.c:170
> > spin_lock_irq include/linux/spinlock.h:376 [inline]
> > filemap_remove_folio+0xbf/0x250 mm/filemap.c:259
> > truncate_inode_folio+0x49/0x70 mm/truncate.c:195
> > shmem_undo_range+0x363/0x1190 mm/shmem.c:1004
> > shmem_truncate_range mm/shmem.c:1120 [inline]
> > shmem_evict_inode+0x334/0xb10 mm/shmem.c:1250
> > evict+0x2ed/0x6b0 fs/inode.c:665
> > iput_final fs/inode.c:1791 [inline]
> > iput.part.0+0x55e/0x7a0 fs/inode.c:1817
> > iput+0x5c/0x80 fs/inode.c:1807
> > dentry_unlink_inode+0x292/0x430 fs/dcache.c:401
> > __dentry_kill+0x3b8/0x640 fs/dcache.c:607
> > dentry_kill fs/dcache.c:745 [inline]
> > dput+0x703/0xfd0 fs/dcache.c:913
> > do_renameat2+0xc4c/0xdc0 fs/namei.c:5011
> > __do_sys_rename fs/namei.c:5055 [inline]
> > __se_sys_rename fs/namei.c:5053 [inline]
> > __x64_sys_rename+0x81/0xa0 fs/namei.c:5053
> > do_syscall_x64 arch/x86/entry/common.c:50 [inline]
> > do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80
> > entry_SYSCALL_64_after_hwframe+0x63/0xcd
> >
> > -> #1 (&sb->s_type->i_lock_key){+.+.}-{2:2}:
> > __raw_spin_lock include/linux/spinlock_api_smp.h:133 [inline]
> > _raw_spin_lock+0x2e/0x40 kernel/locking/spinlock.c:154
> > spin_lock include/linux/spinlock.h:351 [inline]
> > inode_sub_bytes+0x28/0x100 fs/stat.c:816
> > __dquot_free_space+0x8f7/0xaf0 fs/quota/dquot.c:1881
> > dquot_free_space_nodirty include/linux/quotaops.h:379 [inline]
> > dquot_free_block_nodirty include/linux/quotaops.h:390 [inline]
> > shmem_inode_unacct_blocks mm/shmem.c:243 [inline]
> > shmem_recalc_inode+0x196/0x350 mm/shmem.c:420
> > shmem_undo_range+0x558/0x1190 mm/shmem.c:1114
> > shmem_truncate_range mm/shmem.c:1120 [inline]
> > shmem_evict_inode+0x334/0xb10 mm/shmem.c:1250
> > evict+0x2ed/0x6b0 fs/inode.c:665
> > iput_final fs/inode.c:1791 [inline]
> > iput.part.0+0x55e/0x7a0 fs/inode.c:1817
> > iput+0x5c/0x80 fs/inode.c:1807
> > dentry_unlink_inode+0x292/0x430 fs/dcache.c:401
> > __dentry_kill+0x3b8/0x640 fs/dcache.c:607
> > dentry_kill fs/dcache.c:745 [inline]
> > dput+0x703/0xfd0 fs/dcache.c:913
> > do_renameat2+0xc4c/0xdc0 fs/namei.c:5011
> > __do_sys_rename fs/namei.c:5055 [inline]
> > __se_sys_rename fs/namei.c:5053 [inline]
> > __x64_sys_rename+0x81/0xa0 fs/namei.c:5053
> > do_syscall_x64 arch/x86/entry/common.c:50 [inline]
> > do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80
> > entry_SYSCALL_64_after_hwframe+0x63/0xcd
> >
> > -> #0 (&info->lock){....}-{2:2}:
> > check_prev_add kernel/locking/lockdep.c:3142 [inline]
> > check_prevs_add kernel/locking/lockdep.c:3261 [inline]
> > validate_chain kernel/locking/lockdep.c:3876 [inline]
> > __lock_acquire+0x2e3d/0x5de0 kernel/locking/lockdep.c:5144
> > lock_acquire kernel/locking/lockdep.c:5761 [inline]
> > lock_acquire+0x1ae/0x510 kernel/locking/lockdep.c:5726
> > __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline]
> > _raw_spin_lock_irqsave+0x3a/0x50 kernel/locking/spinlock.c:162
> > shmem_uncharge+0x28/0x2b0 mm/shmem.c:450
> > __split_huge_page mm/huge_memory.c:2549 [inline]
> > split_huge_page_to_list+0x3832/0x49e0 mm/huge_memory.c:2772
> > split_folio_to_list include/linux/huge_mm.h:400 [inline]
> > split_folio include/linux/huge_mm.h:405 [inline]
> > truncate_inode_partial_folio+0x544/0x760 mm/truncate.c:242
> > shmem_undo_range+0x723/0x1190 mm/shmem.c:1026
> > shmem_truncate_range mm/shmem.c:1120 [inline]
> > shmem_setattr+0xd43/0x1050 mm/shmem.c:1205
> > notify_change+0x742/0x11c0 fs/attr.c:485
> > do_truncate+0x15c/0x220 fs/open.c:66
> > do_sys_ftruncate+0x6a2/0x790 fs/open.c:194
> > do_syscall_x64 arch/x86/entry/common.c:50 [inline]
> > do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80
> > entry_SYSCALL_64_after_hwframe+0x63/0xcd
> >
> > other info that might help us debug this:
> >
> > Chain exists of:
> > &info->lock --> &xa->xa_lock#7 --> &lruvec->lru_lock
> >
> > Possible unsafe locking scenario:
> >
> >        CPU0                    CPU1
> >        ----                    ----
> >   lock(&lruvec->lru_lock);
> >                                lock(&xa->xa_lock#7);
> >                                lock(&lruvec->lru_lock);
> >   lock(&info->lock);
> >
> > *** DEADLOCK ***
> >
> > 5 locks held by /5027:
> > #0: ffff8880762b4410 (sb_writers#5){.+.+}-{0:0}, at: do_syscall_x64 arch/x86/entry/common.c:50 [inline]
> > #0: ffff8880762b4410 (sb_writers#5){.+.+}-{0:0}, at: do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80
> > #1: ffff88807dbd8a50 (&sb->s_type->i_mutex_key#12){+.+.}-{3:3}, at: inode_lock include/linux/fs.h:771 [inline]
> > #1: ffff88807dbd8a50 (&sb->s_type->i_mutex_key#12){+.+.}-{3:3}, at: do_truncate+0x14b/0x220 fs/open.c:64
> > #2: ffff88807dbd8cf0 (&mapping->i_mmap_rwsem){++++}-{3:3}, at: i_mmap_lock_read include/linux/fs.h:501 [inline]
> > #2: ffff88807dbd8cf0 (&mapping->i_mmap_rwsem){++++}-{3:3}, at: split_huge_page_to_list+0x7d5/0x49e0 mm/huge_memory.c:2712
> > #3: ffff88807dbd8b60 (&xa->xa_lock#7){..-.}-{2:2}, at: spin_lock include/linux/spinlock.h:351 [inline]
> > #3: ffff88807dbd8b60 (&xa->xa_lock#7){..-.}-{2:2}, at: split_huge_page_to_list+0x980/0x49e0 mm/huge_memory.c:2744
> > #4: ffff88801526c068 (&lruvec->lru_lock){....}-{2:2}, at: spin_lock include/linux/spinlock.h:351 [inline]
> > #4: ffff88801526c068 (&lruvec->lru_lock){....}-{2:2}, at: folio_lruvec_lock+0x1ba/0x3b0 mm/memcontrol.c:1323
> >
> > stack backtrace:
> > CPU: 0 PID: 5027 Comm: Not tainted 6.5.0-rc1-next-20230714-syzkaller #0
> > Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/03/2023
> > Call Trace:
> > <TASK>
> > __dump_stack lib/dump_stack.c:88 [inline]
> > dump_stack_lvl+0xd9/0x1b0 lib/dump_stack.c:106
> > check_noncircular+0x311/0x3f0 kernel/locking/lockdep.c:2195
> > check_prev_add kernel/locking/lockdep.c:3142 [inline]
> > check_prevs_add kernel/locking/lockdep.c:3261 [inline]
> > validate_chain kernel/locking/lockdep.c:3876 [inline]
> > __lock_acquire+0x2e3d/0x5de0 kernel/locking/lockdep.c:5144
> > lock_acquire kernel/locking/lockdep.c:5761 [inline]
> > lock_acquire+0x1ae/0x510 kernel/locking/lockdep.c:5726
> > __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline]
> > _raw_spin_lock_irqsave+0x3a/0x50 kernel/locking/spinlock.c:162
> > shmem_uncharge+0x28/0x2b0 mm/shmem.c:450
> > __split_huge_page mm/huge_memory.c:2549 [inline]
> > split_huge_page_to_list+0x3832/0x49e0 mm/huge_memory.c:2772
> > split_folio_to_list include/linux/huge_mm.h:400 [inline]
> > split_folio include/linux/huge_mm.h:405 [inline]
> > truncate_inode_partial_folio+0x544/0x760 mm/truncate.c:242
> > shmem_undo_range+0x723/0x1190 mm/shmem.c:1026
> > shmem_truncate_range mm/shmem.c:1120 [inline]
> > shmem_setattr+0xd43/0x1050 mm/shmem.c:1205
> > notify_change+0x742/0x11c0 fs/attr.c:485
> > do_truncate+0x15c/0x220 fs/open.c:66
> > do_sys_ftruncate+0x6a2/0x790 fs/open.c:194
> > do_syscall_x64 arch/x86/entry/common.c:50 [inline]
> > do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80
> > entry_SYSCALL_64_after_hwframe+0x63/0xcd
> > RIP: 0033:0x7fcc0ae38b99
> > Code: 48 83 c4 28 c3 e8 67 17 00 00 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48
> > RSP: 002b:00007ffcd4272e58 EFLAGS: 00000246 ORIG_RAX: 000000000000004d
> > RAX: ffffffffffffffda RBX: 00007ffcd4272e60 RCX: 00007fcc0ae38b99
> > RDX: 00007fcc0ae38b99 RSI: 0000000000008979 RDI: 0000000000000003
> > RBP: 00007ffcd4272e68 R08: 00007fcc0ae05c10 R09: 00007fcc0ae05c10
> > R10: 0000000000000000 R11: 000000
> >
> >
> > ---
> > This report is generated by a bot. It may contain errors.
> > See https://goo.gl/tpsmEJ for more information about syzbot.
> > syzbot engineers can be reached at [email protected].
> >
> > syzbot will keep track of this issue. See:
> > https://goo.gl/tpsmEJ#status for how to communicate with syzbot.
> > For information about bisection process see: https://goo.gl/tpsmEJ#bisection
> >
> > If the bug is already fixed, let syzbot know by replying with:
> > #syz fix: exact-commit-title
> >
> > If you want syzbot to run the reproducer, reply with:
> > #syz test: git://repo/address.git branch-or-commit-hash
> > If you attach or paste a git patch, syzbot will apply it before testing.
> >
> > If you want to change bug's subsystems, reply with:
> > #syz set subsystems: new-subsystem
> > (See the list of subsystem names on the web dashboard)
> >
> > If the bug is a duplicate of another bug, reply with:
> > #syz dup: exact-subject-of-another-report
> >
> > If you want to undo deduplication, reply with:
> > #syz undup
>


2023-07-23 04:57:45

by Hugh Dickins

Subject: Re: [syzbot] [mm?] possible deadlock in shmem_uncharge (2)

On Tue, 18 Jul 2023, Carlos Maiolino wrote:
> On Tue, Jul 18, 2023 at 11:22:59AM -0700, Hugh Dickins wrote:
> > On Tue, 18 Jul 2023, syzbot wrote:
> >
> > > Hello,
> > >
> > > syzbot found the following issue on:
> >
> > Yes, this doesn't require any syzbot trickery, it showed up as soon as
> > I tried a shmem quota linux-next with lockdep and shmem huge last week.
> >
> > There's some other things wrong with the accounting there (in the non-
> > quota case anyway): I have been working up a patch to fix them, but need
> > to consider what must go in quickly, and what should wait until later.
> >
> > Carlos, in brief: don't worry about this syzbot report, I'm on it (but
> > there's a risk that any patch I send may turn out to break your quotas).
>
> Ok, thanks Hugh, I have the next version ready to go when I saw those syzkaller
> reports.
>
> I will send the new series tomorrow morning then.
> Could you please keep me in the loop, I'm interested to see what's going on
> there.

I never saw a new series on Wednesday, just a new series on Monday which
I had ignored, knowing that another was coming. I was waiting for your
new series before sending my patch to that, but perhaps you were waiting
for my patch before sending your new series?

Then when I went to check my patch on next-20230721, I found that your
series had been dropped from mm-unstable in the meantime. Maybe Andrew
dropped it because of the lockdep reports (I see now there was another),
or maybe he dropped it because of new series coming too thick and fast,
or maybe he dropped it because of the awkward merge commit which Stephen
Rothwell had to write, to reconcile the vfs and mm trees. (But I was
surprised not to have seen any mm-commits mail on the dropping.)

So the patch below is against next-20230714, and will hopefully still
apply in your next posting: please include it there (since it modifies
THP source, I think it's best kept as a separate patch in your series).

But when you repost, if you are still going for the next release, the
more you can do to minimize that merge commit the better. In particular,
the reindentations of shmem_get_inode() and other functions in
"shmem: make shmem_get_inode() return ERR_PTR instead of NULL"
don't help review, and could well be left until some later cleanup -
but I don't know how much that would actually eliminate the clashes.

(And "Add default quota limit mount options" needs to say "shmem: ...";
I'd have preferred "tmpfs: ...", but the others say shmem, so okay.)

[PATCH] shmem: fix quota lock nesting in huge hole handling

i_pages lock nests inside i_lock, but shmem_charge() and shmem_uncharge()
were being called from THP splitting or collapsing while i_pages lock was
held, and now go on to call dquot_alloc_block_nodirty() which takes
i_lock to update i_blocks.

We may well want to take i_lock out of this path later, in the non-quota
case even if it's left in the quota case (or perhaps use i_lock instead
of shmem's info->lock throughout); but don't get into that at this time.

Move the shmem_charge() and shmem_uncharge() calls out from under i_pages
lock, accounting the full batch of holes in a single call.

Still pass the pages argument to shmem_uncharge(), but it happens now to
be unused: shmem_recalc_inode() is designed to account for clean pages
freed behind shmem's back, so it gets the accounting right by itself;
then the later call to shmem_inode_unacct_blocks() led to imbalance
(that WARN_ON(inode->i_blocks) in shmem_evict_inode()).

Reported-by: [email protected]
Closes: https://lore.kernel.org/lkml/[email protected]/
Reported-by: [email protected]
Closes: https://lore.kernel.org/lkml/[email protected]/
Signed-off-by: Hugh Dickins <[email protected]>
---
mm/huge_memory.c | 6 ++++--
mm/khugepaged.c | 13 +++++++------
mm/shmem.c | 19 +++++++++----------
3 files changed, 20 insertions(+), 18 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 762be2f4244c..01f6838329a1 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2521,7 +2521,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
struct address_space *swap_cache = NULL;
unsigned long offset = 0;
unsigned int nr = thp_nr_pages(head);
- int i;
+ int i, nr_dropped = 0;

/* complete memcg works before add pages to LRU */
split_page_memcg(head, nr);
@@ -2546,7 +2546,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
struct folio *tail = page_folio(head + i);

if (shmem_mapping(head->mapping))
- shmem_uncharge(head->mapping->host, 1);
+ nr_dropped++;
else if (folio_test_clear_dirty(tail))
folio_account_cleaned(tail,
inode_to_wb(folio->mapping->host));
@@ -2583,6 +2583,8 @@ static void __split_huge_page(struct page *page, struct list_head *list,
}
local_irq_enable();

+ if (nr_dropped)
+ shmem_uncharge(head->mapping->host, nr_dropped);
remap_page(folio, nr);

if (PageSwapCache(head)) {
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 6bad69c0e4bd..366ee467fb83 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1828,10 +1828,6 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
goto xa_locked;
}
}
- if (!shmem_charge(mapping->host, 1)) {
- result = SCAN_FAIL;
- goto xa_locked;
- }
nr_none++;
continue;
}
@@ -2017,8 +2013,13 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
*/
try_to_unmap_flush();

- if (result != SCAN_SUCCEED)
+ if (result == SCAN_SUCCEED && nr_none &&
+ !shmem_charge(mapping->host, nr_none))
+ result = SCAN_FAIL;
+ if (result != SCAN_SUCCEED) {
+ nr_none = 0;
goto rollback;
+ }

/*
* The old pages are locked, so they won't change anymore.
@@ -2157,8 +2158,8 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
if (nr_none) {
xas_lock_irq(&xas);
mapping->nrpages -= nr_none;
- shmem_uncharge(mapping->host, nr_none);
xas_unlock_irq(&xas);
+ shmem_uncharge(mapping->host, nr_none);
}

list_for_each_entry_safe(page, tmp, &pagelist, lru) {
diff --git a/mm/shmem.c b/mm/shmem.c
index 161592a8d3fe..521a6302dc37 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -424,18 +424,20 @@ static void shmem_recalc_inode(struct inode *inode)
bool shmem_charge(struct inode *inode, long pages)
{
struct shmem_inode_info *info = SHMEM_I(inode);
- unsigned long flags;
+ struct address_space *mapping = inode->i_mapping;

if (shmem_inode_acct_block(inode, pages))
return false;

/* nrpages adjustment first, then shmem_recalc_inode() when balanced */
- inode->i_mapping->nrpages += pages;
+ xa_lock_irq(&mapping->i_pages);
+ mapping->nrpages += pages;
+ xa_unlock_irq(&mapping->i_pages);

- spin_lock_irqsave(&info->lock, flags);
+ spin_lock_irq(&info->lock);
info->alloced += pages;
shmem_recalc_inode(inode);
- spin_unlock_irqrestore(&info->lock, flags);
+ spin_unlock_irq(&info->lock);

return true;
}
@@ -443,16 +445,13 @@ bool shmem_charge(struct inode *inode, long pages)
void shmem_uncharge(struct inode *inode, long pages)
{
struct shmem_inode_info *info = SHMEM_I(inode);
- unsigned long flags;

/* nrpages adjustment done by __filemap_remove_folio() or caller */

- spin_lock_irqsave(&info->lock, flags);
- info->alloced -= pages;
+ spin_lock_irq(&info->lock);
shmem_recalc_inode(inode);
- spin_unlock_irqrestore(&info->lock, flags);
-
- shmem_inode_unacct_blocks(inode, pages);
+ /* which has called shmem_inode_unacct_blocks() if necessary */
+ spin_unlock_irq(&info->lock);
}

/*
--
2.35.3
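
The shape of the change in __split_huge_page() and collapse_file() above is
the usual one for this kind of nesting problem: only count while the stricter
lock is held, and make the call that may take other locks (shmem_uncharge(),
which can now reach the dquot code and i_lock) after dropping it, batched into
one call. A small userspace sketch of that shape follows, with hypothetical
names and a pthread mutex standing in for the xarray lock:

/*
 * Sketch of the "count under the lock, account after unlock" shape used
 * above; names are hypothetical and the mutex only stands in for the
 * kernel's xa_lock/lru_lock.  Build with: cc -Wall -pthread batch_sketch.c
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t pages_lock = PTHREAD_MUTEX_INITIALIZER;
static long nrpages = 512;

/* May take other locks internally (as shmem_uncharge() can via the dquot
 * code and i_lock), so it must not be called with pages_lock held. */
static void uncharge(long nr)
{
        printf("uncharging %ld pages outside pages_lock\n", nr);
}

static void drop_holes(int nr_holes)
{
        long nr_dropped = 0;
        int i;

        pthread_mutex_lock(&pages_lock);
        for (i = 0; i < nr_holes; i++) {
                nrpages--;        /* the work that genuinely needs the lock */
                nr_dropped++;     /* just remember how much to account later */
        }
        pthread_mutex_unlock(&pages_lock);

        if (nr_dropped)
                uncharge(nr_dropped);  /* one batched call, in a safe lock context */
}

int main(void)
{
        drop_holes(3);
        printf("nrpages now %ld\n", nrpages);
        return 0;
}

Batching also means the uncharge happens once per split rather than once per
dropped page, which is what the nr_dropped counter in the huge_memory.c hunk
of the patch does.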


2023-07-24 12:16:15

by Carlos Maiolino

Subject: Re: [syzbot] [mm?] possible deadlock in shmem_uncharge (2)

Hi Hugh.

On Sat, Jul 22, 2023 at 09:37:35PM -0700, Hugh Dickins wrote:
> On Tue, 18 Jul 2023, Carlos Maiolino wrote:
> > On Tue, Jul 18, 2023 at 11:22:59AM -0700, Hugh Dickins wrote:
> > > On Tue, 18 Jul 2023, syzbot wrote:
> > >
> > > > Hello,
> > > >
> > > > syzbot found the following issue on:
> > >
> > > Yes, this doesn't require any syzbot trickery, it showed up as soon as
> > > I tried a shmem quota linux-next with lockdep and shmem huge last week.
> > >
> > > There's some other things wrong with the accounting there (in the non-
> > > quota case anyway): I have been working up a patch to fix them, but need
> > > to consider what must go in quickly, and what should wait until later.
> > >
> > > Carlos, in brief: don't worry about this syzbot report, I'm on it (but
> > > there's a risk that any patch I send may turn out to break your quotas).
> >
> > Ok, thanks Hugh, I have the next version ready to go when I saw those syzkaller
> > reports.
> >
> > I will send the new series tomorrow morning then.
> > Could you please keep me in the loop, I'm interested to see what's going on
> > there.
>
> I never saw a new series on Wednesday, just a new series on Monday which
> I had ignored, knowing that another was coming. I was waiting for your
> new series before sending my patch to that, but perhaps you were waiting
> for my patch before sending your new series?

TL;DR: I spoke with Andrew last week, and we agreed it would be better to wait
for you to submit the patches before I send the new version. I thought I had
cc'ed you in the email thread, but it seems I didn't; my apologies.

>
> Then when I went to check my patch on next-20230721, found that your
> series has been dropped from mm-unstable meanwhile. Maybe Andrew
> dropped it because of the lockdep reports (I see now there was another),
> or maybe he dropped it because of new series coming too thick and fast,
> or maybe he dropped it because of the awkward merge commit which Stephen
> Rothwell had to write, to reconcile the vfs and mm trees. (But I was
> surprised not to have seen any mm-commits mail on the dropping.)
>
> So the patch below is against next-20230714, and will hopefully still
> apply in your next posting: please include it there (since it modifies
> THP source, I think it's best kept as a separate patch in your series).
>
> But when you repost, if you are still going for the next release, the
> more you can do to minimize that merge commit the better. In particular,
> the reindentations of shmem_get_inode() and other functions in
> "shmem: make shmem_get_inode() return ERR_PTR instead of NULL"
> don't help review, and could well be left until some later cleanup -
> but I don't know how much that would actually eliminate the clashes.

I'm not at work today, so I can't take a deep look at it right now, but I plan
to rebase the series on top of linux-next and include your patch in my series,
if that's what I understood from a quick read of your email.

I'll try to work on it tomorrow first thing, thanks!

Carlos


>
> (And "Add default quota limit mount options" needs to say "shmem: ...";
> I'd have preferred "tmpfs: ...", but the others say shmem, so okay.)
>
> [PATCH] shmem: fix quota lock nesting in huge hole handling
>
> i_pages lock nests inside i_lock, but shmem_charge() and shmem_uncharge()
> were being called from THP splitting or collapsing while i_pages lock was
> held, and now go on to call dquot_alloc_block_nodirty() which takes
> i_lock to update i_blocks.
>
> We may well want to take i_lock out of this path later, in the non-quota
> case even if it's left in the quota case (or perhaps use i_lock instead
> of shmem's info->lock throughout); but don't get into that at this time.
>
> Move the shmem_charge() and shmem_uncharge() calls out from under i_pages
> lock, accounting the full batch of holes in a single call.
>
> Still pass the pages argument to shmem_uncharge(), but it happens now to
> be unused: shmem_recalc_inode() is designed to account for clean pages
> freed behind shmem's back, so it gets the accounting right by itself;
> then the later call to shmem_inode_unacct_blocks() led to imbalance
> (that WARN_ON(inode->i_blocks) in shmem_evict_inode()).
>
> Reported-by: [email protected]
> Closes: https://lore.kernel.org/lkml/[email protected]/
> Reported-by: [email protected]
> Closes: https://lore.kernel.org/lkml/[email protected]/
> Signed-off-by: Hugh Dickins <[email protected]>
> ---
> mm/huge_memory.c | 6 ++++--
> mm/khugepaged.c | 13 +++++++------
> mm/shmem.c | 19 +++++++++----------
> 3 files changed, 20 insertions(+), 18 deletions(-)
>
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 762be2f4244c..01f6838329a1 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -2521,7 +2521,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
> struct address_space *swap_cache = NULL;
> unsigned long offset = 0;
> unsigned int nr = thp_nr_pages(head);
> - int i;
> + int i, nr_dropped = 0;
>
> /* complete memcg works before add pages to LRU */
> split_page_memcg(head, nr);
> @@ -2546,7 +2546,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
> struct folio *tail = page_folio(head + i);
>
> if (shmem_mapping(head->mapping))
> - shmem_uncharge(head->mapping->host, 1);
> + nr_dropped++;
> else if (folio_test_clear_dirty(tail))
> folio_account_cleaned(tail,
> inode_to_wb(folio->mapping->host));
> @@ -2583,6 +2583,8 @@ static void __split_huge_page(struct page *page, struct list_head *list,
> }
> local_irq_enable();
>
> + if (nr_dropped)
> + shmem_uncharge(head->mapping->host, nr_dropped);
> remap_page(folio, nr);
>
> if (PageSwapCache(head)) {
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index 6bad69c0e4bd..366ee467fb83 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -1828,10 +1828,6 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
> goto xa_locked;
> }
> }
> - if (!shmem_charge(mapping->host, 1)) {
> - result = SCAN_FAIL;
> - goto xa_locked;
> - }
> nr_none++;
> continue;
> }
> @@ -2017,8 +2013,13 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
> */
> try_to_unmap_flush();
>
> - if (result != SCAN_SUCCEED)
> + if (result == SCAN_SUCCEED && nr_none &&
> + !shmem_charge(mapping->host, nr_none))
> + result = SCAN_FAIL;
> + if (result != SCAN_SUCCEED) {
> + nr_none = 0;
> goto rollback;
> + }
>
> /*
> * The old pages are locked, so they won't change anymore.
> @@ -2157,8 +2158,8 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
> if (nr_none) {
> xas_lock_irq(&xas);
> mapping->nrpages -= nr_none;
> - shmem_uncharge(mapping->host, nr_none);
> xas_unlock_irq(&xas);
> + shmem_uncharge(mapping->host, nr_none);
> }
>
> list_for_each_entry_safe(page, tmp, &pagelist, lru) {
> diff --git a/mm/shmem.c b/mm/shmem.c
> index 161592a8d3fe..521a6302dc37 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -424,18 +424,20 @@ static void shmem_recalc_inode(struct inode *inode)
> bool shmem_charge(struct inode *inode, long pages)
> {
> struct shmem_inode_info *info = SHMEM_I(inode);
> - unsigned long flags;
> + struct address_space *mapping = inode->i_mapping;
>
> if (shmem_inode_acct_block(inode, pages))
> return false;
>
> /* nrpages adjustment first, then shmem_recalc_inode() when balanced */
> - inode->i_mapping->nrpages += pages;
> + xa_lock_irq(&mapping->i_pages);
> + mapping->nrpages += pages;
> + xa_unlock_irq(&mapping->i_pages);
>
> - spin_lock_irqsave(&info->lock, flags);
> + spin_lock_irq(&info->lock);
> info->alloced += pages;
> shmem_recalc_inode(inode);
> - spin_unlock_irqrestore(&info->lock, flags);
> + spin_unlock_irq(&info->lock);
>
> return true;
> }
> @@ -443,16 +445,13 @@ bool shmem_charge(struct inode *inode, long pages)
> void shmem_uncharge(struct inode *inode, long pages)
> {
> struct shmem_inode_info *info = SHMEM_I(inode);
> - unsigned long flags;
>
> /* nrpages adjustment done by __filemap_remove_folio() or caller */
>
> - spin_lock_irqsave(&info->lock, flags);
> - info->alloced -= pages;
> + spin_lock_irq(&info->lock);
> shmem_recalc_inode(inode);
> - spin_unlock_irqrestore(&info->lock, flags);
> -
> - shmem_inode_unacct_blocks(inode, pages);
> + /* which has called shmem_inode_unacct_blocks() if necessary */
> + spin_unlock_irq(&info->lock);
> }
>
> /*
> --
> 2.35.3
>