2022-11-16 12:20:07

by ChenXiaoSong

[permalink] [raw]
Subject: [PATCH v5 0/2] btrfs: fix sleep from invalid context bug in update_qgroup_limit_item()

At least 3 places might sleep in update_qgroup_limit_item(), as shown below:

update_qgroup_limit_item
btrfs_alloc_path
/* allocate memory non-atomically, might sleep */
kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS)
btrfs_search_slot
setup_nodes_for_search
reada_for_balance
btrfs_readahead_node_child
btrfs_readahead_tree_block
btrfs_find_create_tree_block
alloc_extent_buffer
kmem_cache_zalloc
/* allocate memory non-atomically, might sleep */
kmem_cache_alloc(GFP_NOFS|__GFP_NOFAIL|__GFP_ZERO)
read_extent_buffer_pages
submit_extent_page
/* disk IO, might sleep */
submit_one_bio

Fix this by calling qgroup_dirty() on @dstgroup, so that the limit item is
updated later in btrfs_run_qgroups().

By the way, add might_sleep() to btrfs_search_slot().

ChenXiaoSong (2):
btrfs: add might_sleep() to btrfs_search_slot()
btrfs: qgroup: fix sleep from invalid context bug in
update_qgroup_limit_item()

fs/btrfs/ctree.c | 2 ++
fs/btrfs/qgroup.c | 29 ++++++++++++-----------------
2 files changed, 14 insertions(+), 17 deletions(-)

--
2.31.1



2022-11-16 12:21:50

by ChenXiaoSong

[permalink] [raw]
Subject: [PATCH v5 2/2] btrfs: qgroup: fix sleep from invalid context bug in update_qgroup_limit_item()

Syzkaller reported BUG as follows:

BUG: sleeping function called from invalid context at
include/linux/sched/mm.h:274
Call Trace:
<TASK>
dump_stack_lvl+0xcd/0x134
__might_resched.cold+0x222/0x26b
kmem_cache_alloc+0x2e7/0x3c0
update_qgroup_limit_item+0xe1/0x390
btrfs_qgroup_inherit+0x147b/0x1ee0
create_subvol+0x4eb/0x1710
btrfs_mksubvol+0xfe5/0x13f0
__btrfs_ioctl_snap_create+0x2b0/0x430
btrfs_ioctl_snap_create_v2+0x25a/0x520
btrfs_ioctl+0x2a1c/0x5ce0
__x64_sys_ioctl+0x193/0x200
do_syscall_64+0x35/0x80

Fix this by calling qgroup_dirty() on @dstgroup, so that the limit item is
updated later in btrfs_run_qgroups().

Signed-off-by: ChenXiaoSong <[email protected]>
---
fs/btrfs/qgroup.c | 29 ++++++++++++-----------------
1 file changed, 12 insertions(+), 17 deletions(-)

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 9334c3157c22..8f5c52e24430 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -2860,6 +2860,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
bool need_rescan = false;
u32 level_size = 0;
u64 nums;
+ bool dirty_dstgrp = false;

/*
* There are only two callers of this function.
@@ -2941,7 +2942,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
dstgroup = add_qgroup_rb(fs_info, objectid);
if (IS_ERR(dstgroup)) {
ret = PTR_ERR(dstgroup);
- goto unlock;
+ goto dirty;
}

if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) {
@@ -2950,21 +2951,13 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
dstgroup->max_excl = inherit->lim.max_excl;
dstgroup->rsv_rfer = inherit->lim.rsv_rfer;
dstgroup->rsv_excl = inherit->lim.rsv_excl;
-
- ret = update_qgroup_limit_item(trans, dstgroup);
- if (ret) {
- qgroup_mark_inconsistent(fs_info);
- btrfs_info(fs_info,
- "unable to update quota limit for %llu",
- dstgroup->qgroupid);
- goto unlock;
- }
+ dirty_dstgrp = true;
}

if (srcid) {
srcgroup = find_qgroup_rb(fs_info, srcid);
if (!srcgroup)
- goto unlock;
+ goto dirty;

/*
* We call inherit after we clone the root in order to make sure
@@ -2985,20 +2978,20 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
dstgroup->max_excl = srcgroup->max_excl;
dstgroup->rsv_rfer = srcgroup->rsv_rfer;
dstgroup->rsv_excl = srcgroup->rsv_excl;
+ dirty_dstgrp = true;

- qgroup_dirty(fs_info, dstgroup);
qgroup_dirty(fs_info, srcgroup);
}

if (!inherit)
- goto unlock;
+ goto dirty;

i_qgroups = (u64 *)(inherit + 1);
for (i = 0; i < inherit->num_qgroups; ++i) {
if (*i_qgroups) {
ret = add_relation_rb(fs_info, objectid, *i_qgroups);
if (ret)
- goto unlock;
+ goto dirty;
}
++i_qgroups;

@@ -3022,7 +3015,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,

if (!src || !dst) {
ret = -EINVAL;
- goto unlock;
+ goto dirty;
}

dst->rfer = src->rfer - level_size;
@@ -3043,15 +3036,17 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,

if (!src || !dst) {
ret = -EINVAL;
- goto unlock;
+ goto dirty;
}

dst->excl = src->excl + level_size;
dst->excl_cmpr = src->excl_cmpr + level_size;
need_rescan = true;
}
+dirty:
+ if (dirty_dstgrp)
+ qgroup_dirty(fs_info, dstgroup);

-unlock:
spin_unlock(&fs_info->qgroup_lock);
if (!ret)
ret = btrfs_sysfs_add_one_qgroup(fs_info, dstgroup);
--
2.31.1


2022-11-16 12:53:53

by ChenXiaoSong

[permalink] [raw]
Subject: [PATCH v5 1/2] btrfs: add might_sleep() to btrfs_search_slot()

At least 2 places might sleep in btrfs_search_slot(), as shown below:

btrfs_search_slot
setup_nodes_for_search
reada_for_balance
btrfs_readahead_node_child
btrfs_readahead_tree_block
btrfs_find_create_tree_block
alloc_extent_buffer
kmem_cache_zalloc
/* allocate memory non-atomically, might sleep */
kmem_cache_alloc(GFP_NOFS|__GFP_NOFAIL|__GFP_ZERO)
read_extent_buffer_pages
submit_extent_page
/* disk IO, might sleep */
submit_one_bio

Since sleeping under a spin lock is hard to spot, we should add
might_sleep() to btrfs_search_slot().

Signed-off-by: ChenXiaoSong <[email protected]>
---
fs/btrfs/ctree.c | 2 ++
1 file changed, 2 insertions(+)

diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index a9543f01184c..809053e9cfde 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1934,6 +1934,8 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
int min_write_lock_level;
int prev_cmp;

+ might_sleep();
+
lowest_level = p->lowest_level;
WARN_ON(lowest_level && ins_len > 0);
WARN_ON(p->nodes[0] != NULL);
--
2.31.1


2022-11-16 12:58:18

by Qu Wenruo

[permalink] [raw]
Subject: Re: [PATCH v5 2/2] btrfs: qgroup: fix sleep from invalid context bug in update_qgroup_limit_item()



On 2022/11/16 21:07, ChenXiaoSong wrote:
> Syzkaller reported BUG as follows:
>
> BUG: sleeping function called from invalid context at
> include/linux/sched/mm.h:274
> Call Trace:
> <TASK>
> dump_stack_lvl+0xcd/0x134
> __might_resched.cold+0x222/0x26b
> kmem_cache_alloc+0x2e7/0x3c0
> update_qgroup_limit_item+0xe1/0x390
> btrfs_qgroup_inherit+0x147b/0x1ee0
> create_subvol+0x4eb/0x1710
> btrfs_mksubvol+0xfe5/0x13f0
> __btrfs_ioctl_snap_create+0x2b0/0x430
> btrfs_ioctl_snap_create_v2+0x25a/0x520
> btrfs_ioctl+0x2a1c/0x5ce0
> __x64_sys_ioctl+0x193/0x200
> do_syscall_64+0x35/0x80
>
> Fix this by calling qgroup_dirty() on @dstqgroup, and update limit item in
> btrfs_run_qgroups() later.
>
> Signed-off-by: ChenXiaoSong <[email protected]>
> ---
> fs/btrfs/qgroup.c | 29 ++++++++++++-----------------
> 1 file changed, 12 insertions(+), 17 deletions(-)
>
> diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
> index 9334c3157c22..8f5c52e24430 100644
> --- a/fs/btrfs/qgroup.c
> +++ b/fs/btrfs/qgroup.c
> @@ -2860,6 +2860,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
> bool need_rescan = false;
> u32 level_size = 0;
> u64 nums;
> + bool dirty_dstgrp = false;

I don't know why you insist on such a bool (and the extra label).

I have mentioned how qgroup_dirty() works: it can be called however
many times, and the qgroup code will handle it without problem.

So the whole patch can be just as simple as:

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 05e79f7b4433..e0522c6c0d67 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -2965,14 +2965,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
dstgroup->rsv_rfer = inherit->lim.rsv_rfer;
dstgroup->rsv_excl = inherit->lim.rsv_excl;

- ret = update_qgroup_limit_item(trans, dstgroup);
- if (ret) {
- qgroup_mark_inconsistent(fs_info);
- btrfs_info(fs_info,
- "unable to update quota limit for %llu",
- dstgroup->qgroupid);
- goto unlock;
- }
+ qgroup_dirty(fs_info, dstgroup);
}

if (srcid) {


>
> /*
> * There are only two callers of this function.
> @@ -2941,7 +2942,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
> dstgroup = add_qgroup_rb(fs_info, objectid);
> if (IS_ERR(dstgroup)) {
> ret = PTR_ERR(dstgroup);
> - goto unlock;
> + goto dirty;
> }
>
> if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) {
> @@ -2950,21 +2951,13 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
> dstgroup->max_excl = inherit->lim.max_excl;
> dstgroup->rsv_rfer = inherit->lim.rsv_rfer;
> dstgroup->rsv_excl = inherit->lim.rsv_excl;
> -
> - ret = update_qgroup_limit_item(trans, dstgroup);
> - if (ret) {
> - qgroup_mark_inconsistent(fs_info);
> - btrfs_info(fs_info,
> - "unable to update quota limit for %llu",
> - dstgroup->qgroupid);
> - goto unlock;
> - }
> + dirty_dstgrp = true;
> }
>
> if (srcid) {
> srcgroup = find_qgroup_rb(fs_info, srcid);
> if (!srcgroup)
> - goto unlock;
> + goto dirty;
>
> /*
> * We call inherit after we clone the root in order to make sure
> @@ -2985,20 +2978,20 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
> dstgroup->max_excl = srcgroup->max_excl;
> dstgroup->rsv_rfer = srcgroup->rsv_rfer;
> dstgroup->rsv_excl = srcgroup->rsv_excl;
> + dirty_dstgrp = true;
>
> - qgroup_dirty(fs_info, dstgroup);
> qgroup_dirty(fs_info, srcgroup);
> }
>
> if (!inherit)
> - goto unlock;
> + goto dirty;
>
> i_qgroups = (u64 *)(inherit + 1);
> for (i = 0; i < inherit->num_qgroups; ++i) {
> if (*i_qgroups) {
> ret = add_relation_rb(fs_info, objectid, *i_qgroups);
> if (ret)
> - goto unlock;
> + goto dirty;
> }
> ++i_qgroups;
>
> @@ -3022,7 +3015,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
>
> if (!src || !dst) {
> ret = -EINVAL;
> - goto unlock;
> + goto dirty;
> }
>
> dst->rfer = src->rfer - level_size;
> @@ -3043,15 +3036,17 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
>
> if (!src || !dst) {
> ret = -EINVAL;
> - goto unlock;
> + goto dirty;
> }
>
> dst->excl = src->excl + level_size;
> dst->excl_cmpr = src->excl_cmpr + level_size;
> need_rescan = true;
> }
> +dirty:
> + if (dirty_dstgrp)
> + qgroup_dirty(fs_info, dstgroup);
>
> -unlock:
> spin_unlock(&fs_info->qgroup_lock);
> if (!ret)
> ret = btrfs_sysfs_add_one_qgroup(fs_info, dstgroup);