2023-07-14 07:26:36

by xiaoshoukui

[permalink] [raw]
Subject: [PATCH] btrfs: fix balance_ctl not free properly in btrfs_balance

btrfs: fix balance_ctl not free properly in btrfs_balance

The balance_ctl should be freed after balance finished successfully.
But this is not be true when balance_pause_req > 0 and __btrfs_balance
return with 0. This can lead to unexpected behaviors. Resume balance
with balance_ctl not be free may lead to btrfs_exclop_balance in
btrfs_ioctl_balance assert fail.

Issue the pause balance request bewteen __btrfs_balance return to ret
with 0 and mutex_lock(&fs_info->balance_mutex) can lead to such assert
fail.

Rewrite the normal exit condition of balance will fix the problem.

We can easily reproduce the problem with the following code.

#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/btrfs.h>
#include <pthread.h>
#include <errno.h>
#include <dirent.h>
#include <string.h>

static int fd;
static int i;
static int controller = 0;
static int random_delay1 = 0;
static int random_delay2 = 0;
//const static int delta = 10000;


static pthread_t thread_id[10];
static char *path = "/dev/vda";
static char *btrfsmntpt = "/mnt/my_btrfs";


void prepare_random()
{
random_delay1 = rand() % 80;
random_delay2 = rand() % 20;
}

void *thr1(void *arg) {
int ret;
struct btrfs_ioctl_balance_args args;
memset(&args, 0, sizeof(args));
args.flags |= BTRFS_BALANCE_FORCE;
while(!controller);
//usleep(delta);
usleep(random_delay1);
ret = ioctl(fd, BTRFS_IOC_BALANCE_V2, &args);
if (ret != 0) {
printf("Failed to balance %s, errno %d\n", path, errno);
} else {
printf("%s balance successfully\n", path);
}
memset(&args, 0, sizeof(args));
args.flags |= BTRFS_BALANCE_RESUME;
ret = ioctl(fd, BTRFS_IOC_BALANCE_V2, &args);
if (ret != 0) {
printf("Failed to resume balance %s, errno %d\n", path, errno);
} else {
printf("%s resume balance successfully\n", path);
}
return NULL;
}

void *thr2(void *arg) {
int ret;
struct btrfs_ioctl_balance_args args;
memset(&args, 0, sizeof(args));
while(!controller);
usleep(random_delay2);
ret = ioctl(fd, BTRFS_IOC_BALANCE_CTL, 0x1);
if (ret != 0) {
printf("Failed to pause balance %s, errno %d\n", path, errno);
} else {
printf("%s pause balance successfully\n", path);
}
return NULL;
}



int main(int argc, char **argv) {

DIR * dir;
pthread_t th1, th2;
srand(time(NULL));

while(1) {
controller = 0;
//prepare random delay
prepare_random();

dir = opendir(btrfsmntpt);
if (!dir) {
printf("Failed to open btrfs mount point %s, errno %d\n", btrfsmntpt, errno);
return 1;
}
fd = dirfd(dir);
if (fd < 0) {
printf("Failed to get directory stream file descriptor, errno %d\n", errno);
return 1;
}

pthread_create(&th2, NULL, thr2, NULL);
pthread_create(&th1, NULL, thr1, NULL);
// pthread_create(&th2, NULL, closing_thread, &test);

controller = 1; // start

pthread_join(th1, NULL);
pthread_join(th2, NULL);

closedir(dir);
close(fd);
}
return 0;
}


The procedure to reproduce the problem:
1、mount a device to /mnt/my_btrfs;
2、excute the above compiled repro;


The log of my test:
root@syzkaller:/home/xsk# mount btrfs.img /mnt/my_btrfs/
root@syzkaller:/home/xsk# mount
/dev/sda on / type ext4 (rw,relatime)
....
/home/xsk/btrfs.img on /mnt/my_btrfs type btrfs (rw,relatime,discard=async,space_cache,subvolid=5,subvol=/)
root@syzkaller:/home/xsk# ./repro
[ 54.565209][ T4290] BTRFS info (device loop0): balance: start -f
[ 54.566302][ T4290] BTRFS info (device loop0): balance: ended with status: 0
/dev/vda balance successfully
Failed to pause balance /dev/vda, errno 107
Failed to resume balance /dev/vda, errno 107
Failed to pause balance /dev/vda, errno 107
[ 54.654827][ T4292] BTRFS info (device loop0): balance: start -f
[ 54.655679][ T4292] BTRFS info (device loop0): balance: ended with status: 0
/dev/vda balance successfully
Failed to resume balance /dev/vda, errno 107
Failed to pause balance /dev/vda, errno 107
[ 54.727231][ T4294] BTRFS info (device loop0): balance: start -f
[ 54.728795][ T4294] BTRFS info (device loop0): balance: ended with status: 0
/dev/vda balance successfully
Failed to resume balance /dev/vda, errno 107
Failed to pause balance /dev/vda, errno 107
[ 54.801791][ T4296] BTRFS info (device loop0): balance: start -f
[ 54.802566][ T4296] BTRFS info (device loop0): balance: ended with status: 0
/dev/vda balance successfully
Failed to resume balance /dev/vda, errno 107
Failed to pause balance /dev/vda, errno 107
[ 54.876345][ T4298] BTRFS info (device loop0): balance: start -f
[ 54.877201][ T4298] BTRFS info (device loop0): balance: ended with status: 0
......
......
Failed to resume balance /dev/vda, errno 107
Failed to pause balance /dev/vda, errno 107
[ 60.518896][ T4476] BTRFS info (device loop0): balance: start -f
[ 60.519831][ T4476] BTRFS info (device loop0): balance: ended with status: 0
/dev/vda balance successfully
Failed to resume balance /dev/vda, errno 107
Failed to pause balance /dev/vda, errno 107
[ 60.581485][ T4478] BTRFS info (device loop0): balance: start -f
[ 60.582344][ T4478] BTRFS info (device loop0): balance: ended with status: 0
/dev/vda balance successfully
Failed to resume balance /dev/vda, errno 107
Failed to pause balance /dev/vda, errno 107
[ 60.639015][ T4480] BTRFS info (device loop0): balance: start -f
[ 60.639909][ T4480] BTRFS info (device loop0): balance: ended with status: 0
/dev/vda balance successfully
Failed to resume balance /dev/vda, errno 107
Failed to pause balance /dev/vda, errno 107
[ 60.695822][ T4482] BTRFS info (device loop0): balance: start -f
[ 60.696686][ T4482] BTRFS info (device loop0): balance: ended with status: 0
/dev/vda balance successfully
Failed to resume balance /dev/vda, errno 107
[ 60.753212][ T4484] BTRFS info (device loop0): balance: start -f
[ 60.754589][ T4484] BTRFS info (device loop0): balance: ended with status: 0
/dev/vda balance successfully
/dev/vda pause balance successfully
[ 60.776677][ T4484] assertion failed: fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE_PAUSED,
in fs/btrfs/ioctl.c:465
[ 60.800426][ T4484] ------------[ cut here ]------------
[ 60.801145][ T4484] kernel BUG at fs/btrfs/messages.c:259!
[ 60.804667][ T4484] invalid opcode: 0000 [#1] PREEMPT SMP
[ 60.806051][ T4484] CPU: 1 PID: 4484 Comm: repro_bp Not tainted 6.2.0 #34
[ 60.807169][ T4484] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014
[ 60.808267][ T4484] RIP: 0010:btrfs_assertfail+0x14/0x16
[ 60.808267][ T4484] Code: e8 36 dc 0f 00 48 8d 65 d0 5b 41 5a 41 5c 41 5d 41 5e 41 5f 5d c3 89 d1 48
89 f2 48 89 fe 48 c7 c7 70 4d 66 83 e8 26 7a f9 ff <0f> 0b 48 c7 c6 98 4d 66 83 e9 f5 fc ff ff 41 56 83 f9 e4 41 89 d6
[ 60.808267][ T4484] RSP: 0018:ffffc90004707df0 EFLAGS: 00000246
[ 60.808267][ T4484] RAX: 0000000000000066 RBX: ffff88811384f301 RCX: 0000000000000000
[ 60.808267][ T4484] RDX: 0000000000000000 RSI: ffffffff8361226e RDI: 00000000ffffffff
[ 60.808267][ T4484] RBP: 00000000c4009420 R08: 00000000ffffffff R09: 695f7366203a6465
[ 60.808267][ T4484] R10: 0000000074726573 R11: 0000000065737361 R12: 00007f38bfdf0dc0
[ 60.808267][ T4484] R13: 0000000000000003 R14: ffff88811384f300 R15: 0000000000000000
[ 60.808267][ T4484] FS: 00007f38bfdf1640(0000) GS:ffff88813bc80000(0000) knlGS:0000000000000000
[ 60.808267][ T4484] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 60.808267][ T4484] CR2: 00007f175952f010 CR3: 0000000114d7b000 CR4: 00000000000006e0
[ 60.808267][ T4484] Call Trace:
[ 60.808267][ T4484] <TASK>
[ 60.808267][ T4484] btrfs_exclop_balance+0x160/0x1b0
[ 60.828329][ T4484] ? btrfs_ioctl_balance+0x1a3/0x4a0
[ 60.828329][ T4484] btrfs_ioctl_balance+0x1b0/0x4a0
[ 60.828329][ T4484] btrfs_ioctl+0xa2c/0xd20
[ 60.828329][ T4484] __x64_sys_ioctl+0x7d/0xa0
[ 60.828329][ T4484] do_syscall_64+0x38/0x80
[ 60.828329][ T4484] entry_SYSCALL_64_after_hwframe+0x63/0xcd
[ 60.828329][ T4484] RIP: 0033:0x4547ef
[ 60.828329][ T4484] Code: 00 48 89 44 24 18 31 c0 48 8d 44 24 60 c7 04 24 10 00 00 00 48 89 44 24 08
48 8d 44 24 20 48 89 44 24 10 b8 10 00 00 00 0f 05 <41> 89 c0 3d 00 f0 ff ff 77 1f 48 8b 44 24 18 64 48 2b 04 25 28 00
[ 60.828329][ T4484] RSP: 002b:00007f38bfdf0d40 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
[ 60.828329][ T4484] RAX: ffffffffffffffda RBX: 00007f38bfdf1640 RCX: 00000000004547ef
[ 60.828329][ T4484] RDX: 00007f38bfdf0dc0 RSI: 00000000c4009420 RDI: 0000000000000003
[ 60.828329][ T4484] RBP: 00007f38bfdf11d0 R08: 0000000000000000 R09: 0000000000000000
[ 60.837353][ T4484] R10: 00000000004b1008 R11: 0000000000000246 R12: 00007f38bfdf1640
[ 60.837353][ T4484] R13: 0000000000000009 R14: 000000000041b450 R15: 00007f38bf5f1000
[ 60.837353][ T4484] </TASK>
[ 60.837353][ T4484] Modules linked in:
[ 60.842505][ T4484] ---[ end trace 0000000000000000 ]---
[ 60.843117][ T4484] RIP: 0010:btrfs_assertfail+0x14/0x16
[ 60.843596][ T4484] Code: e8 36 dc 0f 00 48 8d 65 d0 5b 41 5a 41 5c 41 5d 41 5e 41 5f 5d c3 89 d1 48
89 f2 48 89 fe 48 c7 c7 70 4d 66 83 e8 26 7a f9 ff <0f> 0b 48 c7 c6 98 4d 66 83 e9 f5 fc ff ff 41 56 83 f9 e4 41 89 d6
[ 60.844983][ T4484] RSP: 0018:ffffc90004707df0 EFLAGS: 00000246
[ 60.845512][ T4484] RAX: 0000000000000066 RBX: ffff88811384f301 RCX: 0000000000000000
[ 60.846172][ T4484] RDX: 0000000000000000 RSI: ffffffff8361226e RDI: 00000000ffffffff
[ 60.846777][ T4484] RBP: 00000000c4009420 R08: 00000000ffffffff R09: 695f7366203a6465
[ 60.847420][ T4484] R10: 0000000074726573 R11: 0000000065737361 R12: 00007f38bfdf0dc0
[ 60.847957][ T4484] R13: 0000000000000003 R14: ffff88811384f300 R15: 0000000000000000
[ 60.849118][ T4484] FS: 00007f38bfdf1640(0000) GS:ffff88813bc80000(0000) knlGS:0000000000000000
[ 60.849760][ T4484] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 60.850214][ T4484] CR2: 00007f175952f010 CR3: 0000000114d7b000 CR4: 00000000000006e0
[ 60.851164][ T4484] Kernel panic - not syncing: Fatal exception
[ 60.853481][ T4484] Kernel Offset: disabled
[ 60.854170][ T4484] Rebooting in 86400 seconds..


Apply the patch, regression tests shows that the problem is fixed.

Signed-off-by: xiaoshoukui <[email protected]>
---
fs/btrfs/volumes.c | 10 +---------
1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 7823168c08a6..c1ab94d8694c 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -4055,14 +4055,6 @@ static int alloc_profile_is_valid(u64 flags, int extended)
return has_single_bit_set(flags);
}

-static inline int balance_need_close(struct btrfs_fs_info *fs_info)
-{
- /* cancel requested || normal exit path */
- return atomic_read(&fs_info->balance_cancel_req) ||
- (atomic_read(&fs_info->balance_pause_req) == 0 &&
- atomic_read(&fs_info->balance_cancel_req) == 0);
-}
-
/*
* Validate target profile against allowed profiles and return true if it's OK.
* Otherwise print the error message and return false.
@@ -4411,7 +4403,7 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
}

if ((ret && ret != -ECANCELED && ret != -ENOSPC) ||
- balance_need_close(fs_info)) {
+ ret == -ECANCELED || ret == 0) {
reset_balance_state(fs_info);
btrfs_exclop_finish(fs_info);
}
--
2.20.1



2023-07-14 10:15:12

by David Sterba

[permalink] [raw]
Subject: Re: [PATCH] btrfs: fix balance_ctl not free properly in btrfs_balance

On Fri, Jul 14, 2023 at 03:15:48AM -0400, xiaoshoukui wrote:
> Signed-off-by: xiaoshoukui <[email protected]>
> ---
> fs/btrfs/volumes.c | 10 +---------
> 1 file changed, 1 insertion(+), 9 deletions(-)
>
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index 7823168c08a6..c1ab94d8694c 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -4055,14 +4055,6 @@ static int alloc_profile_is_valid(u64 flags, int extended)
> return has_single_bit_set(flags);
> }
>
> -static inline int balance_need_close(struct btrfs_fs_info *fs_info)
> -{
> - /* cancel requested || normal exit path */
> - return atomic_read(&fs_info->balance_cancel_req) ||
> - (atomic_read(&fs_info->balance_pause_req) == 0 &&
> - atomic_read(&fs_info->balance_cancel_req) == 0);
> -}
> -
> /*
> * Validate target profile against allowed profiles and return true if it's OK.
> * Otherwise print the error message and return false.
> @@ -4411,7 +4403,7 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
> }
>
> if ((ret && ret != -ECANCELED && ret != -ENOSPC) ||
> - balance_need_close(fs_info)) {
> + ret == -ECANCELED || ret == 0) {
> reset_balance_state(fs_info);
> btrfs_exclop_finish(fs_info);

This is a similar patch to what Josef sent but not exactly the same,

https://lore.kernel.org/linux-btrfs/9cdf58c2f045863e98a52d7f9d5102ba12b87f07.1687496547.git.josef@toxicpanda.com/

Both remove balance_need_close but your version does not track the
paused state. I haven't analyzed it closer, but it looks like you're
missing some case. Josef's fix simplifies the error handling so we don't
have te enumerate the errors.

As you have a reproducer, can you please try it with this patch instead?
It's possible that there are still some unhandled states so it would be
good to check. Thanks.

2023-07-26 03:24:53

by xiaoshoukui

[permalink] [raw]
Subject: Re: [PATCH] btrfs: fix balance_ctl not free properly in btrfs_balance

> This is a similar patch to what Josef sent but not exactly the same,
> https://lore.kernel.org/linux-btrfs/9cdf58c2f045863e98a52d7f9d5102ba12b87f07.1687496547.git.josef@toxicpanda.com/
> Both remove balance_need_close but your version does not track the
> paused state. I haven't analyzed it closer, but it looks like you're
> missing some case. Josef's fix simplifies the error handling so we don't
> have te enumerate the errors.

yeah. I think the fix logic is similar.

> As you have a reproducer, can you please try it with this patch instead?
> It's possible that there are still some unhandled states so it would be
> good to check. Thanks.

With Josef patch my fuzz reproducer ran for three hours without tripping panic.
However, based on the test results, it was found that the fix was not complete.

The above patch only fixes the problem that the balance_ctl is not freed properly,
but does not solve the problem that the pause ioctl request returns an incorrect
value 0 to the user.

Issue a pause or cancel IOCTL request after judging that there is no pause or
cancel request on the path of __btrfs_balance to return 0, which will mislead
the user that the pause and cancel requests are successful.In fact, the balance
request has not been paused or canceled.

> while (1) {
> if ((!counting && atomic_read(&fs_info->balance_pause_req)) ||
> atomic_read(&fs_info->balance_cancel_req)) {
> ret = -ECANCELED;
> goto error;
> }
> --------------------
> ....... issue a pause or cancel req in anthoer thead
> --------------------
> return ret; --//return ret with 0

> [ 60.753212][ T4484] BTRFS info (device loop0): balance: start -f
> [ 60.754589][ T4484] BTRFS info (device loop0): balance: ended with status: 0
> /dev/vda balance successfully
> /dev/vda pause balance successfully --//should fail with invalid.

This should indicate that the pause ioctl fail with invalid request.
With my new patch,the testing result show that both the problems are fixed.

The log of my test:
> [ 109.371116][ T4449] BTRFS info (device loop0): balance: start -f
> [ 109.382745][ T4449] BTRFS info (device loop0): balance: ended with status: 0
> /dev/vda balance successfully
> Failed to pause balance /dev/vda, errno 22 --//fail with invalid.
> Failed to resume balance /dev/vda, errno 107 --//didn't trip assert panic
> close btrfs

Signed-off-by: xiaoshoukui <[email protected]>
---
fs/btrfs/fs.h | 6 ++++++
fs/btrfs/volumes.c | 14 +++++++++-----
2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h
index 203d2a267828..6c85279d0e76 100644
--- a/fs/btrfs/fs.h
+++ b/fs/btrfs/fs.h
@@ -92,6 +92,12 @@ enum {
* main phase. The fs_info::balance_ctl is initialized.
*/
BTRFS_FS_BALANCE_RUNNING,
+
+ /* Indicate that balance has been paused. */
+ BTRFS_FS_BALANCE_PAUSED,
+
+ /* Indicate that balance has been canceled. */
+ BTRFS_FS_BALANCE_CANCELED,

/*
* Indicate that relocation of a chunk has started, it's set per chunk
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 70d69d4b44d2..8e759e7ebdd6 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -4267,7 +4267,6 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
u64 num_devices;
unsigned seq;
bool reducing_redundancy;
- bool paused = false;
int i;

if (btrfs_fs_closing(fs_info) ||
@@ -4390,6 +4389,8 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
ASSERT(!test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
set_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags);
describe_balance_start_or_resume(fs_info);
+ clear_bit(BTRFS_FS_BALANCE_PAUSED, &fs_info->flags);
+ clear_bit(BTRFS_FS_BALANCE_CANCELED, &fs_info->flags);
mutex_unlock(&fs_info->balance_mutex);

ret = __btrfs_balance(fs_info);
@@ -4398,7 +4399,7 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
if (ret == -ECANCELED && atomic_read(&fs_info->balance_pause_req)) {
btrfs_info(fs_info, "balance: paused");
btrfs_exclop_balance(fs_info, BTRFS_EXCLOP_BALANCE_PAUSED);
- paused = true;
+ set_bit(BTRFS_FS_BALANCE_PAUSED, &fs_info->flags);
}
/*
* Balance can be canceled by:
@@ -4415,8 +4416,10 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
*
* So here we only check the return value to catch canceled balance.
*/
- else if (ret == -ECANCELED || ret == -EINTR)
+ else if (ret == -ECANCELED || ret == -EINTR) {
btrfs_info(fs_info, "balance: canceled");
+ set_bit(BTRFS_FS_BALANCE_CANCELED, &fs_info->flags);
+ }
else
btrfs_info(fs_info, "balance: ended with status: %d", ret);

@@ -4428,7 +4431,7 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
}

/* We didn't pause, we can clean everything up. */
- if (!paused) {
+ if (!test_bit(BTRFS_FS_BALANCE_PAUSED, &fs_info->flags)) {
reset_balance_state(fs_info);
btrfs_exclop_finish(fs_info);
}
@@ -4587,6 +4590,7 @@ int btrfs_pause_balance(struct btrfs_fs_info *fs_info)
/* we are good with balance_ctl ripped off from under us */
BUG_ON(test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
atomic_dec(&fs_info->balance_pause_req);
+ ret = test_bit(BTRFS_FS_BALANCE_PAUSED, &fs_info->flags) ? 0 : -EINVAL;
} else {
ret = -ENOTCONN;
}
@@ -4642,7 +4646,7 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info)
test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
atomic_dec(&fs_info->balance_cancel_req);
mutex_unlock(&fs_info->balance_mutex);
- return 0;
+ return test_bit(BTRFS_FS_BALANCE_CANCELED, &fs_info->flags) ? 0 : -EINVAL;
}

int btrfs_uuid_scan_kthread(void *data)
--
2.34.1