2021-10-29 09:30:48

by yebin (H)

[permalink] [raw]
Subject: [PATCH -next v3 0/2] Fix hungtask when nbd_config_put

Ye Bin (2):
nbd: Fix incorrect error handle when first_minor big than '0xff' in
nbd_dev_add
nbd: Fix hungtask when nbd_config_put

drivers/block/nbd.c | 30 ++++++++++++++----------------
1 file changed, 14 insertions(+), 16 deletions(-)

--
2.31.1


2021-10-29 09:31:38

by yebin (H)

[permalink] [raw]
Subject: [PATCH -next v3 2/2] nbd: Fix hungtask when nbd_config_put

I got the following issue:
[ 247.381177] INFO: task kworker/u10:0:47 blocked for more than 120 seconds.
[ 247.382644] Not tainted 4.19.90-dirty #140
[ 247.383502] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[ 247.385027] Call Trace:
[ 247.388384] schedule+0xb8/0x3c0
[ 247.388966] schedule_timeout+0x2b4/0x380
[ 247.392815] wait_for_completion+0x367/0x510
[ 247.397713] flush_workqueue+0x32b/0x1340
[ 247.402700] drain_workqueue+0xda/0x3c0
[ 247.403442] destroy_workqueue+0x7b/0x690
[ 247.405014] nbd_config_put.cold+0x2f9/0x5b6
[ 247.405823] recv_work+0x1fd/0x2b0
[ 247.406485] process_one_work+0x70b/0x1610
[ 247.407262] worker_thread+0x5a9/0x1060
[ 247.408699] kthread+0x35e/0x430
[ 247.410918] ret_from_fork+0x1f/0x30

We can reproduce the issue as follows:
1. Inject memory fault in nbd_start_device
-1244,10 +1248,18 @@ static int nbd_start_device(struct nbd_device *nbd)
nbd_dev_dbg_init(nbd);
for (i = 0; i < num_connections; i++) {
struct recv_thread_args *args;
-
- args = kzalloc(sizeof(*args), GFP_KERNEL);
+
+ if (i == 1) {
+ args = NULL;
+ printk("%s: inject malloc error\n", __func__);
+ }
+ else
+ args = kzalloc(sizeof(*args), GFP_KERNEL);
2. Inject delay in recv_work
-757,6 +760,8 @@ static void recv_work(struct work_struct *work)

blk_mq_complete_request(blk_mq_rq_from_pdu(cmd));
}
+ printk("%s: comm=%s pid=%d\n", __func__, current->comm, current->pid);
+ mdelay(5 * 1000);
nbd_config_put(nbd);
atomic_dec(&config->recv_threads);
wake_up(&config->recv_wq);
3. Create nbd server
nbd-server 8000 /tmp/disk
4. Create nbd client
nbd-client localhost 8000 /dev/nbd1
This will then trigger the above issue.

The reason is that, with the delay added in recv_work, recv_work ends up
releasing the last reference of 'nbd->config_refs'. nbd_config_put will
then call flush_workqueue (via destroy_workqueue) and wait for all work to
finish, including the recv_work it is being called from. Obviously, this
leads to a deadlock.
To solve this issue, following Josef's suggestion, move the 'recv_workq'
init from nbd_start_device to nbd_dev_add, then destroy 'recv_workq' when
the nbd device is torn down.

Signed-off-by: Ye Bin <[email protected]>
---
drivers/block/nbd.c | 30 ++++++++++++++----------------
1 file changed, 14 insertions(+), 16 deletions(-)

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 096883ab9b76..c9a65a260668 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -1314,10 +1314,6 @@ static void nbd_config_put(struct nbd_device *nbd)
kfree(nbd->config);
nbd->config = NULL;

- if (nbd->recv_workq)
- destroy_workqueue(nbd->recv_workq);
- nbd->recv_workq = NULL;
-
nbd->tag_set.timeout = 0;
nbd->disk->queue->limits.discard_granularity = 0;
nbd->disk->queue->limits.discard_alignment = 0;
@@ -1346,14 +1342,6 @@ static int nbd_start_device(struct nbd_device *nbd)
return -EINVAL;
}

- nbd->recv_workq = alloc_workqueue("knbd%d-recv",
- WQ_MEM_RECLAIM | WQ_HIGHPRI |
- WQ_UNBOUND, 0, nbd->index);
- if (!nbd->recv_workq) {
- dev_err(disk_to_dev(nbd->disk), "Could not allocate knbd recv work queue.\n");
- return -ENOMEM;
- }
-
blk_mq_update_nr_hw_queues(&nbd->tag_set, config->num_connections);
nbd->pid = task_pid_nr(current);

@@ -1779,6 +1767,15 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
}
nbd->disk = disk;

+ nbd->recv_workq = alloc_workqueue("nbd%d-recv",
+ WQ_MEM_RECLAIM | WQ_HIGHPRI |
+ WQ_UNBOUND, 0, nbd->index);
+ if (!nbd->recv_workq) {
+ dev_err(disk_to_dev(nbd->disk), "Could not allocate knbd recv work queue.\n");
+ err = -ENOMEM;
+ goto out_err_disk;
+ }
+
/*
* Tell the block layer that we are not a rotational device
*/
@@ -1809,7 +1806,7 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
disk->first_minor = index << part_shift;
if (disk->first_minor > 0xff) {
err = -EINVAL;
- goto out_err_disk;
+ goto out_free_work;
}

disk->minors = 1 << part_shift;
@@ -1818,7 +1815,7 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
sprintf(disk->disk_name, "nbd%d", index);
err = add_disk(disk);
if (err)
- goto out_err_disk;
+ goto out_free_work;

/*
* Now publish the device.
@@ -1827,6 +1824,8 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
nbd_total_devices++;
return nbd;

+out_free_work:
+ destroy_workqueue(nbd->recv_workq);
out_err_disk:
blk_cleanup_disk(disk);
out_free_idr:
@@ -2087,8 +2086,7 @@ static void nbd_disconnect_and_put(struct nbd_device *nbd)
* queue. And this also ensure that we can safely call nbd_clear_que()
* to cancel the inflight I/Os.
*/
- if (nbd->recv_workq)
- flush_workqueue(nbd->recv_workq);
+ flush_workqueue(nbd->recv_workq);
nbd_clear_que(nbd);
nbd->task_setup = NULL;
mutex_unlock(&nbd->config_lock);
--
2.31.1

2021-10-29 09:32:37

by yebin (H)

[permalink] [raw]
Subject: [PATCH -next v3 1/2] nbd: Fix incorrect error handle when first_minor big than '0xff' in nbd_dev_add

If first_minor is bigger than '0xff', the code jumps to the out_free_idr
label, which skips the disk cleanup.

Fixes: b1a811633f73 ("block: nbd: add sanity check for first_minor")
Signed-off-by: Ye Bin <[email protected]>
---
drivers/block/nbd.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index b47b2a87ae8f..096883ab9b76 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -1809,7 +1809,7 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
disk->first_minor = index << part_shift;
if (disk->first_minor > 0xff) {
err = -EINVAL;
- goto out_free_idr;
+ goto out_err_disk;
}

disk->minors = 1 << part_shift;
--
2.31.1

2021-10-29 14:35:40

by Josef Bacik

[permalink] [raw]
Subject: Re: [PATCH -next v3 1/2] nbd: Fix incorrect error handle when first_minor big than '0xff' in nbd_dev_add

On Fri, Oct 29, 2021 at 05:42:27PM +0800, Ye Bin wrote:
> If first_minor big than '0xff' goto out_free_idr label, this will miss
> cleanup disk.
>
> Fixes: b1a811633f73 ("block: nbd: add sanity check for first_minor")
> Signed-off-by: Ye Bin <[email protected]>

Reviewed-by: Josef Bacik <[email protected]>

Thanks,

Josef

2021-10-29 14:40:31

by Josef Bacik

[permalink] [raw]
Subject: Re: [PATCH -next v3 2/2] nbd: Fix hungtask when nbd_config_put

On Fri, Oct 29, 2021 at 05:42:28PM +0800, Ye Bin wrote:
> I got follow issue:
> [ 247.381177] INFO: task kworker/u10:0:47 blocked for more than 120 seconds.
> [ 247.382644] Not tainted 4.19.90-dirty #140
> [ 247.383502] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
> [ 247.385027] Call Trace:
> [ 247.388384] schedule+0xb8/0x3c0
> [ 247.388966] schedule_timeout+0x2b4/0x380
> [ 247.392815] wait_for_completion+0x367/0x510
> [ 247.397713] flush_workqueue+0x32b/0x1340
> [ 247.402700] drain_workqueue+0xda/0x3c0
> [ 247.403442] destroy_workqueue+0x7b/0x690
> [ 247.405014] nbd_config_put.cold+0x2f9/0x5b6
> [ 247.405823] recv_work+0x1fd/0x2b0
> [ 247.406485] process_one_work+0x70b/0x1610
> [ 247.407262] worker_thread+0x5a9/0x1060
> [ 247.408699] kthread+0x35e/0x430
> [ 247.410918] ret_from_fork+0x1f/0x30
>
> We can reprodeuce issue as follows:

"reproduce"

> 1. Inject memory fault in nbd_start_device
> -1244,10 +1248,18 @@ static int nbd_start_device(struct nbd_device *nbd)
> nbd_dev_dbg_init(nbd);
> for (i = 0; i < num_connections; i++) {
> struct recv_thread_args *args;
> -
> - args = kzalloc(sizeof(*args), GFP_KERNEL);
> +
> + if (i == 1) {
> + args = NULL;
> + printk("%s: inject malloc error\n", __func__);
> + }
> + else
> + args = kzalloc(sizeof(*args), GFP_KERNEL);
> 2. Inject delay in recv_work
> -757,6 +760,8 @@ static void recv_work(struct work_struct *work)
>
> blk_mq_complete_request(blk_mq_rq_from_pdu(cmd));
> }
> + printk("%s: comm=%s pid=%d\n", __func__, current->comm, current->pid);
> + mdelay(5 * 1000);
> nbd_config_put(nbd);
> atomic_dec(&config->recv_threads);
> wake_up(&config->recv_wq);
> 3. Create nbd server
> nbd-server 8000 /tmp/disk
> 4. Create nbd client
> nbd-client localhost 8000 /dev/nbd1
> Then will trigger above issue.
>
> Reason is when add delay in recv_work, lead to relase the last reference

"release"

> of 'nbd->config_refs'. nbd_config_put will call flush_workqueue to make
> all work finish. Obviously, it will lead to deadloop.
> To solve this issue, according to Josef's suggestion move 'recv_work'
> init from start device to nbd_dev_add, then destory 'recv_work'when

"destroy"

> nbd device teardown.
>
> Signed-off-by: Ye Bin <[email protected]>
> ---
> drivers/block/nbd.c | 30 ++++++++++++++----------------
> 1 file changed, 14 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
> index 096883ab9b76..c9a65a260668 100644
> --- a/drivers/block/nbd.c
> +++ b/drivers/block/nbd.c
> @@ -1314,10 +1314,6 @@ static void nbd_config_put(struct nbd_device *nbd)
> kfree(nbd->config);
> nbd->config = NULL;
>
> - if (nbd->recv_workq)
> - destroy_workqueue(nbd->recv_workq);
> - nbd->recv_workq = NULL;
> -
> nbd->tag_set.timeout = 0;
> nbd->disk->queue->limits.discard_granularity = 0;
> nbd->disk->queue->limits.discard_alignment = 0;
> @@ -1346,14 +1342,6 @@ static int nbd_start_device(struct nbd_device *nbd)
> return -EINVAL;
> }
>
> - nbd->recv_workq = alloc_workqueue("knbd%d-recv",
> - WQ_MEM_RECLAIM | WQ_HIGHPRI |
> - WQ_UNBOUND, 0, nbd->index);
> - if (!nbd->recv_workq) {
> - dev_err(disk_to_dev(nbd->disk), "Could not allocate knbd recv work queue.\n");
> - return -ENOMEM;
> - }
> -
> blk_mq_update_nr_hw_queues(&nbd->tag_set, config->num_connections);
> nbd->pid = task_pid_nr(current);
>
> @@ -1779,6 +1767,15 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
> }
> nbd->disk = disk;
>
> + nbd->recv_workq = alloc_workqueue("nbd%d-recv",
> + WQ_MEM_RECLAIM | WQ_HIGHPRI |
> + WQ_UNBOUND, 0, nbd->index);
> + if (!nbd->recv_workq) {
> + dev_err(disk_to_dev(nbd->disk), "Could not allocate knbd recv work queue.\n");
> + err = -ENOMEM;
> + goto out_err_disk;
> + }
> +

You never free this up, you need to add a destroy_workqueue(nbd->recv_workq) to
nbd_dev_remove().

> /*
> * Tell the block layer that we are not a rotational device
> */
> @@ -1809,7 +1806,7 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
> disk->first_minor = index << part_shift;
> if (disk->first_minor > 0xff) {
> err = -EINVAL;
> - goto out_err_disk;
> + goto out_free_work;
> }
>
> disk->minors = 1 << part_shift;
> @@ -1818,7 +1815,7 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
> sprintf(disk->disk_name, "nbd%d", index);
> err = add_disk(disk);
> if (err)
> - goto out_err_disk;
> + goto out_free_work;
>
> /*
> * Now publish the device.
> @@ -1827,6 +1824,8 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
> nbd_total_devices++;
> return nbd;
>
> +out_free_work:
> + destroy_workqueue(nbd->recv_workq);
> out_err_disk:
> blk_cleanup_disk(disk);
> out_free_idr:
> @@ -2087,8 +2086,7 @@ static void nbd_disconnect_and_put(struct nbd_device *nbd)
> * queue. And this also ensure that we can safely call nbd_clear_que()
> * to cancel the inflight I/Os.
> */
> - if (nbd->recv_workq)
> - flush_workqueue(nbd->recv_workq);
> + flush_workqueue(nbd->recv_workq);

The comment above this part needs to be updated, as we no longer have this
problem. Thanks,

Josef