LinuxLists.cc - [PATCH] loop: avoid EAGAIN, if offset or block

2019-05-18 00:54:27

Subject: [PATCH] loop: avoid EAGAIN, if offset or block_size are changed

This patch tries to avoid EAGAIN due to nrpages!=0 that was originally trying
to drop stale pages resulting in wrong data access.

Report: https://bugs.chromium.org/p/chromium/issues/detail?id=938958#c38

Cc: <[email protected]>
Cc: Jens Axboe <[email protected]>
Cc: [email protected]
Cc: Bart Van Assche <[email protected]>
Fixes: 5db470e229e2 ("loop: drop caches if offset or block_size are changed")
Reported-by: Gwendal Grignou <[email protected]>
Reported-by: grygorii tertychnyi <[email protected]>
Signed-off-by: Jaegeuk Kim <[email protected]>
---
drivers/block/loop.c | 44 +++++++++++++++++---------------------------
1 file changed, 17 insertions(+), 27 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 102d79575895..7c7d2d9c47d0 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1212,6 +1212,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
kuid_t uid = current_uid();
struct block_device *bdev;
bool partscan = false;
+ bool drop_caches = false;

err = mutex_lock_killable(&loop_ctl_mutex);
if (err)
@@ -1232,10 +1233,8 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
}

if (lo->lo_offset != info->lo_offset ||
- lo->lo_sizelimit != info->lo_sizelimit) {
- sync_blockdev(lo->lo_device);
- kill_bdev(lo->lo_device);
- }
+ lo->lo_sizelimit != info->lo_sizelimit)
+ drop_caches = true;

/* I/O need to be drained during transfer transition */
blk_mq_freeze_queue(lo->lo_queue);
@@ -1265,14 +1264,6 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)

if (lo->lo_offset != info->lo_offset ||
lo->lo_sizelimit != info->lo_sizelimit) {
- /* kill_bdev should have truncated all the pages */
- if (lo->lo_device->bd_inode->i_mapping->nrpages) {
- err = -EAGAIN;
- pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n",
- __func__, lo->lo_number, lo->lo_file_name,
- lo->lo_device->bd_inode->i_mapping->nrpages);
- goto out_unfreeze;
- }
if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) {
err = -EFBIG;
goto out_unfreeze;
@@ -1317,6 +1308,12 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
bdev = lo->lo_device;
partscan = true;
}
+
+ /* truncate stale pages cached by previous operations */
+ if (!err && drop_caches) {
+ sync_blockdev(lo->lo_device);
+ kill_bdev(lo->lo_device);
+ }
out_unlock:
mutex_unlock(&loop_ctl_mutex);
if (partscan)
@@ -1498,6 +1495,7 @@ static int loop_set_dio(struct loop_device *lo, unsigned long arg)

static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
{
+ bool drop_caches = false;
int err = 0;

if (lo->lo_state != Lo_bound)
@@ -1506,23 +1504,10 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
if (arg < 512 || arg > PAGE_SIZE || !is_power_of_2(arg))
return -EINVAL;

- if (lo->lo_queue->limits.logical_block_size != arg) {
- sync_blockdev(lo->lo_device);
- kill_bdev(lo->lo_device);
- }
+ if (lo->lo_queue->limits.logical_block_size != arg)
+ drop_caches = true;

blk_mq_freeze_queue(lo->lo_queue);
-
- /* kill_bdev should have truncated all the pages */
- if (lo->lo_queue->limits.logical_block_size != arg &&
- lo->lo_device->bd_inode->i_mapping->nrpages) {
- err = -EAGAIN;
- pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n",
- __func__, lo->lo_number, lo->lo_file_name,
- lo->lo_device->bd_inode->i_mapping->nrpages);
- goto out_unfreeze;
- }
-
blk_queue_logical_block_size(lo->lo_queue, arg);
blk_queue_physical_block_size(lo->lo_queue, arg);
blk_queue_io_min(lo->lo_queue, arg);
@@ -1530,6 +1515,11 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
out_unfreeze:
blk_mq_unfreeze_queue(lo->lo_queue);

+ /* truncate stale pages cached by previous operations */
+ if (drop_caches) {
+ sync_blockdev(lo->lo_device);
+ kill_bdev(lo->lo_device);
+ }
return err;
}

--
2.19.0.605.g01d371f741-goog

2019-05-18 02:09:57

by Jaegeuk Kim

[permalink] [raw]

Subject: Re: [PATCH v2] loop: avoid EAGAIN, if offset or block_size are changed

This patch tries to avoid EAGAIN due to nrpages!=0 that was originally trying
to drop stale pages resulting in wrong data access.

Report: https://bugs.chromium.org/p/chromium/issues/detail?id=938958#c38

Cc: <[email protected]>
Cc: Jens Axboe <[email protected]>
Cc: [email protected]
Cc: Bart Van Assche <[email protected]>
Fixes: 5db470e229e2 ("loop: drop caches if offset or block_size are changed")
Reported-by: Gwendal Grignou <[email protected]>
Reported-by: grygorii tertychnyi <[email protected]>
Signed-off-by: Jaegeuk Kim <[email protected]>
---
v2 from v1:
- remove obsolete jump

drivers/block/loop.c | 45 +++++++++++++++++---------------------------
1 file changed, 17 insertions(+), 28 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 102d79575895..42994de2dd12 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1212,6 +1212,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
kuid_t uid = current_uid();
struct block_device *bdev;
bool partscan = false;
+ bool drop_caches = false;

err = mutex_lock_killable(&loop_ctl_mutex);
if (err)
@@ -1232,10 +1233,8 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
}

if (lo->lo_offset != info->lo_offset ||
- lo->lo_sizelimit != info->lo_sizelimit) {
- sync_blockdev(lo->lo_device);
- kill_bdev(lo->lo_device);
- }
+ lo->lo_sizelimit != info->lo_sizelimit)
+ drop_caches = true;

/* I/O need to be drained during transfer transition */
blk_mq_freeze_queue(lo->lo_queue);
@@ -1265,14 +1264,6 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)

if (lo->lo_offset != info->lo_offset ||
lo->lo_sizelimit != info->lo_sizelimit) {
- /* kill_bdev should have truncated all the pages */
- if (lo->lo_device->bd_inode->i_mapping->nrpages) {
- err = -EAGAIN;
- pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n",
- __func__, lo->lo_number, lo->lo_file_name,
- lo->lo_device->bd_inode->i_mapping->nrpages);
- goto out_unfreeze;
- }
if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) {
err = -EFBIG;
goto out_unfreeze;
@@ -1317,6 +1308,12 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
bdev = lo->lo_device;
partscan = true;
}
+
+ /* truncate stale pages cached by previous operations */
+ if (!err && drop_caches) {
+ sync_blockdev(lo->lo_device);
+ kill_bdev(lo->lo_device);
+ }
out_unlock:
mutex_unlock(&loop_ctl_mutex);
if (partscan)
@@ -1498,6 +1495,7 @@ static int loop_set_dio(struct loop_device *lo, unsigned long arg)

static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
{
+ bool drop_caches = false;
int err = 0;

if (lo->lo_state != Lo_bound)
@@ -1506,30 +1504,21 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
if (arg < 512 || arg > PAGE_SIZE || !is_power_of_2(arg))
return -EINVAL;

- if (lo->lo_queue->limits.logical_block_size != arg) {
- sync_blockdev(lo->lo_device);
- kill_bdev(lo->lo_device);
- }
+ if (lo->lo_queue->limits.logical_block_size != arg)
+ drop_caches = true;

blk_mq_freeze_queue(lo->lo_queue);
-
- /* kill_bdev should have truncated all the pages */
- if (lo->lo_queue->limits.logical_block_size != arg &&
- lo->lo_device->bd_inode->i_mapping->nrpages) {
- err = -EAGAIN;
- pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n",
- __func__, lo->lo_number, lo->lo_file_name,
- lo->lo_device->bd_inode->i_mapping->nrpages);
- goto out_unfreeze;
- }
-
blk_queue_logical_block_size(lo->lo_queue, arg);
blk_queue_physical_block_size(lo->lo_queue, arg);
blk_queue_io_min(lo->lo_queue, arg);
loop_update_dio(lo);
-out_unfreeze:
blk_mq_unfreeze_queue(lo->lo_queue);

+ /* truncate stale pages cached by previous operations */
+ if (drop_caches) {
+ sync_blockdev(lo->lo_device);
+ kill_bdev(lo->lo_device);
+ }
return err;
}

--
2.19.0.605.g01d371f741-goog

2019-06-17 21:11:03

by Jaegeuk Kim

[permalink] [raw]

Subject: Re: [f2fs-dev] [PATCH v2] loop: avoid EAGAIN, if offset or block_size are changed

Jens,

Any chance to get a review for this?

(Added Tested-by:)

On 05/17, Jaegeuk Kim wrote:
> This patch tries to avoid EAGAIN due to nrpages!=0 that was originally trying
> to drop stale pages resulting in wrong data access.
>
> Report: https://bugs.chromium.org/p/chromium/issues/detail?id=938958#c38
>
> Cc: <[email protected]>
> Cc: Jens Axboe <[email protected]>
> Cc: [email protected]
> Cc: Bart Van Assche <[email protected]>
> Fixes: 5db470e229e2 ("loop: drop caches if offset or block_size are changed")
> Reported-by: Gwendal Grignou <[email protected]>
> Reported-by: grygorii tertychnyi <[email protected]>

Tested-by: Francesco Ruggeri <[email protected]>

> Signed-off-by: Jaegeuk Kim <[email protected]>
> ---
> v2 from v1:
> - remove obsolete jump
>
> drivers/block/loop.c | 45 +++++++++++++++++---------------------------
> 1 file changed, 17 insertions(+), 28 deletions(-)
>
> diff --git a/drivers/block/loop.c b/drivers/block/loop.c
> index 102d79575895..42994de2dd12 100644
> --- a/drivers/block/loop.c
> +++ b/drivers/block/loop.c
> @@ -1212,6 +1212,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
> kuid_t uid = current_uid();
> struct block_device *bdev;
> bool partscan = false;
> + bool drop_caches = false;
>
> err = mutex_lock_killable(&loop_ctl_mutex);
> if (err)
> @@ -1232,10 +1233,8 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
> }
>
> if (lo->lo_offset != info->lo_offset ||
> - lo->lo_sizelimit != info->lo_sizelimit) {
> - sync_blockdev(lo->lo_device);
> - kill_bdev(lo->lo_device);
> - }
> + lo->lo_sizelimit != info->lo_sizelimit)
> + drop_caches = true;
>
> /* I/O need to be drained during transfer transition */
> blk_mq_freeze_queue(lo->lo_queue);
> @@ -1265,14 +1264,6 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
>
> if (lo->lo_offset != info->lo_offset ||
> lo->lo_sizelimit != info->lo_sizelimit) {
> - /* kill_bdev should have truncated all the pages */
> - if (lo->lo_device->bd_inode->i_mapping->nrpages) {
> - err = -EAGAIN;
> - pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n",
> - __func__, lo->lo_number, lo->lo_file_name,
> - lo->lo_device->bd_inode->i_mapping->nrpages);
> - goto out_unfreeze;
> - }
> if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) {
> err = -EFBIG;
> goto out_unfreeze;
> @@ -1317,6 +1308,12 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
> bdev = lo->lo_device;
> partscan = true;
> }
> +
> + /* truncate stale pages cached by previous operations */
> + if (!err && drop_caches) {
> + sync_blockdev(lo->lo_device);
> + kill_bdev(lo->lo_device);
> + }
> out_unlock:
> mutex_unlock(&loop_ctl_mutex);
> if (partscan)
> @@ -1498,6 +1495,7 @@ static int loop_set_dio(struct loop_device *lo, unsigned long arg)
>
> static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
> {
> + bool drop_caches = false;
> int err = 0;
>
> if (lo->lo_state != Lo_bound)
> @@ -1506,30 +1504,21 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
> if (arg < 512 || arg > PAGE_SIZE || !is_power_of_2(arg))
> return -EINVAL;
>
> - if (lo->lo_queue->limits.logical_block_size != arg) {
> - sync_blockdev(lo->lo_device);
> - kill_bdev(lo->lo_device);
> - }
> + if (lo->lo_queue->limits.logical_block_size != arg)
> + drop_caches = true;
>
> blk_mq_freeze_queue(lo->lo_queue);
> -
> - /* kill_bdev should have truncated all the pages */
> - if (lo->lo_queue->limits.logical_block_size != arg &&
> - lo->lo_device->bd_inode->i_mapping->nrpages) {
> - err = -EAGAIN;
> - pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n",
> - __func__, lo->lo_number, lo->lo_file_name,
> - lo->lo_device->bd_inode->i_mapping->nrpages);
> - goto out_unfreeze;
> - }
> -
> blk_queue_logical_block_size(lo->lo_queue, arg);
> blk_queue_physical_block_size(lo->lo_queue, arg);
> blk_queue_io_min(lo->lo_queue, arg);
> loop_update_dio(lo);
> -out_unfreeze:
> blk_mq_unfreeze_queue(lo->lo_queue);
>
> + /* truncate stale pages cached by previous operations */
> + if (drop_caches) {
> + sync_blockdev(lo->lo_device);
> + kill_bdev(lo->lo_device);
> + }
> return err;
> }
>
> --
> 2.19.0.605.g01d371f741-goog
>
>
>
> _______________________________________________
> Linux-f2fs-devel mailing list
> [email protected]
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

2019-11-18 18:38:49

by Andrew Norrie

[permalink] [raw]

Subject: Re: loop: avoid EAGAIN, if offset or block_size are changed

Any chance to get a review on this patch?

We're running Singularity containers in an mpi environment where multiple
concurrent container image loop mounts occur and we hit this issue as reported
to Sylabs by us here:

https://github.com/sylabs/singularity/issues/3860

It is affecting our production.
This email and any accompanying attachments are confidential. If you received this email by mistake, please delete
it from your system. Any review, disclosure, copying, distribution, or use of the email by others is strictly prohibited.

2019-11-19 04:02:32

by Greg Kroah-Hartman

[permalink] [raw]

Subject: Re: loop: avoid EAGAIN, if offset or block_size are changed

On Mon, Nov 18, 2019 at 11:36:16AM -0700, Andrew Norrie wrote:
> This email and any accompanying attachments are confidential. If you received this email by mistake, please delete
> it from your system. Any review, disclosure, copying, distribution, or use of the email by others is strictly prohibited.

Now deleted. This is not compatible with Linux mailing lists, sorry.

2019-11-19 23:42:44

by Bart Van Assche

[permalink] [raw]

Subject: Re: [PATCH v2] loop: avoid EAGAIN, if offset or block_size are changed

On 5/17/19 5:53 PM, Jaegeuk Kim wrote:
> This patch tries to avoid EAGAIN due to nrpages!=0 that was originally trying
> to drop stale pages resulting in wrong data access.
>
> Report: https://bugs.chromium.org/p/chromium/issues/detail?id=938958#c38

Please provide a more detailed commit description. What is wrong with
the current implementation and why is the new behavior considered the
correct behavior?

This patch moves draining code from before the following comment to
after that comment:

/* I/O need to be drained during transfer transition */

Is that comment still correct or should it perhaps be updated?

Thanks,

Bart.

2019-11-25 18:56:18

by Jaegeuk Kim

[permalink] [raw]

Subject: Re: [PATCH v2] loop: avoid EAGAIN, if offset or block_size are changed

On 11/19, Bart Van Assche wrote:
> On 5/17/19 5:53 PM, Jaegeuk Kim wrote:
> > This patch tries to avoid EAGAIN due to nrpages!=0 that was originally trying
> > to drop stale pages resulting in wrong data access.
> >
> > Report: https://bugs.chromium.org/p/chromium/issues/detail?id=938958#c38
>
> Please provide a more detailed commit description. What is wrong with the
> current implementation and why is the new behavior considered the correct
> behavior?

Some history would be:

Original bug fix is:
commit 5db470e229e2 ("loop: drop caches if offset or block_size are changed"),
which returns EAGAIN so that user land like Chrome would require enhancing their
error handling routines.

So, this patch tries to avoid EAGAIN while addressing the original bug.

>
> This patch moves draining code from before the following comment to after
> that comment:
>
> /* I/O need to be drained during transfer transition */
>
> Is that comment still correct or should it perhaps be updated?

IMHO, it's still valid.

>
> Thanks,
>

> Bart.

2019-11-25 18:56:33

by Bart Van Assche

[permalink] [raw]

Subject: Re: [PATCH v2] loop: avoid EAGAIN, if offset or block_size are changed

On 11/25/19 9:59 AM, Jaegeuk Kim wrote:
> On 11/19, Bart Van Assche wrote:
>> On 5/17/19 5:53 PM, Jaegeuk Kim wrote:
>>> This patch tries to avoid EAGAIN due to nrpages!=0 that was originally trying
>>> to drop stale pages resulting in wrong data access.
>>>
>>> Report: https://bugs.chromium.org/p/chromium/issues/detail?id=938958#c38
>>
>> Please provide a more detailed commit description. What is wrong with the
>> current implementation and why is the new behavior considered the correct
>> behavior?
>
> Some history would be:
>
> Original bug fix is:
> commit 5db470e229e2 ("loop: drop caches if offset or block_size are changed"),
> which returns EAGAIN so that user land like Chrome would require enhancing their
> error handling routines.
>
> So, this patch tries to avoid EAGAIN while addressing the original bug.
>
>>
>> This patch moves draining code from before the following comment to after
>> that comment:
>>
>> /* I/O need to be drained during transfer transition */
>>
>> Is that comment still correct or should it perhaps be updated?
>
> IMHO, it's still valid.

Hi Jaegeuk,

Thank you for the additional and very helpful clarification. Can you have a look at the (totally untested) patch below? I prefer that version because it prevents concurrent processing of requests and syncing/killing the bdev.

Thanks,

Bart.

Subject: [PATCH] loop: Avoid EAGAIN if offset or block_size are changed

After sync_blockdev() and kill_bdev() have been called, more requests
can be submitted to the loop device. These requests dirty additional
pages, causing loop_set_status() to return -EAGAIN. Not all user space
code that changes the offset and/or the block size handles -EAGAIN
correctly. Hence make sure that loop_set_status() does not return
-EAGAIN.

Fixes: 5db470e229e2 ("loop: drop caches if offset or block_size are changed")
Reported-by: Gwendal Grignou <[email protected]>
Reported-by: grygorii tertychnyi <[email protected]>
Reported-by: Jaegeuk Kim <[email protected]>
Cc: <[email protected]>
Signed-off-by: Bart Van Assche <[email protected]>
---
drivers/block/loop.c | 35 +++++++----------------------------
1 file changed, 7 insertions(+), 28 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 739b372a5112..48cfc8b9c247 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1264,15 +1264,15 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
goto out_unlock;
}

+ /* I/O need to be drained during transfer transition */
+ blk_mq_freeze_queue(lo->lo_queue);
+
if (lo->lo_offset != info->lo_offset ||
lo->lo_sizelimit != info->lo_sizelimit) {
sync_blockdev(lo->lo_device);
kill_bdev(lo->lo_device);
}

- /* I/O need to be drained during transfer transition */
- blk_mq_freeze_queue(lo->lo_queue);
-
err = loop_release_xfer(lo);
if (err)
goto out_unfreeze;
@@ -1298,14 +1298,6 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)

if (lo->lo_offset != info->lo_offset ||
lo->lo_sizelimit != info->lo_sizelimit) {
- /* kill_bdev should have truncated all the pages */
- if (lo->lo_device->bd_inode->i_mapping->nrpages) {
- err = -EAGAIN;
- pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n",
- __func__, lo->lo_number, lo->lo_file_name,
- lo->lo_device->bd_inode->i_mapping->nrpages);
- goto out_unfreeze;
- }
if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) {
err = -EFBIG;
goto out_unfreeze;
@@ -1531,39 +1523,26 @@ static int loop_set_dio(struct loop_device *lo, unsigned long arg)

static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
{
- int err = 0;
-
if (lo->lo_state != Lo_bound)
return -ENXIO;

if (arg < 512 || arg > PAGE_SIZE || !is_power_of_2(arg))
return -EINVAL;

+ blk_mq_freeze_queue(lo->lo_queue);
+
if (lo->lo_queue->limits.logical_block_size != arg) {
sync_blockdev(lo->lo_device);
kill_bdev(lo->lo_device);
}
-
- blk_mq_freeze_queue(lo->lo_queue);
-
- /* kill_bdev should have truncated all the pages */
- if (lo->lo_queue->limits.logical_block_size != arg &&
- lo->lo_device->bd_inode->i_mapping->nrpages) {
- err = -EAGAIN;
- pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n",
- __func__, lo->lo_number, lo->lo_file_name,
- lo->lo_device->bd_inode->i_mapping->nrpages);
- goto out_unfreeze;
- }
-
blk_queue_logical_block_size(lo->lo_queue, arg);
blk_queue_physical_block_size(lo->lo_queue, arg);
blk_queue_io_min(lo->lo_queue, arg);
loop_update_dio(lo);
-out_unfreeze:
+
blk_mq_unfreeze_queue(lo->lo_queue);

- return err;
+ return 0;
}

static int lo_simple_ioctl(struct loop_device *lo, unsigned int cmd,

2019-11-25 19:26:18

by Jaegeuk Kim

[permalink] [raw]

Subject: Re: [PATCH v2] loop: avoid EAGAIN, if offset or block_size are changed

Hi Bart,

On 11/25, Bart Van Assche wrote:
> On 11/25/19 9:59 AM, Jaegeuk Kim wrote:
> > On 11/19, Bart Van Assche wrote:
> > > On 5/17/19 5:53 PM, Jaegeuk Kim wrote:
> > > > This patch tries to avoid EAGAIN due to nrpages!=0 that was originally trying
> > > > to drop stale pages resulting in wrong data access.
> > > >
> > > > Report: https://bugs.chromium.org/p/chromium/issues/detail?id=938958#c38
> > >
> > > Please provide a more detailed commit description. What is wrong with the
> > > current implementation and why is the new behavior considered the correct
> > > behavior?
> >
> > Some history would be:
> >
> > Original bug fix is:
> > commit 5db470e229e2 ("loop: drop caches if offset or block_size are changed"),
> > which returns EAGAIN so that user land like Chrome would require enhancing their
> > error handling routines.
> >
> > So, this patch tries to avoid EAGAIN while addressing the original bug.
> >
> > >
> > > This patch moves draining code from before the following comment to after
> > > that comment:
> > >
> > > /* I/O need to be drained during transfer transition */
> > >
> > > Is that comment still correct or should it perhaps be updated?
> >
> > IMHO, it's still valid.
>
> Hi Jaegeuk,
>
> Thank you for the additional and very helpful clarification. Can you have a look at the (totally untested) patch below? I prefer that version because it prevents concurrent processing of requests and syncing/killing the bdev.

Yeah, I thought this was much cleaner way, but wasn't sure it could be doable
to sync|kill block device after freezing the queue. Is it okay?

>
> Thanks,
>
> Bart.
>
>
> Subject: [PATCH] loop: Avoid EAGAIN if offset or block_size are changed
>
> After sync_blockdev() and kill_bdev() have been called, more requests
> can be submitted to the loop device. These requests dirty additional
> pages, causing loop_set_status() to return -EAGAIN. Not all user space
> code that changes the offset and/or the block size handles -EAGAIN
> correctly. Hence make sure that loop_set_status() does not return
> -EAGAIN.
>
> Fixes: 5db470e229e2 ("loop: drop caches if offset or block_size are changed")
> Reported-by: Gwendal Grignou <[email protected]>
> Reported-by: grygorii tertychnyi <[email protected]>
> Reported-by: Jaegeuk Kim <[email protected]>
> Cc: <[email protected]>
> Signed-off-by: Bart Van Assche <[email protected]>
> ---
> drivers/block/loop.c | 35 +++++++----------------------------
> 1 file changed, 7 insertions(+), 28 deletions(-)
>
> diff --git a/drivers/block/loop.c b/drivers/block/loop.c
> index 739b372a5112..48cfc8b9c247 100644
> --- a/drivers/block/loop.c
> +++ b/drivers/block/loop.c
> @@ -1264,15 +1264,15 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
> goto out_unlock;
> }
>
> + /* I/O need to be drained during transfer transition */
> + blk_mq_freeze_queue(lo->lo_queue);
> +
> if (lo->lo_offset != info->lo_offset ||
> lo->lo_sizelimit != info->lo_sizelimit) {
> sync_blockdev(lo->lo_device);
> kill_bdev(lo->lo_device);
> }
>
> - /* I/O need to be drained during transfer transition */
> - blk_mq_freeze_queue(lo->lo_queue);
> -
> err = loop_release_xfer(lo);
> if (err)
> goto out_unfreeze;
> @@ -1298,14 +1298,6 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
>
> if (lo->lo_offset != info->lo_offset ||
> lo->lo_sizelimit != info->lo_sizelimit) {
> - /* kill_bdev should have truncated all the pages */
> - if (lo->lo_device->bd_inode->i_mapping->nrpages) {
> - err = -EAGAIN;
> - pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n",
> - __func__, lo->lo_number, lo->lo_file_name,
> - lo->lo_device->bd_inode->i_mapping->nrpages);
> - goto out_unfreeze;
> - }
> if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) {
> err = -EFBIG;
> goto out_unfreeze;
> @@ -1531,39 +1523,26 @@ static int loop_set_dio(struct loop_device *lo, unsigned long arg)
>
> static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
> {
> - int err = 0;
> -
> if (lo->lo_state != Lo_bound)
> return -ENXIO;
>
> if (arg < 512 || arg > PAGE_SIZE || !is_power_of_2(arg))
> return -EINVAL;
>
> + blk_mq_freeze_queue(lo->lo_queue);
> +
> if (lo->lo_queue->limits.logical_block_size != arg) {
> sync_blockdev(lo->lo_device);
> kill_bdev(lo->lo_device);
> }
> -
> - blk_mq_freeze_queue(lo->lo_queue);
> -
> - /* kill_bdev should have truncated all the pages */
> - if (lo->lo_queue->limits.logical_block_size != arg &&
> - lo->lo_device->bd_inode->i_mapping->nrpages) {
> - err = -EAGAIN;
> - pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n",
> - __func__, lo->lo_number, lo->lo_file_name,
> - lo->lo_device->bd_inode->i_mapping->nrpages);
> - goto out_unfreeze;
> - }
> -
> blk_queue_logical_block_size(lo->lo_queue, arg);
> blk_queue_physical_block_size(lo->lo_queue, arg);
> blk_queue_io_min(lo->lo_queue, arg);
> loop_update_dio(lo);
> -out_unfreeze:
> +
> blk_mq_unfreeze_queue(lo->lo_queue);
>
> - return err;
> + return 0;
> }
>
> static int lo_simple_ioctl(struct loop_device *lo, unsigned int cmd,

2019-11-25 19:43:26

by Bart Van Assche

[permalink] [raw]

Subject: Re: [PATCH v2] loop: avoid EAGAIN, if offset or block_size are changed

On 11/25/19 11:22 AM, Jaegeuk Kim wrote:
> On 11/25, Bart Van Assche wrote:
>> Thank you for the additional and very helpful clarification. Can you have a look at the (totally untested) patch below? I prefer that version because it prevents concurrent processing of requests and syncing/killing the bdev.
>
> Yeah, I thought this was much cleaner way, but wasn't sure it could be doable
> to sync|kill block device after freezing the queue. Is it okay?

Hi Jaegeuk,

That patch was based on an incorrect interpretation of the meaning of
lo_device. After having taken another loop at the block driver, I don't
think that calling sync after freezing the queue is OK. How about using
the following call sequence:
* sync_blockdev()
* blk_mq_freeze_queue()
* kill_bdev()

Thanks,

Bart.

2019-11-27 18:19:31

by Jaegeuk Kim

[permalink] [raw]

Subject: Re: [f2fs-dev] [PATCH v3] loop: avoid EAGAIN, if offset or block_size are changed

Previously, there was a bug where user could see stale buffer cache (e.g, 512B)
attached in the 4KB-sized pager cache, when the block size was changed from
512B to 4KB. That was fixed by:
commit 5db470e229e2 ("loop: drop caches if offset or block_size are changed")

But, there were some regression reports saying the fix returns EAGAIN easily.
So, this patch removes previously added EAGAIN condition, nrpages != 0.

Instead, it changes the flow like this:
- sync_blockdev()
- blk_mq_freeze_queue()
: change the loop configuration
- blk_mq_unfreeze_queue()
- sync_blockdev()
- invalidate_bdev()

After invalidating the buffer cache, we must see the full valid 4KB page.

Additional concern came from Bart in which we can lose some data when
changing the lo_offset. In that case, this patch adds:
- sync_blockdev()
- blk_set_queue_dying
- blk_mq_freeze_queue()
: change the loop configuration
- blk_mq_unfreeze_queue()
- blk_queue_flag_clear(QUEUE_FLAG_DYING);
- sync_blockdev()
- invalidate_bdev()

Report: https://bugs.chromium.org/p/chromium/issues/detail?id=938958#c38

Cc: <[email protected]>
Cc: Jens Axboe <[email protected]>
Cc: [email protected]
Cc: Bart Van Assche <[email protected]>
Fixes: 5db470e229e2 ("loop: drop caches if offset or block_size are changed")
Reported-by: Gwendal Grignou <[email protected]>
Reported-by: grygorii tertychnyi <[email protected]>
Signed-off-by: Jaegeuk Kim <[email protected]>
---
drivers/block/loop.c | 65 ++++++++++++++++++++++----------------------
1 file changed, 33 insertions(+), 32 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index f6f77eaa7217..b583050d513a 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1232,6 +1232,8 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
kuid_t uid = current_uid();
struct block_device *bdev;
bool partscan = false;
+ bool drop_request = false;
+ bool drop_cache = false;

err = mutex_lock_killable(&loop_ctl_mutex);
if (err)
@@ -1251,14 +1253,21 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
goto out_unlock;
}

+ if (lo->lo_offset != info->lo_offset)
+ drop_request = true;
if (lo->lo_offset != info->lo_offset ||
- lo->lo_sizelimit != info->lo_sizelimit) {
- sync_blockdev(lo->lo_device);
- kill_bdev(lo->lo_device);
- }
+ lo->lo_sizelimit != info->lo_sizelimit)
+ drop_cache = true;

- /* I/O need to be drained during transfer transition */
- blk_mq_freeze_queue(lo->lo_queue);
+ sync_blockdev(lo->lo_device);
+
+ if (drop_request) {
+ blk_set_queue_dying(lo->lo_queue);
+ blk_mq_freeze_queue_wait(lo->lo_queue);
+ } else {
+ /* I/O need to be drained during transfer transition */
+ blk_mq_freeze_queue(lo->lo_queue);
+ }

err = loop_release_xfer(lo);
if (err)
@@ -1285,14 +1294,6 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)

if (lo->lo_offset != info->lo_offset ||
lo->lo_sizelimit != info->lo_sizelimit) {
- /* kill_bdev should have truncated all the pages */
- if (lo->lo_device->bd_inode->i_mapping->nrpages) {
- err = -EAGAIN;
- pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n",
- __func__, lo->lo_number, lo->lo_file_name,
- lo->lo_device->bd_inode->i_mapping->nrpages);
- goto out_unfreeze;
- }
if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) {
err = -EFBIG;
goto out_unfreeze;
@@ -1329,6 +1330,8 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)

out_unfreeze:
blk_mq_unfreeze_queue(lo->lo_queue);
+ if (drop_request)
+ blk_queue_flag_clear(QUEUE_FLAG_DYING, lo->lo_queue);

if (!err && (info->lo_flags & LO_FLAGS_PARTSCAN) &&
!(lo->lo_flags & LO_FLAGS_PARTSCAN)) {
@@ -1337,6 +1340,12 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
bdev = lo->lo_device;
partscan = true;
}
+
+ /* truncate stale pages cached by previous operations */
+ if (!err && drop_cache) {
+ sync_blockdev(lo->lo_device);
+ invalidate_bdev(lo->lo_device);
+ }
out_unlock:
mutex_unlock(&loop_ctl_mutex);
if (partscan)
@@ -1518,7 +1527,7 @@ static int loop_set_dio(struct loop_device *lo, unsigned long arg)

static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
{
- int err = 0;
+ bool drop_cache = false;

if (lo->lo_state != Lo_bound)
return -ENXIO;
@@ -1526,31 +1535,23 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
if (arg < 512 || arg > PAGE_SIZE || !is_power_of_2(arg))
return -EINVAL;

- if (lo->lo_queue->limits.logical_block_size != arg) {
- sync_blockdev(lo->lo_device);
- kill_bdev(lo->lo_device);
- }
+ if (lo->lo_queue->limits.logical_block_size != arg)
+ drop_cache = true;

+ sync_blockdev(lo->lo_device);
blk_mq_freeze_queue(lo->lo_queue);
-
- /* kill_bdev should have truncated all the pages */
- if (lo->lo_queue->limits.logical_block_size != arg &&
- lo->lo_device->bd_inode->i_mapping->nrpages) {
- err = -EAGAIN;
- pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n",
- __func__, lo->lo_number, lo->lo_file_name,
- lo->lo_device->bd_inode->i_mapping->nrpages);
- goto out_unfreeze;
- }
-
blk_queue_logical_block_size(lo->lo_queue, arg);
blk_queue_physical_block_size(lo->lo_queue, arg);
blk_queue_io_min(lo->lo_queue, arg);
loop_update_dio(lo);
-out_unfreeze:
blk_mq_unfreeze_queue(lo->lo_queue);

- return err;
+ /* truncate stale pages cached by previous operations */
+ if (drop_cache) {
+ sync_blockdev(lo->lo_device);
+ invalidate_bdev(lo->lo_device);
+ }
+ return 0;
}

static int lo_simple_ioctl(struct loop_device *lo, unsigned int cmd,
--
2.19.0.605.g01d371f741-goog

2019-11-27 18:56:44

by Bart Van Assche

[permalink] [raw]

Subject: Re: [f2fs-dev] [PATCH v3] loop: avoid EAGAIN, if offset or block_size are changed

On 11/27/19 10:18 AM, Jaegeuk Kim wrote:
> Previously, there was a bug where user could see stale buffer cache (e.g, 512B)
> attached in the 4KB-sized pager cache, when the block size was changed from
> 512B to 4KB. That was fixed by:
> commit 5db470e229e2 ("loop: drop caches if offset or block_size are changed")

[ ... ]

Reviewed-by: Bart Van Assche <[email protected]>

2020-02-19 20:00:18

by Andrew Norrie

[permalink] [raw]

Subject: Re: [f2fs-dev] [PATCH v3] loop: avoid EAGAIN, if offset or block_size are changed

Hi,

Just checking again the status of this patch?
It doesn't look like it's made it into the kernel yet?

2020-03-05 21:04:53

by Jan Kara

[permalink] [raw]

Subject: Re: [PATCH] loop: avoid EAGAIN, if offset or block_size are changed

On Fri 17-05-19 17:47:51, Jaegeuk Kim wrote:
> This patch tries to avoid EAGAIN due to nrpages!=0 that was originally trying
> to drop stale pages resulting in wrong data access.
>
> Report: https://bugs.chromium.org/p/chromium/issues/detail?id=938958#c38

...

> ---
> drivers/block/loop.c | 44 +++++++++++++++++---------------------------
> 1 file changed, 17 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/block/loop.c b/drivers/block/loop.c
> index 102d79575895..7c7d2d9c47d0 100644
> --- a/drivers/block/loop.c
> +++ b/drivers/block/loop.c
> @@ -1212,6 +1212,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
> kuid_t uid = current_uid();
> struct block_device *bdev;
> bool partscan = false;
> + bool drop_caches = false;
>
> err = mutex_lock_killable(&loop_ctl_mutex);
> if (err)
> @@ -1232,10 +1233,8 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
> }
>
> if (lo->lo_offset != info->lo_offset ||
> - lo->lo_sizelimit != info->lo_sizelimit) {
> - sync_blockdev(lo->lo_device);
> - kill_bdev(lo->lo_device);
> - }
> + lo->lo_sizelimit != info->lo_sizelimit)
> + drop_caches = true;

I don't think this solution of moving buffer cache invalidation after loop
device is updated is really correct.

If there's any dirty data in the buffer cache, god knows where it ends
up being written after the loop device is reconfigured. Think e.g. of a
file offset being changed. It may not be even possible to write it if say
block size increased and we have now improperly sized buffers in the buffer
cache...

Frankly, I have rather limited sympathy to someone trying to reconfigure a
loop device while it is in use. Is there any sane usecase? I'd be inclined
to just use a similar trick as we did with LOOP_SET_FD and allow these
changes only if the caller has the loop device open exclusively or we are
able to upgrade to exclusive open. As otherwise say mounted filesystem on
top of loop device being reconfigured is very likely to be in serious
trouble (e.g. it's impossible to fully invalidate buffer cache in that
case).

But that's probably somewhat tangential to the problem you have. For your
case I don't really see a race-free way to invalidate buffer cache and
update loop configuration - the best I can see is to flush & invalidate
the cache, freeze the bdev so that new data cannot be read into the
buffer cache, check the cache is still empty - if yes, go ahead. If not,
unfreeze and try again...

Honza

> /* I/O need to be drained during transfer transition */
> blk_mq_freeze_queue(lo->lo_queue);
> @@ -1265,14 +1264,6 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
>
> if (lo->lo_offset != info->lo_offset ||
> lo->lo_sizelimit != info->lo_sizelimit) {
> - /* kill_bdev should have truncated all the pages */
> - if (lo->lo_device->bd_inode->i_mapping->nrpages) {
> - err = -EAGAIN;
> - pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n",
> - __func__, lo->lo_number, lo->lo_file_name,
> - lo->lo_device->bd_inode->i_mapping->nrpages);
> - goto out_unfreeze;
> - }
> if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) {
> err = -EFBIG;
> goto out_unfreeze;
> @@ -1317,6 +1308,12 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
> bdev = lo->lo_device;
> partscan = true;
> }
> +
> + /* truncate stale pages cached by previous operations */
> + if (!err && drop_caches) {
> + sync_blockdev(lo->lo_device);
> + kill_bdev(lo->lo_device);
> + }
> out_unlock:
> mutex_unlock(&loop_ctl_mutex);
> if (partscan)
> @@ -1498,6 +1495,7 @@ static int loop_set_dio(struct loop_device *lo, unsigned long arg)
>
> static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
> {
> + bool drop_caches = false;
> int err = 0;
>
> if (lo->lo_state != Lo_bound)
> @@ -1506,23 +1504,10 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
> if (arg < 512 || arg > PAGE_SIZE || !is_power_of_2(arg))
> return -EINVAL;
>
> - if (lo->lo_queue->limits.logical_block_size != arg) {
> - sync_blockdev(lo->lo_device);
> - kill_bdev(lo->lo_device);
> - }
> + if (lo->lo_queue->limits.logical_block_size != arg)
> + drop_caches = true;
>
> blk_mq_freeze_queue(lo->lo_queue);
> -
> - /* kill_bdev should have truncated all the pages */
> - if (lo->lo_queue->limits.logical_block_size != arg &&
> - lo->lo_device->bd_inode->i_mapping->nrpages) {
> - err = -EAGAIN;
> - pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n",
> - __func__, lo->lo_number, lo->lo_file_name,
> - lo->lo_device->bd_inode->i_mapping->nrpages);
> - goto out_unfreeze;
> - }
> -
> blk_queue_logical_block_size(lo->lo_queue, arg);
> blk_queue_physical_block_size(lo->lo_queue, arg);
> blk_queue_io_min(lo->lo_queue, arg);
> @@ -1530,6 +1515,11 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
> out_unfreeze:
> blk_mq_unfreeze_queue(lo->lo_queue);
>
> + /* truncate stale pages cached by previous operations */
> + if (drop_caches) {
> + sync_blockdev(lo->lo_device);
> + kill_bdev(lo->lo_device);
> + }
> return err;
> }
>
--
Jan Kara <[email protected]>
SUSE Labs, CR