2021-03-26 09:02:11

by Zhang, Qiang

[permalink] [raw]
Subject: [PATCH v2] loop: call __loop_clr_fd() with lo_mutex locked to avoid autoclear race

From: Zqiang <[email protected]>

lo->lo_refcnt = 0

CPU0 CPU1
lo_open() lo_open()
mutex_lock(&lo->lo_mutex)
atomic_inc(&lo->lo_refcnt)
lo_refcnt == 1
mutex_unlock(&lo->lo_mutex)
mutex_lock(&lo->lo_mutex)
atomic_inc(&lo->lo_refcnt)
lo_refcnt == 2
mutex_unlock(&lo->lo_mutex)
loop_clr_fd()
mutex_lock(&lo->lo_mutex)
atomic_read(&lo->lo_refcnt) > 1
lo->lo_flags |= LO_FLAGS_AUTOCLEAR lo_release()
mutex_unlock(&lo->lo_mutex)
return mutex_lock(&lo->lo_mutex)
atomic_dec_return(&lo->lo_refcnt)
lo_refcnt == 1
mutex_unlock(&lo->lo_mutex)
return

lo_release()
mutex_lock(&lo->lo_mutex)
atomic_dec_return(&lo->lo_refcnt)
lo_refcnt == 0
lo->lo_flags & LO_FLAGS_AUTOCLEAR
== true
mutex_unlock(&lo->lo_mutex) loop_control_ioctl()
case LOOP_CTL_REMOVE:
mutex_lock(&lo->lo_mutex)
atomic_read(&lo->lo_refcnt)==0
__loop_clr_fd(lo, true) mutex_unlock(&lo->lo_mutex)
mutex_lock(&lo->lo_mutex) loop_remove(lo)
mutex_destroy(&lo->lo_mutex)
...... kfree(lo)
data race

When different tasks on two CPUs perform the above operations on the same
lo device, data race may be occur, Do not drop lo->lo_mutex before calling
__loop_clr_fd(), so refcnt and LO_FLAGS_AUTOCLEAR check in lo_release
stay in sync.

Fixes: 6cc8e7430801 ("loop: scale loop device by introducing per device lock")
Signed-off-by: Zqiang <[email protected]>
---
v1->v2:
Modify the title and commit message.

drivers/block/loop.c | 11 ++++-------
1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index d58d68f3c7cd..5712f1698a66 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1201,7 +1201,6 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
bool partscan = false;
int lo_number;

- mutex_lock(&lo->lo_mutex);
if (WARN_ON_ONCE(lo->lo_state != Lo_rundown)) {
err = -ENXIO;
goto out_unlock;
@@ -1257,7 +1256,6 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
lo_number = lo->lo_number;
loop_unprepare_queue(lo);
out_unlock:
- mutex_unlock(&lo->lo_mutex);
if (partscan) {
/*
* bd_mutex has been held already in release path, so don't
@@ -1288,12 +1286,11 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
* protects us from all the other places trying to change the 'lo'
* device.
*/
- mutex_lock(&lo->lo_mutex);
+
lo->lo_flags = 0;
if (!part_shift)
lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN;
lo->lo_state = Lo_unbound;
- mutex_unlock(&lo->lo_mutex);

/*
* Need not hold lo_mutex to fput backing file. Calling fput holding
@@ -1332,9 +1329,10 @@ static int loop_clr_fd(struct loop_device *lo)
return 0;
}
lo->lo_state = Lo_rundown;
+ err = __loop_clr_fd(lo, false);
mutex_unlock(&lo->lo_mutex);

- return __loop_clr_fd(lo, false);
+ return err;
}

static int
@@ -1916,13 +1914,12 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
if (lo->lo_state != Lo_bound)
goto out_unlock;
lo->lo_state = Lo_rundown;
- mutex_unlock(&lo->lo_mutex);
/*
* In autoclear mode, stop the loop thread
* and remove configuration after last close.
*/
__loop_clr_fd(lo, true);
- return;
+ goto out_unlock;
} else if (lo->lo_state == Lo_bound) {
/*
* Otherwise keep thread (if running) and config,
--
2.17.1


2021-03-26 21:45:17

by Pasha Tatashin

[permalink] [raw]
Subject: Re: [PATCH v2] loop: call __loop_clr_fd() with lo_mutex locked to avoid autoclear race

On Fri, Mar 26, 2021 at 5:00 AM <[email protected]> wrote:
>
> From: Zqiang <[email protected]>
>
> lo->lo_refcnt = 0
>
> CPU0 CPU1
> lo_open() lo_open()
> mutex_lock(&lo->lo_mutex)
> atomic_inc(&lo->lo_refcnt)
> lo_refcnt == 1
> mutex_unlock(&lo->lo_mutex)
> mutex_lock(&lo->lo_mutex)
> atomic_inc(&lo->lo_refcnt)
> lo_refcnt == 2
> mutex_unlock(&lo->lo_mutex)
> loop_clr_fd()
> mutex_lock(&lo->lo_mutex)
> atomic_read(&lo->lo_refcnt) > 1
> lo->lo_flags |= LO_FLAGS_AUTOCLEAR lo_release()
> mutex_unlock(&lo->lo_mutex)
> return mutex_lock(&lo->lo_mutex)
> atomic_dec_return(&lo->lo_refcnt)
> lo_refcnt == 1
> mutex_unlock(&lo->lo_mutex)
> return
>
> lo_release()
> mutex_lock(&lo->lo_mutex)
> atomic_dec_return(&lo->lo_refcnt)
> lo_refcnt == 0
> lo->lo_flags & LO_FLAGS_AUTOCLEAR
> == true
> mutex_unlock(&lo->lo_mutex) loop_control_ioctl()
> case LOOP_CTL_REMOVE:
> mutex_lock(&lo->lo_mutex)
> atomic_read(&lo->lo_refcnt)==0
> __loop_clr_fd(lo, true) mutex_unlock(&lo->lo_mutex)
> mutex_lock(&lo->lo_mutex) loop_remove(lo)
> mutex_destroy(&lo->lo_mutex)
> ...... kfree(lo)
> data race
>
> When different tasks on two CPUs perform the above operations on the same
> lo device, data race may be occur, Do not drop lo->lo_mutex before calling
> __loop_clr_fd(), so refcnt and LO_FLAGS_AUTOCLEAR check in lo_release
> stay in sync.

There is a race with autoclear logic where use after free may occur as
shown in the above scenario. Do not drop lo->lo_mutex before calling
__loop_clr_fd(), so refcnt and LO_FLAGS_AUTOCLEAR check in lo_release
stay in sync.

Reviewed-by: Pavel Tatashin <[email protected]>

>
> Fixes: 6cc8e7430801 ("loop: scale loop device by introducing per device lock")
> Signed-off-by: Zqiang <[email protected]>
> ---
> v1->v2:
> Modify the title and commit message.
>
> drivers/block/loop.c | 11 ++++-------
> 1 file changed, 4 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/block/loop.c b/drivers/block/loop.c
> index d58d68f3c7cd..5712f1698a66 100644
> --- a/drivers/block/loop.c
> +++ b/drivers/block/loop.c
> @@ -1201,7 +1201,6 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
> bool partscan = false;
> int lo_number;
>
> - mutex_lock(&lo->lo_mutex);
> if (WARN_ON_ONCE(lo->lo_state != Lo_rundown)) {
> err = -ENXIO;
> goto out_unlock;
> @@ -1257,7 +1256,6 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
> lo_number = lo->lo_number;
> loop_unprepare_queue(lo);
> out_unlock:
> - mutex_unlock(&lo->lo_mutex);
> if (partscan) {
> /*
> * bd_mutex has been held already in release path, so don't
> @@ -1288,12 +1286,11 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
> * protects us from all the other places trying to change the 'lo'
> * device.
> */
> - mutex_lock(&lo->lo_mutex);
> +
> lo->lo_flags = 0;
> if (!part_shift)
> lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN;
> lo->lo_state = Lo_unbound;
> - mutex_unlock(&lo->lo_mutex);
>
> /*
> * Need not hold lo_mutex to fput backing file. Calling fput holding
> @@ -1332,9 +1329,10 @@ static int loop_clr_fd(struct loop_device *lo)
> return 0;
> }
> lo->lo_state = Lo_rundown;
> + err = __loop_clr_fd(lo, false);
> mutex_unlock(&lo->lo_mutex);
>
> - return __loop_clr_fd(lo, false);
> + return err;
> }
>
> static int
> @@ -1916,13 +1914,12 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
> if (lo->lo_state != Lo_bound)
> goto out_unlock;
> lo->lo_state = Lo_rundown;
> - mutex_unlock(&lo->lo_mutex);
> /*
> * In autoclear mode, stop the loop thread
> * and remove configuration after last close.
> */
> __loop_clr_fd(lo, true);
> - return;
> + goto out_unlock;
> } else if (lo->lo_state == Lo_bound) {
> /*
> * Otherwise keep thread (if running) and config,
> --
> 2.17.1
>

2021-03-27 01:33:41

by Zhang, Qiang

[permalink] [raw]
Subject: 回复: [PATCH v2] loop: call __loop_clr_fd() w ith lo_mutex locked to avoid autoclear race



________________________________________
??????: Pavel Tatashin <[email protected]>
????ʱ??: 2021??3??27?? 5:41
?ռ???: Zhang, Qiang
????: Jens Axboe; [email protected]; LKML
????: Re: [PATCH v2] loop: call __loop_clr_fd() with lo_mutex locked to avoid autoclear race

[Please note: This e-mail is from an EXTERNAL e-mail address]

On Fri, Mar 26, 2021 at 5:00 AM <[email protected]> wrote:
>
> From: Zqiang <[email protected]>
>
> lo->lo_refcnt = 0
>
> CPU0 CPU1
> lo_open() lo_open()
> mutex_lock(&lo->lo_mutex)
> atomic_inc(&lo->lo_refcnt)
> lo_refcnt == 1
> mutex_unlock(&lo->lo_mutex)
> mutex_lock(&lo->lo_mutex)
> atomic_inc(&lo->lo_refcnt)
> lo_refcnt == 2
> mutex_unlock(&lo->lo_mutex)
> loop_clr_fd()
> mutex_lock(&lo->lo_mutex)
> atomic_read(&lo->lo_refcnt) > 1
> lo->lo_flags |= LO_FLAGS_AUTOCLEAR lo_release()
> mutex_unlock(&lo->lo_mutex)
> return mutex_lock(&lo->lo_mutex)
> atomic_dec_return(&lo->lo_refcnt)
> lo_refcnt == 1
> mutex_unlock(&lo->lo_mutex)
> return
>
> lo_release()
> mutex_lock(&lo->lo_mutex)
> atomic_dec_return(&lo->lo_refcnt)
> lo_refcnt == 0
> lo->lo_flags & LO_FLAGS_AUTOCLEAR
> == true
> mutex_unlock(&lo->lo_mutex) loop_control_ioctl()
> case LOOP_CTL_REMOVE:
> mutex_lock(&lo->lo_mutex)
> atomic_read(&lo->lo_refcnt)==0
> __loop_clr_fd(lo, true) mutex_unlock(&lo->lo_mutex)
> mutex_lock(&lo->lo_mutex) loop_remove(lo)
> mutex_destroy(&lo->lo_mutex)
> ...... kfree(lo)
> data race
>
> When different tasks on two CPUs perform the above operations on the same
> lo device, data race may be occur, Do not drop lo->lo_mutex before calling
> __loop_clr_fd(), so refcnt and LO_FLAGS_AUTOCLEAR check in lo_release
> stay in sync.
>
>There is a race with autoclear logic where use after free may >occur as
>shown in the above scenario. Do not drop lo->lo_mutex >before calling
>__loop_clr_fd(), so refcnt and LO_FLAGS_AUTOCLEAR check >in lo_release
>stay in sync.

Hi Pasha

this patch is incorrect, lo->lo_state status detection is ignored by me,
in lo_release() the lo_state is set Lo_rundown, when call LOOP_CTL_REMOVE , if lo->lo_state != Lo_unbound will directly return, not call loop_remove().
I'm sorry to mislead you.

>
>Reviewed-by: Pavel Tatashin <[email protected]>

>
> Fixes: 6cc8e7430801 ("loop: scale loop device by introducing per device lock")
> Signed-off-by: Zqiang <[email protected]>
> ---
> v1->v2:
> Modify the title and commit message.
>
> drivers/block/loop.c | 11 ++++-------
> 1 file changed, 4 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/block/loop.c b/drivers/block/loop.c
> index d58d68f3c7cd..5712f1698a66 100644
> --- a/drivers/block/loop.c
> +++ b/drivers/block/loop.c
> @@ -1201,7 +1201,6 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
> bool partscan = false;
> int lo_number;
>
> - mutex_lock(&lo->lo_mutex);
> if (WARN_ON_ONCE(lo->lo_state != Lo_rundown)) {
> err = -ENXIO;
> goto out_unlock;
> @@ -1257,7 +1256,6 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
> lo_number = lo->lo_number;
> loop_unprepare_queue(lo);
> out_unlock:
> - mutex_unlock(&lo->lo_mutex);
> if (partscan) {
> /*
> * bd_mutex has been held already in release path, so don't
> @@ -1288,12 +1286,11 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
> * protects us from all the other places trying to change the 'lo'
> * device.
> */
> - mutex_lock(&lo->lo_mutex);
> +
> lo->lo_flags = 0;
> if (!part_shift)
> lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN;
> lo->lo_state = Lo_unbound;
> - mutex_unlock(&lo->lo_mutex);
>
> /*
> * Need not hold lo_mutex to fput backing file. Calling fput holding
> @@ -1332,9 +1329,10 @@ static int loop_clr_fd(struct loop_device *lo)
> return 0;
> }
> lo->lo_state = Lo_rundown;
> + err = __loop_clr_fd(lo, false);
> mutex_unlock(&lo->lo_mutex);
>
> - return __loop_clr_fd(lo, false);
> + return err;
> }
>
> static int
> @@ -1916,13 +1914,12 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
> if (lo->lo_state != Lo_bound)
> goto out_unlock;
> lo->lo_state = Lo_rundown;
> - mutex_unlock(&lo->lo_mutex);
> /*
> * In autoclear mode, stop the loop thread
> * and remove configuration after last close.
> */
> __loop_clr_fd(lo, true);
> - return;
> + goto out_unlock;
> } else if (lo->lo_state == Lo_bound) {
> /*
> * Otherwise keep thread (if running) and config,
> --
> 2.17.1
>