2020-07-17 20:56:01

by Pasha Tatashin

Subject: [PATCH v1 1/1] loop: scale loop device by introducing per device lock

Currently, loop device has only one global lock:
loop_ctl_mutex.

This becomes hot in scenarios where many loop devices are used.

Scale it by introducing per-device lock: lo_mutex that proctests
field in struct loop_device. Keep loop_ctl_mutex to protect global
data such as loop_index_idr, loop_lookup, loop_add.

Lock ordering: loop_ctl_mutex > lo_mutex.
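
As a sketch, this mirrors what the lo_open() path below does when it
needs both locks (the acquire order is fixed; the release order is
free to differ):

	mutex_lock(&loop_ctl_mutex);	/* global: lookup/add/remove */
	mutex_lock(&lo->lo_mutex);	/* per device: lo_* fields */
	mutex_unlock(&loop_ctl_mutex);	/* may be dropped early ... */
	atomic_inc(&lo->lo_refcnt);	/* ... while lo_mutex is held */
	mutex_unlock(&lo->lo_mutex);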

Signed-off-by: Pavel Tatashin <[email protected]>
---
drivers/block/loop.c | 86 ++++++++++++++++++++++++--------------------
drivers/block/loop.h | 1 +
2 files changed, 48 insertions(+), 39 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 475e1a738560..056af3bca6c2 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -706,7 +706,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
int error;
bool partscan;

- error = mutex_lock_killable(&loop_ctl_mutex);
+ error = mutex_lock_killable(&lo->lo_mutex);
if (error)
return error;
error = -ENXIO;
@@ -745,9 +745,9 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
loop_update_dio(lo);
blk_mq_unfreeze_queue(lo->lo_queue);
partscan = lo->lo_flags & LO_FLAGS_PARTSCAN;
- mutex_unlock(&loop_ctl_mutex);
+ mutex_unlock(&lo->lo_mutex);
/*
- * We must drop file reference outside of loop_ctl_mutex as dropping
+ * We must drop file reference outside of lo_mutex as dropping
* the file ref can take bd_mutex which creates circular locking
* dependency.
*/
@@ -757,7 +757,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
return 0;

out_err:
- mutex_unlock(&loop_ctl_mutex);
+ mutex_unlock(&lo->lo_mutex);
if (file)
fput(file);
return error;
@@ -1096,7 +1096,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
}
}

- error = mutex_lock_killable(&loop_ctl_mutex);
+ error = mutex_lock_killable(&lo->lo_mutex);
if (error)
goto out_bdev;

@@ -1176,7 +1176,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
* put /dev/loopXX inode. Later in __loop_clr_fd() we bdput(bdev).
*/
bdgrab(bdev);
- mutex_unlock(&loop_ctl_mutex);
+ mutex_unlock(&lo->lo_mutex);
if (partscan)
loop_reread_partitions(lo, bdev);
if (claimed_bdev)
@@ -1184,7 +1184,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
return 0;

out_unlock:
- mutex_unlock(&loop_ctl_mutex);
+ mutex_unlock(&lo->lo_mutex);
out_bdev:
if (claimed_bdev)
bd_abort_claiming(bdev, claimed_bdev, loop_configure);
@@ -1205,7 +1205,7 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
bool partscan = false;
int lo_number;

- mutex_lock(&loop_ctl_mutex);
+ mutex_lock(&lo->lo_mutex);
if (WARN_ON_ONCE(lo->lo_state != Lo_rundown)) {
err = -ENXIO;
goto out_unlock;
@@ -1259,7 +1259,7 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
lo_number = lo->lo_number;
loop_unprepare_queue(lo);
out_unlock:
- mutex_unlock(&loop_ctl_mutex);
+ mutex_unlock(&lo->lo_mutex);
if (partscan) {
/*
* bd_mutex has been held already in release path, so don't
@@ -1290,18 +1290,18 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
* protects us from all the other places trying to change the 'lo'
* device.
*/
- mutex_lock(&loop_ctl_mutex);
+ mutex_lock(&lo->lo_mutex);
lo->lo_flags = 0;
if (!part_shift)
lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN;
lo->lo_state = Lo_unbound;
- mutex_unlock(&loop_ctl_mutex);
+ mutex_unlock(&lo->lo_mutex);

/*
- * Need not hold loop_ctl_mutex to fput backing file.
- * Calling fput holding loop_ctl_mutex triggers a circular
+ * Need not hold lo_mutex to fput backing file.
+ * Calling fput holding lo_mutex triggers a circular
* lock dependency possibility warning as fput can take
- * bd_mutex which is usually taken before loop_ctl_mutex.
+ * bd_mutex which is usually taken before lo_mutex.
*/
if (filp)
fput(filp);
@@ -1312,11 +1312,11 @@ static int loop_clr_fd(struct loop_device *lo)
{
int err;

- err = mutex_lock_killable(&loop_ctl_mutex);
+ err = mutex_lock_killable(&lo->lo_mutex);
if (err)
return err;
if (lo->lo_state != Lo_bound) {
- mutex_unlock(&loop_ctl_mutex);
+ mutex_unlock(&lo->lo_mutex);
return -ENXIO;
}
/*
@@ -1331,11 +1331,11 @@ static int loop_clr_fd(struct loop_device *lo)
*/
if (atomic_read(&lo->lo_refcnt) > 1) {
lo->lo_flags |= LO_FLAGS_AUTOCLEAR;
- mutex_unlock(&loop_ctl_mutex);
+ mutex_unlock(&lo->lo_mutex);
return 0;
}
lo->lo_state = Lo_rundown;
- mutex_unlock(&loop_ctl_mutex);
+ mutex_unlock(&lo->lo_mutex);

return __loop_clr_fd(lo, false);
}
@@ -1350,7 +1350,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
bool partscan = false;
bool size_changed = false;

- err = mutex_lock_killable(&loop_ctl_mutex);
+ err = mutex_lock_killable(&lo->lo_mutex);
if (err)
return err;
if (lo->lo_encrypt_key_size &&
@@ -1417,7 +1417,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
partscan = true;
}
out_unlock:
- mutex_unlock(&loop_ctl_mutex);
+ mutex_unlock(&lo->lo_mutex);
if (partscan)
loop_reread_partitions(lo, bdev);

@@ -1431,11 +1431,11 @@ loop_get_status(struct loop_device *lo, struct loop_info64 *info)
struct kstat stat;
int ret;

- ret = mutex_lock_killable(&loop_ctl_mutex);
+ ret = mutex_lock_killable(&lo->lo_mutex);
if (ret)
return ret;
if (lo->lo_state != Lo_bound) {
- mutex_unlock(&loop_ctl_mutex);
+ mutex_unlock(&lo->lo_mutex);
return -ENXIO;
}

@@ -1454,10 +1454,10 @@ loop_get_status(struct loop_device *lo, struct loop_info64 *info)
lo->lo_encrypt_key_size);
}

- /* Drop loop_ctl_mutex while we call into the filesystem. */
+ /* Drop lo_mutex while we call into the filesystem. */
path = lo->lo_backing_file->f_path;
path_get(&path);
- mutex_unlock(&loop_ctl_mutex);
+ mutex_unlock(&lo->lo_mutex);
ret = vfs_getattr(&path, &stat, STATX_INO, AT_STATX_SYNC_AS_STAT);
if (!ret) {
info->lo_device = huge_encode_dev(stat.dev);
@@ -1643,7 +1643,7 @@ static int lo_simple_ioctl(struct loop_device *lo, unsigned int cmd,
{
int err;

- err = mutex_lock_killable(&loop_ctl_mutex);
+ err = mutex_lock_killable(&lo->lo_mutex);
if (err)
return err;
switch (cmd) {
@@ -1659,7 +1659,7 @@ static int lo_simple_ioctl(struct loop_device *lo, unsigned int cmd,
default:
err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL;
}
- mutex_unlock(&loop_ctl_mutex);
+ mutex_unlock(&lo->lo_mutex);
return err;
}

@@ -1890,22 +1890,23 @@ static int lo_open(struct block_device *bdev, fmode_t mode)
return err;
lo = bdev->bd_disk->private_data;
if (!lo) {
- err = -ENXIO;
- goto out;
+ mutex_unlock(&loop_ctl_mutex);
+ return -ENXIO;
}
-
- atomic_inc(&lo->lo_refcnt);
-out:
+ err = mutex_lock_killable(&lo->lo_mutex);
mutex_unlock(&loop_ctl_mutex);
- return err;
+ if (err)
+ return err;
+ atomic_inc(&lo->lo_refcnt);
+ mutex_unlock(&lo->lo_mutex);
+ return 0;
}

static void lo_release(struct gendisk *disk, fmode_t mode)
{
- struct loop_device *lo;
+ struct loop_device *lo = disk->private_data;

- mutex_lock(&loop_ctl_mutex);
- lo = disk->private_data;
+ mutex_lock(&lo->lo_mutex);
if (atomic_dec_return(&lo->lo_refcnt))
goto out_unlock;

@@ -1913,7 +1914,7 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
if (lo->lo_state != Lo_bound)
goto out_unlock;
lo->lo_state = Lo_rundown;
- mutex_unlock(&loop_ctl_mutex);
+ mutex_unlock(&lo->lo_mutex);
/*
* In autoclear mode, stop the loop thread
* and remove configuration after last close.
@@ -1930,7 +1931,7 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
}

out_unlock:
- mutex_unlock(&loop_ctl_mutex);
+ mutex_unlock(&lo->lo_mutex);
}

static const struct block_device_operations lo_fops = {
@@ -1969,10 +1970,10 @@ static int unregister_transfer_cb(int id, void *ptr, void *data)
struct loop_device *lo = ptr;
struct loop_func_table *xfer = data;

- mutex_lock(&loop_ctl_mutex);
+ mutex_lock(&lo->lo_mutex);
if (lo->lo_encryption == xfer)
loop_release_xfer(lo);
- mutex_unlock(&loop_ctl_mutex);
+ mutex_unlock(&lo->lo_mutex);
return 0;
}

@@ -2157,6 +2158,7 @@ static int loop_add(struct loop_device **l, int i)
disk->flags |= GENHD_FL_NO_PART_SCAN;
disk->flags |= GENHD_FL_EXT_DEVT;
atomic_set(&lo->lo_refcnt, 0);
+ mutex_init(&lo->lo_mutex);
lo->lo_number = i;
spin_lock_init(&lo->lo_lock);
disk->major = LOOP_MAJOR;
@@ -2272,15 +2274,21 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd,
ret = loop_lookup(&lo, parm);
if (ret < 0)
break;
+ ret = mutex_lock_killable(&lo->lo_mutex);
+ if (ret)
+ break;
if (lo->lo_state != Lo_unbound) {
ret = -EBUSY;
+ mutex_unlock(&lo->lo_mutex);
break;
}
if (atomic_read(&lo->lo_refcnt) > 0) {
ret = -EBUSY;
+ mutex_unlock(&lo->lo_mutex);
break;
}
lo->lo_disk->private_data = NULL;
+ mutex_unlock(&lo->lo_mutex);
idr_remove(&loop_index_idr, lo->lo_number);
loop_remove(lo);
break;
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index af75a5ee4094..a3c04f310672 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -62,6 +62,7 @@ struct loop_device {
struct request_queue *lo_queue;
struct blk_mq_tag_set tag_set;
struct gendisk *lo_disk;
+ struct mutex lo_mutex;
};

struct loop_cmd {
--
2.25.1


2020-07-23 18:12:07

by Tyler Hicks

Subject: Re: [PATCH v1 1/1] loop: scale loop device by introducing per device lock

On 2020-07-17 16:53:22, Pavel Tatashin wrote:
> Currently, loop device has only one global lock:
> loop_ctl_mutex.
>
> This becomes hot in scenarios where many loop devices are used.
>
> Scale it by introducing per-device lock: lo_mutex that proctests
> field in struct loop_device. Keep loop_ctl_mutex to protect global

s/proctests field/protects the fields/

> data such as loop_index_idr, loop_lookup, loop_add.
>
> Lock ordering: loop_ctl_mutex > lo_mutex.
>
> Signed-off-by: Pavel Tatashin <[email protected]>
> ---
> drivers/block/loop.c | 86 ++++++++++++++++++++++++--------------------
> drivers/block/loop.h | 1 +
> 2 files changed, 48 insertions(+), 39 deletions(-)
>
> diff --git a/drivers/block/loop.c b/drivers/block/loop.c
> index 475e1a738560..056af3bca6c2 100644
> --- a/drivers/block/loop.c
> +++ b/drivers/block/loop.c
> @@ -706,7 +706,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
> int error;
> bool partscan;
>
> - error = mutex_lock_killable(&loop_ctl_mutex);
> + error = mutex_lock_killable(&lo->lo_mutex);
> if (error)
> return error;
> error = -ENXIO;
> @@ -745,9 +745,9 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
> loop_update_dio(lo);
> blk_mq_unfreeze_queue(lo->lo_queue);
> partscan = lo->lo_flags & LO_FLAGS_PARTSCAN;
> - mutex_unlock(&loop_ctl_mutex);
> + mutex_unlock(&lo->lo_mutex);
> /*
> - * We must drop file reference outside of loop_ctl_mutex as dropping
> + * We must drop file reference outside of lo_mutex as dropping
> * the file ref can take bd_mutex which creates circular locking
> * dependency.
> */
> @@ -757,7 +757,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
> return 0;
>
> out_err:
> - mutex_unlock(&loop_ctl_mutex);
> + mutex_unlock(&lo->lo_mutex);
> if (file)
> fput(file);
> return error;
> @@ -1096,7 +1096,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
> }
> }
>
> - error = mutex_lock_killable(&loop_ctl_mutex);
> + error = mutex_lock_killable(&lo->lo_mutex);
> if (error)
> goto out_bdev;
>
> @@ -1176,7 +1176,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
> * put /dev/loopXX inode. Later in __loop_clr_fd() we bdput(bdev).
> */
> bdgrab(bdev);
> - mutex_unlock(&loop_ctl_mutex);
> + mutex_unlock(&lo->lo_mutex);
> if (partscan)
> loop_reread_partitions(lo, bdev);
> if (claimed_bdev)
> @@ -1184,7 +1184,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
> return 0;
>
> out_unlock:
> - mutex_unlock(&loop_ctl_mutex);
> + mutex_unlock(&lo->lo_mutex);
> out_bdev:
> if (claimed_bdev)
> bd_abort_claiming(bdev, claimed_bdev, loop_configure);
> @@ -1205,7 +1205,7 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
> bool partscan = false;
> int lo_number;
>
> - mutex_lock(&loop_ctl_mutex);
> + mutex_lock(&lo->lo_mutex);
> if (WARN_ON_ONCE(lo->lo_state != Lo_rundown)) {
> err = -ENXIO;
> goto out_unlock;
> @@ -1259,7 +1259,7 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
> lo_number = lo->lo_number;
> loop_unprepare_queue(lo);
> out_unlock:
> - mutex_unlock(&loop_ctl_mutex);
> + mutex_unlock(&lo->lo_mutex);
> if (partscan) {
> /*
> * bd_mutex has been held already in release path, so don't
> @@ -1290,18 +1290,18 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
> * protects us from all the other places trying to change the 'lo'
> * device.
> */
> - mutex_lock(&loop_ctl_mutex);
> + mutex_lock(&lo->lo_mutex);
> lo->lo_flags = 0;
> if (!part_shift)
> lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN;
> lo->lo_state = Lo_unbound;
> - mutex_unlock(&loop_ctl_mutex);
> + mutex_unlock(&lo->lo_mutex);
>
> /*
> - * Need not hold loop_ctl_mutex to fput backing file.
> - * Calling fput holding loop_ctl_mutex triggers a circular
> + * Need not hold lo_mutex to fput backing file.
> + * Calling fput holding lo_mutex triggers a circular
> * lock dependency possibility warning as fput can take
> - * bd_mutex which is usually taken before loop_ctl_mutex.
> + * bd_mutex which is usually taken before lo_mutex.
> */
> if (filp)
> fput(filp);
> @@ -1312,11 +1312,11 @@ static int loop_clr_fd(struct loop_device *lo)
> {
> int err;
>
> - err = mutex_lock_killable(&loop_ctl_mutex);
> + err = mutex_lock_killable(&lo->lo_mutex);
> if (err)
> return err;
> if (lo->lo_state != Lo_bound) {
> - mutex_unlock(&loop_ctl_mutex);
> + mutex_unlock(&lo->lo_mutex);
> return -ENXIO;
> }
> /*
> @@ -1331,11 +1331,11 @@ static int loop_clr_fd(struct loop_device *lo)
> */
> if (atomic_read(&lo->lo_refcnt) > 1) {
> lo->lo_flags |= LO_FLAGS_AUTOCLEAR;
> - mutex_unlock(&loop_ctl_mutex);
> + mutex_unlock(&lo->lo_mutex);
> return 0;
> }
> lo->lo_state = Lo_rundown;
> - mutex_unlock(&loop_ctl_mutex);
> + mutex_unlock(&lo->lo_mutex);
>
> return __loop_clr_fd(lo, false);
> }
> @@ -1350,7 +1350,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
> bool partscan = false;
> bool size_changed = false;
>
> - err = mutex_lock_killable(&loop_ctl_mutex);
> + err = mutex_lock_killable(&lo->lo_mutex);
> if (err)
> return err;
> if (lo->lo_encrypt_key_size &&
> @@ -1417,7 +1417,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
> partscan = true;
> }
> out_unlock:
> - mutex_unlock(&loop_ctl_mutex);
> + mutex_unlock(&lo->lo_mutex);
> if (partscan)
> loop_reread_partitions(lo, bdev);
>
> @@ -1431,11 +1431,11 @@ loop_get_status(struct loop_device *lo, struct loop_info64 *info)
> struct kstat stat;
> int ret;
>
> - ret = mutex_lock_killable(&loop_ctl_mutex);
> + ret = mutex_lock_killable(&lo->lo_mutex);
> if (ret)
> return ret;
> if (lo->lo_state != Lo_bound) {
> - mutex_unlock(&loop_ctl_mutex);
> + mutex_unlock(&lo->lo_mutex);
> return -ENXIO;
> }
>
> @@ -1454,10 +1454,10 @@ loop_get_status(struct loop_device *lo, struct loop_info64 *info)
> lo->lo_encrypt_key_size);
> }
>
> - /* Drop loop_ctl_mutex while we call into the filesystem. */
> + /* Drop lo_mutex while we call into the filesystem. */
> path = lo->lo_backing_file->f_path;
> path_get(&path);
> - mutex_unlock(&loop_ctl_mutex);
> + mutex_unlock(&lo->lo_mutex);
> ret = vfs_getattr(&path, &stat, STATX_INO, AT_STATX_SYNC_AS_STAT);
> if (!ret) {
> info->lo_device = huge_encode_dev(stat.dev);
> @@ -1643,7 +1643,7 @@ static int lo_simple_ioctl(struct loop_device *lo, unsigned int cmd,
> {
> int err;
>
> - err = mutex_lock_killable(&loop_ctl_mutex);
> + err = mutex_lock_killable(&lo->lo_mutex);
> if (err)
> return err;
> switch (cmd) {
> @@ -1659,7 +1659,7 @@ static int lo_simple_ioctl(struct loop_device *lo, unsigned int cmd,
> default:
> err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL;
> }
> - mutex_unlock(&loop_ctl_mutex);
> + mutex_unlock(&lo->lo_mutex);
> return err;
> }
>
> @@ -1890,22 +1890,23 @@ static int lo_open(struct block_device *bdev, fmode_t mode)
> return err;
> lo = bdev->bd_disk->private_data;
> if (!lo) {
> - err = -ENXIO;
> - goto out;
> + mutex_unlock(&loop_ctl_mutex);
> + return -ENXIO;
> }
> -
> - atomic_inc(&lo->lo_refcnt);
> -out:
> + err = mutex_lock_killable(&lo->lo_mutex);
> mutex_unlock(&loop_ctl_mutex);

I don't see a possibility for deadlock but it bothers me a little that
we're not unlocking in the reverse locking order here, as we do in
loop_control_ioctl(). There should be no perf impact if we move the
mutex_unlock(&loop_ctl_mutex) after mutex_unlock(&lo->lo_mutex).
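
Untested sketch of what I mean, i.e. keeping the locking order but
reversing the unlocks:

	err = mutex_lock_killable(&lo->lo_mutex);
	if (err) {
		mutex_unlock(&loop_ctl_mutex);
		return err;
	}
	atomic_inc(&lo->lo_refcnt);
	mutex_unlock(&lo->lo_mutex);
	mutex_unlock(&loop_ctl_mutex);
	return 0;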

> - return err;
> + if (err)
> + return err;
> + atomic_inc(&lo->lo_refcnt);
> + mutex_unlock(&lo->lo_mutex);
> + return 0;
> }
>
> static void lo_release(struct gendisk *disk, fmode_t mode)
> {
> - struct loop_device *lo;
> + struct loop_device *lo = disk->private_data;
>
> - mutex_lock(&loop_ctl_mutex);
> - lo = disk->private_data;
> + mutex_lock(&lo->lo_mutex);
> if (atomic_dec_return(&lo->lo_refcnt))
> goto out_unlock;
>
> @@ -1913,7 +1914,7 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
> if (lo->lo_state != Lo_bound)
> goto out_unlock;
> lo->lo_state = Lo_rundown;
> - mutex_unlock(&loop_ctl_mutex);
> + mutex_unlock(&lo->lo_mutex);
> /*
> * In autoclear mode, stop the loop thread
> * and remove configuration after last close.
> @@ -1930,7 +1931,7 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
> }
>
> out_unlock:
> - mutex_unlock(&loop_ctl_mutex);
> + mutex_unlock(&lo->lo_mutex);
> }
>
> static const struct block_device_operations lo_fops = {
> @@ -1969,10 +1970,10 @@ static int unregister_transfer_cb(int id, void *ptr, void *data)
> struct loop_device *lo = ptr;
> struct loop_func_table *xfer = data;
>
> - mutex_lock(&loop_ctl_mutex);
> + mutex_lock(&lo->lo_mutex);
> if (lo->lo_encryption == xfer)
> loop_release_xfer(lo);
> - mutex_unlock(&loop_ctl_mutex);
> + mutex_unlock(&lo->lo_mutex);
> return 0;
> }
>
> @@ -2157,6 +2158,7 @@ static int loop_add(struct loop_device **l, int i)
> disk->flags |= GENHD_FL_NO_PART_SCAN;
> disk->flags |= GENHD_FL_EXT_DEVT;
> atomic_set(&lo->lo_refcnt, 0);
> + mutex_init(&lo->lo_mutex);

We need a corresponding call to mutex_destroy() in loop_remove().
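
Untested sketch, from my reading of loop_remove(); the point is just
that the mutex must be destroyed before the containing struct is
freed:

	static void loop_remove(struct loop_device *lo)
	{
		del_gendisk(lo->lo_disk);
		blk_cleanup_queue(lo->lo_queue);
		blk_mq_free_tag_set(&lo->tag_set);
		put_disk(lo->lo_disk);
		mutex_destroy(&lo->lo_mutex);
		kfree(lo);
	}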

> lo->lo_number = i;
> spin_lock_init(&lo->lo_lock);
> disk->major = LOOP_MAJOR;
> @@ -2272,15 +2274,21 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd,
> ret = loop_lookup(&lo, parm);
> if (ret < 0)
> break;
> + ret = mutex_lock_killable(&lo->lo_mutex);
> + if (ret)
> + break;
> if (lo->lo_state != Lo_unbound) {
> ret = -EBUSY;
> + mutex_unlock(&lo->lo_mutex);
> break;
> }
> if (atomic_read(&lo->lo_refcnt) > 0) {
> ret = -EBUSY;
> + mutex_unlock(&lo->lo_mutex);
> break;
> }
> lo->lo_disk->private_data = NULL;
> + mutex_unlock(&lo->lo_mutex);
> idr_remove(&loop_index_idr, lo->lo_number);
> loop_remove(lo);
> break;
> diff --git a/drivers/block/loop.h b/drivers/block/loop.h
> index af75a5ee4094..a3c04f310672 100644
> --- a/drivers/block/loop.h
> +++ b/drivers/block/loop.h
> @@ -62,6 +62,7 @@ struct loop_device {
> struct request_queue *lo_queue;
> struct blk_mq_tag_set tag_set;
> struct gendisk *lo_disk;

There's an instance, which is not in this patch's context, of accessing
lo_disk that needs lo_mutex protection. In loop_probe(), we call
get_disk_and_module(lo->lo_disk) and we need to lock and unlock lo_mutex
around that call.
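
Roughly (untested, with loop_ctl_mutex still held around the
loop_lookup() as today):

	mutex_lock(&lo->lo_mutex);
	kobj = get_disk_and_module(lo->lo_disk);
	mutex_unlock(&lo->lo_mutex);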

Tyler

> + struct mutex lo_mutex;
> };
>
> struct loop_cmd {
> --
> 2.25.1
>

2020-07-23 18:32:47

by Pasha Tatashin

Subject: Re: [PATCH v1 1/1] loop: scale loop device by introducing per device lock

Hi Tyler,

Thank you for the review comments. My replies are inlined below.

> > Scale it by introducing per-device lock: lo_mutex that proctests
> > field in struct loop_device. Keep loop_ctl_mutex to protect global
>
> s/proctests field/protects the fields/

OK

> > @@ -1890,22 +1890,23 @@ static int lo_open(struct block_device *bdev, fmode_t mode)
> > return err;
> > lo = bdev->bd_disk->private_data;
> > if (!lo) {
> > - err = -ENXIO;
> > - goto out;
> > + mutex_unlock(&loop_ctl_mutex);
> > + return -ENXIO;
> > }
> > -
> > - atomic_inc(&lo->lo_refcnt);
> > -out:
> > + err = mutex_lock_killable(&lo->lo_mutex);
> > mutex_unlock(&loop_ctl_mutex);
>
> I don't see a possibility for deadlock but it bothers me a little that
> we're not unlocking in the reverse locking order here, as we do in
> loop_control_ioctl(). There should be no perf impact if we move the
> mutex_unlock(&loop_ctl_mutex) after mutex_unlock(&lo->lo_mutex).

lo_open() was one of the top functions that showed up in contention
profiling, and the only shared data it updates is lo_refcnt, which can
be protected by lo_mutex. We must hold loop_ctl_mutex in order to get
a valid lo pointer; otherwise we could race with
loop_control_ioctl(LOOP_CTL_REMOVE). Unlocking in a different order is
not an issue, as long as we always preserve the locking order.


> > @@ -2157,6 +2158,7 @@ static int loop_add(struct loop_device **l, int i)
> > disk->flags |= GENHD_FL_NO_PART_SCAN;
> > disk->flags |= GENHD_FL_EXT_DEVT;
> > atomic_set(&lo->lo_refcnt, 0);
> > + mutex_init(&lo->lo_mutex);
>
> We need a corresponding call to mutex_destroy() in loop_remove().

Yes, thank you for catching this.

> > +++ b/drivers/block/loop.h
> > @@ -62,6 +62,7 @@ struct loop_device {
> > struct request_queue *lo_queue;
> > struct blk_mq_tag_set tag_set;
> > struct gendisk *lo_disk;
>
> There's an instance, which is not in this patch's context, of accessing
> lo_disk that needs lo_mutex protection. In loop_probe(), we call
> get_disk_and_module(lo->lo_disk) and we need to lock and unlock lo_mutex
> around that call.

I will add it.

Thank you,
Pasha

2020-07-23 18:40:13

by Tyler Hicks

Subject: Re: [PATCH v1 1/1] loop: scale loop device by introducing per device lock

On 2020-07-23 14:29:31, Pavel Tatashin wrote:
> Hi Tyler,
>
> Thank you for the review comments. My replies are inlined below.
>
> > > Scale it by introducing per-device lock: lo_mutex that proctests
> > > field in struct loop_device. Keep loop_ctl_mutex to protect global
> >
> > s/proctests field/protects the fields/
>
> OK
>
> > > @@ -1890,22 +1890,23 @@ static int lo_open(struct block_device *bdev, fmode_t mode)
> > > return err;
> > > lo = bdev->bd_disk->private_data;
> > > if (!lo) {
> > > - err = -ENXIO;
> > > - goto out;
> > > + mutex_unlock(&loop_ctl_mutex);
> > > + return -ENXIO;
> > > }
> > > -
> > > - atomic_inc(&lo->lo_refcnt);
> > > -out:
> > > + err = mutex_lock_killable(&lo->lo_mutex);
> > > mutex_unlock(&loop_ctl_mutex);
> >
> > I don't see a possibility for deadlock but it bothers me a little that
> > we're not unlocking in the reverse locking order here, as we do in
> > loop_control_ioctl(). There should be no perf impact if we move the
> > mutex_unlock(&loop_ctl_mutex) after mutex_unlock(&lo->lo_mutex).
>
> lo_open() was one of the top functions that showed up in contention
> profiling, and the only shared data it updates is lo_refcnt, which can
> be protected by lo_mutex. We must hold loop_ctl_mutex in order to get
> a valid lo pointer; otherwise we could race with
> loop_control_ioctl(LOOP_CTL_REMOVE). Unlocking in a different order is
> not an issue, as long as we always preserve the locking order.

It is probably a good idea to leave a comment about this in lo_open()
so that nobody comes along and tries to "correct" the unlocking order
in the future and, as a result, introduces a perf regression.

Tyler

> > > @@ -2157,6 +2158,7 @@ static int loop_add(struct loop_device **l, int i)
> > > disk->flags |= GENHD_FL_NO_PART_SCAN;
> > > disk->flags |= GENHD_FL_EXT_DEVT;
> > > atomic_set(&lo->lo_refcnt, 0);
> > > + mutex_init(&lo->lo_mutex);
> >
> > We need a corresponding call to mutex_destroy() in loop_remove().
>
> Yes, thank you for catching this.
>
> > > +++ b/drivers/block/loop.h
> > > @@ -62,6 +62,7 @@ struct loop_device {
> > > struct request_queue *lo_queue;
> > > struct blk_mq_tag_set tag_set;
> > > struct gendisk *lo_disk;
> >
> > There's an instance, which is not in this patch's context, of accessing
> > lo_disk that needs lo_mutex protection. In loop_probe(), we call
> > get_disk_and_module(lo->lo_disk) and we need to lock and unlock lo_mutex
> > around that call.
>
> I will add it.
>
> Thank you,
> Pasha

2020-07-23 19:42:51

by Pasha Tatashin

Subject: Re: [PATCH v1 1/1] loop: scale loop device by introducing per device lock

> > > > - atomic_inc(&lo->lo_refcnt);
> > > > -out:
> > > > + err = mutex_lock_killable(&lo->lo_mutex);
> > > > mutex_unlock(&loop_ctl_mutex);
> > >
> > > I don't see a possibility for deadlock but it bothers me a little that
> > > we're not unlocking in the reverse locking order here, as we do in
> > > loop_control_ioctl(). There should be no perf impact if we move the
> > > mutex_unlock(&loop_ctl_mutex) after mutex_unlock(&lo->lo_mutex).
> >
> > lo_open() was one of the top functions that showed up in contention
> > profiling, and the only shared data it updates is lo_refcnt, which can
> > be protected by lo_mutex. We must hold loop_ctl_mutex in order to get
> > a valid lo pointer; otherwise we could race with
> > loop_control_ioctl(LOOP_CTL_REMOVE). Unlocking in a different order is
> > not an issue, as long as we always preserve the locking order.
>
> It is probably a good idea to leave a comment about this in lo_open()
> so that nobody comes along and tries to "correct" the unlocking order
> in the future and, as a result, introduces a perf regression.
>
Makes sense, I will add a comment about it.
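
Perhaps something along these lines (exact wording to be settled in
v2):

	/*
	 * loop_ctl_mutex is deliberately released before lo_mutex:
	 * it is only needed to look up a valid lo, and dropping it
	 * early keeps the global lock off the hot open() path. Do
	 * not "fix" the unlock order; only the locking order
	 * (loop_ctl_mutex before lo_mutex) must be preserved.
	 */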

Thank you,
Pasha