This series introduces a new ioctl that makes it possible to atomically
configure a loop device. Previously, if you wanted to set parameters
such as the offset on a loop device, this required calling LOOP_SET_FD
to set the backing file, and then LOOP_SET_STATUS to set the offset.
However, in between these two calls, the loop device is available and
would accept requests, which is generally not desirable. Similar issues
exist around setting the block size (LOOP_SET_BLOCK_SIZE) and requesting
direct I/O mode (LOOP_SET_DIRECT_IO).
There are also performance benefits with combining these ioctls into
one, which are described in more detail in the last change in the
series.
---
v5:
- Added "loop: Call loop_config_discard() ..." as a first patch to
this series, as the rest of the refactoring done depends on it.
- Removed sector_t truncation checks, as suggested by Ming Lei.
v4:
- Addressed review comments from Christoph Hellwig:
-- Minor code cleanups
-- Clarified what lo_flags LOOP_SET_STATUS can set and clear, and
made that more explicit in the code (see [10/11])
-- LOOP_CONFIGURE can now also be used to configure the block size
and to explicitly request Direct I/O and read-only mode.
-- Explicitly reject lo_flags we don't know about in LOOP_CONFIGURE
-- Renamed LOOP_SET_FD_AND_STATUS to LOOP_CONFIGURE, since the ioctl
can now do things LOOP_SET_STATUS couldn't do.
v3:
- Addressed review comments from Christoph Hellwig:
-- Factored out loop_validate_size()
-- Split up the largish first patch in a few smaller ones
-- Use set_capacity_revalidate_and_notify()
- Fixed a variable wrongly using size_t instead of loff_t
v2:
- Addressed review comments from Bart van Assche:
-- Use SECTOR_SHIFT constant
-- Renamed loop_set_from_status() to loop_set_status_from_info()
-- Added kerneldoc for loop_set_status_from_info()
-- Removed dots in patch subject lines
- Addressed review comments from Christoph Hellwig:
-- Added missing padding in struct loop_fd_and_status
-- Cleaned up some __user pointer handling in lo_ioctl
-- Pass in a stack-initialized loop_info64 for the legacy
LOOP_SET_FD case
Martijn Coenen (11):
loop: Call loop_config_discard() only after new config is applied
loop: Remove sector_t truncation checks
loop: Factor out setting loop device size
loop: Switch to set_capacity_revalidate_and_notify()
loop: Refactor loop_set_status() size calculation
loop: Remove figure_loop_size()
loop: Factor out configuring loop from status
loop: Move loop_set_status_from_info() and friends up
loop: Rework lo_ioctl() __user argument casting
loop: Clean up LOOP_SET_STATUS lo_flags handling
loop: Add LOOP_CONFIGURE ioctl
drivers/block/loop.c | 381 ++++++++++++++++++++++----------------
include/uapi/linux/loop.h | 31 +++-
2 files changed, 255 insertions(+), 157 deletions(-)
--
2.26.2.645.ge9eca65c58-goog
sector_t is now always u64, so we don't need to check for truncation.
Signed-off-by: Martijn Coenen <[email protected]>
---
drivers/block/loop.c | 21 +++++++--------------
1 file changed, 7 insertions(+), 14 deletions(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index f1754262fc94..00de7fec0ed5 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -228,24 +228,20 @@ static void __loop_update_dio(struct loop_device *lo, bool dio)
blk_mq_unfreeze_queue(lo->lo_queue);
}
-static int
+static void
figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit)
{
loff_t size = get_size(offset, sizelimit, lo->lo_backing_file);
- sector_t x = (sector_t)size;
struct block_device *bdev = lo->lo_device;
- if (unlikely((loff_t)x != size))
- return -EFBIG;
if (lo->lo_offset != offset)
lo->lo_offset = offset;
if (lo->lo_sizelimit != sizelimit)
lo->lo_sizelimit = sizelimit;
- set_capacity(lo->lo_disk, x);
+ set_capacity(lo->lo_disk, size);
bd_set_size(bdev, (loff_t)get_capacity(bdev->bd_disk) << 9);
/* let user-space know about the new size */
kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
- return 0;
}
static inline int
@@ -1003,10 +999,8 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
!file->f_op->write_iter)
lo_flags |= LO_FLAGS_READ_ONLY;
- error = -EFBIG;
size = get_loop_size(lo, file);
- if ((loff_t)(sector_t)size != size)
- goto out_unlock;
+
error = loop_prepare_queue(lo);
if (error)
goto out_unlock;
@@ -1328,10 +1322,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
lo->lo_device->bd_inode->i_mapping->nrpages);
goto out_unfreeze;
}
- if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) {
- err = -EFBIG;
- goto out_unfreeze;
- }
+ figure_loop_size(lo, info->lo_offset, info->lo_sizelimit);
}
memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
@@ -1534,7 +1525,9 @@ static int loop_set_capacity(struct loop_device *lo)
if (unlikely(lo->lo_state != Lo_bound))
return -ENXIO;
- return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit);
+ figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit);
+
+ return 0;
}
static int loop_set_dio(struct loop_device *lo, unsigned long arg)
--
2.26.2.645.ge9eca65c58-goog
This was recently added to block/genhd.c, and takes care of both
updating the capacity and notifying userspace of the new size.
Reviewed-by: Christoph Hellwig <[email protected]>
Signed-off-by: Martijn Coenen <[email protected]>
---
drivers/block/loop.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index e69ff3c19eff..d9a756f8abd5 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -240,10 +240,9 @@ static void loop_set_size(struct loop_device *lo, loff_t size)
{
struct block_device *bdev = lo->lo_device;
- set_capacity(lo->lo_disk, size);
bd_set_size(bdev, size << SECTOR_SHIFT);
- /* let user-space know about the new size */
- kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
+
+ set_capacity_revalidate_and_notify(lo->lo_disk, size, false);
}
static void
--
2.26.2.645.ge9eca65c58-goog
figure_loop_size() calculates the loop size based on the passed in
parameters, but at the same time it updates the offset and sizelimit
parameters in the loop device configuration. That is a somewhat
unexpected side effect of a function with this name, and it is only only
needed by one of the two callers of this function - loop_set_status().
Move the lo_offset and lo_sizelimit assignment back into loop_set_status(),
and use the newly factored out functions to validate and apply the newly
calculated size. This allows us to get rid of figure_loop_size() in a
follow-up commit.
Reviewed-by: Christoph Hellwig <[email protected]>
Signed-off-by: Martijn Coenen <[email protected]>
---
drivers/block/loop.c | 37 +++++++++++++++++++------------------
1 file changed, 19 insertions(+), 18 deletions(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index d9a756f8abd5..c134b3439483 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -250,11 +250,6 @@ figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit)
{
loff_t size = get_size(offset, sizelimit, lo->lo_backing_file);
- if (lo->lo_offset != offset)
- lo->lo_offset = offset;
- if (lo->lo_sizelimit != sizelimit)
- lo->lo_sizelimit = sizelimit;
-
loop_set_size(lo, size);
}
@@ -1272,6 +1267,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
kuid_t uid = current_uid();
struct block_device *bdev;
bool partscan = false;
+ bool size_changed = false;
err = mutex_lock_killable(&loop_ctl_mutex);
if (err)
@@ -1293,6 +1289,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
if (lo->lo_offset != info->lo_offset ||
lo->lo_sizelimit != info->lo_sizelimit) {
+ size_changed = true;
sync_blockdev(lo->lo_device);
kill_bdev(lo->lo_device);
}
@@ -1300,6 +1297,15 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
/* I/O need to be drained during transfer transition */
blk_mq_freeze_queue(lo->lo_queue);
+ if (size_changed && lo->lo_device->bd_inode->i_mapping->nrpages) {
+ /* If any pages were dirtied after kill_bdev(), try again */
+ err = -EAGAIN;
+ pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n",
+ __func__, lo->lo_number, lo->lo_file_name,
+ lo->lo_device->bd_inode->i_mapping->nrpages);
+ goto out_unfreeze;
+ }
+
err = loop_release_xfer(lo);
if (err)
goto out_unfreeze;
@@ -1323,19 +1329,8 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
if (err)
goto out_unfreeze;
- if (lo->lo_offset != info->lo_offset ||
- lo->lo_sizelimit != info->lo_sizelimit) {
- /* kill_bdev should have truncated all the pages */
- if (lo->lo_device->bd_inode->i_mapping->nrpages) {
- err = -EAGAIN;
- pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n",
- __func__, lo->lo_number, lo->lo_file_name,
- lo->lo_device->bd_inode->i_mapping->nrpages);
- goto out_unfreeze;
- }
- figure_loop_size(lo, info->lo_offset, info->lo_sizelimit);
- }
-
+ lo->lo_offset = info->lo_offset;
+ lo->lo_sizelimit = info->lo_sizelimit;
memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE);
lo->lo_file_name[LO_NAME_SIZE-1] = 0;
@@ -1359,6 +1354,12 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
lo->lo_key_owner = uid;
}
+ if (size_changed) {
+ loff_t new_size = get_size(lo->lo_offset, lo->lo_sizelimit,
+ lo->lo_backing_file);
+ loop_set_size(lo, new_size);
+ }
+
loop_config_discard(lo);
/* update dio if lo_offset or transfer is changed */
--
2.26.2.645.ge9eca65c58-goog
Factor out this code into a separate function, so it can be reused by
other code more easily.
Reviewed-by: Christoph Hellwig <[email protected]>
Signed-off-by: Martijn Coenen <[email protected]>
---
drivers/block/loop.c | 117 +++++++++++++++++++++++++------------------
1 file changed, 67 insertions(+), 50 deletions(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index e281a9f03d96..6a4c0ba225ca 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1251,75 +1251,43 @@ static int loop_clr_fd(struct loop_device *lo)
return __loop_clr_fd(lo, false);
}
+/**
+ * loop_set_status_from_info - configure device from loop_info
+ * @lo: struct loop_device to configure
+ * @info: struct loop_info64 to configure the device with
+ *
+ * Configures the loop device parameters according to the passed
+ * in loop_info64 configuration.
+ */
static int
-loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
+loop_set_status_from_info(struct loop_device *lo,
+ const struct loop_info64 *info)
{
int err;
struct loop_func_table *xfer;
kuid_t uid = current_uid();
- struct block_device *bdev;
- bool partscan = false;
- bool size_changed = false;
-
- err = mutex_lock_killable(&loop_ctl_mutex);
- if (err)
- return err;
- if (lo->lo_encrypt_key_size &&
- !uid_eq(lo->lo_key_owner, uid) &&
- !capable(CAP_SYS_ADMIN)) {
- err = -EPERM;
- goto out_unlock;
- }
- if (lo->lo_state != Lo_bound) {
- err = -ENXIO;
- goto out_unlock;
- }
- if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE) {
- err = -EINVAL;
- goto out_unlock;
- }
-
- if (lo->lo_offset != info->lo_offset ||
- lo->lo_sizelimit != info->lo_sizelimit) {
- size_changed = true;
- sync_blockdev(lo->lo_device);
- kill_bdev(lo->lo_device);
- }
- /* I/O need to be drained during transfer transition */
- blk_mq_freeze_queue(lo->lo_queue);
-
- if (size_changed && lo->lo_device->bd_inode->i_mapping->nrpages) {
- /* If any pages were dirtied after kill_bdev(), try again */
- err = -EAGAIN;
- pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n",
- __func__, lo->lo_number, lo->lo_file_name,
- lo->lo_device->bd_inode->i_mapping->nrpages);
- goto out_unfreeze;
- }
+ if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE)
+ return -EINVAL;
err = loop_release_xfer(lo);
if (err)
- goto out_unfreeze;
+ return err;
if (info->lo_encrypt_type) {
unsigned int type = info->lo_encrypt_type;
- if (type >= MAX_LO_CRYPT) {
- err = -EINVAL;
- goto out_unfreeze;
- }
+ if (type >= MAX_LO_CRYPT)
+ return -EINVAL;
xfer = xfer_funcs[type];
- if (xfer == NULL) {
- err = -EINVAL;
- goto out_unfreeze;
- }
+ if (xfer == NULL)
+ return -EINVAL;
} else
xfer = NULL;
err = loop_init_xfer(lo, xfer, info);
if (err)
- goto out_unfreeze;
+ return err;
lo->lo_offset = info->lo_offset;
lo->lo_sizelimit = info->lo_sizelimit;
@@ -1346,6 +1314,55 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
lo->lo_key_owner = uid;
}
+ return 0;
+}
+
+static int
+loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
+{
+ int err;
+ struct block_device *bdev;
+ kuid_t uid = current_uid();
+ bool partscan = false;
+ bool size_changed = false;
+
+ err = mutex_lock_killable(&loop_ctl_mutex);
+ if (err)
+ return err;
+ if (lo->lo_encrypt_key_size &&
+ !uid_eq(lo->lo_key_owner, uid) &&
+ !capable(CAP_SYS_ADMIN)) {
+ err = -EPERM;
+ goto out_unlock;
+ }
+ if (lo->lo_state != Lo_bound) {
+ err = -ENXIO;
+ goto out_unlock;
+ }
+
+ if (lo->lo_offset != info->lo_offset ||
+ lo->lo_sizelimit != info->lo_sizelimit) {
+ size_changed = true;
+ sync_blockdev(lo->lo_device);
+ kill_bdev(lo->lo_device);
+ }
+
+ /* I/O need to be drained during transfer transition */
+ blk_mq_freeze_queue(lo->lo_queue);
+
+ if (size_changed && lo->lo_device->bd_inode->i_mapping->nrpages) {
+ /* If any pages were dirtied after kill_bdev(), try again */
+ err = -EAGAIN;
+ pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n",
+ __func__, lo->lo_number, lo->lo_file_name,
+ lo->lo_device->bd_inode->i_mapping->nrpages);
+ goto out_unfreeze;
+ }
+
+ err = loop_set_status_from_info(lo, info);
+ if (err)
+ goto out_unfreeze;
+
if (size_changed) {
loff_t new_size = get_size(lo->lo_offset, lo->lo_sizelimit,
lo->lo_backing_file);
--
2.26.2.645.ge9eca65c58-goog
In preparation for a new ioctl that needs to copy_from_user(); makes the
code easier to read as well.
Reviewed-by: Christoph Hellwig <[email protected]>
Signed-off-by: Martijn Coenen <[email protected]>
---
drivers/block/loop.c | 11 +++++------
1 file changed, 5 insertions(+), 6 deletions(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 4dc11d954169..31f10da4945e 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1634,6 +1634,7 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg)
{
struct loop_device *lo = bdev->bd_disk->private_data;
+ void __user *argp = (void __user *) arg;
int err;
switch (cmd) {
@@ -1646,21 +1647,19 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode,
case LOOP_SET_STATUS:
err = -EPERM;
if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) {
- err = loop_set_status_old(lo,
- (struct loop_info __user *)arg);
+ err = loop_set_status_old(lo, argp);
}
break;
case LOOP_GET_STATUS:
- return loop_get_status_old(lo, (struct loop_info __user *) arg);
+ return loop_get_status_old(lo, argp);
case LOOP_SET_STATUS64:
err = -EPERM;
if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) {
- err = loop_set_status64(lo,
- (struct loop_info64 __user *) arg);
+ err = loop_set_status64(lo, argp);
}
break;
case LOOP_GET_STATUS64:
- return loop_get_status64(lo, (struct loop_info64 __user *) arg);
+ return loop_get_status64(lo, argp);
case LOOP_SET_CAPACITY:
case LOOP_SET_DIRECT_IO:
case LOOP_SET_BLOCK_SIZE:
--
2.26.2.645.ge9eca65c58-goog
So we can use it without forward declaration. This is a separate commit
to make it easier to verify that this is just a move, without functional
modifications.
Reviewed-by: Christoph Hellwig <[email protected]>
Signed-off-by: Martijn Coenen <[email protected]>
---
drivers/block/loop.c | 206 +++++++++++++++++++++----------------------
1 file changed, 103 insertions(+), 103 deletions(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 6a4c0ba225ca..4dc11d954169 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -949,6 +949,109 @@ static void loop_update_rotational(struct loop_device *lo)
blk_queue_flag_clear(QUEUE_FLAG_NONROT, q);
}
+static int
+loop_release_xfer(struct loop_device *lo)
+{
+ int err = 0;
+ struct loop_func_table *xfer = lo->lo_encryption;
+
+ if (xfer) {
+ if (xfer->release)
+ err = xfer->release(lo);
+ lo->transfer = NULL;
+ lo->lo_encryption = NULL;
+ module_put(xfer->owner);
+ }
+ return err;
+}
+
+static int
+loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer,
+ const struct loop_info64 *i)
+{
+ int err = 0;
+
+ if (xfer) {
+ struct module *owner = xfer->owner;
+
+ if (!try_module_get(owner))
+ return -EINVAL;
+ if (xfer->init)
+ err = xfer->init(lo, i);
+ if (err)
+ module_put(owner);
+ else
+ lo->lo_encryption = xfer;
+ }
+ return err;
+}
+
+/**
+ * loop_set_status_from_info - configure device from loop_info
+ * @lo: struct loop_device to configure
+ * @info: struct loop_info64 to configure the device with
+ *
+ * Configures the loop device parameters according to the passed
+ * in loop_info64 configuration.
+ */
+static int
+loop_set_status_from_info(struct loop_device *lo,
+ const struct loop_info64 *info)
+{
+ int err;
+ struct loop_func_table *xfer;
+ kuid_t uid = current_uid();
+
+ if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE)
+ return -EINVAL;
+
+ err = loop_release_xfer(lo);
+ if (err)
+ return err;
+
+ if (info->lo_encrypt_type) {
+ unsigned int type = info->lo_encrypt_type;
+
+ if (type >= MAX_LO_CRYPT)
+ return -EINVAL;
+ xfer = xfer_funcs[type];
+ if (xfer == NULL)
+ return -EINVAL;
+ } else
+ xfer = NULL;
+
+ err = loop_init_xfer(lo, xfer, info);
+ if (err)
+ return err;
+
+ lo->lo_offset = info->lo_offset;
+ lo->lo_sizelimit = info->lo_sizelimit;
+ memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
+ memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE);
+ lo->lo_file_name[LO_NAME_SIZE-1] = 0;
+ lo->lo_crypt_name[LO_NAME_SIZE-1] = 0;
+
+ if (!xfer)
+ xfer = &none_funcs;
+ lo->transfer = xfer->transfer;
+ lo->ioctl = xfer->ioctl;
+
+ if ((lo->lo_flags & LO_FLAGS_AUTOCLEAR) !=
+ (info->lo_flags & LO_FLAGS_AUTOCLEAR))
+ lo->lo_flags ^= LO_FLAGS_AUTOCLEAR;
+
+ lo->lo_encrypt_key_size = info->lo_encrypt_key_size;
+ lo->lo_init[0] = info->lo_init[0];
+ lo->lo_init[1] = info->lo_init[1];
+ if (info->lo_encrypt_key_size) {
+ memcpy(lo->lo_encrypt_key, info->lo_encrypt_key,
+ info->lo_encrypt_key_size);
+ lo->lo_key_owner = uid;
+ }
+
+ return 0;
+}
+
static int loop_set_fd(struct loop_device *lo, fmode_t mode,
struct block_device *bdev, unsigned int arg)
{
@@ -1070,43 +1173,6 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
return error;
}
-static int
-loop_release_xfer(struct loop_device *lo)
-{
- int err = 0;
- struct loop_func_table *xfer = lo->lo_encryption;
-
- if (xfer) {
- if (xfer->release)
- err = xfer->release(lo);
- lo->transfer = NULL;
- lo->lo_encryption = NULL;
- module_put(xfer->owner);
- }
- return err;
-}
-
-static int
-loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer,
- const struct loop_info64 *i)
-{
- int err = 0;
-
- if (xfer) {
- struct module *owner = xfer->owner;
-
- if (!try_module_get(owner))
- return -EINVAL;
- if (xfer->init)
- err = xfer->init(lo, i);
- if (err)
- module_put(owner);
- else
- lo->lo_encryption = xfer;
- }
- return err;
-}
-
static int __loop_clr_fd(struct loop_device *lo, bool release)
{
struct file *filp = NULL;
@@ -1251,72 +1317,6 @@ static int loop_clr_fd(struct loop_device *lo)
return __loop_clr_fd(lo, false);
}
-/**
- * loop_set_status_from_info - configure device from loop_info
- * @lo: struct loop_device to configure
- * @info: struct loop_info64 to configure the device with
- *
- * Configures the loop device parameters according to the passed
- * in loop_info64 configuration.
- */
-static int
-loop_set_status_from_info(struct loop_device *lo,
- const struct loop_info64 *info)
-{
- int err;
- struct loop_func_table *xfer;
- kuid_t uid = current_uid();
-
- if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE)
- return -EINVAL;
-
- err = loop_release_xfer(lo);
- if (err)
- return err;
-
- if (info->lo_encrypt_type) {
- unsigned int type = info->lo_encrypt_type;
-
- if (type >= MAX_LO_CRYPT)
- return -EINVAL;
- xfer = xfer_funcs[type];
- if (xfer == NULL)
- return -EINVAL;
- } else
- xfer = NULL;
-
- err = loop_init_xfer(lo, xfer, info);
- if (err)
- return err;
-
- lo->lo_offset = info->lo_offset;
- lo->lo_sizelimit = info->lo_sizelimit;
- memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
- memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE);
- lo->lo_file_name[LO_NAME_SIZE-1] = 0;
- lo->lo_crypt_name[LO_NAME_SIZE-1] = 0;
-
- if (!xfer)
- xfer = &none_funcs;
- lo->transfer = xfer->transfer;
- lo->ioctl = xfer->ioctl;
-
- if ((lo->lo_flags & LO_FLAGS_AUTOCLEAR) !=
- (info->lo_flags & LO_FLAGS_AUTOCLEAR))
- lo->lo_flags ^= LO_FLAGS_AUTOCLEAR;
-
- lo->lo_encrypt_key_size = info->lo_encrypt_key_size;
- lo->lo_init[0] = info->lo_init[0];
- lo->lo_init[1] = info->lo_init[1];
- if (info->lo_encrypt_key_size) {
- memcpy(lo->lo_encrypt_key, info->lo_encrypt_key,
- info->lo_encrypt_key_size);
- lo->lo_key_owner = uid;
- }
-
- return 0;
-}
-
static int
loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
{
--
2.26.2.645.ge9eca65c58-goog
This allows userspace to completely setup a loop device with a single
ioctl, removing the in-between state where the device can be partially
configured - eg the loop device has a backing file associated with it,
but is reading from the wrong offset.
Besides removing the intermediate state, another big benefit of this
ioctl is that LOOP_SET_STATUS can be slow; the main reason for this
slowness is that LOOP_SET_STATUS(64) calls blk_mq_freeze_queue() to
freeze the associated queue; this requires waiting for RCU
synchronization, which I've measured can take about 15-20ms on this
device on average.
In addition to doing what LOOP_SET_STATUS can do, LOOP_CONFIGURE can
also be used to:
- Set the correct block size immediately by setting
loop_config.block_size (avoids LOOP_SET_BLOCK_SIZE)
- Explicitly request direct I/O mode by setting LO_FLAGS_DIRECT_IO
in loop_config.info.lo_flags (avoids LOOP_SET_DIRECT_IO)
- Explicitly request read-only mode by setting LO_FLAGS_READ_ONLY
in loop_config.info.lo_flags
Here's setting up ~70 regular loop devices with an offset on an x86
Android device, using LOOP_SET_FD and LOOP_SET_STATUS:
vsoc_x86:/system/apex # time for i in `seq 30 100`;
do losetup -r -o 4096 /dev/block/loop$i com.android.adbd.apex; done
0m03.40s real 0m00.02s user 0m00.03s system
Here's configuring ~70 devices in the same way, but using a modified
losetup that uses the new LOOP_CONFIGURE ioctl:
vsoc_x86:/system/apex # time for i in `seq 30 100`;
do losetup -r -o 4096 /dev/block/loop$i com.android.adbd.apex; done
0m01.94s real 0m00.01s user 0m00.01s system
Reviewed-by: Christoph Hellwig <[email protected]>
Signed-off-by: Martijn Coenen <[email protected]>
---
drivers/block/loop.c | 104 ++++++++++++++++++++++++++++----------
include/uapi/linux/loop.h | 21 ++++++++
2 files changed, 97 insertions(+), 28 deletions(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 13518ba191f5..a565c5aafa52 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -228,6 +228,19 @@ static void __loop_update_dio(struct loop_device *lo, bool dio)
blk_mq_unfreeze_queue(lo->lo_queue);
}
+/**
+ * loop_validate_block_size() - validates the passed in block size
+ * @bsize: size to validate
+ */
+static int
+loop_validate_block_size(unsigned short bsize)
+{
+ if (bsize < 512 || bsize > PAGE_SIZE || !is_power_of_2(bsize))
+ return -EINVAL;
+
+ return 0;
+}
+
/**
* loop_set_size() - sets device size and notifies userspace
* @lo: struct loop_device to set the size for
@@ -1050,23 +1063,24 @@ loop_set_status_from_info(struct loop_device *lo,
return 0;
}
-static int loop_set_fd(struct loop_device *lo, fmode_t mode,
- struct block_device *bdev, unsigned int arg)
+static int loop_configure(struct loop_device *lo, fmode_t mode,
+ struct block_device *bdev,
+ const struct loop_config *config)
{
struct file *file;
struct inode *inode;
struct address_space *mapping;
struct block_device *claimed_bdev = NULL;
- int lo_flags = 0;
int error;
loff_t size;
bool partscan;
+ unsigned short bsize;
/* This is safe, since we have a reference from open(). */
__module_get(THIS_MODULE);
error = -EBADF;
- file = fget(arg);
+ file = fget(config->fd);
if (!file)
goto out;
@@ -1075,7 +1089,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
* here to avoid changing device under exclusive owner.
*/
if (!(mode & FMODE_EXCL)) {
- claimed_bdev = bd_start_claiming(bdev, loop_set_fd);
+ claimed_bdev = bd_start_claiming(bdev, loop_configure);
if (IS_ERR(claimed_bdev)) {
error = PTR_ERR(claimed_bdev);
goto out_putf;
@@ -1097,11 +1111,26 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
mapping = file->f_mapping;
inode = mapping->host;
+ size = get_loop_size(lo, file);
+
+ if ((config->info.lo_flags & ~LOOP_CONFIGURE_SETTABLE_FLAGS) != 0) {
+ error = -EINVAL;
+ goto out_unlock;
+ }
+
+ if (config->block_size) {
+ error = loop_validate_block_size(config->block_size);
+ if (error)
+ goto out_unlock;
+ }
+
+ error = loop_set_status_from_info(lo, &config->info);
+ if (error)
+ goto out_unlock;
+
if (!(file->f_mode & FMODE_WRITE) || !(mode & FMODE_WRITE) ||
!file->f_op->write_iter)
- lo_flags |= LO_FLAGS_READ_ONLY;
-
- size = get_loop_size(lo, file);
+ lo->lo_flags |= LO_FLAGS_READ_ONLY;
error = loop_prepare_queue(lo);
if (error)
@@ -1109,30 +1138,28 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
error = 0;
- set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0);
+ set_device_ro(bdev, (lo->lo_flags & LO_FLAGS_READ_ONLY) != 0);
- lo->use_dio = false;
+ lo->use_dio = lo->lo_flags & LO_FLAGS_DIRECT_IO;
lo->lo_device = bdev;
- lo->lo_flags = lo_flags;
lo->lo_backing_file = file;
- lo->transfer = NULL;
- lo->ioctl = NULL;
- lo->lo_sizelimit = 0;
lo->old_gfp_mask = mapping_gfp_mask(mapping);
mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
- if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync)
+ if (!(lo->lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync)
blk_queue_write_cache(lo->lo_queue, true, false);
- if (io_is_direct(lo->lo_backing_file) && inode->i_sb->s_bdev) {
+ if (config->block_size)
+ bsize = config->block_size;
+ else if (io_is_direct(lo->lo_backing_file) && inode->i_sb->s_bdev)
/* In case of direct I/O, match underlying block size */
- unsigned short bsize = bdev_logical_block_size(
- inode->i_sb->s_bdev);
+ bsize = bdev_logical_block_size(inode->i_sb->s_bdev);
+ else
+ bsize = 512;
- blk_queue_logical_block_size(lo->lo_queue, bsize);
- blk_queue_physical_block_size(lo->lo_queue, bsize);
- blk_queue_io_min(lo->lo_queue, bsize);
- }
+ blk_queue_logical_block_size(lo->lo_queue, bsize);
+ blk_queue_physical_block_size(lo->lo_queue, bsize);
+ blk_queue_io_min(lo->lo_queue, bsize);
loop_update_rotational(lo);
loop_update_dio(lo);
@@ -1155,14 +1182,14 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
if (partscan)
loop_reread_partitions(lo, bdev);
if (claimed_bdev)
- bd_abort_claiming(bdev, claimed_bdev, loop_set_fd);
+ bd_abort_claiming(bdev, claimed_bdev, loop_configure);
return 0;
out_unlock:
mutex_unlock(&loop_ctl_mutex);
out_bdev:
if (claimed_bdev)
- bd_abort_claiming(bdev, claimed_bdev, loop_set_fd);
+ bd_abort_claiming(bdev, claimed_bdev, loop_configure);
out_putf:
fput(file);
out:
@@ -1582,8 +1609,9 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
if (lo->lo_state != Lo_bound)
return -ENXIO;
- if (arg < 512 || arg > PAGE_SIZE || !is_power_of_2(arg))
- return -EINVAL;
+ err = loop_validate_block_size(arg);
+ if (err)
+ return err;
if (lo->lo_queue->limits.logical_block_size == arg)
return 0;
@@ -1645,8 +1673,27 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode,
int err;
switch (cmd) {
- case LOOP_SET_FD:
- return loop_set_fd(lo, mode, bdev, arg);
+ case LOOP_SET_FD: {
+ /*
+ * Legacy case - pass in a zeroed out struct loop_config with
+ * only the file descriptor set , which corresponds with the
+ * default parameters we'd have used otherwise.
+ */
+ struct loop_config config;
+
+ memset(&config, 0, sizeof(config));
+ config.fd = arg;
+
+ return loop_configure(lo, mode, bdev, &config);
+ }
+ case LOOP_CONFIGURE: {
+ struct loop_config config;
+
+ if (copy_from_user(&config, argp, sizeof(config)))
+ return -EFAULT;
+
+ return loop_configure(lo, mode, bdev, &config);
+ }
case LOOP_CHANGE_FD:
return loop_change_fd(lo, bdev, arg);
case LOOP_CLR_FD:
@@ -1818,6 +1865,7 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode,
case LOOP_CLR_FD:
case LOOP_GET_STATUS64:
case LOOP_SET_STATUS64:
+ case LOOP_CONFIGURE:
arg = (unsigned long) compat_ptr(arg);
/* fall through */
case LOOP_SET_FD:
diff --git a/include/uapi/linux/loop.h b/include/uapi/linux/loop.h
index 6b32fee80ce0..24a1c45bd1ae 100644
--- a/include/uapi/linux/loop.h
+++ b/include/uapi/linux/loop.h
@@ -31,6 +31,10 @@ enum {
/* LO_FLAGS that can be cleared using LOOP_SET_STATUS(64) */
#define LOOP_SET_STATUS_CLEARABLE_FLAGS (LO_FLAGS_AUTOCLEAR)
+/* LO_FLAGS that can be set using LOOP_CONFIGURE */
+#define LOOP_CONFIGURE_SETTABLE_FLAGS (LO_FLAGS_READ_ONLY | LO_FLAGS_AUTOCLEAR \
+ | LO_FLAGS_PARTSCAN | LO_FLAGS_DIRECT_IO)
+
#include <asm/posix_types.h> /* for __kernel_old_dev_t */
#include <linux/types.h> /* for __u64 */
@@ -66,6 +70,22 @@ struct loop_info64 {
__u64 lo_init[2];
};
+/**
+ * struct loop_config - Complete configuration for a loop device.
+ * @fd: fd of the file to be used as a backing file for the loop device.
+ * @block_size: block size to use; ignored if 0.
+ * @info: struct loop_info64 to configure the loop device with.
+ *
+ * This structure is used with the LOOP_CONFIGURE ioctl, and can be used to
+ * atomically setup and configure all loop device parameters at once.
+ */
+struct loop_config {
+ __u32 fd;
+ __u32 block_size;
+ struct loop_info64 info;
+ __u64 __reserved[8];
+};
+
/*
* Loop filter types
*/
@@ -96,6 +116,7 @@ struct loop_info64 {
#define LOOP_SET_CAPACITY 0x4C07
#define LOOP_SET_DIRECT_IO 0x4C08
#define LOOP_SET_BLOCK_SIZE 0x4C09
+#define LOOP_CONFIGURE 0x4C0A
/* /dev/loop-control interface */
#define LOOP_CTL_ADD 0x4C80
--
2.26.2.645.ge9eca65c58-goog
loop_set_status() calls loop_config_discard() to configure discard for
the loop device; however, the discard configuration depends on whether
the loop device uses encryption, and when we call it the encryption
configuration has not been updated yet. Move the call down so we apply
the correct discard configuration based on the new configuration.
Reviewed-by: Christoph Hellwig <[email protected]>
Reviewed-by: Bob Liu <[email protected]>
Reviewed-by: Bart Van Assche <[email protected]>
Signed-off-by: Martijn Coenen <[email protected]>
---
drivers/block/loop.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index da693e6a834e..f1754262fc94 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1334,8 +1334,6 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
}
}
- loop_config_discard(lo);
-
memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE);
lo->lo_file_name[LO_NAME_SIZE-1] = 0;
@@ -1359,6 +1357,8 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
lo->lo_key_owner = uid;
}
+ loop_config_discard(lo);
+
/* update dio if lo_offset or transfer is changed */
__loop_update_dio(lo, lo->use_dio);
--
2.26.2.645.ge9eca65c58-goog
This function was now only used by loop_set_capacity(). Just open code
the remaining code in the caller instead.
Reviewed-by: Christoph Hellwig <[email protected]>
Signed-off-by: Martijn Coenen <[email protected]>
---
drivers/block/loop.c | 13 ++++---------
1 file changed, 4 insertions(+), 9 deletions(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index c134b3439483..e281a9f03d96 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -245,14 +245,6 @@ static void loop_set_size(struct loop_device *lo, loff_t size)
set_capacity_revalidate_and_notify(lo->lo_disk, size, false);
}
-static void
-figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit)
-{
- loff_t size = get_size(offset, sizelimit, lo->lo_backing_file);
-
- loop_set_size(lo, size);
-}
-
static inline int
lo_do_transfer(struct loop_device *lo, int cmd,
struct page *rpage, unsigned roffs,
@@ -1534,10 +1526,13 @@ loop_get_status64(struct loop_device *lo, struct loop_info64 __user *arg) {
static int loop_set_capacity(struct loop_device *lo)
{
+ loff_t size;
+
if (unlikely(lo->lo_state != Lo_bound))
return -ENXIO;
- figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit);
+ size = get_loop_size(lo, lo->lo_backing_file);
+ loop_set_size(lo, size);
return 0;
}
--
2.26.2.645.ge9eca65c58-goog
LOOP_SET_STATUS(64) will actually allow some lo_flags to be modified; in
particular, LO_FLAGS_AUTOCLEAR can be set and cleared, whereas
LO_FLAGS_PARTSCAN can be set to request a partition scan. Make this
explicit by updating the UAPI to include the flags that can be
set/cleared using this ioctl.
The implementation can then blindly take over the passed in flags,
and use the previous flags for those flags that can't be set / cleared
using LOOP_SET_STATUS.
Reviewed-by: Christoph Hellwig <[email protected]>
Signed-off-by: Martijn Coenen <[email protected]>
---
drivers/block/loop.c | 19 +++++++++++++------
include/uapi/linux/loop.h | 10 ++++++++--
2 files changed, 21 insertions(+), 8 deletions(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 31f10da4945e..13518ba191f5 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1036,9 +1036,7 @@ loop_set_status_from_info(struct loop_device *lo,
lo->transfer = xfer->transfer;
lo->ioctl = xfer->ioctl;
- if ((lo->lo_flags & LO_FLAGS_AUTOCLEAR) !=
- (info->lo_flags & LO_FLAGS_AUTOCLEAR))
- lo->lo_flags ^= LO_FLAGS_AUTOCLEAR;
+ lo->lo_flags = info->lo_flags;
lo->lo_encrypt_key_size = info->lo_encrypt_key_size;
lo->lo_init[0] = info->lo_init[0];
@@ -1323,6 +1321,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
int err;
struct block_device *bdev;
kuid_t uid = current_uid();
+ int prev_lo_flags;
bool partscan = false;
bool size_changed = false;
@@ -1359,10 +1358,19 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
goto out_unfreeze;
}
+ prev_lo_flags = lo->lo_flags;
+
err = loop_set_status_from_info(lo, info);
if (err)
goto out_unfreeze;
+ /* Mask out flags that can't be set using LOOP_SET_STATUS. */
+ lo->lo_flags &= ~LOOP_SET_STATUS_SETTABLE_FLAGS;
+ /* For those flags, use the previous values instead */
+ lo->lo_flags |= prev_lo_flags & ~LOOP_SET_STATUS_SETTABLE_FLAGS;
+ /* For flags that can't be cleared, use previous values too */
+ lo->lo_flags |= prev_lo_flags & ~LOOP_SET_STATUS_CLEARABLE_FLAGS;
+
if (size_changed) {
loff_t new_size = get_size(lo->lo_offset, lo->lo_sizelimit,
lo->lo_backing_file);
@@ -1377,9 +1385,8 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
out_unfreeze:
blk_mq_unfreeze_queue(lo->lo_queue);
- if (!err && (info->lo_flags & LO_FLAGS_PARTSCAN) &&
- !(lo->lo_flags & LO_FLAGS_PARTSCAN)) {
- lo->lo_flags |= LO_FLAGS_PARTSCAN;
+ if (!err && (lo->lo_flags & LO_FLAGS_PARTSCAN) &&
+ !(prev_lo_flags & LO_FLAGS_PARTSCAN)) {
lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN;
bdev = lo->lo_device;
partscan = true;
diff --git a/include/uapi/linux/loop.h b/include/uapi/linux/loop.h
index 080a8df134ef..6b32fee80ce0 100644
--- a/include/uapi/linux/loop.h
+++ b/include/uapi/linux/loop.h
@@ -25,6 +25,12 @@ enum {
LO_FLAGS_DIRECT_IO = 16,
};
+/* LO_FLAGS that can be set using LOOP_SET_STATUS(64) */
+#define LOOP_SET_STATUS_SETTABLE_FLAGS (LO_FLAGS_AUTOCLEAR | LO_FLAGS_PARTSCAN)
+
+/* LO_FLAGS that can be cleared using LOOP_SET_STATUS(64) */
+#define LOOP_SET_STATUS_CLEARABLE_FLAGS (LO_FLAGS_AUTOCLEAR)
+
#include <asm/posix_types.h> /* for __kernel_old_dev_t */
#include <linux/types.h> /* for __u64 */
@@ -37,7 +43,7 @@ struct loop_info {
int lo_offset;
int lo_encrypt_type;
int lo_encrypt_key_size; /* ioctl w/o */
- int lo_flags; /* ioctl r/o */
+ int lo_flags;
char lo_name[LO_NAME_SIZE];
unsigned char lo_encrypt_key[LO_KEY_SIZE]; /* ioctl w/o */
unsigned long lo_init[2];
@@ -53,7 +59,7 @@ struct loop_info64 {
__u32 lo_number; /* ioctl r/o */
__u32 lo_encrypt_type;
__u32 lo_encrypt_key_size; /* ioctl w/o */
- __u32 lo_flags; /* ioctl r/o */
+ __u32 lo_flags;
__u8 lo_file_name[LO_NAME_SIZE];
__u8 lo_crypt_name[LO_NAME_SIZE];
__u8 lo_encrypt_key[LO_KEY_SIZE]; /* ioctl w/o */
--
2.26.2.645.ge9eca65c58-goog
On Wed, May 13, 2020 at 03:38:36PM +0200, Martijn Coenen wrote:
> sector_t is now always u64, so we don't need to check for truncation.
>
> Signed-off-by: Martijn Coenen <[email protected]>
Looks good:
Reviewed-by: Christoph Hellwig <[email protected]>
This code is used repeatedly.
Reviewed-by: Christoph Hellwig <[email protected]>
Signed-off-by: Martijn Coenen <[email protected]>
---
drivers/block/loop.c | 30 +++++++++++++++++++++---------
1 file changed, 21 insertions(+), 9 deletions(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 00de7fec0ed5..e69ff3c19eff 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -228,20 +228,35 @@ static void __loop_update_dio(struct loop_device *lo, bool dio)
blk_mq_unfreeze_queue(lo->lo_queue);
}
+/**
+ * loop_set_size() - sets device size and notifies userspace
+ * @lo: struct loop_device to set the size for
+ * @size: new size of the loop device
+ *
+ * Callers must validate that the size passed into this function fits into
+ * a sector_t, eg using loop_validate_size()
+ */
+static void loop_set_size(struct loop_device *lo, loff_t size)
+{
+ struct block_device *bdev = lo->lo_device;
+
+ set_capacity(lo->lo_disk, size);
+ bd_set_size(bdev, size << SECTOR_SHIFT);
+ /* let user-space know about the new size */
+ kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
+}
+
static void
figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit)
{
loff_t size = get_size(offset, sizelimit, lo->lo_backing_file);
- struct block_device *bdev = lo->lo_device;
if (lo->lo_offset != offset)
lo->lo_offset = offset;
if (lo->lo_sizelimit != sizelimit)
lo->lo_sizelimit = sizelimit;
- set_capacity(lo->lo_disk, size);
- bd_set_size(bdev, (loff_t)get_capacity(bdev->bd_disk) << 9);
- /* let user-space know about the new size */
- kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
+
+ loop_set_size(lo, size);
}
static inline int
@@ -1034,11 +1049,8 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
loop_update_rotational(lo);
loop_update_dio(lo);
- set_capacity(lo->lo_disk, size);
- bd_set_size(bdev, size << 9);
loop_sysfs_init(lo);
- /* let user-space know about the new size */
- kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
+ loop_set_size(lo, size);
set_blocksize(bdev, S_ISBLK(inode->i_mode) ?
block_size(inode->i_bdev) : PAGE_SIZE);
--
2.26.2.645.ge9eca65c58-goog
On 5/13/20 7:38 AM, Martijn Coenen wrote:
> This series introduces a new ioctl that makes it possible to atomically
> configure a loop device. Previously, if you wanted to set parameters
> such as the offset on a loop device, this required calling LOOP_SET_FD
> to set the backing file, and then LOOP_SET_STATUS to set the offset.
> However, in between these two calls, the loop device is available and
> would accept requests, which is generally not desirable. Similar issues
> exist around setting the block size (LOOP_SET_BLOCK_SIZE) and requesting
> direct I/O mode (LOOP_SET_DIRECT_IO).
>
> There are also performance benefits with combining these ioctls into
> one, which are described in more detail in the last change in the
> series.
Thanks, applied for 5.8.
--
Jens Axboe
On 13.05.2020 15.38, Martijn Coenen wrote:
> This allows userspace to completely setup a loop device with a single
> ioctl, removing the in-between state where the device can be partially
> configured - eg the loop device has a backing file associated with it,
> but is reading from the wrong offset.
>
> Besides removing the intermediate state, another big benefit of this
> ioctl is that LOOP_SET_STATUS can be slow; the main reason for this
> slowness is that LOOP_SET_STATUS(64) calls blk_mq_freeze_queue() to
> freeze the associated queue; this requires waiting for RCU
> synchronization, which I've measured can take about 15-20ms on this
> device on average.
>
> In addition to doing what LOOP_SET_STATUS can do, LOOP_CONFIGURE can
> also be used to:
> - Set the correct block size immediately by setting
> loop_config.block_size (avoids LOOP_SET_BLOCK_SIZE)
> - Explicitly request direct I/O mode by setting LO_FLAGS_DIRECT_IO
> in loop_config.info.lo_flags (avoids LOOP_SET_DIRECT_IO)
> - Explicitly request read-only mode by setting LO_FLAGS_READ_ONLY
> in loop_config.info.lo_flags
>
> Here's setting up ~70 regular loop devices with an offset on an x86
> Android device, using LOOP_SET_FD and LOOP_SET_STATUS:
>
> vsoc_x86:/system/apex # time for i in `seq 30 100`;
> do losetup -r -o 4096 /dev/block/loop$i com.android.adbd.apex; done
> 0m03.40s real 0m00.02s user 0m00.03s system
>
> Here's configuring ~70 devices in the same way, but using a modified
> losetup that uses the new LOOP_CONFIGURE ioctl:
>
> vsoc_x86:/system/apex # time for i in `seq 30 100`;
> do losetup -r -o 4096 /dev/block/loop$i com.android.adbd.apex; done
> 0m01.94s real 0m00.01s user 0m00.01s system
>
> Reviewed-by: Christoph Hellwig <[email protected]>
> Signed-off-by: Martijn Coenen <[email protected]>
> ---
> drivers/block/loop.c | 104 ++++++++++++++++++++++++++++----------
> include/uapi/linux/loop.h | 21 ++++++++
> 2 files changed, 97 insertions(+), 28 deletions(-)
>
> diff --git a/drivers/block/loop.c b/drivers/block/loop.c
> index 13518ba191f5..a565c5aafa52 100644
> --- a/drivers/block/loop.c
> +++ b/drivers/block/loop.c
> @@ -228,6 +228,19 @@ static void __loop_update_dio(struct loop_device *lo, bool dio)
> blk_mq_unfreeze_queue(lo->lo_queue);
> }
>
> +/**
> + * loop_validate_block_size() - validates the passed in block size
> + * @bsize: size to validate
> + */
> +static int
> +loop_validate_block_size(unsigned short bsize)
> +{
> + if (bsize < 512 || bsize > PAGE_SIZE || !is_power_of_2(bsize))
> + return -EINVAL;
> +
> + return 0;
> +}
> +
> /**
> * loop_set_size() - sets device size and notifies userspace
> * @lo: struct loop_device to set the size for
> @@ -1050,23 +1063,24 @@ loop_set_status_from_info(struct loop_device *lo,
> return 0;
> }
>
> -static int loop_set_fd(struct loop_device *lo, fmode_t mode,
> - struct block_device *bdev, unsigned int arg)
> +static int loop_configure(struct loop_device *lo, fmode_t mode,
> + struct block_device *bdev,
> + const struct loop_config *config)
> {
> struct file *file;
> struct inode *inode;
> struct address_space *mapping;
> struct block_device *claimed_bdev = NULL;
> - int lo_flags = 0;
> int error;
> loff_t size;
> bool partscan;
> + unsigned short bsize;
>
> /* This is safe, since we have a reference from open(). */
> __module_get(THIS_MODULE);
>
> error = -EBADF;
> - file = fget(arg);
> + file = fget(config->fd);
> if (!file)
> goto out;
>
> @@ -1075,7 +1089,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
> * here to avoid changing device under exclusive owner.
> */
> if (!(mode & FMODE_EXCL)) {
> - claimed_bdev = bd_start_claiming(bdev, loop_set_fd);
> + claimed_bdev = bd_start_claiming(bdev, loop_configure);
> if (IS_ERR(claimed_bdev)) {
> error = PTR_ERR(claimed_bdev);
> goto out_putf;
> @@ -1097,11 +1111,26 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
> mapping = file->f_mapping;
> inode = mapping->host;
>
> + size = get_loop_size(lo, file);
> +
> + if ((config->info.lo_flags & ~LOOP_CONFIGURE_SETTABLE_FLAGS) != 0) {
> + error = -EINVAL;
> + goto out_unlock;
> + }
> +
> + if (config->block_size) {
> + error = loop_validate_block_size(config->block_size);
> + if (error)
> + goto out_unlock;
> + }
> +
> + error = loop_set_status_from_info(lo, &config->info);
loop_set_status_from_info() doesn't call loop_config_discard() nor does
loop_configure(), I assume it should be called?
I'm asking because I upgraded to util-linux 2.37 which use the new
LOOP_CONFIGURE ioctl and fstrim is complaining that "the discard
operation is not supported" and the missing call of
loop_config_discard() stood out. Sadly I haven't been able to reproduce
the issue reliably outside CI (yet).
> + if (error)
> + goto out_unlock;
> +
> if (!(file->f_mode & FMODE_WRITE) || !(mode & FMODE_WRITE) ||
> !file->f_op->write_iter)
> - lo_flags |= LO_FLAGS_READ_ONLY;
> -
> - size = get_loop_size(lo, file);
> + lo->lo_flags |= LO_FLAGS_READ_ONLY;
>
> error = loop_prepare_queue(lo);
> if (error)
>