Currently the loop driver just simulates 512-byte blocks. When
creating bootable images on virtual machines it might be required
to use a different physical blocksize (eg 4k for S/390 DASD), as
some bootloaders (like lilo or zipl for S/390) need to know
the physical block addresses of the kernel and initrd.
This patchset extends the current LOOP_SET_STATUS64 ioctl to
set the logical and physical blocksize by re-using the existing
'lo_init' fields, which are currently unused.
As usual, comments and reviews are welcome.
Changes to v1:
- Move LO_FLAGS_BLOCKSIZE definition
- Reshuffle patches
Changes to v2:
- Drop lo_flags setting in loop_set_fd as suggested by Ming Lei
Hannes Reinecke (4):
loop: Enable correct physical blocksize
loop: Remove unused 'bdev' argument from loop_set_capacity
loop: Add 'lo_logical_blocksize'
loop: Pass logical blocksize in 'lo_init[0]' ioctl field
drivers/block/loop.c | 41 ++++++++++++++++++++++++++++++++++-------
drivers/block/loop.h | 1 +
include/uapi/linux/loop.h | 1 +
3 files changed, 36 insertions(+), 7 deletions(-)
--
1.8.5.6
When running on files the physical blocksize is actually 4k,
so we should be announcing it as such. This is enabled with
a new LO_FLAGS_BLOCKSIZE flag value to the existing
loop_set_status ioctl.
Signed-off-by: Hannes Reinecke <[email protected]>
---
drivers/block/loop.c | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 423f4ca..e790487 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -233,6 +233,8 @@ figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit)
lo->lo_offset = offset;
if (lo->lo_sizelimit != sizelimit)
lo->lo_sizelimit = sizelimit;
+ if (lo->lo_flags & LO_FLAGS_BLOCKSIZE)
+ blk_queue_physical_block_size(lo->lo_queue, lo->lo_blocksize);
set_capacity(lo->lo_disk, x);
bd_set_size(bdev, (loff_t)get_capacity(bdev->bd_disk) << 9);
/* let user-space know about the new size */
@@ -1092,6 +1094,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
int err;
struct loop_func_table *xfer;
kuid_t uid = current_uid();
+ int lo_flags = lo->lo_flags;
if (lo->lo_encrypt_key_size &&
!uid_eq(lo->lo_key_owner, uid) &&
@@ -1121,8 +1124,12 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
if (err)
return err;
+ if (info->lo_flags & LO_FLAGS_BLOCKSIZE)
+ lo->lo_flags |= LO_FLAGS_BLOCKSIZE;
+
if (lo->lo_offset != info->lo_offset ||
- lo->lo_sizelimit != info->lo_sizelimit)
+ lo->lo_sizelimit != info->lo_sizelimit ||
+ lo->lo_flags != lo_flags)
if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit))
return -EFBIG;
--
1.8.5.6
Signed-off-by: Hannes Reinecke <[email protected]>
Reviewed-by: Christoph Hellwig <[email protected]>
---
drivers/block/loop.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index e790487..7272055 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1310,7 +1310,7 @@ loop_get_status64(struct loop_device *lo, struct loop_info64 __user *arg) {
return err;
}
-static int loop_set_capacity(struct loop_device *lo, struct block_device *bdev)
+static int loop_set_capacity(struct loop_device *lo)
{
if (unlikely(lo->lo_state != Lo_bound))
return -ENXIO;
@@ -1373,7 +1373,7 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode,
case LOOP_SET_CAPACITY:
err = -EPERM;
if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN))
- err = loop_set_capacity(lo, bdev);
+ err = loop_set_capacity(lo);
break;
case LOOP_SET_DIRECT_IO:
err = -EPERM;
--
1.8.5.6
Add a new field 'lo_logical_blocksize' to hold the logical
blocksize of the loop device.
Signed-off-by: Hannes Reinecke <[email protected]>
---
drivers/block/loop.c | 9 +++++++--
drivers/block/loop.h | 1 +
include/uapi/linux/loop.h | 1 +
3 files changed, 9 insertions(+), 2 deletions(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 7272055..d0b8754 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -233,8 +233,11 @@ figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit)
lo->lo_offset = offset;
if (lo->lo_sizelimit != sizelimit)
lo->lo_sizelimit = sizelimit;
- if (lo->lo_flags & LO_FLAGS_BLOCKSIZE)
+ if (lo->lo_flags & LO_FLAGS_BLOCKSIZE) {
blk_queue_physical_block_size(lo->lo_queue, lo->lo_blocksize);
+ blk_queue_logical_block_size(lo->lo_queue,
+ lo->lo_logical_blocksize);
+ }
set_capacity(lo->lo_disk, x);
bd_set_size(bdev, (loff_t)get_capacity(bdev->bd_disk) << 9);
/* let user-space know about the new size */
@@ -821,6 +824,7 @@ static void loop_config_discard(struct loop_device *lo)
struct file *file = lo->lo_backing_file;
struct inode *inode = file->f_mapping->host;
struct request_queue *q = lo->lo_queue;
+ int lo_bits = blksize_bits(lo->lo_logical_blocksize);
/*
* We use punch hole to reclaim the free space used by the
@@ -840,7 +844,7 @@ static void loop_config_discard(struct loop_device *lo)
q->limits.discard_granularity = inode->i_sb->s_blocksize;
q->limits.discard_alignment = 0;
- blk_queue_max_discard_sectors(q, UINT_MAX >> 9);
+ blk_queue_max_discard_sectors(q, UINT_MAX >> lo_bits);
q->limits.discard_zeroes_data = 1;
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
}
@@ -929,6 +933,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
lo->use_dio = false;
lo->lo_blocksize = lo_blocksize;
+ lo->lo_logical_blocksize = 512;
lo->lo_device = bdev;
lo->lo_flags = lo_flags;
lo->lo_backing_file = file;
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index fb2237c..579f2f7 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -49,6 +49,7 @@ struct loop_device {
struct file * lo_backing_file;
struct block_device *lo_device;
unsigned lo_blocksize;
+ unsigned lo_logical_blocksize;
void *key_data;
gfp_t old_gfp_mask;
diff --git a/include/uapi/linux/loop.h b/include/uapi/linux/loop.h
index c8125ec..2691c1c 100644
--- a/include/uapi/linux/loop.h
+++ b/include/uapi/linux/loop.h
@@ -22,6 +22,7 @@ enum {
LO_FLAGS_AUTOCLEAR = 4,
LO_FLAGS_PARTSCAN = 8,
LO_FLAGS_DIRECT_IO = 16,
+ LO_FLAGS_BLOCKSIZE = 32,
};
#include <asm/posix_types.h> /* for __kernel_old_dev_t */
--
1.8.5.6
The current LOOP_SET_STATUS64 ioctl has two unused fields
'lo_init[2]', which can be used in conjunction with the
LO_FLAGS_BLOCKSIZE flag to pass in the new logical blocksize.
Signed-off-by: Hannes Reinecke <[email protected]>
---
drivers/block/loop.c | 25 ++++++++++++++++++++-----
1 file changed, 20 insertions(+), 5 deletions(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index d0b8754..6723e5e 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -221,7 +221,8 @@ static void __loop_update_dio(struct loop_device *lo, bool dio)
}
static int
-figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit)
+figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit,
+ loff_t logical_blocksize)
{
loff_t size = get_size(offset, sizelimit, lo->lo_backing_file);
sector_t x = (sector_t)size;
@@ -234,6 +235,8 @@ figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit)
if (lo->lo_sizelimit != sizelimit)
lo->lo_sizelimit = sizelimit;
if (lo->lo_flags & LO_FLAGS_BLOCKSIZE) {
+ if (lo->lo_logical_blocksize != logical_blocksize)
+ lo->lo_logical_blocksize = logical_blocksize;
blk_queue_physical_block_size(lo->lo_queue, lo->lo_blocksize);
blk_queue_logical_block_size(lo->lo_queue,
lo->lo_logical_blocksize);
@@ -1129,13 +1132,24 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
if (err)
return err;
- if (info->lo_flags & LO_FLAGS_BLOCKSIZE)
+ if (info->lo_flags & LO_FLAGS_BLOCKSIZE) {
lo->lo_flags |= LO_FLAGS_BLOCKSIZE;
+ if ((info->lo_init[0] != 512) &&
+ (info->lo_init[0] != 1024) &&
+ (info->lo_init[0] != 2048) &&
+ (info->lo_init[0] != 4096))
+ return -EINVAL;
+ if (info->lo_init[0] > lo->lo_blocksize)
+ return -EINVAL;
+ }
if (lo->lo_offset != info->lo_offset ||
lo->lo_sizelimit != info->lo_sizelimit ||
- lo->lo_flags != lo_flags)
- if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit))
+ lo->lo_flags != lo_flags ||
+ ((lo->lo_flags & LO_FLAGS_BLOCKSIZE) &&
+ (lo->lo_logical_blocksize != info->lo_init[0])))
+ if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit,
+ info->lo_init[0]))
return -EFBIG;
loop_config_discard(lo);
@@ -1320,7 +1334,8 @@ static int loop_set_capacity(struct loop_device *lo)
if (unlikely(lo->lo_state != Lo_bound))
return -ENXIO;
- return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit);
+ return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit,
+ lo->lo_logical_blocksize);
}
static int loop_set_dio(struct loop_device *lo, unsigned long arg)
--
1.8.5.6
Hi Hannes,
Hannes Reinecke <[email protected]> writes:
> When running on files the physical blocksize is actually 4k,
How did you come to that conclusion? Are you basing it on the file
system block size? If so, that's configurable at mkfs time and can be
anything from 512 bytes to 64k on current in-tree file systems that I
know of (depending on platform, of course).
The main use for physical block size, as I understand it, is to allow
partitioning utilities to place partitions on physical block boundaries
of the underlying storage. The benefit of that is to avoid
read-modify-writes for I/O which is naturally sized and aligned. If we
carry that forward to loop, then I think it does make sense to key off
of the file system block size, but the fact remains that 4k is not
universal.
So, I think the idea is sound, but you should be setting the physical
block size to sb->s_blocksize. And I don't see any reason why we
wouldn't do this by default, do you?
If you end up reposting this patch, would you mind including more of
this rationale in your commit message?
Thanks!
Jeff
Hannes Reinecke <[email protected]> writes:
> Signed-off-by: Hannes Reinecke <[email protected]>
> Reviewed-by: Christoph Hellwig <[email protected]>
Introduced by the code refactoring in 7b0576a3d835b4d46ed85d817ce016e90bf293a3.
Reviewed-by: Jeff Moyer <[email protected]>
> ---
> drivers/block/loop.c | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/block/loop.c b/drivers/block/loop.c
> index e790487..7272055 100644
> --- a/drivers/block/loop.c
> +++ b/drivers/block/loop.c
> @@ -1310,7 +1310,7 @@ loop_get_status64(struct loop_device *lo, struct loop_info64 __user *arg) {
> return err;
> }
>
> -static int loop_set_capacity(struct loop_device *lo, struct block_device *bdev)
> +static int loop_set_capacity(struct loop_device *lo)
> {
> if (unlikely(lo->lo_state != Lo_bound))
> return -ENXIO;
> @@ -1373,7 +1373,7 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode,
> case LOOP_SET_CAPACITY:
> err = -EPERM;
> if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN))
> - err = loop_set_capacity(lo, bdev);
> + err = loop_set_capacity(lo);
> break;
> case LOOP_SET_DIRECT_IO:
> err = -EPERM;
Hi Hannes,
Hannes Reinecke <[email protected]> writes:
> The current LOOP_SET_STATUS64 ioctl has two unused fields
> 'init[2]', which can be used in conjunction with the
> LO_FLAGS_BLOCKSIZE flag to pass in the new logical blocksize.
I don't see a reason to set LO_FLAGS_BLOCKSIZE inside of the
loop_device->lo_flags. It's not a flag that gets toggled; rather, it's a
flag that indicates that we're attempting to change the block size. The
block size is the persistent state. To further clarify, I'm okay with
passing it in info->lo_flags, I just don't like that it gets also set in
the struct loop_device. I also think that you should spell out
logical_block_size, instead of having physical_block_size and
block_size. It's just easier to read, in my opinion.
If you take my suggestion to set the physical block size automatically,
I think that will clean things up some, too. I'm looking forward to v2.
And just FYI, nothing looked horrible in patch 3/4, but I'm not going
to ack it until I see the v2 posting, since I think it will change at
least a little.
Cheers,
Jeff
> Signed-off-by: Hannes Reinecke <[email protected]>
> ---
> drivers/block/loop.c | 25 ++++++++++++++++++++-----
> 1 file changed, 20 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/block/loop.c b/drivers/block/loop.c
> index d0b8754..6723e5e 100644
> --- a/drivers/block/loop.c
> +++ b/drivers/block/loop.c
> @@ -221,7 +221,8 @@ static void __loop_update_dio(struct loop_device *lo, bool dio)
> }
>
> static int
> -figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit)
> +figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit,
> + loff_t logical_blocksize)
> {
> loff_t size = get_size(offset, sizelimit, lo->lo_backing_file);
> sector_t x = (sector_t)size;
> @@ -234,6 +235,8 @@ figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit)
> if (lo->lo_sizelimit != sizelimit)
> lo->lo_sizelimit = sizelimit;
> if (lo->lo_flags & LO_FLAGS_BLOCKSIZE) {
> + if (lo->lo_logical_blocksize != logical_blocksize)
> + lo->lo_logical_blocksize = logical_blocksize;
> blk_queue_physical_block_size(lo->lo_queue, lo->lo_blocksize);
> blk_queue_logical_block_size(lo->lo_queue,
> lo->lo_logical_blocksize);
> @@ -1129,13 +1132,24 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
> if (err)
> return err;
>
> - if (info->lo_flags & LO_FLAGS_BLOCKSIZE)
> + if (info->lo_flags & LO_FLAGS_BLOCKSIZE) {
> lo->lo_flags |= LO_FLAGS_BLOCKSIZE;
> + if ((info->lo_init[0] != 512) &&
> + (info->lo_init[0] != 1024) &&
> + (info->lo_init[0] != 2048) &&
> + (info->lo_init[0] != 4096))
> + return -EINVAL;
> + if (info->lo_init[0] > lo->lo_blocksize)
> + return -EINVAL;
> + }
>
> if (lo->lo_offset != info->lo_offset ||
> lo->lo_sizelimit != info->lo_sizelimit ||
> - lo->lo_flags != lo_flags)
> - if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit))
> + lo->lo_flags != lo_flags ||
> + ((lo->lo_flags & LO_FLAGS_BLOCKSIZE) &&
> + (lo->lo_logical_blocksize != info->lo_init[0])))
> + if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit,
> + info->lo_init[0]))
> return -EFBIG;
>
> loop_config_discard(lo);
> @@ -1320,7 +1334,8 @@ static int loop_set_capacity(struct loop_device *lo)
> if (unlikely(lo->lo_state != Lo_bound))
> return -ENXIO;
>
> - return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit);
> + return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit,
> + lo->lo_logical_blocksize);
> }
>
> static int loop_set_dio(struct loop_device *lo, unsigned long arg)
On Tue, Nov 10, 2015 at 4:13 PM, Hannes Reinecke <[email protected]> wrote:
> When running on files the physical blocksize is actually 4k,
> so we should be announcing it as such. This is enabled with
> a new LO_FLAGS_BLOCKSIZE flag value to the existing
> loop_set_status ioctl.
LO_FLAGS_BLOCKSIZE is defined in patch 3/4, and you use
it too early in patch 1/4.
>
> Signed-off-by: Hannes Reinecke <[email protected]>
> ---
> drivers/block/loop.c | 9 ++++++++-
> 1 file changed, 8 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/block/loop.c b/drivers/block/loop.c
> index 423f4ca..e790487 100644
> --- a/drivers/block/loop.c
> +++ b/drivers/block/loop.c
> @@ -233,6 +233,8 @@ figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit)
> lo->lo_offset = offset;
> if (lo->lo_sizelimit != sizelimit)
> lo->lo_sizelimit = sizelimit;
> + if (lo->lo_flags & LO_FLAGS_BLOCKSIZE)
> + blk_queue_physical_block_size(lo->lo_queue, lo->lo_blocksize);
> set_capacity(lo->lo_disk, x);
> bd_set_size(bdev, (loff_t)get_capacity(bdev->bd_disk) << 9);
> /* let user-space know about the new size */
> @@ -1092,6 +1094,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
> int err;
> struct loop_func_table *xfer;
> kuid_t uid = current_uid();
> + int lo_flags = lo->lo_flags;
>
> if (lo->lo_encrypt_key_size &&
> !uid_eq(lo->lo_key_owner, uid) &&
> @@ -1121,8 +1124,12 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
> if (err)
> return err;
>
> + if (info->lo_flags & LO_FLAGS_BLOCKSIZE)
> + lo->lo_flags |= LO_FLAGS_BLOCKSIZE;
> +
> if (lo->lo_offset != info->lo_offset ||
> - lo->lo_sizelimit != info->lo_sizelimit)
> + lo->lo_sizelimit != info->lo_sizelimit ||
> + lo->lo_flags != lo_flags)
> if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit))
> return -EFBIG;
>
> --
> 1.8.5.6
>
--
Ming Lei
On 11/13/2015 09:57 PM, Jeff Moyer wrote:
> Hi Hannes,
>
> Hannes Reinecke <[email protected]> writes:
>
>> When running on files the physical blocksize is actually 4k,
>
> How did you come to that conclusion? Are you basing it on the file
> system block size? If so, that's configurable at mkfs time and can be
> anything from 512 bytes to 64k on current in-tree file systems that I
> know of (depending on platform, of course).
>
loop.c does this (in do_loop_switch()):
mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask);
lo->lo_backing_file = file;
lo->lo_blocksize = S_ISBLK(mapping->host->i_mode) ?
mapping->host->i_bdev->bd_block_size : PAGE_SIZE;
lo->old_gfp_mask = mapping_gfp_mask(mapping);
So either it's a block device, then we're taking the blocksize of
the underlying device, or we're using PAGE_SIZE.
Which is architecture dependent, of course.
> The main use for physical block size, as I understand it, is to allow
> partitioning utilities to place partitions on physical block boundaries
> of the underlying storage. The benefit of that is to avoid
> read-modify-writes for I/O which is naturally sized and aligned. If we
> carry that forward to loop, then I think it does make sense to key off
> of the file system block size, but the fact remains that 4k is not
> universal.
>
The main point here is that some utilities (eg bootloaders) need to
know the _physical_ location of a particular blob, for which it
needs to know the physical blocksize.
> So, I think the idea is sound, but you should be setting the physical
> block size to sb->s_blocksize. And I don't see any reason why we
> wouldn't do this by default, do you?
>
Neither do I. But the code doesn't treat it that way, so I elected
to stay with the current version.
> If you end up reposting this patch, would you mind including more of
> this rationale in your commit message?
>
Sure.
Cheers,
Hannes
--
Dr. Hannes Reinecke zSeries & Storage
[email protected] +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)