Darrick,
After reading the comments from you, Dave Chinner, and Dan, it looks like
dynamic S_DAX flag support won't be coming, or at least not any time
soon. Here is the collection of patches so far to address your and
Dave C's comments for minimal support. Please let me know what else I am
missing. Thanks!
v6:
- Removed excess () per Christoph's comment.
v5:
- Removed the sb parameter for bdev_dax_supported() since we only use it for
debug output, per Christoph's comment.
v4:
- Removed setting of error return in ext2 and ext4 per Ross's comments
- Rebased against 4.16-rc1 with updates
---
Darrick J. Wong (1):
fs: allow per-device dax status checking for filesystems
Dave Jiang (2):
dax: change bdev_dax_supported() to support boolean returns
xfs: reject removal of realtime flag when datadev doesn't support DAX
drivers/dax/super.c | 44 ++++++++++++++++++++++----------------------
fs/ext2/super.c | 3 +--
fs/ext4/super.c | 3 +--
fs/xfs/xfs_ioctl.c | 17 ++++++++++++++++-
fs/xfs/xfs_iops.c | 30 +++++++++++++++++++++++++-----
fs/xfs/xfs_super.c | 10 ++++++++--
include/linux/dax.h | 12 ++++--------
7 files changed, 77 insertions(+), 42 deletions(-)
--
From: Darrick J. Wong <[email protected]>
Remove __bdev_dax_supported and change to bdev_dax_supported that takes a
bdev parameter. This enables multi-device filesystems like xfs to check
that a dax device can work for the particular filesystem. Once that's
in place, actually fix all the parts of XFS where we need to be able to
distinguish between datadev and rtdev.
This patch fixes the problem where we screw up the dax support checking
in xfs if the datadev and rtdev have different dax capabilities.
Signed-off-by: Darrick J. Wong <[email protected]>
Signed-off-by: Dave Jiang <[email protected]>
Reviewed-by: Christoph Hellwig <[email protected]>
---
drivers/dax/super.c | 30 +++++++++++++++---------------
fs/ext2/super.c | 2 +-
fs/ext4/super.c | 2 +-
fs/xfs/xfs_ioctl.c | 3 ++-
fs/xfs/xfs_iops.c | 30 +++++++++++++++++++++++++-----
fs/xfs/xfs_super.c | 10 ++++++++--
include/linux/dax.h | 10 +++-------
7 files changed, 55 insertions(+), 32 deletions(-)
diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index 473af694ad1c..f037458600aa 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -73,8 +73,8 @@ EXPORT_SYMBOL_GPL(fs_dax_get_by_bdev);
#endif
/**
- * __bdev_dax_supported() - Check if the device supports dax for filesystem
- * @sb: The superblock of the device
+ * bdev_dax_supported() - Check if the device supports dax for filesystem
+ * @bdev: block device to check
* @blocksize: The block size of the device
*
* This is a library function for filesystems to check if the block device
@@ -82,33 +82,33 @@ EXPORT_SYMBOL_GPL(fs_dax_get_by_bdev);
*
* Return: negative errno if unsupported, 0 if supported.
*/
-int __bdev_dax_supported(struct super_block *sb, int blocksize)
+int bdev_dax_supported(struct block_device *bdev, int blocksize)
{
- struct block_device *bdev = sb->s_bdev;
struct dax_device *dax_dev;
pgoff_t pgoff;
int err, id;
void *kaddr;
pfn_t pfn;
long len;
+ char buf[BDEVNAME_SIZE];
if (blocksize != PAGE_SIZE) {
- pr_debug("VFS (%s): error: unsupported blocksize for dax\n",
- sb->s_id);
+ pr_debug("%s: error: unsupported blocksize for dax\n",
+ bdevname(bdev, buf));
return -EINVAL;
}
err = bdev_dax_pgoff(bdev, 0, PAGE_SIZE, &pgoff);
if (err) {
- pr_debug("VFS (%s): error: unaligned partition for dax\n",
- sb->s_id);
+ pr_debug("%s: error: unaligned partition for dax\n",
+ bdevname(bdev, buf));
return err;
}
dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
if (!dax_dev) {
- pr_debug("VFS (%s): error: device does not support dax\n",
- sb->s_id);
+ pr_debug("%s: error: device does not support dax\n",
+ bdevname(bdev, buf));
return -EOPNOTSUPP;
}
@@ -119,8 +119,8 @@ int __bdev_dax_supported(struct super_block *sb, int blocksize)
put_dax(dax_dev);
if (len < 1) {
- pr_debug("VFS (%s): error: dax access failed (%ld)\n",
- sb->s_id, len);
+ pr_debug("%s: error: dax access failed (%ld)\n",
+ bdevname(bdev, buf), len);
return len < 0 ? len : -EIO;
}
@@ -128,14 +128,14 @@ int __bdev_dax_supported(struct super_block *sb, int blocksize)
|| pfn_t_devmap(pfn))
/* pass */;
else {
- pr_debug("VFS (%s): error: dax support not enabled\n",
- sb->s_id);
+ pr_debug("%s: error: dax support not enabled\n",
+ bdevname(bdev, buf));
return -EOPNOTSUPP;
}
return 0;
}
-EXPORT_SYMBOL_GPL(__bdev_dax_supported);
+EXPORT_SYMBOL_GPL(bdev_dax_supported);
#endif
enum dax_device_flags {
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 7666c065b96f..b8063b640ec1 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -961,7 +961,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
if (sbi->s_mount_opt & EXT2_MOUNT_DAX) {
- err = bdev_dax_supported(sb, blocksize);
+ err = bdev_dax_supported(sb->s_bdev, blocksize);
if (err) {
ext2_msg(sb, KERN_ERR,
"DAX unsupported by block device. Turning off DAX.");
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 39bf464c35f1..911eca771853 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3714,7 +3714,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
" that may contain inline data");
sbi->s_mount_opt &= ~EXT4_MOUNT_DAX;
}
- err = bdev_dax_supported(sb, blocksize);
+ err = bdev_dax_supported(sb->s_bdev, blocksize);
if (err) {
ext4_msg(sb, KERN_ERR,
"DAX unsupported by block device. Turning off DAX.");
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 89fb1eb80aae..0effd46b965f 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -1103,7 +1103,8 @@ xfs_ioctl_setattr_dax_invalidate(
if (fa->fsx_xflags & FS_XFLAG_DAX) {
if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)))
return -EINVAL;
- if (bdev_dax_supported(sb, sb->s_blocksize) < 0)
+ if (bdev_dax_supported(xfs_find_bdev_for_inode(VFS_I(ip)),
+ sb->s_blocksize) < 0)
return -EINVAL;
}
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 56475fcd76f2..66cd61c172af 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -1182,6 +1182,30 @@ static const struct inode_operations xfs_inline_symlink_inode_operations = {
.update_time = xfs_vn_update_time,
};
+/* Figure out if this file actually supports DAX. */
+static bool
+xfs_inode_supports_dax(
+ struct xfs_inode *ip)
+{
+ struct xfs_mount *mp = ip->i_mount;
+
+ /* Only supported on non-reflinked files. */
+ if (!S_ISREG(VFS_I(ip)->i_mode) || xfs_is_reflink_inode(ip))
+ return false;
+
+ /* DAX mount option or DAX iflag must be set. */
+ if (!(mp->m_flags & XFS_MOUNT_DAX) &&
+ !(ip->i_d.di_flags2 & XFS_DIFLAG2_DAX))
+ return false;
+
+ /* Block size must match page size */
+ if (mp->m_sb.sb_blocksize != PAGE_SIZE)
+ return false;
+
+ /* Device has to support DAX too. */
+ return xfs_find_daxdev_for_inode(VFS_I(ip)) != NULL;
+}
+
STATIC void
xfs_diflags_to_iflags(
struct inode *inode,
@@ -1200,11 +1224,7 @@ xfs_diflags_to_iflags(
inode->i_flags |= S_SYNC;
if (flags & XFS_DIFLAG_NOATIME)
inode->i_flags |= S_NOATIME;
- if (S_ISREG(inode->i_mode) &&
- ip->i_mount->m_sb.sb_blocksize == PAGE_SIZE &&
- !xfs_is_reflink_inode(ip) &&
- (ip->i_mount->m_flags & XFS_MOUNT_DAX ||
- ip->i_d.di_flags2 & XFS_DIFLAG2_DAX))
+ if (xfs_inode_supports_dax(ip))
inode->i_flags |= S_DAX;
}
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 7aba628dc527..aaaba6e604c4 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1657,11 +1657,17 @@ xfs_fs_fill_super(
sb->s_flags |= SB_I_VERSION;
if (mp->m_flags & XFS_MOUNT_DAX) {
+ int error2 = 0;
+
xfs_warn(mp,
"DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
- error = bdev_dax_supported(sb, sb->s_blocksize);
- if (error) {
+ error = bdev_dax_supported(mp->m_ddev_targp->bt_bdev,
+ sb->s_blocksize);
+ if (mp->m_rtdev_targp)
+ error2 = bdev_dax_supported(mp->m_rtdev_targp->bt_bdev,
+ sb->s_blocksize);
+ if (error && error2) {
xfs_alert(mp,
"DAX unsupported by block device. Turning off DAX.");
mp->m_flags &= ~XFS_MOUNT_DAX;
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 0185ecdae135..8eb3a359df95 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -40,12 +40,7 @@ static inline void put_dax(struct dax_device *dax_dev)
int bdev_dax_pgoff(struct block_device *, sector_t, size_t, pgoff_t *pgoff);
#if IS_ENABLED(CONFIG_FS_DAX)
-int __bdev_dax_supported(struct super_block *sb, int blocksize);
-static inline int bdev_dax_supported(struct super_block *sb, int blocksize)
-{
- return __bdev_dax_supported(sb, blocksize);
-}
In a situation where the rt_dev is DAX and data_dev is not DAX, if the user
requests to remove the realtime flag via ioctl we can no longer support DAX
for that file. Dynamic changing of S_DAX on the inode is not supported due
to various complications in the existing implementation. Therefore until we
address the dynamic S_DAX change issues, we must disallow realtime flag
being removed.
Signed-off-by: Dave Jiang <[email protected]>
Reviewed-by: Christoph Hellwig <[email protected]>
---
fs/xfs/xfs_ioctl.c | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 2c70a0a4f59f..edd97d527fe8 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -1030,6 +1030,20 @@ xfs_ioctl_setattr_xflags(
{
struct xfs_mount *mp = ip->i_mount;
uint64_t di_flags2;
+ struct inode *inode = VFS_I(ip);
+ struct super_block *sb = inode->i_sb;
+
+ /*
+ * In the case that the inode is realtime, and we are trying to remove
+ * the realtime flag, and the rtdev supports DAX but the datadev does
+ * not support DAX, we can't allow the realtime flag to be removed
+ * since we do not support dynamic S_DAX flag removal yet.
+ */
+ if (XFS_IS_REALTIME_INODE(ip) &&
+ !(fa->fsx_xflags & FS_XFLAG_REALTIME) &&
+ bdev_dax_supported(mp->m_rtdev_targp->bt_bdev, sb->s_blocksize) &&
+ !bdev_dax_supported(mp->m_ddev_targp->bt_bdev, sb->s_blocksize))
+ return -ENOTSUPP;
/* Can't change realtime flag if any extents are allocated. */
if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
On Fri, Feb 16, 2018 at 10:04:26AM -0700, Dave Jiang wrote:
> In a situation where the rt_dev is DAX and data_dev is not DAX, if the user
> requests to remove the realtime flag via ioctl we can no longer support DAX
> for that file. Dynamic changing of S_DAX on the inode is not supported due
> to various complications in the existing implementation. Therefore until we
> address the dynamic S_DAX change issues, we must disallow realtime flag
> being removed.
>
> Signed-off-by: Dave Jiang <[email protected]>
> Reviewed-by: Christoph Hellwig <[email protected]>
> ---
> fs/xfs/xfs_ioctl.c | 14 ++++++++++++++
> 1 file changed, 14 insertions(+)
>
> diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
> index 2c70a0a4f59f..edd97d527fe8 100644
> --- a/fs/xfs/xfs_ioctl.c
> +++ b/fs/xfs/xfs_ioctl.c
> @@ -1030,6 +1030,20 @@ xfs_ioctl_setattr_xflags(
> {
> struct xfs_mount *mp = ip->i_mount;
> uint64_t di_flags2;
> + struct inode *inode = VFS_I(ip);
> + struct super_block *sb = inode->i_sb;
> +
> + /*
> + * In the case that the inode is realtime, and we are trying to remove
> + * the realtime flag, and the rtdev supports DAX but the datadev does
> + * not support DAX, we can't allow the realtime flag to be removed
> + * since we do not support dynamic S_DAX flag removal yet.
> + */
> + if (XFS_IS_REALTIME_INODE(ip) &&
> + !(fa->fsx_xflags & FS_XFLAG_REALTIME) &&
> + bdev_dax_supported(mp->m_rtdev_targp->bt_bdev, sb->s_blocksize) &&
> + !bdev_dax_supported(mp->m_ddev_targp->bt_bdev, sb->s_blocksize))
What happens here if we have a non-rt file that we're trying to turn
into an rt file and the data dev supports dax but not the rt dev?
Changing the rt flag is only supported on files with no data blocks (no
extents, no delalloc blocks), so why can't we remove S_DAX from an empty
file? There aren't any memory mappings or page cache to get in the way,
correct?
Please fix the complications in the existing implementation that prevent
us from removing S_DAX rather than adding more XFS restrictions, or at
least tell me what's holding that up.
--D
> + return -ENOTSUPP;
>
> /* Can't change realtime flag if any extents are allocated. */
> if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, Feb 16, 2018 at 09:22:47AM -0800, Darrick J. Wong wrote:
> On Fri, Feb 16, 2018 at 10:04:26AM -0700, Dave Jiang wrote:
> > In a situation where the rt_dev is DAX and data_dev is not DAX, if the user
> > requests to remove the realtime flag via ioctl we can no longer support DAX
> > for that file. Dynamic changing of S_DAX on the inode is not supported due
> > to various complications in the existing implementation. Therefore until we
> > address the dynamic S_DAX change issues, we must disallow realtime flag
> > being removed.
> >
> > Signed-off-by: Dave Jiang <[email protected]>
> > Reviewed-by: Christoph Hellwig <[email protected]>
> > ---
> > fs/xfs/xfs_ioctl.c | 14 ++++++++++++++
> > 1 file changed, 14 insertions(+)
> >
> > diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
> > index 2c70a0a4f59f..edd97d527fe8 100644
> > --- a/fs/xfs/xfs_ioctl.c
> > +++ b/fs/xfs/xfs_ioctl.c
> > @@ -1030,6 +1030,20 @@ xfs_ioctl_setattr_xflags(
> > {
> > struct xfs_mount *mp = ip->i_mount;
> > uint64_t di_flags2;
> > + struct inode *inode = VFS_I(ip);
> > + struct super_block *sb = inode->i_sb;
> > +
> > + /*
> > + * In the case that the inode is realtime, and we are trying to remove
> > + * the realtime flag, and the rtdev supports DAX but the datadev does
> > + * not support DAX, we can't allow the realtime flag to be removed
> > + * since we do not support dynamic S_DAX flag removal yet.
> > + */
> > + if (XFS_IS_REALTIME_INODE(ip) &&
> > + !(fa->fsx_xflags & FS_XFLAG_REALTIME) &&
> > + bdev_dax_supported(mp->m_rtdev_targp->bt_bdev, sb->s_blocksize) &&
> > + !bdev_dax_supported(mp->m_ddev_targp->bt_bdev, sb->s_blocksize))
>
> What happens here if we have a non-rt file that we're trying to turn
> into an rt file and the data dev supports dax but not the rt dev?
>
> Changing the rt flag is only supported on files with no data blocks (no
> extents, no delalloc blocks), so why can't we remove S_DAX from an empty
> file? There aren't any memory mappings or page cache to get in the way,
> correct?
File size can be non-zero, so you can have DAX read-over-hole
mappings present. I simply don't think it's safe to remove/add S_DAX
flags via ioctls right now. If we have a DAX capable rtdev, then the
only way we should allow rtdev+dax to be used right now is via the
RT inherit bit on the dir that creates files in the rtdev right from
the start. i.e. we can't set/remove the RT inode flag on an inode
via ioctl if rtdev+dax is enabled until the whole dynamic S_DAX
inode flag thing is resolved.
Cheers,
Dave.
--
Dave Chinner
[email protected]
On Sun, Feb 18, 2018 at 11:23:17AM +1100, Dave Chinner wrote:
> On Fri, Feb 16, 2018 at 09:22:47AM -0800, Darrick J. Wong wrote:
> > On Fri, Feb 16, 2018 at 10:04:26AM -0700, Dave Jiang wrote:
> > > In a situation where the rt_dev is DAX and data_dev is not DAX, if the user
> > > requests to remove the realtime flag via ioctl we can no longer support DAX
> > > for that file. Dynamic changing of S_DAX on the inode is not supported due
> > > to various complications in the existing implementation. Therefore until we
> > > address the dynamic S_DAX change issues, we must disallow realtime flag
> > > being removed.
> > >
> > > Signed-off-by: Dave Jiang <[email protected]>
> > > Reviewed-by: Christoph Hellwig <[email protected]>
> > > ---
> > > fs/xfs/xfs_ioctl.c | 14 ++++++++++++++
> > > 1 file changed, 14 insertions(+)
> > >
> > > diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
> > > index 2c70a0a4f59f..edd97d527fe8 100644
> > > --- a/fs/xfs/xfs_ioctl.c
> > > +++ b/fs/xfs/xfs_ioctl.c
> > > @@ -1030,6 +1030,20 @@ xfs_ioctl_setattr_xflags(
> > > {
> > > struct xfs_mount *mp = ip->i_mount;
> > > uint64_t di_flags2;
> > > + struct inode *inode = VFS_I(ip);
> > > + struct super_block *sb = inode->i_sb;
> > > +
> > > + /*
> > > + * In the case that the inode is realtime, and we are trying to remove
> > > + * the realtime flag, and the rtdev supports DAX but the datadev does
> > > + * not support DAX, we can't allow the realtime flag to be removed
> > > + * since we do not support dynamic S_DAX flag removal yet.
> > > + */
> > > + if (XFS_IS_REALTIME_INODE(ip) &&
> > > + !(fa->fsx_xflags & FS_XFLAG_REALTIME) &&
> > > + bdev_dax_supported(mp->m_rtdev_targp->bt_bdev, sb->s_blocksize) &&
> > > + !bdev_dax_supported(mp->m_ddev_targp->bt_bdev, sb->s_blocksize))
> >
> > What happens here if we have a non-rt file that we're trying to turn
> > into an rt file and the data dev supports dax but not the rt dev?
> >
> > Changing the rt flag is only supported on files with no data blocks (no
> > extents, no delalloc blocks), so why can't we remove S_DAX from an empty
> > file? There aren't any memory mappings or page cache to get in the way,
> > correct?
>
> File size can be non-zero, so you can have DAX read-over-hole
> mappings present. I simply don't think it's safe to remove/add S_DAX
> flags via ioctls right now. If we have a DAX capable rtdev, then the
> only way we should allow rtdev+dax to be used right now is via the
> RT inherit bit on the dir that creates files in the rtdev right from
> the start. i.e. we can't set/remove the RT inode flag on an inode
> via ioctl if rtdev+dax is enabled until the whole dynamic S_DAX
> inode flag thing is resolved.
Could we deal with the restriction that the DAX flag can't change
(whether by user ioctl or by toggling the rt flag) unless the file size
is zero? That adds another way setting/clearing the realtime flag can
fail, but at least it'd be the same EINVAL.
--D
>
> Cheers,
>
> Dave.
> --
> Dave Chinner
> [email protected]
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, Feb 20, 2018 at 03:01:09PM -0800, Darrick J. Wong wrote:
> On Sun, Feb 18, 2018 at 11:23:17AM +1100, Dave Chinner wrote:
> > On Fri, Feb 16, 2018 at 09:22:47AM -0800, Darrick J. Wong wrote:
> > > On Fri, Feb 16, 2018 at 10:04:26AM -0700, Dave Jiang wrote:
> > > > In a situation where the rt_dev is DAX and data_dev is not DAX, if the user
> > > > requests to remove the realtime flag via ioctl we can no longer support DAX
> > > > for that file. Dynamic changing of S_DAX on the inode is not supported due
> > > > to various complications in the existing implementation. Therefore until we
> > > > address the dynamic S_DAX change issues, we must disallow realtime flag
> > > > being removed.
> > > >
> > > > Signed-off-by: Dave Jiang <[email protected]>
> > > > Reviewed-by: Christoph Hellwig <[email protected]>
> > > > ---
> > > > fs/xfs/xfs_ioctl.c | 14 ++++++++++++++
> > > > 1 file changed, 14 insertions(+)
> > > >
> > > > diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
> > > > index 2c70a0a4f59f..edd97d527fe8 100644
> > > > --- a/fs/xfs/xfs_ioctl.c
> > > > +++ b/fs/xfs/xfs_ioctl.c
> > > > @@ -1030,6 +1030,20 @@ xfs_ioctl_setattr_xflags(
> > > > {
> > > > struct xfs_mount *mp = ip->i_mount;
> > > > uint64_t di_flags2;
> > > > + struct inode *inode = VFS_I(ip);
> > > > + struct super_block *sb = inode->i_sb;
> > > > +
> > > > + /*
> > > > + * In the case that the inode is realtime, and we are trying to remove
> > > > + * the realtime flag, and the rtdev supports DAX but the datadev does
> > > > + * not support DAX, we can't allow the realtime flag to be removed
> > > > + * since we do not support dynamic S_DAX flag removal yet.
> > > > + */
> > > > + if (XFS_IS_REALTIME_INODE(ip) &&
> > > > + !(fa->fsx_xflags & FS_XFLAG_REALTIME) &&
> > > > + bdev_dax_supported(mp->m_rtdev_targp->bt_bdev, sb->s_blocksize) &&
> > > > + !bdev_dax_supported(mp->m_ddev_targp->bt_bdev, sb->s_blocksize))
> > >
> > > What happens here if we have a non-rt file that we're trying to turn
> > > into an rt file and the data dev supports dax but not the rt dev?
> > >
> > > Changing the rt flag is only supported on files with no data blocks (no
> > > extents, no delalloc blocks), so why can't we remove S_DAX from an empty
> > > file? There aren't any memory mappings or page cache to get in the way,
> > > correct?
> >
> > File size can be non-zero, so you can have DAX read-over-hole
> > mappings present. I simply don't think it's safe to remove/add S_DAX
> > flags via ioctls right now. If we have a DAX capable rtdev, then the
> > only way we should allow rtdev+dax to be used right now is via the
> > RT inherit bit on the dir that creates files in the rtdev right from
> > the start. i.e. we can't set/remove the RT inode flag on an inode
> > via ioctl if rtdev+dax is enabled until the whole dynamic S_DAX
> > inode flag thing is resolved.
>
> Could we deal with the restriction that the DAX flag can't change
> (whether by user ioctl or by toggling the rt flag) unless the file size
> is zero? That adds another way setting/clearing the realtime flag can
> fail, but at least it'd be the same EINVAL.
I thought we still mmap a zero length file and get a page fault that
returns a zeroed page? Or does that segv?
Cheers,
Dave.
--
Dave Chinner
[email protected]
On Wed, Feb 21, 2018 at 10:15:24AM +1100, Dave Chinner wrote:
> On Tue, Feb 20, 2018 at 03:01:09PM -0800, Darrick J. Wong wrote:
> > On Sun, Feb 18, 2018 at 11:23:17AM +1100, Dave Chinner wrote:
> > > On Fri, Feb 16, 2018 at 09:22:47AM -0800, Darrick J. Wong wrote:
> > > > On Fri, Feb 16, 2018 at 10:04:26AM -0700, Dave Jiang wrote:
> > > > > In a situation where the rt_dev is DAX and data_dev is not DAX, if the user
> > > > > requests to remove the realtime flag via ioctl we can no longer support DAX
> > > > > for that file. Dynamic changing of S_DAX on the inode is not supported due
> > > > > to various complications in the existing implementation. Therefore until we
> > > > > address the dynamic S_DAX change issues, we must disallow realtime flag
> > > > > being removed.
> > > > >
> > > > > Signed-off-by: Dave Jiang <[email protected]>
> > > > > Reviewed-by: Christoph Hellwig <[email protected]>
> > > > > ---
> > > > > fs/xfs/xfs_ioctl.c | 14 ++++++++++++++
> > > > > 1 file changed, 14 insertions(+)
> > > > >
> > > > > diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
> > > > > index 2c70a0a4f59f..edd97d527fe8 100644
> > > > > --- a/fs/xfs/xfs_ioctl.c
> > > > > +++ b/fs/xfs/xfs_ioctl.c
> > > > > @@ -1030,6 +1030,20 @@ xfs_ioctl_setattr_xflags(
> > > > > {
> > > > > struct xfs_mount *mp = ip->i_mount;
> > > > > uint64_t di_flags2;
> > > > > + struct inode *inode = VFS_I(ip);
> > > > > + struct super_block *sb = inode->i_sb;
> > > > > +
> > > > > + /*
> > > > > + * In the case that the inode is realtime, and we are trying to remove
> > > > > + * the realtime flag, and the rtdev supports DAX but the datadev does
> > > > > + * not support DAX, we can't allow the realtime flag to be removed
> > > > > + * since we do not support dynamic S_DAX flag removal yet.
> > > > > + */
> > > > > + if (XFS_IS_REALTIME_INODE(ip) &&
> > > > > + !(fa->fsx_xflags & FS_XFLAG_REALTIME) &&
> > > > > + bdev_dax_supported(mp->m_rtdev_targp->bt_bdev, sb->s_blocksize) &&
> > > > > + !bdev_dax_supported(mp->m_ddev_targp->bt_bdev, sb->s_blocksize))
> > > >
> > > > What happens here if we have a non-rt file that we're trying to turn
> > > > into an rt file and the data dev supports dax but not the rt dev?
> > > >
> > > > Changing the rt flag is only supported on files with no data blocks (no
> > > > extents, no delalloc blocks), so why can't we remove S_DAX from an empty
> > > > file? There aren't any memory mappings or page cache to get in the way,
> > > > correct?
> > >
> > > File size can be non-zero, so you can have DAX read-over-hole
> > > mappings present. I simply don't think it's safe to remove/add S_DAX
> > > flags via ioctls right now. If we have a DAX capable rtdev, then the
> > > only way we should allow rtdev+dax to be used right now is via the
> > > RT inherit bit on the dir that creates files in the rtdev right from
> > > the start. i.e. we can't set/remove the RT inode flag on an inode
> > > via ioctl if rtdev+dax is enabled until the whole dynamic S_DAX
> > > inode flag thing is resolved.
> >
> > Could we deal with the restriction that the DAX flag can't change
> > (whether by user ioctl or by toggling the rt flag) unless the file size
> > is zero? That adds another way setting/clearing the realtime flag can
> > fail, but at least it'd be the same EINVAL.
>
> I thought we still mmap a zero length file and get a page fault that
> returns a zeroed page? Or does that segv?
I think it segfaults, but let's see...
$ rm -rf /opt/b ; xfs_io -f -c 'mmap -rw 0 1m' -c 'mread 512 20' /opt/b
Bus error
$ rm -rf /opt/b ; xfs_io -f -c 'mmap -rw 0 1m' -c 'mwrite 512 20' /opt/b
Bus error
--D
>
> Cheers,
>
> Dave.
> --
> Dave Chinner
> [email protected]
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
On 02/20/2018 04:23 PM, Darrick J. Wong wrote:
> On Wed, Feb 21, 2018 at 10:15:24AM +1100, Dave Chinner wrote:
>> On Tue, Feb 20, 2018 at 03:01:09PM -0800, Darrick J. Wong wrote:
>>> On Sun, Feb 18, 2018 at 11:23:17AM +1100, Dave Chinner wrote:
>>>> On Fri, Feb 16, 2018 at 09:22:47AM -0800, Darrick J. Wong wrote:
>>>>> On Fri, Feb 16, 2018 at 10:04:26AM -0700, Dave Jiang wrote:
>>>>>> In a situation where the rt_dev is DAX and data_dev is not DAX, if the user
>>>>>> requests to remove the realtime flag via ioctl we can no longer support DAX
>>>>>> for that file. Dynamic changing of S_DAX on the inode is not supported due
>>>>>> to various complications in the existing implementation. Therefore until we
>>>>>> address the dynamic S_DAX change issues, we must disallow realtime flag
>>>>>> being removed.
>>>>>>
>>>>>> Signed-off-by: Dave Jiang <[email protected]>
>>>>>> Reviewed-by: Christoph Hellwig <[email protected]>
>>>>>> ---
>>>>>> fs/xfs/xfs_ioctl.c | 14 ++++++++++++++
>>>>>> 1 file changed, 14 insertions(+)
>>>>>>
>>>>>> diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
>>>>>> index 2c70a0a4f59f..edd97d527fe8 100644
>>>>>> --- a/fs/xfs/xfs_ioctl.c
>>>>>> +++ b/fs/xfs/xfs_ioctl.c
>>>>>> @@ -1030,6 +1030,20 @@ xfs_ioctl_setattr_xflags(
>>>>>> {
>>>>>> struct xfs_mount *mp = ip->i_mount;
>>>>>> uint64_t di_flags2;
>>>>>> + struct inode *inode = VFS_I(ip);
>>>>>> + struct super_block *sb = inode->i_sb;
>>>>>> +
>>>>>> + /*
>>>>>> + * In the case that the inode is realtime, and we are trying to remove
>>>>>> + * the realtime flag, and the rtdev supports DAX but the datadev does
>>>>>> + * not support DAX, we can't allow the realtime flag to be removed
>>>>>> + * since we do not support dynamic S_DAX flag removal yet.
>>>>>> + */
>>>>>> + if (XFS_IS_REALTIME_INODE(ip) &&
>>>>>> + !(fa->fsx_xflags & FS_XFLAG_REALTIME) &&
>>>>>> + bdev_dax_supported(mp->m_rtdev_targp->bt_bdev, sb->s_blocksize) &&
>>>>>> + !bdev_dax_supported(mp->m_ddev_targp->bt_bdev, sb->s_blocksize))
>>>>>
>>>>> What happens here if we have a non-rt file that we're trying to turn
>>>>> into an rt file and the data dev supports dax but not the rt dev?
>>>>>
>>>>> Changing the rt flag is only supported on files with no data blocks (no
>>>>> extents, no delalloc blocks), so why can't we remove S_DAX from an empty
>>>>> file? There aren't any memory mappings or page cache to get in the way,
>>>>> correct?
>>>>
>>>> File size can be non-zero, so you can have DAX read-over-hole
>>>> mappings present. I simply don't think it's safe to remove/add S_DAX
>>>> flags via ioctls right now. If we have a DAX capable rtdev, then the
>>>> only way we should allow rtdev+dax to be used right now is via the
>>>> RT inherit bit on the dir that creates files in the rtdev right from
>>>> the start. i.e. we can't set/remove the RT inode flag on an inode
>>>> via ioctl if rtdev+dax is enabled until the whole dynamic S_DAX
>>>> inode flag thing is resolved.
>>>
>>> Could we deal with the restriction that the DAX flag can't change
>>> (whether by user ioctl or by toggling the rt flag) unless the file size
>>> is zero? That adds another way setting/clearing the realtime flag can
>>> fail, but at least it'd be the same EINVAL.
>>
>> I thought we still mmap a zero length file and get a page fault that
>> returns a zeroed page? Or does that segv?
>
> I think it segfaults, but let's see...
>
> $ rm -rf /opt/b ; xfs_io -f -c 'mmap -rw 0 1m' -c 'mread 512 20' /opt/b
> Bus error
> $ rm -rf /opt/b ; xfs_io -f -c 'mmap -rw 0 1m' -c 'mwrite 512 20' /opt/b
> Bus error
Darrick,
So you want the change to be if the file size is 0 then we can modify
the RT bit, otherwise reject if DAX is involved?
On Tue, Feb 27, 2018 at 09:46:54AM -0700, Dave Jiang wrote:
>
>
> On 02/20/2018 04:23 PM, Darrick J. Wong wrote:
> > On Wed, Feb 21, 2018 at 10:15:24AM +1100, Dave Chinner wrote:
> >> On Tue, Feb 20, 2018 at 03:01:09PM -0800, Darrick J. Wong wrote:
> >>> On Sun, Feb 18, 2018 at 11:23:17AM +1100, Dave Chinner wrote:
> >>>> On Fri, Feb 16, 2018 at 09:22:47AM -0800, Darrick J. Wong wrote:
> >>>>> On Fri, Feb 16, 2018 at 10:04:26AM -0700, Dave Jiang wrote:
> >>>>>> In a situation where the rt_dev is DAX and data_dev is not DAX, if the user
> >>>>>> requests to remove the realtime flag via ioctl we can no longer support DAX
> >>>>>> for that file. Dynamic changing of S_DAX on the inode is not supported due
> >>>>>> to various complications in the existing implementation. Therefore until we
> >>>>>> address the dynamic S_DAX change issues, we must disallow realtime flag
> >>>>>> being removed.
> >>>>>>
> >>>>>> Signed-off-by: Dave Jiang <[email protected]>
> >>>>>> Reviewed-by: Christoph Hellwig <[email protected]>
> >>>>>> ---
> >>>>>> fs/xfs/xfs_ioctl.c | 14 ++++++++++++++
> >>>>>> 1 file changed, 14 insertions(+)
> >>>>>>
> >>>>>> diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
> >>>>>> index 2c70a0a4f59f..edd97d527fe8 100644
> >>>>>> --- a/fs/xfs/xfs_ioctl.c
> >>>>>> +++ b/fs/xfs/xfs_ioctl.c
> >>>>>> @@ -1030,6 +1030,20 @@ xfs_ioctl_setattr_xflags(
> >>>>>> {
> >>>>>> struct xfs_mount *mp = ip->i_mount;
> >>>>>> uint64_t di_flags2;
> >>>>>> + struct inode *inode = VFS_I(ip);
> >>>>>> + struct super_block *sb = inode->i_sb;
> >>>>>> +
> >>>>>> + /*
> >>>>>> + * In the case that the inode is realtime, and we are trying to remove
> >>>>>> + * the realtime flag, and the rtdev supports DAX but the datadev does
> >>>>>> + * not support DAX, we can't allow the realtime flag to be removed
> >>>>>> + * since we do not support dynamic S_DAX flag removal yet.
> >>>>>> + */
> >>>>>> + if (XFS_IS_REALTIME_INODE(ip) &&
> >>>>>> + !(fa->fsx_xflags & FS_XFLAG_REALTIME) &&
> >>>>>> + bdev_dax_supported(mp->m_rtdev_targp->bt_bdev, sb->s_blocksize) &&
> >>>>>> + !bdev_dax_supported(mp->m_ddev_targp->bt_bdev, sb->s_blocksize))
> >>>>>
> >>>>> What happens here if we have a non-rt file that we're trying to turn
> >>>>> into an rt file and the data dev supports dax but not the rt dev?
> >>>>>
> >>>>> Changing the rt flag is only supported on files with no data blocks (no
> >>>>> extents, no delalloc blocks), so why can't we remove S_DAX from an empty
> >>>>> file? There aren't any memory mappings or page cache to get in the way,
> >>>>> correct?
> >>>>
> >>>> File size can be non-zero, so you can have DAX read-over-hole
> >>>> mappings present. I simply don't think it's safe to remove/add S_DAX
> >>>> flags via ioctls right now. If we have a DAX capable rtdev, then the
> >>>> only way we should allow rtdev+dax to be used right now is via the
> >>>> RT inherit bit on the dir that creates files in the rtdev right from
> >>>> the start. i.e. we can't set/remove the RT inode flag on an inode
> >>>> via ioctl if rtdev+dax is enabled until the whole dynamic S_DAX
> >>>> inode flag thing is resolved.
> >>>
> >>> Could we deal with the restriction that the DAX flag can't change
> >>> (whether by user ioctl or by toggling the rt flag) unless the file size
> >>> is zero? That adds another way setting/clearing the realtime flag can
> >>> fail, but at least it'd be the same EINVAL.
> >>
> >> I thought we still mmap a zero length file and get a page fault that
> >> returns a zeroed page? Or does that segv?
> >
> > I think it segfaults, but let's see...
> >
> > $ rm -rf /opt/b ; xfs_io -f -c 'mmap -rw 0 1m' -c 'mread 512 20' /opt/b
> > Bus error
> > $ rm -rf /opt/b ; xfs_io -f -c 'mmap -rw 0 1m' -c 'mwrite 512 20' /opt/b
> > Bus error
>
> Darrick,
> So you want the change to be if the file size is 0 then we can modify
> the RT bit, otherwise reject if DAX is involved?
The other way around -- reject any change to the DAX flag if the file
size is not zero, regardless of whether the user tried to change the DAX
flag directly or the change is happening because the user changed the RT
flag and the device dax support is different between the rt & data
devices.
We'll need more rigorous testing of this current theory that we can
change S_DAX without problems if the file size is zero, once this change
has been written.
--D
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
Hi Darrick,
I love your patch! Yet something to improve:
[auto build test ERROR on linus/master]
[also build test ERROR on v4.16-rc2 next-20180219]
[cannot apply to dgc-xfs/for-next]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
url: https://github.com/0day-ci/linux/commits/Dave-Jiang/fs-allow-per-device-dax-status-checking-for-filesystems/20180219-121644
config: x86_64-randconfig-s5-02200659 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-1) 7.3.0
reproduce:
# save the attached .config to linux build tree
make ARCH=x86_64
All errors (new ones prefixed by >>):
>> drivers/dax/super.c:85:5: error: redefinition of 'bdev_dax_supported'
int bdev_dax_supported(struct block_device *bdev, int blocksize)
^~~~~~~~~~~~~~~~~~
In file included from drivers/dax/super.c:23:0:
include/linux/dax.h:56:19: note: previous definition of 'bdev_dax_supported' was here
static inline int bdev_dax_supported(struct block_device *bdev,
^~~~~~~~~~~~~~~~~~
vim +/bdev_dax_supported +85 drivers/dax/super.c
74
75 /**
76 * bdev_dax_supported() - Check if the device supports dax for filesystem
77 * @bdev: block device to check
78 * @blocksize: The block size of the device
79 *
80 * This is a library function for filesystems to check if the block device
81 * can be mounted with dax option.
82 *
83 * Return: negative errno if unsupported, 0 if supported.
84 */
> 85 int bdev_dax_supported(struct block_device *bdev, int blocksize)
86 {
87 struct dax_device *dax_dev;
88 pgoff_t pgoff;
89 int err, id;
90 void *kaddr;
91 pfn_t pfn;
92 long len;
93 char buf[BDEVNAME_SIZE];
94
95 if (blocksize != PAGE_SIZE) {
96 pr_debug("%s: error: unsupported blocksize for dax\n",
97 bdevname(bdev, buf));
98 return -EINVAL;
99 }
100
101 err = bdev_dax_pgoff(bdev, 0, PAGE_SIZE, &pgoff);
102 if (err) {
103 pr_debug("%s: error: unaligned partition for dax\n",
104 bdevname(bdev, buf));
105 return err;
106 }
107
108 dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
109 if (!dax_dev) {
110 pr_debug("%s: error: device does not support dax\n",
111 bdevname(bdev, buf));
112 return -EOPNOTSUPP;
113 }
114
115 id = dax_read_lock();
116 len = dax_direct_access(dax_dev, pgoff, 1, &kaddr, &pfn);
117 dax_read_unlock(id);
118
119 put_dax(dax_dev);
120
121 if (len < 1) {
122 pr_debug("%s: error: dax access failed (%ld)\n",
123 bdevname(bdev, buf), len);
124 return len < 0 ? len : -EIO;
125 }
126
127 if ((IS_ENABLED(CONFIG_FS_DAX_LIMITED) && pfn_t_special(pfn))
128 || pfn_t_devmap(pfn))
129 /* pass */;
130 else {
131 pr_debug("%s: error: dax support not enabled\n",
132 bdevname(bdev, buf));
133 return -EOPNOTSUPP;
134 }
135
136 return 0;
137 }
138 EXPORT_SYMBOL_GPL(bdev_dax_supported);
139 #endif
140
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation