2020-05-28 15:02:11

by Ira Weiny

[permalink] [raw]
Subject: [PATCH V5 0/9] Enable ext4 support for per-file/directory DAX operations

From: Ira Weiny <[email protected]>

Changes from V4:
Fix up DAX mutual exclusion with other flags.
Add clean up patch (remove jflags)

Changes from V3:
Change EXT4_DAX_FL to bit24
Cache device DAX support in the super block and use that is
ext4_should_use_dax()

Changes from V2:
Rework DAX exclusivity with verity and encryption based on feedback
from Eric

Enable the same per file DAX support in ext4 as was done for xfs. This series
builds and depends on the V11 series for xfs.[1]

This passes the same xfstests test as XFS.

The only issue is that this modifies the old mount option parsing code rather
than waiting for the new parsing code to be finalized.

This series starts with 3 fixes which include making Verity and Encrypt truly
mutually exclusive from DAX. I think these first 3 patches should be picked up
for 5.8 regardless of what is decided regarding the mount parsing.

[1] https://lore.kernel.org/lkml/[email protected]/

To: [email protected]
To: Andreas Dilger <[email protected]>
To: "Theodore Y. Ts'o" <[email protected]>
To: Jan Kara <[email protected]>
To: Eric Biggers <[email protected]>

Cc: Al Viro <[email protected]>
Cc: Dan Williams <[email protected]>
Cc: Dave Chinner <[email protected]>
Cc: Christoph Hellwig <[email protected]>
Cc: Jeff Moyer <[email protected]>
Cc: "Darrick J. Wong" <[email protected]>
Cc: [email protected]
Cc: [email protected]
Ira Weiny (9):
fs/ext4: Narrow scope of DAX check in setflags
fs/ext4: Disallow verity if inode is DAX
fs/ext4: Change EXT4_MOUNT_DAX to EXT4_MOUNT_DAX_ALWAYS
fs/ext4: Update ext4_should_use_dax()
fs/ext4: Only change S_DAX on inode load
fs/ext4: Make DAX mount option a tri-state
fs/ext4: Remove jflag variable
fs/ext4: Introduce DAX inode flag
Documentation/dax: Update DAX enablement for ext4

Documentation/filesystems/dax.txt | 6 +-
Documentation/filesystems/ext4/verity.rst | 3 +
fs/ext4/ext4.h | 27 +++++--
fs/ext4/ialloc.c | 2 +-
fs/ext4/inode.c | 26 +++++--
fs/ext4/ioctl.c | 65 ++++++++++++++---
fs/ext4/super.c | 85 ++++++++++++++++++-----
fs/ext4/verity.c | 5 +-
include/uapi/linux/fs.h | 1 +
9 files changed, 175 insertions(+), 45 deletions(-)

--
2.25.1


2020-05-28 15:02:34

by Ira Weiny

[permalink] [raw]
Subject: [PATCH V5 6/9] fs/ext4: Make DAX mount option a tri-state

From: Ira Weiny <[email protected]>

We add 'always', 'never', and 'inode' (default). '-o dax' continues to
operate the same which is equivalent to 'always'. This new
functionality is limited to ext4 only.

Specifically we introduce a 2nd DAX mount flag EXT4_MOUNT2_DAX_NEVER and set
it and EXT4_MOUNT_DAX_ALWAYS appropriately for the mode.

We also force EXT4_MOUNT2_DAX_NEVER if !CONFIG_FS_DAX.

Finally, EXT4_MOUNT2_DAX_INODE is used solely to detect if the user
specified that option for printing.

Reviewed-by: Jan Kara <[email protected]>
Signed-off-by: Ira Weiny <[email protected]>

---
Changes from V1:
Fix up mounting options to only show an option if specified
Fix remount to prevent dax changes
Isolate behavior to ext4 only

Changes from RFC:
Combine remount check for DAX_NEVER with DAX_ALWAYS
Update ext4_should_enable_dax()
---
fs/ext4/ext4.h | 2 ++
fs/ext4/inode.c | 2 ++
fs/ext4/super.c | 67 +++++++++++++++++++++++++++++++++++++++++--------
3 files changed, 61 insertions(+), 10 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 94e213e6d8ca..d80a6d830b1c 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1168,6 +1168,8 @@ struct ext4_inode_info {
blocks */
#define EXT4_MOUNT2_HURD_COMPAT 0x00000004 /* Support HURD-castrated
file systems */
+#define EXT4_MOUNT2_DAX_NEVER 0x00000008 /* Do not allow Direct Access */
+#define EXT4_MOUNT2_DAX_INODE 0x00000010 /* For printing options only */

#define EXT4_MOUNT2_EXPLICIT_JOURNAL_CHECKSUM 0x00000008 /* User explicitly
specified journal checksum */
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 01636cf5f322..68fac9289109 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4402,6 +4402,8 @@ static bool ext4_should_enable_dax(struct inode *inode)
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);

+ if (test_opt2(inode->i_sb, DAX_NEVER))
+ return false;
if (!S_ISREG(inode->i_mode))
return false;
if (ext4_should_journal_data(inode))
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 80eb814c47eb..5e056aa20ce9 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1512,7 +1512,8 @@ enum {
Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
- Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_i_version, Opt_dax,
+ Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_i_version,
+ Opt_dax, Opt_dax_always, Opt_dax_inode, Opt_dax_never,
Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_warn_on_error,
Opt_nowarn_on_error, Opt_mblk_io_submit,
Opt_lazytime, Opt_nolazytime, Opt_debug_want_extra_isize,
@@ -1579,6 +1580,9 @@ static const match_table_t tokens = {
{Opt_nobarrier, "nobarrier"},
{Opt_i_version, "i_version"},
{Opt_dax, "dax"},
+ {Opt_dax_always, "dax=always"},
+ {Opt_dax_inode, "dax=inode"},
+ {Opt_dax_never, "dax=never"},
{Opt_stripe, "stripe=%u"},
{Opt_delalloc, "delalloc"},
{Opt_warn_on_error, "warn_on_error"},
@@ -1726,6 +1730,7 @@ static int clear_qf_name(struct super_block *sb, int qtype)
#define MOPT_NO_EXT3 0x0200
#define MOPT_EXT4_ONLY (MOPT_NO_EXT2 | MOPT_NO_EXT3)
#define MOPT_STRING 0x0400
+#define MOPT_SKIP 0x0800

static const struct mount_opts {
int token;
@@ -1775,7 +1780,13 @@ static const struct mount_opts {
{Opt_min_batch_time, 0, MOPT_GTE0},
{Opt_inode_readahead_blks, 0, MOPT_GTE0},
{Opt_init_itable, 0, MOPT_GTE0},
- {Opt_dax, EXT4_MOUNT_DAX_ALWAYS, MOPT_SET},
+ {Opt_dax, EXT4_MOUNT_DAX_ALWAYS, MOPT_SET | MOPT_SKIP},
+ {Opt_dax_always, EXT4_MOUNT_DAX_ALWAYS,
+ MOPT_EXT4_ONLY | MOPT_SET | MOPT_SKIP},
+ {Opt_dax_inode, EXT4_MOUNT2_DAX_INODE,
+ MOPT_EXT4_ONLY | MOPT_SET | MOPT_SKIP},
+ {Opt_dax_never, EXT4_MOUNT2_DAX_NEVER,
+ MOPT_EXT4_ONLY | MOPT_SET | MOPT_SKIP},
{Opt_stripe, 0, MOPT_GTE0},
{Opt_resuid, 0, MOPT_GTE0},
{Opt_resgid, 0, MOPT_GTE0},
@@ -2084,13 +2095,32 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
}
sbi->s_jquota_fmt = m->mount_opt;
#endif
- } else if (token == Opt_dax) {
+ } else if (token == Opt_dax || token == Opt_dax_always ||
+ token == Opt_dax_inode || token == Opt_dax_never) {
#ifdef CONFIG_FS_DAX
- ext4_msg(sb, KERN_WARNING,
- "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
- sbi->s_mount_opt |= m->mount_opt;
+ switch (token) {
+ case Opt_dax:
+ case Opt_dax_always:
+ ext4_msg(sb, KERN_WARNING,
+ "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
+ sbi->s_mount_opt |= EXT4_MOUNT_DAX_ALWAYS;
+ sbi->s_mount_opt2 &= ~EXT4_MOUNT2_DAX_NEVER;
+ break;
+ case Opt_dax_never:
+ sbi->s_mount_opt2 |= EXT4_MOUNT2_DAX_NEVER;
+ sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS;
+ break;
+ case Opt_dax_inode:
+ sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS;
+ sbi->s_mount_opt2 &= ~EXT4_MOUNT2_DAX_NEVER;
+ /* Strictly for printing options */
+ sbi->s_mount_opt2 |= EXT4_MOUNT2_DAX_INODE;
+ break;
+ }
#else
ext4_msg(sb, KERN_INFO, "dax option not supported");
+ sbi->s_mount_opt2 |= EXT4_MOUNT2_DAX_NEVER;
+ sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS;
return -1;
#endif
} else if (token == Opt_data_err_abort) {
@@ -2254,7 +2284,7 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
for (m = ext4_mount_opts; m->token != Opt_err; m++) {
int want_set = m->flags & MOPT_SET;
if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) ||
- (m->flags & MOPT_CLEAR_ERR))
+ (m->flags & MOPT_CLEAR_ERR) || m->flags & MOPT_SKIP)
continue;
if (!nodefs && !(m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt)))
continue; /* skip if same as the default */
@@ -2314,6 +2344,17 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
if (DUMMY_ENCRYPTION_ENABLED(sbi))
SEQ_OPTS_PUTS("test_dummy_encryption");

+ if (test_opt(sb, DAX_ALWAYS)) {
+ if (IS_EXT2_SB(sb))
+ SEQ_OPTS_PUTS("dax");
+ else
+ SEQ_OPTS_PUTS("dax=always");
+ } else if (test_opt2(sb, DAX_NEVER)) {
+ SEQ_OPTS_PUTS("dax=never");
+ } else if (test_opt2(sb, DAX_INODE)) {
+ SEQ_OPTS_PUTS("dax=inode");
+ }
+
ext4_show_quota_options(seq, sb);
return 0;
}
@@ -5436,10 +5477,16 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
goto restore_opts;
}

- if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX_ALWAYS) {
+ if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX_ALWAYS ||
+ (sbi->s_mount_opt2 ^ old_opts.s_mount_opt2) & EXT4_MOUNT2_DAX_NEVER ||
+ (sbi->s_mount_opt2 ^ old_opts.s_mount_opt2) & EXT4_MOUNT2_DAX_INODE) {
ext4_msg(sb, KERN_WARNING, "warning: refusing change of "
- "dax flag with busy inodes while remounting");
- sbi->s_mount_opt ^= EXT4_MOUNT_DAX_ALWAYS;
+ "dax mount option with busy inodes while remounting");
+ sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS;
+ sbi->s_mount_opt |= old_opts.s_mount_opt & EXT4_MOUNT_DAX_ALWAYS;
+ sbi->s_mount_opt2 &= ~(EXT4_MOUNT2_DAX_NEVER | EXT4_MOUNT2_DAX_INODE);
+ sbi->s_mount_opt2 |= old_opts.s_mount_opt2 &
+ (EXT4_MOUNT2_DAX_NEVER | EXT4_MOUNT2_DAX_INODE);
}

if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
--
2.25.1

2020-05-28 15:02:43

by Ira Weiny

[permalink] [raw]
Subject: [PATCH V5 7/9] fs/ext4: Remove jflag variable

From: Ira Weiny <[email protected]>

The jflag variable serves almost no purpose. Remove it.

Signed-off-by: Ira Weiny <[email protected]>

---
Changes from V4:
New for this series.
---
fs/ext4/ioctl.c | 11 ++++-------
1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 7ccf20b1488b..e8a5cdade59f 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -300,7 +300,6 @@ static int ext4_ioctl_setflags(struct inode *inode,
int err = -EPERM, migrate = 0;
struct ext4_iloc iloc;
unsigned int oldflags, mask, i;
- unsigned int jflag;
struct super_block *sb = inode->i_sb;

/* Is it quota file? Do not allow user to mess with it */
@@ -309,9 +308,6 @@ static int ext4_ioctl_setflags(struct inode *inode,

oldflags = ei->i_flags;

- /* The JOURNAL_DATA flag is modifiable only by root */
- jflag = flags & EXT4_JOURNAL_DATA_FL;
-
err = vfs_ioc_setflags_prepare(inode, oldflags, flags);
if (err)
goto flags_out;
@@ -320,7 +316,7 @@ static int ext4_ioctl_setflags(struct inode *inode,
* The JOURNAL_DATA flag can only be changed by
* the relevant capability.
*/
- if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
+ if ((flags ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
if (!capable(CAP_SYS_RESOURCE))
goto flags_out;
}
@@ -391,7 +387,7 @@ static int ext4_ioctl_setflags(struct inode *inode,
if (err)
goto flags_out;

- if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
+ if ((flags ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
/*
* Changes to the journaling mode can cause unsafe changes to
* S_DAX if the inode is DAX
@@ -401,7 +397,8 @@ static int ext4_ioctl_setflags(struct inode *inode,
goto flags_out;
}

- err = ext4_change_inode_journal_flag(inode, jflag);
+ err = ext4_change_inode_journal_flag(inode,
+ flags & EXT4_JOURNAL_DATA_FL);
if (err)
goto flags_out;
}
--
2.25.1

2020-05-28 15:03:30

by Ira Weiny

[permalink] [raw]
Subject: [PATCH V5 4/9] fs/ext4: Update ext4_should_use_dax()

From: Ira Weiny <[email protected]>

S_DAX should only be enabled when the underlying block device supports
dax.

Cache the underlying support for DAX in the super block and modify
ext4_should_use_dax() to check for device support prior to the over
riding mount option.

While we are at it change the function to ext4_should_enable_dax() as
this better reflects the ask as well as matches xfs.

Reviewed-by: Jan Kara <[email protected]>
Signed-off-by: Ira Weiny <[email protected]>

---
Changes from V3:
Add a sb DAX supported flag for performance

Changes from RFC
Change function name to 'should enable'
Clean up bool conversion
Reorder this for better bisect-ability
---
fs/ext4/ext4.h | 1 +
fs/ext4/inode.c | 15 ++++++++++-----
fs/ext4/super.c | 5 ++++-
3 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 6c65fa2f5e00..3fdd044304a0 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1979,6 +1979,7 @@ static inline bool ext4_has_incompat_features(struct super_block *sb)
*/
#define EXT4_FLAGS_RESIZING 0
#define EXT4_FLAGS_SHUTDOWN 1
+#define EXT4_FLAGS_BDEV_IS_DAX 2

static inline int ext4_forced_shutdown(struct ext4_sb_info *sbi)
{
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index a10ff12194db..6532870f6a0b 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4398,10 +4398,10 @@ int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc)
!ext4_test_inode_state(inode, EXT4_STATE_XATTR));
}

-static bool ext4_should_use_dax(struct inode *inode)
+static bool ext4_should_enable_dax(struct inode *inode)
{
- if (!test_opt(inode->i_sb, DAX_ALWAYS))
- return false;
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+
if (!S_ISREG(inode->i_mode))
return false;
if (ext4_should_journal_data(inode))
@@ -4412,7 +4412,12 @@ static bool ext4_should_use_dax(struct inode *inode)
return false;
if (ext4_test_inode_flag(inode, EXT4_INODE_VERITY))
return false;
- return true;
+ if (!test_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags))
+ return false;
+ if (test_opt(inode->i_sb, DAX_ALWAYS))
+ return true;
+
+ return false;
}

void ext4_set_inode_flags(struct inode *inode)
@@ -4430,7 +4435,7 @@ void ext4_set_inode_flags(struct inode *inode)
new_fl |= S_NOATIME;
if (flags & EXT4_DIRSYNC_FL)
new_fl |= S_DIRSYNC;
- if (ext4_should_use_dax(inode))
+ if (ext4_should_enable_dax(inode))
new_fl |= S_DAX;
if (flags & EXT4_ENCRYPT_FL)
new_fl |= S_ENCRYPTED;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 7b99c44d0a91..f7d76dcaedfe 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -4092,13 +4092,16 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount;
}

+ if (bdev_dax_supported(sb->s_bdev, blocksize))
+ set_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags);
+
if (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) {
if (ext4_has_feature_inline_data(sb)) {
ext4_msg(sb, KERN_ERR, "Cannot use DAX on a filesystem"
" that may contain inline data");
goto failed_mount;
}
- if (!bdev_dax_supported(sb->s_bdev, blocksize)) {
+ if (!test_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags)) {
ext4_msg(sb, KERN_ERR,
"DAX unsupported by block device.");
goto failed_mount;
--
2.25.1

2020-05-28 15:05:11

by Ira Weiny

[permalink] [raw]
Subject: [PATCH V5 2/9] fs/ext4: Disallow verity if inode is DAX

From: Ira Weiny <[email protected]>

Verity and DAX are incompatible. Changing the DAX mode due to a verity
flag change is wrong without a corresponding address_space_operations
update.

Make the 2 options mutually exclusive by returning an error if DAX was
set first.

(Setting DAX is already disabled if Verity is set first.)

Reviewed-by: Jan Kara <[email protected]>
Signed-off-by: Ira Weiny <[email protected]>

---
Changes from V2:
Remove Section title 'Verity and DAX'

Changes:
remove WARN_ON_ONCE
Add documentation for DAX/Verity exclusivity
---
Documentation/filesystems/ext4/verity.rst | 3 +++
fs/ext4/verity.c | 3 +++
2 files changed, 6 insertions(+)

diff --git a/Documentation/filesystems/ext4/verity.rst b/Documentation/filesystems/ext4/verity.rst
index 3e4c0ee0e068..e99ff3fd09f7 100644
--- a/Documentation/filesystems/ext4/verity.rst
+++ b/Documentation/filesystems/ext4/verity.rst
@@ -39,3 +39,6 @@ is encrypted as well as the data itself.

Verity files cannot have blocks allocated past the end of the verity
metadata.
+
+Verity and DAX are not compatible and attempts to set both of these flags
+on a file will fail.
diff --git a/fs/ext4/verity.c b/fs/ext4/verity.c
index dc5ec724d889..f05a09fb2ae4 100644
--- a/fs/ext4/verity.c
+++ b/fs/ext4/verity.c
@@ -113,6 +113,9 @@ static int ext4_begin_enable_verity(struct file *filp)
handle_t *handle;
int err;

+ if (IS_DAX(inode))
+ return -EINVAL;
+
if (ext4_verity_in_progress(inode))
return -EBUSY;

--
2.25.1

2020-05-28 15:05:22

by Ira Weiny

[permalink] [raw]
Subject: [PATCH V5 3/9] fs/ext4: Change EXT4_MOUNT_DAX to EXT4_MOUNT_DAX_ALWAYS

From: Ira Weiny <[email protected]>

In prep for the new tri-state mount option which then introduces
EXT4_MOUNT_DAX_NEVER.

Reviewed-by: Jan Kara <[email protected]>
Signed-off-by: Ira Weiny <[email protected]>

---
Changes:
New patch
---
fs/ext4/ext4.h | 4 ++--
fs/ext4/inode.c | 2 +-
fs/ext4/super.c | 12 ++++++------
3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index ad2dbf6e4924..6c65fa2f5e00 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1123,9 +1123,9 @@ struct ext4_inode_info {
#define EXT4_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */
#define EXT4_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/
#ifdef CONFIG_FS_DAX
-#define EXT4_MOUNT_DAX 0x00200 /* Direct Access */
+#define EXT4_MOUNT_DAX_ALWAYS 0x00200 /* Direct Access */
#else
-#define EXT4_MOUNT_DAX 0
+#define EXT4_MOUNT_DAX_ALWAYS 0
#endif
#define EXT4_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */
#define EXT4_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 2a4aae6acdcb..a10ff12194db 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4400,7 +4400,7 @@ int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc)

static bool ext4_should_use_dax(struct inode *inode)
{
- if (!test_opt(inode->i_sb, DAX))
+ if (!test_opt(inode->i_sb, DAX_ALWAYS))
return false;
if (!S_ISREG(inode->i_mode))
return false;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index bf5fcb477f66..7b99c44d0a91 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1775,7 +1775,7 @@ static const struct mount_opts {
{Opt_min_batch_time, 0, MOPT_GTE0},
{Opt_inode_readahead_blks, 0, MOPT_GTE0},
{Opt_init_itable, 0, MOPT_GTE0},
- {Opt_dax, EXT4_MOUNT_DAX, MOPT_SET},
+ {Opt_dax, EXT4_MOUNT_DAX_ALWAYS, MOPT_SET},
{Opt_stripe, 0, MOPT_GTE0},
{Opt_resuid, 0, MOPT_GTE0},
{Opt_resgid, 0, MOPT_GTE0},
@@ -3982,7 +3982,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
"both data=journal and dioread_nolock");
goto failed_mount;
}
- if (test_opt(sb, DAX)) {
+ if (test_opt(sb, DAX_ALWAYS)) {
ext4_msg(sb, KERN_ERR, "can't mount with "
"both data=journal and dax");
goto failed_mount;
@@ -4092,7 +4092,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount;
}

- if (sbi->s_mount_opt & EXT4_MOUNT_DAX) {
+ if (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) {
if (ext4_has_feature_inline_data(sb)) {
ext4_msg(sb, KERN_ERR, "Cannot use DAX on a filesystem"
" that may contain inline data");
@@ -5412,7 +5412,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
err = -EINVAL;
goto restore_opts;
}
- if (test_opt(sb, DAX)) {
+ if (test_opt(sb, DAX_ALWAYS)) {
ext4_msg(sb, KERN_ERR, "can't mount with "
"both data=journal and dax");
err = -EINVAL;
@@ -5433,10 +5433,10 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
goto restore_opts;
}

- if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX) {
+ if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX_ALWAYS) {
ext4_msg(sb, KERN_WARNING, "warning: refusing change of "
"dax flag with busy inodes while remounting");
- sbi->s_mount_opt ^= EXT4_MOUNT_DAX;
+ sbi->s_mount_opt ^= EXT4_MOUNT_DAX_ALWAYS;
}

if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
--
2.25.1

2020-05-29 02:55:43

by Theodore Ts'o

[permalink] [raw]
Subject: Re: [PATCH V5 0/9] Enable ext4 support for per-file/directory DAX operations

On Thu, May 28, 2020 at 07:59:54AM -0700, [email protected] wrote:
> From: Ira Weiny <[email protected]>
>
> Changes from V4:
> Fix up DAX mutual exclusion with other flags.
> Add clean up patch (remove jflags)
>
> Changes from V3:
> Change EXT4_DAX_FL to bit24
> Cache device DAX support in the super block and use that is
> ext4_should_use_dax()
>
> Changes from V2:
> Rework DAX exclusivity with verity and encryption based on feedback
> from Eric
>
> Enable the same per file DAX support in ext4 as was done for xfs. This series
> builds and depends on the V11 series for xfs.[1]
>
> This passes the same xfstests test as XFS.
>
> The only issue is that this modifies the old mount option parsing code rather
> than waiting for the new parsing code to be finalized.
>
> This series starts with 3 fixes which include making Verity and Encrypt truly
> mutually exclusive from DAX. I think these first 3 patches should be picked up
> for 5.8 regardless of what is decided regarding the mount parsing.
>
> [1] https://lore.kernel.org/lkml/[email protected]/
>
> To: [email protected]
> To: Andreas Dilger <[email protected]>
> To: "Theodore Y. Ts'o" <[email protected]>
> To: Jan Kara <[email protected]>
> To: Eric Biggers <[email protected]>

Thanks, applied to the ext4-dax branch.

- Ted

2020-05-29 04:18:24

by Theodore Ts'o

[permalink] [raw]
Subject: Re: [PATCH V5 0/9] Enable ext4 support for per-file/directory DAX operations

On Thu, May 28, 2020 at 10:54:41PM -0400, Theodore Y. Ts'o wrote:
>
> Thanks, applied to the ext4-dax branch.
>

I spoke too soon. While I tried merging with the ext4.git dev branch,
a merge conflict made me look closer and I realize I needed to make
the following changes (see diff between your patch set and what is
currently in ext4-dax).

Essentially, I needed to rework the branch to take into account commit
e0198aff3ae3 ("ext4: reject mount options not supported when
remounting in handle_mount_opt()").

The problem is that if you allow handle_mount_opt() to apply the
changes to the dax settings, and then later on, ext4_remount() realize
that we're remounting, and we need to reject the change, there's a
race if we restore the mount options to the original configuration.
Specifically, as Syzkaller pointed out, between when we change the dax
settings and then reset them, it's possible for some file to be opened
with "wrong" dax setting, and then when they are reset, *boom*.

The correct way to deal with this is to reject the mount option change
much earlier, in handle_mount_opt(), *before* we mess with the dax
settings.

Please take a look at the ext4-dax for the actual changes which I
made.

Cheers,

- Ted


diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 3658e3016999..9a37d70394b2 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1733,7 +1733,7 @@ static int clear_qf_name(struct super_block *sb, int qtype)
#define MOPT_NO_EXT3 0x0200
#define MOPT_EXT4_ONLY (MOPT_NO_EXT2 | MOPT_NO_EXT3)
#define MOPT_STRING 0x0400
-#define MOPT_SKIP 0x0800
+#define MOPT_NO_REMOUNT 0x0800

static const struct mount_opts {
int token;
@@ -1783,18 +1783,15 @@ static const struct mount_opts {
{Opt_min_batch_time, 0, MOPT_GTE0},
{Opt_inode_readahead_blks, 0, MOPT_GTE0},
{Opt_init_itable, 0, MOPT_GTE0},
- {Opt_dax, EXT4_MOUNT_DAX_ALWAYS, MOPT_SET | MOPT_SKIP},
- {Opt_dax_always, EXT4_MOUNT_DAX_ALWAYS,
- MOPT_EXT4_ONLY | MOPT_SET | MOPT_SKIP},
- {Opt_dax_inode, EXT4_MOUNT2_DAX_INODE,
- MOPT_EXT4_ONLY | MOPT_SET | MOPT_SKIP},
- {Opt_dax_never, EXT4_MOUNT2_DAX_NEVER,
- MOPT_EXT4_ONLY | MOPT_SET | MOPT_SKIP},
+ {Opt_dax, 0, MOPT_NO_REMOUNT},
+ {Opt_dax_always, 0, MOPT_NO_REMOUNT},
+ {Opt_dax_inode, 0, MOPT_NO_REMOUNT},
+ {Opt_dax_never, 0, MOPT_NO_REMOUNT},
{Opt_stripe, 0, MOPT_GTE0},
{Opt_resuid, 0, MOPT_GTE0},
{Opt_resgid, 0, MOPT_GTE0},
- {Opt_journal_dev, 0, MOPT_NO_EXT2 | MOPT_GTE0},
- {Opt_journal_path, 0, MOPT_NO_EXT2 | MOPT_STRING},
+ {Opt_journal_dev, 0, MOPT_NO_EXT2 | MOPT_GTE0 | MOPT_NO_REMOUNT},
+ {Opt_journal_path, 0, MOPT_NO_EXT2 | MOPT_STRING | MOPT_NO_REMOUNT},
{Opt_journal_ioprio, 0, MOPT_NO_EXT2 | MOPT_GTE0},
{Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
{Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
@@ -1831,7 +1828,7 @@ static const struct mount_opts {
{Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT},
{Opt_max_dir_size_kb, 0, MOPT_GTE0},
{Opt_test_dummy_encryption, 0, MOPT_GTE0},
- {Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET},
+ {Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET | MOPT_NO_REMOUNT},
{Opt_err, 0, 0}
};

@@ -1929,6 +1926,12 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
"Mount option \"%s\" incompatible with ext3", opt);
return -1;
}
+ if ((m->flags & MOPT_NO_REMOUNT) && is_remount) {
+ ext4_msg(sb, KERN_ERR,
+ "Mount option \"%s\" not supported when remounting",
+ opt);
+ return -1;
+ }

if (args->from && !(m->flags & MOPT_STRING) && match_int(args, &arg))
return -1;
@@ -2008,11 +2011,6 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
}
sbi->s_resgid = gid;
} else if (token == Opt_journal_dev) {
- if (is_remount) {
- ext4_msg(sb, KERN_ERR,
- "Cannot specify journal on remount");
- return -1;
- }
*journal_devnum = arg;
} else if (token == Opt_journal_path) {
char *journal_path;
@@ -2020,11 +2018,6 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
struct path path;
int error;

- if (is_remount) {
- ext4_msg(sb, KERN_ERR,
- "Cannot specify journal on remount");
- return -1;
- }
journal_path = match_strdup(&args[0]);
if (!journal_path) {
ext4_msg(sb, KERN_ERR, "error: could not dup "
@@ -2287,7 +2280,7 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
for (m = ext4_mount_opts; m->token != Opt_err; m++) {
int want_set = m->flags & MOPT_SET;
if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) ||
- (m->flags & MOPT_CLEAR_ERR) || m->flags & MOPT_SKIP)
+ (m->flags & MOPT_CLEAR_ERR))
continue;
if (!nodefs && !(m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt)))
continue; /* skip if same as the default */
@@ -5474,24 +5467,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
}
}

- if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_NO_MBCACHE) {
- ext4_msg(sb, KERN_ERR, "can't enable nombcache during remount");
- err = -EINVAL;
- goto restore_opts;
- }
-
- if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX_ALWAYS ||
- (sbi->s_mount_opt2 ^ old_opts.s_mount_opt2) & EXT4_MOUNT2_DAX_NEVER ||
- (sbi->s_mount_opt2 ^ old_opts.s_mount_opt2) & EXT4_MOUNT2_DAX_INODE) {
- ext4_msg(sb, KERN_WARNING, "warning: refusing change of "
- "dax mount option with busy inodes while remounting");
- sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS;
- sbi->s_mount_opt |= old_opts.s_mount_opt & EXT4_MOUNT_DAX_ALWAYS;
- sbi->s_mount_opt2 &= ~(EXT4_MOUNT2_DAX_NEVER | EXT4_MOUNT2_DAX_INODE);
- sbi->s_mount_opt2 |= old_opts.s_mount_opt2 &
- (EXT4_MOUNT2_DAX_NEVER | EXT4_MOUNT2_DAX_INODE);
- }
-
if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
ext4_abort(sb, EXT4_ERR_ESHUTDOWN, "Abort forced by user");

2020-05-29 18:13:24

by Ira Weiny

[permalink] [raw]
Subject: Re: [PATCH V5 0/9] Enable ext4 support for per-file/directory DAX operations

On Fri, May 29, 2020 at 12:17:17AM -0400, Theodore Y. Ts'o wrote:
> On Thu, May 28, 2020 at 10:54:41PM -0400, Theodore Y. Ts'o wrote:
> >
> > Thanks, applied to the ext4-dax branch.
> >
>
> I spoke too soon. While I tried merging with the ext4.git dev branch,
> a merge conflict made me look closer and I realize I needed to make
> the following changes (see diff between your patch set and what is
> currently in ext4-dax).
>
> Essentially, I needed to rework the branch to take into account commit
> e0198aff3ae3 ("ext4: reject mount options not supported when
> remounting in handle_mount_opt()").
>
> The problem is that if you allow handle_mount_opt() to apply the
> changes to the dax settings, and then later on, ext4_remount() realize
> that we're remounting, and we need to reject the change, there's a
> race if we restore the mount options to the original configuration.
> Specifically, as Syzkaller pointed out, between when we change the dax
> settings and then reset them, it's possible for some file to be opened
> with "wrong" dax setting, and then when they are reset, *boom*.
>
> The correct way to deal with this is to reject the mount option change
> much earlier, in handle_mount_opt(), *before* we mess with the dax
> settings.
>
> Please take a look at the ext4-dax for the actual changes which I
> made.
>
> Cheers,
>
> - Ted
>
>
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index 3658e3016999..9a37d70394b2 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -1733,7 +1733,7 @@ static int clear_qf_name(struct super_block *sb, int qtype)
> #define MOPT_NO_EXT3 0x0200
> #define MOPT_EXT4_ONLY (MOPT_NO_EXT2 | MOPT_NO_EXT3)
> #define MOPT_STRING 0x0400
> -#define MOPT_SKIP 0x0800

I think we still need MOPT_SKIP...

This was put in to skip these options when printing to deal with printing only
dax=inode when it was specified by the user.

Ah but I see now. By taking MOPT_SET away you have created the same behavior?

This is orthogonal to the remount issue right?

> +#define MOPT_NO_REMOUNT 0x0800
>
> static const struct mount_opts {
> int token;
> @@ -1783,18 +1783,15 @@ static const struct mount_opts {
> {Opt_min_batch_time, 0, MOPT_GTE0},
> {Opt_inode_readahead_blks, 0, MOPT_GTE0},
> {Opt_init_itable, 0, MOPT_GTE0},
> - {Opt_dax, EXT4_MOUNT_DAX_ALWAYS, MOPT_SET | MOPT_SKIP},
> - {Opt_dax_always, EXT4_MOUNT_DAX_ALWAYS,
> - MOPT_EXT4_ONLY | MOPT_SET | MOPT_SKIP},
> - {Opt_dax_inode, EXT4_MOUNT2_DAX_INODE,
> - MOPT_EXT4_ONLY | MOPT_SET | MOPT_SKIP},
> - {Opt_dax_never, EXT4_MOUNT2_DAX_NEVER,
> - MOPT_EXT4_ONLY | MOPT_SET | MOPT_SKIP},
> + {Opt_dax, 0, MOPT_NO_REMOUNT},
> + {Opt_dax_always, 0, MOPT_NO_REMOUNT},
> + {Opt_dax_inode, 0, MOPT_NO_REMOUNT},
> + {Opt_dax_never, 0, MOPT_NO_REMOUNT},

Even if MOPT_SET is redundant. Why don't we need still need MOPT_EXT4_ONLY?

And why don't we need to associate the defines; EXT4_MOUNT_DAX_ALWAYS etc?

> {Opt_stripe, 0, MOPT_GTE0},
> {Opt_resuid, 0, MOPT_GTE0},
> {Opt_resgid, 0, MOPT_GTE0},
> - {Opt_journal_dev, 0, MOPT_NO_EXT2 | MOPT_GTE0},
> - {Opt_journal_path, 0, MOPT_NO_EXT2 | MOPT_STRING},
> + {Opt_journal_dev, 0, MOPT_NO_EXT2 | MOPT_GTE0 | MOPT_NO_REMOUNT},
> + {Opt_journal_path, 0, MOPT_NO_EXT2 | MOPT_STRING | MOPT_NO_REMOUNT},
> {Opt_journal_ioprio, 0, MOPT_NO_EXT2 | MOPT_GTE0},
> {Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
> {Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
> @@ -1831,7 +1828,7 @@ static const struct mount_opts {
> {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT},
> {Opt_max_dir_size_kb, 0, MOPT_GTE0},
> {Opt_test_dummy_encryption, 0, MOPT_GTE0},
> - {Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET},
> + {Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET | MOPT_NO_REMOUNT},
> {Opt_err, 0, 0}
> };
>
> @@ -1929,6 +1926,12 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
> "Mount option \"%s\" incompatible with ext3", opt);
> return -1;
> }
> + if ((m->flags & MOPT_NO_REMOUNT) && is_remount) {
> + ext4_msg(sb, KERN_ERR,
> + "Mount option \"%s\" not supported when remounting",
> + opt);
> + return -1;
> + }

I think this is cleaner!

Thanks, I did test this but not while trying to manipulate files as the same time
as a remount. So a race would not have been caught.

Thanks!
Ira

>
> if (args->from && !(m->flags & MOPT_STRING) && match_int(args, &arg))
> return -1;
> @@ -2008,11 +2011,6 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
> }
> sbi->s_resgid = gid;
> } else if (token == Opt_journal_dev) {
> - if (is_remount) {
> - ext4_msg(sb, KERN_ERR,
> - "Cannot specify journal on remount");
> - return -1;
> - }
> *journal_devnum = arg;
> } else if (token == Opt_journal_path) {
> char *journal_path;
> @@ -2020,11 +2018,6 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
> struct path path;
> int error;
>
> - if (is_remount) {
> - ext4_msg(sb, KERN_ERR,
> - "Cannot specify journal on remount");
> - return -1;
> - }
> journal_path = match_strdup(&args[0]);
> if (!journal_path) {
> ext4_msg(sb, KERN_ERR, "error: could not dup "
> @@ -2287,7 +2280,7 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
> for (m = ext4_mount_opts; m->token != Opt_err; m++) {
> int want_set = m->flags & MOPT_SET;
> if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) ||
> - (m->flags & MOPT_CLEAR_ERR) || m->flags & MOPT_SKIP)
> + (m->flags & MOPT_CLEAR_ERR))
> continue;
> if (!nodefs && !(m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt)))
> continue; /* skip if same as the default */
> @@ -5474,24 +5467,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
> }
> }
>
> - if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_NO_MBCACHE) {
> - ext4_msg(sb, KERN_ERR, "can't enable nombcache during remount");
> - err = -EINVAL;
> - goto restore_opts;
> - }
> -
> - if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX_ALWAYS ||
> - (sbi->s_mount_opt2 ^ old_opts.s_mount_opt2) & EXT4_MOUNT2_DAX_NEVER ||
> - (sbi->s_mount_opt2 ^ old_opts.s_mount_opt2) & EXT4_MOUNT2_DAX_INODE) {
> - ext4_msg(sb, KERN_WARNING, "warning: refusing change of "
> - "dax mount option with busy inodes while remounting");
> - sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS;
> - sbi->s_mount_opt |= old_opts.s_mount_opt & EXT4_MOUNT_DAX_ALWAYS;
> - sbi->s_mount_opt2 &= ~(EXT4_MOUNT2_DAX_NEVER | EXT4_MOUNT2_DAX_INODE);
> - sbi->s_mount_opt2 |= old_opts.s_mount_opt2 &
> - (EXT4_MOUNT2_DAX_NEVER | EXT4_MOUNT2_DAX_INODE);
> - }
> -
> if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
> ext4_abort(sb, EXT4_ERR_ESHUTDOWN, "Abort forced by user");
>