2020-05-14 06:53:54

by Ira Weiny

[permalink] [raw]
Subject: [PATCH V1 0/9] Enable ext4 support for per-file/directory DAX operations

From: Ira Weiny <[email protected]>

Enable the same per file DAX support in ext4 as was done for xfs. This series
builds on and depends on the V11 series for xfs.[1]

This passes the same xfstests test as XFS.

The only issue is that this modifies the old mount option parsing code rather
than waiting for the new parsing code to be finalized.

This series starts with 3 fixes which include making Verity and Encrypt truly
mutually exclusive with DAX. I think these first 3 patches should be picked up
for 5.8 regardless of what is decided regarding the mount parsing.

[1] https://lore.kernel.org/lkml/[email protected]/

Changes from V0:
Collect reviews
Fix up setting don't cache in ioctl code
Add FS_DAX_FL flag for consistency


To: [email protected]
To: "Theodore Y. Ts'o" <[email protected]>
To: Jan Kara <[email protected]>
Cc: "Darrick J. Wong" <[email protected]>
Cc: Dan Williams <[email protected]>
Cc: Dave Chinner <[email protected]>
Cc: Christoph Hellwig <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]


Ira Weiny (9):
fs/ext4: Narrow scope of DAX check in setflags
fs/ext4: Disallow verity if inode is DAX
fs/ext4: Disallow encryption if inode is DAX
fs/ext4: Change EXT4_MOUNT_DAX to EXT4_MOUNT_DAX_ALWAYS
fs/ext4: Update ext4_should_use_dax()
fs/ext4: Only change S_DAX on inode load
fs/ext4: Make DAX mount option a tri-state
fs/ext4: Introduce DAX inode flag
Documentation/dax: Update DAX enablement for ext4

Documentation/filesystems/dax.txt | 6 +-
Documentation/filesystems/ext4/verity.rst | 7 +++
Documentation/filesystems/fscrypt.rst | 4 +-
fs/ext4/ext4.h | 20 ++++---
fs/ext4/ialloc.c | 2 +-
fs/ext4/inode.c | 27 +++++++--
fs/ext4/ioctl.c | 31 +++++++++--
fs/ext4/super.c | 67 +++++++++++++++--------
fs/ext4/verity.c | 5 +-
include/uapi/linux/fs.h | 1 +
10 files changed, 125 insertions(+), 45 deletions(-)

--
2.25.1


2020-05-14 06:54:01

by Ira Weiny

[permalink] [raw]
Subject: [PATCH V1 4/9] fs/ext4: Change EXT4_MOUNT_DAX to EXT4_MOUNT_DAX_ALWAYS

From: Ira Weiny <[email protected]>

In prep for the new tri-state mount option, which introduces
EXT4_MOUNT2_DAX_NEVER, rename EXT4_MOUNT_DAX to EXT4_MOUNT_DAX_ALWAYS.

Reviewed-by: Jan Kara <[email protected]>
Signed-off-by: Ira Weiny <[email protected]>

---
Changes:
New patch
---
fs/ext4/ext4.h | 4 ++--
fs/ext4/inode.c | 2 +-
fs/ext4/super.c | 12 ++++++------
3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 91eb4381cae5..1a3daf2d18ef 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1123,9 +1123,9 @@ struct ext4_inode_info {
#define EXT4_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */
#define EXT4_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/
#ifdef CONFIG_FS_DAX
-#define EXT4_MOUNT_DAX 0x00200 /* Direct Access */
+#define EXT4_MOUNT_DAX_ALWAYS 0x00200 /* Direct Access */
#else
-#define EXT4_MOUNT_DAX 0
+#define EXT4_MOUNT_DAX_ALWAYS 0
#endif
#define EXT4_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */
#define EXT4_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 2a4aae6acdcb..a10ff12194db 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4400,7 +4400,7 @@ int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc)

static bool ext4_should_use_dax(struct inode *inode)
{
- if (!test_opt(inode->i_sb, DAX))
+ if (!test_opt(inode->i_sb, DAX_ALWAYS))
return false;
if (!S_ISREG(inode->i_mode))
return false;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 9873ab27e3fa..d0434b513919 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1767,7 +1767,7 @@ static const struct mount_opts {
{Opt_min_batch_time, 0, MOPT_GTE0},
{Opt_inode_readahead_blks, 0, MOPT_GTE0},
{Opt_init_itable, 0, MOPT_GTE0},
- {Opt_dax, EXT4_MOUNT_DAX, MOPT_SET},
+ {Opt_dax, EXT4_MOUNT_DAX_ALWAYS, MOPT_SET},
{Opt_stripe, 0, MOPT_GTE0},
{Opt_resuid, 0, MOPT_GTE0},
{Opt_resgid, 0, MOPT_GTE0},
@@ -3974,7 +3974,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
"both data=journal and dioread_nolock");
goto failed_mount;
}
- if (test_opt(sb, DAX)) {
+ if (test_opt(sb, DAX_ALWAYS)) {
ext4_msg(sb, KERN_ERR, "can't mount with "
"both data=journal and dax");
goto failed_mount;
@@ -4084,7 +4084,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount;
}

- if (sbi->s_mount_opt & EXT4_MOUNT_DAX) {
+ if (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) {
if (ext4_has_feature_inline_data(sb)) {
ext4_msg(sb, KERN_ERR, "Cannot use DAX on a filesystem"
" that may contain inline data");
@@ -5404,7 +5404,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
err = -EINVAL;
goto restore_opts;
}
- if (test_opt(sb, DAX)) {
+ if (test_opt(sb, DAX_ALWAYS)) {
ext4_msg(sb, KERN_ERR, "can't mount with "
"both data=journal and dax");
err = -EINVAL;
@@ -5425,10 +5425,10 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
goto restore_opts;
}

- if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX) {
+ if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX_ALWAYS) {
ext4_msg(sb, KERN_WARNING, "warning: refusing change of "
"dax flag with busy inodes while remounting");
- sbi->s_mount_opt ^= EXT4_MOUNT_DAX;
+ sbi->s_mount_opt ^= EXT4_MOUNT_DAX_ALWAYS;
}

if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
--
2.25.1

2020-05-14 06:54:07

by Ira Weiny

[permalink] [raw]
Subject: [PATCH V1 9/9] Documentation/dax: Update DAX enablement for ext4

From: Ira Weiny <[email protected]>

Update the document to reflect ext4 and xfs now behave the same.

Signed-off-by: Ira Weiny <[email protected]>

---
Changes from RFC:
Update with ext2 text...
---
Documentation/filesystems/dax.txt | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Documentation/filesystems/dax.txt b/Documentation/filesystems/dax.txt
index 735fb4b54117..265c4f808dbf 100644
--- a/Documentation/filesystems/dax.txt
+++ b/Documentation/filesystems/dax.txt
@@ -25,7 +25,7 @@ size when creating the filesystem.
Currently 3 filesystems support DAX: ext2, ext4 and xfs. Enabling DAX on them
is different.

-Enabling DAX on ext4 and ext2
+Enabling DAX on ext2
-----------------------------

When mounting the filesystem, use the "-o dax" option on the command line or
@@ -33,8 +33,8 @@ add 'dax' to the options in /etc/fstab. This works to enable DAX on all files
within the filesystem. It is equivalent to the '-o dax=always' behavior below.


-Enabling DAX on xfs
--------------------
+Enabling DAX on xfs and ext4
+----------------------------

Summary
-------
--
2.25.1

2020-05-14 06:54:23

by Ira Weiny

[permalink] [raw]
Subject: [PATCH V1 8/9] fs/ext4: Introduce DAX inode flag

From: Ira Weiny <[email protected]>

Add a flag to preserve FS_XFLAG_DAX in the ext4 inode.

Set the flag to be user visible and changeable. Set the flag to be
inherited. Allow applications to change the flag at any time.

Finally, on regular files, flag the inode to not be cached to facilitate
changing S_DAX on the next creation of the inode.

Signed-off-by: Ira Weiny <[email protected]>

---
Change from V0:
Add FS_DAX_FL to include/uapi/linux/fs.h
to be consistent
Move ext4_dax_dontcache() to ext4_ioctl_setflags()
This ensures that it is only set when the flags are going to be
set and not if there is an error
Also this sets don't cache in the FS_IOC_SETFLAGS case

Change from RFC:
use new d_mark_dontcache()
Allow caching if ALWAYS/NEVER is set
Rebased to latest Linus master
Change flag to unused 0x01000000
update ext4_should_enable_dax()
---
fs/ext4/ext4.h | 13 +++++++++----
fs/ext4/inode.c | 4 +++-
fs/ext4/ioctl.c | 24 +++++++++++++++++++++++-
include/uapi/linux/fs.h | 1 +
4 files changed, 36 insertions(+), 6 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 01d1de838896..715f8f2029b2 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -415,13 +415,16 @@ struct flex_groups {
#define EXT4_VERITY_FL 0x00100000 /* Verity protected inode */
#define EXT4_EA_INODE_FL 0x00200000 /* Inode used for large EA */
/* 0x00400000 was formerly EXT4_EOFBLOCKS_FL */
+
+#define EXT4_DAX_FL 0x01000000 /* Inode is DAX */
+
#define EXT4_INLINE_DATA_FL 0x10000000 /* Inode has inline data. */
#define EXT4_PROJINHERIT_FL 0x20000000 /* Create with parents projid */
#define EXT4_CASEFOLD_FL 0x40000000 /* Casefolded file */
#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */

-#define EXT4_FL_USER_VISIBLE 0x705BDFFF /* User visible flags */
-#define EXT4_FL_USER_MODIFIABLE 0x604BC0FF /* User modifiable flags */
+#define EXT4_FL_USER_VISIBLE 0x715BDFFF /* User visible flags */
+#define EXT4_FL_USER_MODIFIABLE 0x614BC0FF /* User modifiable flags */

/* Flags we can manipulate with through EXT4_IOC_FSSETXATTR */
#define EXT4_FL_XFLAG_VISIBLE (EXT4_SYNC_FL | \
@@ -429,14 +432,16 @@ struct flex_groups {
EXT4_APPEND_FL | \
EXT4_NODUMP_FL | \
EXT4_NOATIME_FL | \
- EXT4_PROJINHERIT_FL)
+ EXT4_PROJINHERIT_FL | \
+ EXT4_DAX_FL)

/* Flags that should be inherited by new inodes from their parent. */
#define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
EXT4_SYNC_FL | EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL |\
- EXT4_PROJINHERIT_FL | EXT4_CASEFOLD_FL)
+ EXT4_PROJINHERIT_FL | EXT4_CASEFOLD_FL |\
+ EXT4_DAX_FL)

/* Flags that are appropriate for regular files (all but dir-specific ones). */
#define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL | EXT4_CASEFOLD_FL |\
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 140b1930e2f4..105cf04f7940 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4400,6 +4400,8 @@ int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc)

static bool ext4_should_enable_dax(struct inode *inode)
{
+ unsigned int flags = EXT4_I(inode)->i_flags;
+
if (test_opt2(inode->i_sb, DAX_NEVER))
return false;
if (!S_ISREG(inode->i_mode))
@@ -4418,7 +4420,7 @@ static bool ext4_should_enable_dax(struct inode *inode)
if (test_opt(inode->i_sb, DAX_ALWAYS))
return true;

- return false;
+ return flags & EXT4_DAX_FL;
}

void ext4_set_inode_flags(struct inode *inode, bool init)
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 145083e8cd1e..d6d018ea8e94 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -292,6 +292,21 @@ static int ext4_ioctl_check_immutable(struct inode *inode, __u32 new_projid,
return 0;
}

+static void ext4_dax_dontcache(struct inode *inode, unsigned int flags)
+{
+ struct ext4_inode_info *ei = EXT4_I(inode);
+
+ if (S_ISDIR(inode->i_mode))
+ return;
+
+ if (test_opt2(inode->i_sb, DAX_NEVER) ||
+ test_opt(inode->i_sb, DAX_ALWAYS))
+ return;
+
+ if ((ei->i_flags ^ flags) & EXT4_DAX_FL)
+ d_mark_dontcache(inode);
+}
+
static int ext4_ioctl_setflags(struct inode *inode,
unsigned int flags)
{
@@ -369,6 +384,8 @@ static int ext4_ioctl_setflags(struct inode *inode,
if (err)
goto flags_err;

+ ext4_dax_dontcache(inode, flags);
+
for (i = 0, mask = 1; i < 32; i++, mask <<= 1) {
if (!(mask & EXT4_FL_USER_MODIFIABLE))
continue;
@@ -528,12 +545,15 @@ static inline __u32 ext4_iflags_to_xflags(unsigned long iflags)
xflags |= FS_XFLAG_NOATIME;
if (iflags & EXT4_PROJINHERIT_FL)
xflags |= FS_XFLAG_PROJINHERIT;
+ if (iflags & EXT4_DAX_FL)
+ xflags |= FS_XFLAG_DAX;
return xflags;
}

#define EXT4_SUPPORTED_FS_XFLAGS (FS_XFLAG_SYNC | FS_XFLAG_IMMUTABLE | \
FS_XFLAG_APPEND | FS_XFLAG_NODUMP | \
- FS_XFLAG_NOATIME | FS_XFLAG_PROJINHERIT)
+ FS_XFLAG_NOATIME | FS_XFLAG_PROJINHERIT | \
+ FS_XFLAG_DAX)

/* Transfer xflags flags to internal */
static inline unsigned long ext4_xflags_to_iflags(__u32 xflags)
@@ -552,6 +572,8 @@ static inline unsigned long ext4_xflags_to_iflags(__u32 xflags)
iflags |= EXT4_NOATIME_FL;
if (xflags & FS_XFLAG_PROJINHERIT)
iflags |= EXT4_PROJINHERIT_FL;
+ if (xflags & FS_XFLAG_DAX)
+ iflags |= EXT4_DAX_FL;

return iflags;
}
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 379a612f8f1d..7c5f6eb51e2d 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -262,6 +262,7 @@ struct fsxattr {
#define FS_EA_INODE_FL 0x00200000 /* Inode used for large EA */
#define FS_EOFBLOCKS_FL 0x00400000 /* Reserved for ext4 */
#define FS_NOCOW_FL 0x00800000 /* Do not cow file */
+#define FS_DAX_FL 0x01000000 /* Inode is DAX */
#define FS_INLINE_DATA_FL 0x10000000 /* Reserved for ext4 */
#define FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */
#define FS_CASEFOLD_FL 0x40000000 /* Folder is case insensitive */
--
2.25.1

2020-05-14 06:54:29

by Ira Weiny

[permalink] [raw]
Subject: [PATCH V1 6/9] fs/ext4: Only change S_DAX on inode load

From: Ira Weiny <[email protected]>

To prevent complications with in memory inodes we only set S_DAX on
inode load. FS_XFLAG_DAX can be changed at any time and S_DAX will
change after inode eviction and reload.

Add init bool to ext4_set_inode_flags() to indicate if the inode is
being newly initialized.

Assert that S_DAX is not set on an inode which is just being loaded.

Reviewed-by: Jan Kara <[email protected]>
Signed-off-by: Ira Weiny <[email protected]>

---
Changes from RFC:
Change J_ASSERT() to WARN_ON_ONCE()
Fix bug which would clear S_DAX incorrectly
---
fs/ext4/ext4.h | 2 +-
fs/ext4/ialloc.c | 2 +-
fs/ext4/inode.c | 13 ++++++++++---
fs/ext4/ioctl.c | 3 ++-
fs/ext4/super.c | 4 ++--
fs/ext4/verity.c | 2 +-
6 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 1a3daf2d18ef..86a0994332ce 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2692,7 +2692,7 @@ extern int ext4_can_truncate(struct inode *inode);
extern int ext4_truncate(struct inode *);
extern int ext4_break_layouts(struct inode *);
extern int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length);
-extern void ext4_set_inode_flags(struct inode *);
+extern void ext4_set_inode_flags(struct inode *, bool init);
extern int ext4_alloc_da_blocks(struct inode *inode);
extern void ext4_set_aops(struct inode *inode);
extern int ext4_writepage_trans_blocks(struct inode *);
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 4b8c9a9bdf0c..7941c140723f 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -1116,7 +1116,7 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
ei->i_block_group = group;
ei->i_last_alloc_group = ~0;

- ext4_set_inode_flags(inode);
+ ext4_set_inode_flags(inode, true);
if (IS_DIRSYNC(inode))
ext4_handle_sync(handle);
if (insert_inode_locked(inode) < 0) {
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index d3a4c2ed7a1c..23e42a223235 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4419,11 +4419,13 @@ static bool ext4_should_enable_dax(struct inode *inode)
return false;
}

-void ext4_set_inode_flags(struct inode *inode)
+void ext4_set_inode_flags(struct inode *inode, bool init)
{
unsigned int flags = EXT4_I(inode)->i_flags;
unsigned int new_fl = 0;

+ WARN_ON_ONCE(IS_DAX(inode) && init);
+
if (flags & EXT4_SYNC_FL)
new_fl |= S_SYNC;
if (flags & EXT4_APPEND_FL)
@@ -4434,8 +4436,13 @@ void ext4_set_inode_flags(struct inode *inode)
new_fl |= S_NOATIME;
if (flags & EXT4_DIRSYNC_FL)
new_fl |= S_DIRSYNC;
- if (ext4_should_enable_dax(inode))
+
+ /* Because of the way inode_set_flags() works we must preserve S_DAX
+ * here if already set. */
+ new_fl |= (inode->i_flags & S_DAX);
+ if (init && ext4_should_enable_dax(inode))
new_fl |= S_DAX;
+
if (flags & EXT4_ENCRYPT_FL)
new_fl |= S_ENCRYPTED;
if (flags & EXT4_CASEFOLD_FL)
@@ -4649,7 +4656,7 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
* not initialized on a new filesystem. */
}
ei->i_flags = le32_to_cpu(raw_inode->i_flags);
- ext4_set_inode_flags(inode);
+ ext4_set_inode_flags(inode, true);
inode->i_blocks = ext4_inode_blocks(raw_inode, ei);
ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo);
if (ext4_has_feature_64bit(sb))
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 5813e5e73eab..145083e8cd1e 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -381,7 +381,8 @@ static int ext4_ioctl_setflags(struct inode *inode,
ext4_clear_inode_flag(inode, i);
}

- ext4_set_inode_flags(inode);
+ ext4_set_inode_flags(inode, false);
+
inode->i_ctime = current_time(inode);

err = ext4_mark_iloc_dirty(handle, inode, &iloc);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index d0434b513919..5ec900fdf73c 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1344,7 +1344,7 @@ static int ext4_set_context(struct inode *inode, const void *ctx, size_t len,
ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
ext4_clear_inode_state(inode,
EXT4_STATE_MAY_INLINE_DATA);
- ext4_set_inode_flags(inode);
+ ext4_set_inode_flags(inode, false);
}
return res;
}
@@ -1367,7 +1367,7 @@ static int ext4_set_context(struct inode *inode, const void *ctx, size_t len,
ctx, len, 0);
if (!res) {
ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
- ext4_set_inode_flags(inode);
+ ext4_set_inode_flags(inode, false);
res = ext4_mark_inode_dirty(handle, inode);
if (res)
EXT4_ERROR_INODE(inode, "Failed to mark inode dirty");
diff --git a/fs/ext4/verity.c b/fs/ext4/verity.c
index f05a09fb2ae4..89a155ece323 100644
--- a/fs/ext4/verity.c
+++ b/fs/ext4/verity.c
@@ -244,7 +244,7 @@ static int ext4_end_enable_verity(struct file *filp, const void *desc,
if (err)
goto out_stop;
ext4_set_inode_flag(inode, EXT4_INODE_VERITY);
- ext4_set_inode_flags(inode);
+ ext4_set_inode_flags(inode, false);
err = ext4_mark_iloc_dirty(handle, inode, &iloc);
}
out_stop:
--
2.25.1

2020-05-14 06:54:40

by Ira Weiny

[permalink] [raw]
Subject: [PATCH V1 5/9] fs/ext4: Update ext4_should_use_dax()

From: Ira Weiny <[email protected]>

S_DAX should only be enabled when the underlying block device supports
dax.

Change ext4_should_use_dax() to check for device support prior to the
overriding mount option.

While we are at it change the function to ext4_should_enable_dax() as
this better reflects the ask as well as matches xfs.

Reviewed-by: Jan Kara <[email protected]>
Signed-off-by: Ira Weiny <[email protected]>

---
Changes from RFC
Change function name to 'should enable'
Clean up bool conversion
Reorder this for better bisect-ability
---
fs/ext4/inode.c | 14 +++++++++-----
1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index a10ff12194db..d3a4c2ed7a1c 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4398,10 +4398,8 @@ int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc)
!ext4_test_inode_state(inode, EXT4_STATE_XATTR));
}

-static bool ext4_should_use_dax(struct inode *inode)
+static bool ext4_should_enable_dax(struct inode *inode)
{
- if (!test_opt(inode->i_sb, DAX_ALWAYS))
- return false;
if (!S_ISREG(inode->i_mode))
return false;
if (ext4_should_journal_data(inode))
@@ -4412,7 +4410,13 @@ static bool ext4_should_use_dax(struct inode *inode)
return false;
if (ext4_test_inode_flag(inode, EXT4_INODE_VERITY))
return false;
- return true;
+ if (!bdev_dax_supported(inode->i_sb->s_bdev,
+ inode->i_sb->s_blocksize))
+ return false;
+ if (test_opt(inode->i_sb, DAX_ALWAYS))
+ return true;
+
+ return false;
}

void ext4_set_inode_flags(struct inode *inode)
@@ -4430,7 +4434,7 @@ void ext4_set_inode_flags(struct inode *inode)
new_fl |= S_NOATIME;
if (flags & EXT4_DIRSYNC_FL)
new_fl |= S_DIRSYNC;
- if (ext4_should_use_dax(inode))
+ if (ext4_should_enable_dax(inode))
new_fl |= S_DAX;
if (flags & EXT4_ENCRYPT_FL)
new_fl |= S_ENCRYPTED;
--
2.25.1

2020-05-14 06:54:53

by Ira Weiny

[permalink] [raw]
Subject: [PATCH V1 7/9] fs/ext4: Make DAX mount option a tri-state

From: Ira Weiny <[email protected]>

We add 'always', 'never', and 'inode' (default). '-o dax' continues to
operate the same.

Specifically we introduce a 2nd DAX mount flag EXT4_MOUNT2_DAX_NEVER and set
it and EXT4_MOUNT_DAX_ALWAYS appropriately.

We also force EXT4_MOUNT2_DAX_NEVER if !CONFIG_FS_DAX.

https://lore.kernel.org/lkml/[email protected]/

Signed-off-by: Ira Weiny <[email protected]>

---
Changes from RFC:
Combine remount check for DAX_NEVER with DAX_ALWAYS
Update ext4_should_enable_dax()
---
fs/ext4/ext4.h | 1 +
fs/ext4/inode.c | 2 ++
fs/ext4/super.c | 43 +++++++++++++++++++++++++++++++++++++------
3 files changed, 40 insertions(+), 6 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 86a0994332ce..01d1de838896 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1168,6 +1168,7 @@ struct ext4_inode_info {
blocks */
#define EXT4_MOUNT2_HURD_COMPAT 0x00000004 /* Support HURD-castrated
file systems */
+#define EXT4_MOUNT2_DAX_NEVER 0x00000008 /* Do not allow Direct Access */

#define EXT4_MOUNT2_EXPLICIT_JOURNAL_CHECKSUM 0x00000008 /* User explicitly
specified journal checksum */
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 23e42a223235..140b1930e2f4 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4400,6 +4400,8 @@ int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc)

static bool ext4_should_enable_dax(struct inode *inode)
{
+ if (test_opt2(inode->i_sb, DAX_NEVER))
+ return false;
if (!S_ISREG(inode->i_mode))
return false;
if (ext4_should_journal_data(inode))
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 5ec900fdf73c..e01a040a58a9 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1505,6 +1505,7 @@ enum {
Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_i_version, Opt_dax,
+ Opt_dax_str,
Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_warn_on_error,
Opt_nowarn_on_error, Opt_mblk_io_submit,
Opt_lazytime, Opt_nolazytime, Opt_debug_want_extra_isize,
@@ -1570,6 +1571,7 @@ static const match_table_t tokens = {
{Opt_barrier, "barrier"},
{Opt_nobarrier, "nobarrier"},
{Opt_i_version, "i_version"},
+ {Opt_dax_str, "dax=%s"},
{Opt_dax, "dax"},
{Opt_stripe, "stripe=%u"},
{Opt_delalloc, "delalloc"},
@@ -1767,6 +1769,7 @@ static const struct mount_opts {
{Opt_min_batch_time, 0, MOPT_GTE0},
{Opt_inode_readahead_blks, 0, MOPT_GTE0},
{Opt_init_itable, 0, MOPT_GTE0},
+ {Opt_dax_str, 0, MOPT_STRING},
{Opt_dax, EXT4_MOUNT_DAX_ALWAYS, MOPT_SET},
{Opt_stripe, 0, MOPT_GTE0},
{Opt_resuid, 0, MOPT_GTE0},
@@ -2076,13 +2079,32 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
}
sbi->s_jquota_fmt = m->mount_opt;
#endif
- } else if (token == Opt_dax) {
+ } else if (token == Opt_dax || token == Opt_dax_str) {
#ifdef CONFIG_FS_DAX
- ext4_msg(sb, KERN_WARNING,
- "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
- sbi->s_mount_opt |= m->mount_opt;
+ char *tmp = match_strdup(&args[0]);
+
+ if (!tmp || !strcmp(tmp, "always")) {
+ ext4_msg(sb, KERN_WARNING,
+ "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
+ sbi->s_mount_opt |= EXT4_MOUNT_DAX_ALWAYS;
+ sbi->s_mount_opt2 &= ~EXT4_MOUNT2_DAX_NEVER;
+ } else if (!strcmp(tmp, "never")) {
+ sbi->s_mount_opt2 |= EXT4_MOUNT2_DAX_NEVER;
+ sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS;
+ } else if (!strcmp(tmp, "inode")) {
+ sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS;
+ sbi->s_mount_opt2 &= ~EXT4_MOUNT2_DAX_NEVER;
+ } else {
+ ext4_msg(sb, KERN_WARNING, "DAX invalid option.");
+ kfree(tmp);
+ return -1;
+ }
+
+ kfree(tmp);
#else
ext4_msg(sb, KERN_INFO, "dax option not supported");
+ sbi->s_mount_opt2 |= EXT4_MOUNT2_DAX_NEVER;
+ sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS;
return -1;
#endif
} else if (token == Opt_data_err_abort) {
@@ -2306,6 +2328,13 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
if (DUMMY_ENCRYPTION_ENABLED(sbi))
SEQ_OPTS_PUTS("test_dummy_encryption");

+ if (test_opt2(sb, DAX_NEVER))
+ SEQ_OPTS_PUTS("dax=never");
+ else if (test_opt(sb, DAX_ALWAYS))
+ SEQ_OPTS_PUTS("dax=always");
+ else
+ SEQ_OPTS_PUTS("dax=inode");
+
ext4_show_quota_options(seq, sb);
return 0;
}
@@ -5425,10 +5454,12 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
goto restore_opts;
}

- if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX_ALWAYS) {
+ if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX_ALWAYS ||
+ (sbi->s_mount_opt2 ^ old_opts.s_mount_opt2) & EXT4_MOUNT2_DAX_NEVER) {
ext4_msg(sb, KERN_WARNING, "warning: refusing change of "
- "dax flag with busy inodes while remounting");
+ "dax mount option with busy inodes while remounting");
sbi->s_mount_opt ^= EXT4_MOUNT_DAX_ALWAYS;
+ sbi->s_mount_opt2 ^= EXT4_MOUNT2_DAX_NEVER;
}

if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
--
2.25.1

2020-05-14 06:55:18

by Ira Weiny

[permalink] [raw]
Subject: [PATCH V1 3/9] fs/ext4: Disallow encryption if inode is DAX

From: Ira Weiny <[email protected]>

Encryption and DAX are incompatible. Changing the DAX mode due to a
change in Encryption mode is wrong without a corresponding
address_space_operations update.

Make the 2 options mutually exclusive by returning an error if DAX was
set first.

Furthermore, clarify the documentation of the exclusivity and how that
will work.

Signed-off-by: Ira Weiny <[email protected]>

---
Changes:
remove WARN_ON_ONCE
Add documentation to the encrypt doc WRT DAX
---
Documentation/filesystems/fscrypt.rst | 4 +++-
fs/ext4/super.c | 10 +---------
2 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst
index aa072112cfff..1475b8d52fef 100644
--- a/Documentation/filesystems/fscrypt.rst
+++ b/Documentation/filesystems/fscrypt.rst
@@ -1038,7 +1038,9 @@ astute users may notice some differences in behavior:
- The ext4 filesystem does not support data journaling with encrypted
regular files. It will fall back to ordered data mode instead.

-- DAX (Direct Access) is not supported on encrypted files.
+- DAX (Direct Access) is not supported on encrypted files. Attempts to enable
+ DAX on an encrypted file will fail. Mount options will _not_ enable DAX on
+ encrypted files.

- The st_size of an encrypted symlink will not necessarily give the
length of the symlink target as required by POSIX. It will actually
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index bf5fcb477f66..9873ab27e3fa 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1320,7 +1320,7 @@ static int ext4_set_context(struct inode *inode, const void *ctx, size_t len,
if (inode->i_ino == EXT4_ROOT_INO)
return -EPERM;

- if (WARN_ON_ONCE(IS_DAX(inode) && i_size_read(inode)))
+ if (IS_DAX(inode))
return -EINVAL;

res = ext4_convert_inline_data(inode);
@@ -1344,10 +1344,6 @@ static int ext4_set_context(struct inode *inode, const void *ctx, size_t len,
ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
ext4_clear_inode_state(inode,
EXT4_STATE_MAY_INLINE_DATA);
- /*
- * Update inode->i_flags - S_ENCRYPTED will be enabled,
- * S_DAX may be disabled
- */
ext4_set_inode_flags(inode);
}
return res;
@@ -1371,10 +1367,6 @@ static int ext4_set_context(struct inode *inode, const void *ctx, size_t len,
ctx, len, 0);
if (!res) {
ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
- /*
- * Update inode->i_flags - S_ENCRYPTED will be enabled,
- * S_DAX may be disabled
- */
ext4_set_inode_flags(inode);
res = ext4_mark_inode_dirty(handle, inode);
if (res)
--
2.25.1

2020-05-14 06:55:39

by Ira Weiny

[permalink] [raw]
Subject: [PATCH V1 2/9] fs/ext4: Disallow verity if inode is DAX

From: Ira Weiny <[email protected]>

Verity and DAX are incompatible. Changing the DAX mode due to a verity
flag change is wrong without a corresponding address_space_operations
update.

Make the 2 options mutually exclusive by returning an error if DAX was
set first.

(Setting DAX is already disabled if Verity is set first.)

Signed-off-by: Ira Weiny <[email protected]>

---
Changes:
remove WARN_ON_ONCE
Add documentation for DAX/Verity exclusivity
---
Documentation/filesystems/ext4/verity.rst | 7 +++++++
fs/ext4/verity.c | 3 +++
2 files changed, 10 insertions(+)

diff --git a/Documentation/filesystems/ext4/verity.rst b/Documentation/filesystems/ext4/verity.rst
index 3e4c0ee0e068..51ab1aa17e59 100644
--- a/Documentation/filesystems/ext4/verity.rst
+++ b/Documentation/filesystems/ext4/verity.rst
@@ -39,3 +39,10 @@ is encrypted as well as the data itself.

Verity files cannot have blocks allocated past the end of the verity
metadata.
+
+Verity and DAX
+--------------
+
+Verity and DAX are not compatible and attempts to set both of these flags on a
+file will fail.
+
diff --git a/fs/ext4/verity.c b/fs/ext4/verity.c
index dc5ec724d889..f05a09fb2ae4 100644
--- a/fs/ext4/verity.c
+++ b/fs/ext4/verity.c
@@ -113,6 +113,9 @@ static int ext4_begin_enable_verity(struct file *filp)
handle_t *handle;
int err;

+ if (IS_DAX(inode))
+ return -EINVAL;
+
if (ext4_verity_in_progress(inode))
return -EBUSY;

--
2.25.1

2020-05-14 10:22:27

by Jan Kara

[permalink] [raw]
Subject: Re: [PATCH V1 2/9] fs/ext4: Disallow verity if inode is DAX

On Wed 13-05-20 23:53:08, [email protected] wrote:
> From: Ira Weiny <[email protected]>
>
> Verity and DAX are incompatible. Changing the DAX mode due to a verity
> flag change is wrong without a corresponding address_space_operations
> update.
>
> Make the 2 options mutually exclusive by returning an error if DAX was
> set first.
>
> (Setting DAX is already disabled if Verity is set first.)
>
> Signed-off-by: Ira Weiny <[email protected]>

Makes sense. You can add:

Reviewed-by: Jan Kara <[email protected]>

Honza


>
> ---
> Changes:
> remove WARN_ON_ONCE
> Add documentation for DAX/Verity exclusivity
> ---
> Documentation/filesystems/ext4/verity.rst | 7 +++++++
> fs/ext4/verity.c | 3 +++
> 2 files changed, 10 insertions(+)
>
> diff --git a/Documentation/filesystems/ext4/verity.rst b/Documentation/filesystems/ext4/verity.rst
> index 3e4c0ee0e068..51ab1aa17e59 100644
> --- a/Documentation/filesystems/ext4/verity.rst
> +++ b/Documentation/filesystems/ext4/verity.rst
> @@ -39,3 +39,10 @@ is encrypted as well as the data itself.
>
> Verity files cannot have blocks allocated past the end of the verity
> metadata.
> +
> +Verity and DAX
> +--------------
> +
> +Verity and DAX are not compatible and attempts to set both of these flags on a
> +file will fail.
> +
> diff --git a/fs/ext4/verity.c b/fs/ext4/verity.c
> index dc5ec724d889..f05a09fb2ae4 100644
> --- a/fs/ext4/verity.c
> +++ b/fs/ext4/verity.c
> @@ -113,6 +113,9 @@ static int ext4_begin_enable_verity(struct file *filp)
> handle_t *handle;
> int err;
>
> + if (IS_DAX(inode))
> + return -EINVAL;
> +
> if (ext4_verity_in_progress(inode))
> return -EBUSY;
>
> --
> 2.25.1
>
--
Jan Kara <[email protected]>
SUSE Labs, CR

2020-05-14 11:07:20

by Jan Kara

[permalink] [raw]
Subject: Re: [PATCH V1 8/9] fs/ext4: Introduce DAX inode flag

On Wed 13-05-20 23:53:14, [email protected] wrote:
> From: Ira Weiny <[email protected]>
>
> Add a flag to preserve FS_XFLAG_DAX in the ext4 inode.
>
> Set the flag to be user visible and changeable. Set the flag to be
> inherited. Allow applications to change the flag at any time.
>
> Finally, on regular files, flag the inode to not be cached to facilitate
> changing S_DAX on the next creation of the inode.
>
> Signed-off-by: Ira Weiny <[email protected]>

The patch looks good to me. You can add:

Reviewed-by: Jan Kara <[email protected]>

Honza

>
> ---
> Change from V0:
> Add FS_DAX_FL to include/uapi/linux/fs.h
> to be consistent
> Move ext4_dax_dontcache() to ext4_ioctl_setflags()
> This ensures that it is only set when the flags are going to be
> set and not if there is an error
> Also this sets don't cache in the FS_IOC_SETFLAGS case
>
> Change from RFC:
> use new d_mark_dontcache()
> Allow caching if ALWAYS/NEVER is set
> Rebased to latest Linus master
> Change flag to unused 0x01000000
> update ext4_should_enable_dax()
> ---
> fs/ext4/ext4.h | 13 +++++++++----
> fs/ext4/inode.c | 4 +++-
> fs/ext4/ioctl.c | 24 +++++++++++++++++++++++-
> include/uapi/linux/fs.h | 1 +
> 4 files changed, 36 insertions(+), 6 deletions(-)
>
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 01d1de838896..715f8f2029b2 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -415,13 +415,16 @@ struct flex_groups {
> #define EXT4_VERITY_FL 0x00100000 /* Verity protected inode */
> #define EXT4_EA_INODE_FL 0x00200000 /* Inode used for large EA */
> /* 0x00400000 was formerly EXT4_EOFBLOCKS_FL */
> +
> +#define EXT4_DAX_FL 0x01000000 /* Inode is DAX */
> +
> #define EXT4_INLINE_DATA_FL 0x10000000 /* Inode has inline data. */
> #define EXT4_PROJINHERIT_FL 0x20000000 /* Create with parents projid */
> #define EXT4_CASEFOLD_FL 0x40000000 /* Casefolded file */
> #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
>
> -#define EXT4_FL_USER_VISIBLE 0x705BDFFF /* User visible flags */
> -#define EXT4_FL_USER_MODIFIABLE 0x604BC0FF /* User modifiable flags */
> +#define EXT4_FL_USER_VISIBLE 0x715BDFFF /* User visible flags */
> +#define EXT4_FL_USER_MODIFIABLE 0x614BC0FF /* User modifiable flags */
>
> /* Flags we can manipulate with through EXT4_IOC_FSSETXATTR */
> #define EXT4_FL_XFLAG_VISIBLE (EXT4_SYNC_FL | \
> @@ -429,14 +432,16 @@ struct flex_groups {
> EXT4_APPEND_FL | \
> EXT4_NODUMP_FL | \
> EXT4_NOATIME_FL | \
> - EXT4_PROJINHERIT_FL)
> + EXT4_PROJINHERIT_FL | \
> + EXT4_DAX_FL)
>
> /* Flags that should be inherited by new inodes from their parent. */
> #define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
> EXT4_SYNC_FL | EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
> EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
> EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL |\
> - EXT4_PROJINHERIT_FL | EXT4_CASEFOLD_FL)
> + EXT4_PROJINHERIT_FL | EXT4_CASEFOLD_FL |\
> + EXT4_DAX_FL)
>
> /* Flags that are appropriate for regular files (all but dir-specific ones). */
> #define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL | EXT4_CASEFOLD_FL |\
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 140b1930e2f4..105cf04f7940 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -4400,6 +4400,8 @@ int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc)
>
> static bool ext4_should_enable_dax(struct inode *inode)
> {
> + unsigned int flags = EXT4_I(inode)->i_flags;
> +
> if (test_opt2(inode->i_sb, DAX_NEVER))
> return false;
> if (!S_ISREG(inode->i_mode))
> @@ -4418,7 +4420,7 @@ static bool ext4_should_enable_dax(struct inode *inode)
> if (test_opt(inode->i_sb, DAX_ALWAYS))
> return true;
>
> - return false;
> + return flags & EXT4_DAX_FL;
> }
>
> void ext4_set_inode_flags(struct inode *inode, bool init)
> diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
> index 145083e8cd1e..d6d018ea8e94 100644
> --- a/fs/ext4/ioctl.c
> +++ b/fs/ext4/ioctl.c
> @@ -292,6 +292,21 @@ static int ext4_ioctl_check_immutable(struct inode *inode, __u32 new_projid,
> return 0;
> }
>
> +static void ext4_dax_dontcache(struct inode *inode, unsigned int flags)
> +{
> + struct ext4_inode_info *ei = EXT4_I(inode);
> +
> + if (S_ISDIR(inode->i_mode))
> + return;
> +
> + if (test_opt2(inode->i_sb, DAX_NEVER) ||
> + test_opt(inode->i_sb, DAX_ALWAYS))
> + return;
> +
> + if ((ei->i_flags ^ flags) & EXT4_DAX_FL)
> + d_mark_dontcache(inode);
> +}
> +
> static int ext4_ioctl_setflags(struct inode *inode,
> unsigned int flags)
> {
> @@ -369,6 +384,8 @@ static int ext4_ioctl_setflags(struct inode *inode,
> if (err)
> goto flags_err;
>
> + ext4_dax_dontcache(inode, flags);
> +
> for (i = 0, mask = 1; i < 32; i++, mask <<= 1) {
> if (!(mask & EXT4_FL_USER_MODIFIABLE))
> continue;
> @@ -528,12 +545,15 @@ static inline __u32 ext4_iflags_to_xflags(unsigned long iflags)
> xflags |= FS_XFLAG_NOATIME;
> if (iflags & EXT4_PROJINHERIT_FL)
> xflags |= FS_XFLAG_PROJINHERIT;
> + if (iflags & EXT4_DAX_FL)
> + xflags |= FS_XFLAG_DAX;
> return xflags;
> }
>
> #define EXT4_SUPPORTED_FS_XFLAGS (FS_XFLAG_SYNC | FS_XFLAG_IMMUTABLE | \
> FS_XFLAG_APPEND | FS_XFLAG_NODUMP | \
> - FS_XFLAG_NOATIME | FS_XFLAG_PROJINHERIT)
> + FS_XFLAG_NOATIME | FS_XFLAG_PROJINHERIT | \
> + FS_XFLAG_DAX)
>
> /* Transfer xflags flags to internal */
> static inline unsigned long ext4_xflags_to_iflags(__u32 xflags)
> @@ -552,6 +572,8 @@ static inline unsigned long ext4_xflags_to_iflags(__u32 xflags)
> iflags |= EXT4_NOATIME_FL;
> if (xflags & FS_XFLAG_PROJINHERIT)
> iflags |= EXT4_PROJINHERIT_FL;
> + if (xflags & FS_XFLAG_DAX)
> + iflags |= EXT4_DAX_FL;
>
> return iflags;
> }
> diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
> index 379a612f8f1d..7c5f6eb51e2d 100644
> --- a/include/uapi/linux/fs.h
> +++ b/include/uapi/linux/fs.h
> @@ -262,6 +262,7 @@ struct fsxattr {
> #define FS_EA_INODE_FL 0x00200000 /* Inode used for large EA */
> #define FS_EOFBLOCKS_FL 0x00400000 /* Reserved for ext4 */
> #define FS_NOCOW_FL 0x00800000 /* Do not cow file */
> +#define FS_DAX_FL 0x01000000 /* Inode is DAX */
> #define FS_INLINE_DATA_FL 0x10000000 /* Reserved for ext4 */
> #define FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */
> #define FS_CASEFOLD_FL 0x40000000 /* Folder is case insensitive */
> --
> 2.25.1
>
--
Jan Kara <[email protected]>
SUSE Labs, CR

2020-05-14 11:45:21

by Jan Kara

[permalink] [raw]
Subject: Re: [PATCH V1 7/9] fs/ext4: Make DAX mount option a tri-state

On Wed 13-05-20 23:53:13, [email protected] wrote:
> From: Ira Weiny <[email protected]>
>
> We add 'always', 'never', and 'inode' (default). '-o dax' continues to
> operate the same.
>
> Specifically we introduce a 2nd DAX mount flag EXT4_MOUNT2_DAX_NEVER and set
> it and EXT4_MOUNT_DAX_ALWAYS appropriately.
>
> We also force EXT4_MOUNT2_DAX_NEVER if !CONFIG_FS_DAX.
>
> https://lore.kernel.org/lkml/[email protected]/
>
> Signed-off-by: Ira Weiny <[email protected]>
>
> ---
> Changes from RFC:
> Combine remount check for DAX_NEVER with DAX_ALWAYS
> Update ext4_should_enable_dax()

...

> @@ -2076,13 +2079,32 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
> }
> sbi->s_jquota_fmt = m->mount_opt;
> #endif
> - } else if (token == Opt_dax) {
> + } else if (token == Opt_dax || token == Opt_dax_str) {
> #ifdef CONFIG_FS_DAX
> - ext4_msg(sb, KERN_WARNING,
> - "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
> - sbi->s_mount_opt |= m->mount_opt;
> + char *tmp = match_strdup(&args[0]);
> +
> + if (!tmp || !strcmp(tmp, "always")) {
> + ext4_msg(sb, KERN_WARNING,
> + "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
> + sbi->s_mount_opt |= EXT4_MOUNT_DAX_ALWAYS;
> + sbi->s_mount_opt2 &= ~EXT4_MOUNT2_DAX_NEVER;
> + } else if (!strcmp(tmp, "never")) {
> + sbi->s_mount_opt2 |= EXT4_MOUNT2_DAX_NEVER;
> + sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS;
> + } else if (!strcmp(tmp, "inode")) {
> + sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS;
> + sbi->s_mount_opt2 &= ~EXT4_MOUNT2_DAX_NEVER;
> + } else {
> + ext4_msg(sb, KERN_WARNING, "DAX invalid option.");
> + kfree(tmp);
> + return -1;
> + }
> +
> + kfree(tmp);

As I wrote in my reply to previous version of this patch, I'd prefer if we
handled this like e.g. 'data=' mount option. I don't think any unification
in option parsing with XFS makes sense and I'd rather keep consistent how
ext4 handles these 'enum' options.

Honza

--
Jan Kara <[email protected]>
SUSE Labs, CR

2020-05-14 14:28:12

by Ira Weiny

[permalink] [raw]
Subject: Re: [PATCH V1 7/9] fs/ext4: Make DAX mount option a tri-state

On Thu, May 14, 2020 at 01:25:53PM +0200, Jan Kara wrote:
> On Wed 13-05-20 23:53:13, [email protected] wrote:
> > From: Ira Weiny <[email protected]>
> >
> > We add 'always', 'never', and 'inode' (default). '-o dax' continues to
> > operate the same.
> >
> > Specifically we introduce a 2nd DAX mount flag EXT4_MOUNT2_DAX_NEVER and set
> > it and EXT4_MOUNT_DAX_ALWAYS appropriately.
> >
> > We also force EXT4_MOUNT2_DAX_NEVER if !CONFIG_FS_DAX.
> >
> > https://lore.kernel.org/lkml/[email protected]/
> >
> > Signed-off-by: Ira Weiny <[email protected]>
> >
> > ---
> > Changes from RFC:
> > Combine remount check for DAX_NEVER with DAX_ALWAYS
> > Update ext4_should_enable_dax()
>
> ...
>
> > @@ -2076,13 +2079,32 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
> > }
> > sbi->s_jquota_fmt = m->mount_opt;
> > #endif
> > - } else if (token == Opt_dax) {
> > + } else if (token == Opt_dax || token == Opt_dax_str) {
> > #ifdef CONFIG_FS_DAX
> > - ext4_msg(sb, KERN_WARNING,
> > - "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
> > - sbi->s_mount_opt |= m->mount_opt;
> > + char *tmp = match_strdup(&args[0]);
> > +
> > + if (!tmp || !strcmp(tmp, "always")) {
> > + ext4_msg(sb, KERN_WARNING,
> > + "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
> > + sbi->s_mount_opt |= EXT4_MOUNT_DAX_ALWAYS;
> > + sbi->s_mount_opt2 &= ~EXT4_MOUNT2_DAX_NEVER;
> > + } else if (!strcmp(tmp, "never")) {
> > + sbi->s_mount_opt2 |= EXT4_MOUNT2_DAX_NEVER;
> > + sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS;
> > + } else if (!strcmp(tmp, "inode")) {
> > + sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS;
> > + sbi->s_mount_opt2 &= ~EXT4_MOUNT2_DAX_NEVER;
> > + } else {
> > + ext4_msg(sb, KERN_WARNING, "DAX invalid option.");
> > + kfree(tmp);
> > + return -1;
> > + }
> > +
> > + kfree(tmp);
>
> As I wrote in my reply to previous version of this patch, I'd prefer if we
> handled this like e.g. 'data=' mount option. I don't think any unification
> in option parsing with XFS makes sense and I'd rather keep consistent how
> ext4 handles these 'enum' options.

Ok... I'm sorry I'll change this. Thanks for all the reviews!
Ira

>
> Honza
>
> --
> Jan Kara <[email protected]>
> SUSE Labs, CR

2020-05-14 15:11:48

by Darrick J. Wong

[permalink] [raw]
Subject: Re: [PATCH V1 7/9] fs/ext4: Make DAX mount option a tri-state

On Wed, May 13, 2020 at 11:53:13PM -0700, [email protected] wrote:
> From: Ira Weiny <[email protected]>
>
> We add 'always', 'never', and 'inode' (default). '-o dax' continues to
> operate the same.
>
> Specifically we introduce a 2nd DAX mount flag EXT4_MOUNT2_DAX_NEVER and set
> it and EXT4_MOUNT_DAX_ALWAYS appropriately.
>
> We also force EXT4_MOUNT2_DAX_NEVER if !CONFIG_FS_DAX.
>
> https://lore.kernel.org/lkml/[email protected]/
>
> Signed-off-by: Ira Weiny <[email protected]>
>
> ---
> Changes from RFC:
> Combine remount check for DAX_NEVER with DAX_ALWAYS
> Update ext4_should_enable_dax()
> ---
> fs/ext4/ext4.h | 1 +
> fs/ext4/inode.c | 2 ++
> fs/ext4/super.c | 43 +++++++++++++++++++++++++++++++++++++------
> 3 files changed, 40 insertions(+), 6 deletions(-)
>
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 86a0994332ce..01d1de838896 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -1168,6 +1168,7 @@ struct ext4_inode_info {
> blocks */
> #define EXT4_MOUNT2_HURD_COMPAT 0x00000004 /* Support HURD-castrated
> file systems */
> +#define EXT4_MOUNT2_DAX_NEVER 0x00000008 /* Do not allow Direct Access */
>
> #define EXT4_MOUNT2_EXPLICIT_JOURNAL_CHECKSUM 0x00000008 /* User explicitly
> specified journal checksum */
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 23e42a223235..140b1930e2f4 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -4400,6 +4400,8 @@ int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc)
>
> static bool ext4_should_enable_dax(struct inode *inode)
> {
> + if (test_opt2(inode->i_sb, DAX_NEVER))
> + return false;
> if (!S_ISREG(inode->i_mode))
> return false;
> if (ext4_should_journal_data(inode))
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index 5ec900fdf73c..e01a040a58a9 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -1505,6 +1505,7 @@ enum {
> Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
> Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
> Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_i_version, Opt_dax,
> + Opt_dax_str,
> Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_warn_on_error,
> Opt_nowarn_on_error, Opt_mblk_io_submit,
> Opt_lazytime, Opt_nolazytime, Opt_debug_want_extra_isize,
> @@ -1570,6 +1571,7 @@ static const match_table_t tokens = {
> {Opt_barrier, "barrier"},
> {Opt_nobarrier, "nobarrier"},
> {Opt_i_version, "i_version"},
> + {Opt_dax_str, "dax=%s"},
> {Opt_dax, "dax"},
> {Opt_stripe, "stripe=%u"},
> {Opt_delalloc, "delalloc"},
> @@ -1767,6 +1769,7 @@ static const struct mount_opts {
> {Opt_min_batch_time, 0, MOPT_GTE0},
> {Opt_inode_readahead_blks, 0, MOPT_GTE0},
> {Opt_init_itable, 0, MOPT_GTE0},
> + {Opt_dax_str, 0, MOPT_STRING},
> {Opt_dax, EXT4_MOUNT_DAX_ALWAYS, MOPT_SET},
> {Opt_stripe, 0, MOPT_GTE0},
> {Opt_resuid, 0, MOPT_GTE0},
> @@ -2076,13 +2079,32 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
> }
> sbi->s_jquota_fmt = m->mount_opt;
> #endif
> - } else if (token == Opt_dax) {
> + } else if (token == Opt_dax || token == Opt_dax_str) {
> #ifdef CONFIG_FS_DAX
> - ext4_msg(sb, KERN_WARNING,
> - "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
> - sbi->s_mount_opt |= m->mount_opt;
> + char *tmp = match_strdup(&args[0]);
> +
> + if (!tmp || !strcmp(tmp, "always")) {
> + ext4_msg(sb, KERN_WARNING,
> + "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
> + sbi->s_mount_opt |= EXT4_MOUNT_DAX_ALWAYS;
> + sbi->s_mount_opt2 &= ~EXT4_MOUNT2_DAX_NEVER;
> + } else if (!strcmp(tmp, "never")) {
> + sbi->s_mount_opt2 |= EXT4_MOUNT2_DAX_NEVER;
> + sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS;
> + } else if (!strcmp(tmp, "inode")) {
> + sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS;
> + sbi->s_mount_opt2 &= ~EXT4_MOUNT2_DAX_NEVER;
> + } else {
> + ext4_msg(sb, KERN_WARNING, "DAX invalid option.");
> + kfree(tmp);
> + return -1;
> + }
> +
> + kfree(tmp);
> #else
> ext4_msg(sb, KERN_INFO, "dax option not supported");
> + sbi->s_mount_opt2 |= EXT4_MOUNT2_DAX_NEVER;
> + sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS;
> return -1;
> #endif
> } else if (token == Opt_data_err_abort) {
> @@ -2306,6 +2328,13 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
> if (DUMMY_ENCRYPTION_ENABLED(sbi))
> SEQ_OPTS_PUTS("test_dummy_encryption");
>
> + if (test_opt2(sb, DAX_NEVER))
> + SEQ_OPTS_PUTS("dax=never");
> + else if (test_opt(sb, DAX_ALWAYS))
> + SEQ_OPTS_PUTS("dax=always");
> + else
> + SEQ_OPTS_PUTS("dax=inode");

dax=inode is the default; do you need to show it?

(Especially since xfs doesn't...)

--D

> +
> ext4_show_quota_options(seq, sb);
> return 0;
> }
> @@ -5425,10 +5454,12 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
> goto restore_opts;
> }
>
> - if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX_ALWAYS) {
> + if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX_ALWAYS ||
> + (sbi->s_mount_opt2 ^ old_opts.s_mount_opt2) & EXT4_MOUNT2_DAX_NEVER) {
> ext4_msg(sb, KERN_WARNING, "warning: refusing change of "
> - "dax flag with busy inodes while remounting");
> + "dax mount option with busy inodes while remounting");
> sbi->s_mount_opt ^= EXT4_MOUNT_DAX_ALWAYS;
> + sbi->s_mount_opt2 ^= EXT4_MOUNT2_DAX_NEVER;
> }
>
> if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
> --
> 2.25.1
>

2020-05-15 03:39:47

by Ira Weiny

[permalink] [raw]
Subject: Re: [PATCH V1 7/9] fs/ext4: Make DAX mount option a tri-state

On Thu, May 14, 2020 at 08:08:39AM -0700, Darrick J. Wong wrote:
> On Wed, May 13, 2020 at 11:53:13PM -0700, [email protected] wrote:
> > From: Ira Weiny <[email protected]>
> >
> > We add 'always', 'never', and 'inode' (default). '-o dax' continues to
> > operate the same.
> >
> > Specifically we introduce a 2nd DAX mount flag EXT4_MOUNT2_DAX_NEVER and set
> > it and EXT4_MOUNT_DAX_ALWAYS appropriately.
> >
> > We also force EXT4_MOUNT2_DAX_NEVER if !CONFIG_FS_DAX.
> >
> > https://lore.kernel.org/lkml/[email protected]/
> >
> > Signed-off-by: Ira Weiny <[email protected]>
> >
> > ---
> > Changes from RFC:
> > Combine remount check for DAX_NEVER with DAX_ALWAYS
> > Update ext4_should_enable_dax()
> > ---
> > fs/ext4/ext4.h | 1 +
> > fs/ext4/inode.c | 2 ++
> > fs/ext4/super.c | 43 +++++++++++++++++++++++++++++++++++++------
> > 3 files changed, 40 insertions(+), 6 deletions(-)
> >
> > diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> > index 86a0994332ce..01d1de838896 100644
> > --- a/fs/ext4/ext4.h
> > +++ b/fs/ext4/ext4.h
> > @@ -1168,6 +1168,7 @@ struct ext4_inode_info {
> > blocks */
> > #define EXT4_MOUNT2_HURD_COMPAT 0x00000004 /* Support HURD-castrated
> > file systems */
> > +#define EXT4_MOUNT2_DAX_NEVER 0x00000008 /* Do not allow Direct Access */
> >
> > #define EXT4_MOUNT2_EXPLICIT_JOURNAL_CHECKSUM 0x00000008 /* User explicitly
> > specified journal checksum */
> > diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> > index 23e42a223235..140b1930e2f4 100644
> > --- a/fs/ext4/inode.c
> > +++ b/fs/ext4/inode.c
> > @@ -4400,6 +4400,8 @@ int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc)
> >
> > static bool ext4_should_enable_dax(struct inode *inode)
> > {
> > + if (test_opt2(inode->i_sb, DAX_NEVER))
> > + return false;
> > if (!S_ISREG(inode->i_mode))
> > return false;
> > if (ext4_should_journal_data(inode))
> > diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> > index 5ec900fdf73c..e01a040a58a9 100644
> > --- a/fs/ext4/super.c
> > +++ b/fs/ext4/super.c
> > @@ -1505,6 +1505,7 @@ enum {
> > Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
> > Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
> > Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_i_version, Opt_dax,
> > + Opt_dax_str,
> > Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_warn_on_error,
> > Opt_nowarn_on_error, Opt_mblk_io_submit,
> > Opt_lazytime, Opt_nolazytime, Opt_debug_want_extra_isize,
> > @@ -1570,6 +1571,7 @@ static const match_table_t tokens = {
> > {Opt_barrier, "barrier"},
> > {Opt_nobarrier, "nobarrier"},
> > {Opt_i_version, "i_version"},
> > + {Opt_dax_str, "dax=%s"},
> > {Opt_dax, "dax"},
> > {Opt_stripe, "stripe=%u"},
> > {Opt_delalloc, "delalloc"},
> > @@ -1767,6 +1769,7 @@ static const struct mount_opts {
> > {Opt_min_batch_time, 0, MOPT_GTE0},
> > {Opt_inode_readahead_blks, 0, MOPT_GTE0},
> > {Opt_init_itable, 0, MOPT_GTE0},
> > + {Opt_dax_str, 0, MOPT_STRING},
> > {Opt_dax, EXT4_MOUNT_DAX_ALWAYS, MOPT_SET},
> > {Opt_stripe, 0, MOPT_GTE0},
> > {Opt_resuid, 0, MOPT_GTE0},
> > @@ -2076,13 +2079,32 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
> > }
> > sbi->s_jquota_fmt = m->mount_opt;
> > #endif
> > - } else if (token == Opt_dax) {
> > + } else if (token == Opt_dax || token == Opt_dax_str) {
> > #ifdef CONFIG_FS_DAX
> > - ext4_msg(sb, KERN_WARNING,
> > - "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
> > - sbi->s_mount_opt |= m->mount_opt;
> > + char *tmp = match_strdup(&args[0]);
> > +
> > + if (!tmp || !strcmp(tmp, "always")) {
> > + ext4_msg(sb, KERN_WARNING,
> > + "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
> > + sbi->s_mount_opt |= EXT4_MOUNT_DAX_ALWAYS;
> > + sbi->s_mount_opt2 &= ~EXT4_MOUNT2_DAX_NEVER;
> > + } else if (!strcmp(tmp, "never")) {
> > + sbi->s_mount_opt2 |= EXT4_MOUNT2_DAX_NEVER;
> > + sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS;
> > + } else if (!strcmp(tmp, "inode")) {
> > + sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS;
> > + sbi->s_mount_opt2 &= ~EXT4_MOUNT2_DAX_NEVER;
> > + } else {
> > + ext4_msg(sb, KERN_WARNING, "DAX invalid option.");
> > + kfree(tmp);
> > + return -1;
> > + }
> > +
> > + kfree(tmp);
> > #else
> > ext4_msg(sb, KERN_INFO, "dax option not supported");
> > + sbi->s_mount_opt2 |= EXT4_MOUNT2_DAX_NEVER;
> > + sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS;
> > return -1;
> > #endif
> > } else if (token == Opt_data_err_abort) {
> > @@ -2306,6 +2328,13 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
> > if (DUMMY_ENCRYPTION_ENABLED(sbi))
> > SEQ_OPTS_PUTS("test_dummy_encryption");
> >
> > + if (test_opt2(sb, DAX_NEVER))
> > + SEQ_OPTS_PUTS("dax=never");
> > + else if (test_opt(sb, DAX_ALWAYS))
> > + SEQ_OPTS_PUTS("dax=always");
> > + else
> > + SEQ_OPTS_PUTS("dax=inode");
>
> dax=inode is the default; do you need to show it?
>
> (Especially since xfs doesn't...)

I'll only show it if -o dax or -o dax=inode was actually specified per earlier
comments regarding ext4 behavior.

Ira

>
> --D
>
> > +
> > ext4_show_quota_options(seq, sb);
> > return 0;
> > }
> > @@ -5425,10 +5454,12 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
> > goto restore_opts;
> > }
> >
> > - if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX_ALWAYS) {
> > + if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX_ALWAYS ||
> > + (sbi->s_mount_opt2 ^ old_opts.s_mount_opt2) & EXT4_MOUNT2_DAX_NEVER) {
> > ext4_msg(sb, KERN_WARNING, "warning: refusing change of "
> > - "dax flag with busy inodes while remounting");
> > + "dax mount option with busy inodes while remounting");
> > sbi->s_mount_opt ^= EXT4_MOUNT_DAX_ALWAYS;
> > + sbi->s_mount_opt2 ^= EXT4_MOUNT2_DAX_NEVER;
> > }
> >
> > if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
> > --
> > 2.25.1
> >