2023-05-03 14:21:14

by Jeff Layton

[permalink] [raw]
Subject: [PATCH v3 0/6] fs: implement multigrain timestamps

Major changes in v3:
- move flag to use bit 31 instead of 0 since the upper bits in the
tv_nsec field aren't used for timestamps. This means we don't need to
set s_time_gran to a value higher than 1.

- use an fstype flag instead of a superblock flag

...plus a lot of smaller cleanups and documentation.

The basic idea with multigrain timestamps is to keep track of when an
inode's mtime or ctime has been queried and to force a fine-grained
timestamp the next time the mtime or ctime is updated.

This is a follow-up of the patches I posted last week [1]. The main
change in this set is that it no longer uses the lowest-order bit in the
tv_nsec field, and instead uses one of the higher-order bits (#31,
specifically) since they are otherwise unused. This change makes things
much simpler, and we no longer need to twiddle s_time_gran for it.

Note that with these changes, the statx06 LTP test will intermittently
fail on most filesystems, usually with errors like this:

statx06.c:138: TFAIL: Birth time > after_time
statx06.c:138: TFAIL: Modified time > after_time

The test does this:

SAFE_CLOCK_GETTIME(CLOCK_REALTIME_COARSE, &before_time);
clock_wait_tick();
tc->operation();
clock_wait_tick();
SAFE_CLOCK_GETTIME(CLOCK_REALTIME_COARSE, &after_time);

Converting the second SAFE_CLOCK_GETTIME to use CLOCK_REALTIME instead
gets things working again.

For now, I've only converted/tested a few filesystems, focusing on the
most popular ones exported via NFS. If this approach looks acceptable
though, I'll plan to convert more filesystems to it.

Another thing we could consider is enabling this unilaterally
kernel-wide. I decided not to do that for now, but it's something we
could revisit later.

[1]: https://lore.kernel.org/linux-fsdevel/[email protected]/

Jeff Layton (6):
fs: add infrastructure for multigrain inode i_m/ctime
overlayfs: allow it to handle multigrain timestamps
shmem: convert to multigrain timestamps
xfs: convert to multigrain timestamps
ext4: convert to multigrain timestamps
btrfs: convert to multigrain timestamps

fs/btrfs/delayed-inode.c | 2 +-
fs/btrfs/file.c | 10 +++---
fs/btrfs/inode.c | 25 +++++++-------
fs/btrfs/ioctl.c | 6 ++--
fs/btrfs/reflink.c | 2 +-
fs/btrfs/super.c | 5 +--
fs/btrfs/transaction.c | 2 +-
fs/btrfs/tree-log.c | 2 +-
fs/btrfs/volumes.c | 2 +-
fs/btrfs/xattr.c | 4 +--
fs/ext4/acl.c | 2 +-
fs/ext4/extents.c | 10 +++---
fs/ext4/ialloc.c | 2 +-
fs/ext4/inline.c | 4 +--
fs/ext4/inode.c | 24 ++++++++++---
fs/ext4/ioctl.c | 8 ++---
fs/ext4/namei.c | 20 +++++------
fs/ext4/super.c | 4 +--
fs/ext4/xattr.c | 2 +-
fs/inode.c | 52 ++++++++++++++++++++++++++--
fs/overlayfs/file.c | 7 ++--
fs/overlayfs/util.c | 2 +-
fs/stat.c | 32 +++++++++++++++++
fs/xfs/libxfs/xfs_inode_buf.c | 2 +-
fs/xfs/libxfs/xfs_trans_inode.c | 2 +-
fs/xfs/xfs_acl.c | 2 +-
fs/xfs/xfs_bmap_util.c | 2 +-
fs/xfs/xfs_inode.c | 2 +-
fs/xfs/xfs_inode_item.c | 2 +-
fs/xfs/xfs_iops.c | 15 ++++++--
fs/xfs/xfs_super.c | 2 +-
include/linux/fs.h | 61 ++++++++++++++++++++++++++++++++-
mm/shmem.c | 25 +++++++-------
33 files changed, 255 insertions(+), 89 deletions(-)

--
2.40.1


2023-05-03 14:22:13

by Jeff Layton

[permalink] [raw]
Subject: [PATCH v3 3/6] shmem: convert to multigrain timestamps

Signed-off-by: Jeff Layton <[email protected]>
---
mm/shmem.c | 25 +++++++++++++------------
1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index 448f393d8ab2..40c794a7baa8 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1039,7 +1039,7 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
{
shmem_undo_range(inode, lstart, lend, false);
- inode->i_ctime = inode->i_mtime = current_time(inode);
+ inode->i_ctime = inode->i_mtime = current_ctime(inode);
inode_inc_iversion(inode);
}
EXPORT_SYMBOL_GPL(shmem_truncate_range);
@@ -1066,6 +1066,7 @@ static int shmem_getattr(struct mnt_idmap *idmap,
STATX_ATTR_IMMUTABLE |
STATX_ATTR_NODUMP);
generic_fillattr(idmap, inode, stat);
+ generic_fill_multigrain_cmtime(request_mask, inode, stat);

if (shmem_is_huge(inode, 0, false, NULL, 0))
stat->blksize = HPAGE_PMD_SIZE;
@@ -1136,7 +1137,7 @@ static int shmem_setattr(struct mnt_idmap *idmap,
if (attr->ia_valid & ATTR_MODE)
error = posix_acl_chmod(idmap, dentry, inode->i_mode);
if (!error && update_ctime) {
- inode->i_ctime = current_time(inode);
+ inode->i_ctime = current_ctime(inode);
if (update_mtime)
inode->i_mtime = inode->i_ctime;
inode_inc_iversion(inode);
@@ -2361,7 +2362,7 @@ static struct inode *shmem_get_inode(struct mnt_idmap *idmap, struct super_block
inode->i_ino = ino;
inode_init_owner(idmap, inode, dir, mode);
inode->i_blocks = 0;
- inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_atime = inode->i_mtime = inode->i_ctime = current_ctime(inode);
inode->i_generation = get_random_u32();
info = SHMEM_I(inode);
memset(info, 0, (char *)inode - (char *)info);
@@ -2940,7 +2941,7 @@ shmem_mknod(struct mnt_idmap *idmap, struct inode *dir,

error = 0;
dir->i_size += BOGO_DIRENT_SIZE;
- dir->i_ctime = dir->i_mtime = current_time(dir);
+ dir->i_ctime = dir->i_mtime = current_ctime(dir);
inode_inc_iversion(dir);
d_instantiate(dentry, inode);
dget(dentry); /* Extra count - pin the dentry in core */
@@ -3016,7 +3017,7 @@ static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentr
}

dir->i_size += BOGO_DIRENT_SIZE;
- inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
+ inode->i_ctime = dir->i_ctime = dir->i_mtime = current_ctime(inode);
inode_inc_iversion(dir);
inc_nlink(inode);
ihold(inode); /* New dentry reference */
@@ -3034,7 +3035,7 @@ static int shmem_unlink(struct inode *dir, struct dentry *dentry)
shmem_free_inode(inode->i_sb);

dir->i_size -= BOGO_DIRENT_SIZE;
- inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
+ inode->i_ctime = dir->i_ctime = dir->i_mtime = current_ctime(inode);
inode_inc_iversion(dir);
drop_nlink(inode);
dput(dentry); /* Undo the count from "create" - this does all the work */
@@ -3124,7 +3125,7 @@ static int shmem_rename2(struct mnt_idmap *idmap,
new_dir->i_size += BOGO_DIRENT_SIZE;
old_dir->i_ctime = old_dir->i_mtime =
new_dir->i_ctime = new_dir->i_mtime =
- inode->i_ctime = current_time(old_dir);
+ inode->i_ctime = current_ctime(old_dir);
inode_inc_iversion(old_dir);
inode_inc_iversion(new_dir);
return 0;
@@ -3178,7 +3179,7 @@ static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir,
folio_put(folio);
}
dir->i_size += BOGO_DIRENT_SIZE;
- dir->i_ctime = dir->i_mtime = current_time(dir);
+ dir->i_ctime = dir->i_mtime = current_ctime(dir);
inode_inc_iversion(dir);
d_instantiate(dentry, inode);
dget(dentry);
@@ -3250,7 +3251,7 @@ static int shmem_fileattr_set(struct mnt_idmap *idmap,
(fa->flags & SHMEM_FL_USER_MODIFIABLE);

shmem_set_inode_flags(inode, info->fsflags);
- inode->i_ctime = current_time(inode);
+ inode->i_ctime = current_ctime(inode);
inode_inc_iversion(inode);
return 0;
}
@@ -3320,7 +3321,7 @@ static int shmem_xattr_handler_set(const struct xattr_handler *handler,
name = xattr_full_name(handler, name);
err = simple_xattr_set(&info->xattrs, name, value, size, flags, NULL);
if (!err) {
- inode->i_ctime = current_time(inode);
+ inode->i_ctime = current_ctime(inode);
inode_inc_iversion(inode);
}
return err;
@@ -4052,9 +4053,9 @@ static struct file_system_type shmem_fs_type = {
#endif
.kill_sb = kill_litter_super,
#ifdef CONFIG_SHMEM
- .fs_flags = FS_USERNS_MOUNT | FS_ALLOW_IDMAP,
+ .fs_flags = FS_USERNS_MOUNT | FS_ALLOW_IDMAP | FS_MULTIGRAIN_TS
#else
- .fs_flags = FS_USERNS_MOUNT,
+ .fs_flags = FS_USERNS_MOUNT | FS_MULTIGRAIN_TS,
#endif
};

--
2.40.1

2023-05-03 14:22:21

by Jeff Layton

[permalink] [raw]
Subject: [PATCH v3 4/6] xfs: convert to multigrain timestamps

With this change, also have XFS stop reporting a STATX_CHANGE_COOKIE, so
that nfsd will use the ctime instead.

Signed-off-by: Jeff Layton <[email protected]>
---
fs/xfs/libxfs/xfs_inode_buf.c | 2 +-
fs/xfs/libxfs/xfs_trans_inode.c | 2 +-
fs/xfs/xfs_acl.c | 2 +-
fs/xfs/xfs_bmap_util.c | 2 +-
fs/xfs/xfs_inode.c | 2 +-
fs/xfs/xfs_inode_item.c | 2 +-
fs/xfs/xfs_iops.c | 15 ++++++++++++---
fs/xfs/xfs_super.c | 2 +-
8 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index 758aacd8166b..c29e961fac34 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -316,7 +316,7 @@ xfs_inode_to_disk(

to->di_atime = xfs_inode_to_disk_ts(ip, inode->i_atime);
to->di_mtime = xfs_inode_to_disk_ts(ip, inode->i_mtime);
- to->di_ctime = xfs_inode_to_disk_ts(ip, inode->i_ctime);
+ to->di_ctime = xfs_inode_to_disk_ts(ip, ctime_peek(inode));
to->di_nlink = cpu_to_be32(inode->i_nlink);
to->di_gen = cpu_to_be32(inode->i_generation);
to->di_mode = cpu_to_be16(inode->i_mode);
diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c
index 8b5547073379..c08be3aa3339 100644
--- a/fs/xfs/libxfs/xfs_trans_inode.c
+++ b/fs/xfs/libxfs/xfs_trans_inode.c
@@ -63,7 +63,7 @@ xfs_trans_ichgtime(
ASSERT(tp);
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));

- tv = current_time(inode);
+ tv = current_ctime(inode);

if (flags & XFS_ICHGTIME_MOD)
inode->i_mtime = tv;
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 791db7d9c849..85353e6e9004 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -233,7 +233,7 @@ xfs_acl_set_mode(
xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
inode->i_mode = mode;
- inode->i_ctime = current_time(inode);
+ inode->i_ctime = current_ctime(inode);
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

if (xfs_has_wsync(mp))
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index a09dd2606479..e9cb1bfb9574 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1757,7 +1757,7 @@ xfs_swap_extents(
* under it.
*/
if ((sbp->bs_ctime.tv_sec != VFS_I(ip)->i_ctime.tv_sec) ||
- (sbp->bs_ctime.tv_nsec != VFS_I(ip)->i_ctime.tv_nsec) ||
+ (sbp->bs_ctime.tv_nsec != ctime_nsec_peek(VFS_I(ip))) ||
(sbp->bs_mtime.tv_sec != VFS_I(ip)->i_mtime.tv_sec) ||
(sbp->bs_mtime.tv_nsec != VFS_I(ip)->i_mtime.tv_nsec)) {
error = -EBUSY;
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 5808abab786c..ac299c1a9838 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -843,7 +843,7 @@ xfs_init_new_inode(
ip->i_df.if_nextents = 0;
ASSERT(ip->i_nblocks == 0);

- tv = current_time(inode);
+ tv = current_ctime(inode);
inode->i_mtime = tv;
inode->i_atime = tv;
inode->i_ctime = tv;
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index ca2941ab6cbc..018f187387f0 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -381,7 +381,7 @@ xfs_inode_to_log_dinode(
memset(to->di_pad3, 0, sizeof(to->di_pad3));
to->di_atime = xfs_inode_to_log_dinode_ts(ip, inode->i_atime);
to->di_mtime = xfs_inode_to_log_dinode_ts(ip, inode->i_mtime);
- to->di_ctime = xfs_inode_to_log_dinode_ts(ip, inode->i_ctime);
+ to->di_ctime = xfs_inode_to_log_dinode_ts(ip, ctime_peek(inode));
to->di_nlink = inode->i_nlink;
to->di_gen = inode->i_generation;
to->di_mode = inode->i_mode;
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 24718adb3c16..f41155cfbbe2 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -573,8 +573,17 @@ xfs_vn_getattr(
stat->gid = vfsgid_into_kgid(vfsgid);
stat->ino = ip->i_ino;
stat->atime = inode->i_atime;
- stat->mtime = inode->i_mtime;
- stat->ctime = inode->i_ctime;
+ generic_fill_multigrain_cmtime(request_mask, inode, stat);
+
+ /*
+ * XFS's i_version counter doesn't conform to the rules that other
+ * filesystems live by. In particular, it changes the version on atime
+ * updates which leads to excess cache invalidations on NFS. Just clear
+ * the STATX_CHANGE_COOKIE flag so that nfsd (and others) use the
+ * (multigrain) ctime instead.
+ */
+ stat->result_mask &= ~STATX_CHANGE_COOKIE;
+
stat->blocks = XFS_FSB_TO_BB(mp, ip->i_nblocks + ip->i_delayed_blks);

if (xfs_has_v3inodes(mp)) {
@@ -917,7 +926,7 @@ xfs_setattr_size(
if (newsize != oldsize &&
!(iattr->ia_valid & (ATTR_CTIME | ATTR_MTIME))) {
iattr->ia_ctime = iattr->ia_mtime =
- current_time(inode);
+ current_ctime(inode);
iattr->ia_valid |= ATTR_CTIME | ATTR_MTIME;
}

diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 4f814f9e12ab..db3943d09532 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1976,7 +1976,7 @@ static struct file_system_type xfs_fs_type = {
.init_fs_context = xfs_init_fs_context,
.parameters = xfs_fs_parameters,
.kill_sb = kill_block_super,
- .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
+ .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP | FS_MULTIGRAIN_TS,
};
MODULE_ALIAS_FS("xfs");

--
2.40.1