2022-01-10 04:49:53

by Xin Yin

[permalink] [raw]
Subject: [PATCH v2 0/2] ext4: fix issues when fast commit work with jbd

When test fast commit with xfstests generic/455, some logic issues were
found. When a full commit is ongonig, the logic of fast commit tracking seems
not correct. The first patch fix the ineligible commit case , and the
second patch fix the common fast commit case.

After testing this patch set with xfstests log and quick group, no
regressions were found, and the generic/455 can pass now.

---
v2: drop EXT4_MF_FC_COMMITTING

Xin Yin (2):
ext4: fast commit may not fallback for ineligible commit
ext4: fast commit may miss file actions

fs/ext4/ext4.h | 3 ++-
fs/ext4/extents.c | 4 ++--
fs/ext4/fast_commit.c | 28 +++++++++++++++++++---------
fs/ext4/inode.c | 4 ++--
fs/ext4/ioctl.c | 4 ++--
fs/ext4/namei.c | 4 ++--
fs/ext4/super.c | 1 +
fs/ext4/xattr.c | 6 +++---
fs/jbd2/commit.c | 2 +-
fs/jbd2/journal.c | 2 +-
include/linux/jbd2.h | 2 +-
11 files changed, 36 insertions(+), 24 deletions(-)

--
2.20.1



2022-01-10 04:50:05

by Xin Yin

[permalink] [raw]
Subject: [PATCH v2 1/2] ext4: fast commit may not fallback for ineligible commit

for the follow scenario:
1. jbd start commit transaction n
2. task A get new handle for transaction n+1
3. task A do some ineligible actions and mark FC_INELIGIBLE
4. jbd complete transaction n and clean FC_INELIGIBLE
5. task A call fsync

in this case fast commit will not fallback to full commit and
transaction n+1 also not handled by jbd.

make ext4_fc_mark_ineligible() also record transaction tid for
latest ineligible case, when call ext4_fc_cleanup() check
current transaction tid, if small than latest ineligible tid
do not clear the EXT4_MF_FC_INELIGIBLE.

Signed-off-by: Xin Yin <[email protected]>
---
fs/ext4/ext4.h | 3 ++-
fs/ext4/extents.c | 4 ++--
fs/ext4/fast_commit.c | 19 +++++++++++++------
fs/ext4/inode.c | 4 ++--
fs/ext4/ioctl.c | 4 ++--
fs/ext4/namei.c | 4 ++--
fs/ext4/super.c | 1 +
fs/ext4/xattr.c | 6 +++---
fs/jbd2/commit.c | 2 +-
fs/jbd2/journal.c | 2 +-
include/linux/jbd2.h | 2 +-
11 files changed, 30 insertions(+), 21 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 7b0686758691..a060bb56e654 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1747,6 +1747,7 @@ struct ext4_sb_info {
spinlock_t s_fc_lock;
struct buffer_head *s_fc_bh;
struct ext4_fc_stats s_fc_stats;
+ tid_t s_fc_ineligible_tid;
#ifdef CONFIG_EXT4_DEBUG
int s_fc_debug_max_replay;
#endif
@@ -2924,7 +2925,7 @@ void __ext4_fc_track_create(handle_t *handle, struct inode *inode,
struct dentry *dentry);
void ext4_fc_track_create(handle_t *handle, struct dentry *dentry);
void ext4_fc_track_inode(handle_t *handle, struct inode *inode);
-void ext4_fc_mark_ineligible(struct super_block *sb, int reason);
+void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handle);
void ext4_fc_start_update(struct inode *inode);
void ext4_fc_stop_update(struct inode *inode);
void ext4_fc_del(struct inode *inode);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 9b6c76629c93..3be90aa5a5ba 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -5339,7 +5339,7 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
ret = PTR_ERR(handle);
goto out_mmap;
}
- ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE);
+ ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle);

down_write(&EXT4_I(inode)->i_data_sem);
ext4_discard_preallocations(inode, 0);
@@ -5479,7 +5479,7 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
ret = PTR_ERR(handle);
goto out_mmap;
}
- ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE);
+ ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle);

/* Expand file to avoid data loss if there is error while shifting */
inode->i_size += len;
diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
index 0812438d10cc..444c0e5be932 100644
--- a/fs/ext4/fast_commit.c
+++ b/fs/ext4/fast_commit.c
@@ -303,7 +303,7 @@ void ext4_fc_del(struct inode *inode)
* Mark file system as fast commit ineligible. This means that next commit
* operation would result in a full jbd2 commit.
*/
-void ext4_fc_mark_ineligible(struct super_block *sb, int reason)
+void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handle)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);

@@ -312,6 +312,10 @@ void ext4_fc_mark_ineligible(struct super_block *sb, int reason)
return;

ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
+ spin_lock(&sbi->s_fc_lock);
+ if (handle && !IS_ERR(handle))
+ sbi->s_fc_ineligible_tid = handle->h_transaction->t_tid;
+ spin_unlock(&sbi->s_fc_lock);
WARN_ON(reason >= EXT4_FC_REASON_MAX);
sbi->s_fc_stats.fc_ineligible_reason_count[reason]++;
}
@@ -387,7 +391,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
mutex_unlock(&ei->i_fc_lock);
node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS);
if (!node) {
- ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM);
+ ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM, NULL);
mutex_lock(&ei->i_fc_lock);
return -ENOMEM;
}
@@ -400,7 +404,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
if (!node->fcd_name.name) {
kmem_cache_free(ext4_fc_dentry_cachep, node);
ext4_fc_mark_ineligible(inode->i_sb,
- EXT4_FC_REASON_NOMEM);
+ EXT4_FC_REASON_NOMEM, NULL);
mutex_lock(&ei->i_fc_lock);
return -ENOMEM;
}
@@ -502,7 +506,7 @@ void ext4_fc_track_inode(handle_t *handle, struct inode *inode)

if (ext4_should_journal_data(inode)) {
ext4_fc_mark_ineligible(inode->i_sb,
- EXT4_FC_REASON_INODE_JOURNAL_DATA);
+ EXT4_FC_REASON_INODE_JOURNAL_DATA, handle);
return;
}

@@ -1180,7 +1184,7 @@ int ext4_fc_commit(journal_t *journal, tid_t commit_tid)
* Fast commit cleanup routine. This is called after every fast commit and
* full commit. full is true if we are called after a full commit.
*/
-static void ext4_fc_cleanup(journal_t *journal, int full)
+static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid)
{
struct super_block *sb = journal->j_private;
struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -1228,7 +1232,10 @@ static void ext4_fc_cleanup(journal_t *journal, int full)
&sbi->s_fc_q[FC_Q_MAIN]);

ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING);
- ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
+ if (tid >= sbi->s_fc_ineligible_tid) {
+ sbi->s_fc_ineligible_tid = 0;
+ ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
+ }

if (full)
sbi->s_fc_bytes = 0;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 08a90e25b78b..2ffa9bbb1324 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -337,7 +337,7 @@ void ext4_evict_inode(struct inode *inode)
return;
no_delete:
if (!list_empty(&EXT4_I(inode)->i_fc_list))
- ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM);
+ ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM, handle);
ext4_clear_inode(inode); /* We must guarantee clearing of inode... */
}

@@ -5995,7 +5995,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
return PTR_ERR(handle);

ext4_fc_mark_ineligible(inode->i_sb,
- EXT4_FC_REASON_JOURNAL_FLAG_CHANGE);
+ EXT4_FC_REASON_JOURNAL_FLAG_CHANGE, handle);
err = ext4_mark_inode_dirty(handle, inode);
ext4_handle_sync(handle);
ext4_journal_stop(handle);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 1366afb59fba..0557402ac4fd 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -169,7 +169,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
err = -EINVAL;
goto err_out;
}
- ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_SWAP_BOOT);
+ ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_SWAP_BOOT, handle);

/* Protect extent tree against block allocations via delalloc */
ext4_double_down_write_data_sem(inode, inode_bl);
@@ -1073,7 +1073,7 @@ static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)

err = ext4_resize_fs(sb, n_blocks_count);
if (EXT4_SB(sb)->s_journal) {
- ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_RESIZE);
+ ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_RESIZE, NULL);
jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal, 0);
jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 52c9bd154122..47b9f87dbc6f 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -3889,7 +3889,7 @@ static int ext4_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
* dirents in directories.
*/
ext4_fc_mark_ineligible(old.inode->i_sb,
- EXT4_FC_REASON_RENAME_DIR);
+ EXT4_FC_REASON_RENAME_DIR, handle);
} else {
if (new.inode)
ext4_fc_track_unlink(handle, new.dentry);
@@ -4049,7 +4049,7 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
if (unlikely(retval))
goto end_rename;
ext4_fc_mark_ineligible(new.inode->i_sb,
- EXT4_FC_REASON_CROSS_RENAME);
+ EXT4_FC_REASON_CROSS_RENAME, handle);
if (old.dir_bh) {
retval = ext4_rename_dir_finish(handle, &old, new.dir->i_ino);
if (retval)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index d81423021e3b..6049547d3c0f 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -4627,6 +4627,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_fc_bytes = 0;
ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING);
+ sbi->s_fc_ineligible_tid = 0;
spin_lock_init(&sbi->s_fc_lock);
memset(&sbi->s_fc_stats, 0, sizeof(sbi->s_fc_stats));
sbi->s_fc_replay_state.fc_regions = NULL;
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 1e0fc1ed845b..c1748730c1df 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -2408,7 +2408,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
if (IS_SYNC(inode))
ext4_handle_sync(handle);
}
- ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR);
+ ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR, handle);

cleanup:
brelse(is.iloc.bh);
@@ -2486,7 +2486,7 @@ ext4_xattr_set(struct inode *inode, int name_index, const char *name,
if (error == 0)
error = error2;
}
- ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR);
+ ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR, handle);

return error;
}
@@ -2920,7 +2920,7 @@ int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
error);
goto cleanup;
}
- ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR);
+ ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR, handle);
}
error = 0;
cleanup:
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 3cc4ab2ba7f4..d188fa913a07 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -1170,7 +1170,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
if (journal->j_commit_callback)
journal->j_commit_callback(journal, commit_transaction);
if (journal->j_fc_cleanup_callback)
- journal->j_fc_cleanup_callback(journal, 1);
+ journal->j_fc_cleanup_callback(journal, 1, commit_transaction->t_tid);

trace_jbd2_end_commit(journal, commit_transaction);
jbd_debug(1, "JBD2: commit %d complete, head %d\n",
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 0b86a4365b66..a8e64ad11ae3 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -771,7 +771,7 @@ static int __jbd2_fc_end_commit(journal_t *journal, tid_t tid, bool fallback)
{
jbd2_journal_unlock_updates(journal);
if (journal->j_fc_cleanup_callback)
- journal->j_fc_cleanup_callback(journal, 0);
+ journal->j_fc_cleanup_callback(journal, 0, tid);
write_lock(&journal->j_state_lock);
journal->j_flags &= ~JBD2_FAST_COMMIT_ONGOING;
if (fallback)
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index fd933c45281a..d63b8106796e 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1295,7 +1295,7 @@ struct journal_s
* Clean-up after fast commit or full commit. JBD2 calls this function
* after every commit operation.
*/
- void (*j_fc_cleanup_callback)(struct journal_s *journal, int);
+ void (*j_fc_cleanup_callback)(struct journal_s *journal, int full, tid_t tid);

/**
* @j_fc_replay_callback:
--
2.20.1


2022-01-10 04:50:09

by Xin Yin

[permalink] [raw]
Subject: [PATCH v2 2/2] ext4: fast commit may miss file actions

in the follow scenario:
1. jbd start transaction n
2. task A get new handle for transaction n+1
3. task A do some actions and add inode to FC_Q_MAIN fc_q
4. jbd complete transaction n and clear FC_Q_MAIN fc_q
5. task A call fsync

fast commit will lost the file actions during a full commit.

we should also add updates to staging queue during a full commit.
and in ext4_fc_cleanup(), when reset a inode's fc track range, check
it's i_sync_tid, if it bigger than current transaction tid, do not
rest it, or we will lost the track range.

And EXT4_MF_FC_COMMITTING is not needed anymore, so drop it.

Signed-off-by: Xin Yin <[email protected]>
---
v2: drop EXT4_MF_FC_COMMITTING
---
fs/ext4/ext4.h | 5 +----
fs/ext4/fast_commit.c | 11 ++++++-----
fs/ext4/super.c | 1 -
3 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index a060bb56e654..589b1e335ad4 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1793,10 +1793,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
enum {
EXT4_MF_MNTDIR_SAMPLED,
EXT4_MF_FS_ABORTED, /* Fatal error detected */
- EXT4_MF_FC_INELIGIBLE, /* Fast commit ineligible */
- EXT4_MF_FC_COMMITTING /* File system underoing a fast
- * commit.
- */
+ EXT4_MF_FC_INELIGIBLE /* Fast commit ineligible */
};

static inline void ext4_set_mount_flag(struct super_block *sb, int bit)
diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
index 444c0e5be932..7966e3bdba27 100644
--- a/fs/ext4/fast_commit.c
+++ b/fs/ext4/fast_commit.c
@@ -365,7 +365,8 @@ static int ext4_fc_track_template(
spin_lock(&sbi->s_fc_lock);
if (list_empty(&EXT4_I(inode)->i_fc_list))
list_add_tail(&EXT4_I(inode)->i_fc_list,
- (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING)) ?
+ (sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING ||
+ sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING) ?
&sbi->s_fc_q[FC_Q_STAGING] :
&sbi->s_fc_q[FC_Q_MAIN]);
spin_unlock(&sbi->s_fc_lock);
@@ -418,7 +419,8 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
node->fcd_name.len = dentry->d_name.len;

spin_lock(&sbi->s_fc_lock);
- if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING))
+ if (sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING ||
+ sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING)
list_add_tail(&node->fcd_list,
&sbi->s_fc_dentry_q[FC_Q_STAGING]);
else
@@ -884,7 +886,6 @@ static int ext4_fc_submit_inode_data_all(journal_t *journal)
int ret = 0;

spin_lock(&sbi->s_fc_lock);
- ext4_set_mount_flag(sb, EXT4_MF_FC_COMMITTING);
list_for_each_entry(ei, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
ext4_set_inode_state(&ei->vfs_inode, EXT4_STATE_FC_COMMITTING);
while (atomic_read(&ei->i_fc_updates)) {
@@ -1202,7 +1203,8 @@ static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid)
list_del_init(&iter->i_fc_list);
ext4_clear_inode_state(&iter->vfs_inode,
EXT4_STATE_FC_COMMITTING);
- ext4_fc_reset_inode(&iter->vfs_inode);
+ if (iter->i_sync_tid <= tid)
+ ext4_fc_reset_inode(&iter->vfs_inode);
/* Make sure EXT4_STATE_FC_COMMITTING bit is clear */
smp_mb();
#if (BITS_PER_LONG < 64)
@@ -1231,7 +1233,6 @@ static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid)
list_splice_init(&sbi->s_fc_q[FC_Q_STAGING],
&sbi->s_fc_q[FC_Q_MAIN]);

- ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING);
if (tid >= sbi->s_fc_ineligible_tid) {
sbi->s_fc_ineligible_tid = 0;
ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 6049547d3c0f..5404516ea7e0 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -4626,7 +4626,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_STAGING]);
sbi->s_fc_bytes = 0;
ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
- ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING);
sbi->s_fc_ineligible_tid = 0;
spin_lock_init(&sbi->s_fc_lock);
memset(&sbi->s_fc_stats, 0, sizeof(sbi->s_fc_stats));
--
2.20.1