2022-01-07 12:12:48

by Xin Yin

[permalink] [raw]
Subject: [PATCH 0/2] ext4: fix issues when fast commit work with jbd

When testing fast commit with xfstests generic/455, some logic issues were
found. When a full commit is ongoing, the logic of fast commit tracking seems
incorrect. The first patch fixes the ineligible commit case, and the
second patch fixes the common fast commit case.

After testing this patch set with the xfstests log and quick groups, no
regressions were found, and generic/455 now passes.

Xin Yin (2):
ext4: fast commit may not fallback for ineligible commit
ext4: fast commit may miss file actions

fs/ext4/ext4.h | 3 ++-
fs/ext4/extents.c | 4 ++--
fs/ext4/fast_commit.c | 28 +++++++++++++++++++---------
fs/ext4/inode.c | 4 ++--
fs/ext4/ioctl.c | 4 ++--
fs/ext4/namei.c | 4 ++--
fs/ext4/super.c | 1 +
fs/ext4/xattr.c | 6 +++---
fs/jbd2/commit.c | 2 +-
fs/jbd2/journal.c | 2 +-
include/linux/jbd2.h | 2 +-
11 files changed, 36 insertions(+), 24 deletions(-)

--
2.20.1



2022-01-07 12:12:56

by Xin Yin

[permalink] [raw]
Subject: [PATCH 1/2] ext4: fast commit may not fallback for ineligible commit

For the following scenario:
1. jbd starts committing transaction n
2. task A gets a new handle for transaction n+1
3. task A does some ineligible actions and marks FC_INELIGIBLE
4. jbd completes transaction n and clears FC_INELIGIBLE
5. task A calls fsync

In this case fast commit will not fall back to a full commit, and
transaction n+1 is also not handled by jbd.

Make ext4_fc_mark_ineligible() also record the transaction tid of the
latest ineligible action. When ext4_fc_cleanup() is called, check the
current transaction tid; if it is smaller than the latest ineligible tid,
do not clear EXT4_MF_FC_INELIGIBLE.

Signed-off-by: Xin Yin <[email protected]>
---
fs/ext4/ext4.h | 3 ++-
fs/ext4/extents.c | 4 ++--
fs/ext4/fast_commit.c | 19 +++++++++++++------
fs/ext4/inode.c | 4 ++--
fs/ext4/ioctl.c | 4 ++--
fs/ext4/namei.c | 4 ++--
fs/ext4/super.c | 1 +
fs/ext4/xattr.c | 6 +++---
fs/jbd2/commit.c | 2 +-
fs/jbd2/journal.c | 2 +-
include/linux/jbd2.h | 2 +-
11 files changed, 30 insertions(+), 21 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 7b0686758691..a060bb56e654 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1747,6 +1747,7 @@ struct ext4_sb_info {
spinlock_t s_fc_lock;
struct buffer_head *s_fc_bh;
struct ext4_fc_stats s_fc_stats;
+ tid_t s_fc_ineligible_tid;
#ifdef CONFIG_EXT4_DEBUG
int s_fc_debug_max_replay;
#endif
@@ -2924,7 +2925,7 @@ void __ext4_fc_track_create(handle_t *handle, struct inode *inode,
struct dentry *dentry);
void ext4_fc_track_create(handle_t *handle, struct dentry *dentry);
void ext4_fc_track_inode(handle_t *handle, struct inode *inode);
-void ext4_fc_mark_ineligible(struct super_block *sb, int reason);
+void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handle);
void ext4_fc_start_update(struct inode *inode);
void ext4_fc_stop_update(struct inode *inode);
void ext4_fc_del(struct inode *inode);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 9b6c76629c93..3be90aa5a5ba 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -5339,7 +5339,7 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
ret = PTR_ERR(handle);
goto out_mmap;
}
- ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE);
+ ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle);

down_write(&EXT4_I(inode)->i_data_sem);
ext4_discard_preallocations(inode, 0);
@@ -5479,7 +5479,7 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
ret = PTR_ERR(handle);
goto out_mmap;
}
- ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE);
+ ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle);

/* Expand file to avoid data loss if there is error while shifting */
inode->i_size += len;
diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
index f0cd20f5fe5e..3673d4798af3 100644
--- a/fs/ext4/fast_commit.c
+++ b/fs/ext4/fast_commit.c
@@ -303,7 +303,7 @@ void ext4_fc_del(struct inode *inode)
* Mark file system as fast commit ineligible. This means that next commit
* operation would result in a full jbd2 commit.
*/
-void ext4_fc_mark_ineligible(struct super_block *sb, int reason)
+void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handle)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);

@@ -312,6 +312,10 @@ void ext4_fc_mark_ineligible(struct super_block *sb, int reason)
return;

ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
+ spin_lock(&sbi->s_fc_lock);
+ if (handle && !IS_ERR(handle))
+ sbi->s_fc_ineligible_tid = handle->h_transaction->t_tid;
+ spin_unlock(&sbi->s_fc_lock);
WARN_ON(reason >= EXT4_FC_REASON_MAX);
sbi->s_fc_stats.fc_ineligible_reason_count[reason]++;
}
@@ -387,7 +391,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
mutex_unlock(&ei->i_fc_lock);
node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS);
if (!node) {
- ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM);
+ ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM, NULL);
mutex_lock(&ei->i_fc_lock);
return -ENOMEM;
}
@@ -400,7 +404,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
if (!node->fcd_name.name) {
kmem_cache_free(ext4_fc_dentry_cachep, node);
ext4_fc_mark_ineligible(inode->i_sb,
- EXT4_FC_REASON_NOMEM);
+ EXT4_FC_REASON_NOMEM, NULL);
mutex_lock(&ei->i_fc_lock);
return -ENOMEM;
}
@@ -502,7 +506,7 @@ void ext4_fc_track_inode(handle_t *handle, struct inode *inode)

if (ext4_should_journal_data(inode)) {
ext4_fc_mark_ineligible(inode->i_sb,
- EXT4_FC_REASON_INODE_JOURNAL_DATA);
+ EXT4_FC_REASON_INODE_JOURNAL_DATA, handle);
return;
}

@@ -1180,7 +1184,7 @@ int ext4_fc_commit(journal_t *journal, tid_t commit_tid)
* Fast commit cleanup routine. This is called after every fast commit and
* full commit. full is true if we are called after a full commit.
*/
-static void ext4_fc_cleanup(journal_t *journal, int full)
+static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid)
{
struct super_block *sb = journal->j_private;
struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -1228,7 +1232,10 @@ static void ext4_fc_cleanup(journal_t *journal, int full)
&sbi->s_fc_q[FC_Q_MAIN]);

ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING);
- ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
+ if (tid >= sbi->s_fc_ineligible_tid) {
+ sbi->s_fc_ineligible_tid = 0;
+ ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
+ }

if (full)
sbi->s_fc_bytes = 0;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 08a90e25b78b..2ffa9bbb1324 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -337,7 +337,7 @@ void ext4_evict_inode(struct inode *inode)
return;
no_delete:
if (!list_empty(&EXT4_I(inode)->i_fc_list))
- ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM);
+ ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM, handle);
ext4_clear_inode(inode); /* We must guarantee clearing of inode... */
}

@@ -5995,7 +5995,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
return PTR_ERR(handle);

ext4_fc_mark_ineligible(inode->i_sb,
- EXT4_FC_REASON_JOURNAL_FLAG_CHANGE);
+ EXT4_FC_REASON_JOURNAL_FLAG_CHANGE, handle);
err = ext4_mark_inode_dirty(handle, inode);
ext4_handle_sync(handle);
ext4_journal_stop(handle);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 1366afb59fba..0557402ac4fd 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -169,7 +169,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
err = -EINVAL;
goto err_out;
}
- ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_SWAP_BOOT);
+ ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_SWAP_BOOT, handle);

/* Protect extent tree against block allocations via delalloc */
ext4_double_down_write_data_sem(inode, inode_bl);
@@ -1073,7 +1073,7 @@ static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)

err = ext4_resize_fs(sb, n_blocks_count);
if (EXT4_SB(sb)->s_journal) {
- ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_RESIZE);
+ ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_RESIZE, NULL);
jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal, 0);
jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 52c9bd154122..47b9f87dbc6f 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -3889,7 +3889,7 @@ static int ext4_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
* dirents in directories.
*/
ext4_fc_mark_ineligible(old.inode->i_sb,
- EXT4_FC_REASON_RENAME_DIR);
+ EXT4_FC_REASON_RENAME_DIR, handle);
} else {
if (new.inode)
ext4_fc_track_unlink(handle, new.dentry);
@@ -4049,7 +4049,7 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
if (unlikely(retval))
goto end_rename;
ext4_fc_mark_ineligible(new.inode->i_sb,
- EXT4_FC_REASON_CROSS_RENAME);
+ EXT4_FC_REASON_CROSS_RENAME, handle);
if (old.dir_bh) {
retval = ext4_rename_dir_finish(handle, &old, new.dir->i_ino);
if (retval)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index d81423021e3b..6049547d3c0f 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -4627,6 +4627,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_fc_bytes = 0;
ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING);
+ sbi->s_fc_ineligible_tid = 0;
spin_lock_init(&sbi->s_fc_lock);
memset(&sbi->s_fc_stats, 0, sizeof(sbi->s_fc_stats));
sbi->s_fc_replay_state.fc_regions = NULL;
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 1e0fc1ed845b..c1748730c1df 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -2408,7 +2408,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
if (IS_SYNC(inode))
ext4_handle_sync(handle);
}
- ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR);
+ ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR, handle);

cleanup:
brelse(is.iloc.bh);
@@ -2486,7 +2486,7 @@ ext4_xattr_set(struct inode *inode, int name_index, const char *name,
if (error == 0)
error = error2;
}
- ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR);
+ ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR, handle);

return error;
}
@@ -2920,7 +2920,7 @@ int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
error);
goto cleanup;
}
- ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR);
+ ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR, handle);
}
error = 0;
cleanup:
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 3cc4ab2ba7f4..d188fa913a07 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -1170,7 +1170,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
if (journal->j_commit_callback)
journal->j_commit_callback(journal, commit_transaction);
if (journal->j_fc_cleanup_callback)
- journal->j_fc_cleanup_callback(journal, 1);
+ journal->j_fc_cleanup_callback(journal, 1, commit_transaction->t_tid);

trace_jbd2_end_commit(journal, commit_transaction);
jbd_debug(1, "JBD2: commit %d complete, head %d\n",
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 0b86a4365b66..a8e64ad11ae3 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -771,7 +771,7 @@ static int __jbd2_fc_end_commit(journal_t *journal, tid_t tid, bool fallback)
{
jbd2_journal_unlock_updates(journal);
if (journal->j_fc_cleanup_callback)
- journal->j_fc_cleanup_callback(journal, 0);
+ journal->j_fc_cleanup_callback(journal, 0, tid);
write_lock(&journal->j_state_lock);
journal->j_flags &= ~JBD2_FAST_COMMIT_ONGOING;
if (fallback)
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index fd933c45281a..d63b8106796e 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1295,7 +1295,7 @@ struct journal_s
* Clean-up after fast commit or full commit. JBD2 calls this function
* after every commit operation.
*/
- void (*j_fc_cleanup_callback)(struct journal_s *journal, int);
+ void (*j_fc_cleanup_callback)(struct journal_s *journal, int full, tid_t tid);

/**
* @j_fc_replay_callback:
--
2.20.1


2022-01-07 12:13:03

by Xin Yin

[permalink] [raw]
Subject: [PATCH 2/2] ext4: fast commit may miss file actions

In the following scenario:
1. jbd starts committing transaction n
2. task A gets a new handle for transaction n+1
3. task A does some actions and adds the inode to the FC_Q_MAIN fc_q
4. jbd completes transaction n and clears the FC_Q_MAIN fc_q
5. task A calls fsync

Fast commit will lose the file actions performed during a full commit.

We should also add updates to the staging queue during a full commit.
And in ext4_fc_cleanup(), when resetting an inode's fc track range, check
its i_sync_tid: if it is bigger than the current transaction tid, do not
reset it, or we will lose the track range.

Signed-off-by: Xin Yin <[email protected]>
---
fs/ext4/fast_commit.c | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
index 3673d4798af3..4cea92aec7c4 100644
--- a/fs/ext4/fast_commit.c
+++ b/fs/ext4/fast_commit.c
@@ -365,7 +365,8 @@ static int ext4_fc_track_template(
spin_lock(&sbi->s_fc_lock);
if (list_empty(&EXT4_I(inode)->i_fc_list))
list_add_tail(&EXT4_I(inode)->i_fc_list,
- (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING)) ?
+ (sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING ||
+ sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING) ?
&sbi->s_fc_q[FC_Q_STAGING] :
&sbi->s_fc_q[FC_Q_MAIN]);
spin_unlock(&sbi->s_fc_lock);
@@ -418,7 +419,8 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
node->fcd_name.len = dentry->d_name.len;

spin_lock(&sbi->s_fc_lock);
- if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING))
+ if (sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING ||
+ sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING)
list_add_tail(&node->fcd_list,
&sbi->s_fc_dentry_q[FC_Q_STAGING]);
else
@@ -1202,7 +1204,8 @@ static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid)
list_del_init(&iter->i_fc_list);
ext4_clear_inode_state(&iter->vfs_inode,
EXT4_STATE_FC_COMMITTING);
- ext4_fc_reset_inode(&iter->vfs_inode);
+ if (iter->i_sync_tid <= tid)
+ ext4_fc_reset_inode(&iter->vfs_inode);
/* Make sure EXT4_STATE_FC_COMMITTING bit is clear */
smp_mb();
#if (BITS_PER_LONG < 64)
--
2.20.1


2022-01-07 20:10:03

by harshad shirwadkar

[permalink] [raw]
Subject: Re: [PATCH 1/2] ext4: fast commit may not fallback for ineligible commit

Nice, this is a pretty good catch! One thing I'd note is that I'm
working on a patch that would improve performance of fast commit by
locking only one inode at a time instead of locking the entire file
system from performing journal updates. (In other words, with my patch
we'll drop jbd2_journal_lock_updates() before performing the fast
commit) What that patch would do is that it would add a new "h_priv"
field in handle_t where we would store a pointer to the inode which is
being modified. Once that patch is in, we can modify
ext4_fc_mark_ineligible() to only take handle and sb can be derived
out of it as handle->h_priv->i_sb. I can take care of doing that
change in my patch.

Reviewed-by: Harshad Shirwadkar <[email protected]>

On Fri, Jan 7, 2022 at 4:12 AM Xin Yin <[email protected]> wrote:
>
> for the follow scenario:
> 1. jbd start commit transaction n
> 2. task A get new handle for transaction n+1
> 3. task A do some ineligible actions and mark FC_INELIGIBLE
> 4. jbd complete transaction n and clean FC_INELIGIBLE
> 5. task A call fsync
>
> in this case fast commit will not fallback to full commit and
> transaction n+1 also not handled by jbd.
>
> make ext4_fc_mark_ineligible() also record transaction tid for
> latest ineligible case, when call ext4_fc_cleanup() check
> current transaction tid, if small than latest ineligible tid
> do not clear the EXT4_MF_FC_INELIGIBLE.
>
> Signed-off-by: Xin Yin <[email protected]>
> ---
> fs/ext4/ext4.h | 3 ++-
> fs/ext4/extents.c | 4 ++--
> fs/ext4/fast_commit.c | 19 +++++++++++++------
> fs/ext4/inode.c | 4 ++--
> fs/ext4/ioctl.c | 4 ++--
> fs/ext4/namei.c | 4 ++--
> fs/ext4/super.c | 1 +
> fs/ext4/xattr.c | 6 +++---
> fs/jbd2/commit.c | 2 +-
> fs/jbd2/journal.c | 2 +-
> include/linux/jbd2.h | 2 +-
> 11 files changed, 30 insertions(+), 21 deletions(-)
>
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 7b0686758691..a060bb56e654 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -1747,6 +1747,7 @@ struct ext4_sb_info {
> spinlock_t s_fc_lock;
> struct buffer_head *s_fc_bh;
> struct ext4_fc_stats s_fc_stats;
> + tid_t s_fc_ineligible_tid;
> #ifdef CONFIG_EXT4_DEBUG
> int s_fc_debug_max_replay;
> #endif
> @@ -2924,7 +2925,7 @@ void __ext4_fc_track_create(handle_t *handle, struct inode *inode,
> struct dentry *dentry);
> void ext4_fc_track_create(handle_t *handle, struct dentry *dentry);
> void ext4_fc_track_inode(handle_t *handle, struct inode *inode);
> -void ext4_fc_mark_ineligible(struct super_block *sb, int reason);
> +void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handle);
> void ext4_fc_start_update(struct inode *inode);
> void ext4_fc_stop_update(struct inode *inode);
> void ext4_fc_del(struct inode *inode);
> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index 9b6c76629c93..3be90aa5a5ba 100644
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -5339,7 +5339,7 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
> ret = PTR_ERR(handle);
> goto out_mmap;
> }
> - ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE);
> + ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle);
>
> down_write(&EXT4_I(inode)->i_data_sem);
> ext4_discard_preallocations(inode, 0);
> @@ -5479,7 +5479,7 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
> ret = PTR_ERR(handle);
> goto out_mmap;
> }
> - ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE);
> + ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle);
>
> /* Expand file to avoid data loss if there is error while shifting */
> inode->i_size += len;
> diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
> index f0cd20f5fe5e..3673d4798af3 100644
> --- a/fs/ext4/fast_commit.c
> +++ b/fs/ext4/fast_commit.c
> @@ -303,7 +303,7 @@ void ext4_fc_del(struct inode *inode)
> * Mark file system as fast commit ineligible. This means that next commit
> * operation would result in a full jbd2 commit.
> */
> -void ext4_fc_mark_ineligible(struct super_block *sb, int reason)
> +void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handle)
> {
> struct ext4_sb_info *sbi = EXT4_SB(sb);
>
> @@ -312,6 +312,10 @@ void ext4_fc_mark_ineligible(struct super_block *sb, int reason)
> return;
>
> ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
> + spin_lock(&sbi->s_fc_lock);
> + if (handle && !IS_ERR(handle))
> + sbi->s_fc_ineligible_tid = handle->h_transaction->t_tid;
> + spin_unlock(&sbi->s_fc_lock);
> WARN_ON(reason >= EXT4_FC_REASON_MAX);
> sbi->s_fc_stats.fc_ineligible_reason_count[reason]++;
> }
> @@ -387,7 +391,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
> mutex_unlock(&ei->i_fc_lock);
> node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS);
> if (!node) {
> - ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM);
> + ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM, NULL);
> mutex_lock(&ei->i_fc_lock);
> return -ENOMEM;
> }
> @@ -400,7 +404,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
> if (!node->fcd_name.name) {
> kmem_cache_free(ext4_fc_dentry_cachep, node);
> ext4_fc_mark_ineligible(inode->i_sb,
> - EXT4_FC_REASON_NOMEM);
> + EXT4_FC_REASON_NOMEM, NULL);
> mutex_lock(&ei->i_fc_lock);
> return -ENOMEM;
> }
> @@ -502,7 +506,7 @@ void ext4_fc_track_inode(handle_t *handle, struct inode *inode)
>
> if (ext4_should_journal_data(inode)) {
> ext4_fc_mark_ineligible(inode->i_sb,
> - EXT4_FC_REASON_INODE_JOURNAL_DATA);
> + EXT4_FC_REASON_INODE_JOURNAL_DATA, handle);
> return;
> }
>
> @@ -1180,7 +1184,7 @@ int ext4_fc_commit(journal_t *journal, tid_t commit_tid)
> * Fast commit cleanup routine. This is called after every fast commit and
> * full commit. full is true if we are called after a full commit.
> */
> -static void ext4_fc_cleanup(journal_t *journal, int full)
> +static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid)
> {
> struct super_block *sb = journal->j_private;
> struct ext4_sb_info *sbi = EXT4_SB(sb);
> @@ -1228,7 +1232,10 @@ static void ext4_fc_cleanup(journal_t *journal, int full)
> &sbi->s_fc_q[FC_Q_MAIN]);
>
> ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING);
> - ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
> + if (tid >= sbi->s_fc_ineligible_tid) {
> + sbi->s_fc_ineligible_tid = 0;
> + ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
> + }
>
> if (full)
> sbi->s_fc_bytes = 0;
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 08a90e25b78b..2ffa9bbb1324 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -337,7 +337,7 @@ void ext4_evict_inode(struct inode *inode)
> return;
> no_delete:
> if (!list_empty(&EXT4_I(inode)->i_fc_list))
> - ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM);
> + ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM, handle);
> ext4_clear_inode(inode); /* We must guarantee clearing of inode... */
> }
>
> @@ -5995,7 +5995,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
> return PTR_ERR(handle);
>
> ext4_fc_mark_ineligible(inode->i_sb,
> - EXT4_FC_REASON_JOURNAL_FLAG_CHANGE);
> + EXT4_FC_REASON_JOURNAL_FLAG_CHANGE, handle);
> err = ext4_mark_inode_dirty(handle, inode);
> ext4_handle_sync(handle);
> ext4_journal_stop(handle);
> diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
> index 1366afb59fba..0557402ac4fd 100644
> --- a/fs/ext4/ioctl.c
> +++ b/fs/ext4/ioctl.c
> @@ -169,7 +169,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
> err = -EINVAL;
> goto err_out;
> }
> - ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_SWAP_BOOT);
> + ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_SWAP_BOOT, handle);
>
> /* Protect extent tree against block allocations via delalloc */
> ext4_double_down_write_data_sem(inode, inode_bl);
> @@ -1073,7 +1073,7 @@ static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
>
> err = ext4_resize_fs(sb, n_blocks_count);
> if (EXT4_SB(sb)->s_journal) {
> - ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_RESIZE);
> + ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_RESIZE, NULL);
> jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
> err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal, 0);
> jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
> diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
> index 52c9bd154122..47b9f87dbc6f 100644
> --- a/fs/ext4/namei.c
> +++ b/fs/ext4/namei.c
> @@ -3889,7 +3889,7 @@ static int ext4_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
> * dirents in directories.
> */
> ext4_fc_mark_ineligible(old.inode->i_sb,
> - EXT4_FC_REASON_RENAME_DIR);
> + EXT4_FC_REASON_RENAME_DIR, handle);
> } else {
> if (new.inode)
> ext4_fc_track_unlink(handle, new.dentry);
> @@ -4049,7 +4049,7 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
> if (unlikely(retval))
> goto end_rename;
> ext4_fc_mark_ineligible(new.inode->i_sb,
> - EXT4_FC_REASON_CROSS_RENAME);
> + EXT4_FC_REASON_CROSS_RENAME, handle);
> if (old.dir_bh) {
> retval = ext4_rename_dir_finish(handle, &old, new.dir->i_ino);
> if (retval)
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index d81423021e3b..6049547d3c0f 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -4627,6 +4627,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
> sbi->s_fc_bytes = 0;
> ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
> ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING);
> + sbi->s_fc_ineligible_tid = 0;
> spin_lock_init(&sbi->s_fc_lock);
> memset(&sbi->s_fc_stats, 0, sizeof(sbi->s_fc_stats));
> sbi->s_fc_replay_state.fc_regions = NULL;
> diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
> index 1e0fc1ed845b..c1748730c1df 100644
> --- a/fs/ext4/xattr.c
> +++ b/fs/ext4/xattr.c
> @@ -2408,7 +2408,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
> if (IS_SYNC(inode))
> ext4_handle_sync(handle);
> }
> - ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR);
> + ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR, handle);
>
> cleanup:
> brelse(is.iloc.bh);
> @@ -2486,7 +2486,7 @@ ext4_xattr_set(struct inode *inode, int name_index, const char *name,
> if (error == 0)
> error = error2;
> }
> - ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR);
> + ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR, handle);
>
> return error;
> }
> @@ -2920,7 +2920,7 @@ int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
> error);
> goto cleanup;
> }
> - ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR);
> + ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR, handle);
> }
> error = 0;
> cleanup:
> diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
> index 3cc4ab2ba7f4..d188fa913a07 100644
> --- a/fs/jbd2/commit.c
> +++ b/fs/jbd2/commit.c
> @@ -1170,7 +1170,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
> if (journal->j_commit_callback)
> journal->j_commit_callback(journal, commit_transaction);
> if (journal->j_fc_cleanup_callback)
> - journal->j_fc_cleanup_callback(journal, 1);
> + journal->j_fc_cleanup_callback(journal, 1, commit_transaction->t_tid);
>
> trace_jbd2_end_commit(journal, commit_transaction);
> jbd_debug(1, "JBD2: commit %d complete, head %d\n",
> diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
> index 0b86a4365b66..a8e64ad11ae3 100644
> --- a/fs/jbd2/journal.c
> +++ b/fs/jbd2/journal.c
> @@ -771,7 +771,7 @@ static int __jbd2_fc_end_commit(journal_t *journal, tid_t tid, bool fallback)
> {
> jbd2_journal_unlock_updates(journal);
> if (journal->j_fc_cleanup_callback)
> - journal->j_fc_cleanup_callback(journal, 0);
> + journal->j_fc_cleanup_callback(journal, 0, tid);
> write_lock(&journal->j_state_lock);
> journal->j_flags &= ~JBD2_FAST_COMMIT_ONGOING;
> if (fallback)
> diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
> index fd933c45281a..d63b8106796e 100644
> --- a/include/linux/jbd2.h
> +++ b/include/linux/jbd2.h
> @@ -1295,7 +1295,7 @@ struct journal_s
> * Clean-up after fast commit or full commit. JBD2 calls this function
> * after every commit operation.
> */
> - void (*j_fc_cleanup_callback)(struct journal_s *journal, int);
> + void (*j_fc_cleanup_callback)(struct journal_s *journal, int full, tid_t tid);
>
> /**
> * @j_fc_replay_callback:
> --
> 2.20.1
>

2022-01-07 20:14:51

by harshad shirwadkar

[permalink] [raw]
Subject: Re: [PATCH 2/2] ext4: fast commit may miss file actions

Makes sense. With this change, we don't really need
EXT4_MF_FC_COMMITTING flag anymore. So, we can drop it. But other than
that, this patch looks good.

Reviewed-by: Harshad Shirwadkar <[email protected]>

On Fri, Jan 7, 2022 at 4:13 AM Xin Yin <[email protected]> wrote:
>
> in the follow scenario:
> 1. jbd start transaction n
> 2. task A get new handle for transaction n+1
> 3. task A do some actions and add inode to FC_Q_MAIN fc_q
> 4. jbd complete transaction n and clear FC_Q_MAIN fc_q
> 5. task A call fsync
>
> fast commit will lost the file actions during a full commit.
>
> we should also add updates to staging queue during a full commit.
> and in ext4_fc_cleanup(), when reset a inode's fc track range, check
> it's i_sync_tid, if it bigger than current transaction tid, do not
> rest it, or we will lost the track range.
>
> Signed-off-by: Xin Yin <[email protected]>
> ---
> fs/ext4/fast_commit.c | 9 ++++++---
> 1 file changed, 6 insertions(+), 3 deletions(-)
>
> diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
> index 3673d4798af3..4cea92aec7c4 100644
> --- a/fs/ext4/fast_commit.c
> +++ b/fs/ext4/fast_commit.c
> @@ -365,7 +365,8 @@ static int ext4_fc_track_template(
> spin_lock(&sbi->s_fc_lock);
> if (list_empty(&EXT4_I(inode)->i_fc_list))
> list_add_tail(&EXT4_I(inode)->i_fc_list,
> - (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING)) ?
> + (sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING ||
> + sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING) ?
> &sbi->s_fc_q[FC_Q_STAGING] :
> &sbi->s_fc_q[FC_Q_MAIN]);
> spin_unlock(&sbi->s_fc_lock);
> @@ -418,7 +419,8 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
> node->fcd_name.len = dentry->d_name.len;
>
> spin_lock(&sbi->s_fc_lock);
> - if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING))
> + if (sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING ||
> + sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING)
> list_add_tail(&node->fcd_list,
> &sbi->s_fc_dentry_q[FC_Q_STAGING]);
> else
> @@ -1202,7 +1204,8 @@ static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid)
> list_del_init(&iter->i_fc_list);
> ext4_clear_inode_state(&iter->vfs_inode,
> EXT4_STATE_FC_COMMITTING);
> - ext4_fc_reset_inode(&iter->vfs_inode);
> + if (iter->i_sync_tid <= tid)
> + ext4_fc_reset_inode(&iter->vfs_inode);
> /* Make sure EXT4_STATE_FC_COMMITTING bit is clear */
> smp_mb();
> #if (BITS_PER_LONG < 64)
> --
> 2.20.1
>

2022-01-07 20:15:52

by harshad shirwadkar

[permalink] [raw]
Subject: Re: [PATCH 0/2] ext4: fix issues when fast commit work with jbd

This patch series looks good to me. Also, it's great to know that
generic/455 now passes! :)

Reviewed-by: Harshad Shirwadkar <[email protected]>

On Fri, Jan 7, 2022 at 4:12 AM Xin Yin <[email protected]> wrote:
>
> When test fast commit with xfstests generic/455, some logic issues were
> found. When a full commit is ongonig, the logic of fast commit tracking seems
> not correct. The first patch fix the ineligible commit case , and the
> second patch fix the common fast commit case.
>
> After testing this patch set with xfstests log and quick group, no
> regressions were found, and the generic/455 can pass now.
>
> Xin Yin (2):
> ext4: fast commit may not fallback for ineligible commit
> ext4: fast commit may miss file actions
>
> fs/ext4/ext4.h | 3 ++-
> fs/ext4/extents.c | 4 ++--
> fs/ext4/fast_commit.c | 28 +++++++++++++++++++---------
> fs/ext4/inode.c | 4 ++--
> fs/ext4/ioctl.c | 4 ++--
> fs/ext4/namei.c | 4 ++--
> fs/ext4/super.c | 1 +
> fs/ext4/xattr.c | 6 +++---
> fs/jbd2/commit.c | 2 +-
> fs/jbd2/journal.c | 2 +-
> include/linux/jbd2.h | 2 +-
> 11 files changed, 36 insertions(+), 24 deletions(-)
>
> --
> 2.20.1
>

2022-01-09 03:07:01

by Xin Yin

[permalink] [raw]
Subject: Re: [External] Re: [PATCH 2/2] ext4: fast commit may miss file actions

Thanks, you are right: EXT4_MF_FC_COMMITTING is not needed after
this change. I will clean it up and send a v2 patch set.

Thanks,
Xin Yin

On Sat, Jan 8, 2022 at 4:14 AM harshad shirwadkar
<[email protected]> wrote:
>
> Makes sense. With this change, we don't really need
> EXT4_MF_FC_COMMITTING flag anymore. So, we can drop it. But other than
> that, this patch looks good.
>
> Reviewed-by: Harshad Shirwadkar <[email protected]>
>
> On Fri, Jan 7, 2022 at 4:13 AM Xin Yin <[email protected]> wrote:
> >
> > in the follow scenario:
> > 1. jbd start transaction n
> > 2. task A get new handle for transaction n+1
> > 3. task A do some actions and add inode to FC_Q_MAIN fc_q
> > 4. jbd complete transaction n and clear FC_Q_MAIN fc_q
> > 5. task A call fsync
> >
> > fast commit will lost the file actions during a full commit.
> >
> > we should also add updates to staging queue during a full commit.
> > and in ext4_fc_cleanup(), when reset a inode's fc track range, check
> > it's i_sync_tid, if it bigger than current transaction tid, do not
> > rest it, or we will lost the track range.
> >
> > Signed-off-by: Xin Yin <[email protected]>
> > ---
> > fs/ext4/fast_commit.c | 9 ++++++---
> > 1 file changed, 6 insertions(+), 3 deletions(-)
> >
> > diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
> > index 3673d4798af3..4cea92aec7c4 100644
> > --- a/fs/ext4/fast_commit.c
> > +++ b/fs/ext4/fast_commit.c
> > @@ -365,7 +365,8 @@ static int ext4_fc_track_template(
> > spin_lock(&sbi->s_fc_lock);
> > if (list_empty(&EXT4_I(inode)->i_fc_list))
> > list_add_tail(&EXT4_I(inode)->i_fc_list,
> > - (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING)) ?
> > + (sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING ||
> > + sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING) ?
> > &sbi->s_fc_q[FC_Q_STAGING] :
> > &sbi->s_fc_q[FC_Q_MAIN]);
> > spin_unlock(&sbi->s_fc_lock);
> > @@ -418,7 +419,8 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
> > node->fcd_name.len = dentry->d_name.len;
> >
> > spin_lock(&sbi->s_fc_lock);
> > - if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING))
> > + if (sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING ||
> > + sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING)
> > list_add_tail(&node->fcd_list,
> > &sbi->s_fc_dentry_q[FC_Q_STAGING]);
> > else
> > @@ -1202,7 +1204,8 @@ static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid)
> > list_del_init(&iter->i_fc_list);
> > ext4_clear_inode_state(&iter->vfs_inode,
> > EXT4_STATE_FC_COMMITTING);
> > - ext4_fc_reset_inode(&iter->vfs_inode);
> > + if (iter->i_sync_tid <= tid)
> > + ext4_fc_reset_inode(&iter->vfs_inode);
> > /* Make sure EXT4_STATE_FC_COMMITTING bit is clear */
> > smp_mb();
> > #if (BITS_PER_LONG < 64)
> > --
> > 2.20.1
> >

2022-01-10 09:24:17

by Dan Carpenter

[permalink] [raw]
Subject: Re: [PATCH 1/2] ext4: fast commit may not fallback for ineligible commit

Hi Xin,

url: https://github.com/0day-ci/linux/commits/Xin-Yin/ext4-fix-issues-when-fast-commit-work-with-jbd/20220107-201314
base: https://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4.git dev
config: x86_64-randconfig-m001-20220107 (https://download.01.org/0day-ci/archive/20220109/[email protected]/config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <[email protected]>
Reported-by: Dan Carpenter <[email protected]>

New smatch warnings:
fs/ext4/inode.c:340 ext4_evict_inode() error: uninitialized symbol 'handle'.

vim +/handle +340 fs/ext4/inode.c

0930fcc1ee2f0a Al Viro 2010-06-07 167 void ext4_evict_inode(struct inode *inode)
ac27a0ec112a08 Dave Kleikamp 2006-10-11 168 {
ac27a0ec112a08 Dave Kleikamp 2006-10-11 169 handle_t *handle;
bc965ab3f2b4b7 Theodore Ts'o 2008-08-02 170 int err;
65db869c754e7c Jan Kara 2019-11-05 171 /*
65db869c754e7c Jan Kara 2019-11-05 172 * Credits for final inode cleanup and freeing:
65db869c754e7c Jan Kara 2019-11-05 173 * sb + inode (ext4_orphan_del()), block bitmap, group descriptor
65db869c754e7c Jan Kara 2019-11-05 174 * (xattr block freeing), bitmap, group descriptor (inode freeing)
65db869c754e7c Jan Kara 2019-11-05 175 */
65db869c754e7c Jan Kara 2019-11-05 176 int extra_credits = 6;
0421a189bc8cde Tahsin Erdogan 2017-06-22 177 struct ext4_xattr_inode_array *ea_inode_array = NULL;
46e294efc355c4 Jan Kara 2020-11-27 178 bool freeze_protected = false;
ac27a0ec112a08 Dave Kleikamp 2006-10-11 179
7ff9c073dd4d72 Theodore Ts'o 2010-11-08 180 trace_ext4_evict_inode(inode);
2581fdc810889f Jiaying Zhang 2011-08-13 181
0930fcc1ee2f0a Al Viro 2010-06-07 182 if (inode->i_nlink) {
2d859db3e4a82a Jan Kara 2011-07-26 183 /*
2d859db3e4a82a Jan Kara 2011-07-26 184 * When journalling data dirty buffers are tracked only in the
2d859db3e4a82a Jan Kara 2011-07-26 185 * journal. So although mm thinks everything is clean and
2d859db3e4a82a Jan Kara 2011-07-26 186 * ready for reaping the inode might still have some pages to
2d859db3e4a82a Jan Kara 2011-07-26 187 * write in the running transaction or waiting to be
2d859db3e4a82a Jan Kara 2011-07-26 188 * checkpointed. Thus calling jbd2_journal_invalidatepage()
2d859db3e4a82a Jan Kara 2011-07-26 189 * (via truncate_inode_pages()) to discard these buffers can
2d859db3e4a82a Jan Kara 2011-07-26 190 * cause data loss. Also even if we did not discard these
2d859db3e4a82a Jan Kara 2011-07-26 191 * buffers, we would have no way to find them after the inode
2d859db3e4a82a Jan Kara 2011-07-26 192 * is reaped and thus user could see stale data if he tries to
2d859db3e4a82a Jan Kara 2011-07-26 193 * read them before the transaction is checkpointed. So be
2d859db3e4a82a Jan Kara 2011-07-26 194 * careful and force everything to disk here... We use
2d859db3e4a82a Jan Kara 2011-07-26 195 * ei->i_datasync_tid to store the newest transaction
2d859db3e4a82a Jan Kara 2011-07-26 196 * containing inode's data.
2d859db3e4a82a Jan Kara 2011-07-26 197 *
2d859db3e4a82a Jan Kara 2011-07-26 198 * Note that directories do not have this problem because they
2d859db3e4a82a Jan Kara 2011-07-26 199 * don't use page cache.
2d859db3e4a82a Jan Kara 2011-07-26 200 */
6a7fd522a7c94c Vegard Nossum 2016-07-04 201 if (inode->i_ino != EXT4_JOURNAL_INO &&
6a7fd522a7c94c Vegard Nossum 2016-07-04 202 ext4_should_journal_data(inode) &&
3abb1a0fc2871f Jan Kara 2017-06-22 203 (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode)) &&
3abb1a0fc2871f Jan Kara 2017-06-22 204 inode->i_data.nrpages) {
2d859db3e4a82a Jan Kara 2011-07-26 205 journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
2d859db3e4a82a Jan Kara 2011-07-26 206 tid_t commit_tid = EXT4_I(inode)->i_datasync_tid;
2d859db3e4a82a Jan Kara 2011-07-26 207
d76a3a77113db0 Theodore Ts'o 2013-04-03 208 jbd2_complete_transaction(journal, commit_tid);
2d859db3e4a82a Jan Kara 2011-07-26 209 filemap_write_and_wait(&inode->i_data);
2d859db3e4a82a Jan Kara 2011-07-26 210 }
91b0abe36a7b2b Johannes Weiner 2014-04-03 211 truncate_inode_pages_final(&inode->i_data);
5dc23bdd5f846e Jan Kara 2013-06-04 212
0930fcc1ee2f0a Al Viro 2010-06-07 213 goto no_delete;

Assume we hit this goto

0930fcc1ee2f0a Al Viro 2010-06-07 214 }
0930fcc1ee2f0a Al Viro 2010-06-07 215
e2bfb088fac03c Theodore Ts'o 2014-10-05 216 if (is_bad_inode(inode))
e2bfb088fac03c Theodore Ts'o 2014-10-05 217 goto no_delete;
871a293155a245 Christoph Hellwig 2010-03-03 218 dquot_initialize(inode);
907f4554e2521c Christoph Hellwig 2010-03-03 219
678aaf481496b0 Jan Kara 2008-07-11 220 if (ext4_should_order_data(inode))
678aaf481496b0 Jan Kara 2008-07-11 221 ext4_begin_ordered_truncate(inode, 0);
91b0abe36a7b2b Johannes Weiner 2014-04-03 222 truncate_inode_pages_final(&inode->i_data);
ac27a0ec112a08 Dave Kleikamp 2006-10-11 223
ceff86fddae874 Jan Kara 2020-04-21 224 /*
ceff86fddae874 Jan Kara 2020-04-21 225 * For inodes with journalled data, transaction commit could have
ceff86fddae874 Jan Kara 2020-04-21 226 * dirtied the inode. Flush worker is ignoring it because of I_FREEING
ceff86fddae874 Jan Kara 2020-04-21 227 * flag but we still need to remove the inode from the writeback lists.
ceff86fddae874 Jan Kara 2020-04-21 228 */
ceff86fddae874 Jan Kara 2020-04-21 229 if (!list_empty_careful(&inode->i_io_list)) {
ceff86fddae874 Jan Kara 2020-04-21 230 WARN_ON_ONCE(!ext4_should_journal_data(inode));
ceff86fddae874 Jan Kara 2020-04-21 231 inode_io_list_del(inode);
ceff86fddae874 Jan Kara 2020-04-21 232 }
ceff86fddae874 Jan Kara 2020-04-21 233
8e8ad8a57c75f3 Jan Kara 2012-06-12 234 /*
8e8ad8a57c75f3 Jan Kara 2012-06-12 235 * Protect us against freezing - iput() caller didn't have to have any
46e294efc355c4 Jan Kara 2020-11-27 236 * protection against it. When we are in a running transaction though,
46e294efc355c4 Jan Kara 2020-11-27 237 * we are already protected against freezing and we cannot grab further
46e294efc355c4 Jan Kara 2020-11-27 238 * protection due to lock ordering constraints.
8e8ad8a57c75f3 Jan Kara 2012-06-12 239 */
46e294efc355c4 Jan Kara 2020-11-27 240 if (!ext4_journal_current_handle()) {
8e8ad8a57c75f3 Jan Kara 2012-06-12 241 sb_start_intwrite(inode->i_sb);
46e294efc355c4 Jan Kara 2020-11-27 242 freeze_protected = true;
46e294efc355c4 Jan Kara 2020-11-27 243 }
e50e5129f384ae Andreas Dilger 2017-06-21 244
30a7eb970c3aae Tahsin Erdogan 2017-06-22 245 if (!IS_NOQUOTA(inode))
30a7eb970c3aae Tahsin Erdogan 2017-06-22 246 extra_credits += EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb);
30a7eb970c3aae Tahsin Erdogan 2017-06-22 247
65db869c754e7c Jan Kara 2019-11-05 248 /*
65db869c754e7c Jan Kara 2019-11-05 249 * Block bitmap, group descriptor, and inode are accounted in both
65db869c754e7c Jan Kara 2019-11-05 250 * ext4_blocks_for_truncate() and extra_credits. So subtract 3.
65db869c754e7c Jan Kara 2019-11-05 251 */
30a7eb970c3aae Tahsin Erdogan 2017-06-22 252 handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE,
65db869c754e7c Jan Kara 2019-11-05 253 ext4_blocks_for_truncate(inode) + extra_credits - 3);
ac27a0ec112a08 Dave Kleikamp 2006-10-11 254 if (IS_ERR(handle)) {
bc965ab3f2b4b7 Theodore Ts'o 2008-08-02 255 ext4_std_error(inode->i_sb, PTR_ERR(handle));
ac27a0ec112a08 Dave Kleikamp 2006-10-11 256 /*
ac27a0ec112a08 Dave Kleikamp 2006-10-11 257 * If we're going to skip the normal cleanup, we still need to
ac27a0ec112a08 Dave Kleikamp 2006-10-11 258 * make sure that the in-core orphan linked list is properly
ac27a0ec112a08 Dave Kleikamp 2006-10-11 259 * cleaned up.
ac27a0ec112a08 Dave Kleikamp 2006-10-11 260 */
617ba13b31fbf5 Mingming Cao 2006-10-11 261 ext4_orphan_del(NULL, inode);
46e294efc355c4 Jan Kara 2020-11-27 262 if (freeze_protected)
8e8ad8a57c75f3 Jan Kara 2012-06-12 263 sb_end_intwrite(inode->i_sb);
ac27a0ec112a08 Dave Kleikamp 2006-10-11 264 goto no_delete;
ac27a0ec112a08 Dave Kleikamp 2006-10-11 265 }
30a7eb970c3aae Tahsin Erdogan 2017-06-22 266
ac27a0ec112a08 Dave Kleikamp 2006-10-11 267 if (IS_SYNC(inode))
0390131ba84fd3 Frank Mayhar 2009-01-07 268 ext4_handle_sync(handle);
407cd7fb83c0eb Tahsin Erdogan 2017-07-04 269
407cd7fb83c0eb Tahsin Erdogan 2017-07-04 270 /*
407cd7fb83c0eb Tahsin Erdogan 2017-07-04 271 * Set inode->i_size to 0 before calling ext4_truncate(). We need
407cd7fb83c0eb Tahsin Erdogan 2017-07-04 272 * special handling of symlinks here because i_size is used to
407cd7fb83c0eb Tahsin Erdogan 2017-07-04 273 * determine whether ext4_inode_info->i_data contains symlink data or
407cd7fb83c0eb Tahsin Erdogan 2017-07-04 274 * block mappings. Setting i_size to 0 will remove its fast symlink
407cd7fb83c0eb Tahsin Erdogan 2017-07-04 275 * status. Erase i_data so that it becomes a valid empty block map.
407cd7fb83c0eb Tahsin Erdogan 2017-07-04 276 */
407cd7fb83c0eb Tahsin Erdogan 2017-07-04 277 if (ext4_inode_is_fast_symlink(inode))
407cd7fb83c0eb Tahsin Erdogan 2017-07-04 278 memset(EXT4_I(inode)->i_data, 0, sizeof(EXT4_I(inode)->i_data));
ac27a0ec112a08 Dave Kleikamp 2006-10-11 279 inode->i_size = 0;
bc965ab3f2b4b7 Theodore Ts'o 2008-08-02 280 err = ext4_mark_inode_dirty(handle, inode);
bc965ab3f2b4b7 Theodore Ts'o 2008-08-02 281 if (err) {
12062dddda4509 Eric Sandeen 2010-02-15 282 ext4_warning(inode->i_sb,
bc965ab3f2b4b7 Theodore Ts'o 2008-08-02 283 "couldn't mark inode dirty (err %d)", err);
bc965ab3f2b4b7 Theodore Ts'o 2008-08-02 284 goto stop_handle;
bc965ab3f2b4b7 Theodore Ts'o 2008-08-02 285 }
2c98eb5ea24976 Theodore Ts'o 2016-11-13 286 if (inode->i_blocks) {
2c98eb5ea24976 Theodore Ts'o 2016-11-13 287 err = ext4_truncate(inode);
2c98eb5ea24976 Theodore Ts'o 2016-11-13 288 if (err) {
54d3adbc29f0c7 Theodore Ts'o 2020-03-28 289 ext4_error_err(inode->i_sb, -err,
2c98eb5ea24976 Theodore Ts'o 2016-11-13 290 "couldn't truncate inode %lu (err %d)",
2c98eb5ea24976 Theodore Ts'o 2016-11-13 291 inode->i_ino, err);
2c98eb5ea24976 Theodore Ts'o 2016-11-13 292 goto stop_handle;
2c98eb5ea24976 Theodore Ts'o 2016-11-13 293 }
2c98eb5ea24976 Theodore Ts'o 2016-11-13 294 }
bc965ab3f2b4b7 Theodore Ts'o 2008-08-02 295
30a7eb970c3aae Tahsin Erdogan 2017-06-22 296 /* Remove xattr references. */
30a7eb970c3aae Tahsin Erdogan 2017-06-22 297 err = ext4_xattr_delete_inode(handle, inode, &ea_inode_array,
30a7eb970c3aae Tahsin Erdogan 2017-06-22 298 extra_credits);
30a7eb970c3aae Tahsin Erdogan 2017-06-22 299 if (err) {
30a7eb970c3aae Tahsin Erdogan 2017-06-22 300 ext4_warning(inode->i_sb, "xattr delete (err %d)", err);
bc965ab3f2b4b7 Theodore Ts'o 2008-08-02 301 stop_handle:
bc965ab3f2b4b7 Theodore Ts'o 2008-08-02 302 ext4_journal_stop(handle);
4538821993f448 Theodore Ts'o 2010-07-29 303 ext4_orphan_del(NULL, inode);
46e294efc355c4 Jan Kara 2020-11-27 304 if (freeze_protected)
8e8ad8a57c75f3 Jan Kara 2012-06-12 305 sb_end_intwrite(inode->i_sb);
30a7eb970c3aae Tahsin Erdogan 2017-06-22 306 ext4_xattr_inode_array_free(ea_inode_array);
bc965ab3f2b4b7 Theodore Ts'o 2008-08-02 307 goto no_delete;
bc965ab3f2b4b7 Theodore Ts'o 2008-08-02 308 }
bc965ab3f2b4b7 Theodore Ts'o 2008-08-02 309
ac27a0ec112a08 Dave Kleikamp 2006-10-11 310 /*
617ba13b31fbf5 Mingming Cao 2006-10-11 311 * Kill off the orphan record which ext4_truncate created.
ac27a0ec112a08 Dave Kleikamp 2006-10-11 312 * AKPM: I think this can be inside the above `if'.
617ba13b31fbf5 Mingming Cao 2006-10-11 313 * Note that ext4_orphan_del() has to be able to cope with the
ac27a0ec112a08 Dave Kleikamp 2006-10-11 314 * deletion of a non-existent orphan - this is because we don't
617ba13b31fbf5 Mingming Cao 2006-10-11 315 * know if ext4_truncate() actually created an orphan record.
ac27a0ec112a08 Dave Kleikamp 2006-10-11 316 * (Well, we could do this if we need to, but heck - it works)
ac27a0ec112a08 Dave Kleikamp 2006-10-11 317 */
617ba13b31fbf5 Mingming Cao 2006-10-11 318 ext4_orphan_del(handle, inode);
5ffff834322281 Arnd Bergmann 2018-07-29 319 EXT4_I(inode)->i_dtime = (__u32)ktime_get_real_seconds();
ac27a0ec112a08 Dave Kleikamp 2006-10-11 320
ac27a0ec112a08 Dave Kleikamp 2006-10-11 321 /*
ac27a0ec112a08 Dave Kleikamp 2006-10-11 322 * One subtle ordering requirement: if anything has gone wrong
ac27a0ec112a08 Dave Kleikamp 2006-10-11 323 * (transaction abort, IO errors, whatever), then we can still
ac27a0ec112a08 Dave Kleikamp 2006-10-11 324 * do these next steps (the fs will already have been marked as
ac27a0ec112a08 Dave Kleikamp 2006-10-11 325 * having errors), but we can't free the inode if the mark_dirty
ac27a0ec112a08 Dave Kleikamp 2006-10-11 326 * fails.
ac27a0ec112a08 Dave Kleikamp 2006-10-11 327 */
617ba13b31fbf5 Mingming Cao 2006-10-11 328 if (ext4_mark_inode_dirty(handle, inode))
ac27a0ec112a08 Dave Kleikamp 2006-10-11 329 /* If that failed, just do the required in-core inode clear. */
0930fcc1ee2f0a Al Viro 2010-06-07 330 ext4_clear_inode(inode);
ac27a0ec112a08 Dave Kleikamp 2006-10-11 331 else
617ba13b31fbf5 Mingming Cao 2006-10-11 332 ext4_free_inode(handle, inode);
617ba13b31fbf5 Mingming Cao 2006-10-11 333 ext4_journal_stop(handle);
46e294efc355c4 Jan Kara 2020-11-27 334 if (freeze_protected)
8e8ad8a57c75f3 Jan Kara 2012-06-12 335 sb_end_intwrite(inode->i_sb);
0421a189bc8cde Tahsin Erdogan 2017-06-22 336 ext4_xattr_inode_array_free(ea_inode_array);
ac27a0ec112a08 Dave Kleikamp 2006-10-11 337 return;
ac27a0ec112a08 Dave Kleikamp 2006-10-11 338 no_delete:
b21ebf143af219 Harshad Shirwadkar 2020-11-05 339 if (!list_empty(&EXT4_I(inode)->i_fc_list))

It's not clear without more context where this ->i_fc_list list is
modified.

db40129f85538a Xin Yin 2022-01-07 @340 ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM, handle);

"handle" might be uninitialized?

0930fcc1ee2f0a Al Viro 2010-06-07 341 ext4_clear_inode(inode); /* We must guarantee clearing of inode... */
9d0be50230b333 Theodore Ts'o 2010-01-01 342 }

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/[email protected]


2022-01-11 03:13:32

by Xin Yin

[permalink] [raw]
Subject: Re: [External] Re: [PATCH 1/2] ext4: fast commit may not fallback for ineligible commit

Hi Dan,

Thanks for spotting this. I think it is not only an 'uninitialized'
issue: we cannot use 'handle' after ext4_journal_stop(), as that may
cause a use-after-free.
So maybe we should pass the transaction tid as input instead of 'handle',
i.e. ext4_fc_mark_ineligible(struct super_block *sb, int reason, tid_t tid).
Otherwise, if we need the 'handle' param, we should move all
ext4_fc_mark_ineligible() calls between ext4_journal_start() and
ext4_journal_stop().

Hi Harshad, could you give some advice? It seems you also need to
change this part in your following patches.

Thanks,
Xin Yin

On Mon, Jan 10, 2022 at 5:23 PM Dan Carpenter <[email protected]> wrote:
>
> Hi Xin,
>
> url: https://github.com/0day-ci/linux/commits/Xin-Yin/ext4-fix-issues-when-fast-commit-work-with-jbd/20220107-201314
> base: https://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4.git dev
> config: x86_64-randconfig-m001-20220107 (https://download.01.org/0day-ci/archive/20220109/[email protected]/config)
> compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
>
> If you fix the issue, kindly add following tag as appropriate
> Reported-by: kernel test robot <[email protected]>
> Reported-by: Dan Carpenter <[email protected]>
>
> New smatch warnings:
> fs/ext4/inode.c:340 ext4_evict_inode() error: uninitialized symbol 'handle'.
>
> vim +/handle +340 fs/ext4/inode.c
>
> 0930fcc1ee2f0a Al Viro 2010-06-07 167 void ext4_evict_inode(struct inode *inode)
> ac27a0ec112a08 Dave Kleikamp 2006-10-11 168 {
> ac27a0ec112a08 Dave Kleikamp 2006-10-11 169 handle_t *handle;
> bc965ab3f2b4b7 Theodore Ts'o 2008-08-02 170 int err;
> 65db869c754e7c Jan Kara 2019-11-05 171 /*
> 65db869c754e7c Jan Kara 2019-11-05 172 * Credits for final inode cleanup and freeing:
> 65db869c754e7c Jan Kara 2019-11-05 173 * sb + inode (ext4_orphan_del()), block bitmap, group descriptor
> 65db869c754e7c Jan Kara 2019-11-05 174 * (xattr block freeing), bitmap, group descriptor (inode freeing)
> 65db869c754e7c Jan Kara 2019-11-05 175 */
> 65db869c754e7c Jan Kara 2019-11-05 176 int extra_credits = 6;
> 0421a189bc8cde Tahsin Erdogan 2017-06-22 177 struct ext4_xattr_inode_array *ea_inode_array = NULL;
> 46e294efc355c4 Jan Kara 2020-11-27 178 bool freeze_protected = false;
> ac27a0ec112a08 Dave Kleikamp 2006-10-11 179
> 7ff9c073dd4d72 Theodore Ts'o 2010-11-08 180 trace_ext4_evict_inode(inode);
> 2581fdc810889f Jiaying Zhang 2011-08-13 181
> 0930fcc1ee2f0a Al Viro 2010-06-07 182 if (inode->i_nlink) {
> 2d859db3e4a82a Jan Kara 2011-07-26 183 /*
> 2d859db3e4a82a Jan Kara 2011-07-26 184 * When journalling data dirty buffers are tracked only in the
> 2d859db3e4a82a Jan Kara 2011-07-26 185 * journal. So although mm thinks everything is clean and
> 2d859db3e4a82a Jan Kara 2011-07-26 186 * ready for reaping the inode might still have some pages to
> 2d859db3e4a82a Jan Kara 2011-07-26 187 * write in the running transaction or waiting to be
> 2d859db3e4a82a Jan Kara 2011-07-26 188 * checkpointed. Thus calling jbd2_journal_invalidatepage()
> 2d859db3e4a82a Jan Kara 2011-07-26 189 * (via truncate_inode_pages()) to discard these buffers can
> 2d859db3e4a82a Jan Kara 2011-07-26 190 * cause data loss. Also even if we did not discard these
> 2d859db3e4a82a Jan Kara 2011-07-26 191 * buffers, we would have no way to find them after the inode
> 2d859db3e4a82a Jan Kara 2011-07-26 192 * is reaped and thus user could see stale data if he tries to
> 2d859db3e4a82a Jan Kara 2011-07-26 193 * read them before the transaction is checkpointed. So be
> 2d859db3e4a82a Jan Kara 2011-07-26 194 * careful and force everything to disk here... We use
> 2d859db3e4a82a Jan Kara 2011-07-26 195 * ei->i_datasync_tid to store the newest transaction
> 2d859db3e4a82a Jan Kara 2011-07-26 196 * containing inode's data.
> 2d859db3e4a82a Jan Kara 2011-07-26 197 *
> 2d859db3e4a82a Jan Kara 2011-07-26 198 * Note that directories do not have this problem because they
> 2d859db3e4a82a Jan Kara 2011-07-26 199 * don't use page cache.
> 2d859db3e4a82a Jan Kara 2011-07-26 200 */
> 6a7fd522a7c94c Vegard Nossum 2016-07-04 201 if (inode->i_ino != EXT4_JOURNAL_INO &&
> 6a7fd522a7c94c Vegard Nossum 2016-07-04 202 ext4_should_journal_data(inode) &&
> 3abb1a0fc2871f Jan Kara 2017-06-22 203 (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode)) &&
> 3abb1a0fc2871f Jan Kara 2017-06-22 204 inode->i_data.nrpages) {
> 2d859db3e4a82a Jan Kara 2011-07-26 205 journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
> 2d859db3e4a82a Jan Kara 2011-07-26 206 tid_t commit_tid = EXT4_I(inode)->i_datasync_tid;
> 2d859db3e4a82a Jan Kara 2011-07-26 207
> d76a3a77113db0 Theodore Ts'o 2013-04-03 208 jbd2_complete_transaction(journal, commit_tid);
> 2d859db3e4a82a Jan Kara 2011-07-26 209 filemap_write_and_wait(&inode->i_data);
> 2d859db3e4a82a Jan Kara 2011-07-26 210 }
> 91b0abe36a7b2b Johannes Weiner 2014-04-03 211 truncate_inode_pages_final(&inode->i_data);
> 5dc23bdd5f846e Jan Kara 2013-06-04 212
> 0930fcc1ee2f0a Al Viro 2010-06-07 213 goto no_delete;
>
> Assume we hit this goto
>
> 0930fcc1ee2f0a Al Viro 2010-06-07 214 }
> 0930fcc1ee2f0a Al Viro 2010-06-07 215
> e2bfb088fac03c Theodore Ts'o 2014-10-05 216 if (is_bad_inode(inode))
> e2bfb088fac03c Theodore Ts'o 2014-10-05 217 goto no_delete;
> 871a293155a245 Christoph Hellwig 2010-03-03 218 dquot_initialize(inode);
> 907f4554e2521c Christoph Hellwig 2010-03-03 219
> 678aaf481496b0 Jan Kara 2008-07-11 220 if (ext4_should_order_data(inode))
> 678aaf481496b0 Jan Kara 2008-07-11 221 ext4_begin_ordered_truncate(inode, 0);
> 91b0abe36a7b2b Johannes Weiner 2014-04-03 222 truncate_inode_pages_final(&inode->i_data);
> ac27a0ec112a08 Dave Kleikamp 2006-10-11 223
> ceff86fddae874 Jan Kara 2020-04-21 224 /*
> ceff86fddae874 Jan Kara 2020-04-21 225 * For inodes with journalled data, transaction commit could have
> ceff86fddae874 Jan Kara 2020-04-21 226 * dirtied the inode. Flush worker is ignoring it because of I_FREEING
> ceff86fddae874 Jan Kara 2020-04-21 227 * flag but we still need to remove the inode from the writeback lists.
> ceff86fddae874 Jan Kara 2020-04-21 228 */
> ceff86fddae874 Jan Kara 2020-04-21 229 if (!list_empty_careful(&inode->i_io_list)) {
> ceff86fddae874 Jan Kara 2020-04-21 230 WARN_ON_ONCE(!ext4_should_journal_data(inode));
> ceff86fddae874 Jan Kara 2020-04-21 231 inode_io_list_del(inode);
> ceff86fddae874 Jan Kara 2020-04-21 232 }
> ceff86fddae874 Jan Kara 2020-04-21 233
> 8e8ad8a57c75f3 Jan Kara 2012-06-12 234 /*
> 8e8ad8a57c75f3 Jan Kara 2012-06-12 235 * Protect us against freezing - iput() caller didn't have to have any
> 46e294efc355c4 Jan Kara 2020-11-27 236 * protection against it. When we are in a running transaction though,
> 46e294efc355c4 Jan Kara 2020-11-27 237 * we are already protected against freezing and we cannot grab further
> 46e294efc355c4 Jan Kara 2020-11-27 238 * protection due to lock ordering constraints.
> 8e8ad8a57c75f3 Jan Kara 2012-06-12 239 */
> 46e294efc355c4 Jan Kara 2020-11-27 240 if (!ext4_journal_current_handle()) {
> 8e8ad8a57c75f3 Jan Kara 2012-06-12 241 sb_start_intwrite(inode->i_sb);
> 46e294efc355c4 Jan Kara 2020-11-27 242 freeze_protected = true;
> 46e294efc355c4 Jan Kara 2020-11-27 243 }
> e50e5129f384ae Andreas Dilger 2017-06-21 244
> 30a7eb970c3aae Tahsin Erdogan 2017-06-22 245 if (!IS_NOQUOTA(inode))
> 30a7eb970c3aae Tahsin Erdogan 2017-06-22 246 extra_credits += EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb);
> 30a7eb970c3aae Tahsin Erdogan 2017-06-22 247
> 65db869c754e7c Jan Kara 2019-11-05 248 /*
> 65db869c754e7c Jan Kara 2019-11-05 249 * Block bitmap, group descriptor, and inode are accounted in both
> 65db869c754e7c Jan Kara 2019-11-05 250 * ext4_blocks_for_truncate() and extra_credits. So subtract 3.
> 65db869c754e7c Jan Kara 2019-11-05 251 */
> 30a7eb970c3aae Tahsin Erdogan 2017-06-22 252 handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE,
> 65db869c754e7c Jan Kara 2019-11-05 253 ext4_blocks_for_truncate(inode) + extra_credits - 3);
> ac27a0ec112a08 Dave Kleikamp 2006-10-11 254 if (IS_ERR(handle)) {
> bc965ab3f2b4b7 Theodore Ts'o 2008-08-02 255 ext4_std_error(inode->i_sb, PTR_ERR(handle));
> ac27a0ec112a08 Dave Kleikamp 2006-10-11 256 /*
> ac27a0ec112a08 Dave Kleikamp 2006-10-11 257 * If we're going to skip the normal cleanup, we still need to
> ac27a0ec112a08 Dave Kleikamp 2006-10-11 258 * make sure that the in-core orphan linked list is properly
> ac27a0ec112a08 Dave Kleikamp 2006-10-11 259 * cleaned up.
> ac27a0ec112a08 Dave Kleikamp 2006-10-11 260 */
> 617ba13b31fbf5 Mingming Cao 2006-10-11 261 ext4_orphan_del(NULL, inode);
> 46e294efc355c4 Jan Kara 2020-11-27 262 if (freeze_protected)
> 8e8ad8a57c75f3 Jan Kara 2012-06-12 263 sb_end_intwrite(inode->i_sb);
> ac27a0ec112a08 Dave Kleikamp 2006-10-11 264 goto no_delete;
> ac27a0ec112a08 Dave Kleikamp 2006-10-11 265 }
> 30a7eb970c3aae Tahsin Erdogan 2017-06-22 266
> ac27a0ec112a08 Dave Kleikamp 2006-10-11 267 if (IS_SYNC(inode))
> 0390131ba84fd3 Frank Mayhar 2009-01-07 268 ext4_handle_sync(handle);
> 407cd7fb83c0eb Tahsin Erdogan 2017-07-04 269
> 407cd7fb83c0eb Tahsin Erdogan 2017-07-04 270 /*
> 407cd7fb83c0eb Tahsin Erdogan 2017-07-04 271 * Set inode->i_size to 0 before calling ext4_truncate(). We need
> 407cd7fb83c0eb Tahsin Erdogan 2017-07-04 272 * special handling of symlinks here because i_size is used to
> 407cd7fb83c0eb Tahsin Erdogan 2017-07-04 273 * determine whether ext4_inode_info->i_data contains symlink data or
> 407cd7fb83c0eb Tahsin Erdogan 2017-07-04 274 * block mappings. Setting i_size to 0 will remove its fast symlink
> 407cd7fb83c0eb Tahsin Erdogan 2017-07-04 275 * status. Erase i_data so that it becomes a valid empty block map.
> 407cd7fb83c0eb Tahsin Erdogan 2017-07-04 276 */
> 407cd7fb83c0eb Tahsin Erdogan 2017-07-04 277 if (ext4_inode_is_fast_symlink(inode))
> 407cd7fb83c0eb Tahsin Erdogan 2017-07-04 278 memset(EXT4_I(inode)->i_data, 0, sizeof(EXT4_I(inode)->i_data));
> ac27a0ec112a08 Dave Kleikamp 2006-10-11 279 inode->i_size = 0;
> bc965ab3f2b4b7 Theodore Ts'o 2008-08-02 280 err = ext4_mark_inode_dirty(handle, inode);
> bc965ab3f2b4b7 Theodore Ts'o 2008-08-02 281 if (err) {
> 12062dddda4509 Eric Sandeen 2010-02-15 282 ext4_warning(inode->i_sb,
> bc965ab3f2b4b7 Theodore Ts'o 2008-08-02 283 "couldn't mark inode dirty (err %d)", err);
> bc965ab3f2b4b7 Theodore Ts'o 2008-08-02 284 goto stop_handle;
> bc965ab3f2b4b7 Theodore Ts'o 2008-08-02 285 }
> 2c98eb5ea24976 Theodore Ts'o 2016-11-13 286 if (inode->i_blocks) {
> 2c98eb5ea24976 Theodore Ts'o 2016-11-13 287 err = ext4_truncate(inode);
> 2c98eb5ea24976 Theodore Ts'o 2016-11-13 288 if (err) {
> 54d3adbc29f0c7 Theodore Ts'o 2020-03-28 289 ext4_error_err(inode->i_sb, -err,
> 2c98eb5ea24976 Theodore Ts'o 2016-11-13 290 "couldn't truncate inode %lu (err %d)",
> 2c98eb5ea24976 Theodore Ts'o 2016-11-13 291 inode->i_ino, err);
> 2c98eb5ea24976 Theodore Ts'o 2016-11-13 292 goto stop_handle;
> 2c98eb5ea24976 Theodore Ts'o 2016-11-13 293 }
> 2c98eb5ea24976 Theodore Ts'o 2016-11-13 294 }
> bc965ab3f2b4b7 Theodore Ts'o 2008-08-02 295
> 30a7eb970c3aae Tahsin Erdogan 2017-06-22 296 /* Remove xattr references. */
> 30a7eb970c3aae Tahsin Erdogan 2017-06-22 297 err = ext4_xattr_delete_inode(handle, inode, &ea_inode_array,
> 30a7eb970c3aae Tahsin Erdogan 2017-06-22 298 extra_credits);
> 30a7eb970c3aae Tahsin Erdogan 2017-06-22 299 if (err) {
> 30a7eb970c3aae Tahsin Erdogan 2017-06-22 300 ext4_warning(inode->i_sb, "xattr delete (err %d)", err);
> bc965ab3f2b4b7 Theodore Ts'o 2008-08-02 301 stop_handle:
> bc965ab3f2b4b7 Theodore Ts'o 2008-08-02 302 ext4_journal_stop(handle);
> 4538821993f448 Theodore Ts'o 2010-07-29 303 ext4_orphan_del(NULL, inode);
> 46e294efc355c4 Jan Kara 2020-11-27 304 if (freeze_protected)
> 8e8ad8a57c75f3 Jan Kara 2012-06-12 305 sb_end_intwrite(inode->i_sb);
> 30a7eb970c3aae Tahsin Erdogan 2017-06-22 306 ext4_xattr_inode_array_free(ea_inode_array);
> bc965ab3f2b4b7 Theodore Ts'o 2008-08-02 307 goto no_delete;
> bc965ab3f2b4b7 Theodore Ts'o 2008-08-02 308 }
> bc965ab3f2b4b7 Theodore Ts'o 2008-08-02 309
> ac27a0ec112a08 Dave Kleikamp 2006-10-11 310 /*
> 617ba13b31fbf5 Mingming Cao 2006-10-11 311 * Kill off the orphan record which ext4_truncate created.
> ac27a0ec112a08 Dave Kleikamp 2006-10-11 312 * AKPM: I think this can be inside the above `if'.
> 617ba13b31fbf5 Mingming Cao 2006-10-11 313 * Note that ext4_orphan_del() has to be able to cope with the
> ac27a0ec112a08 Dave Kleikamp 2006-10-11 314 * deletion of a non-existent orphan - this is because we don't
> 617ba13b31fbf5 Mingming Cao 2006-10-11 315 * know if ext4_truncate() actually created an orphan record.
> ac27a0ec112a08 Dave Kleikamp 2006-10-11 316 * (Well, we could do this if we need to, but heck - it works)
> ac27a0ec112a08 Dave Kleikamp 2006-10-11 317 */
> 617ba13b31fbf5 Mingming Cao 2006-10-11 318 ext4_orphan_del(handle, inode);
> 5ffff834322281 Arnd Bergmann 2018-07-29 319 EXT4_I(inode)->i_dtime = (__u32)ktime_get_real_seconds();
> ac27a0ec112a08 Dave Kleikamp 2006-10-11 320
> ac27a0ec112a08 Dave Kleikamp 2006-10-11 321 /*
> ac27a0ec112a08 Dave Kleikamp 2006-10-11 322 * One subtle ordering requirement: if anything has gone wrong
> ac27a0ec112a08 Dave Kleikamp 2006-10-11 323 * (transaction abort, IO errors, whatever), then we can still
> ac27a0ec112a08 Dave Kleikamp 2006-10-11 324 * do these next steps (the fs will already have been marked as
> ac27a0ec112a08 Dave Kleikamp 2006-10-11 325 * having errors), but we can't free the inode if the mark_dirty
> ac27a0ec112a08 Dave Kleikamp 2006-10-11 326 * fails.
> ac27a0ec112a08 Dave Kleikamp 2006-10-11 327 */
> 617ba13b31fbf5 Mingming Cao 2006-10-11 328 if (ext4_mark_inode_dirty(handle, inode))
> ac27a0ec112a08 Dave Kleikamp 2006-10-11 329 /* If that failed, just do the required in-core inode clear. */
> 0930fcc1ee2f0a Al Viro 2010-06-07 330 ext4_clear_inode(inode);
> ac27a0ec112a08 Dave Kleikamp 2006-10-11 331 else
> 617ba13b31fbf5 Mingming Cao 2006-10-11 332 ext4_free_inode(handle, inode);
> 617ba13b31fbf5 Mingming Cao 2006-10-11 333 ext4_journal_stop(handle);
> 46e294efc355c4 Jan Kara 2020-11-27 334 if (freeze_protected)
> 8e8ad8a57c75f3 Jan Kara 2012-06-12 335 sb_end_intwrite(inode->i_sb);
> 0421a189bc8cde Tahsin Erdogan 2017-06-22 336 ext4_xattr_inode_array_free(ea_inode_array);
> ac27a0ec112a08 Dave Kleikamp 2006-10-11 337 return;
> ac27a0ec112a08 Dave Kleikamp 2006-10-11 338 no_delete:
> b21ebf143af219 Harshad Shirwadkar 2020-11-05 339 if (!list_empty(&EXT4_I(inode)->i_fc_list))
>
> It's not clear without more context where this ->i_fc_list list is
> modified.
>
> db40129f85538a Xin Yin 2022-01-07 @340 ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM, handle);
>
> "handle" might be uninitialized?
>
> 0930fcc1ee2f0a Al Viro 2010-06-07 341 ext4_clear_inode(inode); /* We must guarantee clearing of inode... */
> 9d0be50230b333 Theodore Ts'o 2010-01-01 342 }
>
> ---
> 0-DAY CI Kernel Test Service, Intel Corporation
> https://lists.01.org/hyperkitty/list/[email protected]
>