On Sun 29-11-20 15:11:05, Andreas Dilger wrote:
> On Nov 27, 2020, at 4:33 AM, Jan Kara <[email protected]> wrote:
> >
> > Superblock is written out either through ext4_commit_super() or through
> > ext4_handle_dirty_super(). In both cases we recompute the checksum so it
> > is not necessary to recompute it after updating superblock free inodes &
> > blocks counters.
>
> I searched through the code to see where s_sbh is being used, and it
> looks like there is one case that doesn't update the checksum using
> ext4_handle_dirty_super(), namely:
>
> ext4_file_ioctl(cmd=FS_IOC_GET_ENCRYPTION_PWSALT)
> {
> err = ext4_journal_get_write_access(handle, sbi->s_sbh);
> if (err)
> goto pwsalt_err_journal;
> generate_random_uuid(sbi->s_es->s_encrypt_pw_salt);
> err = ext4_handle_dirty_metadata(handle, NULL,
> sbi->s_sbh);
>
> I don't think that is a problem with this patch, per se, but it looks like
> a bug that could be hit in rare cases with fscrypt + metadata_csum. It
> would only happen once per filesystem, and would normally be hidden by
> later superblock updates, but should probably be fixed anyway.

Yeah, good spotting. I'll write a fix for this.

> Reviewed-by: Andreas Dilger <[email protected]>

Thanks for the review!

Honza

>
> > Signed-off-by: Jan Kara <[email protected]>
> > ---
> > fs/ext4/super.c | 2 --
> > 1 file changed, 2 deletions(-)
> >
> > diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> > index 2b08b162075c..61e6e5f156f3 100644
> > --- a/fs/ext4/super.c
> > +++ b/fs/ext4/super.c
> > @@ -5004,13 +5004,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
> > block = ext4_count_free_clusters(sb);
> > ext4_free_blocks_count_set(sbi->s_es,
> > EXT4_C2B(sbi, block));
> > - ext4_superblock_csum_set(sb);
> > err = percpu_counter_init(&sbi->s_freeclusters_counter, block,
> > GFP_KERNEL);
> > if (!err) {
> > unsigned long freei = ext4_count_free_inodes(sb);
> > sbi->s_es->s_free_inodes_count = cpu_to_le32(freei);
> > - ext4_superblock_csum_set(sb);
> > err = percpu_counter_init(&sbi->s_freeinodes_counter, freei,
> > GFP_KERNEL);
> > }
> > --
> > 2.16.4
> >
>
>
> Cheers, Andreas
>
>
>
>
>

--
Jan Kara <[email protected]>
SUSE Labs, CR

2020-12-14 19:21:00

by harshad shirwadkar

[permalink] [raw]

Subject: Re: [PATCH 00/12] ext4: Various fixes of ext4 handling of fs errors

Thanks! I ran smoke tests (-c 4k -g quick) on this series and there were no
regressions for me either.

- Harshad

On Fri, Nov 27, 2020 at 3:37 AM Jan Kara <[email protected]> wrote:
>
> Hello,
>
> this patch series addresses problems in the handling of filesystem errors in
> ext4. When we hit a metadata error, we want to store information about the error
> in the superblock. Currently we do it through direct superblock modification
> which can lead to lost information, checksum failures, or DIF/DIX failures.
> Fix various races in the error handling so that the superblock update is
> reliable.
>
> The patches have passed xfstests for me in various configurations and some
> targeted manual testing of the error handling.
>
> Honza

2020-12-14 19:25:30

by harshad shirwadkar

[permalink] [raw]

Subject: Re: [PATCH 08/12] ext4: Combine ext4_handle_error() and save_error_info()

On Fri, Nov 27, 2020 at 3:38 AM Jan Kara <[email protected]> wrote:
>
> save_error_info() is always called together with ext4_handle_error().
> Combine them into a single call and move unconditional bits out of
> save_error_info() into ext4_handle_error().
>
> Signed-off-by: Jan Kara <[email protected]>
> ---
> fs/ext4/super.c | 31 +++++++++++++++----------------
> 1 file changed, 15 insertions(+), 16 deletions(-)
>
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index 2d7dc0908cdd..73a09b73fc11 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -592,9 +592,6 @@ static void __save_error_info(struct super_block *sb, int error,
> {
> struct ext4_sb_info *sbi = EXT4_SB(sb);
>
> - EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
> - if (bdev_read_only(sb->s_bdev))
> - return;
> /* We default to EFSCORRUPTED error... */
> if (error == 0)
> error = EFSCORRUPTED;
> @@ -647,13 +644,19 @@ static void save_error_info(struct super_block *sb, int error,
> * used to deal with unrecoverable failures such as journal IO errors or ENOMEM
> * at a critical moment in log management.
> */
> -static void ext4_handle_error(struct super_block *sb, bool force_ro)
> +static void ext4_handle_error(struct super_block *sb, bool force_ro, int error,
> + __u32 ino, __u64 block,
> + const char *func, unsigned int line)
> {
> journal_t *journal = EXT4_SB(sb)->s_journal;
>
> + EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
> if (test_opt(sb, WARN_ON_ERROR))
> WARN_ON_ONCE(1);
>
> + if (!bdev_read_only(sb->s_bdev))
> + save_error_info(sb, error, ino, block, func, line);
> +
> if (sb_rdonly(sb) || (!force_ro && test_opt(sb, ERRORS_CONT)))
> return;
>
> @@ -710,8 +713,7 @@ void __ext4_error(struct super_block *sb, const char *function,
> sb->s_id, function, line, current->comm, &vaf);
> va_end(args);
> }
> - save_error_info(sb, error, 0, block, function, line);
> - ext4_handle_error(sb, force_ro);
> + ext4_handle_error(sb, force_ro, error, 0, block, function, line);
> }
>
> void __ext4_error_inode(struct inode *inode, const char *function,
> @@ -741,9 +743,8 @@ void __ext4_error_inode(struct inode *inode, const char *function,
> current->comm, &vaf);
> va_end(args);
> }
> - save_error_info(inode->i_sb, error, inode->i_ino, block,
> - function, line);
> - ext4_handle_error(inode->i_sb, false);
> + ext4_handle_error(inode->i_sb, false, error, inode->i_ino, block,
> + function, line);
> }
>
> void __ext4_error_file(struct file *file, const char *function,
> @@ -780,9 +781,8 @@ void __ext4_error_file(struct file *file, const char *function,
> current->comm, path, &vaf);
> va_end(args);
> }
> - save_error_info(inode->i_sb, EFSCORRUPTED, inode->i_ino, block,
> - function, line);
> - ext4_handle_error(inode->i_sb, false);
> + ext4_handle_error(inode->i_sb, false, EFSCORRUPTED, inode->i_ino, block,
> + function, line);
> }
>
> const char *ext4_decode_error(struct super_block *sb, int errno,
> @@ -849,8 +849,7 @@ void __ext4_std_error(struct super_block *sb, const char *function,
> sb->s_id, function, line, errstr);
> }
>
> - save_error_info(sb, -errno, 0, 0, function, line);
> - ext4_handle_error(sb, false);
> + ext4_handle_error(sb, false, -errno, 0, 0, function, line);
> }
>
> void __ext4_msg(struct super_block *sb,
> @@ -944,13 +943,13 @@ __acquires(bitlock)
> if (test_opt(sb, ERRORS_CONT)) {
> if (test_opt(sb, WARN_ON_ERROR))
> WARN_ON_ONCE(1);
> + EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
Since you moved the bdev_read_only() check from __save_error_info() to
ext4_handle_error(), should we add that check here?

- Harshad
> __save_error_info(sb, EFSCORRUPTED, ino, block, function, line);
> schedule_work(&EXT4_SB(sb)->s_error_work);
> return;
> }
> ext4_unlock_group(sb, grp);
> - save_error_info(sb, EFSCORRUPTED, ino, block, function, line);
> - ext4_handle_error(sb, false);
> + ext4_handle_error(sb, false, EFSCORRUPTED, ino, block, function, line);
> /*
> * We only get here in the ERRORS_RO case; relocking the group
> * may be dangerous, but nothing bad will happen since the
> --
> 2.16.4
>

2020-12-14 19:29:07

by harshad shirwadkar

[permalink] [raw]

Subject: Re: [PATCH 09/12] ext4: Drop sync argument of ext4_commit_super()

Looks good to me.

Reviewed-by: Harshad Shirwadkar <[email protected]>

On Fri, Nov 27, 2020 at 10:25 AM Jan Kara <[email protected]> wrote:
>
> Everybody passes 1 as sync argument of ext4_commit_super(). Just drop
> it.
>
> Signed-off-by: Jan Kara <[email protected]>
> ---
> fs/ext4/super.c | 47 ++++++++++++++++++++++-------------------------
> 1 file changed, 22 insertions(+), 25 deletions(-)
>
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index 73a09b73fc11..aae12ea1466a 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -65,7 +65,7 @@ static struct ratelimit_state ext4_mount_msg_ratelimit;
> static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
> unsigned long journal_devnum);
> static int ext4_show_options(struct seq_file *seq, struct dentry *root);
> -static int ext4_commit_super(struct super_block *sb, int sync);
> +static int ext4_commit_super(struct super_block *sb);
> static int ext4_mark_recovery_complete(struct super_block *sb,
> struct ext4_super_block *es);
> static int ext4_clear_journal_err(struct super_block *sb,
> @@ -621,7 +621,7 @@ static void save_error_info(struct super_block *sb, int error,
> {
> __save_error_info(sb, error, ino, block, func, line);
> if (!bdev_read_only(sb->s_bdev))
> - ext4_commit_super(sb, 1);
> + ext4_commit_super(sb);
> }
>
> /* Deal with the reporting of failure conditions on a filesystem such as
> @@ -686,7 +686,7 @@ static void flush_stashed_error_work(struct work_struct *work)
> struct ext4_sb_info *sbi = container_of(work, struct ext4_sb_info,
> s_error_work);
>
> - ext4_commit_super(sbi->s_sb, 1);
> + ext4_commit_super(sbi->s_sb);
> }
>
> #define ext4_error_ratelimit(sb) \
> @@ -1151,7 +1151,7 @@ static void ext4_put_super(struct super_block *sb)
> es->s_state = cpu_to_le16(sbi->s_mount_state);
> }
> if (!sb_rdonly(sb))
> - ext4_commit_super(sb, 1);
> + ext4_commit_super(sb);
>
> rcu_read_lock();
> group_desc = rcu_dereference(sbi->s_group_desc);
> @@ -2641,7 +2641,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
> if (sbi->s_journal)
> ext4_set_feature_journal_needs_recovery(sb);
>
> - err = ext4_commit_super(sb, 1);
> + err = ext4_commit_super(sb);
> done:
> if (test_opt(sb, DEBUG))
> printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
> @@ -4862,7 +4862,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
> if (DUMMY_ENCRYPTION_ENABLED(sbi) && !sb_rdonly(sb) &&
> !ext4_has_feature_encrypt(sb)) {
> ext4_set_feature_encrypt(sb);
> - ext4_commit_super(sb, 1);
> + ext4_commit_super(sb);
> }
>
> /*
> @@ -5415,7 +5415,7 @@ static int ext4_load_journal(struct super_block *sb,
> es->s_journal_dev = cpu_to_le32(journal_devnum);
>
> /* Make sure we flush the recovery flag to disk. */
> - ext4_commit_super(sb, 1);
> + ext4_commit_super(sb);
> }
>
> return 0;
> @@ -5425,7 +5425,7 @@ static int ext4_load_journal(struct super_block *sb,
> return err;
> }
>
> -static int ext4_commit_super(struct super_block *sb, int sync)
> +static int ext4_commit_super(struct super_block *sb)
> {
> struct ext4_sb_info *sbi = EXT4_SB(sb);
> struct ext4_super_block *es = EXT4_SB(sb)->s_es;
> @@ -5502,8 +5502,7 @@ static int ext4_commit_super(struct super_block *sb, int sync)
>
> BUFFER_TRACE(sbh, "marking dirty");
> ext4_superblock_csum_set(sb);
> - if (sync)
> - lock_buffer(sbh);
> + lock_buffer(sbh);
> if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) {
> /*
> * Oh, dear. A previous attempt to write the
> @@ -5519,16 +5518,14 @@ static int ext4_commit_super(struct super_block *sb, int sync)
> set_buffer_uptodate(sbh);
> }
> mark_buffer_dirty(sbh);
> - if (sync) {
> - unlock_buffer(sbh);
> - error = __sync_dirty_buffer(sbh,
> - REQ_SYNC | (test_opt(sb, BARRIER) ? REQ_FUA : 0));
> - if (buffer_write_io_error(sbh)) {
> - ext4_msg(sb, KERN_ERR, "I/O error while writing "
> - "superblock");
> - clear_buffer_write_io_error(sbh);
> - set_buffer_uptodate(sbh);
> - }
> + unlock_buffer(sbh);
> + error = __sync_dirty_buffer(sbh,
> + REQ_SYNC | (test_opt(sb, BARRIER) ? REQ_FUA : 0));
> + if (buffer_write_io_error(sbh)) {
> + ext4_msg(sb, KERN_ERR, "I/O error while writing "
> + "superblock");
> + clear_buffer_write_io_error(sbh);
> + set_buffer_uptodate(sbh);
> }
> return error;
> }
> @@ -5559,7 +5556,7 @@ static int ext4_mark_recovery_complete(struct super_block *sb,
>
> if (ext4_has_feature_journal_needs_recovery(sb) && sb_rdonly(sb)) {
> ext4_clear_feature_journal_needs_recovery(sb);
> - ext4_commit_super(sb, 1);
> + ext4_commit_super(sb);
> }
> out:
> jbd2_journal_unlock_updates(journal);
> @@ -5601,7 +5598,7 @@ static int ext4_clear_journal_err(struct super_block *sb,
>
> EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
> es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
> - ext4_commit_super(sb, 1);
> + ext4_commit_super(sb);
>
> jbd2_journal_clear_err(journal);
> jbd2_journal_update_sb_errno(journal);
> @@ -5703,7 +5700,7 @@ static int ext4_freeze(struct super_block *sb)
> ext4_clear_feature_journal_needs_recovery(sb);
> }
>
> - error = ext4_commit_super(sb, 1);
> + error = ext4_commit_super(sb);
> out:
> if (journal)
> /* we rely on upper layer to stop further updates */
> @@ -5725,7 +5722,7 @@ static int ext4_unfreeze(struct super_block *sb)
> ext4_set_feature_journal_needs_recovery(sb);
> }
>
> - ext4_commit_super(sb, 1);
> + ext4_commit_super(sb);
> return 0;
> }
>
> @@ -5985,7 +5982,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
> }
>
> if (sbi->s_journal == NULL && !(old_sb_flags & SB_RDONLY)) {
> - err = ext4_commit_super(sb, 1);
> + err = ext4_commit_super(sb);
> if (err)
> goto restore_opts;
> }
> --
> 2.16.4
>