2020-11-27 11:35:41

by Jan Kara

[permalink] [raw]
Subject: [PATCH 00/12] ext4: Various fixes of ext4 handling of fs errors

Hello,

this patches addresses problems in handling of filesystem errors in ext4.
When we hit metadata error, we want to store information about the error
in the superblock. Currently we do it through direct superblock modification
which can lead to lost information, checksum failures, or DIF/DIX failures.
Fix various races in the error handling so that the superblock update is
reliable.

The patches have passed xfstests for me in various configurations and some
targetted manual testing of the error handling.

Honza


2020-11-27 11:35:41

by Jan Kara

[permalink] [raw]
Subject: [PATCH 03/12] ext4: Standardize error message in ext4_protect_reserved_inode()

We use __ext4_error() when ext4_protect_reserved_inode() finds
filesystem corruption. However EXT4_ERROR_INODE_ERR() is perfectly
capable of reporting all the needed information. So just use that.

Signed-off-by: Jan Kara <[email protected]>
---
fs/ext4/block_validity.c | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index 8e6ca23ed172..13ffda871227 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -176,12 +176,10 @@ static int ext4_protect_reserved_inode(struct super_block *sb,
err = add_system_zone(system_blks, map.m_pblk, n, ino);
if (err < 0) {
if (err == -EFSCORRUPTED) {
- __ext4_error(sb, __func__, __LINE__,
- -err, map.m_pblk,
- "blocks %llu-%llu from inode %u overlap system zone",
- map.m_pblk,
- map.m_pblk + map.m_len - 1,
- ino);
+ EXT4_ERROR_INODE_ERR(inode, -err,
+ "blocks %llu-%llu from inode overlap system zone",
+ map.m_pblk,
+ map.m_pblk + map.m_len - 1);
}
break;
}
--
2.16.4

2020-11-27 11:35:41

by Jan Kara

[permalink] [raw]
Subject: [PATCH 01/12] ext4: Don't remount read-only with errors=continue on reboot

ext4_handle_error() with errors=continue mount option can accidentally
remount the filesystem read-only when the system is rebooting. Fix that.

Fixes: 1dc1097ff60e ("ext4: avoid panic during forced reboot")
Signed-off-by: Jan Kara <[email protected]>
---
fs/ext4/super.c | 14 ++++++--------
1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 94472044f4c1..2b08b162075c 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -666,19 +666,17 @@ static bool system_going_down(void)

static void ext4_handle_error(struct super_block *sb)
{
+ journal_t *journal = EXT4_SB(sb)->s_journal;
+
if (test_opt(sb, WARN_ON_ERROR))
WARN_ON_ONCE(1);

- if (sb_rdonly(sb))
+ if (sb_rdonly(sb) || test_opt(sb, ERRORS_CONT))
return;

- if (!test_opt(sb, ERRORS_CONT)) {
- journal_t *journal = EXT4_SB(sb)->s_journal;
-
- ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED);
- if (journal)
- jbd2_journal_abort(journal, -EIO);
- }
+ ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED);
+ if (journal)
+ jbd2_journal_abort(journal, -EIO);
/*
* We force ERRORS_RO behavior when system is rebooting. Otherwise we
* could panic during 'reboot -f' as the underlying device got already
--
2.16.4

2020-11-27 11:35:42

by Jan Kara

[permalink] [raw]
Subject: [PATCH 04/12] ext4: Make ext4_abort() use __ext4_error()

The only difference between __ext4_abort() and __ext4_error() is that
the former one ignores errors=continue mount option. Unify the code to
reduce duplication.

Signed-off-by: Jan Kara <[email protected]>
---
fs/ext4/ext4.h | 29 ++++++++----------
fs/ext4/ext4_jbd2.c | 4 +--
fs/ext4/inode.c | 2 +-
fs/ext4/super.c | 84 ++++++++++++++---------------------------------------
4 files changed, 37 insertions(+), 82 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 65ecaf96d0a4..e67291c4a10b 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2952,9 +2952,9 @@ extern void ext4_mark_group_bitmap_corrupted(struct super_block *sb,
ext4_group_t block_group,
unsigned int flags);

-extern __printf(6, 7)
-void __ext4_error(struct super_block *, const char *, unsigned int, int, __u64,
- const char *, ...);
+extern __printf(7, 8)
+void __ext4_error(struct super_block *, const char *, unsigned int, bool,
+ int, __u64, const char *, ...);
extern __printf(6, 7)
void __ext4_error_inode(struct inode *, const char *, unsigned int,
ext4_fsblk_t, int, const char *, ...);
@@ -2963,9 +2963,6 @@ void __ext4_error_file(struct file *, const char *, unsigned int, ext4_fsblk_t,
const char *, ...);
extern void __ext4_std_error(struct super_block *, const char *,
unsigned int, int);
-extern __printf(5, 6)
-void __ext4_abort(struct super_block *, const char *, unsigned int, int,
- const char *, ...);
extern __printf(4, 5)
void __ext4_warning(struct super_block *, const char *, unsigned int,
const char *, ...);
@@ -2995,6 +2992,9 @@ void __ext4_grp_locked_error(const char *, unsigned int,
#define EXT4_ERROR_FILE(file, block, fmt, a...) \
ext4_error_file((file), __func__, __LINE__, (block), (fmt), ## a)

+#define ext4_abort(sb, err, fmt, a...) \
+ __ext4_error((sb), __func__, __LINE__, true, (err), 0, (fmt), ## a)
+
#ifdef CONFIG_PRINTK

#define ext4_error_inode(inode, func, line, block, fmt, ...) \
@@ -3005,11 +3005,11 @@ void __ext4_grp_locked_error(const char *, unsigned int,
#define ext4_error_file(file, func, line, block, fmt, ...) \
__ext4_error_file(file, func, line, block, fmt, ##__VA_ARGS__)
#define ext4_error(sb, fmt, ...) \
- __ext4_error((sb), __func__, __LINE__, 0, 0, (fmt), ##__VA_ARGS__)
+ __ext4_error((sb), __func__, __LINE__, false, 0, 0, (fmt), \
+ ##__VA_ARGS__)
#define ext4_error_err(sb, err, fmt, ...) \
- __ext4_error((sb), __func__, __LINE__, (err), 0, (fmt), ##__VA_ARGS__)
-#define ext4_abort(sb, err, fmt, ...) \
- __ext4_abort((sb), __func__, __LINE__, (err), (fmt), ##__VA_ARGS__)
+ __ext4_error((sb), __func__, __LINE__, false, (err), 0, (fmt), \
+ ##__VA_ARGS__)
#define ext4_warning(sb, fmt, ...) \
__ext4_warning(sb, __func__, __LINE__, fmt, ##__VA_ARGS__)
#define ext4_warning_inode(inode, fmt, ...) \
@@ -3042,17 +3042,12 @@ do { \
#define ext4_error(sb, fmt, ...) \
do { \
no_printk(fmt, ##__VA_ARGS__); \
- __ext4_error(sb, "", 0, 0, 0, " "); \
+ __ext4_error(sb, "", 0, false, 0, 0, " "); \
} while (0)
#define ext4_error_err(sb, err, fmt, ...) \
do { \
no_printk(fmt, ##__VA_ARGS__); \
- __ext4_error(sb, "", 0, err, 0, " "); \
-} while (0)
-#define ext4_abort(sb, err, fmt, ...) \
-do { \
- no_printk(fmt, ##__VA_ARGS__); \
- __ext4_abort(sb, "", 0, err, " "); \
+ __ext4_error(sb, "", 0, false, err, 0, " "); \
} while (0)
#define ext4_warning(sb, fmt, ...) \
do { \
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index 0fd0c42a4f7d..1a0a827a7f34 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -296,8 +296,8 @@ int __ext4_forget(const char *where, unsigned int line, handle_t *handle,
if (err) {
ext4_journal_abort_handle(where, line, __func__,
bh, handle, err);
- __ext4_abort(inode->i_sb, where, line, -err,
- "error %d when attempting revoke", err);
+ __ext4_error(inode->i_sb, where, line, true, -err, 0,
+ "error %d when attempting revoke", err);
}
BUFFER_TRACE(bh, "exit");
return err;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 0d8385aea898..3a39fa0d6a3a 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4610,7 +4610,7 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
(ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))) {
if (flags & EXT4_IGET_HANDLE)
return ERR_PTR(-ESTALE);
- __ext4_error(sb, function, line, EFSCORRUPTED, 0,
+ __ext4_error(sb, function, line, false, EFSCORRUPTED, 0,
"inode #%lu: comm %s: iget: illegal inode #",
ino, current->comm);
return ERR_PTR(-EFSCORRUPTED);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 61e6e5f156f3..dddaadc55475 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -662,16 +662,21 @@ static bool system_going_down(void)
* We'll just use the jbd2_journal_abort() error code to record an error in
* the journal instead. On recovery, the journal will complain about
* that error until we've noted it down and cleared it.
+ *
+ * If force_ro is set, we unconditionally force the filesystem into an
+ * ABORT|READONLY state, unless the error response on the fs has been set to
+ * panic in which case we take the easy way out and panic immediately. This is
+ * used to deal with unrecoverable failures such as journal IO errors or ENOMEM
+ * at a critical moment in log management.
*/
-
-static void ext4_handle_error(struct super_block *sb)
+static void ext4_handle_error(struct super_block *sb, bool force_ro)
{
journal_t *journal = EXT4_SB(sb)->s_journal;

if (test_opt(sb, WARN_ON_ERROR))
WARN_ON_ONCE(1);

- if (sb_rdonly(sb) || test_opt(sb, ERRORS_CONT))
+ if (sb_rdonly(sb) || (!force_ro && test_opt(sb, ERRORS_CONT)))
return;

ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED);
@@ -682,18 +687,17 @@ static void ext4_handle_error(struct super_block *sb)
* could panic during 'reboot -f' as the underlying device got already
* disabled.
*/
- if (test_opt(sb, ERRORS_RO) || system_going_down()) {
- ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
- /*
- * Make sure updated value of ->s_mount_flags will be visible
- * before ->s_flags update
- */
- smp_wmb();
- sb->s_flags |= SB_RDONLY;
- } else if (test_opt(sb, ERRORS_PANIC)) {
+ if (test_opt(sb, ERRORS_PANIC) && !system_going_down()) {
panic("EXT4-fs (device %s): panic forced after error\n",
sb->s_id);
}
+ ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
+ /*
+ * Make sure updated value of ->s_mount_flags will be visible before
+ * ->s_flags update
+ */
+ smp_wmb();
+ sb->s_flags |= SB_RDONLY;
}

#define ext4_error_ratelimit(sb) \
@@ -701,7 +705,7 @@ static void ext4_handle_error(struct super_block *sb)
"EXT4-fs error")

void __ext4_error(struct super_block *sb, const char *function,
- unsigned int line, int error, __u64 block,
+ unsigned int line, bool force_ro, int error, __u64 block,
const char *fmt, ...)
{
struct va_format vaf;
@@ -721,7 +725,7 @@ void __ext4_error(struct super_block *sb, const char *function,
va_end(args);
}
save_error_info(sb, error, 0, block, function, line);
- ext4_handle_error(sb);
+ ext4_handle_error(sb, force_ro);
}

void __ext4_error_inode(struct inode *inode, const char *function,
@@ -753,7 +757,7 @@ void __ext4_error_inode(struct inode *inode, const char *function,
}
save_error_info(inode->i_sb, error, inode->i_ino, block,
function, line);
- ext4_handle_error(inode->i_sb);
+ ext4_handle_error(inode->i_sb, false);
}

void __ext4_error_file(struct file *file, const char *function,
@@ -792,7 +796,7 @@ void __ext4_error_file(struct file *file, const char *function,
}
save_error_info(inode->i_sb, EFSCORRUPTED, inode->i_ino, block,
function, line);
- ext4_handle_error(inode->i_sb);
+ ext4_handle_error(inode->i_sb, false);
}

const char *ext4_decode_error(struct super_block *sb, int errno,
@@ -860,51 +864,7 @@ void __ext4_std_error(struct super_block *sb, const char *function,
}

save_error_info(sb, -errno, 0, 0, function, line);
- ext4_handle_error(sb);
-}
-
-/*
- * ext4_abort is a much stronger failure handler than ext4_error. The
- * abort function may be used to deal with unrecoverable failures such
- * as journal IO errors or ENOMEM at a critical moment in log management.
- *
- * We unconditionally force the filesystem into an ABORT|READONLY state,
- * unless the error response on the fs has been set to panic in which
- * case we take the easy way out and panic immediately.
- */
-
-void __ext4_abort(struct super_block *sb, const char *function,
- unsigned int line, int error, const char *fmt, ...)
-{
- struct va_format vaf;
- va_list args;
-
- if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
- return;
-
- save_error_info(sb, error, 0, 0, function, line);
- va_start(args, fmt);
- vaf.fmt = fmt;
- vaf.va = &args;
- printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: %pV\n",
- sb->s_id, function, line, &vaf);
- va_end(args);
-
- if (sb_rdonly(sb) == 0) {
- ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED);
- if (EXT4_SB(sb)->s_journal)
- jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
-
- ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
- /*
- * Make sure updated value of ->s_mount_flags will be visible
- * before ->s_flags update
- */
- smp_wmb();
- sb->s_flags |= SB_RDONLY;
- }
- if (test_opt(sb, ERRORS_PANIC) && !system_going_down())
- panic("EXT4-fs panic from previous error\n");
+ ext4_handle_error(sb, false);
}

void __ext4_msg(struct super_block *sb,
@@ -1007,7 +967,7 @@ __acquires(bitlock)

ext4_unlock_group(sb, grp);
ext4_commit_super(sb, 1);
- ext4_handle_error(sb);
+ ext4_handle_error(sb, false);
/*
* We only get here in the ERRORS_RO case; relocking the group
* may be dangerous, but nothing bad will happen since the
--
2.16.4

2020-11-27 11:36:39

by Jan Kara

[permalink] [raw]
Subject: [PATCH 02/12] ext4: Remove redundant sb checksum recomputation

Superblock is written out either through ext4_commit_super() or through
ext4_handle_dirty_super(). In both cases we recompute the checksum so it
is not necessary to recompute it after updating superblock free inodes &
blocks counters.

Signed-off-by: Jan Kara <[email protected]>
---
fs/ext4/super.c | 2 --
1 file changed, 2 deletions(-)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 2b08b162075c..61e6e5f156f3 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -5004,13 +5004,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
block = ext4_count_free_clusters(sb);
ext4_free_blocks_count_set(sbi->s_es,
EXT4_C2B(sbi, block));
- ext4_superblock_csum_set(sb);
err = percpu_counter_init(&sbi->s_freeclusters_counter, block,
GFP_KERNEL);
if (!err) {
unsigned long freei = ext4_count_free_inodes(sb);
sbi->s_es->s_free_inodes_count = cpu_to_le32(freei);
- ext4_superblock_csum_set(sb);
err = percpu_counter_init(&sbi->s_freeinodes_counter, freei,
GFP_KERNEL);
}
--
2.16.4

2020-11-27 11:36:53

by Jan Kara

[permalink] [raw]
Subject: [PATCH 05/12] ext4: Move functions in super.c

Just move error info related functions in super.c close to
ext4_handle_error(). We'll want to combine save_error_info() with
ext4_handle_error() and this makes change more obvious and saves a
forward declaration as well. No functional change.

Signed-off-by: Jan Kara <[email protected]>
---
fs/ext4/super.c | 196 ++++++++++++++++++++++++++++----------------------------
1 file changed, 98 insertions(+), 98 deletions(-)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index dddaadc55475..7948c07d0a90 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -423,104 +423,6 @@ static time64_t __ext4_get_tstamp(__le32 *lo, __u8 *hi)
#define ext4_get_tstamp(es, tstamp) \
__ext4_get_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi)

-static void __save_error_info(struct super_block *sb, int error,
- __u32 ino, __u64 block,
- const char *func, unsigned int line)
-{
- struct ext4_super_block *es = EXT4_SB(sb)->s_es;
- int err;
-
- EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
- if (bdev_read_only(sb->s_bdev))
- return;
- es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
- ext4_update_tstamp(es, s_last_error_time);
- strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func));
- es->s_last_error_line = cpu_to_le32(line);
- es->s_last_error_ino = cpu_to_le32(ino);
- es->s_last_error_block = cpu_to_le64(block);
- switch (error) {
- case EIO:
- err = EXT4_ERR_EIO;
- break;
- case ENOMEM:
- err = EXT4_ERR_ENOMEM;
- break;
- case EFSBADCRC:
- err = EXT4_ERR_EFSBADCRC;
- break;
- case 0:
- case EFSCORRUPTED:
- err = EXT4_ERR_EFSCORRUPTED;
- break;
- case ENOSPC:
- err = EXT4_ERR_ENOSPC;
- break;
- case ENOKEY:
- err = EXT4_ERR_ENOKEY;
- break;
- case EROFS:
- err = EXT4_ERR_EROFS;
- break;
- case EFBIG:
- err = EXT4_ERR_EFBIG;
- break;
- case EEXIST:
- err = EXT4_ERR_EEXIST;
- break;
- case ERANGE:
- err = EXT4_ERR_ERANGE;
- break;
- case EOVERFLOW:
- err = EXT4_ERR_EOVERFLOW;
- break;
- case EBUSY:
- err = EXT4_ERR_EBUSY;
- break;
- case ENOTDIR:
- err = EXT4_ERR_ENOTDIR;
- break;
- case ENOTEMPTY:
- err = EXT4_ERR_ENOTEMPTY;
- break;
- case ESHUTDOWN:
- err = EXT4_ERR_ESHUTDOWN;
- break;
- case EFAULT:
- err = EXT4_ERR_EFAULT;
- break;
- default:
- err = EXT4_ERR_UNKNOWN;
- }
- es->s_last_error_errcode = err;
- if (!es->s_first_error_time) {
- es->s_first_error_time = es->s_last_error_time;
- es->s_first_error_time_hi = es->s_last_error_time_hi;
- strncpy(es->s_first_error_func, func,
- sizeof(es->s_first_error_func));
- es->s_first_error_line = cpu_to_le32(line);
- es->s_first_error_ino = es->s_last_error_ino;
- es->s_first_error_block = es->s_last_error_block;
- es->s_first_error_errcode = es->s_last_error_errcode;
- }
- /*
- * Start the daily error reporting function if it hasn't been
- * started already
- */
- if (!es->s_error_count)
- mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ);
- le32_add_cpu(&es->s_error_count, 1);
-}
-
-static void save_error_info(struct super_block *sb, int error,
- __u32 ino, __u64 block,
- const char *func, unsigned int line)
-{
- __save_error_info(sb, error, ino, block, func, line);
- if (!bdev_read_only(sb->s_bdev))
- ext4_commit_super(sb, 1);
-}
-
/*
* The del_gendisk() function uninitializes the disk-specific data
* structures, including the bdi structure, without telling anyone
@@ -649,6 +551,104 @@ static bool system_going_down(void)
|| system_state == SYSTEM_RESTART;
}

+static void __save_error_info(struct super_block *sb, int error,
+ __u32 ino, __u64 block,
+ const char *func, unsigned int line)
+{
+ struct ext4_super_block *es = EXT4_SB(sb)->s_es;
+ int err;
+
+ EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
+ if (bdev_read_only(sb->s_bdev))
+ return;
+ es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
+ ext4_update_tstamp(es, s_last_error_time);
+ strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func));
+ es->s_last_error_line = cpu_to_le32(line);
+ es->s_last_error_ino = cpu_to_le32(ino);
+ es->s_last_error_block = cpu_to_le64(block);
+ switch (error) {
+ case EIO:
+ err = EXT4_ERR_EIO;
+ break;
+ case ENOMEM:
+ err = EXT4_ERR_ENOMEM;
+ break;
+ case EFSBADCRC:
+ err = EXT4_ERR_EFSBADCRC;
+ break;
+ case 0:
+ case EFSCORRUPTED:
+ err = EXT4_ERR_EFSCORRUPTED;
+ break;
+ case ENOSPC:
+ err = EXT4_ERR_ENOSPC;
+ break;
+ case ENOKEY:
+ err = EXT4_ERR_ENOKEY;
+ break;
+ case EROFS:
+ err = EXT4_ERR_EROFS;
+ break;
+ case EFBIG:
+ err = EXT4_ERR_EFBIG;
+ break;
+ case EEXIST:
+ err = EXT4_ERR_EEXIST;
+ break;
+ case ERANGE:
+ err = EXT4_ERR_ERANGE;
+ break;
+ case EOVERFLOW:
+ err = EXT4_ERR_EOVERFLOW;
+ break;
+ case EBUSY:
+ err = EXT4_ERR_EBUSY;
+ break;
+ case ENOTDIR:
+ err = EXT4_ERR_ENOTDIR;
+ break;
+ case ENOTEMPTY:
+ err = EXT4_ERR_ENOTEMPTY;
+ break;
+ case ESHUTDOWN:
+ err = EXT4_ERR_ESHUTDOWN;
+ break;
+ case EFAULT:
+ err = EXT4_ERR_EFAULT;
+ break;
+ default:
+ err = EXT4_ERR_UNKNOWN;
+ }
+ es->s_last_error_errcode = err;
+ if (!es->s_first_error_time) {
+ es->s_first_error_time = es->s_last_error_time;
+ es->s_first_error_time_hi = es->s_last_error_time_hi;
+ strncpy(es->s_first_error_func, func,
+ sizeof(es->s_first_error_func));
+ es->s_first_error_line = cpu_to_le32(line);
+ es->s_first_error_ino = es->s_last_error_ino;
+ es->s_first_error_block = es->s_last_error_block;
+ es->s_first_error_errcode = es->s_last_error_errcode;
+ }
+ /*
+ * Start the daily error reporting function if it hasn't been
+ * started already
+ */
+ if (!es->s_error_count)
+ mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ);
+ le32_add_cpu(&es->s_error_count, 1);
+}
+
+static void save_error_info(struct super_block *sb, int error,
+ __u32 ino, __u64 block,
+ const char *func, unsigned int line)
+{
+ __save_error_info(sb, error, ino, block, func, line);
+ if (!bdev_read_only(sb->s_bdev))
+ ext4_commit_super(sb, 1);
+}
+
/* Deal with the reporting of failure conditions on a filesystem such as
* inconsistencies detected or read IO failures.
*
--
2.16.4

2020-11-27 11:37:34

by Jan Kara

[permalink] [raw]
Subject: [PATCH 09/12] ext4: Drop sync argument of ext4_commit_super()

Everybody passes 1 as sync argument of ext4_commit_super(). Just drop
it.

Signed-off-by: Jan Kara <[email protected]>
---
fs/ext4/super.c | 47 ++++++++++++++++++++++-------------------------
1 file changed, 22 insertions(+), 25 deletions(-)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 73a09b73fc11..aae12ea1466a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -65,7 +65,7 @@ static struct ratelimit_state ext4_mount_msg_ratelimit;
static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
unsigned long journal_devnum);
static int ext4_show_options(struct seq_file *seq, struct dentry *root);
-static int ext4_commit_super(struct super_block *sb, int sync);
+static int ext4_commit_super(struct super_block *sb);
static int ext4_mark_recovery_complete(struct super_block *sb,
struct ext4_super_block *es);
static int ext4_clear_journal_err(struct super_block *sb,
@@ -621,7 +621,7 @@ static void save_error_info(struct super_block *sb, int error,
{
__save_error_info(sb, error, ino, block, func, line);
if (!bdev_read_only(sb->s_bdev))
- ext4_commit_super(sb, 1);
+ ext4_commit_super(sb);
}

/* Deal with the reporting of failure conditions on a filesystem such as
@@ -686,7 +686,7 @@ static void flush_stashed_error_work(struct work_struct *work)
struct ext4_sb_info *sbi = container_of(work, struct ext4_sb_info,
s_error_work);

- ext4_commit_super(sbi->s_sb, 1);
+ ext4_commit_super(sbi->s_sb);
}

#define ext4_error_ratelimit(sb) \
@@ -1151,7 +1151,7 @@ static void ext4_put_super(struct super_block *sb)
es->s_state = cpu_to_le16(sbi->s_mount_state);
}
if (!sb_rdonly(sb))
- ext4_commit_super(sb, 1);
+ ext4_commit_super(sb);

rcu_read_lock();
group_desc = rcu_dereference(sbi->s_group_desc);
@@ -2641,7 +2641,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
if (sbi->s_journal)
ext4_set_feature_journal_needs_recovery(sb);

- err = ext4_commit_super(sb, 1);
+ err = ext4_commit_super(sb);
done:
if (test_opt(sb, DEBUG))
printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
@@ -4862,7 +4862,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
if (DUMMY_ENCRYPTION_ENABLED(sbi) && !sb_rdonly(sb) &&
!ext4_has_feature_encrypt(sb)) {
ext4_set_feature_encrypt(sb);
- ext4_commit_super(sb, 1);
+ ext4_commit_super(sb);
}

/*
@@ -5415,7 +5415,7 @@ static int ext4_load_journal(struct super_block *sb,
es->s_journal_dev = cpu_to_le32(journal_devnum);

/* Make sure we flush the recovery flag to disk. */
- ext4_commit_super(sb, 1);
+ ext4_commit_super(sb);
}

return 0;
@@ -5425,7 +5425,7 @@ static int ext4_load_journal(struct super_block *sb,
return err;
}

-static int ext4_commit_super(struct super_block *sb, int sync)
+static int ext4_commit_super(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = EXT4_SB(sb)->s_es;
@@ -5502,8 +5502,7 @@ static int ext4_commit_super(struct super_block *sb, int sync)

BUFFER_TRACE(sbh, "marking dirty");
ext4_superblock_csum_set(sb);
- if (sync)
- lock_buffer(sbh);
+ lock_buffer(sbh);
if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) {
/*
* Oh, dear. A previous attempt to write the
@@ -5519,16 +5518,14 @@ static int ext4_commit_super(struct super_block *sb, int sync)
set_buffer_uptodate(sbh);
}
mark_buffer_dirty(sbh);
- if (sync) {
- unlock_buffer(sbh);
- error = __sync_dirty_buffer(sbh,
- REQ_SYNC | (test_opt(sb, BARRIER) ? REQ_FUA : 0));
- if (buffer_write_io_error(sbh)) {
- ext4_msg(sb, KERN_ERR, "I/O error while writing "
- "superblock");
- clear_buffer_write_io_error(sbh);
- set_buffer_uptodate(sbh);
- }
+ unlock_buffer(sbh);
+ error = __sync_dirty_buffer(sbh,
+ REQ_SYNC | (test_opt(sb, BARRIER) ? REQ_FUA : 0));
+ if (buffer_write_io_error(sbh)) {
+ ext4_msg(sb, KERN_ERR, "I/O error while writing "
+ "superblock");
+ clear_buffer_write_io_error(sbh);
+ set_buffer_uptodate(sbh);
}
return error;
}
@@ -5559,7 +5556,7 @@ static int ext4_mark_recovery_complete(struct super_block *sb,

if (ext4_has_feature_journal_needs_recovery(sb) && sb_rdonly(sb)) {
ext4_clear_feature_journal_needs_recovery(sb);
- ext4_commit_super(sb, 1);
+ ext4_commit_super(sb);
}
out:
jbd2_journal_unlock_updates(journal);
@@ -5601,7 +5598,7 @@ static int ext4_clear_journal_err(struct super_block *sb,

EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
- ext4_commit_super(sb, 1);
+ ext4_commit_super(sb);

jbd2_journal_clear_err(journal);
jbd2_journal_update_sb_errno(journal);
@@ -5703,7 +5700,7 @@ static int ext4_freeze(struct super_block *sb)
ext4_clear_feature_journal_needs_recovery(sb);
}

- error = ext4_commit_super(sb, 1);
+ error = ext4_commit_super(sb);
out:
if (journal)
/* we rely on upper layer to stop further updates */
@@ -5725,7 +5722,7 @@ static int ext4_unfreeze(struct super_block *sb)
ext4_set_feature_journal_needs_recovery(sb);
}

- ext4_commit_super(sb, 1);
+ ext4_commit_super(sb);
return 0;
}

@@ -5985,7 +5982,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
}

if (sbi->s_journal == NULL && !(old_sb_flags & SB_RDONLY)) {
- err = ext4_commit_super(sb, 1);
+ err = ext4_commit_super(sb);
if (err)
goto restore_opts;
}
--
2.16.4

2020-11-27 11:37:35

by Jan Kara

[permalink] [raw]
Subject: [PATCH 07/12] ext4: Defer saving error info from atomic context

When filesystem inconsistency is detected with group locked, we
currently try to modify superblock to store error there without
blocking. However this can cause superblock checksum failures (or
DIF/DIX failure) when the superblock is just being written out.

Make error handling code just store error information in ext4_sb_info
structure and copy it to on-disk superblock only in ext4_commit_super().
In case of error happening with group locked, we just postpone the
superblock flushing to a workqueue.

Signed-off-by: Jan Kara <[email protected]>
---
fs/ext4/ext4.h | 21 +++++++++++
fs/ext4/super.c | 115 ++++++++++++++++++++++++++++++++++++++------------------
2 files changed, 99 insertions(+), 37 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index e67291c4a10b..09959ee0c9e2 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1619,6 +1619,27 @@ struct ext4_sb_info {
errseq_t s_bdev_wb_err;
spinlock_t s_bdev_wb_lock;

+ /* Information about errors that happened during this mount */
+ spinlock_t s_error_lock;
+ int s_add_error_count;
+ int s_first_error_code;
+ __u32 s_first_error_line;
+ __u32 s_first_error_ino;
+ __u64 s_first_error_block;
+ const char *s_first_error_func;
+ time64_t s_first_error_time;
+ int s_last_error_code;
+ __u32 s_last_error_line;
+ __u32 s_last_error_ino;
+ __u64 s_last_error_block;
+ const char *s_last_error_func;
+ time64_t s_last_error_time;
+ /*
+ * If we are in a context where we cannot update error information in
+ * the on-disk superblock, we queue this work to do it.
+ */
+ struct work_struct s_error_work;
+
/* Ext4 fast commit stuff */
atomic_t s_fc_subtid;
atomic_t s_fc_ineligible_updates;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 8add06d08495..2d7dc0908cdd 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -404,10 +404,8 @@ void ext4_itable_unused_set(struct super_block *sb,
bg->bg_itable_unused_hi = cpu_to_le16(count >> 16);
}

-static void __ext4_update_tstamp(__le32 *lo, __u8 *hi)
+static void __ext4_update_tstamp(__le32 *lo, __u8 *hi, time64_t now)
{
- time64_t now = ktime_get_real_seconds();
-
now = clamp_val(now, 0, (1ull << 40) - 1);

*lo = cpu_to_le32(lower_32_bits(now));
@@ -419,7 +417,8 @@ static time64_t __ext4_get_tstamp(__le32 *lo, __u8 *hi)
return ((time64_t)(*hi) << 32) + le32_to_cpu(*lo);
}
#define ext4_update_tstamp(es, tstamp) \
- __ext4_update_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi)
+ __ext4_update_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi, \
+ ktime_get_real_seconds())
#define ext4_get_tstamp(es, tstamp) \
__ext4_get_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi)

@@ -591,7 +590,7 @@ static void __save_error_info(struct super_block *sb, int error,
__u32 ino, __u64 block,
const char *func, unsigned int line)
{
- struct ext4_super_block *es = EXT4_SB(sb)->s_es;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);

EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
if (bdev_read_only(sb->s_bdev))
@@ -599,30 +598,24 @@ static void __save_error_info(struct super_block *sb, int error,
/* We default to EFSCORRUPTED error... */
if (error == 0)
error = EFSCORRUPTED;
- es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
- ext4_update_tstamp(es, s_last_error_time);
- strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func));
- es->s_last_error_line = cpu_to_le32(line);
- es->s_last_error_ino = cpu_to_le32(ino);
- es->s_last_error_block = cpu_to_le64(block);
- es->s_last_error_errcode = ext4_errno_to_code(error);
- if (!es->s_first_error_time) {
- es->s_first_error_time = es->s_last_error_time;
- es->s_first_error_time_hi = es->s_last_error_time_hi;
- strncpy(es->s_first_error_func, func,
- sizeof(es->s_first_error_func));
- es->s_first_error_line = cpu_to_le32(line);
- es->s_first_error_ino = es->s_last_error_ino;
- es->s_first_error_block = es->s_last_error_block;
- es->s_first_error_errcode = es->s_last_error_errcode;
- }
- /*
- * Start the daily error reporting function if it hasn't been
- * started already
- */
- if (!es->s_error_count)
- mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ);
- le32_add_cpu(&es->s_error_count, 1);
+
+ spin_lock(&sbi->s_error_lock);
+ sbi->s_add_error_count++;
+ sbi->s_last_error_code = error;
+ sbi->s_last_error_line = line;
+ sbi->s_last_error_ino = ino;
+ sbi->s_last_error_block = block;
+ sbi->s_last_error_func = func;
+ sbi->s_last_error_time = ktime_get_real_seconds();
+ if (!sbi->s_first_error_time) {
+ sbi->s_first_error_code = error;
+ sbi->s_first_error_line = line;
+ sbi->s_first_error_ino = ino;
+ sbi->s_first_error_block = block;
+ sbi->s_first_error_func = func;
+ sbi->s_first_error_time = sbi->s_last_error_time;
+ }
+ spin_unlock(&sbi->s_error_lock);
}

static void save_error_info(struct super_block *sb, int error,
@@ -685,6 +678,14 @@ static void ext4_handle_error(struct super_block *sb, bool force_ro)
sb->s_flags |= SB_RDONLY;
}

+static void flush_stashed_error_work(struct work_struct *work)
+{
+ struct ext4_sb_info *sbi = container_of(work, struct ext4_sb_info,
+ s_error_work);
+
+ ext4_commit_super(sbi->s_sb, 1);
+}
+
#define ext4_error_ratelimit(sb) \
___ratelimit(&(EXT4_SB(sb)->s_err_ratelimit_state), \
"EXT4-fs error")
@@ -925,8 +926,6 @@ __acquires(bitlock)
return;

trace_ext4_error(sb, function, line);
- __save_error_info(sb, EFSCORRUPTED, ino, block, function, line);
-
if (ext4_error_ratelimit(sb)) {
va_start(args, fmt);
vaf.fmt = fmt;
@@ -942,16 +941,15 @@ __acquires(bitlock)
va_end(args);
}

- if (test_opt(sb, WARN_ON_ERROR))
- WARN_ON_ONCE(1);
-
if (test_opt(sb, ERRORS_CONT)) {
- ext4_commit_super(sb, 0);
+ if (test_opt(sb, WARN_ON_ERROR))
+ WARN_ON_ONCE(1);
+ __save_error_info(sb, EFSCORRUPTED, ino, block, function, line);
+ schedule_work(&EXT4_SB(sb)->s_error_work);
return;
}
-
ext4_unlock_group(sb, grp);
- ext4_commit_super(sb, 1);
+ save_error_info(sb, EFSCORRUPTED, ino, block, function, line);
ext4_handle_error(sb, false);
/*
* We only get here in the ERRORS_RO case; relocking the group
@@ -1124,6 +1122,7 @@ static void ext4_put_super(struct super_block *sb)
ext4_unregister_li_request(sb);
ext4_quota_off_umount(sb);

+ flush_work(&sbi->s_error_work);
destroy_workqueue(sbi->rsv_conversion_wq);

/*
@@ -4661,6 +4660,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
}

timer_setup(&sbi->s_err_report, print_daily_error_info, 0);
+ spin_lock_init(&sbi->s_error_lock);
+ INIT_WORK(&sbi->s_error_work, flush_stashed_error_work);

/* Register extent status tree shrinker */
if (ext4_es_register_shrinker(sbi))
@@ -5427,6 +5428,7 @@ static int ext4_load_journal(struct super_block *sb,

static int ext4_commit_super(struct super_block *sb, int sync)
{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = EXT4_SB(sb)->s_es;
struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
int error = 0;
@@ -5463,6 +5465,42 @@ static int ext4_commit_super(struct super_block *sb, int sync)
es->s_free_inodes_count =
cpu_to_le32(percpu_counter_sum_positive(
&EXT4_SB(sb)->s_freeinodes_counter));
+ /* Copy error information to the on-disk superblock */
+ spin_lock(&sbi->s_error_lock);
+ if (sbi->s_add_error_count > 0) {
+ es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
+ __ext4_update_tstamp(&es->s_first_error_time,
+ &es->s_first_error_time_hi,
+ sbi->s_first_error_time);
+ strncpy(es->s_first_error_func, sbi->s_first_error_func,
+ sizeof(es->s_first_error_func));
+ es->s_first_error_line = cpu_to_le32(sbi->s_first_error_line);
+ es->s_first_error_ino = cpu_to_le32(sbi->s_first_error_ino);
+ es->s_first_error_block = cpu_to_le64(sbi->s_first_error_block);
+ es->s_first_error_errcode =
+ ext4_errno_to_code(sbi->s_first_error_code);
+
+ __ext4_update_tstamp(&es->s_last_error_time,
+ &es->s_last_error_time_hi,
+ sbi->s_last_error_time);
+ strncpy(es->s_last_error_func, sbi->s_last_error_func,
+ sizeof(es->s_last_error_func));
+ es->s_last_error_line = cpu_to_le32(sbi->s_last_error_line);
+ es->s_last_error_ino = cpu_to_le32(sbi->s_last_error_ino);
+ es->s_last_error_block = cpu_to_le64(sbi->s_last_error_block);
+ es->s_last_error_errcode =
+ ext4_errno_to_code(sbi->s_last_error_code);
+ /*
+ * Start the daily error reporting function if it hasn't been
+ * started already
+ */
+ if (!es->s_error_count)
+ mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ);
+ le32_add_cpu(&es->s_error_count, sbi->s_add_error_count);
+ sbi->s_add_error_count = 0;
+ }
+ spin_unlock(&sbi->s_error_lock);
+
BUFFER_TRACE(sbh, "marking dirty");
ext4_superblock_csum_set(sb);
if (sync)
@@ -5816,6 +5854,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
}

+ /* Flush outstanding errors before changing fs state */
+ flush_work(&sbi->s_error_work);
+
if ((bool)(*flags & SB_RDONLY) != sb_rdonly(sb)) {
if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED)) {
err = -EROFS;
--
2.16.4

2020-11-27 11:37:48

by Jan Kara

[permalink] [raw]
Subject: [PATCH 11/12] ext4: Save error info to sb through journal if available

If journalling is still working at the moment we get to writing error
information to the superblock we cannot write directly to the superblock
as such write could race with journalled update of the superblock and
cause journal checksum failures, writing inconsistent information to the
journal or other problems. We cannot journal the superblock directly
from the error handling functions as we are running in uncertain context
and could deadlock so just punt journalled superblock update to a
workqueue.

Signed-off-by: Jan Kara <[email protected]>
---
fs/ext4/super.c | 97 ++++++++++++++++++++++++++++++++++++++++++---------------
1 file changed, 72 insertions(+), 25 deletions(-)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index fb1d61ed7dff..51670258ef84 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -65,6 +65,7 @@ static struct ratelimit_state ext4_mount_msg_ratelimit;
static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
unsigned long journal_devnum);
static int ext4_show_options(struct seq_file *seq, struct dentry *root);
+static void ext4_update_super(struct super_block *sb);
static int ext4_commit_super(struct super_block *sb);
static int ext4_mark_recovery_complete(struct super_block *sb,
struct ext4_super_block *es);
@@ -586,9 +587,9 @@ static int ext4_errno_to_code(int errno)
return EXT4_ERR_UNKNOWN;
}

-static void __save_error_info(struct super_block *sb, int error,
- __u32 ino, __u64 block,
- const char *func, unsigned int line)
+static void save_error_info(struct super_block *sb, int error,
+ __u32 ino, __u64 block,
+ const char *func, unsigned int line)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);

@@ -615,15 +616,6 @@ static void __save_error_info(struct super_block *sb, int error,
spin_unlock(&sbi->s_error_lock);
}

-static void save_error_info(struct super_block *sb, int error,
- __u32 ino, __u64 block,
- const char *func, unsigned int line)
-{
- __save_error_info(sb, error, ino, block, func, line);
- if (!bdev_read_only(sb->s_bdev))
- ext4_commit_super(sb);
-}
-
/* Deal with the reporting of failure conditions on a filesystem such as
* inconsistencies detected or read IO failures.
*
@@ -649,20 +641,35 @@ static void ext4_handle_error(struct super_block *sb, bool force_ro, int error,
const char *func, unsigned int line)
{
journal_t *journal = EXT4_SB(sb)->s_journal;
+ bool continue_fs = !force_ro && test_opt(sb, ERRORS_CONT);

EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
if (test_opt(sb, WARN_ON_ERROR))
WARN_ON_ONCE(1);

- if (!bdev_read_only(sb->s_bdev))
+ if (!continue_fs && !sb_rdonly(sb)) {
+ ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED);
+ if (journal)
+ jbd2_journal_abort(journal, -EIO);
+ }
+
+ if (!bdev_read_only(sb->s_bdev)) {
save_error_info(sb, error, ino, block, func, line);
+ /*
+ * In case the fs should keep running, we need to writeout
+ * superblock through the journal. Due to lock ordering
+ * constraints, it may not be safe to do it right here so we
+ * defer superblock flushing to a workqueue.
+ */
+ if (continue_fs)
+ schedule_work(&EXT4_SB(sb)->s_error_work);
+ else
+ ext4_commit_super(sb);
+ }

- if (sb_rdonly(sb) || (!force_ro && test_opt(sb, ERRORS_CONT)))
+ if (sb_rdonly(sb) || continue_fs)
return;

- ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED);
- if (journal)
- jbd2_journal_abort(journal, -EIO);
/*
* We force ERRORS_RO behavior when system is rebooting. Otherwise we
* could panic during 'reboot -f' as the underlying device got already
@@ -685,7 +692,38 @@ static void flush_stashed_error_work(struct work_struct *work)
{
struct ext4_sb_info *sbi = container_of(work, struct ext4_sb_info,
s_error_work);
+ journal_t *journal = sbi->s_journal;
+ handle_t *handle;

+ /*
+ * If the journal is still running, we have to write out superblock
+ * through the journal to avoid collisions of other journalled sb
+ * updates.
+ *
+ * We use directly jbd2 functions here to avoid recursing back into
+ * ext4 error handling code during handling of previous errors.
+ */
+ if (!sb_rdonly(sbi->s_sb) && journal) {
+ handle = jbd2_journal_start(journal, 1);
+ if (IS_ERR(handle))
+ goto write_directly;
+ if (jbd2_journal_get_write_access(handle, sbi->s_sbh)) {
+ jbd2_journal_stop(handle);
+ goto write_directly;
+ }
+ ext4_update_super(sbi->s_sb);
+ if (jbd2_journal_dirty_metadata(handle, sbi->s_sbh)) {
+ jbd2_journal_stop(handle);
+ goto write_directly;
+ }
+ jbd2_journal_stop(handle);
+ return;
+ }
+write_directly:
+ /*
+ * Write through journal failed. Write sb directly to get error info
+ * out and hope for the best.
+ */
ext4_commit_super(sbi->s_sb);
}

@@ -944,7 +982,7 @@ __acquires(bitlock)
if (test_opt(sb, WARN_ON_ERROR))
WARN_ON_ONCE(1);
EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
- __save_error_info(sb, EFSCORRUPTED, ino, block, function, line);
+ save_error_info(sb, EFSCORRUPTED, ino, block, function, line);
schedule_work(&EXT4_SB(sb)->s_error_work);
return;
}
@@ -5425,15 +5463,12 @@ static int ext4_load_journal(struct super_block *sb,
return err;
}

-static int ext4_commit_super(struct super_block *sb)
+/* Copy state of EXT4_SB(sb) into buffer for on-disk superblock */
+static void ext4_update_super(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = EXT4_SB(sb)->s_es;
struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
- int error = 0;
-
- if (!sbh || block_device_ejected(sb))
- return error;

lock_buffer(sbh);
/*
@@ -5501,8 +5536,20 @@ static int ext4_commit_super(struct super_block *sb)
}
spin_unlock(&sbi->s_error_lock);

- BUFFER_TRACE(sbh, "marking dirty");
ext4_superblock_csum_set(sb);
+ unlock_buffer(sbh);
+}
+
+static int ext4_commit_super(struct super_block *sb)
+{
+ struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
+ int error = 0;
+
+ if (!sbh || block_device_ejected(sb))
+ return error;
+
+ ext4_update_super(sb);
+
if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) {
/*
* Oh, dear. A previous attempt to write the
@@ -5517,8 +5564,8 @@ static int ext4_commit_super(struct super_block *sb)
clear_buffer_write_io_error(sbh);
set_buffer_uptodate(sbh);
}
+ BUFFER_TRACE(sbh, "marking dirty");
mark_buffer_dirty(sbh);
- unlock_buffer(sbh);
error = __sync_dirty_buffer(sbh,
REQ_SYNC | (test_opt(sb, BARRIER) ? REQ_FUA : 0));
if (buffer_write_io_error(sbh)) {
--
2.16.4

2020-11-27 11:37:52

by Jan Kara

[permalink] [raw]
Subject: [PATCH 08/12] ext4: Combine ext4_handle_error() and save_error_info()

save_error_info() is always called together with ext4_handle_error().
Combine them into a single call and move unconditional bits out of
save_error_info() into ext4_handle_error().

Signed-off-by: Jan Kara <[email protected]>
---
fs/ext4/super.c | 31 +++++++++++++++----------------
1 file changed, 15 insertions(+), 16 deletions(-)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 2d7dc0908cdd..73a09b73fc11 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -592,9 +592,6 @@ static void __save_error_info(struct super_block *sb, int error,
{
struct ext4_sb_info *sbi = EXT4_SB(sb);

- EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
- if (bdev_read_only(sb->s_bdev))
- return;
/* We default to EFSCORRUPTED error... */
if (error == 0)
error = EFSCORRUPTED;
@@ -647,13 +644,19 @@ static void save_error_info(struct super_block *sb, int error,
* used to deal with unrecoverable failures such as journal IO errors or ENOMEM
* at a critical moment in log management.
*/
-static void ext4_handle_error(struct super_block *sb, bool force_ro)
+static void ext4_handle_error(struct super_block *sb, bool force_ro, int error,
+ __u32 ino, __u64 block,
+ const char *func, unsigned int line)
{
journal_t *journal = EXT4_SB(sb)->s_journal;

+ EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
if (test_opt(sb, WARN_ON_ERROR))
WARN_ON_ONCE(1);

+ if (!bdev_read_only(sb->s_bdev))
+ save_error_info(sb, error, ino, block, func, line);
+
if (sb_rdonly(sb) || (!force_ro && test_opt(sb, ERRORS_CONT)))
return;

@@ -710,8 +713,7 @@ void __ext4_error(struct super_block *sb, const char *function,
sb->s_id, function, line, current->comm, &vaf);
va_end(args);
}
- save_error_info(sb, error, 0, block, function, line);
- ext4_handle_error(sb, force_ro);
+ ext4_handle_error(sb, force_ro, error, 0, block, function, line);
}

void __ext4_error_inode(struct inode *inode, const char *function,
@@ -741,9 +743,8 @@ void __ext4_error_inode(struct inode *inode, const char *function,
current->comm, &vaf);
va_end(args);
}
- save_error_info(inode->i_sb, error, inode->i_ino, block,
- function, line);
- ext4_handle_error(inode->i_sb, false);
+ ext4_handle_error(inode->i_sb, false, error, inode->i_ino, block,
+ function, line);
}

void __ext4_error_file(struct file *file, const char *function,
@@ -780,9 +781,8 @@ void __ext4_error_file(struct file *file, const char *function,
current->comm, path, &vaf);
va_end(args);
}
- save_error_info(inode->i_sb, EFSCORRUPTED, inode->i_ino, block,
- function, line);
- ext4_handle_error(inode->i_sb, false);
+ ext4_handle_error(inode->i_sb, false, EFSCORRUPTED, inode->i_ino, block,
+ function, line);
}

const char *ext4_decode_error(struct super_block *sb, int errno,
@@ -849,8 +849,7 @@ void __ext4_std_error(struct super_block *sb, const char *function,
sb->s_id, function, line, errstr);
}

- save_error_info(sb, -errno, 0, 0, function, line);
- ext4_handle_error(sb, false);
+ ext4_handle_error(sb, false, -errno, 0, 0, function, line);
}

void __ext4_msg(struct super_block *sb,
@@ -944,13 +943,13 @@ __acquires(bitlock)
if (test_opt(sb, ERRORS_CONT)) {
if (test_opt(sb, WARN_ON_ERROR))
WARN_ON_ONCE(1);
+ EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
__save_error_info(sb, EFSCORRUPTED, ino, block, function, line);
schedule_work(&EXT4_SB(sb)->s_error_work);
return;
}
ext4_unlock_group(sb, grp);
- save_error_info(sb, EFSCORRUPTED, ino, block, function, line);
- ext4_handle_error(sb, false);
+ ext4_handle_error(sb, false, EFSCORRUPTED, ino, block, function, line);
/*
* We only get here in the ERRORS_RO case; relocking the group
* may be dangerous, but nothing bad will happen since the
--
2.16.4

2020-11-27 11:38:25

by Jan Kara

[permalink] [raw]
Subject: [PATCH 06/12] ext4: Simplify ext4 error translation

We convert errno's to ext4 on-disk format error codes in
save_error_info(). Add a function and a bit of macro magic to make this
simpler.

Signed-off-by: Jan Kara <[email protected]>
---
fs/ext4/super.c | 95 ++++++++++++++++++++++++---------------------------------
1 file changed, 40 insertions(+), 55 deletions(-)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 7948c07d0a90..8add06d08495 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -551,76 +551,61 @@ static bool system_going_down(void)
|| system_state == SYSTEM_RESTART;
}

+struct ext4_err_translation {
+ int code;
+ int errno;
+};
+
+#define EXT4_ERR_TRANSLATE(err) { .code = EXT4_ERR_##err, .errno = err }
+
+static struct ext4_err_translation err_translation[] = {
+ EXT4_ERR_TRANSLATE(EIO),
+ EXT4_ERR_TRANSLATE(ENOMEM),
+ EXT4_ERR_TRANSLATE(EFSBADCRC),
+ EXT4_ERR_TRANSLATE(EFSCORRUPTED),
+ EXT4_ERR_TRANSLATE(ENOSPC),
+ EXT4_ERR_TRANSLATE(ENOKEY),
+ EXT4_ERR_TRANSLATE(EROFS),
+ EXT4_ERR_TRANSLATE(EFBIG),
+ EXT4_ERR_TRANSLATE(EEXIST),
+ EXT4_ERR_TRANSLATE(ERANGE),
+ EXT4_ERR_TRANSLATE(EOVERFLOW),
+ EXT4_ERR_TRANSLATE(EBUSY),
+ EXT4_ERR_TRANSLATE(ENOTDIR),
+ EXT4_ERR_TRANSLATE(ENOTEMPTY),
+ EXT4_ERR_TRANSLATE(ESHUTDOWN),
+ EXT4_ERR_TRANSLATE(EFAULT),
+};
+
+static int ext4_errno_to_code(int errno)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(err_translation); i++)
+ if (err_translation[i].errno == errno)
+ return err_translation[i].code;
+ return EXT4_ERR_UNKNOWN;
+}
+
static void __save_error_info(struct super_block *sb, int error,
__u32 ino, __u64 block,
const char *func, unsigned int line)
{
struct ext4_super_block *es = EXT4_SB(sb)->s_es;
- int err;

EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
if (bdev_read_only(sb->s_bdev))
return;
+ /* We default to EFSCORRUPTED error... */
+ if (error == 0)
+ error = EFSCORRUPTED;
es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
ext4_update_tstamp(es, s_last_error_time);
strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func));
es->s_last_error_line = cpu_to_le32(line);
es->s_last_error_ino = cpu_to_le32(ino);
es->s_last_error_block = cpu_to_le64(block);
- switch (error) {
- case EIO:
- err = EXT4_ERR_EIO;
- break;
- case ENOMEM:
- err = EXT4_ERR_ENOMEM;
- break;
- case EFSBADCRC:
- err = EXT4_ERR_EFSBADCRC;
- break;
- case 0:
- case EFSCORRUPTED:
- err = EXT4_ERR_EFSCORRUPTED;
- break;
- case ENOSPC:
- err = EXT4_ERR_ENOSPC;
- break;
- case ENOKEY:
- err = EXT4_ERR_ENOKEY;
- break;
- case EROFS:
- err = EXT4_ERR_EROFS;
- break;
- case EFBIG:
- err = EXT4_ERR_EFBIG;
- break;
- case EEXIST:
- err = EXT4_ERR_EEXIST;
- break;
- case ERANGE:
- err = EXT4_ERR_ERANGE;
- break;
- case EOVERFLOW:
- err = EXT4_ERR_EOVERFLOW;
- break;
- case EBUSY:
- err = EXT4_ERR_EBUSY;
- break;
- case ENOTDIR:
- err = EXT4_ERR_ENOTDIR;
- break;
- case ENOTEMPTY:
- err = EXT4_ERR_ENOTEMPTY;
- break;
- case ESHUTDOWN:
- err = EXT4_ERR_ESHUTDOWN;
- break;
- case EFAULT:
- err = EXT4_ERR_EFAULT;
- break;
- default:
- err = EXT4_ERR_UNKNOWN;
- }
- es->s_last_error_errcode = err;
+ es->s_last_error_errcode = ext4_errno_to_code(error);
if (!es->s_first_error_time) {
es->s_first_error_time = es->s_last_error_time;
es->s_first_error_time_hi = es->s_last_error_time_hi;
--
2.16.4

2020-11-27 11:39:19

by Jan Kara

[permalink] [raw]
Subject: [PATCH 10/12] ext4: Protect superblock modifications with a buffer lock

Protect all superblock modifications (including checksum computation)
with a superblock buffer lock. That way we are sure computed checksum
matches current superblock contents (a mismatch could cause checksum
failures in nojournal mode or if an unjournalled superblock update races
with a journalled one). Also we avoid modifying superblock contents
while it is being written out (which can cause DIF/DIX failures if we
are running in nojournal mode).

Signed-off-by: Jan Kara <[email protected]>
---
fs/ext4/ext4_jbd2.c | 1 -
fs/ext4/file.c | 3 +++
fs/ext4/inode.c | 3 +++
fs/ext4/namei.c | 6 ++++++
fs/ext4/resize.c | 12 ++++++++++++
fs/ext4/super.c | 2 +-
fs/ext4/xattr.c | 3 +++
7 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index 1a0a827a7f34..c7e410c4ab7d 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -379,7 +379,6 @@ int __ext4_handle_dirty_super(const char *where, unsigned int line,
struct buffer_head *bh = EXT4_SB(sb)->s_sbh;
int err = 0;

- ext4_superblock_csum_set(sb);
if (ext4_handle_valid(handle)) {
err = jbd2_journal_dirty_metadata(handle, bh);
if (err)
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 3ed8c048fb12..26907d5835d0 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -809,8 +809,11 @@ static int ext4_sample_last_mounted(struct super_block *sb,
err = ext4_journal_get_write_access(handle, sbi->s_sbh);
if (err)
goto out_journal;
+ lock_buffer(sbi->s_sbh);
strlcpy(sbi->s_es->s_last_mounted, cp,
sizeof(sbi->s_es->s_last_mounted));
+ ext4_superblock_csum_set(sb);
+ unlock_buffer(sbi->s_sbh);
ext4_handle_dirty_super(handle, sb);
out_journal:
ext4_journal_stop(handle);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 3a39fa0d6a3a..72534319fae5 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5141,7 +5141,10 @@ static int ext4_do_update_inode(handle_t *handle,
err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
if (err)
goto out_brelse;
+ lock_buffer(EXT4_SB(sb)->s_sbh);
ext4_set_feature_large_file(sb);
+ ext4_superblock_csum_set(sb);
+ unlock_buffer(EXT4_SB(sb)->s_sbh);
ext4_handle_sync(handle);
err = ext4_handle_dirty_super(handle, sb);
}
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 33509266f5a0..d804505e1a32 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2982,7 +2982,10 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
(le32_to_cpu(sbi->s_es->s_inodes_count))) {
/* Insert this inode at the head of the on-disk orphan list */
NEXT_ORPHAN(inode) = le32_to_cpu(sbi->s_es->s_last_orphan);
+ lock_buffer(sbi->s_sbh);
sbi->s_es->s_last_orphan = cpu_to_le32(inode->i_ino);
+ ext4_superblock_csum_set(sb);
+ unlock_buffer(sbi->s_sbh);
dirty = true;
}
list_add(&EXT4_I(inode)->i_orphan, &sbi->s_orphan);
@@ -3065,7 +3068,10 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
mutex_unlock(&sbi->s_orphan_lock);
goto out_brelse;
}
+ lock_buffer(sbi->s_sbh);
sbi->s_es->s_last_orphan = cpu_to_le32(ino_next);
+ ext4_superblock_csum_set(inode->i_sb);
+ unlock_buffer(sbi->s_sbh);
mutex_unlock(&sbi->s_orphan_lock);
err = ext4_handle_dirty_super(handle, inode->i_sb);
} else {
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 928700d57eb6..6155f2b9538c 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -899,7 +899,10 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
EXT4_SB(sb)->s_gdb_count++;
ext4_kvfree_array_rcu(o_group_desc);

+ lock_buffer(EXT4_SB(sb)->s_sbh);
le16_add_cpu(&es->s_reserved_gdt_blocks, -1);
+ ext4_superblock_csum_set(sb);
+ unlock_buffer(EXT4_SB(sb)->s_sbh);
err = ext4_handle_dirty_super(handle, sb);
if (err)
ext4_std_error(sb, err);
@@ -1384,6 +1387,7 @@ static void ext4_update_super(struct super_block *sb,
reserved_blocks *= blocks_count;
do_div(reserved_blocks, 100);

+ lock_buffer(sbi->s_sbh);
ext4_blocks_count_set(es, ext4_blocks_count(es) + blocks_count);
ext4_free_blocks_count_set(es, ext4_free_blocks_count(es) + free_blocks);
le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb) *
@@ -1421,6 +1425,8 @@ static void ext4_update_super(struct super_block *sb,
* active. */
ext4_r_blocks_count_set(es, ext4_r_blocks_count(es) +
reserved_blocks);
+ ext4_superblock_csum_set(sb);
+ unlock_buffer(sbi->s_sbh);

/* Update the free space counts */
percpu_counter_add(&sbi->s_freeclusters_counter,
@@ -1717,8 +1723,11 @@ static int ext4_group_extend_no_check(struct super_block *sb,
goto errout;
}

+ lock_buffer(EXT4_SB(sb)->s_sbh);
ext4_blocks_count_set(es, o_blocks_count + add);
ext4_free_blocks_count_set(es, ext4_free_blocks_count(es) + add);
+ ext4_superblock_csum_set(sb);
+ unlock_buffer(EXT4_SB(sb)->s_sbh);
ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count,
o_blocks_count + add);
/* We add the blocks to the bitmap and set the group need init bit */
@@ -1874,10 +1883,13 @@ static int ext4_convert_meta_bg(struct super_block *sb, struct inode *inode)
if (err)
goto errout;

+ lock_buffer(sbi->s_sbh);
ext4_clear_feature_resize_inode(sb);
ext4_set_feature_meta_bg(sb);
sbi->s_es->s_first_meta_bg =
cpu_to_le32(num_desc_blocks(sb, sbi->s_groups_count));
+ ext4_superblock_csum_set(sb);
+ unlock_buffer(sbi->s_sbh);

err = ext4_handle_dirty_super(handle, sb);
if (err) {
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index aae12ea1466a..fb1d61ed7dff 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -5435,6 +5435,7 @@ static int ext4_commit_super(struct super_block *sb)
if (!sbh || block_device_ejected(sb))
return error;

+ lock_buffer(sbh);
/*
* If the file system is mounted read-only, don't update the
* superblock write time. This avoids updating the superblock
@@ -5502,7 +5503,6 @@ static int ext4_commit_super(struct super_block *sb)

BUFFER_TRACE(sbh, "marking dirty");
ext4_superblock_csum_set(sb);
- lock_buffer(sbh);
if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) {
/*
* Oh, dear. A previous attempt to write the
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 6127e94ea4f5..ab46aa447baa 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -792,7 +792,10 @@ static void ext4_xattr_update_super_block(handle_t *handle,

BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access");
if (ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh) == 0) {
+ lock_buffer(EXT4_SB(sb)->s_sbh);
ext4_set_feature_xattr(sb);
+ ext4_superblock_csum_set(sb);
+ unlock_buffer(EXT4_SB(sb)->s_sbh);
ext4_handle_dirty_super(handle, sb);
}
}
--
2.16.4

2020-11-27 11:39:22

by Jan Kara

[permalink] [raw]
Subject: [PATCH 12/12] ext4: Use sbi instead of EXT4_SB(sb) in ext4_update_super()

No behavioral change.

Signed-off-by: Jan Kara <[email protected]>
---
fs/ext4/super.c | 21 ++++++++++-----------
1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 51670258ef84..3030a02304ef 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -5467,8 +5467,8 @@ static int ext4_load_journal(struct super_block *sb,
static void ext4_update_super(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
- struct ext4_super_block *es = EXT4_SB(sb)->s_es;
- struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
+ struct ext4_super_block *es = sbi->s_es;
+ struct buffer_head *sbh = sbi->s_sbh;

lock_buffer(sbh);
/*
@@ -5485,21 +5485,20 @@ static void ext4_update_super(struct super_block *sb)
ext4_update_tstamp(es, s_wtime);
if (sb->s_bdev->bd_part)
es->s_kbytes_written =
- cpu_to_le64(EXT4_SB(sb)->s_kbytes_written +
+ cpu_to_le64(sbi->s_kbytes_written +
((part_stat_read(sb->s_bdev->bd_part,
sectors[STAT_WRITE]) -
- EXT4_SB(sb)->s_sectors_written_start) >> 1));
+ sbi->s_sectors_written_start) >> 1));
else
- es->s_kbytes_written =
- cpu_to_le64(EXT4_SB(sb)->s_kbytes_written);
- if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeclusters_counter))
+ es->s_kbytes_written = cpu_to_le64(sbi->s_kbytes_written);
+ if (percpu_counter_initialized(&sbi->s_freeclusters_counter))
ext4_free_blocks_count_set(es,
- EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive(
- &EXT4_SB(sb)->s_freeclusters_counter)));
- if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeinodes_counter))
+ EXT4_C2B(sbi, percpu_counter_sum_positive(
+ &sbi->s_freeclusters_counter)));
+ if (percpu_counter_initialized(&sbi->s_freeinodes_counter))
es->s_free_inodes_count =
cpu_to_le32(percpu_counter_sum_positive(
- &EXT4_SB(sb)->s_freeinodes_counter));
+ &sbi->s_freeinodes_counter));
/* Copy error information to the on-disk superblock */
spin_lock(&sbi->s_error_lock);
if (sbi->s_add_error_count > 0) {
--
2.16.4

2020-11-29 21:36:20

by Andreas Dilger

[permalink] [raw]
Subject: Re: [PATCH 01/12] ext4: Don't remount read-only with errors=continue on reboot

On Nov 27, 2020, at 4:33 AM, Jan Kara <[email protected]> wrote:
>
> ext4_handle_error() with errors=continue mount option can accidentally
> remount the filesystem read-only when the system is rebooting. Fix that.
>
> Fixes: 1dc1097ff60e ("ext4: avoid panic during forced reboot")
> Signed-off-by: Jan Kara <[email protected]>

Reviewed-by: Andreas Dilger <[email protected]>

> ---
> fs/ext4/super.c | 14 ++++++--------
> 1 file changed, 6 insertions(+), 8 deletions(-)
>
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index 94472044f4c1..2b08b162075c 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -666,19 +666,17 @@ static bool system_going_down(void)
>
> static void ext4_handle_error(struct super_block *sb)
> {
> + journal_t *journal = EXT4_SB(sb)->s_journal;
> +
> if (test_opt(sb, WARN_ON_ERROR))
> WARN_ON_ONCE(1);
>
> - if (sb_rdonly(sb))
> + if (sb_rdonly(sb) || test_opt(sb, ERRORS_CONT))
> return;
>
> - if (!test_opt(sb, ERRORS_CONT)) {
> - journal_t *journal = EXT4_SB(sb)->s_journal;
> -
> - ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED);
> - if (journal)
> - jbd2_journal_abort(journal, -EIO);
> - }
> + ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED);
> + if (journal)
> + jbd2_journal_abort(journal, -EIO);
> /*
> * We force ERRORS_RO behavior when system is rebooting. Otherwise we
> * could panic during 'reboot -f' as the underlying device got already
> --
> 2.16.4
>


Cheers, Andreas






Attachments:
signature.asc (890.00 B)
Message signed with OpenPGP

2020-11-29 21:41:25

by Andreas Dilger

[permalink] [raw]
Subject: Re: [PATCH 03/12] ext4: Standardize error message in ext4_protect_reserved_inode()

On Nov 27, 2020, at 4:33 AM, Jan Kara <[email protected]> wrote:
>
> We use __ext4_error() when ext4_protect_reserved_inode() finds
> filesystem corruption. However EXT4_ERROR_INODE_ERR() is perfectly
> capable of reporting all the needed information. So just use that.
>
> Signed-off-by: Jan Kara <[email protected]>

I'm not a big fan of EXT4_ERROR_INODE_ERR() vs. ext4_error_inode_err() and
some of the error macros being upper-case vs. lower case, but that is not
the fault of this patch... :-)

Reviewed-by: Andreas Dilger <[email protected]>

> ---
> fs/ext4/block_validity.c | 10 ++++------
> 1 file changed, 4 insertions(+), 6 deletions(-)
>
> diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
> index 8e6ca23ed172..13ffda871227 100644
> --- a/fs/ext4/block_validity.c
> +++ b/fs/ext4/block_validity.c
> @@ -176,12 +176,10 @@ static int ext4_protect_reserved_inode(struct super_block *sb,
> err = add_system_zone(system_blks, map.m_pblk, n, ino);
> if (err < 0) {
> if (err == -EFSCORRUPTED) {
> - __ext4_error(sb, __func__, __LINE__,
> - -err, map.m_pblk,
> - "blocks %llu-%llu from inode %u overlap system zone",
> - map.m_pblk,
> - map.m_pblk + map.m_len - 1,
> - ino);
> + EXT4_ERROR_INODE_ERR(inode, -err,
> + "blocks %llu-%llu from inode overlap system zone",
> + map.m_pblk,
> + map.m_pblk + map.m_len - 1);
> }
> break;
> }
> --
> 2.16.4
>


Cheers, Andreas






Attachments:
signature.asc (890.00 B)
Message signed with OpenPGP

2020-11-29 21:59:17

by Andreas Dilger

[permalink] [raw]
Subject: Re: [PATCH 06/12] ext4: Simplify ext4 error translation

On Nov 27, 2020, at 4:33 AM, Jan Kara <[email protected]> wrote:
>
> We convert errno's to ext4 on-disk format error codes in
> save_error_info(). Add a function and a bit of macro magic to make this
> simpler.
>
> Signed-off-by: Jan Kara <[email protected]>

In hindsight, it would have been better (IMHO) if the EXT4_ERR_* values mapped
to the standard x86_64 errors in errno.h, since that is what most admins/users
are familiar with (e.g. 5 = EIO, 12 = ENOMEM, 28 = ENOSPC, 30 = EROFS). That
would avoid the need to look up the EXT4_ERR_* values, since it doesn't look
like dumpe2fs even handles these fields yet.

I guess I never noticed the original patch when it was submitted, so water under
the bridge, I guess...

Reviewed-by: Andreas Dilger <[email protected]>

> ---
> fs/ext4/super.c | 95 ++++++++++++++++++++++++---------------------------------
> 1 file changed, 40 insertions(+), 55 deletions(-)
>
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index 7948c07d0a90..8add06d08495 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -551,76 +551,61 @@ static bool system_going_down(void)
> || system_state == SYSTEM_RESTART;
> }
>
> +struct ext4_err_translation {
> + int code;
> + int errno;
> +};
> +
> +#define EXT4_ERR_TRANSLATE(err) { .code = EXT4_ERR_##err, .errno = err }
> +
> +static struct ext4_err_translation err_translation[] = {
> + EXT4_ERR_TRANSLATE(EIO),
> + EXT4_ERR_TRANSLATE(ENOMEM),
> + EXT4_ERR_TRANSLATE(EFSBADCRC),
> + EXT4_ERR_TRANSLATE(EFSCORRUPTED),
> + EXT4_ERR_TRANSLATE(ENOSPC),
> + EXT4_ERR_TRANSLATE(ENOKEY),
> + EXT4_ERR_TRANSLATE(EROFS),
> + EXT4_ERR_TRANSLATE(EFBIG),
> + EXT4_ERR_TRANSLATE(EEXIST),
> + EXT4_ERR_TRANSLATE(ERANGE),
> + EXT4_ERR_TRANSLATE(EOVERFLOW),
> + EXT4_ERR_TRANSLATE(EBUSY),
> + EXT4_ERR_TRANSLATE(ENOTDIR),
> + EXT4_ERR_TRANSLATE(ENOTEMPTY),
> + EXT4_ERR_TRANSLATE(ESHUTDOWN),
> + EXT4_ERR_TRANSLATE(EFAULT),
> +};
> +
> +static int ext4_errno_to_code(int errno)
> +{
> + int i;
> +
> + for (i = 0; i < ARRAY_SIZE(err_translation); i++)
> + if (err_translation[i].errno == errno)
> + return err_translation[i].code;
> + return EXT4_ERR_UNKNOWN;
> +}
> +
> static void __save_error_info(struct super_block *sb, int error,
> __u32 ino, __u64 block,
> const char *func, unsigned int line)
> {
> struct ext4_super_block *es = EXT4_SB(sb)->s_es;
> - int err;
>
> EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
> if (bdev_read_only(sb->s_bdev))
> return;
> + /* We default to EFSCORRUPTED error... */
> + if (error == 0)
> + error = EFSCORRUPTED;
> es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
> ext4_update_tstamp(es, s_last_error_time);
> strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func));
> es->s_last_error_line = cpu_to_le32(line);
> es->s_last_error_ino = cpu_to_le32(ino);
> es->s_last_error_block = cpu_to_le64(block);
> - switch (error) {
> - case EIO:
> - err = EXT4_ERR_EIO;
> - break;
> - case ENOMEM:
> - err = EXT4_ERR_ENOMEM;
> - break;
> - case EFSBADCRC:
> - err = EXT4_ERR_EFSBADCRC;
> - break;
> - case 0:
> - case EFSCORRUPTED:
> - err = EXT4_ERR_EFSCORRUPTED;
> - break;
> - case ENOSPC:
> - err = EXT4_ERR_ENOSPC;
> - break;
> - case ENOKEY:
> - err = EXT4_ERR_ENOKEY;
> - break;
> - case EROFS:
> - err = EXT4_ERR_EROFS;
> - break;
> - case EFBIG:
> - err = EXT4_ERR_EFBIG;
> - break;
> - case EEXIST:
> - err = EXT4_ERR_EEXIST;
> - break;
> - case ERANGE:
> - err = EXT4_ERR_ERANGE;
> - break;
> - case EOVERFLOW:
> - err = EXT4_ERR_EOVERFLOW;
> - break;
> - case EBUSY:
> - err = EXT4_ERR_EBUSY;
> - break;
> - case ENOTDIR:
> - err = EXT4_ERR_ENOTDIR;
> - break;
> - case ENOTEMPTY:
> - err = EXT4_ERR_ENOTEMPTY;
> - break;
> - case ESHUTDOWN:
> - err = EXT4_ERR_ESHUTDOWN;
> - break;
> - case EFAULT:
> - err = EXT4_ERR_EFAULT;
> - break;
> - default:
> - err = EXT4_ERR_UNKNOWN;
> - }
> - es->s_last_error_errcode = err;
> + es->s_last_error_errcode = ext4_errno_to_code(error);
> if (!es->s_first_error_time) {
> es->s_first_error_time = es->s_last_error_time;
> es->s_first_error_time_hi = es->s_last_error_time_hi;
> --
> 2.16.4
>


Cheers, Andreas






Attachments:
signature.asc (890.00 B)
Message signed with OpenPGP

2020-11-29 22:13:59

by Andreas Dilger

[permalink] [raw]
Subject: Re: [PATCH 02/12] ext4: Remove redundant sb checksum recomputation

On Nov 27, 2020, at 4:33 AM, Jan Kara <[email protected]> wrote:
>
> Superblock is written out either through ext4_commit_super() or through
> ext4_handle_dirty_super(). In both cases we recompute the checksum so it
> is not necessary to recompute it after updating superblock free inodes &
> blocks counters.

I searched through the code to see where s_sbh is being used, and it
looks like there is one case that doesn't update the checksum using
ext4_handle_dirty_super(), namely:

ext4_file_ioctl(cmd=FS_IOC_GET_ENCRYPTION_PWSALT)
{
err = ext4_journal_get_write_access(handle, sbi->s_sbh);
if (err)
goto pwsalt_err_journal;
generate_random_uuid(sbi->s_es->s_encrypt_pw_salt);
err = ext4_handle_dirty_metadata(handle, NULL,
sbi->s_sbh);

I don't think that is a problem with this patch, per se, but looks like
a bug that could be hit in rare cases with fscrypt + metadata_csum. It
would only happen once per filesystem, and would normally be hidden by
later superblock updates, but should probably be fixed anyway.

Reviewed-by: Andreas Dilger <[email protected]>

> Signed-off-by: Jan Kara <[email protected]>
> ---
> fs/ext4/super.c | 2 --
> 1 file changed, 2 deletions(-)
>
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index 2b08b162075c..61e6e5f156f3 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -5004,13 +5004,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
> block = ext4_count_free_clusters(sb);
> ext4_free_blocks_count_set(sbi->s_es,
> EXT4_C2B(sbi, block));
> - ext4_superblock_csum_set(sb);
> err = percpu_counter_init(&sbi->s_freeclusters_counter, block,
> GFP_KERNEL);
> if (!err) {
> unsigned long freei = ext4_count_free_inodes(sb);
> sbi->s_es->s_free_inodes_count = cpu_to_le32(freei);
> - ext4_superblock_csum_set(sb);
> err = percpu_counter_init(&sbi->s_freeinodes_counter, freei,
> GFP_KERNEL);
> }
> --
> 2.16.4
>


Cheers, Andreas






Attachments:
signature.asc (890.00 B)
Message signed with OpenPGP

2020-11-29 22:15:57

by Andreas Dilger

[permalink] [raw]
Subject: Re: [PATCH 05/12] ext4: Move functions in super.c

On Nov 27, 2020, at 4:33 AM, Jan Kara <[email protected]> wrote:
>
> Just move error info related functions in super.c close to
> ext4_handle_error(). We'll want to combine save_error_info() with
> ext4_handle_error() and this makes change more obvious and saves a
> forward declaration as well. No functional change.
>
> Signed-off-by: Jan Kara <[email protected]>

Reviewed-by: Andreas Dilger <[email protected]>

> ---
> fs/ext4/super.c | 196 ++++++++++++++++++++++++++++----------------------------
> 1 file changed, 98 insertions(+), 98 deletions(-)
>
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index dddaadc55475..7948c07d0a90 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -423,104 +423,6 @@ static time64_t __ext4_get_tstamp(__le32 *lo, __u8 *hi)
> #define ext4_get_tstamp(es, tstamp) \
> __ext4_get_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi)
>
> -static void __save_error_info(struct super_block *sb, int error,
> - __u32 ino, __u64 block,
> - const char *func, unsigned int line)
> -{
> - struct ext4_super_block *es = EXT4_SB(sb)->s_es;
> - int err;
> -
> - EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
> - if (bdev_read_only(sb->s_bdev))
> - return;
> - es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
> - ext4_update_tstamp(es, s_last_error_time);
> - strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func));
> - es->s_last_error_line = cpu_to_le32(line);
> - es->s_last_error_ino = cpu_to_le32(ino);
> - es->s_last_error_block = cpu_to_le64(block);
> - switch (error) {
> - case EIO:
> - err = EXT4_ERR_EIO;
> - break;
> - case ENOMEM:
> - err = EXT4_ERR_ENOMEM;
> - break;
> - case EFSBADCRC:
> - err = EXT4_ERR_EFSBADCRC;
> - break;
> - case 0:
> - case EFSCORRUPTED:
> - err = EXT4_ERR_EFSCORRUPTED;
> - break;
> - case ENOSPC:
> - err = EXT4_ERR_ENOSPC;
> - break;
> - case ENOKEY:
> - err = EXT4_ERR_ENOKEY;
> - break;
> - case EROFS:
> - err = EXT4_ERR_EROFS;
> - break;
> - case EFBIG:
> - err = EXT4_ERR_EFBIG;
> - break;
> - case EEXIST:
> - err = EXT4_ERR_EEXIST;
> - break;
> - case ERANGE:
> - err = EXT4_ERR_ERANGE;
> - break;
> - case EOVERFLOW:
> - err = EXT4_ERR_EOVERFLOW;
> - break;
> - case EBUSY:
> - err = EXT4_ERR_EBUSY;
> - break;
> - case ENOTDIR:
> - err = EXT4_ERR_ENOTDIR;
> - break;
> - case ENOTEMPTY:
> - err = EXT4_ERR_ENOTEMPTY;
> - break;
> - case ESHUTDOWN:
> - err = EXT4_ERR_ESHUTDOWN;
> - break;
> - case EFAULT:
> - err = EXT4_ERR_EFAULT;
> - break;
> - default:
> - err = EXT4_ERR_UNKNOWN;
> - }
> - es->s_last_error_errcode = err;
> - if (!es->s_first_error_time) {
> - es->s_first_error_time = es->s_last_error_time;
> - es->s_first_error_time_hi = es->s_last_error_time_hi;
> - strncpy(es->s_first_error_func, func,
> - sizeof(es->s_first_error_func));
> - es->s_first_error_line = cpu_to_le32(line);
> - es->s_first_error_ino = es->s_last_error_ino;
> - es->s_first_error_block = es->s_last_error_block;
> - es->s_first_error_errcode = es->s_last_error_errcode;
> - }
> - /*
> - * Start the daily error reporting function if it hasn't been
> - * started already
> - */
> - if (!es->s_error_count)
> - mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ);
> - le32_add_cpu(&es->s_error_count, 1);
> -}
> -
> -static void save_error_info(struct super_block *sb, int error,
> - __u32 ino, __u64 block,
> - const char *func, unsigned int line)
> -{
> - __save_error_info(sb, error, ino, block, func, line);
> - if (!bdev_read_only(sb->s_bdev))
> - ext4_commit_super(sb, 1);
> -}
> -
> /*
> * The del_gendisk() function uninitializes the disk-specific data
> * structures, including the bdi structure, without telling anyone
> @@ -649,6 +551,104 @@ static bool system_going_down(void)
> || system_state == SYSTEM_RESTART;
> }
>
> +static void __save_error_info(struct super_block *sb, int error,
> + __u32 ino, __u64 block,
> + const char *func, unsigned int line)
> +{
> + struct ext4_super_block *es = EXT4_SB(sb)->s_es;
> + int err;
> +
> + EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
> + if (bdev_read_only(sb->s_bdev))
> + return;
> + es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
> + ext4_update_tstamp(es, s_last_error_time);
> + strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func));
> + es->s_last_error_line = cpu_to_le32(line);
> + es->s_last_error_ino = cpu_to_le32(ino);
> + es->s_last_error_block = cpu_to_le64(block);
> + switch (error) {
> + case EIO:
> + err = EXT4_ERR_EIO;
> + break;
> + case ENOMEM:
> + err = EXT4_ERR_ENOMEM;
> + break;
> + case EFSBADCRC:
> + err = EXT4_ERR_EFSBADCRC;
> + break;
> + case 0:
> + case EFSCORRUPTED:
> + err = EXT4_ERR_EFSCORRUPTED;
> + break;
> + case ENOSPC:
> + err = EXT4_ERR_ENOSPC;
> + break;
> + case ENOKEY:
> + err = EXT4_ERR_ENOKEY;
> + break;
> + case EROFS:
> + err = EXT4_ERR_EROFS;
> + break;
> + case EFBIG:
> + err = EXT4_ERR_EFBIG;
> + break;
> + case EEXIST:
> + err = EXT4_ERR_EEXIST;
> + break;
> + case ERANGE:
> + err = EXT4_ERR_ERANGE;
> + break;
> + case EOVERFLOW:
> + err = EXT4_ERR_EOVERFLOW;
> + break;
> + case EBUSY:
> + err = EXT4_ERR_EBUSY;
> + break;
> + case ENOTDIR:
> + err = EXT4_ERR_ENOTDIR;
> + break;
> + case ENOTEMPTY:
> + err = EXT4_ERR_ENOTEMPTY;
> + break;
> + case ESHUTDOWN:
> + err = EXT4_ERR_ESHUTDOWN;
> + break;
> + case EFAULT:
> + err = EXT4_ERR_EFAULT;
> + break;
> + default:
> + err = EXT4_ERR_UNKNOWN;
> + }
> + es->s_last_error_errcode = err;
> + if (!es->s_first_error_time) {
> + es->s_first_error_time = es->s_last_error_time;
> + es->s_first_error_time_hi = es->s_last_error_time_hi;
> + strncpy(es->s_first_error_func, func,
> + sizeof(es->s_first_error_func));
> + es->s_first_error_line = cpu_to_le32(line);
> + es->s_first_error_ino = es->s_last_error_ino;
> + es->s_first_error_block = es->s_last_error_block;
> + es->s_first_error_errcode = es->s_last_error_errcode;
> + }
> + /*
> + * Start the daily error reporting function if it hasn't been
> + * started already
> + */
> + if (!es->s_error_count)
> + mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ);
> + le32_add_cpu(&es->s_error_count, 1);
> +}
> +
> +static void save_error_info(struct super_block *sb, int error,
> + __u32 ino, __u64 block,
> + const char *func, unsigned int line)
> +{
> + __save_error_info(sb, error, ino, block, func, line);
> + if (!bdev_read_only(sb->s_bdev))
> + ext4_commit_super(sb, 1);
> +}
> +
> /* Deal with the reporting of failure conditions on a filesystem such as
> * inconsistencies detected or read IO failures.
> *
> --
> 2.16.4
>


Cheers, Andreas






Attachments:
signature.asc (890.00 B)
Message signed with OpenPGP

2020-11-29 22:16:14

by Andreas Dilger

[permalink] [raw]
Subject: Re: [PATCH 04/12] ext4: Make ext4_abort() use __ext4_error()

On Nov 27, 2020, at 4:33 AM, Jan Kara <[email protected]> wrote:
>
> The only difference between __ext4_abort() and __ext4_error() is that
> the former one ignores errors=continue mount option. Unify the code to
> reduce duplication.
>
> Signed-off-by: Jan Kara <[email protected]>

Reviewed-by: Andreas Dilger <[email protected]>

> ---
> fs/ext4/ext4.h | 29 ++++++++----------
> fs/ext4/ext4_jbd2.c | 4 +--
> fs/ext4/inode.c | 2 +-
> fs/ext4/super.c | 84 ++++++++++++++---------------------------------------
> 4 files changed, 37 insertions(+), 82 deletions(-)
>
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 65ecaf96d0a4..e67291c4a10b 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -2952,9 +2952,9 @@ extern void ext4_mark_group_bitmap_corrupted(struct super_block *sb,
> ext4_group_t block_group,
> unsigned int flags);
>
> -extern __printf(6, 7)
> -void __ext4_error(struct super_block *, const char *, unsigned int, int, __u64,
> - const char *, ...);
> +extern __printf(7, 8)
> +void __ext4_error(struct super_block *, const char *, unsigned int, bool,
> + int, __u64, const char *, ...);
> extern __printf(6, 7)
> void __ext4_error_inode(struct inode *, const char *, unsigned int,
> ext4_fsblk_t, int, const char *, ...);
> @@ -2963,9 +2963,6 @@ void __ext4_error_file(struct file *, const char *, unsigned int, ext4_fsblk_t,
> const char *, ...);
> extern void __ext4_std_error(struct super_block *, const char *,
> unsigned int, int);
> -extern __printf(5, 6)
> -void __ext4_abort(struct super_block *, const char *, unsigned int, int,
> - const char *, ...);
> extern __printf(4, 5)
> void __ext4_warning(struct super_block *, const char *, unsigned int,
> const char *, ...);
> @@ -2995,6 +2992,9 @@ void __ext4_grp_locked_error(const char *, unsigned int,
> #define EXT4_ERROR_FILE(file, block, fmt, a...) \
> ext4_error_file((file), __func__, __LINE__, (block), (fmt), ## a)
>
> +#define ext4_abort(sb, err, fmt, a...) \
> + __ext4_error((sb), __func__, __LINE__, true, (err), 0, (fmt), ## a)
> +
> #ifdef CONFIG_PRINTK
>
> #define ext4_error_inode(inode, func, line, block, fmt, ...) \
> @@ -3005,11 +3005,11 @@ void __ext4_grp_locked_error(const char *, unsigned int,
> #define ext4_error_file(file, func, line, block, fmt, ...) \
> __ext4_error_file(file, func, line, block, fmt, ##__VA_ARGS__)
> #define ext4_error(sb, fmt, ...) \
> - __ext4_error((sb), __func__, __LINE__, 0, 0, (fmt), ##__VA_ARGS__)
> + __ext4_error((sb), __func__, __LINE__, false, 0, 0, (fmt), \
> + ##__VA_ARGS__)
> #define ext4_error_err(sb, err, fmt, ...) \
> - __ext4_error((sb), __func__, __LINE__, (err), 0, (fmt), ##__VA_ARGS__)
> -#define ext4_abort(sb, err, fmt, ...) \
> - __ext4_abort((sb), __func__, __LINE__, (err), (fmt), ##__VA_ARGS__)
> + __ext4_error((sb), __func__, __LINE__, false, (err), 0, (fmt), \
> + ##__VA_ARGS__)
> #define ext4_warning(sb, fmt, ...) \
> __ext4_warning(sb, __func__, __LINE__, fmt, ##__VA_ARGS__)
> #define ext4_warning_inode(inode, fmt, ...) \
> @@ -3042,17 +3042,12 @@ do { \
> #define ext4_error(sb, fmt, ...) \
> do { \
> no_printk(fmt, ##__VA_ARGS__); \
> - __ext4_error(sb, "", 0, 0, 0, " "); \
> + __ext4_error(sb, "", 0, false, 0, 0, " "); \
> } while (0)
> #define ext4_error_err(sb, err, fmt, ...) \
> do { \
> no_printk(fmt, ##__VA_ARGS__); \
> - __ext4_error(sb, "", 0, err, 0, " "); \
> -} while (0)
> -#define ext4_abort(sb, err, fmt, ...) \
> -do { \
> - no_printk(fmt, ##__VA_ARGS__); \
> - __ext4_abort(sb, "", 0, err, " "); \
> + __ext4_error(sb, "", 0, false, err, 0, " "); \
> } while (0)
> #define ext4_warning(sb, fmt, ...) \
> do { \
> diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
> index 0fd0c42a4f7d..1a0a827a7f34 100644
> --- a/fs/ext4/ext4_jbd2.c
> +++ b/fs/ext4/ext4_jbd2.c
> @@ -296,8 +296,8 @@ int __ext4_forget(const char *where, unsigned int line, handle_t *handle,
> if (err) {
> ext4_journal_abort_handle(where, line, __func__,
> bh, handle, err);
> - __ext4_abort(inode->i_sb, where, line, -err,
> - "error %d when attempting revoke", err);
> + __ext4_error(inode->i_sb, where, line, true, -err, 0,
> + "error %d when attempting revoke", err);
> }
> BUFFER_TRACE(bh, "exit");
> return err;
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 0d8385aea898..3a39fa0d6a3a 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -4610,7 +4610,7 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
> (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))) {
> if (flags & EXT4_IGET_HANDLE)
> return ERR_PTR(-ESTALE);
> - __ext4_error(sb, function, line, EFSCORRUPTED, 0,
> + __ext4_error(sb, function, line, false, EFSCORRUPTED, 0,
> "inode #%lu: comm %s: iget: illegal inode #",
> ino, current->comm);
> return ERR_PTR(-EFSCORRUPTED);
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index 61e6e5f156f3..dddaadc55475 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -662,16 +662,21 @@ static bool system_going_down(void)
> * We'll just use the jbd2_journal_abort() error code to record an error in
> * the journal instead. On recovery, the journal will complain about
> * that error until we've noted it down and cleared it.
> + *
> + * If force_ro is set, we unconditionally force the filesystem into an
> + * ABORT|READONLY state, unless the error response on the fs has been set to
> + * panic in which case we take the easy way out and panic immediately. This is
> + * used to deal with unrecoverable failures such as journal IO errors or ENOMEM
> + * at a critical moment in log management.
> */
> -
> -static void ext4_handle_error(struct super_block *sb)
> +static void ext4_handle_error(struct super_block *sb, bool force_ro)
> {
> journal_t *journal = EXT4_SB(sb)->s_journal;
>
> if (test_opt(sb, WARN_ON_ERROR))
> WARN_ON_ONCE(1);
>
> - if (sb_rdonly(sb) || test_opt(sb, ERRORS_CONT))
> + if (sb_rdonly(sb) || (!force_ro && test_opt(sb, ERRORS_CONT)))
> return;
>
> ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED);
> @@ -682,18 +687,17 @@ static void ext4_handle_error(struct super_block *sb)
> * could panic during 'reboot -f' as the underlying device got already
> * disabled.
> */
> - if (test_opt(sb, ERRORS_RO) || system_going_down()) {
> - ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
> - /*
> - * Make sure updated value of ->s_mount_flags will be visible
> - * before ->s_flags update
> - */
> - smp_wmb();
> - sb->s_flags |= SB_RDONLY;
> - } else if (test_opt(sb, ERRORS_PANIC)) {
> + if (test_opt(sb, ERRORS_PANIC) && !system_going_down()) {
> panic("EXT4-fs (device %s): panic forced after error\n",
> sb->s_id);
> }
> + ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
> + /*
> + * Make sure updated value of ->s_mount_flags will be visible before
> + * ->s_flags update
> + */
> + smp_wmb();
> + sb->s_flags |= SB_RDONLY;
> }
>
> #define ext4_error_ratelimit(sb) \
> @@ -701,7 +705,7 @@ static void ext4_handle_error(struct super_block *sb)
> "EXT4-fs error")
>
> void __ext4_error(struct super_block *sb, const char *function,
> - unsigned int line, int error, __u64 block,
> + unsigned int line, bool force_ro, int error, __u64 block,
> const char *fmt, ...)
> {
> struct va_format vaf;
> @@ -721,7 +725,7 @@ void __ext4_error(struct super_block *sb, const char *function,
> va_end(args);
> }
> save_error_info(sb, error, 0, block, function, line);
> - ext4_handle_error(sb);
> + ext4_handle_error(sb, force_ro);
> }
>
> void __ext4_error_inode(struct inode *inode, const char *function,
> @@ -753,7 +757,7 @@ void __ext4_error_inode(struct inode *inode, const char *function,
> }
> save_error_info(inode->i_sb, error, inode->i_ino, block,
> function, line);
> - ext4_handle_error(inode->i_sb);
> + ext4_handle_error(inode->i_sb, false);
> }
>
> void __ext4_error_file(struct file *file, const char *function,
> @@ -792,7 +796,7 @@ void __ext4_error_file(struct file *file, const char *function,
> }
> save_error_info(inode->i_sb, EFSCORRUPTED, inode->i_ino, block,
> function, line);
> - ext4_handle_error(inode->i_sb);
> + ext4_handle_error(inode->i_sb, false);
> }
>
> const char *ext4_decode_error(struct super_block *sb, int errno,
> @@ -860,51 +864,7 @@ void __ext4_std_error(struct super_block *sb, const char *function,
> }
>
> save_error_info(sb, -errno, 0, 0, function, line);
> - ext4_handle_error(sb);
> -}
> -
> -/*
> - * ext4_abort is a much stronger failure handler than ext4_error. The
> - * abort function may be used to deal with unrecoverable failures such
> - * as journal IO errors or ENOMEM at a critical moment in log management.
> - *
> - * We unconditionally force the filesystem into an ABORT|READONLY state,
> - * unless the error response on the fs has been set to panic in which
> - * case we take the easy way out and panic immediately.
> - */
> -
> -void __ext4_abort(struct super_block *sb, const char *function,
> - unsigned int line, int error, const char *fmt, ...)
> -{
> - struct va_format vaf;
> - va_list args;
> -
> - if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
> - return;
> -
> - save_error_info(sb, error, 0, 0, function, line);
> - va_start(args, fmt);
> - vaf.fmt = fmt;
> - vaf.va = &args;
> - printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: %pV\n",
> - sb->s_id, function, line, &vaf);
> - va_end(args);
> -
> - if (sb_rdonly(sb) == 0) {
> - ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED);
> - if (EXT4_SB(sb)->s_journal)
> - jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
> -
> - ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
> - /*
> - * Make sure updated value of ->s_mount_flags will be visible
> - * before ->s_flags update
> - */
> - smp_wmb();
> - sb->s_flags |= SB_RDONLY;
> - }
> - if (test_opt(sb, ERRORS_PANIC) && !system_going_down())
> - panic("EXT4-fs panic from previous error\n");
> + ext4_handle_error(sb, false);
> }
>
> void __ext4_msg(struct super_block *sb,
> @@ -1007,7 +967,7 @@ __acquires(bitlock)
>
> ext4_unlock_group(sb, grp);
> ext4_commit_super(sb, 1);
> - ext4_handle_error(sb);
> + ext4_handle_error(sb, false);
> /*
> * We only get here in the ERRORS_RO case; relocking the group
> * may be dangerous, but nothing bad will happen since the
> --
> 2.16.4
>


Cheers, Andreas






Attachments:
signature.asc (890.00 B)
Message signed with OpenPGP

2020-11-30 11:01:55

by Jan Kara

[permalink] [raw]
Subject: Re: [PATCH 02/12] ext4: Remove redundant sb checksum recomputation

On Sun 29-11-20 15:11:05, Andreas Dilger wrote:
> On Nov 27, 2020, at 4:33 AM, Jan Kara <[email protected]> wrote:
> >
> > Superblock is written out either through ext4_commit_super() or through
> > ext4_handle_dirty_super(). In both cases we recompute the checksum so it
> > is not necessary to recompute it after updating superblock free inodes &
> > blocks counters.
>
> I searched through the code to see where s_sbh is being used, and it
> looks like there is one case that doesn't update the checksum using
> ext4_handle_dirty_super(), namely:
>
> ext4_file_ioctl(cmd=FS_IOC_GET_ENCRYPTION_PWSALT)
> {
> err = ext4_journal_get_write_access(handle, sbi->s_sbh);
> if (err)
> goto pwsalt_err_journal;
> generate_random_uuid(sbi->s_es->s_encrypt_pw_salt);
> err = ext4_handle_dirty_metadata(handle, NULL,
> sbi->s_sbh);
>
> I don't think that is a problem with this patch, per se, but looks like
> a bug that could be hit in rare cases with fscrypt + metadata_csum. It
> would only happen once per filesystem, and would normally be hidden by
> later superblock updates, but should probably be fixed anyway.

Yeah, good spotting. I'll write a fix for this.

> Reviewed-by: Andreas Dilger <[email protected]>

Thanks for review!

Honza

>
> > Signed-off-by: Jan Kara <[email protected]>
> > ---
> > fs/ext4/super.c | 2 --
> > 1 file changed, 2 deletions(-)
> >
> > diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> > index 2b08b162075c..61e6e5f156f3 100644
> > --- a/fs/ext4/super.c
> > +++ b/fs/ext4/super.c
> > @@ -5004,13 +5004,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
> > block = ext4_count_free_clusters(sb);
> > ext4_free_blocks_count_set(sbi->s_es,
> > EXT4_C2B(sbi, block));
> > - ext4_superblock_csum_set(sb);
> > err = percpu_counter_init(&sbi->s_freeclusters_counter, block,
> > GFP_KERNEL);
> > if (!err) {
> > unsigned long freei = ext4_count_free_inodes(sb);
> > sbi->s_es->s_free_inodes_count = cpu_to_le32(freei);
> > - ext4_superblock_csum_set(sb);
> > err = percpu_counter_init(&sbi->s_freeinodes_counter, freei,
> > GFP_KERNEL);
> > }
> > --
> > 2.16.4
> >
>
>
> Cheers, Andreas
>
>
>
>
>


--
Jan Kara <[email protected]>
SUSE Labs, CR

2020-12-14 19:21:00

by harshad shirwadkar

[permalink] [raw]
Subject: Re: [PATCH 00/12] ext4: Various fixes of ext4 handling of fs errors

Thanks! I ran smoke tests (-c 4k -g quick) on this series and there were no
regressions for me as well.


- Harshad

On Fri, Nov 27, 2020 at 3:37 AM Jan Kara <[email protected]> wrote:
>
> Hello,
>
> this patches addresses problems in handling of filesystem errors in ext4.
> When we hit metadata error, we want to store information about the error
> in the superblock. Currently we do it through direct superblock modification
> which can lead to lost information, checksum failures, or DIF/DIX failures.
> Fix various races in the error handling so that the superblock update is
> reliable.
>
> The patches have passed xfstests for me in various configurations and some
> targetted manual testing of the error handling.
>
> Honza

2020-12-14 19:25:30

by harshad shirwadkar

[permalink] [raw]
Subject: Re: [PATCH 08/12] ext4: Combine ext4_handle_error() and save_error_info()

On Fri, Nov 27, 2020 at 3:38 AM Jan Kara <[email protected]> wrote:
>
> save_error_info() is always called together with ext4_handle_error().
> Combine them into a single call and move unconditional bits out of
> save_error_info() into ext4_handle_error().
>
> Signed-off-by: Jan Kara <[email protected]>
> ---
> fs/ext4/super.c | 31 +++++++++++++++----------------
> 1 file changed, 15 insertions(+), 16 deletions(-)
>
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index 2d7dc0908cdd..73a09b73fc11 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -592,9 +592,6 @@ static void __save_error_info(struct super_block *sb, int error,
> {
> struct ext4_sb_info *sbi = EXT4_SB(sb);
>
> - EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
> - if (bdev_read_only(sb->s_bdev))
> - return;
> /* We default to EFSCORRUPTED error... */
> if (error == 0)
> error = EFSCORRUPTED;
> @@ -647,13 +644,19 @@ static void save_error_info(struct super_block *sb, int error,
> * used to deal with unrecoverable failures such as journal IO errors or ENOMEM
> * at a critical moment in log management.
> */
> -static void ext4_handle_error(struct super_block *sb, bool force_ro)
> +static void ext4_handle_error(struct super_block *sb, bool force_ro, int error,
> + __u32 ino, __u64 block,
> + const char *func, unsigned int line)
> {
> journal_t *journal = EXT4_SB(sb)->s_journal;
>
> + EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
> if (test_opt(sb, WARN_ON_ERROR))
> WARN_ON_ONCE(1);
>
> + if (!bdev_read_only(sb->s_bdev))
> + save_error_info(sb, error, ino, block, func, line);
> +
> if (sb_rdonly(sb) || (!force_ro && test_opt(sb, ERRORS_CONT)))
> return;
>
> @@ -710,8 +713,7 @@ void __ext4_error(struct super_block *sb, const char *function,
> sb->s_id, function, line, current->comm, &vaf);
> va_end(args);
> }
> - save_error_info(sb, error, 0, block, function, line);
> - ext4_handle_error(sb, force_ro);
> + ext4_handle_error(sb, force_ro, error, 0, block, function, line);
> }
>
> void __ext4_error_inode(struct inode *inode, const char *function,
> @@ -741,9 +743,8 @@ void __ext4_error_inode(struct inode *inode, const char *function,
> current->comm, &vaf);
> va_end(args);
> }
> - save_error_info(inode->i_sb, error, inode->i_ino, block,
> - function, line);
> - ext4_handle_error(inode->i_sb, false);
> + ext4_handle_error(inode->i_sb, false, error, inode->i_ino, block,
> + function, line);
> }
>
> void __ext4_error_file(struct file *file, const char *function,
> @@ -780,9 +781,8 @@ void __ext4_error_file(struct file *file, const char *function,
> current->comm, path, &vaf);
> va_end(args);
> }
> - save_error_info(inode->i_sb, EFSCORRUPTED, inode->i_ino, block,
> - function, line);
> - ext4_handle_error(inode->i_sb, false);
> + ext4_handle_error(inode->i_sb, false, EFSCORRUPTED, inode->i_ino, block,
> + function, line);
> }
>
> const char *ext4_decode_error(struct super_block *sb, int errno,
> @@ -849,8 +849,7 @@ void __ext4_std_error(struct super_block *sb, const char *function,
> sb->s_id, function, line, errstr);
> }
>
> - save_error_info(sb, -errno, 0, 0, function, line);
> - ext4_handle_error(sb, false);
> + ext4_handle_error(sb, false, -errno, 0, 0, function, line);
> }
>
> void __ext4_msg(struct super_block *sb,
> @@ -944,13 +943,13 @@ __acquires(bitlock)
> if (test_opt(sb, ERRORS_CONT)) {
> if (test_opt(sb, WARN_ON_ERROR))
> WARN_ON_ONCE(1);
> + EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
Since you moved the bdev_read_only() check from __save_error_info to
ext4_handle_error(), should we add that check here?

- Harshad
> __save_error_info(sb, EFSCORRUPTED, ino, block, function, line);
> schedule_work(&EXT4_SB(sb)->s_error_work);
> return;
> }
> ext4_unlock_group(sb, grp);
> - save_error_info(sb, EFSCORRUPTED, ino, block, function, line);
> - ext4_handle_error(sb, false);
> + ext4_handle_error(sb, false, EFSCORRUPTED, ino, block, function, line);
> /*
> * We only get here in the ERRORS_RO case; relocking the group
> * may be dangerous, but nothing bad will happen since the
> --
> 2.16.4
>

2020-12-14 19:29:07

by harshad shirwadkar

[permalink] [raw]
Subject: Re: [PATCH 09/12] ext4: Drop sync argument of ext4_commit_super()

Looks good to me.

Reviewed-by: Harshad Shirwadkar <[email protected]>

On Fri, Nov 27, 2020 at 10:25 AM Jan Kara <[email protected]> wrote:
>
> Everybody passes 1 as sync argument of ext4_commit_super(). Just drop
> it.
>
> Signed-off-by: Jan Kara <[email protected]>
> ---
> fs/ext4/super.c | 47 ++++++++++++++++++++++-------------------------
> 1 file changed, 22 insertions(+), 25 deletions(-)
>
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index 73a09b73fc11..aae12ea1466a 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -65,7 +65,7 @@ static struct ratelimit_state ext4_mount_msg_ratelimit;
> static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
> unsigned long journal_devnum);
> static int ext4_show_options(struct seq_file *seq, struct dentry *root);
> -static int ext4_commit_super(struct super_block *sb, int sync);
> +static int ext4_commit_super(struct super_block *sb);
> static int ext4_mark_recovery_complete(struct super_block *sb,
> struct ext4_super_block *es);
> static int ext4_clear_journal_err(struct super_block *sb,
> @@ -621,7 +621,7 @@ static void save_error_info(struct super_block *sb, int error,
> {
> __save_error_info(sb, error, ino, block, func, line);
> if (!bdev_read_only(sb->s_bdev))
> - ext4_commit_super(sb, 1);
> + ext4_commit_super(sb);
> }
>
> /* Deal with the reporting of failure conditions on a filesystem such as
> @@ -686,7 +686,7 @@ static void flush_stashed_error_work(struct work_struct *work)
> struct ext4_sb_info *sbi = container_of(work, struct ext4_sb_info,
> s_error_work);
>
> - ext4_commit_super(sbi->s_sb, 1);
> + ext4_commit_super(sbi->s_sb);
> }
>
> #define ext4_error_ratelimit(sb) \
> @@ -1151,7 +1151,7 @@ static void ext4_put_super(struct super_block *sb)
> es->s_state = cpu_to_le16(sbi->s_mount_state);
> }
> if (!sb_rdonly(sb))
> - ext4_commit_super(sb, 1);
> + ext4_commit_super(sb);
>
> rcu_read_lock();
> group_desc = rcu_dereference(sbi->s_group_desc);
> @@ -2641,7 +2641,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
> if (sbi->s_journal)
> ext4_set_feature_journal_needs_recovery(sb);
>
> - err = ext4_commit_super(sb, 1);
> + err = ext4_commit_super(sb);
> done:
> if (test_opt(sb, DEBUG))
> printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
> @@ -4862,7 +4862,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
> if (DUMMY_ENCRYPTION_ENABLED(sbi) && !sb_rdonly(sb) &&
> !ext4_has_feature_encrypt(sb)) {
> ext4_set_feature_encrypt(sb);
> - ext4_commit_super(sb, 1);
> + ext4_commit_super(sb);
> }
>
> /*
> @@ -5415,7 +5415,7 @@ static int ext4_load_journal(struct super_block *sb,
> es->s_journal_dev = cpu_to_le32(journal_devnum);
>
> /* Make sure we flush the recovery flag to disk. */
> - ext4_commit_super(sb, 1);
> + ext4_commit_super(sb);
> }
>
> return 0;
> @@ -5425,7 +5425,7 @@ static int ext4_load_journal(struct super_block *sb,
> return err;
> }
>
> -static int ext4_commit_super(struct super_block *sb, int sync)
> +static int ext4_commit_super(struct super_block *sb)
> {
> struct ext4_sb_info *sbi = EXT4_SB(sb);
> struct ext4_super_block *es = EXT4_SB(sb)->s_es;
> @@ -5502,8 +5502,7 @@ static int ext4_commit_super(struct super_block *sb, int sync)
>
> BUFFER_TRACE(sbh, "marking dirty");
> ext4_superblock_csum_set(sb);
> - if (sync)
> - lock_buffer(sbh);
> + lock_buffer(sbh);
> if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) {
> /*
> * Oh, dear. A previous attempt to write the
> @@ -5519,16 +5518,14 @@ static int ext4_commit_super(struct super_block *sb, int sync)
> set_buffer_uptodate(sbh);
> }
> mark_buffer_dirty(sbh);
> - if (sync) {
> - unlock_buffer(sbh);
> - error = __sync_dirty_buffer(sbh,
> - REQ_SYNC | (test_opt(sb, BARRIER) ? REQ_FUA : 0));
> - if (buffer_write_io_error(sbh)) {
> - ext4_msg(sb, KERN_ERR, "I/O error while writing "
> - "superblock");
> - clear_buffer_write_io_error(sbh);
> - set_buffer_uptodate(sbh);
> - }
> + unlock_buffer(sbh);
> + error = __sync_dirty_buffer(sbh,
> + REQ_SYNC | (test_opt(sb, BARRIER) ? REQ_FUA : 0));
> + if (buffer_write_io_error(sbh)) {
> + ext4_msg(sb, KERN_ERR, "I/O error while writing "
> + "superblock");
> + clear_buffer_write_io_error(sbh);
> + set_buffer_uptodate(sbh);
> }
> return error;
> }
> @@ -5559,7 +5556,7 @@ static int ext4_mark_recovery_complete(struct super_block *sb,
>
> if (ext4_has_feature_journal_needs_recovery(sb) && sb_rdonly(sb)) {
> ext4_clear_feature_journal_needs_recovery(sb);
> - ext4_commit_super(sb, 1);
> + ext4_commit_super(sb);
> }
> out:
> jbd2_journal_unlock_updates(journal);
> @@ -5601,7 +5598,7 @@ static int ext4_clear_journal_err(struct super_block *sb,
>
> EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
> es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
> - ext4_commit_super(sb, 1);
> + ext4_commit_super(sb);
>
> jbd2_journal_clear_err(journal);
> jbd2_journal_update_sb_errno(journal);
> @@ -5703,7 +5700,7 @@ static int ext4_freeze(struct super_block *sb)
> ext4_clear_feature_journal_needs_recovery(sb);
> }
>
> - error = ext4_commit_super(sb, 1);
> + error = ext4_commit_super(sb);
> out:
> if (journal)
> /* we rely on upper layer to stop further updates */
> @@ -5725,7 +5722,7 @@ static int ext4_unfreeze(struct super_block *sb)
> ext4_set_feature_journal_needs_recovery(sb);
> }
>
> - ext4_commit_super(sb, 1);
> + ext4_commit_super(sb);
> return 0;
> }
>
> @@ -5985,7 +5982,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
> }
>
> if (sbi->s_journal == NULL && !(old_sb_flags & SB_RDONLY)) {
> - err = ext4_commit_super(sb, 1);
> + err = ext4_commit_super(sb);
> if (err)
> goto restore_opts;
> }
> --
> 2.16.4
>