In the case of a storage device that suddenly disappears, or in the
case of significant file system corruption, this can result in a huge
flood of messages being sent to the console. This can overflow the
file system containing /var/log/messages, or if a serial console is
configured, this can slow down the system so much that a hardware
watchdog can end up triggering forcing a system reboot.
Google-Bug-Id: 7258357
Signed-off-by: "Theodore Ts'o" <[email protected]>
---
fs/ext4/ext4.h | 6 +++
fs/ext4/super.c | 152 +++++++++++++++++++++++++++++++++++---------------------
2 files changed, 100 insertions(+), 58 deletions(-)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index af815ea..65485ab 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -29,6 +29,7 @@
#include <linux/wait.h>
#include <linux/blockgroup_lock.h>
#include <linux/percpu_counter.h>
+#include <linux/ratelimit.h>
#include <crypto/hash.h>
#ifdef __KERNEL__
#include <linux/compat.h>
@@ -1314,6 +1315,11 @@ struct ext4_sb_info {
unsigned long s_es_last_sorted;
struct percpu_counter s_extent_cache_cnt;
spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp;
+
+ /* Ratelimit ext4 messages. */
+ struct ratelimit_state s_err_ratelimit_state;
+ struct ratelimit_state s_warning_ratelimit_state;
+ struct ratelimit_state s_msg_ratelimit_state;
};
static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 2c2e6cb..d3a857b 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -411,20 +411,26 @@ static void ext4_handle_error(struct super_block *sb)
sb->s_id);
}
+#define ext4_error_ratelimit(sb) \
+ ___ratelimit(&(EXT4_SB(sb)->s_err_ratelimit_state), \
+ "EXT4-fs error")
+
void __ext4_error(struct super_block *sb, const char *function,
unsigned int line, const char *fmt, ...)
{
struct va_format vaf;
va_list args;
- va_start(args, fmt);
- vaf.fmt = fmt;
- vaf.va = &args;
- printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n",
- sb->s_id, function, line, current->comm, &vaf);
- va_end(args);
+ if (ext4_error_ratelimit(sb)) {
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ printk(KERN_CRIT
+ "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n",
+ sb->s_id, function, line, current->comm, &vaf);
+ va_end(args);
+ }
save_error_info(sb, function, line);
-
ext4_handle_error(sb);
}
@@ -438,22 +444,23 @@ void __ext4_error_inode(struct inode *inode, const char *function,
es->s_last_error_ino = cpu_to_le32(inode->i_ino);
es->s_last_error_block = cpu_to_le64(block);
+ if (ext4_error_ratelimit(inode->i_sb)) {
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ if (block)
+ printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
+ "inode #%lu: block %llu: comm %s: %pV\n",
+ inode->i_sb->s_id, function, line, inode->i_ino,
+ block, current->comm, &vaf);
+ else
+ printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
+ "inode #%lu: comm %s: %pV\n",
+ inode->i_sb->s_id, function, line, inode->i_ino,
+ current->comm, &vaf);
+ va_end(args);
+ }
save_error_info(inode->i_sb, function, line);
- va_start(args, fmt);
- vaf.fmt = fmt;
- vaf.va = &args;
- if (block)
- printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
- "inode #%lu: block %llu: comm %s: %pV\n",
- inode->i_sb->s_id, function, line, inode->i_ino,
- block, current->comm, &vaf);
- else
- printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
- "inode #%lu: comm %s: %pV\n",
- inode->i_sb->s_id, function, line, inode->i_ino,
- current->comm, &vaf);
- va_end(args);
-
ext4_handle_error(inode->i_sb);
}
@@ -469,27 +476,28 @@ void __ext4_error_file(struct file *file, const char *function,
es = EXT4_SB(inode->i_sb)->s_es;
es->s_last_error_ino = cpu_to_le32(inode->i_ino);
+ if (ext4_error_ratelimit(inode->i_sb)) {
+ path = d_path(&(file->f_path), pathname, sizeof(pathname));
+ if (IS_ERR(path))
+ path = "(unknown)";
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ if (block)
+ printk(KERN_CRIT
+ "EXT4-fs error (device %s): %s:%d: inode #%lu: "
+ "block %llu: comm %s: path %s: %pV\n",
+ inode->i_sb->s_id, function, line, inode->i_ino,
+ block, current->comm, path, &vaf);
+ else
+ printk(KERN_CRIT
+ "EXT4-fs error (device %s): %s:%d: inode #%lu: "
+ "comm %s: path %s: %pV\n",
+ inode->i_sb->s_id, function, line, inode->i_ino,
+ current->comm, path, &vaf);
+ va_end(args);
+ }
save_error_info(inode->i_sb, function, line);
- path = d_path(&(file->f_path), pathname, sizeof(pathname));
- if (IS_ERR(path))
- path = "(unknown)";
- va_start(args, fmt);
- vaf.fmt = fmt;
- vaf.va = &args;
- if (block)
- printk(KERN_CRIT
- "EXT4-fs error (device %s): %s:%d: inode #%lu: "
- "block %llu: comm %s: path %s: %pV\n",
- inode->i_sb->s_id, function, line, inode->i_ino,
- block, current->comm, path, &vaf);
- else
- printk(KERN_CRIT
- "EXT4-fs error (device %s): %s:%d: inode #%lu: "
- "comm %s: path %s: %pV\n",
- inode->i_sb->s_id, function, line, inode->i_ino,
- current->comm, path, &vaf);
- va_end(args);
-
ext4_handle_error(inode->i_sb);
}
@@ -543,11 +551,13 @@ void __ext4_std_error(struct super_block *sb, const char *function,
(sb->s_flags & MS_RDONLY))
return;
- errstr = ext4_decode_error(sb, errno, nbuf);
- printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n",
- sb->s_id, function, line, errstr);
- save_error_info(sb, function, line);
+ if (ext4_error_ratelimit(sb)) {
+ errstr = ext4_decode_error(sb, errno, nbuf);
+ printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n",
+ sb->s_id, function, line, errstr);
+ }
+ save_error_info(sb, function, line);
ext4_handle_error(sb);
}
@@ -597,6 +607,9 @@ void __ext4_msg(struct super_block *sb,
struct va_format vaf;
va_list args;
+ if (!___ratelimit(&(EXT4_SB(sb)->s_msg_ratelimit_state), "EXT4-fs"))
+ return;
+
va_start(args, fmt);
vaf.fmt = fmt;
vaf.va = &args;
@@ -610,6 +623,10 @@ void __ext4_warning(struct super_block *sb, const char *function,
struct va_format vaf;
va_list args;
+ if (!___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state),
+ "EXT4-fs warning"))
+ return;
+
va_start(args, fmt);
vaf.fmt = fmt;
vaf.va = &args;
@@ -633,18 +650,20 @@ __acquires(bitlock)
es->s_last_error_block = cpu_to_le64(block);
__save_error_info(sb, function, line);
- va_start(args, fmt);
-
- vaf.fmt = fmt;
- vaf.va = &args;
- printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ",
- sb->s_id, function, line, grp);
- if (ino)
- printk(KERN_CONT "inode %lu: ", ino);
- if (block)
- printk(KERN_CONT "block %llu:", (unsigned long long) block);
- printk(KERN_CONT "%pV\n", &vaf);
- va_end(args);
+ if (ext4_error_ratelimit(sb)) {
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ",
+ sb->s_id, function, line, grp);
+ if (ino)
+ printk(KERN_CONT "inode %lu: ", ino);
+ if (block)
+ printk(KERN_CONT "block %llu:",
+ (unsigned long long) block);
+ printk(KERN_CONT "%pV\n", &vaf);
+ va_end(args);
+ }
if (test_opt(sb, ERRORS_CONT)) {
ext4_commit_super(sb, 0);
@@ -2606,6 +2625,12 @@ EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
EXT4_DEPRECATED_ATTR(max_writeback_mb_bump, 128);
EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb);
EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error);
+EXT4_RW_ATTR_SBI_UI(err_ratelimit_interval_ms, s_err_ratelimit_state.interval);
+EXT4_RW_ATTR_SBI_UI(err_ratelimit_burst, s_err_ratelimit_state.burst);
+EXT4_RW_ATTR_SBI_UI(warning_ratelimit_interval_ms, s_warning_ratelimit_state.interval);
+EXT4_RW_ATTR_SBI_UI(warning_ratelimit_burst, s_warning_ratelimit_state.burst);
+EXT4_RW_ATTR_SBI_UI(msg_ratelimit_interval_ms, s_msg_ratelimit_state.interval);
+EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst);
static struct attribute *ext4_attrs[] = {
ATTR_LIST(delayed_allocation_blocks),
@@ -2623,6 +2648,12 @@ static struct attribute *ext4_attrs[] = {
ATTR_LIST(max_writeback_mb_bump),
ATTR_LIST(extent_max_zeroout_kb),
ATTR_LIST(trigger_fs_error),
+ ATTR_LIST(err_ratelimit_interval_ms),
+ ATTR_LIST(err_ratelimit_burst),
+ ATTR_LIST(warning_ratelimit_interval_ms),
+ ATTR_LIST(warning_ratelimit_burst),
+ ATTR_LIST(msg_ratelimit_interval_ms),
+ ATTR_LIST(msg_ratelimit_burst),
NULL,
};
@@ -4118,6 +4149,11 @@ no_journal:
if (es->s_error_count)
mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
+ /* Enable message ratelimiting. Default is 10 messages per 5 secs. */
+ ratelimit_state_init(&sbi->s_err_ratelimit_state, 5 * HZ, 10);
+ ratelimit_state_init(&sbi->s_warning_ratelimit_state, 5 * HZ, 10);
+ ratelimit_state_init(&sbi->s_msg_ratelimit_state, 5 * HZ, 10);
+
kfree(orig_data);
return 0;
--
1.7.12.rc0.22.gcdd159b
On Thu, 17 Oct 2013, Theodore Ts'o wrote:
> Date: Thu, 17 Oct 2013 21:28:48 -0400
> From: Theodore Ts'o <[email protected]>
> To: Ext4 Developers List <[email protected]>
> Cc: Theodore Ts'o <[email protected]>
> Subject: [PATCH] ext4: add ratelimiting to ext4 messages
>
> In the case of a storage device that suddenly disappears, or in the
> case of significant file system corruption, this can result in a huge
> flood of messages being sent to the console. This can overflow the
> file system containing /var/log/messages, or if a serial console is
> configured, this can slow down the system so much that a hardware
> watchdog can end up triggering forcing a system reboot.
>
> Google-Bug-Id: 7258357
Looks good. Thanks!
Reviewed-by: Lukas Czerner <[email protected]>
However looking at what we print out from ext4 it looks like we
should revisit the code at some point and make a use of those
ext4_* helpers because there are still cases out there where we
just use printk, or combination of the ext4_* helper and printk.
Also it looks like almost every file in the fs/ext4 has it's own
debugging method, it would be nice to clean it up at some point as
well. But that's not really a priority I guess.
Thanks!
-Lukas
>
> Signed-off-by: "Theodore Ts'o" <[email protected]>
> ---
> fs/ext4/ext4.h | 6 +++
> fs/ext4/super.c | 152 +++++++++++++++++++++++++++++++++++---------------------
> 2 files changed, 100 insertions(+), 58 deletions(-)
>
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index af815ea..65485ab 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -29,6 +29,7 @@
> #include <linux/wait.h>
> #include <linux/blockgroup_lock.h>
> #include <linux/percpu_counter.h>
> +#include <linux/ratelimit.h>
> #include <crypto/hash.h>
> #ifdef __KERNEL__
> #include <linux/compat.h>
> @@ -1314,6 +1315,11 @@ struct ext4_sb_info {
> unsigned long s_es_last_sorted;
> struct percpu_counter s_extent_cache_cnt;
> spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp;
> +
> + /* Ratelimit ext4 messages. */
> + struct ratelimit_state s_err_ratelimit_state;
> + struct ratelimit_state s_warning_ratelimit_state;
> + struct ratelimit_state s_msg_ratelimit_state;
> };
>
> static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index 2c2e6cb..d3a857b 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -411,20 +411,26 @@ static void ext4_handle_error(struct super_block *sb)
> sb->s_id);
> }
>
> +#define ext4_error_ratelimit(sb) \
> + ___ratelimit(&(EXT4_SB(sb)->s_err_ratelimit_state), \
> + "EXT4-fs error")
> +
> void __ext4_error(struct super_block *sb, const char *function,
> unsigned int line, const char *fmt, ...)
> {
> struct va_format vaf;
> va_list args;
>
> - va_start(args, fmt);
> - vaf.fmt = fmt;
> - vaf.va = &args;
> - printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n",
> - sb->s_id, function, line, current->comm, &vaf);
> - va_end(args);
> + if (ext4_error_ratelimit(sb)) {
> + va_start(args, fmt);
> + vaf.fmt = fmt;
> + vaf.va = &args;
> + printk(KERN_CRIT
> + "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n",
> + sb->s_id, function, line, current->comm, &vaf);
> + va_end(args);
> + }
> save_error_info(sb, function, line);
> -
> ext4_handle_error(sb);
> }
>
> @@ -438,22 +444,23 @@ void __ext4_error_inode(struct inode *inode, const char *function,
>
> es->s_last_error_ino = cpu_to_le32(inode->i_ino);
> es->s_last_error_block = cpu_to_le64(block);
> + if (ext4_error_ratelimit(inode->i_sb)) {
> + va_start(args, fmt);
> + vaf.fmt = fmt;
> + vaf.va = &args;
> + if (block)
> + printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
> + "inode #%lu: block %llu: comm %s: %pV\n",
> + inode->i_sb->s_id, function, line, inode->i_ino,
> + block, current->comm, &vaf);
> + else
> + printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
> + "inode #%lu: comm %s: %pV\n",
> + inode->i_sb->s_id, function, line, inode->i_ino,
> + current->comm, &vaf);
> + va_end(args);
> + }
> save_error_info(inode->i_sb, function, line);
> - va_start(args, fmt);
> - vaf.fmt = fmt;
> - vaf.va = &args;
> - if (block)
> - printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
> - "inode #%lu: block %llu: comm %s: %pV\n",
> - inode->i_sb->s_id, function, line, inode->i_ino,
> - block, current->comm, &vaf);
> - else
> - printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
> - "inode #%lu: comm %s: %pV\n",
> - inode->i_sb->s_id, function, line, inode->i_ino,
> - current->comm, &vaf);
> - va_end(args);
> -
> ext4_handle_error(inode->i_sb);
> }
>
> @@ -469,27 +476,28 @@ void __ext4_error_file(struct file *file, const char *function,
>
> es = EXT4_SB(inode->i_sb)->s_es;
> es->s_last_error_ino = cpu_to_le32(inode->i_ino);
> + if (ext4_error_ratelimit(inode->i_sb)) {
> + path = d_path(&(file->f_path), pathname, sizeof(pathname));
> + if (IS_ERR(path))
> + path = "(unknown)";
> + va_start(args, fmt);
> + vaf.fmt = fmt;
> + vaf.va = &args;
> + if (block)
> + printk(KERN_CRIT
> + "EXT4-fs error (device %s): %s:%d: inode #%lu: "
> + "block %llu: comm %s: path %s: %pV\n",
> + inode->i_sb->s_id, function, line, inode->i_ino,
> + block, current->comm, path, &vaf);
> + else
> + printk(KERN_CRIT
> + "EXT4-fs error (device %s): %s:%d: inode #%lu: "
> + "comm %s: path %s: %pV\n",
> + inode->i_sb->s_id, function, line, inode->i_ino,
> + current->comm, path, &vaf);
> + va_end(args);
> + }
> save_error_info(inode->i_sb, function, line);
> - path = d_path(&(file->f_path), pathname, sizeof(pathname));
> - if (IS_ERR(path))
> - path = "(unknown)";
> - va_start(args, fmt);
> - vaf.fmt = fmt;
> - vaf.va = &args;
> - if (block)
> - printk(KERN_CRIT
> - "EXT4-fs error (device %s): %s:%d: inode #%lu: "
> - "block %llu: comm %s: path %s: %pV\n",
> - inode->i_sb->s_id, function, line, inode->i_ino,
> - block, current->comm, path, &vaf);
> - else
> - printk(KERN_CRIT
> - "EXT4-fs error (device %s): %s:%d: inode #%lu: "
> - "comm %s: path %s: %pV\n",
> - inode->i_sb->s_id, function, line, inode->i_ino,
> - current->comm, path, &vaf);
> - va_end(args);
> -
> ext4_handle_error(inode->i_sb);
> }
>
> @@ -543,11 +551,13 @@ void __ext4_std_error(struct super_block *sb, const char *function,
> (sb->s_flags & MS_RDONLY))
> return;
>
> - errstr = ext4_decode_error(sb, errno, nbuf);
> - printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n",
> - sb->s_id, function, line, errstr);
> - save_error_info(sb, function, line);
> + if (ext4_error_ratelimit(sb)) {
> + errstr = ext4_decode_error(sb, errno, nbuf);
> + printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n",
> + sb->s_id, function, line, errstr);
> + }
>
> + save_error_info(sb, function, line);
> ext4_handle_error(sb);
> }
>
> @@ -597,6 +607,9 @@ void __ext4_msg(struct super_block *sb,
> struct va_format vaf;
> va_list args;
>
> + if (!___ratelimit(&(EXT4_SB(sb)->s_msg_ratelimit_state), "EXT4-fs"))
> + return;
> +
> va_start(args, fmt);
> vaf.fmt = fmt;
> vaf.va = &args;
> @@ -610,6 +623,10 @@ void __ext4_warning(struct super_block *sb, const char *function,
> struct va_format vaf;
> va_list args;
>
> + if (!___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state),
> + "EXT4-fs warning"))
> + return;
> +
> va_start(args, fmt);
> vaf.fmt = fmt;
> vaf.va = &args;
> @@ -633,18 +650,20 @@ __acquires(bitlock)
> es->s_last_error_block = cpu_to_le64(block);
> __save_error_info(sb, function, line);
>
> - va_start(args, fmt);
> -
> - vaf.fmt = fmt;
> - vaf.va = &args;
> - printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ",
> - sb->s_id, function, line, grp);
> - if (ino)
> - printk(KERN_CONT "inode %lu: ", ino);
> - if (block)
> - printk(KERN_CONT "block %llu:", (unsigned long long) block);
> - printk(KERN_CONT "%pV\n", &vaf);
> - va_end(args);
> + if (ext4_error_ratelimit(sb)) {
> + va_start(args, fmt);
> + vaf.fmt = fmt;
> + vaf.va = &args;
> + printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ",
> + sb->s_id, function, line, grp);
> + if (ino)
> + printk(KERN_CONT "inode %lu: ", ino);
> + if (block)
> + printk(KERN_CONT "block %llu:",
> + (unsigned long long) block);
> + printk(KERN_CONT "%pV\n", &vaf);
> + va_end(args);
> + }
>
> if (test_opt(sb, ERRORS_CONT)) {
> ext4_commit_super(sb, 0);
> @@ -2606,6 +2625,12 @@ EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
> EXT4_DEPRECATED_ATTR(max_writeback_mb_bump, 128);
> EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb);
> EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error);
> +EXT4_RW_ATTR_SBI_UI(err_ratelimit_interval_ms, s_err_ratelimit_state.interval);
> +EXT4_RW_ATTR_SBI_UI(err_ratelimit_burst, s_err_ratelimit_state.burst);
> +EXT4_RW_ATTR_SBI_UI(warning_ratelimit_interval_ms, s_warning_ratelimit_state.interval);
> +EXT4_RW_ATTR_SBI_UI(warning_ratelimit_burst, s_warning_ratelimit_state.burst);
> +EXT4_RW_ATTR_SBI_UI(msg_ratelimit_interval_ms, s_msg_ratelimit_state.interval);
> +EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst);
>
> static struct attribute *ext4_attrs[] = {
> ATTR_LIST(delayed_allocation_blocks),
> @@ -2623,6 +2648,12 @@ static struct attribute *ext4_attrs[] = {
> ATTR_LIST(max_writeback_mb_bump),
> ATTR_LIST(extent_max_zeroout_kb),
> ATTR_LIST(trigger_fs_error),
> + ATTR_LIST(err_ratelimit_interval_ms),
> + ATTR_LIST(err_ratelimit_burst),
> + ATTR_LIST(warning_ratelimit_interval_ms),
> + ATTR_LIST(warning_ratelimit_burst),
> + ATTR_LIST(msg_ratelimit_interval_ms),
> + ATTR_LIST(msg_ratelimit_burst),
> NULL,
> };
>
> @@ -4118,6 +4149,11 @@ no_journal:
> if (es->s_error_count)
> mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
>
> + /* Enable message ratelimiting. Default is 10 messages per 5 secs. */
> + ratelimit_state_init(&sbi->s_err_ratelimit_state, 5 * HZ, 10);
> + ratelimit_state_init(&sbi->s_warning_ratelimit_state, 5 * HZ, 10);
> + ratelimit_state_init(&sbi->s_msg_ratelimit_state, 5 * HZ, 10);
> +
> kfree(orig_data);
> return 0;
>
>
On 10/17/13 8:28 PM, Theodore Ts'o wrote:
> In the case of a storage device that suddenly disappears, or in the
> case of significant file system corruption, this can result in a huge
> flood of messages being sent to the console. This can overflow the
> file system containing /var/log/messages, or if a serial console is
> configured, this can slow down the system so much that a hardware
> watchdog can end up triggering forcing a system reboot.
Just out of curiosity, after the fs shuts down, is there still a flood
of messages? Shouldn't that clamp down on the errors?
If not, shouldn't it do so? xfs has a lot of short-circuiting if
the filesystem is shut down, so it (I think) won't get into paths that
will generate more errors.
(If not, would just shutting up ext_* error message post-shutdown accomplish
the same thing w/ fewer lines & fewer /proc configuration knobs)?
Just a thought,
-Eric
> Google-Bug-Id: 7258357
>
> Signed-off-by: "Theodore Ts'o" <[email protected]>
> ---
> fs/ext4/ext4.h | 6 +++
> fs/ext4/super.c | 152 +++++++++++++++++++++++++++++++++++---------------------
> 2 files changed, 100 insertions(+), 58 deletions(-)
>
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index af815ea..65485ab 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -29,6 +29,7 @@
> #include <linux/wait.h>
> #include <linux/blockgroup_lock.h>
> #include <linux/percpu_counter.h>
> +#include <linux/ratelimit.h>
> #include <crypto/hash.h>
> #ifdef __KERNEL__
> #include <linux/compat.h>
> @@ -1314,6 +1315,11 @@ struct ext4_sb_info {
> unsigned long s_es_last_sorted;
> struct percpu_counter s_extent_cache_cnt;
> spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp;
> +
> + /* Ratelimit ext4 messages. */
> + struct ratelimit_state s_err_ratelimit_state;
> + struct ratelimit_state s_warning_ratelimit_state;
> + struct ratelimit_state s_msg_ratelimit_state;
> };
>
> static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index 2c2e6cb..d3a857b 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -411,20 +411,26 @@ static void ext4_handle_error(struct super_block *sb)
> sb->s_id);
> }
>
> +#define ext4_error_ratelimit(sb) \
> + ___ratelimit(&(EXT4_SB(sb)->s_err_ratelimit_state), \
> + "EXT4-fs error")
> +
> void __ext4_error(struct super_block *sb, const char *function,
> unsigned int line, const char *fmt, ...)
> {
> struct va_format vaf;
> va_list args;
>
> - va_start(args, fmt);
> - vaf.fmt = fmt;
> - vaf.va = &args;
> - printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n",
> - sb->s_id, function, line, current->comm, &vaf);
> - va_end(args);
> + if (ext4_error_ratelimit(sb)) {
> + va_start(args, fmt);
> + vaf.fmt = fmt;
> + vaf.va = &args;
> + printk(KERN_CRIT
> + "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n",
> + sb->s_id, function, line, current->comm, &vaf);
> + va_end(args);
> + }
> save_error_info(sb, function, line);
> -
> ext4_handle_error(sb);
> }
>
> @@ -438,22 +444,23 @@ void __ext4_error_inode(struct inode *inode, const char *function,
>
> es->s_last_error_ino = cpu_to_le32(inode->i_ino);
> es->s_last_error_block = cpu_to_le64(block);
> + if (ext4_error_ratelimit(inode->i_sb)) {
> + va_start(args, fmt);
> + vaf.fmt = fmt;
> + vaf.va = &args;
> + if (block)
> + printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
> + "inode #%lu: block %llu: comm %s: %pV\n",
> + inode->i_sb->s_id, function, line, inode->i_ino,
> + block, current->comm, &vaf);
> + else
> + printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
> + "inode #%lu: comm %s: %pV\n",
> + inode->i_sb->s_id, function, line, inode->i_ino,
> + current->comm, &vaf);
> + va_end(args);
> + }
> save_error_info(inode->i_sb, function, line);
> - va_start(args, fmt);
> - vaf.fmt = fmt;
> - vaf.va = &args;
> - if (block)
> - printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
> - "inode #%lu: block %llu: comm %s: %pV\n",
> - inode->i_sb->s_id, function, line, inode->i_ino,
> - block, current->comm, &vaf);
> - else
> - printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
> - "inode #%lu: comm %s: %pV\n",
> - inode->i_sb->s_id, function, line, inode->i_ino,
> - current->comm, &vaf);
> - va_end(args);
> -
> ext4_handle_error(inode->i_sb);
> }
>
> @@ -469,27 +476,28 @@ void __ext4_error_file(struct file *file, const char *function,
>
> es = EXT4_SB(inode->i_sb)->s_es;
> es->s_last_error_ino = cpu_to_le32(inode->i_ino);
> + if (ext4_error_ratelimit(inode->i_sb)) {
> + path = d_path(&(file->f_path), pathname, sizeof(pathname));
> + if (IS_ERR(path))
> + path = "(unknown)";
> + va_start(args, fmt);
> + vaf.fmt = fmt;
> + vaf.va = &args;
> + if (block)
> + printk(KERN_CRIT
> + "EXT4-fs error (device %s): %s:%d: inode #%lu: "
> + "block %llu: comm %s: path %s: %pV\n",
> + inode->i_sb->s_id, function, line, inode->i_ino,
> + block, current->comm, path, &vaf);
> + else
> + printk(KERN_CRIT
> + "EXT4-fs error (device %s): %s:%d: inode #%lu: "
> + "comm %s: path %s: %pV\n",
> + inode->i_sb->s_id, function, line, inode->i_ino,
> + current->comm, path, &vaf);
> + va_end(args);
> + }
> save_error_info(inode->i_sb, function, line);
> - path = d_path(&(file->f_path), pathname, sizeof(pathname));
> - if (IS_ERR(path))
> - path = "(unknown)";
> - va_start(args, fmt);
> - vaf.fmt = fmt;
> - vaf.va = &args;
> - if (block)
> - printk(KERN_CRIT
> - "EXT4-fs error (device %s): %s:%d: inode #%lu: "
> - "block %llu: comm %s: path %s: %pV\n",
> - inode->i_sb->s_id, function, line, inode->i_ino,
> - block, current->comm, path, &vaf);
> - else
> - printk(KERN_CRIT
> - "EXT4-fs error (device %s): %s:%d: inode #%lu: "
> - "comm %s: path %s: %pV\n",
> - inode->i_sb->s_id, function, line, inode->i_ino,
> - current->comm, path, &vaf);
> - va_end(args);
> -
> ext4_handle_error(inode->i_sb);
> }
>
> @@ -543,11 +551,13 @@ void __ext4_std_error(struct super_block *sb, const char *function,
> (sb->s_flags & MS_RDONLY))
> return;
>
> - errstr = ext4_decode_error(sb, errno, nbuf);
> - printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n",
> - sb->s_id, function, line, errstr);
> - save_error_info(sb, function, line);
> + if (ext4_error_ratelimit(sb)) {
> + errstr = ext4_decode_error(sb, errno, nbuf);
> + printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n",
> + sb->s_id, function, line, errstr);
> + }
>
> + save_error_info(sb, function, line);
> ext4_handle_error(sb);
> }
>
> @@ -597,6 +607,9 @@ void __ext4_msg(struct super_block *sb,
> struct va_format vaf;
> va_list args;
>
> + if (!___ratelimit(&(EXT4_SB(sb)->s_msg_ratelimit_state), "EXT4-fs"))
> + return;
> +
> va_start(args, fmt);
> vaf.fmt = fmt;
> vaf.va = &args;
> @@ -610,6 +623,10 @@ void __ext4_warning(struct super_block *sb, const char *function,
> struct va_format vaf;
> va_list args;
>
> + if (!___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state),
> + "EXT4-fs warning"))
> + return;
> +
> va_start(args, fmt);
> vaf.fmt = fmt;
> vaf.va = &args;
> @@ -633,18 +650,20 @@ __acquires(bitlock)
> es->s_last_error_block = cpu_to_le64(block);
> __save_error_info(sb, function, line);
>
> - va_start(args, fmt);
> -
> - vaf.fmt = fmt;
> - vaf.va = &args;
> - printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ",
> - sb->s_id, function, line, grp);
> - if (ino)
> - printk(KERN_CONT "inode %lu: ", ino);
> - if (block)
> - printk(KERN_CONT "block %llu:", (unsigned long long) block);
> - printk(KERN_CONT "%pV\n", &vaf);
> - va_end(args);
> + if (ext4_error_ratelimit(sb)) {
> + va_start(args, fmt);
> + vaf.fmt = fmt;
> + vaf.va = &args;
> + printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ",
> + sb->s_id, function, line, grp);
> + if (ino)
> + printk(KERN_CONT "inode %lu: ", ino);
> + if (block)
> + printk(KERN_CONT "block %llu:",
> + (unsigned long long) block);
> + printk(KERN_CONT "%pV\n", &vaf);
> + va_end(args);
> + }
>
> if (test_opt(sb, ERRORS_CONT)) {
> ext4_commit_super(sb, 0);
> @@ -2606,6 +2625,12 @@ EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
> EXT4_DEPRECATED_ATTR(max_writeback_mb_bump, 128);
> EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb);
> EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error);
> +EXT4_RW_ATTR_SBI_UI(err_ratelimit_interval_ms, s_err_ratelimit_state.interval);
> +EXT4_RW_ATTR_SBI_UI(err_ratelimit_burst, s_err_ratelimit_state.burst);
> +EXT4_RW_ATTR_SBI_UI(warning_ratelimit_interval_ms, s_warning_ratelimit_state.interval);
> +EXT4_RW_ATTR_SBI_UI(warning_ratelimit_burst, s_warning_ratelimit_state.burst);
> +EXT4_RW_ATTR_SBI_UI(msg_ratelimit_interval_ms, s_msg_ratelimit_state.interval);
> +EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst);
>
> static struct attribute *ext4_attrs[] = {
> ATTR_LIST(delayed_allocation_blocks),
> @@ -2623,6 +2648,12 @@ static struct attribute *ext4_attrs[] = {
> ATTR_LIST(max_writeback_mb_bump),
> ATTR_LIST(extent_max_zeroout_kb),
> ATTR_LIST(trigger_fs_error),
> + ATTR_LIST(err_ratelimit_interval_ms),
> + ATTR_LIST(err_ratelimit_burst),
> + ATTR_LIST(warning_ratelimit_interval_ms),
> + ATTR_LIST(warning_ratelimit_burst),
> + ATTR_LIST(msg_ratelimit_interval_ms),
> + ATTR_LIST(msg_ratelimit_burst),
> NULL,
> };
>
> @@ -4118,6 +4149,11 @@ no_journal:
> if (es->s_error_count)
> mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
>
> + /* Enable message ratelimiting. Default is 10 messages per 5 secs. */
> + ratelimit_state_init(&sbi->s_err_ratelimit_state, 5 * HZ, 10);
> + ratelimit_state_init(&sbi->s_warning_ratelimit_state, 5 * HZ, 10);
> + ratelimit_state_init(&sbi->s_msg_ratelimit_state, 5 * HZ, 10);
> +
> kfree(orig_data);
> return 0;
>
>
On Fri, Oct 18, 2013 at 09:08:40AM -0500, Eric Sandeen wrote:
> On 10/17/13 8:28 PM, Theodore Ts'o wrote:
> > In the case of a storage device that suddenly disappears, or in the
> > case of significant file system corruption, this can result in a huge
> > flood of messages being sent to the console. This can overflow the
> > file system containing /var/log/messages, or if a serial console is
> > configured, this can slow down the system so much that a hardware
> > watchdog can end up triggering forcing a system reboot.
>
> Just out of curiosity, after the fs shuts down, is there still a flood
> of messages? Shouldn't that clamp down on the errors?
Not if we are running with errors=continue. There are some ugly
patches in our tree which pipes error notifications to a netlink
socket, which allows userspace to do something intelligent with
errors, and because there are some errors where it's safe to continue
(especially if you are willing to shut down block allocations to the
block group where you don't trust the allocation bitmap), we tend to
run with errors=continue.
I think I mentioned the errors->netlink feature a while back, but
there wasn't a whole lot of excitement about it, and the patches
definitely need a lot of cleanup before they would be ready for
upstream merging. If people are curious, I can look into getting the
patches sent out, since we just finished rebasing them to 3.11.
> If not, shouldn't it do so? xfs has a lot of short-circuiting if
> the filesystem is shut down, so it (I think) won't get into paths that
> will generate more errors.
When xfs "shuts down" the file system, it doesn't allow any read or
write accesses, right? So it's basically an even stronger version of
errors=remount-ro. We should perhaps discuss whether it would be
better to squelch errors if we've remounted the file system read-only,
or whether we should implement a complete shutdown errors option.
And of course, even if we did this, we would still need to squelch
ext4_warning and ext4_msg output. (Although I agree with Lukas that
it might not be a bad idea to review some of the messages that either
get emitted via printk, or which are issued via ext4_msg(KERN_CRIT) to
see if we should perhaps change some of those to ext4_error.)
Regards,
- Ted
On 10/18/13 1:59 PM, Theodore Ts'o wrote:
> On Fri, Oct 18, 2013 at 09:08:40AM -0500, Eric Sandeen wrote:
>> On 10/17/13 8:28 PM, Theodore Ts'o wrote:
>>> In the case of a storage device that suddenly disappears, or in the
>>> case of significant file system corruption, this can result in a huge
>>> flood of messages being sent to the console. This can overflow the
>>> file system containing /var/log/messages, or if a serial console is
>>> configured, this can slow down the system so much that a hardware
>>> watchdog can end up triggering forcing a system reboot.
>>
>> Just out of curiosity, after the fs shuts down, is there still a flood
>> of messages? Shouldn't that clamp down on the errors?
>
> Not if we are running with errors=continue.
Maybe the ratelimit should depend on that then? I'm just concerned about
the possibility of filtering messages that, rather than being a nuisance,
are vital to figuring out what went wrong.
(granted, it's probably the first error or two that matters)
Or maybe it's only relevant with errors=continue, and errors=remount-ro
will be self-limiting in any case.
> There are some ugly
> patches in our tree which pipes error notifications to a netlink
> socket, which allows userspace to do something intelligent with
> errors, and because there are some errors where it's safe to continue
> (especially if you are willing to shut down block allocations to the
> block group where you don't trust the allocation bitmap), we tend to
> run with errors=continue.
hm... :)
> I think I mentioned the errors->netlink feature a while back, but
> there wasn't a whole lot of excitement about it, and the patches
> definitely need a lot of cleanup before they would be ready for
> upstream merging. If people are curious, I can look into getting the
> patches sent out, since we just finished rebasing them to 3.11.
>
>> If not, shouldn't it do so? xfs has a lot of short-circuiting if
>> the filesystem is shut down, so it (I think) won't get into paths that
>> will generate more errors.
>
> When xfs "shuts down" the file system, it doesn't allow any read or
> write accesses, right? So it's basically an even stronger version of
> errors=remount-ro. We should perhaps discuss whether it would be
> better to squelch errors if we've remounted the file system read-only,
> or whether we should implement a complete shutdown errors option.
Yeah, there is no errors=continue type option, that is probably too
dangerous in general for the majority of users.
I'd guess that w/ default remount-ro, the error flood isn't a risk.
> And of course, even if we did this, we would still need to squelch
> ext4_warning and ext4_msg output. (Although I agree with Lukas that
> it might not be a bad idea to review some of the messages that either
> get emitted via printk, or which are issued via ext4_msg(KERN_CRIT) to
> see if we should perhaps change some of those to ext4_error.)
*nod*
Thanks,
-Eric
> Regards,
>
> - Ted
>
On Sat, Oct 19, 2013 at 06:04:55PM -0500, Eric Sandeen wrote:
>
> Maybe the ratelimit should depend on that then? I'm just concerned about
> the possibility of filtering messages that, rather than being a nuisance,
> are vital to figuring out what went wrong.
>
> (granted, it's probably the first error or two that matters)
The default rate limit threshold that we've set is ten messages per 5
seconds. I have a really hard time believing that the 11th message is
going to containing any critical information that won't be in the
first ten. :-)
> Or maybe it's only relevant with errors=continue, and errors=remount-ro
> will be self-limiting in any case.
Even with remount-ro, there are certainly cases where things will not
be self limiting, simply because user processes continually try
referencing the same corrupted directory or file.
> > When xfs "shuts down" the file system, it doesn't allow any read or
> > write accesses, right? So it's basically an even stronger version of
> > errors=remount-ro. We should perhaps discuss whether it would be
> > better to squelch errors if we've remounted the file system read-only,
> > or whether we should implement a complete shutdown errors option.
>
> Yeah, there is no errors=continue type option, that is probably too
> dangerous in general for the majority of users.
What I was asking was whether it might make sense for us to implement
a errors=shutdown-fs option which causes all read operations (in
addition to write operations) to immediately return EIO. That would
certainly reduce the error flood risk, but if you did this on the root
file system, you might as well set errors=panic. This is what XFS's
default behavior on fserrors, correct?
- Ted
On Sun, Oct 20, 2013 at 07:18:00AM -0400, Theodore Ts'o wrote:
> On Sat, Oct 19, 2013 at 06:04:55PM -0500, Eric Sandeen wrote:
> > > When xfs "shuts down" the file system, it doesn't allow any read or
> > > write accesses, right? So it's basically an even stronger version of
> > > errors=remount-ro. We should perhaps discuss whether it would be
> > > better to squelch errors if we've remounted the file system read-only,
> > > or whether we should implement a complete shutdown errors option.
> >
> > Yeah, there is no errors=continue type option, that is probably too
> > dangerous in general for the majority of users.
>
> What I was asking was whether it might make sense for us to implement
> a errors=shutdown-fs option which causes all read operations (in
> addition to write operations) to immediately return EIO. That would
> certainly reduce the error flood risk, but if you did this on the root
> file system, you might as well set errors=panic. This is what XFS's
> default behavior on fserrors, correct?
No. XFS's behaviour is dependent on the context the error occurs in.
If it's a fatal or corruption inducing error, then it shuts down and
returns errors to any attempt to read, write or modify anything in
the filesystem. If the error is not fatal, then XFS behaves like
errors=continue.
IOWs, if you read a directory and trip over a corruption, the XFS
filesystem will not shut down - it will just throw the
EFSCORRUPTED/EIO error back to userspace and log it. However, if you
are trying to modify that directory, and the IO error occurs after
modifications have already been made to the directory but are not
yet committed, then that's a fatal error and a shutdown will occur.
i.e. IO errors in metadata are only fatal if we can't back out
cleanly, otherwise they are simply logged and reported to userspace
like any other IO error during a data read...
Cheers,
Dave.
--
Dave Chinner
[email protected]
Hi Ted,
On Fri, Oct 18, 2013 at 02:59:55PM -0400, Theodore Ts'o wrote:
> On Fri, Oct 18, 2013 at 09:08:40AM -0500, Eric Sandeen wrote:
> > On 10/17/13 8:28 PM, Theodore Ts'o wrote:
> > > In the case of a storage device that suddenly disappears, or in the
> > > case of significant file system corruption, this can result in a huge
> > > flood of messages being sent to the console. This can overflow the
> > > file system containing /var/log/messages, or if a serial console is
> > > configured, this can slow down the system so much that a hardware
> > > watchdog can end up triggering forcing a system reboot.
> >
> > Just out of curiosity, after the fs shuts down, is there still a flood
> > of messages? Shouldn't that clamp down on the errors?
>
> Not if we are running with errors=continue. There are some ugly
> patches in our tree which pipes error notifications to a netlink
> socket, which allows userspace to do something intelligent with
> errors, and because there are some errors where it's safe to continue
> (especially if you are willing to shut down block allocations to the
> block group where you don't trust the allocation bitmap), we tend to
> run with errors=continue.
>
> I think I mentioned the errors->netlink feature a while back, but
> there wasn't a whole lot of excitement about it, and the patches
> definitely need a lot of cleanup before they would be ready for
> upstream merging. If people are curious, I can look into getting the
> patches sent out, since we just finished rebasing them to 3.11.
That would be great if you could sent it out. We are happy to review
and let it go into mainline kernel if it is also useful for others.
Thanks,
- Zheng