From: Eric Sandeen Subject: Re: [PATCH] ext4: add ratelimiting to ext4 messages Date: Fri, 18 Oct 2013 09:08:40 -0500 Message-ID: <526140E8.7000002@redhat.com> References: <1382059728-29483-1-git-send-email-tytso@mit.edu> Mime-Version: 1.0 Content-Type: text/plain; charset=ISO-8859-1 Content-Transfer-Encoding: 7bit Cc: Ext4 Developers List To: "Theodore Ts'o" Return-path: Received: from mx1.redhat.com ([209.132.183.28]:11021 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1755998Ab3JROIl (ORCPT ); Fri, 18 Oct 2013 10:08:41 -0400 In-Reply-To: <1382059728-29483-1-git-send-email-tytso@mit.edu> Sender: linux-ext4-owner@vger.kernel.org List-ID: On 10/17/13 8:28 PM, Theodore Ts'o wrote: > In the case of a storage device that suddenly disappears, or in the > case of significant file system corruption, this can result in a huge > flood of messages being sent to the console. This can overflow the > file system containing /var/log/messages, or if a serial console is > configured, this can slow down the system so much that a hardware > watchdog can end up triggering forcing a system reboot. Just out of curiosity, after the fs shuts down, is there still a flood of messages? Shouldn't that clamp down on the errors? If not, shouldn't it do so? xfs has a lot of short-circuiting if the filesystem is shut down, so it (I think) won't get into paths that will generate more errors. (If not, would just shutting up ext_* error message post-shutdown accomplish the same thing w/ fewer lines & fewer /proc configuration knobs)? Just a thought, -Eric > Google-Bug-Id: 7258357 > > Signed-off-by: "Theodore Ts'o" > --- > fs/ext4/ext4.h | 6 +++ > fs/ext4/super.c | 152 +++++++++++++++++++++++++++++++++++--------------------- > 2 files changed, 100 insertions(+), 58 deletions(-) > > diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h > index af815ea..65485ab 100644 > --- a/fs/ext4/ext4.h > +++ b/fs/ext4/ext4.h > @@ -29,6 +29,7 @@ > #include > #include > #include > +#include > #include > #ifdef __KERNEL__ > #include > @@ -1314,6 +1315,11 @@ struct ext4_sb_info { > unsigned long s_es_last_sorted; > struct percpu_counter s_extent_cache_cnt; > spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp; > + > + /* Ratelimit ext4 messages. */ > + struct ratelimit_state s_err_ratelimit_state; > + struct ratelimit_state s_warning_ratelimit_state; > + struct ratelimit_state s_msg_ratelimit_state; > }; > > static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) > diff --git a/fs/ext4/super.c b/fs/ext4/super.c > index 2c2e6cb..d3a857b 100644 > --- a/fs/ext4/super.c > +++ b/fs/ext4/super.c > @@ -411,20 +411,26 @@ static void ext4_handle_error(struct super_block *sb) > sb->s_id); > } > > +#define ext4_error_ratelimit(sb) \ > + ___ratelimit(&(EXT4_SB(sb)->s_err_ratelimit_state), \ > + "EXT4-fs error") > + > void __ext4_error(struct super_block *sb, const char *function, > unsigned int line, const char *fmt, ...) > { > struct va_format vaf; > va_list args; > > - va_start(args, fmt); > - vaf.fmt = fmt; > - vaf.va = &args; > - printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n", > - sb->s_id, function, line, current->comm, &vaf); > - va_end(args); > + if (ext4_error_ratelimit(sb)) { > + va_start(args, fmt); > + vaf.fmt = fmt; > + vaf.va = &args; > + printk(KERN_CRIT > + "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n", > + sb->s_id, function, line, current->comm, &vaf); > + va_end(args); > + } > save_error_info(sb, function, line); > - > ext4_handle_error(sb); > } > > @@ -438,22 +444,23 @@ void __ext4_error_inode(struct inode *inode, const char *function, > > es->s_last_error_ino = cpu_to_le32(inode->i_ino); > es->s_last_error_block = cpu_to_le64(block); > + if (ext4_error_ratelimit(inode->i_sb)) { > + va_start(args, fmt); > + vaf.fmt = fmt; > + vaf.va = &args; > + if (block) > + printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: " > + "inode #%lu: block %llu: comm %s: %pV\n", > + inode->i_sb->s_id, function, line, inode->i_ino, > + block, current->comm, &vaf); > + else > + printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: " > + "inode #%lu: comm %s: %pV\n", > + inode->i_sb->s_id, function, line, inode->i_ino, > + current->comm, &vaf); > + va_end(args); > + } > save_error_info(inode->i_sb, function, line); > - va_start(args, fmt); > - vaf.fmt = fmt; > - vaf.va = &args; > - if (block) > - printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: " > - "inode #%lu: block %llu: comm %s: %pV\n", > - inode->i_sb->s_id, function, line, inode->i_ino, > - block, current->comm, &vaf); > - else > - printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: " > - "inode #%lu: comm %s: %pV\n", > - inode->i_sb->s_id, function, line, inode->i_ino, > - current->comm, &vaf); > - va_end(args); > - > ext4_handle_error(inode->i_sb); > } > > @@ -469,27 +476,28 @@ void __ext4_error_file(struct file *file, const char *function, > > es = EXT4_SB(inode->i_sb)->s_es; > es->s_last_error_ino = cpu_to_le32(inode->i_ino); > + if (ext4_error_ratelimit(inode->i_sb)) { > + path = d_path(&(file->f_path), pathname, sizeof(pathname)); > + if (IS_ERR(path)) > + path = "(unknown)"; > + va_start(args, fmt); > + vaf.fmt = fmt; > + vaf.va = &args; > + if (block) > + printk(KERN_CRIT > + "EXT4-fs error (device %s): %s:%d: inode #%lu: " > + "block %llu: comm %s: path %s: %pV\n", > + inode->i_sb->s_id, function, line, inode->i_ino, > + block, current->comm, path, &vaf); > + else > + printk(KERN_CRIT > + "EXT4-fs error (device %s): %s:%d: inode #%lu: " > + "comm %s: path %s: %pV\n", > + inode->i_sb->s_id, function, line, inode->i_ino, > + current->comm, path, &vaf); > + va_end(args); > + } > save_error_info(inode->i_sb, function, line); > - path = d_path(&(file->f_path), pathname, sizeof(pathname)); > - if (IS_ERR(path)) > - path = "(unknown)"; > - va_start(args, fmt); > - vaf.fmt = fmt; > - vaf.va = &args; > - if (block) > - printk(KERN_CRIT > - "EXT4-fs error (device %s): %s:%d: inode #%lu: " > - "block %llu: comm %s: path %s: %pV\n", > - inode->i_sb->s_id, function, line, inode->i_ino, > - block, current->comm, path, &vaf); > - else > - printk(KERN_CRIT > - "EXT4-fs error (device %s): %s:%d: inode #%lu: " > - "comm %s: path %s: %pV\n", > - inode->i_sb->s_id, function, line, inode->i_ino, > - current->comm, path, &vaf); > - va_end(args); > - > ext4_handle_error(inode->i_sb); > } > > @@ -543,11 +551,13 @@ void __ext4_std_error(struct super_block *sb, const char *function, > (sb->s_flags & MS_RDONLY)) > return; > > - errstr = ext4_decode_error(sb, errno, nbuf); > - printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n", > - sb->s_id, function, line, errstr); > - save_error_info(sb, function, line); > + if (ext4_error_ratelimit(sb)) { > + errstr = ext4_decode_error(sb, errno, nbuf); > + printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n", > + sb->s_id, function, line, errstr); > + } > > + save_error_info(sb, function, line); > ext4_handle_error(sb); > } > > @@ -597,6 +607,9 @@ void __ext4_msg(struct super_block *sb, > struct va_format vaf; > va_list args; > > + if (!___ratelimit(&(EXT4_SB(sb)->s_msg_ratelimit_state), "EXT4-fs")) > + return; > + > va_start(args, fmt); > vaf.fmt = fmt; > vaf.va = &args; > @@ -610,6 +623,10 @@ void __ext4_warning(struct super_block *sb, const char *function, > struct va_format vaf; > va_list args; > > + if (!___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state), > + "EXT4-fs warning")) > + return; > + > va_start(args, fmt); > vaf.fmt = fmt; > vaf.va = &args; > @@ -633,18 +650,20 @@ __acquires(bitlock) > es->s_last_error_block = cpu_to_le64(block); > __save_error_info(sb, function, line); > > - va_start(args, fmt); > - > - vaf.fmt = fmt; > - vaf.va = &args; > - printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ", > - sb->s_id, function, line, grp); > - if (ino) > - printk(KERN_CONT "inode %lu: ", ino); > - if (block) > - printk(KERN_CONT "block %llu:", (unsigned long long) block); > - printk(KERN_CONT "%pV\n", &vaf); > - va_end(args); > + if (ext4_error_ratelimit(sb)) { > + va_start(args, fmt); > + vaf.fmt = fmt; > + vaf.va = &args; > + printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ", > + sb->s_id, function, line, grp); > + if (ino) > + printk(KERN_CONT "inode %lu: ", ino); > + if (block) > + printk(KERN_CONT "block %llu:", > + (unsigned long long) block); > + printk(KERN_CONT "%pV\n", &vaf); > + va_end(args); > + } > > if (test_opt(sb, ERRORS_CONT)) { > ext4_commit_super(sb, 0); > @@ -2606,6 +2625,12 @@ EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); > EXT4_DEPRECATED_ATTR(max_writeback_mb_bump, 128); > EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb); > EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error); > +EXT4_RW_ATTR_SBI_UI(err_ratelimit_interval_ms, s_err_ratelimit_state.interval); > +EXT4_RW_ATTR_SBI_UI(err_ratelimit_burst, s_err_ratelimit_state.burst); > +EXT4_RW_ATTR_SBI_UI(warning_ratelimit_interval_ms, s_warning_ratelimit_state.interval); > +EXT4_RW_ATTR_SBI_UI(warning_ratelimit_burst, s_warning_ratelimit_state.burst); > +EXT4_RW_ATTR_SBI_UI(msg_ratelimit_interval_ms, s_msg_ratelimit_state.interval); > +EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst); > > static struct attribute *ext4_attrs[] = { > ATTR_LIST(delayed_allocation_blocks), > @@ -2623,6 +2648,12 @@ static struct attribute *ext4_attrs[] = { > ATTR_LIST(max_writeback_mb_bump), > ATTR_LIST(extent_max_zeroout_kb), > ATTR_LIST(trigger_fs_error), > + ATTR_LIST(err_ratelimit_interval_ms), > + ATTR_LIST(err_ratelimit_burst), > + ATTR_LIST(warning_ratelimit_interval_ms), > + ATTR_LIST(warning_ratelimit_burst), > + ATTR_LIST(msg_ratelimit_interval_ms), > + ATTR_LIST(msg_ratelimit_burst), > NULL, > }; > > @@ -4118,6 +4149,11 @@ no_journal: > if (es->s_error_count) > mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */ > > + /* Enable message ratelimiting. Default is 10 messages per 5 secs. */ > + ratelimit_state_init(&sbi->s_err_ratelimit_state, 5 * HZ, 10); > + ratelimit_state_init(&sbi->s_warning_ratelimit_state, 5 * HZ, 10); > + ratelimit_state_init(&sbi->s_msg_ratelimit_state, 5 * HZ, 10); > + > kfree(orig_data); > return 0; > >