Currently there is no easy way to tell that the mounted file system
contains errors other than checking for log messages, or reading the
information directly from superblock.
This patch adds new sysfs entry "contains_errors" for each ext4 file
system so user can simply check
cat /sys/fs/ext4/sda/contains_errors
If the file system is not marked as containing errors then the file is
empty. Otherwise it would print the same information that could be found
in the log. For example:
error count: 1
EXT4-fs (sda): initial error at 1399305407: trigger_test_error:2630
EXT4-fs (sda): last error at 1399305407: trigger_test_error:2630
Signed-off-by: Lukas Czerner <[email protected]>
---
fs/ext4/super.c | 100 ++++++++++++++++++++++++++++++++++++++------------------
1 file changed, 68 insertions(+), 32 deletions(-)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 6f9e6fa..61dc3d6 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2502,6 +2502,59 @@ static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a,
EXT4_SB(sb)->s_sectors_written_start) >> 1)));
}
+static ssize_t get_fs_errors(struct ext4_sb_info *sbi, char *buf, ssize_t size)
+{
+ struct super_block *sb = sbi->s_buddy_cache->i_sb;
+ struct ext4_super_block *es = sbi->s_es;
+ int len = 0;
+
+ if (es->s_error_count)
+ len = snprintf(buf + len, size - len,
+ "error count: %u\n",
+ le32_to_cpu(es->s_error_count));
+ if (es->s_first_error_time) {
+ len += snprintf(buf + len, size - len,
+ "EXT4-fs (%s): initial error at %u: %.*s:%d",
+ sb->s_id, le32_to_cpu(es->s_first_error_time),
+ (int) sizeof(es->s_first_error_func),
+ es->s_first_error_func,
+ le32_to_cpu(es->s_first_error_line));
+ if (es->s_first_error_ino)
+ len += snprintf(buf + len, size - len,
+ ": inode %u",
+ le32_to_cpu(es->s_first_error_ino));
+ if (es->s_first_error_block)
+ len += snprintf(buf + len, size - len,
+ ": block %llu", (unsigned long long)
+ le64_to_cpu(es->s_first_error_block));
+ len += snprintf(buf + len, size - len, "\n");
+ }
+ if (es->s_last_error_time) {
+ len += snprintf(buf + len, size - len,
+ "EXT4-fs (%s): last error at %u: %.*s:%d",
+ sb->s_id, le32_to_cpu(es->s_last_error_time),
+ (int) sizeof(es->s_last_error_func),
+ es->s_last_error_func,
+ le32_to_cpu(es->s_last_error_line));
+ if (es->s_last_error_ino)
+ len += snprintf(buf + len, size - len,
+ ": inode %u",
+ le32_to_cpu(es->s_last_error_ino));
+ if (es->s_last_error_block)
+ len += snprintf(buf + len, size - len,
+ ": block %llu", (unsigned long long)
+ le64_to_cpu(es->s_last_error_block));
+ len += snprintf(buf + len, size - len, "\n");
+ }
+ return len;
+}
+
+static ssize_t contains_errors_show(struct ext4_attr *a,
+ struct ext4_sb_info *sbi, char *buf)
+{
+ return get_fs_errors(sbi, buf, PAGE_SIZE);
+}
+
static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
struct ext4_sb_info *sbi,
const char *buf, size_t count)
@@ -2617,6 +2670,7 @@ static struct ext4_attr ext4_attr_##_name = { \
EXT4_RO_ATTR(delayed_allocation_blocks);
EXT4_RO_ATTR(session_write_kbytes);
EXT4_RO_ATTR(lifetime_write_kbytes);
+EXT4_RO_ATTR(contains_errors);
EXT4_RW_ATTR(reserved_clusters);
EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show,
inode_readahead_blks_store, s_inode_readahead_blks);
@@ -2659,6 +2713,7 @@ static struct attribute *ext4_attrs[] = {
ATTR_LIST(warning_ratelimit_burst),
ATTR_LIST(msg_ratelimit_interval_ms),
ATTR_LIST(msg_ratelimit_burst),
+ ATTR_LIST(contains_errors),
NULL,
};
@@ -2792,42 +2847,23 @@ static void print_daily_error_info(unsigned long arg)
{
struct super_block *sb = (struct super_block *) arg;
struct ext4_sb_info *sbi;
- struct ext4_super_block *es;
+ ssize_t size;
+ char *buf = NULL;
sbi = EXT4_SB(sb);
- es = sbi->s_es;
- if (es->s_error_count)
- ext4_msg(sb, KERN_NOTICE, "error count: %u",
- le32_to_cpu(es->s_error_count));
- if (es->s_first_error_time) {
- printk(KERN_NOTICE "EXT4-fs (%s): initial error at %u: %.*s:%d",
- sb->s_id, le32_to_cpu(es->s_first_error_time),
- (int) sizeof(es->s_first_error_func),
- es->s_first_error_func,
- le32_to_cpu(es->s_first_error_line));
- if (es->s_first_error_ino)
- printk(": inode %u",
- le32_to_cpu(es->s_first_error_ino));
- if (es->s_first_error_block)
- printk(": block %llu", (unsigned long long)
- le64_to_cpu(es->s_first_error_block));
- printk("\n");
- }
- if (es->s_last_error_time) {
- printk(KERN_NOTICE "EXT4-fs (%s): last error at %u: %.*s:%d",
- sb->s_id, le32_to_cpu(es->s_last_error_time),
- (int) sizeof(es->s_last_error_func),
- es->s_last_error_func,
- le32_to_cpu(es->s_last_error_line));
- if (es->s_last_error_ino)
- printk(": inode %u",
- le32_to_cpu(es->s_last_error_ino));
- if (es->s_last_error_block)
- printk(": block %llu", (unsigned long long)
- le64_to_cpu(es->s_last_error_block));
- printk("\n");
+ buf = kmalloc(255, GFP_NOFS);
+ if (!buf) {
+ ext4_msg(sb, KERN_NOTICE, "File system contains errors, "
+ "running e2fsck is recommended");
+ return;
}
+
+ size = get_fs_errors(sbi, buf, 255);
+ if (size)
+ ext4_msg(sb, KERN_NOTICE, "%s", buf);
+ kfree(buf);
+
mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ); /* Once a day */
}
--
1.8.3.1
On Tue, May 06, 2014 at 03:19:17PM +0200, Lukas Czerner wrote:
> Currently there is no easy way to tell that the mounted file system
> contains errors other than checking for log messages, or reading the
> information directly from superblock.
>
> This patch adds new sysfs entry "contains_errors" for each ext4 file
> system so user can simply check
>
> cat /sys/fs/ext4/sda/contains_errors
Minor complaint: "contains" makes me think that cat'ing that file will return
either 0 or 1, not a string of error text. Perhaps we could shorten it to
/sys/fs/ext4/sda/errors ?
Otherwise looks good to me.
--D
>
> If the file system is not marked as containing errors then the file is
> empty. Otherwise it would print the same information that could be found
> in the log. For example:
>
> error count: 1
> EXT4-fs (sda): initial error at 1399305407: trigger_test_error:2630
> EXT4-fs (sda): last error at 1399305407: trigger_test_error:2630
>
> Signed-off-by: Lukas Czerner <[email protected]>
> ---
> fs/ext4/super.c | 100 ++++++++++++++++++++++++++++++++++++++------------------
> 1 file changed, 68 insertions(+), 32 deletions(-)
>
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index 6f9e6fa..61dc3d6 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -2502,6 +2502,59 @@ static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a,
> EXT4_SB(sb)->s_sectors_written_start) >> 1)));
> }
>
> +static ssize_t get_fs_errors(struct ext4_sb_info *sbi, char *buf, ssize_t size)
> +{
> + struct super_block *sb = sbi->s_buddy_cache->i_sb;
> + struct ext4_super_block *es = sbi->s_es;
> + int len = 0;
> +
> + if (es->s_error_count)
> + len = snprintf(buf + len, size - len,
> + "error count: %u\n",
> + le32_to_cpu(es->s_error_count));
> + if (es->s_first_error_time) {
> + len += snprintf(buf + len, size - len,
> + "EXT4-fs (%s): initial error at %u: %.*s:%d",
> + sb->s_id, le32_to_cpu(es->s_first_error_time),
> + (int) sizeof(es->s_first_error_func),
> + es->s_first_error_func,
> + le32_to_cpu(es->s_first_error_line));
> + if (es->s_first_error_ino)
> + len += snprintf(buf + len, size - len,
> + ": inode %u",
> + le32_to_cpu(es->s_first_error_ino));
> + if (es->s_first_error_block)
> + len += snprintf(buf + len, size - len,
> + ": block %llu", (unsigned long long)
> + le64_to_cpu(es->s_first_error_block));
> + len += snprintf(buf + len, size - len, "\n");
> + }
> + if (es->s_last_error_time) {
> + len += snprintf(buf + len, size - len,
> + "EXT4-fs (%s): last error at %u: %.*s:%d",
> + sb->s_id, le32_to_cpu(es->s_last_error_time),
> + (int) sizeof(es->s_last_error_func),
> + es->s_last_error_func,
> + le32_to_cpu(es->s_last_error_line));
> + if (es->s_last_error_ino)
> + len += snprintf(buf + len, size - len,
> + ": inode %u",
> + le32_to_cpu(es->s_last_error_ino));
> + if (es->s_last_error_block)
> + len += snprintf(buf + len, size - len,
> + ": block %llu", (unsigned long long)
> + le64_to_cpu(es->s_last_error_block));
> + len += snprintf(buf + len, size - len, "\n");
> + }
> + return len;
> +}
> +
> +static ssize_t contains_errors_show(struct ext4_attr *a,
> + struct ext4_sb_info *sbi, char *buf)
> +{
> + return get_fs_errors(sbi, buf, PAGE_SIZE);
> +}
> +
> static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
> struct ext4_sb_info *sbi,
> const char *buf, size_t count)
> @@ -2617,6 +2670,7 @@ static struct ext4_attr ext4_attr_##_name = { \
> EXT4_RO_ATTR(delayed_allocation_blocks);
> EXT4_RO_ATTR(session_write_kbytes);
> EXT4_RO_ATTR(lifetime_write_kbytes);
> +EXT4_RO_ATTR(contains_errors);
> EXT4_RW_ATTR(reserved_clusters);
> EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show,
> inode_readahead_blks_store, s_inode_readahead_blks);
> @@ -2659,6 +2713,7 @@ static struct attribute *ext4_attrs[] = {
> ATTR_LIST(warning_ratelimit_burst),
> ATTR_LIST(msg_ratelimit_interval_ms),
> ATTR_LIST(msg_ratelimit_burst),
> + ATTR_LIST(contains_errors),
> NULL,
> };
>
> @@ -2792,42 +2847,23 @@ static void print_daily_error_info(unsigned long arg)
> {
> struct super_block *sb = (struct super_block *) arg;
> struct ext4_sb_info *sbi;
> - struct ext4_super_block *es;
> + ssize_t size;
> + char *buf = NULL;
>
> sbi = EXT4_SB(sb);
> - es = sbi->s_es;
>
> - if (es->s_error_count)
> - ext4_msg(sb, KERN_NOTICE, "error count: %u",
> - le32_to_cpu(es->s_error_count));
> - if (es->s_first_error_time) {
> - printk(KERN_NOTICE "EXT4-fs (%s): initial error at %u: %.*s:%d",
> - sb->s_id, le32_to_cpu(es->s_first_error_time),
> - (int) sizeof(es->s_first_error_func),
> - es->s_first_error_func,
> - le32_to_cpu(es->s_first_error_line));
> - if (es->s_first_error_ino)
> - printk(": inode %u",
> - le32_to_cpu(es->s_first_error_ino));
> - if (es->s_first_error_block)
> - printk(": block %llu", (unsigned long long)
> - le64_to_cpu(es->s_first_error_block));
> - printk("\n");
> - }
> - if (es->s_last_error_time) {
> - printk(KERN_NOTICE "EXT4-fs (%s): last error at %u: %.*s:%d",
> - sb->s_id, le32_to_cpu(es->s_last_error_time),
> - (int) sizeof(es->s_last_error_func),
> - es->s_last_error_func,
> - le32_to_cpu(es->s_last_error_line));
> - if (es->s_last_error_ino)
> - printk(": inode %u",
> - le32_to_cpu(es->s_last_error_ino));
> - if (es->s_last_error_block)
> - printk(": block %llu", (unsigned long long)
> - le64_to_cpu(es->s_last_error_block));
> - printk("\n");
> + buf = kmalloc(255, GFP_NOFS);
> + if (!buf) {
> + ext4_msg(sb, KERN_NOTICE, "File system contains errors, "
> + "running e2fsck is recommended");
> + return;
> }
> +
> + size = get_fs_errors(sbi, buf, 255);
> + if (size)
> + ext4_msg(sb, KERN_NOTICE, "%s", buf);
> + kfree(buf);
> +
> mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ); /* Once a day */
> }
>
> --
> 1.8.3.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, May 06, 2014 at 10:43:01AM -0700, Darrick J. Wong wrote:
>
> Minor complaint: "contains" makes me think that cat'ing that file will return
> either 0 or 1, not a string of error text. Perhaps we could shorten it to
> /sys/fs/ext4/sda/errors ?
What I'd suggest doing is simply calling it errors_count, and
returning s_error_count. While we're at it, we could also return
s_first_error_time and s_last_error_time as well, since I imagine
those would could be quite useful for someone trying to create a
system health monitoring daemon.
Cheers,
- Ted
On Tue, 6 May 2014, Theodore Ts'o wrote:
> Date: Tue, 6 May 2014 15:23:26 -0400
> From: Theodore Ts'o <[email protected]>
> To: Darrick J. Wong <[email protected]>
> Cc: Lukas Czerner <[email protected]>, [email protected],
> [email protected]
> Subject: Re: [PATCH] ext4: add sysfs entry showing whether the fs contains
> errors
>
> On Tue, May 06, 2014 at 10:43:01AM -0700, Darrick J. Wong wrote:
> >
> > Minor complaint: "contains" makes me think that cat'ing that file will return
> > either 0 or 1, not a string of error text. Perhaps we could shorten it to
> > /sys/fs/ext4/sda/errors ?
>
> What I'd suggest doing is simply calling it errors_count, and
> returning s_error_count. While we're at it, we could also return
> s_first_error_time and s_last_error_time as well, since I imagine
> those would could be quite useful for someone trying to create a
> system health monitoring daemon.
>
> Cheers,
>
> - Ted
Right now I am using the same function which prints this information
to the log. So I am going to use simpler output for the sysfs.
<error count> <first time> <function>:<line> <last time> <function>:<line>
That should be easy to parse. I think that just calling it errors is
good enough.
Thanks!
-Lukas
Hello Lukáš,
just wanted to ask about the status of this patch.. it didn't make it
to git yet, did it? Do you plan to make some changes to it? I'm eager
to test it ;-)
cheers
nik
On Wed, May 07, 2014 at 10:44:34AM +0200, Lukáš Czerner wrote:
> On Tue, 6 May 2014, Theodore Ts'o wrote:
>
> > Date: Tue, 6 May 2014 15:23:26 -0400
> > From: Theodore Ts'o <[email protected]>
> > To: Darrick J. Wong <[email protected]>
> > Cc: Lukas Czerner <[email protected]>, [email protected],
> > [email protected]
> > Subject: Re: [PATCH] ext4: add sysfs entry showing whether the fs contains
> > errors
> >
> > On Tue, May 06, 2014 at 10:43:01AM -0700, Darrick J. Wong wrote:
> > >
> > > Minor complaint: "contains" makes me think that cat'ing that file will return
> > > either 0 or 1, not a string of error text. Perhaps we could shorten it to
> > > /sys/fs/ext4/sda/errors ?
> >
> > What I'd suggest doing is simply calling it errors_count, and
> > returning s_error_count. While we're at it, we could also return
> > s_first_error_time and s_last_error_time as well, since I imagine
> > those would could be quite useful for someone trying to create a
> > system health monitoring daemon.
> >
> > Cheers,
> >
> > - Ted
>
> Right now I am using the same function which prints this information
> to the log. So I am going to use simpler output for the sysfs.
>
> <error count> <first time> <function>:<line> <last time> <function>:<line>
>
> That should be easy to parse. I think that just calling it errors is
> good enough.
>
> Thanks!
> -Lukas
>
--
-------------------------------------
Ing. Nikola CIPRICH
LinuxBox.cz, s.r.o.
28.rijna 168, 709 00 Ostrava
tel.: +420 591 166 214
fax: +420 596 621 273
mobil: +420 777 093 799
http://www.linuxbox.cz
mobil servis: +420 737 238 656
email servis: [email protected]
-------------------------------------
On Tue, 20 May 2014, Nikola Ciprich wrote:
> Date: Tue, 20 May 2014 11:47:41 +0200
> From: Nikola Ciprich <[email protected]>
> To: Luk?? Czerner <[email protected]>
> Cc: Theodore Ts'o <[email protected]>, Darrick J. Wong <[email protected]>,
> [email protected], Nikola Ciprich <[email protected]>
> Subject: Re: [PATCH] ext4: add sysfs entry showing whether the fs contains
> errors
>
> Hello Luk??,
>
> just wanted to ask about the status of this patch.. it didn't make it
> to git yet, did it? Do you plan to make some changes to it? I'm eager
> to test it ;-)
No it did not and it would not given that there was not a merge
window since then. I've been distracted by other work, but I'll put
you in cc when I have a new patch - yes I plan to change the format
of the output to be more compliant with sysfs rules.
-Lukas
>
> cheers
>
> nik
>
>
> On Wed, May 07, 2014 at 10:44:34AM +0200, Luk?? Czerner wrote:
> > On Tue, 6 May 2014, Theodore Ts'o wrote:
> >
> > > Date: Tue, 6 May 2014 15:23:26 -0400
> > > From: Theodore Ts'o <[email protected]>
> > > To: Darrick J. Wong <[email protected]>
> > > Cc: Lukas Czerner <[email protected]>, [email protected],
> > > [email protected]
> > > Subject: Re: [PATCH] ext4: add sysfs entry showing whether the fs contains
> > > errors
> > >
> > > On Tue, May 06, 2014 at 10:43:01AM -0700, Darrick J. Wong wrote:
> > > >
> > > > Minor complaint: "contains" makes me think that cat'ing that file will return
> > > > either 0 or 1, not a string of error text. Perhaps we could shorten it to
> > > > /sys/fs/ext4/sda/errors ?
> > >
> > > What I'd suggest doing is simply calling it errors_count, and
> > > returning s_error_count. While we're at it, we could also return
> > > s_first_error_time and s_last_error_time as well, since I imagine
> > > those would could be quite useful for someone trying to create a
> > > system health monitoring daemon.
> > >
> > > Cheers,
> > >
> > > - Ted
> >
> > Right now I am using the same function which prints this information
> > to the log. So I am going to use simpler output for the sysfs.
> >
> > <error count> <first time> <function>:<line> <last time> <function>:<line>
> >
> > That should be easy to parse. I think that just calling it errors is
> > good enough.
> >
> > Thanks!
> > -Lukas
> >
>
>
On Tue, May 20, 2014 at 11:56:53AM +0200, Lukáš Czerner wrote:
> On Tue, 20 May 2014, Nikola Ciprich wrote:
>
> > Date: Tue, 20 May 2014 11:47:41 +0200
> > From: Nikola Ciprich <[email protected]>
> > To: Lukáš Czerner <[email protected]>
> > Cc: Theodore Ts'o <[email protected]>, Darrick J. Wong <[email protected]>,
> > [email protected], Nikola Ciprich <[email protected]>
> > Subject: Re: [PATCH] ext4: add sysfs entry showing whether the fs contains
> > errors
> >
> > Hello Lukáš,
> >
> > just wanted to ask about the status of this patch.. it didn't make it
> > to git yet, did it? Do you plan to make some changes to it? I'm eager
> > to test it ;-)
>
> No it did not and it would not given that there was not a merge
> window since then. I've been distracted by other work, but I'll put
> you in cc when I have a new patch - yes I plan to change the format
> of the output to be more compliant with sysfs rules.
Hey Lukas,
I've been looking at old patches still in the ext4 patchwork queue,
and I came across this. Just a quick reminder you were planning on
updating this patch at some point when you have time.
Cheers,
- Ted
On Tue, 29 Jul 2014, Theodore Ts'o wrote:
> Date: Tue, 29 Jul 2014 10:49:03 -0400
> From: Theodore Ts'o <[email protected]>
> To: Lukáš Czerner <[email protected]>
> Cc: Nikola Ciprich <[email protected]>,
> Darrick J. Wong <[email protected]>, [email protected]
> Subject: Re: [PATCH] ext4: add sysfs entry showing whether the fs contains
> errors
>
> On Tue, May 20, 2014 at 11:56:53AM +0200, Lukáš Czerner wrote:
> > On Tue, 20 May 2014, Nikola Ciprich wrote:
> >
> > > Date: Tue, 20 May 2014 11:47:41 +0200
> > > From: Nikola Ciprich <[email protected]>
> > > To: Lukáš Czerner <[email protected]>
> > > Cc: Theodore Ts'o <[email protected]>, Darrick J. Wong <[email protected]>,
> > > [email protected], Nikola Ciprich <[email protected]>
> > > Subject: Re: [PATCH] ext4: add sysfs entry showing whether the fs contains
> > > errors
> > >
> > > Hello Lukáš,
> > >
> > > just wanted to ask about the status of this patch.. it didn't make it
> > > to git yet, did it? Do you plan to make some changes to it? I'm eager
> > > to test it ;-)
> >
> > No it did not and it would not given that there was not a merge
> > window since then. I've been distracted by other work, but I'll put
> > you in cc when I have a new patch - yes I plan to change the format
> > of the output to be more compliant with sysfs rules.
>
> Hey Lukas,
>
> I've been looking at old patches still in the ext4 patchwork queue,
> and I came across this. Just a quick reminder you were planning on
> updating this patch at some point when you have time.
>
> Cheers,
Right,
thanks a lot for the remainder it completely slipped out of my mind.
I'll take a look at it once again.
Thanks!
-Lukas
>
> - Ted
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>