2022-02-19 00:53:38

by Sweet Tea Dorminy

[permalink] [raw]
Subject: [PATCH v2] btrfs: add fs state details to error messages.

When a filesystem goes read-only due to an error, multiple errors tend
to be reported, some of which are knock-on failures. Logging some
fs_states, if any, in btrfs_handle_fs_error() and btrfs_printk()
helps distinguish the first error from subsequent messages which may
only exist due to an error state.

Under the new format, most initial errors will look like:
`BTRFS: error (device loop0) in ...`
while subsequent errors will begin with:
`error (device loop0: state E) in ...`

An initial transaction abort error will look like
`error (device loop0: state X) in ...`
and subsequent messages will contain
`(device loop0: state EX) in ...`

Signed-off-by: Sweet Tea Dorminy <[email protected]>
---
v2:
- Changed btrfs_state_to_string() for clarity
- Removed superfluous whitespace change

v1:
- https://lore.kernel.org/linux-btrfs/[email protected]/

fs/btrfs/super.c | 53 ++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 45 insertions(+), 8 deletions(-)

diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 4d947ba32da9..42429d68950d 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -66,6 +66,37 @@ static struct file_system_type btrfs_root_fs_type;

static int btrfs_remount(struct super_block *sb, int *flags, char *data);

+#define STATE_STRING_PREFACE ": state "
+#define MAX_STATE_CHARS 2
+
+static void btrfs_state_to_string(const struct btrfs_fs_info *info, char *buf)
+{
+ unsigned int states_printed = 0;
+ char *curr = buf;
+
+ memcpy(curr, STATE_STRING_PREFACE, sizeof(STATE_STRING_PREFACE));
+ curr += sizeof(STATE_STRING_PREFACE) - 1;
+
+ /* If more states are reported, update MAX_STATE_CHARS also */
+ if (test_bit(BTRFS_FS_STATE_ERROR, &info->fs_state)) {
+ *curr++ = 'E';
+ states_printed++;
+ }
+
+ if (test_bit(BTRFS_FS_STATE_TRANS_ABORTED, &info->fs_state)) {
+ *curr++ = 'X';
+ states_printed++;
+ }
+
+ /* If no states were printed, reset the buffer */
+ if (!states_printed)
+ curr = buf;
+
+ WARN_ON_ONCE(states_printed > MAX_STATE_CHARS);
+
+ *curr++ = '\0';
+}
+
/*
* Generally the error codes correspond to their respective errors, but there
* are a few special cases.
@@ -128,6 +159,7 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
{
struct super_block *sb = fs_info->sb;
#ifdef CONFIG_PRINTK
+ char statestr[sizeof(STATE_STRING_PREFACE) + MAX_STATE_CHARS];
const char *errstr;
#endif

@@ -140,6 +172,7 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function

#ifdef CONFIG_PRINTK
errstr = btrfs_decode_error(errno);
+ btrfs_state_to_string(fs_info, statestr);
if (fmt) {
struct va_format vaf;
va_list args;
@@ -148,12 +181,12 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
vaf.fmt = fmt;
vaf.va = &args;

- pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s (%pV)\n",
- sb->s_id, function, line, errno, errstr, &vaf);
+ pr_crit("BTRFS: error (device %s%s) in %s:%d: errno=%d %s (%pV)\n",
+ sb->s_id, statestr, function, line, errno, errstr, &vaf);
va_end(args);
} else {
- pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s\n",
- sb->s_id, function, line, errno, errstr);
+ pr_crit("BTRFS: error (device %s%s) in %s:%d: errno=%d %s\n",
+ sb->s_id, statestr, function, line, errno, errstr);
}
#endif

@@ -240,11 +273,15 @@ void __cold btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, .
vaf.va = &args;

if (__ratelimit(ratelimit)) {
- if (fs_info)
- printk("%sBTRFS %s (device %s): %pV\n", lvl, type,
- fs_info->sb->s_id, &vaf);
- else
+ if (fs_info) {
+ char statestr[sizeof(STATE_STRING_PREFACE) + MAX_STATE_CHARS];
+
+ btrfs_state_to_string(fs_info, statestr);
+ printk("%sBTRFS %s (device %s%s): %pV\n", lvl, type,
+ fs_info->sb->s_id, statestr, &vaf);
+ } else {
printk("%sBTRFS %s: %pV\n", lvl, type, &vaf);
+ }
}

va_end(args);
--
2.35.1


2022-02-22 03:20:55

by David Sterba

[permalink] [raw]
Subject: Re: [PATCH v2] btrfs: add fs state details to error messages.

On Fri, Feb 18, 2022 at 02:04:29PM -0500, Sweet Tea Dorminy wrote:
> When a filesystem goes read-only due to an error, multiple errors tend
> to be reported, some of which are knock-on failures. Logging some
> fs_states, if any, in btrfs_handle_fs_error() and btrfs_printk()
> helps distinguish the first error from subsequent messages which may
> only exist due to an error state.
>
> Under the new format, most initial errors will look like:
> `BTRFS: error (device loop0) in ...`
> while subsequent errors will begin with:
> `error (device loop0: state E) in ...`
>
> An initial transaction abort error will look like
> `error (device loop0: state X) in ...`
> and subsequent messages will contain
> `(device loop0: state EX) in ...`

I think there should be also BTRFS_FS_STATE_LOG_CLEANUP_ERROR, and the
letter maybe more hinting of the error:

E - generic error
A - transaction abort
L - log related errors

>
> Signed-off-by: Sweet Tea Dorminy <[email protected]>
> ---
> v2:
> - Changed btrfs_state_to_string() for clarity
> - Removed superfluous whitespace change
>
> v1:
> - https://lore.kernel.org/linux-btrfs/[email protected]/
>
> fs/btrfs/super.c | 53 ++++++++++++++++++++++++++++++++++++++++--------
> 1 file changed, 45 insertions(+), 8 deletions(-)
>
> diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
> index 4d947ba32da9..42429d68950d 100644
> --- a/fs/btrfs/super.c
> +++ b/fs/btrfs/super.c
> @@ -66,6 +66,37 @@ static struct file_system_type btrfs_root_fs_type;
>
> static int btrfs_remount(struct super_block *sb, int *flags, char *data);
>
> +#define STATE_STRING_PREFACE ": state "
> +#define MAX_STATE_CHARS 2
> +
> +static void btrfs_state_to_string(const struct btrfs_fs_info *info, char *buf)
> +{
> + unsigned int states_printed = 0;
> + char *curr = buf;
> +
> + memcpy(curr, STATE_STRING_PREFACE, sizeof(STATE_STRING_PREFACE));
> + curr += sizeof(STATE_STRING_PREFACE) - 1;
> +
> + /* If more states are reported, update MAX_STATE_CHARS also */
> + if (test_bit(BTRFS_FS_STATE_ERROR, &info->fs_state)) {
> + *curr++ = 'E';
> + states_printed++;
> + }
> +
> + if (test_bit(BTRFS_FS_STATE_TRANS_ABORTED, &info->fs_state)) {
> + *curr++ = 'X';
> + states_printed++;
> + }
> +

So if we have more than 2 it may make sense to create a table and not to
opencode it like that and then use the constants (MAX_STATE_CHARS)
everywhere. For first implementation it's probably ok to have it like
three ifs.

> + /* If no states were printed, reset the buffer */
> + if (!states_printed)
> + curr = buf;
> +
> + WARN_ON_ONCE(states_printed > MAX_STATE_CHARS);
> +
> + *curr++ = '\0';
> +}
> +
> /*
> * Generally the error codes correspond to their respective errors, but there
> * are a few special cases.
> @@ -128,6 +159,7 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
> {
> struct super_block *sb = fs_info->sb;
> #ifdef CONFIG_PRINTK
> + char statestr[sizeof(STATE_STRING_PREFACE) + MAX_STATE_CHARS];

A size defintion like that could be also predefined.

> const char *errstr;
> #endif
>
> @@ -140,6 +172,7 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
>
> #ifdef CONFIG_PRINTK
> errstr = btrfs_decode_error(errno);
> + btrfs_state_to_string(fs_info, statestr);
> if (fmt) {
> struct va_format vaf;
> va_list args;
> @@ -148,12 +181,12 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
> vaf.fmt = fmt;
> vaf.va = &args;
>
> - pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s (%pV)\n",
> - sb->s_id, function, line, errno, errstr, &vaf);
> + pr_crit("BTRFS: error (device %s%s) in %s:%d: errno=%d %s (%pV)\n",
> + sb->s_id, statestr, function, line, errno, errstr, &vaf);
> va_end(args);
> } else {
> - pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s\n",
> - sb->s_id, function, line, errno, errstr);
> + pr_crit("BTRFS: error (device %s%s) in %s:%d: errno=%d %s\n",
> + sb->s_id, statestr, function, line, errno, errstr);
> }
> #endif
>
> @@ -240,11 +273,15 @@ void __cold btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, .
> vaf.va = &args;
>
> if (__ratelimit(ratelimit)) {
> - if (fs_info)
> - printk("%sBTRFS %s (device %s): %pV\n", lvl, type,
> - fs_info->sb->s_id, &vaf);
> - else
> + if (fs_info) {
> + char statestr[sizeof(STATE_STRING_PREFACE) + MAX_STATE_CHARS];
> +
> + btrfs_state_to_string(fs_info, statestr);
> + printk("%sBTRFS %s (device %s%s): %pV\n", lvl, type,
> + fs_info->sb->s_id, statestr, &vaf);
> + } else {
> printk("%sBTRFS %s: %pV\n", lvl, type, &vaf);
> + }
> }
>
> va_end(args);
> --
> 2.35.1