2022-02-14 10:42:58

by Sweet Tea Dorminy

[permalink] [raw]
Subject: [PATCH] btrfs: add fs state details to error messages.

When a filesystem goes read-only due to an error, multiple errors tend
to be reported, some of which are knock-on failures. Logging some
fs_states, if any, in btrfs_handle_fs_error() and btrfs_printk()
helps distinguish the first error from subsequent messages which may
only exist due to an error state.

Under the new format, most initial errors will look like:
`BTRFS: error (device loop0) in ...`
while subsequent errors will begin with:
`error (device loop0: state E) in ...`

An initial transaction abort error will look like
`error (device loop0: state X) in ...`
and subsequent messages will contain
`(device loop0: state EX) in ...`

Signed-off-by: Sweet Tea Dorminy <[email protected]>
---
fs/btrfs/super.c | 49 +++++++++++++++++++++++++++++++++++++++---------
1 file changed, 40 insertions(+), 9 deletions(-)

diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 33cfc9e27451..d0e81eb48eac 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -66,6 +66,31 @@ static struct file_system_type btrfs_root_fs_type;

static int btrfs_remount(struct super_block *sb, int *flags, char *data);

+#define STATE_STRING_PREFACE ": state "
+#define MAX_STATE_CHARS 2
+
+static void btrfs_state_to_string(const struct btrfs_fs_info *info, char *buf)
+{
+ unsigned long state = info->fs_state;
+ char *curr = buf;
+
+ memcpy(curr, STATE_STRING_PREFACE, sizeof(STATE_STRING_PREFACE));
+ curr += sizeof(STATE_STRING_PREFACE) - 1;
+
+ /* If more states are reported, update MAX_STATE_CHARS also */
+ if (test_and_clear_bit(BTRFS_FS_STATE_ERROR, &state))
+ *curr++ = 'E';
+
+ if (test_and_clear_bit(BTRFS_FS_STATE_TRANS_ABORTED, &state))
+ *curr++ = 'X';
+
+ /* If no states were printed, reset the buffer */
+ if (state == info->fs_state)
+ curr = buf;
+
+ *curr++ = '\0';
+}
+
/*
* Generally the error codes correspond to their respective errors, but there
* are a few special cases.
@@ -128,6 +153,7 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
{
struct super_block *sb = fs_info->sb;
#ifdef CONFIG_PRINTK
+ char statestr[sizeof(STATE_STRING_PREFACE) + MAX_STATE_CHARS];
const char *errstr;
#endif

@@ -136,10 +162,11 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
* under SB_RDONLY, then it is safe here.
*/
if (errno == -EROFS && sb_rdonly(sb))
- return;
+ return;

#ifdef CONFIG_PRINTK
errstr = btrfs_decode_error(errno);
+ btrfs_state_to_string(fs_info, statestr);
if (fmt) {
struct va_format vaf;
va_list args;
@@ -148,12 +175,12 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
vaf.fmt = fmt;
vaf.va = &args;

- pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s (%pV)\n",
- sb->s_id, function, line, errno, errstr, &vaf);
+ pr_crit("BTRFS: error (device %s%s) in %s:%d: errno=%d %s (%pV)\n",
+ sb->s_id, statestr, function, line, errno, errstr, &vaf);
va_end(args);
} else {
- pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s\n",
- sb->s_id, function, line, errno, errstr);
+ pr_crit("BTRFS: error (device %s%s) in %s:%d: errno=%d %s\n",
+ sb->s_id, statestr, function, line, errno, errstr);
}
#endif

@@ -240,11 +267,15 @@ void __cold btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, .
vaf.va = &args;

if (__ratelimit(ratelimit)) {
- if (fs_info)
- printk("%sBTRFS %s (device %s): %pV\n", lvl, type,
- fs_info->sb->s_id, &vaf);
- else
+ if (fs_info) {
+ char statestr[sizeof(STATE_STRING_PREFACE) + MAX_STATE_CHARS];
+
+ btrfs_state_to_string(fs_info, statestr);
+ printk("%sBTRFS %s (device %s%s): %pV\n", lvl, type,
+ fs_info->sb->s_id, statestr, &vaf);
+ } else {
printk("%sBTRFS %s: %pV\n", lvl, type, &vaf);
+ }
}

va_end(args);
--
2.30.2


2022-02-15 16:05:19

by David Sterba

[permalink] [raw]
Subject: Re: [PATCH] btrfs: add fs state details to error messages.

On Sat, Feb 12, 2022 at 02:10:42PM -0500, Sweet Tea Dorminy wrote:
> When a filesystem goes read-only due to an error, multiple errors tend
> to be reported, some of which are knock-on failures. Logging some
> fs_states, if any, in btrfs_handle_fs_error() and btrfs_printk()
> helps distinguish the first error from subsequent messages which may
> only exist due to an error state.
>
> Under the new format, most initial errors will look like:
> `BTRFS: error (device loop0) in ...`
> while subsequent errors will begin with:
> `error (device loop0: state E) in ...`
>
> An initial transaction abort error will look like
> `error (device loop0: state X) in ...`
> and subsequent messages will contain
> `(device loop0: state EX) in ...`
>
> Signed-off-by: Sweet Tea Dorminy <[email protected]>
> ---
> fs/btrfs/super.c | 49 +++++++++++++++++++++++++++++++++++++++---------
> 1 file changed, 40 insertions(+), 9 deletions(-)
>
> diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
> index 33cfc9e27451..d0e81eb48eac 100644
> --- a/fs/btrfs/super.c
> +++ b/fs/btrfs/super.c
> @@ -66,6 +66,31 @@ static struct file_system_type btrfs_root_fs_type;
>
> static int btrfs_remount(struct super_block *sb, int *flags, char *data);
>
> +#define STATE_STRING_PREFACE ": state "
> +#define MAX_STATE_CHARS 2
> +
> +static void btrfs_state_to_string(const struct btrfs_fs_info *info, char *buf)
> +{
> + unsigned long state = info->fs_state;
> + char *curr = buf;
> +
> + memcpy(curr, STATE_STRING_PREFACE, sizeof(STATE_STRING_PREFACE));
> + curr += sizeof(STATE_STRING_PREFACE) - 1;
> +
> + /* If more states are reported, update MAX_STATE_CHARS also */
> + if (test_and_clear_bit(BTRFS_FS_STATE_ERROR, &state))

The state is supposed to be sticky, so can't be cleared. Also as I read
the suggested change, the "state: " should be visible for all messages
that are printed after the filesystem state changes.

> + *curr++ = 'E';
> +
> + if (test_and_clear_bit(BTRFS_FS_STATE_TRANS_ABORTED, &state))
> + *curr++ = 'X';
> +
> + /* If no states were printed, reset the buffer */
> + if (state == info->fs_state)
> + curr = buf;
> +
> + *curr++ = '\0';
> +}
> +
> /*
> * Generally the error codes correspond to their respective errors, but there
> * are a few special cases.
> @@ -128,6 +153,7 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
> {
> struct super_block *sb = fs_info->sb;
> #ifdef CONFIG_PRINTK
> + char statestr[sizeof(STATE_STRING_PREFACE) + MAX_STATE_CHARS];
> const char *errstr;
> #endif
>
> @@ -136,10 +162,11 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
> * under SB_RDONLY, then it is safe here.
> */
> if (errno == -EROFS && sb_rdonly(sb))
> - return;
> + return;

Unnecessary change.

>
> #ifdef CONFIG_PRINTK
> errstr = btrfs_decode_error(errno);
> + btrfs_state_to_string(fs_info, statestr);
> if (fmt) {
> struct va_format vaf;
> va_list args;
> @@ -148,12 +175,12 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
> vaf.fmt = fmt;
> vaf.va = &args;
>
> - pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s (%pV)\n",
> - sb->s_id, function, line, errno, errstr, &vaf);
> + pr_crit("BTRFS: error (device %s%s) in %s:%d: errno=%d %s (%pV)\n",
> + sb->s_id, statestr, function, line, errno, errstr, &vaf);

Alternatively the state message can be built into the message itself so
it does not require the extra array.


pr_crit("btrfs: error(device %s%s%s%s) ...",
sb->s_id,
statebits ? ", state" : "",
test_bit(FS_ERRROR) ? "E" : "",
test_bit(TRANS_ABORT) ? "T" : "", ...);

This is the idea, final code can use some wrappers around the bit
constatnts.


> va_end(args);
> } else {
> - pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s\n",
> - sb->s_id, function, line, errno, errstr);
> + pr_crit("BTRFS: error (device %s%s) in %s:%d: errno=%d %s\n",
> + sb->s_id, statestr, function, line, errno, errstr);

Filling the temporary array makes sense as it's used twice, however I'm
not sure if the 'else' branch is ever executed.

2022-02-16 03:22:10

by Sweet Tea Dorminy

[permalink] [raw]
Subject: Re: [PATCH] btrfs: add fs state details to error messages.


>> +static void btrfs_state_to_string(const struct btrfs_fs_info *info,
>> char *buf)
>> +{
>> + unsigned long state = info->fs_state;
>> + char *curr = buf;
>> +
>> + memcpy(curr, STATE_STRING_PREFACE, sizeof(STATE_STRING_PREFACE));
>> + curr += sizeof(STATE_STRING_PREFACE) - 1;
>> +
>> + /* If more states are reported, update MAX_STATE_CHARS also */
>> + if (test_and_clear_bit(BTRFS_FS_STATE_ERROR, &state))
>
> The state is supposed to be sticky, so can't be cleared. Also as I read
> the suggested change, the "state: " should be visible for all messages
> that are printed after the filesystem state changes.

'state' is a local copy of info->fs_state, so clearing bits on the local
copy should be okay, and the "state: " will be present for everything
after the fs state changes. Could instead use test_bit(&info->fs_state)
and keep a count of how many states were printed (to know if we need to
reset the buffer) if that is clearer.
>
>> + *curr++ = 'E';
>> +
>> + if (test_and_clear_bit(BTRFS_FS_STATE_TRANS_ABORTED, &state))
>> + *curr++ = 'X';
>> +
>> + /* If no states were printed, reset the buffer */
>> + if (state == info->fs_state)
>> + curr = buf;
>> +
>> + *curr++ = '\0';
>> +}
>> +
>> /*
>> * Generally the error codes correspond to their respective errors,
>> but there
>> * are a few special cases.
>> @@ -128,6 +153,7 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info
>> *fs_info, const char *function
>> {
>> struct super_block *sb = fs_info->sb;
>> #ifdef CONFIG_PRINTK
>> + char statestr[sizeof(STATE_STRING_PREFACE) + MAX_STATE_CHARS];
>> const char *errstr;
>> #endif
>>
>> @@ -136,10 +162,11 @@ void __btrfs_handle_fs_error(struct
>> btrfs_fs_info *fs_info, const char *function
>> * under SB_RDONLY, then it is safe here.
>> */
>> if (errno == -EROFS && sb_rdonly(sb))
>> - return;
>> + return;
>
> Unnecessary change.
Yeah, there's a stray space at the start of that line, but I can take
out fixing it.
>
>>
>> #ifdef CONFIG_PRINTK
>> errstr = btrfs_decode_error(errno);
>> + btrfs_state_to_string(fs_info, statestr);
>> if (fmt) {
>> struct va_format vaf;
>> va_list args;
>> @@ -148,12 +175,12 @@ void __btrfs_handle_fs_error(struct
>> btrfs_fs_info *fs_info, const char *function
>> vaf.fmt = fmt;
>> vaf.va = &args;
>>
>> - pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s (%pV)\n",
>> - sb->s_id, function, line, errno, errstr, &vaf);
>> + pr_crit("BTRFS: error (device %s%s) in %s:%d: errno=%d %s (%pV)\n",
>> + sb->s_id, statestr, function, line, errno, errstr, &vaf);
>
> Alternatively the state message can be built into the message itself so
> it does not require the extra array.
>
>
> pr_crit("btrfs: error(device %s%s%s%s) ...",
> sb->s_id,
> statebits ? ", state" : "",
> test_bit(FS_ERRROR) ? "E" : "",
> test_bit(TRANS_ABORT) ? "T" : "", ...);
>
> This is the idea, final code can use some wrappers around the bit
> constatnts.
>
>
>> va_end(args);
>> } else {
>> - pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s\n",
>> - sb->s_id, function, line, errno, errstr);
>> + pr_crit("BTRFS: error (device %s%s) in %s:%d: errno=%d %s\n",
>> + sb->s_id, statestr, function, line, errno, errstr);
>
> Filling the temporary array makes sense as it's used twice, however I'm
> not sure if the 'else' branch is ever executed.
There are a bunch of calls with NULL format -> else branch,
unfortunately.

Thanks!