2023-03-25 07:02:57

by zhanchengbin

[permalink] [raw]
Subject: [PATCH 0/2] Add some msg for io error

If there is an EIO during the process of fsck, the user can be notified of it.

zhanchengbin (2):
lib/ext2fs: add error handle in unix_flush and unix_write_byte
e2fsck: add sync error handle to e2fsck.

e2fsck/ehandler.c | 24 ++++++++++++++++++++++++
lib/ext2fs/ext2_io.h | 2 ++
lib/ext2fs/unix_io.c | 37 ++++++++++++++++++++++++++-----------
3 files changed, 52 insertions(+), 11 deletions(-)

--
2.31.1


2023-03-25 07:02:58

by zhanchengbin

[permalink] [raw]
Subject: [PATCH 2/2] e2fsck: add sync error handle to e2fsck.

If fsync fails during fsck, it is silent and users will not perceive it, so
a function to handle fsync failure should be added to fsck.

Signed-off-by: zhanchengbin <[email protected]>
---
e2fsck/ehandler.c | 24 ++++++++++++++++++++++++
1 file changed, 24 insertions(+)

diff --git a/e2fsck/ehandler.c b/e2fsck/ehandler.c
index 71ca301c..ae35f3ef 100644
--- a/e2fsck/ehandler.c
+++ b/e2fsck/ehandler.c
@@ -118,6 +118,29 @@ static errcode_t e2fsck_handle_write_error(io_channel channel,
return error;
}

+static errcode_t e2fsck_handle_sync_error(io_channel channel,
+ errcode_t error)
+{
+ ext2_filsys fs = (ext2_filsys) channel->app_data;
+ e2fsck_t ctx;
+
+ ctx = (e2fsck_t) fs->priv_data;
+ if (ctx->flags & E2F_FLAG_EXITING)
+ return 0;
+
+ if (operation)
+ printf(_("Error sync (%s) while %s. "),
+ error_message(error), operation);
+ else
+ printf(_("Error sync (%s). "),
+ error_message(error));
+ preenhalt(ctx);
+ if (ask(ctx, _("Ignore error"), 1))
+ return 0;
+
+ return error;
+}
+
const char *ehandler_operation(const char *op)
{
const char *ret = operation;
@@ -130,4 +153,5 @@ void ehandler_init(io_channel channel)
{
channel->read_error = e2fsck_handle_read_error;
channel->write_error = e2fsck_handle_write_error;
+ channel->sync_error = e2fsck_handle_sync_error;
}
--
2.31.1

2023-03-25 07:03:01

by zhanchengbin

[permalink] [raw]
Subject: [PATCH 1/2] lib/ext2fs: add error handle in unix_flush and unix_write_byte

As you can see, a new error handling has been added for fsync, and the error
handling for unix_write_byte function has reused the error handling of
write_blk.

Signed-off-by: zhanchengbin <[email protected]>
---
lib/ext2fs/ext2_io.h | 2 ++
lib/ext2fs/unix_io.c | 37 ++++++++++++++++++++++++++-----------
2 files changed, 28 insertions(+), 11 deletions(-)

diff --git a/lib/ext2fs/ext2_io.h b/lib/ext2fs/ext2_io.h
index 679184e3..becd7078 100644
--- a/lib/ext2fs/ext2_io.h
+++ b/lib/ext2fs/ext2_io.h
@@ -56,6 +56,8 @@ struct struct_io_channel {
size_t size,
int actual_bytes_written,
errcode_t error);
+ errcode_t (*sync_error)(io_channel channel,
+ errcode_t error);
int refcount;
int flags;
long reserved[14];
diff --git a/lib/ext2fs/unix_io.c b/lib/ext2fs/unix_io.c
index 3171c736..283b4eb6 100644
--- a/lib/ext2fs/unix_io.c
+++ b/lib/ext2fs/unix_io.c
@@ -1250,7 +1250,8 @@ static errcode_t unix_write_byte(io_channel channel, unsigned long offset,
#ifdef ALIGN_DEBUG
printf("unix_write_byte: O_DIRECT fallback\n");
#endif
- return EXT2_ET_UNIMPLEMENTED;
+ retval = EXT2_ET_UNIMPLEMENTED;
+ goto error_out;
}

#ifndef NO_IO_CACHE
@@ -1258,19 +1259,30 @@ static errcode_t unix_write_byte(io_channel channel, unsigned long offset,
* Flush out the cache completely
*/
if ((retval = flush_cached_blocks(channel, data, FLUSH_INVALIDATE)))
- return retval;
+ goto error_out;
#endif

- if (lseek(data->dev, offset + data->offset, SEEK_SET) < 0)
- return errno;
+ if (lseek(data->dev, offset + data->offset, SEEK_SET) < 0) {
+ retval = errno;
+ goto error_out;
+ }

actual = write(data->dev, buf, size);
- if (actual < 0)
- return errno;
- if (actual != size)
- return EXT2_ET_SHORT_WRITE;
-
+ if (actual < 0) {
+ retval = errno;
+ goto error_out;
+ }
+ if (actual != size) {
+ retval = EXT2_ET_SHORT_WRITE;
+ goto error_out;
+ }
return 0;
+error_out:
+ if (channel->write_error)
+ retval = (channel->write_error)(channel,
+ offset / channel->block_size, 0, buf,
+ size, actual, retval);
+ return retval;
}

/*
@@ -1289,8 +1301,11 @@ static errcode_t unix_flush(io_channel channel)
retval = flush_cached_blocks(channel, data, 0);
#endif
#ifdef HAVE_FSYNC
- if (!retval && fsync(data->dev) != 0)
- return errno;
+ if (!retval && fsync(data->dev) != 0) {
+ if (channel->sync_error)
+ retval = (channel->sync_error)(channel, errno);
+ return retval;
+ }
#endif
return retval;
}
--
2.31.1

2023-03-25 17:34:00

by Darrick J. Wong

[permalink] [raw]
Subject: Re: [PATCH 2/2] e2fsck: add sync error handle to e2fsck.

On Sat, Mar 25, 2023 at 02:56:52PM +0800, zhanchengbin wrote:
> If fsync fails during fsck, it is silent and users will not perceive it, so
> a function to handle fsync failure should be added to fsck.
>
> Signed-off-by: zhanchengbin <[email protected]>
> ---
> e2fsck/ehandler.c | 24 ++++++++++++++++++++++++
> 1 file changed, 24 insertions(+)
>
> diff --git a/e2fsck/ehandler.c b/e2fsck/ehandler.c
> index 71ca301c..ae35f3ef 100644
> --- a/e2fsck/ehandler.c
> +++ b/e2fsck/ehandler.c
> @@ -118,6 +118,29 @@ static errcode_t e2fsck_handle_write_error(io_channel channel,
> return error;
> }
>
> +static errcode_t e2fsck_handle_sync_error(io_channel channel,
> + errcode_t error)
> +{
> + ext2_filsys fs = (ext2_filsys) channel->app_data;
> + e2fsck_t ctx;
> +
> + ctx = (e2fsck_t) fs->priv_data;
> + if (ctx->flags & E2F_FLAG_EXITING)
> + return 0;
> +

Nit: ^^^ unnecessary indentation

> + if (operation)
> + printf(_("Error sync (%s) while %s. "),

I think we should be more explicit that *fsync* failed:

"Error during fsync of dirty metadata while %s: %s",
operation, error_message(...)?


> + error_message(error), operation);
> + else
> + printf(_("Error sync (%s). "),
> + error_message(error));
> + preenhalt(ctx);
> + if (ask(ctx, _("Ignore error"), 1))

ask_yn()?

Not sure what we're asking about here, or what happens if you answer NO?
Unless we're using a redo file, dirty metadata flush has failed so we
might as well keep going, right?

--D

> + return 0;
> +
> + return error;
> +}
> +
> const char *ehandler_operation(const char *op)
> {
> const char *ret = operation;
> @@ -130,4 +153,5 @@ void ehandler_init(io_channel channel)
> {
> channel->read_error = e2fsck_handle_read_error;
> channel->write_error = e2fsck_handle_write_error;
> + channel->sync_error = e2fsck_handle_sync_error;
> }
> --
> 2.31.1
>

2023-03-26 14:33:44

by Theodore Ts'o

[permalink] [raw]
Subject: Re: [PATCH 0/2] Add some msg for io error

On Sat, Mar 25, 2023 at 02:56:50PM +0800, zhanchengbin wrote:
> If there is an EIO during the process of fsck, the user can be notified of it.

Can you identify a code path where the user is *not* getting notified
while e2fsck is running without this patch series?

The unix_io.c module calls fsync() through unix_flush() only. When
unix_write_byte() calls flush_cached blocks(), if the read or write
system call fails, the error will be returned to the caller of
flush_cached_byte(), and the unix_write_byte() will return the error
back to the caller (in this case, e2fsck).

So in both cases, e2fsck checks the error return from ext2fs_flush()
(which is the only place where write_byte gets called) and
io_channel->flush(), and so the user will get some kind of error
report already.

The error message might not identify exactly what I/O failed, but the
"Error sync" message that this commit series provides is not going to
be much better.

- Ted

2023-03-30 03:00:13

by zhanchengbin

[permalink] [raw]
Subject: Re: [PATCH 0/2] Add some msg for io error


On 2023/3/26 22:31, Theodore Ts'o wrote:
> On Sat, Mar 25, 2023 at 02:56:50PM +0800, zhanchengbin wrote:
>> If there is an EIO during the process of fsck, the user can be notified of it.
>
> Can you identify a code path where the user is *not* getting notified
> while e2fsck is running without this patch series?
>
> The unix_io.c module calls fsync() through unix_flush() only. When
> unix_write_byte() calls flush_cached blocks(), if the read or write
> system call fails, the error will be returned to the caller of
> flush_cached_byte(), and the unix_write_byte() will return the error
> back to the caller (in this case, e2fsck).
>

io_channel_flush and io_channel_write_byte do have return values, but
they may not necessarily be checked at their calling points. As in the
following path:

e2fsck_run_ext3_journal
ext2fs_flush // Ignore errors.
ext2fs_flush2
io_channel_flush
ext2fs_mmp_stop // Ignore errors.
ext2fs_mmp_write
io_channel_flush

ext2fs_flush // Many calls ignore errors.
ext2fs_flush2
write_primary_superblock
io_channel_write_byte

Thanks,
- bin.

> So in both cases, e2fsck checks the error return from ext2fs_flush()
> (which is the only place where write_byte gets called) and
> io_channel->flush(), and so the user will get some kind of error
> report already.
>
> The error message might not identify exactly what I/O failed, but the
> "Error sync" message that this commit series provides is not going to
> be much better.
>
> - Ted
>
> .
>

2023-03-30 03:05:04

by zhanchengbin

[permalink] [raw]
Subject: Re: [PATCH 2/2] e2fsck: add sync error handle to e2fsck.

Thank you for your advice. I will modify patches.
- bin.

On 2023/3/26 1:13, Darrick J. Wong wrote:
> On Sat, Mar 25, 2023 at 02:56:52PM +0800, zhanchengbin wrote:
>> If fsync fails during fsck, it is silent and users will not perceive it, so
>> a function to handle fsync failure should be added to fsck.
>>
>> Signed-off-by: zhanchengbin <[email protected]>
>> ---
>> e2fsck/ehandler.c | 24 ++++++++++++++++++++++++
>> 1 file changed, 24 insertions(+)
>>
>> diff --git a/e2fsck/ehandler.c b/e2fsck/ehandler.c
>> index 71ca301c..ae35f3ef 100644
>> --- a/e2fsck/ehandler.c
>> +++ b/e2fsck/ehandler.c
>> @@ -118,6 +118,29 @@ static errcode_t e2fsck_handle_write_error(io_channel channel,
>> return error;
>> }
>>
>> +static errcode_t e2fsck_handle_sync_error(io_channel channel,
>> + errcode_t error)
>> +{
>> + ext2_filsys fs = (ext2_filsys) channel->app_data;
>> + e2fsck_t ctx;
>> +
>> + ctx = (e2fsck_t) fs->priv_data;
>> + if (ctx->flags & E2F_FLAG_EXITING)
>> + return 0;
>> +
>
> Nit: ^^^ unnecessary indentation
>
>> + if (operation)
>> + printf(_("Error sync (%s) while %s. "),
>
> I think we should be more explicit that *fsync* failed:
>
> "Error during fsync of dirty metadata while %s: %s",
> operation, error_message(...)?
>
>
>> + error_message(error), operation);
>> + else
>> + printf(_("Error sync (%s). "),
>> + error_message(error));
>> + preenhalt(ctx);
>> + if (ask(ctx, _("Ignore error"), 1))
>
> ask_yn()?
>
> Not sure what we're asking about here, or what happens if you answer NO?
> Unless we're using a redo file, dirty metadata flush has failed so we
> might as well keep going, right?
>
> --D
>
>> + return 0;
>> +
>> + return error;
>> +}
>> +
>> const char *ehandler_operation(const char *op)
>> {
>> const char *ret = operation;
>> @@ -130,4 +153,5 @@ void ehandler_init(io_channel channel)
>> {
>> channel->read_error = e2fsck_handle_read_error;
>> channel->write_error = e2fsck_handle_write_error;
>> + channel->sync_error = e2fsck_handle_sync_error;
>> }
>> --
>> 2.31.1
>>
> .
>