2022-05-14 14:58:44

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH v3 2/5] NFS: fsync() should report filesystem errors over EINTR/ERESTARTSYS

From: Trond Myklebust <[email protected]>

If the commit to disk is interrupted, we should still first check for
filesystem errors so that we can report them in preference to the error
due to the signal.

Fixes: 2197e9b06c22 ("NFS: Fix up fsync() when the server rebooted")
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/file.c | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 150b7fa8f0a7..7c380e555224 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -204,15 +204,16 @@ static int
nfs_file_fsync_commit(struct file *file, int datasync)
{
struct inode *inode = file_inode(file);
- int ret;
+ int ret, ret2;

dprintk("NFS: fsync file(%pD2) datasync %d\n", file, datasync);

nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
ret = nfs_commit_inode(inode, FLUSH_SYNC);
- if (ret < 0)
- return ret;
- return file_check_and_advance_wb_err(file);
+ ret2 = file_check_and_advance_wb_err(file);
+ if (ret2 < 0)
+ return ret2;
+ return ret;
}

int
--
2.36.1



2022-05-14 15:32:07

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH v3 3/5] NFS: Don't report ENOSPC write errors twice

From: Trond Myklebust <[email protected]>

Any errors reported by the write() system call need to be cleared from
the file descriptor's error tracking. The current call to nfs_wb_all()
causes the error to be reported, but since it doesn't call
file_check_and_advance_wb_err(), we can end up reporting the same error
a second time when the application calls fsync().

Note that since Linux 4.13, the rule is that EIO may be reported for
write(), but it must be reported by a subsequent fsync(), so let's just
drop reporting it in write.

The check for nfs_ctx_key_to_expire() is just a duplicate to the one
already in nfs_write_end(), so let's drop that too.

Reported-by: ChenXiaoSong <[email protected]>
Fixes: ce368536dd61 ("nfs: nfs_file_write() should check for writeback errors")
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/file.c | 34 ++++++++++++++--------------------
1 file changed, 14 insertions(+), 20 deletions(-)

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 7c380e555224..87e4cd5e8fe2 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -598,18 +598,6 @@ static const struct vm_operations_struct nfs_file_vm_ops = {
.page_mkwrite = nfs_vm_page_mkwrite,
};

-static int nfs_need_check_write(struct file *filp, struct inode *inode,
- int error)
-{
- struct nfs_open_context *ctx;
-
- ctx = nfs_file_open_context(filp);
- if (nfs_error_is_fatal_on_server(error) ||
- nfs_ctx_key_to_expire(ctx, inode))
- return 1;
- return 0;
-}
-
ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
@@ -637,7 +625,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
if (iocb->ki_flags & IOCB_APPEND || iocb->ki_pos > i_size_read(inode)) {
result = nfs_revalidate_file_size(inode, file);
if (result)
- goto out;
+ return result;
}

nfs_clear_invalid_mapping(file->f_mapping);
@@ -656,6 +644,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)

written = result;
iocb->ki_pos += written;
+ nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written);

if (mntflags & NFS_MOUNT_WRITE_EAGER) {
result = filemap_fdatawrite_range(file->f_mapping,
@@ -673,17 +662,22 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
}
result = generic_write_sync(iocb, written);
if (result < 0)
- goto out;
+ return result;

+out:
/* Return error values */
error = filemap_check_wb_err(file->f_mapping, since);
- if (nfs_need_check_write(file, inode, error)) {
- int err = nfs_wb_all(inode);
- if (err < 0)
- result = err;
+ switch (error) {
+ default:
+ break;
+ case -EDQUOT:
+ case -EFBIG:
+ case -ENOSPC:
+ nfs_wb_all(inode);
+ error = file_check_and_advance_wb_err(file);
+ if (error < 0)
+ result = error;
}
- nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written);
-out:
return result;

out_swapfile:
--
2.36.1


2022-05-14 21:55:57

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH v3 4/5] NFS: Do not report flush errors in nfs_write_end()

From: Trond Myklebust <[email protected]>

If we do flush cached writebacks in nfs_write_end() due to the imminent
expiration of an RPCSEC_GSS session, then we should defer reporting any
resulting errors until the calls to file_check_and_advance_wb_err() in
nfs_file_write() and nfs_file_fsync().

Fixes: 6fbda89b257f ("NFS: Replace custom error reporting mechanism with generic one")
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/file.c | 7 ++-----
1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 87e4cd5e8fe2..3f17748eaf29 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -386,11 +386,8 @@ static int nfs_write_end(struct file *file, struct address_space *mapping,
return status;
NFS_I(mapping->host)->write_io += copied;

- if (nfs_ctx_key_to_expire(ctx, mapping->host)) {
- status = nfs_wb_all(mapping->host);
- if (status < 0)
- return status;
- }
+ if (nfs_ctx_key_to_expire(ctx, mapping->host))
+ nfs_wb_all(mapping->host);

return copied;
}
--
2.36.1


2022-06-15 02:46:35

by ChenXiaoSong

[permalink] [raw]
Subject: Re: [PATCH v3 3/5] NFS: Don't report ENOSPC write errors twice

Hi Trond:

If old writeback (such as -ERESTARTSYS or -EINTR, etc.) exist in struct
address_space->wb_err, nfs_file_write() will always return the
unexpected error. filemap_check_wb_err() will return the old error
even if there is no new writeback error between filemap_sample_wb_err()
and filemap_check_wb_err().

```c
since = filemap_sample_wb_err() = 0 // never be error
errseq_sample
if (!(old & ERRSEQ_SEEN)) // nobody see the error
return 0;
nfs_wb_all // no new error
error = filemap_check_wb_err(..., since) != 0 // unexpected error
```

By the way, the following process have redundant code in nfs_file_write():

```c
if (mntflags & NFS_MOUNT_WRITE_WAIT) {
result = filemap_fdatawait_range(file->f_mapping,
iocb->ki_pos - written,
iocb->ki_pos - 1);
if (result < 0)
goto out;
}
```

filemap_fdatawait_range() will always return 0, since patch 6c984083ec24
("NFS: Use of mapping_set_error() results in spurious errors") do not
save the error in struct address_space->flags:

filemap_fdatawait_range(file->f_mapping, ...) = 0
filemap_check_errors(mapping) = 0
test_bit(..., &mapping->flags) // flags always is 0

So the return value result is always 0, `if (result < 0)` is redundant

在 2022/5/14 22:27, [email protected] 写道:
> From: Trond Myklebust <[email protected]>
>
> Any errors reported by the write() system call need to be cleared from
> the file descriptor's error tracking. The current call to nfs_wb_all()
> causes the error to be reported, but since it doesn't call
> file_check_and_advance_wb_err(), we can end up reporting the same error
> a second time when the application calls fsync().
>
> Note that since Linux 4.13, the rule is that EIO may be reported for
> write(), but it must be reported by a subsequent fsync(), so let's just
> drop reporting it in write.
>
> The check for nfs_ctx_key_to_expire() is just a duplicate to the one
> already in nfs_write_end(), so let's drop that too.
>
> Reported-by: ChenXiaoSong <[email protected]>
> Fixes: ce368536dd61 ("nfs: nfs_file_write() should check for writeback errors")
> Signed-off-by: Trond Myklebust <[email protected]>
> ---
> fs/nfs/file.c | 34 ++++++++++++++--------------------
> 1 file changed, 14 insertions(+), 20 deletions(-)
>
> diff --git a/fs/nfs/file.c b/fs/nfs/file.c
> index 7c380e555224..87e4cd5e8fe2 100644
> --- a/fs/nfs/file.c
> +++ b/fs/nfs/file.c
> @@ -598,18 +598,6 @@ static const struct vm_operations_struct nfs_file_vm_ops = {
> .page_mkwrite = nfs_vm_page_mkwrite,
> };
>
> -static int nfs_need_check_write(struct file *filp, struct inode *inode,
> - int error)
> -{
> - struct nfs_open_context *ctx;
> -
> - ctx = nfs_file_open_context(filp);
> - if (nfs_error_is_fatal_on_server(error) ||
> - nfs_ctx_key_to_expire(ctx, inode))
> - return 1;
> - return 0;
> -}
> -
> ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
> {
> struct file *file = iocb->ki_filp;
> @@ -637,7 +625,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
> if (iocb->ki_flags & IOCB_APPEND || iocb->ki_pos > i_size_read(inode)) {
> result = nfs_revalidate_file_size(inode, file);
> if (result)
> - goto out;
> + return result;
> }
>
> nfs_clear_invalid_mapping(file->f_mapping);
> @@ -656,6 +644,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
>
> written = result;
> iocb->ki_pos += written;
> + nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written);
>
> if (mntflags & NFS_MOUNT_WRITE_EAGER) {
> result = filemap_fdatawrite_range(file->f_mapping,
> @@ -673,17 +662,22 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
> }
> result = generic_write_sync(iocb, written);
> if (result < 0)
> - goto out;
> + return result;
>
> +out:
> /* Return error values */
> error = filemap_check_wb_err(file->f_mapping, since);
> - if (nfs_need_check_write(file, inode, error)) {
> - int err = nfs_wb_all(inode);
> - if (err < 0)
> - result = err;
> + switch (error) {
> + default:
> + break;
> + case -EDQUOT:
> + case -EFBIG:
> + case -ENOSPC:
> + nfs_wb_all(inode);
> + error = file_check_and_advance_wb_err(file);
> + if (error < 0)
> + result = error;
> }
> - nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written);
> -out:
> return result;
>
> out_swapfile:
>