2023-09-01 22:43:01

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH] NFS: Fix error handling for O_DIRECT write scheduling

From: Trond Myklebust <[email protected]>

If we fail to schedule a request for transmission, there are 2
possibilities:
1) Either we hit a fatal error, and we just want to drop the remaining
requests on the floor.
2) We were asked to try again, in which case we should allow the
outstanding RPC calls to complete, so that we can recoalesce requests
and try again.

Fixes: d600ad1f2bdb ("NFS41: pop some layoutget errors to application")
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/direct.c | 62 ++++++++++++++++++++++++++++++++++++-------------
1 file changed, 46 insertions(+), 16 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 47d892a1d363..ee88f0a6e7b8 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -528,10 +528,9 @@ nfs_direct_write_scan_commit_list(struct inode *inode,
static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
{
struct nfs_pageio_descriptor desc;
- struct nfs_page *req, *tmp;
+ struct nfs_page *req;
LIST_HEAD(reqs);
struct nfs_commit_info cinfo;
- LIST_HEAD(failed);

nfs_init_cinfo_from_dreq(&cinfo, dreq);
nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo);
@@ -549,27 +548,36 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
&nfs_direct_write_completion_ops);
desc.pg_dreq = dreq;

- list_for_each_entry_safe(req, tmp, &reqs, wb_list) {
+ while (!list_empty(&reqs)) {
+ req = nfs_list_entry(reqs.next);
/* Bump the transmission count */
req->wb_nio++;
if (!nfs_pageio_add_request(&desc, req)) {
- nfs_list_move_request(req, &failed);
spin_lock(&cinfo.inode->i_lock);
- dreq->flags = 0;
- if (desc.pg_error < 0)
+ if (dreq->error < 0) {
+ desc.pg_error = dreq->error;
+ } else if (desc.pg_error != -EAGAIN) {
+ dreq->flags = 0;
+ if (!desc.pg_error)
+ desc.pg_error = -EIO;
dreq->error = desc.pg_error;
- else
- dreq->error = -EIO;
+ } else
+ dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
spin_unlock(&cinfo.inode->i_lock);
+ break;
}
nfs_release_request(req);
}
nfs_pageio_complete(&desc);

- while (!list_empty(&failed)) {
- req = nfs_list_entry(failed.next);
+ while (!list_empty(&reqs)) {
+ req = nfs_list_entry(reqs.next);
nfs_list_remove_request(req);
nfs_unlock_and_release_request(req);
+ if (desc.pg_error == -EAGAIN)
+ nfs_mark_request_commit(req, NULL, &cinfo, 0);
+ else
+ nfs_release_request(req);
}

if (put_dreq(dreq))
@@ -794,9 +802,11 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
{
struct nfs_pageio_descriptor desc;
struct inode *inode = dreq->inode;
+ struct nfs_commit_info cinfo;
ssize_t result = 0;
size_t requested_bytes = 0;
size_t wsize = max_t(size_t, NFS_SERVER(inode)->wsize, PAGE_SIZE);
+ bool defer = false;

trace_nfs_direct_write_schedule_iovec(dreq);

@@ -837,17 +847,37 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
break;
}

- nfs_lock_request(req);
- if (!nfs_pageio_add_request(&desc, req)) {
- result = desc.pg_error;
- nfs_unlock_and_release_request(req);
- break;
- }
pgbase = 0;
bytes -= req_len;
requested_bytes += req_len;
pos += req_len;
dreq->bytes_left -= req_len;
+
+ if (defer) {
+ nfs_mark_request_commit(req, NULL, &cinfo, 0);
+ continue;
+ }
+
+ nfs_lock_request(req);
+ if (nfs_pageio_add_request(&desc, req))
+ continue;
+
+ /* Exit on hard errors */
+ if (desc.pg_error < 0 && desc.pg_error != -EAGAIN) {
+ result = desc.pg_error;
+ nfs_unlock_and_release_request(req);
+ break;
+ }
+
+ /* If the error is soft, defer remaining requests */
+ nfs_init_cinfo_from_dreq(&cinfo, dreq);
+ spin_lock(&cinfo.inode->i_lock);
+ dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+ spin_unlock(&cinfo.inode->i_lock);
+ nfs_unlock_request(req);
+ nfs_mark_request_commit(req, NULL, &cinfo, 0);
+ desc.pg_error = 0;
+ defer = true;
}
nfs_direct_release_pages(pagevec, npages);
kvfree(pagevec);
--
2.41.0