LinuxLists.cc - [PATCH 0/2] Two miscellaneous NFS patches.

2014-09-18 06:10:00

Subject: [PATCH 0/2] Two miscellaneous NFS patches.

These are two unrelated patches that I found while tidying up :-)
One has been sitting, unloved, for over a year... (though I did
mention it in a recent email - I'm resending it for simplicity).

Both fix performance issues in unusual situations.
The first allows an 'unlink' to complete in 100ms where it will
take 15 seconds in the current kernel.
The second uses non-STABLE writes in a case where STABLE writes
are current used but not needed, resulting in reduced throughput.

Thanks,
NeilBrown

---

NeilBrown (2):
NFSv4: use exponential retry on NFS4ERR_DELAY for async requests.
NFS: don't use STABLE writes during writeback.

fs/nfs/nfs4proc.c | 65 ++++++++++++++++++++++++++++++-----------------
fs/nfs/write.c | 7 ++++-
include/linux/nfs_xdr.h | 2 +
3 files changed, 49 insertions(+), 25 deletions(-)

--
Signature

2014-09-18 06:10:03

by NeilBrown

[permalink] [raw]

Subject: [PATCH 1/2] NFSv4: use exponential retry on NFS4ERR_DELAY for async requests.

Currently asynchronous NFSv4 request will be retried with
exponential timeout (from 1/10 to 15 seconds), but async
requests will always use a 15second retry.

Some "async" requests are really synchronous though. The
async mechanism is used to allow the request to continue if
the requesting process is killed.
In those cases, an exponential retry is appropriate.

For example, if two different clients both open a file and
get a READ delegation, and one client then unlinks the file
(while still holding an open file descriptor), that unlink
will used the "silly-rename" handling which is async.
The first rename will result in NFS4ERR_DELAY while the
delegation is reclaimed from the other client. The rename
will not be retried for 15 seconds, causing an unlink to take
15 seconds rather than 100msec.

This patch only added exponential timeout for async unlink and
async rename. Other async calls, such as 'close' are sometimes
waited for so they might benefit from exponential timeout too.

Signed-off-by: NeilBrown <[email protected]>
---
fs/nfs/nfs4proc.c | 65 ++++++++++++++++++++++++++++++-----------------
include/linux/nfs_xdr.h | 2 +
2 files changed, 44 insertions(+), 23 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index ac2dd953fc18..882196adbfe7 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -77,7 +77,7 @@ struct nfs4_opendata;
static int _nfs4_proc_open(struct nfs4_opendata *data);
static int _nfs4_recover_proc_open(struct nfs4_opendata *data);
static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
-static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *);
+static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *, long *);
static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr);
static int nfs4_proc_getattr(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, struct nfs4_label *label);
static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label);
@@ -314,20 +314,30 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent
kunmap_atomic(start);
}

+static long nfs4_update_delay(long *timeout)
+{
+ long ret;
+ if (!timeout)
+ return NFS4_POLL_RETRY_MAX;
+ if (*timeout <= 0)
+ *timeout = NFS4_POLL_RETRY_MIN;
+ if (*timeout > NFS4_POLL_RETRY_MAX)
+ *timeout = NFS4_POLL_RETRY_MAX;
+ ret = *timeout;
+ *timeout <<= 1;
+ return ret;
+}
+
static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
{
int res = 0;

might_sleep();

- if (*timeout <= 0)
- *timeout = NFS4_POLL_RETRY_MIN;
- if (*timeout > NFS4_POLL_RETRY_MAX)
- *timeout = NFS4_POLL_RETRY_MAX;
- freezable_schedule_timeout_killable_unsafe(*timeout);
+ freezable_schedule_timeout_killable_unsafe(
+ nfs4_update_delay(timeout));
if (fatal_signal_pending(current))
res = -ERESTARTSYS;
- *timeout <<= 1;
return res;
}

@@ -2589,7 +2599,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
if (calldata->arg.fmode == 0)
break;
default:
- if (nfs4_async_handle_error(task, server, state) == -EAGAIN) {
+ if (nfs4_async_handle_error(task, server, state, NULL) == -EAGAIN) {
rpc_restart_call_prepare(task);
goto out_release;
}
@@ -3576,7 +3586,8 @@ static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir)

if (!nfs4_sequence_done(task, &res->seq_res))
return 0;
- if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN)
+ if (nfs4_async_handle_error(task, res->server, NULL,
+ &data->timeout) == -EAGAIN)
return 0;
update_changeattr(dir, &res->cinfo);
return 1;
@@ -3609,7 +3620,7 @@ static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir,

if (!nfs4_sequence_done(task, &res->seq_res))
return 0;
- if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN)
+ if (nfs4_async_handle_error(task, res->server, NULL, &data->timeout) == -EAGAIN)
return 0;

update_changeattr(old_dir, &res->old_cinfo);
@@ -4113,7 +4124,8 @@ static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_pgio_header *hdr)

trace_nfs4_read(hdr, task->tk_status);
if (nfs4_async_handle_error(task, server,
- hdr->args.context->state) == -EAGAIN) {
+ hdr->args.context->state,
+ NULL) == -EAGAIN) {
rpc_restart_call_prepare(task);
return -EAGAIN;
}
@@ -4181,10 +4193,11 @@ static int nfs4_write_done_cb(struct rpc_task *task,
struct nfs_pgio_header *hdr)
{
struct inode *inode = hdr->inode;
-
+
trace_nfs4_write(hdr, task->tk_status);
if (nfs4_async_handle_error(task, NFS_SERVER(inode),
- hdr->args.context->state) == -EAGAIN) {
+ hdr->args.context->state,
+ NULL) == -EAGAIN) {
rpc_restart_call_prepare(task);
return -EAGAIN;
}
@@ -4264,7 +4277,8 @@ static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_commit_data *da
struct inode *inode = data->inode;

trace_nfs4_commit(data, task->tk_status);
- if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) {
+ if (nfs4_async_handle_error(task, NFS_SERVER(inode),
+ NULL, NULL) == -EAGAIN) {
rpc_restart_call_prepare(task);
return -EAGAIN;
}
@@ -4817,7 +4831,8 @@ out:

static int
-nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs4_state *state)
+nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
+ struct nfs4_state *state, long *timeout)
{
struct nfs_client *clp = server->nfs_client;

@@ -4867,6 +4882,8 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
#endif /* CONFIG_NFS_V4_1 */
case -NFS4ERR_DELAY:
nfs_inc_server_stats(server, NFSIOS_DELAY);
+ rpc_delay(task, nfs4_update_delay(timeout));
+ goto restart_call;
case -NFS4ERR_GRACE:
rpc_delay(task, NFS4_POLL_RETRY_MAX);
case -NFS4ERR_RETRY_UNCACHED_REP:
@@ -5107,8 +5124,8 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
pnfs_roc_set_barrier(data->inode, data->roc_barrier);
break;
default:
- if (nfs4_async_handle_error(task, data->res.server, NULL) ==
- -EAGAIN) {
+ if (nfs4_async_handle_error(task, data->res.server,
+ NULL, NULL) == -EAGAIN) {
rpc_restart_call_prepare(task);
return;
}
@@ -5372,7 +5389,8 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
case -NFS4ERR_EXPIRED:
break;
default:
- if (nfs4_async_handle_error(task, calldata->server, NULL) == -EAGAIN)
+ if (nfs4_async_handle_error(task, calldata->server,
+ NULL, NULL) == -EAGAIN)
rpc_restart_call_prepare(task);
}
nfs_release_seqid(calldata->arg.seqid);
@@ -5978,7 +5996,8 @@ static void nfs4_release_lockowner_done(struct rpc_task *task, void *calldata)
break;
case -NFS4ERR_LEASE_MOVED:
case -NFS4ERR_DELAY:
- if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN)
+ if (nfs4_async_handle_error(task, server,
+ NULL, NULL) == -EAGAIN)
rpc_restart_call_prepare(task);
}
}
@@ -7590,7 +7609,7 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
pnfs_free_lseg_list(&head);
}
}
- if (nfs4_async_handle_error(task, server, state) == -EAGAIN)
+ if (nfs4_async_handle_error(task, server, state, NULL) == -EAGAIN)
rpc_restart_call_prepare(task);
out:
dprintk("<-- %s\n", __func__);
@@ -7750,7 +7769,7 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
case 0:
break;
case -NFS4ERR_DELAY:
- if (nfs4_async_handle_error(task, server, NULL) != -EAGAIN)
+ if (nfs4_async_handle_error(task, server, NULL, NULL) != -EAGAIN)
break;
rpc_restart_call_prepare(task);
return;
@@ -7929,7 +7948,7 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
case 0:
break;
default:
- if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) {
+ if (nfs4_async_handle_error(task, server, NULL, NULL) == -EAGAIN) {
rpc_restart_call_prepare(task);
return;
}
@@ -8225,7 +8244,7 @@ static void nfs41_free_stateid_done(struct rpc_task *task, void *calldata)

switch (task->tk_status) {
case -NFS4ERR_DELAY:
- if (nfs4_async_handle_error(task, data->server, NULL) == -EAGAIN)
+ if (nfs4_async_handle_error(task, data->server, NULL, NULL) == -EAGAIN)
rpc_restart_call_prepare(task);
}
}
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 0040629894df..9e02174af87e 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1346,6 +1346,7 @@ struct nfs_unlinkdata {
struct inode *dir;
struct rpc_cred *cred;
struct nfs_fattr dir_attr;
+ long timeout;
};

struct nfs_renamedata {
@@ -1359,6 +1360,7 @@ struct nfs_renamedata {
struct dentry *new_dentry;
struct nfs_fattr new_fattr;
void (*complete)(struct rpc_task *, struct nfs_renamedata *);
+ long timeout;
};

struct nfs_access_entry;

2014-09-18 06:10:09

by NeilBrown

[permalink] [raw]

Subject: [PATCH 2/2] NFS: don't use STABLE writes during writeback.

commit b31268ac793fd300da66b9c28bbf0a200339ab96
FS: Use stable writes when not doing a bulk flush

was a bit heavy handed.
The particular problem that lead to this patch was that
small writes to an O_SYNC file we being written as UNSTABLE writes
followed by a commit.
This is appropriate for large writes (which require multiple NFS
requests) but for small writes (single NFS request), using
NFS_FILE_SYNC is more efficient.

So that patch causes the code to select between the two methods
depending on how many nfs requests get generated.

Unfortunately this ends up applying to non O_SYNC writes as well.
In particular if you memory-map a file and update random pages, then
when they are eventually written out by writeback they will go as
NFS_FILE_SYNC. This is inefficient and slows down the application.

So: only set FLUSH_COND_STABLE when wbc->sync_mode is WB_SYNC_ALL.
With this patch:
O_SYNC writes are NFS_FILE_SYNC for single requests, and NFS_UNSTABLE
followed by COMMIT for multiple requests
Writing immediately before close of fsync follow the same pattern.
Non-O_SYNC writes without an fsync of close eventually get flushed
out as UNSTABLE and a commit follows eventually as appropriate.

Signed-off-by: NeilBrown <[email protected]>
---
fs/nfs/write.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 3066c7fcb565..8d4aae9d977a 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -271,11 +271,14 @@ static void nfs_mark_uptodate(struct nfs_page *req)

static int wb_priority(struct writeback_control *wbc)
{
+ int ret = 0;
if (wbc->for_reclaim)
return FLUSH_HIGHPRI | FLUSH_STABLE;
+ if (wbc->sync_mode == WB_SYNC_ALL)
+ ret = FLUSH_COND_STABLE;
if (wbc->for_kupdate || wbc->for_background)
- return FLUSH_LOWPRI | FLUSH_COND_STABLE;
- return FLUSH_COND_STABLE;
+ ret |= FLUSH_LOWPRI;
+ return ret;
}

/*