2021-03-23 05:49:47

by Nagendra Tomar

[permalink] [raw]
Subject: [PATCH 2/5] nfs: Add mount option for forcing RPC requests to one file over one connection

From: Nagendra S Tomar <[email protected]>

Add a new mount option, ncpolicy=roundrobin|hash, which allows the user
to select the nconnect policy for the given mount. It defaults to
roundrobin. The user-selected policy is stored in the nfs_client
structure and passed down to the RPC client (struct rpc_clnt), where
transport selection is done accordingly.
Also add a new function pointer, p_fhhash, to struct rpc_procinfo.
A procedure can supply it to return the target file's hash for a given
RPC; that hash is then used to bind all RPCs for a file to one xprt.

Signed-off-by: Nagendra S Tomar <[email protected]>
---
fs/nfs/client.c | 3 +++
fs/nfs/fs_context.c | 26 ++++++++++++++++++++++++++
fs/nfs/internal.h | 2 ++
fs/nfs/nfs3client.c | 4 +++-
fs/nfs/nfs4client.c | 14 +++++++++++---
fs/nfs/super.c | 7 ++++++-
include/linux/nfs_fs_sb.h | 1 +
include/linux/sunrpc/clnt.h | 15 +++++++++++++++
net/sunrpc/clnt.c | 34 ++++++++++++++++++++++++++++------
9 files changed, 95 insertions(+), 11 deletions(-)

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index ff5c4d0d6d13..5c2809d8368a 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -179,6 +179,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)

clp->cl_proto = cl_init->proto;
clp->cl_nconnect = cl_init->nconnect;
+ clp->cl_ncpolicy = cl_init->ncpolicy;
clp->cl_net = get_net(cl_init->net);

clp->cl_principal = "*";
@@ -506,6 +507,7 @@ int nfs_create_rpc_client(struct nfs_client *clp,
.net = clp->cl_net,
.protocol = clp->cl_proto,
.nconnect = clp->cl_nconnect,
+ .ncpolicy = clp->cl_ncpolicy,
.address = (struct sockaddr *)&clp->cl_addr,
.addrsize = clp->cl_addrlen,
.timeout = cl_init->timeparms,
@@ -678,6 +680,7 @@ static int nfs_init_server(struct nfs_server *server,
.timeparms = &timeparms,
.cred = server->cred,
.nconnect = ctx->nfs_server.nconnect,
+ .ncpolicy = ctx->nfs_server.ncpolicy,
.init_flags = (1UL << NFS_CS_REUSEPORT),
};
struct nfs_client *clp;
diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
index 971a9251c1d9..7bb8f1c8356f 100644
--- a/fs/nfs/fs_context.c
+++ b/fs/nfs/fs_context.c
@@ -60,6 +60,7 @@ enum nfs_param {
Opt_mountvers,
Opt_namelen,
Opt_nconnect,
+ Opt_ncpolicy,
Opt_port,
Opt_posix,
Opt_proto,
@@ -127,6 +128,18 @@ static const struct constant_table nfs_param_enums_write[] = {
{}
};

+enum { /* parsed values of the "ncpolicy" mount option */
+ Opt_ncpolicy_roundrobin, /* "roundrobin" or "rr" */
+ Opt_ncpolicy_hash, /* "hash" */
+};
+
+static const struct constant_table nfs_param_enums_ncpolicy[] = {
+ { "hash", Opt_ncpolicy_hash },
+ { "roundrobin", Opt_ncpolicy_roundrobin },
+ { "rr", Opt_ncpolicy_roundrobin }, /* alias of "roundrobin" */
+ {}
+};
+
static const struct fs_parameter_spec nfs_fs_parameters[] = {
fsparam_flag_no("ac", Opt_ac),
fsparam_u32 ("acdirmax", Opt_acdirmax),
@@ -158,6 +171,7 @@ static const struct fs_parameter_spec nfs_fs_parameters[] = {
fsparam_u32 ("mountvers", Opt_mountvers),
fsparam_u32 ("namlen", Opt_namelen),
fsparam_u32 ("nconnect", Opt_nconnect),
+ fsparam_enum ("ncpolicy", Opt_ncpolicy, nfs_param_enums_ncpolicy),
fsparam_string("nfsvers", Opt_vers),
fsparam_u32 ("port", Opt_port),
fsparam_flag_no("posix", Opt_posix),
@@ -749,6 +763,18 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
goto out_of_bounds;
ctx->nfs_server.nconnect = result.uint_32;
break;
+ case Opt_ncpolicy:
+ switch (result.uint_32) {
+ case Opt_ncpolicy_roundrobin:
+ ctx->nfs_server.ncpolicy = ncpolicy_roundrobin;
+ break;
+ case Opt_ncpolicy_hash:
+ ctx->nfs_server.ncpolicy = ncpolicy_hash;
+ break;
+ default:
+ goto out_invalid_value;
+ }
+ break;
case Opt_lookupcache:
switch (result.uint_32) {
case Opt_lookupcache_all:
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 7b644d6c09e4..e6ca664d7e91 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -67,6 +67,7 @@ struct nfs_client_initdata {
int proto;
u32 minorversion;
unsigned int nconnect;
+ enum ncpolicy ncpolicy;
struct net *net;
const struct rpc_timeout *timeparms;
const struct cred *cred;
@@ -120,6 +121,7 @@ struct nfs_fs_context {
int port;
unsigned short protocol;
unsigned short nconnect;
+ enum ncpolicy ncpolicy;
unsigned short export_path_len;
} nfs_server;

diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c
index 5601e47360c2..f8a648f7492a 100644
--- a/fs/nfs/nfs3client.c
+++ b/fs/nfs/nfs3client.c
@@ -102,8 +102,10 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv,
return ERR_PTR(-EINVAL);
cl_init.hostname = buf;

- if (mds_clp->cl_nconnect > 1 && ds_proto == XPRT_TRANSPORT_TCP)
+ if (mds_clp->cl_nconnect > 1 && ds_proto == XPRT_TRANSPORT_TCP) {
cl_init.nconnect = mds_clp->cl_nconnect;
+ cl_init.ncpolicy = mds_clp->cl_ncpolicy;
+ }

if (mds_srv->flags & NFS_MOUNT_NORESVPORT)
__set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 889a9f4c0310..c967c214129a 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -863,6 +863,7 @@ static int nfs4_set_client(struct nfs_server *server,
const char *ip_addr,
int proto, const struct rpc_timeout *timeparms,
u32 minorversion, unsigned int nconnect,
+ enum ncpolicy ncpolicy,
struct net *net)
{
struct nfs_client_initdata cl_init = {
@@ -881,8 +882,10 @@ static int nfs4_set_client(struct nfs_server *server,

if (minorversion == 0)
__set_bit(NFS_CS_REUSEPORT, &cl_init.init_flags);
- if (proto == XPRT_TRANSPORT_TCP)
+ if (proto == XPRT_TRANSPORT_TCP) {
cl_init.nconnect = nconnect;
+ cl_init.ncpolicy = ncpolicy;
+ }

if (server->flags & NFS_MOUNT_NORESVPORT)
__set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
@@ -950,8 +953,10 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
return ERR_PTR(-EINVAL);
cl_init.hostname = buf;

- if (mds_clp->cl_nconnect > 1 && ds_proto == XPRT_TRANSPORT_TCP)
+ if (mds_clp->cl_nconnect > 1 && ds_proto == XPRT_TRANSPORT_TCP) {
cl_init.nconnect = mds_clp->cl_nconnect;
+ cl_init.ncpolicy = mds_clp->cl_ncpolicy;
+ }

if (mds_srv->flags & NFS_MOUNT_NORESVPORT)
__set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
@@ -1120,6 +1125,7 @@ static int nfs4_init_server(struct nfs_server *server, struct fs_context *fc)
&timeparms,
ctx->minorversion,
ctx->nfs_server.nconnect,
+ ctx->nfs_server.ncpolicy,
fc->net_ns);
if (error < 0)
return error;
@@ -1209,6 +1215,7 @@ struct nfs_server *nfs4_create_referral_server(struct fs_context *fc)
parent_server->client->cl_timeout,
parent_client->cl_mvops->minor_version,
parent_client->cl_nconnect,
+ parent_client->cl_ncpolicy,
parent_client->cl_net);
if (!error)
goto init_server;
@@ -1224,6 +1231,7 @@ struct nfs_server *nfs4_create_referral_server(struct fs_context *fc)
parent_server->client->cl_timeout,
parent_client->cl_mvops->minor_version,
parent_client->cl_nconnect,
+ parent_client->cl_ncpolicy,
parent_client->cl_net);
if (error < 0)
goto error;
@@ -1321,7 +1329,7 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname,
error = nfs4_set_client(server, hostname, sap, salen, buf,
clp->cl_proto, clnt->cl_timeout,
clp->cl_minorversion,
- clp->cl_nconnect, net);
+ clp->cl_nconnect, clp->cl_ncpolicy, net);
clear_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status);
if (error != 0) {
nfs_server_insert_lists(server);
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 94885c6f8f54..8719be70051b 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -481,8 +481,13 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
seq_printf(m, ",proto=%s",
rpc_peeraddr2str(nfss->client, RPC_DISPLAY_NETID));
rcu_read_unlock();
- if (clp->cl_nconnect > 0)
+ if (clp->cl_nconnect > 0) {
seq_printf(m, ",nconnect=%u", clp->cl_nconnect);
+ if (clp->cl_ncpolicy == ncpolicy_roundrobin)
+ seq_puts(m, ",ncpolicy=roundrobin");
+ else if (clp->cl_ncpolicy == ncpolicy_hash)
+ seq_puts(m, ",ncpolicy=hash");
+ }
if (version == 4) {
if (nfss->port != NFS_PORT)
seq_printf(m, ",port=%u", nfss->port);
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 6f76b32a0238..737f4d231e23 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -62,6 +62,7 @@ struct nfs_client {

u32 cl_minorversion;/* NFSv4 minorversion */
unsigned int cl_nconnect; /* Number of connections */
+ enum ncpolicy cl_ncpolicy; /* nconnect policy */
const char * cl_principal; /* used for machine cred */

#if IS_ENABLED(CONFIG_NFS_V4)
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 02e7a5863d28..aa1c1706f4d5 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -28,6 +28,15 @@
#include <net/ipv6.h>
#include <linux/sunrpc/xprtmultipath.h>

+/*
+ * Policies for controlling distribution of RPC requests over multiple
+ * nconnect connections.
+ */
+enum ncpolicy {
+ ncpolicy_roundrobin, /* Select round-robin (the default). */
+ ncpolicy_hash, /* Select based on target filehandle hash. */
+};
+
struct rpc_inode;

/*
@@ -40,6 +49,7 @@ struct rpc_clnt {
struct list_head cl_tasks; /* List of tasks */
spinlock_t cl_lock; /* spinlock */
struct rpc_xprt __rcu * cl_xprt; /* transport */
+ enum ncpolicy cl_ncpolicy; /* nconnect policy */
const struct rpc_procinfo *cl_procinfo; /* procedure info */
u32 cl_prog, /* RPC program number */
cl_vers, /* RPC version number */
@@ -101,6 +111,8 @@ struct rpc_version {
unsigned int *counts; /* call counts */
};

+typedef u32 (*getfhhash_t)(const void *obj); /* fh hash from RPC args */
+
/*
* Procedure information
*/
@@ -108,6 +120,7 @@ struct rpc_procinfo {
u32 p_proc; /* RPC procedure number */
kxdreproc_t p_encode; /* XDR encode function */
kxdrdproc_t p_decode; /* XDR decode function */
+ getfhhash_t p_fhhash; /* Returns target fh hash */
unsigned int p_arglen; /* argument hdr length (u32) */
unsigned int p_replen; /* reply hdr length (u32) */
unsigned int p_timer; /* Which RTT timer to use */
@@ -129,6 +142,7 @@ struct rpc_create_args {
u32 version;
rpc_authflavor_t authflavor;
u32 nconnect;
+ enum ncpolicy ncpolicy;
unsigned long flags;
char *client_name;
struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */
@@ -247,4 +261,5 @@ static inline void rpc_task_close_connection(struct rpc_task *task)
if (task->tk_xprt)
xprt_force_disconnect(task->tk_xprt);
}
+
#endif /* _LINUX_SUNRPC_CLNT_H */
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 1b2a02460601..ed470a75e91d 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -410,6 +410,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args,
}

rpc_clnt_set_transport(clnt, xprt, timeout);
+ clnt->cl_ncpolicy = args->ncpolicy;
xprt_iter_init(&clnt->cl_xpi, xps);
xprt_switch_put(xps);

@@ -640,6 +641,7 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args,
new->cl_discrtry = clnt->cl_discrtry;
new->cl_chatty = clnt->cl_chatty;
new->cl_principal = clnt->cl_principal;
+ new->cl_ncpolicy = clnt->cl_ncpolicy;
return new;

out_err:
@@ -1053,9 +1055,10 @@ rpc_task_get_first_xprt(struct rpc_clnt *clnt)
}

static struct rpc_xprt *
-rpc_task_get_next_xprt(struct rpc_clnt *clnt)
+rpc_task_get_next_xprt(struct rpc_clnt *clnt, u32 hash)
{
- return rpc_task_get_xprt(clnt, xprt_iter_get_next(&clnt->cl_xpi, 0));
+ return rpc_task_get_xprt(clnt,
+ xprt_iter_get_next(&clnt->cl_xpi, hash));
}

static
@@ -1065,8 +1068,16 @@ void rpc_task_set_transport(struct rpc_task *task, struct rpc_clnt *clnt)
return;
if (task->tk_flags & RPC_TASK_NO_ROUND_ROBIN)
task->tk_xprt = rpc_task_get_first_xprt(clnt);
- else
- task->tk_xprt = rpc_task_get_next_xprt(clnt);
+ else {
+ u32 xprt_hint = 0;
+
+ if (clnt->cl_ncpolicy == ncpolicy_hash &&
+ task->tk_msg.rpc_proc->p_fhhash) {
+ xprt_hint = task->tk_msg.rpc_proc->p_fhhash(
+ task->tk_msg.rpc_argp);
+ }
+ task->tk_xprt = rpc_task_get_next_xprt(clnt, xprt_hint);
+ }
}

static
@@ -1130,8 +1141,8 @@ struct rpc_task *rpc_run_task(const struct rpc_task_setup *task_setup_data)
if (!RPC_IS_ASYNC(task))
task->tk_flags |= RPC_TASK_CRED_NOREF;

- rpc_task_set_client(task, task_setup_data->rpc_client);
rpc_task_set_rpc_message(task, task_setup_data->rpc_message);
+ rpc_task_set_client(task, task_setup_data->rpc_client);

if (task->tk_action == NULL)
rpc_call_start(task);
@@ -1636,6 +1647,7 @@ call_start(struct rpc_task *task)
/* Increment call count (version might not be valid for ping) */
if (clnt->cl_program->version[clnt->cl_vers])
clnt->cl_program->version[clnt->cl_vers]->counts[idx]++;
+
clnt->cl_stats->rpccnt++;
task->tk_action = call_reserve;
rpc_task_set_transport(task, clnt);
@@ -2888,7 +2900,17 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt,
connect_timeout,
reconnect_timeout);

- rpc_xprt_switch_set_roundrobin(xps);
+ switch (clnt->cl_ncpolicy) {
+ case ncpolicy_roundrobin:
+ default:
+ WARN_ON(clnt->cl_ncpolicy != ncpolicy_roundrobin);
+ rpc_xprt_switch_set_roundrobin(xps);
+ break;
+ case ncpolicy_hash:
+ rpc_xprt_switch_set_hash(xps);
+ break;
+ }
+
if (setup) {
ret = setup(clnt, xps, xprt, data);
if (ret != 0)