2021-02-15 17:43:44

by Dan Aloni

[permalink] [raw]
Subject: [PATCH v1 0/8] sysfs files for multipath transport control

Hi Anna,

This patchset builds ontop v2 of your 'sysfs files for changing IP' changeset.

- The patchset adds two more sysfs objects, for one for transport and another
for multipath.
- Also, `net` renamed to `client`, and `client` now has symlink to its principal
transport. A client also has a symlink to its `multipath` object.
- The transport interface lets you change `dstaddr` of individual transports,
when `nconnect` is used (or if it wasn't used and these were added with the
new interface).
- The interface to add a transport is using a single string written to 'add',
for example:

echo 'dstaddr 192.168.40.8 kind rdma' \
> /sys/kernel/sunrpc/client/0/multipath/add

These changes are independent of the method used to obtain a sunrpc ID for a
mountpoint. For that I've sent a concept patch showing an fspick-based
implementation: https://marc.info/?l=linux-nfs&m=161332454821849&w=4

Thanks

Dan Aloni (8):
sunrpc: rename 'net' to 'client'
sunrpc: add xprt id
sunrpc: add a directory per sunrpc xprt
sunrpc: have client directory a symlink to the root transport
sunrpc: add IDs to multipath
sunrpc: add multipath directory and symlink from client
sunrpc: change rpc_clnt_add_xprt() to rpc_add_xprt()
sunrpc: introduce an 'add' node to 'multipath' sysfs directory

fs/nfs/pnfs_nfs.c | 12 +-
include/linux/sunrpc/clnt.h | 12 +-
include/linux/sunrpc/xprt.h | 3 +
include/linux/sunrpc/xprtmultipath.h | 6 +
net/sunrpc/clnt.c | 39 +--
net/sunrpc/sunrpc_syms.c | 2 +
net/sunrpc/sysfs.c | 403 +++++++++++++++++++++++----
net/sunrpc/sysfs.h | 21 +-
net/sunrpc/xprt.c | 29 ++
net/sunrpc/xprtmultipath.c | 37 +++
10 files changed, 487 insertions(+), 77 deletions(-)

--
2.26.2


2021-02-15 17:43:53

by Dan Aloni

[permalink] [raw]
Subject: [PATCH v1 6/8] sunrpc: add multipath directory and symlink from client

This also adds a `list` attribute to multipath directory that provides
the transport IDs of the transports contained in the multipath object.

Signed-off-by: Dan Aloni <[email protected]>
---
include/linux/sunrpc/xprtmultipath.h | 2 +
net/sunrpc/sysfs.c | 122 +++++++++++++++++++++++++++
net/sunrpc/sysfs.h | 8 ++
net/sunrpc/xprtmultipath.c | 10 +++
4 files changed, 142 insertions(+)

diff --git a/include/linux/sunrpc/xprtmultipath.h b/include/linux/sunrpc/xprtmultipath.h
index ef95a6f18ccf..2d0832dc10f5 100644
--- a/include/linux/sunrpc/xprtmultipath.h
+++ b/include/linux/sunrpc/xprtmultipath.h
@@ -24,6 +24,8 @@ struct rpc_xprt_switch {

const struct rpc_xprt_iter_ops *xps_iter_ops;

+ void *xps_sysfs; /* /sys/kernel/sunrpc/multipath/<id> */
+
struct rcu_head xps_rcu;
};

diff --git a/net/sunrpc/sysfs.c b/net/sunrpc/sysfs.c
index ae608235d7e0..3592f3b862b2 100644
--- a/net/sunrpc/sysfs.c
+++ b/net/sunrpc/sysfs.c
@@ -10,6 +10,7 @@

struct kobject *rpc_client_kobj;
struct kobject *rpc_xprt_kobj;
+struct kobject *rpc_xps_kobj;
static struct kset *rpc_sunrpc_kset;

static void rpc_netns_object_release(struct kobject *kobj)
@@ -58,8 +59,15 @@ int rpc_sysfs_init(void)
if (!rpc_xprt_kobj)
goto err_client;

+ rpc_xps_kobj = rpc_netns_object_alloc("multipath", rpc_sunrpc_kset, NULL);
+ if (!rpc_xps_kobj)
+ goto err_xprt;
+
return 0;

+err_xprt:
+ kobject_put(rpc_xprt_kobj);
+ rpc_xprt_kobj = NULL;
err_client:
kobject_put(rpc_client_kobj);
rpc_client_kobj = NULL;
@@ -95,6 +103,7 @@ static struct kobj_type rpc_netns_client_type = {

void rpc_sysfs_exit(void)
{
+ kobject_put(rpc_xps_kobj);
kobject_put(rpc_xprt_kobj);
kobject_put(rpc_client_kobj);
kset_unregister(rpc_sunrpc_kset);
@@ -122,17 +131,29 @@ void rpc_netns_client_sysfs_setup(struct rpc_clnt *clnt, struct net *net)
struct rpc_netns_client *rpc_client;
struct rpc_xprt *xprt = rcu_dereference(clnt->cl_xprt);
struct rpc_netns_xprt *rpc_xprt;
+ struct rpc_netns_multipath *rpc_multipath;
+ struct rpc_xprt_switch *xps;
int ret;

+ xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch));
+
rpc_client = rpc_netns_client_alloc(rpc_client_kobj, net, clnt->cl_clid);
if (rpc_client) {
rpc_xprt = xprt->sysfs;
ret = sysfs_create_link_nowarn(&rpc_client->kobject,
&rpc_xprt->kobject, "transport");
+ if (xps) {
+ rpc_multipath = xps->xps_sysfs;
+ ret = sysfs_create_link_nowarn(&rpc_client->kobject,
+ &rpc_multipath->kobject,
+ "multipath");
+ }
clnt->cl_sysfs = rpc_client;
rpc_client->clnt = clnt;
kobject_uevent(&rpc_client->kobject, KOBJ_ADD);
}
+
+ xprt_switch_put(xps);
}

void rpc_netns_client_sysfs_destroy(struct rpc_clnt *clnt)
@@ -141,6 +162,7 @@ void rpc_netns_client_sysfs_destroy(struct rpc_clnt *clnt)

if (rpc_client) {
sysfs_remove_link(&rpc_client->kobject, "transport");
+ sysfs_remove_link(&rpc_client->kobject, "multipath");
kobject_uevent(&rpc_client->kobject, KOBJ_REMOVE);
kobject_del(&rpc_client->kobject);
kobject_put(&rpc_client->kobject);
@@ -255,3 +277,103 @@ void rpc_netns_xprt_sysfs_destroy(struct rpc_xprt *xprt)
xprt->sysfs = NULL;
}
}
+
+static void rpc_netns_multipath_release(struct kobject *kobj)
+{
+ struct rpc_netns_multipath *c;
+
+ c = container_of(kobj, struct rpc_netns_multipath, kobject);
+ kfree(c);
+}
+
+static const void *rpc_netns_multipath_namespace(struct kobject *kobj)
+{
+ return container_of(kobj, struct rpc_netns_multipath, kobject)->net;
+}
+
+static ssize_t rpc_netns_multipath_list_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct rpc_netns_multipath *c =
+ container_of(kobj, struct rpc_netns_multipath, kobject);
+ struct rpc_xprt_switch *xps = c->xps;
+ struct rpc_xprt_iter xpi;
+ int pos = 0;
+
+ xprt_iter_init_listall(&xpi, xps);
+ for (;;) {
+ struct rpc_xprt *xprt = xprt_iter_get_next(&xpi);
+ if (!xprt)
+ break;
+
+ snprintf(&buf[pos], PAGE_SIZE - pos, "%d\n", xprt->id);
+ pos += strlen(&buf[pos]);
+ xprt_put(xprt);
+ }
+ xprt_iter_destroy(&xpi);
+
+ return pos;
+}
+
+static ssize_t rpc_netns_multipath_list_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ return -EINVAL;
+}
+
+static struct kobj_attribute rpc_netns_multipath_list = __ATTR(list,
+ 0644, rpc_netns_multipath_list_show, rpc_netns_multipath_list_store);
+
+
+static struct attribute *rpc_netns_multipath_attrs[] = {
+ &rpc_netns_multipath_list.attr,
+ NULL,
+};
+
+static struct kobj_type rpc_netns_multipath_type = {
+ .release = rpc_netns_multipath_release,
+ .default_attrs = rpc_netns_multipath_attrs,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .namespace = rpc_netns_multipath_namespace,
+};
+
+static struct rpc_netns_multipath *rpc_netns_multipath_alloc(struct kobject *parent,
+ struct net *net, int id)
+{
+ struct rpc_netns_multipath *p;
+
+ p = kzalloc(sizeof(*p), GFP_KERNEL);
+ if (p) {
+ p->net = net;
+ p->kobject.kset = rpc_sunrpc_kset;
+ if (kobject_init_and_add(&p->kobject, &rpc_netns_multipath_type,
+ parent, "%d", id) == 0)
+ return p;
+ kobject_put(&p->kobject);
+ }
+ return NULL;
+}
+
+void rpc_netns_multipath_sysfs_setup(struct rpc_xprt_switch *xps, struct net *net)
+{
+ struct rpc_netns_multipath *rpc_multipath;
+
+ rpc_multipath = rpc_netns_multipath_alloc(rpc_xps_kobj, net, xps->xps_id);
+ if (rpc_multipath) {
+ xps->xps_sysfs = rpc_multipath;
+ rpc_multipath->xps = xps;
+ kobject_uevent(&rpc_multipath->kobject, KOBJ_ADD);
+ }
+}
+
+void rpc_netns_multipath_sysfs_destroy(struct rpc_xprt_switch *xps)
+{
+ struct rpc_netns_multipath *rpc_multipath = xps->xps_sysfs;
+
+ if (rpc_multipath) {
+ kobject_uevent(&rpc_multipath->kobject, KOBJ_REMOVE);
+ kobject_del(&rpc_multipath->kobject);
+ kobject_put(&rpc_multipath->kobject);
+ xps->xps_sysfs = NULL;
+ }
+}
diff --git a/net/sunrpc/sysfs.h b/net/sunrpc/sysfs.h
index e08dd7f6a1ec..b2e379f78b91 100644
--- a/net/sunrpc/sysfs.h
+++ b/net/sunrpc/sysfs.h
@@ -17,6 +17,12 @@ struct rpc_netns_xprt {
struct rpc_xprt *xprt;
};

+struct rpc_netns_multipath {
+ struct kobject kobject;
+ struct net *net;
+ struct rpc_xprt_switch *xps;
+};
+
extern struct kobject *rpc_client_kobj;
extern struct kobject *rpc_xprt_kobj;

@@ -27,5 +33,7 @@ void rpc_netns_client_sysfs_setup(struct rpc_clnt *clnt, struct net *net);
void rpc_netns_client_sysfs_destroy(struct rpc_clnt *clnt);
void rpc_netns_xprt_sysfs_setup(struct rpc_xprt *xprt, struct net *net);
void rpc_netns_xprt_sysfs_destroy(struct rpc_xprt *xprt);
+void rpc_netns_multipath_sysfs_setup(struct rpc_xprt_switch *xps, struct net *net);
+void rpc_netns_multipath_sysfs_destroy(struct rpc_xprt_switch *xps);

#endif
diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c
index 52a9584b23af..d03fb3bb74ce 100644
--- a/net/sunrpc/xprtmultipath.c
+++ b/net/sunrpc/xprtmultipath.c
@@ -19,6 +19,8 @@
#include <linux/sunrpc/addr.h>
#include <linux/sunrpc/xprtmultipath.h>

+#include "sysfs.h"
+
typedef struct rpc_xprt *(*xprt_switch_find_xprt_t)(struct rpc_xprt_switch *xps,
const struct rpc_xprt *cur);

@@ -83,6 +85,9 @@ void rpc_xprt_switch_remove_xprt(struct rpc_xprt_switch *xps,
spin_lock(&xps->xps_lock);
xprt_switch_remove_xprt_locked(xps, xprt);
spin_unlock(&xps->xps_lock);
+
+ if (!xps->xps_net)
+ rpc_netns_multipath_sysfs_destroy(xps);
xprt_put(xprt);
}

@@ -135,6 +140,10 @@ struct rpc_xprt_switch *xprt_switch_alloc(struct rpc_xprt *xprt,
INIT_LIST_HEAD(&xps->xps_xprt_list);
xps->xps_iter_ops = &rpc_xprt_iter_singular;
xprt_switch_add_xprt_locked(xps, xprt);
+ xps->xps_sysfs = NULL;
+
+ if (xprt->xprt_net != NULL)
+ rpc_netns_multipath_sysfs_setup(xps, xprt->xprt_net);
}

return xps;
@@ -162,6 +171,7 @@ static void xprt_switch_free(struct kref *kref)
struct rpc_xprt_switch, xps_kref);

xprt_switch_free_entries(xps);
+ rpc_netns_multipath_sysfs_destroy(xps);
xprt_switch_free_id(xps);
kfree_rcu(xps, xps_rcu);
}
--
2.26.2

2021-03-04 06:32:48

by Olga Kornievskaia

[permalink] [raw]
Subject: Re: [PATCH v1 0/8] sysfs files for multipath transport control

Hi Dan,

On Mon, Feb 15, 2021 at 12:43 PM Dan Aloni <[email protected]> wrote:
>
> Hi Anna,
>
> This patchset builds ontop v2 of your 'sysfs files for changing IP' changeset.
>
> - The patchset adds two more sysfs objects, for one for transport and another
> for multipath.
> - Also, `net` renamed to `client`, and `client` now has symlink to its principal
> transport. A client also has a symlink to its `multipath` object.
> - The transport interface lets you change `dstaddr` of individual transports,
> when `nconnect` is used (or if it wasn't used and these were added with the
> new interface).
> - The interface to add a transport is using a single string written to 'add',
> for example:
>
> echo 'dstaddr 192.168.40.8 kind rdma' \
> > /sys/kernel/sunrpc/client/0/multipath/add
>
> These changes are independent of the method used to obtain a sunrpc ID for a
> mountpoint. For that I've sent a concept patch showing an fspick-based
> implementation: https://marc.info/?l=linux-nfs&m=161332454821849&w=4

I'm confused: does this allow adding arbitrary connections between a
client and some server IP to an existing RPC client? Given the above
description, that's how it reads to me, can you clarify please. I
thought it was something specifically for v3 (because it has no
concept of trunking). As for NFSv4 there is a notion of getting server
locations via FS_LOCATION and doing trunking (ie multipathing)? I
don't see how this code restricts the addition of transports to v3.

>
> Thanks
>
> Dan Aloni (8):
> sunrpc: rename 'net' to 'client'
> sunrpc: add xprt id
> sunrpc: add a directory per sunrpc xprt
> sunrpc: have client directory a symlink to the root transport
> sunrpc: add IDs to multipath
> sunrpc: add multipath directory and symlink from client
> sunrpc: change rpc_clnt_add_xprt() to rpc_add_xprt()
> sunrpc: introduce an 'add' node to 'multipath' sysfs directory
>
> fs/nfs/pnfs_nfs.c | 12 +-
> include/linux/sunrpc/clnt.h | 12 +-
> include/linux/sunrpc/xprt.h | 3 +
> include/linux/sunrpc/xprtmultipath.h | 6 +
> net/sunrpc/clnt.c | 39 +--
> net/sunrpc/sunrpc_syms.c | 2 +
> net/sunrpc/sysfs.c | 403 +++++++++++++++++++++++----
> net/sunrpc/sysfs.h | 21 +-
> net/sunrpc/xprt.c | 29 ++
> net/sunrpc/xprtmultipath.c | 37 +++
> 10 files changed, 487 insertions(+), 77 deletions(-)
>
> --
> 2.26.2
>

2021-03-04 16:40:20

by Dan Aloni

[permalink] [raw]
Subject: Re: [PATCH v1 0/8] sysfs files for multipath transport control

On Mon, Mar 01, 2021 at 10:56:22PM -0500, Olga Kornievskaia wrote:
> Hi Dan,
>
> On Mon, Feb 15, 2021 at 12:43 PM Dan Aloni <[email protected]> wrote:
> >
> > Hi Anna,
> >
> > This patchset builds ontop v2 of your 'sysfs files for changing IP' changeset.
> >
> > - The patchset adds two more sysfs objects, for one for transport and another
> > for multipath.
> > - Also, `net` renamed to `client`, and `client` now has symlink to its principal
> > transport. A client also has a symlink to its `multipath` object.
> > - The transport interface lets you change `dstaddr` of individual transports,
> > when `nconnect` is used (or if it wasn't used and these were added with the
> > new interface).
> > - The interface to add a transport is using a single string written to 'add',
> > for example:
> >
> > echo 'dstaddr 192.168.40.8 kind rdma' \
> > > /sys/kernel/sunrpc/client/0/multipath/add
> >
> > These changes are independent of the method used to obtain a sunrpc ID for a
> > mountpoint. For that I've sent a concept patch showing an fspick-based
> > implementation: https://marc.info/?l=linux-nfs&m=161332454821849&w=4
>
> I'm confused: does this allow adding arbitrary connections between a
> client and some server IP to an existing RPC client? Given the above
> description, that's how it reads to me, can you clarify please. I
> thought it was something specifically for v3 (because it has no
> concept of trunking). As for NFSv4 there is a notion of getting server
> locations via FS_LOCATION and doing trunking (ie multipathing)? I
> don't see how this code restricts the addition of transports to v3.

Indeed, there's no restriction to NFSv3.

There can be potential uses for this for NFSv4 too. FS_LOCATIONS serving
as recommendation to which hosts the client can connect, while smart
load-balancing logic in userspace can determine to which subset of these
servers each client in a cluster should actually connect (a full mesh
is not always desired).

At any case, if this restriction is desired, we can add a new sunrpc
client flag for that and pass it only in NFSv3 client init.

--
Dan Aloni

2021-03-04 19:11:01

by Chuck Lever III

[permalink] [raw]
Subject: Re: [PATCH v1 0/8] sysfs files for multipath transport control



> On Mar 4, 2021, at 6:58 AM, Dan Aloni <[email protected]> wrote:
>
> On Mon, Mar 01, 2021 at 10:56:22PM -0500, Olga Kornievskaia wrote:
>> Hi Dan,
>>
>> On Mon, Feb 15, 2021 at 12:43 PM Dan Aloni <[email protected]> wrote:
>>>
>>> Hi Anna,
>>>
>>> This patchset builds ontop v2 of your 'sysfs files for changing IP' changeset.
>>>
>>> - The patchset adds two more sysfs objects, for one for transport and another
>>> for multipath.
>>> - Also, `net` renamed to `client`, and `client` now has symlink to its principal
>>> transport. A client also has a symlink to its `multipath` object.
>>> - The transport interface lets you change `dstaddr` of individual transports,
>>> when `nconnect` is used (or if it wasn't used and these were added with the
>>> new interface).
>>> - The interface to add a transport is using a single string written to 'add',
>>> for example:
>>>
>>> echo 'dstaddr 192.168.40.8 kind rdma' \
>>>> /sys/kernel/sunrpc/client/0/multipath/add
>>>
>>> These changes are independent of the method used to obtain a sunrpc ID for a
>>> mountpoint. For that I've sent a concept patch showing an fspick-based
>>> implementation: https://marc.info/?l=linux-nfs&m=161332454821849&w=4
>>
>> I'm confused: does this allow adding arbitrary connections between a
>> client and some server IP to an existing RPC client? Given the above
>> description, that's how it reads to me, can you clarify please. I
>> thought it was something specifically for v3 (because it has no
>> concept of trunking). As for NFSv4 there is a notion of getting server
>> locations via FS_LOCATION and doing trunking (ie multipathing)? I
>> don't see how this code restricts the addition of transports to v3.
>
> Indeed, there's no restriction to NFSv3.
>
> There can be potential uses for this for NFSv4 too. FS_LOCATIONS serving
> as recommendation to which hosts the client can connect, while smart
> load-balancing logic in userspace can determine to which subset of these
> servers each client in a cluster should actually connect (a full mesh
> is not always desired).
>
> At any case, if this restriction is desired, we can add a new sunrpc
> client flag for that and pass it only in NFSv3 client init.

IMO an NFSv3-only policy should not be built into this API.

This is a user-space / kernel API, not something that is an administrative
interface. The administrative interface, which is the place to apply an
NFSv3-only policy, would use this sysfs API. So would smart load-
balancing logic based on fs_locations.

If the API is under /sys/kernel/sunrpc/client, then including NFS-specific
controls is a layering violation. Consider that the kernel can send
multiple protocols over the same connection (NFSv3 and NFSACL, eg).


--
Chuck Lever