2021-06-03 23:00:21

by Olga Kornievskaia

[permalink] [raw]
Subject: [PATCH v1 1/3] sunrpc: take a xprt offline using sysfs

From: Olga Kornievskaia <[email protected]>

Using sysfs's xprt_state attribute, mark a particular transport offline.
It will not be picked during the round-robin selection. It's not allowed
to take the main (1st created transport associated with the rpc_client)
offline.

Signed-off-by: Olga Kornievskaia <[email protected]>
---
include/linux/sunrpc/xprt.h | 2 ++
net/sunrpc/clnt.c | 1 +
net/sunrpc/sysfs.c | 42 +++++++++++++++++++++++++++++++++----
net/sunrpc/xprtmultipath.c | 3 ++-
4 files changed, 43 insertions(+), 5 deletions(-)

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 13a4eaf385cf..72a858f032c7 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -293,6 +293,7 @@ struct rpc_xprt {
struct rcu_head rcu;
const struct xprt_class *xprt_class;
struct rpc_sysfs_xprt *xprt_sysfs;
+ bool main; /* marked if it's the 1st transport */
};

#if defined(CONFIG_SUNRPC_BACKCHANNEL)
@@ -426,6 +427,7 @@ void xprt_release_write(struct rpc_xprt *, struct rpc_task *);
#define XPRT_BOUND (4)
#define XPRT_BINDING (5)
#define XPRT_CLOSING (6)
+#define XPRT_OFFLINE (7)
#define XPRT_CONGESTED (9)
#define XPRT_CWND_WAIT (10)
#define XPRT_WRITE_SPACE (11)
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 9bf820bad84c..408618765aa5 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -412,6 +412,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args,
}

rpc_clnt_set_transport(clnt, xprt, timeout);
+ xprt->main = true;
xprt_iter_init(&clnt->cl_xpi, xps);
xprt_switch_put(xps);

diff --git a/net/sunrpc/sysfs.c b/net/sunrpc/sysfs.c
index ec06c9257c07..02c918c5061b 100644
--- a/net/sunrpc/sysfs.c
+++ b/net/sunrpc/sysfs.c
@@ -118,7 +118,7 @@ static ssize_t rpc_sysfs_xprt_state_show(struct kobject *kobj,
struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj);
ssize_t ret;
int locked, connected, connecting, close_wait, bound, binding,
- closing, congested, cwnd_wait, write_space;
+ closing, congested, cwnd_wait, write_space, offline;

if (!xprt)
return 0;
@@ -136,8 +136,9 @@ static ssize_t rpc_sysfs_xprt_state_show(struct kobject *kobj,
congested = test_bit(XPRT_CONGESTED, &xprt->state);
cwnd_wait = test_bit(XPRT_CWND_WAIT, &xprt->state);
write_space = test_bit(XPRT_WRITE_SPACE, &xprt->state);
+ offline = test_bit(XPRT_OFFLINE, &xprt->state);

- ret = sprintf(buf, "state=%s %s %s %s %s %s %s %s %s %s\n",
+ ret = sprintf(buf, "state=%s %s %s %s %s %s %s %s %s %s %s\n",
locked ? "LOCKED" : "",
connected ? "CONNECTED" : "",
connecting ? "CONNECTING" : "",
@@ -147,7 +148,8 @@ static ssize_t rpc_sysfs_xprt_state_show(struct kobject *kobj,
closing ? "CLOSING" : "",
congested ? "CONGESTED" : "",
cwnd_wait ? "CWND_WAIT" : "",
- write_space ? "WRITE_SPACE" : "");
+ write_space ? "WRITE_SPACE" : "",
+ offline ? "OFFLINE" : "");
}

xprt_put(xprt);
@@ -223,6 +225,38 @@ static ssize_t rpc_sysfs_xprt_dstaddr_store(struct kobject *kobj,
goto out;
}

+static ssize_t rpc_sysfs_xprt_state_change(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj);
+ int offline = 0;
+
+ if (!xprt)
+ return 0;
+
+ if (!strncmp(buf, "offline", 7))
+ offline = 1;
+ else
+ return -EINVAL;
+
+ if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_KILLABLE)) {
+ count = -EINTR;
+ goto out_put;
+ }
+ if (offline) {
+ if (xprt->main)
+ count = -EINVAL;
+ else
+ set_bit(XPRT_OFFLINE, &xprt->state);
+ }
+
+ xprt_release_write(xprt, NULL);
+out_put:
+ xprt_put(xprt);
+ return count;
+}
+
int rpc_sysfs_init(void)
{
rpc_sunrpc_kset = kset_create_and_add("sunrpc", NULL, kernel_kobj);
@@ -293,7 +327,7 @@ static struct kobj_attribute rpc_sysfs_xprt_info = __ATTR(xprt_info,
0444, rpc_sysfs_xprt_info_show, NULL);

static struct kobj_attribute rpc_sysfs_xprt_change_state = __ATTR(xprt_state,
- 0644, rpc_sysfs_xprt_state_show, NULL);
+ 0644, rpc_sysfs_xprt_state_show, rpc_sysfs_xprt_state_change);

static struct attribute *rpc_sysfs_xprt_attrs[] = {
&rpc_sysfs_xprt_dstaddr.attr,
diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c
index 07e76ae1028a..39551b794b80 100644
--- a/net/sunrpc/xprtmultipath.c
+++ b/net/sunrpc/xprtmultipath.c
@@ -230,7 +230,8 @@ void xprt_iter_default_rewind(struct rpc_xprt_iter *xpi)
static
bool xprt_is_active(const struct rpc_xprt *xprt)
{
- return kref_read(&xprt->kref) != 0;
+ return (kref_read(&xprt->kref) != 0 &&
+ !test_bit(XPRT_OFFLINE, &xprt->state));
}

static
--
2.27.0


2021-06-13 16:20:09

by Trond Myklebust

[permalink] [raw]
Subject: Re: [PATCH v1 1/3] sunrpc: take a xprt offline using sysfs

On Thu, 2021-06-03 at 18:59 -0400, Olga Kornievskaia wrote:
> From: Olga Kornievskaia <[email protected]>
>
> Using sysfs's xprt_state attribute, mark a particular transport
> offline.
> It will not be picked during the round-robin selection. It's not
> allowed
> to take the main (1st created transport associated with the
> rpc_client)
> offline.
>
> Signed-off-by: Olga Kornievskaia <[email protected]>
> ---
>  include/linux/sunrpc/xprt.h |  2 ++
>  net/sunrpc/clnt.c           |  1 +
>  net/sunrpc/sysfs.c          | 42 +++++++++++++++++++++++++++++++++--
> --
>  net/sunrpc/xprtmultipath.c  |  3 ++-
>  4 files changed, 43 insertions(+), 5 deletions(-)
>
> diff --git a/include/linux/sunrpc/xprt.h
> b/include/linux/sunrpc/xprt.h
> index 13a4eaf385cf..72a858f032c7 100644
> --- a/include/linux/sunrpc/xprt.h
> +++ b/include/linux/sunrpc/xprt.h
> @@ -293,6 +293,7 @@ struct rpc_xprt {
>         struct rcu_head         rcu;
>         const struct xprt_class *xprt_class;
>         struct rpc_sysfs_xprt   *xprt_sysfs;
> +       bool                    main; /* marked if it's the 1st
> transport */
>  };
>  
>  #if defined(CONFIG_SUNRPC_BACKCHANNEL)
> @@ -426,6 +427,7 @@
> void                        xprt_release_write(struct rpc_xprt *,
> struct rpc_task *);
>  #define XPRT_BOUND             (4)
>  #define XPRT_BINDING           (5)
>  #define XPRT_CLOSING           (6)
> +#define XPRT_OFFLINE           (7)
>  #define XPRT_CONGESTED         (9)
>  #define XPRT_CWND_WAIT         (10)
>  #define XPRT_WRITE_SPACE       (11)
> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
> index 9bf820bad84c..408618765aa5 100644
> --- a/net/sunrpc/clnt.c
> +++ b/net/sunrpc/clnt.c
> @@ -412,6 +412,7 @@ static struct rpc_clnt * rpc_new_client(const
> struct rpc_create_args *args,
>         }
>  
>         rpc_clnt_set_transport(clnt, xprt, timeout);
> +       xprt->main = true;
>         xprt_iter_init(&clnt->cl_xpi, xps);
>         xprt_switch_put(xps);
>  
> diff --git a/net/sunrpc/sysfs.c b/net/sunrpc/sysfs.c
> index ec06c9257c07..02c918c5061b 100644
> --- a/net/sunrpc/sysfs.c
> +++ b/net/sunrpc/sysfs.c
> @@ -118,7 +118,7 @@ static ssize_t rpc_sysfs_xprt_state_show(struct
> kobject *kobj,
>         struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj);
>         ssize_t ret;
>         int locked, connected, connecting, close_wait, bound,
> binding,
> -           closing, congested, cwnd_wait, write_space;
> +           closing, congested, cwnd_wait, write_space, offline;
>  
>         if (!xprt)
>                 return 0;
> @@ -136,8 +136,9 @@ static ssize_t rpc_sysfs_xprt_state_show(struct
> kobject *kobj,
>                 congested = test_bit(XPRT_CONGESTED, &xprt->state);
>                 cwnd_wait = test_bit(XPRT_CWND_WAIT, &xprt->state);
>                 write_space = test_bit(XPRT_WRITE_SPACE, &xprt-
> >state);
> +               offline = test_bit(XPRT_OFFLINE, &xprt->state);
>  
> -               ret = sprintf(buf, "state=%s %s %s %s %s %s %s %s %s
> %s\n",
> +               ret = sprintf(buf, "state=%s %s %s %s %s %s %s %s %s
> %s %s\n",
>                               locked ? "LOCKED" : "",
>                               connected ? "CONNECTED" : "",
>                               connecting ? "CONNECTING" : "",
> @@ -147,7 +148,8 @@ static ssize_t rpc_sysfs_xprt_state_show(struct
> kobject *kobj,
>                               closing ? "CLOSING" : "",
>                               congested ? "CONGESTED" : "",
>                               cwnd_wait ? "CWND_WAIT" : "",
> -                             write_space ? "WRITE_SPACE" : "");
> +                             write_space ? "WRITE_SPACE" : "",
> +                             offline ? "OFFLINE" : "");
>         }
>  
>         xprt_put(xprt);
> @@ -223,6 +225,38 @@ static ssize_t
> rpc_sysfs_xprt_dstaddr_store(struct kobject *kobj,
>         goto out;
>  }
>  
> +static ssize_t rpc_sysfs_xprt_state_change(struct kobject *kobj,
> +                                          struct kobj_attribute
> *attr,
> +                                          const char *buf, size_t
> count)
> +{
> +       struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj);
> +       int offline = 0;
> +
> +       if (!xprt)
> +               return 0;
> +
> +       if (!strncmp(buf, "offline", 7))
> +               offline = 1;
> +       else
> +               return -EINVAL;
> +
> +       if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED,
> TASK_KILLABLE)) {
> +               count = -EINTR;
> +               goto out_put;
> +       }
> +       if (offline) {
> +               if (xprt->main)
> +                       count = -EINVAL;
> +               else
> +                       set_bit(XPRT_OFFLINE, &xprt->state);
> +       }

Is there any way to put the transport back online? What say the problem
with the downed IP address gets fixed?

> +
> +       xprt_release_write(xprt, NULL);
> +out_put:
> +       xprt_put(xprt);
> +       return count;
> +}
> +
>  int rpc_sysfs_init(void)
>  {
>         rpc_sunrpc_kset = kset_create_and_add("sunrpc", NULL,
> kernel_kobj);
> @@ -293,7 +327,7 @@ static struct kobj_attribute rpc_sysfs_xprt_info
> = __ATTR(xprt_info,
>         0444, rpc_sysfs_xprt_info_show, NULL);
>  
>  static struct kobj_attribute rpc_sysfs_xprt_change_state =
> __ATTR(xprt_state,
> -       0644, rpc_sysfs_xprt_state_show, NULL);
> +       0644, rpc_sysfs_xprt_state_show,
> rpc_sysfs_xprt_state_change);
>  
>  static struct attribute *rpc_sysfs_xprt_attrs[] = {
>         &rpc_sysfs_xprt_dstaddr.attr,
> diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c
> index 07e76ae1028a..39551b794b80 100644
> --- a/net/sunrpc/xprtmultipath.c
> +++ b/net/sunrpc/xprtmultipath.c
> @@ -230,7 +230,8 @@ void xprt_iter_default_rewind(struct
> rpc_xprt_iter *xpi)
>  static
>  bool xprt_is_active(const struct rpc_xprt *xprt)
>  {
> -       return kref_read(&xprt->kref) != 0;
> +       return (kref_read(&xprt->kref) != 0 &&
> +               !test_bit(XPRT_OFFLINE, &xprt->state));
>  }
>  
>  static

--
Trond Myklebust
Linux NFS client maintainer, Hammerspace
[email protected]


2021-06-15 15:07:06

by Olga Kornievskaia

[permalink] [raw]
Subject: Re: [PATCH v1 1/3] sunrpc: take a xprt offline using sysfs

On Sun, Jun 13, 2021 at 12:16 PM Trond Myklebust
<[email protected]> wrote:
>
> On Thu, 2021-06-03 at 18:59 -0400, Olga Kornievskaia wrote:
> > From: Olga Kornievskaia <[email protected]>
> >
> > Using sysfs's xprt_state attribute, mark a particular transport
> > offline.
> > It will not be picked during the round-robin selection. It's not
> > allowed
> > to take the main (1st created transport associated with the
> > rpc_client)
> > offline.
> >
> > Signed-off-by: Olga Kornievskaia <[email protected]>
> > ---
> > include/linux/sunrpc/xprt.h | 2 ++
> > net/sunrpc/clnt.c | 1 +
> > net/sunrpc/sysfs.c | 42 +++++++++++++++++++++++++++++++++--
> > --
> > net/sunrpc/xprtmultipath.c | 3 ++-
> > 4 files changed, 43 insertions(+), 5 deletions(-)
> >
> > diff --git a/include/linux/sunrpc/xprt.h
> > b/include/linux/sunrpc/xprt.h
> > index 13a4eaf385cf..72a858f032c7 100644
> > --- a/include/linux/sunrpc/xprt.h
> > +++ b/include/linux/sunrpc/xprt.h
> > @@ -293,6 +293,7 @@ struct rpc_xprt {
> > struct rcu_head rcu;
> > const struct xprt_class *xprt_class;
> > struct rpc_sysfs_xprt *xprt_sysfs;
> > + bool main; /* marked if it's the 1st
> > transport */
> > };
> >
> > #if defined(CONFIG_SUNRPC_BACKCHANNEL)
> > @@ -426,6 +427,7 @@
> > void xprt_release_write(struct rpc_xprt *,
> > struct rpc_task *);
> > #define XPRT_BOUND (4)
> > #define XPRT_BINDING (5)
> > #define XPRT_CLOSING (6)
> > +#define XPRT_OFFLINE (7)
> > #define XPRT_CONGESTED (9)
> > #define XPRT_CWND_WAIT (10)
> > #define XPRT_WRITE_SPACE (11)
> > diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
> > index 9bf820bad84c..408618765aa5 100644
> > --- a/net/sunrpc/clnt.c
> > +++ b/net/sunrpc/clnt.c
> > @@ -412,6 +412,7 @@ static struct rpc_clnt * rpc_new_client(const
> > struct rpc_create_args *args,
> > }
> >
> > rpc_clnt_set_transport(clnt, xprt, timeout);
> > + xprt->main = true;
> > xprt_iter_init(&clnt->cl_xpi, xps);
> > xprt_switch_put(xps);
> >
> > diff --git a/net/sunrpc/sysfs.c b/net/sunrpc/sysfs.c
> > index ec06c9257c07..02c918c5061b 100644
> > --- a/net/sunrpc/sysfs.c
> > +++ b/net/sunrpc/sysfs.c
> > @@ -118,7 +118,7 @@ static ssize_t rpc_sysfs_xprt_state_show(struct
> > kobject *kobj,
> > struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj);
> > ssize_t ret;
> > int locked, connected, connecting, close_wait, bound,
> > binding,
> > - closing, congested, cwnd_wait, write_space;
> > + closing, congested, cwnd_wait, write_space, offline;
> >
> > if (!xprt)
> > return 0;
> > @@ -136,8 +136,9 @@ static ssize_t rpc_sysfs_xprt_state_show(struct
> > kobject *kobj,
> > congested = test_bit(XPRT_CONGESTED, &xprt->state);
> > cwnd_wait = test_bit(XPRT_CWND_WAIT, &xprt->state);
> > write_space = test_bit(XPRT_WRITE_SPACE, &xprt-
> > >state);
> > + offline = test_bit(XPRT_OFFLINE, &xprt->state);
> >
> > - ret = sprintf(buf, "state=%s %s %s %s %s %s %s %s %s
> > %s\n",
> > + ret = sprintf(buf, "state=%s %s %s %s %s %s %s %s %s
> > %s %s\n",
> > locked ? "LOCKED" : "",
> > connected ? "CONNECTED" : "",
> > connecting ? "CONNECTING" : "",
> > @@ -147,7 +148,8 @@ static ssize_t rpc_sysfs_xprt_state_show(struct
> > kobject *kobj,
> > closing ? "CLOSING" : "",
> > congested ? "CONGESTED" : "",
> > cwnd_wait ? "CWND_WAIT" : "",
> > - write_space ? "WRITE_SPACE" : "");
> > + write_space ? "WRITE_SPACE" : "",
> > + offline ? "OFFLINE" : "");
> > }
> >
> > xprt_put(xprt);
> > @@ -223,6 +225,38 @@ static ssize_t
> > rpc_sysfs_xprt_dstaddr_store(struct kobject *kobj,
> > goto out;
> > }
> >
> > +static ssize_t rpc_sysfs_xprt_state_change(struct kobject *kobj,
> > + struct kobj_attribute
> > *attr,
> > + const char *buf, size_t
> > count)
> > +{
> > + struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj);
> > + int offline = 0;
> > +
> > + if (!xprt)
> > + return 0;
> > +
> > + if (!strncmp(buf, "offline", 7))
> > + offline = 1;
> > + else
> > + return -EINVAL;
> > +
> > + if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED,
> > TASK_KILLABLE)) {
> > + count = -EINTR;
> > + goto out_put;
> > + }
> > + if (offline) {
> > + if (xprt->main)
> > + count = -EINVAL;
> > + else
> > + set_bit(XPRT_OFFLINE, &xprt->state);
> > + }
>
> Is there any way to put the transport back online? What say the problem
> with the downed IP address gets fixed?

I will add this in v2. Originally the thought was that offlining a
transport was just a middle step before removing it and not something
in its own right. I would like to know if it's appropriate to also
then decrement the xps_nactive counter when the xprt is offline?

>
> > +
> > + xprt_release_write(xprt, NULL);
> > +out_put:
> > + xprt_put(xprt);
> > + return count;
> > +}
> > +
> > int rpc_sysfs_init(void)
> > {
> > rpc_sunrpc_kset = kset_create_and_add("sunrpc", NULL,
> > kernel_kobj);
> > @@ -293,7 +327,7 @@ static struct kobj_attribute rpc_sysfs_xprt_info
> > = __ATTR(xprt_info,
> > 0444, rpc_sysfs_xprt_info_show, NULL);
> >
> > static struct kobj_attribute rpc_sysfs_xprt_change_state =
> > __ATTR(xprt_state,
> > - 0644, rpc_sysfs_xprt_state_show, NULL);
> > + 0644, rpc_sysfs_xprt_state_show,
> > rpc_sysfs_xprt_state_change);
> >
> > static struct attribute *rpc_sysfs_xprt_attrs[] = {
> > &rpc_sysfs_xprt_dstaddr.attr,
> > diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c
> > index 07e76ae1028a..39551b794b80 100644
> > --- a/net/sunrpc/xprtmultipath.c
> > +++ b/net/sunrpc/xprtmultipath.c
> > @@ -230,7 +230,8 @@ void xprt_iter_default_rewind(struct
> > rpc_xprt_iter *xpi)
> > static
> > bool xprt_is_active(const struct rpc_xprt *xprt)
> > {
> > - return kref_read(&xprt->kref) != 0;
> > + return (kref_read(&xprt->kref) != 0 &&
> > + !test_bit(XPRT_OFFLINE, &xprt->state));
> > }
> >
> > static
>
> --
> Trond Myklebust
> Linux NFS client maintainer, Hammerspace
> [email protected]
>
>

2021-06-15 15:56:47

by Trond Myklebust

[permalink] [raw]
Subject: Re: [PATCH v1 1/3] sunrpc: take a xprt offline using sysfs

On Tue, 2021-06-15 at 11:06 -0400, Olga Kornievskaia wrote:
> On Sun, Jun 13, 2021 at 12:16 PM Trond Myklebust
> <[email protected]> wrote:
> >
> > On Thu, 2021-06-03 at 18:59 -0400, Olga Kornievskaia wrote:
> > > From: Olga Kornievskaia <[email protected]>
> > >
> > > Using sysfs's xprt_state attribute, mark a particular transport
> > > offline.
> > > It will not be picked during the round-robin selection. It's not
> > > allowed
> > > to take the main (1st created transport associated with the
> > > rpc_client)
> > > offline.
> > >
> > > Signed-off-by: Olga Kornievskaia <[email protected]>
> > > ---
> > >  include/linux/sunrpc/xprt.h |  2 ++
> > >  net/sunrpc/clnt.c           |  1 +
> > >  net/sunrpc/sysfs.c          | 42
> > > +++++++++++++++++++++++++++++++++--
> > > --
> > >  net/sunrpc/xprtmultipath.c  |  3 ++-
> > >  4 files changed, 43 insertions(+), 5 deletions(-)
> > >
> > > diff --git a/include/linux/sunrpc/xprt.h
> > > b/include/linux/sunrpc/xprt.h
> > > index 13a4eaf385cf..72a858f032c7 100644
> > > --- a/include/linux/sunrpc/xprt.h
> > > +++ b/include/linux/sunrpc/xprt.h
> > > @@ -293,6 +293,7 @@ struct rpc_xprt {
> > >         struct rcu_head         rcu;
> > >         const struct xprt_class *xprt_class;
> > >         struct rpc_sysfs_xprt   *xprt_sysfs;
> > > +       bool                    main; /* marked if it's the 1st
> > > transport */
> > >  };
> > >
> > >  #if defined(CONFIG_SUNRPC_BACKCHANNEL)
> > > @@ -426,6 +427,7 @@
> > > void                        xprt_release_write(struct rpc_xprt *,
> > > struct rpc_task *);
> > >  #define XPRT_BOUND             (4)
> > >  #define XPRT_BINDING           (5)
> > >  #define XPRT_CLOSING           (6)
> > > +#define XPRT_OFFLINE           (7)
> > >  #define XPRT_CONGESTED         (9)
> > >  #define XPRT_CWND_WAIT         (10)
> > >  #define XPRT_WRITE_SPACE       (11)
> > > diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
> > > index 9bf820bad84c..408618765aa5 100644
> > > --- a/net/sunrpc/clnt.c
> > > +++ b/net/sunrpc/clnt.c
> > > @@ -412,6 +412,7 @@ static struct rpc_clnt * rpc_new_client(const
> > > struct rpc_create_args *args,
> > >         }
> > >
> > >         rpc_clnt_set_transport(clnt, xprt, timeout);
> > > +       xprt->main = true;
> > >         xprt_iter_init(&clnt->cl_xpi, xps);
> > >         xprt_switch_put(xps);
> > >
> > > diff --git a/net/sunrpc/sysfs.c b/net/sunrpc/sysfs.c
> > > index ec06c9257c07..02c918c5061b 100644
> > > --- a/net/sunrpc/sysfs.c
> > > +++ b/net/sunrpc/sysfs.c
> > > @@ -118,7 +118,7 @@ static ssize_t
> > > rpc_sysfs_xprt_state_show(struct
> > > kobject *kobj,
> > >         struct rpc_xprt *xprt =
> > > rpc_sysfs_xprt_kobj_get_xprt(kobj);
> > >         ssize_t ret;
> > >         int locked, connected, connecting, close_wait, bound,
> > > binding,
> > > -           closing, congested, cwnd_wait, write_space;
> > > +           closing, congested, cwnd_wait, write_space, offline;
> > >
> > >         if (!xprt)
> > >                 return 0;
> > > @@ -136,8 +136,9 @@ static ssize_t
> > > rpc_sysfs_xprt_state_show(struct
> > > kobject *kobj,
> > >                 congested = test_bit(XPRT_CONGESTED, &xprt-
> > > >state);
> > >                 cwnd_wait = test_bit(XPRT_CWND_WAIT, &xprt-
> > > >state);
> > >                 write_space = test_bit(XPRT_WRITE_SPACE, &xprt-
> > > > state);
> > > +               offline = test_bit(XPRT_OFFLINE, &xprt->state);
> > >
> > > -               ret = sprintf(buf, "state=%s %s %s %s %s %s %s %s
> > > %s
> > > %s\n",
> > > +               ret = sprintf(buf, "state=%s %s %s %s %s %s %s %s
> > > %s
> > > %s %s\n",
> > >                               locked ? "LOCKED" : "",
> > >                               connected ? "CONNECTED" : "",
> > >                               connecting ? "CONNECTING" : "",
> > > @@ -147,7 +148,8 @@ static ssize_t
> > > rpc_sysfs_xprt_state_show(struct
> > > kobject *kobj,
> > >                               closing ? "CLOSING" : "",
> > >                               congested ? "CONGESTED" : "",
> > >                               cwnd_wait ? "CWND_WAIT" : "",
> > > -                             write_space ? "WRITE_SPACE" : "");
> > > +                             write_space ? "WRITE_SPACE" : "",
> > > +                             offline ? "OFFLINE" : "");
> > >         }
> > >
> > >         xprt_put(xprt);
> > > @@ -223,6 +225,38 @@ static ssize_t
> > > rpc_sysfs_xprt_dstaddr_store(struct kobject *kobj,
> > >         goto out;
> > >  }
> > >
> > > +static ssize_t rpc_sysfs_xprt_state_change(struct kobject *kobj,
> > > +                                          struct kobj_attribute
> > > *attr,
> > > +                                          const char *buf,
> > > size_t
> > > count)
> > > +{
> > > +       struct rpc_xprt *xprt =
> > > rpc_sysfs_xprt_kobj_get_xprt(kobj);
> > > +       int offline = 0;
> > > +
> > > +       if (!xprt)
> > > +               return 0;
> > > +
> > > +       if (!strncmp(buf, "offline", 7))
> > > +               offline = 1;
> > > +       else
> > > +               return -EINVAL;
> > > +
> > > +       if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED,
> > > TASK_KILLABLE)) {
> > > +               count = -EINTR;
> > > +               goto out_put;
> > > +       }
> > > +       if (offline) {
> > > +               if (xprt->main)
> > > +                       count = -EINVAL;
> > > +               else
> > > +                       set_bit(XPRT_OFFLINE, &xprt->state);
> > > +       }
> >
> > Is there any way to put the transport back online? What say the
> > problem
> > with the downed IP address gets fixed?
>
> I will add this in v2. Originally the thought was that offlining a
> transport was just a middle step before removing it and not something
> in its own right. I would like to know if it's appropriate to also
> then decrement the xps_nactive counter when the xprt is offline?
>

Yes, that needs to be done. Otherwise the average queue length
calculations in functions like xprt_switch_find_next_entry_roundrobin
will be wrong.

> >
> > > +
> > > +       xprt_release_write(xprt, NULL);
> > > +out_put:
> > > +       xprt_put(xprt);
> > > +       return count;
> > > +}
> > > +
> > >  int rpc_sysfs_init(void)
> > >  {
> > >         rpc_sunrpc_kset = kset_create_and_add("sunrpc", NULL,
> > > kernel_kobj);
> > > @@ -293,7 +327,7 @@ static struct kobj_attribute
> > > rpc_sysfs_xprt_info
> > > = __ATTR(xprt_info,
> > >         0444, rpc_sysfs_xprt_info_show, NULL);
> > >
> > >  static struct kobj_attribute rpc_sysfs_xprt_change_state =
> > > __ATTR(xprt_state,
> > > -       0644, rpc_sysfs_xprt_state_show, NULL);
> > > +       0644, rpc_sysfs_xprt_state_show,
> > > rpc_sysfs_xprt_state_change);
> > >
> > >  static struct attribute *rpc_sysfs_xprt_attrs[] = {
> > >         &rpc_sysfs_xprt_dstaddr.attr,
> > > diff --git a/net/sunrpc/xprtmultipath.c
> > > b/net/sunrpc/xprtmultipath.c
> > > index 07e76ae1028a..39551b794b80 100644
> > > --- a/net/sunrpc/xprtmultipath.c
> > > +++ b/net/sunrpc/xprtmultipath.c
> > > @@ -230,7 +230,8 @@ void xprt_iter_default_rewind(struct
> > > rpc_xprt_iter *xpi)
> > >  static
> > >  bool xprt_is_active(const struct rpc_xprt *xprt)
> > >  {
> > > -       return kref_read(&xprt->kref) != 0;
> > > +       return (kref_read(&xprt->kref) != 0 &&
> > > +               !test_bit(XPRT_OFFLINE, &xprt->state));
> > >  }
> > >
> > >  static
> >
> > --
> > Trond Myklebust
> > Linux NFS client maintainer, Hammerspace
> > [email protected]
> >
> >

--
Trond Myklebust
Linux NFS client maintainer, Hammerspace
[email protected]