2021-04-19 20:44:47

by Chuck Lever III

[permalink] [raw]
Subject: [PATCH v3 07/26] xprtrdma: Improve locking around rpcrdma_rep destruction

Currently rpcrdma_reps_destroy() assumes that, at transport
tear-down, the content of the rb_free_reps list is the same as the
content of the rb_all_reps list. Although that is usually true,
using the rb_all_reps list should be more reliable because of
the way it's managed. And, rpcrdma_reps_unmap() uses rb_all_reps;
these two functions should both traverse the "all" list.

Ensure that all rpcrdma_reps are always destroyed whether they are
on the rep free list or not.

Signed-off-by: Chuck Lever <[email protected]>
---
net/sunrpc/xprtrdma/verbs.c | 31 ++++++++++++++++++++++++-------
1 file changed, 24 insertions(+), 7 deletions(-)

diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 1b599a623eea..482fdc9e25c2 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -1007,16 +1007,23 @@ struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt,
return NULL;
}

-/* No locking needed here. This function is invoked only by the
- * Receive completion handler, or during transport shutdown.
- */
-static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep)
+static void rpcrdma_rep_destroy_locked(struct rpcrdma_rep *rep)
{
- list_del(&rep->rr_all);
rpcrdma_regbuf_free(rep->rr_rdmabuf);
kfree(rep);
}

+static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep)
+{
+ struct rpcrdma_buffer *buf = &rep->rr_rxprt->rx_buf;
+
+ spin_lock(&buf->rb_lock);
+ list_del(&rep->rr_all);
+ spin_unlock(&buf->rb_lock);
+
+ rpcrdma_rep_destroy_locked(rep);
+}
+
static struct rpcrdma_rep *rpcrdma_rep_get_locked(struct rpcrdma_buffer *buf)
{
struct llist_node *node;
@@ -1049,8 +1056,18 @@ static void rpcrdma_reps_destroy(struct rpcrdma_buffer *buf)
{
struct rpcrdma_rep *rep;

- while ((rep = rpcrdma_rep_get_locked(buf)) != NULL)
- rpcrdma_rep_destroy(rep);
+ spin_lock(&buf->rb_lock);
+ while ((rep = list_first_entry_or_null(&buf->rb_all_reps,
+ struct rpcrdma_rep,
+ rr_all)) != NULL) {
+ list_del(&rep->rr_all);
+ spin_unlock(&buf->rb_lock);
+
+ rpcrdma_rep_destroy_locked(rep);
+
+ spin_lock(&buf->rb_lock);
+ }
+ spin_unlock(&buf->rb_lock);
}

/**



2021-04-23 21:07:18

by Trond Myklebust

[permalink] [raw]
Subject: Re: [PATCH v3 07/26] xprtrdma: Improve locking around rpcrdma_rep destruction

On Mon, 2021-04-19 at 14:02 -0400, Chuck Lever wrote:
> Currently rpcrdma_reps_destroy() assumes that, at transport
> tear-down, the content of the rb_free_reps list is the same as the
> content of the rb_all_reps list. Although that is usually true,
> using the rb_all_reps list should be more reliable because of
> the way it's managed. And, rpcrdma_reps_unmap() uses rb_all_reps;
> these two functions should both traverse the "all" list.
>
> Ensure that all rpcrdma_reps are always destroyed whether they are
> on the rep free list or not.
>
> Signed-off-by: Chuck Lever <[email protected]>
> ---
>  net/sunrpc/xprtrdma/verbs.c |   31 ++++++++++++++++++++++++-------
>  1 file changed, 24 insertions(+), 7 deletions(-)
>
> diff --git a/net/sunrpc/xprtrdma/verbs.c
> b/net/sunrpc/xprtrdma/verbs.c
> index 1b599a623eea..482fdc9e25c2 100644
> --- a/net/sunrpc/xprtrdma/verbs.c
> +++ b/net/sunrpc/xprtrdma/verbs.c
> @@ -1007,16 +1007,23 @@ struct rpcrdma_rep *rpcrdma_rep_create(struct
> rpcrdma_xprt *r_xprt,
>         return NULL;
>  }
>  
> -/* No locking needed here. This function is invoked only by the
> - * Receive completion handler, or during transport shutdown.
> - */
> -static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep)
> +static void rpcrdma_rep_destroy_locked(struct rpcrdma_rep *rep)

The name here is extremely confusing. As far as I can tell, this isn't
called with any lock?

>  {
> -       list_del(&rep->rr_all);
>         rpcrdma_regbuf_free(rep->rr_rdmabuf);
>         kfree(rep);
>  }
>  
> +static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep)
> +{
> +       struct rpcrdma_buffer *buf = &rep->rr_rxprt->rx_buf;
> +
> +       spin_lock(&buf->rb_lock);
> +       list_del(&rep->rr_all);
> +       spin_unlock(&buf->rb_lock);
> +
> +       rpcrdma_rep_destroy_locked(rep);
> +}
> +
>  static struct rpcrdma_rep *rpcrdma_rep_get_locked(struct
> rpcrdma_buffer *buf)
>  {
>         struct llist_node *node;
> @@ -1049,8 +1056,18 @@ static void rpcrdma_reps_destroy(struct
> rpcrdma_buffer *buf)
>  {
>         struct rpcrdma_rep *rep;
>  
> -       while ((rep = rpcrdma_rep_get_locked(buf)) != NULL)
> -               rpcrdma_rep_destroy(rep);
> +       spin_lock(&buf->rb_lock);
> +       while ((rep = list_first_entry_or_null(&buf->rb_all_reps,
> +                                              struct rpcrdma_rep,
> +                                              rr_all)) != NULL) {
> +               list_del(&rep->rr_all);
> +               spin_unlock(&buf->rb_lock);
> +
> +               rpcrdma_rep_destroy_locked(rep);
> +
> +               spin_lock(&buf->rb_lock);
> +       }
> +       spin_unlock(&buf->rb_lock);
>  }
>  
>  /**
>
>

--
Trond Myklebust
Linux NFS client maintainer, Hammerspace
[email protected]


2021-04-24 17:44:00

by Chuck Lever III

[permalink] [raw]
Subject: Re: [PATCH v3 07/26] xprtrdma: Improve locking around rpcrdma_rep destruction



> On Apr 23, 2021, at 5:06 PM, Trond Myklebust <[email protected]> wrote:
>
> On Mon, 2021-04-19 at 14:02 -0400, Chuck Lever wrote:
>> Currently rpcrdma_reps_destroy() assumes that, at transport
>> tear-down, the content of the rb_free_reps list is the same as the
>> content of the rb_all_reps list. Although that is usually true,
>> using the rb_all_reps list should be more reliable because of
>> the way it's managed. And, rpcrdma_reps_unmap() uses rb_all_reps;
>> these two functions should both traverse the "all" list.
>>
>> Ensure that all rpcrdma_reps are always destroyed whether they are
>> on the rep free list or not.
>>
>> Signed-off-by: Chuck Lever <[email protected]>
>> ---
>> net/sunrpc/xprtrdma/verbs.c | 31 ++++++++++++++++++++++++-------
>> 1 file changed, 24 insertions(+), 7 deletions(-)
>>
>> diff --git a/net/sunrpc/xprtrdma/verbs.c
>> b/net/sunrpc/xprtrdma/verbs.c
>> index 1b599a623eea..482fdc9e25c2 100644
>> --- a/net/sunrpc/xprtrdma/verbs.c
>> +++ b/net/sunrpc/xprtrdma/verbs.c
>> @@ -1007,16 +1007,23 @@ struct rpcrdma_rep *rpcrdma_rep_create(struct
>> rpcrdma_xprt *r_xprt,
>> return NULL;
>> }
>>
>> -/* No locking needed here. This function is invoked only by the
>> - * Receive completion handler, or during transport shutdown.
>> - */
>> -static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep)
>> +static void rpcrdma_rep_destroy_locked(struct rpcrdma_rep *rep)
>
> The name here is extremely confusing. As far as I can tell, this isn't
> called with any lock?

Fair enough.

I renamed it rpcrdma_rep_free() and it doesn't seem to have
any consequences for downstream commits in this series.

You could do a global edit, I can resend you this patch with
the change, or I can post a v4 of this series. Let me know
your preference.


>> {
>> - list_del(&rep->rr_all);
>> rpcrdma_regbuf_free(rep->rr_rdmabuf);
>> kfree(rep);
>> }
>>
>> +static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep)
>> +{
>> + struct rpcrdma_buffer *buf = &rep->rr_rxprt->rx_buf;
>> +
>> + spin_lock(&buf->rb_lock);
>> + list_del(&rep->rr_all);
>> + spin_unlock(&buf->rb_lock);
>> +
>> + rpcrdma_rep_destroy_locked(rep);
>> +}
>> +
>> static struct rpcrdma_rep *rpcrdma_rep_get_locked(struct
>> rpcrdma_buffer *buf)
>> {
>> struct llist_node *node;
>> @@ -1049,8 +1056,18 @@ static void rpcrdma_reps_destroy(struct
>> rpcrdma_buffer *buf)
>> {
>> struct rpcrdma_rep *rep;
>>
>> - while ((rep = rpcrdma_rep_get_locked(buf)) != NULL)
>> - rpcrdma_rep_destroy(rep);
>> + spin_lock(&buf->rb_lock);
>> + while ((rep = list_first_entry_or_null(&buf->rb_all_reps,
>> + struct rpcrdma_rep,
>> + rr_all)) != NULL) {
>> + list_del(&rep->rr_all);
>> + spin_unlock(&buf->rb_lock);
>> +
>> + rpcrdma_rep_destroy_locked(rep);
>> +
>> + spin_lock(&buf->rb_lock);
>> + }
>> + spin_unlock(&buf->rb_lock);
>> }
>>
>> /**
>>
>>
>
> --
> Trond Myklebust
> Linux NFS client maintainer, Hammerspace
> [email protected]

--
Chuck Lever



2021-04-24 18:02:20

by Trond Myklebust

[permalink] [raw]
Subject: Re: [PATCH v3 07/26] xprtrdma: Improve locking around rpcrdma_rep destruction

On Sat, 2021-04-24 at 17:39 +0000, Chuck Lever III wrote:
>
>
> > On Apr 23, 2021, at 5:06 PM, Trond Myklebust <
> > [email protected]> wrote:
> >
> > On Mon, 2021-04-19 at 14:02 -0400, Chuck Lever wrote:
> > > Currently rpcrdma_reps_destroy() assumes that, at transport
> > > tear-down, the content of the rb_free_reps list is the same as
> > > the
> > > content of the rb_all_reps list. Although that is usually true,
> > > using the rb_all_reps list should be more reliable because of
> > > the way it's managed. And, rpcrdma_reps_unmap() uses rb_all_reps;
> > > these two functions should both traverse the "all" list.
> > >
> > > Ensure that all rpcrdma_reps are always destroyed whether they
> > > are
> > > on the rep free list or not.
> > >
> > > Signed-off-by: Chuck Lever <[email protected]>
> > > ---
> > >  net/sunrpc/xprtrdma/verbs.c |   31 ++++++++++++++++++++++++-----
> > > --
> > >  1 file changed, 24 insertions(+), 7 deletions(-)
> > >
> > > diff --git a/net/sunrpc/xprtrdma/verbs.c
> > > b/net/sunrpc/xprtrdma/verbs.c
> > > index 1b599a623eea..482fdc9e25c2 100644
> > > --- a/net/sunrpc/xprtrdma/verbs.c
> > > +++ b/net/sunrpc/xprtrdma/verbs.c
> > > @@ -1007,16 +1007,23 @@ struct rpcrdma_rep
> > > *rpcrdma_rep_create(struct
> > > rpcrdma_xprt *r_xprt,
> > >         return NULL;
> > >  }
> > >  
> > > -/* No locking needed here. This function is invoked only by the
> > > - * Receive completion handler, or during transport shutdown.
> > > - */
> > > -static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep)
> > > +static void rpcrdma_rep_destroy_locked(struct rpcrdma_rep *rep)
> >
> > The name here is extremely confusing. As far as I can tell, this
> > isn't
> > called with any lock?
>
> Fair enough.
>
> I renamed it rpcrdma_rep_free() and it doesn't seem to have
> any consequences for downstream commits in this series.

Sounds good.

>
> You could do a global edit, I can resend you this patch with
> the change, or I can post a v4 of this series. Let me know
> your preference.
>

Can you please just resend this patch with the update, unless there are
repercussions for the other patches? (in which case a v4 would be
welcome)

>
> > >  {
> > > -       list_del(&rep->rr_all);
> > >         rpcrdma_regbuf_free(rep->rr_rdmabuf);
> > >         kfree(rep);
> > >  }
> > >  
> > > +static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep)
> > > +{
> > > +       struct rpcrdma_buffer *buf = &rep->rr_rxprt->rx_buf;
> > > +
> > > +       spin_lock(&buf->rb_lock);
> > > +       list_del(&rep->rr_all);
> > > +       spin_unlock(&buf->rb_lock);
> > > +
> > > +       rpcrdma_rep_destroy_locked(rep);
> > > +}
> > > +
> > >  static struct rpcrdma_rep *rpcrdma_rep_get_locked(struct
> > > rpcrdma_buffer *buf)
> > >  {
> > >         struct llist_node *node;
> > > @@ -1049,8 +1056,18 @@ static void rpcrdma_reps_destroy(struct
> > > rpcrdma_buffer *buf)
> > >  {
> > >         struct rpcrdma_rep *rep;
> > >  
> > > -       while ((rep = rpcrdma_rep_get_locked(buf)) != NULL)
> > > -               rpcrdma_rep_destroy(rep);
> > > +       spin_lock(&buf->rb_lock);
> > > +       while ((rep = list_first_entry_or_null(&buf->rb_all_reps,
> > > +                                              struct
> > > rpcrdma_rep,
> > > +                                              rr_all)) != NULL)
> > > {
> > > +               list_del(&rep->rr_all);
> > > +               spin_unlock(&buf->rb_lock);
> > > +
> > > +               rpcrdma_rep_destroy_locked(rep);
> > > +
> > > +               spin_lock(&buf->rb_lock);
> > > +       }
> > > +       spin_unlock(&buf->rb_lock);
> > >  }
> > >  
> > >  /**
> > >
> > >
> >
> > --
> > Trond Myklebust
> > Linux NFS client maintainer, Hammerspace
> > [email protected]
>
> --
> Chuck Lever
>
>
>

--
Trond Myklebust
Linux NFS client maintainer, Hammerspace
[email protected]