MIME-Version: 1.0
In-Reply-To: <20160212210635.5278.72709.stgit@manet.1015granger.net>
References: <20160212205107.5278.55938.stgit@manet.1015granger.net> <20160212210635.5278.72709.stgit@manet.1015granger.net>
From: Devesh Sharma <devesh.sharma@broadcom.com>
Date: Mon, 15 Feb 2016 19:59:33 +0530
Message-ID: <CANjDDBgag4c2K0G+c-Mbxri+Bh2CZ6qkb49S-3myNva2J4bu_Q@mail.gmail.com>
Subject: Re: [PATCH v1 5/8] xprtrdma: Serialize credit accounting again
To: Chuck Lever <chuck.lever@oracle.com>
Cc: linux-rdma@vger.kernel.org,
        Linux NFS Mailing List <linux-nfs@vger.kernel.org>
Content-Type: text/plain; charset=UTF-8
Sender: linux-nfs-owner@vger.kernel.org

On Sat, Feb 13, 2016 at 2:36 AM, Chuck Lever <chuck.lever@oracle.com> wrote:
> Commit fe97b47cd623 ("xprtrdma: Use workqueue to process RPC/RDMA
> replies") replaced the reply tasklet with a workqueue that allows
> RPC replies to be processed in parallel. Thus the credit values in
> RPC-over-RDMA replies can applied in a different order than in
> which the server sent them.
>
> To fix this, revert commit eba8ff660b2d ("xprtrdma: Move credit
> update to RPC reply handler"). Done by hand to accommodate code
> changes that have occurred since then.
>
> Fixes: fe97b47cd623 ("xprtrdma: Use workqueue to process . . .")
> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> ---
>  net/sunrpc/xprtrdma/rpc_rdma.c  |    9 +--------
>  net/sunrpc/xprtrdma/verbs.c     |   27 ++++++++++++++++++++++++++-
>  net/sunrpc/xprtrdma/xprt_rdma.h |    1 +
>  3 files changed, 28 insertions(+), 9 deletions(-)
>
> diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
> index c341225..0c45288 100644
> --- a/net/sunrpc/xprtrdma/rpc_rdma.c
> +++ b/net/sunrpc/xprtrdma/rpc_rdma.c
> @@ -797,7 +797,6 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep)
>         __be32 *iptr;
>         int rdmalen, status, rmerr;
>         unsigned long cwnd;
> -       u32 credits;
>
>         dprintk("RPC:       %s: incoming rep %p\n", __func__, rep);

You may also want to remove the extra header len checks from here.
Header len validity is already checked
in rpcrdma_update_granted_credits() function call before scheduling wq.

>
> @@ -930,15 +929,9 @@ out:
>         if (req->rl_nchunks)
>                 r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt, req);
>
> -       credits = be32_to_cpu(headerp->rm_credit);
> -       if (credits == 0)
> -               credits = 1;    /* don't deadlock */
> -       else if (credits > r_xprt->rx_buf.rb_max_requests)
> -               credits = r_xprt->rx_buf.rb_max_requests;
> -
>         spin_lock_bh(&xprt->transport_lock);
>         cwnd = xprt->cwnd;
> -       xprt->cwnd = credits << RPC_CWNDSHIFT;
> +       xprt->cwnd = atomic_read(&r_xprt->rx_buf.rb_credits) << RPC_CWNDSHIFT;
>         if (xprt->cwnd > cwnd)
>                 xprt_release_rqst_cong(rqst->rq_task);
>
> diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
> index 878f1bf..fc1ef5f 100644
> --- a/net/sunrpc/xprtrdma/verbs.c
> +++ b/net/sunrpc/xprtrdma/verbs.c
> @@ -190,6 +190,28 @@ rpcrdma_receive_worker(struct work_struct *work)
>         rpcrdma_reply_handler(rep);
>  }
>
> +/* Perform basic sanity checking to avoid using garbage
> + * to update the credit grant value.
> + */
> +static void
> +rpcrdma_update_granted_credits(struct rpcrdma_rep *rep)
> +{
> +       struct rpcrdma_msg *rmsgp = rdmab_to_msg(rep->rr_rdmabuf);
> +       struct rpcrdma_buffer *buffer = &rep->rr_rxprt->rx_buf;
> +       u32 credits;
> +
> +       if (rep->rr_len < RPCRDMA_HDRLEN_ERR)
> +               return;
> +
> +       credits = be32_to_cpu(rmsgp->rm_credit);
> +       if (credits == 0)
> +               credits = 1;    /* don't deadlock */
> +       else if (credits > buffer->rb_max_requests)
> +               credits = buffer->rb_max_requests;
> +
> +       atomic_set(&buffer->rb_credits, credits);
> +}
> +
>  static void
>  rpcrdma_recvcq_process_wc(struct ib_wc *wc)
>  {
> @@ -211,7 +233,8 @@ rpcrdma_recvcq_process_wc(struct ib_wc *wc)
>         ib_dma_sync_single_for_cpu(rep->rr_device,
>                                    rdmab_addr(rep->rr_rdmabuf),
>                                    rep->rr_len, DMA_FROM_DEVICE);
> -       prefetch(rdmab_to_msg(rep->rr_rdmabuf));

do you really want to remove prefetch()?

> +
> +       rpcrdma_update_granted_credits(rep);
>
>  out_schedule:
>         queue_work(rpcrdma_receive_wq, &rep->rr_work);
> @@ -330,6 +353,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
>  connected:
>                 dprintk("RPC:       %s: %sconnected\n",
>                                         __func__, connstate > 0 ? "" : "dis");
> +               atomic_set(&xprt->rx_buf.rb_credits, 1);
>                 ep->rep_connected = connstate;
>                 rpcrdma_conn_func(ep);
>                 wake_up_all(&ep->rep_connect_wait);
> @@ -943,6 +967,7 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
>         buf->rb_max_requests = r_xprt->rx_data.max_requests;
>         buf->rb_bc_srv_max_requests = 0;
>         spin_lock_init(&buf->rb_lock);
> +       atomic_set(&buf->rb_credits, 1);

Will this give a slow start to server initially? should it be rb_max_requests?
I am not sure, just raising a flag to bring your notice.

>
>         rc = ia->ri_ops->ro_init(r_xprt);
>         if (rc)
> diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
> index bf98c67..efd6fa7 100644
> --- a/net/sunrpc/xprtrdma/xprt_rdma.h
> +++ b/net/sunrpc/xprtrdma/xprt_rdma.h
> @@ -312,6 +312,7 @@ struct rpcrdma_buffer {
>         struct list_head        rb_send_bufs;
>         struct list_head        rb_recv_bufs;
>         u32                     rb_max_requests;
> +       atomic_t                rb_credits;     /* most recent credit grant */
>
>         u32                     rb_bc_srv_max_requests;
>         spinlock_t              rb_reqslock;    /* protect rb_allreqs */
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html