2017-10-10 17:31:48

by Trond Myklebust

Subject: [PATCH 1/2] SUNRPC: Fix tracepoint storage issues with svc_recv and svc_rqst_status

There is no guarantee that either the request or the svc_xprt still exists
by the time we get round to printing the trace message.

Signed-off-by: Trond Myklebust <[email protected]>
---
include/trace/events/sunrpc.h | 17 ++++++++++-------
1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 8a707f8a41c3..8a13e3903839 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -455,20 +455,22 @@ TRACE_EVENT(svc_recv,
TP_ARGS(rqst, status),

TP_STRUCT__entry(
- __field(struct sockaddr *, addr)
__field(__be32, xid)
__field(int, status)
__field(unsigned long, flags)
+ __dynamic_array(unsigned char, addr, rqst->rq_addrlen)
),

TP_fast_assign(
- __entry->addr = (struct sockaddr *)&rqst->rq_addr;
__entry->xid = status > 0 ? rqst->rq_xid : 0;
__entry->status = status;
__entry->flags = rqst->rq_flags;
+ memcpy(__get_dynamic_array(addr),
+ &rqst->rq_addr, rqst->rq_addrlen);
),

- TP_printk("addr=%pIScp xid=0x%x status=%d flags=%s", __entry->addr,
+ TP_printk("addr=%pIScp xid=0x%x status=%d flags=%s",
+ (struct sockaddr *)__get_dynamic_array(addr),
be32_to_cpu(__entry->xid), __entry->status,
show_rqstp_flags(__entry->flags))
);
@@ -513,22 +515,23 @@ DECLARE_EVENT_CLASS(svc_rqst_status,
TP_ARGS(rqst, status),

TP_STRUCT__entry(
- __field(struct sockaddr *, addr)
__field(__be32, xid)
- __field(int, dropme)
__field(int, status)
__field(unsigned long, flags)
+ __dynamic_array(unsigned char, addr, rqst->rq_addrlen)
),

TP_fast_assign(
- __entry->addr = (struct sockaddr *)&rqst->rq_addr;
__entry->xid = rqst->rq_xid;
__entry->status = status;
__entry->flags = rqst->rq_flags;
+ memcpy(__get_dynamic_array(addr),
+ &rqst->rq_addr, rqst->rq_addrlen);
),

TP_printk("addr=%pIScp rq_xid=0x%x status=%d flags=%s",
- __entry->addr, be32_to_cpu(__entry->xid),
+ (struct sockaddr *)__get_dynamic_array(addr),
+ be32_to_cpu(__entry->xid),
__entry->status, show_rqstp_flags(__entry->flags))
);

--
2.13.6
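
For reference, this is how the svc_recv event reads with the patch applied,
assembled from the hunks above. The TP_PROTO line is not visible in the diff
context and is reconstructed here from TP_ARGS, so treat it as an assumption
rather than a quote of the header:

    TRACE_EVENT(svc_recv,
        /* TP_PROTO reconstructed from TP_ARGS; not shown in the diff context */
        TP_PROTO(struct svc_rqst *rqst, int status),

        TP_ARGS(rqst, status),

        TP_STRUCT__entry(
            __field(__be32, xid)
            __field(int, status)
            __field(unsigned long, flags)
            /* copy the address into the ring buffer instead of storing a pointer */
            __dynamic_array(unsigned char, addr, rqst->rq_addrlen)
        ),

        TP_fast_assign(
            __entry->xid = status > 0 ? rqst->rq_xid : 0;
            __entry->status = status;
            __entry->flags = rqst->rq_flags;
            memcpy(__get_dynamic_array(addr),
                   &rqst->rq_addr, rqst->rq_addrlen);
        ),

        TP_printk("addr=%pIScp xid=0x%x status=%d flags=%s",
                  (struct sockaddr *)__get_dynamic_array(addr),
                  be32_to_cpu(__entry->xid), __entry->status,
                  show_rqstp_flags(__entry->flags))
    );

The old version recorded only a struct sockaddr pointer, which may point to
freed memory by the time the trace record is printed; copying the address into
a __dynamic_array() makes the trace record self-contained.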



2017-10-10 17:31:49

by Trond Myklebust

Subject: [PATCH 2/2] SUNRPC: Improve ordering of transport processing

Since it can take a while before a specific thread gets scheduled, it
is better to just implement a first come first served queue mechanism.
That way, if a thread is already scheduled and is idle, it can pick up
the work to do from the queue.

Signed-off-by: Trond Myklebust <[email protected]>
---
include/linux/sunrpc/svc.h | 1 +
net/sunrpc/svc_xprt.c | 100 ++++++++++++++-------------------------------
2 files changed, 31 insertions(+), 70 deletions(-)

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 38f561b2dda3..23c4d6496aac 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -46,6 +46,7 @@ struct svc_pool {
struct svc_pool_stats sp_stats; /* statistics on pool operation */
#define SP_TASK_PENDING (0) /* still work to do even if no
* xprt is queued. */
+#define SP_CONGESTED (1)
unsigned long sp_flags;
} ____cacheline_aligned_in_smp;

diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index d16a8b423c20..9b29c53281a8 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -380,7 +380,6 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
struct svc_pool *pool;
struct svc_rqst *rqstp = NULL;
int cpu;
- bool queued = false;

if (!svc_xprt_has_something_to_do(xprt))
goto out;
@@ -401,58 +400,25 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)

atomic_long_inc(&pool->sp_stats.packets);

-redo_search:
+ dprintk("svc: transport %p put into queue\n", xprt);
+ spin_lock_bh(&pool->sp_lock);
+ list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
+ pool->sp_stats.sockets_queued++;
+ spin_unlock_bh(&pool->sp_lock);
+
/* find a thread for this xprt */
rcu_read_lock();
list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) {
- /* Do a lockless check first */
- if (test_bit(RQ_BUSY, &rqstp->rq_flags))
+ if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags))
continue;
-
- /*
- * Once the xprt has been queued, it can only be dequeued by
- * the task that intends to service it. All we can do at that
- * point is to try to wake this thread back up so that it can
- * do so.
- */
- if (!queued) {
- spin_lock_bh(&rqstp->rq_lock);
- if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags)) {
- /* already busy, move on... */
- spin_unlock_bh(&rqstp->rq_lock);
- continue;
- }
-
- /* this one will do */
- rqstp->rq_xprt = xprt;
- svc_xprt_get(xprt);
- spin_unlock_bh(&rqstp->rq_lock);
- }
- rcu_read_unlock();
-
atomic_long_inc(&pool->sp_stats.threads_woken);
wake_up_process(rqstp->rq_task);
- put_cpu();
- goto out;
- }
- rcu_read_unlock();
-
- /*
- * We didn't find an idle thread to use, so we need to queue the xprt.
- * Do so and then search again. If we find one, we can't hook this one
- * up to it directly but we can wake the thread up in the hopes that it
- * will pick it up once it searches for a xprt to service.
- */
- if (!queued) {
- queued = true;
- dprintk("svc: transport %p put into queue\n", xprt);
- spin_lock_bh(&pool->sp_lock);
- list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
- pool->sp_stats.sockets_queued++;
- spin_unlock_bh(&pool->sp_lock);
- goto redo_search;
+ goto out_unlock;
}
+ set_bit(SP_CONGESTED, &pool->sp_flags);
rqstp = NULL;
+out_unlock:
+ rcu_read_unlock();
put_cpu();
out:
trace_svc_xprt_do_enqueue(xprt, rqstp);
@@ -721,38 +687,25 @@ rqst_should_sleep(struct svc_rqst *rqstp)

static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
{
- struct svc_xprt *xprt;
struct svc_pool *pool = rqstp->rq_pool;
long time_left = 0;

/* rq_xprt should be clear on entry */
WARN_ON_ONCE(rqstp->rq_xprt);

- /* Normally we will wait up to 5 seconds for any required
- * cache information to be provided.
- */
- rqstp->rq_chandle.thread_wait = 5*HZ;
-
- xprt = svc_xprt_dequeue(pool);
- if (xprt) {
- rqstp->rq_xprt = xprt;
-
- /* As there is a shortage of threads and this request
- * had to be queued, don't allow the thread to wait so
- * long for cache updates.
- */
- rqstp->rq_chandle.thread_wait = 1*HZ;
- clear_bit(SP_TASK_PENDING, &pool->sp_flags);
- return xprt;
- }
+ rqstp->rq_xprt = svc_xprt_dequeue(pool);
+ if (rqstp->rq_xprt)
+ goto out_found;

/*
* We have to be able to interrupt this wait
* to bring down the daemons ...
*/
set_current_state(TASK_INTERRUPTIBLE);
+ smp_mb__before_atomic();
+ clear_bit(SP_CONGESTED, &pool->sp_flags);
clear_bit(RQ_BUSY, &rqstp->rq_flags);
- smp_mb();
+ smp_mb__after_atomic();

if (likely(rqst_should_sleep(rqstp)))
time_left = schedule_timeout(timeout);
@@ -761,13 +714,11 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)

try_to_freeze();

- spin_lock_bh(&rqstp->rq_lock);
set_bit(RQ_BUSY, &rqstp->rq_flags);
- spin_unlock_bh(&rqstp->rq_lock);
-
- xprt = rqstp->rq_xprt;
- if (xprt != NULL)
- return xprt;
+ smp_mb__after_atomic();
+ rqstp->rq_xprt = svc_xprt_dequeue(pool);
+ if (rqstp->rq_xprt)
+ goto out_found;

if (!time_left)
atomic_long_inc(&pool->sp_stats.threads_timedout);
@@ -775,6 +726,15 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
if (signalled() || kthread_should_stop())
return ERR_PTR(-EINTR);
return ERR_PTR(-EAGAIN);
+out_found:
+ /* Normally we will wait up to 5 seconds for any required
+ * cache information to be provided.
+ */
+ if (!test_bit(SP_CONGESTED, &pool->sp_flags))
+ rqstp->rq_chandle.thread_wait = 5*HZ;
+ else
+ rqstp->rq_chandle.thread_wait = 1*HZ;
+ return rqstp->rq_xprt;
}

static void svc_add_new_temp_xprt(struct svc_serv *serv, struct svc_xprt *newxpt)
--
2.13.6
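
For reference, a rough sketch of svc_xprt_do_enqueue() as it reads with the
patch applied, assembled from the hunk above. The cpu/pool lookup lines are
reconstructed from the unchanged context (get_cpu()/put_cpu() and the
sp_stats.packets counter do appear in the diff) and other unchanged lines are
elided, so treat those parts as assumptions:

    void svc_xprt_do_enqueue(struct svc_xprt *xprt)
    {
        struct svc_pool *pool;
        struct svc_rqst *rqstp = NULL;
        int cpu;

        if (!svc_xprt_has_something_to_do(xprt))
            goto out;

        cpu = get_cpu();
        pool = svc_pool_for_cpu(xprt->xpt_server, cpu); /* reconstructed, not in the diff */

        atomic_long_inc(&pool->sp_stats.packets);

        /* Always queue the transport first, first come first served ... */
        dprintk("svc: transport %p put into queue\n", xprt);
        spin_lock_bh(&pool->sp_lock);
        list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
        pool->sp_stats.sockets_queued++;
        spin_unlock_bh(&pool->sp_lock);

        /* ... then wake at most one idle thread to service the queue. */
        rcu_read_lock();
        list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) {
            if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags))
                continue;
            atomic_long_inc(&pool->sp_stats.threads_woken);
            wake_up_process(rqstp->rq_task);
            goto out_unlock;
        }
        /* Every thread is busy: note the congestion for the dequeue side. */
        set_bit(SP_CONGESTED, &pool->sp_flags);
        rqstp = NULL;
    out_unlock:
        rcu_read_unlock();
        put_cpu();
    out:
        trace_svc_xprt_do_enqueue(xprt, rqstp);
    }

The key difference from the old redo_search logic is that the xprt is never
handed to a particular rqstp (there is no rqstp->rq_xprt assignment and no
svc_xprt_get() in this path any more); whichever thread runs first simply
dequeues it in svc_get_next_xprt().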


2017-10-19 19:38:25

by Chuck Lever

Subject: Re: [PATCH 2/2] SUNRPC: Improve ordering of transport processing


> On Oct 10, 2017, at 1:31 PM, Trond Myklebust <[email protected]> wrote:
>
> Since it can take a while before a specific thread gets scheduled, it
> is better to just implement a first come first served queue mechanism.
> That way, if a thread is already scheduled and is idle, it can pick up
> the work to do from the queue.
>
> Signed-off-by: Trond Myklebust <[email protected]>

Tested-by: Chuck Lever <[email protected]>

Light performance testing, and a git software build / regression
suite run on NFSv4.1. All on NFS/RDMA.



--
Chuck Lever




2017-10-19 20:33:47

by J. Bruce Fields

Subject: Re: [PATCH 2/2] SUNRPC: Improve ordering of transport processing

On Thu, Oct 19, 2017 at 03:38:17PM -0400, Chuck Lever wrote:
>
> > On Oct 10, 2017, at 1:31 PM, Trond Myklebust <[email protected]> wrote:
> >
> > Since it can take a while before a specific thread gets scheduled, it
> > is better to just implement a first come first served queue mechanism.
> > That way, if a thread is already scheduled and is idle, it can pick up
> > the work to do from the queue.
> >
> > Signed-off-by: Trond Myklebust <[email protected]>
>
> Tested-by: Chuck Lever <[email protected]>
>
> Light performance testing, and a git software build / regression
> suite run on NFSv4.1. All on NFS/RDMA.

Thanks! And sorry for the slow response to this patch. The basic idea
sounds fine; I just haven't found the time to understand the code change
yet!

--b.


2017-11-08 22:29:27

by J. Bruce Fields

Subject: Re: [PATCH 2/2] SUNRPC: Improve ordering of transport processing

On Thu, Oct 19, 2017 at 04:33:47PM -0400, bfields wrote:
> On Thu, Oct 19, 2017 at 03:38:17PM -0400, Chuck Lever wrote:
> >
> > > On Oct 10, 2017, at 1:31 PM, Trond Myklebust <[email protected]> wrote:
> > >
> > > Since it can take a while before a specific thread gets scheduled, it
> > > is better to just implement a first come first served queue mechanism.
> > > That way, if a thread is already scheduled and is idle, it can pick up
> > > the work to do from the queue.
> > >
> > > Signed-off-by: Trond Myklebust <[email protected]>
> >
> > Tested-by: Chuck Lever <[email protected]>
> >
> > Light performance testing, and a git software build / regression
> > suite run on NFSv4.1. All on NFS/RDMA.
>
> Thanks! And sorry for the slow response to this patch. The basic idea
> sounds fine; I just haven't found the time to understand the code change
> yet!

Sorry, I'm slow. Took me a little staring to realize that this:

> > > @@ -380,7 +380,6 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
...
> > > - /* this one will do */
> > > - rqstp->rq_xprt = xprt;

is the main change. We'll let any thread running in svc_recv() pick up
an xprt from the queue rather than assigning an xprt to a specific
thread here in enqueue and then waiting for that specific thread to get
to it.

Makes sense, thanks, applied!

--b.
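
To put that summary next to the resulting code, here is a sketch of
svc_get_next_xprt() as it reads with both hunks applied, assembled from the
patch above; unchanged lines between the hunks are elided and marked, so the
elided parts are assumptions rather than quotes:

    static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
    {
        struct svc_pool *pool = rqstp->rq_pool;
        long time_left = 0;

        /* rq_xprt should be clear on entry */
        WARN_ON_ONCE(rqstp->rq_xprt);

        /* Fast path: work is already queued, take it immediately. */
        rqstp->rq_xprt = svc_xprt_dequeue(pool);
        if (rqstp->rq_xprt)
            goto out_found;

        /* Slow path: mark ourselves idle, then sleep until woken. */
        set_current_state(TASK_INTERRUPTIBLE);
        smp_mb__before_atomic();
        clear_bit(SP_CONGESTED, &pool->sp_flags);
        clear_bit(RQ_BUSY, &rqstp->rq_flags);
        smp_mb__after_atomic();

        if (likely(rqst_should_sleep(rqstp)))
            time_left = schedule_timeout(timeout);

        /* ... unchanged lines elided ... */
        try_to_freeze();

        set_bit(RQ_BUSY, &rqstp->rq_flags);
        smp_mb__after_atomic();
        rqstp->rq_xprt = svc_xprt_dequeue(pool);    /* second chance after waking */
        if (rqstp->rq_xprt)
            goto out_found;

        if (!time_left)
            atomic_long_inc(&pool->sp_stats.threads_timedout);
        if (signalled() || kthread_should_stop())
            return ERR_PTR(-EINTR);
        return ERR_PTR(-EAGAIN);

    out_found:
        /* SP_CONGESTED, set by the enqueue side when no thread was idle,
         * decides how long to wait for cache upcalls. */
        if (!test_bit(SP_CONGESTED, &pool->sp_flags))
            rqstp->rq_chandle.thread_wait = 5*HZ;
        else
            rqstp->rq_chandle.thread_wait = 1*HZ;
        return rqstp->rq_xprt;
    }

The intent appears to be that clear_bit(RQ_BUSY) followed by
smp_mb__after_atomic() here pairs with test_and_set_bit(RQ_BUSY) in
svc_xprt_do_enqueue(): either the enqueuer finds the thread idle and wakes it,
or the thread sees the newly queued xprt on its next svc_xprt_dequeue().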