2014-07-25 03:59:23

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH 1/3] SUNRPC: Reduce contention in svc_xprt_enqueue()

Ensure that all callers of svc_xprt_enqueue() except svc_xprt_received()
check the value of XPT_BUSY before attempting to grab spinlocks etc.
This is to avoid situations such as the following "perf" trace,
which shows heavy contention on the pool spinlock:

54.15% nfsd [kernel.kallsyms] [k] _raw_spin_lock_bh
|
--- _raw_spin_lock_bh
|
|--71.43%-- svc_xprt_enqueue
| |
| |--50.31%-- svc_reserve
| |
| |--31.35%-- svc_xprt_received
| |
| |--18.34%-- svc_tcp_data_ready
...

Signed-off-by: Trond Myklebust <[email protected]>
---
net/sunrpc/svc_xprt.c | 25 +++++++++++++++++--------
1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index b4737fbdec13..54a761fa6351 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -23,6 +23,7 @@ static int svc_deferred_recv(struct svc_rqst *rqstp);
static struct cache_deferred_req *svc_defer(struct cache_req *req);
static void svc_age_temp_xprts(unsigned long closure);
static void svc_delete_xprt(struct svc_xprt *xprt);
+static void svc_xprt_do_enqueue(struct svc_xprt *xprt);

/* apparently the "standard" is that clients close
* idle connections after 5 minutes, servers after
@@ -222,11 +223,12 @@ static void svc_xprt_received(struct svc_xprt *xprt)
if (!test_bit(XPT_BUSY, &xprt->xpt_flags))
return;
/* As soon as we clear busy, the xprt could be closed and
- * 'put', so we need a reference to call svc_xprt_enqueue with:
+ * 'put', so we need a reference to call svc_xprt_do_enqueue with:
*/
svc_xprt_get(xprt);
+ smp_mb__before_clear_bit();
clear_bit(XPT_BUSY, &xprt->xpt_flags);
- svc_xprt_enqueue(xprt);
+ svc_xprt_do_enqueue(xprt);
svc_xprt_put(xprt);
}

@@ -335,12 +337,7 @@ static bool svc_xprt_has_something_to_do(struct svc_xprt *xprt)
return false;
}

-/*
- * Queue up a transport with data pending. If there are idle nfsd
- * processes, wake 'em up.
- *
- */
-void svc_xprt_enqueue(struct svc_xprt *xprt)
+static void svc_xprt_do_enqueue(struct svc_xprt *xprt)
{
struct svc_pool *pool;
struct svc_rqst *rqstp;
@@ -398,6 +395,18 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
out_unlock:
spin_unlock_bh(&pool->sp_lock);
}
+
+/*
+ * Queue up a transport with data pending. If there are idle nfsd
+ * processes, wake 'em up.
+ *
+ */
+void svc_xprt_enqueue(struct svc_xprt *xprt)
+{
+ if (test_bit(XPT_BUSY, &xprt->xpt_flags))
+ return;
+ svc_xprt_do_enqueue(xprt);
+}
EXPORT_SYMBOL_GPL(svc_xprt_enqueue);

/*
--
1.9.3



2014-07-25 03:59:25

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH 2/3] SUNRPC: svc_tcp_write_space: don't clear SOCK_NOSPACE prematurely

If requests are queued in the socket inbuffer waiting for an
svc_tcp_has_wspace() requirement to be satisfied, then we do not want
to clear the SOCK_NOSPACE flag until we've satisfied that requirement.

Signed-off-by: Trond Myklebust <[email protected]>
---
net/sunrpc/svcsock.c | 39 +++++++++++++++++++++------------------
1 file changed, 21 insertions(+), 18 deletions(-)

diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index b507cd327d9b..7322ea1164fd 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -446,11 +446,31 @@ static void svc_write_space(struct sock *sk)
}
}

+static int svc_tcp_has_wspace(struct svc_xprt *xprt)
+{
+ struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
+ struct svc_serv *serv = svsk->sk_xprt.xpt_server;
+ int required;
+
+ if (test_bit(XPT_LISTENER, &xprt->xpt_flags))
+ return 1;
+ required = atomic_read(&xprt->xpt_reserved) + serv->sv_max_mesg;
+ if (sk_stream_wspace(svsk->sk_sk) >= required ||
+ (sk_stream_min_wspace(svsk->sk_sk) == 0 &&
+ atomic_read(&xprt->xpt_reserved) == 0))
+ return 1;
+ set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
+ return 0;
+}
+
static void svc_tcp_write_space(struct sock *sk)
{
+ struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data);
struct socket *sock = sk->sk_socket;

- if (sk_stream_is_writeable(sk) && sock)
+ if (!sk_stream_is_writeable(sk) || !sock)
+ return;
+ if (!svsk || svc_tcp_has_wspace(&svsk->sk_xprt))
clear_bit(SOCK_NOSPACE, &sock->flags);
svc_write_space(sk);
}
@@ -1197,23 +1217,6 @@ static void svc_tcp_prep_reply_hdr(struct svc_rqst *rqstp)
svc_putnl(resv, 0);
}

-static int svc_tcp_has_wspace(struct svc_xprt *xprt)
-{
- struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
- struct svc_serv *serv = svsk->sk_xprt.xpt_server;
- int required;
-
- if (test_bit(XPT_LISTENER, &xprt->xpt_flags))
- return 1;
- required = atomic_read(&xprt->xpt_reserved) + serv->sv_max_mesg;
- if (sk_stream_wspace(svsk->sk_sk) >= required ||
- (sk_stream_min_wspace(svsk->sk_sk) == 0 &&
- atomic_read(&xprt->xpt_reserved) == 0))
- return 1;
- set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
- return 0;
-}
-
static struct svc_xprt *svc_tcp_create(struct svc_serv *serv,
struct net *net,
struct sockaddr *sa, int salen,
--
1.9.3


2014-07-29 19:31:38

by J. Bruce Fields

[permalink] [raw]
Subject: Re: [PATCH 1/3] SUNRPC: Reduce contention in svc_xprt_enqueue()

All three patches look good to me, thanks.

From private email, this:

On Thu, Jul 24, 2014 at 11:59:31PM -0400, Trond Myklebust wrote:
> @@ -222,11 +223,12 @@ static void svc_xprt_received(struct svc_xprt *xprt)
> if (!test_bit(XPT_BUSY, &xprt->xpt_flags))
> return;
> /* As soon as we clear busy, the xprt could be closed and
> - * 'put', so we need a reference to call svc_xprt_enqueue with:
> + * 'put', so we need a reference to call svc_xprt_do_enqueue with:
> */
> svc_xprt_get(xprt);
> + smp_mb__before_clear_bit();

triggered a warning about smp_mb__before_clear_bit noticed by the kbuild
robot. Looks like that was due to
febdbfe8a91ce0d11939d4940b592eb0dba8d663 "arch: Prepare for
smp_mb__{before,after}_atomic()".

You questioned whether deprecating smp_mb__{before,after}_clear_bit was
an unnecessary burden on people maintaining stable kernels or doing
backports more generally. Cc'ing some addresses from that commit.

Whatever--I'll probably just do the clear_bit->before_atomic
replacement and apply unless there's some objection.

--b.

> clear_bit(XPT_BUSY, &xprt->xpt_flags);
> - svc_xprt_enqueue(xprt);
> + svc_xprt_do_enqueue(xprt);
> svc_xprt_put(xprt);
> }
>
> @@ -335,12 +337,7 @@ static bool svc_xprt_has_something_to_do(struct svc_xprt *xprt)
> return false;
> }
>
> -/*
> - * Queue up a transport with data pending. If there are idle nfsd
> - * processes, wake 'em up.
> - *
> - */
> -void svc_xprt_enqueue(struct svc_xprt *xprt)
> +static void svc_xprt_do_enqueue(struct svc_xprt *xprt)
> {
> struct svc_pool *pool;
> struct svc_rqst *rqstp;
> @@ -398,6 +395,18 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
> out_unlock:
> spin_unlock_bh(&pool->sp_lock);
> }
> +
> +/*
> + * Queue up a transport with data pending. If there are idle nfsd
> + * processes, wake 'em up.
> + *
> + */
> +void svc_xprt_enqueue(struct svc_xprt *xprt)
> +{
> + if (test_bit(XPT_BUSY, &xprt->xpt_flags))
> + return;
> + svc_xprt_do_enqueue(xprt);
> +}
> EXPORT_SYMBOL_GPL(svc_xprt_enqueue);
>
> /*
> --
> 1.9.3
>

2014-07-25 03:59:26

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH 3/3] SUNRPC: Allow svc_reserve() to notify TCP socket that space has been freed

Signed-off-by: Trond Myklebust <[email protected]>
---
include/linux/sunrpc/svc_xprt.h | 1 +
net/sunrpc/svc_xprt.c | 2 ++
net/sunrpc/svcsock.c | 9 +++++++++
3 files changed, 12 insertions(+)

diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 7235040a19b2..8f241ac6934b 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -25,6 +25,7 @@ struct svc_xprt_ops {
void (*xpo_detach)(struct svc_xprt *);
void (*xpo_free)(struct svc_xprt *);
int (*xpo_secure_port)(struct svc_rqst *);
+ void (*xpo_adjust_wspace)(struct svc_xprt *);
};

struct svc_xprt_class {
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 54a761fa6351..32647b2a6a34 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -448,6 +448,8 @@ void svc_reserve(struct svc_rqst *rqstp, int space)
atomic_sub((rqstp->rq_reserved - space), &xprt->xpt_reserved);
rqstp->rq_reserved = space;

+ if (xprt->xpt_ops->xpo_adjust_wspace)
+ xprt->xpt_ops->xpo_adjust_wspace(xprt);
svc_xprt_enqueue(xprt);
}
}
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 7322ea1164fd..72597d7fe60a 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -475,6 +475,14 @@ static void svc_tcp_write_space(struct sock *sk)
svc_write_space(sk);
}

+static void svc_tcp_adjust_wspace(struct svc_xprt *xprt)
+{
+ struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
+
+ if (svc_tcp_has_wspace(xprt))
+ clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
+}
+
/*
* See net/ipv6/ip_sockglue.c : ip_cmsg_recv_pktinfo
*/
@@ -1288,6 +1296,7 @@ static struct svc_xprt_ops svc_tcp_ops = {
.xpo_has_wspace = svc_tcp_has_wspace,
.xpo_accept = svc_tcp_accept,
.xpo_secure_port = svc_sock_secure_port,
+ .xpo_adjust_wspace = svc_tcp_adjust_wspace,
};

static struct svc_xprt_class svc_tcp_class = {
--
1.9.3


2014-07-29 19:57:57

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH 1/3] SUNRPC: Reduce contention in svc_xprt_enqueue()

On Tue, Jul 29, 2014 at 03:31:08PM -0400, Bruce Fields wrote:
> All three patches look good to me, thanks.
>
> From private email, this:
>
> On Thu, Jul 24, 2014 at 11:59:31PM -0400, Trond Myklebust wrote:
> > @@ -222,11 +223,12 @@ static void svc_xprt_received(struct svc_xprt *xprt)
> > if (!test_bit(XPT_BUSY, &xprt->xpt_flags))
> > return;
> > /* As soon as we clear busy, the xprt could be closed and
> > - * 'put', so we need a reference to call svc_xprt_enqueue with:
> > + * 'put', so we need a reference to call svc_xprt_do_enqueue with:
> > */
> > svc_xprt_get(xprt);
> > + smp_mb__before_clear_bit();
>
> triggered a warning about smp_mb__before_clear_bit noticed by the kbuild
> robot. Looks like that was due to
> febdbfe8a91ce0d11939d4940b592eb0dba8d663 "arch: Prepare for
> smp_mb__{before,after}_atomic()".
>
> You questioned whether deprecating smp_mb__{before,after}_clear_bit was
> an unnecessary burden on people maintaining stable kernels or doing
> backports more generally. Cc'ing some addresses from that commit.

I absolutely do not care one whit for that. The kernel lives, deal with
it.

Memory barriers are hard enough, we do not need multiple versions of the
same thing just to confuse people.

> Whatever--I'll probably just replace do the clear_bit->before_atomic
> replacement and apply unless there's some objection.

The old API was preserved to allow for non-flag-day migration to the new
API. I should do a final sweep and then kill the old API.

The conversion shrank the memory barrier API by 4 variants while
covering more cases, which is an absolute win in my book.