2007-11-29 23:20:26

by Tom Tucker

[permalink] [raw]
Subject: [RFC,PATCH 09/38] svc: Add a transport function that checks for write space


In order to avoid blocking a service thread, the receive side checks
to see if there is sufficient write space to reply to the request.
Each transport has a different mechanism for determining if there is
enough write space to reply.

The code that checked for white space was coupled with code that
checked for CLOSE and CONN. These checks have been broken out into
separate statements to make the code easier to read.

Signed-off-by: Tom Tucker <[email protected]>
---

include/linux/sunrpc/svc_xprt.h | 1 +
net/sunrpc/svcsock.c | 60 +++++++++++++++++++++++++++++++++------
2 files changed, 51 insertions(+), 10 deletions(-)

diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 8501115..3adc8f3 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -10,6 +10,7 @@
#include <linux/sunrpc/svc.h>

struct svc_xprt_ops {
+ int (*xpo_has_wspace)(struct svc_xprt *);
int (*xpo_recvfrom)(struct svc_rqst *);
void (*xpo_prep_reply_hdr)(struct svc_rqst *);
int (*xpo_sendto)(struct svc_rqst *);
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 510ad45..b796244 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -269,22 +269,24 @@ svc_sock_enqueue(struct svc_sock *svsk)
BUG_ON(svsk->sk_pool != NULL);
svsk->sk_pool = pool;

- set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
- if (((atomic_read(&svsk->sk_reserved) + serv->sv_max_mesg)*2
- > svc_sock_wspace(svsk))
- && !test_bit(SK_CLOSE, &svsk->sk_flags)
- && !test_bit(SK_CONN, &svsk->sk_flags)) {
+ /* Handle pending connection */
+ if (test_bit(SK_CONN, &svsk->sk_flags))
+ goto process;
+
+ /* Handle close in-progress */
+ if (test_bit(SK_CLOSE, &svsk->sk_flags))
+ goto process;
+
+ /* Check if we have space to reply to a request */
+ if (!svsk->sk_xprt.xpt_ops->xpo_has_wspace(&svsk->sk_xprt)) {
/* Don't enqueue while not enough space for reply */
- dprintk("svc: socket %p no space, %d*2 > %ld, not enqueued\n",
- svsk->sk_sk, atomic_read(&svsk->sk_reserved)+serv->sv_max_mesg,
- svc_sock_wspace(svsk));
+ dprintk("svc: no write space, socket %p not enqueued\n", svsk);
svsk->sk_pool = NULL;
clear_bit(SK_BUSY, &svsk->sk_flags);
goto out_unlock;
}
- clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
-

+ process:
if (!list_empty(&pool->sp_threads)) {
rqstp = list_entry(pool->sp_threads.next,
struct svc_rqst,
@@ -897,6 +899,24 @@ static void svc_udp_prep_reply_hdr(struct svc_rqst *rqstp)
{
}

+static int svc_udp_has_wspace(struct svc_xprt *xprt)
+{
+ struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
+ struct svc_serv *serv = svsk->sk_server;
+ int required;
+
+ /*
+ * Set the SOCK_NOSPACE flag before checking the available
+ * sock space.
+ */
+ set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
+ required = atomic_read(&svsk->sk_reserved) + serv->sv_max_mesg;
+ if (required*2 > sock_wspace(svsk->sk_sk))
+ return 0;
+ clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
+ return 1;
+}
+
static struct svc_xprt_ops svc_udp_ops = {
.xpo_recvfrom = svc_udp_recvfrom,
.xpo_sendto = svc_udp_sendto,
@@ -904,6 +924,7 @@ static struct svc_xprt_ops svc_udp_ops = {
.xpo_detach = svc_sock_detach,
.xpo_free = svc_sock_free,
.xpo_prep_reply_hdr = svc_udp_prep_reply_hdr,
+ .xpo_has_wspace = svc_udp_has_wspace,
};

static struct svc_xprt_class svc_udp_class = {
@@ -1366,6 +1387,24 @@ static void svc_tcp_prep_reply_hdr(struct svc_rqst *rqstp)
svc_putnl(resv, 0);
}

+static int svc_tcp_has_wspace(struct svc_xprt *xprt)
+{
+ struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
+ struct svc_serv *serv = svsk->sk_server;
+ int required;
+
+ /*
+ * Set the SOCK_NOSPACE flag before checking the available
+ * sock space.
+ */
+ set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
+ required = atomic_read(&svsk->sk_reserved) + serv->sv_max_mesg;
+ if (required*2 > sk_stream_wspace(svsk->sk_sk))
+ return 0;
+ clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
+ return 1;
+}
+
static struct svc_xprt_ops svc_tcp_ops = {
.xpo_recvfrom = svc_tcp_recvfrom,
.xpo_sendto = svc_tcp_sendto,
@@ -1373,6 +1412,7 @@ static struct svc_xprt_ops svc_tcp_ops = {
.xpo_detach = svc_sock_detach,
.xpo_free = svc_sock_free,
.xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr,
+ .xpo_has_wspace = svc_tcp_has_wspace,
};

static struct svc_xprt_class svc_tcp_class = {


2007-11-30 20:46:47

by Chuck Lever III

[permalink] [raw]
Subject: Re: [RFC,PATCH 09/38] svc: Add a transport function that checks for write space

On Nov 29, 2007, at 5:40 PM, Tom Tucker wrote:
> In order to avoid blocking a service thread, the receive side checks
> to see if there is sufficient write space to reply to the request.
> Each transport has a different mechanism for determining if there is
> enough write space to reply.
>
> The code that checked for white space was coupled with code that

s/white space/write space/

> checked for CLOSE and CONN. These checks have been broken out into
> separate statements to make the code easier to read.
>
> Signed-off-by: Tom Tucker <[email protected]>
> ---
>
> include/linux/sunrpc/svc_xprt.h | 1 +
> net/sunrpc/svcsock.c | 60 ++++++++++++++++++++++++++++
> +++++------
> 2 files changed, 51 insertions(+), 10 deletions(-)
>
> diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/
> svc_xprt.h
> index 8501115..3adc8f3 100644
> --- a/include/linux/sunrpc/svc_xprt.h
> +++ b/include/linux/sunrpc/svc_xprt.h
> @@ -10,6 +10,7 @@
> #include <linux/sunrpc/svc.h>
>
> struct svc_xprt_ops {
> + int (*xpo_has_wspace)(struct svc_xprt *);
> int (*xpo_recvfrom)(struct svc_rqst *);
> void (*xpo_prep_reply_hdr)(struct svc_rqst *);
> int (*xpo_sendto)(struct svc_rqst *);
> diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
> index 510ad45..b796244 100644
> --- a/net/sunrpc/svcsock.c
> +++ b/net/sunrpc/svcsock.c
> @@ -269,22 +269,24 @@ svc_sock_enqueue(struct svc_sock *svsk)
> BUG_ON(svsk->sk_pool != NULL);
> svsk->sk_pool = pool;
>
> - set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
> - if (((atomic_read(&svsk->sk_reserved) + serv->sv_max_mesg)*2
> - > svc_sock_wspace(svsk))
> - && !test_bit(SK_CLOSE, &svsk->sk_flags)
> - && !test_bit(SK_CONN, &svsk->sk_flags)) {
> + /* Handle pending connection */
> + if (test_bit(SK_CONN, &svsk->sk_flags))
> + goto process;
> +
> + /* Handle close in-progress */
> + if (test_bit(SK_CLOSE, &svsk->sk_flags))
> + goto process;
> +
> + /* Check if we have space to reply to a request */
> + if (!svsk->sk_xprt.xpt_ops->xpo_has_wspace(&svsk->sk_xprt)) {
> /* Don't enqueue while not enough space for reply */
> - dprintk("svc: socket %p no space, %d*2 > %ld, not enqueued\n",
> - svsk->sk_sk, atomic_read(&svsk->sk_reserved)+serv->sv_max_mesg,
> - svc_sock_wspace(svsk));
> + dprintk("svc: no write space, socket %p not enqueued\n", svsk);

Since you remove the only callers of svc_sock_wspace here, you can
probably safely delete that function in this patch as well.

> svsk->sk_pool = NULL;
> clear_bit(SK_BUSY, &svsk->sk_flags);
> goto out_unlock;
> }
> - clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
> -
>
> + process:
> if (!list_empty(&pool->sp_threads)) {
> rqstp = list_entry(pool->sp_threads.next,
> struct svc_rqst,
> @@ -897,6 +899,24 @@ static void svc_udp_prep_reply_hdr(struct
> svc_rqst *rqstp)
> {
> }
>
> +static int svc_udp_has_wspace(struct svc_xprt *xprt)
> +{
> + struct svc_sock *svsk = container_of(xprt, struct svc_sock,
> sk_xprt);
> + struct svc_serv *serv = svsk->sk_server;
> + int required;
> +
> + /*
> + * Set the SOCK_NOSPACE flag before checking the available
> + * sock space.
> + */
> + set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
> + required = atomic_read(&svsk->sk_reserved) + serv->sv_max_mesg;

The result of the sum is unsigned, but then we stuff it into a signed
integer...

> + if (required*2 > sock_wspace(svsk->sk_sk))
> + return 0;

...and then this introduces a mixed sign comparison (harmless
AFAICT). Perhaps "required" should be an unsigned long.

Also, some may prefer "<< 1" to "* 2". I'm not sure it makes much
difference here. Arguably, it might be slightly better documentation
to double "required" before the if statement.

> + clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
> + return 1;
> +}
> +
> static struct svc_xprt_ops svc_udp_ops = {
> .xpo_recvfrom = svc_udp_recvfrom,
> .xpo_sendto = svc_udp_sendto,
> @@ -904,6 +924,7 @@ static struct svc_xprt_ops svc_udp_ops = {
> .xpo_detach = svc_sock_detach,
> .xpo_free = svc_sock_free,
> .xpo_prep_reply_hdr = svc_udp_prep_reply_hdr,
> + .xpo_has_wspace = svc_udp_has_wspace,
> };
>
> static struct svc_xprt_class svc_udp_class = {
> @@ -1366,6 +1387,24 @@ static void svc_tcp_prep_reply_hdr(struct
> svc_rqst *rqstp)
> svc_putnl(resv, 0);
> }
>
> +static int svc_tcp_has_wspace(struct svc_xprt *xprt)
> +{
> + struct svc_sock *svsk = container_of(xprt, struct svc_sock,
> sk_xprt);
> + struct svc_serv *serv = svsk->sk_server;
> + int required;
> +
> + /*
> + * Set the SOCK_NOSPACE flag before checking the available
> + * sock space.
> + */
> + set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
> + required = atomic_read(&svsk->sk_reserved) + serv->sv_max_mesg;

Ibid.

> + if (required*2 > sk_stream_wspace(svsk->sk_sk))
> + return 0;

Oddly sk_stream_wspace() returns an int, but sock_space() returns an
unsigned long. Sigh...

> + clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
> + return 1;
> +}
> +
> static struct svc_xprt_ops svc_tcp_ops = {
> .xpo_recvfrom = svc_tcp_recvfrom,
> .xpo_sendto = svc_tcp_sendto,
> @@ -1373,6 +1412,7 @@ static struct svc_xprt_ops svc_tcp_ops = {
> .xpo_detach = svc_sock_detach,
> .xpo_free = svc_sock_free,
> .xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr,
> + .xpo_has_wspace = svc_tcp_has_wspace,
> };
>
> static struct svc_xprt_class svc_tcp_class = {

--
Chuck Lever
chuck[dot]lever[at]oracle[dot]com

2007-11-30 21:35:19

by Tom Tucker

[permalink] [raw]
Subject: Re: [RFC,PATCH 09/38] svc: Add a transport function that checks for write space


On Fri, 2007-11-30 at 15:46 -0500, Chuck Lever wrote:
> On Nov 29, 2007, at 5:40 PM, Tom Tucker wrote:
> > In order to avoid blocking a service thread, the receive side checks
> > to see if there is sufficient write space to reply to the request.
> > Each transport has a different mechanism for determining if there is
> > enough write space to reply.
> >
> > The code that checked for white space was coupled with code that
>
> s/white space/write space/

ok.

>
> > checked for CLOSE and CONN. These checks have been broken out into
> > separate statements to make the code easier to read.
> >
> > Signed-off-by: Tom Tucker <[email protected]>
> > ---
> >
> > include/linux/sunrpc/svc_xprt.h | 1 +
> > net/sunrpc/svcsock.c | 60 ++++++++++++++++++++++++++++
> > +++++------
> > 2 files changed, 51 insertions(+), 10 deletions(-)
> >
> > diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/
> > svc_xprt.h
> > index 8501115..3adc8f3 100644
> > --- a/include/linux/sunrpc/svc_xprt.h
> > +++ b/include/linux/sunrpc/svc_xprt.h
> > @@ -10,6 +10,7 @@
> > #include <linux/sunrpc/svc.h>
> >
> > struct svc_xprt_ops {
> > + int (*xpo_has_wspace)(struct svc_xprt *);
> > int (*xpo_recvfrom)(struct svc_rqst *);
> > void (*xpo_prep_reply_hdr)(struct svc_rqst *);
> > int (*xpo_sendto)(struct svc_rqst *);
> > diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
> > index 510ad45..b796244 100644
> > --- a/net/sunrpc/svcsock.c
> > +++ b/net/sunrpc/svcsock.c
> > @@ -269,22 +269,24 @@ svc_sock_enqueue(struct svc_sock *svsk)
> > BUG_ON(svsk->sk_pool != NULL);
> > svsk->sk_pool = pool;
> >
> > - set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
> > - if (((atomic_read(&svsk->sk_reserved) + serv->sv_max_mesg)*2
> > - > svc_sock_wspace(svsk))
> > - && !test_bit(SK_CLOSE, &svsk->sk_flags)
> > - && !test_bit(SK_CONN, &svsk->sk_flags)) {
> > + /* Handle pending connection */
> > + if (test_bit(SK_CONN, &svsk->sk_flags))
> > + goto process;
> > +
> > + /* Handle close in-progress */
> > + if (test_bit(SK_CLOSE, &svsk->sk_flags))
> > + goto process;
> > +
> > + /* Check if we have space to reply to a request */
> > + if (!svsk->sk_xprt.xpt_ops->xpo_has_wspace(&svsk->sk_xprt)) {
> > /* Don't enqueue while not enough space for reply */
> > - dprintk("svc: socket %p no space, %d*2 > %ld, not enqueued\n",
> > - svsk->sk_sk, atomic_read(&svsk->sk_reserved)+serv->sv_max_mesg,
> > - svc_sock_wspace(svsk));
> > + dprintk("svc: no write space, socket %p not enqueued\n", svsk);
>
> Since you remove the only callers of svc_sock_wspace here, you can
> probably safely delete that function in this patch as well.
>

ok.

> > svsk->sk_pool = NULL;
> > clear_bit(SK_BUSY, &svsk->sk_flags);
> > goto out_unlock;
> > }
> > - clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
> > -
> >
> > + process:
> > if (!list_empty(&pool->sp_threads)) {
> > rqstp = list_entry(pool->sp_threads.next,
> > struct svc_rqst,
> > @@ -897,6 +899,24 @@ static void svc_udp_prep_reply_hdr(struct
> > svc_rqst *rqstp)
> > {
> > }
> >
> > +static int svc_udp_has_wspace(struct svc_xprt *xprt)
> > +{
> > + struct svc_sock *svsk = container_of(xprt, struct svc_sock,
> > sk_xprt);
> > + struct svc_serv *serv = svsk->sk_server;
> > + int required;
> > +
> > + /*
> > + * Set the SOCK_NOSPACE flag before checking the available
> > + * sock space.
> > + */
> > + set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
> > + required = atomic_read(&svsk->sk_reserved) + serv->sv_max_mesg;
>
> The result of the sum is unsigned, but then we stuff it into a signed
> integer...
>
> > + if (required*2 > sock_wspace(svsk->sk_sk))
> > + return 0;
>
> ...and then this introduces a mixed sign comparison (harmless
> AFAICT). Perhaps "required" should be an unsigned long.
>

So for svc_udp_has_wspace, it makes sense for required to
be unsigned, and then demote to signed on return. Yes?

>
> Also, some may prefer "<< 1" to "* 2". I'm not sure it makes much
> difference here. Arguably, it might be slightly better documentation
> to double "required" before the if statement.
>
> > + clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
> > + return 1;
> > +}
> > +
> > static struct svc_xprt_ops svc_udp_ops = {
> > .xpo_recvfrom = svc_udp_recvfrom,
> > .xpo_sendto = svc_udp_sendto,
> > @@ -904,6 +924,7 @@ static struct svc_xprt_ops svc_udp_ops = {
> > .xpo_detach = svc_sock_detach,
> > .xpo_free = svc_sock_free,
> > .xpo_prep_reply_hdr = svc_udp_prep_reply_hdr,
> > + .xpo_has_wspace = svc_udp_has_wspace,
> > };
> >
> > static struct svc_xprt_class svc_udp_class = {
> > @@ -1366,6 +1387,24 @@ static void svc_tcp_prep_reply_hdr(struct
> > svc_rqst *rqstp)
> > svc_putnl(resv, 0);
> > }
> >
> > +static int svc_tcp_has_wspace(struct svc_xprt *xprt)
> > +{
> > + struct svc_sock *svsk = container_of(xprt, struct svc_sock,
> > sk_xprt);
> > + struct svc_serv *serv = svsk->sk_server;
> > + int required;
> > +
> > + /*
> > + * Set the SOCK_NOSPACE flag before checking the available
> > + * sock space.
> > + */
> > + set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
> > + required = atomic_read(&svsk->sk_reserved) + serv->sv_max_mesg;
>
> Ibid.
>
> > + if (required*2 > sk_stream_wspace(svsk->sk_sk))
> > + return 0;
>
> Oddly sk_stream_wspace() returns an int, but sock_space() returns an
> unsigned long. Sigh...

For this one, let's leave required signed and add an explicit cast to
serv->sv_max_mesg. Sound ok?

What a mess...

>
> > + clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
> > + return 1;
> > +}
> > +
> > static struct svc_xprt_ops svc_tcp_ops = {
> > .xpo_recvfrom = svc_tcp_recvfrom,
> > .xpo_sendto = svc_tcp_sendto,
> > @@ -1373,6 +1412,7 @@ static struct svc_xprt_ops svc_tcp_ops = {
> > .xpo_detach = svc_sock_detach,
> > .xpo_free = svc_sock_free,
> > .xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr,
> > + .xpo_has_wspace = svc_tcp_has_wspace,
> > };
> >
> > static struct svc_xprt_class svc_tcp_class = {
>
> --
> Chuck Lever
> chuck[dot]lever[at]oracle[dot]com


2007-11-30 22:44:15

by Chuck Lever III

[permalink] [raw]
Subject: Re: [RFC,PATCH 09/38] svc: Add a transport function that checks for write space

On Nov 30, 2007, at 4:39 PM, Tom Tucker wrote:
> On Fri, 2007-11-30 at 15:46 -0500, Chuck Lever wrote:
>> On Nov 29, 2007, at 5:40 PM, Tom Tucker wrote:
>>> +static int svc_udp_has_wspace(struct svc_xprt *xprt)
>>> +{
>>> + struct svc_sock *svsk = container_of(xprt, struct svc_sock,
>>> sk_xprt);
>>> + struct svc_serv *serv = svsk->sk_server;
>>> + int required;
>>> +
>>> + /*
>>> + * Set the SOCK_NOSPACE flag before checking the available
>>> + * sock space.
>>> + */
>>> + set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
>>> + required = atomic_read(&svsk->sk_reserved) + serv->sv_max_mesg;
>>
>> The result of the sum is unsigned, but then we stuff it into a signed
>> integer...
>>
>>> + if (required*2 > sock_wspace(svsk->sk_sk))
>>> + return 0;
>>
>> ...and then this introduces a mixed sign comparison (harmless
>> AFAICT). Perhaps "required" should be an unsigned long.
>>
>
> So for svc_udp_has_wspace, it makes sense for required to
> be unsigned, and then demote to signed on return. Yes?
>
>>
>> Also, some may prefer "<< 1" to "* 2". I'm not sure it makes much
>> difference here. Arguably, it might be slightly better documentation
>> to double "required" before the if statement.
>>
>>> + clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
>>> + return 1;
>>> +}

As far as I can tell you never return "required." The result of
svc_udp_has_wspace() is really a boolean, right? Or did I miss your
point?

>>> +
>>> static struct svc_xprt_ops svc_udp_ops = {
>>> .xpo_recvfrom = svc_udp_recvfrom,
>>> .xpo_sendto = svc_udp_sendto,
>>> @@ -904,6 +924,7 @@ static struct svc_xprt_ops svc_udp_ops = {
>>> .xpo_detach = svc_sock_detach,
>>> .xpo_free = svc_sock_free,
>>> .xpo_prep_reply_hdr = svc_udp_prep_reply_hdr,
>>> + .xpo_has_wspace = svc_udp_has_wspace,
>>> };
>>>
>>> static struct svc_xprt_class svc_udp_class = {
>>> @@ -1366,6 +1387,24 @@ static void svc_tcp_prep_reply_hdr(struct
>>> svc_rqst *rqstp)
>>> svc_putnl(resv, 0);
>>> }
>>>
>>> +static int svc_tcp_has_wspace(struct svc_xprt *xprt)
>>> +{
>>> + struct svc_sock *svsk = container_of(xprt, struct svc_sock,
>>> sk_xprt);
>>> + struct svc_serv *serv = svsk->sk_server;
>>> + int required;
>>> +
>>> + /*
>>> + * Set the SOCK_NOSPACE flag before checking the available
>>> + * sock space.
>>> + */
>>> + set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
>>> + required = atomic_read(&svsk->sk_reserved) + serv->sv_max_mesg;
>>
>> Ibid.
>>
>>> + if (required*2 > sk_stream_wspace(svsk->sk_sk))
>>> + return 0;
>>
>> Oddly sk_stream_wspace() returns an int, but sock_space() returns an
>> unsigned long. Sigh...
>
> For this one, let's leave required signed and add an explicit cast to
> serv->sv_max_mesg. Sound ok?

If sk_reserved goes negative, it will be converted to unsigned, and
become a very large positive number. The result of the sum will be
recast back to an int when it's assigned to "required," and we
probably get a reasonable result. I doubt an explicit cast will
change things at all.

Instead, perhaps we should add an explicit check to ensure
sk_reserved is a reasonable positive value before doing any other
checks. (Likewise in the UDP case as well).

I wonder if this is really the correct write space check to use for
TCP, though. I remember fixing a similar issue in the RPC client a
long time ago -- both UDP and TCP used the same wspace check. It
resulted in the sk_write_space callback hammering on the RPC client,
and forward progress on TCP socket writes would slow to a crawl.

You probably want something like this instead:

set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);

required = atomic_read(&svsk->sk_reserved) + serv->sv_max_mesg;
wspace = sk_stream_wspace(svsk->sk_sk);

if (wspace < sk_stream_min_wspace(svsk->sk_sk))
return 0;
if (required * 2 > wspace)
return 0;

clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
return 1;

The first test mimics sk_stream_write_space() and xs_tcp_write_space
(). I'm still unsure what to do about the possibility of one of
these signed integers going negative on us.

Bruce? Neil? What sayest thou?

>>> + clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
>>> + return 1;
>>> +}
>>> +
>>> static struct svc_xprt_ops svc_tcp_ops = {
>>> .xpo_recvfrom = svc_tcp_recvfrom,
>>> .xpo_sendto = svc_tcp_sendto,
>>> @@ -1373,6 +1412,7 @@ static struct svc_xprt_ops svc_tcp_ops = {
>>> .xpo_detach = svc_sock_detach,
>>> .xpo_free = svc_sock_free,
>>> .xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr,
>>> + .xpo_has_wspace = svc_tcp_has_wspace,
>>> };
>>>
>>> static struct svc_xprt_class svc_tcp_class = {

--
Chuck Lever
chuck[dot]lever[at]oracle[dot]com

2007-12-10 20:39:05

by Tom Tucker

[permalink] [raw]
Subject: Re: [RFC,PATCH 09/38] svc: Add a transport function that checks for write space


On Fri, 2007-11-30 at 17:43 -0500, Chuck Lever wrote:
> On Nov 30, 2007, at 4:39 PM, Tom Tucker wrote:

[...snip...]

> >
> > For this one, let's leave required signed and add an explicit cast to
> > serv->sv_max_mesg. Sound ok?
>
> If sk_reserved goes negative, it will be converted to unsigned, and
> become a very large positive number. The result of the sum will be
> recast back to an int when it's assigned to "required," and we
> probably get a reasonable result. I doubt an explicit cast will
> change things at all.
>
> Instead, perhaps we should add an explicit check to ensure
> sk_reserved is a reasonable positive value before doing any other
> checks. (Likewise in the UDP case as well).
>
> I wonder if this is really the correct write space check to use for
> TCP, though. I remember fixing a similar issue in the RPC client a
> long time ago -- both UDP and TCP used the same wspace check. It
> resulted in the sk_write_space callback hammering on the RPC client,
> and forward progress on TCP socket writes would slow to a crawl.

For the server, the callback function is svc_write_space. Does the
sk_stream_wspace() < sk_stream_min_wspace() check belong in there as
well? I think this callback gets invoked whenever productive acks are
received for the connection. In other words, I don't see how it avoids
"hammering" the callback, rather it avoids fruitlessly waking up (i.e.
calling svc_xprt_enqueue) a transport.

I'm going to optimistically add it. Please let me know if there are
objections.

>
> You probably want something like this instead:
>
> set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
>
> required = atomic_read(&svsk->sk_reserved) + serv->sv_max_mesg;
> wspace = sk_stream_wspace(svsk->sk_sk);
>
> if (wspace < sk_stream_min_wspace(svsk->sk_sk))
> return 0;
> if (required * 2 > wspace)
> return 0;
>
> clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
> return 1;
>

I think this is good stuff and will add it unless someone has an
objection.

> The first test mimics sk_stream_write_space() and xs_tcp_write_space
> (). I'm still unsure what to do about the possibility of one of
> these signed integers going negative on us.
>
> Bruce? Neil? What sayest thou?
>
> >>> + clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
> >>> + return 1;
> >>> +}
> >>> +
> >>> static struct svc_xprt_ops svc_tcp_ops = {
> >>> .xpo_recvfrom = svc_tcp_recvfrom,
> >>> .xpo_sendto = svc_tcp_sendto,
> >>> @@ -1373,6 +1412,7 @@ static struct svc_xprt_ops svc_tcp_ops = {
> >>> .xpo_detach = svc_sock_detach,
> >>> .xpo_free = svc_sock_free,
> >>> .xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr,
> >>> + .xpo_has_wspace = svc_tcp_has_wspace,
> >>> };
> >>>
> >>> static struct svc_xprt_class svc_tcp_class = {
>
> --
> Chuck Lever
> chuck[dot]lever[at]oracle[dot]com
> -
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html