2007-11-29 23:20:22

by Tom Tucker

[permalink] [raw]
Subject: [RFC,PATCH 34/38] svc: Add /proc/sys/sunrpc/transport files


Add a file that when read lists the set of registered svc
transports.

Signed-off-by: Tom Tucker <[email protected]>
---

include/linux/sunrpc/svc_xprt.h | 2 ++
net/sunrpc/svc_xprt.c | 28 ++++++++++++++++++++++++++++
net/sunrpc/sysctl.c | 31 +++++++++++++++++++++++++++++++
3 files changed, 61 insertions(+), 0 deletions(-)

diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 36f8b09..c2fa41d 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -79,11 +79,13 @@ void svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt);
void svc_xprt_enqueue(struct svc_xprt *xprt);
int svc_port_is_privileged(struct sockaddr *sin);
void svc_delete_xprt(struct svc_xprt *xprt);
+int svc_print_xprts(char *buf, int maxlen);

static inline void svc_xprt_get(struct svc_xprt *xprt)
{
kref_get(&xprt->xpt_ref);
}
+
static inline void svc_xprt_set_local(struct svc_xprt *xprt,
struct sockaddr *sa, int salen)
{
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index b31ba0e..7416e66 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -93,6 +93,34 @@ int svc_unreg_xprt_class(struct svc_xprt_class *xcl)
}
EXPORT_SYMBOL_GPL(svc_unreg_xprt_class);

+/*
+ * Format the transport list for printing
+ */
+int svc_print_xprts(char *buf, int maxlen)
+{
+ struct list_head *le;
+ char tmpstr[80];
+ int len = 0;
+ buf[0] = '\0';
+
+ spin_lock(&svc_xprt_class_lock);
+ list_for_each(le, &svc_xprt_class_list) {
+ int slen;
+ struct svc_xprt_class *xcl =
+ list_entry(le, struct svc_xprt_class, xcl_list);
+
+ sprintf(tmpstr, "%s %d\n", xcl->xcl_name, xcl->xcl_max_payload);
+ slen = strlen(tmpstr);
+ if (len + slen > maxlen)
+ break;
+ len += slen;
+ strcat(buf, tmpstr);
+ }
+ spin_unlock(&svc_xprt_class_lock);
+
+ return len;
+}
+
static void svc_xprt_free(struct kref *kref)
{
struct svc_xprt *xprt =
diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c
index 2be714e..fd7cf59 100644
--- a/net/sunrpc/sysctl.c
+++ b/net/sunrpc/sysctl.c
@@ -18,6 +18,7 @@
#include <linux/sunrpc/types.h>
#include <linux/sunrpc/sched.h>
#include <linux/sunrpc/stats.h>
+#include <linux/sunrpc/svc_xprt.h>

/*
* Declare the debug flags here
@@ -48,6 +49,30 @@ rpc_unregister_sysctl(void)
}
}

+static int proc_do_xprt(ctl_table *table, int write, struct file *file,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ char tmpbuf[256];
+ int len;
+ if ((*ppos && !write) || !*lenp) {
+ *lenp = 0;
+ return 0;
+ }
+ if (write)
+ return -EINVAL;
+ else {
+ len = svc_print_xprts(tmpbuf, sizeof(tmpbuf));
+ if (!access_ok(VERIFY_WRITE, buffer, len))
+ return -EFAULT;
+
+ if (__copy_to_user(buffer, tmpbuf, len))
+ return -EFAULT;
+ }
+ *lenp -= len;
+ *ppos += len;
+ return 0;
+}
+
static int
proc_dodebug(ctl_table *table, int write, struct file *file,
void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -140,6 +165,12 @@ static ctl_table debug_table[] = {
.mode = 0644,
.proc_handler = &proc_dodebug
},
+ {
+ .procname = "transports",
+ .maxlen = 256,
+ .mode = 0444,
+ .proc_handler = &proc_do_xprt,
+ },
{ .ctl_name = 0 }
};



2007-12-03 16:44:31

by Chuck Lever

[permalink] [raw]
Subject: Re: [RFC,PATCH 34/38] svc: Add /proc/sys/sunrpc/transport files

On Nov 29, 2007, at 5:41 PM, Tom Tucker wrote:
> Add a file that when read lists the set of registered svc
> transports.
>
> Signed-off-by: Tom Tucker <[email protected]>
> ---
>
> include/linux/sunrpc/svc_xprt.h | 2 ++
> net/sunrpc/svc_xprt.c | 28 ++++++++++++++++++++++++++++
> net/sunrpc/sysctl.c | 31 ++++++++++++++++++++++++++++
> +++
> 3 files changed, 61 insertions(+), 0 deletions(-)
>
> diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/
> svc_xprt.h
> index 36f8b09..c2fa41d 100644
> --- a/include/linux/sunrpc/svc_xprt.h
> +++ b/include/linux/sunrpc/svc_xprt.h
> @@ -79,11 +79,13 @@ void svc_xprt_copy_addrs(struct svc_rqst
> *rqstp, struct svc_xprt *xprt);
> void svc_xprt_enqueue(struct svc_xprt *xprt);
> int svc_port_is_privileged(struct sockaddr *sin);
> void svc_delete_xprt(struct svc_xprt *xprt);
> +int svc_print_xprts(char *buf, int maxlen);
>
> static inline void svc_xprt_get(struct svc_xprt *xprt)
> {
> kref_get(&xprt->xpt_ref);
> }
> +
> static inline void svc_xprt_set_local(struct svc_xprt *xprt,
> struct sockaddr *sa, int salen)
> {
> diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
> index b31ba0e..7416e66 100644
> --- a/net/sunrpc/svc_xprt.c
> +++ b/net/sunrpc/svc_xprt.c
> @@ -93,6 +93,34 @@ int svc_unreg_xprt_class(struct svc_xprt_class
> *xcl)
> }
> EXPORT_SYMBOL_GPL(svc_unreg_xprt_class);
>
> +/*
> + * Format the transport list for printing
> + */
> +int svc_print_xprts(char *buf, int maxlen)

One last one, then I will crawl back into my hole.

Arguments and variables that handle string lengths should be of type
size_t (or at the very least, should be unsigned).

Otherwise, I don't see any architectural problems with the patch series.

> +{
> + struct list_head *le;
> + char tmpstr[80];
> + int len = 0;
> + buf[0] = '\0';
> +
> + spin_lock(&svc_xprt_class_lock);
> + list_for_each(le, &svc_xprt_class_list) {
> + int slen;
> + struct svc_xprt_class *xcl =
> + list_entry(le, struct svc_xprt_class, xcl_list);
> +
> + sprintf(tmpstr, "%s %d\n", xcl->xcl_name, xcl->xcl_max_payload);
> + slen = strlen(tmpstr);
> + if (len + slen > maxlen)
> + break;
> + len += slen;
> + strcat(buf, tmpstr);
> + }
> + spin_unlock(&svc_xprt_class_lock);
> +
> + return len;
> +}
> +
> static void svc_xprt_free(struct kref *kref)
> {
> struct svc_xprt *xprt =
> diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c
> index 2be714e..fd7cf59 100644
> --- a/net/sunrpc/sysctl.c
> +++ b/net/sunrpc/sysctl.c
> @@ -18,6 +18,7 @@
> #include <linux/sunrpc/types.h>
> #include <linux/sunrpc/sched.h>
> #include <linux/sunrpc/stats.h>
> +#include <linux/sunrpc/svc_xprt.h>
>
> /*
> * Declare the debug flags here
> @@ -48,6 +49,30 @@ rpc_unregister_sysctl(void)
> }
> }
>
> +static int proc_do_xprt(ctl_table *table, int write, struct file
> *file,
> + void __user *buffer, size_t *lenp, loff_t *ppos)
> +{
> + char tmpbuf[256];
> + int len;
> + if ((*ppos && !write) || !*lenp) {
> + *lenp = 0;
> + return 0;
> + }
> + if (write)
> + return -EINVAL;
> + else {
> + len = svc_print_xprts(tmpbuf, sizeof(tmpbuf));
> + if (!access_ok(VERIFY_WRITE, buffer, len))
> + return -EFAULT;
> +
> + if (__copy_to_user(buffer, tmpbuf, len))
> + return -EFAULT;
> + }
> + *lenp -= len;
> + *ppos += len;
> + return 0;
> +}
> +
> static int
> proc_dodebug(ctl_table *table, int write, struct file *file,
> void __user *buffer, size_t *lenp, loff_t *ppos)
> @@ -140,6 +165,12 @@ static ctl_table debug_table[] = {
> .mode = 0644,
> .proc_handler = &proc_dodebug
> },
> + {
> + .procname = "transports",
> + .maxlen = 256,
> + .mode = 0444,
> + .proc_handler = &proc_do_xprt,
> + },
> { .ctl_name = 0 }
> };


--
Chuck Lever
chuck[dot]lever[at]oracle[dot]com

2007-12-03 16:58:56

by J. Bruce Fields

[permalink] [raw]
Subject: Re: [RFC,PATCH 34/38] svc: Add /proc/sys/sunrpc/transport files

On Mon, Dec 03, 2007 at 11:44:15AM -0500, Chuck Lever wrote:
> One last one, then I will crawl back into my hole.

By the way, based on who I recall making a lot of comments, I'm planning
on just adding:

Acked-by: Neil Brown <[email protected]>
Reviewed-by: Chuck Lever <[email protected]>
Reviewed-by: Greg Banks <[email protected]>

to all of the server transport-switch and rdma patches. Does that sound
right?

And I'm assuming there are no objections to submitting this come the
next merge window.

--b.

2007-12-03 18:32:35

by Chuck Lever

[permalink] [raw]
Subject: Re: [RFC,PATCH 34/38] svc: Add /proc/sys/sunrpc/transport files

On Dec 3, 2007, at 11:58 AM, J. Bruce Fields wrote:
> On Mon, Dec 03, 2007 at 11:44:15AM -0500, Chuck Lever wrote:
>> One last one, then I will crawl back into my hole.
>
> By the way, based on who I recall making a lot of comments, I'm
> planning
> on just adding:
>
> Acked-by: Neil Brown <[email protected]>
> Reviewed-by: Chuck Lever <[email protected]>
> Reviewed-by: Greg Banks <[email protected]>
>
> to all of the server transport-switch and rdma patches. Does that
> sound
> right?

OK for the transport switch. I'm not qualified to review the bulk of
the RDMA work, so I would prefer that you don't add Reviewed-by "me"
for those patches. My eyes glazed over a few lines after the GPL
boilerplate.

> And I'm assuming there are no objections to submitting this come the
> next merge window.


Refactoring the server-side write space logic has exposed some
problems that really shouldn't be allowed to stand. I think we
should resolve the issues in the TCP write space callback before
merging. Realistically those problems are non-architectural and
should be fixable before the window opens.

--
Chuck Lever
chuck[dot]lever[at]oracle[dot]com

2007-12-03 19:29:12

by Tom Tucker

[permalink] [raw]
Subject: Re: [RFC,PATCH 34/38] svc: Add /proc/sys/sunrpc/transport files


On Mon, 2007-12-03 at 13:30 -0500, Chuck Lever wrote:
> On Dec 3, 2007, at 11:58 AM, J. Bruce Fields wrote:
> > On Mon, Dec 03, 2007 at 11:44:15AM -0500, Chuck Lever wrote:
> >> One last one, then I will crawl back into my hole.
> >
> > By the way, based on who I recall making a lot of comments, I'm
> > planning
> > on just adding:
> >
> > Acked-by: Neil Brown <[email protected]>
> > Reviewed-by: Chuck Lever <[email protected]>
> > Reviewed-by: Greg Banks <[email protected]>
> >
> > to all of the server transport-switch and rdma patches. Does that
> > sound
> > right?
>
> OK for the transport switch. I'm not qualified to review the bulk of
> the RDMA work, so I would prefer that you don't add Reviewed-by "me"
> for those patches. My eyes glazed over a few lines after the GPL
> boilerplate.
>
> > And I'm assuming there are no objections to submitting this come the
> > next merge window.
>
>
> Refactoring the server-side write space logic has exposed some
> problems that really shouldn't be allowed to stand. I think we
> should resolve the issues in the TCP write space callback before
> merging. Realistically those problems are non-architectural and
> should be fixable before the window opens.
>

Could you be more specific on what these problems are?

> --
> Chuck Lever
> chuck[dot]lever[at]oracle[dot]com


2007-12-04 00:22:52

by Chuck Lever

[permalink] [raw]
Subject: Re: [RFC,PATCH 34/38] svc: Add /proc/sys/sunrpc/transport files

On Dec 3, 2007, at 2:10 PM, Tom Tucker wrote:
> On Mon, 2007-12-03 at 13:30 -0500, Chuck Lever wrote:
>> On Dec 3, 2007, at 11:58 AM, J. Bruce Fields wrote:
>>> On Mon, Dec 03, 2007 at 11:44:15AM -0500, Chuck Lever wrote:
>>>> One last one, then I will crawl back into my hole.
>>>
>>> By the way, based on who I recall making a lot of comments, I'm
>>> planning
>>> on just adding:
>>>
>>> Acked-by: Neil Brown <[email protected]>
>>> Reviewed-by: Chuck Lever <[email protected]>
>>> Reviewed-by: Greg Banks <[email protected]>
>>>
>>> to all of the server transport-switch and rdma patches. Does that
>>> sound
>>> right?
>>
>> OK for the transport switch. I'm not qualified to review the bulk of
>> the RDMA work, so I would prefer that you don't add Reviewed-by "me"
>> for those patches. My eyes glazed over a few lines after the GPL
>> boilerplate.
>>
>>> And I'm assuming there are no objections to submitting this come the
>>> next merge window.
>>
>> Refactoring the server-side write space logic has exposed some
>> problems that really shouldn't be allowed to stand. I think we
>> should resolve the issues in the TCP write space callback before
>> merging. Realistically those problems are non-architectural and
>> should be fixable before the window opens.
>
> Could you be more specific on what these problems are?

The problems we discussed on Friday about svc_tcp_has_wspace(),
introduced in 9/38.

The mess is hidden by implicit type casts in the write space check,
replaced by 9/38, in svc_sock_enqueue(). The return type of
svc_sock_wspace() is unsigned long, which means the other side of the
comparison in svc_sock_enqueue() (the sum that becomes "required" in
your patch) is implicitly promoted to unsigned long before the
comparison is done.

Your patch misses the implicit cast at least by making "required" an
int in the new callback functions. "required" should be an unsigned
long in both the UDP and TCP callback to preserve the semantics of
the existing logic.

It's also the case that sk_stream_wspace() can return a negative
value if sk_wmem_queued somehow becomes larger than sk_sndbuf.
However, in the existing svc_sock_enqueue() logic, a negative result
from sk_stream_wspace() is converted to an unsigned long, making it a
large positive number. The server then thinks it may continue
writing to a socket whose buffer is already full.

I'm also worried about whether sk_reserved, which is an int and is
incremented and decremented without a check to see if it has gone
negative, can go negative during normal operation -- and if it does,
what does that do to the value of "required" and the result of the
write space check, in either the UDP or TCP case?

In addition, the server's TCP write space check is missing a
comparison that every other TCP write space callback in the kernel
has (comparing sk_stream_wspace and sk_stream_min_wspace). If we
don't include it here, then we need some testing to validate that it
isn't needed, and a comment to explain why this write space callback
is different from the others.

I was hoping for some comment from Neil or Bruce.

From Friday's post:
> If sk_reserved goes negative, it will be converted to unsigned, and
> become a very large positive number. The result of the sum will be
> recast back to an int when it's assigned to "required," and we
> probably get a reasonable result. I doubt an explicit cast will
> change things at all.
>
> Instead, perhaps we should add an explicit check to ensure
> sk_reserved is a reasonable positive value before doing any other
> checks. (Likewise in the UDP case as well).
>
> I wonder if this is really the correct write space check to use for
> TCP, though. I remember fixing a similar issue in the RPC client a
> long time ago -- both UDP and TCP used the same wspace check. It
> resulted in the sk_write_space callback hammering on the RPC
> client, and forward progress on TCP socket writes would slow to a
> crawl.
>
> You probably want something like this instead:
>
> set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
>
> required = atomic_read(&svsk->sk_reserved) + serv->sv_max_mesg;
> wspace = sk_stream_wspace(svsk->sk_sk);
>
> if (wspace < sk_stream_min_wspace(svsk->sk_sk))
> return 0;
> if (required * 2 > wspace)
> return 0;
>
> clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
> return 1;
>
> The first test mimics sk_stream_write_space() and xs_tcp_write_space
> (). I'm still unsure what to do about the possibility of one of
> these signed integers going negative on us.


--
Chuck Lever
chuck[dot]lever[at]oracle[dot]com

2007-12-04 01:16:05

by Tom Tucker

[permalink] [raw]
Subject: Re: [RFC,PATCH 34/38] svc: Add /proc/sys/sunrpc/transport files




On 12/3/07 2:36 PM, "Chuck Lever" <[email protected]> wrote:

> On Dec 3, 2007, at 2:10 PM, Tom Tucker wrote:
>> On Mon, 2007-12-03 at 13:30 -0500, Chuck Lever wrote:
>>> On Dec 3, 2007, at 11:58 AM, J. Bruce Fields wrote:
>>>> On Mon, Dec 03, 2007 at 11:44:15AM -0500, Chuck Lever wrote:
>>
>> Could you be more specific on what these problems are?

[...snip...]

>
> The problems we discussed on Friday about svc_tcp_has_wspace(),
> introduced in 9/38.
>
> The mess is hidden by implicit type casts in the write space check,
> replaced by 9/38, in svc_sock_enqueue(). The return type of
> svc_sock_wspace() is unsigned long, which means the other side of the
> comparison in svc_sock_enqueue() (the sum that becomes "required" in
> your patch) is implicitly promoted to unsigned long before the
> comparison is done.
>
> Your patch misses the implicit cast at least by making "required" an
> int in the new callback functions. "required" should be an unsigned
> long in both the UDP and TCP callback to preserve the semantics of
> the existing logic.
>
> It's also the case that sk_stream_wspace() can return a negative
> value if sk_wmem_queued somehow becomes larger than sk_sndbuf.
> However, in the existing svc_sock_enqueue() logic, a negative result
> from sk_stream_wspace() is converted to an unsigned long, making it a
> large positive number. The server then thinks it may continue
> writing to a socket whose buffer is already full.
>
> I'm also worried about whether sk_reserved, which is an int and is
> incremented and decremented without a check to see if it has gone
> negative, can go negative during normal operation -- and if it does,
> what does that do to the value of "required" and the result of the
> write space check, in either the UDP or TCP case?
>
> In addition, the server's TCP write space check is missing a
> comparison that every other TCP write space callback in the kernel
> has (comparing sk_stream_wspace and sk_stream_min_wspace). If we
> don't include it here, then we need some testing to validate that it
> isn't needed, and a comment to explain why this write space callback
> is different from the others.
>
> I was hoping for some comment from Neil or Bruce.

Ok, cool. I just didn't get the map from "refactoring" to this. These
changes are straightforward. I have already modified the patch to resolve
the type promotion issue and added a check for negative. I planned on
reposting the whole series with the updates after we get the build issues
hammered out.

Thanks,
Tom

>
> From Friday's post:
>> If sk_reserved goes negative, it will be converted to unsigned, and
>> become a very large positive number. The result of the sum will be
>> recast back to an int when it's assigned to "required," and we
>> probably get a reasonable result. I doubt an explicit cast will
>> change things at all.
>>
>> Instead, perhaps we should add an explicit check to ensure
>> sk_reserved is a reasonable positive value before doing any other
>> checks. (Likewise in the UDP case as well).
>>
>> I wonder if this is really the correct write space check to use for
>> TCP, though. I remember fixing a similar issue in the RPC client a
>> long time ago -- both UDP and TCP used the same wspace check. It
>> resulted in the sk_write_space callback hammering on the RPC
>> client, and forward progress on TCP socket writes would slow to a
>> crawl.
>>
>> You probably want something like this instead:
>>
>> set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
>>
>> required = atomic_read(&svsk->sk_reserved) + serv->sv_max_mesg;
>> wspace = sk_stream_wspace(svsk->sk_sk);
>>
>> if (wspace < sk_stream_min_wspace(svsk->sk_sk))
>> return 0;
>> if (required * 2 > wspace)
>> return 0;
>>
>> clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
>> return 1;
>>
>> The first test mimics sk_stream_write_space() and xs_tcp_write_space
>> (). I'm still unsure what to do about the possibility of one of
>> these signed integers going negative on us.
>
>
> --
> Chuck Lever
> chuck[dot]lever[at]oracle[dot]com



2007-12-05 08:37:31

by Greg Banks

[permalink] [raw]
Subject: Re: [RFC,PATCH 34/38] svc: Add /proc/sys/sunrpc/transport files

On Mon, Dec 03, 2007 at 11:58:53AM -0500, J. Bruce Fields wrote:
> On Mon, Dec 03, 2007 at 11:44:15AM -0500, Chuck Lever wrote:
> > One last one, then I will crawl back into my hole.
>
> By the way, based on who I recall making a lot of comments, I'm planning
> on just adding:
>
> Acked-by: Neil Brown <[email protected]>
> Reviewed-by: Chuck Lever <[email protected]>
> Reviewed-by: Greg Banks <[email protected]>
>
> to all of the server transport-switch and rdma patches. Does that sound
> right?

For the server transport switch patches: yes please.

For the server RDMA patches: I've not had a chance to read the latest
versions (a long-running drama has been sucking up all my time for some
weeks now), but after all the discussion I'm not expecting any nasty
surprises. I'll get back to you in a few days.

> And I'm assuming there are no objections to submitting this come the
> next merge window.

Agreed.

Greg.
--
Greg Banks, R&D Software Engineer, SGI Australian Software Group.
Apparently, I'm Bedevere. Which MPHG character are you?
I don't speak for SGI.