After some discussion with David Howells at LSF/MM 2023, we arrived
at a plan to use a single sock_sendmsg() call for transmitting an
RPC message on socket-based transports. This is an initial part of
the transition to support handling folios with file content, but it
has scalability benefits as well.
Comments, suggestions, and test results are welcome.
---
Chuck Lever (4):
SUNRPC: Convert svc_tcp_sendmsg to use bio_vecs directly
SUNRPC: Convert svc_udp_sendto() to use the per-socket bio_vec array
SUNRPC: Use a per-transport receive bio_vec array
SUNRPC: Send RPC message on TCP with a single sock_sendmsg() call
include/linux/sunrpc/svc.h | 1 -
include/linux/sunrpc/svcsock.h | 7 ++
net/sunrpc/svcsock.c | 142 ++++++++++++++++++---------------
3 files changed, 86 insertions(+), 64 deletions(-)
--
Chuck Lever
From: Chuck Lever <[email protected]>
Add a helper to convert a whole xdr_buf directly into an array of
bio_vecs, then send this array instead of iterating piecemeal over
the xdr_buf containing the outbound RPC message.
Note that the rules of the RPC protocol mean there can be only one
outstanding send at a time on a transport socket. The kernel's
SunRPC server enforces this via the transport's xpt_mutex. Thus we
can use a per-transport shared array for the xdr_buf conversion
rather than allocate one every time or use one that is part of
struct svc_rqst.
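(As a worked example with hypothetical numbers, assuming 4KB pages: an
xdr_buf with a 120-byte head, 8300 bytes of page data starting at a
page_base of 100, and a 4-byte tail converts into five bio_vecs: one
for the head, three for the page data (3996 + 4096 + 208 bytes), and
one for the tail.)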
Signed-off-by: Chuck Lever <[email protected]>
---
include/linux/sunrpc/svcsock.h | 3 +
net/sunrpc/svcsock.c | 93 +++++++++++++++++++++++-----------------
2 files changed, 56 insertions(+), 40 deletions(-)
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index a7116048a4d4..a9bfeadf4cbe 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -40,6 +40,9 @@ struct svc_sock {
struct completion sk_handshake_done;
+ struct bio_vec sk_send_bvec[RPCSVC_MAXPAGES]
+ ____cacheline_aligned;
+
struct page * sk_pages[RPCSVC_MAXPAGES]; /* received data */
};
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index e43f26382411..d3c5f1a07979 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -188,6 +188,42 @@ static int svc_sock_result_payload(struct svc_rqst *rqstp, unsigned int offset,
return 0;
}
+static unsigned int svc_sock_xdr_to_bvecs(struct bio_vec *bvec,
+ struct xdr_buf *xdr)
+{
+ const struct kvec *head = xdr->head;
+ const struct kvec *tail = xdr->tail;
+ unsigned int count = 0;
+
+ if (head->iov_len) {
+ bvec_set_virt(bvec++, head->iov_base, head->iov_len);
+ count++;
+ }
+
+ if (xdr->page_len) {
+ unsigned int offset, len, remaining;
+ struct page **pages = xdr->pages;
+
+ offset = offset_in_page(xdr->page_base);
+ remaining = xdr->page_len;
+ while (remaining > 0) {
+ len = min_t(unsigned int, remaining,
+ PAGE_SIZE - offset);
+ bvec_set_page(bvec++, *pages++, len, offset);
+ remaining -= len;
+ offset = 0;
+ count++;
+ }
+ }
+
+ if (tail->iov_len) {
+ bvec_set_virt(bvec, tail->iov_base, tail->iov_len);
+ count++;
+ }
+
+ return count;
+}
+
/*
* Report socket names for nfsdfs
*/
@@ -1194,72 +1230,50 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
return 0; /* record not complete */
}
-static int svc_tcp_send_kvec(struct socket *sock, const struct kvec *vec,
- int flags)
-{
- struct msghdr msg = { .msg_flags = MSG_SPLICE_PAGES | flags, };
-
- iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, vec, 1, vec->iov_len);
- return sock_sendmsg(sock, &msg);
-}
-
/*
* MSG_SPLICE_PAGES is used exclusively to reduce the number of
* copy operations in this path. Therefore the caller must ensure
* that the pages backing @xdr are unchanging.
*
- * In addition, the logic assumes that * .bv_len is never larger
- * than PAGE_SIZE.
+ * Note that the send is non-blocking. The caller has incremented
+ * the reference count on each page backing the RPC message, and
+ * the network layer will "put" these pages when transmission is
+ * complete.
+ *
+ * This is safe for our RPC services because the memory backing
+ * the head and tail components is never kmalloc'd. These always
+ * come from pages in the svc_rqst::rq_pages array.
*/
-static int svc_tcp_sendmsg(struct socket *sock, struct xdr_buf *xdr,
+static int svc_tcp_sendmsg(struct svc_sock *svsk, struct xdr_buf *xdr,
rpc_fraghdr marker, unsigned int *sentp)
{
- const struct kvec *head = xdr->head;
- const struct kvec *tail = xdr->tail;
struct kvec rm = {
.iov_base = &marker,
.iov_len = sizeof(marker),
};
struct msghdr msg = {
- .msg_flags = 0,
+ .msg_flags = MSG_MORE,
};
+ unsigned int count;
int ret;
*sentp = 0;
- ret = xdr_alloc_bvec(xdr, GFP_KERNEL);
- if (ret < 0)
- return ret;
- ret = kernel_sendmsg(sock, &msg, &rm, 1, rm.iov_len);
+ ret = kernel_sendmsg(svsk->sk_sock, &msg, &rm, 1, rm.iov_len);
if (ret < 0)
return ret;
*sentp += ret;
if (ret != rm.iov_len)
return -EAGAIN;
- ret = svc_tcp_send_kvec(sock, head, 0);
- if (ret < 0)
- return ret;
- *sentp += ret;
- if (ret != head->iov_len)
- goto out;
-
+ count = svc_sock_xdr_to_bvecs(svsk->sk_send_bvec, xdr);
msg.msg_flags = MSG_SPLICE_PAGES;
- iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, xdr->bvec,
- xdr_buf_pagecount(xdr), xdr->page_len);
- ret = sock_sendmsg(sock, &msg);
+ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, svsk->sk_send_bvec,
+ count, xdr->len);
+ ret = sock_sendmsg(svsk->sk_sock, &msg);
if (ret < 0)
return ret;
*sentp += ret;
-
- if (tail->iov_len) {
- ret = svc_tcp_send_kvec(sock, tail, 0);
- if (ret < 0)
- return ret;
- *sentp += ret;
- }
-
-out:
return 0;
}
@@ -1290,8 +1304,7 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp)
if (svc_xprt_is_dead(xprt))
goto out_notconn;
tcp_sock_set_cork(svsk->sk_sk, true);
- err = svc_tcp_sendmsg(svsk->sk_sock, xdr, marker, &sent);
- xdr_free_bvec(xdr);
+ err = svc_tcp_sendmsg(svsk, xdr, marker, &sent);
trace_svcsock_tcp_send(xprt, err < 0 ? (long)err : sent);
if (err < 0 || sent != (xdr->len + sizeof(marker)))
goto out_close;
From: Chuck Lever <[email protected]>
TCP receives are serialized, so we need only one bio_vec array per
socket. This shrinks the size of struct svc_rqst by 4144 bytes on
x86_64.
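(For the record: struct bio_vec is 16 bytes on x86_64, and with the
usual 1MB maximum payload and 4KB pages RPCSVC_MAXPAGES works out to
259, hence 259 * 16 = 4144 bytes.)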
Signed-off-by: Chuck Lever <[email protected]>
---
include/linux/sunrpc/svc.h | 1 -
include/linux/sunrpc/svcsock.h | 2 ++
net/sunrpc/svcsock.c | 2 +-
3 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index f8751118c122..36052188222d 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -224,7 +224,6 @@ struct svc_rqst {
struct folio_batch rq_fbatch;
struct kvec rq_vec[RPCSVC_MAXPAGES]; /* generally useful.. */
- struct bio_vec rq_bvec[RPCSVC_MAXPAGES];
__be32 rq_xid; /* transmission id */
u32 rq_prog; /* program number */
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index a9bfeadf4cbe..4efae760f3cb 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -40,6 +40,8 @@ struct svc_sock {
struct completion sk_handshake_done;
+ struct bio_vec sk_recv_bvec[RPCSVC_MAXPAGES]
+ ____cacheline_aligned;
struct bio_vec sk_send_bvec[RPCSVC_MAXPAGES]
____cacheline_aligned;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index ae7143f68343..6f672cb0b0b3 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -333,7 +333,7 @@ static ssize_t svc_tcp_read_msg(struct svc_rqst *rqstp, size_t buflen,
{
struct svc_sock *svsk =
container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
- struct bio_vec *bvec = rqstp->rq_bvec;
+ struct bio_vec *bvec = svsk->sk_recv_bvec;
struct msghdr msg = { NULL };
unsigned int i;
ssize_t len;
From: Chuck Lever <[email protected]>
Commit da1661b93bf4 ("SUNRPC: Teach server to use xprt_sock_sendmsg
for socket sends") modified svc_udp_sendto() to use xprt_sock_sendmsg()
because we originally believed xprt_sock_sendmsg() would be needed
for TLS support. That does not actually appear to be the case.
In addition, the linkage between the client and server send code has
been a bit of a maintenance headache because of the distinct ways
that the client and server handle memory allocation.
Eventually the XDR layer will deal with its buffers in the form of
bio_vec arrays, so convert this function accordingly.
Once the use of bio_vecs is ubiquitous, the xdr_buf-to-bio_vec array
code can be hoisted into a path that is common for all transports.
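For context, the common shape both socket send paths end up with looks
roughly like the sketch below (using names introduced in this series;
illustrative only, not an additional patch):

	/* Convert the outbound xdr_buf once, then hand the whole
	 * RPC message to the socket in a single sendmsg call.
	 */
	count = svc_sock_xdr_to_bvecs(svsk->sk_send_bvec, xdr);
	iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, svsk->sk_send_bvec,
		      count, xdr->len);
	err = sock_sendmsg(svsk->sk_sock, &msg);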
Signed-off-by: Chuck Lever <[email protected]>
---
net/sunrpc/svcsock.c | 22 +++++++++++-----------
1 file changed, 11 insertions(+), 11 deletions(-)
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index d3c5f1a07979..ae7143f68343 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -729,7 +729,7 @@ static int svc_udp_sendto(struct svc_rqst *rqstp)
.msg_control = cmh,
.msg_controllen = sizeof(buffer),
};
- unsigned int sent;
+ unsigned int count;
int err;
svc_udp_release_ctxt(xprt, rqstp->rq_xprt_ctxt);
@@ -742,22 +742,22 @@ static int svc_udp_sendto(struct svc_rqst *rqstp)
if (svc_xprt_is_dead(xprt))
goto out_notconn;
- err = xdr_alloc_bvec(xdr, GFP_KERNEL);
- if (err < 0)
- goto out_unlock;
+ count = svc_sock_xdr_to_bvecs(svsk->sk_send_bvec, xdr);
- err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent);
+ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, svsk->sk_send_bvec,
+ count, 0);
+ err = sock_sendmsg(svsk->sk_sock, &msg);
if (err == -ECONNREFUSED) {
/* ICMP error on earlier request. */
- err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent);
+ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, svsk->sk_send_bvec,
+ count, 0);
+ err = sock_sendmsg(svsk->sk_sock, &msg);
}
- xdr_free_bvec(xdr);
+
trace_svcsock_udp_send(xprt, err);
-out_unlock:
+
mutex_unlock(&xprt->xpt_mutex);
- if (err < 0)
- return err;
- return sent;
+ return err;
out_notconn:
mutex_unlock(&xprt->xpt_mutex);
From: Chuck Lever <[email protected]>
There is now enough infrastructure in place to combine the stream
record marker into the bio_vec array used to send each outgoing RPC
message. The whole message can then be sent more efficiently with a
single call to sock_sendmsg() using a bio_vec iterator.
Note that this also helps with RPC-with-TLS: the TLS implementation
can now clearly see where the upper-layer message boundaries are.
Before, it would send each component of the xdr_buf in a separate
TLS record.
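For reference, the 4-byte record marker carries the last-fragment bit
and the fragment length; the caller constructs it along these lines
(shown here for context, not part of this patch):

	rpc_fraghdr marker = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT |
					 (u32)xdr->len);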
Suggested-by: David Howells <[email protected]>
Signed-off-by: Chuck Lever <[email protected]>
---
include/linux/sunrpc/svcsock.h | 2 ++
net/sunrpc/svcsock.c | 33 ++++++++++++++++++---------------
2 files changed, 20 insertions(+), 15 deletions(-)
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index 4efae760f3cb..55446136499f 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -38,6 +38,8 @@ struct svc_sock {
/* Number of queued send requests */
atomic_t sk_sendqlen;
+ struct page_frag_cache sk_frag_cache;
+
struct completion sk_handshake_done;
struct bio_vec sk_recv_bvec[RPCSVC_MAXPAGES]
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 6f672cb0b0b3..19cab73229e4 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1247,29 +1247,28 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
static int svc_tcp_sendmsg(struct svc_sock *svsk, struct xdr_buf *xdr,
rpc_fraghdr marker, unsigned int *sentp)
{
- struct kvec rm = {
- .iov_base = &marker,
- .iov_len = sizeof(marker),
- };
struct msghdr msg = {
- .msg_flags = MSG_MORE,
+ .msg_flags = MSG_SPLICE_PAGES,
};
unsigned int count;
+ void *tmp;
int ret;
*sentp = 0;
- ret = kernel_sendmsg(svsk->sk_sock, &msg, &rm, 1, rm.iov_len);
- if (ret < 0)
- return ret;
- *sentp += ret;
- if (ret != rm.iov_len)
- return -EAGAIN;
-
- count = svc_sock_xdr_to_bvecs(svsk->sk_send_bvec, xdr);
- msg.msg_flags = MSG_SPLICE_PAGES;
+ /* The stream record marker is copied into a temporary page
+ * buffer so that it can be included in sk_send_bvec.
+ */
+ tmp = page_frag_alloc(&svsk->sk_frag_cache, sizeof(marker),
+ GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+ memcpy(tmp, &marker, sizeof(marker));
+ bvec_set_virt(svsk->sk_send_bvec, tmp, sizeof(marker));
+
+ count = svc_sock_xdr_to_bvecs(svsk->sk_send_bvec + 1, xdr);
iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, svsk->sk_send_bvec,
- count, xdr->len);
+ 1 + count, sizeof(marker) + xdr->len);
ret = sock_sendmsg(svsk->sk_sock, &msg);
if (ret < 0)
return ret;
@@ -1648,6 +1647,7 @@ static void svc_tcp_sock_detach(struct svc_xprt *xprt)
static void svc_sock_free(struct svc_xprt *xprt)
{
struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
+ struct page_frag_cache *pfc = &svsk->sk_frag_cache;
struct socket *sock = svsk->sk_sock;
trace_svcsock_free(svsk, sock);
@@ -1657,5 +1657,8 @@ static void svc_sock_free(struct svc_xprt *xprt)
sockfd_put(sock);
else
sock_release(sock);
+ if (pfc->va)
+ __page_frag_cache_drain(virt_to_head_page(pfc->va),
+ pfc->pagecnt_bias);
kfree(svsk);
}
On Sun, 2023-07-09 at 16:04 -0400, Chuck Lever wrote:
> From: Chuck Lever <[email protected]>
>
> Add a helper to convert a whole xdr_buf directly into an array of
> bio_vecs, then send this array instead of iterating piecemeal over
> the xdr_buf containing the outbound RPC message.
>
> Note that the rules of the RPC protocol mean there can be only one
> outstanding send at a time on a transport socket. The kernel's
> SunRPC server enforces this via the transport's xpt_mutex. Thus we
> can use a per-transport shared array for the xdr_buf conversion
> rather than allocate one every time or use one that is part of
> struct svc_rqst.
>
> Signed-off-by: Chuck Lever <[email protected]>
> ---
> include/linux/sunrpc/svcsock.h | 3 +
> net/sunrpc/svcsock.c | 93 +++++++++++++++++++++++-----------------
> 2 files changed, 56 insertions(+), 40 deletions(-)
>
> diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
> index a7116048a4d4..a9bfeadf4cbe 100644
> --- a/include/linux/sunrpc/svcsock.h
> +++ b/include/linux/sunrpc/svcsock.h
> @@ -40,6 +40,9 @@ struct svc_sock {
>
> struct completion sk_handshake_done;
>
> + struct bio_vec sk_send_bvec[RPCSVC_MAXPAGES]
> + ____cacheline_aligned;
> +
> struct page * sk_pages[RPCSVC_MAXPAGES]; /* received data */
> };
>
Hmm ok, so this adds ~4k per socket, but we get rid of allocation in the
send path. I like it!
> diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
> index e43f26382411..d3c5f1a07979 100644
> --- a/net/sunrpc/svcsock.c
> +++ b/net/sunrpc/svcsock.c
> @@ -188,6 +188,42 @@ static int svc_sock_result_payload(struct svc_rqst *rqstp, unsigned int offset,
> return 0;
> }
>
> +static unsigned int svc_sock_xdr_to_bvecs(struct bio_vec *bvec,
> + struct xdr_buf *xdr)
> +{
> + const struct kvec *head = xdr->head;
> + const struct kvec *tail = xdr->tail;
> + unsigned int count = 0;
> +
> + if (head->iov_len) {
> + bvec_set_virt(bvec++, head->iov_base, head->iov_len);
> + count++;
> + }
> +
> + if (xdr->page_len) {
> + unsigned int offset, len, remaining;
> + struct page **pages = xdr->pages;
> +
> + offset = offset_in_page(xdr->page_base);
> + remaining = xdr->page_len;
> + while (remaining > 0) {
> + len = min_t(unsigned int, remaining,
> + PAGE_SIZE - offset);
> + bvec_set_page(bvec++, *pages++, len, offset);
> + remaining -= len;
> + offset = 0;
> + count++;
> + }
> + }
> +
> + if (tail->iov_len) {
> + bvec_set_virt(bvec, tail->iov_base, tail->iov_len);
> + count++;
> + }
> +
> + return count;
> +}
> +
The lack of bounds checking in the above function bothers me a bit. I
think we need to ensure that "bvec" doesn't walk off the end of the
array.
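Purely as an illustration of that concern, not a proposed fix: one
shape for such a guard would be to pass the array size into the helper
and stop filling once it is reached. A caller would still need some way
to notice that the conversion was truncated, so a WARN_ON_ONCE() or an
error return is probably wanted on top of this sketch:

	/* Illustrative sketch only: stop before walking off the end of
	 * the destination array. Callers still need a way to detect a
	 * truncated conversion.
	 */
	static unsigned int svc_sock_xdr_to_bvecs(struct bio_vec *bvec,
						  unsigned int maxvecs,
						  struct xdr_buf *xdr)
	{
		const struct kvec *head = xdr->head;
		const struct kvec *tail = xdr->tail;
		unsigned int count = 0;

		if (head->iov_len && count < maxvecs) {
			bvec_set_virt(bvec++, head->iov_base, head->iov_len);
			count++;
		}

		if (xdr->page_len) {
			unsigned int offset, len, remaining;
			struct page **pages = xdr->pages;

			offset = offset_in_page(xdr->page_base);
			remaining = xdr->page_len;
			while (remaining > 0 && count < maxvecs) {
				len = min_t(unsigned int, remaining,
					    PAGE_SIZE - offset);
				bvec_set_page(bvec++, *pages++, len, offset);
				remaining -= len;
				offset = 0;
				count++;
			}
		}

		if (tail->iov_len && count < maxvecs) {
			bvec_set_virt(bvec, tail->iov_base, tail->iov_len);
			count++;
		}

		return count;
	}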
--
Jeff Layton <[email protected]>
On Sun, 2023-07-09 at 16:04 -0400, Chuck Lever wrote:
> After some discussion with David Howells at LSF/MM 2023, we arrived
> at a plan to use a single sock_sendmsg() call for transmitting an
> RPC message on socket-based transports. This is an initial part of
> the transition to support handling folios with file content, but it
> has scalability benefits as well.
>
> Comments, suggestions, and test results are welcome.
>
> ---
>
> Chuck Lever (4):
> SUNRPC: Convert svc_tcp_sendmsg to use bio_vecs directly
> SUNRPC: Convert svc_udp_sendto() to use the per-socket bio_vec array
> SUNRPC: Use a per-transport receive bio_vec array
> SUNRPC: Send RPC message on TCP with a single sock_sendmsg() call
>
>
> include/linux/sunrpc/svc.h | 1 -
> include/linux/sunrpc/svcsock.h | 7 ++
> net/sunrpc/svcsock.c | 142 ++++++++++++++++++---------------
> 3 files changed, 86 insertions(+), 64 deletions(-)
>
> --
> Chuck Lever
>
Aside from my concerns with bounds checking on the first patch, this
looks like a good set of changes overall. Does it show any performance
improvements in your testing?
--
Jeff Layton <[email protected]>
> On Jul 14, 2023, at 9:26 AM, Jeff Layton <[email protected]> wrote:
>
> Aside from my concerns with bounds checking on the first patch, this
> looks like a good set of changes overall. Does it show any performance
> improvements in your testing?
At the moment I'm mostly interested in not causing behavior regressions.
I plan to look at instruction path length and such once we've agreed
on the form of this change. With TCP, there are enough bottlenecks
that this kind of modification won't translate into much of a
performance delta observed at the client, but it might improve
scalability on the server.
When it's available in a git repo, I can ask Daire to try it out too.
--
Chuck Lever