Make TLS's sendmsg() support MSG_SPLICE_PAGES. This causes pages to be
spliced from the source iterator if possible.
This allows ->sendpage() to be replaced by something that can handle
multiple multipage folios in a single transaction.
Signed-off-by: David Howells <[email protected]>
cc: Chuck Lever <[email protected]>
cc: Boris Pismenny <[email protected]>
cc: John Fastabend <[email protected]>
cc: Jakub Kicinski <[email protected]>
cc: Eric Dumazet <[email protected]>
cc: "David S. Miller" <[email protected]>
cc: Paolo Abeni <[email protected]>
cc: Jens Axboe <[email protected]>
cc: Matthew Wilcox <[email protected]>
cc: [email protected]
---
Notes:
ver #2)
- "rls_" should be "tls_".
net/tls/tls_sw.c | 46 +++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 45 insertions(+), 1 deletion(-)
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index a2fb0256ff1c..fcbaf594c300 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -931,6 +931,38 @@ static int tls_sw_push_pending_record(struct sock *sk, int flags)
&copied, flags);
}
+static int tls_sw_sendmsg_splice(struct sock *sk, struct msghdr *msg,
+ struct sk_msg *msg_pl, size_t try_to_copy,
+ ssize_t *copied)
+{
+ struct page *page = NULL, **pages = &page;
+
+ do {
+ ssize_t part;
+ size_t off;
+ bool put = false;
+
+ part = iov_iter_extract_pages(&msg->msg_iter, &pages,
+ try_to_copy, 1, 0, &off);
+ if (part <= 0)
+ return part ?: -EIO;
+
+ if (WARN_ON_ONCE(!sendpage_ok(page))) {
+ iov_iter_revert(&msg->msg_iter, part);
+ return -EIO;
+ }
+
+ sk_msg_page_add(msg_pl, page, part, off);
+ sk_mem_charge(sk, part);
+ if (put)
+ put_page(page);
+ *copied += part;
+ try_to_copy -= part;
+ } while (try_to_copy && !sk_msg_full(msg_pl));
+
+ return 0;
+}
+
int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
{
long timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
@@ -1020,6 +1052,17 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
full_record = true;
}
+ if (try_to_copy && (msg->msg_flags & MSG_SPLICE_PAGES)) {
+ ret = tls_sw_sendmsg_splice(sk, msg, msg_pl,
+ try_to_copy, &copied);
+ if (ret < 0)
+ goto send_end;
+ tls_ctx->pending_open_record_frags = true;
+ if (full_record || eor || sk_msg_full(msg_pl))
+ goto copied;
+ continue;
+ }
+
if (!is_kvec && (full_record || eor) && !async_capable) {
u32 first = msg_pl->sg.end;
@@ -1082,8 +1125,9 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
/* Open records defined only if successfully copied, otherwise
* we would trim the sg but not reset the open record frags.
*/
- tls_ctx->pending_open_record_frags = true;
copied += try_to_copy;
+copied:
+ tls_ctx->pending_open_record_frags = true;
if (full_record || eor) {
ret = bpf_exec_tx_verdict(msg_pl, sk, full_record,
record_type, &copied,
On Wed, 7 Jun 2023 15:05:56 +0100 David Howells wrote:
> +static int tls_sw_sendmsg_splice(struct sock *sk, struct msghdr *msg,
> + struct sk_msg *msg_pl, size_t try_to_copy,
> + ssize_t *copied)
> +{
> + struct page *page = NULL, **pages = &page;
> +
> + do {
> + ssize_t part;
> + size_t off;
> + bool put = false;
> +
> + part = iov_iter_extract_pages(&msg->msg_iter, &pages,
> + try_to_copy, 1, 0, &off);
> + if (part <= 0)
> + return part ?: -EIO;
> +
> + if (WARN_ON_ONCE(!sendpage_ok(page))) {
> + iov_iter_revert(&msg->msg_iter, part);
> + return -EIO;
> + }
> +
> + sk_msg_page_add(msg_pl, page, part, off);
> + sk_mem_charge(sk, part);
> + if (put)
> + put_page(page);
is put ever set to true?
> + *copied += part;
> + try_to_copy -= part;
> + } while (try_to_copy && !sk_msg_full(msg_pl));
> +
> + return 0;
> +}
> +
> int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
> {
> long timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
> @@ -1020,6 +1052,17 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
> full_record = true;
> }
>
> + if (try_to_copy && (msg->msg_flags & MSG_SPLICE_PAGES)) {
> + ret = tls_sw_sendmsg_splice(sk, msg, msg_pl,
> + try_to_copy, &copied);
> + if (ret < 0)
> + goto send_end;
> + tls_ctx->pending_open_record_frags = true;
> + if (full_record || eor || sk_msg_full(msg_pl))
> + goto copied;
> + continue;
> + }
> +
> if (!is_kvec && (full_record || eor) && !async_capable) {
> u32 first = msg_pl->sg.end;
>
> @@ -1082,8 +1125,9 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
> /* Open records defined only if successfully copied, otherwise
> * we would trim the sg but not reset the open record frags.
> */
> - tls_ctx->pending_open_record_frags = true;
> copied += try_to_copy;
> +copied:
> + tls_ctx->pending_open_record_frags = true;
Why move pending-open-record-frags setting if it's also set before
jumping?
> if (full_record || eor) {
> ret = bpf_exec_tx_verdict(msg_pl, sk, full_record,
> record_type, &copied,
David Howells <[email protected]> wrote:
> > > - tls_ctx->pending_open_record_frags = true;
> > > copied += try_to_copy;
> > > +copied:
> > > + tls_ctx->pending_open_record_frags = true;
> >
> > Why move pending-open-record-frags setting if it's also set before
> > jumping?
>
> I should probably remove it from before the goto - unless you'd prefer to do
> it in both places.
Actually, I need to keep the one before the goto.
David
Jakub Kicinski <[email protected]> wrote:
> > + if (put)
> > + put_page(page);
>
> is put ever set to true?
Ah, the copy-data-if-slab thing got removed. I'll clean this bit up.
> > - tls_ctx->pending_open_record_frags = true;
> > copied += try_to_copy;
> > +copied:
> > + tls_ctx->pending_open_record_frags = true;
>
> Why move pending-open-record-frags setting if it's also set before
> jumping?
I should probably remove it from before the goto - unless you'd prefer to do
it in both places.
David
On Wed, 07 Jun 2023 18:31:10 +0100 David Howells wrote:
> > > Why move pending-open-record-frags setting if it's also set before
> > > jumping?
> >
> > I should probably remove it from before the goto - unless you'd prefer to do
> > it in both places.
>
> Actually, I need to keep the one before the goto.
Yeah, it feels like it goes together with updating copied, really,
but there's no point passing it all the way to tls_sw_sendmsg_splice().
I'd drop the reshuffle next to the label.