LinuxLists.cc - [PATCH net-next 10/12] tls/sw: Convert tls_sw_sendpage() to use MSG_SPLICE

2023-05-24 15:45:43

Subject: [PATCH net-next 10/12] tls/sw: Convert tls_sw_sendpage() to use MSG_SPLICE_PAGES

Convert tls_sw_sendpage() and tls_sw_sendpage_locked() to use sendmsg()
with MSG_SPLICE_PAGES rather than directly splicing in the pages itself.

[!] Note that tls_sw_sendpage_locked() appears to have the wrong locking
upstream. I think the caller will only hold the socket lock, but it
should hold tls_ctx->tx_lock too.

This allows ->sendpage() to be replaced by something that can handle
multiple multipage folios in a single transaction.

Signed-off-by: David Howells <[email protected]>
cc: Chuck Lever <[email protected]>
cc: Boris Pismenny <[email protected]>
cc: John Fastabend <[email protected]>
cc: Jakub Kicinski <[email protected]>
cc: Eric Dumazet <[email protected]>
cc: "David S. Miller" <[email protected]>
cc: Paolo Abeni <[email protected]>
cc: Jens Axboe <[email protected]>
cc: Matthew Wilcox <[email protected]>
cc: [email protected]
---
net/tls/tls_sw.c | 164 +++++++++--------------------------------------
1 file changed, 30 insertions(+), 134 deletions(-)

diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 0ccef8aa9951..1a5926cc3e84 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -972,7 +972,7 @@ static int rls_sw_sendmsg_splice(struct sock *sk, struct msghdr *msg,
return 0;
}

-int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
+static int tls_sw_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
{
long timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
struct tls_context *tls_ctx = tls_get_ctx(sk);
@@ -995,15 +995,6 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
int ret = 0;
int pending;

- if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL |
- MSG_CMSG_COMPAT))
- return -EOPNOTSUPP;
-
- ret = mutex_lock_interruptible(&tls_ctx->tx_lock);
- if (ret)
- return ret;
- lock_sock(sk);
-
if (unlikely(msg->msg_controllen)) {
ret = tls_process_cmsg(sk, msg, &record_type);
if (ret) {
@@ -1204,157 +1195,62 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)

send_end:
ret = sk_stream_error(sk, msg->msg_flags, ret);
-
- release_sock(sk);
- mutex_unlock(&tls_ctx->tx_lock);
return copied > 0 ? copied : ret;
}

-static int tls_sw_do_sendpage(struct sock *sk, struct page *page,
- int offset, size_t size, int flags)
+int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
{
- long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
struct tls_context *tls_ctx = tls_get_ctx(sk);
- struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
- struct tls_prot_info *prot = &tls_ctx->prot_info;
- unsigned char record_type = TLS_RECORD_TYPE_DATA;
- struct sk_msg *msg_pl;
- struct tls_rec *rec;
- int num_async = 0;
- ssize_t copied = 0;
- bool full_record;
- int record_room;
- int ret = 0;
- bool eor;
-
- eor = !(flags & MSG_SENDPAGE_NOTLAST);
- sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
-
- /* Call the sk_stream functions to manage the sndbuf mem. */
- while (size > 0) {
- size_t copy, required_size;
-
- if (sk->sk_err) {
- ret = -sk->sk_err;
- goto sendpage_end;
- }
-
- if (ctx->open_rec)
- rec = ctx->open_rec;
- else
- rec = ctx->open_rec = tls_get_rec(sk);
- if (!rec) {
- ret = -ENOMEM;
- goto sendpage_end;
- }
-
- msg_pl = &rec->msg_plaintext;
-
- full_record = false;
- record_room = TLS_MAX_PAYLOAD_SIZE - msg_pl->sg.size;
- copy = size;
- if (copy >= record_room) {
- copy = record_room;
- full_record = true;
- }
-
- required_size = msg_pl->sg.size + copy + prot->overhead_size;
-
- if (!sk_stream_memory_free(sk))
- goto wait_for_sndbuf;
-alloc_payload:
- ret = tls_alloc_encrypted_msg(sk, required_size);
- if (ret) {
- if (ret != -ENOSPC)
- goto wait_for_memory;
-
- /* Adjust copy according to the amount that was
- * actually allocated. The difference is due
- * to max sg elements limit
- */
- copy -= required_size - msg_pl->sg.size;
- full_record = true;
- }
-
- sk_msg_page_add(msg_pl, page, copy, offset);
- sk_mem_charge(sk, copy);
-
- offset += copy;
- size -= copy;
- copied += copy;
-
- tls_ctx->pending_open_record_frags = true;
- if (full_record || eor || sk_msg_full(msg_pl)) {
- ret = bpf_exec_tx_verdict(msg_pl, sk, full_record,
- record_type, &copied, flags);
- if (ret) {
- if (ret == -EINPROGRESS)
- num_async++;
- else if (ret == -ENOMEM)
- goto wait_for_memory;
- else if (ret != -EAGAIN) {
- if (ret == -ENOSPC)
- ret = 0;
- goto sendpage_end;
- }
- }
- }
- continue;
-wait_for_sndbuf:
- set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
-wait_for_memory:
- ret = sk_stream_wait_memory(sk, &timeo);
- if (ret) {
- if (ctx->open_rec)
- tls_trim_both_msgs(sk, msg_pl->sg.size);
- goto sendpage_end;
- }
+ int ret;

- if (ctx->open_rec)
- goto alloc_payload;
- }
+ if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL |
+ MSG_CMSG_COMPAT | MSG_SPLICE_PAGES |
+ MSG_SENDPAGE_NOTLAST | MSG_SENDPAGE_NOPOLICY))
+ return -EOPNOTSUPP;

- if (num_async) {
- /* Transmit if any encryptions have completed */
- if (test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) {
- cancel_delayed_work(&ctx->tx_work.work);
- tls_tx_records(sk, flags);
- }
- }
-sendpage_end:
- ret = sk_stream_error(sk, flags, ret);
- return copied > 0 ? copied : ret;
+ ret = mutex_lock_interruptible(&tls_ctx->tx_lock);
+ if (ret)
+ return ret;
+ lock_sock(sk);
+ ret = tls_sw_sendmsg_locked(sk, msg, size);
+ release_sock(sk);
+ mutex_unlock(&tls_ctx->tx_lock);
+ return ret;
}

int tls_sw_sendpage_locked(struct sock *sk, struct page *page,
int offset, size_t size, int flags)
{
+ struct bio_vec bvec;
+ struct msghdr msg = { .msg_flags = flags | MSG_SPLICE_PAGES, };
+
if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL |
MSG_SENDPAGE_NOTLAST | MSG_SENDPAGE_NOPOLICY |
MSG_NO_SHARED_FRAGS))
return -EOPNOTSUPP;
+ if (flags & MSG_SENDPAGE_NOTLAST)
+ msg.msg_flags |= MSG_MORE;

- return tls_sw_do_sendpage(sk, page, offset, size, flags);
+ bvec_set_page(&bvec, page, size, offset);
+ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size);
+ return tls_sw_sendmsg_locked(sk, &msg, size);
}

int tls_sw_sendpage(struct sock *sk, struct page *page,
int offset, size_t size, int flags)
{
- struct tls_context *tls_ctx = tls_get_ctx(sk);
- int ret;
+ struct bio_vec bvec;
+ struct msghdr msg = { .msg_flags = flags | MSG_SPLICE_PAGES, };

if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL |
MSG_SENDPAGE_NOTLAST | MSG_SENDPAGE_NOPOLICY))
return -EOPNOTSUPP;
+ if (flags & MSG_SENDPAGE_NOTLAST)
+ msg.msg_flags |= MSG_MORE;

- ret = mutex_lock_interruptible(&tls_ctx->tx_lock);
- if (ret)
- return ret;
- lock_sock(sk);
- ret = tls_sw_do_sendpage(sk, page, offset, size, flags);
- release_sock(sk);
- mutex_unlock(&tls_ctx->tx_lock);
- return ret;
+ bvec_set_page(&bvec, page, size, offset);
+ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size);
+ return tls_sw_sendmsg(sk, &msg, size);
}

static int

2023-05-27 01:22:19

by Jakub Kicinski

[permalink] [raw]

Subject: Re: [PATCH net-next 10/12] tls/sw: Convert tls_sw_sendpage() to use MSG_SPLICE_PAGES

On Wed, 24 May 2023 16:33:09 +0100 David Howells wrote:
> Convert tls_sw_sendpage() and tls_sw_sendpage_locked() to use sendmsg()
> with MSG_SPLICE_PAGES rather than directly splicing in the pages itself.
>
> [!] Note that tls_sw_sendpage_locked() appears to have the wrong locking
> upstream. I think the caller will only hold the socket lock, but it
> should hold tls_ctx->tx_lock too.

Lock ordering, as you probably discovered. It is what it is :|

> + if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL |
> + MSG_CMSG_COMPAT | MSG_SPLICE_PAGES |
> + MSG_SENDPAGE_NOTLAST | MSG_SENDPAGE_NOPOLICY))
> + return -EOPNOTSUPP;

Now MSG_SENDPAGE_* can leak in thru the sendmsg() call?
Letting MSG_SENDPAGE_NOPOLICY in seems pretty suspicious, no?