Teach ipv4/udp how to use external ubuf_info provided in msghdr and
also prepare it for managed frags by sprinkling
skb_zcopy_downgrade_managed() when it could mix managed and not managed
frags.
Signed-off-by: Pavel Begunkov <[email protected]>
---
net/ipv4/tcp.c | 32 ++++++++++++++++++++------------
1 file changed, 20 insertions(+), 12 deletions(-)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 390eb3dc53bd..a81f694af5e9 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1223,17 +1223,23 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
flags = msg->msg_flags;
- if (flags & MSG_ZEROCOPY && size && sock_flag(sk, SOCK_ZEROCOPY)) {
+ if ((flags & MSG_ZEROCOPY) && size) {
skb = tcp_write_queue_tail(sk);
- uarg = msg_zerocopy_realloc(sk, size, skb_zcopy(skb));
- if (!uarg) {
- err = -ENOBUFS;
- goto out_err;
- }
- zc = sk->sk_route_caps & NETIF_F_SG;
- if (!zc)
- uarg->zerocopy = 0;
+ if (msg->msg_ubuf) {
+ uarg = msg->msg_ubuf;
+ net_zcopy_get(uarg);
+ zc = sk->sk_route_caps & NETIF_F_SG;
+ } else if (sock_flag(sk, SOCK_ZEROCOPY)) {
+ uarg = msg_zerocopy_realloc(sk, size, skb_zcopy(skb));
+ if (!uarg) {
+ err = -ENOBUFS;
+ goto out_err;
+ }
+ zc = sk->sk_route_caps & NETIF_F_SG;
+ if (!zc)
+ uarg->zerocopy = 0;
+ }
}
if (unlikely(flags & MSG_FASTOPEN || inet_sk(sk)->defer_connect) &&
@@ -1356,9 +1362,11 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
copy = min_t(int, copy, pfrag->size - pfrag->offset);
- if (tcp_downgrade_zcopy_pure(sk, skb))
- goto wait_for_space;
-
+ if (unlikely(skb_zcopy_pure(skb) || skb_zcopy_managed(skb))) {
+ if (tcp_downgrade_zcopy_pure(sk, skb))
+ goto wait_for_space;
+ skb_zcopy_downgrade_managed(skb);
+ }
copy = tcp_wmem_schedule(sk, copy);
if (!copy)
goto wait_for_space;
--
2.36.1
On 7/7/22 5:49 AM, Pavel Begunkov wrote:
> Teach ipv4/udp how to use external ubuf_info provided in msghdr and
ipv4/tcp?
> also prepare it for managed frags by sprinkling
> skb_zcopy_downgrade_managed() when it could mix managed and not managed
> frags.
>
> Signed-off-by: Pavel Begunkov <[email protected]>
> ---
> net/ipv4/tcp.c | 32 ++++++++++++++++++++------------
> 1 file changed, 20 insertions(+), 12 deletions(-)
>
> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> index 390eb3dc53bd..a81f694af5e9 100644
> --- a/net/ipv4/tcp.c
> +++ b/net/ipv4/tcp.c
> @@ -1223,17 +1223,23 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
>
> flags = msg->msg_flags;
>
> - if (flags & MSG_ZEROCOPY && size && sock_flag(sk, SOCK_ZEROCOPY)) {
> + if ((flags & MSG_ZEROCOPY) && size) {
> skb = tcp_write_queue_tail(sk);
> - uarg = msg_zerocopy_realloc(sk, size, skb_zcopy(skb));
> - if (!uarg) {
> - err = -ENOBUFS;
> - goto out_err;
> - }
>
> - zc = sk->sk_route_caps & NETIF_F_SG;
> - if (!zc)
> - uarg->zerocopy = 0;
> + if (msg->msg_ubuf) {
> + uarg = msg->msg_ubuf;
> + net_zcopy_get(uarg);
> + zc = sk->sk_route_caps & NETIF_F_SG;
> + } else if (sock_flag(sk, SOCK_ZEROCOPY)) {
> + uarg = msg_zerocopy_realloc(sk, size, skb_zcopy(skb));
> + if (!uarg) {
> + err = -ENOBUFS;
> + goto out_err;
> + }
> + zc = sk->sk_route_caps & NETIF_F_SG;
> + if (!zc)
> + uarg->zerocopy = 0;
> + }
> }
>
> if (unlikely(flags & MSG_FASTOPEN || inet_sk(sk)->defer_connect) &&
> @@ -1356,9 +1362,11 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
>
> copy = min_t(int, copy, pfrag->size - pfrag->offset);
>
> - if (tcp_downgrade_zcopy_pure(sk, skb))
> - goto wait_for_space;
> -
> + if (unlikely(skb_zcopy_pure(skb) || skb_zcopy_managed(skb))) {
> + if (tcp_downgrade_zcopy_pure(sk, skb))
> + goto wait_for_space;
> + skb_zcopy_downgrade_managed(skb);
> + }
> copy = tcp_wmem_schedule(sk, copy);
> if (!copy)
> goto wait_for_space;
You dropped the msg->msg_ubuf checks on jump labels. Removing the one
you had at 'out_nopush' I agree with based on my tests (i.e, it is not
needed).
The one at 'out_err' seems like it is needed - but it has been a few
weeks since I debugged that case. I believe the error path I was hitting
was sk_stream_wait_memory with MSG_DONTWAIT flag set meaning timeout is
0 and it jumps there with EPIPE.
On 7/8/22 05:06, David Ahern wrote:
> On 7/7/22 5:49 AM, Pavel Begunkov wrote:
>> Teach ipv4/udp how to use external ubuf_info provided in msghdr and
>
> ipv4/tcp?
Ehh, just tcp. Thanks, I updated the branches
>> also prepare it for managed frags by sprinkling
>> skb_zcopy_downgrade_managed() when it could mix managed and not managed
>> frags.
>>
>> Signed-off-by: Pavel Begunkov <[email protected]>
>> ---
>> net/ipv4/tcp.c | 32 ++++++++++++++++++++------------
>> 1 file changed, 20 insertions(+), 12 deletions(-)
>>
>> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
>> index 390eb3dc53bd..a81f694af5e9 100644
>> --- a/net/ipv4/tcp.c
>> +++ b/net/ipv4/tcp.c
>> @@ -1223,17 +1223,23 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
>>
>> flags = msg->msg_flags;
>>
>> - if (flags & MSG_ZEROCOPY && size && sock_flag(sk, SOCK_ZEROCOPY)) {
>> + if ((flags & MSG_ZEROCOPY) && size) {
>> skb = tcp_write_queue_tail(sk);
>> - uarg = msg_zerocopy_realloc(sk, size, skb_zcopy(skb));
>> - if (!uarg) {
>> - err = -ENOBUFS;
>> - goto out_err;
>> - }
>>
>> - zc = sk->sk_route_caps & NETIF_F_SG;
>> - if (!zc)
>> - uarg->zerocopy = 0;
>> + if (msg->msg_ubuf) {
>> + uarg = msg->msg_ubuf;
>> + net_zcopy_get(uarg);
>> + zc = sk->sk_route_caps & NETIF_F_SG;
>> + } else if (sock_flag(sk, SOCK_ZEROCOPY)) {
>> + uarg = msg_zerocopy_realloc(sk, size, skb_zcopy(skb));
>> + if (!uarg) {
>> + err = -ENOBUFS;
>> + goto out_err;
>> + }
>> + zc = sk->sk_route_caps & NETIF_F_SG;
>> + if (!zc)
>> + uarg->zerocopy = 0;
>> + }
>> }
>>
>> if (unlikely(flags & MSG_FASTOPEN || inet_sk(sk)->defer_connect) &&
>> @@ -1356,9 +1362,11 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
>>
>> copy = min_t(int, copy, pfrag->size - pfrag->offset);
>>
>> - if (tcp_downgrade_zcopy_pure(sk, skb))
>> - goto wait_for_space;
>> -
>> + if (unlikely(skb_zcopy_pure(skb) || skb_zcopy_managed(skb))) {
>> + if (tcp_downgrade_zcopy_pure(sk, skb))
>> + goto wait_for_space;
>> + skb_zcopy_downgrade_managed(skb);
>> + }
>> copy = tcp_wmem_schedule(sk, copy);
>> if (!copy)
>> goto wait_for_space;
>
> You dropped the msg->msg_ubuf checks on jump labels. Removing the one
> you had at 'out_nopush' I agree with based on my tests (i.e, it is not
> needed).
It was an optimisation, which I dropped for simplicity. Will be sending it
and couple more afterwards.
> The one at 'out_err' seems like it is needed - but it has been a few
> weeks since I debugged that case. I believe the error path I was hitting
> was sk_stream_wait_memory with MSG_DONTWAIT flag set meaning timeout is
> 0 and it jumps there with EPIPE.
Currently, it's consistent with MSG_ZEROCOPY ubuf_info, we grab a ubuf_info
reference at the beginning (msg_zerocopy_realloc() for MSG_ZEROCOPY and
net_zcopy_get() for msg_ubuf), and then release it at the end
with net_zcopy_put() or net_zcopy_put_abort().
All users, e.g. skb_zerocopy_iter_stream(), have to grab a new reference,
skb_zcopy_set() -> net_zcopy_get().
Not sure I see any issue, and if there is it sounds that it should also
be affecting MSG_ZEROCOPY.
--
Pavel Begunkov
On 7/8/22 7:03 AM, Pavel Begunkov wrote:
>>> @@ -1356,9 +1362,11 @@ int tcp_sendmsg_locked(struct sock *sk, struct
>>> msghdr *msg, size_t size)
>>> copy = min_t(int, copy, pfrag->size - pfrag->offset);
>>> - if (tcp_downgrade_zcopy_pure(sk, skb))
>>> - goto wait_for_space;
>>> -
>>> + if (unlikely(skb_zcopy_pure(skb) ||
>>> skb_zcopy_managed(skb))) {
>>> + if (tcp_downgrade_zcopy_pure(sk, skb))
>>> + goto wait_for_space;
>>> + skb_zcopy_downgrade_managed(skb);
>>> + }
>>> copy = tcp_wmem_schedule(sk, copy);
>>> if (!copy)
>>> goto wait_for_space;
>>
>> You dropped the msg->msg_ubuf checks on jump labels. Removing the one
>> you had at 'out_nopush' I agree with based on my tests (i.e, it is not
>> needed).
>
> It was an optimisation, which I dropped for simplicity. Will be sending it
> and couple more afterwards.
>
>
>> The one at 'out_err' seems like it is needed - but it has been a few
>> weeks since I debugged that case. I believe the error path I was hitting
>> was sk_stream_wait_memory with MSG_DONTWAIT flag set meaning timeout is
>> 0 and it jumps there with EPIPE.
>
> Currently, it's consistent with MSG_ZEROCOPY ubuf_info, we grab a ubuf_info
> reference at the beginning (msg_zerocopy_realloc() for MSG_ZEROCOPY and
> net_zcopy_get() for msg_ubuf), and then release it at the end
> with net_zcopy_put() or net_zcopy_put_abort().
>
my fault; I somehow dropped a line in the port to the 5.13 kernel.