From: David Ahern <[email protected]>
Add support for custom iov_iter handling to msghdr. The idea is that
in-kernel subsystems want control over how an SG is split.
Signed-off-by: David Ahern <[email protected]>
[pavel: move callback into msghdr]
Signed-off-by: Pavel Begunkov <[email protected]>
---
include/linux/skbuff.h | 7 ++++---
include/linux/socket.h | 4 ++++
net/core/datagram.c | 14 ++++++++++----
net/core/skbuff.c | 2 +-
4 files changed, 19 insertions(+), 8 deletions(-)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 8e12b3b9ad6c..a8a2dd4cfdfd 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1776,13 +1776,14 @@ void msg_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref);
void msg_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *uarg,
bool success);
-int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
- struct iov_iter *from, size_t length);
+int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
+ struct sk_buff *skb, struct iov_iter *from,
+ size_t length);
static inline int skb_zerocopy_iter_dgram(struct sk_buff *skb,
struct msghdr *msg, int len)
{
- return __zerocopy_sg_from_iter(skb->sk, skb, &msg->msg_iter, len);
+ return __zerocopy_sg_from_iter(msg, skb->sk, skb, &msg->msg_iter, len);
}
int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
diff --git a/include/linux/socket.h b/include/linux/socket.h
index 7bac9fc1cee0..3c11ef18a9cf 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -14,6 +14,8 @@ struct file;
struct pid;
struct cred;
struct socket;
+struct sock;
+struct sk_buff;
#define __sockaddr_check_size(size) \
BUILD_BUG_ON(((size) > sizeof(struct __kernel_sockaddr_storage)))
@@ -70,6 +72,8 @@ struct msghdr {
__kernel_size_t msg_controllen; /* ancillary data buffer length */
struct kiocb *msg_iocb; /* ptr to iocb for async requests */
struct ubuf_info *msg_ubuf;
+ int (*sg_from_iter)(struct sock *sk, struct sk_buff *skb,
+ struct iov_iter *from, size_t length);
};
struct user_msghdr {
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 50f4faeea76c..b3c05efd659f 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -613,10 +613,16 @@ int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset,
}
EXPORT_SYMBOL(skb_copy_datagram_from_iter);
-int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
- struct iov_iter *from, size_t length)
+int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
+ struct sk_buff *skb, struct iov_iter *from,
+ size_t length)
{
- int frag = skb_shinfo(skb)->nr_frags;
+ int frag;
+
+ if (msg && msg->sg_from_iter && msg->msg_ubuf == skb_zcopy(skb))
+ return msg->sg_from_iter(sk, skb, from, length);
+
+ frag = skb_shinfo(skb)->nr_frags;
while (length && iov_iter_count(from)) {
struct page *pages[MAX_SKB_FRAGS];
@@ -702,7 +708,7 @@ int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
if (skb_copy_datagram_from_iter(skb, 0, from, copy))
return -EFAULT;
- return __zerocopy_sg_from_iter(NULL, skb, from, ~0U);
+ return __zerocopy_sg_from_iter(NULL, NULL, skb, from, ~0U);
}
EXPORT_SYMBOL(zerocopy_sg_from_iter);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index fc22b3d32052..f5a3ebbc1f7e 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1358,7 +1358,7 @@ int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
if (orig_uarg && uarg != orig_uarg)
return -EEXIST;
- err = __zerocopy_sg_from_iter(sk, skb, &msg->msg_iter, len);
+ err = __zerocopy_sg_from_iter(msg, sk, skb, &msg->msg_iter, len);
if (err == -EFAULT || (err == -EMSGSIZE && skb->len == orig_len)) {
struct sock *save_sk = skb->sk;
--
2.36.1
On 7/7/22 12:49, Pavel Begunkov wrote:
> From: David Ahern <[email protected]>
>
> Add support for custom iov_iter handling to msghdr. The idea is that
> in-kernel subsystems want control over how an SG is split.
>
> Signed-off-by: David Ahern <[email protected]>
> [pavel: move callback into msghdr]
> Signed-off-by: Pavel Begunkov <[email protected]>
> ---
> include/linux/skbuff.h | 7 ++++---
> include/linux/socket.h | 4 ++++
> net/core/datagram.c | 14 ++++++++++----
> net/core/skbuff.c | 2 +-
> 4 files changed, 19 insertions(+), 8 deletions(-)
>
> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index 8e12b3b9ad6c..a8a2dd4cfdfd 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -1776,13 +1776,14 @@ void msg_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref);
> void msg_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *uarg,
> bool success);
>
> -int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
> - struct iov_iter *from, size_t length);
> +int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
> + struct sk_buff *skb, struct iov_iter *from,
> + size_t length);
>
> static inline int skb_zerocopy_iter_dgram(struct sk_buff *skb,
> struct msghdr *msg, int len)
> {
> - return __zerocopy_sg_from_iter(skb->sk, skb, &msg->msg_iter, len);
> + return __zerocopy_sg_from_iter(msg, skb->sk, skb, &msg->msg_iter, len);
> }
>
> int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
> diff --git a/include/linux/socket.h b/include/linux/socket.h
> index 7bac9fc1cee0..3c11ef18a9cf 100644
> --- a/include/linux/socket.h
> +++ b/include/linux/socket.h
> @@ -14,6 +14,8 @@ struct file;
> struct pid;
> struct cred;
> struct socket;
> +struct sock;
> +struct sk_buff;
>
> #define __sockaddr_check_size(size) \
> BUILD_BUG_ON(((size) > sizeof(struct __kernel_sockaddr_storage)))
> @@ -70,6 +72,8 @@ struct msghdr {
> __kernel_size_t msg_controllen; /* ancillary data buffer length */
> struct kiocb *msg_iocb; /* ptr to iocb for async requests */
> struct ubuf_info *msg_ubuf;
> + int (*sg_from_iter)(struct sock *sk, struct sk_buff *skb,
> + struct iov_iter *from, size_t length);
> };
>
> struct user_msghdr {
> diff --git a/net/core/datagram.c b/net/core/datagram.c
> index 50f4faeea76c..b3c05efd659f 100644
> --- a/net/core/datagram.c
> +++ b/net/core/datagram.c
> @@ -613,10 +613,16 @@ int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset,
> }
> EXPORT_SYMBOL(skb_copy_datagram_from_iter);
>
> -int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
> - struct iov_iter *from, size_t length)
> +int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
> + struct sk_buff *skb, struct iov_iter *from,
> + size_t length)
> {
> - int frag = skb_shinfo(skb)->nr_frags;
> + int frag;
> +
> + if (msg && msg->sg_from_iter && msg->msg_ubuf == skb_zcopy(skb))
I'm going to drop the "msg->msg_ubuf == skb_zcopy(skb)" check. I added it
with the intention of making this less fragile, but it disables the
optimisation for TCP, because skb_zerocopy_iter_stream() assigns the ubuf
to the skb only after calling __zerocopy_sg_from_iter().
> + return msg->sg_from_iter(sk, skb, from, length);
> +
> + frag = skb_shinfo(skb)->nr_frags;
>
> while (length && iov_iter_count(from)) {
> struct page *pages[MAX_SKB_FRAGS];
> @@ -702,7 +708,7 @@ int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
> if (skb_copy_datagram_from_iter(skb, 0, from, copy))
> return -EFAULT;
>
> - return __zerocopy_sg_from_iter(NULL, skb, from, ~0U);
> + return __zerocopy_sg_from_iter(NULL, NULL, skb, from, ~0U);
> }
> EXPORT_SYMBOL(zerocopy_sg_from_iter);
>
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index fc22b3d32052..f5a3ebbc1f7e 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -1358,7 +1358,7 @@ int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
> if (orig_uarg && uarg != orig_uarg)
> return -EEXIST;
>
> - err = __zerocopy_sg_from_iter(sk, skb, &msg->msg_iter, len);
> + err = __zerocopy_sg_from_iter(msg, sk, skb, &msg->msg_iter, len);
> if (err == -EFAULT || (err == -EMSGSIZE && skb->len == orig_len)) {
> struct sock *save_sk = skb->sk;
>
--
Pavel Begunkov