This patchset creates the initial plumbing for an io_uring command for
sockets.
For now, create two uring commands for sockets, SOCKET_URING_OP_SIOCOUTQ
and SOCKET_URING_OP_SIOCINQ, which are available in TCP, UDP and RAW
sockets.
In order to test this code, I created a liburing test, which is
currently located at [1], and I will create a pull request once we are
good with this patchset.
V1 submission was sent a while ago[2], but it required more plumbing
that was done in different patch submissions[3][4].
PS: This patchset depends on a commit[4] that is not committed to the
tree yet (but close to it, IMO).
[1] Link: https://github.com/leitao/liburing/blob/master/test/socket-io-cmd.c
[2] Link: https://lore.kernel.org/lkml/[email protected]/
[3] Link: https://lore.kernel.org/lkml/[email protected]/
[4] Link: https://lore.kernel.org/lkml/[email protected]/
V1->V2:
* Rely on a generic socket->ioctl infrastructure instead of
reimplementing it
Breno Leitao (4):
net: wire up support for file_operations->uring_cmd()
net: add uring_cmd callback to UDP
net: add uring_cmd callback to TCP
net: add uring_cmd callback to raw "protocol"
include/linux/net.h | 2 ++
include/net/raw.h | 3 +++
include/net/sock.h | 6 ++++++
include/net/tcp.h | 2 ++
include/net/udp.h | 2 ++
include/uapi/linux/net.h | 5 +++++
net/core/sock.c | 17 +++++++++++++++--
net/dccp/ipv4.c | 1 +
net/ipv4/af_inet.c | 3 +++
net/ipv4/raw.c | 23 +++++++++++++++++++++++
net/ipv4/tcp.c | 21 +++++++++++++++++++++
net/ipv4/tcp_ipv4.c | 1 +
net/ipv4/udp.c | 22 ++++++++++++++++++++++
net/l2tp/l2tp_ip.c | 1 +
net/mptcp/protocol.c | 1 +
net/sctp/protocol.c | 1 +
net/socket.c | 13 +++++++++++++
17 files changed, 122 insertions(+), 2 deletions(-)
--
2.34.1
This is the implementation of uring_cmd for the UDP protocol. It
basically encompasses SOCKET_URING_OP_SIOCOUTQ and
SOCKET_URING_OP_SIOCINQ, which are the io_uring representations of
SIOCOUTQ and SIOCINQ.
SIOCINQ and SIOCOUTQ are the only two CMDs handled by udp_ioctl().
Signed-off-by: Breno Leitao <[email protected]>
---
include/net/udp.h | 2 ++
include/uapi/linux/net.h | 5 +++++
net/ipv4/udp.c | 22 ++++++++++++++++++++++
3 files changed, 29 insertions(+)
diff --git a/include/net/udp.h b/include/net/udp.h
index 10d94a42117b..046ca7231d27 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -285,6 +285,8 @@ int udp_cmsg_send(struct sock *sk, struct msghdr *msg, u16 *gso_size);
void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst);
int udp_rcv(struct sk_buff *skb);
int udp_ioctl(struct sock *sk, int cmd, int *karg);
+int udp_uring_cmd(struct sock *sk, struct io_uring_cmd *cmd,
+ unsigned int issue_flags);
int udp_init_sock(struct sock *sk);
int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
int __udp_disconnect(struct sock *sk, int flags);
diff --git a/include/uapi/linux/net.h b/include/uapi/linux/net.h
index 4dabec6bd957..dd8e7ced7d24 100644
--- a/include/uapi/linux/net.h
+++ b/include/uapi/linux/net.h
@@ -55,4 +55,9 @@ typedef enum {
#define __SO_ACCEPTCON (1 << 16) /* performed a listen */
+enum {
+ SOCKET_URING_OP_SIOCINQ = 0,
+ SOCKET_URING_OP_SIOCOUTQ,
+};
+
#endif /* _UAPI_LINUX_NET_H */
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 6a09757f287b..5e06b6de1c08 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -113,6 +113,7 @@
#include <net/sock_reuseport.h>
#include <net/addrconf.h>
#include <net/udp_tunnel.h>
+#include <linux/io_uring.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6_stubs.h>
#endif
@@ -1687,6 +1688,26 @@ static int first_packet_length(struct sock *sk)
return res;
}
+int udp_uring_cmd(struct sock *sk, struct io_uring_cmd *cmd,
+ unsigned int issue_flags)
+{
+ int ret;
+
+ switch (cmd->sqe->cmd_op) {
+ case SOCKET_URING_OP_SIOCINQ:
+ if (udp_ioctl(sk, SIOCINQ, &ret))
+ return -EFAULT;
+ return ret;
+ case SOCKET_URING_OP_SIOCOUTQ:
+ if (udp_ioctl(sk, SIOCOUTQ, &ret))
+ return -EFAULT;
+ return ret;
+ default:
+ return -ENOIOCTLCMD;
+ }
+}
+EXPORT_SYMBOL_GPL(udp_uring_cmd);
+
/*
* IOCTL requests applicable to the UDP protocol
*/
@@ -2925,6 +2946,7 @@ struct proto udp_prot = {
.connect = ip4_datagram_connect,
.disconnect = udp_disconnect,
.ioctl = udp_ioctl,
+ .uring_cmd = udp_uring_cmd,
.init = udp_init_sock,
.destroy = udp_destroy_sock,
.setsockopt = udp_setsockopt,
--
2.34.1
Create the initial plumbing to call protocol specific uring_cmd
callbacks. These are io_uring specific callbacks that implement
ioctl-like operation types, such as SIOCINQ, SIOCOUTQ and others.
In order to achieve this, create uring_cmd callback placeholders in
file_ops, proto and proto_ops structures.
Also create the functions that do the plumbing from io_uring_cmd() up
to sk_proto->uring_cmd(). If the callback is not implemented,
-EOPNOTSUPP is returned.
That way, the io_uring issue path calls file_operations->uring_cmd
(sock_uring_cmd()). sock_uring_cmd() is responsible for calling the
socket-level (struct proto_ops) uring_cmd callback, which is
sock_common_uring_cmd() for the proto_ops touched by this patch.
sock_common_uring_cmd() then calls the protocol-specific (struct proto)
uring_cmd function; those functions are implemented in the upcoming
patches.
By the end, uring_cmd() function has access to 'struct io_uring_cmd'
which points to the whole SQE, and any field could be accessed from the
function pointer.
Signed-off-by: Breno Leitao <[email protected]>
---
include/linux/net.h | 2 ++
include/net/sock.h | 6 ++++++
net/core/sock.c | 17 +++++++++++++++--
net/dccp/ipv4.c | 1 +
net/ipv4/af_inet.c | 3 +++
net/l2tp/l2tp_ip.c | 1 +
net/mptcp/protocol.c | 1 +
net/sctp/protocol.c | 1 +
net/socket.c | 13 +++++++++++++
9 files changed, 43 insertions(+), 2 deletions(-)
diff --git a/include/linux/net.h b/include/linux/net.h
index 8defc8f1d82e..58dea87077af 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -182,6 +182,8 @@ struct proto_ops {
int (*compat_ioctl) (struct socket *sock, unsigned int cmd,
unsigned long arg);
#endif
+ int (*uring_cmd)(struct socket *sock, struct io_uring_cmd *cmd,
+ unsigned int issue_flags);
int (*gettstamp) (struct socket *sock, void __user *userstamp,
bool timeval, bool time32);
int (*listen) (struct socket *sock, int len);
diff --git a/include/net/sock.h b/include/net/sock.h
index 62a1b99da349..a49b8b19292b 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -111,6 +111,7 @@ typedef struct {
struct sock;
struct proto;
struct net;
+struct io_uring_cmd;
typedef __u32 __bitwise __portpair;
typedef __u64 __bitwise __addrpair;
@@ -1259,6 +1260,9 @@ struct proto {
int (*ioctl)(struct sock *sk, int cmd,
int *karg);
+ int (*uring_cmd)(struct sock *sk,
+ struct io_uring_cmd *cmd,
+ unsigned int issue_flags);
int (*init)(struct sock *sk);
void (*destroy)(struct sock *sk);
void (*shutdown)(struct sock *sk, int how);
@@ -1934,6 +1938,8 @@ int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
int flags);
int sock_common_setsockopt(struct socket *sock, int level, int optname,
sockptr_t optval, unsigned int optlen);
+int sock_common_uring_cmd(struct socket *sock, struct io_uring_cmd *cmd,
+ unsigned int issue_flags);
void sk_common_release(struct sock *sk);
diff --git a/net/core/sock.c b/net/core/sock.c
index 1df7e432fec5..339fa74db60f 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -3668,6 +3668,18 @@ int sock_common_setsockopt(struct socket *sock, int level, int optname,
}
EXPORT_SYMBOL(sock_common_setsockopt);
+int sock_common_uring_cmd(struct socket *sock, struct io_uring_cmd *cmd,
+ unsigned int issue_flags)
+{
+ struct sock *sk = sock->sk;
+
+ if (!sk->sk_prot || !sk->sk_prot->uring_cmd)
+ return -EOPNOTSUPP;
+
+ return sk->sk_prot->uring_cmd(sk, cmd, issue_flags);
+}
+EXPORT_SYMBOL(sock_common_uring_cmd);
+
void sk_common_release(struct sock *sk)
{
if (sk->sk_prot->destroy)
@@ -4008,7 +4020,7 @@ static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
{
seq_printf(seq, "%-9s %4u %6d %6ld %-3s %6u %-3s %-10s "
- "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
+ "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
proto->name,
proto->obj_size,
sock_prot_inuse_get(seq_file_net(seq), proto),
@@ -4022,6 +4034,7 @@ static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
proto_method_implemented(proto->disconnect),
proto_method_implemented(proto->accept),
proto_method_implemented(proto->ioctl),
+ proto_method_implemented(proto->uring_cmd),
proto_method_implemented(proto->init),
proto_method_implemented(proto->destroy),
proto_method_implemented(proto->shutdown),
@@ -4050,7 +4063,7 @@ static int proto_seq_show(struct seq_file *seq, void *v)
"maxhdr",
"slab",
"module",
- "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
+ "cl co di ac io ur in de sh ss gs se re sp bi br ha uh gp em\n");
else
proto_seq_printf(seq, list_entry(v, struct proto, node));
return 0;
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 3ab68415d121..1baad5ff402e 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -1001,6 +1001,7 @@ static const struct proto_ops inet_dccp_ops = {
/* FIXME: work on tcp_poll to rename it to inet_csk_poll */
.poll = dccp_poll,
.ioctl = inet_ioctl,
+ .uring_cmd = sock_common_uring_cmd,
.gettstamp = sock_gettstamp,
/* FIXME: work on inet_listen to rename it to sock_common_listen */
.listen = inet_dccp_listen,
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 9cd48df6a331..2947d4dd4922 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1055,6 +1055,7 @@ const struct proto_ops inet_stream_ops = {
.getname = inet_getname,
.poll = tcp_poll,
.ioctl = inet_ioctl,
+ .uring_cmd = sock_common_uring_cmd,
.gettstamp = sock_gettstamp,
.listen = inet_listen,
.shutdown = inet_shutdown,
@@ -1091,6 +1092,7 @@ const struct proto_ops inet_dgram_ops = {
.getname = inet_getname,
.poll = udp_poll,
.ioctl = inet_ioctl,
+ .uring_cmd = sock_common_uring_cmd,
.gettstamp = sock_gettstamp,
.listen = sock_no_listen,
.shutdown = inet_shutdown,
@@ -1124,6 +1126,7 @@ static const struct proto_ops inet_sockraw_ops = {
.getname = inet_getname,
.poll = datagram_poll,
.ioctl = inet_ioctl,
+ .uring_cmd = sock_common_uring_cmd,
.gettstamp = sock_gettstamp,
.listen = sock_no_listen,
.shutdown = inet_shutdown,
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 2b795c1064f5..3540e01455f7 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -616,6 +616,7 @@ static const struct proto_ops l2tp_ip_ops = {
.getname = l2tp_ip_getname,
.poll = datagram_poll,
.ioctl = inet_ioctl,
+ .uring_cmd = sock_common_uring_cmd,
.gettstamp = sock_gettstamp,
.listen = sock_no_listen,
.shutdown = inet_shutdown,
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 992b89c75631..444dacb9d804 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -3883,6 +3883,7 @@ static const struct proto_ops mptcp_stream_ops = {
.getname = inet_getname,
.poll = mptcp_poll,
.ioctl = inet_ioctl,
+ .uring_cmd = sock_common_uring_cmd,
.gettstamp = sock_gettstamp,
.listen = mptcp_listen,
.shutdown = inet_shutdown,
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 664d1f2e9121..32b1a87d958a 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1125,6 +1125,7 @@ static const struct proto_ops inet_seqpacket_ops = {
.getname = inet_getname, /* Semantics are different. */
.poll = sctp_poll,
.ioctl = inet_ioctl,
+ .uring_cmd = sock_common_uring_cmd,
.gettstamp = sock_gettstamp,
.listen = sctp_inet_listen,
.shutdown = inet_shutdown, /* Looks harmless. */
diff --git a/net/socket.c b/net/socket.c
index b778fc03c6e0..44cf9841af44 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -88,6 +88,7 @@
#include <linux/xattr.h>
#include <linux/nospec.h>
#include <linux/indirect_call_wrapper.h>
+#include <linux/io_uring.h>
#include <linux/uaccess.h>
#include <asm/unistd.h>
@@ -117,6 +118,7 @@ unsigned int sysctl_net_busy_poll __read_mostly;
static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
static int sock_mmap(struct file *file, struct vm_area_struct *vma);
+static int sock_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags);
static int sock_close(struct inode *inode, struct file *file);
static __poll_t sock_poll(struct file *file,
@@ -159,6 +161,7 @@ static const struct file_operations socket_file_ops = {
#ifdef CONFIG_COMPAT
.compat_ioctl = compat_sock_ioctl,
#endif
+ .uring_cmd = sock_uring_cmd,
.mmap = sock_mmap,
.release = sock_close,
.fasync = sock_fasync,
@@ -1309,6 +1312,16 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
return err;
}
+static int sock_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
+{
+ struct socket *sock = cmd->file->private_data;
+
+ if (!sock->ops || !sock->ops->uring_cmd)
+ return -EOPNOTSUPP;
+
+ return sock->ops->uring_cmd(sock, cmd, issue_flags);
+}
+
/**
* sock_create_lite - creates a socket
* @family: protocol family (AF_INET, ...)
--
2.34.1
On 6/14/23 5:07 AM, Breno Leitao wrote:
> diff --git a/include/linux/net.h b/include/linux/net.h
> index 8defc8f1d82e..58dea87077af 100644
> --- a/include/linux/net.h
> +++ b/include/linux/net.h
> @@ -182,6 +182,8 @@ struct proto_ops {
> int (*compat_ioctl) (struct socket *sock, unsigned int cmd,
> unsigned long arg);
> #endif
> + int (*uring_cmd)(struct socket *sock, struct io_uring_cmd *cmd,
> + unsigned int issue_flags);
> int (*gettstamp) (struct socket *sock, void __user *userstamp,
> bool timeval, bool time32);
> int (*listen) (struct socket *sock, int len);
> diff --git a/include/net/sock.h b/include/net/sock.h
> index 62a1b99da349..a49b8b19292b 100644
> --- a/include/net/sock.h
> +++ b/include/net/sock.h
> @@ -111,6 +111,7 @@ typedef struct {
> struct sock;
> struct proto;
> struct net;
> +struct io_uring_cmd;
>
> typedef __u32 __bitwise __portpair;
> typedef __u64 __bitwise __addrpair;
> @@ -1259,6 +1260,9 @@ struct proto {
>
> int (*ioctl)(struct sock *sk, int cmd,
> int *karg);
> + int (*uring_cmd)(struct sock *sk,
> + struct io_uring_cmd *cmd,
> + unsigned int issue_flags);
> int (*init)(struct sock *sk);
> void (*destroy)(struct sock *sk);
> void (*shutdown)(struct sock *sk, int how);
> @@ -1934,6 +1938,8 @@ int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
> int flags);
> int sock_common_setsockopt(struct socket *sock, int level, int optname,
> sockptr_t optval, unsigned int optlen);
> +int sock_common_uring_cmd(struct socket *sock, struct io_uring_cmd *cmd,
> + unsigned int issue_flags);
>
> void sk_common_release(struct sock *sk);
>
> diff --git a/net/core/sock.c b/net/core/sock.c
> index 1df7e432fec5..339fa74db60f 100644
> --- a/net/core/sock.c
> +++ b/net/core/sock.c
> @@ -3668,6 +3668,18 @@ int sock_common_setsockopt(struct socket *sock, int level, int optname,
> }
> EXPORT_SYMBOL(sock_common_setsockopt);
>
> +int sock_common_uring_cmd(struct socket *sock, struct io_uring_cmd *cmd,
> + unsigned int issue_flags)
> +{
> + struct sock *sk = sock->sk;
> +
> + if (!sk->sk_prot || !sk->sk_prot->uring_cmd)
> + return -EOPNOTSUPP;
> +
> + return sk->sk_prot->uring_cmd(sk, cmd, issue_flags);
> +}
> +EXPORT_SYMBOL(sock_common_uring_cmd);
> +
io_uring is just another in-kernel user of sockets. There is no reason
for io_uring references to be in core net code. It should be using
exposed in-kernel APIs and doing any translation of its op codes in
io_uring/ code.
On 6/14/23 16:15, David Ahern wrote:
> On 6/14/23 5:07 AM, Breno Leitao wrote:
>> diff --git a/include/linux/net.h b/include/linux/net.h
>> index 8defc8f1d82e..58dea87077af 100644
>> --- a/include/linux/net.h
>> +++ b/include/linux/net.h
>> @@ -182,6 +182,8 @@ struct proto_ops {
>> int (*compat_ioctl) (struct socket *sock, unsigned int cmd,
>> unsigned long arg);
>> #endif
>> + int (*uring_cmd)(struct socket *sock, struct io_uring_cmd *cmd,
>> + unsigned int issue_flags);
>> int (*gettstamp) (struct socket *sock, void __user *userstamp,
>> bool timeval, bool time32);
>> int (*listen) (struct socket *sock, int len);
>> diff --git a/include/net/sock.h b/include/net/sock.h
>> index 62a1b99da349..a49b8b19292b 100644
>> --- a/include/net/sock.h
>> +++ b/include/net/sock.h
>> @@ -111,6 +111,7 @@ typedef struct {
>> struct sock;
>> struct proto;
>> struct net;
>> +struct io_uring_cmd;
>>
>> typedef __u32 __bitwise __portpair;
>> typedef __u64 __bitwise __addrpair;
>> @@ -1259,6 +1260,9 @@ struct proto {
>>
>> int (*ioctl)(struct sock *sk, int cmd,
>> int *karg);
>> + int (*uring_cmd)(struct sock *sk,
>> + struct io_uring_cmd *cmd,
>> + unsigned int issue_flags);
>> int (*init)(struct sock *sk);
>> void (*destroy)(struct sock *sk);
>> void (*shutdown)(struct sock *sk, int how);
>> @@ -1934,6 +1938,8 @@ int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
>> int flags);
>> int sock_common_setsockopt(struct socket *sock, int level, int optname,
>> sockptr_t optval, unsigned int optlen);
>> +int sock_common_uring_cmd(struct socket *sock, struct io_uring_cmd *cmd,
>> + unsigned int issue_flags);
>>
>> void sk_common_release(struct sock *sk);
>>
>> diff --git a/net/core/sock.c b/net/core/sock.c
>> index 1df7e432fec5..339fa74db60f 100644
>> --- a/net/core/sock.c
>> +++ b/net/core/sock.c
>> @@ -3668,6 +3668,18 @@ int sock_common_setsockopt(struct socket *sock, int level, int optname,
>> }
>> EXPORT_SYMBOL(sock_common_setsockopt);
>>
>> +int sock_common_uring_cmd(struct socket *sock, struct io_uring_cmd *cmd,
>> + unsigned int issue_flags)
>> +{
>> + struct sock *sk = sock->sk;
>> +
>> + if (!sk->sk_prot || !sk->sk_prot->uring_cmd)
>> + return -EOPNOTSUPP;
>> +
>> + return sk->sk_prot->uring_cmd(sk, cmd, issue_flags);
>> +}
>> +EXPORT_SYMBOL(sock_common_uring_cmd);
>> +
>
>
> io_uring is just another in-kernel user of sockets. There is no reason
> for io_uring references to be in core net code. It should be using
> exposed in-kernel APIs and doing any translation of its op codes in
> io_uring/ code.
That callback is all about file dependent operations, just like ioctl.
And as the patch in question is doing socket specific stuff, I think
architecturally it fits well. I also believe Breno wants to extend it
later to support more operations.
Sockets are a large chunk of use cases, it can be implemented as a
separate io_uring request type if nothing else works, but in general
that might not be as scalable.
--
Pavel Begunkov
On Wed, Jun 14, 2023 at 08:15:10AM -0700, David Ahern wrote:
> On 6/14/23 5:07 AM, Breno Leitao wrote:
> io_uring is just another in-kernel user of sockets. There is no reason
> for io_uring references to be in core net code. It should be using
> exposed in-kernel APIs and doing any translation of its op codes in
> io_uring/ code.
Thanks for the feedback. If we want to keep the network subsystem
untouched, then we can do it using an approach similar to the
following. Is this a better approach moving forward?
--
From: Breno Leitao <[email protected]>
Date: Mon, 19 Jun 2023 03:37:40 -0700
Subject: [RFC PATCH v2] io_uring: add initial io_uring_cmd support for sockets
Enable io_uring command operations on sockets. Create two
SOCKET_URING_OP commands that will operate on sockets.
For that, use the file_operations->uring_cmd callback, and map it to a
uring socket callback, which handles the SOCKET_URING_OP accordingly.
Signed-off-by: Breno Leitao <[email protected]>
---
include/linux/io_uring.h | 6 ++++++
include/uapi/linux/io_uring.h | 8 ++++++++
io_uring/uring_cmd.c | 27 +++++++++++++++++++++++++++
net/socket.c | 2 ++
4 files changed, 43 insertions(+)
diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
index 7fe31b2cd02f..d1b20e2a9fb0 100644
--- a/include/linux/io_uring.h
+++ b/include/linux/io_uring.h
@@ -71,6 +71,7 @@ static inline void io_uring_free(struct task_struct *tsk)
if (tsk->io_uring)
__io_uring_free(tsk);
}
+int uring_sock_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags);
#else
static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
struct iov_iter *iter, void *ioucmd)
@@ -102,6 +103,11 @@ static inline const char *io_uring_get_opcode(u8 opcode)
{
return "";
}
+static inline int uring_sock_cmd(struct io_uring_cmd *cmd,
+ unsigned int issue_flags)
+{
+ return -EOPNOTSUPP;
+}
#endif
#endif
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 0716cb17e436..d93a5ee7d984 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -703,6 +703,14 @@ struct io_uring_recvmsg_out {
__u32 flags;
};
+/*
+ * Argument for IORING_OP_URING_CMD when file is a socket
+ */
+enum {
+ SOCKET_URING_OP_SIOCINQ = 0,
+ SOCKET_URING_OP_SIOCOUTQ,
+};
+
#ifdef __cplusplus
}
#endif
diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
index 5e32db48696d..dcbe6493b03f 100644
--- a/io_uring/uring_cmd.c
+++ b/io_uring/uring_cmd.c
@@ -7,6 +7,7 @@
#include <linux/nospec.h>
#include <uapi/linux/io_uring.h>
+#include <uapi/asm-generic/ioctls.h>
#include "io_uring.h"
#include "rsrc.h"
@@ -156,3 +157,29 @@ int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
return io_import_fixed(rw, iter, req->imu, ubuf, len);
}
EXPORT_SYMBOL_GPL(io_uring_cmd_import_fixed);
+
+int uring_sock_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
+{
+ struct socket *sock = cmd->file->private_data;
+ struct sock *sk = sock->sk;
+ int ret, arg = 0;
+
+ if (!sk->sk_prot || !sk->sk_prot->ioctl)
+ return -EOPNOTSUPP;
+
+ switch (cmd->sqe->cmd_op) {
+ case SOCKET_URING_OP_SIOCINQ:
+ ret = sk->sk_prot->ioctl(sk, SIOCINQ, &arg);
+ if (ret)
+ return ret;
+ return arg;
+ case SOCKET_URING_OP_SIOCOUTQ:
+ ret = sk->sk_prot->ioctl(sk, SIOCOUTQ, &arg);
+ if (ret)
+ return ret;
+ return arg;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+EXPORT_SYMBOL_GPL(uring_sock_cmd);
diff --git a/net/socket.c b/net/socket.c
index b778fc03c6e0..db11e94d2259 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -88,6 +88,7 @@
#include <linux/xattr.h>
#include <linux/nospec.h>
#include <linux/indirect_call_wrapper.h>
+#include <linux/io_uring.h>
#include <linux/uaccess.h>
#include <asm/unistd.h>
@@ -159,6 +160,7 @@ static const struct file_operations socket_file_ops = {
#ifdef CONFIG_COMPAT
.compat_ioctl = compat_sock_ioctl,
#endif
+ .uring_cmd = uring_sock_cmd,
.mmap = sock_mmap,
.release = sock_close,
.fasync = sock_fasync,
--
2.34.1
On 6/19/23 4:20 AM, Breno Leitao wrote:
> On Wed, Jun 14, 2023 at 08:15:10AM -0700, David Ahern wrote:
>> On 6/14/23 5:07 AM, Breno Leitao wrote:
>> io_uring is just another in-kernel user of sockets. There is no reason
>> for io_uring references to be in core net code. It should be using
>> exposed in-kernel APIs and doing any translation of its op codes in
>> io_uring/ code.
> Thanks for the feedback. If we want to keep the network subsystem
> untouched, then I we can do it using an approach similar to the
> following. Is this a better approach moving forward?
yes. It keeps the translation from io_uring commands to networking APIs
in one place and does not need to propagate that translation through the
networking code.
On 6/19/23 2:28 AM, Pavel Begunkov wrote:
> That callback is all about file dependent operations, just like ioctl.
> And as the patch in question is doing socket specific stuff, I think
> architecturally it fits well. I also believe Breno wants to extend it
> later to support more operations.
>
> Sockets are a large chunk of use cases, it can be implemented as a
> separate io_uring request type if nothing else works, but in general
> that might not be as scalable.
The io_uring commands are wrappers to existing networking APIs - doing
via io_uring what userspace apps can do via system calls. As such, the
translations should be done in io_uring code, which then invokes the
in-kernel APIs.
Same comment applies to sockopts when those come around and any other
future extensions.
Am 19.06.23 um 13:20 schrieb Breno Leitao:
> On Wed, Jun 14, 2023 at 08:15:10AM -0700, David Ahern wrote:
>> On 6/14/23 5:07 AM, Breno Leitao wrote:
>> io_uring is just another in-kernel user of sockets. There is no reason
>> for io_uring references to be in core net code. It should be using
>> exposed in-kernel APIs and doing any translation of its op codes in
>> io_uring/ code.
>
> Thanks for the feedback. If we want to keep the network subsystem
> untouched, then I we can do it using an approach similar to the
> following. Is this a better approach moving forward?
I'd like to keep it passed to socket layer, so that sockets could
implement some extra features in an async fashion.
What about having the function you posted below (and in v3)
as a default implementation if proto_ops->uring_cmd is NULL?
metze
> --
>
> From: Breno Leitao <[email protected]>
> Date: Mon, 19 Jun 2023 03:37:40 -0700
> Subject: [RFC PATCH v2] io_uring: add initial io_uring_cmd support for sockets
>
> Enable io_uring command operations on sockets. Create two
> SOCKET_URING_OP commands that will operate on sockets.
>
> For that, use the file_operations->uring_cmd callback, and map it to a
> uring socket callback, which handles the SOCKET_URING_OP accordingly.
>
> Signed-off-by: Breno Leitao <[email protected]>
> ---
> include/linux/io_uring.h | 6 ++++++
> include/uapi/linux/io_uring.h | 8 ++++++++
> io_uring/uring_cmd.c | 27 +++++++++++++++++++++++++++
> net/socket.c | 2 ++
> 4 files changed, 43 insertions(+)
>
> diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
> index 7fe31b2cd02f..d1b20e2a9fb0 100644
> --- a/include/linux/io_uring.h
> +++ b/include/linux/io_uring.h
> @@ -71,6 +71,7 @@ static inline void io_uring_free(struct task_struct *tsk)
> if (tsk->io_uring)
> __io_uring_free(tsk);
> }
> +int uring_sock_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags);
> #else
> static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
> struct iov_iter *iter, void *ioucmd)
> @@ -102,6 +103,11 @@ static inline const char *io_uring_get_opcode(u8 opcode)
> {
> return "";
> }
> +static inline int uring_sock_cmd(struct io_uring_cmd *cmd,
> + unsigned int issue_flags)
> +{
> + return -EOPNOTSUPP;
> +}
> #endif
>
> #endif
> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
> index 0716cb17e436..d93a5ee7d984 100644
> --- a/include/uapi/linux/io_uring.h
> +++ b/include/uapi/linux/io_uring.h
> @@ -703,6 +703,14 @@ struct io_uring_recvmsg_out {
> __u32 flags;
> };
>
> +/*
> + * Argument for IORING_OP_URING_CMD when file is a socket
> + */
> +enum {
> + SOCKET_URING_OP_SIOCINQ = 0,
> + SOCKET_URING_OP_SIOCOUTQ,
> +};
> +
> #ifdef __cplusplus
> }
> #endif
> diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
> index 5e32db48696d..dcbe6493b03f 100644
> --- a/io_uring/uring_cmd.c
> +++ b/io_uring/uring_cmd.c
> @@ -7,6 +7,7 @@
> #include <linux/nospec.h>
>
> #include <uapi/linux/io_uring.h>
> +#include <uapi/asm-generic/ioctls.h>
>
> #include "io_uring.h"
> #include "rsrc.h"
> @@ -156,3 +157,29 @@ int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
> return io_import_fixed(rw, iter, req->imu, ubuf, len);
> }
> EXPORT_SYMBOL_GPL(io_uring_cmd_import_fixed);
> +
> +int uring_sock_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
> +{
> + struct socket *sock = cmd->file->private_data;
> + struct sock *sk = sock->sk;
> + int ret, arg = 0;
> +
> + if (!sk->sk_prot || !sk->sk_prot->ioctl)
> + return -EOPNOTSUPP;
> +
> + switch (cmd->sqe->cmd_op) {
> + case SOCKET_URING_OP_SIOCINQ:
> + ret = sk->sk_prot->ioctl(sk, SIOCINQ, &arg);
> + if (ret)
> + return ret;
> + return arg;
> + case SOCKET_URING_OP_SIOCOUTQ:
> + ret = sk->sk_prot->ioctl(sk, SIOCOUTQ, &arg);
> + if (ret)
> + return ret;
> + return arg;
> + default:
> + return -EOPNOTSUPP;
> + }
> +}
> +EXPORT_SYMBOL_GPL(uring_sock_cmd);
> diff --git a/net/socket.c b/net/socket.c
> index b778fc03c6e0..db11e94d2259 100644
> --- a/net/socket.c
> +++ b/net/socket.c
> @@ -88,6 +88,7 @@
> #include <linux/xattr.h>
> #include <linux/nospec.h>
> #include <linux/indirect_call_wrapper.h>
> +#include <linux/io_uring.h>
>
> #include <linux/uaccess.h>
> #include <asm/unistd.h>
> @@ -159,6 +160,7 @@ static const struct file_operations socket_file_ops = {
> #ifdef CONFIG_COMPAT
> .compat_ioctl = compat_sock_ioctl,
> #endif
> + .uring_cmd = uring_sock_cmd,
> .mmap = sock_mmap,
> .release = sock_close,
> .fasync = sock_fasync,
On 6/23/23 3:17 AM, Stefan Metzmacher wrote:
>
> I'd like to keep it passed to socket layer, so that sockets could
> implement some extra features in an async fashion.
>
> What about having the function you posted below (and in v3)
> as a default implementation if proto_ops->uring_cmd is NULL?
>
Nothing about this set needs uring_cmd added to proto ops. It adds uring
commands which are wrappers to networking APIs. Let's keep proper APIs
between subsystems.