Implement SCM_PIDFD, a new CMSG type analogous to SCM_CREDENTIALS,
but carrying a pidfd instead of a plain pid, which frees programmers
from having to care about the PID reuse problem.
The idea comes from the UAPI kernel group:
https://uapi-group.org/kernel-features/
Big thanks to Christian Brauner and Lennart Poettering for productive
discussions about this.
Cc: "David S. Miller" <[email protected]>
Cc: Eric Dumazet <[email protected]>
Cc: Jakub Kicinski <[email protected]>
Cc: Paolo Abeni <[email protected]>
Cc: Leon Romanovsky <[email protected]>
Cc: David Ahern <[email protected]>
Cc: Arnd Bergmann <[email protected]>
Cc: Kees Cook <[email protected]>
Cc: Christian Brauner <[email protected]>
Cc: Kuniyuki Iwashima <[email protected]>
Cc: Lennart Poettering <[email protected]>
Cc: Luca Boccassi <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Tested-by: Luca Boccassi <[email protected]>
Reviewed-by: Kuniyuki Iwashima <[email protected]>
Signed-off-by: Alexander Mikhalitsyn <[email protected]>
---
v4:
- fixed silent fd_install if writing the CMSG to userspace fails (pointed out by Christian)
v2:
According to review comments from Kuniyuki Iwashima and Christian Brauner:
- use pidfd_create(..) retval as a result
- whitespace change
---
arch/alpha/include/uapi/asm/socket.h | 2 ++
arch/mips/include/uapi/asm/socket.h | 2 ++
arch/parisc/include/uapi/asm/socket.h | 2 ++
arch/sparc/include/uapi/asm/socket.h | 2 ++
include/linux/net.h | 1 +
include/linux/socket.h | 1 +
include/net/scm.h | 39 +++++++++++++++++++++++--
include/uapi/asm-generic/socket.h | 2 ++
net/core/sock.c | 11 +++++++
net/mptcp/sockopt.c | 1 +
net/unix/af_unix.c | 18 ++++++++----
tools/include/uapi/asm-generic/socket.h | 2 ++
12 files changed, 76 insertions(+), 7 deletions(-)
diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
index 739891b94136..ff310613ae64 100644
--- a/arch/alpha/include/uapi/asm/socket.h
+++ b/arch/alpha/include/uapi/asm/socket.h
@@ -137,6 +137,8 @@
#define SO_RCVMARK 75
+#define SO_PASSPIDFD 76
+
#if !defined(__KERNEL__)
#if __BITS_PER_LONG == 64
diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
index 18f3d95ecfec..762dcb80e4ec 100644
--- a/arch/mips/include/uapi/asm/socket.h
+++ b/arch/mips/include/uapi/asm/socket.h
@@ -148,6 +148,8 @@
#define SO_RCVMARK 75
+#define SO_PASSPIDFD 76
+
#if !defined(__KERNEL__)
#if __BITS_PER_LONG == 64
diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
index f486d3dfb6bb..df16a3e16d64 100644
--- a/arch/parisc/include/uapi/asm/socket.h
+++ b/arch/parisc/include/uapi/asm/socket.h
@@ -129,6 +129,8 @@
#define SO_RCVMARK 0x4049
+#define SO_PASSPIDFD 0x404A
+
#if !defined(__KERNEL__)
#if __BITS_PER_LONG == 64
diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
index 2fda57a3ea86..6e2847804fea 100644
--- a/arch/sparc/include/uapi/asm/socket.h
+++ b/arch/sparc/include/uapi/asm/socket.h
@@ -130,6 +130,8 @@
#define SO_RCVMARK 0x0054
+#define SO_PASSPIDFD 0x0055
+
#if !defined(__KERNEL__)
diff --git a/include/linux/net.h b/include/linux/net.h
index b73ad8e3c212..c234dfbe7a30 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -43,6 +43,7 @@ struct net;
#define SOCK_PASSSEC 4
#define SOCK_SUPPORT_ZC 5
#define SOCK_CUSTOM_SOCKOPT 6
+#define SOCK_PASSPIDFD 7
#ifndef ARCH_HAS_SOCKET_TYPES
/**
diff --git a/include/linux/socket.h b/include/linux/socket.h
index 13c3a237b9c9..6bf90f251910 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -177,6 +177,7 @@ static inline size_t msg_data_left(struct msghdr *msg)
#define SCM_RIGHTS 0x01 /* rw: access rights (array of int) */
#define SCM_CREDENTIALS 0x02 /* rw: struct ucred */
#define SCM_SECURITY 0x03 /* rw: security label */
+#define SCM_PIDFD 0x04 /* ro: pidfd (int) */
struct ucred {
__u32 pid;
diff --git a/include/net/scm.h b/include/net/scm.h
index 585adc1346bd..c67f765a165b 100644
--- a/include/net/scm.h
+++ b/include/net/scm.h
@@ -120,12 +120,44 @@ static inline bool scm_has_secdata(struct socket *sock)
}
#endif /* CONFIG_SECURITY_NETWORK */
+static __inline__ void scm_pidfd_recv(struct msghdr *msg, struct scm_cookie *scm)
+{
+ struct file *pidfd_file = NULL;
+ int pidfd;
+
+ /*
+ * put_cmsg() doesn't return an error if CMSG is truncated,
+ * that's why we need to opencode these checks here.
+ */
+ if ((msg->msg_controllen <= sizeof(struct cmsghdr)) ||
+ (msg->msg_controllen - sizeof(struct cmsghdr)) < sizeof(int)) {
+ msg->msg_flags |= MSG_CTRUNC;
+ return;
+ }
+
+ WARN_ON_ONCE(!scm->pid);
+ pidfd = pidfd_prepare(scm->pid, 0, &pidfd_file);
+
+ if (put_cmsg(msg, SOL_SOCKET, SCM_PIDFD, sizeof(int), &pidfd)) {
+ if (pidfd_file) {
+ put_unused_fd(pidfd);
+ fput(pidfd_file);
+ }
+
+ return;
+ }
+
+ if (pidfd_file)
+ fd_install(pidfd, pidfd_file);
+}
+
static __inline__ void scm_recv(struct socket *sock, struct msghdr *msg,
struct scm_cookie *scm, int flags)
{
if (!msg->msg_control) {
- if (test_bit(SOCK_PASSCRED, &sock->flags) || scm->fp ||
- scm_has_secdata(sock))
+ if (test_bit(SOCK_PASSCRED, &sock->flags) ||
+ test_bit(SOCK_PASSPIDFD, &sock->flags) ||
+ scm->fp || scm_has_secdata(sock))
msg->msg_flags |= MSG_CTRUNC;
scm_destroy(scm);
return;
@@ -141,6 +173,9 @@ static __inline__ void scm_recv(struct socket *sock, struct msghdr *msg,
put_cmsg(msg, SOL_SOCKET, SCM_CREDENTIALS, sizeof(ucreds), &ucreds);
}
+ if (test_bit(SOCK_PASSPIDFD, &sock->flags))
+ scm_pidfd_recv(msg, scm);
+
scm_destroy_cred(scm);
scm_passec(sock, msg, scm);
diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
index 638230899e98..b76169fdb80b 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -132,6 +132,8 @@
#define SO_RCVMARK 75
+#define SO_PASSPIDFD 76
+
#if !defined(__KERNEL__)
#if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
diff --git a/net/core/sock.c b/net/core/sock.c
index c25888795390..3f974246ba3e 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1246,6 +1246,13 @@ int sk_setsockopt(struct sock *sk, int level, int optname,
clear_bit(SOCK_PASSCRED, &sock->flags);
break;
+ case SO_PASSPIDFD:
+ if (valbool)
+ set_bit(SOCK_PASSPIDFD, &sock->flags);
+ else
+ clear_bit(SOCK_PASSPIDFD, &sock->flags);
+ break;
+
case SO_TIMESTAMP_OLD:
case SO_TIMESTAMP_NEW:
case SO_TIMESTAMPNS_OLD:
@@ -1737,6 +1744,10 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
v.val = !!test_bit(SOCK_PASSCRED, &sock->flags);
break;
+ case SO_PASSPIDFD:
+ v.val = !!test_bit(SOCK_PASSPIDFD, &sock->flags);
+ break;
+
case SO_PEERCRED:
{
struct ucred peercred;
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index b655cebda0f3..67be0558862f 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -355,6 +355,7 @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
case SO_BROADCAST:
case SO_BSDCOMPAT:
case SO_PASSCRED:
+ case SO_PASSPIDFD:
case SO_PASSSEC:
case SO_RXQ_OVFL:
case SO_WIFI_STATUS:
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index fb31e8a4409e..6d5dff4dfe83 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1361,7 +1361,8 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
if (err)
goto out;
- if (test_bit(SOCK_PASSCRED, &sock->flags) &&
+ if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
+ test_bit(SOCK_PASSPIDFD, &sock->flags)) &&
!unix_sk(sk)->addr) {
err = unix_autobind(sk);
if (err)
@@ -1469,7 +1470,8 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
if (err)
goto out;
- if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) {
+ if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
+ test_bit(SOCK_PASSPIDFD, &sock->flags)) && !u->addr) {
err = unix_autobind(sk);
if (err)
goto out;
@@ -1670,6 +1672,8 @@ static void unix_sock_inherit_flags(const struct socket *old,
{
if (test_bit(SOCK_PASSCRED, &old->flags))
set_bit(SOCK_PASSCRED, &new->flags);
+ if (test_bit(SOCK_PASSPIDFD, &old->flags))
+ set_bit(SOCK_PASSPIDFD, &new->flags);
if (test_bit(SOCK_PASSSEC, &old->flags))
set_bit(SOCK_PASSSEC, &new->flags);
}
@@ -1819,8 +1823,10 @@ static bool unix_passcred_enabled(const struct socket *sock,
const struct sock *other)
{
return test_bit(SOCK_PASSCRED, &sock->flags) ||
+ test_bit(SOCK_PASSPIDFD, &sock->flags) ||
!other->sk_socket ||
- test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
+ test_bit(SOCK_PASSCRED, &other->sk_socket->flags) ||
+ test_bit(SOCK_PASSPIDFD, &other->sk_socket->flags);
}
/*
@@ -1922,7 +1928,8 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
goto out;
}
- if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) {
+ if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
+ test_bit(SOCK_PASSPIDFD, &sock->flags)) && !u->addr) {
err = unix_autobind(sk);
if (err)
goto out;
@@ -2824,7 +2831,8 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state,
/* Never glue messages from different writers */
if (!unix_skb_scm_eq(skb, &scm))
break;
- } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
+ } else if (test_bit(SOCK_PASSCRED, &sock->flags) ||
+ test_bit(SOCK_PASSPIDFD, &sock->flags)) {
/* Copy credentials */
scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
unix_set_secdata(&scm, skb);
diff --git a/tools/include/uapi/asm-generic/socket.h b/tools/include/uapi/asm-generic/socket.h
index 8756df13be50..fbbc4bf53ee3 100644
--- a/tools/include/uapi/asm-generic/socket.h
+++ b/tools/include/uapi/asm-generic/socket.h
@@ -121,6 +121,8 @@
#define SO_RCVMARK 75
+#define SO_PASSPIDFD 76
+
#if !defined(__KERNEL__)
#if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
--
2.34.1
On Thu, Apr 13, 2023 at 03:33:52PM +0200, Alexander Mikhalitsyn wrote:
> Implement SCM_PIDFD, a new type of CMSG type analogical to SCM_CREDENTIALS,
> but it contains pidfd instead of plain pid, which allows programmers not
> to care about PID reuse problem.
>
> Idea comes from UAPI kernel group:
> https://uapi-group.org/kernel-features/
>
> Big thanks to Christian Brauner and Lennart Poettering for productive
> discussions about this.
>
> Cc: "David S. Miller" <[email protected]>
> Cc: Eric Dumazet <[email protected]>
> Cc: Jakub Kicinski <[email protected]>
> Cc: Paolo Abeni <[email protected]>
> Cc: Leon Romanovsky <[email protected]>
> Cc: David Ahern <[email protected]>
> Cc: Arnd Bergmann <[email protected]>
> Cc: Kees Cook <[email protected]>
> Cc: Christian Brauner <[email protected]>
> Cc: Kuniyuki Iwashima <[email protected]>
> Cc: Lennart Poettering <[email protected]>
> Cc: Luca Boccassi <[email protected]>
> Cc: [email protected]
> Cc: [email protected]
> Cc: [email protected]
> Tested-by: Luca Boccassi <[email protected]>
> Reviewed-by: Kuniyuki Iwashima <[email protected]>
> Signed-off-by: Alexander Mikhalitsyn <[email protected]>
> ---
> v4:
> - fixed silent fd_install if writting of CMSG to the userspace fails (pointed by Christian)
> v2:
> According to review comments from Kuniyuki Iwashima and Christian Brauner:
> - use pidfd_create(..) retval as a result
> - whitespace change
> ---
> arch/alpha/include/uapi/asm/socket.h | 2 ++
> arch/mips/include/uapi/asm/socket.h | 2 ++
> arch/parisc/include/uapi/asm/socket.h | 2 ++
> arch/sparc/include/uapi/asm/socket.h | 2 ++
> include/linux/net.h | 1 +
> include/linux/socket.h | 1 +
> include/net/scm.h | 39 +++++++++++++++++++++++--
> include/uapi/asm-generic/socket.h | 2 ++
> net/core/sock.c | 11 +++++++
> net/mptcp/sockopt.c | 1 +
> net/unix/af_unix.c | 18 ++++++++----
> tools/include/uapi/asm-generic/socket.h | 2 ++
> 12 files changed, 76 insertions(+), 7 deletions(-)
>
> diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
> index 739891b94136..ff310613ae64 100644
> --- a/arch/alpha/include/uapi/asm/socket.h
> +++ b/arch/alpha/include/uapi/asm/socket.h
> @@ -137,6 +137,8 @@
>
> #define SO_RCVMARK 75
>
> +#define SO_PASSPIDFD 76
> +
> #if !defined(__KERNEL__)
>
> #if __BITS_PER_LONG == 64
> diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
> index 18f3d95ecfec..762dcb80e4ec 100644
> --- a/arch/mips/include/uapi/asm/socket.h
> +++ b/arch/mips/include/uapi/asm/socket.h
> @@ -148,6 +148,8 @@
>
> #define SO_RCVMARK 75
>
> +#define SO_PASSPIDFD 76
> +
> #if !defined(__KERNEL__)
>
> #if __BITS_PER_LONG == 64
> diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
> index f486d3dfb6bb..df16a3e16d64 100644
> --- a/arch/parisc/include/uapi/asm/socket.h
> +++ b/arch/parisc/include/uapi/asm/socket.h
> @@ -129,6 +129,8 @@
>
> #define SO_RCVMARK 0x4049
>
> +#define SO_PASSPIDFD 0x404A
> +
> #if !defined(__KERNEL__)
>
> #if __BITS_PER_LONG == 64
> diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
> index 2fda57a3ea86..6e2847804fea 100644
> --- a/arch/sparc/include/uapi/asm/socket.h
> +++ b/arch/sparc/include/uapi/asm/socket.h
> @@ -130,6 +130,8 @@
>
> #define SO_RCVMARK 0x0054
>
> +#define SO_PASSPIDFD 0x0055
> +
> #if !defined(__KERNEL__)
>
>
> diff --git a/include/linux/net.h b/include/linux/net.h
> index b73ad8e3c212..c234dfbe7a30 100644
> --- a/include/linux/net.h
> +++ b/include/linux/net.h
> @@ -43,6 +43,7 @@ struct net;
> #define SOCK_PASSSEC 4
> #define SOCK_SUPPORT_ZC 5
> #define SOCK_CUSTOM_SOCKOPT 6
> +#define SOCK_PASSPIDFD 7
>
> #ifndef ARCH_HAS_SOCKET_TYPES
> /**
> diff --git a/include/linux/socket.h b/include/linux/socket.h
> index 13c3a237b9c9..6bf90f251910 100644
> --- a/include/linux/socket.h
> +++ b/include/linux/socket.h
> @@ -177,6 +177,7 @@ static inline size_t msg_data_left(struct msghdr *msg)
> #define SCM_RIGHTS 0x01 /* rw: access rights (array of int) */
> #define SCM_CREDENTIALS 0x02 /* rw: struct ucred */
> #define SCM_SECURITY 0x03 /* rw: security label */
> +#define SCM_PIDFD 0x04 /* ro: pidfd (int) */
>
> struct ucred {
> __u32 pid;
> diff --git a/include/net/scm.h b/include/net/scm.h
> index 585adc1346bd..c67f765a165b 100644
> --- a/include/net/scm.h
> +++ b/include/net/scm.h
> @@ -120,12 +120,44 @@ static inline bool scm_has_secdata(struct socket *sock)
> }
> #endif /* CONFIG_SECURITY_NETWORK */
>
> +static __inline__ void scm_pidfd_recv(struct msghdr *msg, struct scm_cookie *scm)
> +{
> + struct file *pidfd_file = NULL;
> + int pidfd;
> +
> + /*
> + * put_cmsg() doesn't return an error if CMSG is truncated,
> + * that's why we need to opencode these checks here.
> + */
> + if ((msg->msg_controllen <= sizeof(struct cmsghdr)) ||
> + (msg->msg_controllen - sizeof(struct cmsghdr)) < sizeof(int)) {
> + msg->msg_flags |= MSG_CTRUNC;
> + return;
Hm, curious about this: We mark the message as truncated for SCM_PIDFD
but if the same conditions were to apply for SCM_CREDENTIALS we don't mark
the message as truncated. Am I reading this correctly? And if so, could you
please briefly explain this difference?
> + }
> +
> + WARN_ON_ONCE(!scm->pid);
> + pidfd = pidfd_prepare(scm->pid, 0, &pidfd_file);
> +
> + if (put_cmsg(msg, SOL_SOCKET, SCM_PIDFD, sizeof(int), &pidfd)) {
If the put_cmsg() of the pidfd fails userspace needs to be able to
detect this. Otherwise they can't distinguish between the SCM_PIDFD
value being zero because the put_cmsg() failed or put_cmsg() succeeded
and the allocated fd nr was 0.
Looking at put_cmsg() it looks to me that userspace will receive a
SCM_PIDFD message only if the put_cmsg() is completely successful. IIUC,
then this change is fine.
On Mon, Apr 17, 2023 at 5:18 PM Christian Brauner <[email protected]> wrote:
>
> On Thu, Apr 13, 2023 at 03:33:52PM +0200, Alexander Mikhalitsyn wrote:
> > Implement SCM_PIDFD, a new type of CMSG type analogical to SCM_CREDENTIALS,
> > but it contains pidfd instead of plain pid, which allows programmers not
> > to care about PID reuse problem.
> >
> > Idea comes from UAPI kernel group:
> > https://uapi-group.org/kernel-features/
> >
> > Big thanks to Christian Brauner and Lennart Poettering for productive
> > discussions about this.
> >
> > Cc: "David S. Miller" <[email protected]>
> > Cc: Eric Dumazet <[email protected]>
> > Cc: Jakub Kicinski <[email protected]>
> > Cc: Paolo Abeni <[email protected]>
> > Cc: Leon Romanovsky <[email protected]>
> > Cc: David Ahern <[email protected]>
> > Cc: Arnd Bergmann <[email protected]>
> > Cc: Kees Cook <[email protected]>
> > Cc: Christian Brauner <[email protected]>
> > Cc: Kuniyuki Iwashima <[email protected]>
> > Cc: Lennart Poettering <[email protected]>
> > Cc: Luca Boccassi <[email protected]>
> > Cc: [email protected]
> > Cc: [email protected]
> > Cc: [email protected]
> > Tested-by: Luca Boccassi <[email protected]>
> > Reviewed-by: Kuniyuki Iwashima <[email protected]>
> > Signed-off-by: Alexander Mikhalitsyn <[email protected]>
> > ---
> > v4:
> > - fixed silent fd_install if writting of CMSG to the userspace fails (pointed by Christian)
> > v2:
> > According to review comments from Kuniyuki Iwashima and Christian Brauner:
> > - use pidfd_create(..) retval as a result
> > - whitespace change
> > ---
> > arch/alpha/include/uapi/asm/socket.h | 2 ++
> > arch/mips/include/uapi/asm/socket.h | 2 ++
> > arch/parisc/include/uapi/asm/socket.h | 2 ++
> > arch/sparc/include/uapi/asm/socket.h | 2 ++
> > include/linux/net.h | 1 +
> > include/linux/socket.h | 1 +
> > include/net/scm.h | 39 +++++++++++++++++++++++--
> > include/uapi/asm-generic/socket.h | 2 ++
> > net/core/sock.c | 11 +++++++
> > net/mptcp/sockopt.c | 1 +
> > net/unix/af_unix.c | 18 ++++++++----
> > tools/include/uapi/asm-generic/socket.h | 2 ++
> > 12 files changed, 76 insertions(+), 7 deletions(-)
> >
> > diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
> > index 739891b94136..ff310613ae64 100644
> > --- a/arch/alpha/include/uapi/asm/socket.h
> > +++ b/arch/alpha/include/uapi/asm/socket.h
> > @@ -137,6 +137,8 @@
> >
> > #define SO_RCVMARK 75
> >
> > +#define SO_PASSPIDFD 76
> > +
> > #if !defined(__KERNEL__)
> >
> > #if __BITS_PER_LONG == 64
> > diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
> > index 18f3d95ecfec..762dcb80e4ec 100644
> > --- a/arch/mips/include/uapi/asm/socket.h
> > +++ b/arch/mips/include/uapi/asm/socket.h
> > @@ -148,6 +148,8 @@
> >
> > #define SO_RCVMARK 75
> >
> > +#define SO_PASSPIDFD 76
> > +
> > #if !defined(__KERNEL__)
> >
> > #if __BITS_PER_LONG == 64
> > diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
> > index f486d3dfb6bb..df16a3e16d64 100644
> > --- a/arch/parisc/include/uapi/asm/socket.h
> > +++ b/arch/parisc/include/uapi/asm/socket.h
> > @@ -129,6 +129,8 @@
> >
> > #define SO_RCVMARK 0x4049
> >
> > +#define SO_PASSPIDFD 0x404A
> > +
> > #if !defined(__KERNEL__)
> >
> > #if __BITS_PER_LONG == 64
> > diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
> > index 2fda57a3ea86..6e2847804fea 100644
> > --- a/arch/sparc/include/uapi/asm/socket.h
> > +++ b/arch/sparc/include/uapi/asm/socket.h
> > @@ -130,6 +130,8 @@
> >
> > #define SO_RCVMARK 0x0054
> >
> > +#define SO_PASSPIDFD 0x0055
> > +
> > #if !defined(__KERNEL__)
> >
> >
> > diff --git a/include/linux/net.h b/include/linux/net.h
> > index b73ad8e3c212..c234dfbe7a30 100644
> > --- a/include/linux/net.h
> > +++ b/include/linux/net.h
> > @@ -43,6 +43,7 @@ struct net;
> > #define SOCK_PASSSEC 4
> > #define SOCK_SUPPORT_ZC 5
> > #define SOCK_CUSTOM_SOCKOPT 6
> > +#define SOCK_PASSPIDFD 7
> >
> > #ifndef ARCH_HAS_SOCKET_TYPES
> > /**
> > diff --git a/include/linux/socket.h b/include/linux/socket.h
> > index 13c3a237b9c9..6bf90f251910 100644
> > --- a/include/linux/socket.h
> > +++ b/include/linux/socket.h
> > @@ -177,6 +177,7 @@ static inline size_t msg_data_left(struct msghdr *msg)
> > #define SCM_RIGHTS 0x01 /* rw: access rights (array of int) */
> > #define SCM_CREDENTIALS 0x02 /* rw: struct ucred */
> > #define SCM_SECURITY 0x03 /* rw: security label */
> > +#define SCM_PIDFD 0x04 /* ro: pidfd (int) */
> >
> > struct ucred {
> > __u32 pid;
> > diff --git a/include/net/scm.h b/include/net/scm.h
> > index 585adc1346bd..c67f765a165b 100644
> > --- a/include/net/scm.h
> > +++ b/include/net/scm.h
> > @@ -120,12 +120,44 @@ static inline bool scm_has_secdata(struct socket *sock)
> > }
> > #endif /* CONFIG_SECURITY_NETWORK */
> >
> > +static __inline__ void scm_pidfd_recv(struct msghdr *msg, struct scm_cookie *scm)
> > +{
> > + struct file *pidfd_file = NULL;
> > + int pidfd;
> > +
> > + /*
> > + * put_cmsg() doesn't return an error if CMSG is truncated,
> > + * that's why we need to opencode these checks here.
> > + */
> > + if ((msg->msg_controllen <= sizeof(struct cmsghdr)) ||
> > + (msg->msg_controllen - sizeof(struct cmsghdr)) < sizeof(int)) {
> > + msg->msg_flags |= MSG_CTRUNC;
> > + return;
>
> Hm, curious about this: We mark the message as truncated for SCM_PIDFD
> but if the same conditions were to apply for SCM_PASSCRED we don't mark
> the message as truncated. Am I reading this correct? And is so, you
> please briefly explain this difference?
Hi, Christian!
For SCM_CREDENTIALS we mark it too. Inside the put_cmsg function:
https://github.com/torvalds/linux/blob/6a8f57ae2eb07ab39a6f0ccad60c760743051026/net/core/scm.c#L225
The reason why I'm open-coding these checks is that I want to know that
the message doesn't fit into the userspace buffer before doing
pidfd_prepare and the rest. put_cmsg does not return an error when the
message doesn't fit into the userspace buffer, so without the up-front
check we wouldn't be able to properly do the pidfd cleanup (put the
struct pid and the fd index).
>
> > + }
> > +
> > + WARN_ON_ONCE(!scm->pid);
> > + pidfd = pidfd_prepare(scm->pid, 0, &pidfd_file);
> > +
> > + if (put_cmsg(msg, SOL_SOCKET, SCM_PIDFD, sizeof(int), &pidfd)) {
>
> If the put_cmsg() of the pidfd fails userspace needs to be able to
> detect this. Otherwise they can't distinguish between the SCM_PIDFD
> value being zero because the put_cmsg() failed or put_cmsg() succeeded
> and the allocated fd nr was 0.
If pidfd_prepare fails, then userspace will receive an SCM_PIDFD message
with a negative pidfd value.
>
> Looking at put_cmsg() it looks to me that userspace will receive a
> SCM_PIDFD message only if the put_cmsg() is completely successful. IIUC,
> then this change is fine.
Kind regards,
Alex
On Mon, Apr 17, 2023 at 06:01:16PM +0200, Aleksandr Mikhalitsyn wrote:
> On Mon, Apr 17, 2023 at 5:18 PM Christian Brauner <[email protected]> wrote:
> >
> > On Thu, Apr 13, 2023 at 03:33:52PM +0200, Alexander Mikhalitsyn wrote:
> > > Implement SCM_PIDFD, a new type of CMSG type analogical to SCM_CREDENTIALS,
> > > but it contains pidfd instead of plain pid, which allows programmers not
> > > to care about PID reuse problem.
> > >
> > > Idea comes from UAPI kernel group:
> > > https://uapi-group.org/kernel-features/
> > >
> > > Big thanks to Christian Brauner and Lennart Poettering for productive
> > > discussions about this.
> > >
> > > Cc: "David S. Miller" <[email protected]>
> > > Cc: Eric Dumazet <[email protected]>
> > > Cc: Jakub Kicinski <[email protected]>
> > > Cc: Paolo Abeni <[email protected]>
> > > Cc: Leon Romanovsky <[email protected]>
> > > Cc: David Ahern <[email protected]>
> > > Cc: Arnd Bergmann <[email protected]>
> > > Cc: Kees Cook <[email protected]>
> > > Cc: Christian Brauner <[email protected]>
> > > Cc: Kuniyuki Iwashima <[email protected]>
> > > Cc: Lennart Poettering <[email protected]>
> > > Cc: Luca Boccassi <[email protected]>
> > > Cc: [email protected]
> > > Cc: [email protected]
> > > Cc: [email protected]
> > > Tested-by: Luca Boccassi <[email protected]>
> > > Reviewed-by: Kuniyuki Iwashima <[email protected]>
> > > Signed-off-by: Alexander Mikhalitsyn <[email protected]>
> > > ---
> > > v4:
> > > - fixed silent fd_install if writting of CMSG to the userspace fails (pointed by Christian)
> > > v2:
> > > According to review comments from Kuniyuki Iwashima and Christian Brauner:
> > > - use pidfd_create(..) retval as a result
> > > - whitespace change
> > > ---
> > > arch/alpha/include/uapi/asm/socket.h | 2 ++
> > > arch/mips/include/uapi/asm/socket.h | 2 ++
> > > arch/parisc/include/uapi/asm/socket.h | 2 ++
> > > arch/sparc/include/uapi/asm/socket.h | 2 ++
> > > include/linux/net.h | 1 +
> > > include/linux/socket.h | 1 +
> > > include/net/scm.h | 39 +++++++++++++++++++++++--
> > > include/uapi/asm-generic/socket.h | 2 ++
> > > net/core/sock.c | 11 +++++++
> > > net/mptcp/sockopt.c | 1 +
> > > net/unix/af_unix.c | 18 ++++++++----
> > > tools/include/uapi/asm-generic/socket.h | 2 ++
> > > 12 files changed, 76 insertions(+), 7 deletions(-)
> > >
> > > diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
> > > index 739891b94136..ff310613ae64 100644
> > > --- a/arch/alpha/include/uapi/asm/socket.h
> > > +++ b/arch/alpha/include/uapi/asm/socket.h
> > > @@ -137,6 +137,8 @@
> > >
> > > #define SO_RCVMARK 75
> > >
> > > +#define SO_PASSPIDFD 76
> > > +
> > > #if !defined(__KERNEL__)
> > >
> > > #if __BITS_PER_LONG == 64
> > > diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
> > > index 18f3d95ecfec..762dcb80e4ec 100644
> > > --- a/arch/mips/include/uapi/asm/socket.h
> > > +++ b/arch/mips/include/uapi/asm/socket.h
> > > @@ -148,6 +148,8 @@
> > >
> > > #define SO_RCVMARK 75
> > >
> > > +#define SO_PASSPIDFD 76
> > > +
> > > #if !defined(__KERNEL__)
> > >
> > > #if __BITS_PER_LONG == 64
> > > diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
> > > index f486d3dfb6bb..df16a3e16d64 100644
> > > --- a/arch/parisc/include/uapi/asm/socket.h
> > > +++ b/arch/parisc/include/uapi/asm/socket.h
> > > @@ -129,6 +129,8 @@
> > >
> > > #define SO_RCVMARK 0x4049
> > >
> > > +#define SO_PASSPIDFD 0x404A
> > > +
> > > #if !defined(__KERNEL__)
> > >
> > > #if __BITS_PER_LONG == 64
> > > diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
> > > index 2fda57a3ea86..6e2847804fea 100644
> > > --- a/arch/sparc/include/uapi/asm/socket.h
> > > +++ b/arch/sparc/include/uapi/asm/socket.h
> > > @@ -130,6 +130,8 @@
> > >
> > > #define SO_RCVMARK 0x0054
> > >
> > > +#define SO_PASSPIDFD 0x0055
> > > +
> > > #if !defined(__KERNEL__)
> > >
> > >
> > > diff --git a/include/linux/net.h b/include/linux/net.h
> > > index b73ad8e3c212..c234dfbe7a30 100644
> > > --- a/include/linux/net.h
> > > +++ b/include/linux/net.h
> > > @@ -43,6 +43,7 @@ struct net;
> > > #define SOCK_PASSSEC 4
> > > #define SOCK_SUPPORT_ZC 5
> > > #define SOCK_CUSTOM_SOCKOPT 6
> > > +#define SOCK_PASSPIDFD 7
> > >
> > > #ifndef ARCH_HAS_SOCKET_TYPES
> > > /**
> > > diff --git a/include/linux/socket.h b/include/linux/socket.h
> > > index 13c3a237b9c9..6bf90f251910 100644
> > > --- a/include/linux/socket.h
> > > +++ b/include/linux/socket.h
> > > @@ -177,6 +177,7 @@ static inline size_t msg_data_left(struct msghdr *msg)
> > > #define SCM_RIGHTS 0x01 /* rw: access rights (array of int) */
> > > #define SCM_CREDENTIALS 0x02 /* rw: struct ucred */
> > > #define SCM_SECURITY 0x03 /* rw: security label */
> > > +#define SCM_PIDFD 0x04 /* ro: pidfd (int) */
> > >
> > > struct ucred {
> > > __u32 pid;
> > > diff --git a/include/net/scm.h b/include/net/scm.h
> > > index 585adc1346bd..c67f765a165b 100644
> > > --- a/include/net/scm.h
> > > +++ b/include/net/scm.h
> > > @@ -120,12 +120,44 @@ static inline bool scm_has_secdata(struct socket *sock)
> > > }
> > > #endif /* CONFIG_SECURITY_NETWORK */
> > >
> > > +static __inline__ void scm_pidfd_recv(struct msghdr *msg, struct scm_cookie *scm)
> > > +{
> > > + struct file *pidfd_file = NULL;
> > > + int pidfd;
> > > +
> > > + /*
> > > + * put_cmsg() doesn't return an error if CMSG is truncated,
> > > + * that's why we need to opencode these checks here.
> > > + */
> > > + if ((msg->msg_controllen <= sizeof(struct cmsghdr)) ||
> > > + (msg->msg_controllen - sizeof(struct cmsghdr)) < sizeof(int)) {
> > > + msg->msg_flags |= MSG_CTRUNC;
> > > + return;
> >
> > Hm, curious about this: We mark the message as truncated for SCM_PIDFD
> > but if the same conditions were to apply for SCM_PASSCRED we don't mark
> > the message as truncated. Am I reading this correct? And is so, you
> > please briefly explain this difference?
>
> Hi, Christian!
>
> For SCM_CREDENTIALS we mark it too. Inside the put_cmsg function:
> https://github.com/torvalds/linux/blob/6a8f57ae2eb07ab39a6f0ccad60c760743051026/net/core/scm.c#L225
>
> The reason why I'm open-coding these checks is that I want to know
> that the message
> doesn't fit into the userspace buffer before doing pidfd_prepare and
> other stuff and because
> put_cmsg is not returning an error when message doesn't fit in the
> userspace buffer and
> we won't be able to properly do pidfd cleanup (put struct pid and fd index).
>
> >
> > > + }
> > > +
> > > + WARN_ON_ONCE(!scm->pid);
> > > + pidfd = pidfd_prepare(scm->pid, 0, &pidfd_file);
> > > +
> > > + if (put_cmsg(msg, SOL_SOCKET, SCM_PIDFD, sizeof(int), &pidfd)) {
> >
> > If the put_cmsg() of the pidfd fails userspace needs to be able to
> > detect this. Otherwise they can't distinguish between the SCM_PIDFD
> > value being zero because the put_cmsg() failed or put_cmsg() succeeded
> > and the allocated fd nr was 0.
>
> If pidfd_prepare fails then userspace will receive SCM_PIDFD message
> with negative pidfd value.
So we discussed this a bit offline and I think there's still an issue.
Consider put_cmsg() failing partway through the copy to userspace:
if (msg->msg_control_is_user) {
struct cmsghdr __user *cm = msg->msg_control_user;
check_object_size(data, cmlen - sizeof(*cm), true);
if (!user_write_access_begin(cm, cmlen))
goto efault;
// This succeeds so cm->cmsg_len == CMSG_LEN(sizeof(int))
unsafe_put_user(cmlen, &cm->cmsg_len, efault_end);
// This succeeds so cm->cmsg_level == SOL_SOCKET
unsafe_put_user(level, &cm->cmsg_level, efault_end);
// This succeeds so cm->cmsg_type == SCM_PIDFD
unsafe_put_user(type, &cm->cmsg_type, efault_end);
// This fails and leaves all bits set to 0
unsafe_copy_to_user(CMSG_USER_DATA(cm), data,
cmlen - sizeof(*cm), efault_end);
user_write_access_end();
so now we hit
if (put_cmsg(msg, SOL_SOCKET, SCM_PIDFD, sizeof(int), &pidfd)) {
if (pidfd_file) {
put_unused_fd(pidfd);
fput(pidfd_file);
}
return;
}
and return early. Afaict, userspace would now receive:
if (cmsg && cmsg->cmsg_len == CMSG_LEN(sizeof(int)) &&
cmsg->cmsg_level == SOL_SOCKET &&
cmsg->cmsg_type == SCM_PIDFD) {
memcpy(&pidfd, CMSG_DATA(cmsg), sizeof(int));
// pidfd is now 0 which is a valid fd number
// it'll likely refer to /dev/stdin or whatever and so
// will fail or, worst case, 0 refers to another pidfd :)
pidfd_send_signal(pidfd, SIGKILL);
so we need to address this. One way that I think would solve this is:
diff --git a/net/core/scm.c b/net/core/scm.c
index 3cd7dd377e53..d1f4cd135c5a 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -236,9 +236,9 @@ int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data)
unsafe_put_user(cmlen, &cm->cmsg_len, efault_end);
unsafe_put_user(level, &cm->cmsg_level, efault_end);
- unsafe_put_user(type, &cm->cmsg_type, efault_end);
unsafe_copy_to_user(CMSG_USER_DATA(cm), data,
cmlen - sizeof(*cm), efault_end);
+ unsafe_put_user(type, &cm->cmsg_type, efault_end);
user_write_access_end();
} else {
struct cmsghdr *cm = msg->msg_control;
such that we only copy cm->cmsg_type after we transferred the data.
On Mon, Apr 17, 2023 at 7:16 PM Christian Brauner <[email protected]> wrote:
>
> On Mon, Apr 17, 2023 at 06:01:16PM +0200, Aleksandr Mikhalitsyn wrote:
> > On Mon, Apr 17, 2023 at 5:18 PM Christian Brauner <[email protected]> wrote:
> > >
> > > On Thu, Apr 13, 2023 at 03:33:52PM +0200, Alexander Mikhalitsyn wrote:
> > > > Implement SCM_PIDFD, a new type of CMSG type analogical to SCM_CREDENTIALS,
> > > > but it contains pidfd instead of plain pid, which allows programmers not
> > > > to care about PID reuse problem.
> > > >
> > > > Idea comes from UAPI kernel group:
> > > > https://uapi-group.org/kernel-features/
> > > >
> > > > Big thanks to Christian Brauner and Lennart Poettering for productive
> > > > discussions about this.
> > > >
> > > > Cc: "David S. Miller" <[email protected]>
> > > > Cc: Eric Dumazet <[email protected]>
> > > > Cc: Jakub Kicinski <[email protected]>
> > > > Cc: Paolo Abeni <[email protected]>
> > > > Cc: Leon Romanovsky <[email protected]>
> > > > Cc: David Ahern <[email protected]>
> > > > Cc: Arnd Bergmann <[email protected]>
> > > > Cc: Kees Cook <[email protected]>
> > > > Cc: Christian Brauner <[email protected]>
> > > > Cc: Kuniyuki Iwashima <[email protected]>
> > > > Cc: Lennart Poettering <[email protected]>
> > > > Cc: Luca Boccassi <[email protected]>
> > > > Cc: [email protected]
> > > > Cc: [email protected]
> > > > Cc: [email protected]
> > > > Tested-by: Luca Boccassi <[email protected]>
> > > > Reviewed-by: Kuniyuki Iwashima <[email protected]>
> > > > Signed-off-by: Alexander Mikhalitsyn <[email protected]>
> > > > ---
> > > > v4:
> > > > - fixed silent fd_install if writing of CMSG to the userspace fails (pointed by Christian)
> > > > v2:
> > > > According to review comments from Kuniyuki Iwashima and Christian Brauner:
> > > > - use pidfd_create(..) retval as a result
> > > > - whitespace change
> > > > ---
> > > > arch/alpha/include/uapi/asm/socket.h | 2 ++
> > > > arch/mips/include/uapi/asm/socket.h | 2 ++
> > > > arch/parisc/include/uapi/asm/socket.h | 2 ++
> > > > arch/sparc/include/uapi/asm/socket.h | 2 ++
> > > > include/linux/net.h | 1 +
> > > > include/linux/socket.h | 1 +
> > > > include/net/scm.h | 39 +++++++++++++++++++++++--
> > > > include/uapi/asm-generic/socket.h | 2 ++
> > > > net/core/sock.c | 11 +++++++
> > > > net/mptcp/sockopt.c | 1 +
> > > > net/unix/af_unix.c | 18 ++++++++----
> > > > tools/include/uapi/asm-generic/socket.h | 2 ++
> > > > 12 files changed, 76 insertions(+), 7 deletions(-)
> > > >
> > > > diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
> > > > index 739891b94136..ff310613ae64 100644
> > > > --- a/arch/alpha/include/uapi/asm/socket.h
> > > > +++ b/arch/alpha/include/uapi/asm/socket.h
> > > > @@ -137,6 +137,8 @@
> > > >
> > > > #define SO_RCVMARK 75
> > > >
> > > > +#define SO_PASSPIDFD 76
> > > > +
> > > > #if !defined(__KERNEL__)
> > > >
> > > > #if __BITS_PER_LONG == 64
> > > > diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
> > > > index 18f3d95ecfec..762dcb80e4ec 100644
> > > > --- a/arch/mips/include/uapi/asm/socket.h
> > > > +++ b/arch/mips/include/uapi/asm/socket.h
> > > > @@ -148,6 +148,8 @@
> > > >
> > > > #define SO_RCVMARK 75
> > > >
> > > > +#define SO_PASSPIDFD 76
> > > > +
> > > > #if !defined(__KERNEL__)
> > > >
> > > > #if __BITS_PER_LONG == 64
> > > > diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
> > > > index f486d3dfb6bb..df16a3e16d64 100644
> > > > --- a/arch/parisc/include/uapi/asm/socket.h
> > > > +++ b/arch/parisc/include/uapi/asm/socket.h
> > > > @@ -129,6 +129,8 @@
> > > >
> > > > #define SO_RCVMARK 0x4049
> > > >
> > > > +#define SO_PASSPIDFD 0x404A
> > > > +
> > > > #if !defined(__KERNEL__)
> > > >
> > > > #if __BITS_PER_LONG == 64
> > > > diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
> > > > index 2fda57a3ea86..6e2847804fea 100644
> > > > --- a/arch/sparc/include/uapi/asm/socket.h
> > > > +++ b/arch/sparc/include/uapi/asm/socket.h
> > > > @@ -130,6 +130,8 @@
> > > >
> > > > #define SO_RCVMARK 0x0054
> > > >
> > > > +#define SO_PASSPIDFD 0x0055
> > > > +
> > > > #if !defined(__KERNEL__)
> > > >
> > > >
> > > > diff --git a/include/linux/net.h b/include/linux/net.h
> > > > index b73ad8e3c212..c234dfbe7a30 100644
> > > > --- a/include/linux/net.h
> > > > +++ b/include/linux/net.h
> > > > @@ -43,6 +43,7 @@ struct net;
> > > > #define SOCK_PASSSEC 4
> > > > #define SOCK_SUPPORT_ZC 5
> > > > #define SOCK_CUSTOM_SOCKOPT 6
> > > > +#define SOCK_PASSPIDFD 7
> > > >
> > > > #ifndef ARCH_HAS_SOCKET_TYPES
> > > > /**
> > > > diff --git a/include/linux/socket.h b/include/linux/socket.h
> > > > index 13c3a237b9c9..6bf90f251910 100644
> > > > --- a/include/linux/socket.h
> > > > +++ b/include/linux/socket.h
> > > > @@ -177,6 +177,7 @@ static inline size_t msg_data_left(struct msghdr *msg)
> > > > #define SCM_RIGHTS 0x01 /* rw: access rights (array of int) */
> > > > #define SCM_CREDENTIALS 0x02 /* rw: struct ucred */
> > > > #define SCM_SECURITY 0x03 /* rw: security label */
> > > > +#define SCM_PIDFD 0x04 /* ro: pidfd (int) */
> > > >
> > > > struct ucred {
> > > > __u32 pid;
> > > > diff --git a/include/net/scm.h b/include/net/scm.h
> > > > index 585adc1346bd..c67f765a165b 100644
> > > > --- a/include/net/scm.h
> > > > +++ b/include/net/scm.h
> > > > @@ -120,12 +120,44 @@ static inline bool scm_has_secdata(struct socket *sock)
> > > > }
> > > > #endif /* CONFIG_SECURITY_NETWORK */
> > > >
> > > > +static __inline__ void scm_pidfd_recv(struct msghdr *msg, struct scm_cookie *scm)
> > > > +{
> > > > + struct file *pidfd_file = NULL;
> > > > + int pidfd;
> > > > +
> > > > + /*
> > > > + * put_cmsg() doesn't return an error if CMSG is truncated,
> > > > + * that's why we need to opencode these checks here.
> > > > + */
> > > > + if ((msg->msg_controllen <= sizeof(struct cmsghdr)) ||
> > > > + (msg->msg_controllen - sizeof(struct cmsghdr)) < sizeof(int)) {
> > > > + msg->msg_flags |= MSG_CTRUNC;
> > > > + return;
> > >
> > > Hm, curious about this: We mark the message as truncated for SCM_PIDFD
> > > but if the same conditions were to apply for SCM_PASSCRED we don't mark
> > > the message as truncated. Am I reading this correctly? And if so, could
> > > you please briefly explain this difference?
> >
> > Hi, Christian!
> >
> > For SCM_CREDENTIALS we mark it too. Inside the put_cmsg function:
> > https://github.com/torvalds/linux/blob/6a8f57ae2eb07ab39a6f0ccad60c760743051026/net/core/scm.c#L225
> >
> > The reason why I'm open-coding these checks is that I want to know
> > that the message
> > doesn't fit into the userspace buffer before doing pidfd_prepare and
> > other stuff and because
> > put_cmsg is not returning an error when message doesn't fit in the
> > userspace buffer and
> > we won't be able to properly do pidfd cleanup (put struct pid and fd index).
> >
> > >
> > > > + }
> > > > +
> > > > + WARN_ON_ONCE(!scm->pid);
> > > > + pidfd = pidfd_prepare(scm->pid, 0, &pidfd_file);
> > > > +
> > > > + if (put_cmsg(msg, SOL_SOCKET, SCM_PIDFD, sizeof(int), &pidfd)) {
> > >
> > > If the put_cmsg() of the pidfd fails userspace needs to be able to
> > > detect this. Otherwise they can't distinguish between the SCM_PIDFD
> > > value being zero because the put_cmsg() failed or put_cmsg() succeeded
> > > and the allocated fd nr was 0.
> >
> > If pidfd_prepare fails then userspace will receive SCM_PIDFD message
> > with negative pidfd value.
>
> So we discussed this a bit offline and I think there's still an issue.
> If put_cmsg() fails
>
> if (msg->msg_control_is_user) {
> struct cmsghdr __user *cm = msg->msg_control_user;
>
> check_object_size(data, cmlen - sizeof(*cm), true);
>
> if (!user_write_access_begin(cm, cmlen))
> goto efault;
>
> // This succeeds so cm->cmsg_len == sizeof(int)
> unsafe_put_user(cmlen, &cm->cmsg_len, efault_end);
>
> // This succeeds so cm->cmsg_level == SOL_SOCKET
> unsafe_put_user(level, &cm->cmsg_level, efault_end);
>
> // This succeeds so cm->cmsg_type == SCM_PIDFD
> unsafe_put_user(type, &cm->cmsg_type, efault_end);
>
> // This fails and leaves all bits set to 0
> unsafe_copy_to_user(CMSG_USER_DATA(cm), data,
> cmlen - sizeof(*cm), efault_end);
> user_write_access_end();
>
> so now we hit
>
> if (put_cmsg(msg, SOL_SOCKET, SCM_PIDFD, sizeof(int), &pidfd)) {
> if (pidfd_file) {
> put_unused_fd(pidfd);
> fput(pidfd_file);
> }
>
> return;
> }
>
> and return early. Afaict, userspace would now receive:
>
> if (cmsg && cmsg->cmsg_len == CMSG_LEN(sizeof(int)) &&
> cmsg->cmsg_level == SOL_SOCKET &&
> cmsg->cmsg_type == SCM_PIDFD) {
> memcpy(&pidfd, CMSG_DATA(cmsg), sizeof(int));
>
> // pidfd is now 0 which is a valid fd number
> // it'll likely refer to /dev/stdin or whatever and so
> // will fail or, worst case, 0 refers to another pidfd :)
> pidfd_send_signal(pidfd, SIGKILL);
>
> so we need to address this. So one way I think that would solve this is:
>
> diff --git a/net/core/scm.c b/net/core/scm.c
> index 3cd7dd377e53..d1f4cd135c5a 100644
> --- a/net/core/scm.c
> +++ b/net/core/scm.c
> @@ -236,9 +236,9 @@ int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data)
>
> unsafe_put_user(cmlen, &cm->cmsg_len, efault_end);
> unsafe_put_user(level, &cm->cmsg_level, efault_end);
> - unsafe_put_user(type, &cm->cmsg_type, efault_end);
> unsafe_copy_to_user(CMSG_USER_DATA(cm), data,
> cmlen - sizeof(*cm), efault_end);
> + unsafe_put_user(type, &cm->cmsg_type, efault_end);
> user_write_access_end();
> } else {
> struct cmsghdr *cm = msg->msg_control;
>
> such that we only copy cm->cmsg_type after we transferred the data.
This looks wrong to me.
If put_cmsg() returns -EFAULT, then msg->msg_control and
msg->msg_controllen were not changed.
So the user application should not attempt to read this part of the
control buffer, since it could contain garbage.
On Mon, Apr 17, 2023 at 07:43:19PM +0200, Eric Dumazet wrote:
> On Mon, Apr 17, 2023 at 7:16 PM Christian Brauner <[email protected]> wrote:
> >
> > On Mon, Apr 17, 2023 at 06:01:16PM +0200, Aleksandr Mikhalitsyn wrote:
> > > On Mon, Apr 17, 2023 at 5:18 PM Christian Brauner <[email protected]> wrote:
> > > >
> > > > On Thu, Apr 13, 2023 at 03:33:52PM +0200, Alexander Mikhalitsyn wrote:
> > > > > Implement SCM_PIDFD, a new type of CMSG type analogical to SCM_CREDENTIALS,
> > > > > but it contains pidfd instead of plain pid, which allows programmers not
> > > > > to care about PID reuse problem.
> > > > >
> > > > > Idea comes from UAPI kernel group:
> > > > > https://uapi-group.org/kernel-features/
> > > > >
> > > > > Big thanks to Christian Brauner and Lennart Poettering for productive
> > > > > discussions about this.
> > > > >
> > > > > Cc: "David S. Miller" <[email protected]>
> > > > > Cc: Eric Dumazet <[email protected]>
> > > > > Cc: Jakub Kicinski <[email protected]>
> > > > > Cc: Paolo Abeni <[email protected]>
> > > > > Cc: Leon Romanovsky <[email protected]>
> > > > > Cc: David Ahern <[email protected]>
> > > > > Cc: Arnd Bergmann <[email protected]>
> > > > > Cc: Kees Cook <[email protected]>
> > > > > Cc: Christian Brauner <[email protected]>
> > > > > Cc: Kuniyuki Iwashima <[email protected]>
> > > > > Cc: Lennart Poettering <[email protected]>
> > > > > Cc: Luca Boccassi <[email protected]>
> > > > > Cc: [email protected]
> > > > > Cc: [email protected]
> > > > > Cc: [email protected]
> > > > > Tested-by: Luca Boccassi <[email protected]>
> > > > > Reviewed-by: Kuniyuki Iwashima <[email protected]>
> > > > > Signed-off-by: Alexander Mikhalitsyn <[email protected]>
> > > > > ---
> > > > > v4:
> > > > > - fixed silent fd_install if writing of CMSG to the userspace fails (pointed by Christian)
> > > > > v2:
> > > > > According to review comments from Kuniyuki Iwashima and Christian Brauner:
> > > > > - use pidfd_create(..) retval as a result
> > > > > - whitespace change
> > > > > ---
> > > > > arch/alpha/include/uapi/asm/socket.h | 2 ++
> > > > > arch/mips/include/uapi/asm/socket.h | 2 ++
> > > > > arch/parisc/include/uapi/asm/socket.h | 2 ++
> > > > > arch/sparc/include/uapi/asm/socket.h | 2 ++
> > > > > include/linux/net.h | 1 +
> > > > > include/linux/socket.h | 1 +
> > > > > include/net/scm.h | 39 +++++++++++++++++++++++--
> > > > > include/uapi/asm-generic/socket.h | 2 ++
> > > > > net/core/sock.c | 11 +++++++
> > > > > net/mptcp/sockopt.c | 1 +
> > > > > net/unix/af_unix.c | 18 ++++++++----
> > > > > tools/include/uapi/asm-generic/socket.h | 2 ++
> > > > > 12 files changed, 76 insertions(+), 7 deletions(-)
> > > > >
> > > > > diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
> > > > > index 739891b94136..ff310613ae64 100644
> > > > > --- a/arch/alpha/include/uapi/asm/socket.h
> > > > > +++ b/arch/alpha/include/uapi/asm/socket.h
> > > > > @@ -137,6 +137,8 @@
> > > > >
> > > > > #define SO_RCVMARK 75
> > > > >
> > > > > +#define SO_PASSPIDFD 76
> > > > > +
> > > > > #if !defined(__KERNEL__)
> > > > >
> > > > > #if __BITS_PER_LONG == 64
> > > > > diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
> > > > > index 18f3d95ecfec..762dcb80e4ec 100644
> > > > > --- a/arch/mips/include/uapi/asm/socket.h
> > > > > +++ b/arch/mips/include/uapi/asm/socket.h
> > > > > @@ -148,6 +148,8 @@
> > > > >
> > > > > #define SO_RCVMARK 75
> > > > >
> > > > > +#define SO_PASSPIDFD 76
> > > > > +
> > > > > #if !defined(__KERNEL__)
> > > > >
> > > > > #if __BITS_PER_LONG == 64
> > > > > diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
> > > > > index f486d3dfb6bb..df16a3e16d64 100644
> > > > > --- a/arch/parisc/include/uapi/asm/socket.h
> > > > > +++ b/arch/parisc/include/uapi/asm/socket.h
> > > > > @@ -129,6 +129,8 @@
> > > > >
> > > > > #define SO_RCVMARK 0x4049
> > > > >
> > > > > +#define SO_PASSPIDFD 0x404A
> > > > > +
> > > > > #if !defined(__KERNEL__)
> > > > >
> > > > > #if __BITS_PER_LONG == 64
> > > > > diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
> > > > > index 2fda57a3ea86..6e2847804fea 100644
> > > > > --- a/arch/sparc/include/uapi/asm/socket.h
> > > > > +++ b/arch/sparc/include/uapi/asm/socket.h
> > > > > @@ -130,6 +130,8 @@
> > > > >
> > > > > #define SO_RCVMARK 0x0054
> > > > >
> > > > > +#define SO_PASSPIDFD 0x0055
> > > > > +
> > > > > #if !defined(__KERNEL__)
> > > > >
> > > > >
> > > > > diff --git a/include/linux/net.h b/include/linux/net.h
> > > > > index b73ad8e3c212..c234dfbe7a30 100644
> > > > > --- a/include/linux/net.h
> > > > > +++ b/include/linux/net.h
> > > > > @@ -43,6 +43,7 @@ struct net;
> > > > > #define SOCK_PASSSEC 4
> > > > > #define SOCK_SUPPORT_ZC 5
> > > > > #define SOCK_CUSTOM_SOCKOPT 6
> > > > > +#define SOCK_PASSPIDFD 7
> > > > >
> > > > > #ifndef ARCH_HAS_SOCKET_TYPES
> > > > > /**
> > > > > diff --git a/include/linux/socket.h b/include/linux/socket.h
> > > > > index 13c3a237b9c9..6bf90f251910 100644
> > > > > --- a/include/linux/socket.h
> > > > > +++ b/include/linux/socket.h
> > > > > @@ -177,6 +177,7 @@ static inline size_t msg_data_left(struct msghdr *msg)
> > > > > #define SCM_RIGHTS 0x01 /* rw: access rights (array of int) */
> > > > > #define SCM_CREDENTIALS 0x02 /* rw: struct ucred */
> > > > > #define SCM_SECURITY 0x03 /* rw: security label */
> > > > > +#define SCM_PIDFD 0x04 /* ro: pidfd (int) */
> > > > >
> > > > > struct ucred {
> > > > > __u32 pid;
> > > > > diff --git a/include/net/scm.h b/include/net/scm.h
> > > > > index 585adc1346bd..c67f765a165b 100644
> > > > > --- a/include/net/scm.h
> > > > > +++ b/include/net/scm.h
> > > > > @@ -120,12 +120,44 @@ static inline bool scm_has_secdata(struct socket *sock)
> > > > > }
> > > > > #endif /* CONFIG_SECURITY_NETWORK */
> > > > >
> > > > > +static __inline__ void scm_pidfd_recv(struct msghdr *msg, struct scm_cookie *scm)
> > > > > +{
> > > > > + struct file *pidfd_file = NULL;
> > > > > + int pidfd;
> > > > > +
> > > > > + /*
> > > > > + * put_cmsg() doesn't return an error if CMSG is truncated,
> > > > > + * that's why we need to opencode these checks here.
> > > > > + */
> > > > > + if ((msg->msg_controllen <= sizeof(struct cmsghdr)) ||
> > > > > + (msg->msg_controllen - sizeof(struct cmsghdr)) < sizeof(int)) {
> > > > > + msg->msg_flags |= MSG_CTRUNC;
> > > > > + return;
> > > >
> > > > Hm, curious about this: We mark the message as truncated for SCM_PIDFD
> > > > but if the same conditions were to apply for SCM_PASSCRED we don't mark
> > > > the message as truncated. Am I reading this correctly? And if so, could
> > > > you please briefly explain this difference?
> > >
> > > Hi, Christian!
> > >
> > > For SCM_CREDENTIALS we mark it too. Inside the put_cmsg function:
> > > https://github.com/torvalds/linux/blob/6a8f57ae2eb07ab39a6f0ccad60c760743051026/net/core/scm.c#L225
> > >
> > > The reason why I'm open-coding these checks is that I want to know
> > > that the message
> > > doesn't fit into the userspace buffer before doing pidfd_prepare and
> > > other stuff and because
> > > put_cmsg is not returning an error when message doesn't fit in the
> > > userspace buffer and
> > > we won't be able to properly do pidfd cleanup (put struct pid and fd index).
> > >
> > > >
> > > > > + }
> > > > > +
> > > > > + WARN_ON_ONCE(!scm->pid);
> > > > > + pidfd = pidfd_prepare(scm->pid, 0, &pidfd_file);
> > > > > +
> > > > > + if (put_cmsg(msg, SOL_SOCKET, SCM_PIDFD, sizeof(int), &pidfd)) {
> > > >
> > > > If the put_cmsg() of the pidfd fails userspace needs to be able to
> > > > detect this. Otherwise they can't distinguish between the SCM_PIDFD
> > > > value being zero because the put_cmsg() failed or put_cmsg() succeeded
> > > > and the allocated fd nr was 0.
> > >
> > > If pidfd_prepare fails then userspace will receive SCM_PIDFD message
> > > with negative pidfd value.
> >
> > So we discussed this a bit offline and I think there's still an issue.
> > If put_cmsg() fails
> >
> > if (msg->msg_control_is_user) {
> > struct cmsghdr __user *cm = msg->msg_control_user;
> >
> > check_object_size(data, cmlen - sizeof(*cm), true);
> >
> > if (!user_write_access_begin(cm, cmlen))
> > goto efault;
> >
> > // This succeeds so cm->cmsg_len == sizeof(int)
> > unsafe_put_user(cmlen, &cm->cmsg_len, efault_end);
> >
> > // This succeeds so cm->cmsg_level == SOL_SOCKET
> > unsafe_put_user(level, &cm->cmsg_level, efault_end);
> >
> > // This succeeds so cm->cmsg_type == SCM_PIDFD
> > unsafe_put_user(type, &cm->cmsg_type, efault_end);
> >
> > // This fails and leaves all bits set to 0
> > unsafe_copy_to_user(CMSG_USER_DATA(cm), data,
> > cmlen - sizeof(*cm), efault_end);
> > user_write_access_end();
> >
> > so now we hit
> >
> > if (put_cmsg(msg, SOL_SOCKET, SCM_PIDFD, sizeof(int), &pidfd)) {
> > if (pidfd_file) {
> > put_unused_fd(pidfd);
> > fput(pidfd_file);
> > }
> >
> > return;
> > }
> >
> > and return early. Afaict, userspace would now receive:
> >
> > if (cmsg && cmsg->cmsg_len == CMSG_LEN(sizeof(int)) &&
> > cmsg->cmsg_level == SOL_SOCKET &&
> > cmsg->cmsg_type == SCM_PIDFD) {
> > memcpy(&pidfd, CMSG_DATA(cmsg), sizeof(int));
> >
> > // pidfd is now 0 which is a valid fd number
> > // it'll likely refer to /dev/stdin or whatever and so
> > // will fail or, worst case, 0 refers to another pidfd :)
> > pidfd_send_signal(pidfd, SIGKILL);
> >
> > so we need to address this. So one way I think that would solve this is:
> >
> > diff --git a/net/core/scm.c b/net/core/scm.c
> > index 3cd7dd377e53..d1f4cd135c5a 100644
> > --- a/net/core/scm.c
> > +++ b/net/core/scm.c
> > @@ -236,9 +236,9 @@ int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data)
> >
> > unsafe_put_user(cmlen, &cm->cmsg_len, efault_end);
> > unsafe_put_user(level, &cm->cmsg_level, efault_end);
> > - unsafe_put_user(type, &cm->cmsg_type, efault_end);
> > unsafe_copy_to_user(CMSG_USER_DATA(cm), data,
> > cmlen - sizeof(*cm), efault_end);
> > + unsafe_put_user(type, &cm->cmsg_type, efault_end);
> > user_write_access_end();
> > } else {
> > struct cmsghdr *cm = msg->msg_control;
> >
> > such that we only copy cm->cmsg_type after we transferred the data.
>
> This looks wrong to me.
>
> if put_cmsg() returns -EFAULT, then msg->msg_control and
> msg->msg_controllen were not changed.
>
> So the user application should not attempt to read this part of the
> control buffer, this could contain garbage.
Thanks for the review, Eric. That's reassuring.
I've done a bit of container-related networking before, but I'm fumbling
my way through the reviews here, so any additional reviews would be
very helpful.
On Thu, Apr 13, 2023 at 03:33:52PM +0200, Alexander Mikhalitsyn wrote:
> Implement SCM_PIDFD, a new type of CMSG type analogical to SCM_CREDENTIALS,
> but it contains pidfd instead of plain pid, which allows programmers not
> to care about PID reuse problem.
>
> Idea comes from UAPI kernel group:
> https://uapi-group.org/kernel-features/
>
> Big thanks to Christian Brauner and Lennart Poettering for productive
> discussions about this.
>
> Cc: "David S. Miller" <[email protected]>
> Cc: Eric Dumazet <[email protected]>
> Cc: Jakub Kicinski <[email protected]>
> Cc: Paolo Abeni <[email protected]>
> Cc: Leon Romanovsky <[email protected]>
> Cc: David Ahern <[email protected]>
> Cc: Arnd Bergmann <[email protected]>
> Cc: Kees Cook <[email protected]>
> Cc: Christian Brauner <[email protected]>
> Cc: Kuniyuki Iwashima <[email protected]>
> Cc: Lennart Poettering <[email protected]>
> Cc: Luca Boccassi <[email protected]>
> Cc: [email protected]
> Cc: [email protected]
> Cc: [email protected]
> Tested-by: Luca Boccassi <[email protected]>
> Reviewed-by: Kuniyuki Iwashima <[email protected]>
> Signed-off-by: Alexander Mikhalitsyn <[email protected]>
> ---
> v4:
> - fixed silent fd_install if writing of CMSG to the userspace fails (pointed by Christian)
I don't have a lot more to add to this,
Reviewed-by: Christian Brauner <[email protected]>