2023-03-21 18:45:46

by Aleksandr Mikhalitsyn

[permalink] [raw]
Subject: [PATCH net-next v2 0/3] Add SCM_PIDFD and SO_PEERPIDFD

1. Implement SCM_PIDFD, a new type of CMSG type analogical to SCM_CREDENTIALS,
but it contains pidfd instead of plain pid, which allows programmers not
to care about PID reuse problem.

2. Add SO_PEERPIDFD which allows to get pidfd of peer socket holder pidfd.
This thing is direct analog of SO_PEERCRED which allows to get plain PID.

3. Add SCM_PIDFD / SO_PEERPIDFD kselftest

Idea comes from UAPI kernel group:
https://uapi-group.org/kernel-features/

Big thanks to Christian Brauner and Lennart Poettering for productive
discussions about this.

Cc: "David S. Miller" <[email protected]>
Cc: Eric Dumazet <[email protected]>
Cc: Jakub Kicinski <[email protected]>
Cc: Paolo Abeni <[email protected]>
Cc: Leon Romanovsky <[email protected]>
Cc: David Ahern <[email protected]>
Cc: Arnd Bergmann <[email protected]>
Cc: Kees Cook <[email protected]>
Cc: Christian Brauner <[email protected]>
Cc: Kuniyuki Iwashima <[email protected]>
Cc: Lennart Poettering <[email protected]>

Alexander Mikhalitsyn (3):
scm: add SO_PASSPIDFD and SCM_PIDFD
net: core: add getsockopt SO_PEERPIDFD
selftests: net: add SCM_PIDFD / SO_PEERPIDFD test

arch/alpha/include/uapi/asm/socket.h | 3 +
arch/mips/include/uapi/asm/socket.h | 3 +
arch/parisc/include/uapi/asm/socket.h | 3 +
arch/sparc/include/uapi/asm/socket.h | 3 +
include/linux/net.h | 1 +
include/linux/socket.h | 1 +
include/net/scm.h | 14 +-
include/uapi/asm-generic/socket.h | 3 +
net/core/sock.c | 32 ++
net/mptcp/sockopt.c | 1 +
net/unix/af_unix.c | 18 +-
tools/include/uapi/asm-generic/socket.h | 3 +
tools/testing/selftests/net/.gitignore | 1 +
tools/testing/selftests/net/af_unix/Makefile | 3 +-
.../testing/selftests/net/af_unix/scm_pidfd.c | 336 ++++++++++++++++++
15 files changed, 417 insertions(+), 8 deletions(-)
create mode 100644 tools/testing/selftests/net/af_unix/scm_pidfd.c

--
2.34.1



2023-03-21 18:46:09

by Aleksandr Mikhalitsyn

[permalink] [raw]
Subject: [PATCH net-next v2 3/3] selftests: net: add SCM_PIDFD / SO_PEERPIDFD test

Basic test to check consistency between:
- SCM_CREDENTIALS and SCM_PIDFD
- SO_PEERCRED and SO_PEERPIDFD

Cc: "David S. Miller" <[email protected]>
Cc: Eric Dumazet <[email protected]>
Cc: Jakub Kicinski <[email protected]>
Cc: Paolo Abeni <[email protected]>
Cc: Leon Romanovsky <[email protected]>
Cc: David Ahern <[email protected]>
Cc: Arnd Bergmann <[email protected]>
Cc: Kees Cook <[email protected]>
Cc: Christian Brauner <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Signed-off-by: Alexander Mikhalitsyn <[email protected]>
---
tools/testing/selftests/net/.gitignore | 1 +
tools/testing/selftests/net/af_unix/Makefile | 3 +-
.../testing/selftests/net/af_unix/scm_pidfd.c | 336 ++++++++++++++++++
3 files changed, 339 insertions(+), 1 deletion(-)
create mode 100644 tools/testing/selftests/net/af_unix/scm_pidfd.c

diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index a6911cae368c..f2d23a1df596 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -25,6 +25,7 @@ reuseport_bpf_cpu
reuseport_bpf_numa
reuseport_dualstack
rxtimestamp
+scm_pidfd
sk_bind_sendto_listen
sk_connect_zero_addr
socket
diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile
index 1e4b397cece6..221c387a7d7f 100644
--- a/tools/testing/selftests/net/af_unix/Makefile
+++ b/tools/testing/selftests/net/af_unix/Makefile
@@ -1,3 +1,4 @@
-TEST_GEN_PROGS := diag_uid test_unix_oob unix_connect
+CFLAGS += $(KHDR_INCLUDES)
+TEST_GEN_PROGS := diag_uid test_unix_oob unix_connect scm_pidfd

include ../../lib.mk
diff --git a/tools/testing/selftests/net/af_unix/scm_pidfd.c b/tools/testing/selftests/net/af_unix/scm_pidfd.c
new file mode 100644
index 000000000000..fa502510ee9e
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/scm_pidfd.c
@@ -0,0 +1,336 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <error.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+#include <linux/socket.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/un.h>
+#include <sys/signal.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#define clean_errno() (errno == 0 ? "None" : strerror(errno))
+#define log_err(MSG, ...) \
+ fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", __FILE__, __LINE__, \
+ clean_errno(), ##__VA_ARGS__)
+
+#ifndef SCM_PIDFD
+#define SCM_PIDFD 0x04
+#endif
+
+static pid_t client_pid;
+static char sock_name[32];
+
+static void die(int status)
+{
+ unlink(sock_name);
+ kill(client_pid, SIGTERM);
+ exit(status);
+}
+
+static void child_die()
+{
+ kill(getppid(), SIGTERM);
+ exit(1);
+}
+
+static int safe_int(const char *numstr, int *converted)
+{
+ char *err = NULL;
+ long sli;
+
+ errno = 0;
+ sli = strtol(numstr, &err, 0);
+ if (errno == ERANGE && (sli == LONG_MAX || sli == LONG_MIN))
+ return -ERANGE;
+
+ if (errno != 0 && sli == 0)
+ return -EINVAL;
+
+ if (err == numstr || *err != '\0')
+ return -EINVAL;
+
+ if (sli > INT_MAX || sli < INT_MIN)
+ return -ERANGE;
+
+ *converted = (int)sli;
+ return 0;
+}
+
+static int char_left_gc(const char *buffer, size_t len)
+{
+ size_t i;
+
+ for (i = 0; i < len; i++) {
+ if (buffer[i] == ' ' || buffer[i] == '\t')
+ continue;
+
+ return i;
+ }
+
+ return 0;
+}
+
+static int char_right_gc(const char *buffer, size_t len)
+{
+ int i;
+
+ for (i = len - 1; i >= 0; i--) {
+ if (buffer[i] == ' ' || buffer[i] == '\t' ||
+ buffer[i] == '\n' || buffer[i] == '\0')
+ continue;
+
+ return i + 1;
+ }
+
+ return 0;
+}
+
+static char *trim_whitespace_in_place(char *buffer)
+{
+ buffer += char_left_gc(buffer, strlen(buffer));
+ buffer[char_right_gc(buffer, strlen(buffer))] = '\0';
+ return buffer;
+}
+
+/* borrowed (with all helpers) from pidfd/pidfd_open_test.c */
+static pid_t get_pid_from_fdinfo_file(int pidfd, const char *key, size_t keylen)
+{
+ int ret;
+ char path[512];
+ FILE *f;
+ size_t n = 0;
+ pid_t result = -1;
+ char *line = NULL;
+
+ snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", pidfd);
+
+ f = fopen(path, "re");
+ if (!f)
+ return -1;
+
+ while (getline(&line, &n, f) != -1) {
+ char *numstr;
+
+ if (strncmp(line, key, keylen))
+ continue;
+
+ numstr = trim_whitespace_in_place(line + 4);
+ ret = safe_int(numstr, &result);
+ if (ret < 0)
+ goto out;
+
+ break;
+ }
+
+out:
+ free(line);
+ fclose(f);
+ return result;
+}
+
+static int cmsg_check(int fd)
+{
+ struct msghdr msg = { 0 };
+ struct cmsghdr *cmsg;
+ struct iovec iov;
+ struct ucred *ucred = NULL;
+ int data = 0;
+ char control[CMSG_SPACE(sizeof(struct ucred)) +
+ CMSG_SPACE(sizeof(int))] = { 0 };
+ int *pidfd = NULL;
+ pid_t parent_pid;
+ int err;
+
+ iov.iov_base = &data;
+ iov.iov_len = sizeof(data);
+
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = control;
+ msg.msg_controllen = sizeof(control);
+
+ err = recvmsg(fd, &msg, 0);
+ if (err < 0) {
+ log_err("recvmsg");
+ return 1;
+ }
+
+ if (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
+ log_err("recvmsg: truncated");
+ return 1;
+ }
+
+ for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL;
+ cmsg = CMSG_NXTHDR(&msg, cmsg)) {
+ if (cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SCM_PIDFD) {
+ if (cmsg->cmsg_len < sizeof(*pidfd)) {
+ log_err("CMSG parse: SCM_PIDFD wrong len");
+ return 1;
+ }
+
+ pidfd = (void *)CMSG_DATA(cmsg);
+ }
+
+ if (cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SCM_CREDENTIALS) {
+ if (cmsg->cmsg_len < sizeof(*ucred)) {
+ log_err("CMSG parse: SCM_CREDENTIALS wrong len");
+ return 1;
+ }
+
+ ucred = (void *)CMSG_DATA(cmsg);
+ }
+ }
+
+ /* send(pfd, "x", sizeof(char), 0) */
+ if (data != 'x') {
+ log_err("recvmsg: data corruption");
+ return 1;
+ }
+
+ if (!pidfd) {
+ log_err("CMSG parse: SCM_PIDFD not found");
+ return 1;
+ }
+
+ if (!ucred) {
+ log_err("CMSG parse: SCM_CREDENTIALS not found");
+ return 1;
+ }
+
+ /* pidfd from SCM_PIDFD should point to the parent process PID */
+ parent_pid =
+ get_pid_from_fdinfo_file(*pidfd, "Pid:", sizeof("Pid:") - 1);
+ if (parent_pid != getppid()) {
+ log_err("wrong SCM_PIDFD %d != %d", parent_pid, getppid());
+ return 1;
+ }
+
+ return 0;
+}
+
+void client(struct sockaddr_un *listen_addr)
+{
+ int cfd;
+ socklen_t len;
+ struct ucred peer_cred;
+ int peer_pidfd;
+ pid_t peer_pid;
+ int on = 0;
+
+ cfd = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (cfd < 0) {
+ log_err("socket");
+ child_die();
+ }
+
+ if (connect(cfd, (struct sockaddr *)listen_addr,
+ sizeof(*listen_addr)) != 0) {
+ log_err("connect");
+ child_die();
+ }
+
+ on = 1;
+ if (setsockopt(cfd, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on))) {
+ log_err("Failed to set SO_PASSCRED");
+ child_die();
+ }
+
+ if (setsockopt(cfd, SOL_SOCKET, SO_PASSPIDFD, &on, sizeof(on))) {
+ log_err("Failed to set SO_PASSPIDFD");
+ child_die();
+ }
+
+ if (cmsg_check(cfd)) {
+ log_err("cmsg_check failed");
+ child_die();
+ }
+
+ len = sizeof(peer_cred);
+ if (getsockopt(cfd, SOL_SOCKET, SO_PEERCRED, &peer_cred, &len)) {
+ log_err("Failed to get SO_PEERCRED");
+ child_die();
+ }
+
+ len = sizeof(peer_pidfd);
+ if (getsockopt(cfd, SOL_SOCKET, SO_PEERPIDFD, &peer_pidfd, &len)) {
+ log_err("Failed to get SO_PEERPIDFD");
+ child_die();
+ }
+
+ /* pid from SO_PEERCRED should point to the parent process PID */
+ if (peer_cred.pid != getppid()) {
+ log_err("Failed to get SO_PEERPIDFD");
+ child_die();
+ }
+
+ peer_pid = get_pid_from_fdinfo_file(peer_pidfd,
+ "Pid:", sizeof("Pid:") - 1);
+ if (peer_pid != peer_cred.pid) {
+ log_err("Failed to get SO_PEERPIDFD");
+ child_die();
+ }
+}
+
+int main(int argc, char **argv)
+{
+ int lfd, pfd;
+ int child_status = 0;
+ struct sockaddr_un listen_addr;
+
+ lfd = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (lfd < 0) {
+ perror("socket");
+ exit(1);
+ }
+
+ memset(&listen_addr, 0, sizeof(listen_addr));
+ listen_addr.sun_family = AF_UNIX;
+ sprintf(sock_name, "scm_pidfd_%d", getpid());
+ unlink(sock_name);
+ strcpy(listen_addr.sun_path, sock_name);
+
+ if ((bind(lfd, (struct sockaddr *)&listen_addr, sizeof(listen_addr))) !=
+ 0) {
+ perror("socket bind failed");
+ exit(1);
+ }
+
+ if (listen(lfd, 1) < 0) {
+ perror("listen");
+ exit(1);
+ }
+
+ client_pid = fork();
+ if (client_pid < 0) {
+ perror("fork");
+ exit(1);
+ }
+
+ if (client_pid == 0) {
+ client(&listen_addr);
+ exit(0);
+ }
+
+ pfd = accept(lfd, NULL, NULL);
+ if (pfd < 0) {
+ perror("accept");
+ die(1);
+ }
+
+ if (send(pfd, "x", sizeof(char), 0) < 0) {
+ perror("send");
+ die(1);
+ }
+
+ waitpid(client_pid, &child_status, 0);
+ die(WIFEXITED(child_status) ? WEXITSTATUS(child_status) : 1);
+ die(0);
+}
\ No newline at end of file
--
2.34.1


2023-03-21 18:46:13

by Aleksandr Mikhalitsyn

[permalink] [raw]
Subject: [PATCH net-next v2 2/3] net: core: add getsockopt SO_PEERPIDFD

Add SO_PEERPIDFD which allows to get pidfd of peer socket holder pidfd.
This thing is direct analog of SO_PEERCRED which allows to get plain PID.

Cc: "David S. Miller" <[email protected]>
Cc: Eric Dumazet <[email protected]>
Cc: Jakub Kicinski <[email protected]>
Cc: Paolo Abeni <[email protected]>
Cc: Leon Romanovsky <[email protected]>
Cc: David Ahern <[email protected]>
Cc: Arnd Bergmann <[email protected]>
Cc: Kees Cook <[email protected]>
Cc: Christian Brauner <[email protected]>
Cc: Kuniyuki Iwashima <[email protected]>
Cc: Lennart Poettering <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Signed-off-by: Alexander Mikhalitsyn <[email protected]>
---
v2:
According to review comments from Kuniyuki Iwashima and Christian Brauner:
- use pidfd_create(..) retval as a result
- whitespace change
---
arch/alpha/include/uapi/asm/socket.h | 1 +
arch/mips/include/uapi/asm/socket.h | 1 +
arch/parisc/include/uapi/asm/socket.h | 1 +
arch/sparc/include/uapi/asm/socket.h | 1 +
include/uapi/asm-generic/socket.h | 1 +
net/core/sock.c | 21 +++++++++++++++++++++
tools/include/uapi/asm-generic/socket.h | 1 +
7 files changed, 27 insertions(+)

diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
index ff310613ae64..e94f621903fe 100644
--- a/arch/alpha/include/uapi/asm/socket.h
+++ b/arch/alpha/include/uapi/asm/socket.h
@@ -138,6 +138,7 @@
#define SO_RCVMARK 75

#define SO_PASSPIDFD 76
+#define SO_PEERPIDFD 77

#if !defined(__KERNEL__)

diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
index 762dcb80e4ec..60ebaed28a4c 100644
--- a/arch/mips/include/uapi/asm/socket.h
+++ b/arch/mips/include/uapi/asm/socket.h
@@ -149,6 +149,7 @@
#define SO_RCVMARK 75

#define SO_PASSPIDFD 76
+#define SO_PEERPIDFD 77

#if !defined(__KERNEL__)

diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
index df16a3e16d64..be264c2b1a11 100644
--- a/arch/parisc/include/uapi/asm/socket.h
+++ b/arch/parisc/include/uapi/asm/socket.h
@@ -130,6 +130,7 @@
#define SO_RCVMARK 0x4049

#define SO_PASSPIDFD 0x404A
+#define SO_PEERPIDFD 0x404B

#if !defined(__KERNEL__)

diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
index 6e2847804fea..682da3714686 100644
--- a/arch/sparc/include/uapi/asm/socket.h
+++ b/arch/sparc/include/uapi/asm/socket.h
@@ -131,6 +131,7 @@
#define SO_RCVMARK 0x0054

#define SO_PASSPIDFD 0x0055
+#define SO_PEERPIDFD 0x0056

#if !defined(__KERNEL__)

diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
index b76169fdb80b..8ce8a39a1e5f 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -133,6 +133,7 @@
#define SO_RCVMARK 75

#define SO_PASSPIDFD 76
+#define SO_PEERPIDFD 77

#if !defined(__KERNEL__)

diff --git a/net/core/sock.c b/net/core/sock.c
index 3f974246ba3e..85c269ca9d8a 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1763,6 +1763,27 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
goto lenout;
}

+ case SO_PEERPIDFD:
+ {
+ struct pid *peer_pid;
+ int pidfd;
+
+ if (len > sizeof(pidfd))
+ len = sizeof(pidfd);
+
+ spin_lock(&sk->sk_peer_lock);
+ peer_pid = get_pid(sk->sk_peer_pid);
+ spin_unlock(&sk->sk_peer_lock);
+
+ pidfd = pidfd_create(peer_pid, 0);
+
+ put_pid(peer_pid);
+
+ if (copy_to_sockptr(optval, &pidfd, len))
+ return -EFAULT;
+ goto lenout;
+ }
+
case SO_PEERGROUPS:
{
const struct cred *cred;
diff --git a/tools/include/uapi/asm-generic/socket.h b/tools/include/uapi/asm-generic/socket.h
index fbbc4bf53ee3..54d9c8bf7c55 100644
--- a/tools/include/uapi/asm-generic/socket.h
+++ b/tools/include/uapi/asm-generic/socket.h
@@ -122,6 +122,7 @@
#define SO_RCVMARK 75

#define SO_PASSPIDFD 76
+#define SO_PEERPIDFD 77

#if !defined(__KERNEL__)

--
2.34.1


2023-03-21 18:46:17

by Aleksandr Mikhalitsyn

[permalink] [raw]
Subject: [PATCH net-next v2 1/3] scm: add SO_PASSPIDFD and SCM_PIDFD

Implement SCM_PIDFD, a new type of CMSG type analogical to SCM_CREDENTIALS,
but it contains pidfd instead of plain pid, which allows programmers not
to care about PID reuse problem.

Idea comes from UAPI kernel group:
https://uapi-group.org/kernel-features/

Big thanks to Christian Brauner and Lennart Poettering for productive
discussions about this.

Cc: "David S. Miller" <[email protected]>
Cc: Eric Dumazet <[email protected]>
Cc: Jakub Kicinski <[email protected]>
Cc: Paolo Abeni <[email protected]>
Cc: Leon Romanovsky <[email protected]>
Cc: David Ahern <[email protected]>
Cc: Arnd Bergmann <[email protected]>
Cc: Kees Cook <[email protected]>
Cc: Christian Brauner <[email protected]>
Cc: Kuniyuki Iwashima <[email protected]>
Cc: Lennart Poettering <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Signed-off-by: Alexander Mikhalitsyn <[email protected]>
---
v2:
According to review comments from Kuniyuki Iwashima and Christian Brauner:
- use pidfd_create(..) retval as a result
- whitespace change
---
arch/alpha/include/uapi/asm/socket.h | 2 ++
arch/mips/include/uapi/asm/socket.h | 2 ++
arch/parisc/include/uapi/asm/socket.h | 2 ++
arch/sparc/include/uapi/asm/socket.h | 2 ++
include/linux/net.h | 1 +
include/linux/socket.h | 1 +
include/net/scm.h | 14 ++++++++++++--
include/uapi/asm-generic/socket.h | 2 ++
net/core/sock.c | 11 +++++++++++
net/mptcp/sockopt.c | 1 +
net/unix/af_unix.c | 18 +++++++++++++-----
tools/include/uapi/asm-generic/socket.h | 2 ++
12 files changed, 51 insertions(+), 7 deletions(-)

diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
index 739891b94136..ff310613ae64 100644
--- a/arch/alpha/include/uapi/asm/socket.h
+++ b/arch/alpha/include/uapi/asm/socket.h
@@ -137,6 +137,8 @@

#define SO_RCVMARK 75

+#define SO_PASSPIDFD 76
+
#if !defined(__KERNEL__)

#if __BITS_PER_LONG == 64
diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
index 18f3d95ecfec..762dcb80e4ec 100644
--- a/arch/mips/include/uapi/asm/socket.h
+++ b/arch/mips/include/uapi/asm/socket.h
@@ -148,6 +148,8 @@

#define SO_RCVMARK 75

+#define SO_PASSPIDFD 76
+
#if !defined(__KERNEL__)

#if __BITS_PER_LONG == 64
diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
index f486d3dfb6bb..df16a3e16d64 100644
--- a/arch/parisc/include/uapi/asm/socket.h
+++ b/arch/parisc/include/uapi/asm/socket.h
@@ -129,6 +129,8 @@

#define SO_RCVMARK 0x4049

+#define SO_PASSPIDFD 0x404A
+
#if !defined(__KERNEL__)

#if __BITS_PER_LONG == 64
diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
index 2fda57a3ea86..6e2847804fea 100644
--- a/arch/sparc/include/uapi/asm/socket.h
+++ b/arch/sparc/include/uapi/asm/socket.h
@@ -130,6 +130,8 @@

#define SO_RCVMARK 0x0054

+#define SO_PASSPIDFD 0x0055
+
#if !defined(__KERNEL__)


diff --git a/include/linux/net.h b/include/linux/net.h
index b73ad8e3c212..c234dfbe7a30 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -43,6 +43,7 @@ struct net;
#define SOCK_PASSSEC 4
#define SOCK_SUPPORT_ZC 5
#define SOCK_CUSTOM_SOCKOPT 6
+#define SOCK_PASSPIDFD 7

#ifndef ARCH_HAS_SOCKET_TYPES
/**
diff --git a/include/linux/socket.h b/include/linux/socket.h
index 13c3a237b9c9..6bf90f251910 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -177,6 +177,7 @@ static inline size_t msg_data_left(struct msghdr *msg)
#define SCM_RIGHTS 0x01 /* rw: access rights (array of int) */
#define SCM_CREDENTIALS 0x02 /* rw: struct ucred */
#define SCM_SECURITY 0x03 /* rw: security label */
+#define SCM_PIDFD 0x04 /* ro: pidfd (int) */

struct ucred {
__u32 pid;
diff --git a/include/net/scm.h b/include/net/scm.h
index 585adc1346bd..0c717ae9c8db 100644
--- a/include/net/scm.h
+++ b/include/net/scm.h
@@ -124,8 +124,9 @@ static __inline__ void scm_recv(struct socket *sock, struct msghdr *msg,
struct scm_cookie *scm, int flags)
{
if (!msg->msg_control) {
- if (test_bit(SOCK_PASSCRED, &sock->flags) || scm->fp ||
- scm_has_secdata(sock))
+ if (test_bit(SOCK_PASSCRED, &sock->flags) ||
+ test_bit(SOCK_PASSPIDFD, &sock->flags) ||
+ scm->fp || scm_has_secdata(sock))
msg->msg_flags |= MSG_CTRUNC;
scm_destroy(scm);
return;
@@ -141,6 +142,15 @@ static __inline__ void scm_recv(struct socket *sock, struct msghdr *msg,
put_cmsg(msg, SOL_SOCKET, SCM_CREDENTIALS, sizeof(ucreds), &ucreds);
}

+ if (test_bit(SOCK_PASSPIDFD, &sock->flags)) {
+ int pidfd;
+
+ WARN_ON_ONCE(!scm->pid);
+ pidfd = pidfd_create(scm->pid, 0);
+
+ put_cmsg(msg, SOL_SOCKET, SCM_PIDFD, sizeof(int), &pidfd);
+ }
+
scm_destroy_cred(scm);

scm_passec(sock, msg, scm);
diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
index 638230899e98..b76169fdb80b 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -132,6 +132,8 @@

#define SO_RCVMARK 75

+#define SO_PASSPIDFD 76
+
#if !defined(__KERNEL__)

#if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
diff --git a/net/core/sock.c b/net/core/sock.c
index c25888795390..3f974246ba3e 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1246,6 +1246,13 @@ int sk_setsockopt(struct sock *sk, int level, int optname,
clear_bit(SOCK_PASSCRED, &sock->flags);
break;

+ case SO_PASSPIDFD:
+ if (valbool)
+ set_bit(SOCK_PASSPIDFD, &sock->flags);
+ else
+ clear_bit(SOCK_PASSPIDFD, &sock->flags);
+ break;
+
case SO_TIMESTAMP_OLD:
case SO_TIMESTAMP_NEW:
case SO_TIMESTAMPNS_OLD:
@@ -1737,6 +1744,10 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
v.val = !!test_bit(SOCK_PASSCRED, &sock->flags);
break;

+ case SO_PASSPIDFD:
+ v.val = !!test_bit(SOCK_PASSPIDFD, &sock->flags);
+ break;
+
case SO_PEERCRED:
{
struct ucred peercred;
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index 8a9656248b0f..bd80e707d0b3 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -355,6 +355,7 @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
case SO_BROADCAST:
case SO_BSDCOMPAT:
case SO_PASSCRED:
+ case SO_PASSPIDFD:
case SO_PASSSEC:
case SO_RXQ_OVFL:
case SO_WIFI_STATUS:
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 0b0f18ecce44..b0ac768752fa 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1361,7 +1361,8 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
if (err)
goto out;

- if (test_bit(SOCK_PASSCRED, &sock->flags) &&
+ if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
+ test_bit(SOCK_PASSPIDFD, &sock->flags)) &&
!unix_sk(sk)->addr) {
err = unix_autobind(sk);
if (err)
@@ -1469,7 +1470,8 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
if (err)
goto out;

- if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) {
+ if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
+ test_bit(SOCK_PASSPIDFD, &sock->flags)) && !u->addr) {
err = unix_autobind(sk);
if (err)
goto out;
@@ -1670,6 +1672,8 @@ static void unix_sock_inherit_flags(const struct socket *old,
{
if (test_bit(SOCK_PASSCRED, &old->flags))
set_bit(SOCK_PASSCRED, &new->flags);
+ if (test_bit(SOCK_PASSPIDFD, &old->flags))
+ set_bit(SOCK_PASSPIDFD, &new->flags);
if (test_bit(SOCK_PASSSEC, &old->flags))
set_bit(SOCK_PASSSEC, &new->flags);
}
@@ -1819,8 +1823,10 @@ static bool unix_passcred_enabled(const struct socket *sock,
const struct sock *other)
{
return test_bit(SOCK_PASSCRED, &sock->flags) ||
+ test_bit(SOCK_PASSPIDFD, &sock->flags) ||
!other->sk_socket ||
- test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
+ test_bit(SOCK_PASSCRED, &other->sk_socket->flags) ||
+ test_bit(SOCK_PASSPIDFD, &other->sk_socket->flags);
}

/*
@@ -1922,7 +1928,8 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
goto out;
}

- if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) {
+ if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
+ test_bit(SOCK_PASSPIDFD, &sock->flags)) && !u->addr) {
err = unix_autobind(sk);
if (err)
goto out;
@@ -2824,7 +2831,8 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state,
/* Never glue messages from different writers */
if (!unix_skb_scm_eq(skb, &scm))
break;
- } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
+ } else if (test_bit(SOCK_PASSCRED, &sock->flags) ||
+ test_bit(SOCK_PASSPIDFD, &sock->flags)) {
/* Copy credentials */
scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
unix_set_secdata(&scm, skb);
diff --git a/tools/include/uapi/asm-generic/socket.h b/tools/include/uapi/asm-generic/socket.h
index 8756df13be50..fbbc4bf53ee3 100644
--- a/tools/include/uapi/asm-generic/socket.h
+++ b/tools/include/uapi/asm-generic/socket.h
@@ -121,6 +121,8 @@

#define SO_RCVMARK 75

+#define SO_PASSPIDFD 76
+
#if !defined(__KERNEL__)

#if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
--
2.34.1


2023-03-22 00:46:59

by Kuniyuki Iwashima

[permalink] [raw]
Subject: Re: [PATCH net-next v2 1/3] scm: add SO_PASSPIDFD and SCM_PIDFD

From: Alexander Mikhalitsyn <[email protected]>
Date: Tue, 21 Mar 2023 19:33:40 +0100
> Implement SCM_PIDFD, a new type of CMSG type analogical to SCM_CREDENTIALS,
> but it contains pidfd instead of plain pid, which allows programmers not
> to care about PID reuse problem.
>
> Idea comes from UAPI kernel group:
> https://uapi-group.org/kernel-features/
>
> Big thanks to Christian Brauner and Lennart Poettering for productive
> discussions about this.
>
> Cc: "David S. Miller" <[email protected]>
> Cc: Eric Dumazet <[email protected]>
> Cc: Jakub Kicinski <[email protected]>
> Cc: Paolo Abeni <[email protected]>
> Cc: Leon Romanovsky <[email protected]>
> Cc: David Ahern <[email protected]>
> Cc: Arnd Bergmann <[email protected]>
> Cc: Kees Cook <[email protected]>
> Cc: Christian Brauner <[email protected]>
> Cc: Kuniyuki Iwashima <[email protected]>
> Cc: Lennart Poettering <[email protected]>
> Cc: [email protected]
> Cc: [email protected]
> Cc: [email protected]
> Signed-off-by: Alexander Mikhalitsyn <[email protected]>

Reviewed-by: Kuniyuki Iwashima <[email protected]>

Thanks,
Kuniyuki

2023-03-22 00:48:44

by Kuniyuki Iwashima

[permalink] [raw]
Subject: Re: [PATCH net-next v2 2/3] net: core: add getsockopt SO_PEERPIDFD

From: Alexander Mikhalitsyn <[email protected]>
Date: Tue, 21 Mar 2023 19:33:41 +0100
> Add SO_PEERPIDFD which allows to get pidfd of peer socket holder pidfd.
> This thing is direct analog of SO_PEERCRED which allows to get plain PID.
>
> Cc: "David S. Miller" <[email protected]>
> Cc: Eric Dumazet <[email protected]>
> Cc: Jakub Kicinski <[email protected]>
> Cc: Paolo Abeni <[email protected]>
> Cc: Leon Romanovsky <[email protected]>
> Cc: David Ahern <[email protected]>
> Cc: Arnd Bergmann <[email protected]>
> Cc: Kees Cook <[email protected]>
> Cc: Christian Brauner <[email protected]>
> Cc: Kuniyuki Iwashima <[email protected]>
> Cc: Lennart Poettering <[email protected]>
> Cc: [email protected]
> Cc: [email protected]
> Cc: [email protected]
> Signed-off-by: Alexander Mikhalitsyn <[email protected]>

Reviewed-by: Kuniyuki Iwashima <[email protected]>

Thanks,
Kuniyuki

2023-03-22 00:50:50

by Kuniyuki Iwashima

[permalink] [raw]
Subject: Re: [PATCH net-next v2 3/3] selftests: net: add SCM_PIDFD / SO_PEERPIDFD test

From: Alexander Mikhalitsyn <[email protected]>
Date: Tue, 21 Mar 2023 19:33:42 +0100
> Basic test to check consistency between:
> - SCM_CREDENTIALS and SCM_PIDFD
> - SO_PEERCRED and SO_PEERPIDFD
>
> Cc: "David S. Miller" <[email protected]>
> Cc: Eric Dumazet <[email protected]>
> Cc: Jakub Kicinski <[email protected]>
> Cc: Paolo Abeni <[email protected]>
> Cc: Leon Romanovsky <[email protected]>
> Cc: David Ahern <[email protected]>
> Cc: Arnd Bergmann <[email protected]>
> Cc: Kees Cook <[email protected]>
> Cc: Christian Brauner <[email protected]>
> Cc: [email protected]
> Cc: [email protected]
> Cc: [email protected]
> Cc: [email protected]
> Signed-off-by: Alexander Mikhalitsyn <[email protected]>
> ---
> tools/testing/selftests/net/.gitignore | 1 +
> tools/testing/selftests/net/af_unix/Makefile | 3 +-
> .../testing/selftests/net/af_unix/scm_pidfd.c | 336 ++++++++++++++++++
> 3 files changed, 339 insertions(+), 1 deletion(-)
> create mode 100644 tools/testing/selftests/net/af_unix/scm_pidfd.c
>
> diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
> index a6911cae368c..f2d23a1df596 100644
> --- a/tools/testing/selftests/net/.gitignore
> +++ b/tools/testing/selftests/net/.gitignore
> @@ -25,6 +25,7 @@ reuseport_bpf_cpu
> reuseport_bpf_numa
> reuseport_dualstack
> rxtimestamp
> +scm_pidfd
> sk_bind_sendto_listen
> sk_connect_zero_addr
> socket
> diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile
> index 1e4b397cece6..221c387a7d7f 100644
> --- a/tools/testing/selftests/net/af_unix/Makefile
> +++ b/tools/testing/selftests/net/af_unix/Makefile
> @@ -1,3 +1,4 @@
> -TEST_GEN_PROGS := diag_uid test_unix_oob unix_connect
> +CFLAGS += $(KHDR_INCLUDES)
> +TEST_GEN_PROGS := diag_uid test_unix_oob unix_connect scm_pidfd
>
> include ../../lib.mk
> diff --git a/tools/testing/selftests/net/af_unix/scm_pidfd.c b/tools/testing/selftests/net/af_unix/scm_pidfd.c
> new file mode 100644
> index 000000000000..fa502510ee9e
> --- /dev/null
> +++ b/tools/testing/selftests/net/af_unix/scm_pidfd.c
> @@ -0,0 +1,336 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#define _GNU_SOURCE
> +#include <error.h>
> +#include <limits.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <sys/socket.h>
> +#include <linux/socket.h>
> +#include <unistd.h>
> +#include <string.h>
> +#include <errno.h>
> +#include <sys/un.h>
> +#include <sys/signal.h>
> +#include <sys/types.h>
> +#include <sys/wait.h>

#include "../../kselftest_harness.h"

Could you rewrite with kselftest ?
https://www.kernel.org/doc/html/latest/dev-tools/kselftest.html

Also, it would be better to have all combinations as FIXTURE_VARIANT()
covered by unix_passcred_enabled() like

(self, peer) = (0, 0), (SO_PASSPIDFD, 0), (0, SO_PASSPIDFD),
(SO_PASSPIDFD, SO_PASSPIDFD), ...
(SO_PASSPIDFD, SO_PEERCRED), ...

Thanks,
Kuniyuki


> +
> +#define clean_errno() (errno == 0 ? "None" : strerror(errno))
> +#define log_err(MSG, ...) \
> + fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", __FILE__, __LINE__, \
> + clean_errno(), ##__VA_ARGS__)
> +
> +#ifndef SCM_PIDFD
> +#define SCM_PIDFD 0x04
> +#endif
> +
> +static pid_t client_pid;
> +static char sock_name[32];
> +
> +static void die(int status)
> +{
> + unlink(sock_name);
> + kill(client_pid, SIGTERM);
> + exit(status);
> +}
> +
> +static void child_die()
> +{
> + kill(getppid(), SIGTERM);
> + exit(1);
> +}
> +
> +static int safe_int(const char *numstr, int *converted)
> +{
> + char *err = NULL;
> + long sli;
> +
> + errno = 0;
> + sli = strtol(numstr, &err, 0);
> + if (errno == ERANGE && (sli == LONG_MAX || sli == LONG_MIN))
> + return -ERANGE;
> +
> + if (errno != 0 && sli == 0)
> + return -EINVAL;
> +
> + if (err == numstr || *err != '\0')
> + return -EINVAL;
> +
> + if (sli > INT_MAX || sli < INT_MIN)
> + return -ERANGE;
> +
> + *converted = (int)sli;
> + return 0;
> +}
> +
> +static int char_left_gc(const char *buffer, size_t len)
> +{
> + size_t i;
> +
> + for (i = 0; i < len; i++) {
> + if (buffer[i] == ' ' || buffer[i] == '\t')
> + continue;
> +
> + return i;
> + }
> +
> + return 0;
> +}
> +
> +static int char_right_gc(const char *buffer, size_t len)
> +{
> + int i;
> +
> + for (i = len - 1; i >= 0; i--) {
> + if (buffer[i] == ' ' || buffer[i] == '\t' ||
> + buffer[i] == '\n' || buffer[i] == '\0')
> + continue;
> +
> + return i + 1;
> + }
> +
> + return 0;
> +}
> +
> +static char *trim_whitespace_in_place(char *buffer)
> +{
> + buffer += char_left_gc(buffer, strlen(buffer));
> + buffer[char_right_gc(buffer, strlen(buffer))] = '\0';
> + return buffer;
> +}
> +
> +/* borrowed (with all helpers) from pidfd/pidfd_open_test.c */
> +static pid_t get_pid_from_fdinfo_file(int pidfd, const char *key, size_t keylen)
> +{
> + int ret;
> + char path[512];
> + FILE *f;
> + size_t n = 0;
> + pid_t result = -1;
> + char *line = NULL;
> +
> + snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", pidfd);
> +
> + f = fopen(path, "re");
> + if (!f)
> + return -1;
> +
> + while (getline(&line, &n, f) != -1) {
> + char *numstr;
> +
> + if (strncmp(line, key, keylen))
> + continue;
> +
> + numstr = trim_whitespace_in_place(line + 4);
> + ret = safe_int(numstr, &result);
> + if (ret < 0)
> + goto out;
> +
> + break;
> + }
> +
> +out:
> + free(line);
> + fclose(f);
> + return result;
> +}
> +
> +static int cmsg_check(int fd)
> +{
> + struct msghdr msg = { 0 };
> + struct cmsghdr *cmsg;
> + struct iovec iov;
> + struct ucred *ucred = NULL;
> + int data = 0;
> + char control[CMSG_SPACE(sizeof(struct ucred)) +
> + CMSG_SPACE(sizeof(int))] = { 0 };
> + int *pidfd = NULL;
> + pid_t parent_pid;
> + int err;
> +
> + iov.iov_base = &data;
> + iov.iov_len = sizeof(data);
> +
> + msg.msg_iov = &iov;
> + msg.msg_iovlen = 1;
> + msg.msg_control = control;
> + msg.msg_controllen = sizeof(control);
> +
> + err = recvmsg(fd, &msg, 0);
> + if (err < 0) {
> + log_err("recvmsg");
> + return 1;
> + }
> +
> + if (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
> + log_err("recvmsg: truncated");
> + return 1;
> + }
> +
> + for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL;
> + cmsg = CMSG_NXTHDR(&msg, cmsg)) {
> + if (cmsg->cmsg_level == SOL_SOCKET &&
> + cmsg->cmsg_type == SCM_PIDFD) {
> + if (cmsg->cmsg_len < sizeof(*pidfd)) {
> + log_err("CMSG parse: SCM_PIDFD wrong len");
> + return 1;
> + }
> +
> + pidfd = (void *)CMSG_DATA(cmsg);
> + }
> +
> + if (cmsg->cmsg_level == SOL_SOCKET &&
> + cmsg->cmsg_type == SCM_CREDENTIALS) {
> + if (cmsg->cmsg_len < sizeof(*ucred)) {
> + log_err("CMSG parse: SCM_CREDENTIALS wrong len");
> + return 1;
> + }
> +
> + ucred = (void *)CMSG_DATA(cmsg);
> + }
> + }
> +
> + /* send(pfd, "x", sizeof(char), 0) */
> + if (data != 'x') {
> + log_err("recvmsg: data corruption");
> + return 1;
> + }
> +
> + if (!pidfd) {
> + log_err("CMSG parse: SCM_PIDFD not found");
> + return 1;
> + }
> +
> + if (!ucred) {
> + log_err("CMSG parse: SCM_CREDENTIALS not found");
> + return 1;
> + }
> +
> + /* pidfd from SCM_PIDFD should point to the parent process PID */
> + parent_pid =
> + get_pid_from_fdinfo_file(*pidfd, "Pid:", sizeof("Pid:") - 1);
> + if (parent_pid != getppid()) {
> + log_err("wrong SCM_PIDFD %d != %d", parent_pid, getppid());
> + return 1;
> + }
> +
> + return 0;
> +}
> +
> +void client(struct sockaddr_un *listen_addr)
> +{
> + int cfd;
> + socklen_t len;
> + struct ucred peer_cred;
> + int peer_pidfd;
> + pid_t peer_pid;
> + int on = 0;
> +
> + cfd = socket(AF_UNIX, SOCK_STREAM, 0);
> + if (cfd < 0) {
> + log_err("socket");
> + child_die();
> + }
> +
> + if (connect(cfd, (struct sockaddr *)listen_addr,
> + sizeof(*listen_addr)) != 0) {
> + log_err("connect");
> + child_die();
> + }
> +
> + on = 1;
> + if (setsockopt(cfd, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on))) {
> + log_err("Failed to set SO_PASSCRED");
> + child_die();
> + }
> +
> + if (setsockopt(cfd, SOL_SOCKET, SO_PASSPIDFD, &on, sizeof(on))) {
> + log_err("Failed to set SO_PASSPIDFD");
> + child_die();
> + }
> +
> + if (cmsg_check(cfd)) {
> + log_err("cmsg_check failed");
> + child_die();
> + }
> +
> + len = sizeof(peer_cred);
> + if (getsockopt(cfd, SOL_SOCKET, SO_PEERCRED, &peer_cred, &len)) {
> + log_err("Failed to get SO_PEERCRED");
> + child_die();
> + }
> +
> + len = sizeof(peer_pidfd);
> + if (getsockopt(cfd, SOL_SOCKET, SO_PEERPIDFD, &peer_pidfd, &len)) {
> + log_err("Failed to get SO_PEERPIDFD");
> + child_die();
> + }
> +
> + /* pid from SO_PEERCRED should point to the parent process PID */
> + if (peer_cred.pid != getppid()) {
> + log_err("Failed to get SO_PEERPIDFD");
> + child_die();
> + }
> +
> + peer_pid = get_pid_from_fdinfo_file(peer_pidfd,
> + "Pid:", sizeof("Pid:") - 1);
> + if (peer_pid != peer_cred.pid) {
> + log_err("Failed to get SO_PEERPIDFD");
> + child_die();
> + }
> +}
> +
> +int main(int argc, char **argv)
> +{
> + int lfd, pfd;
> + int child_status = 0;
> + struct sockaddr_un listen_addr;
> +
> + lfd = socket(AF_UNIX, SOCK_STREAM, 0);
> + if (lfd < 0) {
> + perror("socket");
> + exit(1);
> + }
> +
> + memset(&listen_addr, 0, sizeof(listen_addr));
> + listen_addr.sun_family = AF_UNIX;
> + sprintf(sock_name, "scm_pidfd_%d", getpid());
> + unlink(sock_name);
> + strcpy(listen_addr.sun_path, sock_name);
> +
> + if ((bind(lfd, (struct sockaddr *)&listen_addr, sizeof(listen_addr))) !=
> + 0) {
> + perror("socket bind failed");
> + exit(1);
> + }
> +
> + if (listen(lfd, 1) < 0) {
> + perror("listen");
> + exit(1);
> + }
> +
> + client_pid = fork();
> + if (client_pid < 0) {
> + perror("fork");
> + exit(1);
> + }
> +
> + if (client_pid == 0) {
> + client(&listen_addr);
> + exit(0);
> + }
> +
> + pfd = accept(lfd, NULL, NULL);
> + if (pfd < 0) {
> + perror("accept");
> + die(1);
> + }
> +
> + if (send(pfd, "x", sizeof(char), 0) < 0) {
> + perror("send");
> + die(1);
> + }
> +
> + waitpid(client_pid, &child_status, 0);
> + die(WIFEXITED(child_status) ? WEXITSTATUS(child_status) : 1);
> + die(0);
> +}
> \ No newline at end of file
> --
> 2.34.1

2023-03-22 13:47:53

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH net-next v2 1/3] scm: add SO_PASSPIDFD and SCM_PIDFD

Hi Alexander,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on net-next/main]

url: https://github.com/intel-lab-lkp/linux/commits/Alexander-Mikhalitsyn/scm-add-SO_PASSPIDFD-and-SCM_PIDFD/20230322-024808
patch link: https://lore.kernel.org/r/20230321183342.617114-2-aleksandr.mikhalitsyn%40canonical.com
patch subject: [PATCH net-next v2 1/3] scm: add SO_PASSPIDFD and SCM_PIDFD
config: arm-allmodconfig (https://download.01.org/0day-ci/archive/20230322/[email protected]/config)
compiler: arm-linux-gnueabi-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/intel-lab-lkp/linux/commit/491b69039f4479e1e0fb3af635c96989cdd23734
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Alexander-Mikhalitsyn/scm-add-SO_PASSPIDFD-and-SCM_PIDFD/20230322-024808
git checkout 491b69039f4479e1e0fb3af635c96989cdd23734
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=arm olddefconfig
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=arm SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <[email protected]>
| Link: https://lore.kernel.org/oe-kbuild-all/[email protected]/

All errors (new ones prefixed by >>, old ones prefixed by <<):

>> ERROR: modpost: "pidfd_create" [net/unix/unix.ko] undefined!

--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests

2023-03-22 14:22:25

by Christian Brauner

[permalink] [raw]
Subject: Re: [PATCH net-next v2 0/3] Add SCM_PIDFD and SO_PEERPIDFD

On Tue, Mar 21, 2023 at 07:33:39PM +0100, Alexander Mikhalitsyn wrote:
> 1. Implement SCM_PIDFD, a new type of CMSG type analogical to SCM_CREDENTIALS,
> but it contains pidfd instead of plain pid, which allows programmers not
> to care about PID reuse problem.
>
> 2. Add SO_PEERPIDFD which allows to get pidfd of peer socket holder pidfd.
> This thing is direct analog of SO_PEERCRED which allows to get plain PID.
>
> 3. Add SCM_PIDFD / SO_PEERPIDFD kselftest
>
> Idea comes from UAPI kernel group:
> https://uapi-group.org/kernel-features/
>
> Big thanks to Christian Brauner and Lennart Poettering for productive
> discussions about this.
>
> Cc: "David S. Miller" <[email protected]>
> Cc: Eric Dumazet <[email protected]>
> Cc: Jakub Kicinski <[email protected]>
> Cc: Paolo Abeni <[email protected]>
> Cc: Leon Romanovsky <[email protected]>
> Cc: David Ahern <[email protected]>
> Cc: Arnd Bergmann <[email protected]>
> Cc: Kees Cook <[email protected]>
> Cc: Christian Brauner <[email protected]>
> Cc: Kuniyuki Iwashima <[email protected]>
> Cc: Lennart Poettering <[email protected]>
>
> Alexander Mikhalitsyn (3):
> scm: add SO_PASSPIDFD and SCM_PIDFD
> net: core: add getsockopt SO_PEERPIDFD
> selftests: net: add SCM_PIDFD / SO_PEERPIDFD test
>
> arch/alpha/include/uapi/asm/socket.h | 3 +
> arch/mips/include/uapi/asm/socket.h | 3 +
> arch/parisc/include/uapi/asm/socket.h | 3 +
> arch/sparc/include/uapi/asm/socket.h | 3 +
> include/linux/net.h | 1 +
> include/linux/socket.h | 1 +
> include/net/scm.h | 14 +-
> include/uapi/asm-generic/socket.h | 3 +
> net/core/sock.c | 32 ++
> net/mptcp/sockopt.c | 1 +
> net/unix/af_unix.c | 18 +-
> tools/include/uapi/asm-generic/socket.h | 3 +
> tools/testing/selftests/net/.gitignore | 1 +
> tools/testing/selftests/net/af_unix/Makefile | 3 +-
> .../testing/selftests/net/af_unix/scm_pidfd.c | 336 ++++++++++++++++++
> 15 files changed, 417 insertions(+), 8 deletions(-)
> create mode 100644 tools/testing/selftests/net/af_unix/scm_pidfd.c

What's the commit for this work? Because this seems to fail to apply
cleanly on anything from v6.3-rc1 until v6.3-rc3.

2023-03-22 14:32:05

by Aleksandr Mikhalitsyn

[permalink] [raw]
Subject: Re: [PATCH net-next v2 0/3] Add SCM_PIDFD and SO_PEERPIDFD

On Wed, Mar 22, 2023 at 3:13 PM Christian Brauner <[email protected]> wrote:
>
> On Tue, Mar 21, 2023 at 07:33:39PM +0100, Alexander Mikhalitsyn wrote:
> > 1. Implement SCM_PIDFD, a new type of CMSG type analogical to SCM_CREDENTIALS,
> > but it contains pidfd instead of plain pid, which allows programmers not
> > to care about PID reuse problem.
> >
> > 2. Add SO_PEERPIDFD which allows to get pidfd of peer socket holder pidfd.
> > This thing is direct analog of SO_PEERCRED which allows to get plain PID.
> >
> > 3. Add SCM_PIDFD / SO_PEERPIDFD kselftest
> >
> > Idea comes from UAPI kernel group:
> > https://uapi-group.org/kernel-features/
> >
> > Big thanks to Christian Brauner and Lennart Poettering for productive
> > discussions about this.
> >
> > Cc: "David S. Miller" <[email protected]>
> > Cc: Eric Dumazet <[email protected]>
> > Cc: Jakub Kicinski <[email protected]>
> > Cc: Paolo Abeni <[email protected]>
> > Cc: Leon Romanovsky <[email protected]>
> > Cc: David Ahern <[email protected]>
> > Cc: Arnd Bergmann <[email protected]>
> > Cc: Kees Cook <[email protected]>
> > Cc: Christian Brauner <[email protected]>
> > Cc: Kuniyuki Iwashima <[email protected]>
> > Cc: Lennart Poettering <[email protected]>
> >
> > Alexander Mikhalitsyn (3):
> > scm: add SO_PASSPIDFD and SCM_PIDFD
> > net: core: add getsockopt SO_PEERPIDFD
> > selftests: net: add SCM_PIDFD / SO_PEERPIDFD test
> >
> > arch/alpha/include/uapi/asm/socket.h | 3 +
> > arch/mips/include/uapi/asm/socket.h | 3 +
> > arch/parisc/include/uapi/asm/socket.h | 3 +
> > arch/sparc/include/uapi/asm/socket.h | 3 +
> > include/linux/net.h | 1 +
> > include/linux/socket.h | 1 +
> > include/net/scm.h | 14 +-
> > include/uapi/asm-generic/socket.h | 3 +
> > net/core/sock.c | 32 ++
> > net/mptcp/sockopt.c | 1 +
> > net/unix/af_unix.c | 18 +-
> > tools/include/uapi/asm-generic/socket.h | 3 +
> > tools/testing/selftests/net/.gitignore | 1 +
> > tools/testing/selftests/net/af_unix/Makefile | 3 +-
> > .../testing/selftests/net/af_unix/scm_pidfd.c | 336 ++++++++++++++++++
> > 15 files changed, 417 insertions(+), 8 deletions(-)
> > create mode 100644 tools/testing/selftests/net/af_unix/scm_pidfd.c
>
> What's the commit for this work? Because this seems to fail to apply
> cleanly on anything from v6.3-rc1 until v6.3-rc3.

It's based on net-next https://git.kernel.org/netdev/net-next/c/a02d83f9947d

Kind regards,
Alex

>

2023-03-22 15:43:34

by Christian Brauner

[permalink] [raw]
Subject: Re: [PATCH net-next v2 2/3] net: core: add getsockopt SO_PEERPIDFD

On Tue, Mar 21, 2023 at 07:33:41PM +0100, Alexander Mikhalitsyn wrote:
> Add SO_PEERPIDFD which allows to get pidfd of peer socket holder pidfd.
> This thing is direct analog of SO_PEERCRED which allows to get plain PID.
>
> Cc: "David S. Miller" <[email protected]>
> Cc: Eric Dumazet <[email protected]>
> Cc: Jakub Kicinski <[email protected]>
> Cc: Paolo Abeni <[email protected]>
> Cc: Leon Romanovsky <[email protected]>
> Cc: David Ahern <[email protected]>
> Cc: Arnd Bergmann <[email protected]>
> Cc: Kees Cook <[email protected]>
> Cc: Christian Brauner <[email protected]>
> Cc: Kuniyuki Iwashima <[email protected]>
> Cc: Lennart Poettering <[email protected]>
> Cc: [email protected]
> Cc: [email protected]
> Cc: [email protected]
> Signed-off-by: Alexander Mikhalitsyn <[email protected]>
> ---
> v2:
> According to review comments from Kuniyuki Iwashima and Christian Brauner:
> - use pidfd_create(..) retval as a result
> - whitespace change
> ---
> arch/alpha/include/uapi/asm/socket.h | 1 +
> arch/mips/include/uapi/asm/socket.h | 1 +
> arch/parisc/include/uapi/asm/socket.h | 1 +
> arch/sparc/include/uapi/asm/socket.h | 1 +
> include/uapi/asm-generic/socket.h | 1 +
> net/core/sock.c | 21 +++++++++++++++++++++
> tools/include/uapi/asm-generic/socket.h | 1 +
> 7 files changed, 27 insertions(+)
>
> diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
> index ff310613ae64..e94f621903fe 100644
> --- a/arch/alpha/include/uapi/asm/socket.h
> +++ b/arch/alpha/include/uapi/asm/socket.h
> @@ -138,6 +138,7 @@
> #define SO_RCVMARK 75
>
> #define SO_PASSPIDFD 76
> +#define SO_PEERPIDFD 77
>
> #if !defined(__KERNEL__)
>
> diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
> index 762dcb80e4ec..60ebaed28a4c 100644
> --- a/arch/mips/include/uapi/asm/socket.h
> +++ b/arch/mips/include/uapi/asm/socket.h
> @@ -149,6 +149,7 @@
> #define SO_RCVMARK 75
>
> #define SO_PASSPIDFD 76
> +#define SO_PEERPIDFD 77
>
> #if !defined(__KERNEL__)
>
> diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
> index df16a3e16d64..be264c2b1a11 100644
> --- a/arch/parisc/include/uapi/asm/socket.h
> +++ b/arch/parisc/include/uapi/asm/socket.h
> @@ -130,6 +130,7 @@
> #define SO_RCVMARK 0x4049
>
> #define SO_PASSPIDFD 0x404A
> +#define SO_PEERPIDFD 0x404B
>
> #if !defined(__KERNEL__)
>
> diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
> index 6e2847804fea..682da3714686 100644
> --- a/arch/sparc/include/uapi/asm/socket.h
> +++ b/arch/sparc/include/uapi/asm/socket.h
> @@ -131,6 +131,7 @@
> #define SO_RCVMARK 0x0054
>
> #define SO_PASSPIDFD 0x0055
> +#define SO_PEERPIDFD 0x0056
>
> #if !defined(__KERNEL__)
>
> diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
> index b76169fdb80b..8ce8a39a1e5f 100644
> --- a/include/uapi/asm-generic/socket.h
> +++ b/include/uapi/asm-generic/socket.h
> @@ -133,6 +133,7 @@
> #define SO_RCVMARK 75
>
> #define SO_PASSPIDFD 76
> +#define SO_PEERPIDFD 77
>
> #if !defined(__KERNEL__)
>
> diff --git a/net/core/sock.c b/net/core/sock.c
> index 3f974246ba3e..85c269ca9d8a 100644
> --- a/net/core/sock.c
> +++ b/net/core/sock.c
> @@ -1763,6 +1763,27 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
> goto lenout;
> }
>
> + case SO_PEERPIDFD:
> + {
> + struct pid *peer_pid;
> + int pidfd;
> +
> + if (len > sizeof(pidfd))
> + len = sizeof(pidfd);
> +
> + spin_lock(&sk->sk_peer_lock);
> + peer_pid = get_pid(sk->sk_peer_pid);
> + spin_unlock(&sk->sk_peer_lock);
> +
> + pidfd = pidfd_create(peer_pid, 0);
> +
> + put_pid(peer_pid);
> +
> + if (copy_to_sockptr(optval, &pidfd, len))
> + return -EFAULT;

This leaks the pidfd. We could do:

if (copy_to_sockptr(optval, &pidfd, len)) {
close_fd(pidfd);
return -EFAULT;
}

but it's a nasty anti-pattern to install the fd in the caller's fdtable
and then close it again. So let's avoid it if we can. Since you can only
set one socket option per setsockopt() sycall we should be able to
reserve an fd and pidfd_file, do the stuff that might fail, and then
call fd_install. So that would roughly be:

peer_pid = get_pid(sk->sk_peer_pid);
pidfd_file = pidfd_file_create(peer_pid, 0, &pidfd);
f (copy_to_sockptr(optval, &pidfd, len))
return -EFAULT;
goto lenout:

.
.
.

lenout:
if (copy_to_sockptr(optlen, &len, sizeof(int)))
return -EFAULT;

// Made it safely, install pidfd now.
fd_install(pidfd, pidfd_file)

(See below for the associated api I'm going to publish independent of
this as kernel/fork.c and fanotify both could use it.)

But now, let's look at net/socket.c there's another wrinkle. So let's say you
have successfully installed the pidfd then it seems you can still fail later:

if (level == SOL_SOCKET)
err = sock_getsockopt(sock, level, optname, optval, optlen);
else if (unlikely(!sock->ops->getsockopt))
err = -EOPNOTSUPP;
else
err = sock->ops->getsockopt(sock, level, optname, optval,
optlen);

if (!in_compat_syscall())
err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname,
optval, optlen, max_optlen,
err);

out_put:
fput_light(sock->file, fput_needed);
return err;

If the bpf hook returns an error we've placed an fd into the caller's sockopt
buffer without their knowledge.

From 4fee16f0920308bee2531fd3b08484f607eb5830 Mon Sep 17 00:00:00 2001
From: Christian Brauner <[email protected]>
Date: Wed, 22 Mar 2023 15:59:02 +0100
Subject: [PATCH 1/3] [HERE BE DRAGONS - DRAFT - __UNTESTED__] pid: add
pidfd_file_create()

Reserve and fd and pidfile, do stuff that might fail, install fd when
point of no return.

[HERE BE DRAGONS - DRAFT - __UNTESTED__] pid: add pidfd_file_create()

Signed-off-by: Christian Brauner <[email protected]>
---
include/linux/pid.h | 1 +
kernel/pid.c | 45 +++++++++++++++++++++++++++++++++------------
2 files changed, 34 insertions(+), 12 deletions(-)

diff --git a/include/linux/pid.h b/include/linux/pid.h
index 343abf22092e..c486dbc4d7b6 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -80,6 +80,7 @@ extern struct pid *pidfd_pid(const struct file *file);
struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags);
struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags);
int pidfd_create(struct pid *pid, unsigned int flags);
+struct file *pidfd_file_create(struct pid *pid, unsigned int flags, int *pidfd);

static inline struct pid *get_pid(struct pid *pid)
{
diff --git a/kernel/pid.c b/kernel/pid.c
index 3fbc5e46b721..8d0924f1dbf6 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -576,6 +576,32 @@ struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags)
return task;
}

+struct file *pidfd_file_create(struct pid *pid, unsigned int flags, int *pidfd)
+{
+ int fd;
+ struct file *pidfile;
+
+ if (!pid || !pid_has_task(pid, PIDTYPE_TGID))
+ return ERR_PTR(-EINVAL);
+
+ if (flags & ~(O_NONBLOCK | O_RDWR | O_CLOEXEC))
+ return ERR_PTR(-EINVAL);
+
+ fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
+ if (fd < 0)
+ return ERR_PTR(fd);
+
+ pidfile = anon_inode_getfile("[pidfd]", &pidfd_fops, pid,
+ flags | O_RDWR | O_CLOEXEC);
+ if (IS_ERR(pidfile)) {
+ put_unused_fd(fd);
+ return pidfile;
+ }
+ get_pid(pid); /* held by pidfile now */
+ *pidfd = fd;
+ return pidfile;
+}
+
/**
* pidfd_create() - Create a new pid file descriptor.
*
@@ -594,20 +620,15 @@ struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags)
*/
int pidfd_create(struct pid *pid, unsigned int flags)
{
- int fd;
+ int pidfd;
+ struct file *pidfile;

- if (!pid || !pid_has_task(pid, PIDTYPE_TGID))
- return -EINVAL;
+ pidfile = pidfd_file_create(pid, flags, &pidfd);
+ if (IS_ERR(pidfile))
+ return PTR_ERR(pidfile);

- if (flags & ~(O_NONBLOCK | O_RDWR | O_CLOEXEC))
- return -EINVAL;
-
- fd = anon_inode_getfd("[pidfd]", &pidfd_fops, get_pid(pid),
- flags | O_RDWR | O_CLOEXEC);
- if (fd < 0)
- put_pid(pid);
-
- return fd;
+ fd_install(pidfd, pidfile);
+ return pidfd;
}

/**
--
2.34.1

From c336f1c6cc39faa5aef4fbedd3c4f8eca51d8436 Mon Sep 17 00:00:00 2001
From: Christian Brauner <[email protected]>
Date: Wed, 22 Mar 2023 15:59:54 +0100
Subject: [PATCH 2/3] [HERE BE DRAGONS - DRAFT - __UNTESTED__] fork: use
pidfd_file_create()

Signed-off-by: Christian Brauner <[email protected]>
---
kernel/fork.c | 11 +----------
1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/kernel/fork.c b/kernel/fork.c
index f68954d05e89..c8dc78ee0a74 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2296,20 +2296,11 @@ static __latent_entropy struct task_struct *copy_process(
* if the fd table isn't shared).
*/
if (clone_flags & CLONE_PIDFD) {
- retval = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
- if (retval < 0)
- goto bad_fork_free_pid;
-
- pidfd = retval;
-
- pidfile = anon_inode_getfile("[pidfd]", &pidfd_fops, pid,
- O_RDWR | O_CLOEXEC);
+ pidfile = pidfd_file_create(pid, O_RDWR | O_CLOEXEC, &pidfd);
if (IS_ERR(pidfile)) {
- put_unused_fd(pidfd);
retval = PTR_ERR(pidfile);
goto bad_fork_free_pid;
}
- get_pid(pid); /* held by pidfile now */

retval = put_user(pidfd, args->pidfd);
if (retval)
--
2.34.1

From 0897f68fe06a8777d8ec600fdc719143f76095b1 Mon Sep 17 00:00:00 2001
From: Christian Brauner <[email protected]>
Date: Wed, 22 Mar 2023 16:02:50 +0100
Subject: [PATCH 3/3] [HERE BE DRAGONS - DRAFT - __UNTESTED__] fanotify: use
pidfd_file_create()

Signed-off-by: Christian Brauner <[email protected]>
---
fs/notify/fanotify/fanotify_user.c | 15 +++++++++++----
1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 8f430bfad487..4a8db6b5f690 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -665,6 +665,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
unsigned int pidfd_mode = info_mode & FAN_REPORT_PIDFD;
struct file *f = NULL;
int ret, pidfd = FAN_NOPIDFD, fd = FAN_NOFD;
+ struct file *pidfd_file = NULL;

pr_debug("%s: group=%p event=%p\n", __func__, group, event);

@@ -718,9 +719,11 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
!pid_has_task(event->pid, PIDTYPE_TGID)) {
pidfd = FAN_NOPIDFD;
} else {
- pidfd = pidfd_create(event->pid, 0);
- if (pidfd < 0)
+ pidfd_file = pidfd_file_create(event->pid, 0, &pidfd);
+ if (IS_ERR(pidfd_file)) {
pidfd = FAN_EPIDFD;
+ pidfd_file = NULL;
+ }
}
}

@@ -750,6 +753,8 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,

if (f)
fd_install(fd, f);
+ if (pidfd_file)
+ fd_install(pidfd, pidfd_file);

return metadata.event_len;

@@ -759,8 +764,10 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
fput(f);
}

- if (pidfd >= 0)
- close_fd(pidfd);
+ if (pidfd >= 0) {
+ put_unused_fd(pidfd);
+ fput(pidfd_file);
+ }

return ret;
}
--
2.34.1

2023-03-22 15:57:38

by Christian Brauner

[permalink] [raw]
Subject: Re: [PATCH net-next v2 1/3] scm: add SO_PASSPIDFD and SCM_PIDFD

On Tue, Mar 21, 2023 at 07:33:40PM +0100, Alexander Mikhalitsyn wrote:
> Implement SCM_PIDFD, a new type of CMSG type analogical to SCM_CREDENTIALS,
> but it contains pidfd instead of plain pid, which allows programmers not
> to care about PID reuse problem.
>
> Idea comes from UAPI kernel group:
> https://uapi-group.org/kernel-features/
>
> Big thanks to Christian Brauner and Lennart Poettering for productive
> discussions about this.
>
> Cc: "David S. Miller" <[email protected]>
> Cc: Eric Dumazet <[email protected]>
> Cc: Jakub Kicinski <[email protected]>
> Cc: Paolo Abeni <[email protected]>
> Cc: Leon Romanovsky <[email protected]>
> Cc: David Ahern <[email protected]>
> Cc: Arnd Bergmann <[email protected]>
> Cc: Kees Cook <[email protected]>
> Cc: Christian Brauner <[email protected]>
> Cc: Kuniyuki Iwashima <[email protected]>
> Cc: Lennart Poettering <[email protected]>
> Cc: [email protected]
> Cc: [email protected]
> Cc: [email protected]
> Signed-off-by: Alexander Mikhalitsyn <[email protected]>
> ---
> v2:
> According to review comments from Kuniyuki Iwashima and Christian Brauner:
> - use pidfd_create(..) retval as a result
> - whitespace change
> ---
> arch/alpha/include/uapi/asm/socket.h | 2 ++
> arch/mips/include/uapi/asm/socket.h | 2 ++
> arch/parisc/include/uapi/asm/socket.h | 2 ++
> arch/sparc/include/uapi/asm/socket.h | 2 ++
> include/linux/net.h | 1 +
> include/linux/socket.h | 1 +
> include/net/scm.h | 14 ++++++++++++--
> include/uapi/asm-generic/socket.h | 2 ++
> net/core/sock.c | 11 +++++++++++
> net/mptcp/sockopt.c | 1 +
> net/unix/af_unix.c | 18 +++++++++++++-----
> tools/include/uapi/asm-generic/socket.h | 2 ++
> 12 files changed, 51 insertions(+), 7 deletions(-)
>
> diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
> index 739891b94136..ff310613ae64 100644
> --- a/arch/alpha/include/uapi/asm/socket.h
> +++ b/arch/alpha/include/uapi/asm/socket.h
> @@ -137,6 +137,8 @@
>
> #define SO_RCVMARK 75
>
> +#define SO_PASSPIDFD 76
> +
> #if !defined(__KERNEL__)
>
> #if __BITS_PER_LONG == 64
> diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
> index 18f3d95ecfec..762dcb80e4ec 100644
> --- a/arch/mips/include/uapi/asm/socket.h
> +++ b/arch/mips/include/uapi/asm/socket.h
> @@ -148,6 +148,8 @@
>
> #define SO_RCVMARK 75
>
> +#define SO_PASSPIDFD 76
> +
> #if !defined(__KERNEL__)
>
> #if __BITS_PER_LONG == 64
> diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
> index f486d3dfb6bb..df16a3e16d64 100644
> --- a/arch/parisc/include/uapi/asm/socket.h
> +++ b/arch/parisc/include/uapi/asm/socket.h
> @@ -129,6 +129,8 @@
>
> #define SO_RCVMARK 0x4049
>
> +#define SO_PASSPIDFD 0x404A
> +
> #if !defined(__KERNEL__)
>
> #if __BITS_PER_LONG == 64
> diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
> index 2fda57a3ea86..6e2847804fea 100644
> --- a/arch/sparc/include/uapi/asm/socket.h
> +++ b/arch/sparc/include/uapi/asm/socket.h
> @@ -130,6 +130,8 @@
>
> #define SO_RCVMARK 0x0054
>
> +#define SO_PASSPIDFD 0x0055
> +
> #if !defined(__KERNEL__)
>
>
> diff --git a/include/linux/net.h b/include/linux/net.h
> index b73ad8e3c212..c234dfbe7a30 100644
> --- a/include/linux/net.h
> +++ b/include/linux/net.h
> @@ -43,6 +43,7 @@ struct net;
> #define SOCK_PASSSEC 4
> #define SOCK_SUPPORT_ZC 5
> #define SOCK_CUSTOM_SOCKOPT 6
> +#define SOCK_PASSPIDFD 7
>
> #ifndef ARCH_HAS_SOCKET_TYPES
> /**
> diff --git a/include/linux/socket.h b/include/linux/socket.h
> index 13c3a237b9c9..6bf90f251910 100644
> --- a/include/linux/socket.h
> +++ b/include/linux/socket.h
> @@ -177,6 +177,7 @@ static inline size_t msg_data_left(struct msghdr *msg)
> #define SCM_RIGHTS 0x01 /* rw: access rights (array of int) */
> #define SCM_CREDENTIALS 0x02 /* rw: struct ucred */
> #define SCM_SECURITY 0x03 /* rw: security label */
> +#define SCM_PIDFD 0x04 /* ro: pidfd (int) */
>
> struct ucred {
> __u32 pid;
> diff --git a/include/net/scm.h b/include/net/scm.h
> index 585adc1346bd..0c717ae9c8db 100644
> --- a/include/net/scm.h
> +++ b/include/net/scm.h
> @@ -124,8 +124,9 @@ static __inline__ void scm_recv(struct socket *sock, struct msghdr *msg,
> struct scm_cookie *scm, int flags)
> {
> if (!msg->msg_control) {
> - if (test_bit(SOCK_PASSCRED, &sock->flags) || scm->fp ||
> - scm_has_secdata(sock))
> + if (test_bit(SOCK_PASSCRED, &sock->flags) ||
> + test_bit(SOCK_PASSPIDFD, &sock->flags) ||
> + scm->fp || scm_has_secdata(sock))
> msg->msg_flags |= MSG_CTRUNC;
> scm_destroy(scm);
> return;
> @@ -141,6 +142,15 @@ static __inline__ void scm_recv(struct socket *sock, struct msghdr *msg,
> put_cmsg(msg, SOL_SOCKET, SCM_CREDENTIALS, sizeof(ucreds), &ucreds);
> }
>
> + if (test_bit(SOCK_PASSPIDFD, &sock->flags)) {
> + int pidfd;
> +
> + WARN_ON_ONCE(!scm->pid);
> + pidfd = pidfd_create(scm->pid, 0);
> +
> + put_cmsg(msg, SOL_SOCKET, SCM_PIDFD, sizeof(int), &pidfd);

So here we need to also make sure that we can't end up in a situation
where the receiver gets an error message and discards the message but
we've snuck an fd into their fdtable. So callers of scm_recv() should be
in a path where the message can't fail anymore and we're about to return
to userspace.

2023-03-22 16:22:34

by Aleksandr Mikhalitsyn

[permalink] [raw]
Subject: Re: [PATCH net-next v2 2/3] net: core: add getsockopt SO_PEERPIDFD

On Wed, Mar 22, 2023 at 4:35 PM Christian Brauner <[email protected]> wrote:
>
> On Tue, Mar 21, 2023 at 07:33:41PM +0100, Alexander Mikhalitsyn wrote:
> > Add SO_PEERPIDFD which allows to get pidfd of peer socket holder pidfd.
> > This thing is direct analog of SO_PEERCRED which allows to get plain PID.
> >
> > Cc: "David S. Miller" <[email protected]>
> > Cc: Eric Dumazet <[email protected]>
> > Cc: Jakub Kicinski <[email protected]>
> > Cc: Paolo Abeni <[email protected]>
> > Cc: Leon Romanovsky <[email protected]>
> > Cc: David Ahern <[email protected]>
> > Cc: Arnd Bergmann <[email protected]>
> > Cc: Kees Cook <[email protected]>
> > Cc: Christian Brauner <[email protected]>
> > Cc: Kuniyuki Iwashima <[email protected]>
> > Cc: Lennart Poettering <[email protected]>
> > Cc: [email protected]
> > Cc: [email protected]
> > Cc: [email protected]
> > Signed-off-by: Alexander Mikhalitsyn <[email protected]>
> > ---
> > v2:
> > According to review comments from Kuniyuki Iwashima and Christian Brauner:
> > - use pidfd_create(..) retval as a result
> > - whitespace change
> > ---
> > arch/alpha/include/uapi/asm/socket.h | 1 +
> > arch/mips/include/uapi/asm/socket.h | 1 +
> > arch/parisc/include/uapi/asm/socket.h | 1 +
> > arch/sparc/include/uapi/asm/socket.h | 1 +
> > include/uapi/asm-generic/socket.h | 1 +
> > net/core/sock.c | 21 +++++++++++++++++++++
> > tools/include/uapi/asm-generic/socket.h | 1 +
> > 7 files changed, 27 insertions(+)
> >
> > diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
> > index ff310613ae64..e94f621903fe 100644
> > --- a/arch/alpha/include/uapi/asm/socket.h
> > +++ b/arch/alpha/include/uapi/asm/socket.h
> > @@ -138,6 +138,7 @@
> > #define SO_RCVMARK 75
> >
> > #define SO_PASSPIDFD 76
> > +#define SO_PEERPIDFD 77
> >
> > #if !defined(__KERNEL__)
> >
> > diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
> > index 762dcb80e4ec..60ebaed28a4c 100644
> > --- a/arch/mips/include/uapi/asm/socket.h
> > +++ b/arch/mips/include/uapi/asm/socket.h
> > @@ -149,6 +149,7 @@
> > #define SO_RCVMARK 75
> >
> > #define SO_PASSPIDFD 76
> > +#define SO_PEERPIDFD 77
> >
> > #if !defined(__KERNEL__)
> >
> > diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
> > index df16a3e16d64..be264c2b1a11 100644
> > --- a/arch/parisc/include/uapi/asm/socket.h
> > +++ b/arch/parisc/include/uapi/asm/socket.h
> > @@ -130,6 +130,7 @@
> > #define SO_RCVMARK 0x4049
> >
> > #define SO_PASSPIDFD 0x404A
> > +#define SO_PEERPIDFD 0x404B
> >
> > #if !defined(__KERNEL__)
> >
> > diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
> > index 6e2847804fea..682da3714686 100644
> > --- a/arch/sparc/include/uapi/asm/socket.h
> > +++ b/arch/sparc/include/uapi/asm/socket.h
> > @@ -131,6 +131,7 @@
> > #define SO_RCVMARK 0x0054
> >
> > #define SO_PASSPIDFD 0x0055
> > +#define SO_PEERPIDFD 0x0056
> >
> > #if !defined(__KERNEL__)
> >
> > diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
> > index b76169fdb80b..8ce8a39a1e5f 100644
> > --- a/include/uapi/asm-generic/socket.h
> > +++ b/include/uapi/asm-generic/socket.h
> > @@ -133,6 +133,7 @@
> > #define SO_RCVMARK 75
> >
> > #define SO_PASSPIDFD 76
> > +#define SO_PEERPIDFD 77
> >
> > #if !defined(__KERNEL__)
> >
> > diff --git a/net/core/sock.c b/net/core/sock.c
> > index 3f974246ba3e..85c269ca9d8a 100644
> > --- a/net/core/sock.c
> > +++ b/net/core/sock.c
> > @@ -1763,6 +1763,27 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
> > goto lenout;
> > }
> >
> > + case SO_PEERPIDFD:
> > + {
> > + struct pid *peer_pid;
> > + int pidfd;
> > +
> > + if (len > sizeof(pidfd))
> > + len = sizeof(pidfd);
> > +
> > + spin_lock(&sk->sk_peer_lock);
> > + peer_pid = get_pid(sk->sk_peer_pid);
> > + spin_unlock(&sk->sk_peer_lock);
> > +
> > + pidfd = pidfd_create(peer_pid, 0);
> > +
> > + put_pid(peer_pid);
> > +
> > + if (copy_to_sockptr(optval, &pidfd, len))
> > + return -EFAULT;
>
> This leaks the pidfd. We could do:
>
> if (copy_to_sockptr(optval, &pidfd, len)) {
> close_fd(pidfd);
> return -EFAULT;
> }

Ah, my bad. Thanks for pointing this out!

>
> but it's a nasty anti-pattern to install the fd in the caller's fdtable
> and then close it again. So let's avoid it if we can. Since you can only
> set one socket option per setsockopt() sycall we should be able to
> reserve an fd and pidfd_file, do the stuff that might fail, and then
> call fd_install. So that would roughly be:
>
> peer_pid = get_pid(sk->sk_peer_pid);
> pidfd_file = pidfd_file_create(peer_pid, 0, &pidfd);
> f (copy_to_sockptr(optval, &pidfd, len))
> return -EFAULT;
> goto lenout:
>
> .
> .
> .
>
> lenout:
> if (copy_to_sockptr(optlen, &len, sizeof(int)))
> return -EFAULT;
>
> // Made it safely, install pidfd now.
> fd_install(pidfd, pidfd_file)
>
> (See below for the associated api I'm going to publish independent of
> this as kernel/fork.c and fanotify both could use it.)
>
> But now, let's look at net/socket.c there's another wrinkle. So let's say you
> have successfully installed the pidfd then it seems you can still fail later:
>
> if (level == SOL_SOCKET)
> err = sock_getsockopt(sock, level, optname, optval, optlen);
> else if (unlikely(!sock->ops->getsockopt))
> err = -EOPNOTSUPP;
> else
> err = sock->ops->getsockopt(sock, level, optname, optval,
> optlen);
>
> if (!in_compat_syscall())
> err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname,
> optval, optlen, max_optlen,
> err);
>
> out_put:
> fput_light(sock->file, fput_needed);
> return err;
>
> If the bpf hook returns an error we've placed an fd into the caller's sockopt
> buffer without their knowledge.

yes, so we need to postpone fd_install to the end of __sys_getsockopt.
I'll think about that.

>
> From 4fee16f0920308bee2531fd3b08484f607eb5830 Mon Sep 17 00:00:00 2001
> From: Christian Brauner <[email protected]>
> Date: Wed, 22 Mar 2023 15:59:02 +0100
> Subject: [PATCH 1/3] [HERE BE DRAGONS - DRAFT - __UNTESTED__] pid: add
> pidfd_file_create()
>
> Reserve and fd and pidfile, do stuff that might fail, install fd when
> point of no return.
>
> [HERE BE DRAGONS - DRAFT - __UNTESTED__] pid: add pidfd_file_create()
>
> Signed-off-by: Christian Brauner <[email protected]>
> ---
> include/linux/pid.h | 1 +
> kernel/pid.c | 45 +++++++++++++++++++++++++++++++++------------
> 2 files changed, 34 insertions(+), 12 deletions(-)
>
> diff --git a/include/linux/pid.h b/include/linux/pid.h
> index 343abf22092e..c486dbc4d7b6 100644
> --- a/include/linux/pid.h
> +++ b/include/linux/pid.h
> @@ -80,6 +80,7 @@ extern struct pid *pidfd_pid(const struct file *file);
> struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags);
> struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags);
> int pidfd_create(struct pid *pid, unsigned int flags);
> +struct file *pidfd_file_create(struct pid *pid, unsigned int flags, int *pidfd);
>
> static inline struct pid *get_pid(struct pid *pid)
> {
> diff --git a/kernel/pid.c b/kernel/pid.c
> index 3fbc5e46b721..8d0924f1dbf6 100644
> --- a/kernel/pid.c
> +++ b/kernel/pid.c
> @@ -576,6 +576,32 @@ struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags)
> return task;
> }
>
> +struct file *pidfd_file_create(struct pid *pid, unsigned int flags, int *pidfd)
> +{
> + int fd;
> + struct file *pidfile;
> +
> + if (!pid || !pid_has_task(pid, PIDTYPE_TGID))
> + return ERR_PTR(-EINVAL);
> +
> + if (flags & ~(O_NONBLOCK | O_RDWR | O_CLOEXEC))
> + return ERR_PTR(-EINVAL);
> +
> + fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
> + if (fd < 0)
> + return ERR_PTR(fd);
> +
> + pidfile = anon_inode_getfile("[pidfd]", &pidfd_fops, pid,
> + flags | O_RDWR | O_CLOEXEC);
> + if (IS_ERR(pidfile)) {
> + put_unused_fd(fd);
> + return pidfile;
> + }
> + get_pid(pid); /* held by pidfile now */
> + *pidfd = fd;
> + return pidfile;
> +}
> +
> /**
> * pidfd_create() - Create a new pid file descriptor.
> *
> @@ -594,20 +620,15 @@ struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags)
> */
> int pidfd_create(struct pid *pid, unsigned int flags)
> {
> - int fd;
> + int pidfd;
> + struct file *pidfile;
>
> - if (!pid || !pid_has_task(pid, PIDTYPE_TGID))
> - return -EINVAL;
> + pidfile = pidfd_file_create(pid, flags, &pidfd);
> + if (IS_ERR(pidfile))
> + return PTR_ERR(pidfile);
>
> - if (flags & ~(O_NONBLOCK | O_RDWR | O_CLOEXEC))
> - return -EINVAL;
> -
> - fd = anon_inode_getfd("[pidfd]", &pidfd_fops, get_pid(pid),
> - flags | O_RDWR | O_CLOEXEC);
> - if (fd < 0)
> - put_pid(pid);
> -
> - return fd;
> + fd_install(pidfd, pidfile);
> + return pidfd;
> }
>
> /**
> --
> 2.34.1
>
> From c336f1c6cc39faa5aef4fbedd3c4f8eca51d8436 Mon Sep 17 00:00:00 2001
> From: Christian Brauner <[email protected]>
> Date: Wed, 22 Mar 2023 15:59:54 +0100
> Subject: [PATCH 2/3] [HERE BE DRAGONS - DRAFT - __UNTESTED__] fork: use
> pidfd_file_create()
>
> Signed-off-by: Christian Brauner <[email protected]>
> ---
> kernel/fork.c | 11 +----------
> 1 file changed, 1 insertion(+), 10 deletions(-)
>
> diff --git a/kernel/fork.c b/kernel/fork.c
> index f68954d05e89..c8dc78ee0a74 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -2296,20 +2296,11 @@ static __latent_entropy struct task_struct *copy_process(
> * if the fd table isn't shared).
> */
> if (clone_flags & CLONE_PIDFD) {
> - retval = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
> - if (retval < 0)
> - goto bad_fork_free_pid;
> -
> - pidfd = retval;
> -
> - pidfile = anon_inode_getfile("[pidfd]", &pidfd_fops, pid,
> - O_RDWR | O_CLOEXEC);
> + pidfile = pidfd_file_create(pid, O_RDWR | O_CLOEXEC, &pidfd);
> if (IS_ERR(pidfile)) {
> - put_unused_fd(pidfd);
> retval = PTR_ERR(pidfile);
> goto bad_fork_free_pid;
> }
> - get_pid(pid); /* held by pidfile now */
>
> retval = put_user(pidfd, args->pidfd);
> if (retval)
> --
> 2.34.1
>
> From 0897f68fe06a8777d8ec600fdc719143f76095b1 Mon Sep 17 00:00:00 2001
> From: Christian Brauner <[email protected]>
> Date: Wed, 22 Mar 2023 16:02:50 +0100
> Subject: [PATCH 3/3] [HERE BE DRAGONS - DRAFT - __UNTESTED__] fanotify: use
> pidfd_file_create()
>
> Signed-off-by: Christian Brauner <[email protected]>
> ---
> fs/notify/fanotify/fanotify_user.c | 15 +++++++++++----
> 1 file changed, 11 insertions(+), 4 deletions(-)
>
> diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
> index 8f430bfad487..4a8db6b5f690 100644
> --- a/fs/notify/fanotify/fanotify_user.c
> +++ b/fs/notify/fanotify/fanotify_user.c
> @@ -665,6 +665,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
> unsigned int pidfd_mode = info_mode & FAN_REPORT_PIDFD;
> struct file *f = NULL;
> int ret, pidfd = FAN_NOPIDFD, fd = FAN_NOFD;
> + struct file *pidfd_file = NULL;
>
> pr_debug("%s: group=%p event=%p\n", __func__, group, event);
>
> @@ -718,9 +719,11 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
> !pid_has_task(event->pid, PIDTYPE_TGID)) {
> pidfd = FAN_NOPIDFD;
> } else {
> - pidfd = pidfd_create(event->pid, 0);
> - if (pidfd < 0)
> + pidfd_file = pidfd_file_create(event->pid, 0, &pidfd);
> + if (IS_ERR(pidfd_file)) {
> pidfd = FAN_EPIDFD;
> + pidfd_file = NULL;
> + }
> }
> }
>
> @@ -750,6 +753,8 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
>
> if (f)
> fd_install(fd, f);
> + if (pidfd_file)
> + fd_install(pidfd, pidfd_file);
>
> return metadata.event_len;
>
> @@ -759,8 +764,10 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
> fput(f);
> }
>
> - if (pidfd >= 0)
> - close_fd(pidfd);
> + if (pidfd >= 0) {
> + put_unused_fd(pidfd);
> + fput(pidfd_file);
> + }
>
> return ret;
> }
> --
> 2.34.1
>

2023-03-28 15:48:13

by Christian Brauner

[permalink] [raw]
Subject: Re: [PATCH net-next v2 2/3] net: core: add getsockopt SO_PEERPIDFD

On Wed, Mar 22, 2023 at 04:35:51PM +0100, Christian Brauner wrote:
> On Tue, Mar 21, 2023 at 07:33:41PM +0100, Alexander Mikhalitsyn wrote:
> > Add SO_PEERPIDFD which allows to get pidfd of peer socket holder pidfd.
> > This thing is direct analog of SO_PEERCRED which allows to get plain PID.
> >
> > Cc: "David S. Miller" <[email protected]>
> > Cc: Eric Dumazet <[email protected]>
> > Cc: Jakub Kicinski <[email protected]>
> > Cc: Paolo Abeni <[email protected]>
> > Cc: Leon Romanovsky <[email protected]>
> > Cc: David Ahern <[email protected]>
> > Cc: Arnd Bergmann <[email protected]>
> > Cc: Kees Cook <[email protected]>
> > Cc: Christian Brauner <[email protected]>
> > Cc: Kuniyuki Iwashima <[email protected]>
> > Cc: Lennart Poettering <[email protected]>
> > Cc: [email protected]
> > Cc: [email protected]
> > Cc: [email protected]
> > Signed-off-by: Alexander Mikhalitsyn <[email protected]>
> > ---
> > v2:
> > According to review comments from Kuniyuki Iwashima and Christian Brauner:
> > - use pidfd_create(..) retval as a result
> > - whitespace change
> > ---
> > arch/alpha/include/uapi/asm/socket.h | 1 +
> > arch/mips/include/uapi/asm/socket.h | 1 +
> > arch/parisc/include/uapi/asm/socket.h | 1 +
> > arch/sparc/include/uapi/asm/socket.h | 1 +
> > include/uapi/asm-generic/socket.h | 1 +
> > net/core/sock.c | 21 +++++++++++++++++++++
> > tools/include/uapi/asm-generic/socket.h | 1 +
> > 7 files changed, 27 insertions(+)
> >
> > diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
> > index ff310613ae64..e94f621903fe 100644
> > --- a/arch/alpha/include/uapi/asm/socket.h
> > +++ b/arch/alpha/include/uapi/asm/socket.h
> > @@ -138,6 +138,7 @@
> > #define SO_RCVMARK 75
> >
> > #define SO_PASSPIDFD 76
> > +#define SO_PEERPIDFD 77
> >
> > #if !defined(__KERNEL__)
> >
> > diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
> > index 762dcb80e4ec..60ebaed28a4c 100644
> > --- a/arch/mips/include/uapi/asm/socket.h
> > +++ b/arch/mips/include/uapi/asm/socket.h
> > @@ -149,6 +149,7 @@
> > #define SO_RCVMARK 75
> >
> > #define SO_PASSPIDFD 76
> > +#define SO_PEERPIDFD 77
> >
> > #if !defined(__KERNEL__)
> >
> > diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
> > index df16a3e16d64..be264c2b1a11 100644
> > --- a/arch/parisc/include/uapi/asm/socket.h
> > +++ b/arch/parisc/include/uapi/asm/socket.h
> > @@ -130,6 +130,7 @@
> > #define SO_RCVMARK 0x4049
> >
> > #define SO_PASSPIDFD 0x404A
> > +#define SO_PEERPIDFD 0x404B
> >
> > #if !defined(__KERNEL__)
> >
> > diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
> > index 6e2847804fea..682da3714686 100644
> > --- a/arch/sparc/include/uapi/asm/socket.h
> > +++ b/arch/sparc/include/uapi/asm/socket.h
> > @@ -131,6 +131,7 @@
> > #define SO_RCVMARK 0x0054
> >
> > #define SO_PASSPIDFD 0x0055
> > +#define SO_PEERPIDFD 0x0056
> >
> > #if !defined(__KERNEL__)
> >
> > diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
> > index b76169fdb80b..8ce8a39a1e5f 100644
> > --- a/include/uapi/asm-generic/socket.h
> > +++ b/include/uapi/asm-generic/socket.h
> > @@ -133,6 +133,7 @@
> > #define SO_RCVMARK 75
> >
> > #define SO_PASSPIDFD 76
> > +#define SO_PEERPIDFD 77
> >
> > #if !defined(__KERNEL__)
> >
> > diff --git a/net/core/sock.c b/net/core/sock.c
> > index 3f974246ba3e..85c269ca9d8a 100644
> > --- a/net/core/sock.c
> > +++ b/net/core/sock.c
> > @@ -1763,6 +1763,27 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
> > goto lenout;
> > }
> >
> > + case SO_PEERPIDFD:
> > + {
> > + struct pid *peer_pid;
> > + int pidfd;
> > +
> > + if (len > sizeof(pidfd))
> > + len = sizeof(pidfd);
> > +
> > + spin_lock(&sk->sk_peer_lock);
> > + peer_pid = get_pid(sk->sk_peer_pid);
> > + spin_unlock(&sk->sk_peer_lock);
> > +
> > + pidfd = pidfd_create(peer_pid, 0);
> > +
> > + put_pid(peer_pid);
> > +
> > + if (copy_to_sockptr(optval, &pidfd, len))
> > + return -EFAULT;
>
> This leaks the pidfd. We could do:
>
> if (copy_to_sockptr(optval, &pidfd, len)) {
> close_fd(pidfd);
> return -EFAULT;
> }
>
> but it's a nasty anti-pattern to install the fd in the caller's fdtable
> and then close it again. So let's avoid it if we can. Since you can only
> set one socket option per setsockopt() sycall we should be able to
> reserve an fd and pidfd_file, do the stuff that might fail, and then
> call fd_install. So that would roughly be:
>
> peer_pid = get_pid(sk->sk_peer_pid);
> pidfd_file = pidfd_file_create(peer_pid, 0, &pidfd);
> f (copy_to_sockptr(optval, &pidfd, len))
> return -EFAULT;
> goto lenout:
>
> .
> .
> .
>
> lenout:
> if (copy_to_sockptr(optlen, &len, sizeof(int)))
> return -EFAULT;
>
> // Made it safely, install pidfd now.
> fd_install(pidfd, pidfd_file)
>
> (See below for the associated api I'm going to publish independent of
> this as kernel/fork.c and fanotify both could use it.)

Sent out yesterday:
https://lore.kernel.org/lkml/20230328090026.b54a4jhccntfraey@quack3