Hello!
This is a bit of thread-merge between [1] and [2]. tl;dr: add a way for
a seccomp user_notif process manager to inject files into the managed
process in order to handle emulation of various fd-returning syscalls
across security boundaries. Containers folks and Chrome are in need
of the feature, and investigating this solution uncovered (and fixed)
implementation issues with existing file sending routines.
I intend to carry this in the seccomp tree, unless someone has objections.
:) Please review and test!
-Kees
[1] https://lore.kernel.org/lkml/[email protected]/
[2] https://lore.kernel.org/lkml/[email protected]/
Kees Cook (9):
net/scm: Regularize compat handling of scm_detach_fds()
fs: Move __scm_install_fd() to __fd_install_received()
fs: Add fd_install_received() wrapper for __fd_install_received()
pidfd: Replace open-coded partial fd_install_received()
fs: Expand __fd_install_received() to accept fd
selftests/seccomp: Make kcmp() less required
selftests/seccomp: Rename user_trap_syscall() to user_notif_syscall()
seccomp: Switch addfd to Extensible Argument ioctl
seccomp: Fix ioctl number for SECCOMP_IOCTL_NOTIF_ID_VALID
Sargun Dhillon (2):
seccomp: Introduce addfd ioctl to seccomp user notifier
selftests/seccomp: Test SECCOMP_IOCTL_NOTIF_ADDFD
fs/file.c | 65 ++++
include/linux/file.h | 16 +
include/uapi/linux/seccomp.h | 25 +-
kernel/pid.c | 11 +-
kernel/seccomp.c | 181 ++++++++-
net/compat.c | 55 ++-
net/core/scm.c | 50 +--
tools/testing/selftests/seccomp/seccomp_bpf.c | 350 +++++++++++++++---
8 files changed, 618 insertions(+), 135 deletions(-)
--
2.25.1
Duplicate the cleanups from commit 2618d530dd8b ("net/scm: cleanup
scm_detach_fds") into the compat code.
Move the check added in commit 1f466e1f15cf ("net: cleanly handle kernel
vs user buffers for ->msg_control") to before the compat call, even
though it should be impossible for an in-kernel call to also be compat.
Correct the int "flags" argument to unsigned int to match fd_install()
and similar APIs.
Regularize any remaining differences, including a whitespace issue,
a checkpatch warning, and add the check from commit 6900317f5eff ("net,
scm: fix PaX detected msg_controllen overflow in scm_detach_fds") which
fixed an overflow unique to 64-bit. To avoid confusion when comparing
the compat handler to the native handler, just include the same check
in the compat handler.
Fixes: 48a87cc26c13 ("net: netprio: fd passed in SCM_RIGHTS datagram not set correctly")
Fixes: d84295067fc7 ("net: net_cls: fd passed in SCM_RIGHTS datagram not set correctly")
Signed-off-by: Kees Cook <[email protected]>
---
include/net/scm.h | 1 +
net/compat.c | 55 +++++++++++++++++++++--------------------------
net/core/scm.c | 18 ++++++++--------
3 files changed, 35 insertions(+), 39 deletions(-)
diff --git a/include/net/scm.h b/include/net/scm.h
index 1ce365f4c256..581a94d6c613 100644
--- a/include/net/scm.h
+++ b/include/net/scm.h
@@ -37,6 +37,7 @@ struct scm_cookie {
#endif
};
+int __scm_install_fd(struct file *file, int __user *ufd, unsigned int o_flags);
void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm);
void scm_detach_fds_compat(struct msghdr *msg, struct scm_cookie *scm);
int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *scm);
diff --git a/net/compat.c b/net/compat.c
index 5e3041a2c37d..27d477fdcaa0 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -281,39 +281,31 @@ int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *dat
return 0;
}
-void scm_detach_fds_compat(struct msghdr *kmsg, struct scm_cookie *scm)
+static int scm_max_fds_compat(struct msghdr *msg)
{
- struct compat_cmsghdr __user *cm = (struct compat_cmsghdr __user *) kmsg->msg_control;
- int fdmax = (kmsg->msg_controllen - sizeof(struct compat_cmsghdr)) / sizeof(int);
- int fdnum = scm->fp->count;
- struct file **fp = scm->fp->fp;
- int __user *cmfptr;
- int err = 0, i;
+ if (msg->msg_controllen <= sizeof(struct compat_cmsghdr))
+ return 0;
+ return (msg->msg_controllen - sizeof(struct compat_cmsghdr)) / sizeof(int);
+}
- if (fdnum < fdmax)
- fdmax = fdnum;
+void scm_detach_fds_compat(struct msghdr *msg, struct scm_cookie *scm)
+{
+ struct compat_cmsghdr __user *cm =
+ (struct compat_cmsghdr __user *)msg->msg_control;
+ unsigned int o_flags = (msg->msg_flags & MSG_CMSG_CLOEXEC) ? O_CLOEXEC : 0;
+ int fdmax = min_t(int, scm_max_fds_compat(msg), scm->fp->count);
+ int __user *cmsg_data = CMSG_USER_DATA(cm);
+ int err = 0, i;
- for (i = 0, cmfptr = (int __user *) CMSG_COMPAT_DATA(cm); i < fdmax; i++, cmfptr++) {
- int new_fd;
- err = security_file_receive(fp[i]);
+ for (i = 0; i < fdmax; i++) {
+ err = __scm_install_fd(scm->fp->fp[i], cmsg_data + i, o_flags);
if (err)
break;
- err = get_unused_fd_flags(MSG_CMSG_CLOEXEC & kmsg->msg_flags
- ? O_CLOEXEC : 0);
- if (err < 0)
- break;
- new_fd = err;
- err = put_user(new_fd, cmfptr);
- if (err) {
- put_unused_fd(new_fd);
- break;
- }
- /* Bump the usage count and install the file. */
- fd_install(new_fd, get_file(fp[i]));
}
if (i > 0) {
int cmlen = CMSG_COMPAT_LEN(i * sizeof(int));
+
err = put_user(SOL_SOCKET, &cm->cmsg_level);
if (!err)
err = put_user(SCM_RIGHTS, &cm->cmsg_type);
@@ -321,16 +313,19 @@ void scm_detach_fds_compat(struct msghdr *kmsg, struct scm_cookie *scm)
err = put_user(cmlen, &cm->cmsg_len);
if (!err) {
cmlen = CMSG_COMPAT_SPACE(i * sizeof(int));
- kmsg->msg_control += cmlen;
- kmsg->msg_controllen -= cmlen;
+ if (msg->msg_controllen < cmlen)
+ cmlen = msg->msg_controllen;
+ msg->msg_control += cmlen;
+ msg->msg_controllen -= cmlen;
}
}
- if (i < fdnum)
- kmsg->msg_flags |= MSG_CTRUNC;
+
+ if (i < scm->fp->count || (scm->fp->count && fdmax <= 0))
+ msg->msg_flags |= MSG_CTRUNC;
/*
- * All of the files that fit in the message have had their
- * usage counts incremented, so we just free the list.
+ * All of the files that fit in the message have had their usage counts
+ * incremented, so we just free the list.
*/
__scm_destroy(scm);
}
diff --git a/net/core/scm.c b/net/core/scm.c
index 875df1c2989d..6151678c73ed 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -280,7 +280,7 @@ void put_cmsg_scm_timestamping(struct msghdr *msg, struct scm_timestamping_inter
}
EXPORT_SYMBOL(put_cmsg_scm_timestamping);
-static int __scm_install_fd(struct file *file, int __user *ufd, int o_flags)
+int __scm_install_fd(struct file *file, int __user *ufd, unsigned int o_flags)
{
struct socket *sock;
int new_fd;
@@ -319,29 +319,29 @@ static int scm_max_fds(struct msghdr *msg)
void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
{
- struct cmsghdr __user *cm
- = (__force struct cmsghdr __user*)msg->msg_control;
- int o_flags = (msg->msg_flags & MSG_CMSG_CLOEXEC) ? O_CLOEXEC : 0;
+ struct cmsghdr __user *cm =
+ (__force struct cmsghdr __user *)msg->msg_control;
+ unsigned int o_flags = (msg->msg_flags & MSG_CMSG_CLOEXEC) ? O_CLOEXEC : 0;
int fdmax = min_t(int, scm_max_fds(msg), scm->fp->count);
int __user *cmsg_data = CMSG_USER_DATA(cm);
int err = 0, i;
+ /* no use for FD passing from kernel space callers */
+ if (WARN_ON_ONCE(!msg->msg_control_is_user))
+ return;
+
if (msg->msg_flags & MSG_CMSG_COMPAT) {
scm_detach_fds_compat(msg, scm);
return;
}
- /* no use for FD passing from kernel space callers */
- if (WARN_ON_ONCE(!msg->msg_control_is_user))
- return;
-
for (i = 0; i < fdmax; i++) {
err = __scm_install_fd(scm->fp->fp[i], cmsg_data + i, o_flags);
if (err)
break;
}
- if (i > 0) {
+ if (i > 0) {
int cmlen = CMSG_LEN(i * sizeof(int));
err = put_user(SOL_SOCKET, &cm->cmsg_level);
--
2.25.1
This patch is based on discussions[1] with Sargun Dhillon, Christian
Brauner, and David Laight. Instead of building size into the addfd
structure, make it a function of the ioctl command (which is how sizes are
normally passed to ioctls). To support forward and backward compatibility,
just mask out the direction and size, and match everything. The size (and
any future direction) checks are done along with copy_struct_from_user()
logic. Also update the selftests to check size bounds.
[1] https://lore.kernel.org/lkml/[email protected]
Signed-off-by: Kees Cook <[email protected]>
---
include/uapi/linux/seccomp.h | 2 -
kernel/seccomp.c | 21 ++++++----
tools/testing/selftests/seccomp/seccomp_bpf.c | 40 ++++++++++++++++---
3 files changed, 49 insertions(+), 14 deletions(-)
diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index c347160378e5..473a61695ac3 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -118,7 +118,6 @@ struct seccomp_notif_resp {
/**
* struct seccomp_notif_addfd
- * @size: The size of the seccomp_notif_addfd structure
* @id: The ID of the seccomp notification
* @flags: SECCOMP_ADDFD_FLAG_*
* @srcfd: The local fd number
@@ -126,7 +125,6 @@ struct seccomp_notif_resp {
* @newfd_flags: The O_* flags the remote FD should have applied
*/
struct seccomp_notif_addfd {
- __u64 size;
__u64 id;
__u32 flags;
__u32 srcfd;
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 3c913f3b8451..9660abf91135 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -1292,17 +1292,17 @@ static long seccomp_notify_id_valid(struct seccomp_filter *filter,
}
static long seccomp_notify_addfd(struct seccomp_filter *filter,
- struct seccomp_notif_addfd __user *uaddfd)
+ struct seccomp_notif_addfd __user *uaddfd,
+ unsigned int size)
{
struct seccomp_notif_addfd addfd;
struct seccomp_knotif *knotif;
struct seccomp_kaddfd kaddfd;
- u64 size;
int ret;
- ret = get_user(size, &uaddfd->size);
- if (ret)
- return ret;
+ /* 24 is original sizeof(struct seccomp_notif_addfd) */
+ if (size < 24 || size >= PAGE_SIZE)
+ return -EINVAL;
ret = copy_struct_from_user(&addfd, sizeof(addfd), uaddfd, size);
if (ret)
@@ -1391,6 +1391,7 @@ static long seccomp_notify_ioctl(struct file *file, unsigned int cmd,
struct seccomp_filter *filter = file->private_data;
void __user *buf = (void __user *)arg;
+ /* Fixed-size ioctls */
switch (cmd) {
case SECCOMP_IOCTL_NOTIF_RECV:
return seccomp_notify_recv(filter, buf);
@@ -1398,11 +1399,17 @@ static long seccomp_notify_ioctl(struct file *file, unsigned int cmd,
return seccomp_notify_send(filter, buf);
case SECCOMP_IOCTL_NOTIF_ID_VALID:
return seccomp_notify_id_valid(filter, buf);
- case SECCOMP_IOCTL_NOTIF_ADDFD:
- return seccomp_notify_addfd(filter, buf);
+ }
+
+ /* Extensible Argument ioctls */
+#define EA_IOCTL(cmd) ((cmd) & ~(IOC_INOUT | IOCSIZE_MASK))
+ switch (EA_IOCTL(cmd)) {
+ case EA_IOCTL(SECCOMP_IOCTL_NOTIF_ADDFD):
+ return seccomp_notify_addfd(filter, buf, _IOC_SIZE(cmd));
default:
return -EINVAL;
}
+#undef EA_IOCTL
}
static __poll_t seccomp_notify_poll(struct file *file,
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 95b134933831..cf1480e498ea 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -216,7 +216,6 @@ struct seccomp_notif_sizes {
#define SECCOMP_ADDFD_FLAG_SETFD (1UL << 0) /* Specify remote fd */
struct seccomp_notif_addfd {
- __u64 size;
__u64 id;
__u32 flags;
__u32 srcfd;
@@ -225,6 +224,22 @@ struct seccomp_notif_addfd {
};
#endif
+struct seccomp_notif_addfd_small {
+ __u64 id;
+ char weird[4];
+};
+#define SECCOMP_IOCTL_NOTIF_ADDFD_SMALL \
+ SECCOMP_IOW(3, struct seccomp_notif_addfd_small)
+
+struct seccomp_notif_addfd_big {
+ union {
+ struct seccomp_notif_addfd addfd;
+ char buf[sizeof(struct seccomp_notif_addfd) + 8];
+ };
+};
+#define SECCOMP_IOCTL_NOTIF_ADDFD_BIG \
+ SECCOMP_IOWR(3, struct seccomp_notif_addfd_big)
+
#ifndef PTRACE_EVENTMSG_SYSCALL_ENTRY
#define PTRACE_EVENTMSG_SYSCALL_ENTRY 1
#define PTRACE_EVENTMSG_SYSCALL_EXIT 2
@@ -3853,6 +3868,8 @@ TEST(user_notification_sendfd)
long ret;
int status, listener, memfd, fd;
struct seccomp_notif_addfd addfd = {};
+ struct seccomp_notif_addfd_small small = {};
+ struct seccomp_notif_addfd_big big = {};
struct seccomp_notif req = {};
struct seccomp_notif_resp resp = {};
/* 100 ms */
@@ -3882,7 +3899,6 @@ TEST(user_notification_sendfd)
ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
- addfd.size = sizeof(addfd);
addfd.srcfd = memfd;
addfd.newfd = 0;
addfd.id = req.id;
@@ -3906,6 +3922,16 @@ TEST(user_notification_sendfd)
EXPECT_EQ(errno, EINVAL);
addfd.newfd = 0;
+ /* Verify small size cannot be set */
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_SMALL, &small), -1);
+ EXPECT_EQ(errno, EINVAL);
+
+ /* Verify we can't send bits filled in unknown buffer area */
+ memset(&big, 0xAA, sizeof(big));
+ big.addfd = addfd;
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big), -1);
+ EXPECT_EQ(errno, E2BIG);
+
/* Verify we can set an arbitrary remote fd */
fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
/*
@@ -3921,10 +3947,15 @@ TEST(user_notification_sendfd)
EXPECT_EQ(ret, 0);
}
+ /* Verify we can set an arbitrary remote fd with large size */
+ memset(&big, 0x0, sizeof(big));
+ big.addfd = addfd;
+ fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big);
+ EXPECT_EQ(fd, 6);
+
/* Verify we can set a specific remote fd */
addfd.newfd = 42;
addfd.flags = SECCOMP_ADDFD_FLAG_SETFD;
-
fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
EXPECT_EQ(fd, 42);
ret = filecmp(getpid(), pid, memfd, fd);
@@ -3935,10 +3966,10 @@ TEST(user_notification_sendfd)
EXPECT_EQ(ret, 0);
}
+ /* Resume syscall */
resp.id = req.id;
resp.error = 0;
resp.val = USER_NOTIF_MAGIC;
-
EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
/*
@@ -4006,7 +4037,6 @@ TEST(user_notification_sendfd_rlimit)
ASSERT_EQ(prlimit(pid, RLIMIT_NOFILE, &lim, NULL), 0);
- addfd.size = sizeof(addfd);
addfd.srcfd = memfd;
addfd.newfd_flags = O_CLOEXEC;
addfd.newfd = 0;
--
2.25.1
From: Sargun Dhillon <[email protected]>
This adds a seccomp notifier ioctl which allows for the listener to
"add" file descriptors to a process which originated a seccomp user
notification. This allows calls like mount, and mknod to be "implemented",
as the return value, and the arguments are data in memory. On the other
hand, calls like connect can be "implemented" using pidfd_getfd.
Unfortunately, there are calls which return file descriptors, like
open, which are vulnerable to ToCToU attacks, and require that the more
privileged supervisor can inspect the argument, and perform the syscall
on behalf of the process generating the notification. This allows the
file descriptor generated from that open call to be returned to the
calling process.
In addition, there is functionality to allow for replacement of specific
file descriptors, following dup2-like semantics.
As a note, the seccomp_notif_addfd structure is laid out based on 8-byte
alignment without requiring packing as there have been packing issues
with uapi highlighted before[1][2]. Although we could overload the
newfd field and use -1 to indicate that it is not to be used, doing
so requires changing the size of the fd field, and introduces struct
packing complexity.
[1]: https://lore.kernel.org/lkml/[email protected]/
[2]: https://lore.kernel.org/lkml/[email protected]/
Suggested-by: Matt Denton <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Sargun Dhillon <[email protected]>
Signed-off-by: Kees Cook <[email protected]>
---
include/uapi/linux/seccomp.h | 25 ++++++
kernel/seccomp.c | 165 ++++++++++++++++++++++++++++++++++-
2 files changed, 189 insertions(+), 1 deletion(-)
diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index c1735455bc53..c347160378e5 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -113,6 +113,27 @@ struct seccomp_notif_resp {
__u32 flags;
};
+/* valid flags for seccomp_notif_addfd */
+#define SECCOMP_ADDFD_FLAG_SETFD (1UL << 0) /* Specify remote fd */
+
+/**
+ * struct seccomp_notif_addfd
+ * @size: The size of the seccomp_notif_addfd structure
+ * @id: The ID of the seccomp notification
+ * @flags: SECCOMP_ADDFD_FLAG_*
+ * @srcfd: The local fd number
+ * @newfd: Optional remote FD number if SETFD option is set, otherwise 0.
+ * @newfd_flags: The O_* flags the remote FD should have applied
+ */
+struct seccomp_notif_addfd {
+ __u64 size;
+ __u64 id;
+ __u32 flags;
+ __u32 srcfd;
+ __u32 newfd;
+ __u32 newfd_flags;
+};
+
#define SECCOMP_IOC_MAGIC '!'
#define SECCOMP_IO(nr) _IO(SECCOMP_IOC_MAGIC, nr)
#define SECCOMP_IOR(nr, type) _IOR(SECCOMP_IOC_MAGIC, nr, type)
@@ -124,4 +145,8 @@ struct seccomp_notif_resp {
#define SECCOMP_IOCTL_NOTIF_SEND SECCOMP_IOWR(1, \
struct seccomp_notif_resp)
#define SECCOMP_IOCTL_NOTIF_ID_VALID SECCOMP_IOR(2, __u64)
+/* On success, the return value is the remote process's added fd number */
+#define SECCOMP_IOCTL_NOTIF_ADDFD SECCOMP_IOW(3, \
+ struct seccomp_notif_addfd)
+
#endif /* _UAPI_LINUX_SECCOMP_H */
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 0016cad0e605..3c913f3b8451 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -78,10 +78,42 @@ struct seccomp_knotif {
long val;
u32 flags;
- /* Signals when this has entered SECCOMP_NOTIFY_REPLIED */
+ /*
+ * Signals when this has changed states, such as the listener
+ * dying, a new seccomp addfd message, or changing to REPLIED
+ */
struct completion ready;
struct list_head list;
+
+ /* outstanding addfd requests */
+ struct list_head addfd;
+};
+
+/**
+ * struct seccomp_kaddfd - container for seccomp_addfd ioctl messages
+ *
+ * @file: A reference to the file to install in the other task
+ * @fd: The fd number to install it at. If the fd number is -1, it means the
+ * installing process should allocate the fd as normal.
+ * @flags: The flags for the new file descriptor. At the moment, only O_CLOEXEC
+ * is allowed.
+ * @ret: The return value of the installing process. It is set to the fd num
+ * upon success (>= 0).
+ * @completion: Indicates that the installing process has completed fd
+ * installation, or gone away (either due to successful
+ * reply, or signal)
+ *
+ */
+struct seccomp_kaddfd {
+ struct file *file;
+ int fd;
+ unsigned int flags;
+
+ /* To only be set on reply */
+ int ret;
+ struct completion completion;
+ struct list_head list;
};
/**
@@ -784,6 +816,17 @@ static u64 seccomp_next_notify_id(struct seccomp_filter *filter)
return filter->notif->next_id++;
}
+static void seccomp_handle_addfd(struct seccomp_kaddfd *addfd)
+{
+ /*
+ * Remove the notification, and reset the list pointers, indicating
+ * that it has been handled.
+ */
+ list_del_init(&addfd->list);
+ addfd->ret = fd_replace_received(addfd->fd, addfd->file, addfd->flags);
+ complete(&addfd->completion);
+}
+
static int seccomp_do_user_notification(int this_syscall,
struct seccomp_filter *match,
const struct seccomp_data *sd)
@@ -792,6 +835,7 @@ static int seccomp_do_user_notification(int this_syscall,
u32 flags = 0;
long ret = 0;
struct seccomp_knotif n = {};
+ struct seccomp_kaddfd *addfd, *tmp;
mutex_lock(&match->notify_lock);
err = -ENOSYS;
@@ -804,6 +848,7 @@ static int seccomp_do_user_notification(int this_syscall,
n.id = seccomp_next_notify_id(match);
init_completion(&n.ready);
list_add(&n.list, &match->notif->notifications);
+ INIT_LIST_HEAD(&n.addfd);
up(&match->notif->request);
wake_up_poll(&match->wqh, EPOLLIN | EPOLLRDNORM);
@@ -812,14 +857,31 @@ static int seccomp_do_user_notification(int this_syscall,
/*
* This is where we wait for a reply from userspace.
*/
+wait:
err = wait_for_completion_interruptible(&n.ready);
mutex_lock(&match->notify_lock);
if (err == 0) {
+ /* Check if we were woken up by a addfd message */
+ addfd = list_first_entry_or_null(&n.addfd,
+ struct seccomp_kaddfd, list);
+ if (addfd && n.state != SECCOMP_NOTIFY_REPLIED) {
+ seccomp_handle_addfd(addfd);
+ mutex_unlock(&match->notify_lock);
+ goto wait;
+ }
ret = n.val;
err = n.error;
flags = n.flags;
}
+ /* If there were any pending addfd calls, clear them out */
+ list_for_each_entry_safe(addfd, tmp, &n.addfd, list) {
+ /* The process went away before we got a chance to handle it */
+ addfd->ret = -ESRCH;
+ list_del_init(&addfd->list);
+ complete(&addfd->completion);
+ }
+
/*
* Note that it's possible the listener died in between the time when
* we were notified of a respons (or a signal) and when we were able to
@@ -1060,6 +1122,11 @@ static int seccomp_notify_release(struct inode *inode, struct file *file)
knotif->error = -ENOSYS;
knotif->val = 0;
+ /*
+ * We do not need to wake up any pending addfd messages, as
+ * the notifier will do that for us, as this just looks
+ * like a standard reply.
+ */
complete(&knotif->ready);
}
@@ -1224,6 +1291,100 @@ static long seccomp_notify_id_valid(struct seccomp_filter *filter,
return ret;
}
+static long seccomp_notify_addfd(struct seccomp_filter *filter,
+ struct seccomp_notif_addfd __user *uaddfd)
+{
+ struct seccomp_notif_addfd addfd;
+ struct seccomp_knotif *knotif;
+ struct seccomp_kaddfd kaddfd;
+ u64 size;
+ int ret;
+
+ ret = get_user(size, &uaddfd->size);
+ if (ret)
+ return ret;
+
+ ret = copy_struct_from_user(&addfd, sizeof(addfd), uaddfd, size);
+ if (ret)
+ return ret;
+
+ if (addfd.newfd_flags & ~O_CLOEXEC)
+ return -EINVAL;
+
+ if (addfd.flags & ~SECCOMP_ADDFD_FLAG_SETFD)
+ return -EINVAL;
+
+ if (addfd.newfd && !(addfd.flags & SECCOMP_ADDFD_FLAG_SETFD))
+ return -EINVAL;
+
+ kaddfd.file = fget(addfd.srcfd);
+ if (!kaddfd.file)
+ return -EBADF;
+
+ kaddfd.flags = addfd.newfd_flags;
+ kaddfd.fd = (addfd.flags & SECCOMP_ADDFD_FLAG_SETFD) ?
+ addfd.newfd : -1;
+ init_completion(&kaddfd.completion);
+
+ ret = mutex_lock_interruptible(&filter->notify_lock);
+ if (ret < 0)
+ goto out;
+
+ knotif = find_notification(filter, addfd.id);
+ if (!knotif) {
+ ret = -ENOENT;
+ goto out_unlock;
+ }
+
+ /*
+ * We do not want to allow for FD injection to occur before the
+ * notification has been picked up by a userspace handler, or after
+ * the notification has been replied to.
+ */
+ if (knotif->state != SECCOMP_NOTIFY_SENT) {
+ ret = -EINPROGRESS;
+ goto out_unlock;
+ }
+
+ list_add(&kaddfd.list, &knotif->addfd);
+ complete(&knotif->ready);
+ mutex_unlock(&filter->notify_lock);
+
+ /* Now we wait for it to be processed or be interrupted */
+ ret = wait_for_completion_interruptible(&kaddfd.completion);
+ if (ret == 0) {
+ /*
+ * We had a successful completion. The other side has already
+ * removed us from the addfd queue, and
+ * wait_for_completion_interruptible has a memory barrier upon
+ * success that lets us read this value directly without
+ * locking.
+ */
+ ret = kaddfd.ret;
+ goto out;
+ }
+
+ mutex_lock(&filter->notify_lock);
+ /*
+ * Even though we were woken up by a signal and not a successful
+ * completion, a completion may have happened in the mean time.
+ *
+ * We need to check again if the addfd request has been handled,
+ * and if not, we will remove it from the queue.
+ */
+ if (list_empty(&kaddfd.list))
+ ret = kaddfd.ret;
+ else
+ list_del(&kaddfd.list);
+
+out_unlock:
+ mutex_unlock(&filter->notify_lock);
+out:
+ fput(kaddfd.file);
+
+ return ret;
+}
+
static long seccomp_notify_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
@@ -1237,6 +1398,8 @@ static long seccomp_notify_ioctl(struct file *file, unsigned int cmd,
return seccomp_notify_send(filter, buf);
case SECCOMP_IOCTL_NOTIF_ID_VALID:
return seccomp_notify_id_valid(filter, buf);
+ case SECCOMP_IOCTL_NOTIF_ADDFD:
+ return seccomp_notify_addfd(filter, buf);
default:
return -EINVAL;
}
--
2.25.1
When SECCOMP_IOCTL_NOTIF_ID_VALID was first introduced it had the wrong
direction flag set. While this isn't a big deal as nothing currently
enforces these bits in the kernel, it should be defined correctly. Fix
the define and provide support for the old command until it is no longer
needed for backward compatibility.
Fixes: 6a21cc50f0c7 ("seccomp: add a return code to trap to userspace")
Cc: [email protected]
Signed-off-by: Kees Cook <[email protected]>
---
include/uapi/linux/seccomp.h | 2 +-
kernel/seccomp.c | 9 +++++++++
tools/testing/selftests/seccomp/seccomp_bpf.c | 2 +-
3 files changed, 11 insertions(+), 2 deletions(-)
diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index 473a61695ac3..6ba18b82a02e 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -142,7 +142,7 @@ struct seccomp_notif_addfd {
#define SECCOMP_IOCTL_NOTIF_RECV SECCOMP_IOWR(0, struct seccomp_notif)
#define SECCOMP_IOCTL_NOTIF_SEND SECCOMP_IOWR(1, \
struct seccomp_notif_resp)
-#define SECCOMP_IOCTL_NOTIF_ID_VALID SECCOMP_IOR(2, __u64)
+#define SECCOMP_IOCTL_NOTIF_ID_VALID SECCOMP_IOW(2, __u64)
/* On success, the return value is the remote process's added fd number */
#define SECCOMP_IOCTL_NOTIF_ADDFD SECCOMP_IOW(3, \
struct seccomp_notif_addfd)
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 9660abf91135..61e556bca338 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -43,6 +43,14 @@
#include <linux/anon_inodes.h>
#include <linux/lockdep.h>
+/*
+ * When SECCOMP_IOCTL_NOTIF_ID_VALID was first introduced, it had the
+ * wrong direction flag in the ioctl number. This is the broken one,
+ * which the kernel needs to keep supporting until all userspaces stop
+ * using the wrong command number.
+ */
+#define SECCOMP_IOCTL_NOTIF_ID_VALID_WRONG_DIR SECCOMP_IOR(2, __u64)
+
enum notify_state {
SECCOMP_NOTIFY_INIT,
SECCOMP_NOTIFY_SENT,
@@ -1397,6 +1405,7 @@ static long seccomp_notify_ioctl(struct file *file, unsigned int cmd,
return seccomp_notify_recv(filter, buf);
case SECCOMP_IOCTL_NOTIF_SEND:
return seccomp_notify_send(filter, buf);
+ case SECCOMP_IOCTL_NOTIF_ID_VALID_WRONG_DIR:
case SECCOMP_IOCTL_NOTIF_ID_VALID:
return seccomp_notify_id_valid(filter, buf);
}
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index cf1480e498ea..403c6d0f149e 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -184,7 +184,7 @@ struct seccomp_metadata {
#define SECCOMP_IOCTL_NOTIF_RECV SECCOMP_IOWR(0, struct seccomp_notif)
#define SECCOMP_IOCTL_NOTIF_SEND SECCOMP_IOWR(1, \
struct seccomp_notif_resp)
-#define SECCOMP_IOCTL_NOTIF_ID_VALID SECCOMP_IOR(2, __u64)
+#define SECCOMP_IOCTL_NOTIF_ID_VALID SECCOMP_IOW(2, __u64)
struct seccomp_notif {
__u64 id;
--
2.25.1
The seccomp tests are a bit noisy without CONFIG_CHECKPOINT_RESTORE (due
to missing the kcmp() syscall). The seccomp tests are more accurate with
kcmp(), but it's not strictly required. Refactor the tests to use
alternatives (comparing fd numbers), and provide a central test for
kcmp() so there is a single XFAIL instead of many. Continue to produce
warnings for the other tests, though.
Additionally adds some more bad flag EINVAL tests to the addfd selftest.
Signed-off-by: Kees Cook <[email protected]>
---
tools/testing/selftests/seccomp/seccomp_bpf.c | 100 +++++++++++-------
1 file changed, 64 insertions(+), 36 deletions(-)
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index c4e264b37c30..40ed846744e4 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -258,6 +258,27 @@ int seccomp(unsigned int op, unsigned int flags, void *args)
#define SIBLING_EXIT_FAILURE 0xbadface
#define SIBLING_EXIT_NEWPRIVS 0xbadfeed
+static int filecmp(pid_t pid1, pid_t pid2, int fd1, int fd2)
+{
+#ifdef __NR_kcmp
+ errno = 0;
+ return syscall(__NR_kcmp, pid1, pid2, KCMP_FILE, fd1, fd2);
+#else
+ errno = ENOSYS;
+ return -1;
+#endif
+}
+
+TEST(kcmp)
+{
+ int ret;
+
+ ret = filecmp(getpid(), getpid(), 1, 1);
+ EXPECT_EQ(ret, 0);
+ if (ret != 0 && errno == ENOSYS)
+ XFAIL(return, "Kernel does not support kcmp() (missing CONFIG_CHECKPOINT_RESTORE?)");
+}
+
TEST(mode_strict_support)
{
long ret;
@@ -3606,16 +3627,6 @@ TEST(seccomp_get_notif_sizes)
EXPECT_EQ(sizes.seccomp_notif_resp, sizeof(struct seccomp_notif_resp));
}
-static int filecmp(pid_t pid1, pid_t pid2, int fd1, int fd2)
-{
-#ifdef __NR_kcmp
- return syscall(__NR_kcmp, pid1, pid2, KCMP_FILE, fd1, fd2);
-#else
- errno = ENOSYS;
- return -1;
-#endif
-}
-
TEST(user_notification_continue)
{
pid_t pid;
@@ -3640,20 +3651,20 @@ TEST(user_notification_continue)
int dup_fd, pipe_fds[2];
pid_t self;
- ret = pipe(pipe_fds);
- if (ret < 0)
- exit(1);
+ ASSERT_GE(pipe(pipe_fds), 0);
dup_fd = dup(pipe_fds[0]);
- if (dup_fd < 0)
- exit(1);
+ ASSERT_GE(dup_fd, 0);
+ EXPECT_NE(pipe_fds[0], dup_fd);
self = getpid();
-
ret = filecmp(self, self, pipe_fds[0], dup_fd);
- if (ret)
- exit(2);
-
+ if (ret != 0) {
+ if (ret < 0 && errno == ENOSYS) {
+ TH_LOG("kcmp() syscall missing (test is less accurate)");
+ } else
+ ASSERT_EQ(ret, 0);
+ }
exit(0);
}
@@ -3700,12 +3711,7 @@ TEST(user_notification_continue)
skip:
EXPECT_EQ(waitpid(pid, &status, 0), pid);
EXPECT_EQ(true, WIFEXITED(status));
- EXPECT_EQ(0, WEXITSTATUS(status)) {
- if (WEXITSTATUS(status) == 2) {
- XFAIL(return, "Kernel does not support kcmp() syscall");
- return;
- }
- }
+ EXPECT_EQ(0, WEXITSTATUS(status));
}
TEST(user_notification_filter_empty)
@@ -3847,7 +3853,7 @@ TEST(user_notification_sendfd)
{
pid_t pid;
long ret;
- int status, listener, memfd;
+ int status, listener, memfd, fd;
struct seccomp_notif_addfd addfd = {};
struct seccomp_notif req = {};
struct seccomp_notif_resp resp = {};
@@ -3880,34 +3886,56 @@ TEST(user_notification_sendfd)
addfd.size = sizeof(addfd);
addfd.srcfd = memfd;
- addfd.newfd_flags = O_CLOEXEC;
addfd.newfd = 0;
addfd.id = req.id;
- addfd.flags = 0xff;
+ addfd.flags = 0;
+
+ /* Verify bad newfd_flags cannot be set */
+ addfd.newfd_flags = ~O_CLOEXEC;
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
+ EXPECT_EQ(errno, EINVAL);
+ addfd.newfd_flags = O_CLOEXEC;
/* Verify bad flags cannot be set */
+ addfd.flags = 0xff;
EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
EXPECT_EQ(errno, EINVAL);
+ addfd.flags = 0;
/* Verify that remote_fd cannot be set without setting flags */
- addfd.flags = 0;
addfd.newfd = 1;
EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
EXPECT_EQ(errno, EINVAL);
-
- /* Verify we can set an arbitrary remote fd */
addfd.newfd = 0;
- ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
- EXPECT_GE(ret, 0);
- EXPECT_EQ(filecmp(getpid(), pid, memfd, ret), 0);
+ /* Verify we can set an arbitrary remote fd */
+ fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
+ /*
+ * The child has fds 0(stdin), 1(stdout), 2(stderr), 3(memfd),
+ * 4(listener), so the newly allocated fd should be 5.
+ */
+ EXPECT_EQ(fd, 5);
+ ret = filecmp(getpid(), pid, memfd, fd);
+ if (ret != 0) {
+ if (ret < 0 && errno == ENOSYS) {
+ TH_LOG("kcmp() syscall missing (test is less accurate)");
+ } else
+ EXPECT_EQ(ret, 0);
+ }
/* Verify we can set a specific remote fd */
addfd.newfd = 42;
addfd.flags = SECCOMP_ADDFD_FLAG_SETFD;
- EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), 42);
- EXPECT_EQ(filecmp(getpid(), pid, memfd, 42), 0);
+ fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
+ EXPECT_EQ(fd, 42);
+ ret = filecmp(getpid(), pid, memfd, fd);
+ if (ret != 0) {
+ if (ret < 0 && errno == ENOSYS) {
+ TH_LOG("kcmp() syscall missing (test is less accurate)");
+ } else
+ EXPECT_EQ(ret, 0);
+ }
resp.id = req.id;
resp.error = 0;
--
2.25.1
The user_trap_syscall() helper creates a filter with
SECCOMP_RET_USER_NOTIF. To avoid confusion with SECCOMP_RET_TRAP, rename
the helper to user_notif_syscall().
Additionally fix a redundant "return" after XFAIL.
Signed-off-by: Kees Cook <[email protected]>
---
tools/testing/selftests/seccomp/seccomp_bpf.c | 60 +++++++++----------
1 file changed, 29 insertions(+), 31 deletions(-)
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 40ed846744e4..95b134933831 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -3110,10 +3110,8 @@ TEST(get_metadata)
long ret;
/* Only real root can get metadata. */
- if (geteuid()) {
- XFAIL(return, "get_metadata requires real root");
- return;
- }
+ if (geteuid())
+ XFAIL(return, "get_metadata test requires real root");
ASSERT_EQ(0, pipe(pipefd));
@@ -3170,7 +3168,7 @@ TEST(get_metadata)
ASSERT_EQ(0, kill(pid, SIGKILL));
}
-static int user_trap_syscall(int nr, unsigned int flags)
+static int user_notif_syscall(int nr, unsigned int flags)
{
struct sock_filter filter[] = {
BPF_STMT(BPF_LD+BPF_W+BPF_ABS,
@@ -3216,7 +3214,7 @@ TEST(user_notification_basic)
/* Check that we get -ENOSYS with no listener attached */
if (pid == 0) {
- if (user_trap_syscall(__NR_getppid, 0) < 0)
+ if (user_notif_syscall(__NR_getppid, 0) < 0)
exit(1);
ret = syscall(__NR_getppid);
exit(ret >= 0 || errno != ENOSYS);
@@ -3233,13 +3231,13 @@ TEST(user_notification_basic)
EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
/* Check that the basic notification machinery works */
- listener = user_trap_syscall(__NR_getppid,
- SECCOMP_FILTER_FLAG_NEW_LISTENER);
+ listener = user_notif_syscall(__NR_getppid,
+ SECCOMP_FILTER_FLAG_NEW_LISTENER);
ASSERT_GE(listener, 0);
/* Installing a second listener in the chain should EBUSY */
- EXPECT_EQ(user_trap_syscall(__NR_getppid,
- SECCOMP_FILTER_FLAG_NEW_LISTENER),
+ EXPECT_EQ(user_notif_syscall(__NR_getppid,
+ SECCOMP_FILTER_FLAG_NEW_LISTENER),
-1);
EXPECT_EQ(errno, EBUSY);
@@ -3303,12 +3301,12 @@ TEST(user_notification_with_tsync)
/* these were exclusive */
flags = SECCOMP_FILTER_FLAG_NEW_LISTENER |
SECCOMP_FILTER_FLAG_TSYNC;
- ASSERT_EQ(-1, user_trap_syscall(__NR_getppid, flags));
+ ASSERT_EQ(-1, user_notif_syscall(__NR_getppid, flags));
ASSERT_EQ(EINVAL, errno);
/* but now they're not */
flags |= SECCOMP_FILTER_FLAG_TSYNC_ESRCH;
- ret = user_trap_syscall(__NR_getppid, flags);
+ ret = user_notif_syscall(__NR_getppid, flags);
close(ret);
ASSERT_LE(0, ret);
}
@@ -3326,8 +3324,8 @@ TEST(user_notification_kill_in_middle)
TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
}
- listener = user_trap_syscall(__NR_getppid,
- SECCOMP_FILTER_FLAG_NEW_LISTENER);
+ listener = user_notif_syscall(__NR_getppid,
+ SECCOMP_FILTER_FLAG_NEW_LISTENER);
ASSERT_GE(listener, 0);
/*
@@ -3380,8 +3378,8 @@ TEST(user_notification_signal)
ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0);
- listener = user_trap_syscall(__NR_gettid,
- SECCOMP_FILTER_FLAG_NEW_LISTENER);
+ listener = user_notif_syscall(__NR_gettid,
+ SECCOMP_FILTER_FLAG_NEW_LISTENER);
ASSERT_GE(listener, 0);
pid = fork();
@@ -3450,8 +3448,8 @@ TEST(user_notification_closed_listener)
TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
}
- listener = user_trap_syscall(__NR_getppid,
- SECCOMP_FILTER_FLAG_NEW_LISTENER);
+ listener = user_notif_syscall(__NR_getppid,
+ SECCOMP_FILTER_FLAG_NEW_LISTENER);
ASSERT_GE(listener, 0);
/*
@@ -3484,8 +3482,8 @@ TEST(user_notification_child_pid_ns)
ASSERT_EQ(unshare(CLONE_NEWUSER | CLONE_NEWPID), 0);
- listener = user_trap_syscall(__NR_getppid,
- SECCOMP_FILTER_FLAG_NEW_LISTENER);
+ listener = user_notif_syscall(__NR_getppid,
+ SECCOMP_FILTER_FLAG_NEW_LISTENER);
ASSERT_GE(listener, 0);
pid = fork();
@@ -3524,8 +3522,8 @@ TEST(user_notification_sibling_pid_ns)
TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
}
- listener = user_trap_syscall(__NR_getppid,
- SECCOMP_FILTER_FLAG_NEW_LISTENER);
+ listener = user_notif_syscall(__NR_getppid,
+ SECCOMP_FILTER_FLAG_NEW_LISTENER);
ASSERT_GE(listener, 0);
pid = fork();
@@ -3589,8 +3587,8 @@ TEST(user_notification_fault_recv)
ASSERT_EQ(unshare(CLONE_NEWUSER), 0);
- listener = user_trap_syscall(__NR_getppid,
- SECCOMP_FILTER_FLAG_NEW_LISTENER);
+ listener = user_notif_syscall(__NR_getppid,
+ SECCOMP_FILTER_FLAG_NEW_LISTENER);
ASSERT_GE(listener, 0);
pid = fork();
@@ -3641,7 +3639,7 @@ TEST(user_notification_continue)
TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
}
- listener = user_trap_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER);
+ listener = user_notif_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER);
ASSERT_GE(listener, 0);
pid = fork();
@@ -3736,7 +3734,7 @@ TEST(user_notification_filter_empty)
if (pid == 0) {
int listener;
- listener = user_trap_syscall(__NR_mknod, SECCOMP_FILTER_FLAG_NEW_LISTENER);
+ listener = user_notif_syscall(__NR_mknod, SECCOMP_FILTER_FLAG_NEW_LISTENER);
if (listener < 0)
_exit(EXIT_FAILURE);
@@ -3792,7 +3790,7 @@ TEST(user_notification_filter_empty_threaded)
int listener, status;
pthread_t thread;
- listener = user_trap_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER);
+ listener = user_notif_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER);
if (listener < 0)
_exit(EXIT_FAILURE);
@@ -3869,8 +3867,8 @@ TEST(user_notification_sendfd)
}
/* Check that the basic notification machinery works */
- listener = user_trap_syscall(__NR_getppid,
- SECCOMP_FILTER_FLAG_NEW_LISTENER);
+ listener = user_notif_syscall(__NR_getppid,
+ SECCOMP_FILTER_FLAG_NEW_LISTENER);
ASSERT_GE(listener, 0);
pid = fork();
@@ -3993,8 +3991,8 @@ TEST(user_notification_sendfd_rlimit)
}
/* Check that the basic notification machinery works */
- listener = user_trap_syscall(__NR_getppid,
- SECCOMP_FILTER_FLAG_NEW_LISTENER);
+ listener = user_notif_syscall(__NR_getppid,
+ SECCOMP_FILTER_FLAG_NEW_LISTENER);
ASSERT_GE(listener, 0);
pid = fork();
--
2.25.1
For both pidfd and seccomp, the __user pointer is not used. Update
__fd_install_received() to make writing to ufd optional. (ufd
itself cannot checked for NULL because this changes the SCM_RIGHTS
interface behavior.) In these cases, the new fd needs to be returned
on success. Update the existing callers to handle it. Add new wrapper
fd_install_received() for pidfd and seccomp that does not use the ufd
argument.
Signed-off-by: Kees Cook <[email protected]>
---
fs/file.c | 26 +++++++++++++++++---------
include/linux/file.h | 10 +++++++---
net/compat.c | 2 +-
net/core/scm.c | 2 +-
4 files changed, 26 insertions(+), 14 deletions(-)
diff --git a/fs/file.c b/fs/file.c
index fcfddae0d252..14a8ef74efb2 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -944,11 +944,14 @@ int replace_fd(unsigned fd, struct file *file, unsigned flags)
* @o_flags: the O_* flags to apply to the new fd entry
*
* Installs a received file into the file descriptor table, with appropriate
- * checks and count updates. Optionally writes the fd number to userspace.
+ * checks and count updates. Optionally writes the fd number to userspace, if
+ * @ufd_required is true (@ufd cannot just be tested for NULL because NULL may
+ * actually get passed into SCM_RIGHTS).
*
- * Returns -ve on error.
+ * Returns newly install fd or -ve on error.
*/
-int __fd_install_received(struct file *file, int __user *ufd, unsigned int o_flags)
+int __fd_install_received(struct file *file, bool ufd_required, int __user *ufd,
+ unsigned int o_flags)
{
struct socket *sock;
int new_fd;
@@ -962,20 +965,25 @@ int __fd_install_received(struct file *file, int __user *ufd, unsigned int o_fla
if (new_fd < 0)
return new_fd;
- error = put_user(new_fd, ufd);
- if (error) {
- put_unused_fd(new_fd);
- return error;
+ if (ufd_required) {
+ error = put_user(new_fd, ufd);
+ if (error) {
+ put_unused_fd(new_fd);
+ return error;
+ }
}
- /* Bump the usage count and install the file. */
+ /* Bump the usage count and install the file. The resulting value of
+ * "error" is ignored here since we only need to take action when
+ * the file is a socket and testing "sock" for NULL is sufficient.
+ */
sock = sock_from_file(file, &error);
if (sock) {
sock_update_netprioidx(&sock->sk->sk_cgrp_data);
sock_update_classid(&sock->sk->sk_cgrp_data);
}
fd_install(new_fd, get_file(file));
- return 0;
+ return new_fd;
}
static int ksys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
diff --git a/include/linux/file.h b/include/linux/file.h
index fe18a1a0d555..999a2c56db07 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -91,12 +91,16 @@ extern void put_unused_fd(unsigned int fd);
extern void fd_install(unsigned int fd, struct file *file);
-extern int __fd_install_received(struct file *file, int __user *ufd,
- unsigned int o_flags);
+extern int __fd_install_received(struct file *file, bool ufd_required,
+ int __user *ufd, unsigned int o_flags);
static inline int fd_install_received_user(struct file *file, int __user *ufd,
unsigned int o_flags)
{
- return __fd_install_received(file, ufd, o_flags);
+ return __fd_install_received(file, true, ufd, o_flags);
+}
+static inline int fd_install_received(struct file *file, unsigned int o_flags)
+{
+ return __fd_install_received(file, false, NULL, o_flags);
}
extern void flush_delayed_fput(void);
diff --git a/net/compat.c b/net/compat.c
index 94f288e8dac5..71494337cca7 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -299,7 +299,7 @@ void scm_detach_fds_compat(struct msghdr *msg, struct scm_cookie *scm)
for (i = 0; i < fdmax; i++) {
err = fd_install_received_user(scm->fp->fp[i], cmsg_data + i, o_flags);
- if (err)
+ if (err < 0)
break;
}
diff --git a/net/core/scm.c b/net/core/scm.c
index df190f1fdd28..b9a0442ebd26 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -307,7 +307,7 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
for (i = 0; i < fdmax; i++) {
err = fd_install_received_user(scm->fp->fp[i], cmsg_data + i, o_flags);
- if (err)
+ if (err < 0)
break;
}
--
2.25.1
From: Sargun Dhillon <[email protected]>
Test whether we can add file descriptors in response to notifications.
This injects the file descriptors via notifications, and then uses
kcmp to determine whether or not it has been successful.
It also includes some basic sanity checking for arguments.
Signed-off-by: Sargun Dhillon <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Kees Cook <[email protected]>
---
tools/testing/selftests/seccomp/seccomp_bpf.c | 188 ++++++++++++++++++
1 file changed, 188 insertions(+)
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 402ccb3a4e52..c4e264b37c30 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -45,6 +45,7 @@
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <linux/kcmp.h>
+#include <sys/resource.h>
#include <unistd.h>
#include <sys/syscall.h>
@@ -168,7 +169,9 @@ struct seccomp_metadata {
#ifndef SECCOMP_FILTER_FLAG_NEW_LISTENER
#define SECCOMP_FILTER_FLAG_NEW_LISTENER (1UL << 3)
+#endif
+#ifndef SECCOMP_RET_USER_NOTIF
#define SECCOMP_RET_USER_NOTIF 0x7fc00000U
#define SECCOMP_IOC_MAGIC '!'
@@ -204,6 +207,24 @@ struct seccomp_notif_sizes {
};
#endif
+#ifndef SECCOMP_IOCTL_NOTIF_ADDFD
+/* On success, the return value is the remote process's added fd number */
+#define SECCOMP_IOCTL_NOTIF_ADDFD SECCOMP_IOW(3, \
+ struct seccomp_notif_addfd)
+
+/* valid flags for seccomp_notif_addfd */
+#define SECCOMP_ADDFD_FLAG_SETFD (1UL << 0) /* Specify remote fd */
+
+struct seccomp_notif_addfd {
+ __u64 size;
+ __u64 id;
+ __u32 flags;
+ __u32 srcfd;
+ __u32 newfd;
+ __u32 newfd_flags;
+};
+#endif
+
#ifndef PTRACE_EVENTMSG_SYSCALL_ENTRY
#define PTRACE_EVENTMSG_SYSCALL_ENTRY 1
#define PTRACE_EVENTMSG_SYSCALL_EXIT 2
@@ -3822,6 +3843,173 @@ TEST(user_notification_filter_empty_threaded)
EXPECT_GT((pollfd.revents & POLLHUP) ?: 0, 0);
}
+TEST(user_notification_sendfd)
+{
+ pid_t pid;
+ long ret;
+ int status, listener, memfd;
+ struct seccomp_notif_addfd addfd = {};
+ struct seccomp_notif req = {};
+ struct seccomp_notif_resp resp = {};
+ /* 100 ms */
+ struct timespec delay = { .tv_nsec = 100000000 };
+
+ memfd = memfd_create("test", 0);
+ ASSERT_GE(memfd, 0);
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ /* Check that the basic notification machinery works */
+ listener = user_trap_syscall(__NR_getppid,
+ SECCOMP_FILTER_FLAG_NEW_LISTENER);
+ ASSERT_GE(listener, 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ if (syscall(__NR_getppid) != USER_NOTIF_MAGIC)
+ exit(1);
+ exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
+ }
+
+ ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
+
+ addfd.size = sizeof(addfd);
+ addfd.srcfd = memfd;
+ addfd.newfd_flags = O_CLOEXEC;
+ addfd.newfd = 0;
+ addfd.id = req.id;
+ addfd.flags = 0xff;
+
+ /* Verify bad flags cannot be set */
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
+ EXPECT_EQ(errno, EINVAL);
+
+ /* Verify that remote_fd cannot be set without setting flags */
+ addfd.flags = 0;
+ addfd.newfd = 1;
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
+ EXPECT_EQ(errno, EINVAL);
+
+ /* Verify we can set an arbitrary remote fd */
+ addfd.newfd = 0;
+
+ ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
+ EXPECT_GE(ret, 0);
+ EXPECT_EQ(filecmp(getpid(), pid, memfd, ret), 0);
+
+ /* Verify we can set a specific remote fd */
+ addfd.newfd = 42;
+ addfd.flags = SECCOMP_ADDFD_FLAG_SETFD;
+
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), 42);
+ EXPECT_EQ(filecmp(getpid(), pid, memfd, 42), 0);
+
+ resp.id = req.id;
+ resp.error = 0;
+ resp.val = USER_NOTIF_MAGIC;
+
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
+
+ /*
+ * This sets the ID of the ADD FD to the last request plus 1. The
+ * notification ID increments 1 per notification.
+ */
+ addfd.id = req.id + 1;
+
+ /* This spins until the underlying notification is generated */
+ while (ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd) != -1 &&
+ errno != -EINPROGRESS)
+ nanosleep(&delay, NULL);
+
+ memset(&req, 0, sizeof(req));
+ ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
+ ASSERT_EQ(addfd.id, req.id);
+
+ resp.id = req.id;
+ resp.error = 0;
+ resp.val = USER_NOTIF_MAGIC;
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
+
+
+ EXPECT_EQ(waitpid(pid, &status, 0), pid);
+ EXPECT_EQ(true, WIFEXITED(status));
+ EXPECT_EQ(0, WEXITSTATUS(status));
+
+ close(memfd);
+}
+
+TEST(user_notification_sendfd_rlimit)
+{
+ pid_t pid;
+ long ret;
+ int status, listener, memfd;
+ struct seccomp_notif_addfd addfd = {};
+ struct seccomp_notif req = {};
+ struct seccomp_notif_resp resp = {};
+ const struct rlimit lim = {
+ .rlim_cur = 0,
+ .rlim_max = 0,
+ };
+
+ memfd = memfd_create("test", 0);
+ ASSERT_GE(memfd, 0);
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ /* Check that the basic notification machinery works */
+ listener = user_trap_syscall(__NR_getppid,
+ SECCOMP_FILTER_FLAG_NEW_LISTENER);
+ ASSERT_GE(listener, 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0)
+ exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
+
+
+ ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
+
+ ASSERT_EQ(prlimit(pid, RLIMIT_NOFILE, &lim, NULL), 0);
+
+ addfd.size = sizeof(addfd);
+ addfd.srcfd = memfd;
+ addfd.newfd_flags = O_CLOEXEC;
+ addfd.newfd = 0;
+ addfd.id = req.id;
+ addfd.flags = 0;
+
+ /* Should probably spot check /proc/sys/fs/file-nr */
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
+ EXPECT_EQ(errno, EMFILE);
+
+ addfd.newfd = 100;
+ addfd.flags = SECCOMP_ADDFD_FLAG_SETFD;
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
+ EXPECT_EQ(errno, EBADF);
+
+ resp.id = req.id;
+ resp.error = 0;
+ resp.val = USER_NOTIF_MAGIC;
+
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
+
+
+ EXPECT_EQ(waitpid(pid, &status, 0), pid);
+ EXPECT_EQ(true, WIFEXITED(status));
+ EXPECT_EQ(0, WEXITSTATUS(status));
+
+ close(memfd);
+}
+
/*
* TODO:
* - expand NNP testing
--
2.25.1
Expand __fd_install_received() with support for replace_fd() for the
coming seccomp "addfd" ioctl(). Add new wrapper fd_replace_received()
for the new mode and update existing wrappers to retain old mode.
Signed-off-by: Kees Cook <[email protected]>
---
fs/file.c | 22 ++++++++++++++++------
include/linux/file.h | 10 +++++++---
2 files changed, 23 insertions(+), 9 deletions(-)
diff --git a/fs/file.c b/fs/file.c
index 14a8ef74efb2..b583e7c60571 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -950,8 +950,8 @@ int replace_fd(unsigned fd, struct file *file, unsigned flags)
*
* Returns newly install fd or -ve on error.
*/
-int __fd_install_received(struct file *file, bool ufd_required, int __user *ufd,
- unsigned int o_flags)
+int __fd_install_received(int fd, struct file *file, bool ufd_required,
+ int __user *ufd, unsigned int o_flags)
{
struct socket *sock;
int new_fd;
@@ -961,9 +961,11 @@ int __fd_install_received(struct file *file, bool ufd_required, int __user *ufd,
if (error)
return error;
- new_fd = get_unused_fd_flags(o_flags);
- if (new_fd < 0)
- return new_fd;
+ if (fd < 0) {
+ new_fd = get_unused_fd_flags(o_flags);
+ if (new_fd < 0)
+ return new_fd;
+ }
if (ufd_required) {
error = put_user(new_fd, ufd);
@@ -973,6 +975,15 @@ int __fd_install_received(struct file *file, bool ufd_required, int __user *ufd,
}
}
+ if (fd < 0)
+ fd_install(new_fd, get_file(file));
+ else {
+ new_fd = fd;
+ error = replace_fd(new_fd, file, o_flags);
+ if (error)
+ return error;
+ }
+
/* Bump the usage count and install the file. The resulting value of
* "error" is ignored here since we only need to take action when
* the file is a socket and testing "sock" for NULL is sufficient.
@@ -982,7 +993,6 @@ int __fd_install_received(struct file *file, bool ufd_required, int __user *ufd,
sock_update_netprioidx(&sock->sk->sk_cgrp_data);
sock_update_classid(&sock->sk->sk_cgrp_data);
}
- fd_install(new_fd, get_file(file));
return new_fd;
}
diff --git a/include/linux/file.h b/include/linux/file.h
index 999a2c56db07..f1d16e24a12e 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -91,16 +91,20 @@ extern void put_unused_fd(unsigned int fd);
extern void fd_install(unsigned int fd, struct file *file);
-extern int __fd_install_received(struct file *file, bool ufd_required,
+extern int __fd_install_received(int fd, struct file *file, bool ufd_required,
int __user *ufd, unsigned int o_flags);
static inline int fd_install_received_user(struct file *file, int __user *ufd,
unsigned int o_flags)
{
- return __fd_install_received(file, true, ufd, o_flags);
+ return __fd_install_received(-1, file, true, ufd, o_flags);
}
static inline int fd_install_received(struct file *file, unsigned int o_flags)
{
- return __fd_install_received(file, false, NULL, o_flags);
+ return __fd_install_received(-1, file, false, NULL, o_flags);
+}
+static inline int fd_replace_received(int fd, struct file *file, unsigned int o_flags)
+{
+ return __fd_install_received(fd, file, false, NULL, o_flags);
}
extern void flush_delayed_fput(void);
--
2.25.1
On Mon, Jun 15, 2020 at 08:25:23PM -0700, Kees Cook wrote:
> This patch is based on discussions[1] with Sargun Dhillon, Christian
> Brauner, and David Laight. Instead of building size into the addfd
> structure, make it a function of the ioctl command (which is how sizes are
> normally passed to ioctls). To support forward and backward compatibility,
> just mask out the direction and size, and match everything. The size (and
> any future direction) checks are done along with copy_struct_from_user()
> logic. Also update the selftests to check size bounds.
>
> [1] https://lore.kernel.org/lkml/[email protected]
>
> Signed-off-by: Kees Cook <[email protected]>
> ---
> include/uapi/linux/seccomp.h | 2 -
> kernel/seccomp.c | 21 ++++++----
> tools/testing/selftests/seccomp/seccomp_bpf.c | 40 ++++++++++++++++---
> 3 files changed, 49 insertions(+), 14 deletions(-)
>
> diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
> index c347160378e5..473a61695ac3 100644
> --- a/include/uapi/linux/seccomp.h
> +++ b/include/uapi/linux/seccomp.h
> @@ -118,7 +118,6 @@ struct seccomp_notif_resp {
>
> /**
> * struct seccomp_notif_addfd
> - * @size: The size of the seccomp_notif_addfd structure
> * @id: The ID of the seccomp notification
> * @flags: SECCOMP_ADDFD_FLAG_*
> * @srcfd: The local fd number
> @@ -126,7 +125,6 @@ struct seccomp_notif_resp {
> * @newfd_flags: The O_* flags the remote FD should have applied
> */
> struct seccomp_notif_addfd {
> - __u64 size;
Huh? Won't this break builds?
Tycho
On Mon, Jun 15, 2020 at 08:25:21PM -0700, Kees Cook wrote:
> The seccomp tests are a bit noisy without CONFIG_CHECKPOINT_RESTORE (due
> to missing the kcmp() syscall). The seccomp tests are more accurate with
> kcmp(), but it's not strictly required. Refactor the tests to use
> alternatives (comparing fd numbers), and provide a central test for
> kcmp() so there is a single XFAIL instead of many. Continue to produce
> warnings for the other tests, though.
>
> Additionally adds some more bad flag EINVAL tests to the addfd selftest.
>
> Signed-off-by: Kees Cook <[email protected]>
This looks fine, but I wonder if this is enough motivation for taking
kcmp() out of CONFIG_CHECKPOINT_RESTORE guards?
Tycho
On Tue, Jun 16, 2020 at 08:57:25AM -0600, Tycho Andersen wrote:
> On Mon, Jun 15, 2020 at 08:25:21PM -0700, Kees Cook wrote:
> > The seccomp tests are a bit noisy without CONFIG_CHECKPOINT_RESTORE (due
> > to missing the kcmp() syscall). The seccomp tests are more accurate with
> > kcmp(), but it's not strictly required. Refactor the tests to use
> > alternatives (comparing fd numbers), and provide a central test for
> > kcmp() so there is a single XFAIL instead of many. Continue to produce
> > warnings for the other tests, though.
> >
> > Additionally adds some more bad flag EINVAL tests to the addfd selftest.
> >
> > Signed-off-by: Kees Cook <[email protected]>
>
> This looks fine, but I wonder if this is enough motivation for taking
> kcmp() out of CONFIG_CHECKPOINT_RESTORE guards?
Do you mean in the kernel? I'd rather not -- it's a relatively powerful
primitive. Maybe if there were other users needing it, but there doesn't
seem to have been much demand.
--
Kees Cook
On Tue, Jun 16, 2020 at 08:55:46AM -0600, Tycho Andersen wrote:
> On Mon, Jun 15, 2020 at 08:25:23PM -0700, Kees Cook wrote:
> > This patch is based on discussions[1] with Sargun Dhillon, Christian
> > Brauner, and David Laight. Instead of building size into the addfd
> > structure, make it a function of the ioctl command (which is how sizes are
> > normally passed to ioctls). To support forward and backward compatibility,
> > just mask out the direction and size, and match everything. The size (and
> > any future direction) checks are done along with copy_struct_from_user()
> > logic. Also update the selftests to check size bounds.
> >
> > [1] https://lore.kernel.org/lkml/[email protected]
> >
> > Signed-off-by: Kees Cook <[email protected]>
> > ---
> > include/uapi/linux/seccomp.h | 2 -
> > kernel/seccomp.c | 21 ++++++----
> > tools/testing/selftests/seccomp/seccomp_bpf.c | 40 ++++++++++++++++---
> > 3 files changed, 49 insertions(+), 14 deletions(-)
> >
> > diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
> > index c347160378e5..473a61695ac3 100644
> > --- a/include/uapi/linux/seccomp.h
> > +++ b/include/uapi/linux/seccomp.h
> > @@ -118,7 +118,6 @@ struct seccomp_notif_resp {
> >
> > /**
> > * struct seccomp_notif_addfd
> > - * @size: The size of the seccomp_notif_addfd structure
> > * @id: The ID of the seccomp notification
> > * @flags: SECCOMP_ADDFD_FLAG_*
> > * @srcfd: The local fd number
> > @@ -126,7 +125,6 @@ struct seccomp_notif_resp {
> > * @newfd_flags: The O_* flags the remote FD should have applied
> > */
> > struct seccomp_notif_addfd {
> > - __u64 size;
>
> Huh? Won't this break builds?
Only if they use addfd without this patch? :) Are you saying I should
collapse this patch into the main addfd and test patches?
--
Kees Cook
On Tue, Jun 16, 2020 at 09:05:29AM -0700, Kees Cook wrote:
> On Tue, Jun 16, 2020 at 08:55:46AM -0600, Tycho Andersen wrote:
> > On Mon, Jun 15, 2020 at 08:25:23PM -0700, Kees Cook wrote:
> > > This patch is based on discussions[1] with Sargun Dhillon, Christian
> > > Brauner, and David Laight. Instead of building size into the addfd
> > > structure, make it a function of the ioctl command (which is how sizes are
> > > normally passed to ioctls). To support forward and backward compatibility,
> > > just mask out the direction and size, and match everything. The size (and
> > > any future direction) checks are done along with copy_struct_from_user()
> > > logic. Also update the selftests to check size bounds.
> > >
> > > [1] https://lore.kernel.org/lkml/[email protected]
> > >
> > > Signed-off-by: Kees Cook <[email protected]>
> > > ---
> > > include/uapi/linux/seccomp.h | 2 -
> > > kernel/seccomp.c | 21 ++++++----
> > > tools/testing/selftests/seccomp/seccomp_bpf.c | 40 ++++++++++++++++---
> > > 3 files changed, 49 insertions(+), 14 deletions(-)
> > >
> > > diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
> > > index c347160378e5..473a61695ac3 100644
> > > --- a/include/uapi/linux/seccomp.h
> > > +++ b/include/uapi/linux/seccomp.h
> > > @@ -118,7 +118,6 @@ struct seccomp_notif_resp {
> > >
> > > /**
> > > * struct seccomp_notif_addfd
> > > - * @size: The size of the seccomp_notif_addfd structure
> > > * @id: The ID of the seccomp notification
> > > * @flags: SECCOMP_ADDFD_FLAG_*
> > > * @srcfd: The local fd number
> > > @@ -126,7 +125,6 @@ struct seccomp_notif_resp {
> > > * @newfd_flags: The O_* flags the remote FD should have applied
> > > */
> > > struct seccomp_notif_addfd {
> > > - __u64 size;
> >
> > Huh? Won't this break builds?
>
> Only if they use addfd without this patch? :) Are you saying I should
> collapse this patch into the main addfd and test patches?
Oh, derp, I see :) Yeah, maybe that would be good.
Tycho
On Mon, Jun 15, 2020 at 08:25:22PM -0700, Kees Cook wrote:
> The user_trap_syscall() helper creates a filter with
> SECCOMP_RET_USER_NOTIF. To avoid confusion with SECCOMP_RET_TRAP, rename
> the helper to user_notif_syscall().
>
> Additionally fix a redundant "return" after XFAIL.
>
> Signed-off-by: Kees Cook <[email protected]>
Reviewed-by: Tycho Andersen <[email protected]>
From: Kees Cook
> Sent: 16 June 2020 04:25
>
> For both pidfd and seccomp, the __user pointer is not used. Update
> __fd_install_received() to make writing to ufd optional. (ufd
> itself cannot checked for NULL because this changes the SCM_RIGHTS
> interface behavior.) In these cases, the new fd needs to be returned
> on success. Update the existing callers to handle it. Add new wrapper
> fd_install_received() for pidfd and seccomp that does not use the ufd
> argument.
...>
> static inline int fd_install_received_user(struct file *file, int __user *ufd,
> unsigned int o_flags)
> {
> - return __fd_install_received(file, ufd, o_flags);
> + return __fd_install_received(file, true, ufd, o_flags);
> +}
Can you get rid of the 'return user' parameter by adding
if (!ufd) return -EFAULT;
to the above wrapper, then checking for NULL in the function?
Or does that do the wrong horrid things in the fail path?
David
-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
On Wed, Jun 17, 2020 at 03:35:20PM +0000, David Laight wrote:
> From: Kees Cook
> > Sent: 16 June 2020 04:25
> >
> > For both pidfd and seccomp, the __user pointer is not used. Update
> > __fd_install_received() to make writing to ufd optional. (ufd
> > itself cannot checked for NULL because this changes the SCM_RIGHTS
> > interface behavior.) In these cases, the new fd needs to be returned
> > on success. Update the existing callers to handle it. Add new wrapper
> > fd_install_received() for pidfd and seccomp that does not use the ufd
> > argument.
> ...>
> > static inline int fd_install_received_user(struct file *file, int __user *ufd,
> > unsigned int o_flags)
> > {
> > - return __fd_install_received(file, ufd, o_flags);
> > + return __fd_install_received(file, true, ufd, o_flags);
> > +}
>
> Can you get rid of the 'return user' parameter by adding
> if (!ufd) return -EFAULT;
> to the above wrapper, then checking for NULL in the function?
>
> Or does that do the wrong horrid things in the fail path?
Oh, hm. No, that shouldn't break the failure path, since everything gets
unwound in __fd_install_received if the ufd write fails.
Effectively this (I'll chop it up into the correct patches):
diff --git a/fs/file.c b/fs/file.c
index b583e7c60571..3b80324a31cc 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -939,18 +939,16 @@ int replace_fd(unsigned fd, struct file *file, unsigned flags)
*
* @fd: fd to install into (if negative, a new fd will be allocated)
* @file: struct file that was received from another process
- * @ufd_required: true to use @ufd for writing fd number to userspace
* @ufd: __user pointer to write new fd number to
* @o_flags: the O_* flags to apply to the new fd entry
*
* Installs a received file into the file descriptor table, with appropriate
* checks and count updates. Optionally writes the fd number to userspace, if
- * @ufd_required is true (@ufd cannot just be tested for NULL because NULL may
- * actually get passed into SCM_RIGHTS).
+ * @ufd is non-NULL.
*
* Returns newly install fd or -ve on error.
*/
-int __fd_install_received(int fd, struct file *file, bool ufd_required,
+int __fd_install_received(int fd, struct file *file,
int __user *ufd, unsigned int o_flags)
{
struct socket *sock;
@@ -967,7 +965,7 @@ int __fd_install_received(int fd, struct file *file, bool ufd_required,
return new_fd;
}
- if (ufd_required) {
+ if (ufd) {
error = put_user(new_fd, ufd);
if (error) {
put_unused_fd(new_fd);
diff --git a/include/linux/file.h b/include/linux/file.h
index f1d16e24a12e..2ade0d90bc5e 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -91,20 +91,22 @@ extern void put_unused_fd(unsigned int fd);
extern void fd_install(unsigned int fd, struct file *file);
-extern int __fd_install_received(int fd, struct file *file, bool ufd_required,
+extern int __fd_install_received(int fd, struct file *file,
int __user *ufd, unsigned int o_flags);
static inline int fd_install_received_user(struct file *file, int __user *ufd,
unsigned int o_flags)
{
- return __fd_install_received(-1, file, true, ufd, o_flags);
+ if (ufd == NULL)
+ return -EFAULT;
+ return __fd_install_received(-1, file, ufd, o_flags);
}
static inline int fd_install_received(struct file *file, unsigned int o_flags)
{
- return __fd_install_received(-1, file, false, NULL, o_flags);
+ return __fd_install_received(-1, file, NULL, o_flags);
}
static inline int fd_replace_received(int fd, struct file *file, unsigned int o_flags)
{
- return __fd_install_received(fd, file, false, NULL, o_flags);
+ return __fd_install_received(fd, file, NULL, o_flags);
}
extern void flush_delayed_fput(void);
--
Kees Cook
From: Kees Cook
> Sent: 17 June 2020 20:58
> On Wed, Jun 17, 2020 at 03:35:20PM +0000, David Laight wrote:
> > From: Kees Cook
> > > Sent: 16 June 2020 04:25
> > >
> > > For both pidfd and seccomp, the __user pointer is not used. Update
> > > __fd_install_received() to make writing to ufd optional. (ufd
> > > itself cannot checked for NULL because this changes the SCM_RIGHTS
> > > interface behavior.) In these cases, the new fd needs to be returned
> > > on success. Update the existing callers to handle it. Add new wrapper
> > > fd_install_received() for pidfd and seccomp that does not use the ufd
> > > argument.
> > ...>
> > > static inline int fd_install_received_user(struct file *file, int __user *ufd,
> > > unsigned int o_flags)
> > > {
> > > - return __fd_install_received(file, ufd, o_flags);
> > > + return __fd_install_received(file, true, ufd, o_flags);
> > > +}
> >
> > Can you get rid of the 'return user' parameter by adding
> > if (!ufd) return -EFAULT;
> > to the above wrapper, then checking for NULL in the function?
> >
> > Or does that do the wrong horrid things in the fail path?
>
> Oh, hm. No, that shouldn't break the failure path, since everything gets
> unwound in __fd_install_received if the ufd write fails.
>
> Effectively this (I'll chop it up into the correct patches):
Yep, that's what i was thinking...
Personally I'm not sure that it matters whether the file is left
attached to a process fd when the copy_to_user() fails.
The kernel data structures are consistent either way.
So sane code relies on catching SIGSEGV, fixing thigs up,
and carrying on.
(IIRC the original /bin/sh code called sbrk() in its SIGSEGV
handler instead of doing the limit check in malloc()!)
The important error path is 'failing to get an fd number'.
In that case the caller needs to keep the 'file *' or close it.
I've not looked at the code, but I wonder if you need to pass
the 'file *' by reference so that you can consume it (write NULL)
and return an error.
David
-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
On Mon, Jun 15, 2020 at 08:25:13PM -0700, Kees Cook wrote:
> Hello!
>
> This is a bit of thread-merge between [1] and [2]. tl;dr: add a way for
> a seccomp user_notif process manager to inject files into the managed
> process in order to handle emulation of various fd-returning syscalls
> across security boundaries. Containers folks and Chrome are in need
> of the feature, and investigating this solution uncovered (and fixed)
> implementation issues with existing file sending routines.
>
> I intend to carry this in the seccomp tree, unless someone has objections.
> :) Please review and test!
>
> -Kees
>
> [1] https://lore.kernel.org/lkml/[email protected]/
> [2] https://lore.kernel.org/lkml/[email protected]/
>
> Kees Cook (9):
> net/scm: Regularize compat handling of scm_detach_fds()
> fs: Move __scm_install_fd() to __fd_install_received()
> fs: Add fd_install_received() wrapper for __fd_install_received()
> pidfd: Replace open-coded partial fd_install_received()
> fs: Expand __fd_install_received() to accept fd
> selftests/seccomp: Make kcmp() less required
> selftests/seccomp: Rename user_trap_syscall() to user_notif_syscall()
> seccomp: Switch addfd to Extensible Argument ioctl
> seccomp: Fix ioctl number for SECCOMP_IOCTL_NOTIF_ID_VALID
>
This looks much cleaner than the original patchset. Thanks.
Reviewed-by: Sargun Dhillon <[email protected]>
on the pidfd, change fs* changes.
> Sargun Dhillon (2):
> seccomp: Introduce addfd ioctl to seccomp user notifier
> selftests/seccomp: Test SECCOMP_IOCTL_NOTIF_ADDFD
>
> fs/file.c | 65 ++++
> include/linux/file.h | 16 +
> include/uapi/linux/seccomp.h | 25 +-
> kernel/pid.c | 11 +-
> kernel/seccomp.c | 181 ++++++++-
> net/compat.c | 55 ++-
> net/core/scm.c | 50 +--
> tools/testing/selftests/seccomp/seccomp_bpf.c | 350 +++++++++++++++---
> 8 files changed, 618 insertions(+), 135 deletions(-)
>
> --
> 2.25.1
>