2019-12-09 07:08:49

by Sargun Dhillon

[permalink] [raw]
Subject: [PATCH v2 4/4] samples: Add example of using PTRACE_GETFD in conjunction with user trap

This sample adds the usage of SECCOMP_RET_USER_NOTIF together with ptrace
PTRACE_GETFD. It shows trapping a syscall, and handling it by extracting
the FD into the parent process without stopping the child process.
Although, in this example, there's no explicit policy separation in
the two processes, it can be generalized into the example of a transparent
proxy.

Signed-off-by: Sargun Dhillon <[email protected]>
---
samples/seccomp/.gitignore | 1 +
samples/seccomp/Makefile | 9 +-
samples/seccomp/user-trap-ptrace.c | 193 +++++++++++++++++++++++++++++
3 files changed, 202 insertions(+), 1 deletion(-)
create mode 100644 samples/seccomp/user-trap-ptrace.c

diff --git a/samples/seccomp/.gitignore b/samples/seccomp/.gitignore
index d1e2e817d556..169bc130ec39 100644
--- a/samples/seccomp/.gitignore
+++ b/samples/seccomp/.gitignore
@@ -2,3 +2,4 @@ bpf-direct
bpf-fancy
dropper
user-trap
+user-trap-ptrace
diff --git a/samples/seccomp/Makefile b/samples/seccomp/Makefile
index 82b7347318d1..c0f3ef713f5b 100644
--- a/samples/seccomp/Makefile
+++ b/samples/seccomp/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
ifndef CROSS_COMPILE
-hostprogs-y := bpf-fancy dropper bpf-direct user-trap
+hostprogs-y := bpf-fancy dropper bpf-direct user-trap user-trap-ptrace

HOSTCFLAGS_bpf-fancy.o += -I$(objtree)/usr/include
HOSTCFLAGS_bpf-fancy.o += -idirafter $(objtree)/include
@@ -24,6 +24,11 @@ HOSTCFLAGS_user-trap.o += -I$(objtree)/usr/include
HOSTCFLAGS_user-trap.o += -idirafter $(objtree)/include
user-trap-objs := user-trap.o user-trap-helper.o

+HOSTCFLAGS_user-trap-ptrace.o += -I$(objtree)/usr/include
+HOSTCFLAGS_user-trap-ptrace.o += -idirafter $(objtree)/include
+user-trap-ptrace-objs := user-trap-ptrace.o user-trap-helper.o
+
+
# Try to match the kernel target.
ifndef CONFIG_64BIT

@@ -39,10 +44,12 @@ HOSTCFLAGS_dropper.o += $(MFLAG)
HOSTCFLAGS_bpf-helper.o += $(MFLAG)
HOSTCFLAGS_bpf-fancy.o += $(MFLAG)
HOSTCFLAGS_user-trap.o += $(MFLAG)
+HOSTCFLAGS_user-trap-ptrace.o += $(MFLAG)
HOSTLDLIBS_bpf-direct += $(MFLAG)
HOSTLDLIBS_bpf-fancy += $(MFLAG)
HOSTLDLIBS_dropper += $(MFLAG)
HOSTLDLIBS_user-trap += $(MFLAG)
+HOSTLDLIBS_user-trap-ptrace += $(MFLAG)
endif
always := $(hostprogs-y)
endif
diff --git a/samples/seccomp/user-trap-ptrace.c b/samples/seccomp/user-trap-ptrace.c
new file mode 100644
index 000000000000..5cca1cb4916c
--- /dev/null
+++ b/samples/seccomp/user-trap-ptrace.c
@@ -0,0 +1,193 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/seccomp.h>
+#include <linux/ptrace.h>
+#include <linux/prctl.h>
+#include <sys/socket.h>
+#include <sys/prctl.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/ioctl.h>
+#include <assert.h>
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <netinet/in.h>
+#include "user-trap-helper.h"
+
+#define CHILD_PORT_TRY_BIND 80
+#define CHILD_PORT_ACTUAL_BIND 4998
+
+static int ptrace(long request, long pid, void *addr, unsigned long data)
+{
+ errno = 0;
+ return syscall(__NR_ptrace, request, pid, addr, data);
+}
+
+static int ptrace_getfd(long pid, struct ptrace_getfd_args *args)
+{
+ errno = 0;
+ return syscall(__NR_ptrace, PTRACE_GETFD, pid, sizeof(*args), args);
+}
+
+static int tracee(void)
+{
+ struct sockaddr_in addr = {
+ .sin_family = AF_INET,
+ .sin_port = htons(CHILD_PORT_TRY_BIND),
+ .sin_addr = {
+ .s_addr = htonl(INADDR_ANY)
+ }
+ };
+ socklen_t addrlen = sizeof(addr);
+ int sock, ret = 1;
+
+ sock = socket(AF_INET, SOCK_STREAM, 0);
+ if (sock == -1) {
+ perror("socket");
+ goto out;
+ }
+
+
+ if (bind(sock, (struct sockaddr *) &addr, sizeof(addr))) {
+ perror("bind");
+ goto out;
+ }
+
+ printf("Child successfully performed bind operation\n");
+ if (getsockname(sock, (struct sockaddr *) &addr, &addrlen)) {
+ perror("getsockname");
+ goto out;
+ }
+
+
+ printf("Socket bound to port %d\n", ntohs(addr.sin_port));
+ assert(ntohs(addr.sin_port) == CHILD_PORT_ACTUAL_BIND);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+static int handle_req(int listener)
+{
+ struct sockaddr_in addr = {
+ .sin_family = AF_INET,
+ .sin_port = htons(4998),
+ .sin_addr = {
+ .s_addr = htonl(INADDR_LOOPBACK)
+ }
+ };
+ struct ptrace_getfd_args getfd_args = {
+ .options = PTRACE_GETFD_O_CLOEXEC
+ };
+ struct seccomp_notif_sizes sizes;
+ struct seccomp_notif_resp *resp;
+ struct seccomp_notif *req;
+ int fd, ret = 1;
+
+ if (seccomp(SECCOMP_GET_NOTIF_SIZES, 0, &sizes) < 0) {
+ perror("seccomp(GET_NOTIF_SIZES)");
+ goto out;
+ }
+ req = malloc(sizes.seccomp_notif);
+ if (!req)
+ goto out;
+ memset(req, 0, sizeof(*req));
+
+ resp = malloc(sizes.seccomp_notif_resp);
+ if (!resp)
+ goto out_free_req;
+ memset(resp, 0, sizeof(*resp));
+
+ if (ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, req)) {
+ perror("ioctl recv");
+ goto out;
+ }
+ printf("Child tried to call bind with fd: %lld\n", req->data.args[0]);
+ getfd_args.fd = req->data.args[0];
+ fd = ptrace_getfd(req->pid, &getfd_args);
+ if (fd == -1) {
+ perror("ptrace_getfd");
+ goto out_free_resp;
+ }
+ if (bind(fd, (struct sockaddr *) &addr, sizeof(addr))) {
+ perror("bind");
+ goto out_free_resp;
+ }
+
+ resp->id = req->id;
+ resp->error = 0;
+ resp->val = 0;
+ if (ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, resp) < 0) {
+ perror("ioctl send");
+ goto out_free_resp;
+ }
+
+ ret = 0;
+out_free_resp:
+ free(resp);
+out_free_req:
+ free(req);
+out:
+ return ret;
+}
+
+int main(void)
+{
+ int listener, sk_pair[2], ret = 1;
+ pid_t pid;
+
+ if (socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair) < 0) {
+ perror("socketpair");
+ goto out;
+ }
+
+ pid = fork();
+ if (pid < 0) {
+ perror("fork");
+ goto close_pair;
+ }
+ if (pid == 0) {
+ if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+ perror("prctl(NO_NEW_PRIVS)");
+ exit(1);
+ }
+ listener = user_trap_syscall(__NR_bind,
+ SECCOMP_FILTER_FLAG_NEW_LISTENER);
+ if (listener < 0) {
+ perror("seccomp");
+ exit(1);
+ }
+ if (send_fd(sk_pair[1], listener) < 0)
+ exit(1);
+ close(listener);
+ exit(tracee());
+ }
+
+ if (ptrace(PTRACE_SEIZE, pid, 0, PTRACE_O_EXITKILL)) {
+ perror("ptrace(PTRACE_SEIZE)");
+ goto kill_child;
+ }
+
+ listener = recv_fd(sk_pair[0]);
+ if (listener < 0)
+ goto kill_child;
+
+ if (handle_req(listener))
+ goto kill_child;
+
+ /* Wait for child to finish */
+ waitpid(pid, NULL, 0);
+
+ ret = 0;
+ goto close_pair;
+
+kill_child:
+ kill(pid, SIGKILL);
+close_pair:
+ close(sk_pair[0]);
+ close(sk_pair[1]);
+out:
+ return ret;
+}
--
2.20.1


2019-12-09 19:31:26

by Oleg Nesterov

[permalink] [raw]
Subject: Re: [PATCH v2 4/4] samples: Add example of using PTRACE_GETFD in conjunction with user trap

On 12/09, Sargun Dhillon wrote:
>
> +#define CHILD_PORT_TRY_BIND 80
> +#define CHILD_PORT_ACTUAL_BIND 4998

...

> +static int handle_req(int listener)
> +{
> + struct sockaddr_in addr = {
> + .sin_family = AF_INET,
> + .sin_port = htons(4998),

then I think
.sin_port = htons(CHILD_PORT_ACTUAL_BIND);

would be more clear...

> + .sin_addr = {
> + .s_addr = htonl(INADDR_LOOPBACK)
> + }
> + };
> + struct ptrace_getfd_args getfd_args = {
> + .options = PTRACE_GETFD_O_CLOEXEC
> + };
> + struct seccomp_notif_sizes sizes;
> + struct seccomp_notif_resp *resp;
> + struct seccomp_notif *req;
> + int fd, ret = 1;
> +
> + if (seccomp(SECCOMP_GET_NOTIF_SIZES, 0, &sizes) < 0) {
> + perror("seccomp(GET_NOTIF_SIZES)");
> + goto out;
> + }
> + req = malloc(sizes.seccomp_notif);
> + if (!req)
> + goto out;
> + memset(req, 0, sizeof(*req));
> +
> + resp = malloc(sizes.seccomp_notif_resp);
> + if (!resp)
> + goto out_free_req;
> + memset(resp, 0, sizeof(*resp));
> +
> + if (ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, req)) {
> + perror("ioctl recv");
> + goto out;
> + }
> + printf("Child tried to call bind with fd: %lld\n", req->data.args[0]);
> + getfd_args.fd = req->data.args[0];
> + fd = ptrace_getfd(req->pid, &getfd_args);

and iiuc otherwise you do not need to ptrace the child. So you could remove
ptrace(PTRACE_SEIZE) in main() and just do

ptrace(PTRACE_SEIZE, req->pid);
fd = ptrace_getfd(req->pid, &getfd_args);
ptrace(PTRACE_DETACH, req->pid);

here. However, PTRACE_DETACH won't work, it needs the stopped tracee. We can
add PTRACE_DETACH_ASYNC, but this makes me think that PTRACE_GETFD has nothing
to do with ptrace.

May be a new syscall which does ptrace_may_access() + get_task_file() will make
more sense?

Oleg.

2019-12-09 19:51:51

by Christian Brauner

[permalink] [raw]
Subject: Re: [PATCH v2 4/4] samples: Add example of using PTRACE_GETFD in conjunction with user trap

On December 9, 2019 8:30:00 PM GMT+01:00, Oleg Nesterov <[email protected]> wrote:
>On 12/09, Sargun Dhillon wrote:
>>
>> +#define CHILD_PORT_TRY_BIND 80
>> +#define CHILD_PORT_ACTUAL_BIND 4998
>
>...
>
>> +static int handle_req(int listener)
>> +{
>> + struct sockaddr_in addr = {
>> + .sin_family = AF_INET,
>> + .sin_port = htons(4998),
>
>then I think
> .sin_port = htons(CHILD_PORT_ACTUAL_BIND);
>
>would be more clear...
>
>> + .sin_addr = {
>> + .s_addr = htonl(INADDR_LOOPBACK)
>> + }
>> + };
>> + struct ptrace_getfd_args getfd_args = {
>> + .options = PTRACE_GETFD_O_CLOEXEC
>> + };
>> + struct seccomp_notif_sizes sizes;
>> + struct seccomp_notif_resp *resp;
>> + struct seccomp_notif *req;
>> + int fd, ret = 1;
>> +
>> + if (seccomp(SECCOMP_GET_NOTIF_SIZES, 0, &sizes) < 0) {
>> + perror("seccomp(GET_NOTIF_SIZES)");
>> + goto out;
>> + }
>> + req = malloc(sizes.seccomp_notif);
>> + if (!req)
>> + goto out;
>> + memset(req, 0, sizeof(*req));
>> +
>> + resp = malloc(sizes.seccomp_notif_resp);
>> + if (!resp)
>> + goto out_free_req;
>> + memset(resp, 0, sizeof(*resp));
>> +
>> + if (ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, req)) {
>> + perror("ioctl recv");
>> + goto out;
>> + }
>> + printf("Child tried to call bind with fd: %lld\n",
>req->data.args[0]);
>> + getfd_args.fd = req->data.args[0];
>> + fd = ptrace_getfd(req->pid, &getfd_args);
>
>and iiuc otherwise you do not need to ptrace the child. So you could
>remove
>ptrace(PTRACE_SEIZE) in main() and just do
>
> ptrace(PTRACE_SEIZE, req->pid);
> fd = ptrace_getfd(req->pid, &getfd_args);
> ptrace(PTRACE_DETACH, req->pid);
>
>here. However, PTRACE_DETACH won't work, it needs the stopped tracee.
>We can
>add PTRACE_DETACH_ASYNC, but this makes me think that PTRACE_GETFD has
>nothing
>to do with ptrace.
>
>May be a new syscall which does ptrace_may_access() + get_task_file()
>will make
>more sense?
>
>Oleg.

Once more since this annoying app uses html by default...

But we can already do this right now and this is just an improvement.
That's a bit rich for a new syscall imho...

Christian

2019-12-09 20:48:02

by Oleg Nesterov

[permalink] [raw]
Subject: Re: [PATCH v2 4/4] samples: Add example of using PTRACE_GETFD in conjunction with user trap

On 12/09, Christian Brauner wrote:
>
> >We can
> >add PTRACE_DETACH_ASYNC, but this makes me think that PTRACE_GETFD has
> >nothing
> >to do with ptrace.
> >
> >May be a new syscall which does ptrace_may_access() + get_task_file()
> >will make
> >more sense?
> >
> >Oleg.
>
> Once more since this annoying app uses html by default...
>
> But we can already do this right now and this is just an improvement.
> That's a bit rich for a new syscall imho...

I agree, and I won't really argue...

but the changelog in 2/4 says

The requirement that the tracer has attached to the tracee prior to the
capture of the file descriptor may be lifted at a later point.

so may be we should do this right now?

plus this part

@@ -1265,7 +1295,8 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr,
}

ret = ptrace_check_attach(child, request == PTRACE_KILL ||
- request == PTRACE_INTERRUPT);
+ request == PTRACE_INTERRUPT ||
+ request == PTRACE_GETFD);

actually means "we do not need ptrace, but we do not know where else we
can add this fd_install(get_task_file()).

Oleg.

2019-12-10 11:11:49

by Christian Brauner

[permalink] [raw]
Subject: Re: [PATCH v2 4/4] samples: Add example of using PTRACE_GETFD in conjunction with user trap

[I'm expanding the Cc to a few Firefox and glibc people since we've been
been talking about replacing SECCOMP_RET_TRAP with
SECCOMP_RET_USER_NOTIF for a bit now because the useage of
SECCOMP_RET_TRAP in the broker blocks desirable core glibc changes.
Even if just for their lurking pleasure. :)]

On Mon, Dec 09, 2019 at 09:46:35PM +0100, Oleg Nesterov wrote:
> On 12/09, Christian Brauner wrote:
> >
> > >We can
> > >add PTRACE_DETACH_ASYNC, but this makes me think that PTRACE_GETFD has
> > >nothing
> > >to do with ptrace.
> > >
> > >May be a new syscall which does ptrace_may_access() + get_task_file()
> > >will make
> > >more sense?
> > >
> > >Oleg.
> >
> > Once more since this annoying app uses html by default...
> >
> > But we can already do this right now and this is just an improvement.
> > That's a bit rich for a new syscall imho...
>
> I agree, and I won't really argue...
>
> but the changelog in 2/4 says
>
> The requirement that the tracer has attached to the tracee prior to the
> capture of the file descriptor may be lifted at a later point.
>
> so may be we should do this right now?

I think so, yes. This doesn't strike me as premature optimization but
rather as a core design questions.

>
> plus this part
>
> @@ -1265,7 +1295,8 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr,
> }
>
> ret = ptrace_check_attach(child, request == PTRACE_KILL ||
> - request == PTRACE_INTERRUPT);
> + request == PTRACE_INTERRUPT ||
> + request == PTRACE_GETFD);
>
> actually means "we do not need ptrace, but we do not know where else we
> can add this fd_install(get_task_file()).

Right, I totally get your point and I'm not a fan of this being in
ptrace() either.

The way I see is is that the main use-case for this feature is the
seccomp notifier and I can see this being useful. So the right place to
plumb this into might just be seccomp and specifically on to of the
notifier.
If we don't care about getting and setting fds at random points of
execution it might make sense to add new options to the notify ioctl():

#define SECCOMP_IOCTL_NOTIF_GET_FD SECCOMP_IOWR(3, <sensible struct>)
#define SECCOMP_IOCTL_NOTIF_SET_FD SECCOMP_IOWR(4, <sensible struct>)

which would let you get and set fds while the supervisee is blocked.

Christian

2019-12-10 15:36:45

by Tycho Andersen

[permalink] [raw]
Subject: Re: [PATCH v2 4/4] samples: Add example of using PTRACE_GETFD in conjunction with user trap

On Tue, Dec 10, 2019 at 12:10:52PM +0100, Christian Brauner wrote:
>
> #define SECCOMP_IOCTL_NOTIF_SET_FD SECCOMP_IOWR(4, <sensible struct>)

There's even some code already for this one:
https://lore.kernel.org/linux-fsdevel/[email protected]/

Tycho

2019-12-10 16:10:24

by Sargun Dhillon

[permalink] [raw]
Subject: Re: [PATCH v2 4/4] samples: Add example of using PTRACE_GETFD in conjunction with user trap

On Tue, Dec 10, 2019 at 3:10 AM Christian Brauner
<[email protected]> wrote:
>
> [I'm expanding the Cc to a few Firefox and glibc people since we've been
> been talking about replacing SECCOMP_RET_TRAP with
> SECCOMP_RET_USER_NOTIF for a bit now because the useage of
> SECCOMP_RET_TRAP in the broker blocks desirable core glibc changes.
> Even if just for their lurking pleasure. :)]
>
> On Mon, Dec 09, 2019 at 09:46:35PM +0100, Oleg Nesterov wrote:
> > On 12/09, Christian Brauner wrote
> >
> > I agree, and I won't really argue...
> >
> > but the changelog in 2/4 says
> >
> > The requirement that the tracer has attached to the tracee prior to the
> > capture of the file descriptor may be lifted at a later point.
> >
> > so may be we should do this right now?
>
> I think so, yes. This doesn't strike me as premature optimization but
> rather as a core design questions.
>
> >
> > plus this part
> >
> > @@ -1265,7 +1295,8 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr,
> > }
> >
> > ret = ptrace_check_attach(child, request == PTRACE_KILL ||
> > - request == PTRACE_INTERRUPT);
> > + request == PTRACE_INTERRUPT ||
> > + request == PTRACE_GETFD);
> >
> > actually means "we do not need ptrace, but we do not know where else we
> > can add this fd_install(get_task_file()).
>
> Right, I totally get your point and I'm not a fan of this being in
> ptrace() either.
>
> The way I see is is that the main use-case for this feature is the
> seccomp notifier and I can see this being useful. So the right place to
> plumb this into might just be seccomp and specifically on to of the
> notifier.
> If we don't care about getting and setting fds at random points of
> execution it might make sense to add new options to the notify ioctl():
>
> #define SECCOMP_IOCTL_NOTIF_GET_FD SECCOMP_IOWR(3, <sensible struct>)
> #define SECCOMP_IOCTL_NOTIF_SET_FD SECCOMP_IOWR(4, <sensible struct>)
>
> which would let you get and set fds while the supervisee is blocked.
>
> Christian
Doesn't SECCOMP_IOCTL_NOTIF_GET_FD have some ambiguity to it?
Specifically, because
multiple processes can have the same notifier attached to them? If we
choose to go down the
route of introducing an ioctl (which I'm not at all opposed to), I
would rather do it on pidfd. We
can then plumb seccomp notifier to send pidfd instead of raw pid. In
the mean time, folks
can just open up /proc/${PID}, and do the check cookie dance.

Christian,
As the maintainer of pidfd, what do you think?

2019-12-10 16:13:44

by Tycho Andersen

[permalink] [raw]
Subject: Re: [PATCH v2 4/4] samples: Add example of using PTRACE_GETFD in conjunction with user trap

On Tue, Dec 10, 2019 at 08:07:45AM -0800, Sargun Dhillon wrote:
> On Tue, Dec 10, 2019 at 3:10 AM Christian Brauner
> <[email protected]> wrote:
> >
> > [I'm expanding the Cc to a few Firefox and glibc people since we've been
> > been talking about replacing SECCOMP_RET_TRAP with
> > SECCOMP_RET_USER_NOTIF for a bit now because the useage of
> > SECCOMP_RET_TRAP in the broker blocks desirable core glibc changes.
> > Even if just for their lurking pleasure. :)]
> >
> > On Mon, Dec 09, 2019 at 09:46:35PM +0100, Oleg Nesterov wrote:
> > > On 12/09, Christian Brauner wrote
> > >
> > > I agree, and I won't really argue...
> > >
> > > but the changelog in 2/4 says
> > >
> > > The requirement that the tracer has attached to the tracee prior to the
> > > capture of the file descriptor may be lifted at a later point.
> > >
> > > so may be we should do this right now?
> >
> > I think so, yes. This doesn't strike me as premature optimization but
> > rather as a core design questions.
> >
> > >
> > > plus this part
> > >
> > > @@ -1265,7 +1295,8 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr,
> > > }
> > >
> > > ret = ptrace_check_attach(child, request == PTRACE_KILL ||
> > > - request == PTRACE_INTERRUPT);
> > > + request == PTRACE_INTERRUPT ||
> > > + request == PTRACE_GETFD);
> > >
> > > actually means "we do not need ptrace, but we do not know where else we
> > > can add this fd_install(get_task_file()).
> >
> > Right, I totally get your point and I'm not a fan of this being in
> > ptrace() either.
> >
> > The way I see is is that the main use-case for this feature is the
> > seccomp notifier and I can see this being useful. So the right place to
> > plumb this into might just be seccomp and specifically on to of the
> > notifier.
> > If we don't care about getting and setting fds at random points of
> > execution it might make sense to add new options to the notify ioctl():
> >
> > #define SECCOMP_IOCTL_NOTIF_GET_FD SECCOMP_IOWR(3, <sensible struct>)
> > #define SECCOMP_IOCTL_NOTIF_SET_FD SECCOMP_IOWR(4, <sensible struct>)
> >
> > which would let you get and set fds while the supervisee is blocked.
> >
> > Christian
> Doesn't SECCOMP_IOCTL_NOTIF_GET_FD have some ambiguity to it?
> Specifically, because
> multiple processes can have the same notifier attached to them?

The id member corresponds to a particular syscall from a particular
pid, which makes it unique.

> If we
> choose to go down the
> route of introducing an ioctl (which I'm not at all opposed to), I
> would rather do it on pidfd. We
> can then plumb seccomp notifier to send pidfd instead of raw pid. In
> the mean time, folks
> can just open up /proc/${PID}, and do the check cookie dance.

This might be more generally useful, the problem is synchronization, I
guess.

Tycho

2019-12-10 16:40:51

by Christian Brauner

[permalink] [raw]
Subject: Re: [PATCH v2 4/4] samples: Add example of using PTRACE_GETFD in conjunction with user trap

On Tue, Dec 10, 2019 at 08:07:45AM -0800, Sargun Dhillon wrote:
> On Tue, Dec 10, 2019 at 3:10 AM Christian Brauner
> <[email protected]> wrote:
> >
> > [I'm expanding the Cc to a few Firefox and glibc people since we've been
> > been talking about replacing SECCOMP_RET_TRAP with
> > SECCOMP_RET_USER_NOTIF for a bit now because the useage of
> > SECCOMP_RET_TRAP in the broker blocks desirable core glibc changes.
> > Even if just for their lurking pleasure. :)]
> >
> > On Mon, Dec 09, 2019 at 09:46:35PM +0100, Oleg Nesterov wrote:
> > > On 12/09, Christian Brauner wrote
> > >
> > > I agree, and I won't really argue...
> > >
> > > but the changelog in 2/4 says
> > >
> > > The requirement that the tracer has attached to the tracee prior to the
> > > capture of the file descriptor may be lifted at a later point.
> > >
> > > so may be we should do this right now?
> >
> > I think so, yes. This doesn't strike me as premature optimization but
> > rather as a core design questions.
> >
> > >
> > > plus this part
> > >
> > > @@ -1265,7 +1295,8 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr,
> > > }
> > >
> > > ret = ptrace_check_attach(child, request == PTRACE_KILL ||
> > > - request == PTRACE_INTERRUPT);
> > > + request == PTRACE_INTERRUPT ||
> > > + request == PTRACE_GETFD);
> > >
> > > actually means "we do not need ptrace, but we do not know where else we
> > > can add this fd_install(get_task_file()).
> >
> > Right, I totally get your point and I'm not a fan of this being in
> > ptrace() either.
> >
> > The way I see is is that the main use-case for this feature is the
> > seccomp notifier and I can see this being useful. So the right place to
> > plumb this into might just be seccomp and specifically on to of the
> > notifier.
> > If we don't care about getting and setting fds at random points of
> > execution it might make sense to add new options to the notify ioctl():
> >
> > #define SECCOMP_IOCTL_NOTIF_GET_FD SECCOMP_IOWR(3, <sensible struct>)
> > #define SECCOMP_IOCTL_NOTIF_SET_FD SECCOMP_IOWR(4, <sensible struct>)
> >
> > which would let you get and set fds while the supervisee is blocked.
> >
> > Christian
> Doesn't SECCOMP_IOCTL_NOTIF_GET_FD have some ambiguity to it?

As Tycho mentioned, this is why we have a the tid of the calling task
but we also have a cookie per request.
The cookie is useful so that you can do
- receive request <chocolate> cookie
- open(/proc/<pid>{/mem})
- verify <chocolate> cookie still exists
- <chocolate> cookie still exists -> file descriptor refers to correct
task
- <chocolate> cookie gone -> task has been recycled

> Specifically, because
> multiple processes can have the same notifier attached to them? If we
> choose to go down the
> route of introducing an ioctl (which I'm not at all opposed to), I
> would rather do it on pidfd. We
> can then plumb seccomp notifier to send pidfd instead of raw pid. In
> the mean time, folks
> can just open up /proc/${PID}, and do the check cookie dance.
>
> Christian,
> As the maintainer of pidfd, what do you think?

Let me quote what I wrote to the Mozilla folks today. :)

"(One thing that always strikes me is that if my pidfd patches would've
been ready back when we did the seccomp notifier we could've added a
pidfd argument to the seccomp notifier kernel struct and if a flag is
set given back a pidfd alongside the notifier fd. This way none of this
revalidting the id stuff would've been necessary and you could also
safely translate from a pidfd into a /proc/<pid> directory to e.g. open
/proc/<pid>/mem. Anyway, that's not out of scope. One could still
write a patch for that to add a pidfd argument under a new flag to the
kernel struct. Should be rather trivial.)"

So yeah, it crossed my mind. ;)

I really would like to have this placed under a flag though...
I very much dislike the idea of receiving any kind of fd
- _especially a pidfd_ - implicitly.
So ideally this would be a flag to the receive ioctl(). Kees just got my
SECCOMP_USER_NOTIF_FLAG_CONTINUE patchset merged for v5.5 which adds the

#define SECCOMP_USER_NOTIF_FLAG_CONTINUE (1UL << 0)

flag which when set in the send case (i.e. supervisor -> kernel) will
cause the syscall to be executed.

When we add a new flag to get a pidfd it might make sense to rename the
CONTINUE flag in master before v5.5 is out to

#define SECCOMP_USER_NOTIF_SEND_FLAG_CONTINUE (1UL << 0)

to indicate that it's only valid for the SEND ioctl().

Then we add

#define SECCOMP_USER_NOTIF_RECV_FLAG_PIDFD (1UL << 0)

for v5.6. This way send and receive flags are named differently for
clarity. (I don't care about the name being long. Other people might
though _shrug_.)

Christian