As far as I can tell, O_NONBLOCK has no effect on a pidfd. When calling
waitid on a pidfd for a running process, it always blocks unless you
provide WNOHANG.
I don't think anything depends on that behavior. Would it be possible to
make O_NONBLOCK on a pidfd cause waitid on a running process to return
EWOULDBLOCK?
This would make it easier to use pidfd in some non-blocking event loops.
- Josh Triplett
On Tue, Aug 11, 2020 at 11:12:36AM -0700, Josh Triplett wrote:
> As far as I can tell, O_NONBLOCK has no effect on a pidfd. When calling
> waitid on a pidfd for a running process, it always blocks unless you
> provide WNOHANG.
>
> I don't think anything depends on that behavior. Would it be possible to
> make O_NONBLOCK on a pidfd cause waitid on a running process to return
> EWOULDBLOCK?
>
> This would make it easier to use pidfd in some non-blocking event loops.
Hey Josh,
Just to see, I did a _horrible_ draft (cf. [1]), and it seems doable to
me. If you can provide a good rationale and a use case, then I think that
would be ok.
[1]:
diff --git a/kernel/exit.c b/kernel/exit.c
index 727150f28103..b43a0e126cee 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -934,6 +934,7 @@ struct wait_opts {
wait_queue_entry_t child_wait;
int notask_error;
+ bool wo_pidfd_nonblock;
};
static int eligible_pid(struct wait_opts *wo, struct task_struct *p)
@@ -1462,6 +1463,11 @@ static long do_wait(struct wait_opts *wo)
notask:
retval = wo->notask_error;
if (!retval && !(wo->wo_flags & WNOHANG)) {
+ if (wo->wo_pidfd_nonblock) {
+ retval = -EWOULDBLOCK;
+ goto end;
+ }
+
retval = -ERESTARTSYS;
if (!signal_pending(current)) {
schedule();
@@ -1495,9 +1501,10 @@ static long kernel_waitid(int which, pid_t upid, struct waitid_info *infop,
int options, struct rusage *ru)
{
struct wait_opts wo;
+ struct fd f;
struct pid *pid = NULL;
enum pid_type type;
- long ret;
+ long ret = 0;
if (options & ~(WNOHANG|WNOWAIT|WEXITED|WSTOPPED|WCONTINUED|
__WNOTHREAD|__WCLONE|__WALL))
@@ -1505,6 +1512,7 @@ static long kernel_waitid(int which, pid_t upid, struct waitid_info *infop,
if (!(options & (WEXITED|WSTOPPED|WCONTINUED)))
return -EINVAL;
+ wo.wo_pidfd_nonblock = false;
switch (which) {
case P_ALL:
type = PIDTYPE_MAX;
@@ -1531,9 +1539,22 @@ static long kernel_waitid(int which, pid_t upid, struct waitid_info *infop,
if (upid < 0)
return -EINVAL;
- pid = pidfd_get_pid(upid);
+ f = fdget(upid);
+ if (!f.file)
+ return ERR_PTR(-EBADF);
+
+ pid = pidfd_pid(f.file);
+
+ if (f.file->f_flags & O_NONBLOCK)
+ wo.wo_pidfd_nonblock = true;
+
if (IS_ERR(pid))
- return PTR_ERR(pid);
+ ret = PTR_ERR(pid);
+ else
+ get_pid(pid);
+ fdput(f);
+ if (ret)
+ return ret;
break;
default:
return -EINVAL;
On Tue, Aug 11, 2020 at 10:10:45PM +0200, Christian Brauner wrote:
> On Tue, Aug 11, 2020 at 11:12:36AM -0700, Josh Triplett wrote:
> > As far as I can tell, O_NONBLOCK has no effect on a pidfd. When calling
> > waitid on a pidfd for a running process, it always blocks unless you
> > provide WNOHANG.
> >
> > I don't think anything depends on that behavior. Would it be possible to
> > make O_NONBLOCK on a pidfd cause waitid on a running process to return
> > EWOULDBLOCK?
> >
> > This would make it easier to use pidfd in some non-blocking event loops.
>
> Hey Josh,
>
> Just to see, I did a _horrible_ draft (cf. [1]), and it seems doable to
> me. If you can provide a good rationale and a use case, then I think that
> would be ok.
Rationale and use case: there are some non-blocking event loop
libraries, such as the Rust async-io library, that help build
epoll-based event loops around file descriptors. Those libraries
automatically set O_NONBLOCK on the file descriptors they manage, and
they treat EWOULDBLOCK errno codes specially, with semantics like "call
this function, if it returns EWOULDBLOCK then don't call it again until
epoll says the fd is ready". If setting O_NONBLOCK on pidfd caused
waitid to return EWOULDBLOCK, such libraries would Just Work with very
little effort.
Also, pidfd_open should accept O_NONBLOCK as a flag, which, in addition
to saving a call to fcntl, would allow userspace to detect if this works.
(Even if you want to use fcntl to set it later, you can always just open
your own PID with pidfd_open and check if you get EINVAL to know that
your kernel doesn't support this.)
Thanks,
Josh Triplett