2024-01-31 13:27:25

by Oleg Nesterov

[permalink] [raw]
Subject: [PATCH v3 0/1] pidfd: implement PIDFD_THREAD flag for pidfd_open()

Please see the interdiff below.

Also, I updated the changelog to document that the behaviour of
pidfd_poll(PIDFD_THREAD, pid-of-group-leader) is not well defined
if a sub-thread execs.

Do you agree with this semantics?

Oleg.
---

diff --git a/fs/exec.c b/fs/exec.c
index 73e4045df271..0fd7e668c477 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1143,7 +1143,11 @@ static int de_thread(struct task_struct *tsk)

BUG_ON(leader->exit_state != EXIT_ZOMBIE);
leader->exit_state = EXIT_DEAD;
-
+ /*
+ * leader and tsk exhanged their pids, the old pid dies,
+ * wake up the PIDFD_THREAD waiters.
+ */
+ do_notify_pidfd(leader);
/*
* We are going to release_task()->ptrace_unlink() silently,
* the tracer can sleep in do_wait(). EXIT_DEAD guarantees
diff --git a/include/linux/pid.h b/include/linux/pid.h
index e6a041cb8bac..8124d57752b9 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -70,10 +70,11 @@ extern const struct file_operations pidfd_fops;

struct file;

-extern struct pid *pidfd_pid(const struct file *file);
+struct pid *pidfd_pid(const struct file *file);
struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags);
struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags);
int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret);
+void do_notify_pidfd(struct task_struct *task);

static inline struct pid *get_pid(struct pid *pid)
{
diff --git a/kernel/signal.c b/kernel/signal.c
index 5f48d2c4b409..9b40109f0c56 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2019,7 +2019,7 @@ int send_sigqueue(struct sigqueue *q, struct pid *pid, enum pid_type type)
return ret;
}

-static void do_notify_pidfd(struct task_struct *task)
+void do_notify_pidfd(struct task_struct *task)
{
struct pid *pid;




2024-01-31 13:29:31

by Oleg Nesterov

[permalink] [raw]
Subject: [PATCH v3 1/1] pidfd: implement PIDFD_THREAD flag for pidfd_open()

With this flag:

- pidfd_open() doesn't require that the target task must be
a thread-group leader

- pidfd_poll() succeeds when the task exits and becomes a
zombie (iow, passes exit_notify()), even if it is a leader
and thread-group is not empty.

This means that the behaviour of pidfd_poll(PIDFD_THREAD,
pid-of-group-leader) is not well defined if it races with
exec() from its sub-thread; pidfd_poll() can succeed or not
depending on whether pidfd_task_exited() is called before
or after exchange_tids().

Perhaps we can improve this behaviour later, pidfd_poll()
can probably take sig->group_exec_task into account. But
this doesn't really differ from the case when the leader
exits before other threads (so pidfd_poll() succeeds) and
then another thread execs and pidfd_poll() will block again.

thread_group_exited() is no longer used, perhaps it can die.

Co-developed-by: Tycho Andersen <[email protected]>
Signed-off-by: Oleg Nesterov <[email protected]>
---
fs/exec.c | 6 +++++-
include/linux/pid.h | 3 ++-
include/uapi/linux/pidfd.h | 3 ++-
kernel/exit.c | 7 +++++++
kernel/fork.c | 38 +++++++++++++++++++++++++++++++-------
kernel/pid.c | 14 +++-----------
kernel/signal.c | 6 ++++--
7 files changed, 54 insertions(+), 23 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index 73e4045df271..0fd7e668c477 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1143,7 +1143,11 @@ static int de_thread(struct task_struct *tsk)

BUG_ON(leader->exit_state != EXIT_ZOMBIE);
leader->exit_state = EXIT_DEAD;
-
+ /*
+ * leader and tsk exhanged their pids, the old pid dies,
+ * wake up the PIDFD_THREAD waiters.
+ */
+ do_notify_pidfd(leader);
/*
* We are going to release_task()->ptrace_unlink() silently,
* the tracer can sleep in do_wait(). EXIT_DEAD guarantees
diff --git a/include/linux/pid.h b/include/linux/pid.h
index e6a041cb8bac..8124d57752b9 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -70,10 +70,11 @@ extern const struct file_operations pidfd_fops;

struct file;

-extern struct pid *pidfd_pid(const struct file *file);
+struct pid *pidfd_pid(const struct file *file);
struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags);
struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags);
int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret);
+void do_notify_pidfd(struct task_struct *task);

static inline struct pid *get_pid(struct pid *pid)
{
diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h
index 5406fbc13074..2e6461459877 100644
--- a/include/uapi/linux/pidfd.h
+++ b/include/uapi/linux/pidfd.h
@@ -7,6 +7,7 @@
#include <linux/fcntl.h>

/* Flags for pidfd_open(). */
-#define PIDFD_NONBLOCK O_NONBLOCK
+#define PIDFD_NONBLOCK O_NONBLOCK
+#define PIDFD_THREAD O_EXCL

#endif /* _UAPI_LINUX_PIDFD_H */
diff --git a/kernel/exit.c b/kernel/exit.c
index dfb963d2f862..493647fd7c07 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -739,6 +739,13 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
kill_orphaned_pgrp(tsk->group_leader, NULL);

tsk->exit_state = EXIT_ZOMBIE;
+ /*
+ * sub-thread or delay_group_leader(), wake up the
+ * PIDFD_THREAD waiters.
+ */
+ if (!thread_group_empty(tsk))
+ do_notify_pidfd(tsk);
+
if (unlikely(tsk->ptrace)) {
int sig = thread_group_leader(tsk) &&
thread_group_empty(tsk) &&
diff --git a/kernel/fork.c b/kernel/fork.c
index 347641398f9d..bea32071fff2 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -101,6 +101,7 @@
#include <linux/user_events.h>
#include <linux/iommu.h>
#include <linux/rseq.h>
+#include <uapi/linux/pidfd.h>

#include <asm/pgalloc.h>
#include <linux/uaccess.h>
@@ -2050,6 +2051,8 @@ static void pidfd_show_fdinfo(struct seq_file *m, struct file *f)

seq_put_decimal_ll(m, "Pid:\t", nr);

+ /* TODO: report PIDFD_THREAD */
+
#ifdef CONFIG_PID_NS
seq_put_decimal_ll(m, "\nNSpid:\t", nr);
if (nr > 0) {
@@ -2068,22 +2071,35 @@ static void pidfd_show_fdinfo(struct seq_file *m, struct file *f)
}
#endif

+static bool pidfd_task_exited(struct pid *pid, bool thread)
+{
+ struct task_struct *task;
+ bool exited;
+
+ rcu_read_lock();
+ task = pid_task(pid, PIDTYPE_PID);
+ exited = !task ||
+ (READ_ONCE(task->exit_state) && (thread || thread_group_empty(task)));
+ rcu_read_unlock();
+
+ return exited;
+}
+
/*
* Poll support for process exit notification.
*/
static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts)
{
struct pid *pid = file->private_data;
+ bool thread = file->f_flags & PIDFD_THREAD;
__poll_t poll_flags = 0;

poll_wait(file, &pid->wait_pidfd, pts);
-
/*
- * Inform pollers only when the whole thread group exits.
- * If the thread group leader exits before all other threads in the
- * group, then poll(2) should block, similar to the wait(2) family.
+ * Depending on PIDFD_THREAD, inform pollers when the thread
+ * or the whole thread-group exits.
*/
- if (thread_group_exited(pid))
+ if (pidfd_task_exited(pid, thread))
poll_flags = EPOLLIN | EPOLLRDNORM;

return poll_flags;
@@ -2141,6 +2157,11 @@ static int __pidfd_prepare(struct pid *pid, unsigned int flags, struct file **re
return PTR_ERR(pidfd_file);
}
get_pid(pid); /* held by pidfd_file now */
+ /*
+ * anon_inode_getfile() ignores everything outside of the
+ * O_ACCMODE | O_NONBLOCK mask, set PIDFD_THREAD manually.
+ */
+ pidfd_file->f_flags |= (flags & PIDFD_THREAD);
*ret = pidfd_file;
return pidfd;
}
@@ -2154,7 +2175,8 @@ static int __pidfd_prepare(struct pid *pid, unsigned int flags, struct file **re
* Allocate a new file that stashes @pid and reserve a new pidfd number in the
* caller's file descriptor table. The pidfd is reserved but not installed yet.
*
- * The helper verifies that @pid is used as a thread group leader.
+ * The helper verifies that @pid is still in use, without PIDFD_THREAD the
+ * task identified by @pid must be a thread-group leader.
*
* If this function returns successfully the caller is responsible to either
* call fd_install() passing the returned pidfd and pidfd file as arguments in
@@ -2173,7 +2195,9 @@ static int __pidfd_prepare(struct pid *pid, unsigned int flags, struct file **re
*/
int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret)
{
- if (!pid || !pid_has_task(pid, PIDTYPE_TGID))
+ bool thread = flags & PIDFD_THREAD;
+
+ if (!pid || !pid_has_task(pid, thread ? PIDTYPE_PID : PIDTYPE_TGID));
return -EINVAL;

return __pidfd_prepare(pid, flags, ret);
diff --git a/kernel/pid.c b/kernel/pid.c
index c7a3e359f8f5..e11144466828 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -552,11 +552,6 @@ struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags)
* Return the task associated with @pidfd. The function takes a reference on
* the returned task. The caller is responsible for releasing that reference.
*
- * Currently, the process identified by @pidfd is always a thread-group leader.
- * This restriction currently exists for all aspects of pidfds including pidfd
- * creation (CLONE_PIDFD cannot be used with CLONE_THREAD) and pidfd polling
- * (only supports thread group leaders).
- *
* Return: On success, the task_struct associated with the pidfd.
* On error, a negative errno number will be returned.
*/
@@ -615,11 +610,8 @@ static int pidfd_create(struct pid *pid, unsigned int flags)
* @flags: flags to pass
*
* This creates a new pid file descriptor with the O_CLOEXEC flag set for
- * the process identified by @pid. Currently, the process identified by
- * @pid must be a thread-group leader. This restriction currently exists
- * for all aspects of pidfds including pidfd creation (CLONE_PIDFD cannot
- * be used with CLONE_THREAD) and pidfd polling (only supports thread group
- * leaders).
+ * the task identified by @pid. Without PIDFD_THREAD flag the target task
+ * must be a thread-group leader.
*
* Return: On success, a cloexec pidfd is returned.
* On error, a negative errno number will be returned.
@@ -629,7 +621,7 @@ SYSCALL_DEFINE2(pidfd_open, pid_t, pid, unsigned int, flags)
int fd;
struct pid *p;

- if (flags & ~PIDFD_NONBLOCK)
+ if (flags & ~(PIDFD_NONBLOCK | PIDFD_THREAD))
return -EINVAL;

if (pid <= 0)
diff --git a/kernel/signal.c b/kernel/signal.c
index 9561a3962ca6..9b40109f0c56 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2019,7 +2019,7 @@ int send_sigqueue(struct sigqueue *q, struct pid *pid, enum pid_type type)
return ret;
}

-static void do_notify_pidfd(struct task_struct *task)
+void do_notify_pidfd(struct task_struct *task)
{
struct pid *pid;

@@ -2051,7 +2051,8 @@ bool do_notify_parent(struct task_struct *tsk, int sig)
WARN_ON_ONCE(!tsk->ptrace &&
(tsk->group_leader != tsk || !thread_group_empty(tsk)));
/*
- * tsk is a group leader and has no threads, wake up the pidfd waiters.
+ * tsk is a group leader and has no threads, wake up the
+ * non-PIDFD_THREAD waiters.
*/
if (thread_group_empty(tsk))
do_notify_pidfd(tsk);
@@ -3926,6 +3927,7 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig,
prepare_kill_siginfo(sig, &kinfo);
}

+ /* TODO: respect PIDFD_THREAD */
ret = kill_pid_info(sig, &kinfo, pid);

err:
--
2.25.1.362.g51ebf55



2024-01-31 13:59:16

by Christian Brauner

[permalink] [raw]
Subject: Re: [PATCH v3 0/1] pidfd: implement PIDFD_THREAD flag for pidfd_open()

On Wed, Jan 31, 2024 at 02:25:41PM +0100, Oleg Nesterov wrote:
> Please see the interdiff below.
>
> Also, I updated the changelog to document that the behaviour of
> pidfd_poll(PIDFD_THREAD, pid-of-group-leader) is not well defined
> if a sub-thread execs.
>
> Do you agree with this semantics?

Yeah, that seems fine to me.

2024-01-31 14:14:23

by Christian Brauner

[permalink] [raw]
Subject: Re: [PATCH v3 1/1] pidfd: implement PIDFD_THREAD flag for pidfd_open()

On Wed, Jan 31, 2024 at 02:26:02PM +0100, Oleg Nesterov wrote:
> With this flag:
>
> - pidfd_open() doesn't require that the target task must be
> a thread-group leader
>
> - pidfd_poll() succeeds when the task exits and becomes a
> zombie (iow, passes exit_notify()), even if it is a leader
> and thread-group is not empty.
>
> This means that the behaviour of pidfd_poll(PIDFD_THREAD,
> pid-of-group-leader) is not well defined if it races with
> exec() from its sub-thread; pidfd_poll() can succeed or not
> depending on whether pidfd_task_exited() is called before
> or after exchange_tids().

I think that we can live with that for now. If it doesn't hinder Tycho's
use-case it's fine.

>
> Perhaps we can improve this behaviour later, pidfd_poll()
> can probably take sig->group_exec_task into account. But
> this doesn't really differ from the case when the leader
> exits before other threads (so pidfd_poll() succeeds) and
> then another thread execs and pidfd_poll() will block again.

.. because it assumes the old thread-group leader's struct pid, right.
One would hope that implementations don't poll on the same fd again
after they saw an exit event.

(I often wish that we could report custom data from poll similar to
kqueue on bsd then we could attach file specific information to the
notification. That would enable userspace to handle this the way they
want.)

>
> thread_group_exited() is no longer used, perhaps it can die.

Feel free to add a patch for that on top of it.

>
> Co-developed-by: Tycho Andersen <[email protected]>
> Signed-off-by: Oleg Nesterov <[email protected]>
> ---
> fs/exec.c | 6 +++++-
> include/linux/pid.h | 3 ++-
> include/uapi/linux/pidfd.h | 3 ++-
> kernel/exit.c | 7 +++++++
> kernel/fork.c | 38 +++++++++++++++++++++++++++++++-------
> kernel/pid.c | 14 +++-----------
> kernel/signal.c | 6 ++++--
> 7 files changed, 54 insertions(+), 23 deletions(-)
>
> diff --git a/fs/exec.c b/fs/exec.c
> index 73e4045df271..0fd7e668c477 100644
> --- a/fs/exec.c
> +++ b/fs/exec.c
> @@ -1143,7 +1143,11 @@ static int de_thread(struct task_struct *tsk)
>
> BUG_ON(leader->exit_state != EXIT_ZOMBIE);
> leader->exit_state = EXIT_DEAD;
> -
> + /*
> + * leader and tsk exhanged their pids, the old pid dies,
> + * wake up the PIDFD_THREAD waiters.
> + */
> + do_notify_pidfd(leader);
> /*
> * We are going to release_task()->ptrace_unlink() silently,
> * the tracer can sleep in do_wait(). EXIT_DEAD guarantees
> diff --git a/include/linux/pid.h b/include/linux/pid.h
> index e6a041cb8bac..8124d57752b9 100644
> --- a/include/linux/pid.h
> +++ b/include/linux/pid.h
> @@ -70,10 +70,11 @@ extern const struct file_operations pidfd_fops;
>
> struct file;
>
> -extern struct pid *pidfd_pid(const struct file *file);
> +struct pid *pidfd_pid(const struct file *file);
> struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags);
> struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags);
> int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret);
> +void do_notify_pidfd(struct task_struct *task);
>
> static inline struct pid *get_pid(struct pid *pid)
> {
> diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h
> index 5406fbc13074..2e6461459877 100644
> --- a/include/uapi/linux/pidfd.h
> +++ b/include/uapi/linux/pidfd.h
> @@ -7,6 +7,7 @@
> #include <linux/fcntl.h>
>
> /* Flags for pidfd_open(). */
> -#define PIDFD_NONBLOCK O_NONBLOCK
> +#define PIDFD_NONBLOCK O_NONBLOCK
> +#define PIDFD_THREAD O_EXCL
>
> #endif /* _UAPI_LINUX_PIDFD_H */
> diff --git a/kernel/exit.c b/kernel/exit.c
> index dfb963d2f862..493647fd7c07 100644
> --- a/kernel/exit.c
> +++ b/kernel/exit.c
> @@ -739,6 +739,13 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
> kill_orphaned_pgrp(tsk->group_leader, NULL);
>
> tsk->exit_state = EXIT_ZOMBIE;
> + /*
> + * sub-thread or delay_group_leader(), wake up the
> + * PIDFD_THREAD waiters.
> + */
> + if (!thread_group_empty(tsk))
> + do_notify_pidfd(tsk);
> +
> if (unlikely(tsk->ptrace)) {
> int sig = thread_group_leader(tsk) &&
> thread_group_empty(tsk) &&
> diff --git a/kernel/fork.c b/kernel/fork.c
> index 347641398f9d..bea32071fff2 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -101,6 +101,7 @@
> #include <linux/user_events.h>
> #include <linux/iommu.h>
> #include <linux/rseq.h>
> +#include <uapi/linux/pidfd.h>
>
> #include <asm/pgalloc.h>
> #include <linux/uaccess.h>
> @@ -2050,6 +2051,8 @@ static void pidfd_show_fdinfo(struct seq_file *m, struct file *f)
>
> seq_put_decimal_ll(m, "Pid:\t", nr);
>
> + /* TODO: report PIDFD_THREAD */
> +
> #ifdef CONFIG_PID_NS
> seq_put_decimal_ll(m, "\nNSpid:\t", nr);
> if (nr > 0) {
> @@ -2068,22 +2071,35 @@ static void pidfd_show_fdinfo(struct seq_file *m, struct file *f)
> }
> #endif
>
> +static bool pidfd_task_exited(struct pid *pid, bool thread)
> +{
> + struct task_struct *task;
> + bool exited;
> +
> + rcu_read_lock();
> + task = pid_task(pid, PIDTYPE_PID);
> + exited = !task ||
> + (READ_ONCE(task->exit_state) && (thread || thread_group_empty(task)));
> + rcu_read_unlock();
> +
> + return exited;
> +}
> +
> /*
> * Poll support for process exit notification.
> */
> static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts)
> {
> struct pid *pid = file->private_data;
> + bool thread = file->f_flags & PIDFD_THREAD;
> __poll_t poll_flags = 0;
>
> poll_wait(file, &pid->wait_pidfd, pts);
> -
> /*
> - * Inform pollers only when the whole thread group exits.
> - * If the thread group leader exits before all other threads in the
> - * group, then poll(2) should block, similar to the wait(2) family.
> + * Depending on PIDFD_THREAD, inform pollers when the thread
> + * or the whole thread-group exits.
> */
> - if (thread_group_exited(pid))
> + if (pidfd_task_exited(pid, thread))
> poll_flags = EPOLLIN | EPOLLRDNORM;
>
> return poll_flags;
> @@ -2141,6 +2157,11 @@ static int __pidfd_prepare(struct pid *pid, unsigned int flags, struct file **re
> return PTR_ERR(pidfd_file);
> }
> get_pid(pid); /* held by pidfd_file now */
> + /*
> + * anon_inode_getfile() ignores everything outside of the
> + * O_ACCMODE | O_NONBLOCK mask, set PIDFD_THREAD manually.
> + */
> + pidfd_file->f_flags |= (flags & PIDFD_THREAD);
> *ret = pidfd_file;
> return pidfd;
> }
> @@ -2154,7 +2175,8 @@ static int __pidfd_prepare(struct pid *pid, unsigned int flags, struct file **re
> * Allocate a new file that stashes @pid and reserve a new pidfd number in the
> * caller's file descriptor table. The pidfd is reserved but not installed yet.
> *
> - * The helper verifies that @pid is used as a thread group leader.
> + * The helper verifies that @pid is still in use, without PIDFD_THREAD the
> + * task identified by @pid must be a thread-group leader.
> *
> * If this function returns successfully the caller is responsible to either
> * call fd_install() passing the returned pidfd and pidfd file as arguments in
> @@ -2173,7 +2195,9 @@ static int __pidfd_prepare(struct pid *pid, unsigned int flags, struct file **re
> */
> int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret)
> {
> - if (!pid || !pid_has_task(pid, PIDTYPE_TGID))
> + bool thread = flags & PIDFD_THREAD;
> +
> + if (!pid || !pid_has_task(pid, thread ? PIDTYPE_PID : PIDTYPE_TGID));
> return -EINVAL;
>
> return __pidfd_prepare(pid, flags, ret);
> diff --git a/kernel/pid.c b/kernel/pid.c
> index c7a3e359f8f5..e11144466828 100644
> --- a/kernel/pid.c
> +++ b/kernel/pid.c
> @@ -552,11 +552,6 @@ struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags)
> * Return the task associated with @pidfd. The function takes a reference on
> * the returned task. The caller is responsible for releasing that reference.
> *
> - * Currently, the process identified by @pidfd is always a thread-group leader.
> - * This restriction currently exists for all aspects of pidfds including pidfd
> - * creation (CLONE_PIDFD cannot be used with CLONE_THREAD) and pidfd polling
> - * (only supports thread group leaders).
> - *
> * Return: On success, the task_struct associated with the pidfd.
> * On error, a negative errno number will be returned.
> */
> @@ -615,11 +610,8 @@ static int pidfd_create(struct pid *pid, unsigned int flags)
> * @flags: flags to pass
> *
> * This creates a new pid file descriptor with the O_CLOEXEC flag set for
> - * the process identified by @pid. Currently, the process identified by
> - * @pid must be a thread-group leader. This restriction currently exists
> - * for all aspects of pidfds including pidfd creation (CLONE_PIDFD cannot
> - * be used with CLONE_THREAD) and pidfd polling (only supports thread group
> - * leaders).
> + * the task identified by @pid. Without PIDFD_THREAD flag the target task
> + * must be a thread-group leader.
> *
> * Return: On success, a cloexec pidfd is returned.
> * On error, a negative errno number will be returned.
> @@ -629,7 +621,7 @@ SYSCALL_DEFINE2(pidfd_open, pid_t, pid, unsigned int, flags)
> int fd;
> struct pid *p;
>
> - if (flags & ~PIDFD_NONBLOCK)
> + if (flags & ~(PIDFD_NONBLOCK | PIDFD_THREAD))
> return -EINVAL;
>
> if (pid <= 0)
> diff --git a/kernel/signal.c b/kernel/signal.c
> index 9561a3962ca6..9b40109f0c56 100644
> --- a/kernel/signal.c
> +++ b/kernel/signal.c
> @@ -2019,7 +2019,7 @@ int send_sigqueue(struct sigqueue *q, struct pid *pid, enum pid_type type)
> return ret;
> }
>
> -static void do_notify_pidfd(struct task_struct *task)
> +void do_notify_pidfd(struct task_struct *task)
> {
> struct pid *pid;
>
> @@ -2051,7 +2051,8 @@ bool do_notify_parent(struct task_struct *tsk, int sig)
> WARN_ON_ONCE(!tsk->ptrace &&
> (tsk->group_leader != tsk || !thread_group_empty(tsk)));
> /*
> - * tsk is a group leader and has no threads, wake up the pidfd waiters.
> + * tsk is a group leader and has no threads, wake up the
> + * non-PIDFD_THREAD waiters.
> */
> if (thread_group_empty(tsk))
> do_notify_pidfd(tsk);
> @@ -3926,6 +3927,7 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig,
> prepare_kill_siginfo(sig, &kinfo);
> }
>
> + /* TODO: respect PIDFD_THREAD */
> ret = kill_pid_info(sig, &kinfo, pid);
>
> err:
> --
> 2.25.1.362.g51ebf55
>
>

2024-01-31 14:22:07

by Oleg Nesterov

[permalink] [raw]
Subject: Re: [PATCH v3 1/1] pidfd: implement PIDFD_THREAD flag for pidfd_open()

On 01/31, Christian Brauner wrote:
>
> On Wed, Jan 31, 2024 at 02:26:02PM +0100, Oleg Nesterov wrote:
> > With this flag:
> >
> > - pidfd_open() doesn't require that the target task must be
> > a thread-group leader
> >
> > - pidfd_poll() succeeds when the task exits and becomes a
> > zombie (iow, passes exit_notify()), even if it is a leader
> > and thread-group is not empty.
> >
> > This means that the behaviour of pidfd_poll(PIDFD_THREAD,
> > pid-of-group-leader) is not well defined if it races with
> > exec() from its sub-thread; pidfd_poll() can succeed or not
> > depending on whether pidfd_task_exited() is called before
> > or after exchange_tids().
>
> I think that we can live with that for now. If it doesn't hinder Tycho's
> use-case it's fine.

OK, good.

> (I often wish that we could report custom data from poll similar to
> kqueue on bsd then we could attach file specific information to the
> notification. That would enable userspace to handle this the way they
> want.)

Or at least perhaps we can change do_notify_pidfd() to use the keyed
wakeups... I'll try to take a look later.

> > thread_group_exited() is no longer used, perhaps it can die.
>
> Feel free to add a patch for that on top of it.

Yes, will do.

Oleg.


2024-01-31 14:41:40

by Oleg Nesterov

[permalink] [raw]
Subject: Re: [PATCH v3 0/1] pidfd: implement PIDFD_THREAD flag for pidfd_open()

Before I forget this...

After this patch we can easily add another feature, pidfd_poll()
can add, say, POLLHUP to poll_flags if the pid is "dead".

So the user can do

poll(pidfd, { .revents = POLLHUP });

and it will block until release_task() is called and this pid is
no longer in use (pid_task() == NULL).

Do you think this can be useful?

Oleg.


2024-01-31 15:11:32

by Christian Brauner

[permalink] [raw]
Subject: Re: [PATCH v3 0/1] pidfd: implement PIDFD_THREAD flag for pidfd_open()

On Wed, Jan 31, 2024 at 03:12:04PM +0100, Oleg Nesterov wrote:
> Before I forget this...
>
> After this patch we can easily add another feature, pidfd_poll()
> can add, say, POLLHUP to poll_flags if the pid is "dead".
>
> So the user can do
>
> poll(pidfd, { .revents = POLLHUP });
>
> and it will block until release_task() is called and this pid is
> no longer in use (pid_task() == NULL).
>
> Do you think this can be useful?

Yeah, I think this is something that people would find useful. IIUC, it
would essentially allow them to do things like wait until a task has
been waited upon which for service managers is very useful information.

---

Btw, bigger picture for a second. You probably haven't kept track of
this but the fact that we got pidfds - thanks a lot to your review and
help - has made quite a huge difference in userspace. Since we last did
any meaningful work we got:

* systemd completely relying on pidfds to manage services to guard
against any pid races.
* Extended dbus to allow authentication via pidfds.
* Extended policy kit to enable secure authentication of processes via pidfds.
* Language support for pidfds: Go, Rust etc.
* An endless number of tools that added support for them.
* glibc support for pidfd apis.

There's a bunch more. That literally obliterated whole bug classes.

I think with PIDFD_THREAD support it might make it interesting to use it
for some pthread management as well.

2024-01-31 16:45:26

by Oleg Nesterov

[permalink] [raw]
Subject: Re: [PATCH v3 0/1] pidfd: implement PIDFD_THREAD flag for pidfd_open()

On 01/31, Christian Brauner wrote:
>
> On Wed, Jan 31, 2024 at 03:12:04PM +0100, Oleg Nesterov wrote:
> >
> > After this patch we can easily add another feature, pidfd_poll()
> > can add, say, POLLHUP to poll_flags if the pid is "dead".
> >
> > So the user can do
> >
> > poll(pidfd, { .revents = POLLHUP });
> >
> > and it will block until release_task() is called and this pid is
> > no longer in use (pid_task() == NULL).
> >
> > Do you think this can be useful?
>
> Yeah, I think this is something that people would find useful. IIUC, it
> would essentially allow them to do things like wait until a task has
> been waited upon

Exactly.

OK. I'll try to make the (hopefully simple) patch on top of this one
on Friday, if Tycho agrees with V3. Will be busy tomorrow.

> * systemd completely relying on pidfds to manage services to guard
> against any pid races.
> * Extended dbus to allow authentication via pidfds.
> * Extended policy kit to enable secure authentication of processes via pidfds.
> * Language support for pidfds: Go, Rust etc.
> * An endless number of tools that added support for them.
> * glibc support for pidfd apis.
>
> There's a bunch more. That literally obliterated whole bug classes.

Thanks for this info!

Not that I ever thouhgt that pidfd is "useless", not at all, but as I said
(and as a Perl progammer ;) I simply do not know what people actually do with
pidfds ;)

Oleg.


2024-01-31 17:24:20

by Tycho Andersen

[permalink] [raw]
Subject: Re: [PATCH v3 1/1] pidfd: implement PIDFD_THREAD flag for pidfd_open()

Hi Oleg,

On Wed, Jan 31, 2024 at 02:26:02PM +0100, Oleg Nesterov wrote:
> With this flag:
>
> - pidfd_open() doesn't require that the target task must be
> a thread-group leader
>
> - pidfd_poll() succeeds when the task exits and becomes a
> zombie (iow, passes exit_notify()), even if it is a leader
> and thread-group is not empty.
>
> This means that the behaviour of pidfd_poll(PIDFD_THREAD,
> pid-of-group-leader) is not well defined if it races with
> exec() from its sub-thread; pidfd_poll() can succeed or not
> depending on whether pidfd_task_exited() is called before
> or after exchange_tids().
>
> Perhaps we can improve this behaviour later, pidfd_poll()
> can probably take sig->group_exec_task into account. But
> this doesn't really differ from the case when the leader
> exits before other threads (so pidfd_poll() succeeds) and
> then another thread execs and pidfd_poll() will block again.

I don't have a strong opinion here; leaving it "undefined" for now is
fine with me.

> @@ -2173,7 +2195,9 @@ static int __pidfd_prepare(struct pid *pid, unsigned int flags, struct file **re
> */
> int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret)
> {
> - if (!pid || !pid_has_task(pid, PIDTYPE_TGID))
> + bool thread = flags & PIDFD_THREAD;
> +
> + if (!pid || !pid_has_task(pid, thread ? PIDTYPE_PID : PIDTYPE_TGID));

Small typo here, trailing ;. When I fix that, tests pass for me.

Assuming that's fixed up:

Reviewed-by: Tycho Andersen <[email protected]>
Tested-by: Tycho Andersen <[email protected]>

Thanks for your help!

Tycho

2024-01-31 18:03:23

by Oleg Nesterov

[permalink] [raw]
Subject: Re: [PATCH v3 1/1] pidfd: implement PIDFD_THREAD flag for pidfd_open()

On 01/31, Tycho Andersen wrote:
>
> > int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret)
> > {
> > - if (!pid || !pid_has_task(pid, PIDTYPE_TGID))
> > + bool thread = flags & PIDFD_THREAD;
> > +
> > + if (!pid || !pid_has_task(pid, thread ? PIDTYPE_PID : PIDTYPE_TGID));
>
> Small typo here, trailing ;. When I fix that, tests pass for me.

OOPS.

Christian, should I spam lkml with v4, or will you add this fix yourself?

> Assuming that's fixed up:
>
> Reviewed-by: Tycho Andersen <[email protected]>
> Tested-by: Tycho Andersen <[email protected]>

Thanks Tycho!!!

Oleg.


2024-01-31 18:22:38

by Christian Brauner

[permalink] [raw]
Subject: Re: [PATCH v3 1/1] pidfd: implement PIDFD_THREAD flag for pidfd_open()

On Wed, Jan 31, 2024 at 07:01:47PM +0100, Oleg Nesterov wrote:
> On 01/31, Tycho Andersen wrote:
> >
> > > int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret)
> > > {
> > > - if (!pid || !pid_has_task(pid, PIDTYPE_TGID))
> > > + bool thread = flags & PIDFD_THREAD;
> > > +
> > > + if (!pid || !pid_has_task(pid, thread ? PIDTYPE_PID : PIDTYPE_TGID));
> >
> > Small typo here, trailing ;. When I fix that, tests pass for me.
>
> OOPS.
>
> Christian, should I spam lkml with v4, or will you add this fix yourself?

I fixed that up. No need to resend!

2024-02-01 17:35:16

by Christian Brauner

[permalink] [raw]
Subject: Re: [PATCH v3 1/1] pidfd: implement PIDFD_THREAD flag for pidfd_open()

On Wed, Jan 31, 2024 at 02:26:02PM +0100, Oleg Nesterov wrote:
> With this flag:
>
> - pidfd_open() doesn't require that the target task must be
> a thread-group leader
>
> - pidfd_poll() succeeds when the task exits and becomes a
> zombie (iow, passes exit_notify()), even if it is a leader
> and thread-group is not empty.
>
> This means that the behaviour of pidfd_poll(PIDFD_THREAD,
> pid-of-group-leader) is not well defined if it races with
> exec() from its sub-thread; pidfd_poll() can succeed or not
> depending on whether pidfd_task_exited() is called before
> or after exchange_tids().
>
> Perhaps we can improve this behaviour later, pidfd_poll()
> can probably take sig->group_exec_task into account. But
> this doesn't really differ from the case when the leader
> exits before other threads (so pidfd_poll() succeeds) and
> then another thread execs and pidfd_poll() will block again.
>
> thread_group_exited() is no longer used, perhaps it can die.
>
> Co-developed-by: Tycho Andersen <[email protected]>
> Signed-off-by: Oleg Nesterov <[email protected]>
> ---
> fs/exec.c | 6 +++++-
> include/linux/pid.h | 3 ++-
> include/uapi/linux/pidfd.h | 3 ++-
> kernel/exit.c | 7 +++++++
> kernel/fork.c | 38 +++++++++++++++++++++++++++++++-------
> kernel/pid.c | 14 +++-----------
> kernel/signal.c | 6 ++++--
> 7 files changed, 54 insertions(+), 23 deletions(-)
>
> diff --git a/fs/exec.c b/fs/exec.c
> index 73e4045df271..0fd7e668c477 100644
> --- a/fs/exec.c
> +++ b/fs/exec.c
> @@ -1143,7 +1143,11 @@ static int de_thread(struct task_struct *tsk)
>
> BUG_ON(leader->exit_state != EXIT_ZOMBIE);
> leader->exit_state = EXIT_DEAD;
> -
> + /*
> + * leader and tsk exhanged their pids, the old pid dies,
> + * wake up the PIDFD_THREAD waiters.
> + */
> + do_notify_pidfd(leader);
> /*
> * We are going to release_task()->ptrace_unlink() silently,
> * the tracer can sleep in do_wait(). EXIT_DEAD guarantees
> diff --git a/include/linux/pid.h b/include/linux/pid.h
> index e6a041cb8bac..8124d57752b9 100644
> --- a/include/linux/pid.h
> +++ b/include/linux/pid.h
> @@ -70,10 +70,11 @@ extern const struct file_operations pidfd_fops;
>
> struct file;
>
> -extern struct pid *pidfd_pid(const struct file *file);
> +struct pid *pidfd_pid(const struct file *file);
> struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags);
> struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags);
> int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret);
> +void do_notify_pidfd(struct task_struct *task);
>
> static inline struct pid *get_pid(struct pid *pid)
> {
> diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h
> index 5406fbc13074..2e6461459877 100644
> --- a/include/uapi/linux/pidfd.h
> +++ b/include/uapi/linux/pidfd.h
> @@ -7,6 +7,7 @@
> #include <linux/fcntl.h>
>
> /* Flags for pidfd_open(). */
> -#define PIDFD_NONBLOCK O_NONBLOCK
> +#define PIDFD_NONBLOCK O_NONBLOCK
> +#define PIDFD_THREAD O_EXCL
>
> #endif /* _UAPI_LINUX_PIDFD_H */
> diff --git a/kernel/exit.c b/kernel/exit.c
> index dfb963d2f862..493647fd7c07 100644
> --- a/kernel/exit.c
> +++ b/kernel/exit.c
> @@ -739,6 +739,13 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
> kill_orphaned_pgrp(tsk->group_leader, NULL);
>
> tsk->exit_state = EXIT_ZOMBIE;
> + /*
> + * sub-thread or delay_group_leader(), wake up the
> + * PIDFD_THREAD waiters.
> + */
> + if (!thread_group_empty(tsk))
> + do_notify_pidfd(tsk);
> +
> if (unlikely(tsk->ptrace)) {
> int sig = thread_group_leader(tsk) &&
> thread_group_empty(tsk) &&
> diff --git a/kernel/fork.c b/kernel/fork.c
> index 347641398f9d..bea32071fff2 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -101,6 +101,7 @@
> #include <linux/user_events.h>
> #include <linux/iommu.h>
> #include <linux/rseq.h>
> +#include <uapi/linux/pidfd.h>
>
> #include <asm/pgalloc.h>
> #include <linux/uaccess.h>
> @@ -2050,6 +2051,8 @@ static void pidfd_show_fdinfo(struct seq_file *m, struct file *f)
>
> seq_put_decimal_ll(m, "Pid:\t", nr);
>
> + /* TODO: report PIDFD_THREAD */

One more thing that came to my mind. We also support waitid(P_PIDFD,
pidfd). And I just looked through the code and I think it does the right
thing when we pass it a PIDFD_THREAD pidfd because wait_consider_task()
rejects task->exit_signal != SIGCHLD in eligible_child() and
CLONE_THREAD implies task->exit_signal == -1.

2024-02-01 18:14:28

by Oleg Nesterov

[permalink] [raw]
Subject: Re: [PATCH v3 1/1] pidfd: implement PIDFD_THREAD flag for pidfd_open()

On 02/01, Christian Brauner wrote:
>
> One more thing that came to my mind. We also support waitid(P_PIDFD,
> pidfd). And I just looked through the code and I think it does the right
> thing when we pass it a PIDFD_THREAD pidfd

Yes, I too looked into kernel_waitid_prepare(P_PIDFD) and didn't notice
anything wrong,

> because wait_consider_task()

Even simpler, I think. waitid(P_PIDFD, pidfd_with_PIDFD_THREAD) doesn't
really differ from waitid(P_PID, pidfd_pid(pidfd_with_PIDFD_THREAD)), so
it should work "as expected".

Oleg.


2024-02-02 12:50:39

by Christian Brauner

[permalink] [raw]
Subject: Re: [PATCH v3 1/1] pidfd: implement PIDFD_THREAD flag for pidfd_open()

> @@ -2050,6 +2051,8 @@ static void pidfd_show_fdinfo(struct seq_file *m, struct file *f)
>
> seq_put_decimal_ll(m, "Pid:\t", nr);
>
> + /* TODO: report PIDFD_THREAD */

So I think we don't need to do anything here. Since PIDFD_THREAD sets
O_EXCL in file->f_flags and in contrast to do_dentry_open() it isn't
dropped. So userspace can already detect PIDFD_NONBLOCK as O_NONBLOCK
and PIDFD_THREAD as O_EXCL.

2024-02-05 15:25:14

by Oleg Nesterov

[permalink] [raw]
Subject: Re: [PATCH v3 1/1] pidfd: implement PIDFD_THREAD flag for pidfd_open()

On 02/02, Christian Brauner wrote:
>
> > @@ -2050,6 +2051,8 @@ static void pidfd_show_fdinfo(struct seq_file *m, struct file *f)
> >
> > seq_put_decimal_ll(m, "Pid:\t", nr);
> >
> > + /* TODO: report PIDFD_THREAD */
>
> So I think we don't need to do anything here. Since PIDFD_THREAD sets
> O_EXCL in file->f_flags and in contrast to do_dentry_open() it isn't
> dropped. So userspace can already detect PIDFD_NONBLOCK as O_NONBLOCK
> and PIDFD_THREAD as O_EXCL.

Ah, indeed, I didn't know that fs/proc/fd.c:seq_show() reports ->f_flags.
Thanks.



OK, what about another TODO in sys_pidfd_send_signal() ?

I mean, should I send a simple patch which changes pidfd_send_signal()
to use do_send_specific() if PIDFD_THREAD ? Or do you think this should
be controlled by pidfd_send_signal's "flags" argument?

I honestly do not know what makes more sense.

Oleg.


2024-02-06 13:38:40

by Christian Brauner

[permalink] [raw]
Subject: Re: [PATCH v3 1/1] pidfd: implement PIDFD_THREAD flag for pidfd_open()

On Mon, Feb 05, 2024 at 04:22:41PM +0100, Oleg Nesterov wrote:
> On 02/02, Christian Brauner wrote:
> >
> > > @@ -2050,6 +2051,8 @@ static void pidfd_show_fdinfo(struct seq_file *m, struct file *f)
> > >
> > > seq_put_decimal_ll(m, "Pid:\t", nr);
> > >
> > > + /* TODO: report PIDFD_THREAD */
> >
> > So I think we don't need to do anything here. Since PIDFD_THREAD sets
> > O_EXCL in file->f_flags and in contrast to do_dentry_open() it isn't
> > dropped. So userspace can already detect PIDFD_NONBLOCK as O_NONBLOCK
> > and PIDFD_THREAD as O_EXCL.
>
> Ah, indeed, I didn't know that fs/proc/fd.c:seq_show() reports ->f_flags.
> Thanks.
>
>
>
> OK, what about another TODO in sys_pidfd_send_signal() ?
>
> I mean, should I send a simple patch which changes pidfd_send_signal()
> to use do_send_specific() if PIDFD_THREAD ? Or do you think this should

Yes, by default the type of pidfd should determine the scope of the
signal as discussed elsewhere.

(And later we can add flags that allow the caller to change the scope of
the signal. So that Andy's use-cases are covered as well.)

2024-02-08 07:56:44

by Oliver Sang

[permalink] [raw]
Subject: Re: [PATCH v3 1/1] pidfd: implement PIDFD_THREAD flag for pidfd_open()



Hello,

kernel test robot noticed "last_state.load_disk_fail" on:

commit: aa9c201b150f15cf12e8df8af531b2c74ae1a8fc ("[PATCH v3 1/1] pidfd: implement PIDFD_THREAD flag for pidfd_open()")
url: https://github.com/intel-lab-lkp/linux/commits/Oleg-Nesterov/pidfd-implement-PIDFD_THREAD-flag-for-pidfd_open/20240131-213408
base: https://git.kernel.org/cgit/linux/kernel/git/vfs/vfs.git vfs.all
patch link: https://lore.kernel.org/all/[email protected]/
patch subject: [PATCH v3 1/1] pidfd: implement PIDFD_THREAD flag for pidfd_open()

in testcase: boot

compiler: gcc-12
test machine: 128 threads 2 sockets Intel(R) Xeon(R) Platinum 8358 CPU @ 2.60GHz (Ice Lake) with 128G memory

(please refer to attached dmesg/kmsg for entire log/backtrace)



If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-lkp/[email protected]



The kernel config and materials to reproduce are available at:
https://download.01.org/0day-ci/archive/20240208/[email protected]


as in dmesg in above link:

LKP: ttyS0: 1408: can't load the disk /dev/disk/by-id/ata-SanDisk_SDSSDH3250G_182971800454-part1, skip testing...

we know this should be related with some early issues but we failed to figure
out. so here also attached parent dmesg as dmesg-e0ee7b583f.xz FYI.


--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki


Attachments:
(No filename) (1.56 kB)
dmesg-e0ee7b583f.xz (25.35 kB)
Download all attachments

2024-02-08 08:58:57

by Christian Brauner

[permalink] [raw]
Subject: Re: [PATCH v3 1/1] pidfd: implement PIDFD_THREAD flag for pidfd_open()

On Thu, Feb 08, 2024 at 03:54:44PM +0800, kernel test robot wrote:
>
>
> Hello,
>
> kernel test robot noticed "last_state.load_disk_fail" on:
>
> commit: aa9c201b150f15cf12e8df8af531b2c74ae1a8fc ("[PATCH v3 1/1] pidfd: implement PIDFD_THREAD flag for pidfd_open()")
> url: https://github.com/intel-lab-lkp/linux/commits/Oleg-Nesterov/pidfd-implement-PIDFD_THREAD-flag-for-pidfd_open/20240131-213408
> base: https://git.kernel.org/cgit/linux/kernel/git/vfs/vfs.git vfs.all
> patch link: https://lore.kernel.org/all/[email protected]/
> patch subject: [PATCH v3 1/1] pidfd: implement PIDFD_THREAD flag for pidfd_open()
>
> in testcase: boot
>
> compiler: gcc-12
> test machine: 128 threads 2 sockets Intel(R) Xeon(R) Platinum 8358 CPU @ 2.60GHz (Ice Lake) with 128G memory
>
> (please refer to attached dmesg/kmsg for entire log/backtrace)
>
>
>
> If you fix the issue in a separate patch/commit (i.e. not just a new version of
> the same patch/commit), kindly add following tags
> | Reported-by: kernel test robot <[email protected]>
> | Closes: https://lore.kernel.org/oe-lkp/[email protected]
>
>
>
> The kernel config and materials to reproduce are available at:
> https://download.01.org/0day-ci/archive/20240208/[email protected]
>
>
> as in dmesg in above link:
>
> LKP: ttyS0: 1408: can't load the disk /dev/disk/by-id/ata-SanDisk_SDSSDH3250G_182971800454-part1, skip testing...
>
> we know this should be related with some early issues but we failed to figure
> out. so here also attached parent dmesg as dmesg-e0ee7b583f.xz FYI.

I have a hard time seeing how this would be caused by any of Oleg's
changes. Plus, the merges from the vfs tree linked under "url:" above
are all pretty out of date?

2024-02-08 09:20:17

by Oleg Nesterov

[permalink] [raw]
Subject: Re: [PATCH v3 1/1] pidfd: implement PIDFD_THREAD flag for pidfd_open()

On 02/08, Christian Brauner wrote:
>
> On Thu, Feb 08, 2024 at 03:54:44PM +0800, kernel test robot wrote:
> >
> >
> > Hello,
> >
> > kernel test robot noticed "last_state.load_disk_fail" on:
> >
> > commit: aa9c201b150f15cf12e8df8af531b2c74ae1a8fc ("[PATCH v3 1/1] pidfd: implement PIDFD_THREAD flag for pidfd_open()")
> > url: https://github.com/intel-lab-lkp/linux/commits/Oleg-Nesterov/pidfd-implement-PIDFD_THREAD-flag-for-pidfd_open/20240131-213408
> > base: https://git.kernel.org/cgit/linux/kernel/git/vfs/vfs.git vfs.all
> > patch link: https://lore.kernel.org/all/[email protected]/
> > patch subject: [PATCH v3 1/1] pidfd: implement PIDFD_THREAD flag for pidfd_open()
> >
> > in testcase: boot
> >
> > compiler: gcc-12
> > test machine: 128 threads 2 sockets Intel(R) Xeon(R) Platinum 8358 CPU @ 2.60GHz (Ice Lake) with 128G memory
> >
> > (please refer to attached dmesg/kmsg for entire log/backtrace)
> >
> >
> >
> > If you fix the issue in a separate patch/commit (i.e. not just a new version of
> > the same patch/commit), kindly add following tags
> > | Reported-by: kernel test robot <[email protected]>
> > | Closes: https://lore.kernel.org/oe-lkp/[email protected]
> >
> >
> >
> > The kernel config and materials to reproduce are available at:
> > https://download.01.org/0day-ci/archive/20240208/[email protected]
> >
> >
> > as in dmesg in above link:
> >
> > LKP: ttyS0: 1408: can't load the disk /dev/disk/by-id/ata-SanDisk_SDSSDH3250G_182971800454-part1, skip testing...
> >
> > we know this should be related with some early issues but we failed to figure
> > out. so here also attached parent dmesg as dmesg-e0ee7b583f.xz FYI.
>
> I have a hard time seeing how this would be caused by any of Oleg's
> changes. Plus, the merges from the vfs tree linked under "url:" above
> are all pretty out of date?

Yes, the patch under url still has the extra trailing ";" after "if"
so pidfd_prepare() will always fail. Fixed by Tycho.

Oleg.