2021-11-15 09:27:43

by Vladimir Divjak

[permalink] [raw]
Subject: [PATCH] coredump-ptrace: Delayed delivery of SIGSTOP

Allow SIGSTOP to be delivered to the dying process,
if it is coming from coredump user mode helper (umh)
process, but delay it until coredump is finished,
at which point it will be re-triggered in coredump_finish().

When processing this signal, set the tasks of the dying process
directly to TASK_TRACED state during complete_signal(),
instead of attempting signal_wake_up().

Do so to allow the umh process to ptrace(PTRACE_ATTACH,...)
to the dying process, whose coredump it's handling.

* Problem description:
In automotive and/or embedded environments,
the storage capacity to store, and/or
network capabilities to upload
a complete core file can easily be a limiting factor,
making offline crash analysis difficult.

* Solution:
Allow the user mode coredump helper process
to perform ptrace on the dying process in order to obtain
useful information such as user mode stacktrace,
thereby improving the offline debugging possibilities
for such environments.

Signed-off-by: Vladimir Divjak <[email protected]>
---
fs/coredump.c | 18 +++++++++--
include/linux/mm_types.h | 2 ++
include/linux/umh.h | 1 +
kernel/signal.c | 64 +++++++++++++++++++++++++++++++++++++---
kernel/umh.c | 7 +++--
5 files changed, 84 insertions(+), 8 deletions(-)

diff --git a/fs/coredump.c b/fs/coredump.c
index 2868e3e171ae..9a51a1a2168d 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -487,6 +487,20 @@ static void coredump_finish(struct mm_struct *mm, bool core_dumped)
{
struct core_thread *curr, *next;
struct task_struct *task;
+ int signr;
+ struct ksignal ksig;
+
+ current->mm->core_state->core_dumped = true;
+
+ /*
+ * Check if there is a SIGSTOP pending, and if so, re-trigger its delivery
+ * allowing the coredump umh process to do a ptrace on this one.
+ */
+ spin_lock_irq(&current->sighand->siglock);
+ signr = next_signal(&current->pending, &current->blocked);
+ spin_unlock_irq(&current->sighand->siglock);
+ if (signr == SIGSTOP)
+ get_signal(&ksig);

spin_lock_irq(&current->sighand->siglock);
if (core_dumped && !__fatal_signal_pending(current))
@@ -601,7 +615,7 @@ void do_coredump(const kernel_siginfo_t *siginfo)
*/
.mm_flags = mm->flags,
};
-
+ core_state.core_dumped = false;
audit_core_dumps(siginfo->si_signo);

binfmt = mm->binfmt;
@@ -695,7 +709,7 @@ void do_coredump(const kernel_siginfo_t *siginfo)
if (sub_info)
retval = call_usermodehelper_exec(sub_info,
UMH_WAIT_EXEC);
-
+ core_state.umh_pid = sub_info->pid;
kfree(helper_argv);
if (retval) {
printk(KERN_INFO "Core dump to |%s pipe failed\n",
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 6613b26a8894..475b3d8cd399 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -381,6 +381,8 @@ struct core_state {
atomic_t nr_threads;
struct core_thread dumper;
struct completion startup;
+ bool core_dumped;
+ pid_t umh_pid;
};

struct kioctx_table;
diff --git a/include/linux/umh.h b/include/linux/umh.h
index 244aff638220..b2bbcafe7c98 100644
--- a/include/linux/umh.h
+++ b/include/linux/umh.h
@@ -24,6 +24,7 @@ struct subprocess_info {
char **envp;
int wait;
int retval;
+ pid_t pid;
int (*init)(struct subprocess_info *info, struct cred *new);
void (*cleanup)(struct subprocess_info *info);
void *data;
diff --git a/kernel/signal.c b/kernel/signal.c
index 66e88649cf74..5e7812644c8a 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -943,8 +943,22 @@ static bool prepare_signal(int sig, struct task_struct *p, bool force)
sigset_t flush;

if (signal->flags & (SIGNAL_GROUP_EXIT | SIGNAL_GROUP_COREDUMP)) {
- if (!(signal->flags & SIGNAL_GROUP_EXIT))
- return sig == SIGKILL;
+ if (!(signal->flags & SIGNAL_GROUP_EXIT)) {
+ /*
+ * If the signal is for the process being core-dumped
+ * and the signal is SIGSTOP sent by the coredump umh process
+ * let it through (in addition to SIGKILL)
+ * allowing the coredump umh process to ptrace the dying process.
+ */
+ bool sig_from_umh = false;
+
+ if (unlikely(p->mm && p->mm->core_state &&
+ p->mm->core_state->umh_pid == current->tgid)) {
+ sig_from_umh = true;
+ }
+ return sig == SIGKILL || (sig == SIGSTOP && sig_from_umh);
+ }
+
/*
* The process is in the middle of dying, nothing to do.
*/
@@ -1014,8 +1028,18 @@ static inline bool wants_signal(int sig, struct task_struct *p)
if (sigismember(&p->blocked, sig))
return false;

- if (p->flags & PF_EXITING)
+ if (p->flags & PF_EXITING) {
+ /*
+ * Ignore the fact the process is exiting,
+ * if it's being core-dumped, and the signal is SIGSTOP
+ * allowing the coredump umh process to ptrace the dying process.
+ * See prepare_signal().
+ */
+ if (unlikely(p->mm && p->mm->core_state && sig == SIGSTOP))
+ return true;
+
return false;
+ }

if (sig == SIGKILL)
return true;
@@ -1094,6 +1118,22 @@ static void complete_signal(int sig, struct task_struct *p, enum pid_type type)
}
}

+ /*
+ * If the signal is completed for a process being core-dumped,
+ * and the signal is SIGSTOP, there is no point in waking up its tasks,
+ * as they are either dumping the core, or in uninterruptible state,
+ * so skip the wake up if core-dump is not yet completed.
+ * Instead, if the core-dump has been completed, see coredump_finish()
+ * set the task state directly to TASK_TRACED,
+ * allowing the coredump umh process to ptrace the dying process.
+ */
+ if (unlikely(t->mm && t->mm->core_state) && sig == SIGSTOP) {
+ if (t->mm->core_state->core_dumped)
+ t->state = TASK_TRACED;
+
+ return;
+ }
+
/*
* The signal is already in the shared-pending queue.
* Tell the chosen thread to wake up and dequeue it.
@@ -2586,6 +2626,7 @@ bool get_signal(struct ksignal *ksig)
struct sighand_struct *sighand = current->sighand;
struct signal_struct *signal = current->signal;
int signr;
+ bool sigstop_pending = false;

if (unlikely(current->task_works))
task_work_run();
@@ -2651,8 +2692,23 @@ bool get_signal(struct ksignal *ksig)
goto relock;
}

+
+ /*
+ * If this task is being core-dumped,
+ * and the next signal is SIGSTOP, allow its delivery
+ * to enable the coredump umh process to ptrace the dying one.
+ */
+ if (unlikely(current->mm && current->mm->core_state)) {
+ int nextsig = 0;
+
+ nextsig = next_signal(&current->pending, &current->blocked);
+ if (nextsig == SIGSTOP) {
+ sigstop_pending = true;
+ }
+ }
+
/* Has this task already been marked for death? */
- if (signal_group_exit(signal)) {
+ if (signal_group_exit(signal) && !sigstop_pending) {
ksig->info.si_signo = signr = SIGKILL;
sigdelset(&current->pending.signal, SIGKILL);
trace_signal_deliver(SIGKILL, SEND_SIG_NOINFO,
diff --git a/kernel/umh.c b/kernel/umh.c
index 36c123360ab8..8ac027c75d70 100644
--- a/kernel/umh.c
+++ b/kernel/umh.c
@@ -107,6 +107,7 @@ static int call_usermodehelper_exec_async(void *data)
}

commit_creds(new);
+ sub_info->pid = task_pid_nr(current);

wait_for_initramfs();
retval = kernel_execve(sub_info->path,
@@ -133,10 +134,12 @@ static void call_usermodehelper_exec_sync(struct subprocess_info *sub_info)
/* If SIGCLD is ignored do_wait won't populate the status. */
kernel_sigaction(SIGCHLD, SIG_DFL);
pid = kernel_thread(call_usermodehelper_exec_async, sub_info, SIGCHLD);
- if (pid < 0)
+ if (pid < 0) {
sub_info->retval = pid;
- else
+ } else {
+ sub_info->pid = pid;
kernel_wait(pid, &sub_info->retval);
+ }

/* Restore default kernel sig handler */
kernel_sigaction(SIGCHLD, SIG_IGN);
--
2.25.1



2021-11-15 14:29:45

by Eric W. Biederman

[permalink] [raw]
Subject: Re: [PATCH] coredump-ptrace: Delayed delivery of SIGSTOP


Added Oleg and linux-api.

Vladimir Divjak <[email protected]> writes:

> Allow SIGSTOP to be delivered to the dying process,
> if it is coming from coredump user mode helper (umh)
> process, but delay it until coredump is finished,
> at which point it will be re-triggered in coredump_finish().
>
> When processing this signal, set the tasks of the dying process
> directly to TASK_TRACED state during complete_signal(),
> instead of attempting signal_wake_up().
>
> Do so to allow the umh process to ptrace(PTRACE_ATTACH,...)
> to the dying process, whose coredump it's handling.
>
> * Problem description:
> In automotive and/or embedded environments,
> the storage capacity to store, and/or
> network capabilities to upload
> a complete core file can easily be a limiting factor,
> making offline crash analysis difficult.
>
> * Solution:
> Allow the user mode coredump helper process
> to perform ptrace on the dying process in order to obtain
> useful information such as user mode stacktrace,
> thereby improving the offline debugging possibilities
> for such environments.

I don't think PTRACE_ATTACH is fundamentally wrong during a coredump.

Allowing SIGSTOP is fundamentally wrong. Processing any signal after
receiving a fatal signal that will result in coredump is wrong. There
is a small exception for SIGKILL which will terminate the coredump.


I think what you are actually looking for is PTRACE_SEIZE and stopping
at PTRACE_EVENT_EXIT both of which already exist.

There may be something preventing them from working and if some please
let me know. I took a quick skim through the code and it looks like
PTRACE_SEIZE and PTRACE_EVENT_EXIT should just work with no changes
to the current code.


I think this is also possible by filtering the coredump that is piped to
a userspace coredump process. So I am not certain what is gained. But
if PTRACE_SEIZE and PTRACE_EVENT_EXIT already work there is no point
in not allowing what you are looking for either.


I really really don't like the patch below. It gives me the heebie
jeebies. It is doing so many weird and questionable things on so many
layers.

There is already a mechanism to get the pid of the user mode helper.
All you need to do is capture the pid in the init/setup routine
passed to call_usermodehelper_setup.

Supporting SIGSTOP when the process is dying is horrible, and
with PTRACE_SEIZE completely unnecessary.

There is no reason to believe next_signal will necessary be SIGSTOP
if a process is being coredumps. There may be all manner of pending
signals.

I don't think you can safely change the task state of another process.
You certainly can't do it without using the task state change helpers.
There also need to be a verification that the task is in some kind of
stop state before it might be safe.

In general I have having trouble finding even a line of the code
change below that is not wrong for some reason. So please please don't
start with this code change if PTRACE_SEIZE + PTRACE_EVENT_EXIT needs
some work.

Eric


> Signed-off-by: Vladimir Divjak <[email protected]>
> ---
> fs/coredump.c | 18 +++++++++--
> include/linux/mm_types.h | 2 ++
> include/linux/umh.h | 1 +
> kernel/signal.c | 64 +++++++++++++++++++++++++++++++++++++---
> kernel/umh.c | 7 +++--
> 5 files changed, 84 insertions(+), 8 deletions(-)
>
> diff --git a/fs/coredump.c b/fs/coredump.c
> index 2868e3e171ae..9a51a1a2168d 100644
> --- a/fs/coredump.c
> +++ b/fs/coredump.c
> @@ -487,6 +487,20 @@ static void coredump_finish(struct mm_struct *mm, bool core_dumped)
> {
> struct core_thread *curr, *next;
> struct task_struct *task;
> + int signr;
> + struct ksignal ksig;
> +
> + current->mm->core_state->core_dumped = true;
^^^^^^^^^^^ See the core_dumped argument

It is completely possible for coredump_finish to be called because
the coredump was interrupted with SIGKILL. In which case reporting that
the was dumped is incorrect.

> +
> + /*
> + * Check if there is a SIGSTOP pending, and if so, re-trigger its delivery
> + * allowing the coredump umh process to do a ptrace on this one.
> + */
> + spin_lock_irq(&current->sighand->siglock);
> + signr = next_signal(&current->pending, &current->blocked);
> + spin_unlock_irq(&current->sighand->siglock);
> + if (signr == SIGSTOP)
> + get_signal(&ksig);
>
> spin_lock_irq(&current->sighand->siglock);
> if (core_dumped && !__fatal_signal_pending(current))
> @@ -601,7 +615,7 @@ void do_coredump(const kernel_siginfo_t *siginfo)
> */
> .mm_flags = mm->flags,
> };
> -
> + core_state.core_dumped = false;
> audit_core_dumps(siginfo->si_signo);
>
> binfmt = mm->binfmt;
> @@ -695,7 +709,7 @@ void do_coredump(const kernel_siginfo_t *siginfo)
> if (sub_info)
> retval = call_usermodehelper_exec(sub_info,
> UMH_WAIT_EXEC);
> -
> + core_state.umh_pid = sub_info->pid;
> kfree(helper_argv);
> if (retval) {
> printk(KERN_INFO "Core dump to |%s pipe failed\n",
> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index 6613b26a8894..475b3d8cd399 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -381,6 +381,8 @@ struct core_state {
> atomic_t nr_threads;
> struct core_thread dumper;
> struct completion startup;
> + bool core_dumped;
> + pid_t umh_pid;
> };
>
> struct kioctx_table;
> diff --git a/include/linux/umh.h b/include/linux/umh.h
> index 244aff638220..b2bbcafe7c98 100644
> --- a/include/linux/umh.h
> +++ b/include/linux/umh.h
> @@ -24,6 +24,7 @@ struct subprocess_info {
> char **envp;
> int wait;
> int retval;
> + pid_t pid;
> int (*init)(struct subprocess_info *info, struct cred *new);
> void (*cleanup)(struct subprocess_info *info);
> void *data;
> diff --git a/kernel/signal.c b/kernel/signal.c
> index 66e88649cf74..5e7812644c8a 100644
> --- a/kernel/signal.c
> +++ b/kernel/signal.c
> @@ -943,8 +943,22 @@ static bool prepare_signal(int sig, struct task_struct *p, bool force)
> sigset_t flush;
>
> if (signal->flags & (SIGNAL_GROUP_EXIT | SIGNAL_GROUP_COREDUMP)) {
> - if (!(signal->flags & SIGNAL_GROUP_EXIT))
> - return sig == SIGKILL;
> + if (!(signal->flags & SIGNAL_GROUP_EXIT)) {
> + /*
> + * If the signal is for the process being core-dumped
> + * and the signal is SIGSTOP sent by the coredump umh process
> + * let it through (in addition to SIGKILL)
> + * allowing the coredump umh process to ptrace the dying process.
> + */
> + bool sig_from_umh = false;
> +
> + if (unlikely(p->mm && p->mm->core_state &&
> + p->mm->core_state->umh_pid == current->tgid)) {
> + sig_from_umh = true;
> + }
> + return sig == SIGKILL || (sig == SIGSTOP && sig_from_umh);
> + }
> +
> /*
> * The process is in the middle of dying, nothing to do.
> */
> @@ -1014,8 +1028,18 @@ static inline bool wants_signal(int sig, struct task_struct *p)
> if (sigismember(&p->blocked, sig))
> return false;
>
> - if (p->flags & PF_EXITING)
> + if (p->flags & PF_EXITING) {
> + /*
> + * Ignore the fact the process is exiting,
> + * if it's being core-dumped, and the signal is SIGSTOP
> + * allowing the coredump umh process to ptrace the dying process.
> + * See prepare_signal().
> + */
> + if (unlikely(p->mm && p->mm->core_state && sig == SIGSTOP))
> + return true;
> +
> return false;
> + }
>
> if (sig == SIGKILL)
> return true;
> @@ -1094,6 +1118,22 @@ static void complete_signal(int sig, struct task_struct *p, enum pid_type type)
> }
> }
>
> + /*
> + * If the signal is completed for a process being core-dumped,
> + * and the signal is SIGSTOP, there is no point in waking up its tasks,
> + * as they are either dumping the core, or in uninterruptible state,
> + * so skip the wake up if core-dump is not yet completed.
> + * Instead, if the core-dump has been completed, see coredump_finish()
> + * set the task state directly to TASK_TRACED,
> + * allowing the coredump umh process to ptrace the dying process.
> + */
> + if (unlikely(t->mm && t->mm->core_state) && sig == SIGSTOP) {
> + if (t->mm->core_state->core_dumped)
> + t->state = TASK_TRACED;
> +
> + return;
> + }
> +
> /*
> * The signal is already in the shared-pending queue.
> * Tell the chosen thread to wake up and dequeue it.
> @@ -2586,6 +2626,7 @@ bool get_signal(struct ksignal *ksig)
> struct sighand_struct *sighand = current->sighand;
> struct signal_struct *signal = current->signal;
> int signr;
> + bool sigstop_pending = false;
>
> if (unlikely(current->task_works))
> task_work_run();
> @@ -2651,8 +2692,23 @@ bool get_signal(struct ksignal *ksig)
> goto relock;
> }
>
> +
> + /*
> + * If this task is being core-dumped,
> + * and the next signal is SIGSTOP, allow its delivery
> + * to enable the coredump umh process to ptrace the dying one.
> + */
> + if (unlikely(current->mm && current->mm->core_state)) {
> + int nextsig = 0;
> +
> + nextsig = next_signal(&current->pending, &current->blocked);
> + if (nextsig == SIGSTOP) {
> + sigstop_pending = true;
> + }
> + }
> +
> /* Has this task already been marked for death? */
> - if (signal_group_exit(signal)) {
> + if (signal_group_exit(signal) && !sigstop_pending) {
> ksig->info.si_signo = signr = SIGKILL;
> sigdelset(&current->pending.signal, SIGKILL);
> trace_signal_deliver(SIGKILL, SEND_SIG_NOINFO,
> diff --git a/kernel/umh.c b/kernel/umh.c
> index 36c123360ab8..8ac027c75d70 100644
> --- a/kernel/umh.c
> +++ b/kernel/umh.c
> @@ -107,6 +107,7 @@ static int call_usermodehelper_exec_async(void *data)
> }
>
> commit_creds(new);
> + sub_info->pid = task_pid_nr(current);
>
> wait_for_initramfs();
> retval = kernel_execve(sub_info->path,
> @@ -133,10 +134,12 @@ static void call_usermodehelper_exec_sync(struct subprocess_info *sub_info)
> /* If SIGCLD is ignored do_wait won't populate the status. */
> kernel_sigaction(SIGCHLD, SIG_DFL);
> pid = kernel_thread(call_usermodehelper_exec_async, sub_info, SIGCHLD);
> - if (pid < 0)
> + if (pid < 0) {
> sub_info->retval = pid;
> - else
> + } else {
> + sub_info->pid = pid;
> kernel_wait(pid, &sub_info->retval);
> + }
>
> /* Restore default kernel sig handler */
> kernel_sigaction(SIGCHLD, SIG_IGN);