Since 00cd5c37af (ptrace: permit ptracing of /sbin/init) we can now
trace init processes. init is initially protected with
SIGNAL_UNKILLABLE which will prevent fatal signals such as SIGSTOP, but
there are a number of paths during tracing where SIGNAL_UNKILLABLE can
be implicitly cleared.
This can result in init becoming stoppable/killable after tracing. For
example, running:
while true; do kill -STOP 1; done &
strace -p 1
and then stopping strace and the kill loop will result in init being
left in state TASK_STOPPED. Sending SIGCONT to init will resume it, but
init will now respond to future SIGSTOP signals rather than ignoring
them.
Instead of direct assignment to struct signal_struct::flags, provide
accessors that protect SIGNAL_UNKILLABLE.
Cc: Alexander Viro <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Oleg Nesterov <[email protected]>
Signed-off-by: Jamie Iles <[email protected]>
---
The task being left in state TASK_STOPPED could still be seen as slightly
unexpected, I suspect this is pending signals that got set because the task
was traced but delivered after detach. Perhaps signals that would normally be
ignored because of SIGNAL_UNKILLABLE should be flushed on PTRACE_DETACH?
fs/coredump.c | 4 ++--
include/linux/sched.h | 22 +++++++++++++++++++++-
kernel/exit.c | 4 ++--
kernel/signal.c | 8 ++++----
4 files changed, 29 insertions(+), 9 deletions(-)
diff --git a/fs/coredump.c b/fs/coredump.c
index eb9c92c9b20f..290ba7e69b28 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -312,7 +312,7 @@ static int zap_process(struct task_struct *start, int exit_code, int flags)
int nr = 0;
/* ignore all signals except SIGKILL, see prepare_signal() */
- start->signal->flags = SIGNAL_GROUP_COREDUMP | flags;
+ signal_set_flags(start->signal, SIGNAL_GROUP_COREDUMP | flags);
start->signal->group_exit_code = exit_code;
start->signal->group_stop_count = 0;
@@ -451,7 +451,7 @@ static void coredump_finish(struct mm_struct *mm, bool core_dumped)
if (core_dumped && !__fatal_signal_pending(current))
current->signal->group_exit_code |= 0x80;
current->signal->group_exit_task = NULL;
- current->signal->flags = SIGNAL_GROUP_EXIT;
+ signal_set_flags(current->signal, SIGNAL_GROUP_EXIT);
spin_unlock_irq(&current->sighand->siglock);
next = mm->core_state->dumper.next;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 348f51b0ec92..26e5f10b338c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -696,7 +696,11 @@ struct signal_struct {
/* thread group stop support, overloads group_exit_code too */
int group_stop_count;
- unsigned int flags; /* see SIGNAL_* flags below */
+ /*
+ * see SIGNAL_* flags below. Set and clear flags with
+ * signal_set_flags()/signal_clear_flags().
+ */
+ unsigned int flags;
/*
* PR_SET_CHILD_SUBREAPER marks a process, like a service
@@ -829,6 +833,22 @@ struct signal_struct {
#define SIGNAL_CLD_MASK (SIGNAL_CLD_STOPPED|SIGNAL_CLD_CONTINUED)
#define SIGNAL_UNKILLABLE 0x00000040 /* for init: ignore fatal signals */
+/*
+ * Protected flags that should not normally be set/cleared.
+ */
+#define SIGNAL_PROTECTED_MASK SIGNAL_UNKILLABLE
+
+static inline void signal_set_flags(struct signal_struct *sig,
+ unsigned int flags)
+{
+ sig->flags = (sig->flags & SIGNAL_PROTECTED_MASK) | flags;
+}
+
+static inline void signal_clear_flags(struct signal_struct *sig,
+ unsigned int flags)
+{
+ sig->flags &= (~flags | SIGNAL_PROTECTED_MASK);
+}
/* If true, all threads except ->group_exit_task have pending SIGKILL */
static inline int signal_group_exit(const struct signal_struct *sig)
diff --git a/kernel/exit.c b/kernel/exit.c
index 9d68c45ebbe3..89a7f8860efa 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -922,7 +922,7 @@ do_group_exit(int exit_code)
exit_code = sig->group_exit_code;
else {
sig->group_exit_code = exit_code;
- sig->flags = SIGNAL_GROUP_EXIT;
+ signal_set_flags(sig, SIGNAL_GROUP_EXIT);
zap_other_threads(current);
}
spin_unlock_irq(&sighand->siglock);
@@ -1315,7 +1315,7 @@ static int wait_task_continued(struct wait_opts *wo, struct task_struct *p)
return 0;
}
if (!unlikely(wo->wo_flags & WNOWAIT))
- p->signal->flags &= ~SIGNAL_STOP_CONTINUED;
+ signal_clear_flags(p->signal, SIGNAL_STOP_CONTINUED);
uid = from_kuid_munged(current_user_ns(), task_uid(p));
spin_unlock_irq(&p->sighand->siglock);
diff --git a/kernel/signal.c b/kernel/signal.c
index 75761acc77cf..0394e1205d1e 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -346,7 +346,7 @@ static bool task_participate_group_stop(struct task_struct *task)
* fresh group stop. Read comment in do_signal_stop() for details.
*/
if (!sig->group_stop_count && !(sig->flags & SIGNAL_STOP_STOPPED)) {
- sig->flags = SIGNAL_STOP_STOPPED;
+ signal_set_flags(sig, SIGNAL_STOP_STOPPED);
return true;
}
return false;
@@ -837,7 +837,7 @@ static bool prepare_signal(int sig, struct task_struct *p, bool force)
* will take ->siglock, notice SIGNAL_CLD_MASK, and
* notify its parent. See get_signal_to_deliver().
*/
- signal->flags = why | SIGNAL_STOP_CONTINUED;
+ signal_set_flags(signal, why | SIGNAL_STOP_CONTINUED);
signal->group_stop_count = 0;
signal->group_exit_code = 0;
}
@@ -922,7 +922,7 @@ static void complete_signal(int sig, struct task_struct *p, int group)
* running and doing things after a slower
* thread has the fatal signal pending.
*/
- signal->flags = SIGNAL_GROUP_EXIT;
+ signal_set_flags(signal, SIGNAL_GROUP_EXIT);
signal->group_exit_code = sig;
signal->group_stop_count = 0;
t = p;
@@ -2161,7 +2161,7 @@ int get_signal(struct ksignal *ksig)
else
why = CLD_STOPPED;
- signal->flags &= ~SIGNAL_CLD_MASK;
+ signal_clear_flags(signal, SIGNAL_CLD_MASK);
spin_unlock_irq(&sighand->siglock);
--
2.9.2
On 11/16, Jamie Iles wrote:
>
> This can result in init becoming stoppable/killable after tracing. For
> example, running:
>
> while true; do kill -STOP 1; done &
> strace -p 1
> and then stopping strace and the kill loop will result in init being
> left in state TASK_STOPPED. Sending SIGCONT to init will resume it, but
> init will now respond to future SIGSTOP signals rather than ignoring
> them.
Yes, and a lot more... I forgot about these problems again.
Jamie, sorry for delay, I'll try to read the patch and reply tomorrow.
Oleg.
Hi Oleg,
On Thu, Nov 17, 2016 at 08:04:20PM +0100, Oleg Nesterov wrote:
> On 11/16, Jamie Iles wrote:
> >
> > This can result in init becoming stoppable/killable after tracing. For
> > example, running:
> >
> > while true; do kill -STOP 1; done &
> > strace -p 1
>
> > and then stopping strace and the kill loop will result in init being
> > left in state TASK_STOPPED. Sending SIGCONT to init will resume it, but
> > init will now respond to future SIGSTOP signals rather than ignoring
> > them.
>
> Yes, and a lot more... I forgot about these problems again.
>
> Jamie, sorry for delay, I'll try to read the patch and reply tomorrow.
Did you get chance to look at the patch? I did have another thought -
rather than the accessors, we could change signal_struct to have:
unsigned int unkillable:1;
unsigned int flags:31;
to separate signal_unkillable from flags, making it a bit safer in the
future.
Jamie
Jamie,
I am really sorry for the huge delay.
On 11/16, Jamie Iles wrote:
>
> Since 00cd5c37af (ptrace: permit ptracing of /sbin/init) we can now
> trace init processes. init is initially protected with
> SIGNAL_UNKILLABLE which will prevent fatal signals such as SIGSTOP, but
> there are a number of paths during tracing where SIGNAL_UNKILLABLE can
> be implicitly cleared.
Yes, old problem which nobody bothered to fix ;) Thanks for doing this.
To remind, there are more problems with SIGNAL_UNKILLABLE, but this
patch looks like a good start to me.
However, I would like to ask you to make V2, see below.
> +static inline void signal_set_flags(struct signal_struct *sig,
> + unsigned int flags)
> +{
> + sig->flags = (sig->flags & SIGNAL_PROTECTED_MASK) | flags;
> +}
OK, agreed, probably the helper makes sense, but I think it is overused
in this patch, please see below. In short, I'd suggest to use it only in
the jctl code, at least for now.
> +static inline void signal_clear_flags(struct signal_struct *sig,
> + unsigned int flags)
> +{
> + sig->flags &= (~flags | SIGNAL_PROTECTED_MASK);
> +}
But this one looks pointless.
> --- a/kernel/exit.c
> +++ b/kernel/exit.c
> @@ -922,7 +922,7 @@ do_group_exit(int exit_code)
> exit_code = sig->group_exit_code;
> else {
> sig->group_exit_code = exit_code;
> - sig->flags = SIGNAL_GROUP_EXIT;
> + signal_set_flags(sig, SIGNAL_GROUP_EXIT);
Well. I am not sure about this change. SIGNAL_GROUP_EXIT is the terminal
state, it is fine to clear UNKILLABLE.
Yes, perhaps this actually makes sense, and we want to make UNKILLABLE
immutable, but then we need to change force_sig_info() too, at least.
And btw it should be changed anyway, in particular UNKILLABLE can be
wrongly cleared if the task is traced. But I'd prefer to do this later.
The same for the similar changes in zap_process(), coredump_finish(),
and complete_signal().
> @@ -1315,7 +1315,7 @@ static int wait_task_continued(struct wait_opts *wo, struct task_struct *p)
> return 0;
> }
> if (!unlikely(wo->wo_flags & WNOWAIT))
> - p->signal->flags &= ~SIGNAL_STOP_CONTINUED;
> + signal_clear_flags(p->signal, SIGNAL_STOP_CONTINUED);
Why? "flags &= ~SIGNAL_STOP_CONTINUED" can't clear other bits, so this
change and the helper itself looks pointless.
If you add this helper for readability I won't argue. But then it should
not deny SIGNAL_PROTECTED_MASK, and force_sig_info() should use it too.
> @@ -346,7 +346,7 @@ static bool task_participate_group_stop(struct task_struct *task)
> * fresh group stop. Read comment in do_signal_stop() for details.
> */
> if (!sig->group_stop_count && !(sig->flags & SIGNAL_STOP_STOPPED)) {
> - sig->flags = SIGNAL_STOP_STOPPED;
> + signal_set_flags(sig, SIGNAL_STOP_STOPPED);
> return true;
OK,
> @@ -837,7 +837,7 @@ static bool prepare_signal(int sig, struct task_struct *p, bool force)
> * will take ->siglock, notice SIGNAL_CLD_MASK, and
> * notify its parent. See get_signal_to_deliver().
> */
> - signal->flags = why | SIGNAL_STOP_CONTINUED;
> + signal_set_flags(signal, why | SIGNAL_STOP_CONTINUED);
OK,
> @@ -922,7 +922,7 @@ static void complete_signal(int sig, struct task_struct *p, int group)
> * running and doing things after a slower
> * thread has the fatal signal pending.
> */
> - signal->flags = SIGNAL_GROUP_EXIT;
> + signal_set_flags(signal, SIGNAL_GROUP_EXIT);
Again, I'd prefer to just set SIGNAL_GROUP_EXIT, like in do_group_exit().
Note also that SIGNAL_UNKILLABLE can't be set in this case, see the check
above, so this change has no effect. And at the same time this code needs
the changes too, this SIGNAL_UNKILLABLE check is not 100% correct, but this
is off-topic.
So I think it would be better to start with the minimal change which fixes
task_participate_group_stop() and prepare_signal() only. And while I won't
insist, perhaps we should add
#define SIGNAL_STOP_MASK (SIGNAL_CLD_MASK | SIGNAL_STOP_STOPPED | SIGNAL_STOP_CONTINUED)
signal_set_stop_flags(signal, flags)
{
signal->flags = (signal->flags & ~SIGNAL_STOP_MASK) | flags;
}
instead of signal_set_flags(). This way we can, say, add
WARN_ON(signal->flags & (SIGNAL_GROUP_EXIT|SIGNAL_GROUP_COREDUMP)) into this helper.
Then later we can add signal_set_exit_flags() which manipulates
GROUP_EXIT/COREDUMP/UNKILLABLE.
Not sure, up to you.
Oleg.
Hi Oleg,
On Tue, Nov 29, 2016 at 03:06:00PM +0100, Oleg Nesterov wrote:
> Jamie,
>
> I am really sorry for the huge delay.
No problem!
> On 11/16, Jamie Iles wrote:
> >
> > Since 00cd5c37af (ptrace: permit ptracing of /sbin/init) we can now
> > trace init processes. init is initially protected with
> > SIGNAL_UNKILLABLE which will prevent fatal signals such as SIGSTOP, but
> > there are a number of paths during tracing where SIGNAL_UNKILLABLE can
> > be implicitly cleared.
>
> Yes, old problem which nobody bothered to fix ;) Thanks for doing this.
> To remind, there are more problems with SIGNAL_UNKILLABLE, but this
> patch looks like a good start to me.
>
> However, I would like to ask you to make V2, see below.
>
> > +static inline void signal_set_flags(struct signal_struct *sig,
> > + unsigned int flags)
> > +{
> > + sig->flags = (sig->flags & SIGNAL_PROTECTED_MASK) | flags;
> > +}
>
> OK, agreed, probably the helper makes sense, but I think it is overused
> in this patch, please see below. In short, I'd suggest to use it only in
> the jctl code, at least for now.
>
> > +static inline void signal_clear_flags(struct signal_struct *sig,
> > + unsigned int flags)
> > +{
> > + sig->flags &= (~flags | SIGNAL_PROTECTED_MASK);
> > +}
>
> But this one looks pointless.
Well the intent was to have set/clear helpers and *always* use those so
that it's clear when SIGNAL_UNKILLABLE is being intentionally cleared.
At the moment there are sites where it is cleared intentionally, and
others as a consequence of direct assignment.
> > --- a/kernel/exit.c
> > +++ b/kernel/exit.c
> > @@ -922,7 +922,7 @@ do_group_exit(int exit_code)
> > exit_code = sig->group_exit_code;
> > else {
> > sig->group_exit_code = exit_code;
> > - sig->flags = SIGNAL_GROUP_EXIT;
> > + signal_set_flags(sig, SIGNAL_GROUP_EXIT);
>
> Well. I am not sure about this change. SIGNAL_GROUP_EXIT is the terminal
> state, it is fine to clear UNKILLABLE.
>
> Yes, perhaps this actually makes sense, and we want to make UNKILLABLE
> immutable, but then we need to change force_sig_info() too, at least.
> And btw it should be changed anyway, in particular UNKILLABLE can be
> wrongly cleared if the task is traced. But I'd prefer to do this later.
>
> The same for the similar changes in zap_process(), coredump_finish(),
> and complete_signal().
Okay.
> > @@ -922,7 +922,7 @@ static void complete_signal(int sig, struct
> > task_struct *p, int group)
> > * running and doing things after a slower
> > * thread has the fatal signal pending.
> > */
> > - signal->flags = SIGNAL_GROUP_EXIT;
> > + signal_set_flags(signal, SIGNAL_GROUP_EXIT);
>
> Again, I'd prefer to just set SIGNAL_GROUP_EXIT, like in do_group_exit().
> Note also that SIGNAL_UNKILLABLE can't be set in this case, see the check
> above, so this change has no effect. And at the same time this code needs
> the changes too, this SIGNAL_UNKILLABLE check is not 100% correct, but this
> is off-topic.
>
> So I think it would be better to start with the minimal change which fixes
> task_participate_group_stop() and prepare_signal() only. And while I won't
> insist, perhaps we should add
>
> #define SIGNAL_STOP_MASK (SIGNAL_CLD_MASK | SIGNAL_STOP_STOPPED | SIGNAL_STOP_CONTINUED)
>
> signal_set_stop_flags(signal, flags)
> {
> signal->flags = (signal->flags & ~SIGNAL_STOP_MASK) | flags;
> }
>
> instead of signal_set_flags(). This way we can, say, add
> WARN_ON(signal->flags & (SIGNAL_GROUP_EXIT|SIGNAL_GROUP_COREDUMP)) into this helper.
> Then later we can add signal_set_exit_flags() which manipulates
> GROUP_EXIT/COREDUMP/UNKILLABLE.
>
> Not sure, up to you.
Right, so part of the challenge was figuring out where SIGNAL_UNKILLABLE
should be cleared, and where it shouldn't. So is making it an explicit
boolean in a bitfield a better approach? That way we can continue to
manipulate flags as required and then explicitly clear SIGNAL_UNKILLABLE
when it needs to be cleared? SIGNAL_UNKILLABLE feels like enough of a
corner case that it has easy potential for regression in the future if
signal_struct::flags is assigned to.
Thanks,
Jamie
On 11/29, Jamie Iles wrote:
>
> On Tue, Nov 29, 2016 at 03:06:00PM +0100, Oleg Nesterov wrote:
> >
> > > +static inline void signal_clear_flags(struct signal_struct *sig,
> > > + unsigned int flags)
> > > +{
> > > + sig->flags &= (~flags | SIGNAL_PROTECTED_MASK);
> > > +}
> >
> > But this one looks pointless.
>
> Well the intent was to have set/clear helpers and *always* use those so
> that it's clear when SIGNAL_UNKILLABLE is being intentionally cleared.
But you can't clear UNKILLABLE intentionally if you do "flags &= WHATEVER".
Unless WHATEVER included UNKILLABLE of course, but in this case we do want
to clear this bit, so I do not understand the purpose.
And in any case this helper should not deny SIGNAL_PROTECTED_MASK silently,
this just adds the confusion.
> > So I think it would be better to start with the minimal change which fixes
> > task_participate_group_stop() and prepare_signal() only. And while I won't
> > insist, perhaps we should add
> >
> > #define SIGNAL_STOP_MASK (SIGNAL_CLD_MASK | SIGNAL_STOP_STOPPED | SIGNAL_STOP_CONTINUED)
> >
> > signal_set_stop_flags(signal, flags)
> > {
> > signal->flags = (signal->flags & ~SIGNAL_STOP_MASK) | flags;
> > }
> >
> > instead of signal_set_flags(). This way we can, say, add
> > WARN_ON(signal->flags & (SIGNAL_GROUP_EXIT|SIGNAL_GROUP_COREDUMP)) into this helper.
> > Then later we can add signal_set_exit_flags() which manipulates
> > GROUP_EXIT/COREDUMP/UNKILLABLE.
> >
> > Not sure, up to you.
>
> Right, so part of the challenge was figuring out where SIGNAL_UNKILLABLE
> should be cleared, and where it shouldn't. So is making it an explicit
> boolean in a bitfield a better approach?
Well, I don't think so. Contrary, I think we should turn ->is_child_subreaper
and ->has_child_subreaper into SIGNAL_ flags. And btw these bitfields were
added exactly because we can't add the new SIGNAL_ flags until we cleanup
the current code, to ensure they can't be cleared unintentionally just like
UNKILLABLE. And we can have more flags, so this is not only about UNKILLABLE.
And that is why I suggest to add SIGNAL_STOP_MASK/signal_set_stop_flags
instead of SIGNAL_PROTECTED_MASK/signal_set_flags.
If we add set_signal_exit_flags() later, they should use the different
"preserve" masks. Say, signal_set_stop_flags() must never clear
SIGNAL_GROUP_EXIT (actually it must never see this flag set). On the
other hand, set_signal_exit_flags(GROUP_EXIT) must clear GROUP_STOPPED/etc
but may be preserve UNKILLABLE (later). So bitfields will only complicate
this all.
Oleg.