2022-06-17 21:10:33

by Madhavan T. Venkataraman

[permalink] [raw]
Subject: [PATCH v15 4/6] arm64: Introduce stack trace reliability checks in the unwinder

From: "Madhavan T. Venkataraman" <[email protected]>

There are some kernel features and conditions that make a stack trace
unreliable. Callers may require the unwinder to detect these cases.
E.g., livepatch.

Introduce a new function called unwind_check_reliability() that will
detect these cases and set a flag in the unwind state. Call
unwind_check_reliability() for every frame in unwind().

Introduce the first reliability check in unwind_check_reliability() - If
a return PC is not a valid kernel text address, consider the stack
trace unreliable. It could be some generated code. Other reliability checks
will be added in the future.

Let unwind() return a boolean to indicate if the stack trace is
reliable.

Signed-off-by: Madhavan T. Venkataraman <[email protected]>
Reviewed-by: Mark Brown <[email protected]>
---
arch/arm64/kernel/stacktrace.c | 31 +++++++++++++++++++++++++++++--
1 file changed, 29 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index c749129aba5a..5ef2ce217324 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -44,6 +44,8 @@
* @final_fp: Pointer to the final frame.
*
* @failed: Unwind failed.
+ *
+ * @reliable: Stack trace is reliable.
*/
struct unwind_state {
unsigned long fp;
@@ -57,6 +59,7 @@ struct unwind_state {
struct task_struct *task;
unsigned long final_fp;
bool failed;
+ bool reliable;
};

static void unwind_init_common(struct unwind_state *state,
@@ -80,6 +83,7 @@ static void unwind_init_common(struct unwind_state *state,
state->prev_fp = 0;
state->prev_type = STACK_TYPE_UNKNOWN;
state->failed = false;
+ state->reliable = true;

/* Stack trace terminates here. */
state->final_fp = (unsigned long)task_pt_regs(task)->stackframe;
@@ -242,11 +246,34 @@ static void notrace unwind_next(struct unwind_state *state)
}
NOKPROBE_SYMBOL(unwind_next);

-static void notrace unwind(struct unwind_state *state,
+/*
+ * Check the stack frame for conditions that make further unwinding unreliable.
+ */
+static void unwind_check_reliability(struct unwind_state *state)
+{
+ if (state->fp == state->final_fp) {
+ /* Final frame; no more unwind, no need to check reliability */
+ return;
+ }
+
+ /*
+ * If the PC is not a known kernel text address, then we cannot
+ * be sure that a subsequent unwind will be reliable, as we
+ * don't know that the code follows our unwind requirements.
+ */
+ if (!__kernel_text_address(state->pc))
+ state->reliable = false;
+}
+
+static bool notrace unwind(struct unwind_state *state,
stack_trace_consume_fn consume_entry, void *cookie)
{
- while (unwind_continue(state, consume_entry, cookie))
+ unwind_check_reliability(state);
+ while (unwind_continue(state, consume_entry, cookie)) {
unwind_next(state);
+ unwind_check_reliability(state);
+ }
+ return !state->failed && state->reliable;
}
NOKPROBE_SYMBOL(unwind);

--
2.25.1


2022-06-26 08:47:51

by Mark Rutland

[permalink] [raw]
Subject: Re: [PATCH v15 4/6] arm64: Introduce stack trace reliability checks in the unwinder

On Fri, Jun 17, 2022 at 04:07:15PM -0500, [email protected] wrote:
> From: "Madhavan T. Venkataraman" <[email protected]>
>
> There are some kernel features and conditions that make a stack trace
> unreliable. Callers may require the unwinder to detect these cases.
> E.g., livepatch.
>
> Introduce a new function called unwind_check_reliability() that will
> detect these cases and set a flag in the unwind state. Call
> unwind_check_reliability() for every frame in unwind().
>
> Introduce the first reliability check in unwind_check_reliability() - If
> a return PC is not a valid kernel text address, consider the stack
> trace unreliable. It could be some generated code. Other reliability checks
> will be added in the future.
>
> Let unwind() return a boolean to indicate if the stack trace is
> reliable.
>
> Signed-off-by: Madhavan T. Venkataraman <[email protected]>
> Reviewed-by: Mark Brown <[email protected]>
> ---
> arch/arm64/kernel/stacktrace.c | 31 +++++++++++++++++++++++++++++--
> 1 file changed, 29 insertions(+), 2 deletions(-)
>
> diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
> index c749129aba5a..5ef2ce217324 100644
> --- a/arch/arm64/kernel/stacktrace.c
> +++ b/arch/arm64/kernel/stacktrace.c
> @@ -44,6 +44,8 @@
> * @final_fp: Pointer to the final frame.
> *
> * @failed: Unwind failed.
> + *
> + * @reliable: Stack trace is reliable.
> */

I would strongly prefer if we could have something like an
unwind_state_is_reliable() helper, and just use that directly, rather than
storing that into the state.

That way, we can opt-into any expensive checks in the reliable unwinder (e.g.
__kernel_text_address), and can use them elsewhere for informative purposes
(e.g. when dumping a stacktrace out to the console).

> struct unwind_state {
> unsigned long fp;
> @@ -57,6 +59,7 @@ struct unwind_state {
> struct task_struct *task;
> unsigned long final_fp;
> bool failed;
> + bool reliable;
> };
>
> static void unwind_init_common(struct unwind_state *state,
> @@ -80,6 +83,7 @@ static void unwind_init_common(struct unwind_state *state,
> state->prev_fp = 0;
> state->prev_type = STACK_TYPE_UNKNOWN;
> state->failed = false;
> + state->reliable = true;
>
> /* Stack trace terminates here. */
> state->final_fp = (unsigned long)task_pt_regs(task)->stackframe;
> @@ -242,11 +246,34 @@ static void notrace unwind_next(struct unwind_state *state)
> }
> NOKPROBE_SYMBOL(unwind_next);
>
> -static void notrace unwind(struct unwind_state *state,
> +/*
> + * Check the stack frame for conditions that make further unwinding unreliable.
> + */
> +static void unwind_check_reliability(struct unwind_state *state)
> +{
> + if (state->fp == state->final_fp) {
> + /* Final frame; no more unwind, no need to check reliability */
> + return;
> + }
> +
> + /*
> + * If the PC is not a known kernel text address, then we cannot
> + * be sure that a subsequent unwind will be reliable, as we
> + * don't know that the code follows our unwind requirements.
> + */
> + if (!__kernel_text_address(state->pc))
> + state->reliable = false;
> +}

I'd strongly prefer that we split this into two helpers, e.g.

static inline bool unwind_state_is_final(struct unwind_state *state)
{
return state->fp == state->final_fp;
}

static inline bool unwind_state_is_reliable(struct unwind_state *state)
{
return __kernel_text_address(state->pc);
}

> +
> +static bool notrace unwind(struct unwind_state *state,
> stack_trace_consume_fn consume_entry, void *cookie)
> {
> - while (unwind_continue(state, consume_entry, cookie))
> + unwind_check_reliability(state);
> + while (unwind_continue(state, consume_entry, cookie)) {
> unwind_next(state);
> + unwind_check_reliability(state);

This is going to slow down regular unwinds even when the reliability value is
not consumed (e.g. for KASAN traces on alloc and free), so I don't think this
should live here; it should be introduced with arch_stack_walk_reliable().

Thanks,
Mark.

> + }
> + return !state->failed && state->reliable;
> }
> NOKPROBE_SYMBOL(unwind);
>
> --
> 2.25.1
>

2022-06-27 05:16:32

by Madhavan T. Venkataraman

[permalink] [raw]
Subject: Re: [PATCH v15 4/6] arm64: Introduce stack trace reliability checks in the unwinder



On 6/26/22 03:32, Mark Rutland wrote:
> On Fri, Jun 17, 2022 at 04:07:15PM -0500, [email protected] wrote:
>> From: "Madhavan T. Venkataraman" <[email protected]>
>>
>> There are some kernel features and conditions that make a stack trace
>> unreliable. Callers may require the unwinder to detect these cases.
>> E.g., livepatch.
>>
>> Introduce a new function called unwind_check_reliability() that will
>> detect these cases and set a flag in the unwind state. Call
>> unwind_check_reliability() for every frame in unwind().
>>
>> Introduce the first reliability check in unwind_check_reliability() - If
>> a return PC is not a valid kernel text address, consider the stack
>> trace unreliable. It could be some generated code. Other reliability checks
>> will be added in the future.
>>
>> Let unwind() return a boolean to indicate if the stack trace is
>> reliable.
>>
>> Signed-off-by: Madhavan T. Venkataraman <[email protected]>
>> Reviewed-by: Mark Brown <[email protected]>
>> ---
>> arch/arm64/kernel/stacktrace.c | 31 +++++++++++++++++++++++++++++--
>> 1 file changed, 29 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
>> index c749129aba5a..5ef2ce217324 100644
>> --- a/arch/arm64/kernel/stacktrace.c
>> +++ b/arch/arm64/kernel/stacktrace.c
>> @@ -44,6 +44,8 @@
>> * @final_fp: Pointer to the final frame.
>> *
>> * @failed: Unwind failed.
>> + *
>> + * @reliable: Stack trace is reliable.
>> */
>
> I would strongly prefer if we could have something like an
> unwind_state_is_reliable() helper, and just use that directly, rather than
> storing that into the state.
>
> That way, we can opt-into any expensive checks in the reliable unwinder (e.g.
> __kernel_text_address), and can use them elsewhere for informative purposes
> (e.g. when dumping a stacktrace out to the console).
>
>> struct unwind_state {
>> unsigned long fp;
>> @@ -57,6 +59,7 @@ struct unwind_state {
>> struct task_struct *task;
>> unsigned long final_fp;
>> bool failed;
>> + bool reliable;
>> };
>>
>> static void unwind_init_common(struct unwind_state *state,
>> @@ -80,6 +83,7 @@ static void unwind_init_common(struct unwind_state *state,
>> state->prev_fp = 0;
>> state->prev_type = STACK_TYPE_UNKNOWN;
>> state->failed = false;
>> + state->reliable = true;
>>
>> /* Stack trace terminates here. */
>> state->final_fp = (unsigned long)task_pt_regs(task)->stackframe;
>> @@ -242,11 +246,34 @@ static void notrace unwind_next(struct unwind_state *state)
>> }
>> NOKPROBE_SYMBOL(unwind_next);
>>
>> -static void notrace unwind(struct unwind_state *state,
>> +/*
>> + * Check the stack frame for conditions that make further unwinding unreliable.
>> + */
>> +static void unwind_check_reliability(struct unwind_state *state)
>> +{
>> + if (state->fp == state->final_fp) {
>> + /* Final frame; no more unwind, no need to check reliability */
>> + return;
>> + }
>> +
>> + /*
>> + * If the PC is not a known kernel text address, then we cannot
>> + * be sure that a subsequent unwind will be reliable, as we
>> + * don't know that the code follows our unwind requirements.
>> + */
>> + if (!__kernel_text_address(state->pc))
>> + state->reliable = false;
>> +}
>
> I'd strongly prefer that we split this into two helpers, e.g.
>
> static inline bool unwind_state_is_final(struct unwind_state *state)
> {
> return state->fp == state->final_fp;
> }
>
> static inline bool unwind_state_is_reliable(struct unwind_state *state)
> {
> return __kernel_text_address(state->pc);
> }
>
>> +
>> +static bool notrace unwind(struct unwind_state *state,
>> stack_trace_consume_fn consume_entry, void *cookie)
>> {
>> - while (unwind_continue(state, consume_entry, cookie))
>> + unwind_check_reliability(state);
>> + while (unwind_continue(state, consume_entry, cookie)) {
>> unwind_next(state);
>> + unwind_check_reliability(state);
>
> This is going to slow down regular unwinds even when the reliability value is
> not consumed (e.g. for KASAN traces on alloc and free), so I don't think this
> should live here; it should be introduced with arch_stack_walk_reliable().
>

So, I have been thinking about this whole reliability check thing. Instead of
checking many different things for reliability, I believe that a single frame
pointer validation check is sufficient. I am attempting to do that in my
other frame pointer validation patch series. Hopefully, in that patch series,
I can prove that that one check is sufficient. We will continue this discussion
there.

So, for now, I am dropping the reliability check patches from the series.
I will just send the unwind loop reorg in v16 and focus on getting that
upstreamed.

Thanks.

Madhavan