2020-04-16 17:36:57

by Peter Zijlstra

Subject: [PATCH v5 04/17] x86,ftrace: Fix ftrace_regs_caller() unwind

The ftrace_regs_caller() trampoline does something 'funny' when there
is a direct-caller present. In that case it stuffs the 'direct-caller'
address on the return stack and then exits the function. This then
results in 'returning' to the direct-caller with the exact registers
we came in with -- an indirect tail-call without using a register.
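
Schematically, that exit sequence looks like this (a simplified sketch
of the ftrace_64.S code; in reality the final RET is only reached via
ftrace_epilogue, or via a RET poked into the trampoline copies):

	movq ORIG_RAX(%rsp), %rax	/* direct-caller address, 0 if none */
	cmpq $0, %rax
	je 1f				/* zero: take the normal exit */

	/*
	 * Move the saved flags down one slot and put the direct-caller
	 * address where they were, directly above the original return
	 * address.
	 */
	movq MCOUNT_REG_SIZE(%rsp), %rdi
	movq %rdi, MCOUNT_REG_SIZE-8(%rsp)
	movq %rax, MCOUNT_REG_SIZE(%rsp)

	restore_mcount_regs 8		/* registers exactly as on entry */
	popfq
	retq				/* 'return' into the direct caller;
					 * the original return address stays
					 * on the stack for it */
1:					/* normal exit path */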

This however (rightfully) confuses objtool because the function shares
a few instructions in order to have a single exit path, but the stack
layout differs depending on which path was taken to get there.

This is currently kludged by forcing the stack state to the non-direct
case, but this generates actively wrong (ORC) unwind information for
the direct case, leading to potentially broken unwinds.

Fix this issue by fully separating the exit paths. This results in
having to poke a second RET into the trampoline copy, see
ftrace_regs_caller_ret.

This brings us to a second objtool problem: in order for it to
perceive the 'jmp ftrace_epilogue' as a function exit, it needs to be
recognised as a tail call. In order to make that happen,
ftrace_epilogue needs to be the start of an STT_FUNC, so re-arrange
the code to make this so.
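
Schematically (hypothetical labels), objtool only considers a jmp a
tail call when its target is the start of an STT_FUNC symbol:

	SYM_FUNC_START(epilogue_func)	/* STT_FUNC symbol */
		retq
	SYM_FUNC_END(epilogue_func)

	SYM_FUNC_START(some_func)
		jmp epilogue_func	/* target starts an STT_FUNC:
					 * recognised as a tail call */
	SYM_FUNC_END(some_func)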

Finally, a third issue is that objtool requires functions to exit with
the same stack layout they started with, which is obviously violated
in the direct case; employ the new HINT_RET_OFFSET to tell objtool
this is an expected exception.
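
The hint itself (introduced earlier in this series) is conceptually
just an annotation recording the expected stack displacement at the
exit; roughly (a sketch, see unwind_hints.h in the series for the real
definition):

	.macro UNWIND_HINT_RET_OFFSET sp_offset=8
		UNWIND_HINT type=UNWIND_HINT_TYPE_RET_OFFSET sp_offset=\sp_offset
	.endm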

Together, this results in generating correct ORC unwind information
for the ftrace_regs_caller() function and its trampoline copies.

Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
---
arch/x86/kernel/ftrace.c | 12 ++++++++++--
arch/x86/kernel/ftrace_64.S | 32 +++++++++++++++-----------------
2 files changed, 25 insertions(+), 19 deletions(-)

--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -282,7 +282,8 @@ static inline void tramp_free(void *tram

/* Defined as markers to the end of the ftrace default trampolines */
extern void ftrace_regs_caller_end(void);
-extern void ftrace_epilogue(void);
+extern void ftrace_regs_caller_ret(void);
+extern void ftrace_caller_end(void);
extern void ftrace_caller_op_ptr(void);
extern void ftrace_regs_caller_op_ptr(void);

@@ -334,7 +335,7 @@ create_trampoline(struct ftrace_ops *ops
call_offset = (unsigned long)ftrace_regs_call;
} else {
start_offset = (unsigned long)ftrace_caller;
- end_offset = (unsigned long)ftrace_epilogue;
+ end_offset = (unsigned long)ftrace_caller_end;
op_offset = (unsigned long)ftrace_caller_op_ptr;
call_offset = (unsigned long)ftrace_call;
}
@@ -366,6 +367,13 @@ create_trampoline(struct ftrace_ops *ops
if (WARN_ON(ret < 0))
goto fail;

+ if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) {
+ ip = trampoline + (ftrace_regs_caller_ret - ftrace_regs_caller);
+ ret = probe_kernel_read(ip, (void *)retq, RET_SIZE);
+ if (WARN_ON(ret < 0))
+ goto fail;
+ }
+
/*
* The address of the ftrace_ops that is used for this trampoline
* is stored at the end of the trampoline. This will be used to
--- a/arch/x86/kernel/ftrace_64.S
+++ b/arch/x86/kernel/ftrace_64.S
@@ -157,8 +157,12 @@ SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBA
* think twice before adding any new code or changing the
* layout here.
*/
-SYM_INNER_LABEL(ftrace_epilogue, SYM_L_GLOBAL)
+SYM_INNER_LABEL(ftrace_caller_end, SYM_L_GLOBAL)

+ jmp ftrace_epilogue
+SYM_FUNC_END(ftrace_caller);
+
+SYM_FUNC_START(ftrace_epilogue)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL)
jmp ftrace_stub
@@ -170,14 +174,12 @@ SYM_INNER_LABEL(ftrace_graph_call, SYM_L
*/
SYM_INNER_LABEL_ALIGN(ftrace_stub, SYM_L_WEAK)
retq
-SYM_FUNC_END(ftrace_caller)
+SYM_FUNC_END(ftrace_epilogue)

SYM_FUNC_START(ftrace_regs_caller)
/* Save the current flags before any operations that can change them */
pushfq

- UNWIND_HINT_SAVE
-
/* added 8 bytes to save flags */
save_mcount_regs 8
/* save_mcount_regs fills in first two parameters */
@@ -233,7 +235,10 @@ SYM_INNER_LABEL(ftrace_regs_call, SYM_L_
movq ORIG_RAX(%rsp), %rax
movq %rax, MCOUNT_REG_SIZE-8(%rsp)

- /* If ORIG_RAX is anything but zero, make this a call to that */
+ /*
+ * If ORIG_RAX is anything but zero, make this a call to that.
+ * See arch_ftrace_set_direct_caller().
+ */
movq ORIG_RAX(%rsp), %rax
cmpq $0, %rax
je 1f
@@ -244,20 +249,14 @@ SYM_INNER_LABEL(ftrace_regs_call, SYM_L_
movq %rax, MCOUNT_REG_SIZE(%rsp)

restore_mcount_regs 8
+ /* Restore flags */
+ popfq

- jmp 2f
+SYM_INNER_LABEL(ftrace_regs_caller_ret, SYM_L_GLOBAL);
+ UNWIND_HINT_RET_OFFSET
+ jmp ftrace_epilogue

1: restore_mcount_regs
-
-
-2:
- /*
- * The stack layout is nondetermistic here, depending on which path was
- * taken. This confuses objtool and ORC, rightfully so. For now,
- * pretend the stack always looks like the non-direct case.
- */
- UNWIND_HINT_RESTORE
-
/* Restore flags */
popfq

@@ -268,7 +267,6 @@ SYM_INNER_LABEL(ftrace_regs_call, SYM_L_
* to the return.
*/
SYM_INNER_LABEL(ftrace_regs_caller_end, SYM_L_GLOBAL)
-
jmp ftrace_epilogue

SYM_FUNC_END(ftrace_regs_caller)



2020-04-17 19:23:27

by Alexandre Chartre

Subject: Re: [PATCH v5 04/17] x86,ftrace: Fix ftrace_regs_caller() unwind


On 4/16/20 1:47 PM, Peter Zijlstra wrote:
> The ftrace_regs_caller() trampoline does something 'funny' when there
> is a direct-caller present. In that case it stuffs the 'direct-caller'
> address on the return stack and then exits the function. This then
> results in 'returning' to the direct-caller with the exact registers
> we came in with -- an indirect tail-call without using a register.
>
> This however (rightfully) confuses objtool because the function shares
> a few instructions in order to have a single exit path, but the stack
> layout differs depending on which path was taken to get there.
>
> This is currently kludged by forcing the stack state to the non-direct
> case, but this generates actively wrong (ORC) unwind information for
> the direct case, leading to potentially broken unwinds.
>
> Fix this issue by fully separating the exit paths. This results in
> having to poke a second RET into the trampoline copy, see
> ftrace_regs_caller_ret.
>
> This brings us to a second objtool problem: in order for it to
> perceive the 'jmp ftrace_epilogue' as a function exit, it needs to be
> recognised as a tail call. In order to make that happen,
> ftrace_epilogue needs to be the start of an STT_FUNC, so re-arrange
> the code to make this so.
>
> Finally, a third issue is that objtool requires functions to exit with
> the same stack layout they started with, which is obviously violated
> in the direct case; employ the new HINT_RET_OFFSET to tell objtool
> this is an expected exception.
>
> Together, this results in generating correct ORC unwind information
> for the ftrace_regs_caller() function and its trampoline copies.
>
> Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
> ---
> arch/x86/kernel/ftrace.c | 12 ++++++++++--
> arch/x86/kernel/ftrace_64.S | 32 +++++++++++++++-----------------
> 2 files changed, 25 insertions(+), 19 deletions(-)
>
> --- a/arch/x86/kernel/ftrace.c
> +++ b/arch/x86/kernel/ftrace.c
> @@ -282,7 +282,8 @@ static inline void tramp_free(void *tram
>
> /* Defined as markers to the end of the ftrace default trampolines */
> extern void ftrace_regs_caller_end(void);
> -extern void ftrace_epilogue(void);
> +extern void ftrace_regs_caller_ret(void);
> +extern void ftrace_caller_end(void);
> extern void ftrace_caller_op_ptr(void);
> extern void ftrace_regs_caller_op_ptr(void);
>
> @@ -334,7 +335,7 @@ create_trampoline(struct ftrace_ops *ops
> call_offset = (unsigned long)ftrace_regs_call;
> } else {
> start_offset = (unsigned long)ftrace_caller;
> - end_offset = (unsigned long)ftrace_epilogue;
> + end_offset = (unsigned long)ftrace_caller_end;
> op_offset = (unsigned long)ftrace_caller_op_ptr;
> call_offset = (unsigned long)ftrace_call;
> }
> @@ -366,6 +367,13 @@ create_trampoline(struct ftrace_ops *ops
> if (WARN_ON(ret < 0))
> goto fail;
>
> + if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) {
> + ip = trampoline + (ftrace_regs_caller_ret - ftrace_regs_caller);

It might be safer to use start_offset instead of ftrace_regs_caller (in
case start_offset is ever changed to something different from ftrace_regs_caller
in the future):

ip = trampoline + (ftrace_regs_caller_ret - start_offset);

alex.

> + ret = probe_kernel_read(ip, (void *)retq, RET_SIZE);
> + if (WARN_ON(ret < 0))
> + goto fail;
> + }
> +
> /*
> * The address of the ftrace_ops that is used for this trampoline
> * is stored at the end of the trampoline. This will be used to
> --- a/arch/x86/kernel/ftrace_64.S
> +++ b/arch/x86/kernel/ftrace_64.S
> @@ -157,8 +157,12 @@ SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBA
> * think twice before adding any new code or changing the
> * layout here.
> */
> -SYM_INNER_LABEL(ftrace_epilogue, SYM_L_GLOBAL)
> +SYM_INNER_LABEL(ftrace_caller_end, SYM_L_GLOBAL)
>
> + jmp ftrace_epilogue
> +SYM_FUNC_END(ftrace_caller);
> +
> +SYM_FUNC_START(ftrace_epilogue)
> #ifdef CONFIG_FUNCTION_GRAPH_TRACER
> SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL)
> jmp ftrace_stub
> @@ -170,14 +174,12 @@ SYM_INNER_LABEL(ftrace_graph_call, SYM_L
> */
> SYM_INNER_LABEL_ALIGN(ftrace_stub, SYM_L_WEAK)
> retq
> -SYM_FUNC_END(ftrace_caller)
> +SYM_FUNC_END(ftrace_epilogue)
>
> SYM_FUNC_START(ftrace_regs_caller)
> /* Save the current flags before any operations that can change them */
> pushfq
>
> - UNWIND_HINT_SAVE
> -
> /* added 8 bytes to save flags */
> save_mcount_regs 8
> /* save_mcount_regs fills in first two parameters */
> @@ -233,7 +235,10 @@ SYM_INNER_LABEL(ftrace_regs_call, SYM_L_
> movq ORIG_RAX(%rsp), %rax
> movq %rax, MCOUNT_REG_SIZE-8(%rsp)
>
> - /* If ORIG_RAX is anything but zero, make this a call to that */
> + /*
> + * If ORIG_RAX is anything but zero, make this a call to that.
> + * See arch_ftrace_set_direct_caller().
> + */
> movq ORIG_RAX(%rsp), %rax
> cmpq $0, %rax
> je 1f
> @@ -244,20 +249,14 @@ SYM_INNER_LABEL(ftrace_regs_call, SYM_L_
> movq %rax, MCOUNT_REG_SIZE(%rsp)
>
> restore_mcount_regs 8
> + /* Restore flags */
> + popfq
>
> - jmp 2f
> +SYM_INNER_LABEL(ftrace_regs_caller_ret, SYM_L_GLOBAL);
> + UNWIND_HINT_RET_OFFSET
> + jmp ftrace_epilogue
>
> 1: restore_mcount_regs
> -
> -
> -2:
> - /*
> - * The stack layout is nondetermistic here, depending on which path was
> - * taken. This confuses objtool and ORC, rightfully so. For now,
> - * pretend the stack always looks like the non-direct case.
> - */
> - UNWIND_HINT_RESTORE
> -
> /* Restore flags */
> popfq
>
> @@ -268,7 +267,6 @@ SYM_INNER_LABEL(ftrace_regs_call, SYM_L_
> * to the return.
> */
> SYM_INNER_LABEL(ftrace_regs_caller_end, SYM_L_GLOBAL)
> -
> jmp ftrace_epilogue
>
> SYM_FUNC_END(ftrace_regs_caller)
>
>

2020-04-22 00:36:22

by Steven Rostedt

Subject: Re: [PATCH v5 04/17] x86,ftrace: Fix ftrace_regs_caller() unwind

Hi Peter,

After looking at the code, I realized that the only possible way to do
the "direct call" part is if the direct function helper is executed
there. All other ftrace_ops will not call that path.

I added a trampoline that points to ftrace_regs_caller to the direct
ftrace_ops, to force it never to allocate its own trampoline, and thus
no created trampoline should ever do the jump for a direct caller.

By doing this, I was able to move the code around to be a bit simpler,
and not need the double modifications (the double ret).

Of course, if any created trampoline were to touch the ORIG_RAX, then
it would crash. We could always nop that compare in the created
trampoline if that is a concern.

Anyway, I added the below patch on top of your patches and it appears
to work. Thoughts?

-- Steve

diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 867c126ddabe..2e11250d5647 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -367,13 +367,6 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
if (WARN_ON(ret < 0))
goto fail;

- if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) {
- ip = trampoline + (ftrace_regs_caller_ret - ftrace_regs_caller);
- ret = probe_kernel_read(ip, (void *)retq, RET_SIZE);
- if (WARN_ON(ret < 0))
- goto fail;
- }
-
/*
* The address of the ftrace_ops that is used for this trampoline
* is stored at the end of the trampoline. This will be used to
diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S
index 9738ed23964e..1f2afaa8f71f 100644
--- a/arch/x86/kernel/ftrace_64.S
+++ b/arch/x86/kernel/ftrace_64.S
@@ -241,22 +241,9 @@ SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL)
*/
movq ORIG_RAX(%rsp), %rax
testq %rax, %rax
- jz 1f
+ jnz 1f

- /* Swap the flags with orig_rax */
- movq MCOUNT_REG_SIZE(%rsp), %rdi
- movq %rdi, MCOUNT_REG_SIZE-8(%rsp)
- movq %rax, MCOUNT_REG_SIZE(%rsp)
-
- restore_mcount_regs 8
- /* Restore flags */
- popfq
-
-SYM_INNER_LABEL(ftrace_regs_caller_ret, SYM_L_GLOBAL);
- UNWIND_HINT_RET_OFFSET
- jmp ftrace_epilogue
-
-1: restore_mcount_regs
+ restore_mcount_regs
/* Restore flags */
popfq

@@ -267,8 +254,19 @@ SYM_INNER_LABEL(ftrace_regs_caller_ret, SYM_L_GLOBAL);
* to the return.
*/
SYM_INNER_LABEL(ftrace_regs_caller_end, SYM_L_GLOBAL)
+ UNWIND_HINT_RET_OFFSET
jmp ftrace_epilogue

+ /* Swap the flags with orig_rax */
+1: movq MCOUNT_REG_SIZE(%rsp), %rdi
+ movq %rdi, MCOUNT_REG_SIZE-8(%rsp)
+ movq %rax, MCOUNT_REG_SIZE(%rsp)
+
+ restore_mcount_regs 8
+ /* Restore flags */
+ popfq
+ jmp ftrace_epilogue
+
SYM_FUNC_END(ftrace_regs_caller)


diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 041694a1eb74..8f540eef7511 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2399,6 +2399,13 @@ struct ftrace_ops direct_ops = {
.flags = FTRACE_OPS_FL_IPMODIFY | FTRACE_OPS_FL_RECURSION_SAFE
| FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_REGS
| FTRACE_OPS_FL_PERMANENT,
+ /*
+ * By declaring the main trampoline as this trampoline
+ * it will never have one allocated for it. This is fine
+ * as this should only be used if we are in the
+ * ftrace_ops_list function.
+ */
+ .trampoline = FTRACE_REGS_ADDR,
};
#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */

2020-04-22 09:48:49

by Peter Zijlstra

Subject: Re: [PATCH v5 04/17] x86,ftrace: Fix ftrace_regs_caller() unwind

On Tue, Apr 21, 2020 at 08:33:45PM -0400, Steven Rostedt wrote:
> Hi Peter,
>
> After looking at the code, I realized that the only possible way to do
> the "direct call" part is if the direct function helper is executed
> there. All other ftrace_ops will not call that path.
>
> I added a trampoline that points to ftrace_regs_caller to the direct
> ftrace_ops, to force it never to allocate its own trampoline, and thus
> no created trampoline should ever do the jump for a direct caller.
>
> By doing this, I was able to move the code around to be a bit simpler,
> and not need the double modifications (the double ret).
>
> Of course, if any created trampoline were to touch the ORIG_RAX, then
> it would crash. We could always nop that compare in the created
> trampoline if that is a concern.
>
> Anyway, I added the below patch on top of your patches and it appears
> to work. Thoughts?

So can I push out those patches as is? :-) We can do this on top I
suppose.

Firstly, your patch isn't objtool clean:

arch/x86/kernel/ftrace_64.o: warning: objtool: ftrace_regs_caller()+0x199: sibling call from callable instruction with modified stack frame

So 10 points deduction for that :-). You got the RET_OFFSET hint on the
wrong sibling call.

Secondly, yes, that ORIG_RAX issue for copies _might_ crash and
burn, but who knows; you're jumping into uninitialized memory AFAICT, so
that definitely needs fixing. I'm also not sure about stubbing out the
test; that's actually more work than poking in the one extra RET and also
gives unexpected behaviour. If anything, we should poke a UD2 at the 1:
location in the copy.
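
Poking a UD2 (opcode 0F 0B) there would be a two-byte write into the
copy, along the lines of (rough sketch; the marker label for the 1:
site is hypothetical):

	/* hypothetical inner label exported at the '1:' site */
	extern void ftrace_regs_caller_nondirect(void);

	ip = trampoline + (ftrace_regs_caller_nondirect - ftrace_regs_caller);
	*(u16 *)ip = 0x0b0f;	/* UD2, stored little-endian */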

Overall, I'm thinking we should keep it as is. If you want to simplify
the poking, we can do something like the below; reading a known retq is
daft.

---
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 867c126ddabe..b334adbacb0e 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -307,8 +307,6 @@ union ftrace_op_code_union {
} __attribute__((packed));
};

-#define RET_SIZE 1
-
static unsigned long
create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
{
@@ -319,7 +317,6 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
unsigned long offset;
unsigned long npages;
unsigned long size;
- unsigned long retq;
unsigned long *ptr;
void *trampoline;
void *ip;
@@ -347,11 +344,11 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
* the iret , as well as the address of the ftrace_ops this
* trampoline is used for.
*/
- trampoline = alloc_tramp(size + RET_SIZE + sizeof(void *));
+ trampoline = alloc_tramp(size + RET_INSN_SIZE + sizeof(void *));
if (!trampoline)
return 0;

- *tramp_size = size + RET_SIZE + sizeof(void *);
+ *tramp_size = size + RET_INSN_SIZE + sizeof(void *);
npages = DIV_ROUND_UP(*tramp_size, PAGE_SIZE);

/* Copy ftrace_caller onto the trampoline memory */
@@ -359,19 +356,13 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
if (WARN_ON(ret < 0))
goto fail;

- ip = trampoline + size;
-
/* The trampoline ends with ret(q) */
- retq = (unsigned long)ftrace_stub;
- ret = probe_kernel_read(ip, (void *)retq, RET_SIZE);
- if (WARN_ON(ret < 0))
- goto fail;
+ ip = trampoline + size;
+ *(u8 *)ip = RET_INSN_OPCODE;

if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) {
ip = trampoline + (ftrace_regs_caller_ret - ftrace_regs_caller);
- ret = probe_kernel_read(ip, (void *)retq, RET_SIZE);
- if (WARN_ON(ret < 0))
- goto fail;
+ *(u8 *)ip = RET_INSN_OPCODE;
}

/*
@@ -382,7 +373,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
* the global function_trace_op variable.
*/

- ptr = (unsigned long *)(trampoline + size + RET_SIZE);
+ ptr = (unsigned long *)(trampoline + size + RET_INSN_SIZE);
*ptr = (unsigned long)ops;

op_offset -= start_offset;

2020-04-22 13:36:29

by Steven Rostedt

Subject: Re: [PATCH v5 04/17] x86,ftrace: Fix ftrace_regs_caller() unwind

On Wed, 22 Apr 2020 11:44:46 +0200
Peter Zijlstra <[email protected]> wrote:

> > Anyway, I added the below patch on top of your patches and it appears
> > to work. Thoughts?
>
> So can I push out those patches as is? :-) We can do this on top I
> suppose.

My tests are still running on your series. I want to make sure that nothing
breaks between them. If the tests succeed, then sure, I'm OK building on
top of this series.


>
> Firstly; your patch isn't objtool clean:
>
> arch/x86/kernel/ftrace_64.o: warning: objtool: ftrace_regs_caller()+0x199: sibling call from callable instruction with modified stack frame
>
> So 10 points deduction for that :-). You got the RET_OFFSET hint on the
> wrong sibling call.

Ah, I just did this patch quickly and made sure that it booted and ran
function tracing and direct calls. I didn't bother much with the
objtool annotations.

>
> Secondly, yes, that ORIG_RAX issue for copies, that _might_ crash and
> burn, but who knows, you're jumping into uninitialized memory afaict. So
> that definitely needs fixing. I'm also not sure of stubbing out the
> test, that's actually more work than poking in the 1 extra ret and also
> gives unexpected behaviour. If anything we should poke an UD2 at the 1:
> location in the copy.

Well, that would crash the system just as well.

We could also just make the jnz into a nop (which would make the trampoline
slightly faster as well).
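
In the copy that would mean overwriting the jnz with a same-length NOP,
something like (rough sketch; assumes the two-byte rel8 encoding of the
jnz and a hypothetical jnz_offset into the copy):

	static const u8 nop2[] = { 0x66, 0x90 };	/* 2-byte NOP */

	memcpy(trampoline + jnz_offset, nop2, sizeof(nop2));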

>
> Over all, I'm thinking we should keep it as is. If you want to simplify
> the poking we can do something like the below; reading a known retq is
> daft.

I'm actually more concerned about the performance of the created
trampoline, as that is a fast path. I'd rather get rid of that jump.

I'll play a little more with it while the tests continue.

-- Steve



>
> ---
> diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
> index 867c126ddabe..b334adbacb0e 100644
> --- a/arch/x86/kernel/ftrace.c
> +++ b/arch/x86/kernel/ftrace.c
> @@ -307,8 +307,6 @@ union ftrace_op_code_union {
> } __attribute__((packed));
> };
>
> -#define RET_SIZE 1
> -
> static unsigned long
> create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
> {
> @@ -319,7 +317,6 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
> unsigned long offset;
> unsigned long npages;
> unsigned long size;
> - unsigned long retq;
> unsigned long *ptr;
> void *trampoline;
> void *ip;
> @@ -347,11 +344,11 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
> * the iret , as well as the address of the ftrace_ops this
> * trampoline is used for.
> */
> - trampoline = alloc_tramp(size + RET_SIZE + sizeof(void *));
> + trampoline = alloc_tramp(size + RET_INSN_SIZE + sizeof(void *));
> if (!trampoline)
> return 0;
>
> - *tramp_size = size + RET_SIZE + sizeof(void *);
> + *tramp_size = size + RET_INSN_SIZE + sizeof(void *);
> npages = DIV_ROUND_UP(*tramp_size, PAGE_SIZE);
>
> /* Copy ftrace_caller onto the trampoline memory */
> @@ -359,19 +356,13 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
> if (WARN_ON(ret < 0))
> goto fail;
>
> - ip = trampoline + size;
> -
> /* The trampoline ends with ret(q) */
> - retq = (unsigned long)ftrace_stub;
> - ret = probe_kernel_read(ip, (void *)retq, RET_SIZE);
> - if (WARN_ON(ret < 0))
> - goto fail;
> + ip = trampoline + size;
> + *(u8 *)ip = RET_INSN_OPCODE;
>
> if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) {
> ip = trampoline + (ftrace_regs_caller_ret - ftrace_regs_caller);
> - ret = probe_kernel_read(ip, (void *)retq, RET_SIZE);
> - if (WARN_ON(ret < 0))
> - goto fail;
> + *(u8 *)ip = RET_INSN_OPCODE;
> }
>
> /*
> @@ -382,7 +373,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
> * the global function_trace_op variable.
> */
>
> - ptr = (unsigned long *)(trampoline + size + RET_SIZE);
> + ptr = (unsigned long *)(trampoline + size + RET_INSN_SIZE);
> *ptr = (unsigned long)ops;
>
> op_offset -= start_offset;

2020-04-22 20:23:32

by Steven Rostedt

Subject: Re: [PATCH v5 04/17] x86,ftrace: Fix ftrace_regs_caller() unwind

On Wed, 22 Apr 2020 11:44:46 +0200
Peter Zijlstra <[email protected]> wrote:

> So can I push out those patches as is? :-) We can do this on top I
> suppose.

My tests just finished. You can add my:

Reviewed-by: Steven Rostedt (VMware) <[email protected]>

to the three patches that touch the ftrace code in this series.

I'll continue to tinker with my patches that modify that code as well.

-- Steve

Subject: [tip: objtool/core] x86,ftrace: Fix ftrace_regs_caller() unwind

The following commit has been merged into the objtool/core branch of tip:

Commit-ID: 5d6cd2a33032b6283b43d9787807f21c1298fd6d
Gitweb: https://git.kernel.org/tip/5d6cd2a33032b6283b43d9787807f21c1298fd6d
Author: Peter Zijlstra <[email protected]>
AuthorDate: Wed, 01 Apr 2020 16:53:19 +02:00
Committer: Peter Zijlstra <[email protected]>
CommitterDate: Wed, 22 Apr 2020 23:10:05 +02:00

x86,ftrace: Fix ftrace_regs_caller() unwind

The ftrace_regs_caller() trampoline does something 'funny' when there
is a direct-caller present. In that case it stuffs the 'direct-caller'
address on the return stack and then exits the function. This then
results in 'returning' to the direct-caller with the exact registers
we came in with -- an indirect tail-call without using a register.

This however (rightfully) confuses objtool because the function shares
a few instructions in order to have a single exit path, but the stack
layout differs depending on which path was taken to get there.

This is currently kludged by forcing the stack state to the non-direct
case, but this generates actively wrong (ORC) unwind information for
the direct case, leading to potentially broken unwinds.

Fix this issue by fully separating the exit paths. This results in
having to poke a second RET into the trampoline copy, see
ftrace_regs_caller_ret.

This brings us to a second objtool problem: in order for it to
perceive the 'jmp ftrace_epilogue' as a function exit, it needs to be
recognised as a tail call. In order to make that happen,
ftrace_epilogue needs to be the start of an STT_FUNC, so re-arrange
the code to make this so.

Finally, a third issue is that objtool requires functions to exit with
the same stack layout they started with, which is obviously violated
in the direct case; employ the new HINT_RET_OFFSET to tell objtool
this is an expected exception.

Together, this results in generating correct ORC unwind information
for the ftrace_regs_caller() function and its trampoline copies.

Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Reviewed-by: Miroslav Benes <[email protected]>
Reviewed-by: Alexandre Chartre <[email protected]>
Reviewed-by: Steven Rostedt (VMware) <[email protected]>
Acked-by: Josh Poimboeuf <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
---
arch/x86/kernel/ftrace.c | 12 ++++++++++--
arch/x86/kernel/ftrace_64.S | 32 +++++++++++++++-----------------
2 files changed, 25 insertions(+), 19 deletions(-)

diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 37a0aea..867c126 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -282,7 +282,8 @@ static inline void tramp_free(void *tramp) { }

/* Defined as markers to the end of the ftrace default trampolines */
extern void ftrace_regs_caller_end(void);
-extern void ftrace_epilogue(void);
+extern void ftrace_regs_caller_ret(void);
+extern void ftrace_caller_end(void);
extern void ftrace_caller_op_ptr(void);
extern void ftrace_regs_caller_op_ptr(void);

@@ -334,7 +335,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
call_offset = (unsigned long)ftrace_regs_call;
} else {
start_offset = (unsigned long)ftrace_caller;
- end_offset = (unsigned long)ftrace_epilogue;
+ end_offset = (unsigned long)ftrace_caller_end;
op_offset = (unsigned long)ftrace_caller_op_ptr;
call_offset = (unsigned long)ftrace_call;
}
@@ -366,6 +367,13 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
if (WARN_ON(ret < 0))
goto fail;

+ if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) {
+ ip = trampoline + (ftrace_regs_caller_ret - ftrace_regs_caller);
+ ret = probe_kernel_read(ip, (void *)retq, RET_SIZE);
+ if (WARN_ON(ret < 0))
+ goto fail;
+ }
+
/*
* The address of the ftrace_ops that is used for this trampoline
* is stored at the end of the trampoline. This will be used to
diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S
index 369e61f..7657dc7 100644
--- a/arch/x86/kernel/ftrace_64.S
+++ b/arch/x86/kernel/ftrace_64.S
@@ -157,8 +157,12 @@ SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL)
* think twice before adding any new code or changing the
* layout here.
*/
-SYM_INNER_LABEL(ftrace_epilogue, SYM_L_GLOBAL)
+SYM_INNER_LABEL(ftrace_caller_end, SYM_L_GLOBAL)

+ jmp ftrace_epilogue
+SYM_FUNC_END(ftrace_caller);
+
+SYM_FUNC_START(ftrace_epilogue)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL)
jmp ftrace_stub
@@ -170,14 +174,12 @@ SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL)
*/
SYM_INNER_LABEL_ALIGN(ftrace_stub, SYM_L_WEAK)
retq
-SYM_FUNC_END(ftrace_caller)
+SYM_FUNC_END(ftrace_epilogue)

SYM_FUNC_START(ftrace_regs_caller)
/* Save the current flags before any operations that can change them */
pushfq

- UNWIND_HINT_SAVE
-
/* added 8 bytes to save flags */
save_mcount_regs 8
/* save_mcount_regs fills in first two parameters */
@@ -233,7 +235,10 @@ SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL)
movq ORIG_RAX(%rsp), %rax
movq %rax, MCOUNT_REG_SIZE-8(%rsp)

- /* If ORIG_RAX is anything but zero, make this a call to that */
+ /*
+ * If ORIG_RAX is anything but zero, make this a call to that.
+ * See arch_ftrace_set_direct_caller().
+ */
movq ORIG_RAX(%rsp), %rax
cmpq $0, %rax
je 1f
@@ -244,20 +249,14 @@ SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL)
movq %rax, MCOUNT_REG_SIZE(%rsp)

restore_mcount_regs 8
+ /* Restore flags */
+ popfq

- jmp 2f
+SYM_INNER_LABEL(ftrace_regs_caller_ret, SYM_L_GLOBAL);
+ UNWIND_HINT_RET_OFFSET
+ jmp ftrace_epilogue

1: restore_mcount_regs
-
-
-2:
- /*
- * The stack layout is nondetermistic here, depending on which path was
- * taken. This confuses objtool and ORC, rightfully so. For now,
- * pretend the stack always looks like the non-direct case.
- */
- UNWIND_HINT_RESTORE
-
/* Restore flags */
popfq

@@ -268,7 +267,6 @@ SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL)
* to the return.
*/
SYM_INNER_LABEL(ftrace_regs_caller_end, SYM_L_GLOBAL)
-
jmp ftrace_epilogue

SYM_FUNC_END(ftrace_regs_caller)

Subject: [tip: objtool/core] x86,ftrace: Fix ftrace_regs_caller() unwind

The following commit has been merged into the objtool/core branch of tip:

Commit-ID: 0298739b7983cf9bf4fcfb4bfb815c539bdb87ca
Gitweb: https://git.kernel.org/tip/0298739b7983cf9bf4fcfb4bfb815c539bdb87ca
Author: Peter Zijlstra <[email protected]>
AuthorDate: Wed, 01 Apr 2020 16:53:19 +02:00
Committer: Ingo Molnar <[email protected]>
CommitterDate: Wed, 22 Apr 2020 10:53:50 +02:00

x86,ftrace: Fix ftrace_regs_caller() unwind

The ftrace_regs_caller() trampoline does something 'funny' when there
is a direct-caller present. In that case it stuffs the 'direct-caller'
address on the return stack and then exits the function. This then
results in 'returning' to the direct-caller with the exact registers
we came in with -- an indirect tail-call without using a register.

This however (rightfully) confuses objtool because the function shares
a few instructions in order to have a single exit path, but the stack
layout differs depending on which path was taken to get there.

This is currently kludged by forcing the stack state to the non-direct
case, but this generates actively wrong (ORC) unwind information for
the direct case, leading to potentially broken unwinds.

Fix this issue by fully separating the exit paths. This results in
having to poke a second RET into the trampoline copy, see
ftrace_regs_caller_ret.

This brings us to a second objtool problem: in order for it to
perceive the 'jmp ftrace_epilogue' as a function exit, it needs to be
recognised as a tail call. In order to make that happen,
ftrace_epilogue needs to be the start of an STT_FUNC, so re-arrange
the code to make this so.

Finally, a third issue is that objtool requires functions to exit with
the same stack layout they started with, which is obviously violated
in the direct case; employ the new HINT_RET_OFFSET to tell objtool
this is an expected exception.

Together, this results in generating correct ORC unwind information
for the ftrace_regs_caller() function and its trampoline copies.

Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Reviewed-by: Miroslav Benes <[email protected]>
Reviewed-by: Alexandre Chartre <[email protected]>
Acked-by: Josh Poimboeuf <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/kernel/ftrace.c | 12 ++++++++++--
arch/x86/kernel/ftrace_64.S | 32 +++++++++++++++-----------------
2 files changed, 25 insertions(+), 19 deletions(-)

diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 37a0aea..867c126 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -282,7 +282,8 @@ static inline void tramp_free(void *tramp) { }

/* Defined as markers to the end of the ftrace default trampolines */
extern void ftrace_regs_caller_end(void);
-extern void ftrace_epilogue(void);
+extern void ftrace_regs_caller_ret(void);
+extern void ftrace_caller_end(void);
extern void ftrace_caller_op_ptr(void);
extern void ftrace_regs_caller_op_ptr(void);

@@ -334,7 +335,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
call_offset = (unsigned long)ftrace_regs_call;
} else {
start_offset = (unsigned long)ftrace_caller;
- end_offset = (unsigned long)ftrace_epilogue;
+ end_offset = (unsigned long)ftrace_caller_end;
op_offset = (unsigned long)ftrace_caller_op_ptr;
call_offset = (unsigned long)ftrace_call;
}
@@ -366,6 +367,13 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
if (WARN_ON(ret < 0))
goto fail;

+ if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) {
+ ip = trampoline + (ftrace_regs_caller_ret - ftrace_regs_caller);
+ ret = probe_kernel_read(ip, (void *)retq, RET_SIZE);
+ if (WARN_ON(ret < 0))
+ goto fail;
+ }
+
/*
* The address of the ftrace_ops that is used for this trampoline
* is stored at the end of the trampoline. This will be used to
diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S
index 369e61f..7657dc7 100644
--- a/arch/x86/kernel/ftrace_64.S
+++ b/arch/x86/kernel/ftrace_64.S
@@ -157,8 +157,12 @@ SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL)
* think twice before adding any new code or changing the
* layout here.
*/
-SYM_INNER_LABEL(ftrace_epilogue, SYM_L_GLOBAL)
+SYM_INNER_LABEL(ftrace_caller_end, SYM_L_GLOBAL)

+ jmp ftrace_epilogue
+SYM_FUNC_END(ftrace_caller);
+
+SYM_FUNC_START(ftrace_epilogue)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL)
jmp ftrace_stub
@@ -170,14 +174,12 @@ SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL)
*/
SYM_INNER_LABEL_ALIGN(ftrace_stub, SYM_L_WEAK)
retq
-SYM_FUNC_END(ftrace_caller)
+SYM_FUNC_END(ftrace_epilogue)

SYM_FUNC_START(ftrace_regs_caller)
/* Save the current flags before any operations that can change them */
pushfq

- UNWIND_HINT_SAVE
-
/* added 8 bytes to save flags */
save_mcount_regs 8
/* save_mcount_regs fills in first two parameters */
@@ -233,7 +235,10 @@ SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL)
movq ORIG_RAX(%rsp), %rax
movq %rax, MCOUNT_REG_SIZE-8(%rsp)

- /* If ORIG_RAX is anything but zero, make this a call to that */
+ /*
+ * If ORIG_RAX is anything but zero, make this a call to that.
+ * See arch_ftrace_set_direct_caller().
+ */
movq ORIG_RAX(%rsp), %rax
cmpq $0, %rax
je 1f
@@ -244,20 +249,14 @@ SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL)
movq %rax, MCOUNT_REG_SIZE(%rsp)

restore_mcount_regs 8
+ /* Restore flags */
+ popfq

- jmp 2f
+SYM_INNER_LABEL(ftrace_regs_caller_ret, SYM_L_GLOBAL);
+ UNWIND_HINT_RET_OFFSET
+ jmp ftrace_epilogue

1: restore_mcount_regs
-
-
-2:
- /*
- * The stack layout is nondetermistic here, depending on which path was
- * taken. This confuses objtool and ORC, rightfully so. For now,
- * pretend the stack always looks like the non-direct case.
- */
- UNWIND_HINT_RESTORE
-
/* Restore flags */
popfq

@@ -268,7 +267,6 @@ SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL)
* to the return.
*/
SYM_INNER_LABEL(ftrace_regs_caller_end, SYM_L_GLOBAL)
-
jmp ftrace_epilogue

SYM_FUNC_END(ftrace_regs_caller)