2023-08-09 08:27:34

by Peter Zijlstra

Subject: [RFC][PATCH 12/17] x86/cpu: Rename original retbleed return thunk

Rename the original retbleed return thunk from __x86_return_thunk to
zen_return_thunk, matching zen_untrain_ret.

Pull the dummy __x86_return_thunk from the !CPU_UNRET_ENTRY case and
explicitly set zen_return_thunk in the retbleed=unret case.
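
To make the second paragraph concrete, here is a minimal userspace
sketch of the thunk selection (the stub thunks, the enum and main()
are made-up scaffolding so the example runs; only the function-pointer
assignment mirrors the retbleed=unret hunk below):

#include <stdio.h>

/* Stub thunks standing in for the real assembly symbols. */
static void __x86_return_thunk(void) { }	/* dummy: plain RET + INT3 */
static void zen_return_thunk(void) { }		/* the renamed Zen1/2 thunk */

/* Default return thunk; patched RET sites jump through this pointer. */
static void (*x86_return_thunk)(void) = __x86_return_thunk;

enum { RETBLEED_MITIGATION_NONE, RETBLEED_MITIGATION_UNRET };

static void retbleed_select_mitigation(int mode)
{
	if (mode == RETBLEED_MITIGATION_UNRET)
		x86_return_thunk = zen_return_thunk;	/* the explicit set */
}

int main(void)
{
	retbleed_select_mitigation(RETBLEED_MITIGATION_UNRET);
	printf("zen_return_thunk selected: %d\n",
	       x86_return_thunk == zen_return_thunk);
	return 0;
}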

Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
---
 arch/x86/include/asm/nospec-branch.h |  2 ++
 arch/x86/kernel/cpu/bugs.c           |  1 +
 arch/x86/kernel/vmlinux.lds.S        |  2 +-
 arch/x86/lib/retpoline.S             | 25 +++++++++++--------------
 tools/objtool/check.c                |  9 +++++++--
 5 files changed, 22 insertions(+), 17 deletions(-)

--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -339,6 +339,8 @@ extern retpoline_thunk_t __x86_indirect_
extern retpoline_thunk_t __x86_indirect_jump_thunk_array[];

extern void __x86_return_thunk(void);
+
+extern void zen_return_thunk(void);
extern void srso_return_thunk(void);
extern void srso_alias_return_thunk(void);

--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -953,6 +953,7 @@ static void __init retbleed_select_mitig

case RETBLEED_MITIGATION_UNRET:
setup_force_cpu_cap(X86_FEATURE_UNRET);
+ x86_return_thunk = zen_return_thunk;
do_rethunk:
setup_force_cpu_cap(X86_FEATURE_RETHUNK);

--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -523,7 +523,7 @@ INIT_PER_CPU(irq_stack_backing_store);
#endif

#ifdef CONFIG_CPU_UNRET_ENTRY
-. = ASSERT((__x86_return_thunk & 0x3f) == 0, "__x86_return_thunk not cacheline-aligned");
+. = ASSERT((zen_return_thunk & 0x3f) == 0, "zen_return_thunk not cacheline-aligned");
. = ASSERT((srso_safe_ret & 0x3f) == 0, "srso_safe_ret not cacheline-aligned");
/*
* GNU ld cannot do XOR so do: (A | B) - (A & B) in order to compute the XOR
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -161,7 +161,7 @@ __EXPORT_THUNK(srso_untrain_ret_alias)

.section .text.__x86.rethunk_safe

-/* Needs a definition for the __x86_return_thunk alternative below. */
+/* Needs a definition for the zen_return_thunk alternative below. */
SYM_START(srso_safe_ret_alias, SYM_L_GLOBAL, SYM_A_NONE)
add $8, %_ASM_SP
UNWIND_HINT_FUNC
@@ -174,7 +174,7 @@ SYM_FUNC_END(srso_safe_ret_alias)

/*
* Safety details here pertain to the AMD Zen{1,2} microarchitecture:
- * 1) The RET at __x86_return_thunk must be on a 64 byte boundary, for
+ * 1) The RET at zen_return_thunk must be on a 64 byte boundary, for
* alignment within the BTB.
* 2) The instruction at zen_untrain_ret must contain, and not
* end with, the 0xc3 byte of the RET.
@@ -182,7 +182,7 @@ SYM_FUNC_END(srso_safe_ret_alias)
* from re-poisoning the BTB prediction.
*/
.align 64
- .skip 64 - (__x86_return_thunk - zen_untrain_ret), 0xcc
+ .skip 64 - (zen_return_thunk - zen_untrain_ret), 0xcc
SYM_START(zen_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
ANNOTATE_NOENDBR
/*
@@ -190,16 +190,16 @@ SYM_START(zen_untrain_ret, SYM_L_GLOBAL,
*
* TEST $0xcc, %bl
* LFENCE
- * JMP __x86_return_thunk
+ * JMP zen_return_thunk
*
* Executing the TEST instruction has a side effect of evicting any BTB
* prediction (potentially attacker controlled) attached to the RET, as
- * __x86_return_thunk + 1 isn't an instruction boundary at the moment.
+ * zen_return_thunk + 1 isn't an instruction boundary at the moment.
*/
.byte 0xf6

/*
- * As executed from __x86_return_thunk, this is a plain RET.
+ * As executed from zen_return_thunk, this is a plain RET.
*
* As part of the TEST above, RET is the ModRM byte, and INT3 the imm8.
*
@@ -211,13 +211,13 @@ SYM_START(zen_untrain_ret, SYM_L_GLOBAL,
* With SMT enabled and STIBP active, a sibling thread cannot poison
* RET's prediction to a type of its choice, but can evict the
* prediction due to competitive sharing. If the prediction is
- * evicted, __x86_return_thunk will suffer Straight Line Speculation
+ * evicted, zen_return_thunk will suffer Straight Line Speculation
* which will be contained safely by the INT3.
*/
-SYM_INNER_LABEL(__x86_return_thunk, SYM_L_GLOBAL)
+SYM_INNER_LABEL(zen_return_thunk, SYM_L_GLOBAL)
ret
int3
-SYM_CODE_END(__x86_return_thunk)
+SYM_CODE_END(zen_return_thunk)

/*
* Ensure the TEST decoding / BTB invalidation is complete.
@@ -228,7 +228,7 @@ SYM_CODE_END(__x86_return_thunk)
* Jump back and execute the RET in the middle of the TEST instruction.
* INT3 is for SLS protection.
*/
- jmp __x86_return_thunk
+ jmp zen_return_thunk
int3
SYM_FUNC_END(zen_untrain_ret)
__EXPORT_THUNK(zen_untrain_ret)
@@ -288,7 +288,7 @@ SYM_CODE_START(srso_alias_return_thunk)
ud2
SYM_CODE_END(srso_alias_return_thunk)

-#else /* CONFIG_CPU_UNRET_ENTRY */
+#endif /* CONFIG_CPU_UNRET_ENTRY */

.section .text.__x86.return_thunk

@@ -299,9 +299,6 @@ SYM_CODE_START(__x86_return_thunk)
ret
int3
SYM_CODE_END(__x86_return_thunk)
-
-#endif /* CONFIG_CPU_UNRET_ENTRY */
-
__EXPORT_THUNK(__x86_return_thunk)

#endif /* CONFIG_RETHUNK */
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -455,7 +455,12 @@ static int decode_instructions(struct ob
return -1;
}

- if (func->return_thunk || !strcmp(func->name, "srso_safe_ret") || func->alias != func)
+ /*
+ * Both zen_return_thunk() and srso_safe_ret() are embedded inside
+ * another instruction and objtool doesn't grok that. Skip validating them.
+ */
+ if (!strcmp(func->name, "zen_return_thunk") ||
+ !strcmp(func->name, "srso_safe_ret") || func->alias != func)
continue;

if (!find_insn(file, sec, func->offset)) {
@@ -1583,7 +1588,7 @@ static int add_jump_destinations(struct
* middle of another instruction. Objtool only
* knows about the outer instruction.
*/
- if (sym && sym->return_thunk) {
+ if (sym && !strcmp(sym->name, "zen_return_thunk")) {
add_return_call(file, insn, false);
continue;
}
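
A note on the objtool special case above: zen_return_thunk really is
embedded inside another instruction -- its RET is the ModRM byte of
the TEST that zen_untrain_ret decodes to. A small, illustrative C
program (only the three byte values come from the assembly in this
patch; the rest is scaffolding for the sake of a runnable example):

#include <stdio.h>

int main(void)
{
	/*
	 * zen_untrain_ret starts at offset 0, zen_return_thunk at offset 1:
	 *
	 *   offset 0:  f6 c3 cc	test $0xcc, %bl  (evicts the RET's BTB entry)
	 *   offset 1:  c3		ret              (zen_return_thunk)
	 *   offset 2:  cc		int3             (SLS fence)
	 *
	 * The same 0xc3 byte is both the TEST's ModRM and the RET opcode,
	 * so zen_return_thunk does not sit on an instruction boundary of
	 * the enclosing TEST -- which is exactly what objtool cannot model.
	 */
	const unsigned char code[] = { 0xf6, 0xc3, 0xcc };

	printf("TEST imm8 = 0x%02x, shared RET byte = 0x%02x\n",
	       code[2], code[1]);
	return 0;
}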

2023-08-09 14:32:27

by Josh Poimboeuf

Subject: Re: [RFC][PATCH 12/17] x86/cpu: Rename original retbleed return thunk

On Wed, Aug 09, 2023 at 09:12:30AM +0200, Peter Zijlstra wrote:
> +++ b/tools/objtool/check.c
> @@ -455,7 +455,12 @@ static int decode_instructions(struct ob
> return -1;
> }
>
> - if (func->return_thunk || !strcmp(func->name, "srso_safe_ret") || func->alias != func)
> + /*
> + * Both zen_return_thunk() and srso_safe_ret() are embedded inside
> + * another instruction and objtool doesn't grok that. Skip validating them.
> + */
> + if (!strcmp(func->name, "zen_return_thunk") ||
> + !strcmp(func->name, "srso_safe_ret") || func->alias != func)

Hm, speaking of renaming they should probably be called
retbleed_return_thunk() and srso_return_thunk().

--
Josh

2023-08-09 16:58:54

by Peter Zijlstra

Subject: Re: [RFC][PATCH 12/17] x86/cpu: Rename original retbleed return thunk

On Wed, Aug 09, 2023 at 10:20:31AM -0400, Josh Poimboeuf wrote:
> On Wed, Aug 09, 2023 at 09:12:30AM +0200, Peter Zijlstra wrote:
> > +++ b/tools/objtool/check.c
> > @@ -455,7 +455,12 @@ static int decode_instructions(struct ob
> > return -1;
> > }
> >
> > - if (func->return_thunk || !strcmp(func->name, "srso_safe_ret") || func->alias != func)
> > + /*
> > + * Both zen_return_thunk() and srso_safe_ret() are embedded inside
> > + * another instruction and objtool doesn't grok that. Skip validating them.
> > + */
> > + if (!strcmp(func->name, "zen_return_thunk") ||
> > + !strcmp(func->name, "srso_safe_ret") || func->alias != func)
>
> Hm, speaking of renaming they should probably be called
> retbleed_return_thunk() and srso_return_thunk().

Yes, clearly naming is better in daylight. Let me regex that.

2023-08-10 13:46:51

by Peter Zijlstra

Subject: Re: [RFC][PATCH 12/17] x86/cpu: Rename original retbleed return thunk

On Thu, Aug 10, 2023 at 12:06:17PM +0100, [email protected] wrote:
> On 09/08/2023 3:22 pm, Peter Zijlstra wrote:
> > On Wed, Aug 09, 2023 at 10:20:31AM -0400, Josh Poimboeuf wrote:
> >> On Wed, Aug 09, 2023 at 09:12:30AM +0200, Peter Zijlstra wrote:
> >>> +++ b/tools/objtool/check.c
> >>> @@ -455,7 +455,12 @@ static int decode_instructions(struct ob
> >>> return -1;
> >>> }
> >>>
> >>> - if (func->return_thunk || !strcmp(func->name, "srso_safe_ret") || func->alias != func)
> >>> + /*
> >>> + * Both zen_return_thunk() and srso_safe_ret() are embedded inside
> >>> + * another instruction and objtool doesn't grok that. Skip validating them.
> >>> + */
> >>> + if (!strcmp(func->name, "zen_return_thunk") ||
> >>> + !strcmp(func->name, "srso_safe_ret") || func->alias != func)
> >> Hm, speaking of renaming they should probably be called
> >> retbleed_return_thunk() and srso_return_thunk().
> > Yes, clearly naming is better in daylight. Let me regex that.
>
> btc_*, not retbleed_*.
>
> That way it matches the terminology you'll find in the AMD whitepaper
> about what's going on, and there's already an entirely different issue
> called Retbleed.

So BTC as a whole is the fact that AMD predicts the type of an
instruction and then picks a predictor to predict the target of that
instruction, no?

The retbleed issue is that BTC is used to trick the thing into thinking
the RET is not a return but an indirect branch (type confusion) and then
pick the BTB predictor for a target, which got trained to point at
a target of choice.

The SRSO thing employs the same type confusion to train the RSB/RAP and
have the RET, now correctly predicted to be a RET and using said
RSB/RAP, jump to a target of choice.

So in that regards, I don't think BTC is a good name for the thing.

2023-08-14 11:35:29

by Peter Zijlstra

Subject: Re: [RFC][PATCH 12/17] x86/cpu: Rename original retbleed return thunk

On Sun, Aug 13, 2023 at 04:23:27PM +0100, [email protected] wrote:
> On 10/08/2023 2:02 pm, Peter Zijlstra wrote:

> > So BTC as a whole is the fact that AMD predicts the type of an
> > instruction and then picks a predictor to predict the target of that
> > instruction, no?
>
> No.
>
> "Branch Type Confusion" is the technical name AMD gave last year's
> issue. Hence the name of the whitepaper about it,
> https://www.amd.com/system/files/documents/technical-guidance-for-mitigating-branch-type-confusion.pdf

Bah, then what do we call the actual underlying issue that the AMD
branch predictor starts by predicting the next instruction type --
before it has been decoded -- meaning it can predict it wrong, which
then leads to a ton of other issues, including but not limited to:

SLS through JMP (or pretty much anything else)
RET from BTB

?

Calling *THAT* branch-type-confusion makes a heap more sense to me.


2023-08-14 14:03:25

by Peter Zijlstra

Subject: Re: [RFC][PATCH 12/17] x86/cpu: Rename original retbleed return thunk

On Mon, Aug 14, 2023 at 12:31:04PM +0100, [email protected] wrote:
> On 14/08/2023 11:34 am, Peter Zijlstra wrote:
> > On Sun, Aug 13, 2023 at 04:23:27PM +0100, [email protected] wrote:
> >> On 10/08/2023 2:02 pm, Peter Zijlstra wrote:
> >>> So BTC as a whole is the fact that AMD predicts the type of an
> >>> instruction and then picks a predictor to predict the target of that
> >>> instruction, no?
> >> No.
> >>
> >> "Branch Type Confusion" is the technical name AMD gave last year's
> >> issue. Hence the name of the whitepaper about it,
> >> https://www.amd.com/system/files/documents/technical-guidance-for-mitigating-branch-type-confusion.pdf
> > Bah, then what do we call the actual underlying issue that the AMD
> > branch predictor starts by predicting the next instruction type --
> > before it has been decoded -- meaning it can predict it wrong, which
> > then leads to a tons of other issues, including but not limited to:
> >
> > SLS through JMP (or pretty much anything else)
> > RET from BTB
> >
> > ?
> >
> > Calling *THAT* branch-type-confusion makes a heap more sense to me.
>
> You know the branch predictor being ahead of decode is a property that
> exists in both vendors CPUs and has done for more than a decade
> already?? Bad branch type predictions are relevant for a number of the
> Intel issues too - they just don't talk as openly about it.
>
> The thing that's missing from AMD Zen2-and-older CPUs is the early stall
> in decode when the branch type prediction is discovered to be wrong.
> Intel have this early feedback cycle, as do AMD Zen3 and later.

This early stall keeps the actual type confusion from escaping
(mostly).

> And yes - this is why SRSO is not an extension of BTC. The
> micro-architectural details are very different.

Yeah, it is more related to intel-retbleed; both exhaust the return
stack... /me runs :-)