2022-02-24 15:59:01

by Peter Zijlstra

[permalink] [raw]
Subject: [PATCH v2 18/39] x86/ibt: Add IBT feature, MSR and #CP handling

The bits required to make the hardware go.. Of note is that, provided
the syscall entry points are covered with ENDBR, #CP doesn't need to
be an IST because we'll never hit the syscall gap.

Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
---
arch/x86/include/asm/cpu.h | 4 +
arch/x86/include/asm/cpufeatures.h | 1
arch/x86/include/asm/idtentry.h | 5 ++
arch/x86/include/asm/msr-index.h | 20 +++++++-
arch/x86/include/asm/traps.h | 2
arch/x86/include/uapi/asm/processor-flags.h | 2
arch/x86/kernel/cpu/common.c | 28 ++++++++++-
arch/x86/kernel/idt.c | 4 +
arch/x86/kernel/machine_kexec_64.c | 2
arch/x86/kernel/traps.c | 70 ++++++++++++++++++++++++++++
10 files changed, 136 insertions(+), 2 deletions(-)

--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -7,6 +7,7 @@
#include <linux/topology.h>
#include <linux/nodemask.h>
#include <linux/percpu.h>
+#include <asm/ibt.h>

#ifdef CONFIG_SMP

@@ -72,4 +73,7 @@ void init_ia32_feat_ctl(struct cpuinfo_x
#else
static inline void init_ia32_feat_ctl(struct cpuinfo_x86 *c) {}
#endif
+
+extern __noendbr void cet_disable(void);
+
#endif /* _ASM_X86_CPU_H */
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -387,6 +387,7 @@
#define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */
#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */
#define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */
+#define X86_FEATURE_IBT (18*32+20) /* Indirect Branch Tracking */
#define X86_FEATURE_AMX_BF16 (18*32+22) /* AMX bf16 Support */
#define X86_FEATURE_AVX512_FP16 (18*32+23) /* AVX512 FP16 */
#define X86_FEATURE_AMX_TILE (18*32+24) /* AMX tile Support */
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -617,6 +617,11 @@ DECLARE_IDTENTRY_DF(X86_TRAP_DF, exc_dou
DECLARE_IDTENTRY_RAW_ERRORCODE(X86_TRAP_DF, xenpv_exc_double_fault);
#endif

+/* #CP */
+#ifdef CONFIG_X86_KERNEL_IBT
+DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_CP, exc_control_protection);
+#endif
+
/* #VC */
#ifdef CONFIG_AMD_MEM_ENCRYPT
DECLARE_IDTENTRY_VC(X86_TRAP_VC, exc_vmm_communication);
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -362,11 +362,29 @@
#define MSR_ATOM_CORE_TURBO_RATIOS 0x0000066c
#define MSR_ATOM_CORE_TURBO_VIDS 0x0000066d

-
#define MSR_CORE_PERF_LIMIT_REASONS 0x00000690
#define MSR_GFX_PERF_LIMIT_REASONS 0x000006B0
#define MSR_RING_PERF_LIMIT_REASONS 0x000006B1

+/* Control-flow Enforcement Technology MSRs */
+#define MSR_IA32_U_CET 0x000006a0 /* user mode cet */
+#define MSR_IA32_S_CET 0x000006a2 /* kernel mode cet */
+#define CET_SHSTK_EN BIT_ULL(0)
+#define CET_WRSS_EN BIT_ULL(1)
+#define CET_ENDBR_EN BIT_ULL(2)
+#define CET_LEG_IW_EN BIT_ULL(3)
+#define CET_NO_TRACK_EN BIT_ULL(4)
+#define CET_SUPPRESS_DISABLE BIT_ULL(5)
+#define CET_RESERVED (BIT_ULL(6) | BIT_ULL(7) | BIT_ULL(8) | BIT_ULL(9))
+#define CET_SUPPRESS BIT_ULL(10)
+#define CET_WAIT_ENDBR BIT_ULL(11)
+
+#define MSR_IA32_PL0_SSP 0x000006a4 /* ring-0 shadow stack pointer */
+#define MSR_IA32_PL1_SSP 0x000006a5 /* ring-1 shadow stack pointer */
+#define MSR_IA32_PL2_SSP 0x000006a6 /* ring-2 shadow stack pointer */
+#define MSR_IA32_PL3_SSP 0x000006a7 /* ring-3 shadow stack pointer */
+#define MSR_IA32_INT_SSP_TAB 0x000006a8 /* exception shadow stack table */
+
/* Hardware P state interface */
#define MSR_PPERF 0x0000064e
#define MSR_PERF_LIMIT_REASONS 0x0000064f
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -18,6 +18,8 @@ void __init trap_init(void);
asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *eregs);
#endif

+extern bool ibt_selftest(void);
+
#ifdef CONFIG_X86_F00F_BUG
/* For handling the FOOF bug */
void handle_invalid_op(struct pt_regs *regs);
--- a/arch/x86/include/uapi/asm/processor-flags.h
+++ b/arch/x86/include/uapi/asm/processor-flags.h
@@ -130,6 +130,8 @@
#define X86_CR4_SMAP _BITUL(X86_CR4_SMAP_BIT)
#define X86_CR4_PKE_BIT 22 /* enable Protection Keys support */
#define X86_CR4_PKE _BITUL(X86_CR4_PKE_BIT)
+#define X86_CR4_CET_BIT 23 /* enable Control-flow Enforcement Technology */
+#define X86_CR4_CET _BITUL(X86_CR4_CET_BIT)

/*
* x86-64 Task Priority Register, CR8
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -59,6 +59,7 @@
#include <asm/cpu_device_id.h>
#include <asm/uv/uv.h>
#include <asm/sigframe.h>
+#include <asm/traps.h>

#include "cpu.h"

@@ -438,7 +439,8 @@ static __always_inline void setup_umip(s

/* These bits should not change their value after CPU init is finished. */
static const unsigned long cr4_pinned_mask =
- X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP | X86_CR4_FSGSBASE;
+ X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP |
+ X86_CR4_FSGSBASE | X86_CR4_CET;
static DEFINE_STATIC_KEY_FALSE_RO(cr_pinning);
static unsigned long cr4_pinned_bits __ro_after_init;

@@ -592,6 +594,29 @@ static __init int setup_disable_pku(char
__setup("nopku", setup_disable_pku);
#endif /* CONFIG_X86_64 */

+static __always_inline void setup_cet(struct cpuinfo_x86 *c)
+{
+ u64 msr = CET_ENDBR_EN;
+
+ if (!HAS_KERNEL_IBT ||
+ !cpu_feature_enabled(X86_FEATURE_IBT))
+ return;
+
+ wrmsrl(MSR_IA32_S_CET, msr);
+ cr4_set_bits(X86_CR4_CET);
+
+ if (!ibt_selftest()) {
+ pr_err("IBT selftest: Failed!\n");
+ setup_clear_cpu_cap(X86_FEATURE_IBT);
+ }
+}
+
+__noendbr void cet_disable(void)
+{
+ if (cpu_feature_enabled(X86_FEATURE_IBT))
+ wrmsrl(MSR_IA32_S_CET, 0);
+}
+
/*
* Some CPU features depend on higher CPUID levels, which may not always
* be available due to CPUID level capping or broken virtualization
@@ -1709,6 +1734,7 @@ static void identify_cpu(struct cpuinfo_

x86_init_rdrand(c);
setup_pku(c);
+ setup_cet(c);

/*
* Clear/Set all flags overridden by options, need do it
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -104,6 +104,10 @@ static const __initconst struct idt_data
ISTG(X86_TRAP_MC, asm_exc_machine_check, IST_INDEX_MCE),
#endif

+#ifdef CONFIG_X86_KERNEL_IBT
+ INTG(X86_TRAP_CP, asm_exc_control_protection),
+#endif
+
#ifdef CONFIG_AMD_MEM_ENCRYPT
ISTG(X86_TRAP_VC, asm_exc_vmm_communication, IST_INDEX_VC),
#endif
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -27,6 +27,7 @@
#include <asm/kexec-bzimage64.h>
#include <asm/setup.h>
#include <asm/set_memory.h>
+#include <asm/cpu.h>

#ifdef CONFIG_ACPI
/*
@@ -310,6 +311,7 @@ void machine_kexec(struct kimage *image)
/* Interrupts aren't acceptable while we reboot */
local_irq_disable();
hw_breakpoint_disable();
+ cet_disable();

if (image->preserve_context) {
#ifdef CONFIG_X86_IO_APIC
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -210,6 +210,76 @@ DEFINE_IDTENTRY(exc_overflow)
do_error_trap(regs, 0, "overflow", X86_TRAP_OF, SIGSEGV, 0, NULL);
}

+#ifdef CONFIG_X86_KERNEL_IBT
+
+static __ro_after_init bool ibt_fatal = true;
+
+extern const unsigned long ibt_selftest_ip; /* defined in asm below */
+
+DEFINE_IDTENTRY_ERRORCODE(exc_control_protection)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_IBT)) {
+ pr_err("Unexpected #CP\n");
+ BUG();
+ }
+
+ if (WARN_ON_ONCE(user_mode(regs) || error_code != 3))
+ return;
+
+ if (unlikely(regs->ip == ibt_selftest_ip)) {
+ regs->ax = 0;
+ return;
+ }
+
+ pr_err("Missing ENDBR: %pS\n", (void *)instruction_pointer(regs));
+ if (!ibt_fatal) {
+ printk(KERN_DEFAULT CUT_HERE);
+ __warn(__FILE__, __LINE__, (void *)regs->ip, TAINT_WARN, regs, NULL);
+ return;
+ }
+ BUG();
+}
+
+bool ibt_selftest(void)
+{
+ unsigned long ret;
+
+ asm ("1: lea 2f(%%rip), %%rax\n\t"
+ ANNOTATE_RETPOLINE_SAFE
+ " jmp *%%rax\n\t"
+ ASM_REACHABLE
+ ANNOTATE_NOENDBR
+ "2: nop\n\t"
+
+ /* unsigned ibt_selftest_ip = 2b */
+ ".pushsection .rodata,\"a\"\n\t"
+ ".align 8\n\t"
+ ".type ibt_selftest_ip, @object\n\t"
+ ".size ibt_selftest_ip, 8\n\t"
+ "ibt_selftest_ip:\n\t"
+ ".quad 2b\n\t"
+ ".popsection\n\t"
+
+ : "=a" (ret) : : "memory");
+
+ return !ret;
+}
+
+static int __init ibt_setup(char *str)
+{
+ if (!strcmp(str, "off"))
+ setup_clear_cpu_cap(X86_FEATURE_IBT);
+
+ if (!strcmp(str, "warn"))
+ ibt_fatal = false;
+
+ return 1;
+}
+
+__setup("ibt=", ibt_setup);
+
+#endif /* CONFIG_X86_KERNEL_IBT */
+
#ifdef CONFIG_X86_F00F_BUG
void handle_invalid_op(struct pt_regs *regs)
#else



2022-02-25 00:22:07

by Josh Poimboeuf

[permalink] [raw]
Subject: Re: [PATCH v2 18/39] x86/ibt: Add IBT feature, MSR and #CP handling

On Thu, Feb 24, 2022 at 03:51:56PM +0100, Peter Zijlstra wrote:
> +static __always_inline void setup_cet(struct cpuinfo_x86 *c)
> +{
> + u64 msr = CET_ENDBR_EN;
> +
> + if (!HAS_KERNEL_IBT ||
> + !cpu_feature_enabled(X86_FEATURE_IBT))
> + return;

If you add X86_FEATURE_BIT to arch/x86/include/asm/disabled-features.h,
the HAS_KERNEL_IBT check becomes redundant.

> +bool ibt_selftest(void)
> +{
> + unsigned long ret;
> +
> + asm ("1: lea 2f(%%rip), %%rax\n\t"
> + ANNOTATE_RETPOLINE_SAFE
> + " jmp *%%rax\n\t"
> + ASM_REACHABLE
> + ANNOTATE_NOENDBR
> + "2: nop\n\t"
> +
> + /* unsigned ibt_selftest_ip = 2b */
> + ".pushsection .rodata,\"a\"\n\t"
> + ".align 8\n\t"
> + ".type ibt_selftest_ip, @object\n\t"
> + ".size ibt_selftest_ip, 8\n\t"
> + "ibt_selftest_ip:\n\t"
> + ".quad 2b\n\t"
> + ".popsection\n\t"

It's still seems silly to make this variable in asm.

Also .rodata isn't going to work for CPU hotplug.

--
Josh

2022-02-25 12:32:51

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH v2 18/39] x86/ibt: Add IBT feature, MSR and #CP handling

On Thu, Feb 24, 2022 at 03:55:16PM -0800, Josh Poimboeuf wrote:
> On Thu, Feb 24, 2022 at 03:51:56PM +0100, Peter Zijlstra wrote:
> > +static __always_inline void setup_cet(struct cpuinfo_x86 *c)
> > +{
> > + u64 msr = CET_ENDBR_EN;
> > +
> > + if (!HAS_KERNEL_IBT ||
> > + !cpu_feature_enabled(X86_FEATURE_IBT))
> > + return;
>
> If you add X86_FEATURE_BIT to arch/x86/include/asm/disabled-features.h,
> the HAS_KERNEL_IBT check becomes redundant.

Cute.

> > +bool ibt_selftest(void)
> > +{
> > + unsigned long ret;
> > +
> > + asm ("1: lea 2f(%%rip), %%rax\n\t"
> > + ANNOTATE_RETPOLINE_SAFE
> > + " jmp *%%rax\n\t"
> > + ASM_REACHABLE
> > + ANNOTATE_NOENDBR
> > + "2: nop\n\t"
> > +
> > + /* unsigned ibt_selftest_ip = 2b */
> > + ".pushsection .rodata,\"a\"\n\t"
> > + ".align 8\n\t"
> > + ".type ibt_selftest_ip, @object\n\t"
> > + ".size ibt_selftest_ip, 8\n\t"
> > + "ibt_selftest_ip:\n\t"
> > + ".quad 2b\n\t"
> > + ".popsection\n\t"
>
> It's still seems silly to make this variable in asm.
>
> Also .rodata isn't going to work for CPU hotplug.

It's just the IP, that stays invariant. I'm not sure how else to match
regs->ip to 2 in #CP.

2022-02-25 15:05:10

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH v2 18/39] x86/ibt: Add IBT feature, MSR and #CP handling

On Fri, Feb 25, 2022 at 11:51:01AM +0100, Peter Zijlstra wrote:
> On Thu, Feb 24, 2022 at 03:55:16PM -0800, Josh Poimboeuf wrote:
> > On Thu, Feb 24, 2022 at 03:51:56PM +0100, Peter Zijlstra wrote:
> > > +static __always_inline void setup_cet(struct cpuinfo_x86 *c)
> > > +{
> > > + u64 msr = CET_ENDBR_EN;
> > > +
> > > + if (!HAS_KERNEL_IBT ||
> > > + !cpu_feature_enabled(X86_FEATURE_IBT))
> > > + return;
> >
> > If you add X86_FEATURE_BIT to arch/x86/include/asm/disabled-features.h,
> > the HAS_KERNEL_IBT check becomes redundant.
>
> Cute.

On second thought, I'm not sure that's desirable. Ideally KVM would
still expose IBT if present on the hardware, even if the host kernel
doesn't use it.

Killing the feature when the host doesn't use it seems unfortunate.

2022-02-26 00:17:19

by Kees Cook

[permalink] [raw]
Subject: Re: [PATCH v2 18/39] x86/ibt: Add IBT feature, MSR and #CP handling

On Thu, Feb 24, 2022 at 03:51:56PM +0100, Peter Zijlstra wrote:
> [...]
> @@ -438,7 +439,8 @@ static __always_inline void setup_umip(s
>
> /* These bits should not change their value after CPU init is finished. */
> static const unsigned long cr4_pinned_mask =
> - X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP | X86_CR4_FSGSBASE;
> + X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP |
> + X86_CR4_FSGSBASE | X86_CR4_CET;

Thanks!

> static DEFINE_STATIC_KEY_FALSE_RO(cr_pinning);
> static unsigned long cr4_pinned_bits __ro_after_init;
>
> @@ -592,6 +594,29 @@ static __init int setup_disable_pku(char
> __setup("nopku", setup_disable_pku);
> #endif /* CONFIG_X86_64 */
>
> +static __always_inline void setup_cet(struct cpuinfo_x86 *c)
> +{
> + u64 msr = CET_ENDBR_EN;
> +
> + if (!HAS_KERNEL_IBT ||
> + !cpu_feature_enabled(X86_FEATURE_IBT))
> + return;
> +
> + wrmsrl(MSR_IA32_S_CET, msr);
> + cr4_set_bits(X86_CR4_CET);
> +
> + if (!ibt_selftest()) {
> + pr_err("IBT selftest: Failed!\n");
> + setup_clear_cpu_cap(X86_FEATURE_IBT);
> + }

For easy boot-output testing, I'd love to see something like:

} else {
pr_info("CET detected: Indirect Branch Tracking enabled.\n")
}

or maybe:
pr_info("CET detected: Indirect Branch Tracking is %s.\n",
ibt_fatal ? "enforced" : "warning only");

> [...]
> +bool ibt_selftest(void)
> +{
> + unsigned long ret;
> +
> + asm ("1: lea 2f(%%rip), %%rax\n\t"
> + ANNOTATE_RETPOLINE_SAFE
> + " jmp *%%rax\n\t"
> + ASM_REACHABLE
> + ANNOTATE_NOENDBR
> + "2: nop\n\t"
> +
> + /* unsigned ibt_selftest_ip = 2b */
> + ".pushsection .rodata,\"a\"\n\t"
> + ".align 8\n\t"
> + ".type ibt_selftest_ip, @object\n\t"
> + ".size ibt_selftest_ip, 8\n\t"
> + "ibt_selftest_ip:\n\t"
> + ".quad 2b\n\t"
> + ".popsection\n\t"
> +
> + : "=a" (ret) : : "memory");
> +
> + return !ret;
> +}

I did something like this for LKDTM, but I realize it depends on having no
frame pointer, and is likely x86-specific too, as I think arm64's function
preamble is responsible for pushing the return address on the stack:


static volatile int lkdtm_just_count;

/* Function taking one argument, returning int. */
static noinline void *lkdtm_just_return(void)
{
/* Do something after preamble but before label. */
lkdtm_just_count++;

yolo:
{
void *right_here = &&yolo;

OPTIMIZER_HIDE_VAR(right_here);
return right_here;
}
}
/*
* This tries to call an indirect function in the middle.
*/
void lkdtm_CFI_FORWARD_ENTRY(void)
{
/*
* Matches lkdtm_increment_void()'s prototype, but not
* lkdtm_increment_int()'s prototype.
*/
void * (*func)(void);

func = lkdtm_just_return;
pr_info("Calling actual function entry point %px ...\n", func);
func = func();

pr_info("Calling middle of function %px ...\n", func);
func = func();

pr_err("FAIL: survived non-entry point call!\n");
#ifdef CONFIG_X86
pr_expected_config(CONFIG_X86_BTI);
#endif
}

--
Kees Cook

2022-02-26 01:44:48

by Josh Poimboeuf

[permalink] [raw]
Subject: Re: [PATCH v2 18/39] x86/ibt: Add IBT feature, MSR and #CP handling

On Fri, Feb 25, 2022 at 11:51:01AM +0100, Peter Zijlstra wrote:
> > > +bool ibt_selftest(void)
> > > +{
> > > + unsigned long ret;
> > > +
> > > + asm ("1: lea 2f(%%rip), %%rax\n\t"
> > > + ANNOTATE_RETPOLINE_SAFE
> > > + " jmp *%%rax\n\t"
> > > + ASM_REACHABLE
> > > + ANNOTATE_NOENDBR
> > > + "2: nop\n\t"
> > > +
> > > + /* unsigned ibt_selftest_ip = 2b */
> > > + ".pushsection .rodata,\"a\"\n\t"
> > > + ".align 8\n\t"
> > > + ".type ibt_selftest_ip, @object\n\t"
> > > + ".size ibt_selftest_ip, 8\n\t"
> > > + "ibt_selftest_ip:\n\t"
> > > + ".quad 2b\n\t"
> > > + ".popsection\n\t"
> >
> > It's still seems silly to make this variable in asm.
> >
> > Also .rodata isn't going to work for CPU hotplug.
>
> It's just the IP, that stays invariant. I'm not sure how else to match
> regs->ip to 2 in #CP.

Ah, I see what you mean now. Still, it can just reference the code
label itself without having to allocate storage:

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 4806fa0adec7..cfaa05ddd1ec 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -225,7 +225,7 @@ static void handle_endbr(struct pt_regs *regs)
BUG();
}

-extern const unsigned long ibt_selftest_ip; /* defined in asm below */
+void ibt_selftest_ip(void); /* code label defined in asm below */

DEFINE_IDTENTRY_ERRORCODE(exc_control_protection)
{
@@ -237,7 +237,7 @@ DEFINE_IDTENTRY_ERRORCODE(exc_control_protection)
if (WARN_ON_ONCE(user_mode(regs) || error_code != 3))
return;

- if (unlikely(regs->ip == ibt_selftest_ip)) {
+ if (unlikely(regs->ip == (unsigned long)ibt_selftest_ip)) {
regs->ax = 0;
return;
}
@@ -249,22 +249,12 @@ bool ibt_selftest(void)
{
unsigned long ret;

- asm ("1: lea 2f(%%rip), %%rax\n\t"
+ asm ("1: lea ibt_selftest_ip(%%rip), %%rax\n\t"
ANNOTATE_RETPOLINE_SAFE
" jmp *%%rax\n\t"
ASM_REACHABLE
ANNOTATE_NOENDBR
- "2: nop\n\t"
-
- /* unsigned ibt_selftest_ip = 2b */
- ".pushsection .rodata,\"a\"\n\t"
- ".align 8\n\t"
- ".type ibt_selftest_ip, @object\n\t"
- ".size ibt_selftest_ip, 8\n\t"
- "ibt_selftest_ip:\n\t"
- ".quad 2b\n\t"
- ".popsection\n\t"
-
+ "ibt_selftest_ip: nop\n\t"
: "=a" (ret) : : "memory");

return !ret;

2022-02-26 02:40:32

by Edgecombe, Rick P

[permalink] [raw]
Subject: Re: [PATCH v2 18/39] x86/ibt: Add IBT feature, MSR and #CP handling

On Thu, 2022-02-24 at 15:51 +0100, Peter Zijlstra wrote:
> +__noendbr void cet_disable(void)
> +{
> + if (cpu_feature_enabled(X86_FEATURE_IBT))
> + wrmsrl(MSR_IA32_S_CET, 0);
> +}
> +

Did this actually work? There are actually two problems with kexecing
when CET is enabled. One is leaving the enforcement enabled when the
new kernel can't handle it. The other is that CR4.CET and CR0.WP are
tied together such that if you try to disable CR0.WP while CR4.CET is
still set, it will #GP. CR0.WP gets unset during kexec/boot in the new
kernel, so it blows up if you just disable IBT with the MSR and leave
the CR4 bit set.

I was under the impression that this had been tested in the userspace
series, but apparently not as I've just produced the CR0.WP issue. So
it needs to be fixed in that series too. Userspace doesn't really need
it pinned, so it should be easy.

For kernel IBT, to enumerate a few options for kexec/pinning:

1. Just remove CR4.CET from the pinning mask, and unset it normally.
2. Leave it in the pinning mask and add separate non-pin-checking
inlined CR4 write late in the kexec path to unset CR4.CET.
3. Remove the unsetting of CR0.WP in the boot path. This would
only support kexecing to new kernels (there were actually patches
for this from the KVM CR pinning stuff that detected whether the
new kernel was bootable and failed gracefully IIRC). It's
potentially fraught and not great to lose kexecing to old kernels.
4. Do (1) for now and then follow this series with a larger solution
that does (2) and also adds some level of MSR pinning.

2022-02-26 13:53:28

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH v2 18/39] x86/ibt: Add IBT feature, MSR and #CP handling

On Fri, Feb 25, 2022 at 03:51:22PM -0800, Josh Poimboeuf wrote:
> Ah, I see what you mean now. Still, it can just reference the code
> label itself without having to allocate storage:

Ooh, cute. Yes, of course... duh.

2022-03-01 17:30:44

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH v2 18/39] x86/ibt: Add IBT feature, MSR and #CP handling

On Fri, Feb 25, 2022 at 07:59:15PM +0000, Edgecombe, Rick P wrote:
> On Thu, 2022-02-24 at 15:51 +0100, Peter Zijlstra wrote:
> > +__noendbr void cet_disable(void)
> > +{
> > + if (cpu_feature_enabled(X86_FEATURE_IBT))
> > + wrmsrl(MSR_IA32_S_CET, 0);
> > +}
> > +
>
> Did this actually work?

No idea,.. I don't generally have kexec clue.

> There are actually two problems with kexecing
> when CET is enabled. One is leaving the enforcement enabled when the
> new kernel can't handle it. The other is that CR4.CET and CR0.WP are
> tied together such that if you try to disable CR0.WP while CR4.CET is
> still set, it will #GP. CR0.WP gets unset during kexec/boot in the new
> kernel, so it blows up if you just disable IBT with the MSR and leave
> the CR4 bit set.
>
> I was under the impression that this had been tested in the userspace
> series, but apparently not as I've just produced the CR0.WP issue. So
> it needs to be fixed in that series too. Userspace doesn't really need
> it pinned, so it should be easy.

So I see CR0 frobbing in identity_mapped and CR4 frobbing right after
it. Is there a reason to first do CR0 and then CR4 or can we flip them?
Otherwise we need to do CR4 twice.

(Also, whoever wrote that function with _5_ identically named labels in
it deserves something painful. Also, wth's up with that jmp 1f; 1:)

Something like so?

diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index 399f075ccdc4..5b65f6ec5ee6 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -114,6 +114,14 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
/* store the start address on the stack */
pushq %rdx

+ /*
+ * Clear X86_CR4_CET (if it was set) such that we can clear CR0_WP
+ * below.
+ */
+ movq %cr4, %rax
+ andq $~(X86_CR4_CET), %rax
+ movq %rax, %cr4
+
/*
* Set cr0 to a known state:
* - Paging enabled

2022-03-02 00:57:39

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH v2 18/39] x86/ibt: Add IBT feature, MSR and #CP handling

On Tue, Mar 01, 2022 at 04:14:42PM +0100, Peter Zijlstra wrote:

> Something like so?
>
> diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
> index 399f075ccdc4..5b65f6ec5ee6 100644
> --- a/arch/x86/kernel/relocate_kernel_64.S
> +++ b/arch/x86/kernel/relocate_kernel_64.S
> @@ -114,6 +114,14 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
> /* store the start address on the stack */
> pushq %rdx
>
> + /*
> + * Clear X86_CR4_CET (if it was set) such that we can clear CR0_WP
> + * below.
> + */
> + movq %cr4, %rax
> + andq $~(X86_CR4_CET), %rax
> + movq %rax, %cr4
> +
> /*
> * Set cr0 to a known state:
> * - Paging enabled

I *think* it worked, I 'apt install kexec-tools' and copied the magic
commands Josh gave over IRC and the machine went and came back real
quick.

Lacking useful console I can't say much more.

I pushed out a version with these things on.

2022-03-02 16:24:05

by Edgecombe, Rick P

[permalink] [raw]
Subject: Re: [PATCH v2 18/39] x86/ibt: Add IBT feature, MSR and #CP handling

On Tue, 2022-03-01 at 15:13 -0800, Josh Poimboeuf wrote:
> On Tue, Mar 01, 2022 at 10:02:45PM +0100, Peter Zijlstra wrote:
> > On Tue, Mar 01, 2022 at 04:14:42PM +0100, Peter Zijlstra wrote:
> >
> > > Something like so?
> > >
> > > diff --git a/arch/x86/kernel/relocate_kernel_64.S
> > > b/arch/x86/kernel/relocate_kernel_64.S
> > > index 399f075ccdc4..5b65f6ec5ee6 100644
> > > --- a/arch/x86/kernel/relocate_kernel_64.S
> > > +++ b/arch/x86/kernel/relocate_kernel_64.S
> > > @@ -114,6 +114,14 @@
> > > SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
> > > /* store the start address on the stack */
> > > pushq %rdx
> > >
> > > + /*
> > > + * Clear X86_CR4_CET (if it was set) such that we can clear
> > > CR0_WP
> > > + * below.
> > > + */
> > > + movq %cr4, %rax
> > > + andq $~(X86_CR4_CET), %rax
> > > + movq %rax, %cr4
> > > +
> > > /*
> > > * Set cr0 to a known state:
> > > * - Paging enabled
> >
> > I *think* it worked, I 'apt install kexec-tools' and copied the
> > magic
> > commands Josh gave over IRC and the machine went and came back real
> > quick.
> >
> > Lacking useful console I can't say much more.
> >
> > I pushed out a version with these things on.
>
> I just used your latest git tree, kexec into a non-IBT kernel worked
> for
> me as well.

I copied this over to the userspace series and it worked where it
failed before. And looking at the code, it seems like it should work as
well.

As for pinning strength, I'm not understanding this kexec asm enough to
say for sure how much better it is than just removing the bit from the
pinning mask. I think some future hardening around preventing turning
off IBT might still be worthwhile.

Kees, I think you brought up the pinning, what do you think of this?

2022-03-02 21:12:12

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH v2 18/39] x86/ibt: Add IBT feature, MSR and #CP handling

On Wed, Mar 02, 2022 at 01:59:46AM +0000, Edgecombe, Rick P wrote:
> As for pinning strength, I'm not understanding this kexec asm enough to
> say for sure how much better it is than just removing the bit from the
> pinning mask. I think some future hardening around preventing turning
> off IBT might still be worthwhile.
>
> Kees, I think you brought up the pinning, what do you think of this?

IIRC the whole purpose of that was to ensure that the
cr4_update_irqsoff() function itself isn't a useful gadget to manipulate
CR4 with.

2022-03-02 22:41:28

by Josh Poimboeuf

[permalink] [raw]
Subject: Re: [PATCH v2 18/39] x86/ibt: Add IBT feature, MSR and #CP handling

On Tue, Mar 01, 2022 at 10:02:45PM +0100, Peter Zijlstra wrote:
> On Tue, Mar 01, 2022 at 04:14:42PM +0100, Peter Zijlstra wrote:
>
> > Something like so?
> >
> > diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
> > index 399f075ccdc4..5b65f6ec5ee6 100644
> > --- a/arch/x86/kernel/relocate_kernel_64.S
> > +++ b/arch/x86/kernel/relocate_kernel_64.S
> > @@ -114,6 +114,14 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
> > /* store the start address on the stack */
> > pushq %rdx
> >
> > + /*
> > + * Clear X86_CR4_CET (if it was set) such that we can clear CR0_WP
> > + * below.
> > + */
> > + movq %cr4, %rax
> > + andq $~(X86_CR4_CET), %rax
> > + movq %rax, %cr4
> > +
> > /*
> > * Set cr0 to a known state:
> > * - Paging enabled
>
> I *think* it worked, I 'apt install kexec-tools' and copied the magic
> commands Josh gave over IRC and the machine went and came back real
> quick.
>
> Lacking useful console I can't say much more.
>
> I pushed out a version with these things on.

I just used your latest git tree, kexec into a non-IBT kernel worked for
me as well.

--
Josh

2022-03-03 00:05:27

by Kees Cook

[permalink] [raw]
Subject: Re: [PATCH v2 18/39] x86/ibt: Add IBT feature, MSR and #CP handling

On Wed, Mar 02, 2022 at 02:49:08PM +0100, Peter Zijlstra wrote:
> On Wed, Mar 02, 2022 at 01:59:46AM +0000, Edgecombe, Rick P wrote:
> > As for pinning strength, I'm not understanding this kexec asm enough to
> > say for sure how much better it is than just removing the bit from the
> > pinning mask. I think some future hardening around preventing turning
> > off IBT might still be worthwhile.
> >
> > Kees, I think you brought up the pinning, what do you think of this?
>
> IIRC the whole purpose of that was to ensure that the
> cr4_update_irqsoff() function itself isn't a useful gadget to manipulate
> CR4 with.

Right -- as long as the paths that touch cr4 are either respecting
pinning or are inlined at a point of no return (i.e. performing kexec),
they shouldn't end up being very helpful gadgets.

e.g. imagine the scenario of a controlled write to a function pointer
than just aims at a valid endbr function that manipulates cr: an
attacker will just call into that first to disable IBT, and then carry
on doing things as normal.

There is an argument to be made that if an attacker has that kind of
control, they could call any set of functions to do what they want,
but the point is to create an unfriendly environment to attackers so
that future defenses compose well together -- e.g. FGKASLR.

--
Kees Cook