2023-06-13 00:41:51

by Edgecombe, Rick P

[permalink] [raw]
Subject: [PATCH v9 28/42] x86/shstk: Add user-mode shadow stack support

Introduce basic shadow stack enabling/disabling/allocation routines.
A task's shadow stack is allocated from memory with VM_SHADOW_STACK flag
and has a fixed size of min(RLIMIT_STACK, 4GB).

Keep the task's shadow stack address and size in thread_struct. This will
be copied when cloning new threads, but needs to be cleared during exec,
so add a function to do this.

32 bit shadow stack is not expected to have many users and it will
complicate the signal implementation. So do not support IA32 emulation
or x32.

Co-developed-by: Yu-cheng Yu <[email protected]>
Signed-off-by: Yu-cheng Yu <[email protected]>
Signed-off-by: Rick Edgecombe <[email protected]>
Reviewed-by: Borislav Petkov (AMD) <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Acked-by: Mike Rapoport (IBM) <[email protected]>
Tested-by: Pengfei Xu <[email protected]>
Tested-by: John Allen <[email protected]>
Tested-by: Kees Cook <[email protected]>
---
arch/x86/include/asm/processor.h | 2 +
arch/x86/include/asm/shstk.h | 7 ++
arch/x86/include/uapi/asm/prctl.h | 3 +
arch/x86/kernel/shstk.c | 145 ++++++++++++++++++++++++++++++
4 files changed, 157 insertions(+)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 407d5551b6a7..2a5ec5750ba7 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -479,6 +479,8 @@ struct thread_struct {
#ifdef CONFIG_X86_USER_SHADOW_STACK
unsigned long features;
unsigned long features_locked;
+
+ struct thread_shstk shstk;
#endif

/* Floating point and extended processor state */
diff --git a/arch/x86/include/asm/shstk.h b/arch/x86/include/asm/shstk.h
index ec753809f074..2b1f7c9b9995 100644
--- a/arch/x86/include/asm/shstk.h
+++ b/arch/x86/include/asm/shstk.h
@@ -8,12 +8,19 @@
struct task_struct;

#ifdef CONFIG_X86_USER_SHADOW_STACK
+struct thread_shstk {
+ u64 base;
+ u64 size;
+};
+
long shstk_prctl(struct task_struct *task, int option, unsigned long features);
void reset_thread_features(void);
+void shstk_free(struct task_struct *p);
#else
static inline long shstk_prctl(struct task_struct *task, int option,
unsigned long arg2) { return -EINVAL; }
static inline void reset_thread_features(void) {}
+static inline void shstk_free(struct task_struct *p) {}
#endif /* CONFIG_X86_USER_SHADOW_STACK */

#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h
index 1cd44ecc9ce0..6a8e0e1bff4a 100644
--- a/arch/x86/include/uapi/asm/prctl.h
+++ b/arch/x86/include/uapi/asm/prctl.h
@@ -34,4 +34,7 @@
#define ARCH_SHSTK_DISABLE 0x5002
#define ARCH_SHSTK_LOCK 0x5003

+/* ARCH_SHSTK_ features bits */
+#define ARCH_SHSTK_SHSTK (1ULL << 0)
+
#endif /* _ASM_X86_PRCTL_H */
diff --git a/arch/x86/kernel/shstk.c b/arch/x86/kernel/shstk.c
index 41ed6552e0a5..3cb85224d856 100644
--- a/arch/x86/kernel/shstk.c
+++ b/arch/x86/kernel/shstk.c
@@ -8,14 +8,159 @@

#include <linux/sched.h>
#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/sched/signal.h>
+#include <linux/compat.h>
+#include <linux/sizes.h>
+#include <linux/user.h>
+#include <asm/msr.h>
+#include <asm/fpu/xstate.h>
+#include <asm/fpu/types.h>
+#include <asm/shstk.h>
+#include <asm/special_insns.h>
+#include <asm/fpu/api.h>
#include <asm/prctl.h>

+static bool features_enabled(unsigned long features)
+{
+ return current->thread.features & features;
+}
+
+static void features_set(unsigned long features)
+{
+ current->thread.features |= features;
+}
+
+static void features_clr(unsigned long features)
+{
+ current->thread.features &= ~features;
+}
+
+static unsigned long alloc_shstk(unsigned long size)
+{
+ int flags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_ABOVE4G;
+ struct mm_struct *mm = current->mm;
+ unsigned long addr, unused;
+
+ mmap_write_lock(mm);
+ addr = do_mmap(NULL, addr, size, PROT_READ, flags,
+ VM_SHADOW_STACK | VM_WRITE, 0, &unused, NULL);
+
+ mmap_write_unlock(mm);
+
+ return addr;
+}
+
+static unsigned long adjust_shstk_size(unsigned long size)
+{
+ if (size)
+ return PAGE_ALIGN(size);
+
+ return PAGE_ALIGN(min_t(unsigned long long, rlimit(RLIMIT_STACK), SZ_4G));
+}
+
+static void unmap_shadow_stack(u64 base, u64 size)
+{
+ while (1) {
+ int r;
+
+ r = vm_munmap(base, size);
+
+ /*
+ * vm_munmap() returns -EINTR when mmap_lock is held by
+ * something else, and that lock should not be held for a
+ * long time. Retry it for the case.
+ */
+ if (r == -EINTR) {
+ cond_resched();
+ continue;
+ }
+
+ /*
+ * For all other types of vm_munmap() failure, either the
+ * system is out of memory or there is bug.
+ */
+ WARN_ON_ONCE(r);
+ break;
+ }
+}
+
+static int shstk_setup(void)
+{
+ struct thread_shstk *shstk = &current->thread.shstk;
+ unsigned long addr, size;
+
+ /* Already enabled */
+ if (features_enabled(ARCH_SHSTK_SHSTK))
+ return 0;
+
+ /* Also not supported for 32 bit and x32 */
+ if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) || in_32bit_syscall())
+ return -EOPNOTSUPP;
+
+ size = adjust_shstk_size(0);
+ addr = alloc_shstk(size);
+ if (IS_ERR_VALUE(addr))
+ return PTR_ERR((void *)addr);
+
+ fpregs_lock_and_load();
+ wrmsrl(MSR_IA32_PL3_SSP, addr + size);
+ wrmsrl(MSR_IA32_U_CET, CET_SHSTK_EN);
+ fpregs_unlock();
+
+ shstk->base = addr;
+ shstk->size = size;
+ features_set(ARCH_SHSTK_SHSTK);
+
+ return 0;
+}
+
void reset_thread_features(void)
{
+ memset(&current->thread.shstk, 0, sizeof(struct thread_shstk));
current->thread.features = 0;
current->thread.features_locked = 0;
}

+void shstk_free(struct task_struct *tsk)
+{
+ struct thread_shstk *shstk = &tsk->thread.shstk;
+
+ if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) ||
+ !features_enabled(ARCH_SHSTK_SHSTK))
+ return;
+
+ if (!tsk->mm)
+ return;
+
+ unmap_shadow_stack(shstk->base, shstk->size);
+}
+
+static int shstk_disable(void)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK))
+ return -EOPNOTSUPP;
+
+ /* Already disabled? */
+ if (!features_enabled(ARCH_SHSTK_SHSTK))
+ return 0;
+
+ fpregs_lock_and_load();
+ /* Disable WRSS too when disabling shadow stack */
+ wrmsrl(MSR_IA32_U_CET, 0);
+ wrmsrl(MSR_IA32_PL3_SSP, 0);
+ fpregs_unlock();
+
+ shstk_free(current);
+ features_clr(ARCH_SHSTK_SHSTK);
+
+ return 0;
+}
+
long shstk_prctl(struct task_struct *task, int option, unsigned long features)
{
if (option == ARCH_SHSTK_LOCK) {
--
2.34.1



2023-06-27 17:58:13

by Mark Brown

[permalink] [raw]
Subject: Re: [PATCH v9 28/42] x86/shstk: Add user-mode shadow stack support

On Mon, Jun 12, 2023 at 05:10:54PM -0700, Rick Edgecombe wrote:

> +static void unmap_shadow_stack(u64 base, u64 size)
> +{
> + while (1) {
> + int r;
> +
> + r = vm_munmap(base, size);
> +
> + /*
> + * vm_munmap() returns -EINTR when mmap_lock is held by
> + * something else, and that lock should not be held for a
> + * long time. Retry it for the case.
> + */
> + if (r == -EINTR) {
> + cond_resched();
> + continue;
> + }

This looks generic, not even shadow stack specific - was there any
discussion of making it a vm_munmap_retry() (that's not a great name...)
or similar? I didn't see any in old versions of the thread but I
might've missed something.


Attachments:
(No filename) (702.00 B)
signature.asc (499.00 B)
Download all attachments

2023-06-28 00:13:44

by Dave Hansen

[permalink] [raw]
Subject: Re: [PATCH v9 28/42] x86/shstk: Add user-mode shadow stack support

On 6/27/23 10:20, Mark Brown wrote:
> On Mon, Jun 12, 2023 at 05:10:54PM -0700, Rick Edgecombe wrote:
>
>> +static void unmap_shadow_stack(u64 base, u64 size)
>> +{
>> + while (1) {
>> + int r;
>> +
>> + r = vm_munmap(base, size);
>> +
>> + /*
>> + * vm_munmap() returns -EINTR when mmap_lock is held by
>> + * something else, and that lock should not be held for a
>> + * long time. Retry it for the case.
>> + */
>> + if (r == -EINTR) {
>> + cond_resched();
>> + continue;
>> + }
> This looks generic, not even shadow stack specific - was there any
> discussion of making it a vm_munmap_retry() (that's not a great name...)
> or similar? I didn't see any in old versions of the thread but I
> might've missed something.

Yeah, that looks odd. Also odd is that none of the other users of
vm_munmap() bother to check the return value (except for one passthrough
in the nommu code).

I don't think the EINTR happens during contention, though. It's really
there to be able break out in the face of SIGKILL. I think that's why
nobody handles it: the task is dying anyway and nobody cares.

Rick, was this hunk here for a specific reason or were you just trying
to be diligent in handling errors?

2023-06-28 00:57:23

by Edgecombe, Rick P

[permalink] [raw]
Subject: Re: [PATCH v9 28/42] x86/shstk: Add user-mode shadow stack support

On Tue, 2023-06-27 at 16:46 -0700, Dave Hansen wrote:
> > This looks generic, not even shadow stack specific - was there any
> > discussion of making it a vm_munmap_retry() (that's not a great
> > name...)
> > or similar?  I didn't see any in old versions of the thread but I
> > might've missed something.
>
> Yeah, that looks odd.  Also odd is that none of the other users of
> vm_munmap() bother to check the return value (except for one
> passthrough
> in the nommu code).
>
> I don't think the EINTR happens during contention, though.  It's really
> there to be able break out in the face of SIGKILL.  I think that's why
> nobody handles it: the task is dying anyway and nobody cares.
>
> Rick, was this hunk here for a specific reason or were you just trying
> to be diligent in handling errors?

I'm not aware of any specific required cases. I think it is just pure
dilligence, originating from this comment:
https://lore.kernel.org/lkml/[email protected]/#t

I didn't see a need to remove it. 

The SIGKILL certaintly sounds like something more true than the
comment, but I can't do much of dive until next week. I would think we
need to handle EINTR differently to not WARN at least.

Yea, some of it does seem like the kind of thing that could live
outside of x86 shstk.c. But I'm not sure about the WARN part. That
should probably live in the caller. I guess someday we might even find
some shadow stack specific logic that could be cross-arch.

2023-07-07 00:26:51

by Edgecombe, Rick P

[permalink] [raw]
Subject: [PATCH] x86/shstk: Don't retry vm_munmap() on -EINTR

The existing comment around handling vm_munmap() failure when freeing a
shadow stack is wrong. It asserts that vm_munmap() returns -EINTR when
the mmap lock is only being held for a short time, and so the caller
should retry. Based on this wrong understanding, unmap_shadow_stack() will
loop retrying vm_munmap().

What -EINTR actually means in this case is that the process is going
away (see ae79878), and the whole MM will be torn down soon. In order
to facilitate this, the task should not linger in the kernel, but
actually do the opposite. So don't loop in this scenario, just abandon
the operation and let exit_mmap() clean it up. Also, update the comment
to reflect the actual meaning of the error code.

Signed-off-by: Rick Edgecombe <[email protected]>
---
arch/x86/kernel/shstk.c | 34 +++++++++++++++-------------------
1 file changed, 15 insertions(+), 19 deletions(-)

diff --git a/arch/x86/kernel/shstk.c b/arch/x86/kernel/shstk.c
index 47f5204b0fa9..cd10d074a444 100644
--- a/arch/x86/kernel/shstk.c
+++ b/arch/x86/kernel/shstk.c
@@ -134,28 +134,24 @@ static unsigned long adjust_shstk_size(unsigned long size)

static void unmap_shadow_stack(u64 base, u64 size)
{
- while (1) {
- int r;
+ int r;

- r = vm_munmap(base, size);
+ r = vm_munmap(base, size);

- /*
- * vm_munmap() returns -EINTR when mmap_lock is held by
- * something else, and that lock should not be held for a
- * long time. Retry it for the case.
- */
- if (r == -EINTR) {
- cond_resched();
- continue;
- }
+ /*
+ * mmap_write_lock_killable() failed with -EINTR. This means
+ * the process is about to die and have it's MM cleaned up.
+ * This task shouldn't ever make it back to userspace. In this
+ * case it is ok to leak a shadow stack, so just exit out.
+ */
+ if (r == -EINTR)
+ return;

- /*
- * For all other types of vm_munmap() failure, either the
- * system is out of memory or there is bug.
- */
- WARN_ON_ONCE(r);
- break;
- }
+ /*
+ * For all other types of vm_munmap() failure, either the
+ * system is out of memory or there is bug.
+ */
+ WARN_ON_ONCE(r);
}

static int shstk_setup(void)
--
2.34.1